def traverse(iNode):
    """Prune the subtree rooted at iNode in place and say whether to keep it.

    The caller is expected to remove iNode from its parent when this
    returns False; the recursive calls made here do exactly that for
    children.

    Kept nodes:
      * 'document', 'content' directly under 'document', and 'section'
        not directly under 'activity' -- their children are filtered
        recursively;
      * 'title' directly under 'section' -- kept untouched;
      * 'exercises' -- kept after being cleaned up (see inline comments).

    Everything else is rejected; a warning and the serialized node are
    written to stderr when the rejected node contains 'exercises'
    descendants.

    Args:
        iNode: an lxml element (it must provide getparent() and xpath()).

    Returns:
        True if iNode should stay in the tree, False if it should be removed.
    """
    tag = iNode.tag
    parent = iNode.getparent()
    # A root element has no parent; use None as its parent tag rather than
    # crashing on parent.tag.
    parentTag = parent.tag if parent is not None else None

    isContainer = (
        (tag == 'document')
        or ((tag == 'content') and (parentTag == 'document'))
        or ((tag == 'section') and (parentTag != 'activity'))
    )
    if isContainer:
        # Iterate over a snapshot because rejected children are removed
        # from iNode while looping.
        for child in list(iNode):
            if not traverse(child):
                iNode.remove(child)
        return True

    if (tag == 'title') and (parentTag == 'section'):
        return True

    if tag == 'exercises':
        # Move every <correct> whose DOM path mentions 'solution' so that it
        # becomes a child of its enclosing <entry>.
        for correctNode in iNode.xpath('.//correct'):
            fullDomPath = get_full_dom_path(correctNode)
            if 'solution' not in fullDomPath:
                continue
            correctNode.tail = ''
            entryNode = correctNode.getparent()
            while entryNode.tag != 'entry':
                entryNode = entryNode.getparent()
            if 'latex' in fullDomPath:
                # The node is turned into <latex> and wrapped in a fresh
                # <correct> appended to the entry.
                remove_ampersands(correctNode)
                correctNode.tag = 'latex'
                wrapper = etree.Element('correct')
                entryNode.append(wrapper)
                wrapper.append(correctNode)
            else:
                entryNode.append(correctNode)

        # Drop these elements entirely.
        for dropTag in ['header', 'footer', 'shortcode']:
            for child in iNode.xpath('.//' + dropTag):
                child.getparent().remove(child)

        # Keep these elements but empty them (clear() resets text, tail,
        # attributes and children).
        for clearTag in ['problem', 'solution']:
            for child in iNode.xpath('.//' + clearTag):
                child.clear()

        # Strip the style "columns" attribute from problem sets.
        columnsAttr = '{http://siyavula.com/cnxml/style/0.1}columns'
        for child in iNode.xpath('.//problem-set'):
            if child.attrib.get(columnsAttr) is not None:
                del child.attrib[columnsAttr]
        return True

    if len(iNode.xpath('.//exercises')) > 0:
        sys.stderr.write('WARNING: Deleting node that contains exercises.\n')
        # tostring() returns bytes on Python 3; its default output is pure
        # ASCII (other characters become numeric references), so decoding
        # as ASCII is always safe and lets us append the newline.
        sys.stderr.write(etree.tostring(iNode).decode('ascii') + '\n')
    return False
Example #2
0
 def __get_full_dom_path(self, iNode):
     """Return utils.get_full_dom_path(iNode) evaluated against self.spec."""
     spec = self.spec
     return utils.get_full_dom_path(iNode, iSpec=spec)
Example #3
0
def _build_unit_number(iNumberNode, iUnitText):
    """Wrap iNumberNode in a <unit_number> element followed by a <unit>."""
    unitNumberNode = etree.Element('unit_number')
    unitNumberNode.append(iNumberNode)
    unitNode = etree.Element('unit')
    unitNode.text = iUnitText
    unitNumberNode.append(unitNode)
    return unitNumberNode


def find_substitutions(iText, iNode=None, iPreTailTag=None, iPostTailTag=None):
    """Find numbers and currency amounts in iText and propose XML replacements.

    Every match of the module-level numberAndCurrencyPattern is converted
    into a <number>, <currency> or <unit_number> element.  The candidate is
    shown to prompt_replace together with some surrounding context and two
    "warning" lengths (how many characters before / after the match look
    suspicious); it is recorded only when prompt_replace returns a true
    value.

    Args:
        iText: the text to scan.
        iNode: the element that owns iText, or None.  Its tag decides
            whether the text is treated as LaTeX and is used to decorate
            the context strings.
        iPreTailTag: optional tag name shown as a closed element before the
            text in the leading context.
        iPostTailTag: optional tag name shown as an opened element after
            the text in the trailing context.

    Returns:
        A list of (start, stop, newNode) tuples, where iText[start:stop] is
        the span to replace (including any unit that was absorbed) and
        newNode is the replacement element.
    """
    # iNode defaults to None, so never dereference it directly.
    nodeTag = iNode.tag if iNode is not None else None
    numericChars = '.,0123456789'
    contextLength = 20

    oSubstitutions = []
    for match in numberAndCurrencyPattern.finditer(iText):
        context = [None, None]
        preContextWarning = 0
        postContextWarning = 0

        start, stop = match.span()
        oldNumber = iText[start:stop]
        if oldNumber[0] == 'R':
            # Currency: the amount follows the leading 'R'.
            amountText = oldNumber[1:].replace(' ', '').replace(',', '.')
            newNode = etree.Element('currency')
            newNode.append(etree.Element('number'))
            newNode[0].text = amountText
            # Check whether we need to use a non-default precision
            # (more than two digits after the decimal point).
            pos = amountText.find('.')
            if (pos != -1) and (pos < len(amountText) - 3):
                newNode.attrib['precision'] = str(len(amountText) - pos - 1)
            # The pre-context is funny when letters directly precede the 'R'.
            while ((preContextWarning < contextLength)
                   and (start - preContextWarning > 0)
                   and ('a' <= iText[start - preContextWarning - 1].lower() <= 'z')):
                preContextWarning += 1
        else:
            # Number
            numberText = oldNumber.replace(' ', '').replace(',', '.')
            newNode = etree.Element('number')
            newNode.text = numberText
            # The pre-context is funny when, after skipping spaces, more
            # numeric characters precede the match.
            offset = 0
            while (start - offset > 0) and (iText[start - offset - 1] == ' '):
                offset += 1
            if (start - offset > 0) and (iText[start - offset - 1] in numericChars):
                preContextWarning = min(contextLength, offset + 1)
                while ((preContextWarning < contextLength)
                       and (start - preContextWarning > 0)
                       and (iText[start - preContextWarning - 1] in numericChars)):
                    preContextWarning += 1
            # The post-context is funny when an ordinal suffix follows.
            if iText[stop:stop + 2] in ['st', 'nd', 'rd', 'th']:
                postContextWarning = 2
            else:
                # Check if there are units.  A distinct name is used so the
                # loop's own match object is not shadowed.
                unitMatch = unitPattern.match(iText[stop:])
                if (nodeTag != 'latex') and (unitMatch is not None):
                    matchedText = unitMatch.group()
                    newNode = _build_unit_number(newNode, matchedText.strip())
                    oldNumber += iText[stop:stop + len(matchedText)]
                    stop += len(matchedText)
                else:
                    # Check if there are units in LaTeX mode
                    unitMatch = latexUnitPattern.match(iText[stop:])
                    if unitMatch is not None:
                        matchedText = unitMatch.group()
                        unitText = matchedText.strip()
                        if unitText == r'\ell':
                            unitText = u'ℓ'
                        elif unitText != u'°':
                            # Anything other than \ell or the degree sign
                            # must be wrapped in \textrm{...}; unwrap it.
                            assert unitText[:8] == r'\textrm{'
                            assert unitText[-1] == '}'
                            unitText = unitText[8:-1].strip()
                        newNode = _build_unit_number(newNode, unitText)
                        oldNumber += iText[stop:stop + len(matchedText)]
                        stop += len(matchedText)
                    elif lazyMode and (
                            (nodeTag == 'latex')
                            or ((nodeTag == 'correct')
                                and ('latex' in get_full_dom_path(iNode)))):
                        # Skip unit-less numbers that are small integers and
                        # in LaTeX mode
                        try:
                            value = int(numberText)
                        except ValueError:
                            pass
                        else:
                            if abs(value) < 1000:
                                continue

        # Check if the post-context is funny (for both currency and number)
        # NOTE(review): this scan starts at iText[stop+1], i.e. it skips the
        # character directly after the match, whereas the pre-context scan
        # starts at iText[start-1].  Possibly an off-by-one; kept as is
        # because the expected semantics of prompt_replace are not visible
        # here -- confirm.
        if postContextWarning == 0:
            offset = 0
            while (stop + offset + 1 < len(iText)) and (iText[stop + offset + 1] == ' '):
                offset += 1
            if (stop + offset + 1 < len(iText)) and (iText[stop + offset + 1] in numericChars):
                postContextWarning = min(contextLength, offset + 1)
            while ((postContextWarning < contextLength)
                   and (stop + postContextWarning + 1 < len(iText))
                   and (iText[stop + postContextWarning + 1] in numericChars)):
                postContextWarning += 1

        # Leading context: opening tag (if known) plus up to contextLength
        # characters before the match.
        if nodeTag is not None:
            context[0] = '<' + nodeTag + '>'
            if iPreTailTag is not None:
                context[0] += ' ... </' + iPreTailTag + '>'
        else:
            context[0] = ''
        if start < contextLength:
            context[0] += iText[:start]
        else:
            context[0] += ' ...' + iText[start - contextLength:start]

        # Trailing context: up to contextLength characters after the match
        # plus the closing tag (if known).
        if stop + contextLength > len(iText):
            context[1] = iText[stop:]
            if iPostTailTag is not None:
                context[1] += '<' + iPostTailTag + '>' + ' ... '
        else:
            context[1] = iText[stop:stop + contextLength] + '... '
        if nodeTag is not None:
            context[1] += '</' + nodeTag + '>'

        newXml = etree.tostring(newNode, encoding='utf-8').decode('utf-8')
        if prompt_replace(oldNumber, newXml, context, (preContextWarning, postContextWarning)):
            oSubstitutions.append((start, stop, newNode))

    return oSubstitutions