Esempio n. 1
0
 def __handleParagraph__(self, dstree, dsnode, paragraph):
     """Break a paragraph into sentences and pass each to the sentence handler.

     The result of ``self.sentence.split`` is walked by position: items at
     odd positions are remembered, and the last character of a remembered
     item is put in front of the next non-empty item at an even position
     (presumably the pattern captures its separator -- confirm against the
     definition of ``self.sentence``).

     Each sentence is given a trailing period when it lacks one, and when
     its first character is a double or single quote that occurs an odd
     number of times, the same quote is appended. The stripped sentence is
     then passed to ``__handleTextSentence__``. Once all pieces have been
     processed, the value held by ``dsnode`` is deleted.
     """
     carried = ''
     for position, piece in enumerate(self.sentence.split(paragraph)):
         if position % 2 != 0:
             # Odd slot: keep it for the piece that follows.
             carried = piece
             continue

         if len(piece) == 0:
             continue

         if len(carried) > 0:
             opener = carried[-1]
             piece = opener + piece
             carried = ''
         else:
             opener = piece[0]

         # Nothing is carried over at this point, so only the final
         # character of the piece needs to be inspected.
         if piece[-1] != '.':
             piece += '.'

         if opener == DocumentStructure.SYMBOL_DBLQUOTE:
             quote = DocumentStructure.SYMBOL_DBLQUOTE
         elif opener == DocumentStructure.SYMBOL_SINGLEQUOTE:
             quote = DocumentStructure.SYMBOL_SINGLEQUOTE
         else:
             quote = None

         # Close a quotation that was opened but never finished.
         if quote is not None and piece.count(quote) % 2 != 0:
             piece += opener

         self.__handleTextSentence__(dstree, dsnode, piece.strip())

     DocumentStructure.deleteValue(dsnode)
Esempio n. 2
0
 def __handle_title__(self, xmlnode, dstree, dsnode):
     """Store the parsed value of a title element on the document node.

     Args:
         xmlnode: XML element whose value is read with ``util.parseValueFrom``.
         dstree: document structure tree (not used by this handler).
         dsnode: current node; must be at ``DocumentStructure.DOCUMENT`` level.

     Returns:
         The same ``dsnode``, after its value has been set.

     Raises:
         Exception: if ``dsnode`` is not at document level.
     """
     # Assumes document node
     if DocumentStructure.getLevel(dsnode) != DocumentStructure.DOCUMENT:
         # The call form of raise is valid on both Python 2 and Python 3;
         # the comma form is a syntax error on Python 3.
         raise Exception("Unexpected level")

     DocumentStructure.setNode(dsnode, value=util.parseValueFrom(xmlnode))
     return dsnode
Esempio n. 3
0
def appendToValue(dsnode, value):
    """Append ``value`` to the value already stored on ``dsnode``.

    When the node already has a value, the two are joined with a single
    space. When the node has no value, ``value`` becomes the node's value.
    When ``value`` is None the existing value is written back unchanged.

    Args:
        dsnode: node whose value is read and then updated.
        value: text to append, or None to leave the current value as it is.
    """
    newValue = DocumentStructure.getValue(dsnode)

    # Identity tests are the reliable way to detect None; an equality test
    # can be overridden by the compared object.
    if value is not None:
        if newValue is not None:
            # Ensure a space is added between tags
            newValue = newValue + ' ' + value
        else:
            newValue = value

    DocumentStructure.setNode(dsnode, value=newValue)
Esempio n. 4
0
 def handle(self, dstree, dsnode):
     """Normalise the text value of a node and split paragraphs into sentences.

     The value is passed through the ``multispaces``, ``spacepunct1`` and
     ``spacepunct2`` patterns (replaced by ``' '``, ``','`` and ``'.'``
     respectively), stripped, and written back to the node. Nodes at
     paragraph level are then handed to ``__handleParagraph__``.

     Nodes with no value (None) or an empty value are left untouched.

     Args:
         dstree: document structure tree, forwarded to the paragraph handler.
         dsnode: node whose value is normalised.
     """
     value = DocumentStructure.getValue(dsnode)

     # A node may carry no value at all; treat that like an empty string
     # instead of failing on len(None).
     if not value:
         return

     value = self.multispaces.sub(' ', value)
     value = self.spacepunct1.sub(',', value)
     value = self.spacepunct2.sub('.', value)

     value = value.strip()
     DocumentStructure.setValue(dsnode, value)

     if util.getLevel(dsnode) == DocumentStructure.PARAGRAPH:
         # value is already stripped at this point.
         self.__handleParagraph__(dstree, dsnode, value)
Esempio n. 5
0
 def __handleTextClause__(self, dstree, dsnode, clause):
     """Add a TEXT_CLAUSE node for ``clause`` beneath ``dsnode``.

     The stripped clause is split with the ``self.phrase`` pattern. If the
     split yields a single piece, the clause is stored directly as the
     value of the new node. Otherwise an empty TEXT_CLAUSE node is created
     and every non-blank piece is passed to ``__handleTextPhrase__`` with
     that node as its parent. Finally the value held by ``dsnode`` is
     deleted.
     """
     pieces = re.split(self.phrase, clause.strip())

     if len(pieces) != 1:
         clauseNode = dstree.addNode(dsnode, DocumentStructure.TEXT_CLAUSE)
         for piece in pieces:
             # Splitting can yield None entries; those are skipped.
             if piece is None:
                 continue
             trimmed = piece.strip()
             if trimmed:
                 self.__handleTextPhrase__(dstree, clauseNode, trimmed)
     else:
         dstree.addNode(dsnode, DocumentStructure.TEXT_CLAUSE, value = clause)

     DocumentStructure.deleteValue(dsnode)
Esempio n. 6
0
 def __handle_caption__(self, xmlnode, dstree, dsnode):
     """Handle a caption element.

     A node is prepared for PARAGRAPH level, the element's value is parsed,
     and -- when the prepared node really is a paragraph -- the text is
     made to end in a period. The text is then passed to the abstract
     handler's ``handleReplace`` and the node is returned via
     ``handleIndent``.
     """
     target = self.prepareNode(dstree, dsnode, DocumentStructure.PARAGRAPH)
     caption = util.parseValueFrom(xmlnode)

     isParagraph = DocumentStructure.getLevel(target) == DocumentStructure.PARAGRAPH
     if isParagraph:
         caption = util.ensureEndsInPeriod(caption)

     self.abstractHandler.handleReplace(dstree, target, caption)
     return self.handleIndent(target, xmlnode)
Esempio n. 7
0
 def __handle_table__(self, xmlnode, dstree, dsnode):
     """Handle a table element by recording its ``summary`` attribute.

     Without a ``summary`` attribute the incoming node is only passed
     through ``handleIndent``. With one, a node is prepared for PARAGRAPH
     level, the summary is made to end in a period when that node is a
     paragraph, and the text (padded with a space on each side) is appended
     to the node's value before ``handleIndent`` is applied.
     """
     attributes = xmlnode.attrib
     if 'summary' not in attributes:
         return self.handleIndent(dsnode, xmlnode)

     summary = attributes['summary']
     target = self.prepareNode(dstree, dsnode, DocumentStructure.PARAGRAPH)

     if DocumentStructure.getLevel(target) == DocumentStructure.PARAGRAPH:
         summary = util.ensureEndsInPeriod(summary)

     util.appendToValue(target, ' ' + summary + ' ')
     return self.handleIndent(target, xmlnode)
Esempio n. 8
0
 def __handleTextSentence__(self, dstree, dsnode, sentence):
     """Add a TEXT_SENTENCE node for ``sentence`` beneath ``dsnode``.

     The stripped sentence is split with ``self.clause``. If the split
     yields a single piece, the sentence is stored directly as the value of
     the new node. Otherwise an empty TEXT_SENTENCE node is created; items
     at odd positions of the split are remembered and glued onto the front
     of the following even-position item, and each resulting clause is
     passed to ``__handleTextClause__``. Finally the value held by
     ``dsnode`` is deleted.
     """
     parts = self.clause.split(sentence.strip())

     if len(parts) != 1:
         sentenceNode = dstree.addNode(dsnode, DocumentStructure.TEXT_SENTENCE)

         pending = ''
         for index, part in enumerate(parts):
             if index % 2 != 0:
                 # Odd slot: hold it back for the next clause.
                 pending = part
                 continue

             if len(pending) > 0:
                 part = pending + part
                 pending = ''

             self.__handleTextClause__(dstree, sentenceNode, part)
     else:
         dstree.addNode(dsnode, DocumentStructure.TEXT_SENTENCE, value = sentence)

     DocumentStructure.deleteValue(dsnode)
Esempio n. 9
0
 def prepareNode(self, dstree, dsnode, level, indent = None):
     """Move to, or create, the node under which content of ``level`` belongs.

     The current node's level, the level above ``level`` and ``level``
     itself are converted to integers with ``util.levelToInt`` and compared.
     NOTE(review): the loops below only terminate if a level higher in the
     hierarchy maps to a larger integer -- confirm against util.levelToInt.

     Args:
         dstree: tree used to add new nodes.
         dsnode: node the caller is currently positioned on.
         level: the DocumentStructure level that is about to be handled.
         indent: indent passed to newly added nodes, or set on the node
             reached by walking up; defaults to None.

     Returns:
         The node that was created or reached.

     Side effects:
         May add nodes to ``dstree``; sets ``self.preInsertedParagraph`` in
         the first branch; sets the indent of the returned node in the
         second branch.
     """
     curLevel     = util.levelToInt(DocumentStructure.getLevel(dsnode))
     levelAbove   = util.levelToInt(util.levelAbove(level))
     levelDesired = util.levelToInt(level)
     
     if curLevel >= levelAbove:
         # Add one child per step, each one level below its parent, until
         # the current node sits at the level directly above `level`.
         while curLevel > levelAbove:
             newLevel = util.levelBelow(DocumentStructure.getLevel(dsnode))
             dsnode = dstree.addNode(dsnode, level = newLevel, indent = indent)
             curLevel = util.levelToInt(DocumentStructure.getLevel(dsnode))
             
         if level == DocumentStructure.PARAGRAPH:
             # Paragraph nodes are created here and the fact is recorded on
             # the instance.
             self.preInsertedParagraph = True
             dsnode = dstree.addNode(dsnode, level = DocumentStructure.PARAGRAPH, indent = indent)
         else:
             self.preInsertedParagraph = False
             
     elif curLevel < levelAbove:
         # Walk up through the parents until the node's level is no longer
         # below the requested one.
         while curLevel < levelDesired:
             dsnode = dsnode.getparent()
             curLevel = util.levelToInt(DocumentStructure.getLevel(dsnode))
             
         # For anything but a paragraph, step up one more parent.
         if level != DocumentStructure.PARAGRAPH:
             dsnode = dsnode.getparent()
             
         DocumentStructure.setIndent(dsnode, indent)
             
     return dsnode
Esempio n. 10
0
 def __handle_img__(self, xmlnode, dstree, dsnode):
     """Handle an image element by recording its textual description.

     The description is taken from the ``alt`` attribute, falling back to
     ``title`` and then to an empty string. A node is prepared for PARAGRAPH
     level; when that node is a paragraph the description is made to end in
     a period. The text (padded with a space on each side) is appended to
     the node's value and the node is returned via ``handleIndent``.
     """
     attributes = xmlnode.attrib

     description = ''
     for key in ('alt', 'title'):
         if key in attributes:
             description = attributes[key]
             break

     target = self.prepareNode(dstree, dsnode, DocumentStructure.PARAGRAPH)

     if DocumentStructure.getLevel(target) == DocumentStructure.PARAGRAPH:
         description = util.ensureEndsInPeriod(description)

     util.appendToValue(target, ' ' + description + ' ')
     return self.handleIndent(target, xmlnode)
Esempio n. 11
0
 def handleIndent(self, dsnode, xmlnode):
     """Copy the indent read from ``xmlnode`` onto ``dsnode`` and return ``dsnode``."""
     DocumentStructure.setIndent(dsnode, DocumentStructure.getIndent(xmlnode))
     return dsnode
Esempio n. 12
0
def getLevel(dsnode):
    """Return the level of ``dsnode`` as reported by ``DocumentStructure.getLevel``."""
    level = DocumentStructure.getLevel(dsnode)
    return level