Ejemplo n.º 1
0
def parseText(text):
    """
    Parse ``text`` into a tree of sections and return the root section.

    The text is split into chunks with ``LINESEP`` and every non-empty chunk
    is parsed on its own with ``ROOT``, which is simpler than making the
    grammar handle a whole document. A chunk whose first token is a
    ``HEADER`` opens a section: the first header becomes the root, and each
    later header is attached under the closest section on the current
    ancestor chain whose level is strictly smaller. Any other chunk is added
    as a paragraph of the current section. ``TAG`` tokens are added to the
    section (header chunks) or to the paragraph's ``'tags'`` entry (paragraph
    chunks); ``CITATION`` tokens are added to the paragraph's ``'citations'``
    entry.

    Returns:
        The root section, or ``None`` when ``text`` has no non-empty chunk.

    Raises:
        ValueError: if a paragraph chunk appears before any header, or if a
            later header's level is not deeper than the root section's level
            (previously both cases failed with an ``AttributeError`` on
            ``None``).
        Exception: anything raised by ``ROOT.parseString`` is logged and
            re-raised unchanged.
    """
    rootSection = None
    currentSection = None
    # Iterate in order instead of popping from the front of the list, which
    # costs O(n) per pop.
    for current in LINESEP.split(text):
        if current == "":
            continue
        try:
            results = ROOT.parseString(current)
        except Exception:
            logging.info('Parser Issue with: {}'.format(current))
            # Bare raise re-raises the active exception as-is.
            raise

        # An empty parse result has no first token; such a chunk is treated
        # as a plain paragraph with nothing to attach.
        first = results[0] if results else None

        if isinstance(first, HEADER):
            if rootSection is None:
                # The very first header becomes the root section.
                currentSection = Section(first.title, first.level)
                rootSection = currentSection
            else:
                # Climb until the current section is shallower than the new
                # header; no climbing happens for a direct subsection.
                while first.level <= currentSection._level:
                    parent = currentSection.get_parent()
                    if parent is None:
                        raise ValueError(
                            'Header {!r} (level {}) is not deeper than the '
                            'root section'.format(first.title, first.level))
                    currentSection = parent
                currentSection = currentSection.add_subsection(first.title,
                                                               first.level)
            # Tokens following the header may carry tags for the new section.
            for res in results[1:]:
                if isinstance(res, TAG):
                    currentSection.add_tag(res.text)
            continue

        # Everything else is a paragraph of the current section.
        if currentSection is None:
            raise ValueError(
                'Paragraph found before any header: {!r}'.format(current))
        currentParagraph = currentSection.add_paragraph(current)
        for res in results:
            if isinstance(res, TAG):
                currentParagraph['tags'].add(res.text)
            elif isinstance(res, CITATION):
                currentParagraph['citations'].add(res.text)

    return rootSection
Ejemplo n.º 2
0
 def test_bad_set_parent(self):
     """A level-1 section must reject a level-2 section as its parent."""
     shallow = Section('aTitle', 1)
     deep = Section('AnotherTitle', 2)
     # A freshly built section starts without a parent.
     parent_before = shallow.get_parent()
     self.assertIsNone(parent_before)
     # Callable form of assertRaises: invokes shallow.set_parent(deep).
     self.assertRaises(Exception, shallow.set_parent, deep)
Ejemplo n.º 3
0
 def test_set_parent(self):
     """A level-2 section accepts a level-1 section as its parent."""
     shallow = Section('aTitle', 1)
     deep = Section('AnotherTitle', 2)
     # No parent until one is assigned explicitly.
     parent_before = deep.get_parent()
     self.assertIsNone(parent_before)
     deep.set_parent(shallow)
     parent_after = deep.get_parent()
     self.assertIsNotNone(parent_after)