def test_initialise(self):
    """Check the state of a freshly constructed Section.

    The title and level passed to the constructor must be stored on
    ``_title`` / ``_level``, and the object must report itself as a
    section and not as a document.
    """
    section = Section('aTitle', 1)
    self.assertIsInstance(section, Section)
    self.assertEqual(section._title, 'aTitle')
    self.assertEqual(section._level, 1)
    self.assertTrue(section.is_section())
    self.assertFalse(section.is_document())
def test_subsection(self):
    """Check that an added subsection is recorded and can be looked up by title."""
    parent = Section('aTitle', 1)
    # The return value is not needed here; lookup goes through section().
    parent.add_subsection('other', 2)
    self.assertEqual(len(parent._ordered_subsections), 1)

    found = parent.section('other')
    self.assertIsNotNone(found)
    self.assertIsInstance(found, Section)
def test_get_all_paragraphs(self):
    """Check that get_all_paragraphs on a parent includes a subsection's paragraph."""
    expected_text = 'This is a sub section paragraph'
    parent = Section('aTitle', 1)
    child = parent.add_subsection('other', 2)
    child.add_paragraph(expected_text)

    collected = parent.get_all_paragraphs()
    self.assertEqual(len(collected), 1)
    self.assertEqual(collected[0]['text'].text, expected_text)
def parseText(text):
    """
    Wraps the parser with logic to create the appropriate data structures

    The text is split with LINESEP and every non-empty chunk is parsed with
    ROOT. A chunk whose first parse result is a HEADER opens a section: the
    first header seen becomes the root Section, and every later header is
    added as a subsection of the current section (when its level is larger)
    or of the nearest ancestor whose level is smaller. Any other chunk is
    added to the current section as a paragraph.

    TAG results that follow a header are added to that section; TAG and
    CITATION results of a paragraph chunk are added to the paragraph's
    'tags' and 'citations' entries.

    Returns the root Section, or None when text holds no non-empty chunk.
    Any exception raised by ROOT.parseString is logged together with the
    offending chunk and re-raised unchanged.
    """
    rootSection = None
    currentSection = None
    # Splits text into paragraphs to parse, which is easier than making the
    # parser more complex. Iterating directly avoids the repeated front-pops
    # of a list, which cost O(n) each.
    for current in LINESEP.split(text):
        if current == "":
            continue
        try:
            results = ROOT.parseString(current)
        except Exception:
            logging.info('Parser Issue with: {}'.format(current))
            raise

        # An empty parse result is simply treated as a plain paragraph.
        header = None
        if bool(results) and isinstance(results[0], HEADER):
            header = results[0]

        if header is None:
            # Paragraphs of a section.
            # NOTE(review): a paragraph chunk that appears before any header
            # reaches this line with currentSection still None and fails with
            # AttributeError -- confirm whether that input is meant to be
            # rejected this way.
            currentParagraph = currentSection.add_paragraph(current)
            for res in results:
                if isinstance(res, TAG):
                    currentParagraph['tags'].add(res.text)
                elif isinstance(res, CITATION):
                    currentParagraph['citations'].add(res.text)
            continue

        if rootSection is None:
            # A new, root section.
            currentSection = Section(header.title, header.level)
            rootSection = currentSection
        else:
            # A subsection. Climb until the current section is shallower than
            # the new header; this loop runs zero times when the header is
            # already deeper than the current section.
            # NOTE(review): if get_parent() returns None before a shallower
            # section is found (e.g. a header no deeper than the root), the
            # next comparison fails with AttributeError -- confirm intended.
            while header.level <= currentSection._level:
                currentSection = currentSection.get_parent()
            currentSection = currentSection.add_subsection(header.title,
                                                           header.level)

        # Everything after the header itself may carry tags for the section.
        for res in results[1:]:
            if isinstance(res, TAG):
                currentSection.add_tag(res.text)

    return rootSection
def test_deep_tags_paragraphs(self):
    """Check that a tag stored on a paragraph is visible through the section's has_tag."""
    section = Section('aTitle', 1)
    paragraph = section.add_paragraph('This is some random text')

    # A paragraph exposes these three entries.
    self.assertIn('text', paragraph)
    self.assertIn('tags', paragraph)
    self.assertIn('citations', paragraph)

    paragraph['tags'].add('atag')
    self.assertIn('atag', paragraph['tags'])

    # NOTE(review): the tag is stored as 'atag' but queried as 'aTag' --
    # presumably has_tag ignores case; confirm against Section.has_tag.
    self.assertTrue(section.has_tag('aTag'))
    self.assertFalse(section.has_tag('bloo'))
def test_local_sentence_count(self):
    """Check that get_sentence_count grows as paragraphs are added to a section."""
    one_sentence = 'This is a single sentence.'
    two_sentences = (
        'This adds a second, and third, sentence. How is the weather today?'
    )

    section = Section('aTitle', 1)
    section.add_paragraph(one_sentence)
    self.assertEqual(section.get_sentence_count(), 1)

    section.add_paragraph(two_sentences)
    self.assertEqual(section.get_sentence_count(), 3)
def test_deep_word_count(self):
    """Check that get_word_count on a parent also counts a subsection's words."""
    parent_text = 'This is the word sections paragraph. And a second sentence.'
    child_text = 'This is a sub word paragraph. The weather today is fairly nice.'

    parent = Section('aTitle', 1)
    child = parent.add_subsection('other', 2)
    parent.add_paragraph(parent_text)
    child.add_paragraph(child_text)

    # 10 words in the parent paragraph plus 12 in the subsection paragraph.
    self.assertEqual(parent.get_word_count(), 22)
def test_paragraphs(self):
    """Check that paragraphs are stored in insertion order and returned by get_paragraphs."""
    texts = ["this is a test text.", "this is another test text."]

    section = Section('aTitle', 1)
    self.assertEqual(len(section._paragraphs), 0)

    # Each addition must grow the internal paragraph store by one.
    for count, text in enumerate(texts, start=1):
        section.add_paragraph(text)
        self.assertEqual(len(section._paragraphs), count)

    stored = section.get_paragraphs()
    self.assertEqual(len(stored), 2)
    for index, text in enumerate(texts):
        self.assertEqual(stored[index]['text'].text, text)
def test_subsection_bad_level(self):
    """Check that adding a level 1 subsection to a level 1 section raises."""
    section = Section('aTitle', 1)
    with self.assertRaises(Exception):
        section.add_subsection('other', 1)
def test_tags(self):
    """Check that add_tag can be chained and that has_tag reports only added tags."""
    section = Section('aTitle', 1)
    # The second tag is added on whatever the first add_tag call returns.
    after_first = section.add_tag('blah')
    after_first.add_tag('bloo')

    for present in ('blah', 'bloo'):
        self.assertTrue(section.has_tag(present))
    self.assertFalse(section.has_tag('blee'))
def test_bad_set_parent(self):
    """Check that giving a level 1 section a level 2 section as parent raises."""
    shallow = Section('aTitle', 1)
    deeper = Section('AnotherTitle', 2)

    self.assertIsNone(shallow.get_parent())
    with self.assertRaises(Exception):
        shallow.set_parent(deeper)
def test_set_parent(self):
    """Check that a level 2 section accepts a level 1 section as its parent."""
    parent = Section('aTitle', 1)
    child = Section('AnotherTitle', 2)

    self.assertIsNone(child.get_parent())
    child.set_parent(parent)
    self.assertIsNotNone(child.get_parent())
def test_should_retrieval(self):
    """Check that ``should`` yields a Should whose ``_ref`` equals the section."""
    section = Section('aTitle', 1)
    self.assertIsInstance(section.should, Should)
    self.assertEqual(section.should._ref, section)
def test_citations_paragraphs(self):
    """Check that a citation stored on a paragraph is visible through has_citation."""
    citation = 'graeber 99'
    section = Section('aTitle', 1)
    paragraph = section.add_paragraph('this is some text')
    paragraph['citations'].add(citation)
    self.assertTrue(section.has_citation(citation))
def test_deep_tags_sections(self):
    """Check that a tag added to a subsection is reported by the parent's has_tag."""
    tag = 'blah'
    parent = Section('aTitle', 1)
    child = parent.add_subsection('subsection', 2)
    child.add_tag(tag)
    self.assertTrue(parent.has_tag(tag))