def test_normalize_parse_tree(self):
    """Check normalizeParseTree on every match of each region grammar.

    For each region grammar, scan ``self.tei_data1``, normalize every
    match, and verify that the normalized tree carries the expected
    ``'type'`` and a non-None ``'id'``.  After each scan, verify the
    number of matches found in the fixture.
    """
    # (grammar, expected normalized 'type', expected match count in tei_data1)
    cases = (
        (TEILiteParser.pRegion, 'p', 6),
        (TEILiteParser.headRegion, 'head', 5),
        (TEILiteParser.sssRegion, 'subsubsection', 2),
        (TEILiteParser.ssRegion, 'subsection', 1),
        (TEILiteParser.sRegion, 'section', 2),
    )
    for region, expected_type, expected_count in cases:
        num_matches = 0
        for parse_tree, _start, _end in region.scanString(self.tei_data1):
            num_matches += 1
            nparse_tree = TEILiteParser.normalizeParseTree(parse_tree.asList())
            self.assertEqual(nparse_tree['type'], expected_type)
            self.assertIsNotNone(nparse_tree['id'])
        # assertEqual, not assertTrue(count, n): assertTrue would treat n as
        # the failure message and accept any non-zero count.
        self.assertEqual(
            num_matches,
            expected_count,
            'expected %d %r regions, found %d'
            % (expected_count, expected_type, num_matches),
        )
def test_get_region_id(self):
    """Check getRegionID for subsubsection, subsection, and section regions.

    For each region type, fetch its grammar with ``getGrammarForUnit``,
    scan ``self.tei_data1``, and collect the id ``getRegionID`` returns for
    every match (passing the match's zero-based position in the scan).
    The collected ids must equal the expected list exactly.
    """
    parser = TEILiteParser()
    expected_ids = {
        'subsubsection': ['1', '2'],
        'subsection': ['1'],
        'section': ['1', '9'],
    }
    for region_type, expected in expected_ids.items():
        production = parser.getGrammarForUnit(region_type)
        ids = [
            parser.getRegionID(subtree, region_type, subtree_idx)
            for subtree_idx, (subtree, _start, _end)
            in enumerate(production.scanString(self.tei_data1))
        ]
        # Compare whole lists so that missing or extra matches fail the
        # test instead of passing vacuously or raising IndexError.
        self.assertEqual(ids, expected)