def test_annotate_basicnltk(self):
    """Annotate a sentence and check the item returned by ``at(2)``.

    The item is expected to carry the POS tag ``NNS``, the surface word
    ``pears`` and the lemma ``pear``.
    """
    text = 'Apples and pears are like badgers and bears.'
    annotated = BasicNltkAnnotator().annotate(text)
    item = annotated.at(2)

    # Attributes are checked one at a time, in this order, so the first
    # mismatch is the one reported.
    expectations = (
        ('pos', 'NNS'),
        ('word', 'pears'),
        ('lemma', 'pear'),
    )
    for attribute, expected in expectations:
        self.assertEqual(getattr(item, attribute), expected)
def test_annotate_nltk_geoextension1(self):
    """Extend an annotated sentence with GNN/ANIMAL types and count them.

    The categories are given inline as a dict and the extension annotator
    is built with ``stem=True``. Two words are expected to carry the
    ``ANIMAL`` type and five the ``GNN`` type.
    """
    text = 'Houses and rabbits look like badgers, oil rigs, and gas stations.'
    base_sentence = BasicNltkAnnotator().annotate(text)

    categories = {
        'GNN': ('NN*', 'tests/test-files/gnns.txt', 'n'),
        'ANIMAL': ('NN*', 'tests/test-files/animals.txt', 'n'),
    }
    extender = TypeExtensionAnnotator(categories, stem=True)
    extended = extender.extend(base_sentence)

    def count_with_type(label):
        # Number of words in the extended sentence tagged with `label`.
        return sum(1 for word in extended.words if label in word.types)

    animals = count_with_type('ANIMAL')
    gnns = count_with_type('GNN')

    self.assertEqual(animals, 2)
    self.assertEqual(gnns, 5)
def test_annotate_nltk_geoextension2(self):
    """Extend an annotated sentence with GNN/ANIMAL types and count them.

    The categories are given inline as a dict and the extension annotator
    is built with ``stem=True``. For this sentence one word is expected
    to carry the ``ANIMAL`` type and none the ``GNN`` type.
    """
    text = "Musical instruments don't sound like badgers."
    base_sentence = BasicNltkAnnotator().annotate(text)

    categories = {
        'GNN': ('NN*', 'tests/test-files/gnns.txt', 'n'),
        'ANIMAL': ('NN*', 'tests/test-files/animals.txt', 'n'),
    }
    extender = TypeExtensionAnnotator(categories, stem=True)
    extended = extender.extend(base_sentence)

    def count_with_type(label):
        # Number of words in the extended sentence tagged with `label`.
        return sum(1 for word in extended.words if label in word.types)

    animals = count_with_type('ANIMAL')
    gnns = count_with_type('GNN')

    self.assertEqual(animals, 1)
    self.assertEqual(gnns, 0)
def test_pattern_graph_matches5(self):
    """Check that a lemma/word pattern finds no matches in the sentence.

    The pattern is a word with lemma ``word*`` followed by the literal
    word ``abcd``; the breadth-first search is expected to return an
    empty result.
    """
    pattern_xml = pattern_pfx + (
        ' <pattern name="ex" class="ex-patterns">'
        ' <word lemma="word*" />'
        ' <word word="abcd" />'
        ' </pattern>'
    )
    compiled_pattern = Pattern(etree.fromstring(pattern_xml))

    text = "He is running the race by eating the the mungo."
    annotated = BasicNltkAnnotator().annotate(text)

    search = BreadthFirstWithQueue()
    found = MatchBuilder.find_all_matches(annotated, compiled_pattern, search)

    self.assertEqual(len(found), 0)
def test_annotate_nltk_geoextension_fileload(self):
    """Load extension categories from a file and count the extended types.

    The categories are parsed from ``tests/test-files/test-extensions.txt``
    with ``parse.ExtensionParser.parse``. Two words are expected to carry
    the ``ANIMAL`` type and five the ``GNN`` type.
    """
    sentence = 'Houses and rabbits look like badgers, oil rigs, and gas stations.'
    annotator = BasicNltkAnnotator()
    annotated_sentence = annotator.annotate(sentence)

    # The context manager closes the file even when parsing raises, which
    # the previous open()/close() pair did not.
    with open('tests/test-files/test-extensions.txt', 'r') as exfile:
        geo_categories = parse.ExtensionParser.parse(exfile)

    geo_annotator = TypeExtensionAnnotator(geo_categories)
    geo_sentence = geo_annotator.extend(annotated_sentence)

    animal_count = len(
        [w for w in geo_sentence.words if 'ANIMAL' in w.types])
    gnn_count = len([w for w in geo_sentence.words if 'GNN' in w.types])

    self.assertEqual(animal_count, 2)
    self.assertEqual(gnn_count, 5)
def test_pattern_graph_matches4(self):
    """Check the first match of a verb followed by at least two determiners.

    The pattern is a ``VB*`` word followed by a ``DT`` word with
    ``min="2"``; the first match is expected to read ``eating the the``.
    """
    pattern_xml = pattern_pfx + (
        ' <pattern name="ex" class="ex-patterns">'
        ' <word pos="VB*" />'
        ' <word pos="DT" min="2"/>'
        ' </pattern>'
    )
    compiled_pattern = Pattern(etree.fromstring(pattern_xml))

    text = "He is running the race by eating the the mungo."
    annotated = BasicNltkAnnotator().annotate(text)

    search = BreadthFirstWithQueue()
    found = MatchBuilder.find_all_matches(annotated, compiled_pattern, search)

    first_phrase = ' '.join(token.word for token in found[0])
    self.assertEqual(first_phrase, 'eating the the')
def test_pattern_graph_matches3(self):
    """Check the first three matches of an optional-determiner noun pattern.

    The pattern is a ``DT`` word with ``min="0"`` followed by an ``NN*``
    word; the first three matches are expected to read ``the dog``,
    ``dog`` and ``moons``, in that order.
    """
    pattern_xml = pattern_pfx + (
        ' <pattern name="ex" class="ex-patterns">'
        ' <word pos="DT" min="0"/>'
        ' <word pos="NN*" />'
        ' </pattern>'
    )
    compiled_pattern = Pattern(etree.fromstring(pattern_xml))

    text = "This is the dog who barks at moons."
    annotated = BasicNltkAnnotator().annotate(text)

    search = BreadthFirstWithQueue()
    found = MatchBuilder.find_all_matches(annotated, compiled_pattern, search)

    # Each match is rendered and checked before the next one is touched.
    for position, expected in enumerate(('the dog', 'dog', 'moons')):
        phrase = ' '.join(token.word for token in found[position])
        self.assertEqual(phrase, expected)
def test_pattern_graph_matches2(self):
    """Check the first two matches of an adjective-noun pattern.

    The pattern is a ``JJ*`` word followed by an ``NN*`` word; the first
    two matches are expected to read ``big dog`` and ``green house``.
    """
    pattern_xml = pattern_pfx + (
        ' <pattern name="ex" class="ex-patterns">'
        ' <word pos="JJ*" />'
        ' <word pos="NN*" />'
        ' </pattern>'
    )
    compiled_pattern = Pattern(etree.fromstring(pattern_xml))

    text = "He has a big dog that lives in the green house."
    annotated = BasicNltkAnnotator().annotate(text)

    search = BreadthFirstWithQueue()
    found = MatchBuilder.find_all_matches(annotated, compiled_pattern, search)

    # Both phrases are rendered before either assertion runs.
    phrases = [
        ' '.join(token.word for token in found[position])
        for position in (0, 1)
    ]
    self.assertEqual(phrases[0], 'big dog')
    self.assertEqual(phrases[1], 'green house')
def test_annotate_outside_range_low(self):
    """Check that ``at(-2)`` on an annotated sentence returns ``None``."""
    sentence = 'Apples and pears are like badgers and bears.'
    annotator = BasicNltkAnnotator()
    annotated_sentence = annotator.annotate(sentence)
    # assertIsNone checks identity with None and reports the offending
    # value on failure, unlike assertEqual(None, ...).
    self.assertIsNone(annotated_sentence.at(-2))