コード例 #1
0
 def test_annotate_basicnltk(self):
     """Check the pos, word and lemma of one item of an annotated sentence.

     The sentence is annotated with ``BasicNltkAnnotator`` and the item
     returned by ``at(2)`` is expected to carry pos 'NNS', word 'pears'
     and lemma 'pear'.
     """
     text = 'Apples and pears are like badgers and bears.'
     annotated = BasicNltkAnnotator().annotate(text)
     token = annotated.at(2)
     # Checks run in the order: tag, surface word, lemma.
     self.assertEqual(token.pos, 'NNS')
     self.assertEqual(token.word, 'pears')
     self.assertEqual(token.lemma, 'pear')
コード例 #2
0
    def test_annotate_nltk_geoextension1(self):
        """Count the words typed 'ANIMAL' and 'GNN' after type extension.

        The sentence is annotated with ``BasicNltkAnnotator`` and then
        extended by a ``TypeExtensionAnnotator`` built with ``stem=True``
        from two inline categories. The test expects 2 words whose
        ``types`` contain 'ANIMAL' and 5 whose ``types`` contain 'GNN'.
        """
        text = 'Houses and rabbits look like badgers, oil rigs, and gas stations.'
        base_sentence = BasicNltkAnnotator().annotate(text)

        categories = {
            'GNN': ('NN*', 'tests/test-files/gnns.txt', 'n'),
            'ANIMAL': ('NN*', 'tests/test-files/animals.txt', 'n'),
        }
        extender = TypeExtensionAnnotator(categories, stem=True)
        extended = extender.extend(base_sentence)

        def tagged(label):
            # Number of words whose ``types`` contains ``label``.
            return sum(1 for word in extended.words if label in word.types)

        # Both counts are computed before either assertion runs.
        animal_total = tagged('ANIMAL')
        gnn_total = tagged('GNN')
        self.assertEqual(animal_total, 2)
        self.assertEqual(gnn_total, 5)
コード例 #3
0
    def test_annotate_nltk_geoextension2(self):
        """Count typed words in a sentence expected to contain no 'GNN' words.

        The sentence is annotated with ``BasicNltkAnnotator`` and extended
        by a ``TypeExtensionAnnotator`` built with ``stem=True``. The test
        expects exactly 1 word whose ``types`` contain 'ANIMAL' and 0 whose
        ``types`` contain 'GNN'.
        """
        text = "Musical instruments don't sound like badgers."
        base_sentence = BasicNltkAnnotator().annotate(text)

        categories = {
            'GNN': ('NN*', 'tests/test-files/gnns.txt', 'n'),
            'ANIMAL': ('NN*', 'tests/test-files/animals.txt', 'n'),
        }
        extended = TypeExtensionAnnotator(categories, stem=True).extend(
            base_sentence)

        # Tally matching words with generator sums instead of temporary lists.
        animal_total = sum(1 for word in extended.words
                           if 'ANIMAL' in word.types)
        gnn_total = sum(1 for word in extended.words if 'GNN' in word.types)
        self.assertEqual(animal_total, 1)
        self.assertEqual(gnn_total, 0)
コード例 #4
0
    def test_pattern_graph_matches5(self):
        """Expect no matches for a two-word pattern on the sample sentence.

        The pattern asks for a word with lemma "word*" followed by a word
        whose text is "abcd". The test asserts that
        ``MatchBuilder.find_all_matches`` returns an empty result.
        """
        pattern_xml = pattern_pfx + '''
    <pattern name="ex" class="ex-patterns">
      <word lemma="word*" />
      <word word="abcd" />
    </pattern>'''
        compiled = Pattern(etree.fromstring(pattern_xml))

        text = "He is running the race by eating the the mungo."
        annotated = BasicNltkAnnotator().annotate(text)
        search = BreadthFirstWithQueue()

        found = MatchBuilder.find_all_matches(annotated, compiled, search)
        self.assertEqual(len(found), 0)
コード例 #5
0
    def test_annotate_nltk_geoextension_fileload(self):
        """Count typed words using categories parsed from an extensions file.

        The categories are read from 'tests/test-files/test-extensions.txt'
        via ``parse.ExtensionParser.parse`` and passed to a
        ``TypeExtensionAnnotator``. The test expects 2 words whose ``types``
        contain 'ANIMAL' and 5 whose ``types`` contain 'GNN'.
        """
        sentence = 'Houses and rabbits look like badgers, oil rigs, and gas stations.'
        annotator = BasicNltkAnnotator()
        annotated_sentence = annotator.annotate(sentence)

        # The context manager closes the file even if parsing raises, which
        # the previous explicit open()/close() pair did not guarantee.
        with open('tests/test-files/test-extensions.txt', 'r') as exfile:
            geo_categories = parse.ExtensionParser.parse(exfile)

        geo_annotator = TypeExtensionAnnotator(geo_categories)
        geo_sentence = geo_annotator.extend(annotated_sentence)

        animal_count = len(
            [w for w in geo_sentence.words if 'ANIMAL' in w.types])
        gnn_count = len([w for w in geo_sentence.words if 'GNN' in w.types])
        self.assertEqual(animal_count, 2)
        self.assertEqual(gnn_count, 5)
コード例 #6
0
    def test_pattern_graph_matches4(self):
        """Check the first match of a 'VB*' word followed by 'DT' with min="2".

        The first match returned by ``MatchBuilder.find_all_matches`` is
        joined on spaces and expected to read 'eating the the'.
        """
        pattern_xml = pattern_pfx + '''
    <pattern name="ex" class="ex-patterns">
      <word pos="VB*" />
      <word pos="DT" min="2"/>
    </pattern>'''
        compiled = Pattern(etree.fromstring(pattern_xml))

        text = "He is running the race by eating the the mungo."
        annotated = BasicNltkAnnotator().annotate(text)
        search = BreadthFirstWithQueue()

        found = MatchBuilder.find_all_matches(annotated, compiled, search)
        # Only the first match is inspected.
        first_phrase = ' '.join(token.word for token in found[0])
        self.assertEqual(first_phrase, 'eating the the')
コード例 #7
0
    def test_pattern_graph_matches3(self):
        """Check the first three matches of a 'DT' (min="0") + 'NN*' pattern.

        The matches at indices 0, 1 and 2, each joined on spaces, are
        expected to read 'the dog', 'dog' and 'moons' respectively.
        """
        pattern_xml = pattern_pfx + '''
    <pattern name="ex" class="ex-patterns">
      <word pos="DT" min="0"/>
      <word pos="NN*" />
    </pattern>'''
        compiled = Pattern(etree.fromstring(pattern_xml))

        text = "This is the dog who barks at moons."
        annotated = BasicNltkAnnotator().annotate(text)
        search = BreadthFirstWithQueue()

        found = MatchBuilder.find_all_matches(annotated, compiled, search)
        # Index explicitly (rather than zip) so a missing match still raises
        # IndexError, and each phrase is checked right after it is built.
        for position, wanted in enumerate(('the dog', 'dog', 'moons')):
            phrase = ' '.join(token.word for token in found[position])
            self.assertEqual(phrase, wanted)
コード例 #8
0
    def test_pattern_graph_matches2(self):
        """Check the first two matches of a 'JJ*' word followed by 'NN*'.

        The matches at indices 0 and 1, each joined on spaces, are expected
        to read 'big dog' and 'green house'.
        """
        pattern_xml = pattern_pfx + '''
    <pattern name="ex" class="ex-patterns">
      <word pos="JJ*" />
      <word pos="NN*" />
    </pattern>'''
        compiled = Pattern(etree.fromstring(pattern_xml))

        text = "He has a big dog that lives in the green house."
        annotated = BasicNltkAnnotator().annotate(text)
        search = BreadthFirstWithQueue()

        found = MatchBuilder.find_all_matches(annotated, compiled, search)
        # Build both phrases before asserting on either of them.
        first_phrase = ' '.join(token.word for token in found[0])
        second_phrase = ' '.join(token.word for token in found[1])
        self.assertEqual(first_phrase, 'big dog')
        self.assertEqual(second_phrase, 'green house')
コード例 #9
0
 def test_annotate_outside_range_low(self):
     """Expect ``at(-2)`` on an annotated sentence to return ``None``."""
     sentence = 'Apples and pears are like badgers and bears.'
     annotator = BasicNltkAnnotator()
     annotated_sentence = annotator.annotate(sentence)
     # assertIsNone checks identity with None and reports a clearer failure
     # message than comparing against None with assertEqual.
     self.assertIsNone(annotated_sentence.at(-2))