Beispiel #1
0
 def test_multiple_matches(self):
     """Check WikipediaResolver when two candidates carry the same URL.

     A single 'John Smith' chain gets three PER candidates; the first two
     share one Wikipedia URL and the third has a different one. After
     resolving, the test expects one chain, two remaining candidates and
     no selected entity.
     """
     shared_url = 'http://en.wikipedia.org/wiki/John_Smith'
     other_url = 'http://en.wikipedia.org/wiki/John_P_Smith'

     chain = MentionChain(
         [Mention('John Smith', 'doc1', (4, 8), (), EntityType.PER)])
     chain.candidates = [
         Entity(entity_id, EntityType.PER, name, EntityOrigin.WLL, urls=[url])
         for entity_id, name, url in (
             ('122', 'John Smith', shared_url),
             ('123', 'John Smith', shared_url),
             ('124', 'Jake Smith', other_url),
         )
     ]

     doc = unittest.mock.Mock()
     doc.doc_id = 'doc1'
     doc.mention_chains = [chain]

     WikipediaResolver().resolve(doc)

     resolved = doc.mention_chains
     self.assertEqual(1, len(resolved))
     self.assertEqual(2, len(resolved[0].candidates))
     self.assertIsNone(resolved[0].entity)
Beispiel #2
0
 def test(self):
     """Check a CascadeResolver of ExactNameResolver and WikipediaResolver.

     The 'John Smith' chain starts with three PER candidates that differ
     in name and Wikipedia URL. After the cascade runs, the test expects
     one chain, two remaining candidates and entity '122' to be selected.
     """
     chain = MentionChain(
         [Mention('John Smith', 'doc1', (4, 8), (), EntityType.PER)])
     chain.candidates = [
         Entity(entity_id, EntityType.PER, name, EntityOrigin.WLL, urls=[url])
         for entity_id, name, url in (
             ('122', 'John Smith',
              'http://en.wikipedia.org/wiki/John_Smith'),
             ('123', 'John Smith',
              'http://en.wikipedia.org/wiki/John_H_Smith'),
             ('124', 'Jake Smith',
              'http://en.wikipedia.org/wiki/John_Smith'),
         )
     ]

     doc = unittest.mock.Mock()
     doc.doc_id = 'doc1'
     doc.mention_chains = [chain]

     stages = [ExactNameResolver(), WikipediaResolver()]
     CascadeResolver(stages).resolve(doc)

     resolved = doc.mention_chains
     self.assertEqual(1, len(resolved))
     self.assertEqual(2, len(resolved[0].candidates))
     self.assertEqual('122', resolved[0].entity.id)
Beispiel #3
0
 def test(self):
     """Check the SharedTokensFeature value for a partially matching name.

     The mention is 'Nueva York' and the entity's names are overridden to
     {'NYC', 'New York'}; the first feature slot is expected to be
     approximately 0.5.
     """
     candidate = Entity('1', EntityType.GPE, 'New York City',
                        EntityOrigin.GEO)
     candidate.names = {'NYC', 'New York'}
     mention = Mention('Nueva York', 'doc1', (), (), EntityType.GPE)
     features = FeatureVector()

     SharedTokensFeature().extract(
         MentionChain([mention]), candidate, None, features)

     self.assertAlmostEqual(0.5, features.data[0])
Beispiel #4
0
 def test_match(self):
     """Check that ExactMatchFeature fires for a translated mention.

     The mention text is 'Nueva York' with ``translate_string`` set to
     'new york'; the entity's names include 'New York'. The first feature
     slot is expected to be truthy.
     """
     candidate = Entity('1', EntityType.GPE, 'New York', EntityOrigin.GEO)
     candidate.names = {'New York', 'New York City', 'NYC'}

     chain = MentionChain(
         [Mention('Nueva York', 'doc1', (), (), EntityType.GPE)])
     # Set the translation on the mention as stored by the chain.
     first_mention = chain.mentions[0]
     first_mention.translate_string = 'new york'

     features = FeatureVector()
     ExactMatchFeature().extract(chain, candidate, None, features)

     self.assertTrue(features.data[0])
Beispiel #5
0
 def test_positive(self):
     """Check that WikipediaFeature fires for a 'New York City' mention.

     The entity carries the URL '.../wiki/New_York_City' and the mention
     text is 'New York City'; the first feature slot is expected to be
     truthy.
     """
     wiki_urls = ['http://en.wikipedia.org/wiki/New_York_City']
     candidate = Entity('1', EntityType.GPE, 'New York', EntityOrigin.GEO,
                        urls=wiki_urls)
     candidate.names = {'New York', 'New York City', 'NYC'}
     mention = Mention('New York City', 'doc1', (), (), EntityType.GPE)
     features = FeatureVector()

     WikipediaFeature().extract(
         MentionChain([mention]), candidate, None, features)

     self.assertTrue(features.data[0])
Beispiel #6
0
    def test(self):
        """Check the first line OutputWriter emits for a linked chain.

        Two chains are built: one with a single 'Henry' mention linked to
        entity '67', and one with two mentions and no entity. The document
        is written to an in-memory buffer and only the first output line
        (stripped) is compared against the expected tab-separated record.
        """
        henry = Mention('Henry', '_WL_doc34', (123, 128), (17, 17),
                        EntityType.PER, 'Men1')
        ed_smith = Mention('Ed Smith', '_WL_doc34', (141, 149), (22, 23),
                           EntityType.PER, 'Men2')
        ed = Mention('Ed', '_WL_doc34', (197, 199), (44, 44),
                     EntityType.PER, 'Men3')

        linked_chain = MentionChain([henry])
        unlinked_chain = MentionChain([ed_smith, ed])
        linked_chain.entity = Entity('67', EntityType.PER, 'Henry',
                                     EntityOrigin.WLL)
        unlinked_chain.entity = None

        all_mentions = linked_chain.mentions + unlinked_chain.mentions
        doc = Document(all_mentions, DocType.WL, Lang.ENG, [], [])
        doc.mention_chains = [linked_chain, unlinked_chain]

        buffer = io.StringIO()
        OutputWriter(buffer, 'test', 0.75).write(doc)

        # Rewind so the written content can be read back line by line.
        buffer.seek(0)
        written = buffer.readlines()
        expected = "test\tMen1\tHenry\t_WL_doc34:123-128\t67\tPER\tNAM\t0.75"
        self.assertEqual(expected, written[0].strip())
Beispiel #7
0
 def test_multiple_matches(self):
     """Check ExactNameResolver when two candidates have the mention's name.

     The 'John Smith' chain gets three PER candidates, two named
     'John Smith' and one named 'Jake Smith'. After resolving, the test
     expects one chain, two remaining candidates and no selected entity.
     """
     chain = MentionChain(
         [Mention('John Smith', 'doc1', (4, 8), (), EntityType.PER)])
     chain.candidates = [
         Entity(entity_id, EntityType.PER, name, EntityOrigin.WLL)
         for entity_id, name in (
             ('122', 'John Smith'),
             ('123', 'John Smith'),
             ('124', 'Jake Smith'),
         )
     ]

     doc = unittest.mock.Mock()
     doc.doc_id = 'doc1'
     doc.mention_chains = [chain]

     ExactNameResolver().resolve(doc)

     resolved = doc.mention_chains
     self.assertEqual(1, len(resolved))
     self.assertEqual(2, len(resolved[0].candidates))
     self.assertIsNone(resolved[0].entity)
Beispiel #8
0
 def test_no_match(self):
     """Check that ExactMatchFeature stays off for differing names.

     The entity is named 'Nueva York' and the mention text is 'New York';
     the first feature slot is expected to be falsy.
     """
     candidate = Entity('1', EntityType.GPE, 'Nueva York', EntityOrigin.GEO)
     mention = Mention('New York', 'doc1', (), (), EntityType.GPE)
     features = FeatureVector()

     ExactMatchFeature().extract(
         MentionChain([mention]), candidate, None, features)

     self.assertFalse(features.data[0])
Beispiel #9
0
 def test_not_multi_token_name(self):
     """Check that LastNameFeature stays off for a single-token mention.

     The entity is 'John Smith' and the mention is just 'Smith'; the
     first feature slot is expected to be falsy.
     """
     candidate = Entity('1', EntityType.PER, 'John Smith', EntityOrigin.WLL)
     mention = Mention('Smith', 'doc1', (), (), EntityType.PER)
     features = FeatureVector()

     LastNameFeature().extract(
         MentionChain([mention]), candidate, None, features)

     self.assertFalse(features.data[0])
Beispiel #10
0
 def test_positive(self):
     """Check that LastNameFeature fires when only the last token matches.

     The entity is 'John Smith' and the mention is 'Jep Smith'; the first
     feature slot is expected to be truthy.
     """
     candidate = Entity('1', EntityType.PER, 'John Smith', EntityOrigin.WLL)
     mention = Mention('Jep Smith', 'doc1', (), (), EntityType.PER)
     features = FeatureVector()

     LastNameFeature().extract(
         MentionChain([mention]), candidate, None, features)

     self.assertTrue(features.data[0])
Beispiel #11
0
 def test(self):
     """Check the counters ResolverScorer reports for one mock document.

     The ground truth maps five offset spans of 'doc1' to links (four
     LINK entries and one NIL). The document has four mention chains with
     candidate lists; three chains have their first candidate selected
     and one has no entity. After ``update``, the report is expected to
     count 2 PER mentions with a correct candidate and 1 PER mention with
     the correct entity.
     """
     def per_mention(offsets):
         # Mention with empty text; only the offsets vary in this test.
         return Mention('', 'doc1', offsets, (), EntityType.PER)

     def wll_person(entity_id):
         # Nameless PER entity identified only by its id.
         return Entity(entity_id, EntityType.PER, '', EntityOrigin.WLL)

     def linked(entity_ids):
         return Link(EntityType.PER, LinkType.LINK, entity_ids, None)

     gt = {
         'doc1': {
             (0, 2): linked(['123', '122']),
             (4, 8): Link(EntityType.PER, LinkType.NIL, [], 'NIL999'),
             (10, 12): linked(['222']),
             (16, 17): linked(['333']),
             (18, 19): linked(['444']),
         }
     }

     chains = [
         MentionChain(
             [per_mention(span) for span in ((0, 2), (3, 7), (16, 17))]),
         MentionChain([per_mention((4, 8))]),
         MentionChain([per_mention((10, 12))]),
         MentionChain([per_mention((18, 19))]),
     ]
     candidate_ids = (['122'], ['147'], ['198', '222'], ['17'])
     for chain, ids in zip(chains, candidate_ids):
         chain.candidates = [wll_person(entity_id) for entity_id in ids]

     # Every chain picks its first candidate except the third, which is
     # left without an entity.
     chains[0].entity = chains[0].candidates[0]
     chains[1].entity = chains[1].candidates[0]
     chains[2].entity = None
     chains[3].entity = chains[3].candidates[0]

     doc = unittest.mock.Mock()
     doc.doc_id = 'doc1'
     doc.mention_chains = chains

     scorer = ResolverScorer(gt)
     scorer.update(doc)

     report = scorer.report
     self.assertEqual(
         2, report.num_mentions_with_correct_candidate[EntityType.PER])
     self.assertEqual(
         1, report.num_mentions_correct_entity[EntityType.PER])
Beispiel #12
0
 def test_one_match(self):
     """Check ExactNameResolver when exactly one candidate should match.

     The first candidate's names are overridden to include 'john smith';
     the second candidate is named 'Not John'. After resolving, the test
     expects one chain, both candidates still present and the first
     candidate selected as the entity.
     """
     entity1 = Entity('122', EntityType.PER, 'John', EntityOrigin.WLL)
     entity1.names = {'John', 'J. Smith', 'john smith'}
     decoy = Entity('123', EntityType.PER, 'Not John', EntityOrigin.WLL)

     chain = MentionChain(
         [Mention('John Smith', 'doc1', (4, 8), (), EntityType.PER)])
     chain.candidates = [entity1, decoy]

     doc = unittest.mock.Mock()
     doc.doc_id = 'doc1'
     doc.mention_chains = [chain]

     ExactNameResolver().resolve(doc)

     resolved = doc.mention_chains
     self.assertEqual(1, len(resolved))
     self.assertEqual(2, len(resolved[0].candidates))
     self.assertEqual(entity1, resolved[0].entity)
Beispiel #13
0
    def test(self):
        """Check which candidate SvmResolver selects for 'John Smith'.

        The resolver is built from ``self.trainClassifier()`` and an
        ``EntityFeatureExtractor`` wrapping ``SimpleFeature``. Four PER
        candidates are offered; the test expects entity '124' to be chosen.
        """
        resolver = SvmResolver(
            self.trainClassifier(),
            EntityFeatureExtractor(SimpleFeature()),
        )

        chain = MentionChain(
            [Mention('John Smith', 'doc1', (), (), EntityType.PER)])
        chain.candidates = [
            Entity(entity_id, EntityType.PER, name, EntityOrigin.WLL)
            for entity_id, name in (
                ('122', 'John Smith'),
                ('123', 'John Smith'),
                ('124', 'John P. Smith'),
                ('125', 'Jake Smith'),
            )
        ]
        doc = unittest.mock.Mock()
        doc.mention_chains = [chain]

        resolver.resolve(doc)

        self.assertEqual('124', doc.mention_chains[0].entity.id)
Beispiel #14
0
 def test_one_match(self):
     """Check WikipediaResolver when exactly one candidate has a URL.

     The first candidate carries the URL '.../wiki/John_Smith'; the
     second has no URLs passed. After resolving, the test expects one
     chain, both candidates still present and the first candidate
     selected as the entity.
     """
     entity1 = Entity('122', EntityType.PER, 'John', EntityOrigin.WLL,
                      urls=['http://en.wikipedia.org/wiki/John_Smith'])
     decoy = Entity('123', EntityType.PER, 'Not John', EntityOrigin.WLL)

     chain = MentionChain(
         [Mention('John Smith', 'doc1', (4, 8), (), EntityType.PER)])
     chain.candidates = [entity1, decoy]

     doc = unittest.mock.Mock()
     doc.doc_id = 'doc1'
     doc.mention_chains = [chain]

     WikipediaResolver().resolve(doc)

     resolved = doc.mention_chains
     self.assertEqual(1, len(resolved))
     self.assertEqual(2, len(resolved[0].candidates))
     self.assertEqual(entity1, resolved[0].entity)
Beispiel #15
0
 def test_no_match(self):
     """Check WikipediaResolver when the only candidate has no URL given.

     After resolving, the test expects one chain, the single candidate
     still present and no selected entity.
     """
     chain = MentionChain(
         [Mention('John Smith', 'doc1', (4, 8), (), EntityType.PER)])
     chain.candidates = [
         Entity('122', EntityType.PER, 'John', EntityOrigin.WLL),
     ]

     doc = unittest.mock.Mock()
     doc.doc_id = 'doc1'
     doc.mention_chains = [chain]

     WikipediaResolver().resolve(doc)

     resolved = doc.mention_chains
     self.assertEqual(1, len(resolved))
     self.assertEqual(1, len(resolved[0].candidates))
     self.assertIsNone(resolved[0].entity)
Beispiel #16
0
 def test2(self):
     """Check the OriginFeature value for an entity with origin AUG.

     No mention chain is supplied (``None``); the first feature slot is
     expected to equal 1.
     """
     candidate = Entity('1', EntityType.PER, 'Christopher',
                        EntityOrigin.AUG)
     features = FeatureVector()

     OriginFeature().extract(None, candidate, None, features)

     self.assertEqual(1, features.data[0])