def test_extraction(self):
    """Check that named entities are extracted from a simple sentence.

    Runs ``EntityExtractor.extract_named_entities`` on a fixed sentence and
    asserts that each of the three expected names is present in the result.
    """
    extractor = EntityExtractor()
    test_sentence = u'Rich Burdon and Jeff Reynar are building Dalek.'
    entities = extractor.extract_named_entities(test_sentence)

    # assertIn reports the missing name and the actual result on failure,
    # whereas assertTrue(x in y) only reports "False is not true".
    self.assertIn('Dalek', entities)
    self.assertIn('Jeff Reynar', entities)
    self.assertIn('Rich Burdon', entities)
def main(url="http://www.guardian.co.uk/world/2013/jun/23/edward-snowden-gchq"):
    """Fetch the text at *url*, extract its named entities, and print both.

    Args:
        url: Address of the page to fetch. Defaults to the article that was
            previously hard-coded, so calling ``main()`` with no arguments
            behaves as before.
    """
    extractor = EntityExtractor()
    fetcher = Fetcher()

    text = fetcher.fetch_text_from_url(url)
    entities = extractor.extract_named_entities(text)

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print(text)
    print(entities)