def test_delete_registry(self):
    """ A registry that was deleted is no longer listed under its prefix """
    # The current function name doubles as the registry name.
    name = inspect.stack()[0][3]
    Registry.find_or_create(name, key_prefix='RegistryTests').delete()
    self.assertNotIn(name, Registry.list('RegistryTests'))
def test_add_registry(self):
    """ A newly created registry is a Registry and shows up in the listing """
    # The current function name doubles as the registry name.
    name = inspect.stack()[0][3]
    created = Registry.find_or_create(name, key_prefix='RegistryTests')
    self.assertIsInstance(created, Registry)
    self.assertIn(name, Registry.list('RegistryTests'))
def test_find_registry(self):
    """ Calling find_or_create twice with the same name keeps it listed """
    name = inspect.stack()[0][3]
    # First call creates the registry, second call has to find it again.
    for _ in range(2):
        Registry.find_or_create(name, key_prefix='RegistryTests')
    self.assertIn(name, Registry.list('RegistryTests'))
def post(self, database, prefix, registry):
    """
    Find or create `registry` under `prefix` in redis database `database`.

    MissingArgumentError is re-raised untouched; any other exception is
    logged with its traceback and answered with a 500.
    """
    try:
        # TODO: Check size and format of registry name
        redis_db = int(database)
        Registry.find_or_create(registry, key_prefix=prefix,
                                redis_db=redis_db)
    except MissingArgumentError:
        raise
    except Exception:
        trace = traceback.format_exc()
        self.logger.error(trace)
        self.send_error(500)
def get(self, database, prefix, registry):
    """
    Write the dictionaries of `registry` as JSON under the 'entities' key.

    Answers 400 when `registry` is not listed under `prefix`.
    MissingArgumentError is re-raised untouched; any other exception is
    logged with its traceback and answered with a 500.
    """
    try:
        redis_db = int(database)
        known = Registry.list(key_prefix=prefix, redis_db=redis_db)
        if registry not in known:
            return self.send_error(400)
        reg = Registry.find_or_create(registry, key_prefix=prefix,
                                      redis_db=int(database))
        payload = {'entities': reg.dictionaries()}
        self.write(json_encode(payload))
    except MissingArgumentError:
        raise
    except Exception:
        trace = traceback.format_exc()
        self.logger.error(trace)
        self.send_error(500)
def handle(self, args):
    """
    CLI to extract entities from text.

    The text comes from the file at ``args.path`` when it is given,
    otherwise from ``args.text``. The dictionary ``args.entity`` of the
    registry ``args.registry`` (under ``args.prefix``) is fed to an
    Extractor together with ``args.grammar``, and the distinct candidates
    of every element flagged ``entity_found == 1`` are printed.

    Returns the status message to display: an error message when neither
    text source is given, otherwise a "text processed" message.
    """
    if args.text is None and args.path is None:
        return color.format('* no text source specified', color.RED)
    registry = Registry.find_or_create(args.registry,
                                       key_prefix=args.prefix)
    entity = registry.get_dict(args.entity)
    engine = Extractor(entity, args.grammar)
    if args.path is not None:
        # Context manager so the handle is closed even if read() raises.
        with open(args.path) as source:
            text = source.read()
    else:
        text = args.text
    results = engine.extract(text, args.lang)
    # Accumulate into one set instead of rebuilding a list/set pair for
    # every element; output order is set order, unspecified as before.
    found = set()
    for element in results[0]:
        if element['entity_found'] == 1:
            found.update(element['entity_candidates'])
    entities = list(found)
    if entities:
        print(
            color.format('%d entities detected' % len(entities),
                         color.GREEN))
        print('\n'.join(entities))
    else:
        print(color.format('no entities detected', color.RED))
    return '* text processed according to %s entity' %\
        (color.format(args.entity, color.GREEN))
def entity(self, database, prefix, registry, dictionary):
    """
    Return the dictionary named `dictionary` from `registry`, or None
    when the registry does not list a dictionary with that name.
    """
    reg = Registry.find_or_create(registry, key_prefix=prefix,
                                  redis_db=int(database))
    if dictionary in reg.dictionaries():
        return reg.get_dict(dictionary)
    return None
def test_list_dictionaries_empty(self):
    """ Test list of dictionaries is empty """
    fresh = Registry.find_or_create(inspect.stack()[0][3],
                                    key_prefix='RegistryTests')
    # NOTE(review): other tests read dictionary names through
    # dictionaries() and call list() with a key prefix to list registries;
    # confirm list() without arguments is the intended check here.
    self.assertEqual(fresh.list(), [])
    self.assertFalse(fresh.reset())
def test_delete_unknown_dictionary(self):
    """ Deleting a dictionary that was never requested reports failure """
    registry = Registry.find_or_create(inspect.stack()[0][3],
                                       key_prefix='RegistryTests')
    # Request two dictionaries, then try to drop a third, unknown one.
    for known in ('countries', 'cities'):
        registry.get_dict(known)
    self.assertFalse(registry.del_dict('events'))
def post(self, database, prefix, registry, dictionary, language):
    """
    Extract entities from the 'text' field of the JSON request body using
    `dictionary` of `registry`, and write them as JSON under 'result'.

    An optional 'grammar' field of the body is passed to the extraction.
    Answers 400 when get_dict returns None and 500 when the body has no
    'text' field. MissingArgumentError is re-raised untouched; any other
    exception is logged with its traceback and answered with a 500.
    """
    try:
        payload = json_decode(self.request.body)
        reg = Registry(registry, key_prefix=prefix, redis_db=int(database))
        target = reg.get_dict(dictionary)
        if target is None:
            return self.send_error(400)
        if 'text' not in payload:
            return self.send_error(500)
        grammar = payload.get('grammar', None)
        res = self.extract_entities(target, language, payload['text'],
                                    grammar)
        self.write(json_encode({'result': res}))
    except MissingArgumentError:
        raise
    except Exception:
        trace = traceback.format_exc()
        self.logger.error(trace)
        self.send_error(500)
def test_reset_registry(self):
    """ Resetting a registry that holds a dictionary succeeds """
    registry = Registry.find_or_create(inspect.stack()[0][3],
                                       key_prefix='RegistryTests')
    countries = registry.get_dict('countries')
    self.assertIsInstance(countries, Dictionary)
    self.assertTrue(registry.reset())
    # NOTE(review): other tests read dictionary names through
    # dictionaries() and call list() with a key prefix to list registries;
    # confirm list() without arguments is the intended check here.
    self.assertEqual(registry.list(), [])
def test_add_dictionary(self):
    """ A dictionary obtained via get_dict is listed by the registry """
    registry = Registry.find_or_create(inspect.stack()[0][3],
                                       key_prefix='RegistryTests')
    countries = registry.get_dict('countries')
    self.assertIsInstance(countries, Dictionary)
    listed = registry.dictionaries()
    self.assertIn('countries', listed)
def test_find_dictionary(self):
    """ Asking for the same dictionary twice still yields a Dictionary """
    registry = Registry.find_or_create(inspect.stack()[0][3],
                                       key_prefix='RegistryTests')
    # The first request's result is discarded; only the second is checked.
    registry.get_dict('countries')
    found_again = registry.get_dict('countries')
    self.assertIsInstance(found_again, Dictionary)
def get(self, database, prefix):
    """
    Write the registries listed under `prefix` as JSON under 'registries'.

    MissingArgumentError is re-raised untouched; any other exception is
    logged with its traceback and answered with a 500.
    """
    try:
        names = Registry.list(key_prefix=prefix, redis_db=int(database))
        body = json_encode({'registries': names})
        self.write(body)
    except MissingArgumentError:
        raise
    except Exception:
        trace = traceback.format_exc()
        self.logger.error(trace)
        self.send_error(500)
def handle(self, args):
    """
    CLI to load an entity dictionary.

    Loads ``args.csv`` through a CSVDictionary-backed registry when it is
    given, otherwise ``args.txt`` through a registry created without an
    explicit dictionary class. Prints the count returned by load_file and
    returns the status message to display.
    """
    file_path = args.txt
    options = {'key_prefix': args.prefix}
    if args.csv is not None:
        # A CSV source overrides the text source and needs the CSV class.
        options['dict_class'] = CSVDictionary
        file_path = args.csv
    registry = Registry.find_or_create(args.registry, **options)
    dictionary = registry.get_dict(args.entity)
    count = dictionary.load_file(file_path, args.lang)
    print('+ %d entities processed' % count)
    entity_label = color.format(args.entity, color.GREEN)
    return '* %s dictionary loaded' % (entity_label)
def delete(self, database, prefix, registry):
    """
    Delete `registry` (looked up through find_or_create) under `prefix`.

    MissingArgumentError is re-raised untouched; any other exception is
    logged with its traceback and answered with a 500.
    """
    try:
        redis_db = int(database)
        Registry.find_or_create(registry, key_prefix=prefix,
                                redis_db=redis_db).delete()
    except MissingArgumentError:
        raise
    except Exception:
        trace = traceback.format_exc()
        self.logger.error(trace)
        self.send_error(500)
def test_delete_dictionary(self):
    """ A deleted dictionary disappears from the registry listing """
    registry = Registry.find_or_create(inspect.stack()[0][3],
                                       key_prefix='RegistryTests')
    for name in ('countries', 'cities', 'events'):
        registry.get_dict(name)
    self.assertTrue(registry.del_dict('countries'))
    remaining = registry.dictionaries()
    self.assertNotIn('countries', remaining)
def test_list_dictionaries(self):
    """ The registry lists exactly the dictionaries that were requested """
    expected = ['countries', 'cities', 'events']
    registry = Registry.find_or_create(inspect.stack()[0][3],
                                       key_prefix='RegistryTests')
    for name in expected:
        registry.get_dict(name)
    # Order-insensitive comparison of the two collections.
    self.assertItemsEqual(registry.dictionaries(), expected)
def handle(self, args):
    """
    CLI to drop an entity dictionary.

    Returns a "dropped" message when del_dict reports success and an
    "unknown dictionary" message otherwise.
    """
    registry = Registry.find_or_create(args.registry,
                                       key_prefix=args.prefix)
    if registry.del_dict(args.entity):
        template, tint = '* %s dictionary dropped', color.GREEN
    else:
        template, tint = '* %s unknown dictionary', color.RED
    return template % (color.format(args.entity, tint))
def delete(self, database, prefix, registry, dictionary):
    """
    Delete `dictionary` from `registry`.

    Answers 400 when the registry does not list the dictionary and 500
    when del_dict reports failure. MissingArgumentError is re-raised
    untouched; any other exception is logged with its traceback and
    answered with a 500.
    """
    try:
        reg = Registry.find_or_create(registry, key_prefix=prefix,
                                      redis_db=int(database))
        if dictionary in reg.dictionaries():
            status = None if reg.del_dict(dictionary) else 500
        else:
            status = 400
        if status is not None:
            return self.send_error(status)
    except MissingArgumentError:
        raise
    except Exception:
        trace = traceback.format_exc()
        self.logger.error(trace)
        self.send_error(500)
def test_extract_sentence(self):
    """ Extracting countries from an English sentence finds 'Japan' """
    registry = Registry.find_or_create(inspect.stack()[0][3],
                                       dict_class=CSVDictionary,
                                       key_prefix='ExtractorTests')
    countries = registry.get_dict('countries')
    countries.load_file(COUNTRIES_FILE, 'en')
    sentence = "I want to buy flight tickets for Japan"
    results = Extractor(countries).extract(sentence, 'en')
    # Gather the candidates of every element flagged as an entity hit.
    terms = set()
    for element in results[0]:
        if element['entity_found'] == 1:
            terms.update(element['entity_candidates'])
    self.assertIn('Japan', terms)
def test_extract_jp_sentence(self):
    """ Test extracting entities from text in Japanese """
    # NOTE(review): the backslashes inside this literal look like former
    # line continuations; confirm the intended whitespace before reflowing.
    test_sentence_jp = u'[TOMMY HILFIGER]3階「TOMMY HILFIGER」\ リニューアルオープン! 6月2日より3階Plaza South「TOMMY HILFIGER」\ がリニューアルオープン! https://t.co/Nf3GTF0OQD #Lazona'
    registry = Registry.find_or_create(inspect.stack()[0][3], key_prefix='ExtractorTests')
    events = registry.get_dict('events')
    events.load_file(EVENTS_FILE, 'ja')
    engine = Extractor(events)
    # The extractor is given the UTF-8 encoded bytes of the sentence.
    results = engine.extract(test_sentence_jp.encode('utf8'), 'ja')
    terms = []
    for element in results[0]:
        # Merge the candidates of every element flagged as an entity hit.
        if element['entity_found'] == 1:
            terms = list(set(terms).union(element['entity_candidates']))
    self.assertIn('リニューアルオープン', terms)
def test_extract_document(self):
    """
    Test extracting cities from a Wikipedia article that describes the UN.

    This is a very good example to show problems of ambiguities in
    selecting entities: e.g. Venezuela is both a country and a city,
    liberty is a city and a concept, Roosevelt is a man and a city etc.

    Conclusion: We mostly have a problem of noise in the results, not
    silence.
    """
    registry = Registry.find_or_create(inspect.stack()[0][3],
                                       dict_class=CSVDictionary,
                                       key_prefix='ExtractorTests')
    cities = registry.get_dict('cities')
    cities.load_file(CITIES_FILE, 'en')
    # Context manager so the fixture file is closed once it has been read.
    with open(ENGLISH_TEXT) as document:
        text = document.read()
    engine = Extractor(cities)
    results = engine.extract(text, 'en')
    # Gather the candidates of every element flagged as an entity hit.
    terms = set()
    for element in results[0]:
        if element['entity_found'] == 1:
            terms.update(element['entity_candidates'])
    self.assertIn('Nairobi', terms)
def tearDown(self):
    """ Call Registry.flush for the 'ExtractorTests' prefix after a test """
    prefix = 'ExtractorTests'
    Registry.flush(prefix)
def tearDown(self):
    """
    Call Registry.flush for 'registry_api_test', then run the
    AsyncHTTPTestCase tear-down.
    """
    Registry.flush('registry_api_test')
    # super(AsyncHTTPTestCase, self) starts the method lookup *after*
    # AsyncHTTPTestCase in the MRO, so that class's own tearDown was
    # skipped. Calling it explicitly runs it, and it chains onward itself.
    # NOTE(review): assumes this test class derives directly from
    # tornado.testing.AsyncHTTPTestCase; confirm.
    AsyncHTTPTestCase.tearDown(self)
def tearDown(self):
    """ Call Registry.flush for the 'DictionaryTests' prefix after a test """
    prefix = 'DictionaryTests'
    Registry.flush(prefix)
def tearDown(self):
    """ Call Registry.flush for the 'RegistryTests' prefix after a test """
    prefix = 'RegistryTests'
    Registry.flush(prefix)
def test_no_registries(self):
    """ Listing a prefix under which nothing was created yields [] """
    # The current function name serves as the prefix to list.
    unused_prefix = inspect.stack()[0][3]
    self.assertEqual(Registry.list(unused_prefix), [])