Exemple #1
0
 def test_delete_registry(self):
     """
     A registry that has been deleted must not appear in the listing
     """
     # The name of the running test doubles as the registry name.
     name = inspect.stack()[0][3]
     created = Registry.find_or_create(name, key_prefix='RegistryTests')
     created.delete()
     self.assertNotIn(name, Registry.list('RegistryTests'))
Exemple #2
0
 def test_add_registry(self):
     """
     A registry obtained through find_or_create is a Registry instance
     and is reported by Registry.list for the same prefix
     """
     # The name of the running test doubles as the registry name.
     name = inspect.stack()[0][3]
     created = Registry.find_or_create(name, key_prefix='RegistryTests')
     self.assertIsInstance(created, Registry)
     listed = Registry.list('RegistryTests')
     self.assertIn(name, listed)
Exemple #3
0
 def test_find_registry(self):
     """
     Calling find_or_create twice with the same name still leaves the
     registry in the listing
     """
     name = inspect.stack()[0][3]
     # Two identical calls: the second one runs against an existing entry.
     for _ in range(2):
         Registry.find_or_create(name, key_prefix='RegistryTests')
     self.assertIn(name, Registry.list('RegistryTests'))
Exemple #4
0
 def post(self, database, prefix, registry):
     """
     Handle POST: call Registry.find_or_create for ``registry`` with the
     given key prefix and Redis database number.

     MissingArgumentError is re-raised unchanged; any other exception is
     logged with its traceback and answered with a 500.
     """
     try:
         # TODO: Check size and format of registry name
         db_index = int(database)
         Registry.find_or_create(registry,
                                 key_prefix=prefix,
                                 redis_db=db_index)
     except MissingArgumentError:
         # Must not be swallowed by the generic handler below.
         raise
     except Exception:
         self.logger.error(traceback.format_exc())
         self.send_error(500)
Exemple #5
0
 def get(self, database, prefix, registry):
     """
     Handle GET: write ``{'entities': [...]}`` with the dictionaries of
     ``registry`` as JSON.

     Answers 400 when ``registry`` is not in Registry.list for the prefix
     and database. MissingArgumentError is re-raised unchanged; any other
     exception is logged with its traceback and answered with a 500.
     """
     try:
         db_index = int(database)
         known = Registry.list(key_prefix=prefix, redis_db=db_index)
         if registry in known:
             reg = Registry.find_or_create(registry,
                                           key_prefix=prefix,
                                           redis_db=db_index)
             payload = {'entities': reg.dictionaries()}
             self.write(json_encode(payload))
         else:
             return self.send_error(400)
     except MissingArgumentError:
         # Must not be swallowed by the generic handler below.
         raise
     except Exception:
         self.logger.error(traceback.format_exc())
         self.send_error(500)
Exemple #6
0
 def handle(self, args):
     """
     CLI to extract entities from text.

     The text is read from the file at ``args.path`` when it is set,
     otherwise ``args.text`` is used. Candidates of every result element
     flagged ``entity_found == 1`` are collected (duplicates removed) and
     printed; their order is unspecified.

     Returns a status line: an error message when neither ``args.text``
     nor ``args.path`` is given, otherwise a confirmation naming
     ``args.entity``.
     """
     if args.text is None and args.path is None:
         return color.format('* no text source specified', color.RED)
     registry = Registry.find_or_create(args.registry,
                                        key_prefix=args.prefix)
     entity = registry.get_dict(args.entity)
     engine = Extractor(entity, args.grammar)
     if args.path is not None:
         # Context manager so the file handle is closed right after the
         # read instead of being left to the garbage collector.
         with open(args.path) as source:
             text = source.read()
     else:
         text = args.text
     results = engine.extract(text, args.lang)
     # Accumulate in one set rather than rebuilding list/set per element.
     found = set()
     for element in results[0]:
         if element['entity_found'] == 1:
             found.update(element['entity_candidates'])
     entities = list(found)
     if entities:
         print(
             color.format('%d entities detected' % len(entities),
                          color.GREEN))
         print('\n'.join(entities))
     else:
         print(color.format('no entities detected', color.RED))
     # print(color.format('%d' % results[1].elapsed, color.LIGHT_MAGENTA))
     return '* text processed according to %s entity' %\
         (color.format(args.entity, color.GREEN))
Exemple #7
0
 def entity(self, database, prefix, registry, dictionary):
     """
     Return the dictionary named ``dictionary`` from ``registry``, or
     None when the registry does not list a dictionary with that name.
     """
     reg = Registry.find_or_create(registry,
                                   key_prefix=prefix,
                                   redis_db=int(database))
     # Membership is checked first; get_dict is only called for listed names.
     if dictionary in reg.dictionaries():
         return reg.get_dict(dictionary)
     return None
Exemple #8
0
 def test_list_dictionaries_empty(self):
     """
     Test list of dictionaries is empty
     """
     name = inspect.stack()[0][3]
     registry = Registry.find_or_create(name, key_prefix='RegistryTests')
     listed = registry.list()
     self.assertEqual(listed, [])
     # reset() is expected to report a falsy result here.
     was_reset = registry.reset()
     self.assertFalse(was_reset)
Exemple #9
0
 def test_delete_unknown_dictionary(self):
     """
     Deleting a dictionary name that was never requested from the
     registry must return a falsy result
     """
     name = inspect.stack()[0][3]
     registry = Registry.find_or_create(name, key_prefix='RegistryTests')
     for label in ('countries', 'cities'):
         registry.get_dict(label)
     # 'events' was not requested above.
     removed = registry.del_dict('events')
     self.assertFalse(removed)
Exemple #10
0
 def post(self, database, prefix, registry, dictionary, language):
     """
     Handle POST: run entity extraction on the ``text`` field of the JSON
     request body and write ``{'result': ...}`` as JSON.

     The optional ``grammar`` field of the body is forwarded to
     extract_entities (None when absent). Answers 400 when get_dict
     returns None and 500 when the body has no ``text`` field.
     MissingArgumentError is re-raised unchanged; any other exception is
     logged with its traceback and answered with a 500.
     """
     try:
         payload = json_decode(self.request.body)
         store = Registry(registry, key_prefix=prefix, redis_db=int(database))
         target = store.get_dict(dictionary)
         if target is None:
             return self.send_error(400)
         # NOTE(review): a missing 'text' field looks like a client error,
         # yet it is answered with 500 - confirm before changing.
         if 'text' not in payload:
             return self.send_error(500)
         text = payload['text']
         grammar = payload.get('grammar')
         outcome = self.extract_entities(target, language, text, grammar)
         self.write(json_encode({'result': outcome}))
     except MissingArgumentError:
         # Must not be swallowed by the generic handler below.
         raise
     except Exception:
         self.logger.error(traceback.format_exc())
         self.send_error(500)
Exemple #11
0
 def test_reset_registry(self):
     """
     After a dictionary was requested, reset() returns a truthy result
     and list() is empty again
     """
     name = inspect.stack()[0][3]
     registry = Registry.find_or_create(name, key_prefix='RegistryTests')
     countries = registry.get_dict('countries')
     self.assertIsInstance(countries, Dictionary)
     was_reset = registry.reset()
     self.assertTrue(was_reset)
     self.assertEqual(registry.list(), [])
Exemple #12
0
 def test_add_dictionary(self):
     """
     get_dict returns a Dictionary and the name then shows up in
     registry.dictionaries()
     """
     name = inspect.stack()[0][3]
     registry = Registry.find_or_create(name, key_prefix='RegistryTests')
     countries = registry.get_dict('countries')
     self.assertIsInstance(countries, Dictionary)
     listed = registry.dictionaries()
     self.assertIn('countries', listed)
Exemple #13
0
 def test_find_dictionary(self):
     """
     A second get_dict call with the same name returns a Dictionary
     """
     name = inspect.stack()[0][3]
     registry = Registry.find_or_create(name, key_prefix='RegistryTests')
     # First call is only made for its effect on the registry.
     registry.get_dict('countries')
     self.assertIsInstance(registry.get_dict('countries'), Dictionary)
Exemple #14
0
 def get(self, database, prefix):
     """
     Handle GET: write ``{'registries': [...]}`` as JSON, where the list
     is what Registry.list returns for the prefix and database.

     MissingArgumentError is re-raised unchanged; any other exception is
     logged with its traceback and answered with a 500.
     """
     try:
         payload = {
             'registries': Registry.list(key_prefix=prefix,
                                         redis_db=int(database)),
         }
         self.write(json_encode(payload))
     except MissingArgumentError:
         # Must not be swallowed by the generic handler below.
         raise
     except Exception:
         self.logger.error(traceback.format_exc())
         self.send_error(500)
Exemple #15
0
 def handle(self, args):
     """
     CLI to load an entity dictionary.

     When ``args.csv`` is set the registry is requested with
     ``dict_class=CSVDictionary`` and the CSV path is loaded; otherwise
     the path in ``args.txt`` is loaded. Prints the count returned by
     load_file and returns a status line naming ``args.entity``.
     """
     file_path = args.txt
     options = {'key_prefix': args.prefix}
     if args.csv is not None:
         # CSV input switches both the dictionary class and the source path.
         options['dict_class'] = CSVDictionary
         file_path = args.csv
     registry = Registry.find_or_create(args.registry, **options)
     target = registry.get_dict(args.entity)
     loaded = target.load_file(file_path, args.lang)
     print('+ %d entities processed' % loaded)
     label = color.format(args.entity, color.GREEN)
     return '* %s dictionary loaded' % (label)
Exemple #16
0
 def delete(self, database, prefix, registry):
     """
     Handle DELETE: obtain ``registry`` through Registry.find_or_create
     and call delete() on it.

     MissingArgumentError is re-raised unchanged; any other exception is
     logged with its traceback and answered with a 500.
     """
     try:
         db_index = int(database)
         Registry.find_or_create(registry,
                                 key_prefix=prefix,
                                 redis_db=db_index).delete()
     except MissingArgumentError:
         # Must not be swallowed by the generic handler below.
         raise
     except Exception:
         self.logger.error(traceback.format_exc())
         self.send_error(500)
Exemple #17
0
 def test_delete_dictionary(self):
     """
     del_dict on a requested dictionary returns a truthy result and the
     name disappears from registry.dictionaries()
     """
     name = inspect.stack()[0][3]
     registry = Registry.find_or_create(name, key_prefix='RegistryTests')
     for label in ('countries', 'cities', 'events'):
         registry.get_dict(label)
     removed = registry.del_dict('countries')
     self.assertTrue(removed)
     self.assertNotIn('countries', registry.dictionaries())
Exemple #18
0
 def test_list_dictionaries(self):
     """
     Test get the list of dictionaries from a registry

     The comparison ignores order. It is written with sorted() because
     assertItemsEqual only exists in Python 2's unittest (Python 3 names
     it assertCountEqual), so this form runs on both.
     """
     registry = Registry.find_or_create(inspect.stack()[0][3],
                                        key_prefix='RegistryTests')
     registry.get_dict('countries')
     registry.get_dict('cities')
     registry.get_dict('events')
     self.assertEqual(sorted(registry.dictionaries()),
                      sorted(['countries', 'cities', 'events']))
Exemple #19
0
 def handle(self, args):
     """
     CLI to drop an entity dictionary.

     Returns a status line: "dropped" when del_dict reports success,
     "unknown dictionary" otherwise.
     """
     registry = Registry.find_or_create(args.registry,
                                        key_prefix=args.prefix)
     dropped = registry.del_dict(args.entity)
     if dropped:
         template, shade = '* %s dictionary dropped', color.GREEN
     else:
         template, shade = '* %s unknown dictionary', color.RED
     return template % (color.format(args.entity, shade))
Exemple #20
0
 def delete(self, database, prefix, registry, dictionary):
     """
     Handle DELETE: remove ``dictionary`` from ``registry``.

     Answers 400 when the registry does not list the dictionary and 500
     when del_dict reports failure. MissingArgumentError is re-raised
     unchanged; any other exception is logged with its traceback and
     answered with a 500.
     """
     try:
         reg = Registry.find_or_create(registry,
                                       key_prefix=prefix,
                                       redis_db=int(database))
         # del_dict is only attempted for names the registry lists.
         if dictionary not in reg.dictionaries():
             failure = 400
         elif not reg.del_dict(dictionary):
             failure = 500
         else:
             failure = None
         if failure is not None:
             return self.send_error(failure)
     except MissingArgumentError:
         # Must not be swallowed by the generic handler below.
         raise
     except Exception:
         self.logger.error(traceback.format_exc())
         self.send_error(500)
Exemple #21
0
 def test_extract_sentence(self):
     """
     Extract country entities from a short English sentence and expect
     'Japan' among the candidates
     """
     name = inspect.stack()[0][3]
     registry = Registry.find_or_create(name,
                                        dict_class=CSVDictionary,
                                        key_prefix='ExtractorTests')
     countries = registry.get_dict('countries')
     countries.load_file(COUNTRIES_FILE, 'en')
     sentence = "I want to buy flight tickets for Japan"
     outcome = Extractor(countries).extract(sentence, 'en')
     # Merge the candidates of every element flagged as a hit.
     merged = set()
     for element in outcome[0]:
         if element['entity_found'] == 1:
             merged.update(element['entity_candidates'])
     terms = list(merged)
     self.assertIn('Japan', terms)
Exemple #22
0
    def test_extract_jp_sentence(self):
        """
        Test extracting entities from text in Japanase
        """
        test_sentence_jp = u'[TOMMY HILFIGER]3階「TOMMY HILFIGER」\
リニューアルオープン! 6月2日より3階Plaza South「TOMMY HILFIGER」\
がリニューアルオープン! https://t.co/Nf3GTF0OQD #Lazona'

        registry = Registry.find_or_create(inspect.stack()[0][3],
                                           key_prefix='ExtractorTests')
        events = registry.get_dict('events')
        events.load_file(EVENTS_FILE, 'ja')
        engine = Extractor(events)
        results = engine.extract(test_sentence_jp.encode('utf8'), 'ja')
        terms = []
        for element in results[0]:
            if element['entity_found'] == 1:
                terms = list(set(terms).union(element['entity_candidates']))
        self.assertIn('リニューアルオープン', terms)
Exemple #23
0
 def test_extract_document(self):
     """
     Test extracting cities from a Wikipedia article that describes the UN.
     This is a very good example to show problems of ambiguities in selecting
     entities: e.g. Venezuela is both a country and a city, liberty is a
     city and a concept, Roosevelt is a man and a city etc.
     Conclusion: We mostly have a problem of noise in the results not silence
     """
     registry = Registry.find_or_create(inspect.stack()[0][3],
                                        dict_class=CSVDictionary,
                                        key_prefix='ExtractorTests')
     cities = registry.get_dict('cities')
     cities.load_file(CITIES_FILE, 'en')
     # Context manager so the fixture file is closed right after reading
     # (avoids a leaked handle / ResourceWarning during the test run).
     with open(ENGLISH_TEXT) as article:
         text = article.read()
     engine = Extractor(cities)
     results = engine.extract(text, 'en')
     # Merge the candidates of every element flagged as a hit.
     merged = set()
     for element in results[0]:
         if element['entity_found'] == 1:
             merged.update(element['entity_candidates'])
     terms = list(merged)
     self.assertIn('Nairobi', terms)
Exemple #24
0
 def tearDown(self):
     """
     Call Registry.flush for the 'ExtractorTests' prefix after each test
     """
     prefix = 'ExtractorTests'
     Registry.flush(prefix)
Exemple #25
0
 def tearDown(self):
     """
     Call Registry.flush for the 'registry_api_test' prefix, then run the
     AsyncHTTPTestCase tear-down.
     """
     Registry.flush('registry_api_test')
     # super(AsyncHTTPTestCase, self) starts the method lookup *after*
     # AsyncHTTPTestCase, so that class's own tearDown was skipped.
     # Calling it explicitly runs it (and, through its own super() call,
     # presumably the rest of the chain - confirm against Tornado).
     AsyncHTTPTestCase.tearDown(self)
Exemple #26
0
 def tearDown(self):
     """
     Call Registry.flush for the 'DictionaryTests' prefix after each test
     """
     prefix = 'DictionaryTests'
     Registry.flush(prefix)
Exemple #27
0
 def tearDown(self):
     """
     Call Registry.flush for the 'RegistryTests' prefix after each test
     """
     prefix = 'RegistryTests'
     Registry.flush(prefix)
Exemple #28
0
 def test_no_registries(self):
     """
     Registry.list returns an empty list for a prefix nothing was
     created under
     """
     # The test's own name serves as a prefix no other test in view uses.
     unused_prefix = inspect.stack()[0][3]
     self.assertEqual(Registry.list(unused_prefix), [])