def test_entity_finder_ignore_punctuation():
    """Check lookup of "carrot" when the message contains punctuation.

    The message ends in "carrot?"; the result must hold exactly one entry
    under the key "carrot", and that entry must include "CakeType".
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    matches = entity_finder.find_entity_values("I want a cake, maybe carrot?")

    assert len(matches["carrot"]) == 1
    assert "CakeType" in matches["carrot"]
def test_entity_finder_case_insensitive():
    """Check that a lower-case "carrot" in the message is reported.

    The result must hold exactly one entry under the key "carrot", and that
    entry must include "CakeType".
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    matches = entity_finder.find_entity_values("I want a carrot cake")

    assert len(matches["carrot"]) == 1
    assert "CakeType" in matches["carrot"]
def test_entity_finder_multi_word_values():
    """Check that the two-word value "red wine" is found as one match.

    The result must hold exactly one entry under the key "red wine", and
    that entry must include "Drinks".
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    message = "I want some red wine and a cake"
    matches = entity_finder.find_entity_values(message)

    assert len(matches["red wine"]) == 1
    assert "Drinks" in matches["red wine"]
def test_entity_finder_substring_matches():
    """Check that "Diet Coke" yields a single result key.

    Only one key may be present in the result ("Diet Coke"); it must hold
    exactly one entry, which includes "Drinks".
    NOTE(review): presumably the fixture also contains a shorter value such
    as "Coke" that must not be reported separately; confirm in setup_data.
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    matches = entity_finder.find_entity_values("I want a Diet Coke")

    assert len(matches) == 1
    assert len(matches["Diet Coke"]) == 1
    assert "Drinks" in matches["Diet Coke"]
def test_entity_finder_multiple_matches():
    """Check the "Carrot" entry when the value occurs twice in the message.

    The message mentions "Carrot" and "carrot"; the result must hold exactly
    one entry under the key "Carrot", and that entry must include "CakeType".
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    message = "I want a Carrot cake and then more carrot cake"
    matches = entity_finder.find_entity_values(message)

    assert len(matches["Carrot"]) == 1
    assert "CakeType" in matches["Carrot"]
def test_entity_finder_duplicate_matches():
    """Check that "chocolate" is reported for two different entities.

    The result must hold exactly two entries under the key "chocolate":
    "CakeType" and "Biscuit".
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    message = "I want a chocolate cake and a chocolate biscuit"
    matches = entity_finder.find_entity_values(message)

    assert len(matches["chocolate"]) == 2
    for expected_entity in ("CakeType", "Biscuit"):
        assert expected_entity in matches["chocolate"]
def test_entity_finder_delete_cached_entity():
    """Check that deleting cached "CakeType" values removes the Carrot match.

    Before the delete, "Carrot" must be found once under "CakeType". After
    calling delete_cached_entity_values with a "CakeType" entry, the same
    message must produce no matches at all.
    """
    message = "I want a Carrot cake"
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    # Sanity check: the value is present before anything is deleted.
    before = entity_finder.find_entity_values(message)
    assert len(before["Carrot"]) == 1
    assert "CakeType" in before["Carrot"]

    to_delete = {"CakeType": ["Large", "Medium", "Tiny"]}
    entity_finder.delete_cached_entity_values(to_delete)

    after = entity_finder.find_entity_values(message)
    assert len(after) == 0
def __init__(self, minimal_ers_mode=False, language='en'):
    """Set up the logger, the spaCy wrapper and a new EntityFinder.

    Args:
        minimal_ers_mode: forwarded as the first argument to SpacyWrapper.
        language: forwarded as the second argument to SpacyWrapper.
    """
    self.logger = _get_logger()
    wrapper = SpacyWrapper(minimal_ers_mode, language)
    self.spacy_wrapper = wrapper
    # Each server instance starts with its own finder for cached values.
    self.finder = EntityFinder()
class EntityRecognizerServer:
    """Request handlers built on a SpacyWrapper and entity finders.

    The instance owns one ``SpacyWrapper`` (NER and tokenising) and one
    ``EntityFinder`` that holds entity values between requests. Client
    errors (missing parameters, unreadable or incomplete bodies) are
    reported as ``web.HTTPBadRequest``.
    """

    def __init__(self, minimal_ers_mode=False, language='en'):
        """Set up the logger, the spaCy wrapper and a new EntityFinder.

        Args:
            minimal_ers_mode: forwarded as the first argument to SpacyWrapper.
            language: forwarded as the second argument to SpacyWrapper.
        """
        self.logger = _get_logger()
        self.spacy_wrapper = SpacyWrapper(minimal_ers_mode, language)
        self.finder = EntityFinder()

    def initialize(self):
        """Initialize the underlying spaCy wrapper."""
        self.spacy_wrapper.initialize()

    async def _read_json_body(self, request, no_body_message):
        """Return the decoded JSON body of ``request`` or raise a 400.

        Args:
            request: the incoming request.
            no_body_message: warning format string (with one ``%s`` for the
                URL) logged when the request has no readable body.

        Raises:
            web.HTTPBadRequest: if there is no body or it is not valid JSON.
        """
        url = request.url
        if not request.can_read_body:
            self.logger.warning(no_body_message, url)
            raise web.HTTPBadRequest()
        try:
            return await request.json()
        except ValueError:
            # json.JSONDecodeError is a ValueError; malformed input is a
            # client error, not a server failure.
            self.logger.warning('Invalid JSON body, url was %s', url)
            raise web.HTTPBadRequest(reason='Invalid JSON body') from None

    @staticmethod
    def _get_conversation(body):
        """Return ``body['conversation']`` or raise a 400 if it is absent."""
        try:
            return body['conversation']
        except (KeyError, TypeError):
            raise web.HTTPBadRequest(
                reason="Missing 'conversation' in request body") from None

    async def reload(self, request):
        """Reload the spaCy model, e.g. with a different language.

        The JSON body must contain both ``lang`` and ``minimal_ers_mode``;
        otherwise ``web.HTTPBadRequest`` is raised.
        """
        data = await request.json()
        if 'lang' not in data or 'minimal_ers_mode' not in data:
            raise web.HTTPBadRequest()
        size = data['minimal_ers_mode']
        lang = data['lang']
        self.spacy_wrapper.reload_model(minimal_ers_mode=size, language=lang)
        return web.Response()

    async def health(self, request):
        """Health endpoint; always answers with an empty default response."""
        return web.Response()

    async def handle_ner(self, request):
        """Return the entities recognized in the ``q`` query parameter as JSON.

        Raises:
            web.HTTPBadRequest: if the ``q`` query parameter is missing.
        """
        url = request.url
        q = url.query.get('q', None)
        if q is None:
            self.logger.warning(
                'Invalid NER request, no q query parameter, url was %s', url)
            raise web.HTTPBadRequest()
        self.logger.info("Entity request '%s'", q)
        entities, _ = self.spacy_wrapper.get_entities(q)
        self.logger.info("Entities found: '%s'", entities)
        resp = web.json_response(entities, dumps=dumps_custom)
        return resp

    async def handle_tokenize(self, request):
        """Return the tokens of the ``q`` query parameter as JSON.

        Query parameters:
            q: text to tokenize (required).
            filter_ents: treated as true only when it equals "true"
                (case-insensitive); false otherwise.
            sw_size: name of a ``StopWordSize`` member (case-insensitive);
                defaults to ``StopWordSize.SMALL`` when omitted.

        Raises:
            web.HTTPBadRequest: if ``q`` is missing or ``sw_size`` does not
                name a ``StopWordSize`` member.
        """
        url = request.url
        q = url.query.get('q', None)
        filter_ents_str = url.query.get('filter_ents')
        sw_size_str = url.query.get('sw_size')

        filter_ents = (filter_ents_str is not None
                       and filter_ents_str.lower() == "true")

        if sw_size_str is None:
            sw_size = StopWordSize.SMALL
        else:
            try:
                sw_size = StopWordSize[sw_size_str.upper()]
            except KeyError:
                # An unknown size name is a client error; without this the
                # KeyError would escape the handler.
                self.logger.warning(
                    "Invalid tokenize request, unknown sw_size '%s', "
                    "url was %s", sw_size_str, url)
                raise web.HTTPBadRequest(reason='Invalid sw_size') from None

        if q is None:
            self.logger.warning(
                'Invalid tokenize request, no q query parameter, url was %s',
                url)
            raise web.HTTPBadRequest()

        self.logger.info("Tokenize request '%s'", q)
        tokens = self.spacy_wrapper.tokenize(q, filter_ents, sw_size)
        self.logger.info("Tokens found: '%s'", tokens)
        resp = web.json_response(tokens)
        return resp

    async def handle_findentities(self, request):
        """Return the supplied conversation with the entities found in it.

        The JSON body may carry ``entities`` and ``regex_entities`` and must
        carry ``conversation``. The response is a JSON object with the keys
        ``conversation`` and ``entities``.

        Raises:
            web.HTTPBadRequest: if the body is missing or not valid JSON, a
                supplied regex is rejected, or ``conversation`` is absent.
        """
        body = await self._read_json_body(
            request,
            'Invalid NER findentities request, no body found, url was %s')
        self.logger.info("Find entity request, populating entities")

        # Note that this version does not persist entity values,
        # so use a temporary instance of the finder
        legacy_finder = LegacyEntityFinder()
        regex_good = True
        if 'entities' in body:
            self.logger.info("List entities found")
            legacy_finder.setup_entity_values(body['entities'])
        if 'regex_entities' in body:
            self.logger.info("Regex entities found")
            regex_good = legacy_finder.setup_regex_entities(
                body['regex_entities'])
        if not regex_good:
            self.logger.info('Invalid regex found in findentities')
            raise web.HTTPBadRequest(reason='Invalid regex found')
        else:
            self.logger.info('No regex submitted or regex compiled')

        self.logger.info("Find entity request, matching entities")
        conversation = self._get_conversation(body)
        values = legacy_finder.find_entity_values(conversation)
        data = {'conversation': conversation, 'entities': values}
        resp = web.json_response(data)
        return resp

    @profile
    async def populate_entities(self, request):
        """Add the ``entities`` from the JSON body to the cached finder.

        ``regex_entities`` in the body are logged and ignored.

        Raises:
            web.HTTPBadRequest: if the body is missing or not valid JSON.
        """
        body = await self._read_json_body(
            request,
            'Invalid populate_entities request, no body found, url was %s')
        self.logger.info("Populating entities")
        if 'entities' in body:
            self.logger.info("List entities found")
            self.finder.setup_cached_entity_values(body['entities'])
        if 'regex_entities' in body:
            self.logger.info("Regex entities supplied but ignored")
        return web.Response()

    async def delete_entities(self, request):
        """Delete the ``entities`` in the JSON body from the cached finder.

        ``regex_entities`` in the body are logged and ignored.

        Raises:
            web.HTTPBadRequest: if the body is missing or not valid JSON.
        """
        body = await self._read_json_body(
            request,
            'Invalid delete_entities request, no body found, url was %s')
        self.logger.info("Deleting entities")
        if 'entities' in body:
            self.logger.info("List entities found")
            self.finder.delete_cached_entity_values(body['entities'])
        if 'regex_entities' in body:
            self.logger.info("Regex entities supplied but ignored")
        return web.Response()

    async def entity_check(self, request):
        """Match the body's ``conversation`` against the cached finder.

        The response is a JSON object with the keys ``conversation`` and
        ``entities``.

        Raises:
            web.HTTPBadRequest: if the body is missing, not valid JSON, or
                has no ``conversation``.
        """
        body = await self._read_json_body(
            request,
            'Invalid entity_check request, no body found, url was %s')
        self.logger.info("entity_check request, matching entities")
        conversation = self._get_conversation(body)
        values = self.finder.find_entity_values(conversation)
        data = {'conversation': conversation, 'entities': values}
        resp = web.json_response(data)
        return resp

    async def reset(self, request):
        """Replace the cached finder with a new, unpopulated EntityFinder."""
        self.finder = EntityFinder()
        return web.Response()
async def reset(self, request):
    """Swap in a new EntityFinder and answer with a default response.

    The ``request`` argument is not inspected.
    """
    fresh_finder = EntityFinder()
    self.finder = fresh_finder
    return web.Response()
def test_entity_finder_no_matches():
    """Check that "I want a cake" produces an empty result for the fixture."""
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    matches = entity_finder.find_entity_values("I want a cake")

    assert len(matches) == 0
def test_entity_finder_no_entities():
    """Check that a finder set up with an empty dict finds nothing."""
    entity_finder = EntityFinder()
    no_values = {}
    entity_finder.setup_cached_entity_values(no_values)

    matches = entity_finder.find_entity_values("I want a Carrot cake")

    assert len(matches) == 0
def test_entity_finder_split_message():
    """Check that split_message returns six items for "This is short".

    NOTE(review): six is presumably every contiguous word sequence of the
    three-word message (3 + 2 + 1); confirm against split_message.
    """
    entity_finder = EntityFinder()

    pieces = entity_finder.split_message("This is short")

    assert len(pieces) == 6