Beispiel #1
0
def test_entity_finder_ignore_punctuation():
    """A value followed by punctuation ("carrot?") is still matched.

    The finder is loaded with the fixture from ``setup_data()`` and the
    match for "carrot" must list exactly one entity, "CakeType".
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    matches = entity_finder.find_entity_values("I want a cake, maybe carrot?")
    carrot_entities = matches["carrot"]

    assert len(carrot_entities) == 1
    assert "CakeType" in carrot_entities
Beispiel #2
0
def test_entity_finder_case_insensitive():
    """A lower-case "carrot" in the message is reported as a CakeType.

    Presumably the fixture stores the value with different casing; this
    test only pins that the lower-case spelling yields exactly one entity.
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    matches = entity_finder.find_entity_values("I want a carrot cake")
    carrot_entities = matches["carrot"]

    assert len(carrot_entities) == 1
    assert "CakeType" in carrot_entities
Beispiel #3
0
def test_entity_finder_multi_word_values():
    """A two-word value ("red wine") is matched as a single Drinks entry."""
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    message = "I want some red wine and a cake"
    matches = entity_finder.find_entity_values(message)
    red_wine_entities = matches["red wine"]

    assert len(red_wine_entities) == 1
    assert "Drinks" in red_wine_entities
Beispiel #4
0
def test_entity_finder_substring_matches():
    """Only the full value "Diet Coke" is reported for the message.

    The result must contain a single key, so no shorter value contained
    in "Diet Coke" may be reported alongside it.
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    matches = entity_finder.find_entity_values("I want a Diet Coke")

    # Exactly one matched value overall ...
    assert len(matches) == 1
    # ... and it maps to exactly one entity, "Drinks".
    diet_coke_entities = matches["Diet Coke"]
    assert len(diet_coke_entities) == 1
    assert "Drinks" in diet_coke_entities
Beispiel #5
0
def test_entity_finder_multiple_matches():
    """A value occurring twice in the message is reported once per entity.

    "Carrot" and "carrot" both appear in the message; the entry under
    "Carrot" must still contain exactly one entity, "CakeType".
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    message = "I want a Carrot cake and then more carrot cake"
    matches = entity_finder.find_entity_values(message)
    carrot_entities = matches["Carrot"]

    assert len(carrot_entities) == 1
    assert "CakeType" in carrot_entities
Beispiel #6
0
def test_entity_finder_duplicate_matches():
    """A value shared by two entities is reported under both of them.

    "chocolate" must map to exactly two entities: "CakeType" and "Biscuit".
    """
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    message = "I want a chocolate cake and a chocolate biscuit"
    matches = entity_finder.find_entity_values(message)
    chocolate_entities = matches["chocolate"]

    assert len(chocolate_entities) == 2
    assert "CakeType" in chocolate_entities
    assert "Biscuit" in chocolate_entities
Beispiel #7
0
def test_entity_finder_delete_cached_entity():
    """Deleting the CakeType entity removes its cached matches.

    The same message is looked up before and after the delete call: first
    "Carrot" resolves to "CakeType", afterwards nothing is found at all.
    """
    message = "I want a Carrot cake"
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    # Before deletion the value is found.
    matches_before = entity_finder.find_entity_values(message)
    carrot_entities = matches_before["Carrot"]
    assert len(carrot_entities) == 1
    assert "CakeType" in carrot_entities

    entities_to_delete = {"CakeType": ["Large", "Medium", "Tiny"]}
    entity_finder.delete_cached_entity_values(entities_to_delete)

    # After deletion the same message yields no matches.
    matches_after = entity_finder.find_entity_values(message)
    assert len(matches_after) == 0
Beispiel #8
0
 def __init__(self, minimal_ers_mode=False, language='en'):
     """Set up the logger, the spaCy wrapper and a fresh entity finder.

     ``minimal_ers_mode`` and ``language`` are passed through unchanged
     to ``SpacyWrapper``.
     """
     logger = _get_logger()
     wrapper = SpacyWrapper(minimal_ers_mode, language)
     entity_finder = EntityFinder()

     self.logger = logger
     self.spacy_wrapper = wrapper
     self.finder = entity_finder
Beispiel #9
0
class EntityRecognizerServer:
    """Request handlers for the entity recogniser service.

    Wraps a ``SpacyWrapper`` (entity recognition and tokenisation) and an
    ``EntityFinder`` (lookup against cached entity values) behind async
    handlers that take a request object and return ``web`` responses.
    Invalid client input is reported as ``web.HTTPBadRequest``.
    """

    def __init__(self, minimal_ers_mode=False, language='en'):
        """Create the logger, the spaCy wrapper and a fresh entity finder.

        Both arguments are forwarded unchanged to ``SpacyWrapper``.
        """
        self.logger = _get_logger()
        self.spacy_wrapper = SpacyWrapper(minimal_ers_mode, language)
        self.finder = EntityFinder()

    def initialize(self):
        """Initialize the underlying spaCy wrapper."""
        self.spacy_wrapper.initialize()

    async def _read_json_body(self, request, request_name):
        """Return the decoded JSON body of *request*.

        *request_name* is only used to label the log line.

        Raises:
            web.HTTPBadRequest: the request has no body, or the body is not
                valid JSON.
        """
        if not request.can_read_body:
            self.logger.warning(
                'Invalid %s request, no body found, url was %s',
                request_name, request.url)
            raise web.HTTPBadRequest()
        try:
            return await request.json()
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass; a malformed
            # body is a client error, not a server failure.
            self.logger.warning(
                'Invalid %s request, body is not valid JSON, url was %s',
                request_name, request.url)
            raise web.HTTPBadRequest(reason='Body is not valid JSON')

    def _require_conversation(self, body, request_name):
        """Return ``body['conversation']`` or raise ``web.HTTPBadRequest``."""
        if not isinstance(body, dict) or 'conversation' not in body:
            self.logger.warning(
                "Invalid %s request, no 'conversation' field in body",
                request_name)
            raise web.HTTPBadRequest(reason="Missing 'conversation' field")
        return body['conversation']

    async def reload(self, request):
        """
        allows loading a spacy model with, e.g. a different language

        The JSON body must contain both 'lang' and 'minimal_ers_mode';
        otherwise the request is rejected with ``web.HTTPBadRequest``.
        """
        data = await self._read_json_body(request, 'reload')
        if (not isinstance(data, dict) or 'lang' not in data
                or 'minimal_ers_mode' not in data):
            raise web.HTTPBadRequest()
        minimal_ers_mode = data['minimal_ers_mode']
        lang = data['lang']
        self.spacy_wrapper.reload_model(
            minimal_ers_mode=minimal_ers_mode, language=lang)
        return web.Response()

    async def health(self, request):
        """
        health endpoint, just respond 200
        """
        return web.Response()

    async def handle_ner(self, request):
        """
        the function returns a collection of recognized entities as JSON response

        Reads the text from the 'q' query parameter; a missing 'q' is
        rejected with ``web.HTTPBadRequest``.
        """
        url = request.url
        q = url.query.get('q', None)
        if q is None:
            self.logger.warning(
                'Invalid NER request, no q query parameter, url was %s', url)
            raise web.HTTPBadRequest()

        self.logger.info("Entity request '%s'", q)
        # get_entities returns a pair; only the first element is served.
        entities, _ = self.spacy_wrapper.get_entities(q)
        self.logger.info("Entities found: '%s'", entities)
        return web.json_response(entities, dumps=dumps_custom)

    async def handle_tokenize(self, request):
        """
        the function returns the tokens of the supplied text as JSON response

        Query parameters:
            q: text to tokenize (required).
            filter_ents: "true" (any case) enables entity filtering;
                anything else, or absence, disables it.
            sw_size: name of a ``StopWordSize`` member (any case);
                defaults to ``StopWordSize.SMALL`` when absent.

        Raises:
            web.HTTPBadRequest: 'q' is missing or 'sw_size' does not name
                a ``StopWordSize`` member.
        """
        url = request.url
        q = url.query.get('q', None)
        # Validate the mandatory parameter before looking at optional ones.
        if q is None:
            self.logger.warning(
                'Invalid tokenize request, no q query parameter, url was %s',
                url)
            raise web.HTTPBadRequest()

        filter_ents_str = url.query.get('filter_ents')
        filter_ents = (filter_ents_str is not None
                       and filter_ents_str.lower() == "true")

        sw_size_str = url.query.get('sw_size')
        if sw_size_str is None:
            sw_size = StopWordSize.SMALL
        else:
            try:
                sw_size = StopWordSize[sw_size_str.upper()]
            except KeyError:
                # An unknown name is a client error; without this the
                # KeyError would surface as an internal server error.
                self.logger.warning(
                    "Invalid tokenize request, unknown sw_size '%s'",
                    sw_size_str)
                raise web.HTTPBadRequest(reason='Invalid sw_size')

        self.logger.info("Tokenize request '%s'", q)
        tokens = self.spacy_wrapper.tokenize(q, filter_ents, sw_size)
        self.logger.info("Tokens found: '%s'", tokens)
        return web.json_response(tokens)

    async def handle_findentities(self, request):
        """
        the function returns the supplied chat text with the entities identified

        The JSON body must contain 'conversation' and may contain
        'entities' and 'regex_entities'. Nothing is persisted: a temporary
        ``LegacyEntityFinder`` is built for each request.

        Raises:
            web.HTTPBadRequest: no body, malformed JSON, missing
                'conversation', or a regex that failed to set up.
        """
        body = await self._read_json_body(request, 'NER findentities')
        conversation = self._require_conversation(body, 'NER findentities')

        self.logger.info("Find entity request, populating entities")
        # Note that this version does not persist entity values,
        # so use a temporary instance of the finder
        legacy_finder = LegacyEntityFinder()
        regex_good = True
        if 'entities' in body:
            self.logger.info("List entities found")
            legacy_finder.setup_entity_values(body['entities'])
        if 'regex_entities' in body:
            self.logger.info("Regex entities found")
            regex_good = legacy_finder.setup_regex_entities(
                body['regex_entities'])

        if not regex_good:
            self.logger.info('Invalid regex found in findentities')
            raise web.HTTPBadRequest(reason='Invalid regex found')
        self.logger.info('No regex submitted or regex compiled')

        self.logger.info("Find entity request, matching entities")
        values = legacy_finder.find_entity_values(conversation)
        data = {'conversation': conversation, 'entities': values}
        return web.json_response(data)

    @profile
    async def populate_entities(self, request):
        """
        populates the entity tries

        Loads body['entities'] (when present) into the cached finder;
        'regex_entities' is accepted but ignored.
        """
        body = await self._read_json_body(request, 'populate_entities')

        self.logger.info("Populating entities")
        if 'entities' in body:
            self.logger.info("List entities found")
            self.finder.setup_cached_entity_values(body['entities'])
        if 'regex_entities' in body:
            self.logger.info("Regex entities supplied but ignored")

        return web.Response()

    async def delete_entities(self, request):
        """
        deletes entities from the cached entity finder

        Removes body['entities'] (when present) from the cached finder;
        'regex_entities' is accepted but ignored.
        """
        body = await self._read_json_body(request, 'delete_entities')

        self.logger.info("Deleting entities")
        if 'entities' in body:
            self.logger.info("List entities found")
            self.finder.delete_cached_entity_values(body['entities'])
        if 'regex_entities' in body:
            self.logger.info("Regex entities supplied but ignored")

        return web.Response()

    async def entity_check(self, request):
        """
        looks for matching entities

        Matches body['conversation'] against the cached finder and returns
        the conversation together with the entities found.

        Raises:
            web.HTTPBadRequest: no body, malformed JSON, or missing
                'conversation'.
        """
        body = await self._read_json_body(request, 'entity_check')
        conversation = self._require_conversation(body, 'entity_check')

        self.logger.info("entity_check request, matching entities")
        values = self.finder.find_entity_values(conversation)
        data = {'conversation': conversation, 'entities': values}
        return web.json_response(data)

    async def reset(self, request):
        """Discard all cached entity values by installing a new finder."""
        self.finder = EntityFinder()
        return web.Response()
Beispiel #10
0
 async def reset(self, request):
     """Replace the finder with a new ``EntityFinder`` and reply with an empty response."""
     replacement_finder = EntityFinder()
     self.finder = replacement_finder
     response = web.Response()
     return response
Beispiel #11
0
def test_entity_finder_no_matches():
    """A message without any value from the fixture yields no matches."""
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(setup_data())

    matches = entity_finder.find_entity_values("I want a cake")

    assert len(matches) == 0
Beispiel #12
0
def test_entity_finder_no_entities():
    """A finder loaded with an empty entity mapping never reports a match."""
    no_entities = {}
    entity_finder = EntityFinder()
    entity_finder.setup_cached_entity_values(no_entities)

    matches = entity_finder.find_entity_values("I want a Carrot cake")

    assert len(matches) == 0
Beispiel #13
0
def test_entity_finder_split_message():
    """Splitting a three-word message yields six entries.

    Presumably every contiguous word sequence is produced (three single
    words, two pairs, one triple); this test only pins the count.
    """
    entity_finder = EntityFinder()
    pieces = entity_finder.split_message("This is short")
    assert len(pieces) == 6