Esempio n. 1
0
class EntityTaggerTest(unittest.TestCase):
    """Tests for EntityTagger: literal trie matches and regex entities."""

    def setUp(self):
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, EnglishTokenizer())
        self.trie.insert("play", "PlayVerb")
        self.trie.insert("the big bang theory", "Television Show")
        self.trie.insert("the big", "Not a Thing")

    def tearDown(self):
        pass

    def test_tag(self):
        """Each literal trie entry that occurs in the utterance yields a tag."""
        tags = list(self.tagger.tag("play season 1 of the big bang theory"))
        assert len(tags) == 3

    def test_regex_tag(self):
        """A named regex group ('Event') is tagged alongside trie matches."""
        regex = re.compile(r"the (?P<Event>\w+\s\w+) theory")
        tagger = EntityTagger(self.trie, EnglishTokenizer(), regex_entities=[regex])
        # Materialize the result, as test_tag does: len() would fail if
        # tag() returns a generator.
        tags = list(tagger.tag("the big bang theory"))
        assert len(tags) == 3
        event_tags = [tag for tag in tags if tag.get('match') == 'big bang']
        assert len(event_tags) == 1
        assert len(event_tags[0].get('entities')) == 1
        assert len(event_tags[0].get('entities')[0].get('data')) == 1
        assert ('big bang', 'Event') in event_tags[0].get('entities')[0].get('data')

    def test_start_end_token_match_when_sorting_tagged_entities(self):
        """Regression test: sorting tags with equal start/end tokens must not crash (py3)."""
        repro_payload = [{"end_token": 1, "key": "1", "entities": [{"key": "1", "data": [["1", "Which"]], "confidence": 0.5, "match": "1"}], "start_token": 1, "match": "1"}, {"end_token": 1, "key": "1", "entities": [{"key": "1", "data": [["1", "Which"]], "confidence": 0.5, "match": "1"}], "start_token": 1, "match": "1"}, {"end_token": 1, "key": "1", "entities": [{"key": "1", "data": [["1", "Which"]], "confidence": 0.5, "match": "1"}], "start_token": 1, "match": "1"}, {"end_token": 1, "key": "1", "entities": [{"key": "1", "data": [["1", "Which"]], "confidence": 0.5, "match": "1"}], "start_token": 1, "match": "1"}, {"end_token": 3, "key": "20", "entities": [{"key": "20", "data": [["20", "SnoozeTime"]], "confidence": 0.5, "match": "20"}], "start_token": 3, "match": "20"}, {"end_token": 4, "key": "20 minutes", "entities": [{"key": "20 minutes", "data": [["20 minutes", "SnoozeTime"]], "confidence": 0.5, "match": "20 minutes"}], "start_token": 3, "match": "20 minutes"}, {"end_token": 3, "key": "20", "entities": [{"key": "20", "data": [["20", "Which"]], "confidence": 0.5, "match": "20"}], "start_token": 3, "match": "20"}, {"end_token": 3, "key": "20", "entities": [{"key": "20", "data": [["20", "Which"]], "confidence": 0.5, "match": "20"}], "start_token": 3, "match": "20"}, {"end_token": 0, "key": "snooze", "entities": [{"key": "snooze", "data": [["snooze", "SnoozeKeyword"]], "confidence": 1.0, "match": "snooze"}], "start_token": 0, "match": "snooze"}, {"end_token": 2, "key": "for", "entities": [{"key": "for", "data": [["for", "SnoozeFiller"]], "confidence": 1.0, "match": "for"}], "start_token": 2, "match": "for"}]
        # just asserting that the sort does not crash in py3
        self.tagger._sort_and_merge_tags(repro_payload)
Esempio n. 2
0
 def test_regex_tag(self):
     """A named regex group ('Event') produces an entity tag alongside trie hits."""
     event_pattern = re.compile(r"the (?P<Event>\w+\s\w+) theory")
     regex_tagger = EntityTagger(self.trie, EnglishTokenizer(),
                                 regex_entities=[event_pattern])
     tags = regex_tagger.tag("the big bang theory")
     assert len(tags) == 3
     matched = [t for t in tags if t.get('match') == 'big bang']
     assert len(matched) == 1
     entities = matched[0].get('entities')
     assert len(entities) == 1
     entity_data = entities[0].get('data')
     assert len(entity_data) == 1
     assert 'Event' in entity_data
Esempio n. 3
0
 def setUp(self):
     """Build a fuzzy-matching trie (edit distance 2) and its tagger."""
     self.tokenizer = EnglishTokenizer()
     self.trie = Trie(max_edit_distance=2)
     # Insertion order is preserved, so duplicate keys ("play") keep
     # their registration order.
     vocabulary = [
         ("x-play", "Television Show"),
         ("play", "Play Verb"),
         ("play season", "Time Period"),
         ("play", "Player Control"),
         ("season", "Season Prefix"),
         ("1", "Number"),
         ("the big bang theory", "Television Show"),
         ("the big", "Television Show"),
         ("big bang", "event"),
         ("bang theory", "Scientific Theory"),
     ]
     for phrase, entity_type in vocabulary:
         self.trie.insert(phrase, entity_type)
     self.tagger = EntityTagger(self.trie, self.tokenizer)
Esempio n. 4
0
class EntityTaggerTest(unittest.TestCase):
    """Tests for EntityTagger: literal trie matches and regex entities."""

    def setUp(self):
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, EnglishTokenizer())
        self.trie.insert("play", "PlayVerb")
        self.trie.insert("the big bang theory", "Television Show")
        self.trie.insert("the big", "Not a Thing")

    def tearDown(self):
        pass

    def test_tag(self):
        """Each literal trie entry that occurs in the utterance yields a tag."""
        tags = list(self.tagger.tag("play season 1 of the big bang theory"))
        assert len(tags) == 3

    def test_regex_tag(self):
        """A named regex group ('Event') is tagged alongside trie matches."""
        regex = re.compile(r"the (?P<Event>\w+\s\w+) theory")
        tagger = EntityTagger(self.trie, EnglishTokenizer(), regex_entities=[regex])
        # Materialize the result, as test_tag does: len() would fail if
        # tag() returns a generator.
        tags = list(tagger.tag("the big bang theory"))
        assert len(tags) == 3
        event_tags = [tag for tag in tags if tag.get('match') == 'big bang']
        assert len(event_tags) == 1
        assert len(event_tags[0].get('entities')) == 1
        assert len(event_tags[0].get('entities')[0].get('data')) == 1
        assert 'Event' in event_tags[0].get('entities')[0].get('data')
Esempio n. 5
0
 def __init__(self, tokenizer=None, trie=None):
     """
     Initialize the engine.

     Args:
         tokenizer: tokenizer used to break up spoken text;
             defaults to EnglishTokenizer()
         trie(Trie): trie of entity matches; defaults to an empty Trie()
     """
     pyee.EventEmitter.__init__(self)
     self.tokenizer = tokenizer or EnglishTokenizer()
     self.trie = trie or Trie()
     # The tagger below holds a reference to this list, so patterns
     # appended later are visible to it.
     self.regular_expressions_entities = []
     self._regex_strings = set()
     self.tagger = EntityTagger(self.trie, self.tokenizer, self.regular_expressions_entities)
     self.intent_parsers = []
Esempio n. 6
0
 def setUp(self):
     """Create the tokenizer/trie/tagger/parser fixtures for parser tests."""
     self.trie = Trie()
     self.tokenizer = EnglishTokenizer()
     self.regex_entities = []
     self.tagger = EntityTagger(self.trie, self.tokenizer,
                                regex_entities=self.regex_entities)
     # Each trie value is a (phrase, entity_type) pair keyed by the phrase.
     for phrase, entity_type in [
             ("play", "PlayVerb"),
             ("the big bang theory", "Television Show"),
             ("the big", "Not a Thing"),
             ("barenaked ladies", "Radio Station")]:
         self.trie.insert(phrase, (phrase, entity_type))
     self.parser = Parser(self.tokenizer, self.tagger)
Esempio n. 7
0
 def setUp(self):
     """Build a fuzzy-matching trie (edit distance 2) and its tagger."""
     self.tokenizer = EnglishTokenizer()
     self.trie = Trie(max_edit_distance=2)
     # Kept in registration order; "play" is deliberately inserted twice.
     entries = (
         ("x-play", "Television Show"),
         ("play", "Play Verb"),
         ("play season", "Time Period"),
         ("play", "Player Control"),
         ("season", "Season Prefix"),
         ("1", "Number"),
         ("the big bang theory", "Television Show"),
         ("the big", "Television Show"),
         ("big bang", "event"),
         ("bang theory", "Scientific Theory"),
     )
     for key, value in entries:
         self.trie.insert(key, value)
     self.tagger = EntityTagger(self.trie, self.tokenizer)
Esempio n. 8
0
    def __init__(self, tokenizer=None, trie=None):
        """
        Initialize the IntentDeterminationEngine

        Args:
            tokenizer(tokenizer) : tokenizer used to break up spoken text
                example EnglishTokenizer()
            trie(Trie): tree of matches to Entities
        """
        pyee.EventEmitter.__init__(self)
        self.tokenizer = tokenizer or EnglishTokenizer()
        self.trie = trie or Trie()
        # The tagger below holds a reference to this list, so regex
        # patterns appended later are visible to it.
        self.regular_expressions_entities = []
        self._regex_strings = set()
        self.tagger = EntityTagger(self.trie, self.tokenizer, self.regular_expressions_entities)
        self.intent_parsers = []
Esempio n. 9
0
    def test_intent_with_regex_entity(self):
        """An intent can require both a regex entity and a trie entity."""
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, self.tokenizer, self.regex_entities)
        self.parser = Parser(self.tokenizer, self.tagger)
        self.trie.insert("theory", ("theory", "Concept"))
        self.regex_entities.append(re.compile(r"the (?P<Event>.*)"))
        intent = (IntentBuilder("mock intent")
                  .require("Event")
                  .require("Concept")
                  .build())

        for result in self.parser.parse("the big bang theory"):
            parsed_intent = intent.validate(result.get('tags'),
                                            result.get('confidence'))
            assert parsed_intent.get('confidence') > 0.0
            assert parsed_intent.get('Event') == 'big bang'
            assert parsed_intent.get('Concept') == "theory"
Esempio n. 10
0
 def __init__(self):
     """Build trie, tagger, parser, and a sample intent fixture."""
     self.trie = Trie()
     self.tokenizer = EnglishTokenizer()
     self.regex_entities = []
     self.tagger = EntityTagger(self.trie,
                                self.tokenizer,
                                regex_entities=self.regex_entities)
     # Insertion order preserved; duplicate keys ("play", "all that")
     # accumulate multiple entity types.
     for phrase, entity_type in [
             ("play", "PlayVerb"),
             ("play", "Command"),
             ("the big bang theory", "Television Show"),
             ("all that", "Television Show"),
             ("all that", "Radio Station"),
             ("the big", "Not a Thing"),
             ("barenaked ladies", "Radio Station"),
             ("show", "Command"),
             ("what", "Question")]:
         self.trie.insert(phrase, (phrase, entity_type))
     self.parser = Parser(self.tokenizer, self.tagger)
     self.intent = IntentBuilder("Test Intent").require(
         "PlayVerb").one_of("Television Show", "Radio Station").build()
Esempio n. 11
0
class BronKerboschExpanderTest(unittest.TestCase):
    """Tests for BronKerboschExpander clique-based expansion of tagged entities."""

    def setUp(self):
        self.tokenizer = EnglishTokenizer()
        self.trie = Trie(max_edit_distance=2)
        self.trie.insert("x-play", "Television Show")
        self.trie.insert("play", "Play Verb")
        self.trie.insert("play season", "Time Period")
        self.trie.insert("play", "Player Control")
        self.trie.insert("season", "Season Prefix")
        self.trie.insert("1", "Number")
        self.trie.insert("the big bang theory", "Television Show")
        self.trie.insert("the big", "Television Show")
        self.trie.insert("big bang", "event")
        self.trie.insert("bang theory", "Scientific Theory")
        self.tagger = EntityTagger(self.trie, self.tokenizer)

    def testExpander(self):
        """With fuzzy matching disabled, the utterance expands to 6 parses."""
        self.tagger.trie.max_edit_distance = 0
        tags = self.tagger.tag("play season 1 of the big bang theory")
        expander = BronKerboschExpander(self.tokenizer)
        parse_results = list(expander.expand(tags))
        assert len(parse_results) == 6

    def testExpandedResult(self):
        """A short utterance yields a single two-tag parse."""
        tags = self.tagger.tag("season 1")
        expander = BronKerboschExpander(self.tokenizer)
        parse_results = list(expander.expand(tags))
        assert len(parse_results) == 1
        assert len(parse_results[0]) == 2

    def testConsistentExpandWithSameOverlapMultipleTimes(self):
        """
        example: play season 1 of the big bang theory play season one of the big bang theory
        series should contain two instances of the big bang theory
        :return:
        """
        utterance = "play season 1 of the big bang theory"
        tags = self.tagger.tag(utterance)

        def score_clique(clique):
            # Sum of (confidence * match length), normalized by utterance length.
            score = 0.0
            for tagged_entity in clique:
                ec = tagged_entity.get('entities', [{
                    'confidence': 0.0
                }])[0].get('confidence')
                score += ec * len(
                    tagged_entity.get('entities', [{
                        'match': ''
                    }])[0].get('match')) / (len(utterance) + 1)
            return score

        expander = BronKerboschExpander(self.tokenizer)
        parse_results = list(
            expander.expand(tags, clique_scoring_func=score_clique))
        assert len(parse_results) == 6
        # The best-scoring parse should cover the whole utterance (minus "of").
        # (A previously computed 'result_parse' string was never asserted;
        # the dead local has been removed.)
        result_text = ' '.join(
            [tag.get('entities')[0].get('key') for tag in parse_results[0]])

        assert result_text == 'play season 1 the big bang theory'

    def testExpandWithRegexAndLiteralTokenMatch(self):
        # two tags for the same token, different confidence, should expand to two cliques
        tags = [{'end_token': 0,
                 'start_token': 0,
                 'key': u'spell',
                 'match': u'spell',
                 'entities': [{'confidence': 0.5,
                               'data': [u'SearchTerms'],
                               'match': u'spell',
                               'key': u'spell'}]},
                {'end_token': 0,
                 'start_token': 0,
                 'key': u'spell',
                 'match': u'spell',
                 'entities': [{'confidence': 1.0,
                               'data': [u'SpellingKeyword'],
                               'match': u'spell',
                               'key': u'spell'}]}]

        expander = BronKerboschExpander(self.tokenizer)

        cliques = list(expander._sub_expand(tags))
        assert len(cliques) == 2
Esempio n. 12
0
class AdaptTTIPlugin(plugin.TTIPlugin):
    """Text-to-intent plugin backed by the Adapt intent parser.

    Loads intent definitions into an internal intent map, registers their
    words/keywords/regexes with an Adapt IntentDeterminationEngine, and
    uses that engine to match spoken phrases to intents.
    """

    # Shared Adapt components: class-level attributes, so every instance
    # of the plugin uses the same engine and vocabulary.
    tokenizer = EnglishTokenizer()
    trie = Trie()
    tagger = EntityTagger(trie, tokenizer)
    parser = Parser(tokenizer, tagger)
    engine = IntentDeterminationEngine()

    def add_word(self, intent, word):
        """Register *word* for *intent* with the Adapt engine.

        A word wrapped in keyword markers (per is_keyword) registers every
        word of the named keyword collection, and any regexes, under an
        "<intent>_<keyword>" token. A plain word is registered under a
        generated "<intent>_<index>" token.

        Returns:
            str: the keyword token name used for registration.
        """
        # Check if this is a collection
        if is_keyword(word):
            keyword_name = "{}_{}".format(intent, word[1:][:-1])
            # print("Registering words for '{}'".format(keyword_name))
            # This doesn't have to exist:
            if keyword_name in self.keywords:
                for keyword_word in self.keywords[keyword_name]['words']:
                    # print("Registering '{}'".format(keyword_word))
                    self.engine.register_entity(keyword_word, keyword_name)
            if keyword_name in self.regex:
                for regex in self.regex[keyword_name]:
                    self.engine.register_regex_entity(regex)
        else:
            # Just register the word as a required word
            self.keyword_index += 1
            keyword_name = "{}_{}".format(intent,
                                          makeindex(self.keyword_index))
            # print("Registering word '{}' as {}".format(word,keyword_name))
            self.engine.register_entity(word, keyword_name)
        return keyword_name

    def add_intents(self, intents):
        """Load intent definitions into self.intent_map.

        For each intent: resolve a locale, collect its keyword lists and
        regexes, store its templates, and count word occurrences. A word
        counted in every template of an intent is marked 'required'.
        """
        for intent in intents:
            # print("Adding intent {}".format(intent))
            # this prevents collisions between intents
            intent_base = intent
            intent_inc = 0
            locale = profile.get("language")
            while intent in self.intent_map['intents']:
                intent_inc += 1
                intent = "{}{}".format(intent_base, intent_inc)
            if ('locale' in intents[intent_base]):
                # If the selected locale is not available, try matching just
                # the language ("en-US" -> "en")
                if (locale not in intents[intent_base]['locale']):
                    for language in intents[intent_base]['locale']:
                        if (language[:2] == locale[:2]):
                            locale = language
                            break
            # NOTE(review): duplicate of the while-loop above; nothing in
            # between modifies intent_map, so this re-check appears
            # redundant — confirm before removing.
            while intent in self.intent_map['intents']:
                intent_inc += 1
                intent = "{}{}".format(intent_base, intent_inc)
            if ('keywords' in intents[intent_base]['locale'][locale]):
                for keyword in intents[intent_base]['locale'][locale][
                        'keywords']:
                    keyword_token = "{}_{}".format(intent, keyword)
                    self.keywords[keyword_token] = {
                        'words':
                        intents[intent_base]['locale'][locale]['keywords']
                        [keyword],
                        'name':
                        keyword
                    }
            if ('regex' in intents[intent_base]['locale'][locale]):
                for regex_name in intents[intent_base]['locale'][locale][
                        'regex']:
                    regex_token = "{}_{}".format(intent, regex_name)
                    self.regex[regex_token] = []
                    for regex in intents[intent_base]['locale'][locale][
                            'regex'][regex_name]:
                        self.regex[regex_token].append(
                            regex.replace(regex_name, regex_token))
                # pprint(self.regex)
            self.intent_map['intents'][intent] = {
                'action': intents[intent_base]['action'],
                'name': intent_base,
                'templates': [],
                'words': {}
            }
            for phrase in intents[intent_base]['locale'][locale]['templates']:
                # Save the phrase so we can search for undefined keywords
                self.intent_map['intents'][intent]['templates'].append(phrase)
                # Make a count of word frequency. The fact that small connector
                # type words sometimes appear multiple times in a single
                # sentence while the focal words usually only appear once is
                # giving too much weight to those connector words.
                words = list(set(phrase.split()))
                for word in words:
                    if not is_keyword(word):
                        word = word.upper()
                    # Count the number of times the word appears in this intent
                    try:
                        self.intent_map['intents'][intent]['words'][word][
                            'count'] += 1
                    except KeyError:
                        self.intent_map['intents'][intent]['words'][word] = {
                            'count': 1,
                            'weight': None,
                            'required': False
                        }
                    # Count the number of intents the word appears in
                    try:
                        self.words[word].update({intent: True})
                    except KeyError:
                        self.words[word] = {intent: True}
            # for each word in each intent, divide the word frequency by the number of examples.
            # Since a word is only counted once per example, regardless of how many times it appears,
            # if the number of times it was counted matches the number of examples, then
            # this is a "required" word.
            phrase_count = len(
                intents[intent_base]['locale'][locale]['templates'])
            for word in self.intent_map['intents'][intent]['words']:
                # print("Word: '{}' Count: {} Phrases: {} Weight: {}".format(word, self.intent_map['intents'][intent]['words'][word], phrase_count, weight(self.intent_map['intents'][intent]['words'][word], phrase_count)))
                Weight = weight(
                    self.intent_map['intents'][intent]['words'][word]['count'],
                    phrase_count)
                self.intent_map['intents'][intent]['words'][word][
                    'weight'] = Weight
                if Weight == 1:
                    self.intent_map['intents'][intent]['words'][word][
                        'required'] = True

    # Call train after loading all the intents.
    def train(self):
        """Finalize word weights and register one Adapt parser per intent.

        Down-weights words that appear in many intents, then builds an
        IntentBuilder requiring every 'required' word and optionally
        including keyword collections and high-weight (> 0.35) words.
        """
        # print("Words:")
        # pprint(self.words)
        # print("")
        # print("Intents:")
        # pprint(self.intent_map['intents'])
        # print("Keywords:")
        # pprint(self.keywords)
        for intent in self.intent_map['intents']:
            required_words = []
            optional_words = []
            # print("Training {}".format(intent))
            # pprint(self.keywords)
            for word in self.intent_map['intents'][intent]['words']:
                intents_count = len(self.intent_map['intents'])
                word_appears_in = len(self.words[word])
                # print("Word: {} Weight: {} Intents: {} Appears in: {}".format(word, weight, intents_count, word_appears_in))
                self.intent_map['intents'][intent]['words'][word][
                    'weight'] = self.intent_map['intents'][intent]['words'][
                        word]['weight'] * (intents_count -
                                           word_appears_in) / intents_count
                if (self.intent_map['intents'][intent]['words'][word]
                    ['required']):
                    # add the word as required.
                    # print("adding '{}' as required".format(word_token))
                    required_words.append(self.add_word(intent, word))
                else:
                    # if the word is a keyword list, add it
                    if (word[:1] + word[-1:] == "{}"):
                        optional_words.append(self.add_word(intent, word))
                    else:
                        if (self.intent_map['intents'][intent]['words'][word]
                            ['weight'] > 0.35):
                            # print("adding '{}' as optional".format(word_token))
                            optional_words.append(self.add_word(intent, word))
            construction = IntentBuilder(intent)
            for keyword in required_words:
                # print("Required word: {}".format(keyword))
                construction = construction.require(keyword)
            for keyword in optional_words:
                # print("Optional word: {}".format(keyword))
                construction = construction.optionally(keyword)
            if (construction):
                # print("Building {}".format(intent))
                self.engine.register_intent_parser(construction.build())
        # pprint(self.intent_map['intents'])
        # print("")
        self.trained = True

    def get_plugin_phrases(self, passive_listen=False):
        """Return a sorted list of phrases this plugin can respond to.

        Includes the wakeword(s) when passive_listen is set, any custom
        standard-phrases file for the current language, and all intent
        templates with keyword placeholders expanded to their word lists.
        """
        phrases = []
        # include the keyword, otherwise
        if (passive_listen):
            keywords = profile.get(["keyword"])
            if not (isinstance(keywords, list)):
                keywords = [keywords]
            phrases.extend([word.upper() for word in keywords])
        # Include any custom phrases (things you say to Naomi
        # that don't match plugin phrases. Otherwise, there is
        # a high probability that something you say will be
        # interpreted as a command. For instance, the
        # "check_email" plugin has only "EMAIL" and "INBOX" as
        # standard phrases, so every time I would say
        # "Naomi, check email" Naomi would hear "NAOMI SHUT EMAIL"
        # and shut down.
        custom_standard_phrases_file = paths.data(
            "standard_phrases",
            "{}.txt".format(profile.get(['language'], 'en-US')))
        if (os.path.isfile(custom_standard_phrases_file)):
            with open(custom_standard_phrases_file, mode='r') as f:
                for line in f:
                    phrase = line.strip()
                    if phrase:
                        phrases.append(phrase)

        # for plugin in self._plugins:
        for intent in self.intent_map['intents']:
            if ('templates' in self.intent_map['intents'][intent]):
                templates = self.intent_map['intents'][intent]['templates']
                keywords_list = [keyword for keyword in self.keywords]
                # print("Keywords: {}".format(keywords_list))
                for keyword in keywords_list:
                    # This will not replace keywords that do not have a list associated with them, like regex and open keywords
                    # print("Replacing {} with words from {} in templates".format(keyword,keywords[keyword]))
                    if (keyword[:len(intent) + 1] == "{}_".format(intent)):
                        short_keyword = self.keywords[keyword]['name']
                        for template in templates:
                            # print("Checking template: {} for keyword {}".format(template,short_keyword))
                            if (to_keyword(short_keyword) in template):
                                templates.extend([
                                    template.replace(to_keyword(short_keyword),
                                                     word.upper())
                                    for word in self.keywords[keyword]['words']
                                ])
                            # Now that we have expanded every instance of keyword in templates, delete any template that still contains keyword
                            templates = [
                                template for template in templates
                                if not to_keyword(short_keyword) in template
                            ]
                phrases.extend(templates)
        return sorted(phrases)

    def determine_intent(self, phrase):
        """Run the Adapt engine on *phrase*.

        Returns:
            dict: maps each matched intent's name to its action, input
            phrase, matched keywords, and confidence score; empty when
            nothing matches.
        """
        response = {}
        try:
            for intent in self.engine.determine_intent(phrase):
                if intent and intent.get("confidence") > 0:
                    keywords = {}
                    for keyword in intent:
                        if keyword not in [
                                'confidence', 'intent_type', 'target'
                        ]:
                            if keyword in self.keywords:
                                # Since the Naomi parser can return a list of matching words,
                                # this needs to be a list
                                keywords[self.keywords[keyword]['name']] = [
                                    intent[keyword]
                                ]
                    response.update({
                        self.intent_map['intents'][intent['intent_type']]['name']:
                        {
                            'action':
                            self.intent_map['intents'][intent['intent_type']]
                            ['action'],
                            'input':
                            phrase,
                            'matches':
                            keywords,
                            'score':
                            intent['confidence']
                        }
                    })
        # NOTE(review): presumably the engine divides by zero on an empty
        # vocabulary/utterance — confirm which call actually raises this.
        except ZeroDivisionError:
            print("Could not determine an intent")
        return response
Esempio n. 13
0
class BronKerboschExpanderTest(unittest.TestCase):
    """Tests for BronKerboschExpander clique-based expansion of tagged entities."""

    def setUp(self):
        self.tokenizer = EnglishTokenizer()
        self.trie = Trie(max_edit_distance=2)
        self.trie.insert("x-play", "Television Show")
        self.trie.insert("play", "Play Verb")
        self.trie.insert("play season", "Time Period")
        self.trie.insert("play", "Player Control")
        self.trie.insert("season", "Season Prefix")
        self.trie.insert("1", "Number")
        self.trie.insert("the big bang theory", "Television Show")
        self.trie.insert("the big", "Television Show")
        self.trie.insert("big bang", "event")
        self.trie.insert("bang theory", "Scientific Theory")
        self.tagger = EntityTagger(self.trie, self.tokenizer)

    def testExpander(self):
        """With fuzzy matching disabled, the utterance expands to 6 parses."""
        self.tagger.trie.max_edit_distance = 0
        tags = self.tagger.tag("play season 1 of the big bang theory")
        expander = BronKerboschExpander(self.tokenizer)
        parse_results = list(expander.expand(tags))
        assert len(parse_results) == 6

    def testExpandedResult(self):
        """A short utterance yields a single two-tag parse."""
        tags = self.tagger.tag("season 1")
        expander = BronKerboschExpander(self.tokenizer)
        parse_results = list(expander.expand(tags))
        assert len(parse_results) == 1
        assert len(parse_results[0]) == 2

    def testConsistentExpandWithSameOverlapMultipleTimes(self):
        """
        example: play season 1 of the big bang theory play season one of the big bang theory
        series should contain two instances of the big bang theory
        :return:
        """
        utterance = "play season 1 of the big bang theory"
        tags = self.tagger.tag(utterance)

        def score_clique(clique):
            # Sum of (confidence * match length), normalized by utterance length.
            score = 0.0
            for tagged_entity in clique:
                ec = tagged_entity.get('entities', [{'confidence': 0.0}])[0].get('confidence')
                score += ec * len(tagged_entity.get('entities', [{'match': ''}])[0].get('match')) / (
                    len(utterance) + 1)
            return score
        expander = BronKerboschExpander(self.tokenizer)
        parse_results = list(expander.expand(tags, clique_scoring_func=score_clique))
        assert len(parse_results) == 6
        # The best-scoring parse should cover the whole utterance (minus "of").
        # (A previously computed 'result_parse' string was never asserted;
        # the dead local has been removed.)
        result_text = ' '.join([tag.get('entities')[0].get('key') for tag in parse_results[0]])

        assert result_text == 'play season 1 the big bang theory'

    def testExpandWithRegexAndLiteralTokenMatch(self):
        # two tags for the same token, different confidence, should expand to two cliques
        tags = [{'end_token': 0, 'start_token': 0, 'key': u'spell', 'match': u'spell',
                 'entities': [{'confidence': 0.5, 'data': [u'SearchTerms'], 'match': u'spell', 'key': u'spell'}]},
                {'end_token': 0, 'start_token': 0, 'key': u'spell', 'match': u'spell',
                 'entities': [{'confidence': 1.0, 'data': [u'SpellingKeyword'], 'match': u'spell', 'key': u'spell'}]}]

        expander = BronKerboschExpander(self.tokenizer)

        cliques = list(expander._sub_expand(tags))
        assert len(cliques) == 2
Esempio n. 14
0
 def setUp(self):
     """Register a few literal entities and build the shared tagger fixture."""
     self.trie = Trie()
     self.tagger = EntityTagger(self.trie, EnglishTokenizer())
     for phrase, entity_type in [("play", "PlayVerb"),
                                 ("the big bang theory", "Television Show"),
                                 ("the big", "Not a Thing")]:
         self.trie.insert(phrase, entity_type)
Esempio n. 15
0
 def tagger(self):
     """Build an EntityTagger from this object's trie, tokenizer and regexes."""
     entity_tagger = EntityTagger(
         self.trie, self.tokenizer, self.regular_expressions_entities)
     return entity_tagger
Esempio n. 16
0
PYTHONPATH=. python examples/multi_intent_parser.py "what's the weather like in tokyo"
PYTHONPATH=. python examples/multi_intent_parser.py "play some music by the clash"
"""

import json
import sys
from adapt.entity_tagger import EntityTagger
from adapt.tools.text.tokenizer import EnglishTokenizer
from adapt.tools.text.trie import Trie
from adapt.intent import IntentBuilder
from adapt.parser import Parser
from adapt.engine import DomainIntentDeterminationEngine

# Build the shared parsing pipeline.
tokenizer = EnglishTokenizer()
trie = Trie()
tagger = EntityTagger(trie, tokenizer)
parser = Parser(tokenizer, tagger)

engine = DomainIntentDeterminationEngine()

# Two independent intent domains.
for domain_name in ('Domain1', 'Domain2'):
    engine.register_domain(domain_name)

# define vocabulary
weather_keyword = ["weather"]

for keyword in weather_keyword:
    engine.register_entity(keyword, "WeatherKeyword", domain='Domain1')

weather_types = ["snow", "rain", "wind", "sleet", "sun"]
Esempio n. 17
0
    from adapt.tools.text.tokenizer import EnglishTokenizer
    from adapt.entity_tagger import EntityTagger
    from adapt.tools.text.trie import Trie
    import pprint
    # Build a small vocabulary; insertion order preserved ("play" twice).
    tokenizer = EnglishTokenizer()
    trie = Trie()
    for entry_key, entry_value in [
            ("x-play", "Television Show"),
            ("play", "Play Verb"),
            ("play season", "Time Period"),
            ("play", "Player Control"),
            ("season", "Season Prefix"),
            ("1", "Number"),
            ("the big bang theory", "Television Show"),
            ("the big", "Television Show"),
            ("big bang", "event"),
            ("bang theory", "Scientific Theory")]:
        trie.insert(entry_key, entry_value)
    tagger = EntityTagger(trie, tokenizer)
    tags = tagger.tag("play season 2 the big 1 of the big bang theory")
    expander = BronKerboschExpander(tokenizer)
    # Dump the tag-compatibility graph, then every expanded parse.
    tag_graph = expander._build_graph(tags)
    for vertex in tag_graph.vertex_set():
        print("vertex", vertex, list(tag_graph.get_neighbors_of(vertex)))
    for parse_result in expander.expand(tags):
        print("Bke ----- ")
        for tag in parse_result:
            pprint.pprint(tag)