Esempio n. 1
0
    def determine_intent(self, utterance, num_results=1, include_tags=False, context_manager=None):
        """
        Parse an utterance and lazily yield the best intent of each parse.

        Every parse result is re-emitted as a "parse_result" event, and the
        parser's "tagged_entities" events are forwarded through this object.

        :param utterance: an ascii or unicode string representing natural language speech

        :param num_results: the maximum number of parse results requested from the parser

        :param include_tags: when truthy, the tags returned alongside the best
            intent are stored on the yielded dictionary under '__tags__'

        :param context_manager: optional object whose get_context() supplies
            the context handed to the parser

        :return: A generator that yields intent dictionaries whose
            'confidence' is greater than zero.
        """
        def forward_tagged_entities(payload):
            # Relay the parser's event to listeners registered on this object.
            return self.emit("tagged_entities", payload)

        parser = Parser(self.tokenizer, self.tagger)
        parser.on('tagged_entities', forward_tagged_entities)

        context = context_manager.get_context() if context_manager else []

        for parse_result in parser.parse(utterance, N=num_results, context=context):
            self.emit("parse_result", parse_result)
            # Only context entities not already consumed by this parse are
            # offered when choosing the best intent.
            unused_context = self.__get_unused_context(parse_result, context)
            candidate, tags = self.__best_intent(parse_result, unused_context)
            if not candidate:
                continue
            if candidate.get('confidence', 0.0) > 0:
                if include_tags:
                    candidate['__tags__'] = tags
                yield candidate
Esempio n. 2
0
    def determine_intent(self,
                         utterance,
                         num_results=1,
                         include_tags=False,
                         context_manager=None):
        """
        Parse an utterance and lazily yield the best intent of each parse.

        Args:
            utterance(str): an ascii or unicode string representing natural language speech
            num_results(int): the maximum number of parse results requested from the parser
            include_tags(bool): when truthy, the tags returned alongside the
                best intent are stored on the result under '__tags__'
            context_manager: optional object whose get_context() supplies the
                context handed to the parser

        Returns: A generator that yields intent dictionaries whose
            'confidence' is greater than zero.
        """
        def relay_tagged_entities(payload):
            # Re-publish the parser's event on this object.
            return self.emit("tagged_entities", payload)

        parser = Parser(self.tokenizer, self.tagger)
        parser.on('tagged_entities', relay_tagged_entities)

        context = context_manager.get_context() if context_manager else []

        parses = parser.parse(utterance, N=num_results, context=context)
        for parse_result in parses:
            self.emit("parse_result", parse_result)
            # Context entities already used by this parse are excluded before
            # the best intent is chosen.
            leftover_context = self.__get_unused_context(parse_result, context)
            winner, tags = self.__best_intent(parse_result, leftover_context)
            if not winner:
                continue
            if winner.get('confidence', 0.0) > 0:
                if include_tags:
                    winner['__tags__'] = tags
                yield winner
Esempio n. 3
0
 def setUp(self):
     """Create a fresh trie, tagger and parser seeded with sample vocabulary."""
     self.trie = Trie()
     self.tokenizer = EnglishTokenizer()
     self.regex_entities = []
     self.tagger = EntityTagger(self.trie, self.tokenizer, regex_entities=self.regex_entities)
     # Each phrase is stored with a (phrase, entity type) payload.
     sample_vocabulary = (
         ("play", "PlayVerb"),
         ("the big bang theory", "Television Show"),
         ("the big", "Not a Thing"),
         ("barenaked ladies", "Radio Station"),
     )
     for phrase, entity_type in sample_vocabulary:
         self.trie.insert(phrase, (phrase, entity_type))
     self.parser = Parser(self.tokenizer, self.tagger)
Esempio n. 4
0
    def test_intent_with_regex_entity(self):
        """An intent can require one regex-captured entity and one trie entity."""
        # Replace the fixture's trie/tagger/parser so only "theory" is known.
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, self.tokenizer, self.regex_entities)
        self.parser = Parser(self.tokenizer, self.tagger)
        self.trie.insert("theory", ("theory", "Concept"))
        # The tagger was given this same list object, so appending here
        # mutates the list the tagger holds.
        self.regex_entities.append(re.compile(r"the (?P<Event>.*)"))
        intent = (IntentBuilder("mock intent")
                  .require("Event")
                  .require("Concept")
                  .build())

        for parsed in self.parser.parse("the big bang theory"):
            validated = intent.validate(parsed.get('tags'), parsed.get('confidence'))
            assert validated.get('confidence') > 0.0
            assert validated.get('Event') == 'big bang'
            assert validated.get('Concept') == "theory"
Esempio n. 5
0
    class IntentTest:
        """Helper that validates one fixed intent against parses of a string."""

        def __init__(self):
            """Build the trie, tagger, parser and the "Test Intent" definition."""
            self.trie = Trie()
            self.tokenizer = EnglishTokenizer()
            self.regex_entities = []
            self.tagger = EntityTagger(self.trie,
                                       self.tokenizer,
                                       regex_entities=self.regex_entities)
            # Some phrases are registered under more than one entity type.
            vocabulary = (
                ("play", "PlayVerb"),
                ("play", "Command"),
                ("the big bang theory", "Television Show"),
                ("all that", "Television Show"),
                ("all that", "Radio Station"),
                ("the big", "Not a Thing"),
                ("barenaked ladies", "Radio Station"),
                ("show", "Command"),
                ("what", "Question"),
            )
            for phrase, entity_type in vocabulary:
                self.trie.insert(phrase, (phrase, entity_type))
            self.parser = Parser(self.tokenizer, self.tagger)
            builder = IntentBuilder("Test Intent")
            builder = builder.require("PlayVerb")
            builder = builder.one_of("Television Show", "Radio Station")
            self.intent = builder.build()

        def teststring(self, stringA):
            """Return the validation result for every parse of ``stringA``."""
            return [
                self.intent.validate(parsed.get('tags'),
                                     parsed.get('confidence'))
                for parsed in self.parser.parse(stringA)
            ]
Esempio n. 6
0
    def determine_intent(self, utterance, num_results=1):
        """
        Parse an utterance and lazily yield the best intent of each parse.

        Every parse result is re-emitted as a "parse_result" event, and the
        parser's "tagged_entities" events are forwarded through this object.

        :param utterance: an ascii or unicode string representing natural language speech

        :param num_results: the maximum number of parse results requested from the parser

        :return: A generator that yields intent dictionaries whose
            "confidence" is greater than zero.
        """
        def forward_tagged_entities(payload):
            # Relay the parser's event to listeners registered on this object.
            return self.emit("tagged_entities", payload)

        parser = Parser(self.tokenizer, self.tagger)
        parser.on("tagged_entities", forward_tagged_entities)

        for parse_result in parser.parse(utterance, N=num_results):
            self.emit("parse_result", parse_result)
            candidate = self.__best_intent(parse_result)
            if not candidate:
                continue
            if candidate.get("confidence", 0.0) > 0:
                yield candidate
Esempio n. 7
0
 def setUp(self):
     """Create a fresh trie, tagger and parser seeded with sample vocabulary."""
     self.trie = Trie()
     self.tokenizer = EnglishTokenizer()
     self.regex_entities = []
     self.tagger = EntityTagger(self.trie, self.tokenizer, regex_entities=self.regex_entities)
     # Here each phrase is stored with only its entity type as payload.
     sample_vocabulary = (
         ("play", "PlayVerb"),
         ("the big bang theory", "Television Show"),
         ("the big", "Not a Thing"),
         ("barenaked ladies", "Radio Station"),
     )
     for phrase, entity_type in sample_vocabulary:
         self.trie.insert(phrase, entity_type)
     self.parser = Parser(self.tokenizer, self.tagger)
Esempio n. 8
0
    def determine_intent(self, utterance, num_results=1):
        """
        Parse an utterance and lazily yield the best intent of each parse.

        Every parse result is re-emitted as a "parse_result" event, and the
        parser's "tagged_entities" events are forwarded through this object.

        :param utterance: an ascii or unicode string representing natural language speech

        :param num_results: the maximum number of parse results requested from the parser

        :return: A generator that yields intent dictionaries whose
            'confidence' is greater than zero.
        """
        def relay_tagged_entities(payload):
            # Re-publish the parser's event on this object.
            return self.emit("tagged_entities", payload)

        parser = Parser(self.tokenizer, self.tagger)
        parser.on('tagged_entities', relay_tagged_entities)

        parses = parser.parse(utterance, N=num_results)
        for parse_result in parses:
            self.emit("parse_result", parse_result)
            winner = self.__best_intent(parse_result)
            if not winner:
                continue
            if winner.get('confidence', 0.0) > 0:
                yield winner
Esempio n. 9
0
    def determine_good_intents(self,
                               utterance,
                               num_results=1,
                               include_tags=False,
                               context_manager=None):
        """
        Collect the good intents of every parse and yield them best-first.

        All parses are consumed before anything is yielded, because the
        combined list is sorted by descending 'confidence' first.

        :param utterance: an ascii or unicode string representing natural language speech

        :param num_results: the maximum number of parse results requested from the parser

        :param include_tags: forwarded unchanged to the good-intent selection
            helper

        :param context_manager: optional object whose get_context() supplies
            the context handed to the parser

        :return: A generator that yields intent dictionaries in descending
            order of 'confidence'.
        """
        def forward_tagged_entities(payload):
            # Relay the parser's event to listeners registered on this object.
            return self.emit("tagged_entities", payload)

        parser = Parser(self.tokenizer, self.tagger)
        parser.on('tagged_entities', forward_tagged_entities)

        context = context_manager.get_context() if context_manager else []

        collected = []
        for parse_result in parser.parse(utterance, N=num_results, context=context):
            self.emit("parse_result", parse_result)
            # Context entities already used by this parse are excluded.
            unused_context = self.__get_unused_context(parse_result, context)
            collected.extend(
                self.__good_intents(parse_result, include_tags, unused_context))

        # Stable sort: intents of equal confidence keep their parse order.
        collected.sort(key=lambda intent: intent['confidence'], reverse=True)
        for intent in collected:
            yield intent
Esempio n. 10
0
 def __init__(self):
     """Build the trie, tagger, parser and the "Test Intent" definition."""
     self.trie = Trie()
     self.tokenizer = EnglishTokenizer()
     self.regex_entities = []
     self.tagger = EntityTagger(self.trie,
                                self.tokenizer,
                                regex_entities=self.regex_entities)
     # Some phrases are registered under more than one entity type.
     vocabulary = (
         ("play", "PlayVerb"),
         ("play", "Command"),
         ("the big bang theory", "Television Show"),
         ("all that", "Television Show"),
         ("all that", "Radio Station"),
         ("the big", "Not a Thing"),
         ("barenaked ladies", "Radio Station"),
         ("show", "Command"),
         ("what", "Question"),
     )
     for phrase, entity_type in vocabulary:
         self.trie.insert(phrase, (phrase, entity_type))
     self.parser = Parser(self.tokenizer, self.tagger)
     builder = IntentBuilder("Test Intent")
     builder = builder.require("PlayVerb")
     builder = builder.one_of("Television Show", "Radio Station")
     self.intent = builder.build()
Esempio n. 11
0
    def determine_intent(self,
                         utterance,
                         num_results=1,
                         include_tags=False,
                         context_manager=None):
        """
        Parse an utterance and yield the best intent of each parse.

        Results stream lazily unless regex entities or context are in play;
        in that case all intents are collected and sorted by descending
        confidence before being yielded.

        Args:
            utterance(str): an ascii or unicode string representing natural language speech
            num_results(int): the maximum number of parse results requested from the parser
            include_tags(bool): when truthy, the tags returned alongside the
                best intent are stored on the result under '__tags__'
            context_manager: optional object whose get_context() supplies the
                context handed to the parser

        Returns: A generator that yields intent dictionaries whose
            'confidence' is greater than zero.
        """
        parser = Parser(self.tokenizer, self.tagger)

        context = context_manager.get_context() if context_manager else []

        # Consumers expect results ordered by confidence. The parser orders
        # by utterance coverage, but regex and context entities carry
        # different weights and can change the final ranking, so a sort is
        # needed whenever either is present.
        requires_final_sort = self.regular_expressions_entities or context

        def generate_intents():
            parses = parser.parse(utterance, N=num_results, context=context)
            for parse_result in parses:
                # Context entities already used by this parse are excluded.
                unused_context = self.__get_unused_context(parse_result, context)
                candidate, tags = self.__best_intent(parse_result,
                                                     unused_context)
                if not candidate:
                    continue
                if candidate.get('confidence', 0.0) > 0:
                    if include_tags:
                        candidate['__tags__'] = tags
                    yield candidate

        def descending_confidence(intent):
            # Negated so that an ascending sort puts the highest first.
            return -intent.get('confidence', 0.0)

        if requires_final_sort:
            intents = sorted(generate_intents(), key=descending_confidence)
        else:
            intents = generate_intents()

        for intent in intents:
            yield intent
Esempio n. 12
0
    def test_intent_with_regex_entity(self):
        """An intent can require one regex-captured entity and one trie entity."""
        # Replace the fixture's trie/tagger/parser so only "theory" is known.
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, self.tokenizer, self.regex_entities)
        self.parser = Parser(self.tokenizer, self.tagger)
        self.trie.insert("theory", ("theory", "Concept"))
        # The tagger was given this same list object, so appending here
        # mutates the list the tagger holds.
        event_pattern = re.compile(r"the (?P<Event>.*)")
        self.regex_entities.append(event_pattern)
        builder = IntentBuilder("mock intent")
        intent = builder.require("Event").require("Concept").build()

        for parsed in self.parser.parse("the big bang theory"):
            tags = parsed.get('tags')
            confidence = parsed.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('Event') == 'big bang'
            assert validated.get('Concept') == "theory"
Esempio n. 13
0
class ParserTest(unittest.TestCase):
    """Checks the tags the parser produces for a small sample vocabulary."""

    def setUp(self):
        """Create a fresh trie, tagger and parser seeded with sample vocabulary."""
        self.trie = Trie()
        self.tokenizer = EnglishTokenizer()
        self.regex_entities = []
        self.tagger = EntityTagger(self.trie,
                                   self.tokenizer,
                                   regex_entities=self.regex_entities)
        # Each phrase is stored with a (phrase, entity type) payload.
        vocabulary = (
            ("play", "PlayVerb"),
            ("the big bang theory", "Television Show"),
            ("the big", "Not a Thing"),
            ("barenaked ladies", "Radio Station"),
            ("show", "Command"),
            ("what", "Question"),
        )
        for phrase, entity_type in vocabulary:
            self.trie.insert(phrase, (phrase, entity_type))
        self.parser = Parser(self.tokenizer, self.tagger)

    def test_basic_intent(self):
        """Every parse of the utterance carries exactly the expected tags."""
        utterance = "show play the big bang theory"
        # Only the tags are compared; other fields of a parse result are not
        # checked by this test.
        expected_tags = [
            {
                'end_token': 0,
                'entities': [{
                    'confidence': 1.0,
                    'data': [('show', 'Command')],
                    'key': 'show',
                    'match': 'show',
                }],
                'from_context': False,
                'key': 'show',
                'match': 'show',
                'start_token': 0,
            },
            {
                'end_token': 1,
                'entities': [{
                    'confidence': 1.0,
                    'data': [('play', 'PlayVerb')],
                    'key': 'play',
                    'match': 'play',
                }],
                'from_context': False,
                'key': 'play',
                'match': 'play',
                'start_token': 1,
            },
            {
                # Unlike the two tags above, this one also carries a
                # tag-level confidence.
                'confidence': 1.0,
                'end_token': 5,
                'entities': [{
                    'confidence': 1.0,
                    'data': [('the big bang theory', 'Television Show')],
                    'key': 'the big bang theory',
                    'match': 'the big bang theory',
                }],
                'from_context': False,
                'key': 'the big bang theory',
                'match': 'the big bang theory',
                'start_token': 2,
            },
        ]
        for parsed in self.parser.parse(utterance):
            assert parsed['tags'] == expected_tags
Esempio n. 14
0
class IntentTest(unittest.TestCase):
    """Checks IntentBuilder intents against parses of a small sample vocabulary."""

    def setUp(self):
        """Create a fresh trie, tagger and parser seeded with sample vocabulary."""
        self.trie = Trie()
        self.tokenizer = EnglishTokenizer()
        self.regex_entities = []
        self.tagger = EntityTagger(self.trie, self.tokenizer, regex_entities=self.regex_entities)
        # Each phrase is stored with a (phrase, entity type) payload.
        vocabulary = (
            ("play", "PlayVerb"),
            ("the big bang theory", "Television Show"),
            ("the big", "Not a Thing"),
            ("barenaked ladies", "Radio Station"),
            ("show", "Command"),
            ("what", "Question"),
        )
        for phrase, entity_type in vocabulary:
            self.trie.insert(phrase, (phrase, entity_type))
        self.parser = Parser(self.tokenizer, self.tagger)

    def tearDown(self):
        pass

    def _validated(self, intent, utterance):
        """Lazily validate ``intent`` against every parse of ``utterance``."""
        for parsed in self.parser.parse(utterance):
            yield intent.validate(parsed.get('tags'), parsed.get('confidence'))

    def test_basic_intent(self):
        """Two required entities are both extracted."""
        intent = (IntentBuilder("play television intent")
                  .require("PlayVerb")
                  .require("Television Show")
                  .build())
        for validated in self._validated(intent, "play the big bang theory"):
            assert validated.get('confidence') > 0.0
            assert validated.get('PlayVerb') == 'play'
            assert validated.get('Television Show') == "the big bang theory"

    def test_at_least_one(self):
        """A required entity combined with a one_of group matches either option."""
        intent = (IntentBuilder("play intent")
                  .require("PlayVerb")
                  .one_of("Television Show", "Radio Station")
                  .build())
        for validated in self._validated(intent, "play the big bang theory"):
            assert validated.get('confidence') > 0.0
            assert validated.get('PlayVerb') == 'play'
            assert validated.get('Television Show') == "the big bang theory"

        for validated in self._validated(intent, "play the barenaked ladies"):
            assert validated.get('confidence') > 0.0
            assert validated.get('PlayVerb') == 'play'
            assert validated.get('Radio Station') == "barenaked ladies"

    def test_at_least_on_no_required(self):
        """A one_of group on its own matches either option."""
        intent = (IntentBuilder("play intent")
                  .one_of("Television Show", "Radio Station")
                  .build())
        for validated in self._validated(intent, "play the big bang theory"):
            assert validated.get('confidence') > 0.0
            assert validated.get('Television Show') == "the big bang theory"

        for validated in self._validated(intent, "play the barenaked ladies"):
            assert validated.get('confidence') > 0.0
            assert validated.get('Radio Station') == "barenaked ladies"

    def test_at_least_one_alone(self):
        """A one_of group matches an utterance made of a single entity."""
        intent = (IntentBuilder("OptionsForLunch")
                  .one_of("Question", "Command")
                  .build())

        for validated in self._validated(intent, "show"):
            assert validated.get('confidence') > 0.0
            assert validated.get('Command') == "show"

    def test_basic_intent_with_alternate_names(self):
        """Required entities are reported under their alternate names."""
        intent = (IntentBuilder("play television intent")
                  .require("PlayVerb", "Play Verb")
                  .require("Television Show", "series")
                  .build())
        for validated in self._validated(intent, "play the big bang theory"):
            assert validated.get('confidence') > 0.0
            assert validated.get('Play Verb') == 'play'
            assert validated.get('series') == "the big bang theory"

    def test_intent_with_regex_entity(self):
        """An intent can require one regex-captured entity and one trie entity."""
        # Replace the fixture's trie/tagger/parser so only "theory" is known.
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, self.tokenizer, self.regex_entities)
        self.parser = Parser(self.tokenizer, self.tagger)
        self.trie.insert("theory", ("theory", "Concept"))
        # The tagger was given this same list object, so appending here
        # mutates the list the tagger holds.
        self.regex_entities.append(re.compile(r"the (?P<Event>.*)"))
        intent = (IntentBuilder("mock intent")
                  .require("Event")
                  .require("Concept")
                  .build())

        for validated in self._validated(intent, "the big bang theory"):
            assert validated.get('confidence') > 0.0
            assert validated.get('Event') == 'big bang'
            assert validated.get('Concept') == "theory"

    def test_intent_using_alias(self):
        """A shorter phrase whose payload names the full title is also registered."""
        self.trie.insert("big bang", ("the big bang theory", "Television Show"))
        intent = (IntentBuilder("play television intent")
                  .require("PlayVerb", "Play Verb")
                  .require("Television Show", "series")
                  .build())
        for validated in self._validated(intent, "play the big bang theory"):
            assert validated.get('confidence') > 0.0
            assert validated.get('Play Verb') == 'play'
            assert validated.get('series') == "the big bang theory"
Esempio n. 15
0
class IntentTest(unittest.TestCase):
    """Checks IntentBuilder intents against parses of a small sample vocabulary."""

    def setUp(self):
        """Create a fresh trie, tagger and parser seeded with sample vocabulary."""
        self.trie = Trie()
        self.tokenizer = EnglishTokenizer()
        self.regex_entities = []
        self.tagger = EntityTagger(self.trie,
                                   self.tokenizer,
                                   regex_entities=self.regex_entities)
        # Each phrase is stored with a (phrase, entity type) payload.
        vocabulary = (
            ("play", "PlayVerb"),
            ("the big bang theory", "Television Show"),
            ("the big", "Not a Thing"),
            ("barenaked ladies", "Radio Station"),
            ("show", "Command"),
            ("what", "Question"),
        )
        for phrase, entity_type in vocabulary:
            self.trie.insert(phrase, (phrase, entity_type))
        self.parser = Parser(self.tokenizer, self.tagger)

    def tearDown(self):
        pass

    def test_basic_intent(self):
        """Two required entities are both extracted."""
        builder = IntentBuilder("play television intent")
        intent = builder.require("PlayVerb").require("Television Show").build()
        for parsed in self.parser.parse("play the big bang theory"):
            tags = parsed.get('tags')
            confidence = parsed.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('PlayVerb') == 'play'
            assert validated.get('Television Show') == "the big bang theory"

    def test_at_least_one(self):
        """A required entity combined with a one_of group matches either option."""
        builder = IntentBuilder("play intent")
        builder = builder.require("PlayVerb")
        builder = builder.one_of("Television Show", "Radio Station")
        intent = builder.build()
        for parsed in self.parser.parse("play the big bang theory"):
            tags = parsed.get('tags')
            confidence = parsed.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('PlayVerb') == 'play'
            assert validated.get('Television Show') == "the big bang theory"

        for parsed in self.parser.parse("play the barenaked ladies"):
            tags = parsed.get('tags')
            confidence = parsed.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('PlayVerb') == 'play'
            assert validated.get('Radio Station') == "barenaked ladies"

    def test_at_least_on_no_required(self):
        """A one_of group on its own matches either option."""
        builder = IntentBuilder("play intent")
        intent = builder.one_of("Television Show", "Radio Station").build()
        for parsed in self.parser.parse("play the big bang theory"):
            tags = parsed.get('tags')
            confidence = parsed.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('Television Show') == "the big bang theory"

        for parsed in self.parser.parse("play the barenaked ladies"):
            tags = parsed.get('tags')
            confidence = parsed.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('Radio Station') == "barenaked ladies"

    def test_at_least_one_alone(self):
        """A one_of group matches an utterance made of a single entity."""
        builder = IntentBuilder("OptionsForLunch")
        intent = builder.one_of("Question", "Command").build()

        for parsed in self.parser.parse("show"):
            tags = parsed.get('tags')
            confidence = parsed.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('Command') == "show"

    def test_basic_intent_with_alternate_names(self):
        """Required entities are reported under their alternate names."""
        builder = IntentBuilder("play television intent")
        builder = builder.require("PlayVerb", "Play Verb")
        builder = builder.require("Television Show", "series")
        intent = builder.build()
        for parsed in self.parser.parse("play the big bang theory"):
            tags = parsed.get('tags')
            confidence = parsed.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('Play Verb') == 'play'
            assert validated.get('series') == "the big bang theory"

    def test_intent_with_regex_entity(self):
        """An intent can require one regex-captured entity and one trie entity."""
        # Replace the fixture's trie/tagger/parser so only "theory" is known.
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, self.tokenizer,
                                   self.regex_entities)
        self.parser = Parser(self.tokenizer, self.tagger)
        self.trie.insert("theory", ("theory", "Concept"))
        # The tagger was given this same list object, so appending here
        # mutates the list the tagger holds.
        event_pattern = re.compile(r"the (?P<Event>.*)")
        self.regex_entities.append(event_pattern)
        builder = IntentBuilder("mock intent")
        intent = builder.require("Event").require("Concept").build()

        for parsed in self.parser.parse("the big bang theory"):
            tags = parsed.get('tags')
            confidence = parsed.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('Event') == 'big bang'
            assert validated.get('Concept') == "theory"

    def test_intent_using_alias(self):
        """A shorter phrase whose payload names the full title is also registered."""
        alias_payload = ("the big bang theory", "Television Show")
        self.trie.insert("big bang", alias_payload)
        builder = IntentBuilder("play television intent")
        builder = builder.require("PlayVerb", "Play Verb")
        builder = builder.require("Television Show", "series")
        intent = builder.build()
        for parsed in self.parser.parse("play the big bang theory"):
            tags = parsed.get('tags')
            confidence = parsed.get('confidence')
            validated = intent.validate(tags, confidence)
            assert validated.get('confidence') > 0.0
            assert validated.get('Play Verb') == 'play'
            assert validated.get('series') == "the big bang theory"
Esempio n. 16
0
PYTHONPATH=. python examples/multi_intent_parser.py "play some music by the clash"
"""

import json
import sys
from adapt.entity_tagger import EntityTagger
from adapt.tools.text.tokenizer import EnglishTokenizer
from adapt.tools.text.trie import Trie
from adapt.intent import IntentBuilder
from adapt.parser import Parser
from adapt.engine import DomainIntentDeterminationEngine

# Build a tokenizer -> trie -> tagger -> parser chain.
# NOTE(review): none of these four objects is handed to the engine created
# below within the visible part of this script -- confirm whether they are
# used further down or are left over.
tokenizer = EnglishTokenizer()
trie = Trie()
tagger = EntityTagger(trie, tokenizer)
parser = Parser(tokenizer, tagger)

# The engine is constructed with no arguments.
engine = DomainIntentDeterminationEngine()

# Declare the two domains by name; the entity registration below targets
# 'Domain1' through its `domain=` keyword.
engine.register_domain('Domain1')
engine.register_domain('Domain2')

# define vocabulary
weather_keyword = ["weather"]

# Register each keyword as a "WeatherKeyword" entity in Domain1.
for wk in weather_keyword:
    engine.register_entity(wk, "WeatherKeyword", domain='Domain1')

# Weather type words; the loop that consumes this list starts right after
# this statement.
weather_types = ["snow", "rain", "wind", "sleet", "sun"]

for wt in weather_types:
Esempio n. 17
0
class IntentTest(unittest.TestCase):
    """Validate IntentBuilder-built intents against parsed utterances.

    Every test builds an intent, parses one or more utterances with the
    parser created in setUp, and checks the result of intent.validate()
    for each parse that the parser yields.
    """

    def setUp(self):
        """Create a trie, tokenizer, tagger and parser with a small vocabulary."""
        self.trie = Trie()
        self.tokenizer = EnglishTokenizer()
        self.regex_entities = []
        self.tagger = EntityTagger(self.trie,
                                   self.tokenizer,
                                   regex_entities=self.regex_entities)
        # (phrase, entity type); each phrase is stored as its own value.
        vocabulary = (
            ("play", "PlayVerb"),
            ("stop", "StopVerb"),
            ("the big bang theory", "Television Show"),
            ("the big", "Not a Thing"),
            ("barenaked ladies", "Radio Station"),
            ("show", "Command"),
            ("what", "Question"),
        )
        for phrase, entity_type in vocabulary:
            self.trie.insert(phrase, (phrase, entity_type))
        self.parser = Parser(self.tokenizer, self.tagger)

    def tearDown(self):
        """Nothing to clean up; setUp rebuilds every fixture per test."""

    def _assert_intent_matches(self, intent, utterance, expected):
        """Validate `intent` against every parse of `utterance`.

        For each parse result, the validated intent must report a confidence
        above 0.0 and then, in the order given, map every key in `expected`
        (a sequence of (key, value) pairs) to the paired value.
        """
        for parse in self.parser.parse(utterance):
            validated = intent.validate(parse.get('tags'),
                                        parse.get('confidence'))
            assert validated.get('confidence') > 0.0
            for key, value in expected:
                assert validated.get(key) == value

    def test_basic_intent(self):
        """Two required entities are both reported under their own names."""
        intent = (IntentBuilder("play television intent")
                  .require("PlayVerb")
                  .require("Television Show")
                  .build())
        self._assert_intent_matches(
            intent, "play the big bang theory",
            (('PlayVerb', 'play'),
             ('Television Show', "the big bang theory")))

    def test_at_least_one(self):
        """A required entity combined with a one_of group of two entities."""
        intent = (IntentBuilder("play intent")
                  .require("PlayVerb")
                  .one_of("Television Show", "Radio Station")
                  .build())
        self._assert_intent_matches(
            intent, "play the big bang theory",
            (('PlayVerb', 'play'),
             ('Television Show', "the big bang theory")))
        self._assert_intent_matches(
            intent, "play the barenaked ladies",
            (('PlayVerb', 'play'),
             ('Radio Station', "barenaked ladies")))

    def test_at_least_one_with_tag_in_multiple_slots(self):
        """The same entity type may appear in more than one one_of group."""
        for phrase in ("temperature", "living room", "what is"):
            self.trie.insert(phrase, (phrase, phrase))

        intent = (IntentBuilder("test intent")
                  .one_of("what is")
                  .one_of("temperature", "living room")
                  .one_of("temperature")
                  .build())
        self._assert_intent_matches(
            intent, "what is the temperature in the living room",
            (("temperature", "temperature"),
             ("living room", "living room"),
             ("what is", "what is")))

    def test_at_least_on_no_required(self):
        """An intent made of a single one_of group and no required entity."""
        intent = (IntentBuilder("play intent")
                  .one_of("Television Show", "Radio Station")
                  .build())
        self._assert_intent_matches(
            intent, "play the big bang theory",
            (('Television Show', "the big bang theory"),))
        self._assert_intent_matches(
            intent, "play the barenaked ladies",
            (('Radio Station', "barenaked ladies"),))

    def test_at_least_one_alone(self):
        """A one_of group matched by a single-word utterance."""
        intent = (IntentBuilder("OptionsForLunch")
                  .one_of("Question", "Command")
                  .build())
        self._assert_intent_matches(intent, "show", (('Command', "show"),))

    def test_basic_intent_with_alternate_names(self):
        """Values are reported under the second argument given to require()."""
        intent = (IntentBuilder("play television intent")
                  .require("PlayVerb", "Play Verb")
                  .require("Television Show", "series")
                  .build())
        self._assert_intent_matches(
            intent, "play the big bang theory",
            (('Play Verb', 'play'),
             ('series', "the big bang theory")))

    def test_intent_with_regex_entity(self):
        """An entity captured by a named regex group can be required."""
        # Start from an empty trie; the tagger shares self.regex_entities, so
        # the pattern appended below is visible to it.
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, self.tokenizer,
                                   self.regex_entities)
        self.parser = Parser(self.tokenizer, self.tagger)
        self.trie.insert("theory", ("theory", "Concept"))
        self.regex_entities.append(re.compile(r"the (?P<Event>.*)"))

        intent = (IntentBuilder("mock intent")
                  .require("Event")
                  .require("Concept")
                  .build())
        self._assert_intent_matches(
            intent, "the big bang theory",
            (('Event', 'big bang'),
             ('Concept', "theory")))

    def test_intent_using_alias(self):
        """A trie key whose stored value differs from the key itself."""
        self.trie.insert("big bang",
                         ("the big bang theory", "Television Show"))
        intent = (IntentBuilder("play television intent")
                  .require("PlayVerb", "Play Verb")
                  .require("Television Show", "series")
                  .build())
        self._assert_intent_matches(
            intent, "play the big bang theory",
            (('Play Verb', 'play'),
             ('series', "the big bang theory")))

    @staticmethod
    def _what_is_tag():
        """Return a fresh copy of the "what is" tag fixture."""
        return {
            "confidence": 1.0,
            "end_token": 1,
            "entities": [{
                "confidence": 1.0,
                "data": [["what is", "skill_iot_controlINFORMATION_QUERY"]],
                "key": "what is",
                "match": "what is",
            }],
            "from_context": False,
            "key": "what is",
            "match": "what is",
            "start_token": 0,
        }

    @staticmethod
    def _temperature_tag():
        """Return a fresh copy of the "temperature" tag fixture.

        Unlike the other two fixtures, this one has no top-level
        "confidence" entry.
        """
        return {
            "end_token": 3,
            "entities": [{
                "confidence": 1.0,
                "data": [["temperature", "skill_weatherTemperature"],
                         ["temperature", "skill_iot_controlTEMPERATURE"]],
                "key": "temperature",
                "match": "temperature",
            }],
            "from_context": False,
            "key": "temperature",
            "match": "temperature",
            "start_token": 3,
        }

    @staticmethod
    def _living_room_tag():
        """Return a fresh copy of the "living room" tag fixture."""
        return {
            "confidence": 1.0,
            "end_token": 7,
            "entities": [{
                "confidence": 1.0,
                "data": [["living room", "skill_iot_controlENTITY"]],
                "key": "living room",
                "match": "living room",
            }],
            "from_context": False,
            "key": "living room",
            "match": "living room",
            "start_token": 6,
        }

    def test_resolve_one_of(self):
        """resolve_one_of groups the tags by the entity types in each slot."""
        # The factories return new dicts on every call, so the input tags and
        # the expected value never share objects.
        tags = [
            self._what_is_tag(),
            self._temperature_tag(),
            self._living_room_tag(),
        ]

        at_least_one = [
            ["skill_iot_controlINFORMATION_QUERY"],
            ["skill_iot_controlTEMPERATURE", "skill_iot_controlENTITY"],
            ["skill_iot_controlTEMPERATURE"],
        ]

        expected = {
            "skill_iot_controlENTITY": [self._living_room_tag()],
            "skill_iot_controlINFORMATION_QUERY": [self._what_is_tag()],
            "skill_iot_controlTEMPERATURE": [self._temperature_tag()],
        }

        assert resolve_one_of(tags, at_least_one) == expected
Esempio n. 18
0
class AdaptTTIPlugin(plugin.TTIPlugin):
    """TTI plugin that registers vocabulary and intents with an Adapt engine.

    NOTE(review): "TTI" presumably stands for text-to-intent; confirm against
    the plugin base class.
    """
    # These objects are created once, when the class body is executed, so they
    # are class attributes shared by every instance.
    tokenizer = EnglishTokenizer()
    trie = Trie()
    tagger = EntityTagger(trie, tokenizer)
    parser = Parser(tokenizer, tagger)
    # Engine that entities, regex entities and intent parsers are registered
    # with, and that is queried to resolve a phrase to an intent.
    engine = IntentDeterminationEngine()

    def add_word(self, intent, word):
        """Register `word` with the engine and return its entity name.

        When ``is_keyword(word)`` is false, the word is registered on its own
        under a new name built from `intent` and the next value of
        ``self.keyword_index``.

        When it is true, the entity name is `intent`, an underscore, and
        `word` without its first and last characters. Every word listed under
        that name in ``self.keywords`` is registered as an entity, and every
        pattern listed under it in ``self.regex`` is registered as a regex
        entity. Neither entry has to exist.

        :param intent: name of the intent the word belongs to
        :param word: a template word, or a keyword placeholder
        :return: the entity name the word was registered under
        """
        if not is_keyword(word):
            # Just register the word itself under a freshly numbered name.
            self.keyword_index += 1
            entity_name = "{}_{}".format(intent,
                                         makeindex(self.keyword_index))
            self.engine.register_entity(word, entity_name)
            return entity_name

        # Drop the first and last character of the placeholder.
        entity_name = "{}_{}".format(intent, word[1:-1])
        # A word list for this name doesn't have to exist.
        if entity_name in self.keywords:
            for member in self.keywords[entity_name]['words']:
                self.engine.register_entity(member, entity_name)
        if entity_name in self.regex:
            for pattern in self.regex[entity_name]:
                self.engine.register_regex_entity(pattern)
        return entity_name

    def add_intents(self, intents):
        """Record the given intents, their keywords, regexes and word stats.

        For each intent the method:

        * picks a name not yet present in ``self.intent_map['intents']`` by
          appending an increasing number to the original name,
        * selects the locale entry to use (exact match on the profile
          language, else the first locale sharing its first two characters),
        * stores keyword word lists in ``self.keywords`` and regex patterns
          in ``self.regex`` under ``"<intent>_<name>"``,
        * stores the action, original name and templates in the intent map,
        * counts, per word, how many templates contain it, and records in
          ``self.words`` which intents use each word,
        * stores ``weight(count, number_of_templates)`` for each word and
          marks the word as required when that weight equals 1.

        :param intents: mapping of intent name to a definition that has an
            'action' entry and a 'locale' mapping; each locale entry has
            'templates' and may have 'keywords' and 'regex'
        """
        for intent_base in intents:
            locale = profile.get("language")
            registered = self.intent_map['intents']

            # Avoid collisions between intents: find the first unused name.
            intent = intent_base
            suffix = 0
            while intent in registered:
                suffix += 1
                intent = "{}{}".format(intent_base, suffix)

            definition = intents[intent_base]
            if 'locale' in definition and locale not in definition['locale']:
                # The selected locale is not available, so try matching just
                # the language ("en-US" -> "en").
                for language in definition['locale']:
                    if language[:2] == locale[:2]:
                        locale = language
                        break
            localized = definition['locale'][locale]

            if 'keywords' in localized:
                for keyword in localized['keywords']:
                    self.keywords["{}_{}".format(intent, keyword)] = {
                        'words': localized['keywords'][keyword],
                        'name': keyword
                    }

            if 'regex' in localized:
                for regex_name in localized['regex']:
                    regex_token = "{}_{}".format(intent, regex_name)
                    # Rename the group inside each pattern to the token.
                    self.regex[regex_token] = [
                        pattern.replace(regex_name, regex_token)
                        for pattern in localized['regex'][regex_name]
                    ]

            entry = {
                'action': definition['action'],
                'name': intent_base,
                'templates': [],
                'words': {}
            }
            registered[intent] = entry
            word_stats = entry['words']

            templates = localized['templates']
            for phrase in templates:
                # Save the phrase so we can search for undefined keywords.
                entry['templates'].append(phrase)
                # Count each word once per phrase. Small connector words can
                # appear several times in one sentence while the focal words
                # usually appear once, which would give the connector words
                # too much weight.
                for word in set(phrase.split()):
                    if not is_keyword(word):
                        word = word.upper()
                    # Number of phrases of this intent containing the word.
                    if word in word_stats:
                        word_stats[word]['count'] += 1
                    else:
                        word_stats[word] = {
                            'count': 1,
                            'weight': None,
                            'required': False
                        }
                    # Track which intents the word appears in.
                    self.words.setdefault(word, {})[intent] = True

            # A word is counted once per example, so a count equal to the
            # number of examples (weight 1) marks a "required" word.
            phrase_count = len(templates)
            for stats in word_stats.values():
                word_weight = weight(stats['count'], phrase_count)
                stats['weight'] = word_weight
                if word_weight == 1:
                    stats['required'] = True

    # Call train after loading all the intents.
    def train(self):
        """Register one intent parser per recorded intent with the engine.

        Each word's stored weight is first multiplied by the fraction of
        intents that do NOT contain the word. Words flagged as required are
        registered and added to the intent as required. Of the remaining
        words, those wrapped in curly braces and those whose adjusted weight
        is above 0.35 are registered and added as optional. Finally
        ``self.trained`` is set to True.
        """
        all_intents = self.intent_map['intents']
        for intent in all_intents:
            required_words, optional_words = [], []
            word_stats = all_intents[intent]['words']
            for word in word_stats:
                stats = word_stats[word]
                intents_count = len(all_intents)
                word_appears_in = len(self.words[word])
                # Words shared by many intents discriminate less.
                stats['weight'] = stats['weight'] * (
                    intents_count - word_appears_in) / intents_count
                if stats['required']:
                    required_words.append(self.add_word(intent, word))
                elif (word[:1] + word[-1:] == "{}"
                      or stats['weight'] > 0.35):
                    # Keyword lists are always kept; plain words only when
                    # they carry enough weight.
                    optional_words.append(self.add_word(intent, word))

            construction = IntentBuilder(intent)
            for entity_name in required_words:
                construction = construction.require(entity_name)
            for entity_name in optional_words:
                construction = construction.optionally(entity_name)
            if construction:
                self.engine.register_intent_parser(construction.build())
        self.trained = True

    def get_plugin_phrases(self, passive_listen=False):
        """Return a sorted list of the phrases the plugin expects to hear.

        The list is made of:

        * the upper-cased profile keyword(s), when `passive_listen` is true,
        * every non-empty line of the per-language "standard_phrases" file,
          if that file exists,
        * every intent template, with each keyword placeholder replaced by
          each upper-cased word of the matching keyword list. Templates that
          still contain an expanded placeholder are dropped; placeholders
          without a word list (e.g. regex keywords) are left untouched.

        The stored templates in ``self.intent_map`` are not modified, so
        calling this method repeatedly returns the same phrases.

        :param passive_listen: also include the wake keyword(s)
        :return: sorted list of phrases (duplicates are not removed)
        """
        phrases = []
        # Include the wake keyword(s) when listening passively.
        if passive_listen:
            keywords = profile.get(["keyword"])
            if not isinstance(keywords, list):
                keywords = [keywords]
            phrases.extend([word.upper() for word in keywords])
        # Include any custom phrases (things you say to Naomi
        # that don't match plugin phrases). Otherwise, there is
        # a high probability that something you say will be
        # interpreted as a command. For instance, the
        # "check_email" plugin has only "EMAIL" and "INBOX" as
        # standard phrases, so every time I would say
        # "Naomi, check email" Naomi would hear "NAOMI SHUT EMAIL"
        # and shut down.
        custom_standard_phrases_file = paths.data(
            "standard_phrases",
            "{}.txt".format(profile.get(['language'], 'en-US')))
        if os.path.isfile(custom_standard_phrases_file):
            with open(custom_standard_phrases_file, mode='r') as f:
                for line in f:
                    phrase = line.strip()
                    if phrase:
                        phrases.append(phrase)

        for intent in self.intent_map['intents']:
            if 'templates' not in self.intent_map['intents'][intent]:
                continue
            # Work on a copy: expanding placeholders must never change the
            # templates stored in the intent map.
            templates = list(self.intent_map['intents'][intent]['templates'])
            prefix = "{}_".format(intent)
            for keyword in list(self.keywords):
                # Only keyword lists that belong to this intent apply. Regex
                # and open keywords have no word list and are not replaced.
                if not keyword.startswith(prefix):
                    continue
                placeholder = to_keyword(self.keywords[keyword]['name'])
                expanded = []
                for template in templates:
                    if placeholder in template:
                        expanded.extend(
                            template.replace(placeholder, word.upper())
                            for word in self.keywords[keyword]['words'])
                    else:
                        expanded.append(template)
                # Every instance of the placeholder has been expanded; drop
                # anything that still contains it.
                templates = [
                    template for template in expanded
                    if placeholder not in template
                ]
            phrases.extend(templates)
        return sorted(phrases)

    def determine_intent(self, phrase):
        """Ask the engine for the intents matching `phrase`.

        Each non-empty engine result with a confidence above 0 adds one
        entry to the response, keyed by the intent's original name and
        holding its action, the input phrase, the matched keywords and the
        confidence score. A ZeroDivisionError raised while querying the
        engine is reported with a printed message, and whatever was collected
        so far is returned.

        :param phrase: the text to interpret
        :return: dict mapping intent name to its match details
        """
        # Result entries that describe the match rather than a keyword.
        reserved = ('confidence', 'intent_type', 'target')
        response = {}
        try:
            for intent in self.engine.determine_intent(phrase):
                if not intent or not (intent.get("confidence") > 0):
                    continue
                matches = {}
                for keyword in intent:
                    if keyword in reserved or keyword not in self.keywords:
                        continue
                    # Since the Naomi parser can return a list of matching
                    # words, this needs to be a list.
                    matches[self.keywords[keyword]['name']] = [
                        intent[keyword]
                    ]
                intent_info = self.intent_map['intents'][
                    intent['intent_type']]
                response[intent_info['name']] = {
                    'action': intent_info['action'],
                    'input': phrase,
                    'matches': matches,
                    'score': intent['confidence']
                }
        except ZeroDivisionError:
            print("Could not determine an intent")
        return response