Example No. 1
 def test_gather(self):
     trie = Trie()
     trie.insert("rest")
     trie.insert("restaurant")
     results = list(trie.gather("restaurant"))
     assert len(results) == 1
     assert results[0].get('key') == "restaurant"
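A minimal sketch (not from the test suite) of why gather() returns only the full word here: it appears to yield prefix entities only at word boundaries, unlike lookup(), which requires an exact match. The import path is taken from Example 25.

from adapt.tools.text.trie import Trie   # import path as in Example 25

trie = Trie()
trie.insert("rest")
trie.insert("restaurant")
# "rest" is not followed by a word boundary inside "restaurant", so only
# the full word is yielded:
print([r.get('key') for r in trie.gather("restaurant")])   # ['restaurant']
# with a space after "rest", the shorter entity is yielded instead:
print([r.get('key') for r in trie.gather("rest stop")])    # ['rest']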
Example No. 2
 def test_data_is_correct_on_insert(self):
     trie = Trie()
     trie.insert("restaurant", "Concept")
     results = list(trie.lookup("restaurant"))
     assert len(results) == 1
     assert len(results[0].get('data')) == 1
     data = list(results[0].get('data'))
     assert data[0] == 'Concept'
Example No. 3
 def test_retrieval_based_on_insertion_order(self):
     trie = Trie()
     trie.insert("rest")
     trie.insert("restaurant")
     results = list(trie.lookup("rest"))
     assert len(results) == 1
     results = list(trie.lookup("restaurant"))
     assert len(results) == 1
Example No. 4
 def __init__(self, tokenizer=None, trie=None):
     pyee.EventEmitter.__init__(self)
     self.tokenizer = tokenizer or EnglishTokenizer()
     self.trie = trie or Trie()
     self.regular_expressions_entities = []
     self._regex_strings = set()
     self.tagger = EntityTagger(self.trie, self.tokenizer, self.regular_expressions_entities)
     self.intent_parsers = []
Example No. 5
 def test_edit_distance(self):
     trie = Trie(max_edit_distance=1)
     trie.insert("restaurant")
     results = list(trie.lookup("restauran"))
     assert len(results) == 1
     results = list(trie.lookup("estaurant"))
     assert len(results) == 1
     results = list(trie.lookup("estauran"))
     assert len(results) == 0
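Based on Example 12 further down, the confidence of a fuzzy hit appears to drop with the edit distance relative to the key length, so the one-edit matches above should come back at roughly 0.9 for the ten-character key. A hedged sketch:

trie = Trie(max_edit_distance=1)
trie.insert("restaurant")
for result in trie.lookup("restauran"):
    # one edit against a 10-character key -> confidence around 1 - 1/10 = 0.9
    # (the exact formula is inferred from Example 12, not from adapt's docs)
    print(result.get('key'), result.get('confidence'))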
Example No. 6
    def test_remove(self):
        trie = Trie(max_edit_distance=2)
        trie.insert("1", "Number")
        trie.insert("2", "Number")
        trie.remove("2")

        one_lookup = list(trie.gather("1"))
        two_lookup = list(trie.gather("2"))
        assert len(one_lookup) == 1  # One match found
        assert len(two_lookup) == 0  # Zero matches since removed
Example No. 7
    def test_is_prefix(self):
        trie = Trie()
        trie.insert("play", "PlayVerb")
        trie.insert("the big bang theory", "Television Show")
        trie.insert("the big", "Not a Thing")
        trie.insert("barenaked ladies", "Radio Station")

        assert trie.root.is_prefix("the")
        assert trie.root.is_prefix("play")
        assert not trie.root.is_prefix("Kermit")
Example No. 8
    def test_simple_remove(self):
        trie = Trie()
        trie.insert("1", "Number")
        results = list(trie.lookup("1"))
        assert len(results) == 1
        assert len(results[0].get('data')) == 1

        assert trie.remove("1")
        results = list(trie.lookup("1"))
        assert len(results) == 0
Example No. 9
 def setUp(self):
     self.trie = Trie()
     self.tokenizer = EnglishTokenizer()
     self.regex_entities = []
     self.tagger = EntityTagger(self.trie, self.tokenizer, regex_entities=self.regex_entities)
     self.trie.insert("play", ("play", "PlayVerb"))
     self.trie.insert("the big bang theory", ("the big bang theory", "Television Show"))
     self.trie.insert("the big", ("the big", "Not a Thing"))
     self.trie.insert("barenaked ladies", ("barenaked ladies", "Radio Station"))
     self.parser = Parser(self.tokenizer, self.tagger)
Example No. 10
    def test_remove_multi_first(self):
        trie = Trie(max_edit_distance=2)
        trie.insert("Kermit", "Muppets")
        trie.insert("Kermit", "Frogs")
        kermit_lookup = list(trie.lookup("Kermit"))[0]
        assert 'Frogs' in kermit_lookup['data']
        assert 'Muppets' in kermit_lookup['data']

        trie.remove("Kermit", "Muppets")

        kermit_lookup = list(trie.lookup("Kermit"))[0]
        assert kermit_lookup['data'] == {"Frogs"}  # Right data remains
Example No. 11
    def test_named_remove(self):
        trie = Trie()
        trie.insert("1", "Number")
        trie.insert("1", "The Loneliest")
        results = list(trie.lookup("1"))
        assert len(results) == 1
        assert len(results[0].get('data')) == 2

        assert trie.remove("1", "Number")
        results = list(trie.lookup("1"))
        assert len(results) == 1
        assert len(results[0].get('data')) == 1
Example No. 12
 def test_edit_distance_confidence(self):
     trie = Trie(max_edit_distance=2)
     trie.insert("a")
     trie.insert("bb")
     trie.insert("ccc")
     trie.insert("dddd")
     trie.insert("100")
     results = list(trie.gather("b"))
     assert len(results) == 1
     assert results[0].get('confidence') == 0.5
     results = list(trie.gather("1 of"))
     assert len(results) == 3
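The 0.5 here is consistent with confidence being penalized by edit distance: "b" reaches the two-character entity "bb" with one edit, and 1 - 1/2 = 0.5. A quick check of that arithmetic with a hypothetical helper (the real formula inside adapt's Trie is an assumption):

def levenshtein(a, b):
    # classic dynamic-programming edit distance, for illustration only
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (ca != cb)))
        prev = cur
    return prev[-1]

print(1 - levenshtein("b", "bb") / len("bb"))   # 0.5, matching the assertion above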
Example No. 13
    def __init__(self, tokenizer=None, trie=None):
        """
        Initialize the IntentDeterminationEngine

        Args:
            tokenizer(tokenizer): tokenizer used to break up spoken text,
                e.g. EnglishTokenizer()
            trie(Trie): tree of matches to Entities
        """
        self.tokenizer = tokenizer or EnglishTokenizer()
        self.trie = trie or Trie()
        self.regular_expressions_entities = []
        self._regex_strings = set()
        self.intent_parsers = []
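A minimal end-to-end sketch of the engine this constructor sets up, using only calls that appear elsewhere in these examples (register_entity from Example 25, IntentBuilder from Examples 15 and 18, register_intent_parser and determine_intent from Example 29); the entity and intent names are illustrative:

from adapt.engine import IntentDeterminationEngine
from adapt.intent import IntentBuilder

engine = IntentDeterminationEngine()
engine.register_entity("weather", "WeatherKeyword")

weather_intent = IntentBuilder("WeatherIntent").require("WeatherKeyword").build()
engine.register_intent_parser(weather_intent)

for intent in engine.determine_intent("what's the weather like"):
    if intent.get('confidence') > 0:
        print(intent)   # includes intent_type and confidence, per Example 29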
Example No. 14
 def setUp(self):
     self.tokenizer = EnglishTokenizer()
     self.trie = Trie(max_edit_distance=2)
     self.trie.insert("x-play", "Television Show")
     self.trie.insert("play", "Play Verb")
     self.trie.insert("play season", "Time Period")
     self.trie.insert("play", "Player Control")
     self.trie.insert("season", "Season Prefix")
     self.trie.insert("1", "Number")
     self.trie.insert("the big bang theory", "Television Show")
     self.trie.insert("the big", "Television Show")
     self.trie.insert("big bang", "event")
     self.trie.insert("bang theory", "Scientific Theory")
     self.tagger = EntityTagger(self.trie, self.tokenizer)
Example No. 15
    def test_intent_with_regex_entity(self):
        self.trie = Trie()
        self.tagger = EntityTagger(self.trie, self.tokenizer, self.regex_entities)
        self.parser = Parser(self.tokenizer, self.tagger)
        self.trie.insert("theory", ("theory", "Concept"))
        regex = re.compile(r"the (?P<Event>.*)")
        self.regex_entities.append(regex)
        intent = IntentBuilder("mock intent")\
            .require("Event")\
            .require("Concept").build()

        for result in self.parser.parse("the big bang theory"):
            result_intent = intent.validate(result.get('tags'), result.get('confidence'))
            assert result_intent.get('confidence') > 0.0
            assert result_intent.get('Event') == 'big bang'
            assert result_intent.get('Concept') == "theory"
Example No. 16
 def test_missing_entities(self):
     trie = Trie()
     trie.insert("restaurant", "Concept")
     trie.insert("rest", "Concept")
     trie.insert("restaurant2", "Fast")
     results = list(trie.gather("restaurant"))
     assert len(results) == 1
     assert trie.root.entities() == ['root', 'Concept', 'Fast']
     assert trie.checkForMissingEntites("Concept") is None
     assert trie.checkForMissingEntites("root") is None
     assert trie.checkForMissingEntites("Fast") is None
     assert trie.checkForMissingEntites(['root', 'Concept', 'Fast']) is None
     assert trie.checkForMissingEntites(['root2', 'Concept',
                                         'Fast']) == ["root2"]
     assert trie.checkForMissingEntites(
         ('root2', 'Concept', 'Fast')) == ["root2"]
Example No. 17
    def test_scan(self):
        trie = Trie(max_edit_distance=2)
        trie.insert("Kermit", "Muppets")
        trie.insert("Gonzo", "Muppets")
        trie.insert("Rowlf", "Muppets")
        trie.insert("Gobo", "Fraggles")

        def match_func(data):
            return data == "Muppets"

        results = trie.scan(match_func)
        assert len(results) == 3
        muppet_names = [r[0] for r in results]
        assert "Kermit" in muppet_names
        assert "Gonzo" in muppet_names
        assert "Rowlf" in muppet_names
Example No. 18
 def __init__(self):
     self.trie = Trie()
     self.tokenizer = EnglishTokenizer()
     self.regex_entities = []
     self.tagger = EntityTagger(self.trie,
                                self.tokenizer,
                                regex_entities=self.regex_entities)
     self.trie.insert("play", ("play", "PlayVerb"))
     self.trie.insert("play", ("play", "Command"))
     self.trie.insert("the big bang theory",
                      ("the big bang theory", "Television Show"))
     self.trie.insert("all that", ("all that", "Television Show"))
     self.trie.insert("all that", ("all that", "Radio Station"))
     self.trie.insert("the big", ("the big", "Not a Thing"))
     self.trie.insert("barenaked ladies",
                      ("barenaked ladies", "Radio Station"))
     self.trie.insert("show", ("show", "Command"))
     self.trie.insert("what", ("what", "Question"))
     self.parser = Parser(self.tokenizer, self.tagger)
     self.intent = IntentBuilder("Test Intent").require(
         "PlayVerb").one_of("Television Show", "Radio Station").build()
Example No. 19
    def parse(self, utterance, context=None, N=1):
        """

        :param utterance:
        :param context: a list of entities
        :param N:
        :return:
        """
        start = time.time()
        context_trie = None
        if context and isinstance(context, list):
            # sort by confidence in ascending order, so
            # highest confidence for an entity is last.
            # see comment on TrieNode ctor
            context.sort(key=lambda x: x.get('confidence'))

            context_trie = Trie()
            for entity in context:
                entity_value, entity_type, metadata = entity.get('data')[0]
                context_trie.insert(entity_value.lower(),
                                    data=(entity_value, entity_type, metadata),
                                    weight=entity.get('confidence'))

        tagged = self._tagger.tag(utterance.lower(), context_trie=context_trie)
        self.emit(
            "tagged_entities", {
                'utterance': utterance,
                'tags': list(tagged),
                'time': time.time() - start
            })
        start = time.time()
        bke = BronKerboschExpander(self._tokenizer)

        def score_clique(clique):
            score = 0.0
            for tagged_entity in clique:
                ec = tagged_entity.get('entities', [{'confidence': 0.0}])[0].get('confidence')
                match_len = len(tagged_entity.get('entities', [{'match': ''}])[0].get('match'))
                score += ec * match_len / (len(utterance) + 1)
            return score

        parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
        count = 0
        for result in parse_results:
            count += 1
            parse_confidence = 0.0
            for tag in result:
                sample_entity = tag['entities'][0]
                entity_confidence = (sample_entity.get('confidence', 0.0) *
                                     float(len(sample_entity.get('match'))) / len(utterance))
                parse_confidence += entity_confidence
            yield {
                'utterance': utterance,
                'tags': result,
                'time': time.time() - start,
                'confidence': parse_confidence
            }

            if count >= N:
                break
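The unpacking of entity.get('data')[0] above implies each context entry carries a 'confidence' and a 'data' list whose first element is an (entity_value, entity_type, metadata) triple. A hypothetical context entry shaped to match that unpacking, assuming a Parser wired up as in Example 9 (the field values are illustrative, not from the source):

context = [{
    'confidence': 0.9,
    'data': [("tokyo", "Location", {})],   # (entity_value, entity_type, metadata)
}]
for result in parser.parse("what's the weather like in tokyo", context=context):
    print(result['confidence'], [tag['match'] for tag in result['tags']])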
Example No. 20
 def test_insert_single_character_entity(self):
     trie = Trie()
     trie.insert("1", "Number")
     results = list(trie.gather("1 of the big bang theory"))
     assert len(results) == 1
     assert len(results[0].get('data')) == 1
Example No. 21
 def test_retrieval_of_multi_word_entity(self):
     trie = Trie()
     trie.insert("play", "PlayVerb")
     trie.insert("the big bang theory", "Television Series")
     results = list(trie.gather("1 of the big bang theory"))
     assert len(results) == 0
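The zero-result gather above is the start-of-string behavior again: "1 of the big bang theory" does not begin with either entity. Finding entities anywhere in an utterance is the tagger's job; a sketch of the sliding-window loop EntityTagger uses in Examples 26 and 28, reusing this test's trie (EnglishTokenizer as imported in Example 25):

tokens = EnglishTokenizer().tokenize("1 of the big bang theory")
for i in range(len(tokens)):
    for entity in trie.gather(' '.join(tokens[i:])):
        print(i, entity.get('key'))   # finds "the big bang theory" at token 2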
Example No. 22
 def setUp(self):
     self.trie = Trie()
     self.tagger = EntityTagger(self.trie, EnglishTokenizer())
     self.trie.insert("play", "PlayVerb")
     self.trie.insert("the big bang theory", "Television Show")
     self.trie.insert("the big", "Not a Thing")
Example No. 23
    def parse(self, utterance, context=None, N=1):
        """Used to find tags within utterance with a given confidence

        Args:
            utterance(str): conversational piece given by the user
            context(list): a list of entities
            N(int): number of results
        Yields: an object with the following fields
            utterance(str): the value passed in
            tags(list): a list of tags found in the utterance
            time(float): duration since the call of this function
            confidence(float): how confident the parse is of matching the
                utterance. This can be used to determine the most likely intent.

        """
        start = time.time()
        context_trie = None
        if context and isinstance(context, list):
            # sort by confidence in ascending order, so
            # highest confidence for an entity is last.
            # see comment on TrieNode ctor
            context.sort(key=lambda x: x.get('confidence'))

            context_trie = Trie()
            for entity in context:
                entity_value, entity_type = entity.get('data')[0]
                context_trie.insert(entity_value.lower(),
                                    data=(entity_value, entity_type),
                                    weight=entity.get('confidence'))

        tagged = self._tagger.tag(utterance.lower(), context_trie=context_trie)
        self.emit(
            "tagged_entities", {
                'utterance': utterance,
                'tags': list(tagged),
                'time': time.time() - start
            })
        start = time.time()
        bke = BronKerboschExpander(self._tokenizer)

        def score_clique(clique):
            score = 0.0
            for tagged_entity in clique:
                ec = tagged_entity.get('entities', [{'confidence': 0.0}])[0].get('confidence')
                match_len = len(tagged_entity.get('entities', [{'match': ''}])[0].get('match'))
                score += ec * match_len / (len(utterance) + 1)
            return score

        parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
        count = 0
        for result in parse_results:
            count += 1
            parse_confidence = 0.0
            for tag in result:
                sample_entity = tag['entities'][0]
                entity_confidence = (sample_entity.get('confidence', 0.0) *
                                     float(len(sample_entity.get('match'))) / len(utterance))
                parse_confidence += entity_confidence
            yield {
                'utterance': utterance,
                'tags': result,
                'time': time.time() - start,
                'confidence': parse_confidence
            }

            if count >= N:
                break
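Worth noting how parse_confidence is assembled above: each tag contributes its entity confidence scaled by the fraction of the utterance its match covers, i.e. the sum over tags of confidence * len(match) / len(utterance). A worked example with entities from these tests:

utterance = "play the big bang theory"
# two perfect tags: "play" (4 chars) and "the big bang theory" (19 chars)
parse_confidence = 1.0 * 4 / len(utterance) + 1.0 * 19 / len(utterance)
print(round(parse_confidence, 2))   # 0.96 -- the space between tags is uncovered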
Example No. 24
 def test_edit_distance_no_confidence(self):
     trie = Trie(max_edit_distance=2)
     trie.insert("1", "Number")
     results = list(trie.gather("of the big bang theory"))
     assert len(results) == 0
Example No. 25
Try with the following:
PYTHONPATH=. python examples/multi_intent_parser.py "what's the weather like in tokyo"
PYTHONPATH=. python examples/multi_intent_parser.py "play some music by the clash"
"""

import json
import sys
from adapt.entity_tagger import EntityTagger
from adapt.tools.text.tokenizer import EnglishTokenizer
from adapt.tools.text.trie import Trie
from adapt.intent import IntentBuilder
from adapt.parser import Parser
from adapt.engine import DomainIntentDeterminationEngine

tokenizer = EnglishTokenizer()
trie = Trie()
tagger = EntityTagger(trie, tokenizer)
parser = Parser(tokenizer, tagger)

engine = DomainIntentDeterminationEngine()

engine.register_domain('Domain1')
engine.register_domain('Domain2')

# define vocabulary
weather_keyword = ["weather"]

for wk in weather_keyword:
    engine.register_entity(wk, "WeatherKeyword", domain='Domain1')

weather_types = ["snow", "rain", "wind", "sleet", "sun"]
Example No. 26
    def tag(self, utterance):
        """
        Tag known entities within the utterance.

        :param utterance: a string of natural language text

        :return: dictionary, with the following keys

        match: str - the proper entity matched

        key: str - the string that was matched to the entity

        start_token: int - 0-based index of the first token matched

        end_token: int - 0-based index of the last token matched

        entities: list - a list of entity kinds as strings (Ex: Artist, Location)
        """
        tokens = self.tokenizer.tokenize(utterance)
        entities = []
        if len(self.regex_entities) > 0:
            for part, idx in self._iterate_subsequences(tokens):
                local_trie = Trie()
                for regex_entity in self.regex_entities:
                    match = regex_entity.match(part)
                    groups = match.groupdict() if match else {}
                    for key in list(groups):
                        match_str = groups.get(key)
                        local_trie.insert(match_str, key)
                sub_tagger = EntityTagger(local_trie,
                                          self.tokenizer,
                                          max_tokens=self.max_tokens)
                for sub_entity in sub_tagger.tag(part):
                    sub_entity['start_token'] += idx
                    sub_entity['end_token'] += idx
                    for e in sub_entity['entities']:
                        e['confidence'] = 0.5
                    entities.append(sub_entity)
        additional_sort = len(entities) > 0

        for i in range(len(tokens)):
            part = ' '.join(tokens[i:])

            for new_entity in self.trie.gather(part):
                new_entity['data'] = list(new_entity['data'])
                entities.append({
                    'match': new_entity.get('match'),
                    'key': new_entity.get('key'),
                    'start_token': i,
                    'entities': [new_entity],
                    'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1
                })

        if additional_sort:
            entities = self._sort_and_merge_tags(entities)

        return entities
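A short driver for this method, wired up the way the setUp in Example 22 does it; the printed fields follow the docstring above:

trie = Trie()
trie.insert("the big bang theory", "Television Show")
tagger = EntityTagger(trie, EnglishTokenizer())
for tag in tagger.tag("play the big bang theory"):
    print(tag['match'], tag['start_token'], tag['end_token'])
    # -> the big bang theory 1 4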
Example No. 27
 def test_basic_retrieval(self):
     trie = Trie()
     trie.insert("restaurant")
     results = list(trie.lookup("restaurant"))
     assert len(results) == 1
Example No. 28
    def tag(self, utterance, context_trie=None):
        """
        Tag known entities within the utterance.
        Args:
            utterance(str): a string of natural language text
            context_trie(trie): optional, a trie containing only entities from context
                for this request

        Returns: dictionary, with the following keys
            match(str): the proper entity matched
            key(str): the string that was matched to the entity
            start_token(int): 0-based index of the first token matched
            end_token(int): 0-based index of the last token matched
            entities(list): a list of entity kinds as strings (Ex: Artist, Location)
        """
        tokens = self.tokenizer.tokenize(utterance)
        entities = []
        if len(self.regex_entities) > 0:
            for part, idx in self._iterate_subsequences(tokens):
                local_trie = Trie()
                for regex_entity in self.regex_entities:
                    match = regex_entity.match(part)
                    groups = match.groupdict() if match else {}
                    for key in list(groups):
                        match_str = groups.get(key)
                        local_trie.insert(match_str, (match_str, key))
                sub_tagger = EntityTagger(local_trie,
                                          self.tokenizer,
                                          max_tokens=self.max_tokens)
                for sub_entity in sub_tagger.tag(part):
                    sub_entity['start_token'] += idx
                    sub_entity['end_token'] += idx
                    for e in sub_entity['entities']:
                        e['confidence'] = 0.5
                    entities.append(sub_entity)
        additional_sort = len(entities) > 0

        context_entities = []
        for i in range(len(tokens)):
            part = ' '.join(tokens[i:])

            for new_entity in self.trie.gather(part):
                new_entity['data'] = list(new_entity['data'])
                entities.append({
                    'match': new_entity.get('match'),
                    'key': new_entity.get('key'),
                    'start_token': i,
                    'entities': [new_entity],
                    'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1,
                    'from_context': False
                })

            if context_trie:
                for new_entity in context_trie.gather(part):
                    new_entity['data'] = list(new_entity['data'])
                    # context entities get double the weight!
                    new_entity['confidence'] *= 2.0
                    context_entities.append({
                        'match': new_entity.get('match'),
                        'key': new_entity.get('key'),
                        'start_token': i,
                        'entities': [new_entity],
                        'end_token': i + len(self.tokenizer.tokenize(new_entity.get('match'))) - 1,
                        'from_context': True
                    })

        additional_sort = additional_sort or len(entities) > 0

        if additional_sort:
            entities = self._sort_and_merge_tags(entities + context_entities)

        return entities
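The context branch above doubles each context entity's confidence before merging. A sketch of handing tag() a context trie, built with the same insert(..., data=..., weight=...) call that parse() uses in Example 23, with the tagger wired up as in Example 22; note that, as written, context tags are only merged in when additional_sort is true, i.e. when something else also matched:

context_trie = Trie()
context_trie.insert("tokyo", data=("tokyo", "Location"), weight=0.9)
for tag in tagger.tag("play some jazz in tokyo", context_trie=context_trie):
    print(tag['match'], tag['from_context'])   # "play" False, "tokyo" True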
Example No. 29
class AdaptTTIPlugin(plugin.TTIPlugin):
    tokenizer = EnglishTokenizer()
    trie = Trie()
    tagger = EntityTagger(trie, tokenizer)
    parser = Parser(tokenizer, tagger)
    engine = IntentDeterminationEngine()

    def add_word(self, intent, word):
        # Check if this is a collection
        if is_keyword(word):
            keyword_name = "{}_{}".format(intent, word[1:][:-1])
            # print("Registering words for '{}'".format(keyword_name))
            # This doesn't have to exist:
            if keyword_name in self.keywords:
                for keyword_word in self.keywords[keyword_name]['words']:
                    # print("Registering '{}'".format(keyword_word))
                    self.engine.register_entity(keyword_word, keyword_name)
            if keyword_name in self.regex:
                for regex in self.regex[keyword_name]:
                    self.engine.register_regex_entity(regex)
        else:
            # Just register the word as a required word
            self.keyword_index += 1
            keyword_name = "{}_{}".format(intent,
                                          makeindex(self.keyword_index))
            # print("Registering word '{}' as {}".format(word,keyword_name))
            self.engine.register_entity(word, keyword_name)
        return keyword_name

    def add_intents(self, intents):
        for intent in intents:
            # print("Adding intent {}".format(intent))
            # this prevents collisions between intents
            intent_base = intent
            intent_inc = 0
            locale = profile.get("language")
            while intent in self.intent_map['intents']:
                intent_inc += 1
                intent = "{}{}".format(intent_base, intent_inc)
            if ('locale' in intents[intent_base]):
                # If the selected locale is not available, try matching just
                # the language ("en-US" -> "en")
                if (locale not in intents[intent_base]['locale']):
                    for language in intents[intent_base]['locale']:
                        if (language[:2] == locale[:2]):
                            locale = language
                            break
            if ('keywords' in intents[intent_base]['locale'][locale]):
                for keyword in intents[intent_base]['locale'][locale]['keywords']:
                    keyword_token = "{}_{}".format(intent, keyword)
                    self.keywords[keyword_token] = {
                        'words': intents[intent_base]['locale'][locale]['keywords'][keyword],
                        'name': keyword
                    }
            if ('regex' in intents[intent_base]['locale'][locale]):
                for regex_name in intents[intent_base]['locale'][locale]['regex']:
                    regex_token = "{}_{}".format(intent, regex_name)
                    self.regex[regex_token] = []
                    for regex in intents[intent_base]['locale'][locale]['regex'][regex_name]:
                        self.regex[regex_token].append(regex.replace(regex_name, regex_token))
                # pprint(self.regex)
            self.intent_map['intents'][intent] = {
                'action': intents[intent_base]['action'],
                'name': intent_base,
                'templates': [],
                'words': {}
            }
            for phrase in intents[intent_base]['locale'][locale]['templates']:
                # Save the phrase so we can search for undefined keywords
                self.intent_map['intents'][intent]['templates'].append(phrase)
                # Make a count of word frequency. The fact that small connector
                # type words sometimes appear multiple times in a single
                # sentence while the focal words usually only appear once is
                # giving too much weight to those connector words.
                words = list(set(phrase.split()))
                for word in words:
                    if not is_keyword(word):
                        word = word.upper()
                    # Count the number of times the word appears in this intent
                    try:
                        self.intent_map['intents'][intent]['words'][word]['count'] += 1
                    except KeyError:
                        self.intent_map['intents'][intent]['words'][word] = {
                            'count': 1,
                            'weight': None,
                            'required': False
                        }
                    # Count the number of intents the word appears in
                    try:
                        self.words[word].update({intent: True})
                    except KeyError:
                        self.words[word] = {intent: True}
            # for each word in each intent, divide the word frequency by the number of examples.
            # Since a word is only counted once per example, regardless of how many times it appears,
            # if the number of times it was counted matches the number of examples, then
            # this is a "required" word.
            phrase_count = len(intents[intent_base]['locale'][locale]['templates'])
            for word in self.intent_map['intents'][intent]['words']:
                # print("Word: '{}' Count: {} Phrases: {} Weight: {}".format(word, self.intent_map['intents'][intent]['words'][word], phrase_count, weight(self.intent_map['intents'][intent]['words'][word], phrase_count)))
                word_weight = weight(
                    self.intent_map['intents'][intent]['words'][word]['count'],
                    phrase_count)
                self.intent_map['intents'][intent]['words'][word]['weight'] = word_weight
                if word_weight == 1:
                    self.intent_map['intents'][intent]['words'][word]['required'] = True

    # Call train after loading all the intents.
    def train(self):
        # print("Words:")
        # pprint(self.words)
        # print("")
        # print("Intents:")
        # pprint(self.intent_map['intents'])
        # print("Keywords:")
        # pprint(self.keywords)
        for intent in self.intent_map['intents']:
            required_words = []
            optional_words = []
            # print("Training {}".format(intent))
            # pprint(self.keywords)
            for word in self.intent_map['intents'][intent]['words']:
                intents_count = len(self.intent_map['intents'])
                word_appears_in = len(self.words[word])
                # print("Word: {} Weight: {} Intents: {} Appears in: {}".format(word, weight, intents_count, word_appears_in))
                self.intent_map['intents'][intent]['words'][word]['weight'] = (
                    self.intent_map['intents'][intent]['words'][word]['weight'] *
                    (intents_count - word_appears_in) / intents_count)
                if self.intent_map['intents'][intent]['words'][word]['required']:
                    # add the word as required.
                    # print("adding '{}' as required".format(word))
                    required_words.append(self.add_word(intent, word))
                else:
                    # if the word is a keyword list, add it
                    if word[:1] + word[-1:] == "{}":
                        optional_words.append(self.add_word(intent, word))
                    elif self.intent_map['intents'][intent]['words'][word]['weight'] > 0.35:
                        # print("adding '{}' as optional".format(word))
                        optional_words.append(self.add_word(intent, word))
            construction = IntentBuilder(intent)
            for keyword in required_words:
                # print("Required word: {}".format(keyword))
                construction = construction.require(keyword)
            for keyword in optional_words:
                # print("Optional word: {}".format(keyword))
                construction = construction.optionally(keyword)
            if (construction):
                # print("Building {}".format(intent))
                self.engine.register_intent_parser(construction.build())
        # pprint(self.intent_map['intents'])
        # print("")
        self.trained = True

    def get_plugin_phrases(self, passive_listen=False):
        phrases = []
        # include the keyword, otherwise
        if (passive_listen):
            keywords = profile.get(["keyword"])
            if not (isinstance(keywords, list)):
                keywords = [keywords]
            phrases.extend([word.upper() for word in keywords])
        # Include any custom phrases (things you say to Naomi
        # that don't match plugin phrases. Otherwise, there is
        # a high probability that something you say will be
        # interpreted as a command. For instance, the
        # "check_email" plugin has only "EMAIL" and "INBOX" as
        # standard phrases, so every time I would say
        # "Naomi, check email" Naomi would hear "NAOMI SHUT EMAIL"
        # and shut down.
        custom_standard_phrases_file = paths.data(
            "standard_phrases",
            "{}.txt".format(profile.get(['language'], 'en-US')))
        if (os.path.isfile(custom_standard_phrases_file)):
            with open(custom_standard_phrases_file, mode='r') as f:
                for line in f:
                    phrase = line.strip()
                    if phrase:
                        phrases.append(phrase)

        # for plugin in self._plugins:
        for intent in self.intent_map['intents']:
            if ('templates' in self.intent_map['intents'][intent]):
                templates = self.intent_map['intents'][intent]['templates']
                keywords_list = [keyword for keyword in self.keywords]
                # print("Keywords: {}".format(keywords_list))
                for keyword in keywords_list:
                    # This will not replace keywords that do not have a list associated with them, like regex and open keywords
                    # print("Replacing {} with words from {} in templates".format(keyword,keywords[keyword]))
                    if (keyword[:len(intent) + 1] == "{}_".format(intent)):
                        short_keyword = self.keywords[keyword]['name']
                        for template in templates:
                            # print("Checking template: {} for keyword {}".format(template,short_keyword))
                            if (to_keyword(short_keyword) in template):
                                templates.extend([
                                    template.replace(to_keyword(short_keyword),
                                                     word.upper())
                                    for word in self.keywords[keyword]['words']
                                ])
                            # Now that we have expanded every instance of keyword in templates, delete any template that still contains keyword
                            templates = [
                                template for template in templates
                                if to_keyword(short_keyword) not in template
                            ]
                phrases.extend(templates)
        return sorted(phrases)

    def determine_intent(self, phrase):
        response = {}
        try:
            for intent in self.engine.determine_intent(phrase):
                if intent and intent.get("confidence") > 0:
                    keywords = {}
                    for keyword in intent:
                        if keyword not in ['confidence', 'intent_type', 'target']:
                            if keyword in self.keywords:
                                # Since the Naomi parser can return a list of
                                # matching words, this needs to be a list
                                keywords[self.keywords[keyword]['name']] = [intent[keyword]]
                    response.update({
                        self.intent_map['intents'][intent['intent_type']]['name']: {
                            'action': self.intent_map['intents'][intent['intent_type']]['action'],
                            'input': phrase,
                            'matches': keywords,
                            'score': intent['confidence']
                        }
                    })
        except ZeroDivisionError:
            print("Could not determine an intent")
        return response