Esempio n. 1
0
class PadaosEngine(IntentEngine):
    """Intent engine that forwards every operation to a padaos container."""

    def __init__(self):
        self.name = "padaos"
        IntentEngine.__init__(self, self.name)
        # Engine settings live under the engine's own name in the
        # configuration; fall back to an empty dict when the key is absent.
        engine_settings = Configuration.get().get(self.name, {})
        self.config = engine_settings
        self.container = IntentContainer()

    def add_intent(self, name, samples):
        """ register an intent under `name` with the given sample phrases """
        self.container.add_intent(name, samples)

    def remove_intent(self, name):
        """ remove the intent registered under `name` from the container """
        self.container.remove_intent(name)

    def add_entity(self, name, samples):
        """ register an entity under `name` with the given sample values """
        self.container.add_entity(name, samples)

    def remove_entity(self, name):
        """ remove the entity registered under `name` from the container """
        self.container.remove_entity(name)

    def train(self, single_thread=False):
        """ train all registered intents and entities"""
        # Nothing to do: padaos is plain regex and takes care of this at
        # registration time. `single_thread` is accepted but unused.

    def calc_intent(self, query):
        """ return best intent for this query  """
        # Start from the defaults, then let whatever the container reports
        # override them.
        result = dict(conf=0, utterance=query, name=None)
        match = self.container.calc_intent(query)
        result.update(match)
        return result
Esempio n. 2
0
class BasicTeacher(object):
    """
    Poor-man's english connection extractor. Not even close to complete

    Utterances are normalized aggressively and then matched against a small
    set of templates registered in an ``IntentContainer``.
    """
    nlp = None
    coref = None

    def __init__(self, nlp=None, coref=None, use_nlp=False):
        # The nlp / coref arguments are only honoured when use_nlp is set;
        # otherwise the class-level defaults stay in effect.
        if use_nlp:
            self.nlp = nlp or self.nlp or get_nlp()
            self.coref = coref or self.coref

        self.container = IntentContainer()
        self.register_utterances()

    def register_utterances(self):
        """Register every connection template with the container, in order."""
        templates = (
            ('instance of', ['{source} (is|are|instance) {target}']),
            ('sample of', ['{source} is (sample|example) {target}']),
            ('incompatible',
             ['{source} (can not|is forbidden|is not allowed) {target}']),
            ('synonym', ['{source} is (same|synonym) {target}']),
            ('antonym', ['{source} is (opposite|antonym) {target}']),
            ('part of', ['{source} is part {target}',
                         '{target} is (composed|made) {source}']),
            ('capable of', ['{source} (is capable|can) {target}']),
            ('created by', ['{source} is created {target}']),
            ('used for', ['{source} is used {target}']),
        )
        for connection, samples in templates:
            self.container.add_intent(connection, samples)

    def normalize(self, text):
        """Return `text` lower-cased with filler words dropped or rewritten."""
        cleaned = normalize(text, True, True,
                            nlp=self.nlp, coref_nlp=self.coref)
        # lets be aggressive to improve parsing
        cleaned = cleaned.lower()
        cleaned = cleaned.replace("did you know that", "")
        cleaned = cleaned.replace("example", "sample of")

        dropped = ("a", "an", "of", "that", "this", "to", "with", "as", "by",
                   "for")
        substitutions = {
            "be": "is",
            "are": "is",
            "you": "self",
            "was": "is",
            "i": "user",
            "were": "is"
        }
        tokens = []
        for token in cleaned.split(" "):
            if token in dropped:
                continue
            tokens.append(substitutions.get(token, token))

        # Empty tokens (left by consecutive spaces) are discarded here.
        return " ".join(token for token in tokens if token)

    def parse(self, utterance):
        """Match `utterance` and return its entities plus match metadata.

        The returned dict is the container's ``entities`` mapping with
        ``normalized_text`` and ``connection_type`` added to it.
        """
        normalized = self.normalize(utterance)
        match = self.container.calc_intent(normalized)

        result = match["entities"]
        result["normalized_text"] = normalized
        result["connection_type"] = match["name"]
        return result
Esempio n. 3
0
class PadaosExtractor(IntentExtractor):
    """Intent extractor backed by a padaos ``IntentContainer``.

    Relies on ``self._intent_samples``, ``self.segmenter`` and
    ``self.filter_intents``, none of which are defined in this class
    (presumably provided by ``IntentExtractor`` -- confirm against the base).
    """
    keyword_based = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.container = IntentContainer()
        # Names of the intents currently registered; kept free of duplicates
        # so that a single detach_intent() call fully removes a name.
        self.registered_intents = []

    def detach_intent(self, intent_name):
        """Remove `intent_name` from the container if it was registered here."""
        if intent_name in self.registered_intents:
            LOG.debug("Detaching padaous intent: " + intent_name)
            self.container.remove_intent(intent_name)
            self.registered_intents.remove(intent_name)

    def detach_skill(self, skill_id):
        """Detach every registered intent whose name contains `skill_id`."""
        LOG.debug("Detaching padaos skill: " + str(skill_id))
        # Snapshot first: detach_intent mutates registered_intents.
        remove_list = [i for i in self.registered_intents if skill_id in i]
        for i in remove_list:
            self.detach_intent(i)

    def register_entity(self, entity_name, samples=None):
        """Register an entity; the name itself is the sample when none given."""
        samples = samples or [entity_name]
        self.container.add_entity(entity_name, samples)

    def register_intent(self, intent_name, samples=None):
        """Register an intent; the name itself is the sample when none given.

        Samples for an already known intent are appended to the stored ones.
        """
        # Copy so the stored list never aliases the caller's list; otherwise
        # a later `+=` for the same intent would mutate the caller's object.
        samples = list(samples) if samples else [intent_name]
        if intent_name not in self._intent_samples:
            self._intent_samples[intent_name] = list(samples)
        else:
            self._intent_samples[intent_name] += samples
        self.container.add_intent(intent_name, samples)
        # Registering the same intent twice must not produce a duplicate
        # entry, or detach_intent() would leave a stale name behind.
        if intent_name not in self.registered_intents:
            self.registered_intents.append(intent_name)

    @staticmethod
    def _read_samples(file_name):
        """Return the non-blank lines of `file_name`."""
        with open(file_name) as f:
            lines = f.read().split("\n")
        # A trailing newline or blank line would otherwise be registered
        # as an empty sample.
        return [line for line in lines if line.strip()]

    def register_entity_from_file(self, entity_name, file_name):
        """Register an entity using one sample per non-blank line of a file."""
        self.register_entity(entity_name, self._read_samples(file_name))

    def register_intent_from_file(self, intent_name, file_name):
        """Register an intent using one sample per non-blank line of a file."""
        self.register_intent(intent_name, self._read_samples(file_name))

    def calc_intent(self, utterance, min_conf=0.5):
        """Return the best padaos match for `utterance` as a dict.

        `min_conf` is accepted but not used by this method. When nothing
        matches, an ``unknown`` intent with ``conf`` 0 is returned.
        """
        utterance = utterance.strip().lower()
        intent = self.container.calc_intent(utterance)
        if intent["name"]:
            remainder = get_utterance_remainder(
                utterance, samples=self._intent_samples[intent["name"]])
            intent["intent_engine"] = "padaos"
            intent["intent_type"] = intent.pop("name")
            intent["utterance"] = utterance
            intent["utterance_remainder"] = remainder
            # Confidence shrinks with the number of segments in the
            # utterance; guard against an empty segmentation so the
            # division cannot raise ZeroDivisionError.
            modifier = len(self.segmenter.segment(utterance)) or 1
            intent["conf"] = 1 / modifier - 0.1
            return intent
        return {
            'conf': 0,
            'intent_type': 'unknown',
            'entities': {},
            'utterance': utterance,
            'utterance_remainder': utterance,
            'intent_engine': 'padaos'
        }

    def intent_scores(self, utterance):
        """Return the per-segment intents of `utterance` as a list."""
        utterance = utterance.strip().lower()
        bucket = self.calc_intents(utterance)
        return [intent for intent in bucket.values() if intent]

    def calc_intents(self, utterance, min_conf=0.5):
        """Map each segment of `utterance` to its best intent.

        `min_conf` is accepted but not used by this method.
        """
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            bucket[ut] = self.calc_intent(ut)
        return bucket

    def calc_intents_list(self, utterance):
        """Map each segment of `utterance` to ``self.filter_intents(segment)``."""
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            bucket[ut] = self.filter_intents(ut)
        return bucket

    def manifest(self):
        """Return a dict listing the names of the registered intents."""
        # TODO vocab, skill ids, intent_data
        return {"intent_names": self.registered_intents}
Esempio n. 4
0
class TestIntentContainer:
    """Behavioural tests for ``IntentContainer`` matching."""

    def setup_method(self):
        # Use pytest's xunit-style hook: the nose-style ``setup`` name is no
        # longer invoked by pytest 8, which would leave ``self.container``
        # undefined in every test.
        self.container = IntentContainer()

    def test(self):
        """Intents with and without entities resolve to the expected match."""
        self.container.add_intent('hello',
                                  ['hello', 'hi', 'how are you', "what's up"])
        self.container.add_intent('buy', [
            'buy {item}', 'purchase {item}', 'get {item}', 'get {item} for me'
        ])
        self.container.add_entity('item', ['milk', 'cheese'])
        self.container.add_intent('drive', [
            'drive me to {place}', 'take me to {place}', 'navigate to {place}'
        ])
        self.container.add_intent(
            'eat',
            ['eat {fruit}', 'eat some {fruit}', 'munch on (some|) {fruit}'])
        self.container.compile()
        assert self.container.calc_intent('hello')['name'] == 'hello'
        assert not self.container.calc_intent('bye')['name']
        assert self.container.calc_intent('buy milk') == {
            'name': 'buy',
            'entities': {
                'item': 'milk'
            }
        }
        assert self.container.calc_intent('eat some bananas') == {
            'name': 'eat',
            'entities': {
                'fruit': 'bananas'
            }
        }

    def test_case(self):
        """Matching ignores letter case in both sample and query."""
        self.container.add_intent('test', ['Testing cAPitalizAtion'])
        assert self.container.calc_intent(
            'teStiNg CapitalIzation')['name'] == 'test'

    def test_punctuation(self):
        """Matching ignores punctuation in both sample and query."""
        self.container.add_intent('test', ['Test! Of: Punctuation'])
        assert self.container.calc_intent(
            'test of !punctuation...')['name'] == 'test'

    def test_spaces(self):
        """Word boundaries matter: glued or split words must not match."""
        self.container.add_intent('test', ['this is a test'])
        assert self.container.calc_intent('thisisatest')['name'] is None
        self.container.add_intent('test2', ['this has(one|two)options'])
        assert self.container.calc_intent(
            'this has two options')['name'] == 'test2'
        assert self.container.calc_intent('th is is a test')['name'] is None

        self.container.add_intent('test3', ['I see {thing} (in|on) {place}'])
        # "bin" contains "in" but must not be split to satisfy (in|on).
        assert self.container.calc_intent('I see a bin test')['name'] is None
        assert self.container.calc_intent('I see a bin in there') == {
            'name': 'test3',
            'entities': {
                'thing': 'a bin',
                'place': 'there'
            }
        }
Esempio n. 5
0
class RegexQuestionParser(BasicQuestionParser):
    """
    Dead Simple Regex intent parser

    Intent rules are loaded from ``*.intent`` files and matched with an
    ``IntentContainer``.
    """
    # Shared by normalize() and the 'question' entity so both stay in sync.
    _QUESTION_WORDS = ('what', 'when', 'where', 'why', 'how', 'which',
                       'whose', 'who')
    _INTENT_SUFFIX = ".intent"

    def __init__(self, lang="en-us"):
        super().__init__(lang)
        self.container = IntentContainer()
        self._intents = []
        self.lang = lang
        self.register_default_intents()

    def normalize(self, text):
        """Return `text` lower-cased with filler and question words removed.

        "are", "was" and "were" are rewritten to "is"; a literal "is" in the
        input is dropped.
        """
        # pos parsing normalization
        text = text.replace(" 's", "'s").replace("''", "").replace("``", "")
        text = normalize(text)
        text = text.lower()
        removes = {
            "a", "an", "of", "that", "this", "to", "with", "as", "by", "for",
            "me", "do", "have", "does", "is", "your", "in", "i"
        } | set(self._QUESTION_WORDS)
        replaces = {"are": "is", "was": "is", "were": "is"}
        kept = []
        for word in text.split(" "):
            if word in removes:
                continue
            kept.append(replaces.get(word, word))

        # Empty words (left by consecutive spaces) are discarded here.
        return " ".join(w for w in kept if w)

    @property
    def intents(self):
        """Names of the intents registered through register_intent()."""
        return self._intents

    def register_intent(self, name, rules):
        """Record `name` and add its `rules` to the container."""
        self._intents.append(name)
        self.container.add_intent(name, rules)

    def register_default_intents(self):
        """Register the 'question' entity and load the bundled intents."""
        self.container.add_entity('question', list(self._QUESTION_WORDS))

        self.from_folder(join(RESOURCES_PATH, self.lang))

    def from_folder(self, folder_path, reset=False):
        """Register one intent per ``*.intent`` file found in `folder_path`.

        Each non-blank line that is not a ``#`` comment is one rule; the
        intent is named after the file without its ``.intent`` suffix.

        With `reset` set, the container and the intent list are replaced
        first.
        NOTE(review): the new container does not get the 'question' entity
        that register_default_intents() added to the old one -- confirm
        this is intended.
        """
        assert isdir(folder_path), "not a directory: %s" % folder_path
        if reset:
            self.container = IntentContainer()
            self._intents = []
        for f in listdir(folder_path):
            if not f.endswith(self._INTENT_SUFFIX):
                continue
            # Cut only the trailing suffix; str.replace would also remove
            # ".intent" from the middle of a file name.
            intent = f[:-len(self._INTENT_SUFFIX)]
            with open(join(folder_path, f)) as fi:
                lines = [line.strip() for line in fi]
            # Filter after stripping so blank lines ("\n") and indented
            # comments are skipped instead of becoming rules.
            rules = [r for r in lines if r and not r.startswith("#")]
            self.register_intent(intent, rules)

    def parse(self, utterance):
        """Parse `utterance` and return the resulting data dict.

        Starts from whatever the base class ``parse`` returns. On a match
        it adds ``QuestionIntent`` and the matched entities, and normalizes
        a ``query`` entry when one is present.
        """
        # normalization pre-parsing
        data = super().parse(utterance)
        utterance = normalize(str(utterance)).lower()

        COMMON_STARTERS = ["on average", "about", "tell me", "approximately"]
        for c in COMMON_STARTERS:
            if utterance.startswith(c):
                # Drop the leading occurrence only; str.replace would also
                # delete the phrase wherever else it appears.
                utterance = utterance[len(c):].strip()
        # Collapse runs of whitespace into single spaces.
        utterance = " ".join(utterance.split())

        match = self.container.calc_intent(utterance)
        if match.get("name"):
            data["QuestionIntent"] = match["name"]
            entities = match["entities"]
            data.update(entities)
            if "query" in data:
                data["query"] = self.normalize(data["query"])
        return data