class PadaosEngine(IntentEngine):
    """Intent engine backed by the regex-based padaos matcher.

    Thin adapter: registration and matching are delegated straight to a
    padaos ``IntentContainer``; configuration is pulled from the shared
    ``Configuration`` under this engine's name.
    """

    def __init__(self):
        self.name = "padaos"
        IntentEngine.__init__(self, self.name)
        self.config = Configuration.get().get(self.name, {})
        self.container = IntentContainer()

    def add_intent(self, name, samples):
        """Register an intent with its sample phrases."""
        self.container.add_intent(name, samples)

    def remove_intent(self, name):
        """Remove a previously registered intent."""
        self.container.remove_intent(name)

    def add_entity(self, name, samples):
        """Register an entity with its sample values."""
        self.container.add_entity(name, samples)

    def remove_entity(self, name):
        """Remove a previously registered entity."""
        self.container.remove_entity(name)

    def train(self, single_thread=False):
        """Train all registered intents and entities.

        Padaos is simply regex; it handles this when registering, so
        there is nothing to do here.
        """
        pass

    def calc_intent(self, query):
        """Return the best intent match for ``query``.

        Always returns a dict with at least ``conf``, ``utterance`` and
        ``name`` keys; the container result is merged on top.
        """
        result = {"conf": 0, "utterance": query, "name": None}
        result.update(self.container.calc_intent(query))
        return result
class RuleNER(SimpleNER):
    """Named-entity recognizer driven by padaos rule patterns."""

    def __init__(self):
        self._container = IntentContainer()
        self._rules = {}
        self._examples = {}

    @property
    def rules(self):
        """Mapping of rule name -> list of registered Rule objects."""
        return self._rules

    @property
    def examples(self):
        """Mapping of entity name -> list of registered Entity examples."""
        return self._examples

    def add_rule(self, name, rules):
        """Register one or more pattern strings under ``name``."""
        if isinstance(rules, str):
            rules = [rules]
        self._container.add_intent(name, rules)
        # NOTE, there is a bug, entities need to be lower case
        # n.add_rule("name", "my name is {Person}") <- won't work
        lowered = [r.lower() for r in rules]
        self._rules.setdefault(name, []).append(Rule(name, lowered))

    def add_entity_examples(self, name, examples):
        """Register example values for the entity ``name``."""
        if isinstance(examples, str):
            examples = [examples]
        self._container.add_entity(name, examples)
        bucket = self._examples.setdefault(name, [])
        bucket.extend(Entity(example, name) for example in examples)

    def extract_entities(self, text, as_json=False):
        """Yield entities matched in ``text``.

        Yields ``Entity`` objects, or plain dicts when ``as_json`` is True.
        """
        for match in self._container.calc_intents(text):
            matched_rules = self._rules[match["name"]]
            for entity_type, value in match["entities"].items():
                entity = Entity(value,
                                entity_type=entity_type,
                                source_text=text,
                                rules=matched_rules)
                yield entity.as_json() if as_json else entity
class PadaosFileIntent(IntentPlugin):
    """Interface for Padatious intent engine.

    Loads intent/entity sample phrases from skill locale files unless a
    dynamic object with inline data is supplied.
    """

    def __init__(self, rt):
        super().__init__(rt)
        self.container = IntentContainer()

    def _read_file(self, file_name):
        """Return the non-empty, stripped lines of ``file_name``."""
        with open(file_name) as f:
            stripped = (line.strip() for line in f)
            return [line for line in stripped if line]

    def register(self, intent: Any, skill_name: str, intent_id: str):
        """Register an intent, reading samples from disk when needed."""
        if not isinstance(intent, DynamicIntent):
            path = join(self.rt.paths.skill_locale(skill_name),
                        intent + '.intent')
            intent = DynamicIntent(intent, self._read_file(path))
        self.container.add_intent(intent_id, intent.data)

    def register_entity(self, entity: Any, skill_name: str, entity_id: str):
        """Register an entity, reading samples from disk when needed."""
        if not isinstance(entity, DynamicEntity):
            path = join(self.rt.paths.skill_locale(skill_name),
                        entity + '.entity')
            entity = DynamicEntity(entity, self._read_file(path))
        self.container.add_entity(entity_id, entity.data)

    def unregister(self, intent_id: str):
        """Remove a registered intent."""
        self.container.remove_intent(intent_id)

    def unregister_entity(self, entity_id: str):
        """Remove a registered entity."""
        self.container.remove_entity(entity_id)

    def compile(self):
        """Compile the container's patterns."""
        self.container.compile()

    def calc_intents(self, query):
        """Return every padaos match for ``query`` as IntentMatch objects."""
        return [
            IntentMatch(intent_id=match['name'],
                        confidence=1.0,
                        matches=match['entities'],
                        query=query)
            for match in self.container.calc_intents(query)
        ]
class BasicTeacher(object):
    """Poor-man's english connection extractor.

    Not even close to complete.
    """
    nlp = None
    coref = None

    def __init__(self, nlp=None, coref=None, use_nlp=False):
        if use_nlp:
            self.nlp = nlp or self.nlp or get_nlp()
            self.coref = coref or self.coref
        self.container = IntentContainer()
        self.register_utterances()

    def register_utterances(self):
        """Register the rule patterns used to classify connections."""
        patterns = {
            'instance of': ['{source} (is|are|instance) {target}'],
            'sample of': ['{source} is (sample|example) {target}'],
            'incompatible':
                ['{source} (can not|is forbidden|is not allowed) {target}'],
            'synonym': ['{source} is (same|synonym) {target}'],
            'antonym': ['{source} is (opposite|antonym) {target}'],
            'part of': ['{source} is part {target}',
                        '{target} is (composed|made) {source}'],
            'capable of': ['{source} (is capable|can) {target}'],
            'created by': ['{source} is created {target}'],
            'used for': ['{source} is used {target}'],
        }
        for name, samples in patterns.items():
            self.container.add_intent(name, samples)

    def normalize(self, text):
        """Aggressively simplify ``text`` to improve rule matching."""
        text = normalize(text, True, True, nlp=self.nlp, coref_nlp=self.coref)
        # lets be aggressive to improve parsing
        text = text.lower().replace("did you know that", "")
        text = text.replace("example", "sample of")
        removes = {"a", "an", "of", "that", "this", "to", "with", "as",
                   "by", "for"}
        replaces = {"be": "is", "are": "is", "you": "self",
                    "was": "is", "i": "user", "were": "is"}
        # drop stop-words, canonicalize verb forms / pronouns
        tokens = [replaces.get(word, "" if word in removes else word)
                  for word in text.split(" ")]
        return " ".join(token for token in tokens if token)

    def parse(self, utterance):
        """Return entities plus normalized text and connection type."""
        utterance = self.normalize(utterance)
        match = self.container.calc_intent(utterance)
        result = match["entities"]
        result["normalized_text"] = utterance
        result["connection_type"] = match["name"]
        return result
class TestIntentContainer:
    """Exercises padaos IntentContainer matching behavior."""

    def setup(self):
        self.container = IntentContainer()

    def test(self):
        # basic intents plus entities and optional words
        self.container.add_intent('hello',
                                  ['hello', 'hi', 'how are you', "what's up"])
        self.container.add_intent('buy', [
            'buy {item}', 'purchase {item}', 'get {item}', 'get {item} for me'
        ])
        self.container.add_entity('item', ['milk', 'cheese'])
        self.container.add_intent('drive', [
            'drive me to {place}', 'take me to {place}', 'navigate to {place}'
        ])
        self.container.add_intent(
            'eat', ['eat {fruit}', 'eat some {fruit}', 'munch on (some|) {fruit}'])
        self.container.compile()

        assert self.container.calc_intent('hello')['name'] == 'hello'
        assert not self.container.calc_intent('bye')['name']

        expected_buy = {'name': 'buy', 'entities': {'item': 'milk'}}
        assert self.container.calc_intent('buy milk') == expected_buy

        expected_eat = {'name': 'eat', 'entities': {'fruit': 'bananas'}}
        assert self.container.calc_intent('eat some bananas') == expected_eat

    def test_case(self):
        # matching must ignore capitalization
        self.container.add_intent('test', ['Testing cAPitalizAtion'])
        result = self.container.calc_intent('teStiNg CapitalIzation')
        assert result['name'] == 'test'

    def test_punctuation(self):
        # matching must ignore punctuation placement
        self.container.add_intent('test', ['Test! Of: Punctuation'])
        result = self.container.calc_intent('test of !punctuation...')
        assert result['name'] == 'test'

    def test_spaces(self):
        # word boundaries must be respected in both directions
        self.container.add_intent('test', ['this is a test'])
        assert self.container.calc_intent('thisisatest')['name'] is None

        self.container.add_intent('test2', ['this has(one|two)options'])
        result = self.container.calc_intent('this has two options')
        assert result['name'] == 'test2'
        assert self.container.calc_intent('th is is a test')['name'] is None

        self.container.add_intent('test3', ['I see {thing} (in|on) {place}'])
        assert self.container.calc_intent('I see a bin test')['name'] is None
        expected = {'name': 'test3',
                    'entities': {'thing': 'a bin', 'place': 'there'}}
        assert self.container.calc_intent('I see a bin in there') == expected
class PadaosExtractor(IntentExtractor):
    """Intent extractor backed by the regex-based padaos container."""
    keyword_based = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.container = IntentContainer()
        self.registered_intents = []

    def detach_intent(self, intent_name):
        """Remove a single registered intent, if present."""
        if intent_name not in self.registered_intents:
            return
        LOG.debug("Detaching padaous intent: " + intent_name)
        self.container.remove_intent(intent_name)
        self.registered_intents.remove(intent_name)

    def detach_skill(self, skill_id):
        """Remove every intent whose name contains ``skill_id``."""
        LOG.debug("Detaching padaos skill: " + str(skill_id))
        doomed = [name for name in self.registered_intents if skill_id in name]
        for name in doomed:
            self.detach_intent(name)

    def register_entity(self, entity_name, samples=None):
        """Register an entity; falls back to its name as the only sample."""
        self.container.add_entity(entity_name, samples or [entity_name])

    def register_intent(self, intent_name, samples=None):
        """Register an intent and remember its samples for remainders."""
        samples = samples or [intent_name]
        if intent_name in self._intent_samples:
            self._intent_samples[intent_name] += samples
        else:
            self._intent_samples[intent_name] = samples
        self.container.add_intent(intent_name, samples)
        self.registered_intents.append(intent_name)

    def register_entity_from_file(self, entity_name, file_name):
        """Register an entity with one sample per line of ``file_name``."""
        with open(file_name) as f:
            self.register_entity(entity_name, f.read().split("\n"))

    def register_intent_from_file(self, intent_name, file_name):
        """Register an intent with one sample per line of ``file_name``."""
        with open(file_name) as f:
            self.register_intent(intent_name, f.read().split("\n"))

    def calc_intent(self, utterance, min_conf=0.5):
        """Return the best padaos match for ``utterance``.

        NOTE(review): ``min_conf`` is accepted for interface compatibility
        but is not applied here.
        """
        utterance = utterance.strip().lower()
        match = self.container.calc_intent(utterance)
        if not match["name"]:
            # padaos is all-or-nothing: no partial matches
            return {
                'conf': 0,
                'intent_type': 'unknown',
                'entities': {},
                'utterance': utterance,
                'utterance_remainder': utterance,
                'intent_engine': 'padaos'
            }
        samples = self._intent_samples[match["name"]]
        match["intent_engine"] = "padaos"
        match["intent_type"] = match.pop("name")
        match["utterance"] = utterance
        match["utterance_remainder"] = get_utterance_remainder(
            utterance, samples=samples)
        # penalize utterances made of several sentences
        segment_count = len(self.segmenter.segment(utterance))
        match["conf"] = 1 / segment_count - 0.1
        return match

    def intent_scores(self, utterance):
        """Return a list of per-segment matches, dropping empty ones."""
        utterance = utterance.strip().lower()
        bucket = self.calc_intents(utterance)
        return [intent for intent in bucket.values() if intent]

    def calc_intents(self, utterance, min_conf=0.5):
        """Return a mapping of each sentence segment to its match."""
        utterance = utterance.strip().lower()
        return {segment: self.calc_intent(segment)
                for segment in self.segmenter.segment(utterance)}

    def calc_intents_list(self, utterance):
        """Return a mapping of each sentence segment to filtered intents."""
        utterance = utterance.strip().lower()
        return {segment: self.filter_intents(segment)
                for segment in self.segmenter.segment(utterance)}

    def manifest(self):
        # TODO vocab, skill ids, intent_data
        return {"intent_names": self.registered_intents}
class RegexQuestionParser(BasicQuestionParser):
    """Dead Simple Regex intent parser."""

    def __init__(self, lang="en-us"):
        super().__init__(lang)
        self.container = IntentContainer()
        self._intents = []
        self.lang = lang
        self.register_default_intents()

    def normalize(self, text):
        """Strip question/stop words and canonicalize verb forms."""
        # pos parsing normalization
        text = text.replace(" 's", "'s").replace("''", "").replace("``", "")
        text = normalize(text)
        text = text.lower()
        questions = [
            'what', 'when', 'where', 'why', 'how', 'which', 'whose', 'who'
        ]
        removes = [
            "a", "an", "of", "that", "this", "to", "with", "as", "by",
            "for", "me", "do", "have", "does", "is", "your", "in", "i"
        ] + questions
        replaces = {"are": "is", "was": "is", "were": "is"}
        tokens = ["" if word in removes else replaces.get(word, word)
                  for word in text.split(" ")]
        return " ".join(token for token in tokens if token)

    @property
    def intents(self):
        """Names of every registered intent."""
        return self._intents

    def register_intent(self, name, rules):
        """Register an intent's rule patterns and track its name."""
        self._intents.append(name)
        self.container.add_intent(name, rules)

    def register_default_intents(self):
        """Load the bundled question-word entity and locale intents."""
        self.container.add_entity(
            'question',
            ['what', 'when', 'where', 'why', 'how', 'which', 'whose', 'who'])
        self.from_folder(join(RESOURCES_PATH, self.lang))

    def from_folder(self, folder_path, reset=False):
        """Register every ``*.intent`` file found in ``folder_path``."""
        assert isdir(folder_path)
        if reset:
            self.container = IntentContainer()
            self._intents = []
        for file_name in listdir(folder_path):
            if not file_name.endswith(".intent"):
                continue
            intent_name = file_name.replace(".intent", "")
            with open(join(folder_path, file_name)) as fi:
                lines = fi.readlines()
            # skip blank lines and comments
            rules = [line.strip() for line in lines
                     if line and not line.startswith("#")]
            self.register_intent(intent_name, rules)

    def parse(self, utterance):
        """Classify ``utterance`` and extract its question entities."""
        # normalization pre-parsing
        data = super().parse(utterance)
        utterance = normalize(str(utterance)).lower()
        common_starters = ["on average", "about", "tell me", "approximately"]
        for starter in common_starters:
            if utterance.startswith(starter):
                utterance = utterance.replace(starter, "").strip()
        utterance = " ".join(utterance.split(" "))
        match = self.container.calc_intent(utterance)
        if match.get("name"):
            data["QuestionIntent"] = match["name"] or "unknown"
            data.update(match["entities"])
            if "query" in data:
                data["query"] = self.normalize(data["query"])
        return data