def train(parser, args):
    """Load training data into an IntentContainer and train it.

    Exactly one of ``args.input_files`` or ``args.data`` must be set;
    otherwise ``parser.error`` is invoked. Positional (``args.args``) and
    keyword (``args.kwargs``) training arguments are forwarded to the
    container's ``train`` method, with ``debug``, ``single_thread`` and
    ``force`` filled in when the caller did not supply them.

    Returns:
        0 when the container reports successful training, 10 otherwise
        (the original code labels this case a timeout).
    """
    files_given = bool(args.input_files)
    data_given = bool(args.data)
    if files_given == data_given:
        parser.error(
            'You must specify one of input_files or --data (but not both)')

    container = IntentContainer(args.intent_cache)

    if not args.data:
        # Pick the loader from the file extension.
        loaders = {
            '.intent': container.load_intent,
            '.entity': container.load_entity,
        }
        for path in args.input_files:
            name, extension = splitext(basename(path))
            loader = loaders.get(extension)
            if loader is None:
                parser.error('Unknown file extension: {}'.format(extension))
            else:
                loader(name, path)
    else:
        container.apply_training_args(args.data)

    # Map the positional training args onto train()'s parameter names so they
    # can be merged with the keyword args and the defaults below.
    bound = inspect.signature(container.train).bind(*(args.args or []))
    train_kwargs = bound.arguments
    train_kwargs.update(args.kwargs or {})
    fallbacks = (
        ('debug', True),
        ('single_thread', args.single_thread),
        ('force', args.force),
    )
    for key, value in fallbacks:
        train_kwargs.setdefault(key, value)

    return 0 if container.train(**train_kwargs) else 10  # timeout
def create_container():
    """Build and train an IntentContainer from a vocabulary directory.

    The directory is taken from the ``VOCAB_DIR`` environment variable and
    falls back to ``/qabot/vocab/en-us/``. Every ``*.intent`` and
    ``*.entity`` file found directly in it is loaded; other files are only
    printed. Each directory entry is printed as it is visited.

    Returns:
        The trained IntentContainer.
    """
    container = IntentContainer('intent_cache')
    vocab_dir = os.getenv('VOCAB_DIR', '/qabot/vocab/en-us/')
    entity_suffix = ".entity"
    for file_name in os.listdir(vocab_dir):
        print(file_name)
        path = join(vocab_dir, file_name)
        if file_name.endswith(".intent"):
            # The intent name (including its extension) is what callers see
            # in match results, so it is deliberately left unchanged.
            container.load_intent(basename(file_name), path)
        elif file_name.endswith(entity_suffix):
            # Intent templates refer to entities as {name}; registering the
            # entity as "name.entity" would never match that placeholder, so
            # the extension is stripped here.
            entity_name = basename(file_name)[:-len(entity_suffix)]
            container.load_entity(entity_name, path)
    container.train()
    return container
class PadatiousExtractor(IntentExtractor):
    """Intent extractor backed by a Padatious ``IntentContainer``.

    Every access to the container goes through ``self.lock``.

    ``self.config``, ``self.segmenter``, ``self.filter_intents`` and the
    sample bookkeeping (``_intent_samples`` / ``intent_samples``) are not
    defined in this class; presumably they come from ``IntentExtractor`` --
    confirm against the base class.
    """

    keyword_based = False

    def __init__(self, cache_dir=None, *args, **kwargs):
        """Create the container.

        Args:
            cache_dir: directory handed to ``IntentContainer``. When falsy,
                ``<data_dir>/padatious`` is used, where ``data_dir`` is read
                from the config (default ``~/.padatious``).
            *args, **kwargs: forwarded to the base class initializer.
        """
        super().__init__(*args, **kwargs)
        # TODO xdg data_dir
        data_dir = expanduser(self.config.get("data_dir", "~/.padatious"))
        cache_dir = cache_dir or join(data_dir, "padatious")
        self.lock = Lock()
        self.container = IntentContainer(cache_dir)
        self.registered_intents = []

    def _remember_intent(self, intent_name):
        """Record ``intent_name`` as registered, without duplicates.

        Re-registering an intent (e.g. to add samples) must not create a
        second entry: ``detach_intent`` removes a single entry, so a
        duplicate would leave a stale name behind in ``manifest()``.
        """
        if intent_name not in self.registered_intents:
            self.registered_intents.append(intent_name)

    def detach_intent(self, intent_name):
        """Remove a previously registered intent; unknown names are ignored."""
        if intent_name in self.registered_intents:
            LOG.debug("Detaching padatious intent: " + intent_name)
            with self.lock:
                self.container.remove_intent(intent_name)
            self.registered_intents.remove(intent_name)

    def detach_skill(self, skill_id):
        """Detach every registered intent whose name contains ``skill_id``."""
        LOG.debug("Detaching padatious skill: " + str(skill_id))
        # Snapshot first: detach_intent mutates registered_intents.
        remove_list = [i for i in self.registered_intents if skill_id in i]
        for i in remove_list:
            self.detach_intent(i)

    def register_entity(self, entity_name, samples=None, reload_cache=True):
        """Add an entity; the entity name is the only sample if none given."""
        samples = samples or [entity_name]
        with self.lock:
            self.container.add_entity(entity_name, samples,
                                      reload_cache=reload_cache)

    def register_intent(self, intent_name, samples=None, reload_cache=True):
        """Add an intent; the intent name is the only sample if none given.

        Calling this again for the same name appends to the stored samples.
        """
        samples = samples or [intent_name]
        if intent_name not in self._intent_samples:
            # Store a copy: later registrations extend this list in place and
            # must not modify the list object owned by the caller.
            self._intent_samples[intent_name] = list(samples)
        else:
            self._intent_samples[intent_name] += samples
        with self.lock:
            self.container.add_intent(intent_name, samples,
                                      reload_cache=reload_cache)
        self._remember_intent(intent_name)

    def register_entity_from_file(self, entity_name, file_name,
                                  reload_cache=True):
        """Load an entity definition from ``file_name``."""
        with self.lock:
            self.container.load_entity(entity_name, file_name,
                                       reload_cache=reload_cache)

    def register_intent_from_file(self, intent_name, file_name,
                                  single_thread=True, timeout=120,
                                  reload_cache=True, force_training=True):
        """Load an intent from ``file_name`` and train the container.

        Any exception is logged and swallowed; nothing is returned.
        """
        try:
            with self.lock:
                self.container.load_intent(intent_name, file_name,
                                           reload_cache=reload_cache)
            self._remember_intent(intent_name)
            # _train acquires self.lock itself, so it must run outside the
            # ``with`` block above.
            success = self._train(single_thread=single_thread,
                                  timeout=timeout,
                                  force_training=force_training)
            if success:
                LOG.debug(file_name + " trained successfully")
            else:
                LOG.error(file_name + " FAILED TO TRAIN")
        except Exception as e:
            LOG.exception(e)

    def _get_remainder(self, intent, utterance):
        """Return the utterance remainder for ``intent``.

        Delegates to ``get_utterance_remainder`` with the samples known for
        ``intent["name"]``; when no samples are known the utterance is
        returned unchanged.
        """
        if intent["name"] in self.intent_samples:
            return get_utterance_remainder(
                utterance, samples=self.intent_samples[intent["name"]])
        return utterance

    def _finalize_intent(self, intent, utterance):
        """Rename the raw match keys of ``intent`` to the extractor's schema.

        ``intent`` must be a dict copy of a container match (keys ``name``,
        ``sent``, ``matches``, ...). It is modified in place and returned.
        """
        # Must run before "name" is popped below.
        intent["utterance_remainder"] = self._get_remainder(intent, utterance)
        intent["entities"] = intent.pop("matches")
        intent["intent_engine"] = "padatious"
        intent["intent_type"] = intent.pop("name")
        intent["utterance"] = intent.pop("sent")
        if isinstance(intent["utterance"], list):
            intent["utterance"] = " ".join(intent["utterance"])
        return intent

    def calc_intent(self, utterance, min_conf=None):
        """Return the best match for ``utterance`` as a dict.

        Args:
            utterance: text to match; it is stripped and lower-cased.
            min_conf: minimum confidence. ``None`` means "use the
                ``padatious_min_conf`` config value (default 0.65)"; an
                explicit ``0`` is honored.

        Returns:
            The match dict, or an ``"unknown"`` intent with ``conf`` 0 when
            the best match is below ``min_conf``.
        """
        if min_conf is None:
            min_conf = self.config.get("padatious_min_conf", 0.65)
        utterance = utterance.strip().lower()
        with self.lock:
            # Copy so that renaming keys never alters the match object.
            intent = dict(self.container.calc_intent(utterance).__dict__)
        if intent["conf"] < min_conf:
            return {
                "intent_type": "unknown",
                "entities": {},
                "conf": 0,
                "intent_engine": "padatious",
                "utterance": utterance,
                "utterance_remainder": utterance
            }
        return self._finalize_intent(intent, utterance)

    def intent_scores(self, utterance):
        """Return all container matches for ``utterance`` as a list of dicts.

        No confidence filtering is applied.
        """
        utterance = utterance.strip().lower()
        with self.lock:
            # Materialize inside the lock; copies keep the match objects
            # untouched.
            intents = [dict(i.__dict__)
                       for i in self.container.calc_intents(utterance)]
        return [self._finalize_intent(intent, utterance)
                for intent in intents]

    def calc_intents(self, utterance, min_conf=None):
        """Match every segment of ``utterance`` separately.

        Returns:
            Dict mapping each segment to its match dict, or to ``None`` when
            the segment's confidence is below ``min_conf`` (same defaulting
            as ``calc_intent``).
        """
        if min_conf is None:
            min_conf = self.config.get("padatious_min_conf", 0.65)
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            # Forward the threshold; otherwise calc_intent would filter with
            # the configured default and ignore a lower caller threshold.
            intent = self.calc_intent(ut, min_conf=min_conf)
            bucket[ut] = None if intent["conf"] < min_conf else intent
        return bucket

    def calc_intents_list(self, utterance):
        """Map every segment of ``utterance`` to ``filter_intents(segment)``."""
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            bucket[ut] = self.filter_intents(ut)
        return bucket

    def manifest(self):
        """Return the names of the currently registered intents."""
        # TODO vocab, skill ids, intent_data
        return {"intent_names": self.registered_intents}

    def _train(self, single_thread=True, timeout=120, force_training=True):
        """Train the container under the lock and return its result."""
        with self.lock:
            return self.container.train(single_thread=single_thread,
                                        timeout=timeout,
                                        force=force_training,
                                        debug=True)
#!/usr/bin/env python3
# Sample Padatious program used for testing
#
# Loads every data/*.intent and data/*.entity file, trains the container and
# then answers queries typed at the prompt. Enter "q" (or send EOF) to quit.

import sys
from glob import glob
from os.path import basename, splitext

from padatious import IntentContainer

# "-r" as the first argument is forwarded as reload_cache=True to the loaders.
reload_cache = len(sys.argv) > 1 and sys.argv[1] == '-r'
container = IntentContainer('intent_cache')

for file_name in glob('data/*.intent'):
    # splitext removes only the trailing extension, unlike str.replace.
    name = splitext(basename(file_name))[0]
    container.load_intent(name, file_name, reload_cache=reload_cache)

for file_name in glob('data/*.entity'):
    name = splitext(basename(file_name))[0]
    container.load_entity(name, file_name, reload_cache=reload_cache)

container.train()

while True:
    try:
        query = input('> ')
    except EOFError:
        # Closed stdin ends the session instead of printing a traceback.
        break
    if query == 'q':
        # The quit command itself is not a query; do not match it.
        break
    data = container.calc_intent(query)
    print('{}: {}'.format(data.name, data.conf))
    for key, val in data.matches.items():
        print('\t{}: {}'.format(key, val))