예제 #1
0
def train(parser, args):
    if bool(args.input_files) == bool(args.data):
        parser.error(
            'You must specify one of input_files or --data (but not both)')

    cont = IntentContainer(args.intent_cache)
    if args.data:
        cont.apply_training_args(args.data)
    else:
        for fn in args.input_files:
            obj_name, ext = splitext(basename(fn))
            if ext == '.intent':
                cont.load_intent(obj_name, fn)
            elif ext == '.entity':
                cont.load_entity(obj_name, fn)
            else:
                parser.error('Unknown file extension: {}'.format(ext))
    kwargs = inspect.signature(cont.train).bind(*(args.args or [])).arguments
    kwargs.update(args.kwargs or {})
    kwargs.setdefault('debug', True)
    kwargs.setdefault('single_thread', args.single_thread)
    kwargs.setdefault('force', args.force)
    if cont.train(**kwargs):
        return 0
    return 10  # timeout
예제 #2
0
파일: nlp.py 프로젝트: paper2code/rpquiz
def create_container():
    container = IntentContainer('intent_cache')
    dir = os.getenv('VOCAB_DIR', '/qabot/vocab/en-us/')
    for file in os.listdir(dir):
        print(file)
        if file.endswith(".intent"):
            container.load_intent(basename(file), join(dir, file))
        elif file.endswith(".entity"):
            container.load_entity(basename(file), join(dir, file))

    container.train()
    return container
예제 #3
0
class PadatiousExtractor(IntentExtractor):
    keyword_based = False

    def __init__(self, cache_dir=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # TODO xdg data_dir
        data_dir = expanduser(self.config.get("data_dir", "~/.padatious"))
        cache_dir = cache_dir or join(data_dir, "padatious")
        self.lock = Lock()
        self.container = IntentContainer(cache_dir)
        self.registered_intents = []

    def detach_intent(self, intent_name):
        if intent_name in self.registered_intents:
            LOG.debug("Detaching padatious intent: " + intent_name)
            with self.lock:
                self.container.remove_intent(intent_name)
            self.registered_intents.remove(intent_name)

    def detach_skill(self, skill_id):
        LOG.debug("Detaching padatious skill: " + str(skill_id))
        remove_list = [i for i in self.registered_intents if skill_id in i]
        for i in remove_list:
            self.detach_intent(i)

    def register_entity(self, entity_name, samples=None, reload_cache=True):
        samples = samples or [entity_name]
        with self.lock:
            self.container.add_entity(entity_name,
                                      samples,
                                      reload_cache=reload_cache)

    def register_intent(self, intent_name, samples=None, reload_cache=True):
        samples = samples or [intent_name]
        if intent_name not in self._intent_samples:
            self._intent_samples[intent_name] = samples
        else:
            self._intent_samples[intent_name] += samples
        with self.lock:
            self.container.add_intent(intent_name,
                                      samples,
                                      reload_cache=reload_cache)
        self.registered_intents.append(intent_name)

    def register_entity_from_file(self,
                                  entity_name,
                                  file_name,
                                  reload_cache=True):
        with self.lock:
            self.container.load_entity(entity_name,
                                       file_name,
                                       reload_cache=reload_cache)

    def register_intent_from_file(self,
                                  intent_name,
                                  file_name,
                                  single_thread=True,
                                  timeout=120,
                                  reload_cache=True,
                                  force_training=True):
        try:
            with self.lock:
                self.container.load_intent(intent_name,
                                           file_name,
                                           reload_cache=reload_cache)
            self.registered_intents.append(intent_name)
            success = self._train(single_thread=single_thread,
                                  timeout=timeout,
                                  force_training=force_training)
            if success:
                LOG.debug(file_name + " trained successfully")
            else:
                LOG.error(file_name + " FAILED TO TRAIN")

        except Exception as e:
            LOG.exception(e)

    def _get_remainder(self, intent, utterance):
        if intent["name"] in self.intent_samples:
            return get_utterance_remainder(
                utterance, samples=self.intent_samples[intent["name"]])
        return utterance

    def calc_intent(self, utterance, min_conf=None):
        min_conf = min_conf or self.config.get("padatious_min_conf", 0.65)
        utterance = utterance.strip().lower()
        with self.lock:
            intent = self.container.calc_intent(utterance).__dict__
        if intent["conf"] < min_conf:
            return {
                "intent_type": "unknown",
                "entities": {},
                "conf": 0,
                "intent_engine": "padatious",
                "utterance": utterance,
                "utterance_remainder": utterance
            }
        intent["utterance_remainder"] = self._get_remainder(intent, utterance)
        intent["entities"] = intent.pop("matches")
        intent["intent_engine"] = "padatious"
        intent["intent_type"] = intent.pop("name")
        intent["utterance"] = intent.pop("sent")

        if isinstance(intent["utterance"], list):
            intent["utterance"] = " ".join(intent["utterance"])
        return intent

    def intent_scores(self, utterance):
        utterance = utterance.strip().lower()
        intents = [i.__dict__ for i in self.container.calc_intents(utterance)]
        for idx, intent in enumerate(intents):
            intent["utterance_remainder"] = self._get_remainder(
                intent, utterance)
            intents[idx]["entities"] = intents[idx].pop("matches")
            intents[idx]["intent_type"] = intents[idx].pop("name")
            intent["intent_engine"] = "padatious"
            intent["utterance"] = intent.pop("sent")
            if isinstance(intents[idx]["utterance"], list):
                intents[idx]["utterance"] = " ".join(intents[idx]["utterance"])
        return intents

    def calc_intents(self, utterance, min_conf=None):
        min_conf = min_conf or self.config.get("padatious_min_conf", 0.65)
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            intent = self.calc_intent(ut)
            if intent["conf"] < min_conf:
                bucket[ut] = None
            else:
                bucket[ut] = intent
        return bucket

    def calc_intents_list(self, utterance):
        utterance = utterance.strip().lower()
        bucket = {}
        for ut in self.segmenter.segment(utterance):
            bucket[ut] = self.filter_intents(ut)
        return bucket

    def manifest(self):
        # TODO vocab, skill ids, intent_data
        return {"intent_names": self.registered_intents}

    def _train(self, single_thread=True, timeout=120, force_training=True):
        with self.lock:
            return self.container.train(single_thread=single_thread,
                                        timeout=timeout,
                                        force=force_training,
                                        debug=True)
예제 #4
0
#!/usr/bin/env python3
# Sample Padatious program used for testing

import sys
from glob import glob
from os.path import basename

from padatious import IntentContainer

reload_cache = len(sys.argv) > 1 and sys.argv[1] == '-r'
container = IntentContainer('intent_cache')

for file_name in glob('data/*.intent'):
    name = basename(file_name).replace('.intent', '')
    container.load_file(name, file_name, reload_cache=reload_cache)

for file_name in glob('data/*.entity'):
    name = basename(file_name).replace('.entity', '')
    container.load_entity(name, file_name, reload_cache=reload_cache)

container.train()

query = None
while query != 'q':
    query = input('> ')
    data = container.calc_intent(query)
    print(data.name + ': ' + str(data.conf))
    for key, val in data.matches.items():
        print('\t' + key + ': ' + val)