コード例 #1
0
ファイル: config.py プロジェクト: radovankavicky/botshot-nlu
 def load_intent_model(config: dict, config_dir: str, pipeline_data):
     """Rebuild the intent model described by ``config`` and load its state.

     The tokenizer, featurizer and model class specs are read from
     ``config['entities']['intent']``; each one is instantiated through
     ``create_class_instance`` and receives that same sub-config.

     Args:
         config: configuration dict containing ``entities -> intent``.
         config_dir: directory under which the ``intent`` model path is built.
         pipeline_data: mapping whose ``['pipelines']['intent']`` entry is
             passed to ``Pipeline.load``.

     Returns:
         The model instance after its ``load`` method has been called.
     """
     intent_conf = config['entities']['intent']

     # Tokenizer is created before the featurizer, then both go into the pipeline.
     intent_pipeline = Pipeline(
         tokenizer=create_class_instance(intent_conf.get('tokenizer'),
                                         config=intent_conf),
         featurizer=create_class_instance(intent_conf.get('featurizer'),
                                          config=intent_conf),
     )
     intent_pipeline.load(pipeline_data['pipelines']['intent'])

     intent_model = create_class_instance(
         intent_conf.get('model'),
         config=intent_conf,
         pipeline=intent_pipeline,
     )  # type: IntentModel
     model_path = os.path.join(config_dir, 'intent')
     intent_model.load(model_path)
     return intent_model
コード例 #2
0
ファイル: config.py プロジェクト: radovankavicky/botshot-nlu
    def _get_intent_model(self):
        """Create a fresh intent pipeline and model from ``self.config``.

        Class specs for the tokenizer, featurizer and model are taken from
        ``self.config['entities']['intent']`` and instantiated through
        ``create_class_instance``; nothing is loaded here.

        Returns:
            tuple: ``(pipeline, model)``.
        """
        conf = self.config["entities"]['intent']

        # Tokenizer first, featurizer second - both receive the intent sub-config.
        intent_pipeline = Pipeline(
            tokenizer=create_class_instance(conf.get('tokenizer'), config=conf),
            featurizer=create_class_instance(conf.get('featurizer'), config=conf),
        )

        intent_model = create_class_instance(
            conf.get('model'),
            config=conf,
            pipeline=intent_pipeline,
        )  # type: IntentModel
        return intent_pipeline, intent_model
コード例 #3
0
 def __init__(self, config, entities, datasets):
     """Pick a tokenizer, then delegate the rest of the setup to the parent.

     Args:
         config: mapping of settings; an optional ``'tokenizer'`` entry names
             the tokenizer to instantiate.
         entities: forwarded unchanged to the parent initializer.
         datasets: forwarded unchanged to the parent initializer.
     """
     if 'tokenizer' not in config:
         # Nothing configured: use the project's default tokenizer.
         self.tokenizer = utils.get_default_tokenizer()
     else:
         tokenizer_spec = config['tokenizer']
         self.tokenizer = utils.create_class_instance(tokenizer_spec,
                                                      config=None)
     super().__init__(config, entities, datasets)
コード例 #4
0
 def __init__(self, config, entities, datasets):
     """Set up tokenizer, embedding and threshold, then call the parent init.

     Args:
         config: mapping of settings. ``'embedding_file'`` is required;
             ``'tokenizer'`` is optional and ``'threshold'`` falls back
             to 0.7 when absent.
         entities: forwarded unchanged to the parent initializer.
         datasets: forwarded unchanged to the parent initializer.
     """
     self.tokenizer = (
         utils.create_class_instance(config['tokenizer'], config=None)
         if 'tokenizer' in config
         else utils.get_default_tokenizer()
     )
     embedding_source = config['embedding_file']
     self.embedding = Embedding(embedding_source)
     self.threshold = config.get("threshold", 0.7)
     # TODO: self.cache_embeddings = config.get("cache_embeddings", True)  # disable if you have too many tokens
     super().__init__(config, entities, datasets)
コード例 #5
0
ファイル: config.py プロジェクト: radovankavicky/botshot-nlu
    def load_keyword_datasets(config: dict, config_dir: str):
        """Collect the keyword datasets declared under ``config['input']``.

        Two optional keys are read:

        * ``keywords`` - a single filename or a list of filenames. Relative
          names are joined onto ``config_dir`` and all files are handed
          together to ``StaticKeywordDataset.load``.
        * ``providers`` - a list whose items are either a string (passed to
          ``create_class_instance`` with no further arguments) or a dict
          mapping such a string to a list (positional arguments) or a dict
          (keyword arguments).

        A key that is missing or present with an empty/``None`` value is
        treated as "nothing configured".

        Args:
            config: configuration dict; must contain an ``'input'`` mapping.
            config_dir: base directory for relative keyword filenames.

        Returns:
            list: the static keyword dataset (only when keyword files were
            given) followed by one object per configured provider.

        Raises:
            Exception: if a provider item is neither ``str`` nor ``dict``, or
                if a provider's parameters are neither ``list`` nor ``dict``.
        """
        datasets = []
        input_config = config['input']

        # load keyword files
        # `or []` also covers a key that exists but holds None.
        sources = input_config.get('keywords') or []
        if sources:
            if not isinstance(sources, list):
                sources = [sources]
            for i, filename in enumerate(sources):
                if not os.path.isabs(filename):
                    # Written back in place: when `sources` is the list object
                    # from config, it ends up holding the absolute paths too.
                    sources[i] = os.path.join(config_dir, filename)
            datasets.append(StaticKeywordDataset.load(*sources))

        # load dynamic providers
        providers = input_config.get('providers') or []
        for item in providers:
            if isinstance(item, str):
                # A bare string is a provider without parameters; it has to be
                # added to the result just like the parameterized ones.
                datasets.append(create_class_instance(item))
            elif isinstance(item, dict):
                for provider_cls, params in item.items():
                    if isinstance(params, list):
                        provider = create_class_instance(provider_cls, *params)
                    elif isinstance(params, dict):
                        provider = create_class_instance(
                            provider_cls, **params)
                    else:
                        raise Exception(
                            "Can't instantiate provider %s: parameters should be list or dict"
                            % provider_cls)
                    datasets.append(provider)
            else:
                raise Exception("Providers config is malformed")

        # NOTE: loading from (intent-)examples files is not implemented here.

        return datasets
コード例 #6
0
ファイル: config.py プロジェクト: radovankavicky/botshot-nlu
 def load_keyword_models(config: dict, datasets: list):
     """Instantiate one keyword model per distinct ``keywords`` configuration.

     Every entity in ``config['entities']`` except ``'intent'`` that has a
     non-empty ``keywords`` mapping is grouped with the other entities sharing
     exactly the same mapping. Each group yields one model, created through
     ``create_class_instance`` from the mapping's ``'model'`` entry.

     Args:
         config: configuration dict containing an ``'entities'`` mapping.
         datasets: objects exposing ``get_entities()``; a dataset is handed to
             a model when the intersection of the group's entities with
             ``get_entities()`` contains a truthy element.

     Returns:
         list: the created models, in order of first appearance of each
         distinct ``keywords`` configuration.
     """
     # Group entity names by their (hashable) keywords configuration.
     entities_by_spec = {}
     for entity_name, entity_conf in config['entities'].items():
         if entity_name == 'intent':
             continue
         keywords_conf = entity_conf.get('keywords')
         if not keywords_conf:
             continue
         spec_key = frozenset(keywords_conf.items())
         if spec_key not in entities_by_spec:
             entities_by_spec[spec_key] = []
         entities_by_spec[spec_key].append(entity_name)

     built = []
     for spec_key, entities in entities_by_spec.items():
         spec = dict(spec_key)
         model_cls = spec['model']

         # Keep only the datasets that share an entity with this group.
         wanted = set(entities)
         required_datasets = []
         for candidate in datasets:
             if any(wanted & candidate.get_entities()):
                 required_datasets.append(candidate)

         built.append(
             create_class_instance(model_cls,
                                   config=spec,
                                   entities=entities,
                                   datasets=required_datasets))
     return built