def load_intent_model(config: dict, config_dir: str, pipeline_data):
    # Rebuild the intent pipeline from config, restore its fitted state,
    # then load the trained intent model from <config_dir>/intent.
    entity_config = config['entities']['intent']
    tokenizer = create_class_instance(entity_config.get('tokenizer'), config=entity_config)
    featurizer = create_class_instance(entity_config.get('featurizer'), config=entity_config)
    pipeline = Pipeline(tokenizer=tokenizer, featurizer=featurizer)
    pipeline.load(pipeline_data['pipelines']['intent'])
    model = create_class_instance(entity_config.get('model'), config=entity_config,
                                  pipeline=pipeline)  # type: IntentModel
    model.load(os.path.join(config_dir, 'intent'))
    return model
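# A minimal sketch of the inputs load_intent_model() expects, inferred from the code above;
# the class paths, directory and pipeline payload below are hypothetical placeholders, not
# taken from the project.
example_config = {
    'entities': {
        'intent': {
            'tokenizer': 'nlu.tokenizers.WhitespaceTokenizer',     # hypothetical class path
            'featurizer': 'nlu.featurizers.BagOfWordsFeaturizer',  # hypothetical class path
            'model': 'nlu.models.IntentClassifier',                # hypothetical class path
        }
    }
}
example_pipeline_data = {'pipelines': {'intent': {}}}  # serialized pipeline state; exact format assumed
# model = load_intent_model(example_config, '/path/to/model_dir', example_pipeline_data)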
def _get_intent_model(self):
    # Build a fresh (untrained) intent pipeline and model from the configuration.
    intent_config = self.config["entities"]['intent']
    tokenizer = create_class_instance(intent_config.get('tokenizer'), config=intent_config)
    featurizer = create_class_instance(intent_config.get('featurizer'), config=intent_config)
    pipeline = Pipeline(tokenizer=tokenizer, featurizer=featurizer)
    model = create_class_instance(intent_config.get('model'), config=intent_config,
                                  pipeline=pipeline)  # type: IntentModel
    return pipeline, model
def __init__(self, config, entities, datasets):
    # Use the configured tokenizer if one is given, otherwise fall back to the default.
    if 'tokenizer' in config:
        self.tokenizer = utils.create_class_instance(config['tokenizer'], config=None)
    else:
        self.tokenizer = utils.get_default_tokenizer()
    super().__init__(config, entities, datasets)
def __init__(self, config, entities, datasets):
    # Use the configured tokenizer if one is given, otherwise fall back to the default.
    if 'tokenizer' in config:
        self.tokenizer = utils.create_class_instance(config['tokenizer'], config=None)
    else:
        self.tokenizer = utils.get_default_tokenizer()
    self.embedding = Embedding(config['embedding_file'])
    self.threshold = config.get("threshold", 0.7)
    # TODO: self.cache_embeddings = config.get("cache_embeddings", True)  # disable if you have too many tokens
    super().__init__(config, entities, datasets)
def load_keyword_datasets(config: dict, config_dir: str):
    datasets = []

    # Load static keyword files; relative paths are resolved against the config directory.
    sources = config['input'].get('keywords', [])
    if sources:
        if not isinstance(sources, list):
            sources = [sources]
        for i, filename in enumerate(sources):
            print(filename)
            if not os.path.isabs(filename):
                sources[i] = os.path.join(config_dir, filename)
        dataset = StaticKeywordDataset.load(*sources)
        datasets.append(dataset)

    # Load dynamic providers: either a bare class path, or a mapping of class path
    # to positional (list) or keyword (dict) parameters.
    providers = config['input'].get('providers', [])
    for item in providers:
        if isinstance(item, str):
            provider = create_class_instance(item)
            datasets.append(provider)
        elif isinstance(item, dict):
            for provider_cls, params in item.items():
                if isinstance(params, list):
                    provider = create_class_instance(provider_cls, *params)
                elif isinstance(params, dict):
                    provider = create_class_instance(provider_cls, **params)
                else:
                    raise Exception(
                        "Can't instantiate provider %s: parameters should be list or dict" % provider_cls)
                datasets.append(provider)
        else:
            raise Exception("Providers config is malformed")

    # load from (intent-)examples files
    return datasets
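# A minimal sketch of the `input` section load_keyword_datasets() reads, inferred from the code
# above: static keyword files plus dynamic providers, where provider parameters may be positional
# (list) or named (dict). All file names and class paths are hypothetical placeholders.
example_input_config = {
    'input': {
        'keywords': ['keywords/cities.csv', 'keywords/products.csv'],  # hypothetical files
        'providers': [
            'nlu.providers.ContactListProvider',                        # no parameters
            {'nlu.providers.DatabaseProvider': {'table': 'articles'}},  # keyword parameters
            {'nlu.providers.CsvProvider': ['extra_keywords.csv']},      # positional parameters
        ],
    }
}
# datasets = load_keyword_datasets(example_input_config, '/path/to/config_dir')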
def load_keyword_models(config: dict, datasets: list):
    # Group entities by their (hashable) keyword configuration so that entities with an
    # identical configuration share a single model instance.
    models_spec = {}
    models = []
    for entity, entity_conf in config['entities'].items():
        if entity == 'intent':
            continue
        keywords_config = entity_conf.get('keywords')
        if keywords_config:
            key = frozenset(keywords_config.items())
            models_spec.setdefault(key, []).append(entity)

    # Instantiate one model per configuration group, passing only the datasets that
    # contain at least one of the group's entities.
    for model_spec, entities in models_spec.items():
        model_spec = dict(model_spec)
        model_cls = model_spec['model']
        required_datasets = [
            dataset for dataset in datasets
            if set(entities) & dataset.get_entities()
        ]
        model = create_class_instance(model_cls, config=model_spec,
                                      entities=entities, datasets=required_datasets)
        models.append(model)
    return models
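# A minimal sketch of the `entities` section load_keyword_models() reads, inferred from the code
# above: every non-intent entity may carry a `keywords` block naming the model class, and entities
# with identical blocks share one model instance. Entity names and the class path are hypothetical.
example_entities_config = {
    'entities': {
        'intent': {},  # skipped by load_keyword_models()
        'city': {'keywords': {'model': 'nlu.models.FuzzyKeywordModel'}},     # hypothetical
        'product': {'keywords': {'model': 'nlu.models.FuzzyKeywordModel'}},  # same config -> shared model
    }
}
# models = load_keyword_models(example_entities_config, datasets)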