def train_nlu(data_path, configs, model_path):
    logging.basicConfig(filename=logfile, level=logging.DEBUG)
    training_data = load_data(data_path)
    trainer = Trainer(config.load(configs))
    trainer.train(training_data)
    model_directory = trainer.persist(model_path, fixed_model_name='nlu')
    run_evaluation(data_path, model_directory)
def train_nlu():
    training_data = load_data('./data/nlu.md')
    trainer = Trainer(config.load("config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('./models/nlu/', fixed_model_name="current")
    return model_directory
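# A minimal usage sketch (an assumption, not one of the collected snippets): load the
# directory returned by train_nlu() above with Rasa NLU's Interpreter and parse a message.
# The example utterance is illustrative only.
from rasa.nlu.model import Interpreter

model_directory = train_nlu()
interpreter = Interpreter.load(model_directory)
result = interpreter.parse("hello")
print(result["intent"], result["entities"])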
def train_nlu(data, configs, model_dir):
    training_data = load_data(data)
    trainer = Trainer(config.load(configs))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name="nlu")
    logger.info(f"Model trained. Stored in '{model_directory}'.")
    return model_directory
def trained_nlu_model(request):
    cfg = RasaNLUModelConfig({"pipeline": "keyword"})
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)
    trainer.train(td)
    model_path = trainer.persist(NLU_MODEL_PATH)

    nlu_data = data.get_nlu_directory(DEFAULT_DATA_PATH)
    output_path = os.path.join(NLU_MODEL_PATH, NLU_MODEL_NAME)
    new_fingerprint = model.model_fingerprint(NLU_DEFAULT_CONFIG_PATH, nlu_data=nlu_data)
    model.create_package_rasa(model_path, output_path, new_fingerprint)

    def fin():
        if os.path.exists(NLU_MODEL_PATH):
            shutil.rmtree(NLU_MODEL_PATH)
        if os.path.exists(output_path):
            shutil.rmtree(output_path)

    request.addfinalizer(fin)
    return output_path
def train(cfg_name, project_name):
    from rasa.nlu import training_data

    cfg = config.load(cfg_name)
    trainer = Trainer(cfg, component_builder)
    training_data = training_data.load_data(data)
    trainer.train(training_data)
    trainer.persist("test_projects", project_name=project_name)
def train_nlu(lang="en", production_build=False): model_name = "production" if not production_build: model_name = "latest" training_data = load_data('./data/nlu/' + lang + "/") trainer = Trainer(config.load("config.yml")) trainer.train(training_data) trainer.persist('./models/nlu/' + lang + "/", fixed_model_name=model_name)
def train_model(td_file, config_file, model_dir):
    """Trains a model using the training data and config;
    creates the model and returns its path for evaluation."""
    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))
    trainer.train(td)
    model_loc = trainer.persist(model_dir)
    return model_loc
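# A possible follow-up sketch (assumed, not part of the snippet above): evaluate the
# persisted model with Rasa NLU's run_evaluation. The file paths are placeholders.
from rasa.nlu.test import run_evaluation

model_loc = train_model("data/nlu.md", "config.yml", "models/")
run_evaluation("data/nlu.md", model_loc)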
def load_training_data(data_file="../data/testData.json",
                       config_file="../configs/config_spacy.yml"):
    training_data = load_data(data_file)
    trainer = Trainer(config.load(config_file))
    trainer.train(training_data)
    # where model_directory points to the model folder
    model_directory = trainer.persist('./projects/default/')
    return model_directory
def train_update(
    repository_version_language_id, by_user, repository_authorization, from_queue="celery"
):  # pragma: no cover
    update_request = backend().request_backend_start_training_nlu(
        repository_version_language_id, by_user, repository_authorization, from_queue
    )

    examples_list = get_examples_request(repository_version_language_id, repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []

            for example in examples_list:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    )
                )

            update_request["dataset_size"] = len(examples)

            pipeline_builder = PipelineBuilder(update_request)
            pipeline_builder.print_pipeline()
            rasa_nlu_config = pipeline_builder.get_nlu_model()

            trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))
            training_data = TrainingData(
                training_examples=examples, lookup_tables=None
            )

            trainer.train(training_data)

            persistor = BothubPersistor(
                repository_version_language_id, repository_authorization, rasa_version
            )
            trainer.persist(
                mkdtemp(),
                persistor=persistor,
                fixed_model_name=f"{update_request.get('repository_version')}_"
                f"{update_request.get('total_training_end') + 1}_"
                f"{update_request.get('language')}",
            )
        except Exception as e:
            logger.exception(e)
            backend().request_backend_trainfail_nlu(
                repository_version_language_id, repository_authorization
            )
            raise e
        finally:
            backend().request_backend_traininglog_nlu(
                repository_version_language_id, pl.getvalue(), repository_authorization
            )
def test_train_with_empty_data(language, pipeline, component_builder, tmpdir):
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    trainer = Trainer(_config, component_builder)
    trainer.train(TrainingData())
    persistor = create_persistor(_config)
    persisted_path = trainer.persist(tmpdir.strpath, persistor)
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
def trained_nlu_model():
    cfg = RasaNLUModelConfig({"pipeline": "keyword"})
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)
    trainer.train(td)
    model_path = trainer.persist("test_models", project_name="test_model_keyword")
    return model_path
def train_eval_rasa_nlu_model(lang='en', cross=False, save=''):
    """
    Train Snips data built from all brat annotation objects.
    :param lang: abbreviated language name
    :param cross: whether to run the cross-validation variant
    :param save: path where the model will be saved
    :rtype: None
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu.model import Trainer
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu import config
    from rasa.nlu.test import run_evaluation

    config_file = source_config / "config_rasa_converrt.yml"

    if cross:
        filename_results = source_result / "rasa_cross_semeval_2020_model_task1_{}".format(save)

        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_rasa_data_task1()
        training_data = load_data(str(train_data[0]))

        builder = ComponentBuilder(use_cache=True)
        trainer = Trainer(config.load(str(config_file)), builder)

        print("--> Training patent data with Rasa...")
        trainer.train(training_data, num_threads=8, n_jobs=-1, verbose=True)

        print("--> Saving model trained with Rasa (Rasa)...")
        model_directory = trainer.persist(filename_results)

        print("--> Evaluating training data with Rasa metrics (Cross-validation)...")
        import os
        from datetime import datetime

        filename_test = str(train_data[1])
        print(filename_test)
        dmtime = "test_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
        out_test = source_result / "rasa_cross_evaluation_task1" / dmtime
        model_directory = sorted(filename_results.glob("nlu_*"), key=os.path.getmtime)[-1]
        run_evaluation(filename_test, str(model_directory), output_directory=str(out_test))
    else:
        filename_results = source_result / "rasa_semeval_2020_model_task1_{}".format(save)

        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_file = train_data_obj.build_rasa_data_task1()
        training_data = load_data(train_file)

        builder = ComponentBuilder(use_cache=True)
        trainer = Trainer(config.load(str(config_file)), builder)

        print("--> Training patent data with Rasa...")
        trainer.train(training_data, num_threads=8, verbose=True, n_jobs=-1, fixed_model_name="nlu")

        print("--> Saving model trained with Rasa (Rasa)...")
        model_directory = trainer.persist(filename_results)
def test_train_model_without_data(language, pipeline, component_builder, tmpdir):
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    trainer = Trainer(_config, component_builder)
    trainer.train(TrainingData())
    persisted_path = trainer.persist(tmpdir.strpath)
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("Rasa is great!") is not None
def train_nlu():
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.model import Trainer

    training_data = load_data('data/nlu.md')
    trainer = Trainer(config.load("config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/', fixed_model_name="current")
    return model_directory
async def train(self):
    """Train the engine."""
    nltk.download('punkt')
    lang = self.config['language']
    if not os.path.exists('data/' + self.config['skill-id']):
        _LOGGER.info("Starting Skill training.")
        _LOGGER.info("Generating stories.")
        data, domain_data, stories = await GenerateStories.run(
            self.config['skill-id'], self.config['language'], self.asm)
        training_data = TrainingData(training_examples=data)

        nlu_config = RasaNLUModelConfig({
            "language": lang,
            "pipeline": self.config['pipeline'],
            "data": None
        })

        trainer = Trainer(nlu_config, None, True)
        _LOGGER.info("Training Arcus NLU")
        trainer.train(training_data)
        trainer.persist("data/" + self.config['skill-id'], None, 'nlu')

        # Rasa Core
        domain = Domain.from_dict(domain_data)
        reader = StoryFileReader(domain, RegexInterpreter(), None, False)
        story_steps = await reader.process_lines(stories)
        graph = StoryGraph(story_steps)

        g = TrainingDataGenerator(
            graph,
            domain,
            remove_duplicates=True,
            unique_last_num_states=None,
            augmentation_factor=20,
            tracker_limit=None,
            use_story_concatenation=True,
            debug_plots=False,
        )
        training_trackers = g.generate()

        policy_list = SimplePolicyEnsemble.from_dict(
            {"policies": self.config['policies']})
        policy_ensemble = SimplePolicyEnsemble(policy_list)

        _LOGGER.info("Training Arcus Core")
        policy_ensemble.train(training_trackers, domain)
        policy_ensemble.persist(
            "data/" + self.config['skill-id'] + "/core", False)
        domain.persist("data/" + self.config['skill-id'] + "/core/model")
        domain.persist_specification("data/" + self.config['skill-id'] + "/core")
def train_test(td_file, config_file, model_dir):
    # helper function to split into test and train and evaluate on results.
    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))
    train, test = td.train_test_split(train_frac=0.6)
    trainer.train(train)
    model_loc = trainer.persist(model_dir)
    with open("data/tmp/temp_test.json", "w", encoding="utf8") as f:
        f.write(test.as_json())
    with open("data/temp_train.json", "w", encoding="utf8") as f:
        f.write(train.as_json())
    evaluate_model("data/tmp/temp_test.json", model_loc)
def call():
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu.model import Trainer

    builder = ComponentBuilder(use_cache=True)
    training_data = load_data('./data/weapon.md')
    trainer = Trainer(config.load("./config.yml"), builder)
    trainer.train(training_data)
    model_directory = trainer.persist('./models', fixed_model_name="model")
    print('done')
    return model_directory
def train():
    td = load_data("{}/demo_rasa.json".format(prj_dir))
    _config = RasaNLUModelConfig(load_json("{}/config.json".format(prj_dir)))
    trainer = Trainer(_config)
    trainer.train(td)
    persisted_path = trainer.persist("{}/models".format(prj_dir))

    loaded = Interpreter.load(persisted_path)
    assert loaded.pipeline

    # Inference
    result = loaded.parse("i'm looking for a place in the north of town")
    result = loaded.parse("show me chinese restaurants")
    result = dict(filter(lambda item: item[0] not in ["intent_ranking"], result.items()))
    show_dict(result)
def train_test(td_file, config_file, model_dir, key="company", noise=0.1):
    """trains a model using the training data (split into train-test) and config"""
    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))
    train, test = td.train_test_split(train_frac=0.8)
    test = add_noise(test, key, noise=noise)
    trainer.train(train)
    tmp_fname = "data/tmp/temp_test.json"
    model_loc = trainer.persist(model_dir)
    with open(tmp_fname, "w", encoding="utf8") as f:
        f.write(test.as_json())
    evaluate_model(tmp_fname, model_loc)
def train(nlu_config: Union[Text, RasaNLUModelConfig],
          data: Text,
          path: Optional[Text] = None,
          project: Optional[Text] = None,
          fixed_model_name: Optional[Text] = None,
          storage: Optional[Text] = None,
          component_builder: Optional[ComponentBuilder] = None,
          training_data_endpoint: Optional[EndpointConfig] = None,
          **kwargs: Any) -> Tuple[Trainer, Interpreter, Text]:
    """Loads the trainer and the data and runs the training of the model."""

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(nlu_config, component_builder)
    persistor = create_persistor(storage)
    if training_data_endpoint is not None:
        training_data = load_data_from_endpoint(training_data_endpoint,
                                                nlu_config.language)
    else:
        training_data = load_data(data, nlu_config.language)

    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path, persistor, project, fixed_model_name)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
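# A minimal invocation sketch for the train() helper above; the config path, data path,
# and output directory are assumptions for illustration, not taken from the snippet itself.
trainer, interpreter, persisted_path = train(
    "config.yml",
    "data/nlu.md",
    path="models/",
    fixed_model_name="nlu",
)
print(interpreter.parse("hello"))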
def _async_train(self, config, nlu_data, model_name):
    training_start = timer()
    with self.lock:
        self.training_status[model_name] = {
            "status": "TRAINING",
        }

    data = self.data_reader.read_from_json({'rasa_nlu_data': nlu_data})
    with self.interpreter_cache.lock:
        trainer = Trainer(RasaNLUModelConfig(config), self.interpreter_cache.component_builder)
    interpreter = trainer.train(data)

    tempdir = tempfile.mkdtemp()
    trainer.persist(tempdir, None, "nlu")
    _model_package = create_package_rasa(tempdir, os.path.join("models", model_name))

    self.interpreter_cache.store(model_name, interpreter)

    with self.lock:
        training_end = timer()
        self.training_status[model_name] = {
            "status": "READY",
            "training_time": f"{training_end - training_start:.2f}"
        }
def train_nlu( config_file="config.yml", model_path="models/nlu", training_data_file="data/nlu.md" ): from rasa.nlu.training_data import load_data from rasa.nlu import config from rasa.nlu.model import Trainer training_data = load_data(training_data_file) trainer = Trainer(config.load(config_file)) trainer.train(training_data) # Attention: trainer.persist stores the model and all meta data into a folder. # The folder itself is not zipped. model_directory = trainer.persist(model_path) logger.info("Model trained. Stored in '{}'.".format(model_directory)) return model_directory
def train_update(update, examples_data, label_examples_data, algorithm,
                 ner_spacy, similarity_type, language, connection):
    with PokeLogging() as pl:
        try:
            examples = []
            label_examples = []

            for example in examples_data:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    ))

            for label_example in label_examples_data:
                label_examples.append(
                    Message.build(
                        text=label_example.get("text"),
                        entities=label_example.get("entities"),
                    ))

            rasa_nlu_config = get_rasa_nlu_config_from_update(
                algorithm, ner_spacy, similarity_type, language)
            trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))
            training_data = BothubTrainingData(
                label_training_examples=label_examples,
                training_examples=examples)

            trainer.train(training_data)

            persistor = BothubPersistor(update, connection)
            trainer.persist(
                mkdtemp(),
                persistor=persistor,
                fixed_model_name=str(update),
            )
        except Exception as e:
            logger.exception(e)
            raise e
        finally:
            pass
def train_data(domain, locale, prop):
    prop_ = prop
    format = prop.get('format')
    dataFile = os.path.join(scriptDir, '..', 'data', domain + '_' + locale + '.' + format)
    configFile = os.path.join(scriptDir, 'config', prop.get('config_file'))
    modelFile = os.path.join(scriptDir, '..', 'models', 'dnn')
    MODEL_NAME = domain + '_' + locale

    if format == 'md' or format == 'json':
        training_data = load_data(dataFile)
        trainer = Trainer(config.load(configFile))
        if not is_config_stale(domain, locale, prop):
            trainer.train(training_data)
            trainer.persist(modelFile, fixed_model_name=MODEL_NAME)
        else:
            logger.warning("no changes found to training data, using pre-trained model")
            return json.loads('{"response":"WARNING: no changes found to training data, using pre-trained model"}')
    else:
        logger.error("unsupported format. Exiting...")
        return json.loads('{"response":"ERROR: unsupported format. Exiting..."}')

    training_examples = OrderedDict()
    INTENT = 'intent'
    for example in [e.as_dict_nlu() for e in training_data.training_examples]:
        intent = example[INTENT]
        training_examples.setdefault(intent, [])
        training_examples[intent].append(example)

    count = 0
    for x in training_examples:
        if isinstance(training_examples[x], list):
            count += len(training_examples[x])

    logger.info(f'Identified domain: {domain}')
    logger.info(f'Identified locale: {locale}')
    logger.info(f'Number of utterances for training: {count}')
    logger.info(f'Number of intents for training: {len(training_examples)}')

    message = {}
    message['domain'] = domain
    message['locale'] = locale
    message['Number of utterances'] = str(count)
    message['Number of intents'] = str(len(training_examples))

    response = {}
    response['response'] = message
    return response
def test_nlu_interpreter():
    # training_data = load_data("data/chitchat_nlu.md")
    training_data = load_data("data")
    trainer = Trainer(config.load("config.yml"))
    interpreter = trainer.train(training_data)
    test_interpreter_dir = trainer.persist("./tests/models", project_name="nlu")
    parsing = interpreter.parse('hello')

    assert parsing['intent']['name'] == 'greet'
    assert test_interpreter_dir
def train_nlu( config_file="config_simbert.yml", training_data_file="raw_data.json", model_directory: Text = "/model", model_name: Text = "current", ): training_data = load_data(training_data_file) # 基于load_data,加载训练数据 trainer = Trainer( config.load(config_file)) # 基于config.load加载配置文件,并定义Trainer类 trainer.train(training_data) # 基于训练数据training_data对每个组件进行训练 print('---------train done!------------') # Attention: trainer.persist stores the model and all meta data into a folder. # The folder itself is not zipped. model_path = os.path.join(model_directory, model_name) print('model_path:', model_path) model_directory = trainer.persist(model_path, fixed_model_name="nlu") # 保存模型文件 logger.info("Model trained. Stored in '{}'.".format(model_directory)) return model_directory # 返回模型保存的目录
def test_train_model_without_data():
    td = load_data(DEFAULT_DATA_PATH)
    language, pipeline = pipelines_for_tests()[1]
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    show_dict({"pipeline": pipeline, "language": language})

    trainer = Trainer(_config)
    trainer.train(td)
    persisted_path = trainer.persist(model_dir)

    loaded = Interpreter.load(persisted_path)
    assert loaded.pipeline

    # Inference
    result = loaded.parse("i'm looking for a place in the north of town")
    result = loaded.parse("show me chinese restaurants")
    result = dict(
        filter(lambda item: item[0] not in ["intent_ranking"], result.items()))
    show_dict(result)
def train_nlu(data, configuration, model_dir, train):
    '''
    Input:
        data: training data, in JSON format
        configuration: configuration file
        model_dir: where to save the model after training
        train: flag to check that we really want to train
    Output:
        model_directory: where the output model will be saved
    '''
    rasamodel.Train = train
    assert rasamodel.Train == True

    training_data = load_data(data)
    trainer = Trainer(config.load(configuration))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='Intentnlu')
    return model_directory
def train_nlu(config_file="config.yml", model_directory="models", model_name="current", training_data_file="data/nlu.md"): from rasa.nlu.training_data import load_data from rasa.nlu import config from rasa.nlu.model import Trainer training_data = load_data(training_data_file) trainer = Trainer(config.load(config_file)) trainer.train(training_data) # Attention: trainer.persist stores the model and all meta data into a folder. # The folder itself is not zipped. model_path = os.path.join(model_directory, model_name) model_directory = trainer.persist(model_path, fixed_model_name="nlu") logger.info(f"Model trained. Stored in '{model_directory}'.") return model_directory
def __init__(self):
    try:
        test = Interpreter.load("./models/nlu/current")
        self.interpreter = test
    except Exception:
        training_data = load_data("./data/nlu.md")
        trainer = Trainer(config.load("config.yml"))
        self.interpreter = trainer.train(training_data)
        model_directory = trainer.persist("./models/nlu", fixed_model_name="current")

    self.music_verbs = ['Riproduci', 'Suona', 'Fai partire', 'Avvia']