def test_invalid_config_json():
    file_config = """pipeline: [spacy_sklearn"""  # invalid yaml

    with tempfile.NamedTemporaryFile("w+",
                                     suffix="_tmp_config_file.json") as f:
        f.write(file_config)
        f.flush()

        with pytest.raises(config.InvalidConfigError):
            config.load(f.name)
def test_set_attr_on_component(default_config):
    cfg = config.load(
        "sample_configs/config_pretrained_embeddings_spacy.yml")
    cfg.set_component_attr(6, C=324)

    assert cfg.for_component(1) == {"name": "SpacyTokenizer"}
    assert cfg.for_component(6) == {"name": "SklearnIntentClassifier",
                                    "C": 324}
def test_pipeline_looksup_registry():
    pipeline_template = list(registered_pipeline_templates)[0]
    args = {"pipeline": pipeline_template}
    f = write_file_config(args)
    final_config = config.load(f.name)

    components = [c.get("name") for c in final_config.pipeline]
    assert components == registered_pipeline_templates[pipeline_template]
def test_run_cv_evaluation():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    nlu_config = config.load(
        "sample_configs/config_pretrained_embeddings_spacy.yml")

    n_folds = 2
    results, entity_results = cross_validate(td, n_folds, nlu_config)

    assert len(results.train["Accuracy"]) == n_folds
    assert len(results.train["Precision"]) == n_folds
    assert len(results.train["F1-score"]) == n_folds
    assert len(results.test["Accuracy"]) == n_folds
    assert len(results.test["Precision"]) == n_folds
    assert len(results.test["F1-score"]) == n_folds
    assert len(entity_results.train[
                   'CRFEntityExtractor']["Accuracy"]) == n_folds
    assert len(entity_results.train[
                   'CRFEntityExtractor']["Precision"]) == n_folds
    assert len(entity_results.train[
                   'CRFEntityExtractor']["F1-score"]) == n_folds
    assert len(entity_results.test[
                   'CRFEntityExtractor']["Accuracy"]) == n_folds
    assert len(entity_results.test[
                   'CRFEntityExtractor']["Precision"]) == n_folds
    assert len(entity_results.test[
                   'CRFEntityExtractor']["F1-score"]) == n_folds
def test_mitie_featurizer(mitie_feature_extractor, default_config):
    from rasa_nlu.featurizers.mitie_featurizer import MitieFeaturizer

    ftr = MitieFeaturizer.create(
        config.load("sample_configs/config_mitie.yml"))
    sentence = "Hey how are you today"
    tokens = MitieTokenizer().tokenize(sentence)
    vecs = ftr.features_for_tokens(tokens, mitie_feature_extractor)
    expected = np.array([0., -4.4551446, 0.26073121,
                         -1.46632245, -1.84205751])
    assert np.allclose(vecs[:5], expected, atol=1e-5)
def train(cfg_name, project_name):
    from rasa_nlu import training_data

    cfg = config.load(cfg_name)
    trainer = Trainer(cfg, component_builder)
    training_data = training_data.load_data(data)

    trainer.train(training_data)
    trainer.persist("test_projects", project_name=project_name)
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('data/nlu_data/')
    trainer = Trainer(config.load("nlu_model_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu',
                                      fixed_model_name="current")
    return model_directory
def train_nlu_gao():
    from rasa_nlu_gao.training_data import load_data
    from rasa_nlu_gao import config
    from rasa_nlu_gao.model import Trainer

    training_data = load_data('data/rasa_dataset_training.json')
    trainer = Trainer(config.load("config_embedding_bilstm.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu_gao/',
                                      fixed_model_name="current")
    return model_directory
def test_override_defaults_supervised_embeddings_pipeline():
    cfg = config.load("data/test/config_embedding_test.yml")
    builder = ComponentBuilder()

    component1_cfg = cfg.for_component(0)
    component1 = builder.create_component(component1_cfg, cfg)
    assert component1.max_ngram == 3

    component2_cfg = cfg.for_component(1)
    component2 = builder.create_component(component2_cfg, cfg)
    assert component2.epochs == 10
def cross_validate(data: TrainingData, n_folds: int,
                   nlu_config: Union[RasaNLUModelConfig, Text]
                   ) -> Tuple[CVEvaluationResult, CVEvaluationResult]:
    """Stratified cross validation on data.

    Args:
        data: training data
        n_folds: integer, number of CV folds
        nlu_config: NLU config file path or config object

    Returns:
        two CVEvaluationResult tuples (intent results, entity results);
        each holds train/test dictionaries with a key, list structure,
        where each entry in a list is the result for one fold
    """
    from collections import defaultdict
    import tempfile

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    trainer = Trainer(nlu_config)
    train_results = defaultdict(list)
    test_results = defaultdict(list)
    entity_train_results = defaultdict(lambda: defaultdict(list))
    entity_test_results = defaultdict(lambda: defaultdict(list))
    tmp_dir = tempfile.mkdtemp()

    for train, test in generate_folds(n_folds, data):
        interpreter = trainer.train(train)

        # calculate train accuracy
        train_results = combine_intent_result(train_results, interpreter,
                                              train)
        # calculate test accuracy
        test_results = combine_intent_result(test_results, interpreter, test)

        # calculate train and test entity results
        entity_train_results = combine_entity_result(entity_train_results,
                                                     interpreter, train)
        entity_test_results = combine_entity_result(entity_test_results,
                                                    interpreter, test)

    shutil.rmtree(tmp_dir, ignore_errors=True)

    return (CVEvaluationResult(dict(train_results), dict(test_results)),
            CVEvaluationResult(dict(entity_train_results),
                               dict(entity_test_results)))
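# A minimal usage sketch for cross_validate above, mirroring the test
# snippets in this file; the data and config paths come from those tests
# and may need adjusting for your own project layout.
td = training_data.load_data('data/examples/rasa/demo-rasa.json')
nlu_config = config.load(
    "sample_configs/config_pretrained_embeddings_spacy.yml")
intent_results, entity_results = cross_validate(td, 3, nlu_config)
print(intent_results.test["F1-score"])  # one F1 value per fold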
def main():
    parser = create_argument_parser()
    cmdline_args = parser.parse_args()
    utils.configure_colored_logging(cmdline_args.loglevel)

    if cmdline_args.mode == "crossvalidation":

        # TODO: move parsing into sub parser
        # manual check argument dependency
        if cmdline_args.model is not None:
            parser.error("Crossvalidation will train a new model "
                         "- do not specify external model.")

        if cmdline_args.config is None:
            parser.error("Crossvalidation will train a new model, "
                         "so you need to specify a model configuration.")

        nlu_config = config.load(cmdline_args.config)
        data = training_data.load_data(cmdline_args.data)
        data = drop_intents_below_freq(data, cutoff=5)
        results, entity_results = cross_validate(
            data, int(cmdline_args.folds), nlu_config)
        logger.info("CV evaluation (n={})".format(cmdline_args.folds))

        if any(results):
            logger.info("Intent evaluation results")
            return_results(results.train, "train")
            return_results(results.test, "test")

        if any(entity_results):
            logger.info("Entity evaluation results")
            return_entity_results(entity_results.train, "train")
            return_entity_results(entity_results.test, "test")

    elif cmdline_args.mode == "evaluation":
        run_evaluation(cmdline_args.data,
                       cmdline_args.model,
                       cmdline_args.report,
                       cmdline_args.successes,
                       cmdline_args.errors,
                       cmdline_args.confmat,
                       cmdline_args.histogram)

    logger.info("Finished evaluation")
def test_run_cv_evaluation():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    nlu_config = config.load("sample_configs/config_spacy.yml")

    n_folds = 2
    results, entity_results = run_cv_evaluation(td, n_folds, nlu_config)

    assert len(results.train["Accuracy"]) == n_folds
    assert len(results.train["Precision"]) == n_folds
    assert len(results.train["F1-score"]) == n_folds
    assert len(results.test["Accuracy"]) == n_folds
    assert len(results.test["Precision"]) == n_folds
    assert len(results.test["F1-score"]) == n_folds
    assert len(entity_results.train['ner_crf']["Accuracy"]) == n_folds
    assert len(entity_results.train['ner_crf']["Precision"]) == n_folds
    assert len(entity_results.train['ner_crf']["F1-score"]) == n_folds
    assert len(entity_results.test['ner_crf']["Accuracy"]) == n_folds
    assert len(entity_results.test['ner_crf']["Precision"]) == n_folds
    assert len(entity_results.test['ner_crf']["F1-score"]) == n_folds
def zipped_nlu_model():
    spacy_config_path = \
        "sample_configs/config_pretrained_embeddings_spacy.yml"

    cfg = config.load(spacy_config_path)
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)
    trainer.train(td)
    trainer.persist("test_models",
                    project_name="test_model_pretrained_embeddings")

    model_dir_list = os.listdir(TEST_MODEL_PATH)

    # directory name of latest model
    model_dir = sorted(model_dir_list)[-1]
    # path of that directory
    model_path = os.path.join(TEST_MODEL_PATH, model_dir)

    zip_path = zip_folder(model_path)
    return zip_path
def train(nlu_config: Union[Text, RasaNLUModelConfig],
          data: Text,
          path: Optional[Text] = None,
          project: Optional[Text] = None,
          fixed_model_name: Optional[Text] = None,
          storage: Optional[Text] = None,
          component_builder: Optional[ComponentBuilder] = None,
          training_data_endpoint: Optional[EndpointConfig] = None,
          **kwargs: Any
          ) -> Tuple[Trainer, Interpreter, Text]:
    """Loads the trainer and the data and runs the training of the model."""

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(nlu_config, component_builder)
    persistor = create_persistor(storage)
    if training_data_endpoint is not None:
        training_data = load_data_from_endpoint(training_data_endpoint,
                                                nlu_config.language)
    else:
        training_data = load_data(data, nlu_config.language)
    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path,
                                         persistor,
                                         project,
                                         fixed_model_name)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
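# A hedged example of calling train() above directly. The paths are the
# sample paths used elsewhere in this file, not requirements; passing a
# config file path works because train() loads it via config.load when
# given a string.
trainer, interpreter, persisted_path = train(
    "sample_configs/config_pretrained_embeddings_spacy.yml",
    data="data/examples/rasa/demo-rasa.json",
    path="models")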
def train_nlu(data, configuration, model_dir):
    training_data = load_data(data)
    trainer = Trainer(config.load(configuration))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir,
                                      fixed_model_name='customernlu')
    return model_directory
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 25 15:12:13 2018

@author: Jan
"""
from rasa_nlu.training_data import load_data
from rasa_nlu.model import Trainer
from rasa_nlu import config
import configparser

conf = configparser.RawConfigParser()
conf.read('conf/conf.cnf')

training_data = load_data(conf.get('conf', 'trainingfile'))
trainer = Trainer(config.load(conf.get('conf', 'rasa_conf')))
trainer.train(training_data)
model_directory = trainer.persist(conf.get('conf', 'modelfolder'))
def test_blank_config():
    file_config = {}
    f = write_file_config(file_config)
    final_config = config.load(f.name)

    assert final_config.as_dict() == defaults
from tkinter import *
import tkinter.font as tf
import spacy
import random
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu import config
import content
import re

# Train the NLU model
trainer = Trainer(config.load("config_spacy.yml"))
training_data = load_data('train.json')
interpreter = trainer.train(training_data)
nlp = spacy.load("en_core_web_md")

# Result caches
player_result = [content.PLAYER()]
team_result = [content.TEAM()]
league_result = [content.LEAGUE()]
coach_result = [content.COACH()]
fixture_result = [content.FIXTURE()]

# Dialogue state constants
INIT = 0
SEARCH_PLAYER = 1
SEARCH_LEAGUE = 2
SEARCH_TEAM = 3
SEARCH_COACH = 4
SEARCH_FIXTURE = 5
SEARCH_TRANSFER = 6
def train(data, config_file, model_dir):
    training_data = load_data(data)
    configuration = config.load(config_file)
    trainer = Trainer(configuration)
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='MyNLU')
def _load_default_config(path):
    if path:
        return config.load(path).as_dict()
    else:
        return {}
def cal_time():
    tic = time.time()
    i = 0
    for examples in training_data.training_examples:
        text = examples.text
        result = interpreter.parse(text)
        print(result)
        # print(result["max_n_lm_score"])
        if i % 10 == 0 and i > 0:
            toc = time.time()
            print("time:", (toc - tic) / i)
        # break
        i += 1
    toc = time.time()
    time_per_text = (toc - tic) / len(training_data.training_examples)
    print("mean_time:", time_per_text)


# training_data = load_data('./data/examples/rasa/demo-rasa_zh.json')
training_data = load_data('./data/ner/bert_ner_train.json')
config_file = './sample_configs/config_bert_ner.yml'
ModelConfig = config.load(config_file)
trainer = Trainer(ModelConfig)
interpreter = trainer.train(training_data)

query = "这是中国领导人首次在哈佛大学发表演讲。"
while query != "Stop":
    print(interpreter.parse(query))
    query = input("input query: (insert Stop to close)\n")
print('Ner close')
    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path,
                                         persistor,
                                         project,
                                         fixed_model_name)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path


if __name__ == '__main__':
    cmdline_args = create_argument_parser().parse_args()
    utils.configure_colored_logging(cmdline_args.loglevel)

    if cmdline_args.url:
        data_endpoint = EndpointConfig(cmdline_args.url)
    else:
        data_endpoint = read_endpoints(cmdline_args.endpoints).data

    do_train(config.load(cmdline_args.config),
             cmdline_args.data,
             cmdline_args.path,
             cmdline_args.project,
             cmdline_args.fixed_model_name,
             cmdline_args.storage,
             training_data_endpoint=data_endpoint,
             num_threads=cmdline_args.num_threads)
    logger.info("Finished training")
    utils.configure_colored_logging(cmdline_args.loglevel)

    if cmdline_args.mode == "crossvalidation":

        # TODO: move parsing into sub parser
        # manual check argument dependency
        if cmdline_args.model is not None:
            parser.error("Crossvalidation will train a new model "
                         "- do not specify external model.")

        if cmdline_args.config is None:
            parser.error("Crossvalidation will train a new model, "
                         "so you need to specify a model configuration.")

        nlu_config = config.load(cmdline_args.config)
        data = training_data.load_data(cmdline_args.data)
        data = drop_intents_below_freq(data, cutoff=5)
        results, entity_results = run_cv_evaluation(
            data, int(cmdline_args.folds), nlu_config)
        logger.info("CV evaluation (n={})".format(cmdline_args.folds))

        if any(results):
            logger.info("Intent evaluation results")
            return_results(results.train, "train")
            return_results(results.test, "test")

        if any(entity_results):
            logger.info("Entity evaluation results")
            return_entity_results(entity_results.train, "train")
            return_entity_results(entity_results.test, "test")
def test_invalid_pipeline_template():
    args = {"pipeline": "my_made_up_name"}
    f = write_file_config(args)

    with pytest.raises(config.InvalidConfigError) as execinfo:
        config.load(f.name)
    assert "unknown pipeline template" in str(execinfo.value)
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu import config
from rasa_nlu.model import Metadata, Interpreter

training_data = load_data('testData.json')
trainer = Trainer(config.load('config_spacy.json'))
interpreter = trainer.train(training_data)
model_directory = trainer.persist('./model/nlu', fixed_model_name='queries')

# x = interpreter.parse("Where we can find Eiffel Tower?")
# print(x)
from rasa_nlu.training_data import load_data
from rasa_nlu import config
from rasa_nlu.model import Trainer

training_data = load_data('data/nlu_data.md')
trainer = Trainer(config.load('nlu_tensorflow.yml'))
interpreter = trainer.train(training_data)
model_directory = trainer.persist('models/nlu', fixed_model_name="current")
print(model_directory)
def train_nlu(config_data):
    training_data = load_data(config_data["data"])
    trainer = Trainer(config.load('./config_spacy.json'), builder)
    trainer.train(training_data)
    model_directory = trainer.persist(config_data["path"],
                                      fixed_model_name='restaurantnlu')
def train_nlu(data, configs, model_dir):
    training_data = load_data(data)
    trainer = Trainer(
        config.load("C:/Users/heman/Desktop/myproject/config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir,
                                      fixed_model_name='weathernlu')
def __init__(self, config_file, data_file, model_dir):
    self.config_file = config_file
    self.data_file = data_file
    self.rasa_config = config.load(config_file)
    self.model_dir = model_dir
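# A hypothetical companion method for the __init__ above, sketched from the
# load_data -> Trainer -> train -> persist pattern used throughout these
# examples; `train` and its use of the stored attributes are assumptions,
# not part of the original snippet.
def train(self):
    training_data = load_data(self.data_file)
    trainer = Trainer(self.rasa_config)
    trainer.train(training_data)
    return trainer.persist(self.model_dir)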
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu import config

# for future use
# from rasa_nlu.evaluate import run_evaluation

# loading the nlu training samples
training_data = load_data("nlu.md")

# trainer to educate our pipeline
trainer = Trainer(config.load("config.yml"))

# train the model!
interpreter = trainer.train(training_data)

# store it for future use
model_directory = trainer.persist("./models/nlu", fixed_model_name="current")

print(interpreter.parse("doing great"))

# for future use
# run_evaluation("nlu.md", model_directory)
def train_nlu():
    training_data = load_data('data/nlu_intents/')
    trainer = Trainer(config.load("nlu_model_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('./models/nlu',
                                      fixed_model_name='intents')
    return model_directory
def train_nlu(data, config1, model_dir):
    training_data = load_data(data)
    trainer = Trainer(config.load(config1))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir)
# Find the parent
item = find_parent_item(word)
print("item: {0} has color : {1}".format(item, word))

# Assign the colors
assign_colors(doc)

# Import necessary modules
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu import config

# Create a trainer
trainer = Trainer(config.load("___"))

# Load the training data
training_data = load_data('___')

# Create an interpreter by training the model
interpreter = trainer.train(___)

# Try it out
print(interpreter.parse(
    "I'm looking for a Mexican restaurant in the North of town"))
print(interpreter.parse("show me Chinese food in the centre of town"))
print(interpreter.parse("I want an Indian restaurant in the west"))
print(interpreter.parse("are there any good pizza places in the center?"))
def train():
    training_data = load_data("./data/training_data.json")
    trainer = Trainer(config.load("config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('./models/nlu')
    return model_directory
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu import config

# Create a trainer that uses this config
trainer = Trainer(config.load("language.yml"))

# Load the training data
training_data = load_data('key.json')

# Create an interpreter by training the model
interpreter = trainer.train(training_data)
def train_harubot(data_json, config_file, model_dir):
    training_data = load_data(data_json)
    trainer = Trainer(config.load(config_file))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='harubot')
    # trained in another subprocess
    trainer = Trainer(cfg, component_builder)
    persistor = create_persistor(storage)
    training_data = load_data(data, cfg.language)
    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path,
                                         persistor,
                                         project,
                                         fixed_model_name)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path


if __name__ == '__main__':
    cmdline_args = create_argument_parser().parse_args()
    utils.configure_colored_logging(cmdline_args.loglevel)

    do_train(config.load(cmdline_args.config),
             cmdline_args.data,
             cmdline_args.path,
             cmdline_args.project,
             cmdline_args.fixed_model_name,
             cmdline_args.storage,
             num_threads=cmdline_args.num_threads)
    logger.info("Finished training")
def train(data, config_file, model_dir):
    training_data = load_data(data)
    trainer = Trainer(config.load(config_file))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='chat')
def train_nlu(data, configs, model_dir):
    training_data = load_data(data)  # load the NLU training samples
    trainer = Trainer(config.load(configs))  # build the pipeline from config
    interpreter = trainer.train(training_data)  # train the model
    model_directory = trainer.persist(
        "models/nlu", fixed_model_name="chatter")  # store it in a directory
def train_nlu(data, config_file, model_dir):
    training_data = load_data(data)
    trainer = Trainer(config.load(config_file), builder)
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir,
                                      fixed_model_name='restaurantnlu')
# Import necessary modules
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu import config

# Create a trainer that uses this config
trainer = Trainer(config.load("/config/nlu_config.yml"))

# Load the training data
training_data = load_data('/data/nlu_data.json')

# Create an interpreter by training the model
interpreter = trainer.train(training_data)
def train_nlu(data, configs, model_dir):
    training_data = load_data(data)
    trainer = Trainer(config.load(configs))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='cocoanlu')
from rasa_nlu.training_data import load_data
from rasa_nlu.model import Trainer
from rasa_nlu import config
from rasa_core.actions import Action
from rasa_core.events import SlotSet
from rasa_core.policies import FallbackPolicy, KerasPolicy, MemoizationPolicy
from rasa_core.agent import Agent

print("Initializing the ChatBot:")
print("STEP 1: Training the NLU model")

# Training the NLU model:
# loading the nlu training samples
training_data = load_data("NLU_Train.json")
# trainer to create the pipeline
trainer = Trainer(config.load("NLU_model_Config.yml"))
# training the model
interpreter = trainer.train(training_data)
# storing it for future use
model_directory = trainer.persist("./models/nlu", fixed_model_name="current")
print("Done")

print("STEP 2: Training the CORE model")
fallback = FallbackPolicy(fallback_action_name="utter_default",
                          core_threshold=0.2,
                          nlu_threshold=0.1)

agent = Agent(domain='restaurant_domain.yml',
              policies=[
                  MemoizationPolicy(),
                  KerasPolicy(validation_split=0.0, epochs=200),
                  fallback
from rasa_nlu.training_data import load_data
from rasa_nlu.model import Trainer
from rasa_nlu import config
import requests

trainer = Trainer(config.load('config_spacy.yml'))
training_data = load_data('data-wea-en.json')
interpreter = trainer.train(training_data)


def parse_message(message):
    parsed_json = interpreter.parse(message)
    intent = parsed_json['intent']['name']
    intent_confidence = parsed_json['intent']['confidence']
    entities = parsed_json['entities']
    c_name, w_ele = None, None
    for i in entities:
        if i['entity'] == 'location':
            c_name = i['value']
        elif i['entity'] == 'element':
            w_ele = i['value']
    return intent, c_name, w_ele, intent_confidence


def get_wea_json(c_name):
    ow_url = ('https://api.openweathermap.org/data/2.5/'
              '{weather_forecast}?q={city_name}&units=metric')
    api_key = '&appid={TOKEN}'
    wea_url = ow_url.format(weather_forecast='weather',
                            city_name=c_name) + api_key
    wea_json = requests.get(wea_url).json()
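# A hypothetical call to parse_message from the snippet above; the sample
# utterance and the entity values it yields depend on what the model in
# 'data-wea-en.json' was trained on, so treat this purely as an
# illustration of the return shape.
intent, city, element, confidence = parse_message(
    "what is the weather in London")
print(intent, city, element, confidence)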
from rasa_nlu.training_data import load_data
from rasa_nlu.model import Trainer
from rasa_nlu import config
from rasa_nlu.model import Interpreter

training_data = load_data(
    "E:/work/ML/SampleRASA/starter-pack-rasa-nlu/data/trainingdata.json")
trainer = Trainer(config.load(
    "E:/work/ML/SampleRASA/starter-pack-rasa-nlu/nlu_config.yml"))

# interpreter = trainer.train(training_data)
trainer.train(training_data)
model_directory = trainer.persist(
    "E:/work/ML/SampleRASA/starter-pack-rasa-nlu/models/",
    project_name="nlu")

interpreter = Interpreter.load(model_directory)
output = interpreter.parse(
    u"Deductible:USD 10,000 each and every Claim. Including costs and Expenses Deductible:")
# parsing = interpreter.parse('hello')
print(output)
# assert parsing['intent']['name'] == 'greet'
# assert model_directory
def default_config():
    return config.load(CONFIG_DEFAULTS_PATH)