def test_training_data_conversion(tmpdir, data_file, gold_standard_file):
    """Round-trip a training file through the converter and check the
    result against the gold-standard data."""
    target = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, target.strpath)
    converted = load_data(target.strpath)
    assert converted.entity_examples != []
    assert converted.intent_examples != []
    reference = load_data(gold_standard_file)
    assert cmp_dict_list(converted.entity_examples, reference.entity_examples)
    assert cmp_dict_list(converted.intent_examples, reference.intent_examples)
    assert converted.entity_synonyms == reference.entity_synonyms
def test_training_data_conversion(tmpdir, data_file, gold_standard_file):
    """Convert a data file to rasa JSON and compare it to the gold standard."""
    target = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, target.strpath)
    converted = load_data(target.strpath)
    assert converted.entity_examples != []
    assert converted.intent_examples != []
    reference = load_data(gold_standard_file)
    cmp_message_list(converted.entity_examples, reference.entity_examples)
    cmp_message_list(converted.intent_examples, reference.intent_examples)
    assert converted.entity_synonyms == reference.entity_synonyms
def train_babi_nlu():
    """Train an NLU model on the babi example data and persist it.

    NOTE(review): ``model_name`` is a free variable here -- it must be
    defined at module level; confirm against the surrounding file.
    """
    examples = load_data('examples/babi/data/franken_data.json')
    babi_trainer = Trainer(RasaNLUConfig("examples/babi/data/config_nlu.json"))
    babi_trainer.train(examples)
    return babi_trainer.persist('examples/babi/models/nlu/', model_name=model_name)
def __init__(self, update_model=True):
    """Set up S3 access from the bundled config, load (or train) the NLU
    model, and read the standard dimension/metric vocabularies.

    :param update_model: when True, train a fresh model from the bundled
        training data; otherwise load a previously persisted snapshot.
    """
    self.dir_path = os.path.dirname(os.path.realpath(__file__))
    # Config lives one directory above this module; paths are built with
    # "/" splits, so this assumes a POSIX-style path -- TODO confirm.
    print("/".join(self.dir_path.split("/")[0:-1]) + "/config/config.json")
    config = json.load(open("/".join(self.dir_path.split("/")[0:-1]) + "/config/config.json"))
    # AWS credentials and bucket for model artifact storage.
    self.key = config['aws']['accessKeyId']
    self.secret = config['aws']['secretAccessKey']
    self.bucketname = config['aws']['bucket_name']
    self.s3 = boto3.resource('s3', aws_access_key_id=self.key, aws_secret_access_key=self.secret)
    self.config = config
    if update_model:
        # Train a new model and save it in model_directory.
        # Load training data.
        training_data = load_data(self.dir_path + '/data/training_data.json')
        # Set config and train.
        trainer = Trainer(RasaNLUConfig(self.dir_path + "/sample_configs/config_spacy.json"))
        trainer.train(training_data)
        # persist() returns the directory the model is stored in.
        self.model_directory = trainer.persist('./projects/')
    else:
        # Reuse a previously trained, hard-coded model snapshot.
        self.model_directory = self.dir_path + "/projects/default/model_20171110-144019"
    # Load the model into an interpreter for parsing.
    self.interpreter = Interpreter.load(self.model_directory, RasaNLUConfig(self.dir_path + "/sample_configs/config_spacy.json"))
    # Flatten all dimension values (across every key) into one list.
    self.standard_dims = []
    with open(self.dir_path + "/data/dimensions.json") as f:
        data = json.load(f)
        for key, value in data.items():
            self.standard_dims = self.standard_dims + value
    # Same flattening for metric values.
    self.standard_metrics = []
    with open(self.dir_path + "/data/metrics.json") as f:
        data = json.load(f)
        for key, value in data.items():
            self.standard_metrics = self.standard_metrics + value
def chatbot(appName, trainingData):
    """Write the submitted training data to disk, train an NLU model for
    the given app, and persist it under a timestamped name.

    :param appName: project name used for file and model naming.
    :param trainingData: JSON string with rasa NLU training data.
    :return: directory of the persisted model, or "" if training failed.
    """
    model_directory = ""
    # Resolve the storage path for training files.
    dataPath = app.config['NLU_SERVER_TRAINING_DATA_PATH']
    # Materialize the submitted training data as a JSON file.
    training_data_file = dataPath + appName + "_training.json"
    trainingData = json.loads(trainingData)
    with open(training_data_file, 'w') as f:
        json.dump(trainingData, f)
    try:
        # Train the model.
        training_data = load_data(training_data_file)
        trainer = Trainer(RasaNLUConfig(app.config['NLU_SERVER_TRAINER_PATH']), skip_validation=True)
        trainer.train(training_data)
        # Build the model storage path and name from a timestamp.
        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        fixed_model_name = appName + "_" + timestamp
        model_directory = trainer.persist(app.config['NLU_MODEL_STORE_PATH'], project_name=appName, fixed_model_name=fixed_model_name)
        print(model_directory)
    except Exception as e:
        # BUG FIX: Exception objects have no ``.message`` attribute on
        # Python 3 -- use str(e) instead of e.message.
        print("Error: 模型训练失败" + str(e))
    else:
        print("Success: 模型训练成功")
    return model_directory
def test_rasa_data():
    """Load the rasa demo data and sanity-check examples and synonyms."""
    td = load_data('data/examples/rasa/demo-rasa.json', "en")
    assert td.entity_examples != []
    assert td.intent_examples != []
    # BUG FIX: on Python 3 ``filter`` returns an iterator which has no
    # len(); use a list comprehension instead.
    assert len(td.sorted_entity_examples()) >= len([e for e in td.entity_examples if e["entities"]])
    assert len(td.sorted_intent_examples()) == len(td.intent_examples)
    assert td.entity_synonyms == {}
def test_repeated_entities():
    """A value that occurs several times in the text must be located via
    its explicit character offsets, not by searching for the value."""
    payload = u""" { "rasa_nlu_data": { "common_examples" : [ { "text": "book a table today from 3 to 6 for 3 people", "intent": "unk", "entities": [ { "entity": "description", "start": 35, "end": 36, "value": "3" } ] } ] } }"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as tmp:
        tmp.write(payload.encode("utf-8"))
        tmp.flush()
        td = load_data(tmp.name, "en")
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        marked = example["entities"]
        assert len(marked) == 1
        start, end = MitieEntityExtractor.find_entity(marked[0], example["text"])
        assert start == 9
        assert end == 10
def test_rasa_data():
    """Demo rasa data loads with examples and no entity synonyms."""
    demo = load_data('data/examples/rasa/demo-rasa.json')
    assert demo.entity_examples != []
    assert demo.intent_examples != []
    with_entities = [e for e in demo.entity_examples if e["entities"]]
    assert len(demo.sorted_entity_examples()) >= len(with_entities)
    assert len(demo.sorted_intent_examples()) == len(demo.intent_examples)
    assert demo.entity_synonyms == {}
def train_classificator():
    """Train the intent classifier from the configured data and persist it
    under the configured fixed model name."""
    nlu_config = RasaNLUConfig(CONF.get_value('nlu-config-file-path'))
    classifier_trainer = Trainer(nlu_config)
    examples = load_data(CONF.get_value('nlu-training-data-path'))
    classifier_trainer.train(examples)
    classifier_trainer.persist(
        CONF.get_value('models-directory'),
        fixed_model_name=CONF.get_value('classification-model-name'))
def get_training_data(self):
    """Build a rasa-NLU training file from the database models and load it.

    :return: training data parsed from the generated JSON file.
    """
    # Collect one common example per recorded user utterance.
    common_examples = []
    for user_say in IntentUserSays.objects.all():
        marked_entities = [
            {
                'start': int(entity.start),
                'end': int(entity.end),
                'value': str(entity.value),
                'entity': str(entity.entity.name),
            }
            for entity in user_say.entities.all()
        ]
        common_examples.append({
            'text': user_say.text,
            'intent': user_say.story_intent.intent.name,
            'entities': marked_entities,
        })
    # Collect synonym mappings for every stored entity.
    entity_synonyms = [
        {'value': entity.value, 'synonyms': self.split_synonyms(entity.synonyms)}
        for entity in IntentUserSaysEntities.objects.all()
    ]
    # Assemble the rasa NLU payload and write it to the training directory.
    rasa_nlu_data = {
        'rasa_nlu_data': {
            'common_examples': common_examples,
            'entity_synonyms': entity_synonyms,
        }
    }
    training_file = os.path.join(self.TRAINING_DIR, 'rasa_nlu_data.json')
    with open(training_file, 'w') as outfile:
        outfile.write(json.dumps(rasa_nlu_data))
    return load_data(training_file)
def convert_training_data(data_file, out_file, output_format):
    """Load training data and write it out as markdown ('md') or JSON."""
    loaded = load_data(data_file)
    with io.open(out_file, "w", encoding='utf-8') as sink:
        sink.write(loaded.as_markdown() if output_format == 'md' else loaded.as_json(indent=2))
def train_nlu():
    """Train the music-player NLU model and persist it under nlu/model.

    NOTE(review): ``model_name`` is a free variable -- it must exist at
    module level; confirm against the surrounding file.
    """
    examples = load_data('nlu/musicplayer.rasa.md')
    player_trainer = Trainer(RasaNLUConfig("nlu/config.json"))
    player_trainer.train(examples)
    return player_trainer.persist('nlu/model', fixed_model_name=model_name)
def build_model(self, path):
    """Train an NLU model from the data at ``path`` and persist it.

    :param path: training-data file to load.
    :return: directory the persisted model was stored in.
    """
    examples = load_data(path)
    spacy_trainer = Trainer(RasaNLUConfig("config/config_spacy.json"))
    spacy_trainer.train(examples)
    return spacy_trainer.persist('./model/')
def test_nonascii_entities():
    """LUIS-style offsets around non-ASCII text must map to the correct
    substring and character range."""
    payload = """ { "luis_schema_version": "2.0", "utterances" : [ { "text": "I am looking for a ßäæ ?€ö) item", "intent": "unk", "entities": [ { "entity": "description", "startPos": 19, "endPos": 26 } ] } ] }"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as tmp:
        tmp.write(payload.encode("utf-8"))
        tmp.flush()
        td = load_data(tmp.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        marked = example.get("entities")
        assert len(marked) == 1
        found = marked[0]
        assert found["value"] == "ßäæ ?€ö)"
        assert found["start"] == 19
        assert found["end"] == 27
        assert found["entity"] == "description"
def test_multiword_entities():
    """A multi-word entity value must resolve to the right token span."""
    payload = u""" { "rasa_nlu_data": { "common_examples" : [ { "text": "show me flights to New York City", "intent": "unk", "entities": [ { "entity": "destination", "start": 19, "end": 32, "value": "New York City" } ] } ] } }"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as tmp:
        tmp.write(payload.encode("utf-8"))
        tmp.flush()
        td = load_data(tmp.name, "en")
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        marked = example["entities"]
        assert len(marked) == 1
        start, end = MitieEntityExtractor.find_entity(marked[0], example["text"])
        assert start == 4
        assert end == 7
def test_multiword_entities():
    """A multi-word entity value must resolve to the right token span."""
    payload = """ { "rasa_nlu_data": { "common_examples" : [ { "text": "show me flights to New York City", "intent": "unk", "entities": [ { "entity": "destination", "start": 19, "end": 32, "value": "New York City" } ] } ] } }"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as tmp:
        tmp.write(payload.encode("utf-8"))
        tmp.flush()
        td = load_data(tmp.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        marked = example.get("entities")
        assert len(marked) == 1
        word_tokens = WhitespaceTokenizer().tokenize(example.text)
        start, end = MitieEntityExtractor.find_entity(marked[0], example.text, word_tokens)
        assert start == 4
        assert end == 7
def test_nonascii_entities():
    """LUIS token offsets around non-ASCII text must be re-mapped to the
    correct character range by the mitie tokenizer."""
    payload = u""" { "luis_schema_version": "1.0", "utterances" : [ { "text": "I am looking for a ßäæ ?€ö) item", "intent": "unk", "entities": [ { "entity": "description", "startPos": 5, "endPos": 8 } ] } ] }"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as tmp:
        tmp.write(payload.encode("utf-8"))
        tmp.flush()
        td = load_data(tmp.name, "en", luis_data_tokenizer="tokenizer_mitie")
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        marked = example["entities"]
        assert len(marked) == 1
        found = marked[0]
        assert found["value"] == u"ßäæ ?€ö)"
        assert found["start"] == 19
        assert found["end"] == 27
        assert found["entity"] == "description"
def run_intent_evaluation(config, model_path, component_builder=None):
    """Evaluate intent predictions of a trained model on the configured
    test data, log the metrics table, and plot a confusion matrix."""
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels

    # Load the evaluation data and the persisted model.
    test_data = load_data(config['data'])
    interpreter = Interpreter.load(model_path, config, component_builder)
    targets = [e.get("intent") for e in test_data.training_examples]
    predictions = []
    for example in test_data.training_examples:
        parsed = interpreter.parse(example.text)
        intent = parsed.get('intent')
        predictions.append(intent.get('name') if intent else None)
    log_evaluation_table(targets, predictions)
    matrix = confusion_matrix(targets, predictions)
    plot_confusion_matrix(matrix, classes=unique_labels(targets, predictions), title='Intent Confusion matrix')
    plt.show()
def train_nlu():
    """Train the NLU model from franken_data and persist it as 'current'."""
    examples = load_data('data/franken_data.json')
    nlu_trainer = Trainer(RasaNLUConfig("nlu_model_config.json"))
    nlu_trainer.train(examples)
    return nlu_trainer.persist('models/nlu/', fixed_model_name="current")
def train():
    """Train the 'mom' NLU model and return the persisted model directory."""
    examples = load_data('../mom/data/nlu.json')
    mom_trainer = Trainer(RasaNLUConfig("../mom/nlu_model_config.json"))
    mom_trainer.train(examples)
    # persist() returns the directory the model is stored in.
    return mom_trainer.persist('../models')
def run_intent_evaluation(config, model_path, component_builder=None):
    """Evaluate intent predictions of a trained model: log F1, precision,
    accuracy, and a classification report, then plot a confusion matrix."""
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import classification_report
    from sklearn.metrics import confusion_matrix
    from sklearn.metrics import f1_score
    from sklearn.metrics import precision_score
    from sklearn.utils.multiclass import unique_labels

    # Load the evaluation data and the persisted model.
    test_data = load_data(config['data'])
    metadata = Metadata.load(model_path)
    interpreter = Interpreter.load(metadata, config, component_builder)
    targets = [e.get("intent") for e in test_data.training_examples]
    predictions = []
    for example in test_data.training_examples:
        parsed = interpreter.parse(example.text)
        intent = parsed.get('intent')
        predictions.append(intent.get('name') if intent else None)
    logger.info("Intent Evaluation Results")
    logger.info("F1-Score: {}".format(f1_score(targets, predictions, average='weighted')))
    logger.info("Precision: {}".format(precision_score(targets, predictions, average='weighted')))
    logger.info("Accuracy: {}".format(accuracy_score(targets, predictions)))
    logger.info("Classification report: \n{}".format(classification_report(targets, predictions)))
    matrix = confusion_matrix(targets, predictions)
    plot_intent_confusion_matrix(matrix, classes=unique_labels(targets, predictions), title='Intent Confusion matrix')
    plt.show()
def train_nlu(self, force=False):
    """Train the NLU model if needed and return the persisted model directory.

    Training only runs when NLU is enabled (``disable_nlu != "yes"``), no
    training is already in progress, and either ``force`` is set, the
    training data changed, or no model exists yet. Returns None when
    training is skipped.

    :param force: retrain even if data and model look up to date.
    """
    if self.disable_nlu != "yes" and not self.isNluTraining:
        if (force or self.isNluModified() or self.isNluModelMissing()):
            # Guard flag against re-entrant training while this run is active.
            self.isNluTraining = True
            print("NLU TRAIN {} {} {}".format(force, self.isNluModified(), self.isNluModelMissing()))
            from rasa_nlu.converters import load_data
            from rasa_nlu.config import RasaNLUConfig
            from rasa_nlu.model import Trainer
            training_data = load_data(self.nlu_training_file)
            trainer = Trainer(RasaNLUConfig(self.config_file))
            trainer.train(training_data)
            # Model name is the last path segment of nlu_model_path; the
            # persist base path drops the last two segments.
            pathParts = self.nlu_model_path.split('/')
            modelName = pathParts[-1]
            shortPath = "/".join(pathParts[:-2])
            print("model {} path {}".format(modelName, shortPath))
            model_directory = trainer.persist(shortPath, fixed_model_name=modelName)
            self.isNluTraining = False
            # Remember the data timestamp we just trained on.
            self.nlu_modified = self.getNluModified()
            return model_directory
def test_repeated_entities():
    """A value that occurs several times in the text must be located via
    its explicit character offsets, not by searching for the value."""
    payload = """ { "rasa_nlu_data": { "common_examples" : [ { "text": "book a table today from 3 to 6 for 3 people", "intent": "unk", "entities": [ { "entity": "description", "start": 35, "end": 36, "value": "3" } ] } ] } }"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as tmp:
        tmp.write(payload.encode("utf-8"))
        tmp.flush()
        td = load_data(tmp.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        marked = example.get("entities")
        assert len(marked) == 1
        word_tokens = WhitespaceTokenizer().tokenize(example.text)
        start, end = MitieEntityExtractor.find_entity(marked[0], example.text, word_tokens)
        assert start == 9
        assert end == 10
def test_rasa_data():
    """Demo rasa data loads with examples and the expected synonym map."""
    demo = load_data('data/examples/rasa/demo-rasa.json')
    assert demo.entity_examples != []
    assert demo.intent_examples != []
    with_entities = [e for e in demo.entity_examples if e.get("entities")]
    assert len(demo.sorted_entity_examples()) >= len(with_entities)
    assert len(demo.sorted_intent_examples()) == len(demo.intent_examples)
    assert demo.entity_synonyms == {u'Chines': u'chinese', u'Chinese': u'chinese', u'chines': u'chinese'}
def train_nlu():
    """Train an NLU model from DATA/CONFIG_PATH, persisted as '13-3-2018'."""
    examples = load_data(DATA)
    nlu_trainer = Trainer(RasaNLUConfig(CONFIG_PATH))
    nlu_trainer.train(examples)
    return nlu_trainer.persist('models/nlu', fixed_model_name="13-3-2018")
def rasa_train(request):
    """Django view: train the NLU model, parse "hello", and return the result."""
    examples = load_data('rasa/data/demo_rasa.json')
    demo_trainer = Trainer(RasaNLUConfig("rasa/nlu_model_config.json"))
    demo_trainer.train(examples)
    model_dir = demo_trainer.persist('models/nlu/', fixed_model_name="current")
    # Smoke-test the fresh model with a canned utterance.
    parsed = Interpreter.load(model_dir).parse("hello")
    print(parsed['intent']['name'])
    return HttpResponse(parsed)
def train(self):
    """Train the rasa model from ``self.data_file`` and keep the loaded
    interpreter on the instance."""
    examples = load_data(self.data_file)
    rasa_trainer = Trainer(self.rasa_config)
    rasa_trainer.train(examples)
    persisted_dir = rasa_trainer.persist(self.model_dir)
    self.interpreter = Interpreter.load(persisted_dir, self.rasa_config)
    logging.info("rasa trained successfully")
def train_nlu(nlu_training_data):
    """Train the NLU model from the given data file and persist it as 'current'."""
    from rasa_nlu.config import RasaNLUConfig
    from rasa_nlu.converters import load_data
    from rasa_nlu.model import Trainer

    examples = load_data(nlu_training_data_path + nlu_training_data)
    nlu_trainer = Trainer(RasaNLUConfig('nlu_model_config.json'))
    nlu_trainer.train(examples)
    nlu_trainer.persist('models/nlu/', fixed_model_name='current')
def train_nlu(data, config, model_dir):
    """Train an NLU model from ``data`` with ``config`` and persist it as
    'proto1' under ``model_dir``.

    :return: directory the persisted model was stored in.
    """
    # FIX: corrected "Staring" typo in the log message.
    print("[+] Starting training session")
    print("[+] data: ", data)
    print("[+] config: ", config)
    print("[+] model_dir: ", model_dir)
    training_data = load_data(data)
    trainer = Trainer(RasaNLUConfig(config))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='proto1')
    # BUG FIX: the persisted path was computed but never returned.
    return model_directory
def main():
    """Train a spaCy-based NLU model and record its location in the config."""
    cfg = Config()
    examples = load_data('data')
    spacy_trainer = Trainer(RasaNLUConfig("config-spacy.json"))
    spacy_trainer.train(examples)
    trained_dir = spacy_trainer.persist(cfg.config['model_directory'])
    cfg.salvar('trained_model_directory', trained_dir)
def test_training_data_conversion(tmpdir, data_file, gold_standard_file, output_format):
    """Convert training data, compare it to the gold standard, then
    round-trip it back to JSON and compare again."""
    target = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, target.strpath, output_format)
    converted = load_data(target.strpath)
    assert converted.entity_examples != []
    assert converted.intent_examples != []
    reference = load_data(gold_standard_file)
    cmp_message_list(converted.entity_examples, reference.entity_examples)
    cmp_message_list(converted.intent_examples, reference.intent_examples)
    assert converted.entity_synonyms == reference.entity_synonyms
    # Convert the converted file back and run the same comparisons.
    roundtrip = tmpdir.join("data_in_original_format.txt")
    convert_training_data(target.strpath, roundtrip.strpath, 'json')
    restored = load_data(roundtrip.strpath)
    cmp_message_list(reference.entity_examples, restored.entity_examples)
    cmp_message_list(reference.intent_examples, restored.intent_examples)
    assert reference.entity_synonyms == restored.entity_synonyms
def train(data, config, models, training_data=None, trainer=None):
    """Train an NLU model, allowing a pre-loaded dataset or trainer to be
    injected (useful for tests).

    :param data: path to training data, used when ``training_data`` is None.
    :param config: path to the NLU config, used when ``trainer`` is None.
    :param models: directory to persist the trained model under.
    :return: directory the persisted model was stored in.
    """
    rm.logger = logger
    if training_data is None:
        training_data = load_data(data)
    if trainer is None:
        trainer = rm.Trainer(RasaNLUConfig(config))
    trainer.train(training_data)
    # BUG FIX: the persisted path was assigned but never returned.
    return trainer.persist(models)
def test_training_data_conversion(tmpdir, data_file, gold_standard_file, output_format, language):
    """Convert training data for a language, compare it to the gold
    standard, then round-trip it back to JSON and compare again."""
    target = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, target.strpath, output_format, language)
    converted = load_data(target.strpath, language)
    assert converted.entity_examples != []
    assert converted.intent_examples != []
    reference = load_data(gold_standard_file, language)
    cmp_message_list(converted.entity_examples, reference.entity_examples)
    cmp_message_list(converted.intent_examples, reference.intent_examples)
    assert converted.entity_synonyms == reference.entity_synonyms
    # Convert the converted file back and run the same comparisons.
    roundtrip = tmpdir.join("data_in_original_format.txt")
    convert_training_data(target.strpath, roundtrip.strpath, 'json', language)
    restored = load_data(roundtrip.strpath, language)
    cmp_message_list(reference.entity_examples, restored.entity_examples)
    cmp_message_list(reference.intent_examples, restored.intent_examples)
    assert reference.entity_synonyms == restored.entity_synonyms
def do_train(config, component_builder=None):
    # type: (RasaNLUConfig, Optional[ComponentBuilder]) -> Tuple[Trainer, Text]
    """Load the trainer and the data and run the training of the specified model."""
    trainer = Trainer(config, component_builder)
    persistor = create_persistor(config)
    trainer.train(load_data(config['data']))
    persisted_path = trainer.persist(config['path'], persistor)
    return trainer, persisted_path
def train_nlu_model():
    """Train the NLU model from the configured paths.

    :return: the persisted model directory, or None if training failed.
    """
    try:
        examples = load_data(RASA_NLU_TRAINING_DATA_PATH)
        nlu_trainer = Trainer(RasaNLUConfig(RASA_NLU_CONFIG_PATH))
        nlu_trainer.train(examples)
        return nlu_trainer.persist(RASA_NLU_MODEL_PATH, fixed_model_name=RASA_NLU_MODEL_NAME)
    except Exception as e:
        # Best-effort: report the failure rather than propagating it.
        print('Exception in train nlu', e)
def train_nlu():
    """Train the NLU model using settings from nlu_config.json and persist
    it to the configured project path.

    :return: directory the persisted model was stored in.
    """
    training_data = load_data('data/nlu.md')
    conf = RasaNLUConfig("nlu_config.json")
    # FIX: reuse the already-parsed config instead of reading and parsing
    # the same file a second time for the Trainer.
    trainer = Trainer(conf)
    trainer.train(training_data)
    model_directory = trainer.persist(
        conf["path"],
        project_name=conf["project"],
        fixed_model_name=conf["fixed_model_name"])
    print("persisted model to ", model_directory)
    # Consistency with the other train_nlu helpers: expose the model path.
    return model_directory
def train(cfg_name, model_name):
    """Train a model with the given config and persist it under test_models."""
    from rasa_nlu.train import create_persistor
    from rasa_nlu.converters import load_data

    config = RasaNLUConfig(cfg_name)
    trainer = Trainer(config)
    trainer.train(load_data(config['data']))
    persistor = create_persistor(config)
    trainer.persist("test_models", persistor, model_name=model_name)
def train_nlu():
    """Train the mobile NLU model and persist it as project 'ivr', model 'demo'."""
    from rasa_nlu.converters import load_data
    from rasa_nlu.config import RasaNLUConfig
    from rasa_nlu.model import Trainer

    examples = load_data("data/mobile_nlu_data.json")
    ivr_trainer = Trainer(RasaNLUConfig("mobile_nlu_model_config.json"))
    ivr_trainer.train(examples)
    return ivr_trainer.persist("models/", project_name="ivr", fixed_model_name="demo")
def do_train(config, component_builder=None):
    # type: (RasaNLUConfig, Optional[ComponentBuilder]) -> Tuple[Trainer, Interpreter, Text]
    """Load the trainer and the data and run the training of the specified model."""
    # Ensure we are training a model that we can save in the end.
    # WARN: there is still a race condition if a model with the same name
    # is trained in another subprocess.
    trainer = Trainer(config, component_builder)
    persistor = create_persistor(config)
    interpreter = trainer.train(load_data(config['data']))
    persisted_path = trainer.persist(config['path'], persistor, model_name=config['name'])
    return trainer, interpreter, persisted_path
def train_models(languages):
    """Generate your trained model: train and persist one NLU model per
    requested language."""
    utils.check_languages(languages)
    settings = utils.load_config()
    for language in languages:
        click.echo(_("================== Processing {lang} ==================").format(lang=language))
        examples = load_data(utils.get_training_data_path(language, settings))
        lang_trainer = Trainer(RasaNLUConfig(cmdline_args=settings))
        click.echo(_("Training data for language {lang}.").format(lang=language))
        lang_trainer.train(examples)
        click.echo(_("Persisting trained data for {lang}.").format(lang=language))
        model_dir = lang_trainer.persist(utils.get_model_base_dir(language))
        click.echo(_("Stored data for {lang} in {path}.").format(lang=language, path=model_dir))
    click.echo(_("================ Finished Training ================"))
def test_entities_synonyms():
    """Synonym declarations must map surface forms to the canonical value."""
    payload = u""" { "rasa_nlu_data": { "entity_synonyms": [ { "value": "nyc", "synonyms": ["New York City", "nyc", "the big apple"] } ], "common_examples" : [ { "text": "show me flights to New York City", "intent": "unk", "entities": [ { "entity": "destination", "start": 19, "end": 32, "value": "NYC" } ] }, { "text": "show me flights to nyc", "intent": "unk", "entities": [ { "entity": "destination", "start": 19, "end": 22, "value": "nyc" } ] } ] } }"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as tmp:
        tmp.write(payload.encode("utf-8"))
        tmp.flush()
        td = load_data(tmp.name)
        assert td.entity_synonyms["New York City"] == "nyc"
def test_markdown_data():
    """Markdown demo data loads with sorted examples and the expected synonyms."""
    demo = load_data('data/examples/rasa/demo-rasa.md')
    with_entities = [e for e in demo.entity_examples if e.get("entities")]
    assert len(demo.sorted_entity_examples()) >= len(with_entities)
    assert len(demo.sorted_intent_examples()) == len(demo.intent_examples)
    assert demo.entity_synonyms == {u'Chines': u'chinese', u'Chinese': u'chinese', u'chines': u'chinese', u'vegg': u'vegetarian', u'veggie': u'vegetarian'}
def train_nlu():
    """Train an NLU model from NLU_DATA/CONFIG_PATH, persisted as 'current'.

    :return: directory the persisted model was stored in.
    """
    training_data = load_data(NLU_DATA)
    trainer = Trainer(RasaNLUConfig(CONFIG_PATH))
    trainer.train(training_data)
    model_directory = trainer.persist('../models/nlu', fixed_model_name="current")
    # BUG FIX: the persisted path was assigned but never returned.
    return model_directory
def test_luis_data():
    """LUIS demo data loads with examples and no entity synonyms."""
    loaded = load_data('data/examples/luis/demo-restaurants.json')
    assert loaded.entity_examples != []
    assert loaded.intent_examples != []
    assert loaded.entity_synonyms == {}
def test_wit_data():
    """wit.ai demo data loads with examples and no entity synonyms."""
    loaded = load_data('data/examples/wit/demo-flights.json')
    assert loaded.entity_examples != []
    assert loaded.intent_examples != []
    assert loaded.entity_synonyms == {}
def test_api_data():
    """api.ai demo data loads with examples and a non-empty synonym map."""
    loaded = load_data('data/examples/api/')
    assert loaded.entity_examples != []
    assert loaded.intent_examples != []
    assert loaded.entity_synonyms != {}
def convert_training_data(data_file, out_file):
    """Convert a training-data file to rasa JSON format."""
    loaded = load_data(data_file)
    with io.open(out_file, "w") as sink:
        sink.write(loaded.as_json(indent=2))