def convert_alien_format_dataset(input_path, output_path):
    """Convert the dataset at ``input_path`` into a JSON file at ``output_path``.

    Thin wrapper around ``convert_training_data`` that always requests the
    ``"json"`` output format and the ``"zh-cn"`` language.

    Args:
        input_path: Forwarded as ``data_file``.
        output_path: Forwarded as ``out_file``.
    """
    conversion_options = {
        "data_file": input_path,
        "out_file": output_path,
        # Output format and language are fixed for this helper.
        "output_format": "json",
        "language": "zh-cn",
    }
    convert_training_data(**conversion_options)
def test_training_data_conversion(tmpdir, data_file, gold_standard_file):
    """Check that converting ``data_file`` matches the gold-standard data.

    The converted output is written into ``tmpdir``, loaded back, and its
    entity examples, intent examples and entity synonyms are compared with
    those loaded from ``gold_standard_file``.
    """
    converted_path = tmpdir.join("rasa_nlu_data.json").strpath
    convert_training_data(data_file, converted_path)

    converted = load_data(converted_path)
    # The conversion must have produced both kinds of examples.
    assert converted.entity_examples != []
    assert converted.intent_examples != []

    expected = load_data(gold_standard_file)
    assert cmp_dict_list(converted.entity_examples, expected.entity_examples)
    assert cmp_dict_list(converted.intent_examples, expected.intent_examples)
    assert converted.entity_synonyms == expected.entity_synonyms
def test_training_data_conversion(tmpdir, data_file, gold_standard_file):
    """Check that converting ``data_file`` matches the gold-standard data.

    The converted output is written into ``tmpdir``, loaded back, and then
    compared with the data loaded from ``gold_standard_file``. Example lists
    are handed to ``cmp_message_list``; its return value is not asserted
    here, so any failure has to come from inside that helper.
    """
    converted_path = tmpdir.join("rasa_nlu_data.json").strpath
    convert_training_data(data_file, converted_path)

    converted = load_data(converted_path)
    # The conversion must have produced both kinds of examples.
    assert converted.entity_examples != []
    assert converted.intent_examples != []

    expected = load_data(gold_standard_file)
    cmp_message_list(converted.entity_examples, expected.entity_examples)
    cmp_message_list(converted.intent_examples, expected.intent_examples)
    assert converted.entity_synonyms == expected.entity_synonyms
def train_nlu():
    """Prepare the NLU training data, train a model and persist it.

    Steps, in order:
      1. Call ``convert.read_excel()`` from the local ``convert`` module
         (presumably generates input data -- confirm against that module).
      2. Convert ``data/nlu.md`` to JSON at ``nludata/train_data_md.json``.
      3. Load everything under ``nludata/`` and train with the configuration
         read from ``nlu_model_config.yml``.
      4. Persist the model under ``models/nlu/`` with the fixed name
         ``"current"``.

    Returns:
        The value returned by ``Trainer.persist`` (used as the model
        directory by this function).
    """
    # Imports are kept local and in their original order.
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer
    from rasa_nlu.convert import convert_training_data
    import convert

    convert.read_excel()
    convert_training_data(
        'data/nlu.md',
        'nludata/train_data_md.json',
        'json',
        'ay',
    )

    nlu_examples = load_data('nludata/')
    nlu_config = config.load("nlu_model_config.yml")
    nlu_trainer = Trainer(nlu_config)
    nlu_trainer.train(nlu_examples)

    return nlu_trainer.persist('models/nlu/', fixed_model_name="current")
def test_training_data_conversion(tmpdir, data_file, gold_standard_file,
                                  output_format, language):
    """Check conversion of ``data_file`` and a follow-up round trip.

    First pass: convert ``data_file`` with ``output_format`` and ``language``,
    load the result and compare it with the data loaded from
    ``gold_standard_file``.

    Second pass: convert the first pass's output again and compare the
    reloaded data with the gold standard. Note that this second conversion
    always requests ``'json'``, independent of ``output_format``.
    """
    converted_path = tmpdir.join("rasa_nlu_data.json").strpath
    convert_training_data(data_file, converted_path, output_format, language)

    converted = load_data(converted_path, language)
    # The conversion must have produced both kinds of examples.
    assert converted.entity_examples != []
    assert converted.intent_examples != []

    expected = load_data(gold_standard_file, language)
    cmp_message_list(converted.entity_examples, expected.entity_examples)
    cmp_message_list(converted.intent_examples, expected.intent_examples)
    assert converted.entity_synonyms == expected.entity_synonyms

    # Round trip: feed the converted file back through the converter and run
    # the same comparisons against the gold standard.
    round_trip_path = tmpdir.join("data_in_original_format.txt").strpath
    convert_training_data(converted_path, round_trip_path, 'json', language)

    round_trip = load_data(round_trip_path, language)
    cmp_message_list(expected.entity_examples, round_trip.entity_examples)
    cmp_message_list(expected.intent_examples, round_trip.intent_examples)
    assert expected.entity_synonyms == round_trip.entity_synonyms
def test_training_data_conversion(tmpdir, data_file, gold_standard_file,
                                  output_format):
    """Check conversion of ``data_file`` and a follow-up round trip.

    First pass: convert ``data_file`` with ``output_format``, load the result
    and compare it with the data loaded from ``gold_standard_file``.

    Second pass: convert the first pass's output again and compare the
    reloaded data with the gold standard. Note that this second conversion
    always requests ``'json'``, independent of ``output_format``.
    """
    converted_path = tmpdir.join("rasa_nlu_data.json").strpath
    convert_training_data(data_file, converted_path, output_format)

    converted = load_data(converted_path)
    # The conversion must have produced both kinds of examples.
    assert converted.entity_examples != []
    assert converted.intent_examples != []

    expected = load_data(gold_standard_file)
    cmp_message_list(converted.entity_examples, expected.entity_examples)
    cmp_message_list(converted.intent_examples, expected.intent_examples)
    assert converted.entity_synonyms == expected.entity_synonyms

    # Round trip: feed the converted file back through the converter and run
    # the same comparisons against the gold standard.
    round_trip_path = tmpdir.join("data_in_original_format.txt").strpath
    convert_training_data(converted_path, round_trip_path, 'json')

    round_trip = load_data(round_trip_path)
    cmp_message_list(expected.entity_examples, round_trip.entity_examples)
    cmp_message_list(expected.intent_examples, round_trip.intent_examples)
    assert expected.entity_synonyms == round_trip.entity_synonyms
def convert_alien_format_dataset(language, input_path, output_path):
    """Convert the dataset at ``input_path`` into a JSON file at ``output_path``.

    Thin wrapper around ``convert_training_data`` that always requests the
    ``"json"`` output format.

    Args:
        language: Language identifier; lower-cased before being forwarded.
        input_path: Forwarded as ``data_file``.
        output_path: Forwarded as ``out_file``.
    """
    normalized_language = language.lower()
    convert_training_data(
        data_file=input_path,
        out_file=output_path,
        output_format="json",
        language=normalized_language,
    )