예제 #1
0
def convert_alien_format_dataset(input_path, output_path):
    convert_training_data(
        data_file=input_path,
        out_file=output_path,
        output_format="json",
        language="zh-cn"
    )
예제 #2
0
def test_training_data_conversion(tmpdir, data_file, gold_standard_file):
    out_path = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, out_path.strpath)
    td = load_data(out_path.strpath)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = load_data(gold_standard_file)
    assert cmp_dict_list(td.entity_examples, gold_standard.entity_examples)
    assert cmp_dict_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms
예제 #3
0
def test_training_data_conversion(tmpdir, data_file, gold_standard_file):
    out_path = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, out_path.strpath)
    td = load_data(out_path.strpath)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = load_data(gold_standard_file)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms
예제 #4
0
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer
    from rasa_nlu.convert import convert_training_data
    import convert

    convert.read_excel()
    convert_training_data('data/nlu.md', 'nludata/train_data_md.json', 'json',
                          'ay')
    training_data = load_data('nludata/')
    trainer = Trainer(config.load("nlu_model_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/',
                                      fixed_model_name="current")

    return model_directory
예제 #5
0
def test_training_data_conversion(tmpdir, data_file, gold_standard_file, output_format, language):
    out_path = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, out_path.strpath, output_format, language)
    td = load_data(out_path.strpath, language)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = load_data(gold_standard_file, language)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms

    # converting the converted file back to original file format and performing the same tests
    rto_path = tmpdir.join("data_in_original_format.txt")
    convert_training_data(out_path.strpath, rto_path.strpath, 'json', language)
    rto = load_data(rto_path.strpath, language)
    cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
    cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
    assert gold_standard.entity_synonyms == rto.entity_synonyms
예제 #6
0
def test_training_data_conversion(tmpdir, data_file, gold_standard_file, output_format):
    out_path = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, out_path.strpath, output_format)
    td = load_data(out_path.strpath)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = load_data(gold_standard_file)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms

    # converting the converted file back to original file format and performing the same tests
    rto_path = tmpdir.join("data_in_original_format.txt")
    convert_training_data(out_path.strpath, rto_path.strpath, 'json')
    rto = load_data(rto_path.strpath)
    cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
    cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
    assert gold_standard.entity_synonyms == rto.entity_synonyms
def convert_alien_format_dataset(language, input_path, output_path):
    convert_training_data(data_file=input_path,
                          out_file=output_path,
                          output_format="json",
                          language=language.lower())