def test_markdown_single_sections():
    td_regex_only = training_data.load_data('data/test/markdown_single_sections/regex_only.md')
    assert td_regex_only.regex_features == [
        {"name": "greet", "pattern": r"hey[^\s]*"}]

    td_syn_only = training_data.load_data('data/test/markdown_single_sections/synonyms_only.md')
    assert td_syn_only.entity_synonyms == {'Chines': 'chinese',
                                           'Chinese': 'chinese'}
def test_data_merging(files):
    td_reference = training_data.load_data(files[0])
    td = training_data.load_data(files[1])
    assert len(td.entity_examples) == len(td_reference.entity_examples)
    assert len(td.intent_examples) == len(td_reference.intent_examples)
    assert len(td.training_examples) == len(td_reference.training_examples)
    assert td.intents == td_reference.intents
    assert td.entities == td_reference.entities
    assert td.entity_synonyms == td_reference.entity_synonyms
    assert td.regex_features == td_reference.regex_features
def test_nonascii_entities():
    data = """
{
  "luis_schema_version": "2.0",
  "utterances" : [
    {
      "text": "I am looking for a ßäæ ?€ö) item",
      "intent": "unk",
      "entities": [
        {
          "entity": "description",
          "startPos": 19,
          "endPos": 26
        }
      ]
    }
  ]
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1
        entity = entities[0]
        assert entity["value"] == "ßäæ ?€ö)"
        assert entity["start"] == 19
        assert entity["end"] == 27
        assert entity["entity"] == "description"
def test_multiword_entities():
    data = """
{
  "rasa_nlu_data": {
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "New York City"
          }
        ]
      }
    ]
  }
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1
        tokens = WhitespaceTokenizer().tokenize(example.text)
        start, end = MitieEntityExtractor.find_entity(entities[0],
                                                      example.text,
                                                      tokens)
        assert start == 4
        assert end == 7
Example #5
def do_train(cfg,  # type: RasaNLUModelConfig
             data,  # type: Text
             path=None,  # type: Optional[Text]
             project=None,  # type: Optional[Text]
             fixed_model_name=None,  # type: Optional[Text]
             storage=None,  # type: Optional[Text]
             component_builder=None,  # type: Optional[ComponentBuilder]
             **kwargs   # type: Any
             ):
    # type: (...) -> Tuple[Trainer, Interpreter, Text]
    """Loads the trainer and the data and runs the training of the model."""

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(cfg, component_builder)
    persistor = create_persistor(storage)
    training_data = load_data(data, cfg.language)
    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path,
                                         persistor,
                                         project,
                                         fixed_model_name)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
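
# Illustrative usage sketch for do_train (not from the original sources): the
# config and data paths below reuse files referenced elsewhere in these
# examples and are placeholders, assuming the do_train signature shown above.
def example_do_train_usage():
    cfg = config.load("sample_configs/config_pretrained_embeddings_spacy.yml")
    trainer, interpreter, persisted_path = do_train(
        cfg, "data/examples/rasa/demo-rasa.json", path="models/")
    return interpreter.parse("hello")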
def test_repeated_entities():
    data = """
{
  "rasa_nlu_data": {
    "common_examples" : [
      {
        "text": "book a table today from 3 to 6 for 3 people",
        "intent": "unk",
        "entities": [
          {
            "entity": "description",
            "start": 35,
            "end": 36,
            "value": "3"
          }
        ]
      }
    ]
  }
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1
        tokens = WhitespaceTokenizer().tokenize(example.text)
        start, end = MitieEntityExtractor.find_entity(entities[0],
                                                      example.text,
                                                      tokens)
        assert start == 9
        assert end == 10
Example #7
def test_run_cv_evaluation():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    nlu_config = config.load(
        "sample_configs/config_pretrained_embeddings_spacy.yml")

    n_folds = 2
    results, entity_results = cross_validate(td, n_folds, nlu_config)

    assert len(results.train["Accuracy"]) == n_folds
    assert len(results.train["Precision"]) == n_folds
    assert len(results.train["F1-score"]) == n_folds
    assert len(results.test["Accuracy"]) == n_folds
    assert len(results.test["Precision"]) == n_folds
    assert len(results.test["F1-score"]) == n_folds
    assert len(entity_results.train[
        'CRFEntityExtractor']["Accuracy"]) == n_folds
    assert len(entity_results.train[
        'CRFEntityExtractor']["Precision"]) == n_folds
    assert len(entity_results.train[
        'CRFEntityExtractor']["F1-score"]) == n_folds
    assert len(entity_results.test[
        'CRFEntityExtractor']["Accuracy"]) == n_folds
    assert len(entity_results.test[
        'CRFEntityExtractor']["Precision"]) == n_folds
    assert len(entity_results.test[
        'CRFEntityExtractor']["F1-score"]) == n_folds
Example #8
def run_evaluation(data_path, model_path,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter,
                                                        test_data)
    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_predictions = get_intent_predictions(interpreter, test_data)
        logger.info("Intent evaluation results:")
        evaluate_intents(intent_targets, intent_predictions)

    if extractors:
        entity_targets = get_entity_targets(test_data)

        logger.info("Entity evaluation results:")
        evaluate_entities(entity_targets, entity_predictions, tokens,
                          extractors)
Example #9
def test_drop_intents_below_freq():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    clean_td = drop_intents_below_freq(td, 0)
    assert clean_td.intents == {'affirm', 'goodbye', 'greet',
                                'restaurant_search'}

    clean_td = drop_intents_below_freq(td, 10)
    assert clean_td.intents == {'affirm', 'restaurant_search'}
Example #10
def test_wit_data():
    td = training_data.load_data('data/examples/wit/demo-flights.json')
    assert len(td.entity_examples) == 4
    assert len(td.intent_examples) == 1
    assert len(td.training_examples) == 4
    assert td.entity_synonyms == {}
    assert td.intents == {"flight_booking"}
    assert td.entities == {"location", "datetime"}
Example #11
def test_luis_data():
    td = training_data.load_data('data/examples/luis/demo-restaurants.json')
    assert len(td.entity_examples) == 8
    assert len(td.intent_examples) == 28
    assert len(td.training_examples) == 28
    assert td.entity_synonyms == {}
    assert td.intents == {"affirm", "goodbye", "greet", "inform"}
    assert td.entities == {"location", "cuisine"}
Example #12
def test_lookup_table_md():
    lookup_fname = 'data/test/lookup_tables/plates.txt'
    td_lookup = training_data.load_data(
        'data/test/lookup_tables/lookup_table.md')
    assert td_lookup.lookup_tables[0]['name'] == 'plates'
    assert td_lookup.lookup_tables[0]['elements'] == lookup_fname
    assert td_lookup.lookup_tables[1]['name'] == 'drinks'
    assert td_lookup.lookup_tables[1]['elements'] == [
        'mojito', 'lemonade', 'sweet berry wine', 'tea', 'club mate']
Example #13
def test_prepare_data():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    clean_data = prepare_data(td, 0)
    unique_intents = sorted({i.data["intent"] for i in clean_data})
    assert unique_intents == ['affirm', 'goodbye', 'greet',
                              'restaurant_search']

    clean_data = prepare_data(td, 10)
    unique_intents = sorted({i.data["intent"] for i in clean_data})
    assert unique_intents == ['affirm', 'restaurant_search']
Example #14
    def train(cfg_name, project_name):
        from rasa_nlu import training_data

        cfg = config.load(cfg_name)
        trainer = Trainer(cfg, component_builder)
        training_data = training_data.load_data(data)

        trainer.train(training_data)
        trainer.persist("test_projects", project_name=project_name)
Example #15
def run_evaluation(data_path, model,
                   report_folder=None,
                   successes_filename=None,
                   errors_filename='errors.json',
                   confmat_filename=None,
                   intent_hist_filename=None,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    if isinstance(model, Interpreter):
        interpreter = model
    else:
        interpreter = Interpreter.load(model, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter,
                                                        test_data)

    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None
    }

    if report_folder:
        utils.create_dir(report_folder)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_results = get_intent_predictions(
            intent_targets, interpreter, test_data)

        logger.info("Intent evaluation results:")
        result['intent_evaluation'] = evaluate_intents(intent_results,
                                                       report_folder,
                                                       successes_filename,
                                                       errors_filename,
                                                       confmat_filename,
                                                       intent_hist_filename)

    if extractors:
        entity_targets = get_entity_targets(test_data)

        logger.info("Entity evaluation results:")
        result['entity_evaluation'] = evaluate_entities(entity_targets,
                                                        entity_predictions,
                                                        tokens,
                                                        extractors,
                                                        report_folder)

    return result
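
# Illustrative usage sketch for run_evaluation (not from the original sources):
# the model directory is a placeholder, assuming a model persisted by one of
# the train_nlu examples; the result keys match the dict built above.
def example_run_evaluation_usage():
    result = run_evaluation("data/examples/rasa/demo-rasa.json",
                            "models/nlu/default/current",
                            report_folder="reports")
    return result["intent_evaluation"], result["entity_evaluation"]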
Example #16
def test_train_test_split(filename):
    td = training_data.load_data(filename)
    assert td.intents == {"affirm", "greet", "restaurant_search", "goodbye"}
    assert td.entities == {"location", "cuisine"}
    assert len(td.training_examples) == 42
    assert len(td.intent_examples) == 42

    td_train, td_test = td.train_test_split(train_frac=0.8)

    assert len(td_train.training_examples) == 32
    assert len(td_test.training_examples) == 10
Example #17
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('data/nlu_data/')
    trainer = Trainer(config.load("nlu_model_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu', fixed_model_name="current")

    return model_directory
def test_dialogflow_data():
    td = training_data.load_data('data/examples/dialogflow/')
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v
                            for k, v in td.entity_synonyms.items() if k != v}
    assert non_trivial_synonyms == {"mexico": "mexican",
                                    "china": "chinese",
                                    "india": "indian"}
Example #19
def train_nlu_gao():
    from rasa_nlu_gao.training_data import load_data
    from rasa_nlu_gao import config
    from rasa_nlu_gao.model import Trainer

    training_data = load_data('data/rasa_dataset_training.json')
    trainer = Trainer(config.load("config_embedding_bilstm.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu_gao/',
                                      fixed_model_name="current")

    return model_directory
Example #20
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('./data/coco_data.json')
    trainer = Trainer(config.load("./config_spacy.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('./models/nlu/',
                                      fixed_model_name="coconlu")

    return model_directory
Example #21
def test_training_data_conversion(tmpdir, data_file, gold_standard_file,
                                  output_format, language):
    out_path = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, out_path.strpath, output_format, language)
    td = training_data.load_data(out_path.strpath, language)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = training_data.load_data(gold_standard_file, language)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms

    # converting the converted file back to original
    # file format and performing the same tests
    rto_path = tmpdir.join("data_in_original_format.txt")
    convert_training_data(out_path.strpath, rto_path.strpath, 'json', language)
    rto = training_data.load_data(rto_path.strpath, language)
    cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
    cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
    assert gold_standard.entity_synonyms == rto.entity_synonyms
Example #22
def run_evaluation(data_path, model,
                   report_filename=None,
                   successes_filename=None,
                   errors_filename='errors.json',
                   confmat_filename=None,
                   intent_hist_filename=None,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    if isinstance(model, Interpreter):
        interpreter = model
    else:
        interpreter = Interpreter.load(model, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter,
                                                        test_data)

    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    result = {
        "intent_evaluation": None,
        "entity_evaluation": None
    }

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_results = get_intent_predictions(
                intent_targets, interpreter, test_data)

        logger.info("Intent evaluation results:")
        result['intent_evaluation'] = evaluate_intents(intent_results,
                                                       report_filename,
                                                       successes_filename,
                                                       errors_filename,
                                                       confmat_filename,
                                                       intent_hist_filename)

    if extractors:
        entity_targets = get_entity_targets(test_data)

        logger.info("Entity evaluation results:")
        result['entity_evaluation'] = evaluate_entities(entity_targets,
                                                        entity_predictions,
                                                        tokens,
                                                        extractors)

    return result
Example #23
def train_nlu():  # pipeline
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    # training_data = load_data('../rasa/data/res_data.json')
    training_data = load_data('./data_loading/test_loading.json')
    trainer = Trainer(config.load("../rasa/config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist("./nlu_model/jarvis_nlu/",
                                      fixed_model_name="current")

    return model_directory
Example #24
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data("data/mobile_nlu_data.json")
    trainer = Trainer(config.load("mobile_nlu_model_config.json"))
    trainer.train(training_data)
    model_directory = trainer.persist("models/",
                                      project_name="ivr",
                                      fixed_model_name="demo")

    return model_directory
Example #25
def test_run_cv_evaluation():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    nlu_config = RasaNLUConfig("sample_configs/config_spacy.json")

    n_folds = 3
    results = run_cv_evaluation(td, n_folds, nlu_config)

    assert len(results.train["Accuracy"]) == n_folds
    assert len(results.train["Precision"]) == n_folds
    assert len(results.train["F1-score"]) == n_folds
    assert len(results.test["Accuracy"]) == n_folds
    assert len(results.test["Precision"]) == n_folds
    assert len(results.test["F1-score"]) == n_folds
Example #26
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer
    import jieba

    jieba.load_userdict("jieba_userdict.txt")
    training_data = load_data("data/train_file_new.json")
    trainer = Trainer(config.load("hainan_nlu_model_config.json"))
    trainer.train(training_data)
    model_directory = trainer.persist("models/")

    return model_directory
Example #27
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('data/nlu/')
    trainer = Trainer(config.load("nlu_model_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/',
                                      project_name='default',
                                      fixed_model_name="current")

    return model_directory
Example #28
def test_train():
    print(datetime.datetime.now())
    # data_set generated by Chatito
    training_data = load_data(data_set)
    # load rasa pipeline
    trainer = Trainer(
        config.load(
            "/Users/guolei/Documents/EIT/GUOLEI/ContextManager/sample_configs/config_spacy.yml"
        ))
    trainer.train(training_data)
    # save model
    trainer.persist('./projects/')
    print(datetime.datetime.now())
Example #29
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data(
        '../../../models/train_data/nlu_data/training_data.json')
    trainer = Trainer(config.load("../../../models/nlu_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('../../../models/models/nlu/',
                                      fixed_model_name="current")

    return model_directory
Example #30
    def __init__(self, model_path="config_spacy.yml", data='train.md'):
        # Create a trainer that uses this config
        trainer = Trainer(config.load(model_path))
        # Load the training data
        training_data = load_data(data)
        # Create an interpreter by training the model
        self.interpreter = trainer.train(training_data)
        trainer = Trainer(config.load(model_path))
        self.tieba_interpreter = trainer.train(load_data("tieba_train.md"))
        self.tieba = Tieba()
        self.respond_dict = {
            "TIEBA": self.respond_tieba,
            "default": self.respond_default,
            "get_posts": self.tieba.get_posts,
            "turn_to_post": self.tieba.turn_to_post,
            "LAUNCH": self.launch,
            "QUERY": self.query,
            "ROUTE": self.route
        }

        self.state = State.FREE
        self.message_trace = []
Example #31
def train_nlu(data, configs, model_dir):
    """
        Train a NLU model

        :param data:
        :param configs:
        :param model_dir:
        :return: None
    """
    training_data = load_data(data)
    trainer = Trainer(config.load(configs))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name='weathernlu')
Example #32
def home():
    train_data = load_data('rasa_dataset.json')
    trainer = Trainer(config.load("config_spacy.yaml"))

    trainer.train(train_data)
    model_directory = trainer.persist('/projects')
    interpreter = Interpreter.load(model_directory)
    if 'text' in request.args:
        txt = request.args['text']
        # id = int(request.args['text'])
        return interpreter.parse(txt)
    else:
        return "Please write any query."
def train_test(td_file, config_file, model_dir):
    # helper function to split into test and train and evaluate on results.

    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))
    train, test = td.train_test_split(train_frac=0.6)
    trainer.train(train)
    model_loc = trainer.persist(model_dir)
    with open('data/tmp/temp_test.json', 'w', encoding="utf8") as f:
        f.write(test.as_json())
    with open('data/temp_train.json', 'w', encoding="utf8") as f:
        f.write(train.as_json())
    evaluate_model('data/tmp/temp_test.json', model_loc)
Example #34
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu.model import Trainer
    from rasa_nlu import config

    training_data = load_data('data/testdata.json')
    trainer = Trainer(config.load("nlu_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist(
        'models/nlu', fixed_model_name="nlu",
        project_name="current")  # Returns the directory the model is stored in

    return model_directory
Example #35
def rasa_train(message):
    training_data = load_data('demo-rasa.json')
    # Create a trainer
    trainer = Trainer(config.load("config_spacy.yml"))
    # Create an interpreter by training the model
    interpreter = trainer.train(training_data)
    response = interpreter.parse(message)
    matched_intent = None
    for intent, pattern in patterns.items():
        if re.search(pattern, message) is not None:
            matched_intent = intent
            response["intent"]["name"] = matched_intent
    return response
Example #36
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('data/intents/')
    trainer = Trainer(config.load('nlu_config.yml'))
    trainer.train(training_data)

    model_directory = trainer.persist('models/nlu/',
                                      fixed_model_name='current')

    return model_directory
Example #37
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu.config import RasaNLUModelConfig
    from rasa_nlu.model import Trainer
    from rasa_nlu import config
    from pathlib import Path

    training_data = load_data('training_data/general-chat.md')
    trainer = Trainer(
        config.load("training_data/config_tensorflow_embedding.yaml"))
    trainer.train(training_data)
    model_directory = trainer.persist(Path('.').parent / "models",
                                      project_name='ctraubot',
                                      fixed_model_name='nlu')
def rasa_base(request):
    if request.method == "POST":
        user = request.POST.get('user')
        training_data = load_data('main/Bank_Data.json')
        trainer = Trainer(config.load('main/config_spacy.yml'))
        trainer.train(training_data)
        model_directory = trainer.persist('main/')
        interpreter = Interpreter.load(model_directory)
        print(user)
        output = interpreter.parse(str(user))
    else:
        return render(request, "CHUG/rasa.html")

    return render(request, 'CHUG/rasa.html', {'data': output})
Example #39
def train(data_path):
    # sample data

    getcwd = os.getcwd()

    print(getcwd)

    training_data = load_data(data_path)
    # the pipeline configuration specifies the location of the config file
    trainer = Trainer(config.load("./../nlu_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('./../models_center/intent/fb')

    print(model_directory)
Example #40
def train_nlu(domain_id="default"):
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('{}/{}/intellei_rasa.json'.format(
        data_folder, domain_id))
    trainer = Trainer(config.load("intelleibot/nlu_model_config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('{}/{}/nlu/'.format(
        model_folder, domain_id),
                                      fixed_model_name="current")

    return model_directory
Example #41
def initInterPreter():
    # Import necessary modules
    from rasa_nlu.training_data import load_data
    #from rasa_nlu.config import RasaNLUModelConfig
    from rasa_nlu.model import Trainer
    from rasa_nlu import config
    
    # Create a trainer that uses this config
    trainer = Trainer(config.load("config_spacy.yml"))
    
    # Load the training data
    training_data = load_data('data/training_dataset14.json')
    
    # Create an interpreter by training the model
    globleDict['interpreter'] = trainer.train(training_data)
Example #42
def train_test(td_file, config_file, model_dir, key='company', noise=0.1):
    """trains a model using the training data
       (split into train-test) and config"""
    td = load_data(td_file)

    trainer = Trainer(config.load(config_file))
    train, test = td.train_test_split(train_frac=0.8)
    test = add_noise(test, key, noise=noise)

    trainer.train(train)
    tmp_fname = 'data/tmp/temp_test.json'
    model_loc = trainer.persist(model_dir)
    with open(tmp_fname, 'w', encoding="utf8") as f:
        f.write(test.as_json())
    evaluate_model(tmp_fname, model_loc)
Example #43
def test_run_cv_evaluation():
    import numpy as np
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    n_folds = 3
    nlu_config = RasaNLUConfig("sample_configs/config_defaults.json")

    np.random.seed(2018)
    results = run_cv_evaluation(td, n_folds, nlu_config)

    rel_tol = 1e-09
    abs_tol = 0.01

    acc = np.mean(results["accuracy"])
    exp_acc = 0.65  # expected result
    np.testing.assert_approx_equal(acc, exp_acc, significant=5)
Example #44
def run_evaluation(config, model_path, component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""
    # get the metadata config from the package data
    test_data = training_data.load_data(config['data'], config['language'])
    interpreter = Interpreter.load(model_path, config, component_builder)
    intent_targets, entity_targets = get_targets(test_data)
    intent_predictions, entity_predictions, tokens = get_predictions(interpreter, test_data)
    extractors = get_entity_extractors(interpreter)

    if extractors.intersection(duckling_extractors):
        entity_predictions = patch_duckling_entities(entity_predictions)
        extractors = patch_duckling_extractors(interpreter, extractors)

    evaluate_intents(intent_targets, intent_predictions)
    evaluate_entities(entity_targets, entity_predictions, tokens, extractors)
def train_model():
    # trains a model and times it
    t = time()
    # training_data = load_data('demo_train.md')
    training_data = load_data('data/company_train_lookup.json')
    td_load_time = time() - t
    trainer = Trainer(config.load('config.yaml'))
    t = time()
    trainer.train(training_data)
    train_time = time() - t
    clear_model_dir()
    t = time()
    # Returns the directory the model is stored in
    model_directory = trainer.persist('./tmp/models')
    persist_time = time() - t
    return td_load_time, train_time, persist_time
Example #47
    def generate_rasa_training_data_and_interpreter(
            self, bot_directories, module_id):
        training_data_dir = bot_directories[
            "training_data_directory"] + module_id + ".json"
        config_file_dir = bot_directories[
            "training_data_directory"] + 'config_spacy.yaml'
        model_dir = bot_directories["model_directory"]
        training_data = load_data(training_data_dir)
        trainer = Trainer(config.load(config_file_dir))
        # train the model and save it to a folder
        trainer.train(training_data)
        model_directory = trainer.persist(model_dir)
        print("trained model for module '" + module_id + "'")
        rasa_interpreter = Interpreter.load(model_directory)
        return training_data, rasa_interpreter
def intent_pooled_model(is_train):
    if is_train:
        training_data = load_data('./data/nlpcc_intent/rasa_nlpcc_train.json')
        config_file = './sample_configs/config_bert_intent_classifier_pooled.yml'
        ModelConfig = config.load(config_file)
        trainer = Trainer(ModelConfig)
        interpreter = trainer.train(training_data)
    else:
        model_directory = './models/rasa_bert/nlpcc_pooled'
        interpreter = Interpreter.load(model_directory)
    query = "播放一首歌"
    while query != "Stop":
        print(interpreter.parse(query))
        query = input("input query: (insert Stop to close)\n")
    print('intent classifier close')
def sentiment_analyzer(is_train):
    if is_train:
        training_data = load_data('./data/sentiment_analyzer/trainset.json')
        config_file = './sample_configs/config_bert_sentiment.yml'
        ModelConfig = config.load(config_file)
        trainer = Trainer(ModelConfig)
        interpreter = trainer.train(training_data)
    else:
        model_directory = './models/sentiment/sentiment_demo'
        interpreter = Interpreter.load(model_directory)
    query = "今天好开心呀"
    while query != "Stop":
        print(interpreter.parse(query))
        query = input("input query: (insert Stop to close)\n")
    print('sentiment_analyzer close')
def en_spell_checker_model(is_train):
    if is_train:
        training_data = load_data('./data/examples/rasa/demo-rasa.json')
        config_file = './sample_configs/config_bert_spell_checker_en.yml'
        ModelConfig = config.load(config_file)
        trainer = Trainer(ModelConfig)
        interpreter = trainer.train(training_data)
    else:
        model_directory = './models/spell_checker/rasa_bert_spell_checker_en'
        interpreter = Interpreter.load(model_directory)
    query = "How old aer you?"
    while query != "Stop":
        print(interpreter.parse(query))
        query = input("input query: (insert Stop to close)\n")
    print('spell_checker close')
def NER(is_train):
    if is_train:
        training_data = load_data('./data/ner/bert_ner_train.json')
        config_file = './sample_configs/config_bert_ner.yml'
        ModelConfig = config.load(config_file)
        trainer = Trainer(ModelConfig)
        interpreter = trainer.train(training_data)
    else:
        model_directory = './models/rasa_bert/ner_demo'
        interpreter = Interpreter.load(model_directory)
    query = "这是中国领导人首次在哈佛大学发表演讲。"
    while query != "Stop":
        print(interpreter.parse(query))
        query = input("input query: (insert Stop to close)\n")
    print('Ner close')
Example #52
def test_demo_data(filename):
    td = training_data.load_data(filename)
    assert td.intents == {"affirm", "greet", "restaurant_search", "goodbye"}
    assert td.entities == {"location", "cuisine"}
    assert len(td.training_examples) == 42
    assert len(td.intent_examples) == 42
    assert len(td.entity_examples) == 11

    assert td.entity_synonyms == {'Chines': 'chinese',
                                  'Chinese': 'chinese',
                                  'chines': 'chinese',
                                  'vegg': 'vegetarian',
                                  'veggie': 'vegetarian'}

    assert td.regex_features == [{"name": "greet", "pattern": r"hey[^\s]*"},
                                 {"name": "zipcode", "pattern": r"[0-9]{5}"}]
Example #53
def do_train(config,  # type: RasaNLUConfig
             component_builder=None  # type: Optional[ComponentBuilder]
             ):
    # type: (...) -> Tuple[Trainer, Interpreter, Text]
    """Loads the trainer and the data and runs the training of the model."""

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(config, component_builder)
    persistor = create_persistor(config)
    training_data = load_data(config['data'], config['language'])
    interpreter = trainer.train(training_data)
    persisted_path = trainer.persist(config['path'], persistor,
                                     config['project'],
                                     config['fixed_model_name'])
    return trainer, interpreter, persisted_path
def test_interpreter(pipeline_template, component_builder):
    test_data = "data/examples/rasa/demo-rasa.json"
    _conf = utilities.base_test_conf(pipeline_template)
    _conf["data"] = test_data
    td = training_data.load_data(test_data)
    interpreter = utilities.interpreter_for(component_builder, _conf)

    texts = ["good bye", "i am looking for an indian spot"]

    for text in texts:
        result = interpreter.parse(text, time=None)
        assert result['text'] == text
        assert (not result['intent']['name'] or
                result['intent']['name'] in td.intents)
        assert result['intent']['confidence'] >= 0
        # Ensure the model doesn't detect entity types that are not present.
        # Models on our test data set are not stable enough to require the
        # exact entities to be found.
        for entity in result['entities']:
            assert entity['entity'] in td.entities
Example #55
def test_dialogflow_data():
    td = training_data.load_data('data/examples/dialogflow/')
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert len(td.lookup_tables) == 2
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v
                            for k, v in td.entity_synonyms.items() if k != v}
    assert non_trivial_synonyms == {"mexico": "mexican",
                                    "china": "chinese",
                                    "india": "indian"}
    # The order changes based on different computers hence the grouping
    assert {td.lookup_tables[0]['name'],
            td.lookup_tables[1]['name']} == {'location', 'cuisine'}
    assert {len(td.lookup_tables[0]['elements']),
            len(td.lookup_tables[1]['elements'])} == {4, 6}
Example #56
def test_run_cv_evaluation():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    nlu_config = config.load("sample_configs/config_spacy.yml")

    n_folds = 2
    results, entity_results = run_cv_evaluation(td, n_folds, nlu_config)

    assert len(results.train["Accuracy"]) == n_folds
    assert len(results.train["Precision"]) == n_folds
    assert len(results.train["F1-score"]) == n_folds
    assert len(results.test["Accuracy"]) == n_folds
    assert len(results.test["Precision"]) == n_folds
    assert len(results.test["F1-score"]) == n_folds
    assert len(entity_results.train['ner_crf']["Accuracy"]) == n_folds
    assert len(entity_results.train['ner_crf']["Precision"]) == n_folds
    assert len(entity_results.train['ner_crf']["F1-score"]) == n_folds
    assert len(entity_results.test['ner_crf']["Accuracy"]) == n_folds
    assert len(entity_results.test['ner_crf']["Precision"]) == n_folds
    assert len(entity_results.test['ner_crf']["F1-score"]) == n_folds
Example #57
def test_entities_synonyms():
    data = u"""
{
  "rasa_nlu_data": {
    "entity_synonyms": [
      {
        "value": "nyc",
        "synonyms": ["New York City", "nyc", "the big apple"]
      }
    ],
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "NYC"
          }
        ]
      },
      {
        "text": "show me flights to nyc",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 22,
            "value": "nyc"
          }
        ]
      }
    ]
  }
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert td.entity_synonyms["New York City"] == "nyc"
Example #58
def test_spacy_featurizer_casing(spacy_nlp):
    from rasa_nlu.featurizers import spacy_featurizer

    # if this starts failing for the default model, we should think about
    # removing the lowercasing the spacy nlp component applies when it
    # retrieves vectors. For compressed spacy models (e.g. models
    # ending in _sm) this test will most likely fail.

    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    for e in td.intent_examples:
        doc = spacy_nlp(e.text)
        doc_capitalized = spacy_nlp(e.text.capitalize())

        vecs = spacy_featurizer.features_for_doc(doc)
        vecs_capitalized = spacy_featurizer.features_for_doc(doc_capitalized)

        assert np.allclose(vecs, vecs_capitalized, atol=1e-5), \
            "Vectors are unequal for texts '{}' and '{}'".format(
                    e.text, e.text.capitalize())
Example #59
def main():
    parser = create_argument_parser()
    cmdline_args = parser.parse_args()
    utils.configure_colored_logging(cmdline_args.loglevel)

    if cmdline_args.mode == "crossvalidation":

        # TODO: move parsing into sub parser
        # manual check argument dependency
        if cmdline_args.model is not None:
            parser.error("Crossvalidation will train a new model "
                         "- do not specify external model.")

        if cmdline_args.config is None:
            parser.error("Crossvalidation will train a new model "
                         "you need to specify a model configuration.")

        nlu_config = config.load(cmdline_args.config)
        data = training_data.load_data(cmdline_args.data)
        data = drop_intents_below_freq(data, cutoff=5)
        results, entity_results = cross_validate(
            data, int(cmdline_args.folds), nlu_config)
        logger.info("CV evaluation (n={})".format(cmdline_args.folds))

        if any(results):
            logger.info("Intent evaluation results")
            return_results(results.train, "train")
            return_results(results.test, "test")
        if any(entity_results):
            logger.info("Entity evaluation results")
            return_entity_results(entity_results.train, "train")
            return_entity_results(entity_results.test, "test")

    elif cmdline_args.mode == "evaluation":
        run_evaluation(cmdline_args.data,
                       cmdline_args.model,
                       cmdline_args.report,
                       cmdline_args.successes,
                       cmdline_args.errors,
                       cmdline_args.confmat,
                       cmdline_args.histogram)

    logger.info("Finished evaluation")