Example 1
def train_nlu():
    training_data = load_data('./data/nlu.md')
    trainer = Trainer(config.load("config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('./models/nlu/',
                                      fixed_model_name="current")
    return model_directory
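A minimal inference sketch, assuming the Rasa 1.x API: feed the directory returned by train_nlu() to Interpreter.load and parse a message (the test utterance is illustrative).

from rasa.nlu.model import Interpreter

# Load the model persisted by train_nlu() and run inference on one message.
model_directory = train_nlu()
interpreter = Interpreter.load(model_directory)
result = interpreter.parse("hello there")
print(result["intent"], result["entities"])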
Example 2
def train_nlu(data, configs, model_dir):
    training_data = load_data(data)
    trainer = Trainer(config.load(configs))
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir, fixed_model_name="nlu")
    logger.info(f"Model trained. Stored in '{model_directory}'.")
    return model_directory
Example 3
def trained_nlu_model(request):
    cfg = RasaNLUModelConfig({"pipeline": "keyword"})
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)

    trainer.train(td)

    model_path = trainer.persist(NLU_MODEL_PATH)

    nlu_data = data.get_nlu_directory(DEFAULT_DATA_PATH)
    output_path = os.path.join(NLU_MODEL_PATH, NLU_MODEL_NAME)
    new_fingerprint = model.model_fingerprint(NLU_DEFAULT_CONFIG_PATH,
                                              nlu_data=nlu_data)
    model.create_package_rasa(model_path, output_path, new_fingerprint)

    def fin():
        if os.path.exists(NLU_MODEL_PATH):
            shutil.rmtree(NLU_MODEL_PATH)

        if os.path.exists(output_path):
            shutil.rmtree(output_path)

    request.addfinalizer(fin)

    return output_path
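The function above is written as a pytest fixture body (note the request.addfinalizer cleanup). A hedged sketch of registering and consuming it, where the decorator call and the test itself are assumptions, not from the source:

import os
import pytest

# Hypothetical registration: wrap the function from the example above
# as a session-scoped fixture.
trained_nlu_model = pytest.fixture(scope="session")(trained_nlu_model)

def test_packaged_model_exists(trained_nlu_model):
    # The fixture returns the path of the packaged model; the finalizer
    # registered via request.addfinalizer cleans up after the session.
    assert os.path.exists(trained_nlu_model)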
Example 4
def train(nlu_config: Union[Text, RasaNLUModelConfig],
          data: Text,
          path: Optional[Text] = None,
          project: Optional[Text] = None,
          fixed_model_name: Optional[Text] = None,
          storage: Optional[Text] = None,
          component_builder: Optional[ComponentBuilder] = None,
          training_data_endpoint: Optional[EndpointConfig] = None,
          **kwargs: Any) -> Tuple[Trainer, Interpreter, Optional[Text]]:
    """Loads the trainer and the data and runs the training of the model."""

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(nlu_config, component_builder)
    persistor = create_persistor(storage)
    if training_data_endpoint is not None:
        training_data = load_data_from_endpoint(training_data_endpoint,
                                                nlu_config.language)
    else:
        training_data = load_data(data, nlu_config.language)
    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path, persistor, project,
                                         fixed_model_name)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
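A hedged driver for the function above: unpack the returned tuple to get both the in-memory interpreter and the persisted path. The config and data paths are illustrative.

# Illustrative paths; adjust to your project layout.
trainer, interpreter, persisted_path = train(
    "config.yml",
    data="data/nlu.md",
    path="models/nlu",
    fixed_model_name="current",
)
print(interpreter.parse("hi"))
print("model stored at:", persisted_path)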
Example 5
    def _async_train(self, config, nlu_data, model_name):
        training_start = timer()
        with self.lock:
            self.training_status[model_name] = {
                "status": "TRAINING",
            }
        
        data = self.data_reader.read_from_json({'rasa_nlu_data': nlu_data})
        with self.interpreter_cache.lock:
            trainer = Trainer(RasaNLUModelConfig(config), self.interpreter_cache.component_builder)
        
        interpreter = trainer.train(data)
        tempdir = tempfile.mkdtemp()
        trainer.persist(tempdir, None, "nlu")
        
        _model_package = create_package_rasa(tempdir, os.path.join("models", model_name))

        self.interpreter_cache.store(model_name, interpreter)
        
        with self.lock:
            training_end = timer()
            self.training_status[model_name] = {
                "status": "READY",
                "training_time": f"{training_end - training_start:.2f}"
            }
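Despite its name, _async_train is a blocking method; a sketch of running it on a worker thread while callers poll training_status. The service instance and its arguments are assumptions, not shown in the source.

import threading

# Hypothetical driver: run training without blocking the caller.
worker = threading.Thread(
    target=service._async_train,
    args=(pipeline_config, nlu_training_examples, "my-model"),
    daemon=True,
)
worker.start()

# Poll the shared status dict under the same lock the trainer uses.
with service.lock:
    status = service.training_status.get("my-model", {}).get("status")
print(status)  # "TRAINING" until the worker flips it to "READY"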
Example 6
def train_nlu(data_path, configs, model_path):
    # NOTE: `logfile` must be defined in the enclosing module.
    logging.basicConfig(filename=logfile, level=logging.DEBUG)
    training_data = load_data(data_path)
    trainer = Trainer(config.load(configs))
    trainer.train(training_data)
    model_directory = trainer.persist(model_path, fixed_model_name='nlu')
    run_evaluation(data_path, model_directory)
Example 7
def load_training_data(data_file="../data/testData.json",
                       config_file="../configs/config_spacy.yml"):
    training_data = load_data(data_file)
    trainer = Trainer(config.load(config_file))
    trainer.train(training_data)
    model_directory = trainer.persist('./projects/default/')

    # where model_directory points to the model folder
    return model_directory
Example 8
def train_model(td_file, config_file, model_dir):
    """trains a model using the training data and config
       creates model and returns the path to this model for evaluation"""
    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))
    trainer.train(td)
    model_loc = trainer.persist(model_dir)

    return model_loc
Example 9
def test_load_and_persist_without_train(language, pipeline, component_builder, tmpdir):
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    trainer = Trainer(_config, component_builder)
    persistor = create_persistor(_config)
    persisted_path = trainer.persist(tmpdir.strpath, persistor)
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
Example 10
def test_nlu_interpreter():
    #training_data = load_data("data/chitchat_nlu.md")
    training_data = load_data("data")
    trainer = Trainer(config.load("config.yml"))
    interpreter = trainer.train(training_data)
    test_interpreter_dir = trainer.persist("./tests/models", project_name="nlu")
    parsing = interpreter.parse('hello')

    assert parsing['intent']['name'] == 'greet'
    assert test_interpreter_dir
Example 11
def trained_nlu_model():
    cfg = RasaNLUModelConfig({"pipeline": "keyword"})
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)

    trainer.train(td)
    model_path = trainer.persist("test_models",
                                 project_name="test_model_keyword")

    return model_path
Example 12
def test_load_and_persist_without_train(language, pipeline, component_builder, tmpdir):
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})

    trainer = Trainer(_config, component_builder)
    persisted_path = trainer.persist(tmpdir.strpath)

    loaded = Interpreter.load(persisted_path, component_builder)

    assert loaded.pipeline
    assert loaded.parse("Rasa is great!") is not None
Example 13
def train_eval_rasa_nlu_model(lang='en', cross=False, save=''):
    """ Train snips data from all brat annotation object 

    :param lang: abbreviate language name 
    :param save: path where model will be save
    :rtype: None
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu.model import Trainer
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu import config
    from rasa.nlu.test import run_evaluation

    config_file = source_config / "config_rasa_converrt.yml"

    if cross:
        filename_results = source_result / "rasa_cross_semeval_2020_model_task1_{}".format(save)

        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_rasa_data_task1()

        training_data = load_data(str(train_data[0]))
        builder = ComponentBuilder(use_cache=True)  
        trainer = Trainer(config.load(str(config_file)), builder)
        
        print("--> Training patent data with Rasa...")
        trainer.train(training_data, num_threads=8, n_jobs=-1, verbose=True)
        
        print("--> Saving model trained with Rasa (Rasa)...")
        model_directory = trainer.persist(filename_results)
        
        print("--> Evaluating training data with Rasa metrics (Cross-validation)...")
        import os
        from datetime import datetime
        filename_test = str(train_data[1])
        print(filename_test)
        dmtime = "test_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
        out_test = source_result / "rasa_cross_evaluation_task1" / dmtime
        model_directory = sorted(filename_results.glob("nlu_*"), key=os.path.getmtime)[-1] 
        run_evaluation(filename_test, str(model_directory), output_directory=str(out_test))

    else:
        filename_results = source_result / "rasa_semeval_2020_model_task1_{}".format(save)
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_file = train_data_obj.build_rasa_data_task1()

        training_data = load_data(train_file)
        builder = ComponentBuilder(use_cache=True)  
        trainer = Trainer(config.load(str(config_file)), builder)
        
        print("--> Training patent data with Rasa...")
        trainer.train(training_data, num_threads=8, verbose=True, n_jobs=-1, fixed_model_name="nlu")
        
        print("--> Saving model trained with Rasa (Rasa)...")
        model_directory = trainer.persist(filename_results)
Example 14
def __init__(self):
    try:
        self.interpreter = Interpreter.load("./models/nlu/current")
    except Exception:
        training_data = load_data("./data/nlu.md")
        trainer = Trainer(config.load("config.yml"))
        self.interpreter = trainer.train(training_data)
        trainer.persist("./models/nlu",
                        fixed_model_name="current")
    self.music_verbs = ['Riproduci', 'Suona', 'Fai partire', 'Avvia']
Example 15
def test_run_cv_evaluation(
        pretrained_embeddings_spacy_config: RasaNLUModelConfig,
        monkeypatch: MonkeyPatch):
    td = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa.json")

    nlu_config = RasaNLUModelConfig({
        "language": "en",
        "pipeline": [
            {"name": "WhitespaceTokenizer"},
            {"name": "CountVectorsFeaturizer"},
            {"name": "DIETClassifier", EPOCHS: 2},
        ],
    })

    # mock training
    trainer = Trainer(nlu_config)
    trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)
    mock = Mock(return_value=Interpreter(trainer.pipeline, None))
    monkeypatch.setattr(Trainer, "train", mock)

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert all(key in intent_results.evaluation
               for key in ["errors", "report"])
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values())
    for extractor_evaluation in entity_results.evaluation.values():
        assert all(key in extractor_evaluation for key in ["errors", "report"])
Example 16
def test_train_with_empty_data(language, pipeline, component_builder, tmpdir):
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    trainer = Trainer(_config, component_builder)
    trainer.train(TrainingData())
    persistor = create_persistor(_config)
    persisted_path = trainer.persist(tmpdir.strpath, persistor,
                                     project_name="my_project")
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
Example 17
def train_nlu():
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.model import Trainer

    training_data = load_data('data/nlu.md')
    trainer = Trainer(config.load("config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/',
                                      fixed_model_name="current")

    return model_directory
Example 18
def train(self):
    # load the nlu training samples
    training_data = load_data(self.data)
    # trainer to educate our pipeline
    trainer = Trainer(config.load(self.pipeline))
    # train the model
    self.interpreter = trainer.train(training_data)
    # store it for future use (persist_nlu_training_data is a boolean flag)
    self.model_directory = trainer.persist(
        "opennlu/data/model/rasa",
        fixed_model_name=self.name,
        persist_nlu_training_data=True)
Example 19
def train_test(td_file, config_file, model_dir):
    # helper function to split into test and train and evaluate on results.

    td = load_data(td_file)
    trainer = Trainer(config.load(config_file))
    train, test = td.train_test_split(train_frac=0.6)
    trainer.train(train)
    model_loc = trainer.persist(model_dir)
    with open("data/tmp/temp_test.json", "w", encoding="utf8") as f:
        f.write(test.as_json())
    with open("data/temp_train.json", "w", encoding="utf8") as f:
        f.write(train.as_json())
    evaluate_model("data/tmp/temp_test.json", model_loc)
Example 20
async def train(
    nlu_config: Union[Text, Dict, RasaNLUModelConfig],
    data: Union[Text, "TrainingDataImporter"],
    path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    storage: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
    training_data_endpoint: Optional[EndpointConfig] = None,
    persist_nlu_training_data: bool = False,
    model_to_finetune: Optional[Interpreter] = None,
    **kwargs: Any,
) -> Tuple[Trainer, Interpreter, Optional[Text]]:
    """Loads the trainer and the data and runs the training of the model."""
    from rasa.shared.importers.importer import TrainingDataImporter

    if not isinstance(nlu_config, RasaNLUModelConfig):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(
        nlu_config, component_builder, model_to_finetune=model_to_finetune
    )
    persistor = create_persistor(storage)
    if training_data_endpoint is not None:
        training_data = await load_data_from_endpoint(
            training_data_endpoint, nlu_config.language
        )
    elif isinstance(data, TrainingDataImporter):
        training_data = await data.get_nlu_data(nlu_config.language)
    else:
        training_data = load_data(data, nlu_config.language)

    training_data.print_stats()
    if training_data.entity_roles_groups_used():
        rasa.shared.utils.common.mark_as_experimental_feature(
            "Entity Roles and Groups feature"
        )

    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(
            path, persistor, fixed_model_name, persist_nlu_training_data
        )
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
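This variant is a coroutine, so it must be awaited; a minimal driver, assuming Rasa 2.x and illustrative paths:

import asyncio

async def main():
    trainer, interpreter, persisted_path = await train(
        "config.yml",
        data="data/nlu",
        path="models/nlu",
        fixed_model_name="current",
    )
    print("persisted to:", persisted_path)

asyncio.run(main())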
Example 21
def __init__(self, agentName, botconfig, data, **kwargs):
    logger.info("Training agent '%s' in progress", agentName)
    trainingData = load_data(data)
    self.intents = list(trainingData.intents)
    self.entities = list(trainingData.entities)
    trainer = Trainer(config.load(botconfig))
    self.interpreter = trainer.train(trainingData)
    self.model_path = "./models/" + agentName + "/"
    persist_path = trainer.persist(self.model_path)
    self.tar_path = package_model(fingerprint=None,
                                  train_path=persist_path,
                                  output_directory=self.model_path)
    self.model_name = self.tar_path.replace(self.model_path, "")
    self.model_version = self.model_name[:self.model_name.index(".tar.gz")]
Example 22
def train():
    td = load_data("{}/demo_rasa.json".format(prj_dir))
    _config = RasaNLUModelConfig(load_json("{}/config.json".format(prj_dir)))
    trainer = Trainer(_config)
    trainer.train(td)
    persisted_path = trainer.persist("{}/models".format(prj_dir))
    loaded = Interpreter.load(persisted_path)
    assert loaded.pipeline

    # Inference
    result = loaded.parse("i'm looking for a place in the north of town")
    result = loaded.parse("show me chinese restaurants")
    result = dict(filter(lambda item: item[0] not in ["intent_ranking"], result.items()))
    show_dict(result)
Example 23
def call():
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu.model import Trainer

    builder = ComponentBuilder(use_cache=True)

    training_data = load_data('./data/weapon.md')
    trainer = Trainer(config.load("./config.yml"), builder)
    trainer.train(training_data)
    model_directory = trainer.persist('./models', fixed_model_name="model")
    print('done')
    return model_directory
Example 24
def train_test(td_file, config_file, model_dir, key="company", noise=0.1):
    """trains a model using the training data
       (split into train-test) and config"""
    td = load_data(td_file)

    trainer = Trainer(config.load(config_file))
    train, test = td.train_test_split(train_frac=0.8)
    test = add_noise(test, key, noise=noise)

    trainer.train(train)
    tmp_fname = "data/tmp/temp_test.json"
    model_loc = trainer.persist(model_dir)
    with open(tmp_fname, "w", encoding="utf8") as f:
        f.write(test.as_json())
    evaluate_model(tmp_fname, model_loc)
Example 25
def _get_tokenizer_from_nlu_config(
    nlu_config: Optional[RasaNLUModelConfig] = None,
) -> Optional[Tokenizer]:
    """Extracts the first Tokenizer in the NLU pipeline.

    Args:
        nlu_config: NLU Config.

    Returns:
        The first Tokenizer in the NLU pipeline, if any.
    """
    if not nlu_config:
        return None

    pipeline: List[Component] = Trainer(nlu_config, skip_validation=True).pipeline
    tokenizer: Optional[Tokenizer] = None
    for component in pipeline:
        if isinstance(component, Tokenizer):
            if tokenizer:
                rasa.shared.utils.io.raise_warning(
                    "The pipeline contains more than one tokenizer. "
                    "Only the first tokenizer will be used for story validation.",
                )
            tokenizer = component

    return tokenizer
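An illustrative call with a minimal single-tokenizer config (the config values are assumptions); the helper returns None when no config is passed:

from rasa.nlu.config import RasaNLUModelConfig

# Hypothetical minimal pipeline with exactly one tokenizer.
cfg = RasaNLUModelConfig({"pipeline": [{"name": "WhitespaceTokenizer"}]})
assert _get_tokenizer_from_nlu_config(cfg) is not None
assert _get_tokenizer_from_nlu_config(None) is None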
Example 26
def run_trial(space):
    """The objective function is pickled and transferred to the workers.
       Hence, this function has to contain all the imports we need.
    """

    data_dir = os.environ.get("INPUT_DATA_DIRECTORY", "./data")
    model_dir = os.environ.get("INPUT_MODEL_DIRECTORY", "./models")
    target_metric = os.environ.get("INPUT_TARGET_METRIC", "f1_score")

    if target_metric not in AVAILABLE_METRICS:
        logger.error("The metric '{}' is not in the available metrics. "
                     "Please use one of the available metrics: {}."
                     "".format(target_metric, AVAILABLE_METRICS))

        return {"loss": 1, "status": STATUS_FAIL}

    logger.debug("Search space: {}".format(space))

    # The epoch has to be an int since `tqdm` otherwise will cause an exception.
    if "epochs" in space:
        space["epochs"] = int(space["epochs"])

    with open(os.path.join(data_dir, "template_config.yml")) as f:
        config_yml = f.read().format(**space)
        config = read_yaml(config_yml)
        config = rasa.nlu.config.load(config)

    trainer = Trainer(config)
    training_data = load_data(os.path.join(data_dir, "train.md"))
    test_data_path = os.path.join(data_dir, "validation.md")

    # wrap train and eval in a try/except in case
    # nlu_hyperopt proposes an invalid combination of params

    try:
        model = trainer.train(training_data)
        model_path = trainer.persist(model_dir)

        if target_metric is None or target_metric == "threshold_loss":
            loss = _get_threshold_loss(model, test_data_path)
        else:
            loss = _get_nlu_evaluation_loss(model_path, target_metric,
                                            test_data_path)
        return {"loss": loss, "status": STATUS_OK}
    except Exception as e:
        logger.error(e)
        return {"loss": 1, "status": STATUS_FAIL}
Example 27
def cross_validate(
        data: TrainingData, n_folds: int,
        nlu_config: Union[RasaNLUModelConfig, Text]
) -> Tuple[CVEvaluationResult, CVEvaluationResult]:
    """Stratified cross validation on data.

    Args:
        data: Training Data
        n_folds: integer, number of cv folds
        nlu_config: nlu config file

    Returns:
        A tuple of CVEvaluationResult for intents and entities. Each holds
        train and test dictionaries with a key, list structure, where each
        entry in a list corresponds to the result for one fold.
    """
    from collections import defaultdict
    import tempfile

    if isinstance(nlu_config, str):
        nlu_config = config.load(nlu_config)

    trainer = Trainer(nlu_config)
    trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)

    intent_train_results = defaultdict(list)
    intent_test_results = defaultdict(list)
    entity_train_results = defaultdict(lambda: defaultdict(list))
    entity_test_results = defaultdict(lambda: defaultdict(list))
    tmp_dir = tempfile.mkdtemp()

    for train, test in generate_folds(n_folds, data):
        interpreter = trainer.train(train)

        # calculate train accuracy
        intent_train_results, entity_train_results = combine_result(
            intent_train_results, entity_train_results, interpreter, train)
        # calculate test accuracy
        intent_test_results, entity_test_results = combine_result(
            intent_test_results, entity_test_results, interpreter, test)

    shutil.rmtree(tmp_dir, ignore_errors=True)

    return (
        CVEvaluationResult(dict(intent_train_results),
                           dict(intent_test_results)),
        CVEvaluationResult(dict(entity_train_results),
                           dict(entity_test_results)),
    )
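A hedged usage sketch: run the cross-validation above and inspect per-fold test metrics (the data file and config path are illustrative).

# Illustrative invocation; each metric maps to a list with one entry per fold.
td = load_data("data/examples/rasa/demo-rasa.json")
intent_results, entity_results = cross_validate(td, n_folds=3,
                                                nlu_config="config.yml")
for metric, per_fold in intent_results.test.items():
    print(metric, per_fold)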
Example 28
def test_invalid_many_tokenizers_in_config():
    nlu_config = {
        "pipeline": [{"name": "WhitespaceTokenizer"}, {"name": "SpacyTokenizer"}]
    }

    with pytest.raises(config.InvalidConfigError) as execinfo:
        Trainer(config.RasaNLUModelConfig(nlu_config))
    assert "More then one tokenizer is used" in str(execinfo.value)
Example 29
    def __init__(self, languages=None):

        if languages is None:
            languages = ['en', 'de']

        pipeline = [{"name": "WhitespaceTokenizer"},
                    {"name": "CRFEntityExtractor"},
                    {"name": "EntitySynonymMapper"},
                    {"name": "CountVectorsFeaturizer"},
                    {"name": "EmbeddingIntentClassifier"}]

        self.interpreters = {}
        for lang in languages:
            filepath = resource_filename(__name__, f'nlu_{lang}.md')
            training_data = load_data(filepath)
            trainer = Trainer(RasaNLUModelConfig({"pipeline": pipeline}))
            self.interpreters[lang] = trainer.train(training_data)
Example 30
def test_invalid_many_tokenizers_in_config():
    nlu_config = {
        "pipeline": [{"name": "WhitespaceTokenizer"}, {"name": "SpacyTokenizer"}]
    }

    with pytest.raises(InvalidConfigException) as execinfo:
        Trainer(config.RasaNLUModelConfig(nlu_config))
    assert "The pipeline configuration contains more than one" in str(execinfo.value)