def train_eval_rasa_nlu_model(lang='en', cross=False, save=''):
    """Train a Rasa NLU model on the brat-annotated task-1 data.

    :param lang: abbreviated language name (e.g. 'en')
    :param cross: when True, also evaluate the trained model on the
        held-out split returned by the data builder
    :param save: version tag appended to the result paths
    :rtype: None
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu.model import Trainer
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu import config
    from rasa.nlu.test import run_evaluation

    config_file = source_config / "config_rasa_converrt.yml"

    if cross:
        filename_results = source_result / "rasa_cross_semeval_2020_model_task1_{}".format(save)

        # builder returns (train_path, test_path) in cross mode
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_rasa_data_task1()

        training_data = load_data(str(train_data[0]))
        builder = ComponentBuilder(use_cache=True)
        trainer = Trainer(config.load(str(config_file)), builder)

        print("--> Training patent data with Rasa...")
        trainer.train(training_data, num_threads=8, n_jobs=-1, verbose=True)

        print("--> Saving model trained with Rasa (Rasa)...")
        model_directory = trainer.persist(filename_results)

        print("--> Evaluating training data with Rasa metrics (Cross-validation)...")
        import os
        from datetime import datetime
        filename_test = str(train_data[1])
        print(filename_test)
        dmtime = "test_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
        out_test = source_result / "rasa_cross_evaluation_task1" / dmtime
        # evaluate the most recently persisted model directory
        model_directory = sorted(filename_results.glob("nlu_*"), key=os.path.getmtime)[-1]
        run_evaluation(filename_test, str(model_directory), output_directory=str(out_test))

    else:
        filename_results = source_result / "rasa_semeval_2020_model_task1_{}".format(save)
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_file = train_data_obj.build_rasa_data_task1()

        # bug fix: the data builder returns a path object and load_data
        # expects a string path — the cross branch already converts with
        # str(); do the same here.
        training_data = load_data(str(train_file))
        builder = ComponentBuilder(use_cache=True)
        trainer = Trainer(config.load(str(config_file)), builder)

        print("--> Training patent data with Rasa...")
        trainer.train(training_data, num_threads=8, verbose=True, n_jobs=-1, fixed_model_name="nlu")

        print("--> Saving model trained with Rasa (Rasa)...")
        model_directory = trainer.persist(filename_results)
예제 #2
0
def test_override_defaults_supervised_embeddings_pipeline():
    """Pipeline-level overrides must take precedence over component defaults."""
    component_builder = ComponentBuilder()

    model_config = RasaNLUModelConfig(
        {
            "language": "en",
            "pipeline": [
                {"name": "SpacyNLP"},
                {"name": "SpacyTokenizer"},
                {"name": "SpacyFeaturizer", "pooling": "max"},
                {
                    "name": "DIETClassifier",
                    "epochs": 10,
                    "hidden_layers_sizes": {"text": [256, 128]},
                },
            ],
        }
    )

    names = model_config.component_names

    featurizer = component_builder.create_component(
        model_config.for_component(names.index("SpacyFeaturizer")), model_config
    )
    assert featurizer.component_config["pooling"] == "max"

    classifier = component_builder.create_component(
        model_config.for_component(names.index("DIETClassifier")), model_config
    )
    assert classifier.component_config["epochs"] == 10
    # the override must keep the same dict shape as the declared default
    assert (
        classifier.defaults["hidden_layers_sizes"].keys()
        == classifier.component_config["hidden_layers_sizes"].keys()
    )
예제 #3
0
def test_n2g_parse(text):
    """Load the persisted name-to-gender helper and run it over *text*."""
    msg = Message(text)
    gold_spans = [
        {'end': 3, 'label': 'PER', 'start': 0},
        {'end': 9, 'label': 'PER', 'start': 7},
        {'end': 11, 'label': 'Pronoun', 'start': 10},
    ]
    msg.set("spans", gold_spans, add_to_output=True)

    meta = {
        "name": "easy_n2g",
        "data_path": "data/n2g/name_dev.dat",
        "file": "component_1_easy_n2g.pkl",
        "class": "litemind.nlu.utils.n2g.easy_n2g.N2GHelper"
    }
    helper = ComponentBuilder().load_component(
        meta, 'models/coref/model_20190515-150912', Metadata({}, None))
    helper.process(msg)
    pprint.pprint(msg.data)
예제 #4
0
def test_c2g_file_parse(file_dir, filename):
    """Read an Excel call log into row lists and feed it through CallParse."""
    path = os.path.join(file_dir, filename)
    rows = []
    if os.path.isfile(path):
        sheet = pd.read_excel(path, header=None)
        columns = list(sheet.columns)
        for _, record in sheet.iterrows():
            # keep cell text, but map missing cells to None
            rows.append(
                [str(record[c]) if pd.notna(record[c]) else None
                 for c in columns]
            )

    msg = Message(rows)
    meta = {
        "name": "call_parse",
        "file": "component_0_call_parse.pkl",
        "class": "litemind.call2graph.parse.c2g.CallParse"
    }
    parser = ComponentBuilder().load_component(meta, 'models/c2g/model',
                                               Metadata({}, None))
    parser.process(msg)
    pprint.pprint(msg.data)
예제 #5
0
    def __init__(self,
                 project_dir=None,
                 max_training_processes=1,
                 response_log=None,
                 emulation_mode=None,
                 remote_storage=None,
                 component_builder=None,
                 model_server=None,
                 wait_time_between_pulls=None):
        """Set up the data router: query logger, emulator, project store,
        a shared component builder, and a spawn-based training pool.

        :param project_dir: root directory holding NLU projects
        :param max_training_processes: upper bound on concurrent trainings
            (clamped to at least 1)
        :param response_log: destination for the query logger, if any
        :param emulation_mode: name of the request/response emulator to use
        :param remote_storage: remote persistor identifier, if models are
            stored remotely
        :param component_builder: shared builder for pipeline components;
            a caching builder is created when not supplied
        :param model_server: endpoint to pull models from, if any
        :param wait_time_between_pulls: seconds between model-server pulls
        """
        # never allow a zero/negative pool size
        self._training_processes = max(max_training_processes, 1)
        self._current_training_processes = 0
        self.responses = self._create_query_logger(response_log)
        self.project_dir = config.make_path_absolute(project_dir)
        self.emulator = self._create_emulator(emulation_mode)
        self.remote_storage = remote_storage
        self.model_server = model_server
        self.wait_time_between_pulls = wait_time_between_pulls

        # reuse the caller's builder when given so component caches are shared
        if component_builder:
            self.component_builder = component_builder
        else:
            self.component_builder = ComponentBuilder(use_cache=True)

        self.project_store = self._create_project_store(project_dir)

        # tensorflow sessions are not fork-safe,
        # and training processes have to be spawned instead of forked. See
        # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment
        # -258934405
        multiprocessing.set_start_method('spawn', force=True)

        self.pool = ProcessPool(self._training_processes)
예제 #6
0
def train_update(
    repository_version_language_id, by_user, repository_authorization, from_queue="celery"
):  # pragma: no cover
    """Train and persist a Rasa NLU model for one repository version/language.

    Pulls the training configuration and examples from the backend, builds
    the pipeline, trains, and uploads the model through a BothubPersistor.
    On failure the backend is notified before the exception is re-raised;
    the captured training log is always reported in the ``finally`` block.

    :param repository_version_language_id: backend id of the version/language
    :param by_user: id of the user who triggered the training
    :param repository_authorization: auth token for backend requests
    :param from_queue: name of the queue that dispatched this job
    """

    update_request = backend().request_backend_start_training_nlu(
        repository_version_language_id, by_user, repository_authorization, from_queue
    )

    examples_list = get_examples_request(repository_version_language_id, repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []

            for example in examples_list:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    )
                )

            update_request["dataset_size"] = len(examples)

            pipeline_builder = PipelineBuilder(update_request)
            pipeline_builder.print_pipeline()
            rasa_nlu_config = pipeline_builder.get_nlu_model()

            # use_cache=False: build fresh components for every training run
            trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))
            training_data = TrainingData(
                training_examples=examples, lookup_tables=None
            )

            trainer.train(training_data)

            persistor = BothubPersistor(
                repository_version_language_id, repository_authorization, rasa_version
            )
            # model name encodes version, training counter and language
            trainer.persist(
                mkdtemp(),
                persistor=persistor,
                fixed_model_name=f"{update_request.get('repository_version')}_"
                f"{update_request.get('total_training_end') + 1}_"
                f"{update_request.get('language')}",
            )
        except Exception as e:
            logger.exception(e)
            backend().request_backend_trainfail_nlu(
                repository_version_language_id, repository_authorization
            )
            raise e
        finally:
            # always ship the captured training log back to the backend
            backend().request_backend_traininglog_nlu(
                repository_version_language_id, pl.getvalue(), repository_authorization
            )
예제 #7
0
def test_coref_parse(text):
    """Run LTP NER, inject gold spans, then apply the coreference strategy."""
    msg = Message(text)
    model_dir = 'models/coref/model_20190515-150912'

    ltp_meta = {
        "name": "ltp",
        "path": "/Users/zhangzhen/data/ltp_data_v3.4.0",
        "lexicon": "lexicon",
        "dimension": {
            "Nh": "PER",
            "Ni": "ORG",
            "Ns": "LOC"
        },
        "class": "litemind.nlu.utils.ltp.LtpHelper"
    }
    ltp_helper = ComponentBuilder().load_component(
        ltp_meta, model_dir, Metadata({}, None))
    ltp_helper.process(msg)

    gold_spans = [
        {'end': 3, 'gender': '男', 'label': 'PER', 'start': 0},
        {'end': 9, 'gender': '男', 'label': 'PER', 'start': 7},
        {'end': 11, 'label': 'Pronoun', 'start': 10},
    ]
    msg.set("spans", gold_spans, add_to_output=True)

    stg_meta = {
        "name": "stg",
        "w2v_path": "/Users/zhangzhen/data/emb_ch/embedding.50.cformat",
        "class": "litemind.coref.stg.Strategy"
    }
    strategy = ComponentBuilder().load_component(
        stg_meta, model_dir, Metadata({}, None))
    strategy.process(msg)
    pprint.pprint(msg.data)
예제 #8
0
def test_c2g_parse(message, filename):
    """Load the persisted CallParse component and apply it to *message*."""
    meta = {
        "name": "call_parse",
        "file": "component_0_call_parse.pkl",
        "class": "litemind.call2graph.parse.c2g.CallParse"
    }
    parser = ComponentBuilder().load_component(
        meta, 'models/c2g/model', Metadata({}, None))
    parser.process(message)
예제 #9
0
def test_override_defaults_supervised_embeddings_pipeline():
    """Overrides from the YAML config must be visible on the built components."""
    cfg = config.load("data/test/config_embedding_test.yml")
    component_builder = ComponentBuilder()

    first = component_builder.create_component(cfg.for_component(0), cfg)
    assert first.max_ngram == 3

    second = component_builder.create_component(cfg.for_component(1), cfg)
    assert second.epochs == 10
예제 #10
0
    def validate_rasa_config(config: Dict):
        """Validate bot ``config.yml`` content for invalid entries.

        Each pipeline component is instantiated once; an invalid entry makes
        ``create_component`` raise, surfacing the error to the caller.

        :param config: raw configuration mapping
        :return: None
        """
        rasa_config = RasaNLUModelConfig(config)
        builder = ComponentBuilder()
        for index, _ in enumerate(rasa_config.pipeline):
            builder.create_component(rasa_config.for_component(index), rasa_config)

        configuration.load(config)
예제 #11
0
def call():
    """Train the weapon NLU model and return the persisted model directory."""
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu.model import Trainer

    component_builder = ComponentBuilder(use_cache=True)

    # load the markdown training data first, then build the trainer
    training_data = load_data('./data/weapon.md')
    trainer = Trainer(config.load("./config.yml"), component_builder)
    trainer.train(training_data)
    model_directory = trainer.persist('./models', fixed_model_name="model")
    print('done')
    return model_directory
예제 #12
0
def test_ltp(text):
    """Run the persisted LTP helper over *text* and print the message data."""
    msg = Message(text)
    meta = {
        "name": "ltp",
        "path": "/Users/zhangzhen/data/ltp_data_v3.4.0",
        "lexicon": "lexicon",
        "dimension": {
            "Nh": "PER",
            "Ni": "ORG",
            "Ns": "LOC"
        },
        "class": "litemind.nlu.utils.ltp.LtpHelper"
    }
    helper = ComponentBuilder().load_component(
        meta, 'models/coref/model_20190515-150912', Metadata({}, None))
    helper.process(msg)
    pprint.pprint(msg.data)
예제 #13
0
def train_update(update, examples_data, label_examples_data, algorithm,
                 ner_spacy, similarity_type, language, connection):
    """Train and persist a Bothub NLU model for one repository update.

    :param update: update identifier; also used as the persisted model name
    :param examples_data: iterable of dicts with text/intent/entities
    :param label_examples_data: iterable of dicts with text/entities only
    :param algorithm: classifier algorithm selector for the pipeline
    :param ner_spacy: whether the pipeline uses the spaCy entity extractor
    :param similarity_type: similarity metric for the embedding classifier
    :param language: model language code
    :param connection: storage connection handed to the persistor
    """
    with PokeLogging() as pl:
        try:
            examples = [
                Message.build(
                    text=example.get("text"),
                    intent=example.get("intent"),
                    entities=example.get("entities"),
                )
                for example in examples_data
            ]
            label_examples = [
                Message.build(
                    text=label_example.get("text"),
                    entities=label_example.get("entities"),
                )
                for label_example in label_examples_data
            ]

            rasa_nlu_config = get_rasa_nlu_config_from_update(
                algorithm, ner_spacy, similarity_type, language)
            trainer = Trainer(rasa_nlu_config,
                              ComponentBuilder(use_cache=False))
            training_data = BothubTrainingData(
                label_training_examples=label_examples,
                training_examples=examples)

            trainer.train(training_data)

            persistor = BothubPersistor(update, connection)
            trainer.persist(
                mkdtemp(),
                persistor=persistor,
                fixed_model_name=str(update),
            )
        except Exception as e:
            logger.exception(e)
            # bare `raise` re-raises with the original traceback intact
            # (was `raise e`); the no-op `finally: pass` was removed
            raise
예제 #14
0
async def test_load_model_from_server(trained_nlu_model):
    """A model pulled from a server endpoint must carry the server's ETag."""
    expected_fingerprint = "somehash"
    endpoint = EndpointConfig("http://server.com/models/nlu/tags/latest")

    # register a mocked HTTP response that serves the zipped model
    with io.open(NLU_MODEL_PATH, "rb") as model_file:
        responses.add(
            responses.GET,
            endpoint.url,
            headers={"ETag": expected_fingerprint,
                     "filename": "my_model_xyz.tar.gz"},
            body=model_file.read(),
            content_type="application/zip",
            stream=True,
        )

    nlu_model = await load_from_server(ComponentBuilder(use_cache=False),
                                       model_server=endpoint)
    assert nlu_model.fingerprint == expected_fingerprint
예제 #15
0
    def __init__(
        self,
        project_dir=None,
        max_worker_processes=1,
        response_log=None,
        emulation_mode=None,
        remote_storage=None,
        component_builder=None,
        model_server=None,
        wait_time_between_pulls=None,
    ):
        """Set up the data router and asynchronously load the project store.

        :param project_dir: root directory holding NLU projects
        :param max_worker_processes: upper bound on concurrent workers
            (clamped to at least 1)
        :param response_log: destination for the query logger, if any
        :param emulation_mode: name of the request/response emulator to use
        :param remote_storage: remote persistor identifier, if any
        :param component_builder: shared pipeline-component builder; a
            caching builder is created when not supplied
        :param model_server: endpoint to pull models from, if any
        :param wait_time_between_pulls: seconds between model-server pulls
        """
        self._worker_processes = max(max_worker_processes, 1)
        self._current_worker_processes = 0
        self.responses = self._create_query_logger(response_log)
        self.project_dir = config.make_path_absolute(project_dir)
        self.emulator = self._create_emulator(emulation_mode)
        self.remote_storage = remote_storage
        self.model_server = model_server
        self.wait_time_between_pulls = wait_time_between_pulls

        # reuse the caller's builder when given so component caches are shared
        if component_builder:
            self.component_builder = component_builder
        else:
            self.component_builder = ComponentBuilder(use_cache=True)

        # TODO: Should be moved to separate method
        # _create_project_store is a coroutine; drive it on a private loop
        # since __init__ cannot await
        loop = asyncio.get_event_loop()
        if loop.is_closed():
            loop = asyncio.new_event_loop()
        self.project_store = loop.run_until_complete(
            self._create_project_store(self.project_dir)
        )
        loop.close()

        # tensorflow sessions are not fork-safe,
        # and training processes have to be spawned instead of forked. See
        # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment
        # -258934405
        multiprocessing.set_start_method("spawn", force=True)

        self.pool = ProcessPoolExecutor(max_workers=self._worker_processes)
예제 #16
0
    def __init__(
        self,
        model_dir: Optional[Text] = None,
        max_worker_processes: int = 1,
        response_log: Optional[Text] = None,
        emulation_mode: Optional[Text] = None,
        remote_storage: Optional[Text] = None,
        component_builder: ComponentBuilder = None,
        model_server: EndpointConfig = None,
        wait_time_between_pulls: int = None,
    ):
        """Set up the NLU data router.

        :param model_dir: directory for persisted models; defaults to the
            system temp directory when not given
        :param max_worker_processes: upper bound on concurrent workers
            (clamped to at least 1)
        :param response_log: destination for the query logger, if any
        :param emulation_mode: name of the request/response emulator to use
        :param remote_storage: remote persistor identifier, if any
        :param component_builder: shared pipeline-component builder; a
            caching builder is created when not supplied
        :param model_server: endpoint to pull models from, if any
        :param wait_time_between_pulls: seconds between model-server pulls
        """
        self._worker_processes = max(max_worker_processes, 1)
        self._current_worker_processes = 0
        self.responses = self._create_query_logger(response_log)

        if model_dir is None:
            model_dir = tempfile.gettempdir()
        self.model_dir = os.path.abspath(model_dir)

        self.emulator = self._create_emulator(emulation_mode)
        self.remote_storage = remote_storage
        self.model_server = model_server
        self.wait_time_between_pulls = wait_time_between_pulls

        # reuse the caller's builder when given so component caches are shared
        if component_builder:
            self.component_builder = component_builder
        else:
            self.component_builder = ComponentBuilder(use_cache=True)

        # start with a fallback model so the router can answer immediately
        self.nlu_model = NLUModel.fallback_model(self.component_builder)

        # tensorflow sessions are not fork-safe,
        # and training processes have to be spawned instead of forked. See
        # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment
        # -258934405
        multiprocessing.set_start_method("spawn", force=True)
예제 #17
0
    (trainer, trained, persisted_path) = await train(
        _config,
        path=model_path,
        data="../../../data/test/demo-rasa-zh.json",
        component_builder=component_builder,
    )

    assert trainer.pipeline
    assert trained.pipeline
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    text = "感冒发烧了怎么办"
    print("--------------------------------------------------")
    print(trained.parse(text))
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    print(loaded.parse(text))
    print("--------------------------------------------------")
    assert loaded.parse(text) == trained.parse(text)


if __name__ == '__main__':
    # test_train_model_checkpointing_peter()
    # minimal hyper-parameters so the smoke run finishes quickly
    classifier_params = {RANDOM_SEED: 1, EPOCHS: 1, BILOU_FLAG: False}
    # the test function is a coroutine; drive it to completion on a loop
    loop = asyncio.get_event_loop()
    res = loop.run_until_complete(
        train_persist_load_with_composite_entities(classifier_params,
                                                   ComponentBuilder(),
                                                   "../models"))
    loop.close()
예제 #18
0
def component_builder():
    """Fixture-style factory returning a fresh, default ComponentBuilder."""
    builder = ComponentBuilder()
    return builder
예제 #19
0
def evaluate_crossval_update(repository_version_language,
                             repository_authorization,
                             aws_bucket_authentication, language):
    """Run a 3-fold cross-validated evaluation of a repository version.

    Builds the pipeline from the backend configuration, trains/evaluates on
    each fold, aggregates intent and entity metrics, uploads result charts,
    and reports per-intent and per-entity scores back to the backend.

    :param repository_version_language: backend id of the version/language
    :param repository_authorization: auth token for backend requests
    :param aws_bucket_authentication: credentials used when saving charts
    :param language: language code used to pick the preprocessor
    :return: dict with the created evaluation's id and version
    """
    update_request = backend().request_backend_get_current_configuration(
        repository_authorization)
    examples_list = get_examples_request(repository_version_language,
                                         repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []

            for example in examples_list:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    ))

            data = TrainingData(training_examples=examples)
            pipeline_builder = PipelineBuilder(update_request)
            pipeline_builder.print_pipeline()
            rasa_nlu_config = pipeline_builder.get_nlu_model()
            trainer = Trainer(rasa_nlu_config,
                              ComponentBuilder(use_cache=False))

            result = {
                "intent_evaluation": None,
                "entity_evaluation": None,
                "response_selection_evaluation": None,
            }

            # per-fold metric accumulators
            intent_test_metrics: IntentMetrics = defaultdict(list)
            entity_test_metrics: EntityMetrics = defaultdict(
                lambda: defaultdict(list))
            response_selection_test_metrics: ResponseSelectionMetrics = defaultdict(
                list)

            intent_results: List[IntentEvaluationResult] = []
            entity_results: List[EntityEvaluationResult] = []
            response_selection_test_results: List[
                ResponseSelectionEvaluationResult] = ([])
            entity_evaluation_possible = False
            extractors: Set[Text] = set()

            language_preprocessor = PreprocessingFactory(language).factory()

            for train, test in generate_folds(3, data):

                # train on this fold; `interpreter` from the last fold is
                # also reused for extractor lookup after the loop
                interpreter = trainer.train(train)

                test.training_examples = [
                    language_preprocessor.preprocess(x)
                    for x in test.training_examples
                ]

                # calculate test accuracy
                combine_result(
                    intent_test_metrics,
                    entity_test_metrics,
                    response_selection_test_metrics,
                    interpreter,
                    test,
                    intent_results,
                    entity_results,
                    response_selection_test_results,
                )

                if not extractors:
                    extractors = get_entity_extractors(interpreter)
                    entity_evaluation_possible = (
                        entity_evaluation_possible
                        or _contains_entity_labels(entity_results))

            if intent_results:
                result["intent_evaluation"] = evaluate_intents(intent_results)

            if entity_results:
                extractors = get_entity_extractors(interpreter)
                result["entity_evaluation"] = evaluate_entities(
                    entity_results, extractors)

            intent_evaluation = result.get("intent_evaluation")
            entity_evaluation = result.get("entity_evaluation")

            # NOTE(review): either evaluation may still be None here (no
            # intent/entity results); the .get calls below would then raise
            # AttributeError — confirm upstream guarantees non-empty results.
            merged_logs = merge_intent_entity_log(intent_evaluation,
                                                  entity_evaluation)
            log = get_formatted_log(merged_logs)

            charts = plot_and_save_charts(repository_version_language,
                                          intent_results,
                                          aws_bucket_authentication)
            evaluate_result = backend(
            ).request_backend_create_evaluate_results(
                {
                    "repository_version": repository_version_language,
                    "matrix_chart": charts.get("matrix_chart"),
                    "confidence_chart": charts.get("confidence_chart"),
                    "log": json.dumps(log),
                    "intentprecision": intent_evaluation.get("precision"),
                    "intentf1_score": intent_evaluation.get("f1_score"),
                    "intentaccuracy": intent_evaluation.get("accuracy"),
                    "entityprecision": entity_evaluation.get("precision"),
                    "entityf1_score": entity_evaluation.get("f1_score"),
                    "entityaccuracy": entity_evaluation.get("accuracy"),
                    "cross_validation": True
                },
                repository_authorization,
            )

            intent_reports = intent_evaluation.get("report", {})
            entity_reports = entity_evaluation.get("report", {})

            # report per-intent scores, skipping aggregate rows
            for intent_key in intent_reports.keys():
                if intent_key not in excluded_itens:
                    intent = intent_reports.get(intent_key)

                    backend().request_backend_create_evaluate_results_intent(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "precision": intent.get("precision"),
                            "recall": intent.get("recall"),
                            "f1_score": intent.get("f1-score"),
                            "support": intent.get("support"),
                            "intent_key": intent_key,
                        },
                        repository_authorization,
                    )

            # remove group entities when entities returned as "<entity>.<group_entity>"
            for entity_key in entity_reports.keys():
                if '.' in entity_key:
                    new_entity_key = entity_key.split('.')[0]
                    entity_reports[new_entity_key] = entity_reports[entity_key]
                    entity_reports.pop(entity_key, None)

            # report per-entity scores, skipping aggregate rows
            for entity_key in entity_reports.keys():
                if entity_key not in excluded_itens:  # pragma: no cover
                    entity = entity_reports.get(entity_key)

                    backend().request_backend_create_evaluate_results_score(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "repository_version": repository_version_language,
                            "precision": entity.get("precision"),
                            "recall": entity.get("recall"),
                            "f1_score": entity.get("f1-score"),
                            "support": entity.get("support"),
                            "entity_key": entity_key,
                        },
                        repository_authorization,
                    )

            return {
                "id": evaluate_result.get("evaluate_id"),
                "version": evaluate_result.get("evaluate_version"),
                "cross_validation": True,
            }

        except Exception as e:
            logger.exception(e)
            raise e
def train_eval_rasa_nlu_model(lang='en', cross=False, save=''):
    """Train (and in cross mode evaluate) a Rasa NLU model for task 2.

    :param lang: abbreviated language name (e.g. 'en')
    :param cross: when True, train against the cross-validation split and
        evaluate the most recent persisted model on the held-out set
    :param save: version tag appended to result paths
    :return: None
    :rtype: None
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu.model import Trainer
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu import config
    from rasa.nlu.test import run_evaluation
    import pickle

    config_file = source_config / "config_rasa_bert.yml"

    if cross:
        train_data_obj = BuildSnipsDataTask2(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_rasa_data_task2()
        filename_results = source_result / "rasa_cross_semeval_2020_model_task2_{}".format(
            save)
        # NOTE(review): training runs only when the results directory already
        # exists — possibly inverted (`not ... .exists()`); confirm intent.
        if Path(filename_results).exists():
            training_data = load_data(str(train_data[0]))
            builder = ComponentBuilder(use_cache=True)
            # persist the builder so later runs can reuse cached components
            with codecs.open(
                    source_result / "builder_task2_{}.pkl".format(save),
                    "wb") as ant:
                pickle.dump(builder, ant)
            trainer = Trainer(config.load(str(config_file)), builder)
            print("\n--> Training patent data with Rasa (Cross-validation)...")
            trainer.train(training_data, num_threads=8, verbose=True)
            print("--> Saving model trained with Rasa (Cross-validation)...")
            model_directory = trainer.persist(filename_results)

        print(
            "--> Evaluating training data with Rasa metrics (Cross-validation)..."
        )
        import os
        from datetime import datetime
        filename_test = str(train_data[1])
        dmtime = "test_{}_{}".format(save,
                                     datetime.now().strftime("%Y%m%d-%H%M%S"))
        out_test = source_result / "rasa_cross_evaluation_task2" / dmtime
        # evaluate the most recently persisted model directory
        model_directory = sorted(filename_results.glob("nlu_*"),
                                 key=os.path.getmtime)[-1]
        print(out_test)
        run_evaluation(filename_test,
                       str(model_directory),
                       output_directory=str(out_test))

    else:
        filename_results = source_result / "rasa_semeval_2020_results_task2_{}".format(
            save)
        train_data_obj = BuildSnipsDataTask2(lang, cross=cross, vers=save)
        train_file = train_data_obj.build_rasa_data_task2()

        print("\n--> Training will use the file: {}...".format(
            str(train_file)))
        training_data = load_data(str(train_file))
        builder = ComponentBuilder(use_cache=True)
        with codecs.open(source_result / "builder_task2_{}.pkl".format(save),
                         "wb") as ant:
            pickle.dump(builder, ant)
        trainer = Trainer(config.load(str(config_file)), builder)
        print("\n--> Training patent data with Rasa...")
        trainer.train(training_data,
                      num_threads=12,
                      n_jobs=8,
                      verbose=True,
                      fixed_model_name="nlu")
        print("--> Saving model trained with Rasa...")
        model_directory = trainer.persist(filename_results)
        # bug fix: a dangling `"""` followed this line in the original,
        # opening an unterminated string literal (syntax error) — removed.
예제 #21
0
def train_update(update, by, repository_authorization):
    """Train and persist a Bothub NLU model for one repository update.

    Fetches examples and label-examples from the backend, resolves their
    entities/labels in a single backend call, trains the pipeline, and
    uploads the model through a BothubPersistor.  On failure the backend is
    notified before the exception is re-raised; the captured training log is
    always reported in the ``finally`` block.

    :param update: backend id of the update being trained
    :param by: id of the user who triggered the training
    :param repository_authorization: auth token for backend requests
    """
    update_request = backend().request_backend_start_training_nlu(
        update, by, repository_authorization)

    examples_list = get_examples_request(update, repository_authorization)
    examples_label_list = get_examples_label_request(update,
                                                     repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []
            label_examples = []

            # one backend round-trip resolves entities and labels for both
            # example sets
            get_examples = backend(
            ).request_backend_get_entities_and_labels_nlu(
                update,
                update_request.get("language"),
                json.dumps({
                    "examples": examples_list,
                    "label_examples_query": examples_label_list,
                    "update_id": update,
                }),
                repository_authorization,
            )

            for example in get_examples.get("examples"):
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    ))

            for label_example in get_examples.get("label_examples"):
                label_examples.append(
                    Message.build(
                        text=label_example.get("text"),
                        entities=label_example.get("entities"),
                    ))

            rasa_nlu_config = get_rasa_nlu_config_from_update(update_request)
            trainer = Trainer(rasa_nlu_config,
                              ComponentBuilder(use_cache=False))
            training_data = BothubTrainingData(
                label_training_examples=label_examples,
                training_examples=examples)

            trainer.train(training_data)

            persistor = BothubPersistor(update, repository_authorization)
            trainer.persist(
                mkdtemp(),
                persistor=persistor,
                fixed_model_name=str(update_request.get("update_id")),
            )
        except Exception as e:
            logger.exception(e)
            backend().request_backend_trainfail_nlu(update,
                                                    repository_authorization)
            raise e
        finally:
            # always ship the captured training log back to the backend
            backend().request_backend_traininglog_nlu(
                update, pl.getvalue(), repository_authorization)
예제 #22
0
def evaluate_crossval_update(repository_version,
                             by,
                             repository_authorization,
                             from_queue='celery'):
    """Cross-validate an NLU repository version and upload the results.

    Fetches the version's training examples from the backend, runs a
    3-fold cross-validation (train + evaluate per fold), aggregates the
    intent/entity metrics, uploads the charts plus per-intent and
    per-entity scores, and returns the identifiers of the created
    evaluate result.

    :param repository_version: id of the repository version to evaluate
    :param by: id of the user that requested the evaluation
    :param repository_authorization: repository authorization token
    :param from_queue: name of the queue the task came from
    :return: dict with the evaluate result ``id``, ``version`` and
        ``cross_validation`` set to ``True``
    :raises Exception: re-raised after notifying the backend of the failure
    """
    update_request = backend().request_backend_start_training_nlu(
        repository_version, by, repository_authorization, from_queue)
    examples_list = get_examples_request(repository_version,
                                         repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []

            for example in examples_list:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    ))

            data = TrainingData(training_examples=examples)
            rasa_nlu_config = get_rasa_nlu_config(update_request)
            trainer = Trainer(rasa_nlu_config,
                              ComponentBuilder(use_cache=False))

            result = {
                "intent_evaluation": None,
                "entity_evaluation": None,
                "response_selection_evaluation": None,
            }

            # Per-fold metric accumulators, train and test sides.
            intent_train_metrics: IntentMetrics = defaultdict(list)
            intent_test_metrics: IntentMetrics = defaultdict(list)
            entity_train_metrics: EntityMetrics = defaultdict(
                lambda: defaultdict(list))
            entity_test_metrics: EntityMetrics = defaultdict(
                lambda: defaultdict(list))
            response_selection_train_metrics: ResponseSelectionMetrics = defaultdict(
                list)
            response_selection_test_metrics: ResponseSelectionMetrics = defaultdict(
                list)

            intent_results: List[IntentEvaluationResult] = []
            entity_results: List[EntityEvaluationResult] = []
            response_selection_test_results: List[
                ResponseSelectionEvaluationResult] = ([])
            entity_evaluation_possible = False
            extractors: Set[Text] = set()

            for train, test in generate_folds(3, data):
                interpreter = trainer.train(train)

                # calculate train accuracy
                combine_result(
                    intent_train_metrics,
                    entity_train_metrics,
                    response_selection_train_metrics,
                    interpreter,
                    train,
                )
                # calculate test accuracy; also collects the raw
                # per-example results used for the final reports.
                combine_result(
                    intent_test_metrics,
                    entity_test_metrics,
                    response_selection_test_metrics,
                    interpreter,
                    test,
                    intent_results,
                    entity_results,
                    response_selection_test_results,
                )

                if not extractors:
                    extractors = get_entity_extractors(interpreter)
                    entity_evaluation_possible = (
                        entity_evaluation_possible
                        or _contains_entity_labels(entity_results))

            if intent_results:
                result["intent_evaluation"] = evaluate_intents(intent_results)

            if entity_results:
                extractors = get_entity_extractors(interpreter)
                result["entity_evaluation"] = evaluate_entities(
                    entity_results, extractors)

            intent_evaluation = result.get("intent_evaluation")
            entity_evaluation = result.get("entity_evaluation")

            merged_logs = merge_intent_entity_log(intent_evaluation,
                                                  entity_evaluation)
            log = get_formatted_log(merged_logs)

            # Bugfix: either evaluation may still be None (e.g. no entities
            # anywhere in the folds), so fall back to an empty mapping
            # before calling .get() on it below.
            intent_scores = intent_evaluation or {}
            entity_scores = entity_evaluation or {}

            charts = plot_and_save_charts(repository_version, intent_results)
            evaluate_result = backend(
            ).request_backend_create_evaluate_results(
                {
                    "repository_version": repository_version,
                    "matrix_chart": charts.get("matrix_chart"),
                    "confidence_chart": charts.get("confidence_chart"),
                    "log": json.dumps(log),
                    "intentprecision": intent_scores.get("precision"),
                    "intentf1_score": intent_scores.get("f1_score"),
                    "intentaccuracy": intent_scores.get("accuracy"),
                    "entityprecision": entity_scores.get("precision"),
                    "entityf1_score": entity_scores.get("f1_score"),
                    "entityaccuracy": entity_scores.get("accuracy"),
                },
                repository_authorization,
            )

            intent_reports = intent_scores.get("report", {})
            entity_reports = entity_scores.get("report", {})

            # Upload one score record per intent, skipping the aggregate
            # rows (e.g. averages) listed in excluded_itens.
            for intent_key in intent_reports.keys():
                if intent_key and intent_key not in excluded_itens:
                    intent = intent_reports.get(intent_key)

                    backend().request_backend_create_evaluate_results_intent(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "precision": intent.get("precision"),
                            "recall": intent.get("recall"),
                            "f1_score": intent.get("f1-score"),
                            "support": intent.get("support"),
                            "intent_key": intent_key,
                        },
                        repository_authorization,
                    )

            # Same per-entity upload.
            for entity_key in entity_reports.keys():
                if entity_key and entity_key not in excluded_itens:  # pragma: no cover
                    entity = entity_reports.get(entity_key)

                    backend().request_backend_create_evaluate_results_score(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "repository_version": repository_version,
                            "precision": entity.get("precision"),
                            "recall": entity.get("recall"),
                            "f1_score": entity.get("f1-score"),
                            "support": entity.get("support"),
                            "entity_key": entity_key,
                        },
                        repository_authorization,
                    )

            return {
                "id": evaluate_result.get("evaluate_id"),
                "version": evaluate_result.get("evaluate_version"),
                "cross_validation": True
            }

        except Exception as e:
            logger.exception(e)
            backend().request_backend_trainfail_nlu(repository_version,
                                                    repository_authorization)
            raise e
        finally:
            # Always ship the captured training log, success or failure.
            backend().request_backend_traininglog_nlu(
                repository_version, pl.getvalue(), repository_authorization)
def label_data_with_rasa_nlu_model(lang='en', save="", out='practice'):
    """ Label counterfactual data with a trained Rasa NLU model.

    :param lang: abbreviated language name of model (currently unused here)
    :param save: path name suffix where model is saved
    :param out: 'evaluate' labels the official test set and zips the
        predictions for submission; 'practice' labels the practice set
    :return: csv file
    :rtype: file

    model_20200501-025838 = 0.58
    model_20200502-090721 = 0.48
    model_20200502-135337 = 
    """
    from rasa.nlu.model import Interpreter
    from rasa.nlu.components import ComponentBuilder

    model = source_result / "rasa_semeval_2020_model_task1_{}".format(save)
    if Path(model).exists():
        print("\n--> Loading Rasa model...")
        # NOTE(review): model sub-directory is hard-coded; the cross-validation
        # flow picks the newest "nlu_*" directory instead — consider doing the
        # same here.
        model = str(model / "nlu_20200509-063701")
        builder = ComponentBuilder(use_cache=True)
        nlu_engine = Interpreter.load(model, builder)

        if out == 'evaluate':
            print("--> [EVALUATION] Start labeling with Rasa model...")
            pd_data = pandas.read_csv(test_task_1)
            pred = []
            for i, row in pd_data.iterrows():
                sentence = row['sentence']
                sent_id = row['sentenceID']
                sent_parse = nlu_engine.parse(sentence)
                intent_name = sent_parse['intent']['name']
                if intent_name == "counterfactual":
                    pred.append((sent_id, 1))
                elif intent_name == "no_counterfactual":
                    pred.append((sent_id, 0))
                else:
                    print("ERROR__: ", sent_parse)

                print(intent_name, sent_parse['text'])

            results = pandas.DataFrame(data=pred,
                                       columns=["sentenceID", "pred_label"])
            model_saved = source_result / \
            "rasa_semeval_2020_evaluation_task1_final_{}.csv".format(save)

            from datetime import datetime
            from zipfile import ZipFile
            dtime = datetime.now().strftime("%Y%m%d-%H%M%S")
            results_name = "rasa_semeval_2020_evaluation_task1_{}_{}.zip".format(
                save, dtime)

            # Bugfix: the predictions were written twice; write once, then
            # pack the csv into the submission archive.
            results.to_csv(model_saved, index=False)
            with ZipFile(source_result / results_name, 'w') as myzip:
                myzip.write(str(model_saved), "subtask1.csv")

        elif out == 'practice':
            print("--> [PRACTICE] Start labeling with Rasa model...")
            test_task_prac_1 = source_data / "task1-train.csv"
            pd_data = pandas.read_csv(test_task_prac_1)
            pred = []
            for i, row in pd_data.iterrows():
                sentence = row['sentence']
                sent_parse = nlu_engine.parse(sentence)
                # NOTE(review): this branch reads 'intentName' with CamelCase
                # labels while the evaluate branch reads 'name' with
                # snake_case labels — looks like a leftover from the Snips
                # pipeline; confirm against the model's parse output.
                if sent_parse['intent']['intentName'] == "Counterfactual":
                    pred.append((row['sentenceID'], 1))
                elif sent_parse['intent']['intentName'] == "NoCounterfactual":
                    pred.append((row['sentenceID'], 0))
                else:
                    print(sent_parse['intent']['intentName'])

            results = pandas.DataFrame(data=pred,
                                       columns=["sentenceID", "pred_label"])
            model_saved = source_result / \
            "rasa_semeval_2020_evaluation_pratice_task1_{}.csv".format(save)
            results.to_csv(model_saved, index=False)
# Example #24
# 0
# File: train.py  Project: luweishuang/rasa
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": "zh"})

    (trainer, trained, persisted_path) = await train(
        _config,
        path=model_path,
        data="/Users/psc/code/rasa/tests/nlu/classifiers/intents.yml",
        component_builder=component_builder,
    )

    assert trainer.pipeline
    assert trained.pipeline
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    text = "南京明天天气如何"
    print("--------------------------------------------------")
    print(trained.parse(text))
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    print(loaded.parse(text))
    print("--------------------------------------------------")
    assert loaded.parse(text) == trained.parse(text)


if __name__ == '__main__':
    # test_train_model_checkpointing_peter()
    # Drive the async composite-entity round-trip test to completion,
    # then release the event loop.
    event_loop = asyncio.get_event_loop()
    res = event_loop.run_until_complete(
        train_persist_load_with_composite_entities(
            ComponentBuilder(), "models"))
    event_loop.close()
# Example #25
# 0
from flask import (Flask, request)
from rasa.nlu.components import ComponentBuilder
from rasa.nlu.model import Interpreter
import json

# Shared ComponentBuilder so pipeline components are cached across the
# interpreters loaded per request.
component_builder = ComponentBuilder()
# Root directory holding one sub-directory per trained model.
model_path1 = "models/"


app = Flask(__name__)
@app.route('/message', methods=['GET', 'POST'])
def rasa_result():
    """Parse the posted text with the requested Rasa model.

    Expects a JSON body with keys "q" (the text to parse) and "model"
    (sub-directory of the model root); returns the parse result.
    """
    payload = json.loads(request.get_data().decode())
    query_text = payload["q"]
    interpreter = Interpreter.load(model_path1 + payload["model"],
                                   component_builder)
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    return interpreter.parse(query_text)


if __name__ == '__main__':
    # Listen on all interfaces; debug=True enables the reloader/debugger,
    # so do not use this entry point in production.
    app.run(host="0.0.0.0", port=8090, debug=True)
# Example #26
# 0
# File: utils.py  Project: glhr/rasa_project
import os
# Suppress TensorFlow C++ log output ('3' = errors only); must be set
# before TensorFlow is first imported to take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import logging
import rasa.utils.io as io_utils
from rasa.nlu.config import RasaNLUModelConfig
from rasa.nlu.components import ComponentBuilder

# Build a SpacyNLP component against an empty English pipeline config and
# keep its underlying spaCy language object for reuse.
spacy_nlp_config = {"name": "SpacyNLP"}
blank_config = RasaNLUModelConfig({"language": "en", "pipeline": []})
spacy_nlp = ComponentBuilder().create_component(spacy_nlp_config,
                                                blank_config).nlp

# Same approach for MITIE: keep the component's feature extractor.
mitie_nlp_config = {"name": "MitieNLP"}
mitie_feature_extractor = ComponentBuilder().create_component(
    mitie_nlp_config, blank_config).extractor


def logging_setup():
    """Silence noisy third-party loggers and enable colored INFO logging.

    Returns the logger for this module.
    """
    for noisy_logger in ("tensorflow", "absl", "transformers", "rasa"):
        logging.getLogger(noisy_logger).setLevel(logging.ERROR)
    io_utils.configure_colored_logging(logging.INFO)
    return logging.getLogger(__name__)