def test_override_defaults_supervised_embeddings_pipeline():
    """Pipeline-level config overrides must win over component defaults."""
    builder = ComponentBuilder()
    nlu_config = RasaNLUModelConfig(
        {
            "language": "en",
            "pipeline": [
                {"name": "SpacyNLP"},
                {"name": "SpacyTokenizer"},
                {"name": "SpacyFeaturizer", "pooling": "max"},
                {
                    "name": "DIETClassifier",
                    "epochs": 10,
                    "hidden_layers_sizes": {"text": [256, 128]},
                },
            ],
        }
    )

    featurizer_idx = nlu_config.component_names.index("SpacyFeaturizer")
    classifier_idx = nlu_config.component_names.index("DIETClassifier")

    featurizer = builder.create_component(
        nlu_config.for_component(featurizer_idx), nlu_config
    )
    # "pooling" was overridden in the pipeline config above.
    assert featurizer.component_config["pooling"] == "max"

    classifier = builder.create_component(
        nlu_config.for_component(classifier_idx), nlu_config
    )
    assert classifier.component_config["epochs"] == 10
    # Overriding a nested dict must keep the same set of sub-keys as the default.
    assert (
        classifier.defaults["hidden_layers_sizes"].keys()
        == classifier.component_config["hidden_layers_sizes"].keys()
    )
def _build_pipeline(
    cfg: RasaNLUModelConfig, component_builder: ComponentBuilder
) -> List[Component]:
    """Transform the passed names of the pipeline components into classes.

    Args:
        cfg: full NLU model configuration, one entry per pipeline component.
        component_builder: builder used to instantiate (and cache) components.

    Returns:
        The instantiated pipeline components, in configuration order.
    """
    # Idiomatic replacement for the original `for i in range(len(cfg.pipeline))`
    # append-loop: enumerate the pipeline entries and build a list directly.
    return [
        component_builder.create_component(cfg.for_component(index), cfg)
        for index, _ in enumerate(cfg.pipeline)
    ]
def call():
    """Train an NLU model from ./data/weapon.md and persist it under ./models.

    Returns the directory the trained model was persisted to.
    """
    from rasa.nlu.training_data import load_data
    from rasa.nlu import config
    from rasa.nlu.components import ComponentBuilder
    from rasa.nlu.model import Trainer

    component_builder = ComponentBuilder(use_cache=True)

    examples = load_data('./data/weapon.md')
    nlu_trainer = Trainer(config.load("./config.yml"), component_builder)
    nlu_trainer.train(examples)

    persisted_dir = nlu_trainer.persist('./models', fixed_model_name="model")
    print('done')
    return persisted_dir
def train_update(repository_version, by, repository_authorization, from_queue='celery'):  # pragma: no cover
    """Train an NLU model for a repository version and persist it remotely.

    Notifies the backend on start and on failure, and always uploads the
    captured training log when done.

    Args:
        repository_version: id of the repository version to train.
        by: id of the user who triggered the training.
        repository_authorization: token used for all backend requests.
        from_queue: name of the queue the task came from (default "celery").
    """
    update_request = backend().request_backend_start_training_nlu(
        repository_version, by, repository_authorization, from_queue
    )

    examples_list = get_examples_request(repository_version, repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = [
                Message.build(
                    text=example.get("text"),
                    intent=example.get("intent"),
                    entities=example.get("entities"),
                )
                for example in examples_list
            ]

            rasa_nlu_config = get_rasa_nlu_config(update_request)
            trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))
            training_data = TrainingData(training_examples=examples)

            trainer.train(training_data)

            # NOTE(review): `rasa_version` is not defined in this function —
            # presumably a module-level constant; confirm it exists at import time.
            persistor = BothubPersistor(
                repository_version, repository_authorization, rasa_version
            )
            trainer.persist(
                mkdtemp(),
                persistor=persistor,
                fixed_model_name=f"{update_request.get('repository_version')}_"
                f"{update_request.get('total_training_end')+1}_"
                f"{update_request.get('language')}",
            )
        except Exception as e:
            logger.exception(e)
            backend().request_backend_trainfail_nlu(
                repository_version, repository_authorization
            )
            # Bare `raise` re-raises with the original traceback intact
            # (the original `raise e` restarted the traceback here).
            raise
        finally:
            backend().request_backend_traininglog_nlu(
                repository_version, pl.getvalue(), repository_authorization
            )
def _build_pipeline(
    self, cfg: RasaNLUModelConfig, component_builder: ComponentBuilder
) -> List[Component]:
    """Instantiate the configured pipeline components, validating as we go."""
    pipeline = []

    for idx, declared_component in enumerate(cfg.pipeline):
        cfg_for_component = cfg.for_component(idx)
        built = component_builder.create_component(cfg_for_component, cfg)
        # Reject configuration keys the component does not understand.
        components.validate_component_keys(built, declared_component)
        pipeline.append(built)

    if not self.skip_validation:
        components.validate_pipeline(pipeline)

    return pipeline
def test_coref_parse(text):
    """Run LTP NER and then the coreference strategy over *text*; print the data."""
    message = Message(text)
    model_dir = 'models/coref/model_20190515-150912'

    ltp_component_meta = {
        "name": "ltp",
        "path": "/Users/zhangzhen/data/ltp_data_v3.4.0",
        "lexicon": "lexicon",
        "dimension": {"Nh": "PER", "Ni": "ORG", "Ns": "LOC"},
        "class": "litemind.nlu.utils.ltp.LtpHelper",
    }
    ltp = ComponentBuilder().load_component(
        ltp_component_meta, model_dir, Metadata({}, None)
    )
    ltp.process(message)

    # Gold mention spans attached to the message for the coref strategy.
    spans = [
        {'end': 3, 'gender': '男', 'label': 'PER', 'start': 0},
        {'end': 9, 'gender': '男', 'label': 'PER', 'start': 7},
        {'end': 11, 'label': 'Pronoun', 'start': 10},
    ]
    message.set("spans", spans, add_to_output=True)

    coref_component_meta = {
        "name": "stg",
        "w2v_path": "/Users/zhangzhen/data/emb_ch/embedding.50.cformat",
        "class": "litemind.coref.stg.Strategy",
    }
    coref_stg = ComponentBuilder().load_component(
        coref_component_meta, model_dir, Metadata({}, None)
    )
    coref_stg.process(message)

    pprint.pprint(message.data)
def train_update(update, examples_data, label_examples_data, algorithm,
                 ner_spacy, similarity_type, language, connection):
    """Train an NLU model from prepared example dicts and persist it.

    Args:
        update: identifier of the update being trained (also the model name).
        examples_data: dicts with "text"/"intent"/"entities" for intent examples.
        label_examples_data: dicts with "text"/"entities" for label examples.
        algorithm, ner_spacy, similarity_type, language: pipeline configuration.
        connection: handle passed through to the persistor.
    """
    with PokeLogging() as pl:
        try:
            examples = [
                Message.build(
                    text=example.get("text"),
                    intent=example.get("intent"),
                    entities=example.get("entities"),
                )
                for example in examples_data
            ]
            label_examples = [
                Message.build(
                    text=label_example.get("text"),
                    entities=label_example.get("entities"),
                )
                for label_example in label_examples_data
            ]

            rasa_nlu_config = get_rasa_nlu_config_from_update(
                algorithm, ner_spacy, similarity_type, language
            )
            trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))
            training_data = BothubTrainingData(
                label_training_examples=label_examples,
                training_examples=examples,
            )

            trainer.train(training_data)

            persistor = BothubPersistor(update, connection)
            trainer.persist(
                mkdtemp(),
                persistor=persistor,
                fixed_model_name=str(update),
            )
        except Exception as e:
            logger.exception(e)
            # Bare `raise` preserves the original traceback (the original
            # `raise e` reset it); the pointless `finally: pass` was dropped.
            raise
async def test_load_model_from_server(trained_nlu_model):
    """A model pulled from a model server must carry the server's fingerprint."""
    fingerprint = "somehash"
    model_endpoint = EndpointConfig("http://server.com/models/nlu/tags/latest")

    # Mock a response that returns a zipped model.
    with io.open(NLU_MODEL_PATH, "rb") as model_file:
        responses.add(
            responses.GET,
            model_endpoint.url,
            headers={"ETag": fingerprint, "filename": "my_model_xyz.tar.gz"},
            body=model_file.read(),
            content_type="application/zip",
            stream=True,
        )

    nlu_model = await load_from_server(
        ComponentBuilder(use_cache=False), model_server=model_endpoint
    )
    assert nlu_model.fingerprint == fingerprint
def __init__(
    self,
    project_dir=None,
    max_worker_processes=1,
    response_log=None,
    emulation_mode=None,
    remote_storage=None,
    component_builder=None,
    model_server=None,
    wait_time_between_pulls=None,
):
    """Set up query logging, the project store, and the training process pool."""
    self._worker_processes = max(max_worker_processes, 1)
    self._current_worker_processes = 0
    self.responses = self._create_query_logger(response_log)
    self.project_dir = config.make_path_absolute(project_dir)
    self.emulator = self._create_emulator(emulation_mode)
    self.remote_storage = remote_storage
    self.model_server = model_server
    self.wait_time_between_pulls = wait_time_between_pulls

    # Fall back to a cached builder when the caller did not supply one.
    self.component_builder = (
        component_builder if component_builder else ComponentBuilder(use_cache=True)
    )

    # TODO: Should be moved to separate method
    loop = asyncio.get_event_loop()
    if loop.is_closed():
        loop = asyncio.new_event_loop()
    self.project_store = loop.run_until_complete(
        self._create_project_store(self.project_dir)
    )
    loop.close()

    # tensorflow sessions are not fork-safe,
    # and training processes have to be spawned instead of forked. See
    # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment
    # -258934405
    multiprocessing.set_start_method("spawn", force=True)

    self.pool = ProcessPoolExecutor(max_workers=self._worker_processes)
def __init__(
    self,
    model_dir: Optional[Text] = None,
    max_worker_processes: int = 1,
    response_log: Optional[Text] = None,
    emulation_mode: Optional[Text] = None,
    remote_storage: Optional[Text] = None,
    component_builder: ComponentBuilder = None,
    model_server: EndpointConfig = None,
    wait_time_between_pulls: int = None,
):
    """Initialise server state and preload a fallback NLU model."""
    self._worker_processes = max(max_worker_processes, 1)
    self._current_worker_processes = 0
    self.responses = self._create_query_logger(response_log)

    # Default to the system temp dir when no model directory is given.
    self.model_dir = os.path.abspath(
        model_dir if model_dir is not None else tempfile.gettempdir()
    )

    self.emulator = self._create_emulator(emulation_mode)
    self.remote_storage = remote_storage
    self.model_server = model_server
    self.wait_time_between_pulls = wait_time_between_pulls

    # Fall back to a cached builder when the caller did not supply one.
    self.component_builder = (
        component_builder if component_builder else ComponentBuilder(use_cache=True)
    )
    self.nlu_model = NLUModel.fallback_model(self.component_builder)

    # tensorflow sessions are not fork-safe,
    # and training processes have to be spawned instead of forked. See
    # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment
    # -258934405
    multiprocessing.set_start_method("spawn", force=True)
# NOTE(review): this snippet is whitespace-mangled and its tail is cut off —
# the trailing `"""` below opens a triple-quoted region whose closing quotes
# are outside this chunk, so the function's real end is not visible here.
# Left byte-identical: a reformat could not be verified against the full body.
# Presumably trains a Rasa model on SemEval-2020 task-2 data (optionally
# cross-validated) and pickles the ComponentBuilder — confirm against the
# un-mangled original before editing.
def train_eval_rasa_nlu_model(lang='en', cross=False, save=''): """ Train rasa data from all brat annotation object :param lang: abbreviate language name :param save: path where model will be save :return: None :rtype: None """ from rasa.nlu.training_data import load_data from rasa.nlu.model import Trainer from rasa.nlu.components import ComponentBuilder from rasa.nlu import config from rasa.nlu.test import run_evaluation import pickle config_file = source_config / "config_rasa_bert.yml" if cross: train_data_obj = BuildSnipsDataTask2(lang, cross=cross, vers=save) train_data = train_data_obj.build_rasa_data_task2() filename_results = source_result / "rasa_cross_semeval_2020_model_task2_{}".format( save) if Path(filename_results).exists(): training_data = load_data(str(train_data[0])) builder = ComponentBuilder(use_cache=True) with codecs.open( source_result / "builder_task2_{}.pkl".format(save), "wb") as ant: pickle.dump(builder, ant) trainer = Trainer(config.load(str(config_file)), builder) print("\n--> Training patent data with Rasa (Cross-validation)...") trainer.train(training_data, num_threads=8, verbose=True) print("--> Saving model trained with Rasa (Cross-validation)...") model_directory = trainer.persist(filename_results) print( "--> Evaluating training data with Rasa metrics (Cross-validation)..." 
) import os from datetime import datetime filename_test = str(train_data[1]) dmtime = "test_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S")) out_test = source_result / "rasa_cross_evaluation_task2" / dmtime model_directory = sorted(filename_results.glob("nlu_*"), key=os.path.getmtime)[-1] print(out_test) run_evaluation(filename_test, str(model_directory), output_directory=str(out_test)) else: filename_results = source_result / "rasa_semeval_2020_results_task2_{}".format( save) train_data_obj = BuildSnipsDataTask2(lang, cross=cross, vers=save) train_file = train_data_obj.build_rasa_data_task2() print("\n--> Training will use the file: {}...".format( str(train_file))) training_data = load_data(str(train_file)) builder = ComponentBuilder(use_cache=True) with codecs.open(source_result / "builder_task2_{}.pkl".format(save), "wb") as ant: pickle.dump(builder, ant) trainer = Trainer(config.load(str(config_file)), builder) print("\n--> Training patent data with Rasa...") trainer.train(training_data, num_threads=12, n_jobs=8, verbose=True, fixed_model_name="nlu") print("--> Saving model trained with Rasa...") model_directory = trainer.persist(filename_results) """
def evaluate_crossval_update(repository_version_language, repository_authorization,
                             aws_bucket_authentication, language):
    """Run 3-fold cross-validation for a repository version and report results.

    Trains/evaluates per fold, posts aggregate and per-intent/per-entity scores
    to the backend, and returns the created evaluation's id/version.

    Args:
        repository_version_language: id of the repository version+language.
        repository_authorization: token used for backend requests.
        aws_bucket_authentication: credentials for uploading result charts.
        language: language code used to pick the preprocessor.

    Returns:
        dict with "id", "version" and "cross_validation": True.
    """
    update_request = backend().request_backend_get_current_configuration(
        repository_authorization)

    examples_list = get_examples_request(repository_version_language,
                                         repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []
            for example in examples_list:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    ))

            data = TrainingData(training_examples=examples)

            pipeline_builder = PipelineBuilder(update_request)
            pipeline_builder.print_pipeline()
            rasa_nlu_config = pipeline_builder.get_nlu_model()
            trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))

            result = {
                "intent_evaluation": None,
                "entity_evaluation": None,
                "response_selection_evaluation": None,
            }

            intent_test_metrics: IntentMetrics = defaultdict(list)
            entity_test_metrics: EntityMetrics = defaultdict(
                lambda: defaultdict(list))
            response_selection_test_metrics: ResponseSelectionMetrics = defaultdict(
                list)

            intent_results: List[IntentEvaluationResult] = []
            entity_results: List[EntityEvaluationResult] = []
            response_selection_test_results: List[
                ResponseSelectionEvaluationResult] = ([])
            entity_evaluation_possible = False
            extractors: Set[Text] = set()

            language_preprocessor = PreprocessingFactory(language).factory()

            for train, test in generate_folds(3, data):
                interpreter = trainer.train(train)

                test.training_examples = [
                    language_preprocessor.preprocess(x)
                    for x in test.training_examples
                ]

                # calculate test accuracy
                combine_result(
                    intent_test_metrics,
                    entity_test_metrics,
                    response_selection_test_metrics,
                    interpreter,
                    test,
                    intent_results,
                    entity_results,
                    response_selection_test_results,
                )

                if not extractors:
                    extractors = get_entity_extractors(interpreter)
                    # NOTE(review): the mangled original makes it ambiguous
                    # whether this assignment sits inside the `if`; the flag is
                    # never read afterwards, so either placement is equivalent.
                    entity_evaluation_possible = (
                        entity_evaluation_possible
                        or _contains_entity_labels(entity_results))

            if intent_results:
                result["intent_evaluation"] = evaluate_intents(intent_results)

            if entity_results:
                extractors = get_entity_extractors(interpreter)
                result["entity_evaluation"] = evaluate_entities(
                    entity_results, extractors)

            intent_evaluation = result.get("intent_evaluation")
            entity_evaluation = result.get("entity_evaluation")

            merged_logs = merge_intent_entity_log(intent_evaluation,
                                                  entity_evaluation)
            log = get_formatted_log(merged_logs)

            charts = plot_and_save_charts(repository_version_language,
                                          intent_results,
                                          aws_bucket_authentication)
            evaluate_result = backend().request_backend_create_evaluate_results(
                {
                    "repository_version": repository_version_language,
                    "matrix_chart": charts.get("matrix_chart"),
                    "confidence_chart": charts.get("confidence_chart"),
                    "log": json.dumps(log),
                    "intentprecision": intent_evaluation.get("precision"),
                    "intentf1_score": intent_evaluation.get("f1_score"),
                    "intentaccuracy": intent_evaluation.get("accuracy"),
                    "entityprecision": entity_evaluation.get("precision"),
                    "entityf1_score": entity_evaluation.get("f1_score"),
                    "entityaccuracy": entity_evaluation.get("accuracy"),
                    "cross_validation": True
                },
                repository_authorization,
            )

            intent_reports = intent_evaluation.get("report", {})
            entity_reports = entity_evaluation.get("report", {})

            for intent_key in intent_reports.keys():
                if intent_key not in excluded_itens:
                    intent = intent_reports.get(intent_key)
                    backend().request_backend_create_evaluate_results_intent(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "precision": intent.get("precision"),
                            "recall": intent.get("recall"),
                            "f1_score": intent.get("f1-score"),
                            "support": intent.get("support"),
                            "intent_key": intent_key,
                        },
                        repository_authorization,
                    )

            # remove group entities when entities returned as "<entity>.<group_entity>"
            # BUG FIX: iterate over a snapshot of the keys — the original
            # iterated the live `.keys()` view while popping entries, which
            # raises "RuntimeError: dictionary changed size during iteration".
            for entity_key in list(entity_reports.keys()):
                if '.' in entity_key:
                    new_entity_key = entity_key.split('.')[0]
                    entity_reports[new_entity_key] = entity_reports[entity_key]
                    entity_reports.pop(entity_key, None)

            for entity_key in entity_reports.keys():
                if entity_key not in excluded_itens:  # pragma: no cover
                    entity = entity_reports.get(entity_key)
                    backend().request_backend_create_evaluate_results_score(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "repository_version": repository_version_language,
                            "precision": entity.get("precision"),
                            "recall": entity.get("recall"),
                            "f1_score": entity.get("f1-score"),
                            "support": entity.get("support"),
                            "entity_key": entity_key,
                        },
                        repository_authorization,
                    )

            return {
                "id": evaluate_result.get("evaluate_id"),
                "version": evaluate_result.get("evaluate_version"),
                "cross_validation": True,
            }
        except Exception as e:
            logger.exception(e)
            # Bare `raise` preserves the original traceback.
            raise
def evaluate_crossval_update(repository_version, by, repository_authorization,
                             from_queue='celery'):
    """Run 3-fold cross-validation and post results to the backend.

    Notifies the backend on start/failure and always uploads the captured log.

    Args:
        repository_version: id of the repository version being evaluated.
        by: id of the user who triggered the evaluation.
        repository_authorization: token used for backend requests.
        from_queue: queue the task came from (default "celery").

    Returns:
        dict with "id", "version" and "cross_validation": True.
    """
    update_request = backend().request_backend_start_training_nlu(
        repository_version, by, repository_authorization, from_queue)

    examples_list = get_examples_request(repository_version,
                                         repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []
            for example in examples_list:
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    ))

            data = TrainingData(training_examples=examples)
            rasa_nlu_config = get_rasa_nlu_config(update_request)
            trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))

            result = {
                "intent_evaluation": None,
                "entity_evaluation": None,
                "response_selection_evaluation": None,
            }

            intent_train_metrics: IntentMetrics = defaultdict(list)
            intent_test_metrics: IntentMetrics = defaultdict(list)
            entity_train_metrics: EntityMetrics = defaultdict(
                lambda: defaultdict(list))
            entity_test_metrics: EntityMetrics = defaultdict(
                lambda: defaultdict(list))
            response_selection_train_metrics: ResponseSelectionMetrics = defaultdict(
                list)
            response_selection_test_metrics: ResponseSelectionMetrics = defaultdict(
                list)

            intent_results: List[IntentEvaluationResult] = []
            entity_results: List[EntityEvaluationResult] = []
            response_selection_test_results: List[
                ResponseSelectionEvaluationResult] = ([])
            entity_evaluation_possible = False
            extractors: Set[Text] = set()

            for train, test in generate_folds(3, data):
                interpreter = trainer.train(train)

                # calculate train accuracy
                combine_result(
                    intent_train_metrics,
                    entity_train_metrics,
                    response_selection_train_metrics,
                    interpreter,
                    train,
                )
                # calculate test accuracy
                combine_result(
                    intent_test_metrics,
                    entity_test_metrics,
                    response_selection_test_metrics,
                    interpreter,
                    test,
                    intent_results,
                    entity_results,
                    response_selection_test_results,
                )

                if not extractors:
                    extractors = get_entity_extractors(interpreter)
                    # NOTE(review): the mangled original makes it ambiguous
                    # whether this sits inside the `if`; the flag is never read
                    # afterwards, so either placement is equivalent.
                    entity_evaluation_possible = (
                        entity_evaluation_possible
                        or _contains_entity_labels(entity_results))

            if intent_results:
                result["intent_evaluation"] = evaluate_intents(intent_results)

            if entity_results:
                extractors = get_entity_extractors(interpreter)
                result["entity_evaluation"] = evaluate_entities(
                    entity_results, extractors)

            intent_evaluation = result.get("intent_evaluation")
            entity_evaluation = result.get("entity_evaluation")

            merged_logs = merge_intent_entity_log(intent_evaluation,
                                                  entity_evaluation)
            log = get_formatted_log(merged_logs)

            charts = plot_and_save_charts(repository_version, intent_results)
            evaluate_result = backend().request_backend_create_evaluate_results(
                {
                    "repository_version": repository_version,
                    "matrix_chart": charts.get("matrix_chart"),
                    "confidence_chart": charts.get("confidence_chart"),
                    "log": json.dumps(log),
                    "intentprecision": intent_evaluation.get("precision"),
                    "intentf1_score": intent_evaluation.get("f1_score"),
                    "intentaccuracy": intent_evaluation.get("accuracy"),
                    "entityprecision": entity_evaluation.get("precision"),
                    "entityf1_score": entity_evaluation.get("f1_score"),
                    "entityaccuracy": entity_evaluation.get("accuracy"),
                },
                repository_authorization,
            )

            intent_reports = intent_evaluation.get("report", {})
            entity_reports = entity_evaluation.get("report", {})

            for intent_key in intent_reports.keys():
                if intent_key and intent_key not in excluded_itens:
                    intent = intent_reports.get(intent_key)
                    backend().request_backend_create_evaluate_results_intent(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "precision": intent.get("precision"),
                            "recall": intent.get("recall"),
                            "f1_score": intent.get("f1-score"),
                            "support": intent.get("support"),
                            "intent_key": intent_key,
                        },
                        repository_authorization,
                    )

            for entity_key in entity_reports.keys():
                if entity_key and entity_key not in excluded_itens:  # pragma: no cover
                    entity = entity_reports.get(entity_key)
                    backend().request_backend_create_evaluate_results_score(
                        {
                            "evaluate_id": evaluate_result.get("evaluate_id"),
                            "repository_version": repository_version,
                            "precision": entity.get("precision"),
                            "recall": entity.get("recall"),
                            "f1_score": entity.get("f1-score"),
                            "support": entity.get("support"),
                            "entity_key": entity_key,
                        },
                        repository_authorization,
                    )

            return {
                "id": evaluate_result.get("evaluate_id"),
                "version": evaluate_result.get("evaluate_version"),
                "cross_validation": True
            }
        except Exception as e:
            logger.exception(e)
            backend().request_backend_trainfail_nlu(repository_version,
                                                    repository_authorization)
            # Bare `raise` preserves the original traceback
            # (the original `raise e` restarted it here).
            raise
        finally:
            backend().request_backend_traininglog_nlu(
                repository_version, pl.getvalue(), repository_authorization)
def train_update(update, by, repository_authorization):
    """Train an NLU model (intents plus label examples) and persist it.

    Notifies the backend on start/failure and always uploads the training log.

    Args:
        update: id of the update to train.
        by: id of the user who triggered the training.
        repository_authorization: token used for backend requests.
    """
    update_request = backend().request_backend_start_training_nlu(
        update, by, repository_authorization)

    examples_list = get_examples_request(update, repository_authorization)
    examples_label_list = get_examples_label_request(update,
                                                     repository_authorization)

    with PokeLogging() as pl:
        try:
            examples = []
            label_examples = []

            # The backend resolves entities/labels for the raw example lists.
            get_examples = backend().request_backend_get_entities_and_labels_nlu(
                update,
                update_request.get("language"),
                json.dumps({
                    "examples": examples_list,
                    "label_examples_query": examples_label_list,
                    "update_id": update,
                }),
                repository_authorization,
            )

            for example in get_examples.get("examples"):
                examples.append(
                    Message.build(
                        text=example.get("text"),
                        intent=example.get("intent"),
                        entities=example.get("entities"),
                    ))

            for label_example in get_examples.get("label_examples"):
                label_examples.append(
                    Message.build(
                        text=label_example.get("text"),
                        entities=label_example.get("entities"),
                    ))

            rasa_nlu_config = get_rasa_nlu_config_from_update(update_request)
            trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))
            training_data = BothubTrainingData(
                label_training_examples=label_examples,
                training_examples=examples)

            trainer.train(training_data)

            persistor = BothubPersistor(update, repository_authorization)
            trainer.persist(
                mkdtemp(),
                persistor=persistor,
                fixed_model_name=str(update_request.get("update_id")),
            )
        except Exception as e:
            logger.exception(e)
            backend().request_backend_trainfail_nlu(update,
                                                    repository_authorization)
            # Bare `raise` preserves the original traceback
            # (the original `raise e` restarted it here).
            raise
        finally:
            backend().request_backend_traininglog_nlu(
                update, pl.getvalue(), repository_authorization)
# NOTE(review): whitespace-mangled snippet split mid-statement across chunk
# boundaries (L20 ends with `sentence =`); left byte-identical — a reformat
# could not be confidently verified. Presumably labels SemEval-2020 task-1
# counterfactual data with a persisted Rasa model and writes CSV/zip results;
# note the two branches read different intent keys ('name' vs 'intentName')
# and different label strings — confirm against the un-mangled original.
def label_data_with_rasa_nlu_model(lang='en', save="", out='practice'): """ Label counterfactual training data :param lang: abbreviate language name of model :param save: path name where model is saved :return: csv file :rtype: file model_20200501-025838 = 0.58 model_20200502-090721 = 0.48 model_20200502-135337 = """ from rasa.nlu.model import Interpreter from rasa.nlu.components import ComponentBuilder import pickle import json model = source_result / "rasa_semeval_2020_model_task1_{}".format(save) if Path(model).exists(): print("\n--> Loading Rasa model...") model = str(model / "nlu_20200509-063701") builder = ComponentBuilder(use_cache=True) nlu_engine = Interpreter.load(model, builder) if out == 'evaluate': print("--> [EVALUATION] Start labeling with Rasa model...") pd_data = pandas.read_csv(test_task_1) pred = [] for i, row in pd_data.iterrows(): sentence = row['sentence'] sent_id = row['sentenceID'] sent_parse = nlu_engine.parse(sentence) if sent_parse['intent']['name'] == "counterfactual": pred.append((sent_id, 1)) elif sent_parse['intent']['name'] == "no_counterfactual": pred.append((sent_id, 0)) else: print("ERROR__: ", sent_parse) print(sent_parse['intent']['name'], sent_parse['text']) results = pandas.DataFrame(data=pred, columns=["sentenceID", "pred_label"]) model_saved = source_result / \ "rasa_semeval_2020_evaluation_task1_final_{}.csv".format(save) results.to_csv(model_saved, index=False) from datetime import datetime from zipfile import ZipFile dtime = datetime.now().strftime("%Y%m%d-%H%M%S") results_name = "rasa_semeval_2020_evaluation_task1_{}_{}.zip".format( save, dtime) results.to_csv(model_saved, index=False) with ZipFile(source_result / results_name, 'w') as myzip: myzip.write(str(model_saved), "subtask1.csv") elif out == 'practice': print("--> [PRACTICE] Start labeling with Rasa model...") test_task_prac_1 = source_data / "task1-train.csv" pd_data = pandas.read_csv(test_task_prac_1) pred = [] for i, row in pd_data.iterrows(): sentence = 
row['sentence'] sent_parse = nlu_engine.parse(sentence) if sent_parse['intent']['intentName'] == "Counterfactual": pred.append((row['sentenceID'], 1)) elif sent_parse['intent']['intentName'] == "NoCounterfactual": pred.append((row['sentenceID'], 0)) else: print(sent_parse['intent']['intentName']) #print(predict[0], row['gold_label']) results = pandas.DataFrame(data=pred, columns=["sentenceID", "pred_label"]) model_saved = source_result / \ "rasa_semeval_2020_evaluation_pratice_task1_{}.csv".format(save) results.to_csv(model_saved, index=False)
from flask import (Flask, request)
from rasa.nlu.components import ComponentBuilder
from rasa.nlu.model import Interpreter
import json

# Shared builder so component caches can be reused across loaded models.
component_builder = ComponentBuilder()
model_path1 = "models/"

app = Flask(__name__)


@app.route('/message', methods=['GET', 'POST'])
def rasa_result():
    """Parse the text in the request payload with the model it names."""
    payload = json.loads(request.get_data().decode())
    text = payload["q"]
    # NOTE(review): the model name comes straight from the request and is
    # joined into a filesystem path unchecked — confirm callers are trusted
    # (potential path traversal). Likewise `debug=True` below exposes the
    # debugger on all interfaces.
    model_path = model_path1 + payload["model"]
    interpreter = Interpreter.load(model_path, component_builder)
    print("++++++++++++++++++++++++++++++++++++++++++++++++++")
    return interpreter.parse(text)


if __name__ == '__main__':
    app.run(host="0.0.0.0", port=8090, debug=True)
import os

# Quiet TensorFlow's C++ logging before anything imports it.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import logging

import rasa.utils.io as io_utils
from rasa.nlu.config import RasaNLUModelConfig
from rasa.nlu.components import ComponentBuilder

# A pipeline-less config used only to instantiate the language components.
blank_config = RasaNLUModelConfig({"language": "en", "pipeline": []})

spacy_nlp_config = {"name": "SpacyNLP"}
spacy_nlp = ComponentBuilder().create_component(spacy_nlp_config, blank_config).nlp

mitie_nlp_config = {"name": "MitieNLP"}
mitie_feature_extractor = (
    ComponentBuilder().create_component(mitie_nlp_config, blank_config).extractor
)


def logging_setup():
    """Silence noisy third-party loggers and enable colored INFO logging."""
    logger = logging.getLogger(__name__)
    for noisy in ("tensorflow", "absl", "transformers", "rasa"):
        logging.getLogger(noisy).setLevel(logging.ERROR)
    io_utils.configure_colored_logging(logging.INFO)
    return logger
def component_builder():
    """Create a fresh, default-configured ComponentBuilder."""
    builder = ComponentBuilder()
    return builder
# NOTE(review): this chunk begins mid-function — `await`, `pipeline`,
# `model_path` and `component_builder` all belong to an enclosing async def
# that is not visible here. Left byte-identical: any reformat would be
# guessing at the missing head. It appears to train, persist and reload a
# Chinese-language model and assert parse() parity between the trained and
# loaded interpreters — confirm against the full original.
_config = RasaNLUModelConfig({"pipeline": pipeline, "language": "zh"}) (trainer, trained, persisted_path) = await train( _config, path=model_path, data="/Users/psc/code/rasa/tests/nlu/classifiers/intents.yml", component_builder=component_builder, ) assert trainer.pipeline assert trained.pipeline loaded = Interpreter.load(persisted_path, component_builder) assert loaded.pipeline text = "南京明天天气如何" print("--------------------------------------------------") print(trained.parse(text)) print("++++++++++++++++++++++++++++++++++++++++++++++++++") print("++++++++++++++++++++++++++++++++++++++++++++++++++") print(loaded.parse(text)) print("--------------------------------------------------") assert loaded.parse(text) == trained.parse(text) if __name__ == '__main__': # test_train_model_checkpointing_peter() loop = asyncio.get_event_loop() res = loop.run_until_complete( train_persist_load_with_composite_entities(ComponentBuilder(), "models")) loop.close()
def spacy_nlp(component_builder: ComponentBuilder,
              blank_config: RasaNLUModelConfig):
    """Build a SpacyNLP component and return its underlying spaCy pipeline."""
    config_for_spacy = {"name": "SpacyNLP", "model": "en_core_web_md"}
    component = component_builder.create_component(config_for_spacy, blank_config)
    return component.nlp
def mitie_feature_extractor(component_builder: ComponentBuilder, blank_config):
    """Build a MitieNLP component and return its feature extractor."""
    config_for_mitie = {"name": "MitieNLP"}
    component = component_builder.create_component(config_for_mitie, blank_config)
    return component.extractor
def test_builder_load_unknown(component_builder: ComponentBuilder):
    """Loading a component whose class cannot be resolved must fail loudly."""
    component_meta = {"name": "my_made_up_componment"}
    with pytest.raises(Exception) as excinfo:
        component_builder.load_component(component_meta, "", Metadata({}))
    assert "Cannot find class" in str(excinfo.value)
# NOTE(review): this chunk begins mid-function — `await`, `_config` and
# `component_builder` belong to an enclosing async def not visible here.
# Left byte-identical: any reformat would be guessing at the missing head.
# It appears to train, persist and reload a Chinese medical-domain model and
# assert parse() parity between the trained and loaded interpreters —
# confirm against the full original.
(trainer, trained, persisted_path) = await train( _config, path=model_path, data="../../../data/test/demo-rasa-zh.json", component_builder=component_builder, ) assert trainer.pipeline assert trained.pipeline loaded = Interpreter.load(persisted_path, component_builder) assert loaded.pipeline text = "感冒发烧了怎么办" print("--------------------------------------------------") print(trained.parse(text)) print("++++++++++++++++++++++++++++++++++++++++++++++++++") print("++++++++++++++++++++++++++++++++++++++++++++++++++") print(loaded.parse(text)) print("--------------------------------------------------") assert loaded.parse(text) == trained.parse(text) if __name__ == '__main__': # test_train_model_checkpointing_peter() classifier_params = {RANDOM_SEED: 1, EPOCHS: 1, BILOU_FLAG: False} loop = asyncio.get_event_loop() res = loop.run_until_complete( train_persist_load_with_composite_entities(classifier_params, ComponentBuilder(), "../models")) loop.close()