def test_environment_variable_in_list():
    """A ``${variable}`` placeholder inside a YAML list is replaced by
    the value of the environment variable of the same name."""
    # Remember whatever was there before so the process environment is
    # left untouched for every test that runs after this one.
    previous = os.environ.get('variable')
    os.environ['variable'] = 'test'
    try:
        content = "model: \n - value\n - ${variable}"
        result = utils.read_yaml(content)
        assert result['model'][1] == 'test'
    finally:
        if previous is None:
            del os.environ['variable']
        else:
            os.environ['variable'] = previous
def test_environment_variable_dict_with_prefix_and_with_postfix():
    """A ``${variable}`` placeholder embedded in the middle of a mapping
    value is expanded while the surrounding text is kept."""
    # Remember whatever was there before so the process environment is
    # left untouched for every test that runs after this one.
    previous = os.environ.get('variable')
    os.environ['variable'] = 'test'
    try:
        content = "model: \n test: dir/${variable}/dir"
        result = utils.read_yaml(content)
        assert result['model']['test'] == 'dir/test/dir'
    finally:
        if previous is None:
            del os.environ['variable']
        else:
            os.environ['variable'] = previous
def test_read_emojis_from_json():
    """Emoji text serialised with ``json.dumps`` must come back
    unchanged when the JSON document is parsed by ``read_yaml``."""
    import json
    from rasa_nlu.utils import read_yaml

    text = "hey 😁💯 👩🏿💻👨🏿💻🧜♂️"
    serialized = json.dumps({"text": text}, indent=2)

    parsed = read_yaml(serialized)

    assert parsed.get('text') == text
def test_read_emojis_from_json():
    """Emoji, backslash sequences and a non-ASCII letter serialised with
    ``json.dumps`` must come back unchanged from ``read_yaml``."""
    import json
    from rasa_nlu.utils import read_yaml

    expected = "hey 😁💯 👩🏿💻👨🏿💻🧜♂️(?u)\\b\\w+\\b} für"
    payload = {"text": "hey 😁💯 👩🏿💻👨🏿💻🧜♂️(?u)\\b\\w+\\b} f\u00fcr"}

    parsed = read_yaml(json.dumps(payload, indent=2))

    assert parsed.get('text') == expected
def test_emojis_in_yaml():
    """Emoji and other non-ASCII characters inside YAML list items are
    returned unchanged by ``utils.read_yaml``."""
    # Line breaks are significant here: every list item has to sit on
    # its own line for the document to be a mapping whose `data` key
    # holds a two-element list.
    test_data = """
    data:
        - one 😁💯 👩🏿💻👨🏿💻
        - two £
    """
    actual = utils.read_yaml(test_data)

    assert actual["data"][0] == "one 😁💯 👩🏿💻👨🏿💻"
    assert actual["data"][1] == "two £"
def test_emojis_in_yaml():
    """Emoji, backslash sequences and non-ASCII letters inside YAML list
    items are returned unchanged by ``utils.read_yaml``."""
    # Line breaks are significant here: every list item has to sit on
    # its own line for the document to be a mapping whose `data` key
    # holds a two-element list.
    test_data = """
    data:
        - one 😁💯 👩🏿💻👨🏿💻
        - two £ (?u)\\b\\w+\\b f\u00fcr
    """
    actual = utils.read_yaml(test_data)

    assert actual["data"][0] == "one 😁💯 👩🏿💻👨🏿💻"
    assert actual["data"][1] == "two £ (?u)\\b\\w+\\b für"
def test_bool_str():
    """Quoted boolean-looking scalars stay strings after parsing."""
    # One key per line: the three entries only form a mapping when each
    # of them is on a line of its own.
    test_data = """
    one: "yes"
    two: "true"
    three: "True"
    """
    actual = utils.read_yaml(test_data)

    assert actual["one"] == "yes"
    assert actual["two"] == "true"
    assert actual["three"] == "True"
def test_emojis_in_tmp_file():
    """YAML with emoji, backslash sequences and non-ASCII letters
    survives a round trip through a temporary file and ``read_yaml``."""
    import os

    # Line breaks are significant here: every list item has to sit on
    # its own line for the document to be a mapping whose `data` key
    # holds a two-element list.
    test_data = """
    data:
        - one 😁💯 👩🏿💻👨🏿💻
        - two £ (?u)\\b\\w+\\b f\u00fcr
    """
    test_file = utils.create_temporary_file(test_data)
    try:
        with io.open(test_file, mode='r', encoding="utf-8") as f:
            content = f.read()
    finally:
        # NOTE(review): assumes `create_temporary_file` returns the path
        # of a file it leaves on disk -- remove it so test runs do not
        # pile up temp files.
        os.remove(test_file)

    actual = utils.read_yaml(content)

    assert actual["data"][0] == "one 😁💯 👩🏿💻👨🏿💻"
    assert actual["data"][1] == "two £ (?u)\\b\\w+\\b für"
def test_emojis_in_tmp_file():
    """YAML with an emoji and a non-ASCII character survives a round
    trip through a temporary file and ``read_yaml``."""
    import os

    # Line breaks are significant here: every list item has to sit on
    # its own line for the document to be a mapping whose `data` key
    # holds a two-element list.
    test_data = """
    data:
        - one 😁
        - two £
    """
    test_file = utils.create_temporary_file(test_data)
    try:
        with io.open(test_file, mode='r', encoding="utf-8") as f:
            content = f.read()
    finally:
        # NOTE(review): assumes `create_temporary_file` returns the path
        # of a file it leaves on disk -- remove it so test runs do not
        # pile up temp files.
        os.remove(test_file)

    actual = utils.read_yaml(content)

    assert actual["data"][0] == "one 😁"
    assert actual["data"][1] == "two £"
def run_trial(space):
    """Train and evaluate one model for a single search-space sample.

    The objective function is pickled and transferred to the workers.
    Hence, this function has to contain all the imports we need.

    Settings are taken from the environment: ``DATA_DIRECTORY``
    (default ``./data``), ``MODEL_DIRECTORY`` (default ``./models``) and
    ``TARGET_METRIC`` (default ``f1_score``).

    Args:
        space: mapping of sampled values used to fill the placeholders
            of ``template_config.yml``. An ``"epochs"`` entry, if
            present, is cast to ``int`` in place.

    Returns:
        A dict with the keys ``"loss"`` and ``"status"``. The status is
        ``STATUS_FAIL`` (with a loss of 1) when the target metric is
        unknown or when training/evaluation raises, ``STATUS_OK``
        otherwise.
    """
    import io

    data_dir = os.environ.get("DATA_DIRECTORY", "./data")
    model_dir = os.environ.get("MODEL_DIRECTORY", "./models")
    target_metric = os.environ.get("TARGET_METRIC", "f1_score")

    if target_metric not in AVAILABLE_METRICS:
        logger.error("The metric '{}' is not in the available metrics. "
                     "Please use one of the available metrics: {}."
                     "".format(target_metric, AVAILABLE_METRICS))
        return {"loss": 1, "status": STATUS_FAIL}

    logger.debug("Search space: {}".format(space))

    # The epoch has to be an int since `tqdm` otherwise will cause an
    # exception.
    if "epochs" in space:
        space["epochs"] = int(space["epochs"])

    # Read the template as UTF-8 explicitly so non-ASCII content does
    # not depend on the platform's default encoding.
    template_path = os.path.join(data_dir, "template_config.yml")
    with io.open(template_path, encoding="utf-8") as f:
        config_yml = f.read().format(**space)
        config = read_yaml(config_yml)
        config = RasaNLUModelConfig(config)

    trainer = Trainer(config)
    training_data = load_data(os.path.join(data_dir, "train.md"))
    test_data_path = os.path.join(data_dir, "validation.md")

    # wrap in train and eval in try/except in case
    # nlu_hyperopt proposes invalid combination of params
    try:
        model = trainer.train(training_data)
        model_path = trainer.persist(model_dir)

        # `target_metric` always is a string at this point (the
        # environment lookup has a default), so no `None` check needed.
        if target_metric == "threshold_loss":
            loss = _get_threshold_loss(model, test_data_path)
        else:
            loss = _get_nlu_evaluation_loss(model_path,
                                            target_metric,
                                            test_data_path)
        return {"loss": loss, "status": STATUS_OK}
    except Exception as e:
        # Deliberately broad: a failing trial must not abort the whole
        # search. Log with traceback so the cause stays diagnosable.
        logger.exception(e)
        return {"loss": 1, "status": STATUS_FAIL}
def train(self, request):
    """Start a training run for the posted data and reply with JSON.

    A YAML request is read as a model configuration whose ``data`` entry
    holds the training data; any other request body is used as training
    data together with the server's default model configuration.

    The response code is 200 on success, 403 for
    ``AlreadyTrainingError``, 404 for ``InvalidProjectError`` and 500
    for ``TrainingException``.
    """
    # if not set will use the default project name, e.g. "default"
    project = parameter_or_default(request, "project", default=None)
    # if set will not generate a model name but use the passed one
    model_name = parameter_or_default(request, "model", default=None)

    body = request.content.read().decode('utf-8', 'strict')

    if not is_yaml_request(request):
        # assumes the caller just provided training data without config
        # this will use the default model config the server
        # was started with
        model_config, data = self.default_model_config, body
    else:
        # assumes the user submitted a model configuration with a data
        # parameter attached to it
        model_config = utils.read_yaml(body)
        data = model_config.get("data")

    data_file = dump_to_data_file(data)

    request.setHeader('Content-Type', 'application/json')

    try:
        request.setResponseCode(200)
        response = yield self.data_router.start_train_process(
            data_file,
            project,
            RasaNLUModelConfig(model_config),
            model_name,
        )
        reply = {'info': 'new model trained: {}'.format(response)}
    except AlreadyTrainingError as e:
        request.setResponseCode(403)
        reply = {"error": "{}".format(e)}
    except InvalidProjectError as e:
        request.setResponseCode(404)
        reply = {"error": "{}".format(e)}
    except TrainingException as e:
        request.setResponseCode(500)
        reply = {"error": "{}".format(e)}

    returnValue(json_to_string(reply))
def extract_data_and_config(self, request):
    """Split a request body into ``(model_config, data)``.

    The body is decoded as strict UTF-8. Depending on the request's
    content type it is either parsed as YAML (the training data is then
    taken from the configuration's ``data`` entry) or handed to
    ``self.extract_json``.

    Raises:
        Exception: if the content type mentions neither ``yml`` nor
            ``json``.
    """
    raw_body = request.content.read().decode('utf-8', 'strict')
    content_type = self.get_request_content_type(request)

    if 'yml' in content_type:
        # assumes the user submitted a model configuration with a data
        # parameter attached to it
        model_config = utils.read_yaml(raw_body)
        return model_config, model_config.get("data")

    if 'json' in content_type:
        model_config, data = self.extract_json(raw_body)
        return model_config, data

    raise Exception("Content-Type must be 'application/x-yml' "
                    "or 'application/json'")
def train(self, request):
    """Start a training run for the posted data and reply with JSON.

    A YAML request is read as a model configuration whose ``data`` entry
    holds the training data; any other request body is used as training
    data together with the server's default model configuration.

    The response code is 200 on success, 403 for
    ``AlreadyTrainingError``, 404 for ``InvalidProjectError`` and 500
    for ``TrainingException``.
    """
    project = parameter_or_default(request, "project", default=None)
    body = request.content.read().decode('utf-8', 'strict')

    if not is_yaml_request(request):
        # assumes the caller just provided training data without config
        # this will use the default model config the server
        # was started with
        model_config, data = self.default_model_config, body
    else:
        # assumes the user submitted a model configuration with a data
        # parameter attached to it
        model_config = utils.read_yaml(body)
        data = model_config.get("data")

    data_file = dump_to_data_file(data)

    request.setHeader('Content-Type', 'application/json')

    try:
        request.setResponseCode(200)
        response = yield self.data_router.start_train_process(
            data_file,
            project,
            RasaNLUModelConfig(model_config),
        )
        reply = {'info': 'new model trained: {}'.format(response)}
    except AlreadyTrainingError as e:
        request.setResponseCode(403)
        reply = {"error": "{}".format(e)}
    except InvalidProjectError as e:
        request.setResponseCode(404)
        reply = {"error": "{}".format(e)}
    except TrainingException as e:
        request.setResponseCode(500)
        reply = {"error": "{}".format(e)}

    returnValue(json_to_string(reply))
def test_environment_variable_not_existing():
    """Reading YAML that references an unset ``${variable}`` raises
    ``KeyError``."""
    import os

    # The check is only meaningful while `variable` is really unset.
    # Take it out of the environment for the duration of the test (it
    # may have been exported by the shell or by another test) and put
    # it back afterwards.
    previous = os.environ.pop('variable', None)
    try:
        content = "model: \n test: ${variable}"
        with pytest.raises(KeyError):
            utils.read_yaml(content)
    finally:
        if previous is not None:
            os.environ['variable'] = previous