def test_fit_simple(
    self,
    resources,
    tmp_path,
):
    """Run ``fit`` on the SIMPLE regression training model from a nested code dir.

    The command uses the sklearn regression dataset and the regression
    target, and is executed through ``_exec_shell_cmd``.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        SIMPLE,
        REGRESSION,
        PYTHON,
        is_training=True,
        nested=True,
    )
    dataset = resources.datasets(SKLEARN, REGRESSION)

    # The directory is created, although the command below has no --output option.
    output_dir = tmp_path / "output"
    output_dir.mkdir()

    fit_cmd = "{} fit --target-type {} --code-dir {} --target {} --input {} --verbose".format(
        ArgumentsOptions.MAIN_COMMAND,
        REGRESSION,
        model_dir,
        resources.targets(REGRESSION),
        dataset,
    )
    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, fit_cmd)
    _exec_shell_cmd(fit_cmd, failure_msg)
def test_duplicate_target_name(self, resources, tmp_path):
    """Run ``fit`` with the TARGET_NAME_DUPLICATED_X / _Y dataset pair.

    The features come from ``--input`` and the target from ``--target-csv``.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        SKLEARN_REGRESSION,
        SPARSE,
        language=PYTHON,
        is_training=True,
    )
    features_path = resources.datasets(SKLEARN_REGRESSION, TARGET_NAME_DUPLICATED_X)
    target_path = resources.datasets(SKLEARN_REGRESSION, TARGET_NAME_DUPLICATED_Y)

    # The directory is created, although the command below has no --output option.
    output_dir = tmp_path / "output"
    output_dir.mkdir()

    base_cmd = "{} fit --code-dir {} --input {} --target-type {} --verbose ".format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        features_path,
        REGRESSION,
    )
    fit_cmd = base_cmd + " --target-csv " + target_path

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, fit_cmd)
    _exec_shell_cmd(fit_cmd, failure_msg)
def test_sparse_transform_fit(
    self,
    framework,
    resources,
    tmp_path,
):
    """Run ``fit`` with target type TRANSFORM on sparse input.

    The sparse features, the target CSV and the sparse column file are all
    looked up through ``resources.datasets``.
    """
    sparse_input = resources.datasets(None, SPARSE)
    sparse_target = resources.datasets(None, SPARSE_TARGET)

    # The framework value is also passed as the ``language`` keyword here.
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        REGRESSION,
        language=framework,
    )
    column_file = resources.datasets(framework, SPARSE_COLUMNS)

    fit_cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose --target-csv {} --sparse-column-file {}".format(
        ArgumentsOptions.MAIN_COMMAND,
        TRANSFORM,
        model_dir,
        sparse_input,
        sparse_target,
        column_file,
    )
    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, fit_cmd)
    _exec_shell_cmd(fit_cmd, failure_msg)
def test_fit_schema_validation(self, resources, tmp_path):
    """Run ``fit`` for the schema-validation binary model on the sklearn binary dataset.

    The code directory is created with ``include_metadata=True``.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        SKLEARN_BINARY_SCHEMA_VALIDATION,
        BINARY,
        PYTHON,
        is_training=True,
        include_metadata=True,
    )
    dataset = resources.datasets(SKLEARN, BINARY)

    # The directory is created, although the command below has no --output option.
    output_dir = tmp_path / "output"
    output_dir.mkdir()

    fit_cmd = "{} fit --target-type {} --code-dir {} --target {} --input {} --verbose".format(
        ArgumentsOptions.MAIN_COMMAND,
        BINARY,
        model_dir,
        resources.targets(BINARY),
        dataset,
    )
    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, fit_cmd)
    _exec_shell_cmd(fit_cmd, failure_msg)
def test_transform_fit(
    self,
    resources,
    framework,
    problem,
    weights,
    tmp_path,
):
    """Run ``fit`` with target type TRANSFORM for the given framework and problem.

    A target is added for every problem except ANOMALY, class labels are
    added for BINARY and MULTICLASS, and the weights arguments produced by
    ``self._add_weights_cmd`` are appended last.
    """
    language = PYTHON
    target_type = TRANSFORM

    # The framework value is also passed as the ``language`` keyword here.
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language=framework,
    )

    dataset = resources.datasets(framework, problem)
    # The third return value is bound to a local for the rest of the test;
    # presumably it keeps a temporary resource alive - confirm in the helper.
    weights_args, dataset, _keep_alive = self._add_weights_cmd(
        weights,
        dataset,
        r_fit=(language == R_FIT),
    )

    fit_cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
        ArgumentsOptions.MAIN_COMMAND,
        target_type,
        model_dir,
        dataset,
    )
    if problem != ANOMALY:
        fit_cmd += " --target {}".format(resources.targets(problem))
    if problem in (BINARY, MULTICLASS):
        labels = resources.class_labels(framework, problem)
        fit_cmd = _cmd_add_class_labels(fit_cmd, labels, target_type=target_type)
    fit_cmd += weights_args

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, fit_cmd)
    _exec_shell_cmd(fit_cmd, failure_msg)
def test_fit_sparse(self, resources, tmp_path, framework):
    """Run a REGRESSION ``fit`` on sparse data with a separate target CSV.

    The language is R_FIT for the RDS framework and PYTHON otherwise.
    """
    if framework == RDS:
        language = R_FIT
    else:
        language = PYTHON

    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        SPARSE,
        language=language,
        is_training=True,
    )

    sparse_input = resources.datasets(framework, SPARSE)
    sparse_target = resources.datasets(framework, SPARSE_TARGET)
    column_file = resources.datasets(framework, SPARSE_COLUMNS)

    # The directory is created, although the command below has no --output option.
    output_dir = tmp_path / "output"
    output_dir.mkdir()

    base_cmd = "{} fit --code-dir {} --input {} --target-type {} --verbose --sparse-column-file {}".format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        sparse_input,
        REGRESSION,
        column_file,
    )
    fit_cmd = base_cmd + " --target-csv " + sparse_target

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, fit_cmd)
    _exec_shell_cmd(fit_cmd, failure_msg)
def test_drum_push_training(
    self,
    resources,
    framework,
    problem,
    language,
    tmp_path,
    get_target,
    sklearn_drop_in_env,
):
    """Write a ``model-metadata.yaml`` into a training code dir and run ``push``.

    The metadata comes from ``get_push_yaml`` and is round-tripped through
    the YAML parser before being written to the code directory.
    """
    custom_model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
        is_training=True,
    )

    env_id, _ = sklearn_drop_in_env
    yaml_string = get_push_yaml(
        env_id,
        resources.datasets(framework, problem),
        problem,
        get_target(problem),
    )

    metadata_path = os.path.join(custom_model_dir, "model-metadata.yaml")
    with open(metadata_path, "w") as outfile:
        # safe_load instead of yaml.load(): calling load() without a Loader is
        # deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6.
        yaml.dump(yaml.safe_load(yaml_string), outfile)

    # The command runs only after the metadata file has been closed.
    cmd = "{} push --code-dir {} --verbose".format(
        ArgumentsOptions.MAIN_COMMAND,
        custom_model_dir,
    )
    _exec_shell_cmd(
        cmd,
        "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd),
    )
def test_custom_model_with_all_hooks(
    self,
    resources,
    framework,
    language,
    hooks_list,
    target_type,
    tmp_path,
):
    """Run ``score`` and compare the output against ``len(hooks_list)``.

    For the structured hook list every value in the ``Predictions`` column
    must equal the number of hooks. For the unstructured hook list that
    number must appear somewhere in the output text.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        None,
        language,
    )
    dataset = resources.datasets(framework, REGRESSION)
    output = tmp_path / "output"

    score_cmd = "{} score --code-dir {} --input {} --output {} --target-type {}".format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        dataset,
        output,
        target_type,
    )
    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, score_cmd)
    _exec_shell_cmd(score_cmd, failure_msg)

    hook_count = len(hooks_list)
    if hooks_list == CustomHooks.ALL_PREDICT_STRUCTURED:
        preds = pd.read_csv(output)
        matches = (preds["Predictions"] == hook_count).values
        assert matches.all(), preds
    elif hooks_list == CustomHooks.ALL_PREDICT_UNSTRUCTURED:
        with open(output) as result_file:
            result_text = result_file.read()
        assert str(hook_count) in result_text
def _run_server_thread(cmd, process_obj_holder, verbose=True):
    """Run ``cmd`` through ``_exec_shell_cmd`` with ``assert_if_fail=False``.

    ``process_obj_holder`` and ``verbose`` are forwarded unchanged.
    """
    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd)
    _exec_shell_cmd(
        cmd,
        failure_msg,
        assert_if_fail=False,
        process_obj_holder=process_obj_holder,
        verbose=verbose,
    )
def test_fit_sh(
    self,
    resources,
    framework,
    problem,
    weights,
    tmp_path,
):
    """Run the drop-in environment's ``fit.sh`` configured through environment variables.

    The script path is built under ``public_dropin_environments`` from
    PYTHON and the framework. The sklearn variants (anomaly, binary,
    multiclass, sparse) all map to the plain SKLEARN environment.
    """
    custom_model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        PYTHON,
        is_training=True,
    )

    sklearn_variants = [SKLEARN_ANOMALY, SKLEARN_BINARY, SKLEARN_MULTICLASS, SKLEARN_SPARSE]
    env_framework = SKLEARN if framework in sklearn_variants else framework
    fit_sh = os.path.join(
        TESTS_ROOT_PATH,
        "..",
        "public_dropin_environments/{}_{}/fit.sh".format(PYTHON, env_framework),
    )

    input_dir = tmp_path / "input_dir"
    self._create_fit_input_data_dir(
        resources.targets,
        resources.datasets,
        input_dir,
        problem,
        weights,
        is_sparse=framework == SKLEARN_SPARSE,
    )

    output = tmp_path / "output"
    output.mkdir()

    unset_drum_supported_env_vars()

    # Work on a copy taken after the cleanup above, so the variables set for
    # this run never leak into os.environ and affect later tests.
    env = os.environ.copy()
    env["CODEPATH"] = str(custom_model_dir)
    env["INPUT_DIRECTORY"] = str(input_dir)
    env["ARTIFACT_DIRECTORY"] = str(output)
    env["TARGET_TYPE"] = problem if problem != BINARY_TEXT else BINARY
    env["TRAINING_DATA_EXTENSION"] = ".mtx" if framework == SKLEARN_SPARSE else ".csv"

    if problem in [BINARY, BINARY_TEXT]:
        labels = resources.class_labels(framework, problem)
        env["NEGATIVE_CLASS_LABEL"] = labels[0]
        env["POSITIVE_CLASS_LABEL"] = labels[1]
    elif problem == MULTICLASS:
        labels = resources.class_labels(framework, problem)
        labels_path = os.path.join(tmp_path, "class_labels.txt")
        with open(labels_path, mode="w") as f:
            f.write("\n".join(labels))
        env["CLASS_LABELS_FILE"] = labels_path

    try:
        _exec_shell_cmd(fit_sh, "Failed cmd {}".format(fit_sh), env=env)
    finally:
        # clear env vars as it may affect next test cases; runs even when the
        # command above fails
        unset_drum_supported_env_vars()
def test_fit_hyperparameters(
    self,
    resources,
    framework,
    problem,
    docker,
    parameters,
    weights,
    tmp_path,
):
    """Run ``fit`` with a ``--parameter-file`` for the given framework and problem.

    The language is R_FIT for RDS_HYPERPARAMETERS and PYTHON otherwise. The
    target type is TRANSFORM when the framework name contains "transform",
    and otherwise comes from ``resources.target_types``.
    """
    language = R_FIT if framework == RDS_HYPERPARAMETERS else PYTHON

    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
        is_training=True,
    )

    dataset = resources.datasets(framework, problem)
    params_path = resources.datasets(framework, parameters)

    # The third return value is bound to a local for the rest of the test;
    # presumably it keeps a temporary resource alive - confirm in the helper.
    weights_args, dataset, _keep_alive = self._add_weights_cmd(
        weights,
        dataset,
        r_fit=(language == R_FIT),
    )

    if "transform" in framework:
        target_type = TRANSFORM
    else:
        target_type = resources.target_types(problem)

    fit_cmd = "{} fit --target-type {} --code-dir {} --input {} --parameter-file {} --verbose ".format(
        ArgumentsOptions.MAIN_COMMAND,
        target_type,
        model_dir,
        dataset,
        params_path,
    )
    if problem != ANOMALY:
        fit_cmd += " --target {}".format(resources.targets(problem))
    if problem in (BINARY, MULTICLASS):
        labels = resources.class_labels(framework, problem)
        fit_cmd = _cmd_add_class_labels(fit_cmd, labels, target_type=target_type)
    if docker:
        fit_cmd += " --docker {} ".format(docker)
    fit_cmd += weights_args

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, fit_cmd)
    _exec_shell_cmd(fit_cmd, failure_msg)
def test_fit_for_use_output_and_nested(
    self,
    resources,
    framework,
    problem,
    docker,
    weights,
    use_output,
    tmp_path,
    nested,
):
    """Run ``fit`` with optional ``--output``, ``--docker`` and nested code dir.

    Returns immediately, without running anything, when a docker image is
    given for a framework other than SKLEARN.
    """
    if docker and framework != SKLEARN:
        return

    language = R_FIT if framework == RDS else PYTHON

    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
        is_training=True,
        nested=nested,
    )

    dataset = resources.datasets(framework, problem)
    # The third return value is bound to a local for the rest of the test;
    # presumably it keeps a temporary resource alive - confirm in the helper.
    weights_args, dataset, _keep_alive = self._add_weights_cmd(
        weights,
        dataset,
        r_fit=(language == R_FIT),
    )

    # Always created; only passed to the command when use_output is set.
    output = tmp_path / "output"
    output.mkdir()

    fit_cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
        ArgumentsOptions.MAIN_COMMAND,
        problem,
        model_dir,
        dataset,
    )
    if problem != ANOMALY:
        fit_cmd += " --target {}".format(resources.targets(problem))
    if use_output:
        fit_cmd += " --output {}".format(output)
    if problem == BINARY:
        labels = resources.class_labels(framework, problem)
        fit_cmd = _cmd_add_class_labels(fit_cmd, labels, target_type=problem)
    if docker:
        fit_cmd += " --docker {} ".format(docker)
    fit_cmd += weights_args

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, fit_cmd)
    _exec_shell_cmd(fit_cmd, failure_msg)
def test_unstructured_models_batch(
    self,
    resources,
    framework,
    problem,
    language,
    docker,
    mimetype,
    ret_mode,
    tmp_path,
):
    """Run ``score`` in unstructured mode and check that the output encodes 10.

    With ``ret_mode == "binary"`` the output bytes are decoded as a
    big-endian integer. Otherwise the text output must contain "10".
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
    )
    dataset = resources.datasets(framework, problem)
    output = tmp_path / "output"

    if mimetype is not None:
        content_type = "--content-type '{};'".format(mimetype)
    else:
        content_type = ""

    score_cmd = "{} score --code-dir {} --input {} --output {} --target-type unstructured {} --query 'ret_mode={}'".format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        dataset,
        output,
        content_type,
        ret_mode,
    )
    if docker:
        score_cmd += " --docker {} --verbose ".format(docker)

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, score_cmd)
    _exec_shell_cmd(score_cmd, failure_msg)

    if ret_mode == "binary":
        with open(output, "rb") as result_file:
            raw = result_file.read()
        assert 10 == int.from_bytes(raw, byteorder="big")
    else:
        with open(output) as result_file:
            text = result_file.read()
        assert "10" in text
def test_fit_schema_failure(self, resources, tmp_path):
    """Check that ``fit`` fails with a schema validation error on mismatching data.

    The schema-validation binary model is fitted on the BINARY_TEXT dataset.
    The command must exit with a non-zero code and report
    ``DrumSchemaValidationException``.
    """
    custom_model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        SKLEARN_BINARY_SCHEMA_VALIDATION,
        BINARY,
        PYTHON,
        is_training=True,
        include_metadata=True,
    )

    input_dataset = resources.datasets(SKLEARN, BINARY_TEXT)

    output = tmp_path / "output"
    output.mkdir()

    cmd = "{} fit --target-type {} --code-dir {} --target {} --input {} --verbose".format(
        ArgumentsOptions.MAIN_COMMAND,
        BINARY,
        custom_model_dir,
        resources.targets(BINARY_TEXT),
        input_dataset,
    )

    # Run with assert_if_fail=False and check the outcome explicitly. The
    # previous version put the output check after the call inside
    # `pytest.raises(AssertionError)`, where it either never ran or its own
    # failure satisfied `raises`.
    p, stdout, stderr = _exec_shell_cmd(
        cmd,
        "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd),
        assert_if_fail=False,
    )

    assert p.returncode != 0, "fit should fail on a dataset that violates the model schema"
    # NOTE(review): the original check looked at stderr only; both streams are
    # searched here because which stream carries the message is not visible
    # in this file - tighten to stderr once confirmed.
    assert "DrumSchemaValidationException" in str(stdout) + str(stderr)
def test_drum_monitoring_with_mlops_installed(
    self, resources, framework, problem, language, docker, tmp_path
):
    """Run the monitoring command and check the predictions and the spool dir.

    The output must have as many rows as the input, and the MLOps spool
    directory must exist and contain ``fs_spool.1``.
    """
    (
        cmd,
        input_file,
        output_file,
        mlops_spool_dir,
    ) = TestMLOpsMonitoring._drum_with_monitoring(
        resources, framework, problem, language, docker, tmp_path
    )

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd)
    _exec_shell_cmd(cmd, failure_msg)

    input_rows = pd.read_csv(input_file).shape[0]
    output_rows = pd.read_csv(output_file).shape[0]
    assert input_rows == output_rows

    print("Spool dir {}".format(mlops_spool_dir))
    assert os.path.isdir(mlops_spool_dir)
    spool_file = os.path.join(mlops_spool_dir, "fs_spool.1")
    assert os.path.isfile(spool_file)
def test_drum_help(self, cmd):
    """Check that running ``cmd`` prints the drum usage banner on stdout."""
    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd)
    result = _exec_shell_cmd(cmd, failure_msg, assert_if_fail=False)
    _, stdout, _ = result
    assert "usage: drum" in str(stdout)
def test_custom_models_with_drum(
    self,
    resources,
    framework,
    problem,
    language,
    docker,
    tmp_path,
    use_labels_file,
    temp_file,
):
    """Run ``score`` and check that one prediction row is produced per input row.

    Class labels are added for BINARY and MULTICLASS target types. The labels
    file is passed only when ``use_labels_file`` is set.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
    )
    dataset = resources.datasets(framework, problem)
    output = tmp_path / "output"

    score_cmd = '{} score --code-dir {} --input "{}" --output {} --target-type {}'.format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        dataset,
        output,
        resources.target_types(problem),
    )
    if resources.target_types(problem) in (BINARY, MULTICLASS):
        labels_file = temp_file if use_labels_file else None
        score_cmd = _cmd_add_class_labels(
            score_cmd,
            resources.class_labels(framework, problem),
            target_type=resources.target_types(problem),
            multiclass_label_file=labels_file,
        )
    if docker:
        score_cmd += " --docker {} --verbose ".format(docker)

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, score_cmd)
    _exec_shell_cmd(score_cmd, failure_msg)

    input_rows = pd.read_csv(dataset).shape[0]
    output_rows = pd.read_csv(output).shape[0]
    assert input_rows == output_rows
def test_drum_bad_subparser(self):
    """Check that an unknown sub-command is rejected with an argparse error on stderr."""
    # A plain string, like every other command passed to _exec_shell_cmd; the
    # previous trailing comma turned this into a one-element tuple.
    cmd = "{} some_command".format(ArgumentsOptions.MAIN_COMMAND)

    _, _, stde = _exec_shell_cmd(
        cmd,
        "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd),
        assert_if_fail=False,
    )

    assert "argument subparser_name: invalid choice: 'some_command'" in str(stde)
def test_docker_image_with_wrong_dep_install(
    self,
    resources,
    framework,
    problem,
    code_dir,
    env_dir,
    tmp_path,
):
    """Check that a docker build with an unresolvable requirement is reported.

    The model template is copied into ``tmp_path`` and its
    ``requirements.txt`` is overwritten with a non-existing dependency.
    stderr must then contain both the drum build error and the message about
    the unsatisfiable requirement.
    """
    template_dir = os.path.join(MODEL_TEMPLATES_PATH, code_dir)
    model_dir = shutil.copytree(template_dir, tmp_path / "tmp_code_dir")

    requirements_path = os.path.join(model_dir, "requirements.txt")
    with open(requirements_path, mode="w") as req_file:
        req_file.write("\nnon_existing_dep")

    docker_env = os.path.join(PUBLIC_DROPIN_ENVS_PATH, env_dir)
    dataset = resources.datasets(framework, problem)
    output = tmp_path / "output"

    score_cmd = '{} score --code-dir {} --input "{}" --output {} --target-type {}'.format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        dataset,
        output,
        resources.target_types(problem),
    )
    score_cmd = _cmd_add_class_labels(
        score_cmd,
        resources.class_labels(framework, problem),
        target_type=resources.target_types(problem),
        multiclass_label_file=None,
    )
    score_cmd += " --docker {} --verbose ".format(docker_env)

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, score_cmd)
    _, _, stderr = _exec_shell_cmd(score_cmd, failure_msg, assert_if_fail=False)

    assert re.search(
        r"ERROR drum:  Failed to build a docker image",
        stderr,
    )
    assert re.search(
        r"Could not find a version that satisfies the requirement non_existing_dep",
        stderr,
    )
def test_template_creation(self, language, language_suffix, tmp_path):
    """Run ``new model`` and check the generated template files.

    The target directory must exist and contain ``README.md`` and the custom
    file named ``CUSTOM_FILE_NAME + language_suffix``.
    """
    print("Running template creation tests: {}".format(language))

    template_dir = tmp_path / "template_test_{}".format(uuid4())
    create_cmd = "{drum_prog} new model --language {language} --code-dir {directory}".format(
        drum_prog=ArgumentsOptions.MAIN_COMMAND,
        language=language,
        directory=template_dir,
    )
    failure_msg = "Failed creating a template for custom model, cmd={}".format(create_cmd)
    _exec_shell_cmd(create_cmd, failure_msg)

    missing_dir_msg = "Directory {} does not exists (or not a dir)".format(template_dir)
    assert os.path.isdir(template_dir), missing_dir_msg

    readme_path = os.path.join(template_dir, "README.md")
    assert os.path.isfile(readme_path)

    custom_path = os.path.join(template_dir, CUSTOM_FILE_NAME + language_suffix)
    assert os.path.isfile(custom_path)
def test_fit_variety(framework, variety_resources, resources, variety_data_names, tmp_path):
    """Run ``fit`` on one of the "variety" datasets and fail with the captured stderr.

    Dataset path, problem type, target and class labels are looked up in
    ``variety_resources`` by the dataset name. ANOMALY problems run with
    ``--unsupervised``; every other problem gets ``--target``.
    """
    # get data info from fixtures
    dataset_name = variety_data_names
    dataset_path = variety_resources.dataset(dataset_name)
    problem = variety_resources.problem(dataset_name)
    target = variety_resources.target(dataset_name)

    uses_r = framework == RDS

    if problem == BINARY:
        class_labels = variety_resources.class_labels(dataset_name)
        if uses_r:
            # there's one annoying dataset where R needs 0 and 1 and python wants 1.0 and 0.0
            class_labels = [
                int(label) if type(label) is float else label for label in class_labels
            ]

    # figure out language
    language = R_FIT if uses_r else PYTHON

    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
        is_training=True,
        nested=False,
    )

    # The directory is created, although the command below has no --output option.
    output_dir = tmp_path / "output"
    output_dir.mkdir()

    fit_cmd = "{} fit --code-dir {} --input {} --verbose ".format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        dataset_path,
    )
    if problem == ANOMALY:
        fit_cmd += " --unsupervised"
    else:
        fit_cmd += " --target {}".format(target)

    if problem == BINARY:
        fit_cmd = _cmd_add_class_labels(fit_cmd, class_labels, target_type=problem)

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, fit_cmd)
    process, _, stderr = _exec_shell_cmd(fit_cmd, failure_msg, assert_if_fail=False)

    # Fail with the captured stderr as the assertion message.
    if process.returncode != 0:
        raise AssertionError(stderr)
def test_custom_models_perf_test(
    self,
    resources,
    framework,
    problem,
    language,
    docker,
    tmp_path,
):
    """Run ``perf-test -i 200 -s 1000`` for the given model.

    Class labels are added for BINARY and MULTICLASS target types.

    NOTE(review): another function with this exact name (taking an extra
    ``timeout`` argument) is visible in this source. If both live in the same
    scope, the later definition shadows the earlier one - confirm.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
    )
    dataset = resources.datasets(framework, problem)

    perf_cmd = "{} perf-test -i 200 -s 1000 --code-dir {} --input {} --target-type {}".format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        dataset,
        resources.target_types(problem),
    )
    if resources.target_types(problem) in (BINARY, MULTICLASS):
        perf_cmd = _cmd_add_class_labels(
            perf_cmd,
            resources.class_labels(framework, problem),
            target_type=resources.target_types(problem),
        )
    if docker:
        perf_cmd += " --docker {}".format(docker)

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, perf_cmd)
    _exec_shell_cmd(perf_cmd, failure_msg)
def test_custom_models_perf_test(
    self,
    resources,
    framework,
    problem,
    language,
    docker,
    timeout,
    tmp_path,
):
    """Run ``perf-test -i 200 -s 1000``, optionally with ``--timeout``.

    With a timeout, stdout must contain the "timed out" message and "NA".
    Without one, "NA" must not appear in stdout.

    NOTE(review): another function with this exact name (without the
    ``timeout`` argument) is visible in this source. If both live in the same
    scope, the later definition shadows the earlier one - confirm.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
    )
    dataset = resources.datasets(framework, problem)
    has_timeout = timeout is not None

    perf_cmd = "{} perf-test -i 200 -s 1000 --code-dir {} --input {} --target-type {}".format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        dataset,
        resources.target_types(problem),
    )
    if has_timeout:
        perf_cmd += " --timeout {}".format(timeout)
    if resources.target_types(problem) in (BINARY, MULTICLASS):
        perf_cmd = _cmd_add_class_labels(
            perf_cmd,
            resources.class_labels(framework, problem),
            target_type=resources.target_types(problem),
        )
    if docker:
        perf_cmd += " --docker {}".format(docker)

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, perf_cmd)
    _, stdout, _ = _exec_shell_cmd(perf_cmd, failure_msg)

    if not has_timeout:
        assert "NA" not in stdout
        return

    timeout_marker = "timed out ({}s)".format(timeout)
    assert timeout_marker in stdout
    assert "NA" in stdout
def test_drum_monitoring_fails_in_unstructured_mode(
    self, resources, framework, problem, language, docker, tmp_path
):
    """Check that monitoring with ``--target-type unstructured`` is rejected.

    stdout must contain the message saying that MLOps monitoring cannot be
    used in unstructured mode.
    """
    monitoring_cmd, _input_file, _output_file, _spool_dir = TestMLOpsMonitoring._drum_with_monitoring(
        resources, framework, problem, language, docker, tmp_path
    )
    monitoring_cmd += " --target-type unstructured"

    failure_msg = "Failed in {} command line! {}".format(
        ArgumentsOptions.MAIN_COMMAND, monitoring_cmd
    )
    _, stdout, _ = _exec_shell_cmd(monitoring_cmd, failure_msg, assert_if_fail=False)

    assert "MLOps monitoring can not be used in unstructured mode" in str(stdout)
def test_validation_check_with_bad_column_names(
    self,
    resources,
    framework,
    problem,
    language,
    tmp_path,
):
    """Run ``validation`` on a one-row CSV with awkward column names.

    The column names contain slashes, a backslash, quotes, a space and a
    colon. The null value imputation check must still be reported as PASSED.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
    )

    awkward_columns = [
        "column",
        "col/unm",
        "col\\unm",
        'col"umn',
        "col umn",
        "col:umn",
        'col""umn',
    ]
    frame = pd.DataFrame(data={name: [1.0] for name in awkward_columns})

    # The command runs inside the context so the temporary file still exists.
    with NamedTemporaryFile(mode="w") as csv_file:
        csv_path = csv_file.name
        frame.to_csv(csv_path)

        validation_cmd = "{} validation --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            model_dir,
            csv_path,
            resources.target_types(problem),
        )
        failure_msg = "Failed in {} command line! {}".format(
            ArgumentsOptions.MAIN_COMMAND, validation_cmd
        )
        _, stdout, _ = _exec_shell_cmd(validation_cmd, failure_msg, assert_if_fail=False)

        assert re.search(r"Null value imputation\s+PASSED", stdout)
def test_set_language(
    self,
    resources,
    framework,
    problem,
    language,
    set_language,
    tmp_path,
):
    """Run ``score`` with and without ``--language`` and check the diagnostics.

    Without ``--language`` the combined output must say that the language
    cannot be detected. For CODEGEN_AND_SKLEARN with ``--language r`` it must
    report the missing ``.rds`` artifact.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
    )
    dataset = resources.datasets(framework, problem)

    score_cmd = "{} score --code-dir {} --input {} --target-type {}".format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        dataset,
        resources.target_types(problem),
    )
    if set_language:
        score_cmd += " --language {}".format(set_language)
    if problem == BINARY:
        score_cmd += " --positive-class-label yes --negative-class-label no"

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, score_cmd)
    _process, stdout, stderr = _exec_shell_cmd(score_cmd, failure_msg, assert_if_fail=False)

    combined_output = str(stdout) + str(stderr)

    if not set_language:
        assert (
            "Can not detect language by artifacts and/or custom.py/R files" in combined_output
        )

    if framework == CODEGEN_AND_SKLEARN and set_language == "r":
        missing_rds_msg = (
            "Could not find a serialized model artifact with .rds extension, supported by default R predictor. "
            "If your artifact is not supported by default predictor, implement custom.load_model hook."
        )
        assert missing_rds_msg in combined_output
def test_prediction_consistency(self, resources, tmp_path, framework, problem):
    """Check that inconsistent predictions give a warning, not a failure.

    stderr must contain the "predictions were different" warning while stdout
    still reports a successful fit. The path after the last ":" in stderr is
    removed afterwards as cleanup.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        SPARSE,
        language=PYTHON,
        is_training=True,
    )
    dataset = resources.datasets(framework, problem)

    # The text/bool binary variants are fitted with the plain BINARY target type.
    target_type = BINARY if problem in (BINARY_TEXT, BINARY_BOOL) else problem

    fit_cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
        ArgumentsOptions.MAIN_COMMAND,
        target_type,
        model_dir,
        dataset,
    )
    fit_cmd += " --target {}".format(resources.targets(problem))

    if target_type in (BINARY, MULTICLASS):
        labels = resources.class_labels(framework, problem)
        fit_cmd = _cmd_add_class_labels(fit_cmd, labels, target_type)

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, fit_cmd)
    _, stdout, stderr = _exec_shell_cmd(fit_cmd, failure_msg, assert_if_fail=True)

    # we should throw a warning, not an error
    assert "Your predictions were different when we tried to predict twice." in stderr
    # but don't error out
    success_msg = (
        "Your model can be fit to your data, and predictions can be made on the fit model!"
    )
    assert success_msg in stdout

    # clean up: the path is whatever follows the last ":" in stderr;
    # strip() also removes the trailing newline
    sample_path = stderr.split(":")[-1].strip()
    os.remove(sample_path)
def test_custom_models_validation_test(
    self,
    resources,
    framework,
    problem,
    language,
    docker,
    tmp_path,
):
    """Run ``validation`` and check the null value imputation verdict.

    The check is expected to be FAILED when ``language`` is NO_CUSTOM and
    PASSED otherwise.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
    )
    dataset = resources.datasets(framework, problem)

    validation_cmd = "{} validation --code-dir {} --input {} --target-type {}".format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        dataset,
        resources.target_types(problem),
    )
    if problem == BINARY:
        validation_cmd = _cmd_add_class_labels(
            validation_cmd,
            resources.class_labels(framework, problem),
            target_type=resources.target_types(problem),
        )
    if docker:
        validation_cmd += " --docker {}".format(docker)

    failure_msg = "Failed in {} command line! {}".format(
        ArgumentsOptions.MAIN_COMMAND, validation_cmd
    )
    _, stdout, _ = _exec_shell_cmd(validation_cmd, failure_msg, assert_if_fail=False)

    if language != NO_CUSTOM:
        assert re.search(r"Null value imputation\s+PASSED", stdout)
    else:
        assert re.search(r"Null value imputation\s+FAILED", stdout)
def test_bin_models_with_wrong_labels(
    self,
    resources,
    framework,
    problem,
    language,
    tmp_path,
):
    """Run ``score`` with yes/no class labels and check the reported error.

    The SKLEARN and RDS frameworks each have their own expected message,
    searched for in the combined stdout and stderr.
    """
    model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
    )
    dataset = resources.datasets(framework, problem)

    score_cmd = "{} score --code-dir {} --input {} --target-type {}".format(
        ArgumentsOptions.MAIN_COMMAND,
        model_dir,
        dataset,
        resources.target_types(problem),
    )
    if problem == BINARY:
        score_cmd += " --positive-class-label yes --negative-class-label no"

    failure_msg = "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, score_cmd)
    _process, stdout, stderr = _exec_shell_cmd(score_cmd, failure_msg, assert_if_fail=False)

    combined_output = str(stdout) + str(stderr)

    if framework == SKLEARN:
        assert (
            "Wrong class labels ['no', 'yes']. Use class labels detected by sklearn model"
            in combined_output
        )
    elif framework == RDS:
        assert (
            "Wrong class labels. Use class labels according to your dataset" in combined_output
        )
def test_drum_monitoring_no_mlops_installed(
    self, resources, framework, problem, language, docker, tmp_path
):
    """Check that drum exits with a non-zero code when monitoring is requested.

    The run is expected to fail because the mlops package is assumed to not
    be installed.
    """
    monitoring_cmd, _, _, _ = TestMLOpsMonitoring._drum_with_monitoring(
        resources, framework, problem, language, docker, tmp_path
    )

    failure_msg = "Failed in {} command line! {}".format(
        ArgumentsOptions.MAIN_COMMAND, monitoring_cmd
    )
    process, _, _ = _exec_shell_cmd(monitoring_cmd, failure_msg, assert_if_fail=False)

    exit_code = process.returncode
    assert (
        exit_code != 0
    ), "drum should fail when datarobot-mlops is not installed and monitoring is requested"