    def test_fit_simple(
        self,
        resources,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            SIMPLE,
            REGRESSION,
            PYTHON,
            is_training=True,
            nested=True,
        )

        input_dataset = resources.datasets(SKLEARN, REGRESSION)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --target-type {} --code-dir {} --target {} --input {} --verbose".format(
            ArgumentsOptions.MAIN_COMMAND,
            REGRESSION,
            custom_model_dir,
            resources.targets(REGRESSION),
            input_dataset,
        )
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_duplicate_target_name(self, resources, tmp_path):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            SKLEARN_REGRESSION,
            SPARSE,
            language=PYTHON,
            is_training=True,
        )

        input_dataset = resources.datasets(SKLEARN_REGRESSION,
                                           TARGET_NAME_DUPLICATED_X)
        target_dataset = resources.datasets(SKLEARN_REGRESSION,
                                            TARGET_NAME_DUPLICATED_Y)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --code-dir {} --input {} --target-type {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, custom_model_dir, input_dataset,
            REGRESSION)

        cmd += " --target-csv " + target_dataset
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_sparse_transform_fit(
        self,
        framework,
        resources,
        tmp_path,
    ):
        input_dataset = resources.datasets(None, SPARSE)
        target_dataset = resources.datasets(None, SPARSE_TARGET)

        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            REGRESSION,
            language=framework,
        )
        columns = resources.datasets(framework, SPARSE_COLUMNS)

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose --target-csv {} --sparse-column-file {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            TRANSFORM,
            custom_model_dir,
            input_dataset,
            target_dataset,
            columns,
        )

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_fit_schema_validation(self, resources, tmp_path):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            SKLEARN_BINARY_SCHEMA_VALIDATION,
            BINARY,
            PYTHON,
            is_training=True,
            include_metadata=True,
        )

        input_dataset = resources.datasets(SKLEARN, BINARY)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --target-type {} --code-dir {} --target {} --input {} --verbose".format(
            ArgumentsOptions.MAIN_COMMAND,
            BINARY,
            custom_model_dir,
            resources.targets(BINARY),
            input_dataset,
        )
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_transform_fit(
        self, resources, framework, problem, weights, tmp_path,
    ):
        language = PYTHON
        custom_model_dir = _create_custom_model_dir(
            resources, tmp_path, framework, problem, language=framework,
        )

        input_dataset = resources.datasets(framework, problem)

        weights_cmd, input_dataset, __keep_this_around = self._add_weights_cmd(
            weights, input_dataset, r_fit=language == R_FIT
        )

        target_type = TRANSFORM

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, target_type, custom_model_dir, input_dataset
        )
        if problem != ANOMALY:
            cmd += " --target {}".format(resources.targets(problem))

        if problem in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd, resources.class_labels(framework, problem), target_type=target_type
            )

        cmd += weights_cmd

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd)
        )
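# Hedged sketch, not the helper from the original suite: _cmd_add_class_labels is used
# throughout these tests to append label arguments to a drum command line. The binary
# flags below appear elsewhere in this file; the multiclass flags and the file format
# are assumptions for illustration, and the real helper's signature may differ.
def _cmd_add_class_labels_sketch(cmd, class_labels, target_type, multiclass_label_file=None):
    if target_type == BINARY:
        # binary targets use the explicit positive/negative label flags
        cmd += " --negative-class-label '{}' --positive-class-label '{}'".format(
            class_labels[0], class_labels[1])
    elif multiclass_label_file is not None:
        # multiclass labels passed via a file, one label per line (assumed format)
        with open(multiclass_label_file, "w") as f:
            f.write("\n".join(str(label) for label in class_labels))
        cmd += " --class-labels-file {}".format(multiclass_label_file)
    else:
        cmd += " --class-labels {}".format(" ".join(str(label) for label in class_labels))
    return cmd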
    def test_fit_sparse(self, resources, tmp_path, framework):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            SPARSE,
            language=R_FIT if framework == RDS else PYTHON,
            is_training=True,
        )

        input_dataset = resources.datasets(framework, SPARSE)
        target_dataset = resources.datasets(framework, SPARSE_TARGET)
        columns = resources.datasets(framework, SPARSE_COLUMNS)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --code-dir {} --input {} --target-type {} --verbose --sparse-column-file {}".format(
            ArgumentsOptions.MAIN_COMMAND, custom_model_dir, input_dataset, REGRESSION, columns
        )

        cmd += " --target-csv " + target_dataset
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd)
        )
    def test_drum_push_training(
        self,
        resources,
        framework,
        problem,
        language,
        tmp_path,
        get_target,
        sklearn_drop_in_env,
    ):
        custom_model_dir = _create_custom_model_dir(resources,
                                                    tmp_path,
                                                    framework,
                                                    problem,
                                                    language,
                                                    is_training=True)

        env_id, _ = sklearn_drop_in_env
        yaml_string = get_push_yaml(env_id,
                                    resources.datasets(framework, problem),
                                    problem, get_target(problem))
        with open(os.path.join(custom_model_dir, "model-metadata.yaml"),
                  "w") as outfile:
            yaml.dump(yaml.safe_load(yaml_string), outfile)

        cmd = "{} push --code-dir {} --verbose".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
        )
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_custom_model_with_all_hooks(
        self,
        resources,
        framework,
        language,
        hooks_list,
        target_type,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            None,
            language,
        )

        input_dataset = resources.datasets(framework, REGRESSION)

        output = tmp_path / "output"

        cmd = "{} score --code-dir {} --input {} --output {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND, custom_model_dir, input_dataset,
            output, target_type)
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        if hooks_list == CustomHooks.ALL_PREDICT_STRUCTURED:
            preds = pd.read_csv(output)
            assert all(val for val in (
                preds["Predictions"] == len(hooks_list)).values), preds
        elif hooks_list == CustomHooks.ALL_PREDICT_UNSTRUCTURED:
            with open(output) as f:
                all_data = f.read()
                assert str(len(hooks_list)) in all_data
def _run_server_thread(cmd, process_obj_holder, verbose=True):
    _exec_shell_cmd(
        cmd,
        "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd),
        assert_if_fail=False,
        process_obj_holder=process_obj_holder,
        verbose=verbose,
    )
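# Hedged usage sketch, not part of the original tests: _run_server_thread wraps a
# blocking _exec_shell_cmd call, so callers would typically launch it on a daemon
# thread and keep a holder object through which the spawned process can later be
# stopped. The holder class and its .process attribute are illustrative assumptions.
import threading


class _ProcessHolderSketch:
    """Stand-in for the object _exec_shell_cmd is assumed to populate with the Popen handle."""

    def __init__(self):
        self.process = None


def _start_drum_server_sketch(cmd):
    holder = _ProcessHolderSketch()
    thread = threading.Thread(
        target=_run_server_thread, args=(cmd, holder), daemon=True)
    thread.start()
    return thread, holder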
    def test_fit_sh(
        self, resources, framework, problem, weights, tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources, tmp_path, framework, problem, PYTHON, is_training=True,
        )

        env = os.environ
        fit_sh = os.path.join(
            TESTS_ROOT_PATH,
            "..",
            "public_dropin_environments/{}_{}/fit.sh".format(
                PYTHON,
                framework
                if framework
                not in [SKLEARN_ANOMALY, SKLEARN_BINARY, SKLEARN_MULTICLASS, SKLEARN_SPARSE]
                else SKLEARN,
            ),
        )

        input_dir = tmp_path / "input_dir"
        self._create_fit_input_data_dir(
            resources.targets,
            resources.datasets,
            input_dir,
            problem,
            weights,
            is_sparse=framework == SKLEARN_SPARSE,
        )

        output = tmp_path / "output"
        output.mkdir()

        unset_drum_supported_env_vars()

        env["CODEPATH"] = str(custom_model_dir)
        env["INPUT_DIRECTORY"] = str(input_dir)
        env["ARTIFACT_DIRECTORY"] = str(output)
        env["TARGET_TYPE"] = problem if problem != BINARY_TEXT else BINARY
        if framework == SKLEARN_SPARSE:
            env["TRAINING_DATA_EXTENSION"] = ".mtx"
        else:
            env["TRAINING_DATA_EXTENSION"] = ".csv"

        if problem in [BINARY, BINARY_TEXT]:
            labels = resources.class_labels(framework, problem)
            env["NEGATIVE_CLASS_LABEL"] = labels[0]
            env["POSITIVE_CLASS_LABEL"] = labels[1]
        elif problem == MULTICLASS:
            labels = resources.class_labels(framework, problem)
            with open(os.path.join(tmp_path, "class_labels.txt"), mode="w") as f:
                f.write("\n".join(labels))
                env["CLASS_LABELS_FILE"] = f.name

        _exec_shell_cmd(fit_sh, "Failed cmd {}".format(fit_sh), env=env)

        # clear env vars as it may affect next test cases
        unset_drum_supported_env_vars()
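# Hedged sketch, not the original helper: unset_drum_supported_env_vars() is relied on
# above to leave no environment state behind. A minimal version covering only the
# variables set in this test could look like the following; the real helper may clear
# a different, broader set of names.
import os


def _clear_fit_sh_env_vars_sketch():
    for name in (
        "CODEPATH",
        "INPUT_DIRECTORY",
        "ARTIFACT_DIRECTORY",
        "TARGET_TYPE",
        "TRAINING_DATA_EXTENSION",
        "NEGATIVE_CLASS_LABEL",
        "POSITIVE_CLASS_LABEL",
        "CLASS_LABELS_FILE",
    ):
        os.environ.pop(name, None)  # remove if present, ignore otherwise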
    def test_fit_hyperparameters(
        self,
        resources,
        framework,
        problem,
        docker,
        parameters,
        weights,
        tmp_path,
    ):
        if framework == RDS_HYPERPARAMETERS:
            language = R_FIT
        else:
            language = PYTHON

        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
            is_training=True,
        )

        input_dataset = resources.datasets(framework, problem)
        parameter_file = resources.datasets(framework, parameters)

        weights_cmd, input_dataset, __keep_this_around = self._add_weights_cmd(
            weights, input_dataset, r_fit=language == R_FIT)

        target_type = resources.target_types(
            problem) if "transform" not in framework else TRANSFORM

        cmd = "{} fit --target-type {} --code-dir {} --input {} --parameter-file {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND,
            target_type,
            custom_model_dir,
            input_dataset,
            parameter_file,
        )
        if problem != ANOMALY:
            cmd += " --target {}".format(resources.targets(problem))

        if problem in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(cmd,
                                        resources.class_labels(
                                            framework, problem),
                                        target_type=target_type)
        if docker:
            cmd += " --docker {} ".format(docker)

        cmd += weights_cmd

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_fit_for_use_output_and_nested(
        self,
        resources,
        framework,
        problem,
        docker,
        weights,
        use_output,
        tmp_path,
        nested,
    ):
        if docker and framework != SKLEARN:
            return
        if framework == RDS:
            language = R_FIT
        else:
            language = PYTHON

        custom_model_dir = _create_custom_model_dir(resources,
                                                    tmp_path,
                                                    framework,
                                                    problem,
                                                    language,
                                                    is_training=True,
                                                    nested=nested)

        input_dataset = resources.datasets(framework, problem)

        weights_cmd, input_dataset, __keep_this_around = self._add_weights_cmd(
            weights, input_dataset, r_fit=language == R_FIT)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, problem, custom_model_dir,
            input_dataset)
        if problem != ANOMALY:
            cmd += " --target {}".format(resources.targets(problem))

        if use_output:
            cmd += " --output {}".format(output)
        if problem == BINARY:
            cmd = _cmd_add_class_labels(cmd,
                                        resources.class_labels(
                                            framework, problem),
                                        target_type=problem)
        if docker:
            cmd += " --docker {} ".format(docker)

        cmd += weights_cmd

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_unstructured_models_batch(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        mimetype,
        ret_mode,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        output = tmp_path / "output"

        content_type = "--content-type '{};'".format(
            mimetype) if mimetype is not None else ""
        cmd = "{} score --code-dir {} --input {} --output {} --target-type unstructured {} --query 'ret_mode={}'".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            content_type,
            ret_mode,
        )

        if docker:
            cmd += " --docker {} --verbose ".format(docker)

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
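        # The checks below expect the value 10 from the unstructured model: returned as
        # big-endian bytes when ret_mode is "binary", or as text otherwise (a hedged
        # reading inferred from the assertions, not guaranteed by this snippet).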
        if ret_mode == "binary":
            with open(output, "rb") as f:
                out_data = f.read()
                assert 10 == int.from_bytes(out_data, byteorder="big")
        else:
            with open(output) as f:
                out_data = f.read()
                assert "10" in out_data
    def test_fit_schema_failure(self, resources, tmp_path):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            SKLEARN_BINARY_SCHEMA_VALIDATION,
            BINARY,
            PYTHON,
            is_training=True,
            include_metadata=True,
        )

        input_dataset = resources.datasets(SKLEARN, BINARY_TEXT)
        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --target-type {} --code-dir {} --target {} --input {} --verbose".format(
            ArgumentsOptions.MAIN_COMMAND,
            BINARY,
            custom_model_dir,
            resources.targets(BINARY_TEXT),
            input_dataset,
        )
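        # _exec_shell_cmd asserts on failure by default, so the expected schema
        # validation error surfaces here as an AssertionError; the inner stderr check
        # only runs if the command unexpectedly succeeds.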
        with pytest.raises(AssertionError):
            _, _, stderr = _exec_shell_cmd(
                cmd, "Failed in {} command line! {}".format(
                    ArgumentsOptions.MAIN_COMMAND, cmd))
            assert "DrumSchemaValidationException" in stderr
    def test_drum_monitoring_with_mlops_installed(self, resources, framework,
                                                  problem, language, docker,
                                                  tmp_path):
        cmd, input_file, output_file, mlops_spool_dir = TestMLOpsMonitoring._drum_with_monitoring(
            resources, framework, problem, language, docker, tmp_path)

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        in_data = pd.read_csv(input_file)
        out_data = pd.read_csv(output_file)
        assert in_data.shape[0] == out_data.shape[0]

        print("Spool dir {}".format(mlops_spool_dir))
        assert os.path.isdir(mlops_spool_dir)
        assert os.path.isfile(os.path.join(mlops_spool_dir, "fs_spool.1"))
    def test_drum_help(self, cmd):
        _, stdo, _ = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )
        assert "usage: drum" in str(stdo)
    def test_custom_models_with_drum(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
        use_labels_file,
        temp_file,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        output = tmp_path / "output"

        cmd = '{} score --code-dir {} --input "{}" --output {} --target-type {}'.format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            resources.target_types(problem),
        )
        if resources.target_types(problem) in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
                multiclass_label_file=temp_file if use_labels_file else None,
            )
        if docker:
            cmd += " --docker {} --verbose ".format(docker)

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        in_data = pd.read_csv(input_dataset)
        out_data = pd.read_csv(output)
        assert in_data.shape[0] == out_data.shape[0]
    def test_drum_bad_subparser(self):
        cmd = "{} some_command".format(ArgumentsOptions.MAIN_COMMAND)
        _, _, stde = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )
        assert "argument subparser_name: invalid choice: 'some_command'" in str(
            stde)
    def test_docker_image_with_wrong_dep_install(
        self,
        resources,
        framework,
        problem,
        code_dir,
        env_dir,
        tmp_path,
    ):

        custom_model_dir = os.path.join(MODEL_TEMPLATES_PATH, code_dir)

        tmp_dir = tmp_path / "tmp_code_dir"
        custom_model_dir = shutil.copytree(custom_model_dir, tmp_dir)
        with open(os.path.join(custom_model_dir, "requirements.txt"),
                  mode="w") as f:
            f.write("\nnon_existing_dep")

        docker_env = os.path.join(PUBLIC_DROPIN_ENVS_PATH, env_dir)
        input_dataset = resources.datasets(framework, problem)

        output = tmp_path / "output"

        cmd = '{} score --code-dir {} --input "{}" --output {} --target-type {}'.format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            resources.target_types(problem),
        )
        cmd = _cmd_add_class_labels(
            cmd,
            resources.class_labels(framework, problem),
            target_type=resources.target_types(problem),
            multiclass_label_file=None,
        )
        cmd += " --docker {} --verbose ".format(docker_env)

        _, _, stde = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        assert re.search(
            r"ERROR drum:  Failed to build a docker image",
            stde,
        )

        assert re.search(
            r"Could not find a version that satisfies the requirement non_existing_dep",
            stde,
        )
    def test_template_creation(self, language, language_suffix, tmp_path):
        print("Running template creation tests: {}".format(language))
        directory = tmp_path / "template_test_{}".format(uuid4())

        cmd = "{drum_prog} new model --language {language} --code-dir {directory}".format(
            drum_prog=ArgumentsOptions.MAIN_COMMAND,
            language=language,
            directory=directory)

        _exec_shell_cmd(
            cmd,
            "Failed creating a template for custom model, cmd={}".format(cmd))

        assert os.path.isdir(
            directory), "Directory {} does not exist (or is not a dir)".format(
                directory)

        assert os.path.isfile(os.path.join(directory, "README.md"))
        custom_file = os.path.join(directory,
                                   CUSTOM_FILE_NAME + language_suffix)
        assert os.path.isfile(custom_file)
def test_fit_variety(framework, variety_resources, resources,
                     variety_data_names, tmp_path):

    # get data info from fixtures
    df = variety_data_names
    df_path = variety_resources.dataset(df)
    problem = variety_resources.problem(df)
    target = variety_resources.target(df)
    if problem == BINARY:
        class_labels = variety_resources.class_labels(df)
        if framework == RDS:
            # there's one annoying dataset where R needs 0 and 1 and python wants 1.0 and 0.0
            class_labels = [
                int(x) if type(x) is float else x for x in class_labels
            ]
    # figure out language
    if framework == RDS:
        language = R_FIT
    else:
        language = PYTHON

    custom_model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
        is_training=True,
        nested=False,
    )

    output = tmp_path / "output"
    output.mkdir()

    cmd = "{} fit --code-dir {} --input {} --verbose ".format(
        ArgumentsOptions.MAIN_COMMAND, custom_model_dir, df_path)
    if problem == ANOMALY:
        cmd += " --unsupervised"
    else:
        cmd += " --target {}".format(target)

    if problem == BINARY:
        cmd = _cmd_add_class_labels(cmd, class_labels, target_type=problem)

    p, _, err = _exec_shell_cmd(
        cmd,
        "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND,
                                               cmd),
        assert_if_fail=False,
    )

    if p.returncode != 0:
        raise AssertionError(err)
    def test_custom_models_perf_test(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        cmd = "{} perf-test -i 200 -s 1000 --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )

        if resources.target_types(problem) in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )

        if docker:
            cmd += " --docker {}".format(docker)

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_custom_models_perf_test(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        timeout,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        cmd = "{} perf-test -i 200 -s 1000 --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )

        if timeout is not None:
            cmd += " --timeout {}".format(timeout)

        if resources.target_types(problem) in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )

        if docker:
            cmd += " --docker {}".format(docker)

        _, stdo, _ = _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        if timeout is not None:
            expected_str = "timed out ({}s)".format(timeout)
            assert expected_str in stdo
            assert "NA" in stdo
        else:
            assert "NA" not in stdo
    def test_drum_monitoring_fails_in_unstructured_mode(
            self, resources, framework, problem, language, docker, tmp_path):
        cmd, input_file, output_file, mlops_spool_dir = TestMLOpsMonitoring._drum_with_monitoring(
            resources, framework, problem, language, docker, tmp_path)

        cmd += " --target-type unstructured"
        _, stdo, _ = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        assert str(stdo).find(
            "MLOps monitoring can not be used in unstructured mode") != -1
    def test_validation_check_with_bad_column_names(
        self,
        resources,
        framework,
        problem,
        language,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        column_names = [
            "column",
            "col/unm",
            "col\\unm",
            'col"umn',
            "col umn",
            "col:umn",
            'col""umn',
        ]
        d = {col: [1.0] for col in column_names}
        df = pd.DataFrame(data=d)

        with NamedTemporaryFile(mode="w") as temp_f:
            df.to_csv(temp_f.name)

            input_dataset = temp_f.name

            cmd = "{} validation --code-dir {} --input {} --target-type {}".format(
                ArgumentsOptions.MAIN_COMMAND,
                custom_model_dir,
                input_dataset,
                resources.target_types(problem),
            )

            _, stdo, _ = _exec_shell_cmd(
                cmd,
                "Failed in {} command line! {}".format(
                    ArgumentsOptions.MAIN_COMMAND, cmd),
                assert_if_fail=False,
            )

            assert re.search(r"Null value imputation\s+PASSED", stdo)
    def test_set_language(
        self,
        resources,
        framework,
        problem,
        language,
        set_language,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )
        input_dataset = resources.datasets(framework, problem)
        cmd = "{} score --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )
        if set_language:
            cmd += " --language {}".format(set_language)
        if problem == BINARY:
            cmd += " --positive-class-label yes --negative-class-label no"

        p, stdo, stde = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )
        if not set_language:
            stdo_stde = str(stdo) + str(stde)
            cases_4_5_6_7 = (str(stdo_stde).find(
                "Can not detect language by artifacts and/or custom.py/R files"
            ) != -1)
            assert cases_4_5_6_7
        if framework == CODEGEN_AND_SKLEARN and set_language == "r":
            stdo_stde = str(stdo) + str(stde)
            case = (str(stdo_stde).find(
                "Could not find a serialized model artifact with .rds extension, supported by default R predictor. "
                "If your artifact is not supported by default predictor, implement custom.load_model hook."
            ) != -1)
            assert case
    def test_prediction_consistency(self, resources, tmp_path, framework,
                                    problem):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            SPARSE,
            language=PYTHON,
            is_training=True,
        )

        input_dataset = resources.datasets(framework, problem)

        if problem in [BINARY_TEXT, BINARY_BOOL]:
            target_type = BINARY
        else:
            target_type = problem

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, target_type, custom_model_dir,
            input_dataset)
        cmd += " --target {}".format(resources.targets(problem))

        if target_type in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd, resources.class_labels(framework, problem), target_type)

        _, stdout, stderr = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=True,
        )

        # we should throw a warning, not an error
        assert "Your predictions were different when we tried to predict twice." in stderr
        # but don't error out
        assert (
            "Your model can be fit to your data,  and predictions can be made on the fit model!"
            in stdout)
        # clean up
        sample_dir = stderr.split(":")[-1]
        if sample_dir.endswith("\n"):
            sample_dir = sample_dir[:-1]
        os.remove(sample_dir.strip())
    def test_custom_models_validation_test(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        cmd = "{} validation --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )
        if problem == BINARY:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )
        if docker:
            cmd += " --docker {}".format(docker)

        _, stdo, _ = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        if language == NO_CUSTOM:
            assert re.search(r"Null value imputation\s+FAILED", stdo)
        else:
            assert re.search(r"Null value imputation\s+PASSED", stdo)
    def test_bin_models_with_wrong_labels(
        self,
        resources,
        framework,
        problem,
        language,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)
        cmd = "{} score --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )
        if problem == BINARY:
            cmd = cmd + " --positive-class-label yes --negative-class-label no"

        p, stdo, stde = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        stdo_stde = str(stdo) + str(stde)

        if framework == SKLEARN:
            assert (str(stdo_stde).find(
                "Wrong class labels ['no', 'yes']. Use class labels detected by sklearn model"
            ) != -1)
        elif framework == RDS:
            assert (str(stdo_stde).find(
                "Wrong class labels. Use class labels according to your dataset"
            ) != -1)
    def test_drum_monitoring_no_mlops_installed(self, resources, framework,
                                                problem, language, docker,
                                                tmp_path):
        """
        We expect the run of drum to fail since the mlops package is assumed to not be installed
        Returns
        -------

        """
        cmd, _, _, _ = TestMLOpsMonitoring._drum_with_monitoring(
            resources, framework, problem, language, docker, tmp_path)
        p, _, _ = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )
        assert (
            p.returncode != 0
        ), "drum should fail when datarobot-mlops is not installed and monitoring is requested"