    def test_duplicate_target_name(self, resources, tmp_path):
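        """Fit a regression model on sparse data where the target column name also
        appears in the feature data; the target is passed separately via --target-csv."""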
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            SKLEARN_REGRESSION,
            SPARSE,
            language=PYTHON,
            is_training=True,
        )

        input_dataset = resources.datasets(SKLEARN_REGRESSION,
                                           TARGET_NAME_DUPLICATED_X)
        target_dataset = resources.datasets(SKLEARN_REGRESSION,
                                            TARGET_NAME_DUPLICATED_Y)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --code-dir {} --input {} --target-type {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, custom_model_dir, input_dataset,
            REGRESSION)

        cmd += " --target-csv " + target_dataset
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_predictors_supported_payload_formats(
        self,
        resources,
        framework,
        problem,
        language,
        supported_payload_formats,
        tmp_path,
    ):
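        """Start the prediction server and verify GET /capabilities/ reports exactly
        the payload formats supported for this framework and language."""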
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
        ) as run:
            response = requests.get(run.url_server_address + "/capabilities/")

            assert response.ok
            assert response.json() == {
                "supported_payload_formats": supported_payload_formats
            }
    def test_perf_test_drum_server_kill(
        self, resources, framework, problem, language, docker, tmp_path,
    ):
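        """Start `drum perf-test` in the background, kill the parent process, verify
        its child prediction server is still alive, then kill the server and verify
        it is gone."""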
        custom_model_dir = _create_custom_model_dir(
            resources, tmp_path, framework, problem, language,
        )

        input_dataset = resources.datasets(framework, problem)

        cmd = "{} perf-test -i 10 -s 10 --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )

        # wait for drum perf-test server from prev test case to be stopped
        time.sleep(0.5)
        assert _find_drum_perf_test_server_process() is None
        p = subprocess.Popen(cmd, shell=True, env=os.environ, universal_newlines=True,)
        time.sleep(2)
        # kill drum perf-test process, child server should be running
        p.kill()
        pid = _find_drum_perf_test_server_process()
        assert pid is not None
        _kill_drum_perf_test_server_process(pid)
        assert _find_drum_perf_test_server_process() is None
    def test_custom_model_with_all_hooks(
        self,
        resources,
        framework,
        language,
        hooks_list,
        target_type,
        tmp_path,
    ):
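        """Score with a custom model that implements all predict hooks and verify the
        structured or unstructured output reflects the number of hooks that ran."""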
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            None,
            language,
        )

        input_dataset = resources.datasets(framework, REGRESSION)

        output = tmp_path / "output"

        cmd = "{} score --code-dir {} --input {} --output {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND, custom_model_dir, input_dataset,
            output, target_type)
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        if hooks_list == CustomHooks.ALL_PREDICT_STRUCTURED:
            preds = pd.read_csv(output)
            assert (preds["Predictions"] == len(hooks_list)).all(), preds
        elif hooks_list == CustomHooks.ALL_PREDICT_UNSTRUCTURED:
            with open(output) as f:
                all_data = f.read()
                assert str(len(hooks_list)) in all_data
    def test_fit_schema_validation(self, resources, tmp_path):
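        """Fit a binary model whose code dir includes model-metadata with a schema;
        validation is expected to pass on a matching dataset."""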
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            SKLEARN_BINARY_SCHEMA_VALIDATION,
            BINARY,
            PYTHON,
            is_training=True,
            include_metadata=True,
        )

        input_dataset = resources.datasets(SKLEARN, BINARY)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --target-type {} --code-dir {} --target {} --input {} --verbose".format(
            ArgumentsOptions.MAIN_COMMAND,
            BINARY,
            custom_model_dir,
            resources.targets(BINARY),
            input_dataset,
        )
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_drum_push_training(
        self,
        resources,
        framework,
        problem,
        language,
        tmp_path,
        get_target,
        sklearn_drop_in_env,
    ):
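        """Generate model-metadata.yaml for a training model against the sklearn
        drop-in environment and push it with `drum push`."""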
        custom_model_dir = _create_custom_model_dir(resources,
                                                    tmp_path,
                                                    framework,
                                                    problem,
                                                    language,
                                                    is_training=True)

        env_id, _ = sklearn_drop_in_env
        yaml_string = get_push_yaml(env_id,
                                    resources.datasets(framework, problem),
                                    problem, get_target(problem))
        with open(os.path.join(custom_model_dir, "model-metadata.yaml"),
                  "w") as outfile:
            yaml.dump(yaml.safe_load(yaml_string), outfile)

        cmd = "{} push --code-dir {} --verbose".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
        )
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_transform_fit(
        self, resources, framework, problem, weights, tmp_path,
    ):
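        """Fit a transform task (--target-type transform) from the command line, with
        optional weights and class labels."""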
        language = PYTHON
        custom_model_dir = _create_custom_model_dir(
            resources, tmp_path, framework, problem, language=framework,
        )

        input_dataset = resources.datasets(framework, problem)

        weights_cmd, input_dataset, __keep_this_around = self._add_weights_cmd(
            weights, input_dataset, r_fit=language == R_FIT
        )

        target_type = TRANSFORM

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, target_type, custom_model_dir, input_dataset
        )
        if problem != ANOMALY:
            cmd += " --target {}".format(resources.targets(problem))

        if problem in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd, resources.class_labels(framework, problem), target_type=target_type
            )

        cmd += weights_cmd

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd)
        )
    def test_custom_models_with_drum_nginx_prediction_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
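        """Run the prediction server behind nginx, POST the dataset to /predict/ and
        /predictions/ as multipart and raw data, check the prediction count, and
        verify the /info/ endpoint reports the nginx + uwsgi server."""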
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
                nginx=True,
        ) as run:
            input_dataset = resources.datasets(framework, problem)

            # do predictions
            for endpoint in ["/predict/", "/predictions/"]:
                for post_args in [
                    {
                        "files": {
                            "X": open(input_dataset)
                        }
                    },
                    {
                        "data": open(input_dataset, "rb")
                    },
                ]:
                    response = requests.post(run.url_server_address + endpoint,
                                             **post_args)

                    assert response.ok
                    actual_num_predictions = len(
                        json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                    in_data = pd.read_csv(input_dataset)
                    assert in_data.shape[0] == actual_num_predictions

            # test model info
            response = requests.get(run.url_server_address + "/info/")

            assert response.ok
            response_dict = response.json()
            for key in ModelInfoKeys.REQUIRED:
                assert key in response_dict
            assert response_dict[
                ModelInfoKeys.TARGET_TYPE] == resources.target_types(problem)
            assert response_dict[ModelInfoKeys.DRUM_SERVER] == "nginx + uwsgi"
            assert response_dict[ModelInfoKeys.DRUM_VERSION] == drum_version

            assert ModelInfoKeys.MODEL_METADATA in response_dict
    def test_fit_sparse(self, resources, tmp_path, framework):
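        """Fit on a sparse .mtx dataset, supplying the target via --target-csv and
        the column names via --sparse-column-file."""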
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            SPARSE,
            language=R_FIT if framework == RDS else PYTHON,
            is_training=True,
        )

        input_dataset = resources.datasets(framework, SPARSE)
        target_dataset = resources.datasets(framework, SPARSE_TARGET)
        columns = resources.datasets(framework, SPARSE_COLUMNS)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --code-dir {} --input {} --target-type {} --verbose --sparse-column-file {}".format(
            ArgumentsOptions.MAIN_COMMAND, custom_model_dir, input_dataset, REGRESSION, columns
        )

        cmd += " --target-csv " + target_dataset
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd)
        )
    def test_unstructured_mode_prediction_server_wrong_endpoint(
        self,
        resources,
        framework,
        problem,
        language,
        tmp_path,
    ):
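        """POST to the structured /predict/ and /predictions/ endpoints of an
        unstructured model and expect HTTP 422 with a message pointing to the
        unstructured endpoints."""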
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
        ) as run:
            for endpoint in ["/predict/", "/predictions/"]:
                response = requests.post(url=run.url_server_address + endpoint)
                assert response.status_code == HTTP_422_UNPROCESSABLE_ENTITY
                expected_msg = "ERROR: This model has target type 'unstructured', use the /predictUnstructured/ or /predictionsUnstructured/ endpoint."
                assert response.json()["message"] == expected_msg
    def test_fit_schema_failure(self, resources, tmp_path):
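        """Fit against a dataset that violates the model's schema and expect the run
        to fail with a DrumSchemaValidationException reported in stderr."""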
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            SKLEARN_BINARY_SCHEMA_VALIDATION,
            BINARY,
            PYTHON,
            is_training=True,
            include_metadata=True,
        )

        input_dataset = resources.datasets(SKLEARN, BINARY_TEXT)
        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --target-type {} --code-dir {} --target {} --input {} --verbose".format(
            ArgumentsOptions.MAIN_COMMAND,
            BINARY,
            custom_model_dir,
            resources.targets(BINARY_TEXT),
            input_dataset,
        )
        # the fit is expected to fail schema validation, so capture the output
        # instead of asserting on the exit code
        p, _, stderr = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )
        assert p.returncode != 0
        assert "DrumSchemaValidationException" in stderr
    def test_fit_simple(
        self,
        resources,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            SIMPLE,
            REGRESSION,
            PYTHON,
            is_training=True,
            nested=True,
        )

        input_dataset = resources.datasets(SKLEARN, REGRESSION)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --target-type {} --code-dir {} --target {} --input {} --verbose".format(
            ArgumentsOptions.MAIN_COMMAND,
            REGRESSION,
            custom_model_dir,
            resources.targets(REGRESSION),
            input_dataset,
        )
        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_sparse_transform_fit(
        self,
        framework,
        resources,
        tmp_path,
    ):
        input_dataset = resources.datasets(None, SPARSE)
        target_dataset = resources.datasets(None, SPARSE_TARGET)

        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            REGRESSION,
            language=framework,
        )
        columns = resources.datasets(framework, SPARSE_COLUMNS)

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose --target-csv {} --sparse-column-file {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            TRANSFORM,
            custom_model_dir,
            input_dataset,
            target_dataset,
            columns,
        )

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_fit_sh(
        self, resources, framework, problem, weights, tmp_path,
    ):
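        """Invoke the drop-in environment's fit.sh directly, driving it through its
        expected environment variables (CODEPATH, INPUT_DIRECTORY, ARTIFACT_DIRECTORY,
        TARGET_TYPE, ...)."""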
        custom_model_dir = _create_custom_model_dir(
            resources, tmp_path, framework, problem, PYTHON, is_training=True,
        )

        env = os.environ
        fit_sh = os.path.join(
            TESTS_ROOT_PATH,
            "..",
            "public_dropin_environments/{}_{}/fit.sh".format(
                PYTHON,
                framework
                if framework
                not in [SKLEARN_ANOMALY, SKLEARN_BINARY, SKLEARN_MULTICLASS, SKLEARN_SPARSE]
                else SKLEARN,
            ),
        )

        input_dir = tmp_path / "input_dir"
        self._create_fit_input_data_dir(
            resources.targets,
            resources.datasets,
            input_dir,
            problem,
            weights,
            is_sparse=framework == SKLEARN_SPARSE,
        )

        output = tmp_path / "output"
        output.mkdir()

        unset_drum_supported_env_vars()

        env["CODEPATH"] = str(custom_model_dir)
        env["INPUT_DIRECTORY"] = str(input_dir)
        env["ARTIFACT_DIRECTORY"] = str(output)
        env["TARGET_TYPE"] = problem if problem != BINARY_TEXT else BINARY
        if framework == SKLEARN_SPARSE:
            env["TRAINING_DATA_EXTENSION"] = ".mtx"
        else:
            env["TRAINING_DATA_EXTENSION"] = ".csv"

        if problem in [BINARY, BINARY_TEXT]:
            labels = resources.class_labels(framework, problem)
            env["NEGATIVE_CLASS_LABEL"] = labels[0]
            env["POSITIVE_CLASS_LABEL"] = labels[1]
        elif problem == MULTICLASS:
            labels = resources.class_labels(framework, problem)
            with open(os.path.join(tmp_path, "class_labels.txt"), mode="w") as f:
                f.write("\n".join(labels))
                env["CLASS_LABELS_FILE"] = f.name

        _exec_shell_cmd(fit_sh, "Failed cmd {}".format(fit_sh), env=env)

        # clear env vars as it may affect next test cases
        unset_drum_supported_env_vars()
    def test_fit_hyperparameters(
        self,
        resources,
        framework,
        problem,
        docker,
        parameters,
        weights,
        tmp_path,
    ):
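        """Fit with hyperparameters supplied through --parameter-file, for both the
        Python and R (RDS_HYPERPARAMETERS) frameworks."""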
        if framework == RDS_HYPERPARAMETERS:
            language = R_FIT
        else:
            language = PYTHON

        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
            is_training=True,
        )

        input_dataset = resources.datasets(framework, problem)
        parameter_file = resources.datasets(framework, parameters)

        weights_cmd, input_dataset, __keep_this_around = self._add_weights_cmd(
            weights, input_dataset, r_fit=language == R_FIT)

        target_type = resources.target_types(
            problem) if "transform" not in framework else TRANSFORM

        cmd = "{} fit --target-type {} --code-dir {} --input {} --parameter-file {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND,
            target_type,
            custom_model_dir,
            input_dataset,
            parameter_file,
        )
        if problem != ANOMALY:
            cmd += " --target {}".format(resources.targets(problem))

        if problem in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(cmd,
                                        resources.class_labels(
                                            framework, problem),
                                        target_type=target_type)
        if docker:
            cmd += " --docker {} ".format(docker)

        cmd += weights_cmd

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_fit_for_use_output_and_nested(
        self,
        resources,
        framework,
        problem,
        docker,
        weights,
        use_output,
        tmp_path,
        nested,
    ):
        if docker and framework != SKLEARN:
            return
        if framework == RDS:
            language = R_FIT
        else:
            language = PYTHON

        custom_model_dir = _create_custom_model_dir(resources,
                                                    tmp_path,
                                                    framework,
                                                    problem,
                                                    language,
                                                    is_training=True,
                                                    nested=nested)

        input_dataset = resources.datasets(framework, problem)

        weights_cmd, input_dataset, __keep_this_around = self._add_weights_cmd(
            weights, input_dataset, r_fit=language == R_FIT)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, problem, custom_model_dir,
            input_dataset)
        if problem != ANOMALY:
            cmd += " --target {}".format(resources.targets(problem))

        if use_output:
            cmd += " --output {}".format(output)
        if problem == BINARY:
            cmd = _cmd_add_class_labels(cmd,
                                        resources.class_labels(
                                            framework, problem),
                                        target_type=problem)
        if docker:
            cmd += " --docker {} ".format(docker)

        cmd += weights_cmd

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
    def test_custom_transform_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
        use_arrow,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:
            input_dataset = resources.datasets(framework, problem)
            # do predictions
            files = {"X": open(input_dataset)}
            if use_arrow:
                files["arrow_version"] = ".2"

            response = requests.post(run.url_server_address + "/transform/",
                                     files=files)
            print(response.text)
            assert response.ok

            in_data = pd.read_csv(input_dataset)

            # parse the response payload once rather than re-evaluating it for each check
            response_payload = eval(response.text)
            if framework == SKLEARN_TRANSFORM_DENSE:
                if use_arrow:
                    transformed_out = read_arrow_payload(response_payload)
                    assert response_payload["out.format"] == "arrow"
                else:
                    transformed_out = read_csv_payload(response_payload)
                    assert response_payload["out.format"] == "csv"
                actual_num_predictions = transformed_out.shape[0]
            else:
                transformed_out = read_mtx_payload(response_payload)
                actual_num_predictions = transformed_out.shape[0]
                assert response_payload["out.format"] == "sparse"
            validate_transformed_output(
                transformed_out,
                should_be_sparse=framework == SKLEARN_TRANSFORM)
            assert in_data.shape[0] == actual_num_predictions
def test_fit_variety(framework, variety_resources, resources,
                     variety_data_names, tmp_path):
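    """Fit each 'variety' dataset end to end, choosing the target, class labels, and
    language based on the dataset and framework."""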

    # get data info from fixtures
    df = variety_data_names
    df_path = variety_resources.dataset(df)
    problem = variety_resources.problem(df)
    target = variety_resources.target(df)
    if problem == BINARY:
        class_labels = variety_resources.class_labels(df)
        if framework == RDS:
            # there's one annoying dataset where R needs 0 and 1 and python wants 1.0 and 0.0
            class_labels = [
                int(x) if type(x) is float else x for x in class_labels
            ]
    # figure out language
    if framework == RDS:
        language = R_FIT
    else:
        language = PYTHON

    custom_model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
        is_training=True,
        nested=False,
    )

    output = tmp_path / "output"
    output.mkdir()

    cmd = "{} fit --code-dir {} --input {} --verbose ".format(
        ArgumentsOptions.MAIN_COMMAND, custom_model_dir, df_path)
    if problem == ANOMALY:
        cmd += " --unsupervised"
    else:
        cmd += " --target {}".format(target)

    if problem == BINARY:
        cmd = _cmd_add_class_labels(cmd, class_labels, target_type=problem)

    p, _, err = _exec_shell_cmd(
        cmd,
        "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND,
                                               cmd),
        assert_if_fail=False,
    )

    if p.returncode != 0:
        raise AssertionError(err)
    def test_predictions_r_mtx(
        self,
        resources,
        framework,
        problem,
        language,
        nginx,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                nginx=nginx,
        ) as run:
            input_dataset = resources.datasets(framework, SPARSE)

            # do predictions
            for endpoint in ["/predict/", "/predictions/"]:
                for post_args in [
                    {
                        "files": {
                            "X": ("X.mtx", open(input_dataset))
                        }
                    },
                    {
                        "data": open(input_dataset),
                        "headers": {
                            "Content-Type":
                            "{};".format(PredictionServerMimetypes.TEXT_MTX)
                        },
                    },
                ]:
                    response = requests.post(run.url_server_address + endpoint,
                                             **post_args)

                    assert response.ok
                    actual_num_predictions = len(
                        json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                    in_data = StructuredInputReadUtils.read_structured_input_file_as_df(
                        input_dataset)
                    assert in_data.shape[0] == actual_num_predictions
    def test_custom_models_perf_test(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        timeout,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        cmd = "{} perf-test -i 200 -s 1000 --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )

        if timeout is not None:
            cmd += " --timeout {}".format(timeout)

        if resources.target_types(problem) in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )

        if docker:
            cmd += " --docker {}".format(docker)

        _, stdo, _ = _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        if timeout is not None:
            expected_str = "timed out ({}s)".format(timeout)
            assert expected_str in stdo
            assert "NA" in stdo
        else:
            assert "NA" not in stdo
    def test_validation_check_with_bad_column_names(
        self,
        resources,
        framework,
        problem,
        language,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        column_names = [
            "column",
            "col/unm",
            "col\\unm",
            'col"umn',
            "col umn",
            "col:umn",
            'col""umn',
        ]
        d = {col: [1.0] for col in column_names}
        df = pd.DataFrame(data=d)

        with NamedTemporaryFile(mode="w") as temp_f:
            df.to_csv(temp_f.name)

            input_dataset = temp_f.name

            cmd = "{} validation --code-dir {} --input {} --target-type {}".format(
                ArgumentsOptions.MAIN_COMMAND,
                custom_model_dir,
                input_dataset,
                resources.target_types(problem),
            )

            _, stdo, _ = _exec_shell_cmd(
                cmd,
                "Failed in {} command line! {}".format(
                    ArgumentsOptions.MAIN_COMMAND, cmd),
                assert_if_fail=False,
            )

            assert re.search(r"Null value imputation\s+PASSED", stdo)
    def test_unstructured_models_batch(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        mimetype,
        ret_mode,
        tmp_path,
    ):
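        """Score an unstructured model in batch mode with an optional --content-type
        and a ret_mode query parameter, checking both text and binary outputs."""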
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        output = tmp_path / "output"

        content_type = "--content-type '{};'".format(
            mimetype) if mimetype is not None else ""
        cmd = "{} score --code-dir {} --input {} --output {} --target-type unstructured {} --query 'ret_mode={}'".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            content_type,
            ret_mode,
        )

        if docker:
            cmd += " --docker {} --verbose ".format(docker)

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        if ret_mode == "binary":
            with open(output, "rb") as f:
                out_data = f.read()
                assert 10 == int.from_bytes(out_data, byteorder="big")
        else:
            with open(output) as f:
                out_data = f.read()
                assert "10" in out_data
    def test_set_language(
        self,
        resources,
        framework,
        problem,
        language,
        set_language,
        tmp_path,
    ):
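        """Score with and without --language and assert the expected error messages
        when the language cannot be detected or no matching artifact is found."""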
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )
        input_dataset = resources.datasets(framework, problem)
        cmd = "{} score --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )
        if set_language:
            cmd += " --language {}".format(set_language)
        if problem == BINARY:
            cmd += " --positive-class-label yes --negative-class-label no"

        p, stdo, stde = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )
        if not set_language:
            stdo_stde = str(stdo) + str(stde)
            # cases 4-7: language can not be detected without --language
            assert (
                "Can not detect language by artifacts and/or custom.py/R files"
                in stdo_stde
            )
        if framework == CODEGEN_AND_SKLEARN and set_language == "r":
            stdo_stde = str(stdo) + str(stde)
            assert (
                "Could not find a serialized model artifact with .rds extension, supported by default R predictor. "
                "If your artifact is not supported by default predictor, implement custom.load_model hook."
                in stdo_stde
            )
    def test_custom_models_with_drum_nginx_prediction_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
                nginx=True,
        ) as run:
            input_dataset = resources.datasets(framework, problem)

            # do predictions
            for endpoint in ["/predict/", "/predictions/"]:
                for post_args in [
                    {
                        "files": {
                            "X": open(input_dataset)
                        }
                    },
                    {
                        "data": open(input_dataset, "rb")
                    },
                ]:
                    response = requests.post(run.url_server_address + endpoint,
                                             **post_args)

                    assert response.ok
                    actual_num_predictions = len(
                        json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                    in_data = pd.read_csv(input_dataset)
                    assert in_data.shape[0] == actual_num_predictions
    def test_custom_models_with_drum(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
        use_labels_file,
        temp_file,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        output = tmp_path / "output"

        cmd = '{} score --code-dir {} --input "{}" --output {} --target-type {}'.format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            resources.target_types(problem),
        )
        if resources.target_types(problem) in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
                multiclass_label_file=temp_file if use_labels_file else None,
            )
        if docker:
            cmd += " --docker {} --verbose ".format(docker)

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        in_data = pd.read_csv(input_dataset)
        out_data = pd.read_csv(output)
        assert in_data.shape[0] == out_data.shape[0]
    def test_prediction_consistency(self, resources, tmp_path, framework,
                                    problem):
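        """Fit on data expected to yield inconsistent repeated predictions and verify
        a consistency warning is written to stderr while the fit validation itself
        still succeeds."""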
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            SPARSE,
            language=PYTHON,
            is_training=True,
        )

        input_dataset = resources.datasets(framework, problem)

        if problem in [BINARY_TEXT, BINARY_BOOL]:
            target_type = BINARY
        else:
            target_type = problem

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, target_type, custom_model_dir,
            input_dataset)
        cmd += " --target {}".format(resources.targets(problem))

        if target_type in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd, resources.class_labels(framework, problem), target_type)

        _, stdout, stderr = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=True,
        )

        # we should throw a warning, not an error
        assert "Your predictions were different when we tried to predict twice." in stderr
        # but don't error out
        assert (
            "Your model can be fit to your data,  and predictions can be made on the fit model!"
            in stdout)
        # clean up
        sample_dir = stderr.split(":")[-1].strip()
        os.remove(sample_dir)
    def test_custom_models_validation_test(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
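        """Run `drum validation` and check that the null value imputation test fails
        without custom code (NO_CUSTOM) and passes otherwise."""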
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        cmd = "{} validation --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )
        if problem == BINARY:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )
        if docker:
            cmd += " --docker {}".format(docker)

        _, stdo, _ = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        if language == NO_CUSTOM:
            assert re.search(r"Null value imputation\s+FAILED", stdo)
        else:
            assert re.search(r"Null value imputation\s+PASSED", stdo)
    def test_bin_models_with_wrong_labels(
        self,
        resources,
        framework,
        problem,
        language,
        tmp_path,
    ):
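        """Score a binary model with deliberately wrong class labels and assert the
        framework-specific error message is printed."""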
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)
        cmd = "{} score --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )
        if problem == BINARY:
            cmd = cmd + " --positive-class-label yes --negative-class-label no"

        p, stdo, stde = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        stdo_stde = str(stdo) + str(stde)

        if framework == SKLEARN:
            assert (
                "Wrong class labels ['no', 'yes']. Use class labels detected by sklearn model"
                in stdo_stde
            )
        elif framework == RDS:
            assert (
                "Wrong class labels. Use class labels according to your dataset"
                in stdo_stde
            )
    def test_custom_models_with_drum_prediction_server(
        self,
        resources,
        framework,
        problem,
        language,
        nginx,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
                nginx=nginx,
        ) as run:
            input_dataset = resources.datasets(framework, problem)

            for ret_mode in ["text", "binary"]:
                for endpoint in [
                        "/predictUnstructured/", "/predictionsUnstructured/"
                ]:
                    # do predictions
                    url = run.url_server_address + endpoint
                    data = open(input_dataset, "rb").read()
                    params = {"ret_mode": ret_mode}
                    response = requests.post(url=url, data=data, params=params)

                    assert response.ok
                    if ret_mode == "text":
                        assert response.text == "10"
                    else:
                        assert 10 == int.from_bytes(response.content,
                                                    byteorder="big")
    def _drum_with_monitoring(resources, framework, problem, language, docker,
                              tmp_path):
        """
        We expect the run of drum to be ok, since mlops is assumed to be installed.
        """
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        mlops_spool_dir = tmp_path / "mlops_spool"
        os.mkdir(str(mlops_spool_dir))

        input_dataset = resources.datasets(framework, problem)
        output = tmp_path / "output"

        cmd = "{} score --code-dir {} --input {} --output {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            resources.target_types(problem),
        )
        monitor_settings = (
            "spooler_type=filesystem;directory={};max_files=1;file_max_size=1024000"
            .format(mlops_spool_dir))
        cmd += ' --monitor --model-id 555 --deployment-id 777 --monitor-settings="{}"'.format(
            monitor_settings)

        if problem == BINARY:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )
        if docker:
            cmd += " --docker {} --verbose ".format(docker)

        return cmd, input_dataset, output, mlops_spool_dir
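
    # Hedged usage sketch (an assumption, not part of the original suite): a monitoring
    # test could build the command with the helper above and then run it, assuming the
    # helper is reachable as a static method on the test class, e.g.
    #
    #   cmd, input_dataset, output, mlops_spool_dir = _drum_with_monitoring(
    #       resources, framework, problem, language, docker, tmp_path)
    #   _exec_shell_cmd(cmd, "Failed in {} command line! {}".format(
    #       ArgumentsOptions.MAIN_COMMAND, cmd))
    #   # the mlops filesystem spooler is expected to have written spool files
    #   assert len(os.listdir(str(mlops_spool_dir))) > 0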