Example 1
    def test_transform_fit(
        self, resources, framework, problem, weights, tmp_path,
    ):
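        # Run drum fit on a transform task, optionally adding a weights column
        # to the input dataset, and expect the command to complete successfully.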
        language = PYTHON
        custom_model_dir = _create_custom_model_dir(
            resources, tmp_path, framework, problem, language=framework,
        )

        input_dataset = resources.datasets(framework, problem)

        weights_cmd, input_dataset, __keep_this_around = self._add_weights_cmd(
            weights, input_dataset, r_fit=language == R_FIT
        )

        target_type = TRANSFORM

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, target_type, custom_model_dir, input_dataset
        )
        if problem != ANOMALY:
            cmd += " --target {}".format(resources.targets(problem))

        if problem in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd, resources.class_labels(framework, problem), target_type=target_type
            )

        cmd += weights_cmd

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND, cmd)
        )

    def test_fit_hyperparameters(
        self,
        resources,
        framework,
        problem,
        docker,
        parameters,
        weights,
        tmp_path,
    ):
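        # Run drum fit with a --parameter-file so hyperparameters are passed to
        # the custom task; R is used for RDS_HYPERPARAMETERS, Python otherwise.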
        if framework == RDS_HYPERPARAMETERS:
            language = R_FIT
        else:
            language = PYTHON

        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
            is_training=True,
        )

        input_dataset = resources.datasets(framework, problem)
        parameter_file = resources.datasets(framework, parameters)

        weights_cmd, input_dataset, __keep_this_around = self._add_weights_cmd(
            weights, input_dataset, r_fit=language == R_FIT)

        target_type = (
            resources.target_types(problem) if "transform" not in framework else TRANSFORM
        )

        cmd = "{} fit --target-type {} --code-dir {} --input {} --parameter-file {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND,
            target_type,
            custom_model_dir,
            input_dataset,
            parameter_file,
        )
        if problem != ANOMALY:
            cmd += " --target {}".format(resources.targets(problem))

        if problem in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd, resources.class_labels(framework, problem), target_type=target_type
            )
        if docker:
            cmd += " --docker {} ".format(docker)

        cmd += weights_cmd

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))

    def test_fit_for_use_output_and_nested(
        self,
        resources,
        framework,
        problem,
        docker,
        weights,
        use_output,
        tmp_path,
        nested,
    ):
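        # Run drum fit with an optional --output directory and nested code layout;
        # docker runs are only exercised for the sklearn framework.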
        if docker and framework != SKLEARN:
            return
        if framework == RDS:
            language = R_FIT
        else:
            language = PYTHON

        custom_model_dir = _create_custom_model_dir(resources,
                                                    tmp_path,
                                                    framework,
                                                    problem,
                                                    language,
                                                    is_training=True,
                                                    nested=nested)

        input_dataset = resources.datasets(framework, problem)

        weights_cmd, input_dataset, __keep_this_around = self._add_weights_cmd(
            weights, input_dataset, r_fit=language == R_FIT)

        output = tmp_path / "output"
        output.mkdir()

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, problem, custom_model_dir,
            input_dataset)
        if problem != ANOMALY:
            cmd += " --target {}".format(resources.targets(problem))

        if use_output:
            cmd += " --output {}".format(output)
        if problem == BINARY:
            cmd = _cmd_add_class_labels(
                cmd, resources.class_labels(framework, problem), target_type=problem
            )
        if docker:
            cmd += " --docker {} ".format(docker)

        cmd += weights_cmd

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))

    def test_docker_image_with_wrong_dep_install(
        self,
        resources,
        framework,
        problem,
        code_dir,
        env_dir,
        tmp_path,
    ):
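        # Write an uninstallable dependency into requirements.txt and verify the
        # docker image build fails with the expected pip error in stderr.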

        custom_model_dir = os.path.join(MODEL_TEMPLATES_PATH, code_dir)

        tmp_dir = tmp_path / "tmp_code_dir"
        custom_model_dir = shutil.copytree(custom_model_dir, tmp_dir)
        with open(os.path.join(custom_model_dir, "requirements.txt"),
                  mode="w") as f:
            f.write("\nnon_existing_dep")

        docker_env = os.path.join(PUBLIC_DROPIN_ENVS_PATH, env_dir)
        input_dataset = resources.datasets(framework, problem)

        output = tmp_path / "output"

        cmd = '{} score --code-dir {} --input "{}" --output {} --target-type {}'.format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            resources.target_types(problem),
        )
        cmd = _cmd_add_class_labels(
            cmd,
            resources.class_labels(framework, problem),
            target_type=resources.target_types(problem),
            multiclass_label_file=None,
        )
        cmd += " --docker {} --verbose ".format(docker_env)

        _, _, stde = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        assert re.search(
            r"ERROR drum:  Failed to build a docker image",
            stde,
        )

        assert re.search(
            r"Could not find a version that satisfies the requirement non_existing_dep",
            stde,
        )

def test_fit_variety(framework, variety_resources, resources,
                     variety_data_names, tmp_path):
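    # Fit a variety of datasets, resolving problem type, target and class labels
    # from the variety fixtures, and raise with the captured stderr on failure.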

    # get data info from fixtures
    df = variety_data_names
    df_path = variety_resources.dataset(df)
    problem = variety_resources.problem(df)
    target = variety_resources.target(df)
    if problem == BINARY:
        class_labels = variety_resources.class_labels(df)
        if framework == RDS:
            # there's one annoying dataset where R needs 0 and 1 and python wants 1.0 and 0.0
            class_labels = [
                int(x) if type(x) is float else x for x in class_labels
            ]
    # figure out language
    if framework == RDS:
        language = R_FIT
    else:
        language = PYTHON

    custom_model_dir = _create_custom_model_dir(
        resources,
        tmp_path,
        framework,
        problem,
        language,
        is_training=True,
        nested=False,
    )

    output = tmp_path / "output"
    output.mkdir()

    cmd = "{} fit --code-dir {} --input {} --verbose ".format(
        ArgumentsOptions.MAIN_COMMAND, custom_model_dir, df_path)
    if problem == ANOMALY:
        cmd += " --unsupervised"
    else:
        cmd += " --target {}".format(target)

    if problem == BINARY:
        cmd = _cmd_add_class_labels(cmd, class_labels, target_type=problem)

    p, _, err = _exec_shell_cmd(
        cmd,
        "Failed in {} command line! {}".format(ArgumentsOptions.MAIN_COMMAND,
                                               cmd),
        assert_if_fail=False,
    )

    if p.returncode != 0:
        raise AssertionError(err)

Example 6
    def test_custom_models_perf_test(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        timeout,
        tmp_path,
    ):
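        # Run drum perf-test; with a timeout the report should contain
        # "timed out (<timeout>s)" and "NA" entries, otherwise no "NA" at all.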
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        cmd = "{} perf-test -i 200 -s 1000 --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )

        if timeout is not None:
            cmd += " --timeout {}".format(timeout)

        if resources.target_types(problem) in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )

        if docker:
            cmd += " --docker {}".format(docker)

        _, stdo, _ = _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        if timeout is not None:
            expected_str = "timed out ({}s)".format(timeout)
            assert expected_str in stdo
            assert "NA" in stdo
        else:
            assert "NA" not in stdo
    def test_custom_models_with_drum(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
        use_labels_file,
        temp_file,
    ):
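        # Score the dataset with drum score and check that the output has the
        # same number of rows as the input.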
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        output = tmp_path / "output"

        cmd = '{} score --code-dir {} --input "{}" --output {} --target-type {}'.format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            resources.target_types(problem),
        )
        if resources.target_types(problem) in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
                multiclass_label_file=temp_file if use_labels_file else None,
            )
        if docker:
            cmd += " --docker {} --verbose ".format(docker)

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        in_data = pd.read_csv(input_dataset)
        out_data = pd.read_csv(output)
        assert in_data.shape[0] == out_data.shape[0]

Example 8
    def test_custom_models_validation_test(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
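        # Run drum validation; the null value imputation check is expected to
        # fail only for the NO_CUSTOM language.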
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        cmd = "{} validation --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )
        if problem == BINARY:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )
        if docker:
            cmd += " --docker {}".format(docker)

        _, stdo, _ = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        if language == NO_CUSTOM:
            assert re.search(r"Null value imputation\s+FAILED", stdo)
        else:
            assert re.search(r"Null value imputation\s+PASSED", stdo)
    def test_prediction_consistency(self, resources, tmp_path, framework,
                                    problem):
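        # Fit on sparse data; inconsistent predictions should only produce a
        # warning in stderr while the overall fit still succeeds.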
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            SPARSE,
            language=PYTHON,
            is_training=True,
        )

        input_dataset = resources.datasets(framework, problem)

        if problem in [BINARY_TEXT, BINARY_BOOL]:
            target_type = BINARY
        else:
            target_type = problem

        cmd = "{} fit --target-type {} --code-dir {} --input {} --verbose ".format(
            ArgumentsOptions.MAIN_COMMAND, target_type, custom_model_dir,
            input_dataset)
        cmd += " --target {}".format(resources.targets(problem))

        if target_type in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd, resources.class_labels(framework, problem), target_type)

        _, stdout, stderr = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=True,
        )

        # we should throw a warning, not an error
        assert "Your predictions were different when we tried to predict twice." in stderr
        # but don't error out
        assert (
            "Your model can be fit to your data,  and predictions can be made on the fit model!"
            in stdout)
        # clean up the file whose path is reported after the final ":" in stderr
        sample_dir = stderr.split(":")[-1].strip()
        os.remove(sample_dir)

Example 10
    def _drum_with_monitoring(resources, framework, problem, language, docker,
                              tmp_path):
        """
        We expect the drum run to succeed, since mlops is assumed to be installed.
        """
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        mlops_spool_dir = tmp_path / "mlops_spool"
        os.mkdir(str(mlops_spool_dir))

        input_dataset = resources.datasets(framework, problem)
        output = tmp_path / "output"

        cmd = "{} score --code-dir {} --input {} --output {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            resources.target_types(problem),
        )
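        # Route MLOps monitoring output to a filesystem spooler under tmp_path.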
        monitor_settings = (
            "spooler_type=filesystem;directory={};max_files=1;file_max_size=1024000"
            .format(mlops_spool_dir))
        cmd += ' --monitor --model-id 555 --deployment-id 777 --monitor-settings="{}"'.format(
            monitor_settings)

        if problem == BINARY:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )
        if docker:
            cmd += " --docker {} --verbose ".format(docker)

        return cmd, input_dataset, output, mlops_spool_dir

Example 11
    def __init__(
        self,
        target_type,
        labels,
        custom_model_dir,
        docker=None,
        with_error_server=False,
        show_stacktrace=True,
        nginx=False,
        memory=None,
        fail_on_shutdown_error=True,
    ):
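        # Assemble the drum server command line on a free local port; the
        # command is only built and stored here, not executed.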
        port = CMRunnerUtils.find_free_port()
        self.server_address = "localhost:{}".format(port)
        url_host = os.environ.get("TEST_URL_HOST", "localhost")

        if docker:
            self.url_server_address = "http://{}:{}".format(url_host, port)
        else:
            self.url_server_address = "http://localhost:{}".format(port)

        cmd = "{} server --code-dir {} --target-type {} --address {}".format(
            ArgumentsOptions.MAIN_COMMAND, custom_model_dir, target_type,
            self.server_address)
        if labels:
            cmd = _cmd_add_class_labels(cmd, labels, target_type=target_type)
        if docker:
            cmd += " --docker {}".format(docker)
            if memory:
                cmd += " --memory {}".format(memory)
        if with_error_server:
            cmd += " --with-error-server"
        if show_stacktrace:
            cmd += " --show-stacktrace"
        if nginx:
            cmd += " --production"
        self._cmd = cmd

        self._process_object_holder = DrumServerProcess()
        self._server_thread = None
        self._with_nginx = nginx
        self._fail_on_shutdown_error = fail_on_shutdown_error

Example 12
    def test_classification_validation_fails_on_add_up_to_one(
        self,
        resources,
        framework,
        problem,
        language,
        tmp_path,
    ):
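        # Run drum validation against a model whose class probabilities do not
        # sum to 1 and expect both validation checks to be reported as FAILED.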
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        cmd = "{} validation --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )
        if problem == BINARY:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )

        _, stdo, _ = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        assert re.search(r"Basic batch prediction\s+FAILED", stdo)
        assert re.search(r"Null value imputation\s+FAILED", stdo)

Example 13
    def test_custom_models_perf_test(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
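        # Run drum perf-test (no timeout handling in this variant) and expect
        # the command to succeed.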
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        cmd = "{} perf-test -i 200 -s 1000 --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            resources.target_types(problem),
        )

        if resources.target_types(problem) in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
            )

        if docker:
            cmd += " --docker {}".format(docker)

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))

    def test_classification_validation_fails(
        self,
        resources,
        framework,
        language,
        target_type,
        tmp_path,
    ):
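        # Score a binary dataset and expect drum to report that the prediction
        # probabilities do not add up to 1.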
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            None,
            language,
        )

        input_dataset = resources.datasets(framework, BINARY)

        cmd = "{} score --code-dir {} --input {} --target-type {}".format(
            ArgumentsOptions.MAIN_COMMAND, custom_model_dir, input_dataset,
            target_type)

        if resources.target_types(target_type) in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, target_type),
                target_type=resources.target_types(target_type),
            )

        _, _, stde = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        assert "Your prediction probabilities do not add up to 1." in str(stde)
    def __init__(
        self,
        target_type,
        labels,
        custom_model_dir,
        docker=None,
        with_error_server=False,
        show_stacktrace=True,
        nginx=False,
        memory=None,
        fail_on_shutdown_error=True,
        pass_args_as_env_vars=False,
        verbose=True,
        append_cmd=None,
    ):
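        # Like the simpler constructor above, but drum options can be passed via
        # ArgumentOptionsEnvVars environment variables instead of CLI flags, and
        # an arbitrary suffix can be appended to the command.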
        port = CMRunnerUtils.find_free_port()
        self.server_address = "localhost:{}".format(port)
        url_host = os.environ.get("TEST_URL_HOST", "localhost")

        if docker:
            self.url_server_address = "http://{}:{}".format(url_host, port)
        else:
            self.url_server_address = "http://localhost:{}".format(port)

        cmd = "{} server".format(ArgumentsOptions.MAIN_COMMAND)

        if pass_args_as_env_vars:
            os.environ[ArgumentOptionsEnvVars.CODE_DIR] = str(custom_model_dir)
            os.environ[ArgumentOptionsEnvVars.TARGET_TYPE] = target_type
            os.environ[ArgumentOptionsEnvVars.ADDRESS] = self.server_address
        else:
            cmd += " --code-dir {} --target-type {} --address {}".format(
                custom_model_dir, target_type, self.server_address
            )

        if labels:
            cmd = _cmd_add_class_labels(
                cmd, labels, target_type=target_type, pass_args_as_env_vars=pass_args_as_env_vars
            )
        if docker:
            cmd += " --docker {}".format(docker)
            if memory:
                cmd += " --memory {}".format(memory)
        if with_error_server:
            if pass_args_as_env_vars:
                os.environ[ArgumentOptionsEnvVars.WITH_ERROR_SERVER] = "1"
            else:
                cmd += " --with-error-server"
        if show_stacktrace:
            if pass_args_as_env_vars:
                os.environ[ArgumentOptionsEnvVars.SHOW_STACKTRACE] = "1"
            else:
                cmd += " --show-stacktrace"
        if nginx:
            if pass_args_as_env_vars:
                os.environ[ArgumentOptionsEnvVars.PRODUCTION] = "1"
            else:
                cmd += " --production"

        if append_cmd is not None:
            cmd += " " + append_cmd

        self._cmd = cmd

        self._process_object_holder = DrumServerProcess()
        self._server_thread = None
        self._with_nginx = nginx
        self._fail_on_shutdown_error = fail_on_shutdown_error
        self._verbose = verbose

    def test_docker_image_with_deps_install(
        self,
        resources,
        framework,
        problem,
        code_dir,
        env_dir,
        skip_deps_install,
        tmp_path,
    ):
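        # Run drum score in a docker image with or without --skip-deps-install;
        # java (CODEGEN) models do not support requirements.txt, so expectations
        # differ per framework/problem combination below.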

        custom_model_dir = os.path.join(MODEL_TEMPLATES_PATH, code_dir)
        if framework == CODEGEN and not skip_deps_install:
            tmp_dir = tmp_path / "tmp_code_dir"
            custom_model_dir = shutil.copytree(custom_model_dir, tmp_dir)
            with open(os.path.join(custom_model_dir, "requirements.txt"),
                      mode="w") as f:
                f.write("deps_are_not_supported_in_java")
        docker_env = os.path.join(PUBLIC_DROPIN_ENVS_PATH, env_dir)
        input_dataset = resources.datasets(framework, problem)

        output = tmp_path / "output"

        cmd = '{} score --code-dir {} --input "{}" --output {} --target-type {}'.format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            resources.target_types(problem),
        )
        if resources.target_types(problem) in [BINARY, MULTICLASS]:
            cmd = _cmd_add_class_labels(
                cmd,
                resources.class_labels(framework, problem),
                target_type=resources.target_types(problem),
                multiclass_label_file=None,
            )
        cmd += " --docker {} --verbose ".format(docker_env)

        if skip_deps_install:
            cmd += " --skip-deps-install"

        _, stdo, stde = _exec_shell_cmd(
            cmd,
            "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd),
            assert_if_fail=False,
        )

        if skip_deps_install:
            # requirements.txt is not supported for java models, so test should pass
            if framework == CODEGEN:
                in_data = pd.read_csv(input_dataset)
                out_data = pd.read_csv(output)
                assert in_data.shape[0] == out_data.shape[0]
            else:
                assert re.search(
                    r"ERROR drum:  Error from docker process:",
                    stde,
                )
        else:
            if framework is None and problem == UNSTRUCTURED:
                with open(output) as f:
                    out_data = f.read()
                    assert "10" in out_data
            elif framework == PYTORCH and problem == MULTICLASS:
                in_data = pd.read_csv(input_dataset)
                out_data = pd.read_csv(output)
                assert in_data.shape[0] == out_data.shape[0]
            elif framework == CODEGEN and problem == REGRESSION:
                assert re.search(
                    r"WARNING drum:  Dependencies management is not supported for the 'java' language and will not be installed into an image",
                    stde,
                )
            else:
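                # any other framework/problem combination is unexpected here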
                assert False