Example #1
    def _basic_batch_prediction_check(self):
        test_name = "Basic batch prediction"
        test_passed = True
        failure_message = ""
        cmd_list = sys.argv  # reuse drum's own command line, rewriting arguments below

        TMP_DIR = "/tmp"
        DIR_PREFIX = "drum_validation_check_"

        output_dir = mkdtemp(prefix=DIR_PREFIX, dir=TMP_DIR)
        output_filename = os.path.join(output_dir, "output")

        CMRunnerUtils.replace_cmd_argument_value(cmd_list,
                                                 ArgumentsOptions.OUTPUT,
                                                 output_filename)

        p = subprocess.Popen(cmd_list, env=os.environ)
        retcode = p.wait()
        if retcode != 0:
            test_passed = False
            failure_message = "Test failed on provided dataset: {}".format(
                self._input_csv)

        return test_name, test_passed, failure_message

    def validation_test(self):
        # TODO: create infrastructure to easily add more checks
        # NullValueImputationCheck
        test_name = "Null value imputation"
        ValidationTestResult = collections.namedtuple("ValidationTestResult", "filename retcode")

        cmd_list = sys.argv
        cmd_list[0] = ArgumentsOptions.MAIN_COMMAND
        cmd_list[1] = ArgumentsOptions.SCORE

        TMP_DIR = "/tmp"
        DIR_PREFIX = "drum_validation_checks_"

        null_datasets_dir = mkdtemp(prefix=DIR_PREFIX, dir=TMP_DIR)

        df = pd.read_csv(self._input_csv)
        column_names = list(df.columns)

        results = {}

        for column_name in column_names:
            with NamedTemporaryFile(
                mode="w",
                dir=null_datasets_dir,
                prefix="null_value_imputation_{}_".format(column_name),
                delete=False,
            ) as temp_f:
                temp_data_name = temp_f.name
                df_tmp = df.copy()
                df_tmp[column_name] = None
                df_tmp.to_csv(temp_data_name, index=False)
                CMRunnerUtils.replace_cmd_argument_value(
                    cmd_list, ArgumentsOptions.INPUT, temp_data_name
                )

                p = subprocess.Popen(cmd_list, env=os.environ)
                retcode = p.wait()
                if retcode != 0:
                    results[column_name] = ValidationTestResult(temp_data_name, retcode)
                else:
                    # this column scored cleanly; its temporary dataset is no longer needed
                    os.remove(temp_data_name)

        table = Texttable()
        table.set_deco(Texttable.HEADER)

        try:
            terminal_size = shutil.get_terminal_size()
            table.set_max_width(terminal_size.columns)
        except Exception:
            # fall back to Texttable's default width if the terminal size is unavailable
            pass

        header_names = ["Test case", "Status"]
        col_types = ["t", "t"]
        col_align = ["l", "l"]

        rows = []

        if len(results) == 0:
            rows.append(header_names)
            rows.append([test_name, "PASSED"])
            shutil.rmtree(null_datasets_dir)
        else:
            col_types.append("t")
            col_align.append("l")
            header_names.append("Details")
            rows.append(header_names)

            table2 = Texttable()
            table2.set_deco(Texttable.HEADER)

            message = (
                "The null value imputation check imputes each feature, in turn, with NaN values. "
                "If the check fails for a feature, the test dataset is saved in {}/{}*. "
                "Make sure to delete those folders if they take up too much space.".format(
                    TMP_DIR, DIR_PREFIX
                )
            )
            rows.append([test_name, "FAILED", message])

            header_names2 = ["Failed feature", "Dataset filename"]

            table2.set_cols_dtype(["t", "t"])
            table2.set_cols_align(["l", "l"])

            rows2 = [header_names2]

            for key, test_result in results.items():
                if test_result.retcode:
                    rows2.append([key, test_result.filename])

            table2.add_rows(rows2)
            table_res = table2.draw()
            rows.append(["", "", "\n{}".format(table_res)])

        table.set_cols_dtype(col_types)
        table.set_cols_align(col_align)
        table.add_rows(rows)
        tbl_report = table.draw()
        print("\n\nValidation checks results")
        print(tbl_report)
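
Both checks above lean on CMRunnerUtils.replace_cmd_argument_value to swap an argument's value in the argv-style list before re-running drum as a subprocess. Its implementation is not shown in these examples; the following is a minimal sketch of what such a helper might do, assuming it mutates the list in place (which is how the callers use it):

# Hypothetical sketch only; the real CMRunnerUtils helper may differ.
def replace_cmd_argument_value(cmd_list, arg_name, new_value):
    # Replace, in place, the token that follows arg_name in cmd_list.
    if arg_name in cmd_list:
        idx = cmd_list.index(arg_name)
        if idx + 1 < len(cmd_list):
            cmd_list[idx + 1] = new_value

The in-place behavior matters: the checks assign cmd_list = sys.argv without copying, so every replacement also rewrites sys.argv itself.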
Example #3
    def _prepare_docker_command(self, options, run_mode, raw_arguments):
        """
        Building a docker command line for running the model inside the docker - this command line can
        be used by the user independently of drum.
        Parameters
        Returns: docker command line to run as a string
        """
        options.docker = self._maybe_build_image(options.docker)
        in_docker_model = "/opt/model"
        in_docker_input_file = "/opt/input.csv"
        in_docker_output_file = "/opt/output.csv"
        in_docker_fit_output_dir = "/opt/fit_output_dir"
        in_docker_fit_target_filename = "/opt/fit_target.csv"
        in_docker_fit_row_weights_filename = "/opt/fit_row_weights.csv"

        docker_cmd = "docker run --rm --interactive  --user $(id -u):$(id -g) "
        docker_cmd_args = " -v {}:{}".format(options.code_dir, in_docker_model)

        in_docker_cmd_list = raw_arguments
        in_docker_cmd_list[0] = ArgumentsOptions.MAIN_COMMAND
        in_docker_cmd_list[1] = run_mode.value

        CMRunnerUtils.delete_cmd_argument(in_docker_cmd_list,
                                          ArgumentsOptions.DOCKER)
        CMRunnerUtils.replace_cmd_argument_value(in_docker_cmd_list,
                                                 ArgumentsOptions.CODE_DIR,
                                                 in_docker_model)
        CMRunnerUtils.replace_cmd_argument_value(in_docker_cmd_list, "-cd",
                                                 in_docker_model)
        CMRunnerUtils.replace_cmd_argument_value(in_docker_cmd_list,
                                                 ArgumentsOptions.INPUT,
                                                 in_docker_input_file)
        CMRunnerUtils.replace_cmd_argument_value(in_docker_cmd_list,
                                                 ArgumentsOptions.OUTPUT,
                                                 in_docker_output_file)

        if run_mode == RunMode.SERVER:
            host_port_list = options.address.split(":", 1)
            if len(host_port_list) == 1:
                raise DrumCommonException(
                    "Error: when using the docker option provide argument --server host:port"
                )
            port = int(host_port_list[1])
            host_port_inside_docker = "{}:{}".format("0.0.0.0", port)
            CMRunnerUtils.replace_cmd_argument_value(in_docker_cmd_list,
                                                     ArgumentsOptions.ADDRESS,
                                                     host_port_inside_docker)
            docker_cmd_args += " -p {port}:{port}".format(port=port)

        if run_mode in [
                RunMode.SCORE, RunMode.PERF_TEST, RunMode.VALIDATION,
                RunMode.FIT
        ]:
            docker_cmd_args += " -v {}:{}".format(options.input,
                                                  in_docker_input_file)

            if run_mode == RunMode.SCORE and options.output:
                output_file = os.path.realpath(options.output)
                if not os.path.exists(output_file):
                    # Create an empty file so the mount command mounts the file correctly -
                    # otherwise docker creates an empty directory
                    open(output_file, "a").close()
                docker_cmd_args += " -v {}:{}".format(output_file,
                                                      in_docker_output_file)
                CMRunnerUtils.replace_cmd_argument_value(
                    in_docker_cmd_list, ArgumentsOptions.OUTPUT,
                    in_docker_output_file)
            elif run_mode == RunMode.FIT:
                if options.output:
                    fit_output_dir = os.path.realpath(options.output)
                    docker_cmd_args += " -v {}:{}".format(
                        fit_output_dir, in_docker_fit_output_dir)
                CMRunnerUtils.replace_cmd_argument_value(
                    in_docker_cmd_list, ArgumentsOptions.OUTPUT,
                    in_docker_fit_output_dir)
                if options.target_csv:
                    fit_target_filename = os.path.realpath(options.target_csv)
                    docker_cmd_args += " -v {}:{}".format(
                        fit_target_filename, in_docker_fit_target_filename)
                    CMRunnerUtils.replace_cmd_argument_value(
                        in_docker_cmd_list,
                        ArgumentsOptions.TARGET_FILENAME,
                        in_docker_fit_target_filename,
                    )
                if options.row_weights_csv:
                    fit_row_weights_filename = os.path.realpath(
                        options.row_weights_csv)
                    docker_cmd_args += " -v {}:{}".format(
                        fit_row_weights_filename,
                        in_docker_fit_row_weights_filename)
                    CMRunnerUtils.replace_cmd_argument_value(
                        in_docker_cmd_list,
                        ArgumentsOptions.WEIGHTS_CSV,
                        in_docker_fit_row_weights_filename,
                    )

        docker_cmd += " {} {} {}".format(docker_cmd_args, options.docker,
                                         " ".join(in_docker_cmd_list))

        self._print_verbose("docker command: [{}]".format(docker_cmd))
        return docker_cmd
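
To make the assembled string concrete: for a plain score run, the returned command has roughly the following shape. This is illustrative only; the image name and host paths are invented, and ArgumentsOptions.MAIN_COMMAND is assumed to resolve to the drum executable name.

# Illustrative only: made-up image name and host paths.
docker_cmd = (
    "docker run --rm --interactive --user $(id -u):$(id -g) "
    " -v /home/user/model:/opt/model"
    " -v /home/user/input.csv:/opt/input.csv"
    " -v /home/user/output.csv:/opt/output.csv"
    " my-image"
    " drum score --code-dir /opt/model --input /opt/input.csv --output /opt/output.csv"
)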
Example #4
    def _prepare_docker_command(self, options, run_mode, raw_arguments):
        """
        Building a docker command line for running the model inside the docker - this command line
        can be used by the user independently of drum.
        Parameters
        Returns: docker command line to run as a string
        """
        options.docker = self._maybe_build_image(options.docker)
        in_docker_model = "/opt/model"
        in_docker_input_file = "/opt/input.csv"
        in_docker_output_file = "/opt/output.csv"
        in_docker_fit_output_dir = "/opt/fit_output_dir"
        in_docker_fit_target_filename = "/opt/fit_target.csv"
        in_docker_fit_row_weights_filename = "/opt/fit_row_weights.csv"

        docker_cmd = "docker run --rm --entrypoint '' --interactive --user $(id -u):$(id -g)"
        docker_cmd_args = ' -v "{}":{}'.format(options.code_dir,
                                               in_docker_model)

        in_docker_cmd_list = raw_arguments
        in_docker_cmd_list[0] = ArgumentsOptions.MAIN_COMMAND
        in_docker_cmd_list[1] = run_mode.value

        # [RAPTOR-5607] Using -cd makes fit fail within docker, but not --code-dir.
        # Hotfix it by replacing -cd with --code-dir
        in_docker_cmd_list = [
            ArgumentsOptions.CODE_DIR if arg == "-cd" else arg
            for arg in in_docker_cmd_list
        ]

        CMRunnerUtils.delete_cmd_argument(in_docker_cmd_list,
                                          ArgumentsOptions.DOCKER)
        CMRunnerUtils.delete_cmd_argument(in_docker_cmd_list,
                                          ArgumentsOptions.SKIP_DEPS_INSTALL)
        if options.memory:
            docker_cmd_args += " --memory {mem_size} --memory-swap {mem_size} ".format(
                mem_size=options.memory)
            CMRunnerUtils.delete_cmd_argument(in_docker_cmd_list,
                                              ArgumentsOptions.MEMORY)

        if options.class_labels and ArgumentsOptions.CLASS_LABELS not in in_docker_cmd_list:
            CMRunnerUtils.delete_cmd_argument(
                in_docker_cmd_list, ArgumentsOptions.CLASS_LABELS_FILE)
            in_docker_cmd_list.append(ArgumentsOptions.CLASS_LABELS)
            for label in options.class_labels:
                in_docker_cmd_list.append(label)

        CMRunnerUtils.replace_cmd_argument_value(in_docker_cmd_list,
                                                 ArgumentsOptions.CODE_DIR,
                                                 in_docker_model)
        CMRunnerUtils.replace_cmd_argument_value(in_docker_cmd_list, "-cd",
                                                 in_docker_model)
        CMRunnerUtils.replace_cmd_argument_value(in_docker_cmd_list,
                                                 ArgumentsOptions.INPUT,
                                                 in_docker_input_file)
        CMRunnerUtils.replace_cmd_argument_value(in_docker_cmd_list,
                                                 ArgumentsOptions.OUTPUT,
                                                 in_docker_output_file)

        if run_mode == RunMode.SERVER:
            host_port_list = options.address.split(":", 1)
            if len(host_port_list) == 1:
                raise DrumCommonException(
                    "Error: when using the docker option provide argument --server host:port"
                )
            port = int(host_port_list[1])
            host_port_inside_docker = "{}:{}".format("0.0.0.0", port)
            CMRunnerUtils.replace_cmd_argument_value(in_docker_cmd_list,
                                                     ArgumentsOptions.ADDRESS,
                                                     host_port_inside_docker)
            docker_cmd_args += " -p {port}:{port}".format(port=port)

        if run_mode in [
                RunMode.SCORE, RunMode.PERF_TEST, RunMode.VALIDATION,
                RunMode.FIT
        ]:
            docker_cmd_args += ' -v "{}":{}'.format(options.input,
                                                    in_docker_input_file)

            if run_mode == RunMode.SCORE and options.output:
                output_file = os.path.realpath(options.output)
                if not os.path.exists(output_file):
                    # Create an empty file so the mount command mounts the file correctly -
                    # otherwise docker creates an empty directory
                    open(output_file, "a").close()
                docker_cmd_args += ' -v "{}":{}'.format(
                    output_file, in_docker_output_file)
                CMRunnerUtils.replace_cmd_argument_value(
                    in_docker_cmd_list, ArgumentsOptions.OUTPUT,
                    in_docker_output_file)
            elif run_mode == RunMode.FIT:
                if options.output:
                    fit_output_dir = os.path.realpath(options.output)
                    docker_cmd_args += ' -v "{}":{}'.format(
                        fit_output_dir, in_docker_fit_output_dir)
                CMRunnerUtils.replace_cmd_argument_value(
                    in_docker_cmd_list, ArgumentsOptions.OUTPUT,
                    in_docker_fit_output_dir)
                if options.target_csv:
                    fit_target_filename = os.path.realpath(options.target_csv)
                    docker_cmd_args += ' -v "{}":{}'.format(
                        fit_target_filename, in_docker_fit_target_filename)
                    CMRunnerUtils.replace_cmd_argument_value(
                        in_docker_cmd_list,
                        ArgumentsOptions.TARGET_CSV,
                        in_docker_fit_target_filename,
                    )
                if options.row_weights_csv:
                    fit_row_weights_filename = os.path.realpath(
                        options.row_weights_csv)
                    docker_cmd_args += ' -v "{}":{}'.format(
                        fit_row_weights_filename,
                        in_docker_fit_row_weights_filename)
                    CMRunnerUtils.replace_cmd_argument_value(
                        in_docker_cmd_list,
                        ArgumentsOptions.WEIGHTS_CSV,
                        in_docker_fit_row_weights_filename,
                    )

        docker_cmd += " {} {} {}".format(docker_cmd_args, options.docker,
                                         " ".join(in_docker_cmd_list))

        self._print_verbose("docker command: [{}]".format(docker_cmd))
        return docker_cmd
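
Example #4 additionally uses CMRunnerUtils.delete_cmd_argument to strip options such as --docker, --skip-deps-install, and --memory that must not be forwarded into the container. As with the replace helper, the implementation is not shown here; a minimal sketch under the assumption that it removes the option, and its value when one follows, in place:

# Hypothetical sketch only; the real CMRunnerUtils helper may differ.
def delete_cmd_argument(cmd_list, arg_name):
    # Remove arg_name from cmd_list in place; if the next token is not
    # another option, treat it as the argument's value and remove it too.
    if arg_name in cmd_list:
        idx = cmd_list.index(arg_name)
        del cmd_list[idx]
        if idx < len(cmd_list) and not cmd_list[idx].startswith("-"):
            del cmd_list[idx]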
Example #5
    def _null_value_imputation_check(self):
        test_name = "Null value imputation"
        test_passed = True
        failure_message = ""
        cmd_list = sys.argv

        TMP_DIR = "/tmp"
        DIR_PREFIX = "drum_validation_checks_"

        ValidationTestResult = collections.namedtuple(
            "ValidationTestResult", "filename retcode message")

        null_datasets_dir = mkdtemp(prefix=DIR_PREFIX, dir=TMP_DIR)

        df = pd.read_csv(self._input_csv)
        column_names = list(df.columns)

        results = {}
        for i, column_name in enumerate(column_names):
            output_filename = os.path.join(null_datasets_dir,
                                           "output{}".format(i))
            tmp_dataset_file_path = os.path.join(
                null_datasets_dir, "null_value_imputation_column{}".format(i))
            df_tmp = df.copy()
            df_tmp[column_name] = None
            df_tmp.to_csv(tmp_dataset_file_path, index=False)
            CMRunnerUtils.replace_cmd_argument_value(cmd_list,
                                                     ArgumentsOptions.INPUT,
                                                     tmp_dataset_file_path)
            CMRunnerUtils.replace_cmd_argument_value(cmd_list,
                                                     ArgumentsOptions.OUTPUT,
                                                     output_filename)

            p = subprocess.Popen(cmd_list, env=os.environ)
            retcode = p.wait()
            if retcode != 0:
                test_passed = False
                results[column_name] = ValidationTestResult(
                    tmp_dataset_file_path, retcode, "")
            else:
                # this column scored cleanly; its temporary dataset is no longer needed
                os.remove(tmp_dataset_file_path)

        # process results
        if test_passed:
            shutil.rmtree(null_datasets_dir)
        else:

            table = Texttable()
            table.set_deco(Texttable.HEADER)

            headers = ["Failed feature", "Message", "Dataset filename"]

            table.set_cols_dtype(["t", "t", "t"])
            table.set_cols_align(["l", "l", "l"])

            rows = [headers]

            for key, test_result in results.items():
                if test_result.retcode:
                    rows.append(
                        [key, test_result.message, test_result.filename])

            table.add_rows(rows)
            table_res = table.draw()

            message = (
                "The null value imputation check imputes each feature, in turn, with NaN values. "
                "If the check fails for a feature, the test dataset is saved in {}/{}*. "
                "Make sure to delete those folders if they take up too much space."
                .format(TMP_DIR, DIR_PREFIX))
            failure_message = "{}\n\n{}".format(message, table_res)

        return test_name, test_passed, failure_message
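
The heart of the null value imputation check is easy to reproduce in isolation: each column is nulled out in turn and written to its own dataset, and drum then scores each file. A toy, self-contained version of that loop (the frame and column names are made up):

import pandas as pd

# Toy frame standing in for the user's input CSV.
df = pd.DataFrame({"age": [25, 32], "income": [50000, 64000]})

for i, column_name in enumerate(df.columns):
    df_tmp = df.copy()
    df_tmp[column_name] = None  # impute the entire column with nulls
    df_tmp.to_csv("null_value_imputation_column{}.csv".format(i), index=False)

If the model's preprocessing cannot handle a fully null column, scoring the corresponding file fails with a nonzero exit code, and that dataset is what the check leaves behind under /tmp for debugging.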