Code example #1
    def _prepare_prediction_server_or_batch_pipeline(self, run_language):
        options = self.options
        # The functional pipeline is the predictor pipeline;
        # it differs slightly between batch and server predictions.
        functional_pipeline_name = self._functional_pipelines[(self.run_mode,
                                                               run_language)]
        functional_pipeline_filepath = CMRunnerUtils.get_pipeline_filepath(
            functional_pipeline_name)
        # fields to replace in the functional pipeline (predictor)
        replace_data = {
            "positiveClassLabel": (
                '"{}"'.format(options.positive_class_label)
                if options.positive_class_label
                else "null"
            ),
            "negativeClassLabel": (
                '"{}"'.format(options.negative_class_label)
                if options.negative_class_label
                else "null"
            ),
            "customModelPath": os.path.abspath(options.code_dir),
        }

        if self.run_mode == RunMode.SCORE:
            replace_data.update({
                "input_filename": options.input,
                "output_filename": (
                    '"{}"'.format(options.output) if options.output else "null"
                ),
            })

        functional_pipeline_str = CMRunnerUtils.render_file(
            functional_pipeline_filepath, replace_data)
        ret_pipeline = functional_pipeline_str

        if self.run_mode == RunMode.SERVER:
            with open(
                    CMRunnerUtils.get_pipeline_filepath(
                        EXTERNAL_SERVER_RUNNER), "r") as f:
                runner_pipeline_json = json.load(f)
                # cannot use a template for the pipeline, as quotes would not be escaped
                args = runner_pipeline_json["pipe"][0]["arguments"]
                # in server mode, the predictor pipeline is passed to the server as a parameter
                args["pipeline"] = functional_pipeline_str
                args["repo"] = CMRunnerUtils.get_components_repo()
                host_port_list = options.address.split(":", 1)
                args["host"] = host_port_list[0]
                args["port"] = (
                    int(host_port_list[1]) if len(host_port_list) == 2 else None
                )
                args["threaded"] = options.threaded
                args["show_perf"] = options.show_perf
                ret_pipeline = json.dumps(runner_pipeline_json)
        return ret_pipeline
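
The method above fills placeholders in a pipeline template file and returns the result as a JSON string (in server mode that string is in turn embedded as the "pipeline" argument of the server runner pipeline). As a rough illustration of the substitution step, here is a minimal sketch; the helper name, the {{key}} placeholder syntax, and the template content are assumptions made for illustration, not the actual CMRunnerUtils.render_file implementation.

import json

def render_pipeline_template(template_str, replace_data):
    # Replace each {{key}} placeholder with its value; values such as
    # '"yes"' or "null" are assumed to already be valid JSON literals.
    rendered = template_str
    for key, value in replace_data.items():
        rendered = rendered.replace("{{%s}}" % key, str(value))
    return rendered

template = (
    '{"positiveClassLabel": {{positiveClassLabel}}, '
    '"negativeClassLabel": {{negativeClassLabel}}, '
    '"customModelPath": "{{customModelPath}}"}'
)
replace_data = {
    "positiveClassLabel": '"yes"',
    "negativeClassLabel": '"no"',
    "customModelPath": "/tmp/model",
}
print(json.loads(render_pipeline_template(template, replace_data)))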
Code example #2
    def _run_fit_and_predictions_pipelines_in_mlpiper(self):
        if self.run_mode == RunMode.SERVER:
            run_language = self._check_artifacts_and_get_run_language()
            # in prediction server mode infra pipeline == prediction server runner pipeline
            infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(
                run_language)
        elif self.run_mode == RunMode.SCORE:
            run_language = self._check_artifacts_and_get_run_language()
            tmp_output_filename = None
            # if no output path is provided, write to a temporary file and print it at the end
            if not self.options.output:
                # keep a reference to the object so the temp file is removed only at the end of the process
                __tmp_output_file = tempfile.NamedTemporaryFile(mode="w")
                self.options.output = tmp_output_filename = __tmp_output_file.name
            # in batch prediction mode infra pipeline == predictor pipeline
            infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(
                run_language)
        elif self.run_mode == RunMode.FIT:
            run_language = self._get_fit_run_language()
            infra_pipeline_str = self._prepare_fit_pipeline(run_language)
        else:
            error_message = "{} mode is not supported here".format(
                self.run_mode)
            print(error_message)
            raise DrumCommonException(error_message)

        config = ExecutorConfig(
            pipeline=infra_pipeline_str,
            pipeline_file=None,
            run_locally=True,
            comp_root_path=CMRunnerUtils.get_components_repo(),
            mlpiper_jar=None,
            spark_jars=None,
        )

        _pipeline_executor = Executor(config).standalone(True).set_verbose(
            self.options.verbose)
        # assign a logger named drum.mlpiper.Executor to the mlpiper Executor
        _pipeline_executor.set_logger(
            logging.getLogger(LOGGER_NAME_PREFIX + "." +
                              _pipeline_executor.logger_name()))

        self.logger.info(">>> Start {} in the {} mode".format(
            ArgumentsOptions.MAIN_COMMAND, self.run_mode.value))
        sc = StatsCollector(
            disable_instance=(
                not hasattr(self.options, "show_perf")
                or not self.options.show_perf
                or self.run_mode == RunMode.SERVER
            )
        )
        sc.register_report("Full time", "end", StatsOperation.SUB, "start")
        sc.register_report("Init time (incl model loading)", "init",
                           StatsOperation.SUB, "start")
        sc.register_report("Run time (incl reading CSV)", "run",
                           StatsOperation.SUB, "init")
        with verbose_stdout(self.options.verbose):
            sc.enable()
            try:
                sc.mark("start")

                _pipeline_executor.init_pipeline()
                self.runtime.initialization_succeeded = True
                sc.mark("init")

                _pipeline_executor.run_pipeline(cleanup=False)
                sc.mark("run")
            finally:
                _pipeline_executor.cleanup_pipeline()
                sc.mark("end")
                sc.disable()
        self.logger.info("<<< Finish {} in the {} mode".format(
            ArgumentsOptions.MAIN_COMMAND, self.run_mode.value))
        sc.print_reports()
        if self.run_mode == RunMode.SCORE:
            # print the result if no output path was provided
            if tmp_output_filename:
                print(pd.read_csv(tmp_output_filename))
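
The timing above follows a mark/report pattern: each report registered on the StatsCollector is the elapsed time between two named marks (StatsOperation.SUB), e.g. "Full time" = end - start and "Run time" = run - init. Below is a minimal, self-contained sketch of that pattern, assuming simple wall-clock timestamps; it is only an illustrative stand-in, not the mlpiper StatsCollector.

import time

class SimpleStatsCollector:
    def __init__(self):
        self._marks = {}       # mark name -> timestamp
        self._reports = []     # (title, end_mark, start_mark)

    def register_report(self, title, end_mark, start_mark):
        self._reports.append((title, end_mark, start_mark))

    def mark(self, name):
        self._marks[name] = time.time()

    def print_reports(self):
        for title, end_mark, start_mark in self._reports:
            elapsed = self._marks[end_mark] - self._marks[start_mark]
            print("{}: {:.3f} sec".format(title, elapsed))

sc = SimpleStatsCollector()
sc.register_report("Full time", "end", "start")
sc.register_report("Init time", "init", "start")
sc.register_report("Run time", "run", "init")
sc.mark("start")
sc.mark("init")
sc.mark("run")
sc.mark("end")
sc.print_reports()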