コード例 #1
0
class PredictionServer(ConnectableComponent, PredictMixin):
    def __init__(self, engine):
        super(PredictionServer, self).__init__(engine)
        self._show_perf = False
        self._stats_collector = None
        self._memory_monitor = None
        self._run_language = None
        self._predictor = None
        self._target_type = None

    def configure(self, params):
        super(PredictionServer, self).configure(params)
        self._show_perf = self._params.get("show_perf")
        self._run_language = RunLanguage(params.get("run_language"))
        self._target_type = params[TARGET_TYPE_ARG_KEYWORD]

        self._stats_collector = StatsCollector(
            disable_instance=not self._show_perf)

        self._stats_collector.register_report("run_predictor_total", "finish",
                                              StatsOperation.SUB, "start")
        self._memory_monitor = MemoryMonitor(monitor_current_process=True)

        if self._run_language == RunLanguage.PYTHON:
            from datarobot_drum.drum.language_predictors.python_predictor.python_predictor import (
                PythonPredictor, )

            self._predictor = PythonPredictor()
        elif self._run_language == RunLanguage.JAVA:
            from datarobot_drum.drum.language_predictors.java_predictor.java_predictor import (
                JavaPredictor, )

            self._predictor = JavaPredictor()
        elif self._run_language == RunLanguage.R:
            # this import is here, because RPredictor imports rpy library,
            # which is not installed for Java and Python cases.
            from datarobot_drum.drum.language_predictors.r_predictor.r_predictor import RPredictor

            self._predictor = RPredictor()
        else:
            raise DrumCommonException(
                "Prediction server doesn't support language: {} ".format(
                    self._run_language))

        self._predictor.configure(params)

    def _materialize(self, parent_data_objs, user_data):
        model_api = base_api_blueprint()

        @model_api.route("/capabilities/", methods=["GET"])
        def capabilities():
            return make_predictor_capabilities(
                self._predictor.supported_payload_formats)

        @model_api.route("/health/", methods=["GET"])
        def health():
            return {"message": "OK"}, HTTP_200_OK

        @model_api.route("/predict/", methods=["POST"])
        def predict():
            logger.debug("Entering predict() endpoint")

            self._stats_collector.enable()
            self._stats_collector.mark("start")

            try:
                response, response_status = self.do_predict(logger=logger)
            finally:
                self._stats_collector.mark("finish")
                self._stats_collector.disable()
            return response, response_status

        @model_api.route("/predictUnstructured/", methods=["POST"])
        def predict_unstructured():
            logger.debug("Entering predict() endpoint")

            self._stats_collector.enable()
            self._stats_collector.mark("start")

            try:
                response, response_status = self.do_predict_unstructured(
                    logger=logger)
            finally:
                self._stats_collector.mark("finish")
                self._stats_collector.disable()
            return response, response_status

        @model_api.route("/stats/", methods=["GET"])
        def stats():
            mem_info = self._memory_monitor.collect_memory_info()
            ret_dict = {"mem_info": mem_info._asdict()}

            self._stats_collector.round()
            ret_dict["time_info"] = {}
            for name in self._stats_collector.get_report_names():
                d = self._stats_collector.dict_report(name)
                ret_dict["time_info"][name] = d
            self._stats_collector.stats_reset()
            return ret_dict, HTTP_200_OK

        @model_api.errorhandler(Exception)
        def handle_exception(e):
            logger.exception(e)
            return {
                "message": "ERROR: {}".format(e)
            }, HTTP_500_INTERNAL_SERVER_ERROR

        app = get_flask_app(model_api)

        host = self._params.get("host", None)
        port = self._params.get("port", None)
        try:
            app.run(host, port, threaded=False)
        except OSError as e:
            raise DrumCommonException("{}: host: {}; port: {}".format(
                e, host, port))

        if self._stats_collector:
            self._stats_collector.print_reports()

        return []
コード例 #2
0
    def _run_fit_and_predictions_pipelines_in_mlpiper(self):
        if self.run_mode == RunMode.SERVER:
            run_language = self._check_artifacts_and_get_run_language()
            # in prediction server mode infra pipeline == prediction server runner pipeline
            infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(
                run_language)
        elif self.run_mode == RunMode.SCORE:
            run_language = self._check_artifacts_and_get_run_language()
            tmp_output_filename = None
            # if output is not provided, output into tmp file and print
            if not self.options.output:
                # keep object reference so it will be destroyed only in the end of the process
                __tmp_output_file = tempfile.NamedTemporaryFile(mode="w")
                self.options.output = tmp_output_filename = __tmp_output_file.name
            # in batch prediction mode infra pipeline == predictor pipeline
            infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(
                run_language)
        elif self.run_mode == RunMode.FIT:
            run_language = self._get_fit_run_language()
            infra_pipeline_str = self._prepare_fit_pipeline(run_language)
        else:
            error_message = "{} mode is not supported here".format(
                self.run_mode)
            print(error_message)
            raise DrumCommonException(error_message)

        config = ExecutorConfig(
            pipeline=infra_pipeline_str,
            pipeline_file=None,
            run_locally=True,
            comp_root_path=CMRunnerUtils.get_components_repo(),
            mlpiper_jar=None,
            spark_jars=None,
        )

        _pipeline_executor = Executor(config).standalone(True).set_verbose(
            self.options.verbose)
        # assign logger with the name drum.mlpiper.Executor to mlpiper Executor
        _pipeline_executor.set_logger(
            logging.getLogger(LOGGER_NAME_PREFIX + "." +
                              _pipeline_executor.logger_name()))

        self.logger.info(">>> Start {} in the {} mode".format(
            ArgumentsOptions.MAIN_COMMAND, self.run_mode.value))
        sc = StatsCollector(disable_instance=(
            not hasattr(self.options, "show_perf")
            or not self.options.show_perf or self.run_mode == RunMode.SERVER))
        sc.register_report("Full time", "end", StatsOperation.SUB, "start")
        sc.register_report("Init time (incl model loading)", "init",
                           StatsOperation.SUB, "start")
        sc.register_report("Run time (incl reading CSV)", "run",
                           StatsOperation.SUB, "init")
        with verbose_stdout(self.options.verbose):
            sc.enable()
            try:
                sc.mark("start")

                _pipeline_executor.init_pipeline()
                self.runtime.initialization_succeeded = True
                sc.mark("init")

                _pipeline_executor.run_pipeline(cleanup=False)
                sc.mark("run")
            finally:
                _pipeline_executor.cleanup_pipeline()
                sc.mark("end")
                sc.disable()
        self.logger.info("<<< Finish {} in the {} mode".format(
            ArgumentsOptions.MAIN_COMMAND, self.run_mode.value))
        sc.print_reports()
        if self.run_mode == RunMode.SCORE:
            # print result if output is not provided
            if tmp_output_filename:
                print(pd.read_csv(tmp_output_filename))
コード例 #3
0
class PredictionServer(ConnectableComponent):
    def __init__(self, engine):
        super(PredictionServer, self).__init__(engine)
        self._show_perf = False
        self._stats_collector = None
        self._memory_monitor = None
        self._run_language = None
        self._predictor = None

    def configure(self, params):
        super(PredictionServer, self).configure(params)
        self._threaded = self._params.get("threaded", False)
        self._show_perf = self._params.get("show_perf")
        self._stats_collector = StatsCollector(disable_instance=not self._show_perf)

        self._stats_collector.register_report(
            "run_predictor_total", "finish", StatsOperation.SUB, "start"
        )
        self._memory_monitor = MemoryMonitor()
        self._run_language = RunLanguage(params.get("run_language"))
        if self._run_language == RunLanguage.PYTHON:
            from datarobot_drum.drum.language_predictors.python_predictor.python_predictor import (
                PythonPredictor,
            )

            self._predictor = PythonPredictor()
        elif self._run_language == RunLanguage.JAVA:
            from datarobot_drum.drum.language_predictors.java_predictor.java_predictor import (
                JavaPredictor,
            )

            self._predictor = JavaPredictor()
        elif self._run_language == RunLanguage.R:
            # this import is here, because RPredictor imports rpy library,
            # which is not installed for Java and Python cases.
            from datarobot_drum.drum.language_predictors.r_predictor.r_predictor import RPredictor

            self._predictor = RPredictor()
        else:
            raise DrumCommonException(
                "Prediction server doesn't support language: {} ".format(self._run_language)
            )

        self._predictor.configure(params)

    def _materialize(self, parent_data_objs, user_data):
        model_api = base_api_blueprint()

        @model_api.route("/health/", methods=["GET"])
        def health():
            return {"message": "OK"}, HTTP_200_OK

        @model_api.route("/predict/", methods=["POST"])
        def predict():
            response_status = HTTP_200_OK
            file_key = "X"
            logger.debug("Entering predict() endpoint")
            REGRESSION_PRED_COLUMN = "Predictions"
            filename = request.files[file_key] if file_key in request.files else None
            logger.debug("Filename provided under X key: {}".format(filename))

            if not filename:
                wrong_key_error_message = "Samples should be provided as a csv file under `{}` key.".format(
                    file_key
                )
                logger.error(wrong_key_error_message)
                response_status = HTTP_422_UNPROCESSABLE_ENTITY
                return {"message": "ERROR: " + wrong_key_error_message}, response_status

            in_df = pd.read_csv(filename)

            # TODO labels have to be provided as command line arguments or within configure endpoint
            self._stats_collector.enable()
            self._stats_collector.mark("start")
            out_df = self._predictor.predict(in_df)

            num_columns = len(out_df.columns)
            # float32 is not JSON serializable, so cast to float, which is float64
            out_df = out_df.astype("float")
            if num_columns == 1:
                # df.to_json() is much faster.
                # But as it returns string, we have to assemble final json using strings.
                df_json = out_df[REGRESSION_PRED_COLUMN].to_json(orient="records")
                response_json = '{{"predictions":{df_json}}}'.format(df_json=df_json)
            elif num_columns == 2:
                # df.to_json() is much faster.
                # But as it returns string, we have to assemble final json using strings.
                df_json_str = out_df.to_json(orient="records")
                response_json = '{{"predictions":{df_json}}}'.format(df_json=df_json_str)
            else:
                ret_str = (
                    "Predictions dataframe has {} columns; "
                    "Expected: 1 - for regression, 2 - for binary classification.".format(
                        num_columns
                    )
                )
                response_json = {"message": "ERROR: " + ret_str}
                response_status = HTTP_422_UNPROCESSABLE_ENTITY

            self._stats_collector.mark("finish")
            self._stats_collector.disable()
            return response_json, response_status

        @model_api.route("/stats/", methods=["GET"])
        def stats():
            mem_info = self._memory_monitor.collect_memory_info()
            ret_dict = {"mem_info": mem_info._asdict()}
            self._stats_collector.round()

            ret_dict["time_info"] = {}
            for name in self._stats_collector.get_report_names():
                d = self._stats_collector.dict_report(name)
                ret_dict["time_info"][name] = d
            self._stats_collector.stats_reset()
            return ret_dict, HTTP_200_OK

        @model_api.errorhandler(Exception)
        def handle_exception(e):
            logger.exception(e)
            return {"message": "ERROR: {}".format(e)}, HTTP_500_INTERNAL_SERVER_ERROR

        app = get_flask_app(model_api)
        logging.getLogger("werkzeug").setLevel(logger.getEffectiveLevel())

        host = self._params.get("host", None)
        port = self._params.get("port", None)
        try:
            app.run(host, port, threaded=self._threaded)
        except OSError as e:
            raise DrumCommonException("{}: host: {}; port: {}".format(e, host, port))

        if self._stats_collector:
            self._stats_collector.print_reports()

        return []
コード例 #4
0
class PredictionServer(ConnectableComponent, PredictMixin):
    def __init__(self, engine):
        super(PredictionServer, self).__init__(engine)
        self._show_perf = False
        self._stats_collector = None
        self._memory_monitor = None
        self._run_language = None
        self._predictor = None
        self._target_type = None
        self._code_dir = None
        self._deployment_config = None

    def configure(self, params):
        super(PredictionServer, self).configure(params)
        self._code_dir = self._params.get("__custom_model_path__")
        self._show_perf = self._params.get("show_perf")
        self._run_language = RunLanguage(params.get("run_language"))
        self._target_type = TargetType(params[TARGET_TYPE_ARG_KEYWORD])

        self._stats_collector = StatsCollector(disable_instance=not self._show_perf)

        self._stats_collector.register_report(
            "run_predictor_total", "finish", StatsOperation.SUB, "start"
        )
        self._memory_monitor = MemoryMonitor(monitor_current_process=True)
        self._deployment_config = parse_validate_deployment_config_file(
            self._params["deployment_config"]
        )

        if self._run_language == RunLanguage.PYTHON:
            from datarobot_drum.drum.language_predictors.python_predictor.python_predictor import (
                PythonPredictor,
            )

            self._predictor = PythonPredictor()
        elif self._run_language == RunLanguage.JAVA:
            from datarobot_drum.drum.language_predictors.java_predictor.java_predictor import (
                JavaPredictor,
            )

            self._predictor = JavaPredictor()
        elif self._run_language == RunLanguage.JULIA:
            from datarobot_drum.drum.language_predictors.julia_predictor.julia_predictor import (
                JlPredictor,
            )

            self._predictor = JlPredictor()
        elif self._run_language == RunLanguage.R:
            # this import is here, because RPredictor imports rpy library,
            # which is not installed for Java and Python cases.
            from datarobot_drum.drum.language_predictors.r_predictor.r_predictor import RPredictor

            self._predictor = RPredictor()
        else:
            raise DrumCommonException(
                "Prediction server doesn't support language: {} ".format(self._run_language)
            )

        self._predictor.configure(params)

    def _materialize(self, parent_data_objs, user_data):
        model_api = base_api_blueprint()

        @model_api.route("/capabilities/", methods=["GET"])
        def capabilities():
            return make_predictor_capabilities(self._predictor.supported_payload_formats)

        @model_api.route("/info/", methods=["GET"])
        def info():
            model_info = self._predictor.model_info()
            model_info.update({ModelInfoKeys.LANGUAGE: self._run_language.value})
            model_info.update({ModelInfoKeys.DRUM_VERSION: drum_version})
            model_info.update({ModelInfoKeys.DRUM_SERVER: "flask"})
            model_info.update(
                {ModelInfoKeys.MODEL_METADATA: read_model_metadata_yaml(self._code_dir)}
            )

            return model_info, HTTP_200_OK

        @model_api.route("/health/", methods=["GET"])
        def health():
            return {"message": "OK"}, HTTP_200_OK

        @model_api.route("/predictions/", methods=["POST"])
        @model_api.route("/predict/", methods=["POST"])
        def predict():
            logger.debug("Entering predict() endpoint")

            self._stats_collector.enable()
            self._stats_collector.mark("start")

            try:
                response, response_status = self.do_predict_structured(logger=logger)
            finally:
                self._stats_collector.mark("finish")
                self._stats_collector.disable()
            return response, response_status

        @model_api.route("/transform/", methods=["POST"])
        def transform():

            logger.debug("Entering transform() endpoint")

            self._stats_collector.enable()
            self._stats_collector.mark("start")

            try:
                response, response_status = self.do_transform(logger=logger)
            finally:
                self._stats_collector.mark("finish")
                self._stats_collector.disable()
            return response, response_status

        @model_api.route("/predictionsUnstructured/", methods=["POST"])
        @model_api.route("/predictUnstructured/", methods=["POST"])
        def predict_unstructured():
            logger.debug("Entering predict() endpoint")

            self._stats_collector.enable()
            self._stats_collector.mark("start")

            try:
                response, response_status = self.do_predict_unstructured(logger=logger)
            finally:
                self._stats_collector.mark("finish")
                self._stats_collector.disable()
            return response, response_status

        @model_api.route("/stats/", methods=["GET"])
        def stats():
            mem_info = self._memory_monitor.collect_memory_info()
            ret_dict = {"mem_info": mem_info._asdict()}

            self._stats_collector.round()
            ret_dict["time_info"] = {}
            for name in self._stats_collector.get_report_names():
                d = self._stats_collector.dict_report(name)
                ret_dict["time_info"][name] = d
            self._stats_collector.stats_reset()
            return ret_dict, HTTP_200_OK

        @model_api.errorhandler(Exception)
        def handle_exception(e):
            logger.exception(e)
            return {"message": "ERROR: {}".format(e)}, HTTP_500_INTERNAL_SERVER_ERROR

        # Disables warning for development server
        cli = sys.modules["flask.cli"]
        cli.show_server_banner = lambda *x: None

        app = get_flask_app(model_api)

        host = self._params.get("host", None)
        port = self._params.get("port", None)
        try:
            app.run(host, port, threaded=False)
        except OSError as e:
            raise DrumCommonException("{}: host: {}; port: {}".format(e, host, port))

        if self._stats_collector:
            self._stats_collector.print_reports()

        return []