Example #1
    def _extract_metrics_stats(self, prev_stats_snapshot):
        raw_metrics = self._raw_stats.get("metrics", None)
        if not raw_metrics:
            return

        self._uwsgi_pm_metrics = self._extract_relevant_raw_metrics(raw_metrics)
        if not self._uwsgi_pm_metrics:
            return

        if not self._metrics_execution_order:
            # Arrange the metrics in topological order, because the definitions can form a DAG
            # where one metric depends on a metric that itself depends on others. The topological
            # ordering also allows more than one reference in a single metric definition.
            self._metrics_execution_order = TopologicalSort(Metric.metrics(), "metric_name",
                                                            "related_metric_meta").sort()

        self._uwsgi_pm_metrics_per_window = {}

        for metric_meta in self._metrics_execution_order:
            metric_name = metric_meta.metric_name
            metric_value = self.uwsgi_pm_metrics[metric_name]

            if metric_meta.metric_type == MetricType.COUNTER_PER_TIME_WINDOW:
                if prev_stats_snapshot:
                    metric_value -= prev_stats_snapshot.uwsgi_pm_metric_by_name(metric_name)

                self._calculate_metric_value(metric_value, metric_meta, self.total_requests_diff,
                                             self.uwsgi_pm_metrics_per_window)
            else:
                self._calculate_metric_value(metric_value, metric_meta, self.total_requests,
                                             self._uwsgi_pm_metrics)
                self._uwsgi_pm_metrics_accumulation[metric_name] = self._uwsgi_pm_metrics[metric_name]
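
The topological ordering mentioned in the comment above can be illustrated with a small standalone sketch. The TopologicalSort helper itself is not shown in these examples; the snippet below is a hypothetical, minimal Kahn-style ordering that only demonstrates why a metric that divides by another metric (as in the configure() examples further below) must be computed after its dependency.

# Hypothetical sketch only; the real TopologicalSort(Metric.metrics(), ...) helper is not shown here.
from collections import deque

def topological_order(depends_on):
    """depends_on: dict mapping metric name -> set of metric names it references."""
    in_degree = {name: len(deps) for name, deps in depends_on.items()}
    dependents = {name: [] for name in depends_on}
    for name, deps in depends_on.items():
        for dep in deps:
            dependents[dep].append(name)

    ready = deque(name for name, deg in in_degree.items() if deg == 0)
    order = []
    while ready:
        name = ready.popleft()
        order.append(name)
        for dependent in dependents[name]:
            in_degree[dependent] -= 1
            if in_degree[dependent] == 0:
                ready.append(dependent)
    return order

# "distance.per.counter" is declared with DIVIDE_BY against "requests.per.win.time",
# so it must be evaluated after that counter:
print(topological_order({
    "requests.per.win.time": set(),
    "distance.per.counter": {"requests.per.win.time"},
}))  # ['requests.per.win.time', 'distance.per.counter']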
Example #2
    def _setup(self, pipeline_name, monitor_info):
        target_path = tempfile.mkdtemp(dir=ComponentConstants.TMP_RESTFUL_ROOT,
                                       prefix=ComponentConstants.TMP_RESTFUL_DIR_PREFIX)
        os.chmod(target_path, 0o777)

        shared_conf = {
            SharedConstants.TARGET_PATH_KEY: target_path,
            SharedConstants.SOCK_FILENAME_KEY: UwsgiConstants.SOCK_FILENAME,
            SharedConstants.STATS_SOCK_FILENAME_KEY: UwsgiConstants.STATS_SOCK_FILENAME
        }

        log_format = self._params.get(ComponentConstants.LOG_FORMAT_KEY, ComponentConstants.DEFAULT_LOG_FORMAT)

        log_level_param = self._params.get(ComponentConstants.LOG_LEVEL_KEY, ComponentConstants.DEFAULT_LOG_LEVEL).lower()
        log_level = constants.LOG_LEVELS.get(log_level_param, logging.INFO)
        self._logger.debug("log_level_param: {}, log_level: {}, level_constants: {}"
                           .format(log_level_param, log_level, constants.LOG_LEVELS))

        stats_reporting_interval_sec = self._params.get(ComponentConstants.STATS_REPORTING_INTERVAL_SEC,
                                                        ComponentConstants.DEFAULT_STATS_REPORTING_INTERVAL_SEC)

        model_filepath_key = java_mapping.RESERVED_KEYS[ComponentConstants.INPUT_MODEL_TAG_NAME]
        self._params[model_filepath_key] = ModelEnv(self._params[model_filepath_key]).model_filepath

        uwsgi_entry_point_conf = {
            UwsgiConstants.RESTFUL_COMP_MODULE_KEY: self.__module__,
            UwsgiConstants.RESTFUL_COMP_CLS_KEY: self.__class__.__name__,
            ComponentConstants.LOG_FORMAT_KEY: log_format,
            ComponentConstants.LOG_LEVEL_KEY: log_level,
            ComponentConstants.STATS_REPORTING_INTERVAL_SEC: stats_reporting_interval_sec,
            UwsgiConstants.PARAMS_KEY: self._params,
            UwsgiConstants.PIPELINE_NAME_KEY: pipeline_name,
            UwsgiConstants.MODEL_PATH_KEY: self._params[model_filepath_key],
            ComponentConstants.UWSGI_DISABLE_LOGGING_KEY:
                parameter.str2bool(self._params.get(ComponentConstants.UWSGI_DISABLE_LOGGING_KEY,
                                                    ComponentConstants.DEFAULT_UWSGI_DISABLE_LOGGING)),
            ComponentConstants.METRICS_KEY: Metric.metrics()
        }
        self._logger.debug("uwsgi_entry_point_conf: {}".format(uwsgi_entry_point_conf))

        nginx_conf = {
            ComponentConstants.HOST_KEY: ComponentConstants.DEFAULT_HOST,
            ComponentConstants.PORT_KEY: self._params[ComponentConstants.PORT_KEY],
            NginxConstants.DISABLE_ACCESS_LOG_KEY: log_level != logging.DEBUG
        }
        self._logger.debug("nginx_conf: {}".format(nginx_conf))

        self._dry_run = parameter.str2bool(self._params.get(ComponentConstants.DRY_RUN_KEY,
                                                            ComponentConstants.DEFAULT_DRY_RUN))
        if self._dry_run:
            self._logger.warning("\n\n" + 80 * '#' + "\n" + 25 * " " + "Running in DRY RUN mode\n" + 80 * '#')

        self._wsgi_broker = UwsgiBroker(self._ml_engine, self._dry_run) \
            .setup_and_run(shared_conf, uwsgi_entry_point_conf, monitor_info)

        self._nginx_broker = NginxBroker(self._ml_engine, self._dry_run) \
            .setup_and_run(shared_conf, nginx_conf)
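
The implementation of parameter.str2bool, used above for DRY_RUN_KEY and UWSGI_DISABLE_LOGGING_KEY, is not part of these examples. A minimal sketch of the kind of helper those calls assume (accepting common string spellings of booleans as well as actual booleans) could look like this; the real helper may differ.

# Hypothetical helper, sketched only to illustrate the str2bool calls above.
def str2bool(value):
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in ("1", "true", "yes", "on")

assert str2bool("True") is True
assert str2bool("0") is False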
Example #3
    def configure(self, params):
        """
        @brief      Called within the 'deputy' context
        """
        self._logger.info(
            "Configure component with input params, name: {}, params: {}".
            format(self.name(), params))
        self._params = params

        self._metric1 = Metric("requests.per.win.time",
                               hidden=True,
                               metric_type=MetricType.COUNTER_PER_TIME_WINDOW)

        self._metric2 = Metric(name="distance.per.req",
                               title="Avg Distance / time-window [per-reqs]",
                               metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
                               value_type=float,
                               metric_relation=MetricRelation.AVG_PER_REQUEST)

        self._metric3 = Metric(
            name="distance.per.counter",
            title="Avg Distance / time-window [counter.per.reqs]",
            metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
            value_type=float,
            metric_relation=MetricRelation.DIVIDE_BY,
            related_metric=self._metric1)

        self._metric4 = Metric(name="classification",
                               title="Prediction Distribution",
                               metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
                               metric_relation=MetricRelation.BAR_GRAPH,
                               related_metric=[(self._metric2, "metric2"),
                                               (self._metric3, "metric3")])
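
Once defined in configure(), these metrics are driven from the request handlers by calling increase(); the /metric-test route in Example #7 below shows the full pattern. A short usage excerpt, adapted from that handler (the confidence value there is just a random number used for testing):

# Inside a request handler (adapted from the /metric-test route in Example #7):
self._metric1.increase(1)               # hidden per-window request counter
self._metric2.increase(confidence)      # averaged per request (AVG_PER_REQUEST)
self._metric3.increase(confidence * 2)  # divided by metric1 (DIVIDE_BY)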
Example #4
    def _report_metrics_collection(self, metrics):
        for name, value in metrics.items():
            metric_meta = Metric.metric_by_name(name)
            self._logger.debug("Reporting metrics ... {}".format(metric_meta))
            if not metric_meta.hidden:
                if metric_meta.metric_relation == MetricRelation.BAR_GRAPH:
                    self._report_bar_graph_metric(metric_meta, metrics)
                else:
                    mlops.set_stat(metric_meta.title, value)
Example #5
    def __init__(self, engine):
        super(RESTfulComponent, self).__init__(engine if engine else RestModelServingEngine("uwsgi-context"))
        self._dry_run = False
        self._wsgi_broker = None
        self._nginx_broker = None
        self._wid = None

        if mlops_loaded:
            from os import environ
            if environ.get(RestfulConstants.STATS_AGGREGATE_FLAG) is not None:
                mlops.init(mlops_mode=MLOpsMode.REST_ACCUMULATOR)
            else:
                mlops.init()

        self._total_stat_requests = Metric("pm.stat_requests",
                                           title="Total number of stat requests",
                                           metric_type=MetricType.COUNTER,
                                           value_type=int,
                                           metric_relation=MetricRelation.SUM_OF)
Example #6
    def _extract_relevant_raw_metrics(self, raw_metrics):
        uwsgi_pm_metrics = {}
        # Set values according to their types
        for name, body in raw_metrics.items():
            if Metric.NAME_SUFFIX in name:
                value = body["value"]
                if Metric.metric_by_name(name).value_type == float:
                    value /= Metric.FLOAT_PRECISION
                uwsgi_pm_metrics[name] = value

        return uwsgi_pm_metrics
Example #7
class SklearnRESTfulServingTest(RESTfulComponent):
    JSON_KEY_NAME = "data"

    def __init__(self, engine):
        super(SklearnRESTfulServingTest, self).__init__(engine)
        self._model = None
        self._model_loading_error = None
        self._params = {}
        self._verbose = self._logger.isEnabledFor(logging.DEBUG)

        self._metric1 = None
        self._metric2 = None
        self._metric3 = None
        self._metric4 = None

        self.info_json = {
            "sample_keyword": SklearnRESTfulServingTest.JSON_KEY_NAME,
            "python": "{}.{}.{}".format(sys.version_info[0], sys.version_info[1],
                                        sys.version_info[2]),
            "numpy": np.version.version,
            "sklearn": sklearn.__version__,
        }

    def configure(self, params):
        """
        @brief      Called within the 'deputy' context
        """
        self._logger.info(
            "Configure component with input params, name: {}, params: {}".
            format(self.name(), params))
        self._params = params

        self._metric1 = Metric("requests.per.win.time",
                               hidden=True,
                               metric_type=MetricType.COUNTER_PER_TIME_WINDOW)

        self._metric2 = Metric(name="distance.per.req",
                               title="Avg Distance / time-window [per-reqs]",
                               metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
                               value_type=float,
                               metric_relation=MetricRelation.AVG_PER_REQUEST)

        self._metric3 = Metric(
            name="distance.per.counter",
            title="Avg Distance / time-window [counter.per.reqs]",
            metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
            value_type=float,
            metric_relation=MetricRelation.DIVIDE_BY,
            related_metric=self._metric1)

        self._metric4 = Metric(name="classification",
                               title="Prediction Distribution",
                               metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
                               metric_relation=MetricRelation.BAR_GRAPH,
                               related_metric=[(self._metric2, "metric2"),
                                               (self._metric3, "metric3")])

    def load_model_callback(self, model_path, stream, version):
        self._logger.info(sys.version_info)

        self._logger.info("Model is loading, wid: {}, path: {}".format(
            self.get_wid(), model_path))
        self._logger.info("params: {}".format(pprint.pformat(self._params)))
        model = None

        with warnings.catch_warnings(record=True) as warns:
            try:
                with open(model_path, "rb") as f:
                    self._model_loading_error = None
                    model = pickle.load(f) if sys.version_info[0] < 3 \
                        else pickle.load(f, encoding='latin1')

                    if self._verbose:
                        # Log the freshly un-pickled object ("model"; self._model is not assigned yet)
                        self._logger.debug("Un-pickled model: {}".format(model))
                    self._logger.debug("Model loaded successfully!")

            except Exception as e:
                warn_str = ""
                if len(warns) > 0:
                    warn_str = "{}".format(warns[-1].message)
                self._logger.error(
                    "Model loading warning: {}; Model loading error: {}".
                    format(warn_str, e))

                # Not sure we want to throw an exception instead of just moving to a non-model mode
                if self._params.get("ignore-incompatible-model", True):
                    self._logger.info(
                        "New model could not be loaded, due to error: {}".
                        format(e))
                    if self._model is None:
                        self._model_loading_error = "Model loading warning: {}; Model loading error: {}".format(
                            warn_str, str(e))
                    else:
                        raise Exception(
                            "Model loading warning: {}; Model loading error: {}"
                            .format(warn_str, e))

        # This line should be reached only if
        #  a) model loaded successfully
        #  b) model loading failed but it can be ignored
        if model is not None:
            self._model = model

    def _empty_predict(self):
        model_loaded = bool(self._model)

        result_json = {
            "message": "got empty predict",
            "expected_input_format": "{{\"data\":[<vector>]}}",
            "model_loaded": model_loaded,
            "model_class": str(type(self._model))
        }

        if model_loaded is False and self._model_loading_error:
            result_json["model_load_error"] = self._model_loading_error

        if self._model:
            if hasattr(self._model, "n_features_"):
                result_json["n_features"] = self._model.n_features_
                result_json[
                    "expected_input_format"] += ", where vector has {} comma separated values".format(
                        self._model.n_features_)

        result_json.update(self.info_json)

        return result_json

    @FlaskRoute('/predict')
    def predict(self, url_params, form_params):

        if len(form_params) == 0:
            return 200, self._empty_predict()

        elif not self._model:
            if self._model_loading_error:
                return_json = {
                    "error":
                    "Failed loading model: {}".format(
                        self._model_loading_error)
                }
            else:
                return_json = {
                    "error": "Model not loaded yet - please set a model"
                }
            return_json.update(self.info_json)
            return 404, return_json

        elif SklearnRESTfulServingTest.JSON_KEY_NAME not in form_params:
            msg = "Unexpected json format for prediction! Missing '{}' key in: {}" \
                .format(SklearnRESTfulServingTest.JSON_KEY_NAME, form_params)
            self._logger.error(msg)
            error_json = {"error": msg}
            error_json.update(self.info_json)
            return 404, error_json
        else:
            try:
                two_dim_array = np.array(
                    [form_params[SklearnRESTfulServingTest.JSON_KEY_NAME]])
                prediction = self._model.predict(two_dim_array)
                if self._verbose:
                    self._logger.debug(
                        "predict, url_params: {}, form_params: {}".format(
                            url_params, form_params))
                    self._logger.debug("type<form_params>: {}\n{}".format(
                        type(form_params), form_params))
                    self._logger.debug("type(two_dim_array): {}\n{}".format(
                        type(two_dim_array), two_dim_array))
                    self._logger.debug("prediction: {}, type: {}".format(
                        prediction[0], type(prediction[0])))
                return 200, {"prediction": prediction[0]}
            except Exception as e:
                error_json = {
                    "error": "Error performing prediction: {}".format(e)
                }
                error_json.update(self.info_json)
                return 404, error_json

    @FlaskRoute('/metric-test')
    def metric_test(self, url_params, form_params):
        try:
            self._metric1.increase(1)

            confident_num = random.random()

            # The values in the graphs are supposed to be the same
            self._metric2.increase(confident_num)
            self._metric3.increase(confident_num * 2)
            return 200, {"response": "ok"}
        except Exception as ex:
            return 404, {"message": str(ex)}
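
A client-side sketch of exercising these routes. The host and port below are hypothetical (the port comes from the component's configured PORT_KEY), the feature vector length depends on the loaded model, and the sketch assumes the routes accept JSON POSTs; the FlaskRoute wrapper that decides the accepted HTTP methods and how form_params is populated is not shown in these examples.

# Hypothetical client for the routes above; adjust host/port and payload to your deployment.
import requests

base_url = "http://localhost:8888"  # placeholder; use the port configured via PORT_KEY

# Empty body -> _empty_predict() describes the expected input format
print(requests.post("{}/predict".format(base_url), json={}).json())

# Real prediction: the "data" key (JSON_KEY_NAME) must hold a single feature vector
resp = requests.post("{}/predict".format(base_url),
                     json={"data": [5.1, 3.5, 1.4, 0.2]})
print(resp.status_code, resp.json())  # e.g. 200, {"prediction": ...}

# Drive the test metrics (assumed reachable via GET here)
print(requests.get("{}/metric-test".format(base_url)).json())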
Example #8
    def configure(self, params):
        """
        @brief      Called within the 'deputy' context
        """
        self._logger.info(
            "Configure component with input params, name: {}, params: {}".
            format(self.name(), params))
        self._params = params

        self._total_confidence_metric = Metric(
            "total.confidence",
            title="Average Confidence",
            metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
            value_type=float,
            metric_relation=MetricRelation.AVG_PER_REQUEST)

        self._num_predictable_classes = self._params.get(
            "num_predictable_classes", 0)
        if self._num_predictable_classes > 0:

            # Prediction distribution bar graph
            self._prediction_distribution_bar_graph_metric = Metric(
                "prediction_distribution",
                title="Prediction Distribution",
                metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
                metric_relation=MetricRelation.BAR_GRAPH,
                related_metric=[])
            for iii in range(self._num_predictable_classes):
                metric = Metric("num.prediction.per.class.{}".format(iii),
                                hidden=True,
                                metric_type=MetricType.COUNTER_PER_TIME_WINDOW)
                self._num_predictions_metric_per_class.append(metric)
                self._prediction_distribution_bar_graph_metric.add_related_metric(
                    (metric, "{}".format(iii)))

            # Confidence bar graph
            self._confidence_bar_graph_metric = Metric(
                "confidence_bar_graph",
                title="Average Confidence per class",
                metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
                metric_relation=MetricRelation.BAR_GRAPH,
                related_metric=[])

            for iii in range(self._num_predictable_classes):
                metric = Metric(
                    "total.confidence.per.class.{}".format(iii),
                    hidden=True,
                    metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
                    value_type=float,
                    metric_relation=MetricRelation.DIVIDE_BY,
                    related_metric=self._num_predictions_metric_per_class[iii])
                self._confidence_metric_per_class.append(metric)
                self._confidence_bar_graph_metric.add_related_metric(
                    (metric, "{}".format(iii)))

        self._low_confidence_threshold_percent = self._params.get(
            "low_confidence_threshold_percent", 0)
        if self._low_confidence_threshold_percent > 0:
            self._num_confidences_below_threshold = Metric(
                "num.confidence.below.thrsh",
                title="Number of predictions with confidence below {}% threshold"
                .format(self._low_confidence_threshold_percent),
                metric_type=MetricType.COUNTER_PER_TIME_WINDOW)
Example #9
class SklearnRESTfulServing(RESTfulComponent):
    JSON_KEY_NAME = "data"

    def __init__(self, engine):
        super(SklearnRESTfulServing, self).__init__(engine)
        self._model = None
        self._model_type = ModelType.other
        self._model_loading_error = None
        self._params = {}
        self._verbose = self._logger.isEnabledFor(logging.DEBUG)
        self._num_predictable_classes = 0
        self._confidence_metric_per_class = []
        self._num_predictions_metric_per_class = []

        self._low_confidence_threshold_percent = None

        # Metrics
        self._num_confidences_below_threshold = None
        self._total_confidence_metric = None
        self._confidence_bar_graph_metric = None
        self._prediction_distribution_bar_graph_metric = None

        self.info_json = {
            "sample_keyword": SklearnRESTfulServing.JSON_KEY_NAME,
            "python": "{}.{}.{}".format(sys.version_info[0], sys.version_info[1],
                                        sys.version_info[2]),
            "numpy": np.version.version,
            "sklearn": sklearn.__version__,
        }

    def configure(self, params):
        """
        @brief      Called within the 'deputy' context
        """
        self._logger.info(
            "Configure component with input params, name: {}, params: {}".
            format(self.name(), params))
        self._params = params

        self._total_confidence_metric = Metric(
            "total.confidence",
            title="Average Confidence",
            metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
            value_type=float,
            metric_relation=MetricRelation.AVG_PER_REQUEST)

        self._num_predictable_classes = self._params.get(
            "num_predictable_classes", 0)
        if self._num_predictable_classes > 0:

            # Prediction distribution bar graph
            self._prediction_distribution_bar_graph_metric = Metric(
                "prediction_distribution",
                title="Prediction Distribution",
                metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
                metric_relation=MetricRelation.BAR_GRAPH,
                related_metric=[])
            for iii in range(self._num_predictable_classes):
                metric = Metric("num.prediction.per.class.{}".format(iii),
                                hidden=True,
                                metric_type=MetricType.COUNTER_PER_TIME_WINDOW)
                self._num_predictions_metric_per_class.append(metric)
                self._prediction_distribution_bar_graph_metric.add_related_metric(
                    (metric, "{}".format(iii)))

            # Confidence bar graph
            self._confidence_bar_graph_metric = Metric(
                "confidence_bar_graph",
                title="Average Confidence per class",
                metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
                metric_relation=MetricRelation.BAR_GRAPH,
                related_metric=[])

            for iii in range(self._num_predictable_classes):
                metric = Metric(
                    "total.confidence.per.class.{}".format(iii),
                    hidden=True,
                    metric_type=MetricType.COUNTER_PER_TIME_WINDOW,
                    value_type=float,
                    metric_relation=MetricRelation.DIVIDE_BY,
                    related_metric=self._num_predictions_metric_per_class[iii])
                self._confidence_metric_per_class.append(metric)
                self._confidence_bar_graph_metric.add_related_metric(
                    (metric, "{}".format(iii)))

        self._low_confidence_threshold_percent = self._params.get(
            "low_confidence_threshold_percent", 0)
        if self._low_confidence_threshold_percent > 0:
            self._num_confidences_below_threshold = Metric(
                "num.confidence.below.thrsh",
                title="Number of predictions with confidence below {}% threshold"
                .format(self._low_confidence_threshold_percent),
                metric_type=MetricType.COUNTER_PER_TIME_WINDOW)

    def load_model_callback(self, model_path, stream, version):
        self._logger.info(sys.version_info)

        self._logger.info("Model is loading, wid: {}, path: {}".format(
            self.get_wid(), model_path))
        self._logger.info("params: {}".format(pprint.pformat(self._params)))
        model = None

        with warnings.catch_warnings(record=True) as warns:
            try:
                with open(model_path, "rb") as f:
                    self._model_loading_error = None
                    model = pickle.load(f) if sys.version_info[0] < 3 \
                        else pickle.load(f, encoding='latin1')

                    if self._verbose:
                        # Log the freshly un-pickled object ("model"; self._model is not assigned yet)
                        self._logger.debug("Un-pickled model: {}".format(model))
                    self._logger.debug("Model loaded successfully!")

            except Exception as e:
                warn_str = ""
                if len(warns) > 0:
                    warn_str = "{}".format(warns[-1].message)
                self._logger.error(
                    "Model loading warning: {}; Model loading error: {}".
                    format(warn_str, e))

                # Not sure we want to throw an exception instead of just moving to a non-model mode
                if self._params.get("ignore-incompatible-model", True):
                    self._logger.info(
                        "New model could not be loaded, due to error: {}".
                        format(e))
                    if self._model is None:
                        self._model_loading_error = "Model loading warning: {}; Model loading error: {}".format(
                            warn_str, str(e))
                    else:
                        raise Exception(
                            "Model loading warning: {}; Model loading error: {}"
                            .format(warn_str, e))

        # This line should be reached only if
        #  a) model loaded successfully
        #  b) model loading failed but it can be ignored
        if model is not None:
            self._model = model
            self._update_model_type()

    def _update_model_type(self):
        if self._model:
            if sklearn.base.is_classifier(self._model) or getattr(
                    self._model, "_estimator_type", None) == "clusterer":
                self._model_type = ModelType.classifier
            elif sklearn.base.is_regressor(self._model):
                self._model_type = ModelType.regressor
            else:
                self._model_type = ModelType.other

    def _empty_predict(self):
        model_loaded = bool(self._model)

        result_json = {
            "message": "got empty predict",
            "expected_input_format": "{{\"data\":[<vector>]}}",
            "model_loaded": model_loaded,
            "model_class": str(type(self._model))
        }

        if model_loaded is False and self._model_loading_error:
            result_json["model_load_error"] = self._model_loading_error

        if self._model:
            if hasattr(self._model, "n_features_"):
                result_json["n_features"] = self._model.n_features_
                result_json[
                    "expected_input_format"] += ", where vector has {} comma separated values".format(
                        self._model.n_features_)

        result_json.update(self.info_json)

        return result_json

    @FlaskRoute('/predict')
    def predict(self, url_params, form_params):

        if len(form_params) == 0:
            return 200, self._empty_predict()

        elif not self._model:
            if self._model_loading_error:
                return_json = {
                    "error":
                    "Failed loading model: {}".format(
                        self._model_loading_error)
                }
            else:
                return_json = {
                    "error": "Model not loaded yet - please set a model"
                }
            return_json.update(self.info_json)
            return 404, return_json

        elif SklearnRESTfulServing.JSON_KEY_NAME not in form_params:
            msg = "Unexpected json format for prediction! Missing '{}' key in: {}" \
                .format(SklearnRESTfulServing.JSON_KEY_NAME, form_params)
            self._logger.error(msg)
            error_json = {"error": msg}
            error_json.update(self.info_json)
            return 404, error_json
        else:
            try:
                entry = form_params[SklearnRESTfulServing.JSON_KEY_NAME]
                if isinstance(entry, list):
                    two_dim_array = np.array([entry])
                else:
                    two_dim_array = pd.DataFrame.from_dict(
                        [OrderedDict(entry)])
                pred_probs = None
                try:
                    pred_probs = self._model.predict_proba(two_dim_array)[0]
                except Exception:
                    # The model does not expose predict_proba (or it failed); fall back to predict()
                    prediction = self._model.predict(two_dim_array)[0]

                if pred_probs is not None:
                    pred_index = np.argmax(pred_probs)
                    prediction = self._model.classes_[pred_index]
                    prediction_confidence = pred_probs[pred_index]

                    self._logger.debug(
                        "pred_probs: {}, pred_index: {}, prediction: {}, confidence: {}"
                        .format(pred_probs, pred_index, prediction,
                                prediction_confidence))

                    # Total confidence
                    self._total_confidence_metric.increase(
                        prediction_confidence)

                    if self._num_predictable_classes:
                        # Prediction confidence per class
                        # index = int(prediction * Metric.FLOAT_PRECISION) % self._num_predictable_classes
                        index = int(prediction)
                        self._confidence_metric_per_class[index].increase(
                            prediction_confidence)

                    # Lower probability threshold
                    if self._low_confidence_threshold_percent and \
                            prediction_confidence * 100 < self._low_confidence_threshold_percent:
                        self._num_confidences_below_threshold.increase(1)

                if self._model_type == ModelType.classifier and self._num_predictable_classes:
                    # index = int(prediction * Metric.FLOAT_PRECISION) % self._num_predictable_classes
                    index = int(prediction)
                    self._num_predictions_metric_per_class[index].increase(1)

                if self._verbose:
                    self._logger.debug(
                        "predict, url_params: {}, form_params: {}".format(
                            url_params, form_params))
                    self._logger.debug("type<form_params>: {}\n{}".format(
                        type(form_params), form_params))
                    self._logger.debug("type(two_dim_array): {}\n{}".format(
                        type(two_dim_array), two_dim_array))
                    self._logger.debug("prediction: {}, type: {}".format(
                        prediction, type(prediction)))
                return 200, {"prediction": prediction}
            except Exception as e:
                error_json = {
                    "error": "Error performing prediction: {}".format(e)
                }
                error_json.update(self.info_json)
                return 404, error_json
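
The per-class confidence bars above are derived metrics: each "total.confidence.per.class.{i}" is declared with MetricRelation.DIVIDE_BY against the matching hidden "num.prediction.per.class.{i}" counter, so over a reporting window the displayed value becomes an average. A small worked illustration with invented numbers, assuming DIVIDE_BY is computed as a plain ratio over the window (which is what the relation name and the window handling in Example #1 suggest):

# Invented numbers for one reporting window of class 2:
total_confidence_class_2 = 0.91 + 0.84 + 0.78   # three predictions landed on class 2
num_predictions_class_2 = 3

# With MetricRelation.DIVIDE_BY, the reported bar value is the ratio:
avg_confidence_class_2 = total_confidence_class_2 / num_predictions_class_2
print(round(avg_confidence_class_2, 3))  # 0.843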
Example #10
class RESTfulComponent(ConnectableComponent):
    _uuid_engine = None
    _stats_path_filename = None
    _stats_count = 0

    def __init__(self, engine):
        super(RESTfulComponent, self).__init__(engine if engine else RestModelServingEngine("uwsgi-context"))
        self._dry_run = False
        self._wsgi_broker = None
        self._nginx_broker = None
        self._wid = None

        if mlops_loaded:
            from os import environ
            if environ.get(RestfulConstants.STATS_AGGREGATE_FLAG) is not None:
                mlops.init(mlops_mode=MLOpsMode.REST_ACCUMULATOR)
            else:
                mlops.init()

        self._total_stat_requests = Metric("pm.stat_requests",
                                           title="Total number of stat requests",
                                           metric_type=MetricType.COUNTER,
                                           value_type=int,
                                           metric_relation=MetricRelation.SUM_OF)

    def set_wid(self, wid):
        self._wid = wid

    def get_wid(self):
        return self._wid

    def _validate_output(self, objs):
        pass

    def _post_validation(self, objs):
        pass

    def _materialize(self, parent_data_objs, user_data):
        monitor_info = {UwsgiConstants.MONITOR_ERROR_KEY: None, UwsgiConstants.MONITOR_THREAD_KEY: None}
        self._setup(self._ml_engine.pipeline_name, monitor_info)
        self._wait_and_monitor_errors(monitor_info)

    def _setup(self, pipeline_name, monitor_info):
        target_path = tempfile.mkdtemp(dir=ComponentConstants.TMP_RESTFUL_ROOT,
                                       prefix=ComponentConstants.TMP_RESTFUL_DIR_PREFIX)
        os.chmod(target_path, 0o777)

        fd, stats_path_filename = tempfile.mkstemp(dir=ComponentConstants.TMP_RESTFUL_ROOT,
                                                   prefix=ComponentConstants.TMP_RESTFUL_DIR_PREFIX)
        os.chmod(stats_path_filename, 0o777)

        self._logger.debug("Path for stats {}".format(stats_path_filename))

        shared_conf = {
            SharedConstants.TARGET_PATH_KEY: target_path,
            SharedConstants.SOCK_FILENAME_KEY: UwsgiConstants.SOCK_FILENAME,
            SharedConstants.STATS_SOCK_FILENAME_KEY: UwsgiConstants.STATS_SOCK_FILENAME,
            SharedConstants.STANDALONE: self._ml_engine.standalone,
            SharedConstants.STATS_PATH_FILENAME_KEY: stats_path_filename
        }

        log_format = self._params.get(ComponentConstants.LOG_FORMAT_KEY, ComponentConstants.DEFAULT_LOG_FORMAT)

        log_level_param = self._params.get(ComponentConstants.LOG_LEVEL_KEY, ComponentConstants.DEFAULT_LOG_LEVEL).lower()
        log_level = constants.LOG_LEVELS.get(log_level_param, logging.INFO)
        self._logger.debug("log_level_param: {}, log_level: {}, level_constants: {}"
                           .format(log_level_param, log_level, constants.LOG_LEVELS))

        stats_reporting_interval_sec = self._params.get(ComponentConstants.STATS_REPORTING_INTERVAL_SEC,
                                                        ComponentConstants.DEFAULT_STATS_REPORTING_INTERVAL_SEC)

        model_filepath_key = java_mapping.RESERVED_KEYS[ComponentConstants.INPUT_MODEL_TAG_NAME]
        self._params[model_filepath_key] = ModelEnv(self._params[model_filepath_key], self._ml_engine.standalone) \
            .model_filepath

        uwsgi_entry_point_conf = {
            UwsgiConstants.RESTFUL_COMP_MODULE_KEY: self.__module__,
            UwsgiConstants.RESTFUL_COMP_CLS_KEY: self.__class__.__name__,
            ComponentConstants.LOG_FORMAT_KEY: log_format,
            ComponentConstants.LOG_LEVEL_KEY: log_level,
            ComponentConstants.STATS_REPORTING_INTERVAL_SEC: stats_reporting_interval_sec,
            UwsgiConstants.PARAMS_KEY: self._params,
            UwsgiConstants.PIPELINE_NAME_KEY: pipeline_name,
            UwsgiConstants.MODEL_PATH_KEY: self._params[model_filepath_key],
            UwsgiConstants.DEPUTY_ID_KEY: self._ml_engine.get_uuid(),
            ComponentConstants.UWSGI_DISABLE_LOGGING_KEY:
                parameter.str2bool(self._params.get(ComponentConstants.UWSGI_DISABLE_LOGGING_KEY,
                                                    ComponentConstants.DEFAULT_UWSGI_DISABLE_LOGGING)),
            ComponentConstants.METRICS_KEY: Metric.metrics()
        }
        self._logger.debug("uwsgi_entry_point_conf: {}".format(uwsgi_entry_point_conf))

        nginx_conf = {
            ComponentConstants.HOST_KEY: ComponentConstants.DEFAULT_HOST,
            ComponentConstants.PORT_KEY: self._params[ComponentConstants.PORT_KEY],
            NginxConstants.DISABLE_ACCESS_LOG_KEY: log_level != logging.DEBUG
        }
        self._logger.debug("nginx_conf: {}".format(nginx_conf))

        self._dry_run = parameter.str2bool(self._params.get(ComponentConstants.DRY_RUN_KEY,
                                                            ComponentConstants.DEFAULT_DRY_RUN))
        if self._dry_run:
            self._logger.warning("\n\n" + 80 * '#' + "\n" + 25 * " " + "Running in DRY RUN mode\n" + 80 * '#')

        self._wsgi_broker = UwsgiBroker(self._ml_engine, self._dry_run) \
            .setup_and_run(shared_conf, uwsgi_entry_point_conf, monitor_info)

        self._nginx_broker = NginxBroker(self._ml_engine, self._dry_run) \
            .setup_and_run(shared_conf, nginx_conf)

    def _wait_and_monitor_errors(self, monitor_info):
        self._logger.info("Going to read model / stop events ... (kidding, going to sleep forever ...)")

        if not self._dry_run and monitor_info[UwsgiConstants.MONITOR_THREAD_KEY]:
            try:
                monitor_info[UwsgiConstants.MONITOR_THREAD_KEY].join()

                if monitor_info[UwsgiConstants.MONITOR_ERROR_KEY]:
                    self._logger.error(monitor_info[UwsgiConstants.MONITOR_ERROR_KEY])
                    raise MLCompException(monitor_info[UwsgiConstants.MONITOR_ERROR_KEY])
            except KeyboardInterrupt:
                # When running from mlpiper tool (standalone)
                pass
            finally:
                self._nginx_broker.quit()
                self._wsgi_broker.quit()
        else:
            while True:
                time.sleep(3600*24*365)

    @abc.abstractmethod
    def load_model_callback(self, model_path, stream, version):
        """
        This abstract method is called whenever a new model is supposed to be loaded. The user is
        responsible for reloading the model and using it in any subsequent predictions.

        :param model_path: an absolute file path to the model
        """
        pass

    def _on_exit(self):
        cleanup_op = getattr(self, ComponentConstants.CLEANUP_CALLBACK_FUNC_NAME, None)
        if callable(cleanup_op):
            cleanup_op()
        else:
            self._logger.info("'{}' function is not defined by the restful child component!"
                              .format(ComponentConstants.CLEANUP_CALLBACK_FUNC_NAME))

    @classmethod
    def run(cls, port, model_path):
        raise MLCompException("Running restful components from CLI is not allowed without mlpiper")

    # NOTE: do not rename this route or over-ride it
    @FlaskRoute('/{}'.format(RestfulConstants.STATS_ROUTE))
    #@FlaskRoute('/statsinternal')
    def stats(self, url_params, form_params):
        status_code = 200

        import os
        import json

        stats_dict = {}
        self._stats_count += 1
        self._total_stat_requests.increase()

        if self._stats_path_filename is not None and os.stat(self._stats_path_filename).st_size > 0:
            with open(self._stats_path_filename, 'r') as stats_file:
                dict_json = stats_file.read()
                try:
                    stats_dict = json.loads(dict_json)
                except Exception as e:
                    stats_dict[RestfulConstants.STATS_SYSTEM_ERROR] = str(e)
                    print("Unexpected error: {}".format(e))

        stats_dict[RestfulConstants.STATS_SYSTEM_INFO] = {}
        stats_dict[RestfulConstants.STATS_SYSTEM_INFO][RestfulConstants.STATS_WID] = self._wid
        stats_dict[RestfulConstants.STATS_SYSTEM_INFO][RestfulConstants.STATS_UUID] = self._uuid_engine

        try:
            if mlops_loaded:
                stats_dict[RestfulConstants.STATS_USER] = mlops.get_stats_map()
            else:
                print("Warning: mlops is not loaded, user statistics are lost")
        except Exception as e:
            print("error in get_stats_map: " + str(e))
            status_code = 404
            stats_dict = {"error": "error fetching stats map: {}".format(e)}

        return status_code, stats_dict
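
A sketch of polling the stats route from a client. The actual path is '/{}'.format(RestfulConstants.STATS_ROUTE); its literal value, along with the response key names (STATS_SYSTEM_INFO, STATS_WID, STATS_UUID, STATS_USER), is defined in RestfulConstants and not shown in these examples, so the strings below are placeholders. Host and port are hypothetical.

# Hypothetical stats client; substitute the real RestfulConstants.STATS_ROUTE value.
import requests

stats_route = "statsinternal"  # placeholder route name
resp = requests.get("http://localhost:8888/{}".format(stats_route))

stats = resp.json()
# Based on the handler above, the response carries a system-info section (worker id and
# engine uuid) plus, when mlops is loaded, the contents of mlops.get_stats_map().
print(resp.status_code, sorted(stats.keys()))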