    def test_build_pps_response_json_str_bad_target(self):
        d = {"Predictions": [1.2, 2.3, 3.4]}
        df = pd.DataFrame(data=d)
        config = parse_validate_deployment_config_file(
            self.deployment_config_regression)
        with pytest.raises(DrumCommonException,
                           match="target type 'None' is not supported"):
            build_pps_response_json_str(df, config, None)
    def test_map_regression_prediction(self):
        d = {"Predictions": [1.2, 2.3, 3.4]}
        df = pd.DataFrame(data=d)
        config = parse_validate_deployment_config_file(
            self.deployment_config_regression)
        assert config["target"]["name"] == "MEDV"
        assert config["target"]["type"] == "Regression"

        response = build_pps_response_json_str(df, config,
                                               TargetType.REGRESSION)
        response_json = json.loads(response)
        assert isinstance(response_json, dict)
        assert "data" in response_json
        predictions_list = response_json["data"]
        assert isinstance(predictions_list, list)
        assert len(predictions_list) == df.shape[0]

        pred_iter = iter(predictions_list)
        for index, row in df.iterrows():
            pred_item = next(pred_iter)
            assert isinstance(pred_item, dict)
            assert pred_item["rowId"] == index
            assert pred_item["prediction"] == row[0]
            assert isinstance(pred_item["predictionValues"], list)
            assert len(pred_item["predictionValues"]) == 1
            assert pred_item["predictionValues"][0]["label"] == config[
                "target"]["name"]
            assert pred_item["predictionValues"][0]["value"] == row[0]
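        # For reference, a hedged sketch of the PPS-style payload the assertions
        # above describe (field names are taken from this test; values come from
        # the first row of df):
        #
        # {"data": [{"rowId": 0,
        #            "prediction": 1.2,
        #            "predictionValues": [{"label": "MEDV", "value": 1.2}]},
        #           ...]}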
    def _do_predict_structured(self, logger=None):
        response_status = HTTP_200_OK
        try:
            binary_data, mimetype, charset = self._fetch_data_from_request(
                "X", logger=logger)
            sparse_data = self._fetch_additional_files_from_request(
                SPARSE_COLNAMES, logger=logger)

            mimetype_support_error_response = self._check_mimetype_support(
                mimetype)
            if mimetype_support_error_response is not None:
                return mimetype_support_error_response
        except ValueError as e:
            response_status = HTTP_422_UNPROCESSABLE_ENTITY
            return {"message": "ERROR: " + str(e)}, response_status

        out_data = self._predictor.predict(binary_data=binary_data,
                                           mimetype=mimetype,
                                           charset=charset,
                                           sparse_colnames=sparse_data)

        if self._target_type == TargetType.UNSTRUCTURED:
            response = out_data
        else:

            def _build_drum_response_json_str(out_data):
                if len(out_data.columns) == 1:
                    out_data = out_data[REGRESSION_PRED_COLUMN]
                # df.to_json() is much faster, but since it returns a string,
                # the final JSON has to be assembled via string formatting.
                df_json_str = out_data.to_json(orient="records")
                response = '{{"predictions":{df_json}}}'.format(
                    df_json=df_json_str)
                return response

            # float32 is not JSON serializable, so cast to float, which is float64
            out_data = out_data.astype("float")
            if self._deployment_config is not None:
                response = build_pps_response_json_str(out_data,
                                                       self._deployment_config,
                                                       self._target_type)
            else:
                response = _build_drum_response_json_str(out_data)

        response = Response(
            response, mimetype=PredictionServerMimetypes.APPLICATION_JSON)

        return response, response_status
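
# A minimal, self-contained sketch of the non-PPS branch above, assuming
# REGRESSION_PRED_COLUMN == "Predictions" (an assumption here; the constant is
# defined elsewhere in drum):
import json

import pandas as pd

out_data = pd.DataFrame({"Predictions": [1.2, 2.3, 3.4]})
# Series.to_json(orient="records") yields a bare JSON array: "[1.2,2.3,3.4]"
df_json_str = out_data["Predictions"].to_json(orient="records")
response = '{{"predictions":{df_json}}}'.format(df_json=df_json_str)
assert json.loads(response) == {"predictions": [1.2, 2.3, 3.4]}
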
    def test_map_multiclass_prediction(self):
        class_labels = ["QSO", "STAR", "GALAXY"]
        d = {
            class_labels[0]: [0.6, 0.2, 0.3],
            class_labels[1]: [0.3, 0.4, 0.5],
            class_labels[2]: [0.1, 0.4, 0.2],
        }
        df = pd.DataFrame(data=d)
        config = parse_validate_deployment_config_file(
            self.deployment_config_multiclass)
        assert config["target"]["name"] == "class"
        assert config["target"]["type"] == "Multiclass"

        response = build_pps_response_json_str(df, config,
                                               TargetType.MULTICLASS)
        response_json = json.loads(response)
        assert isinstance(response_json, dict)
        assert "data" in response_json
        predictions_list = response_json["data"]
        assert isinstance(predictions_list, list)
        assert len(predictions_list) == df.shape[0]

        pred_iter = iter(predictions_list)
        expected_pred_iterator = iter(["QSO", "GALAXY", "STAR"])
        for index, row in df.iterrows():
            pred_item = next(pred_iter)

            assert isinstance(pred_item, dict)
            assert pred_item["rowId"] == index

            assert pred_item["prediction"] == next(expected_pred_iterator)
            assert isinstance(pred_item["predictionValues"], list)
            assert len(pred_item["predictionValues"]) == 3

            # the expected list must be in [GALAXY, QSO, STAR] order, since that
            # is how map_multiclass_predictions orders classes according to the
            # class mapping in the deployment document
            assert pred_item["predictionValues"] == [
                {
                    "label": class_labels[2],
                    "value": row[class_labels[2]]
                },
                {
                    "label": class_labels[0],
                    "value": row[class_labels[0]]
                },
                {
                    "label": class_labels[1],
                    "value": row[class_labels[1]]
                },
            ]
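
        # Hedged cross-check (an illustration, not the actual
        # map_multiclass_predictions implementation): reindexing the columns to
        # the deployment's class ordering [GALAXY, QSO, STAR] and taking a
        # row-wise idxmax reproduces the expected predictions, including the
        # 0.4/0.4 tie in row 1 resolving to GALAXY because it comes first.
        ordered = df[[class_labels[2], class_labels[0], class_labels[1]]]
        assert list(ordered.idxmax(axis=1)) == ["QSO", "GALAXY", "STAR"]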
    def test_map_binary_prediction(self):
        positive_class = "Iris-setosa"
        negative_class = "Iris-versicolor"
        d = {positive_class: [0.6, 0.5, 0.2], negative_class: [0.4, 0.5, 0.8]}
        df = pd.DataFrame(data=d)
        config = parse_validate_deployment_config_file(
            self.deployment_config_binary)
        assert config["target"]["name"] == "Species"
        assert config["target"]["type"] == "Binary"

        response = build_pps_response_json_str(df, config, TargetType.BINARY)
        response_json = json.loads(response)
        assert isinstance(response_json, dict)
        assert "data" in response_json
        predictions_list = response_json["data"]
        assert isinstance(predictions_list, list)
        assert len(predictions_list) == df.shape[0]

        pred_iter = iter(predictions_list)
        for index, row in df.iterrows():
            pred_item = next(pred_iter)
            assert isinstance(pred_item, dict)
            assert pred_item["rowId"] == index
            assert pred_item["predictionThreshold"] == config["target"][
                "prediction_threshold"]
            assert (pred_item["prediction"] == "Iris-setosa"
                    if row[positive_class] > pred_item["predictionThreshold"]
                    else negative_class)
            assert isinstance(pred_item["predictionValues"], list)
            assert len(pred_item["predictionValues"]) == 2

            # the expected list must be in [positive_class, negative_class]
            # order, since that is how map_binary_prediction generates it
            assert pred_item["predictionValues"] == [
                {
                    "label": positive_class,
                    "value": row[positive_class]
                },
                {
                    "label": negative_class,
                    "value": 1 - row[positive_class]
                },
            ]
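
        # Hedged cross-check (an illustration, not the actual
        # map_binary_prediction implementation): each row's label follows from
        # comparing the positive-class probability to the deployment's
        # prediction threshold.
        threshold = config["target"]["prediction_threshold"]
        expected_labels = [
            positive_class if p > threshold else negative_class
            for p in df[positive_class]
        ]
        assert expected_labels == [
            item["prediction"] for item in predictions_list
        ]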