    def test_predictors_supported_payload_formats(
        self,
        resources,
        framework,
        problem,
        language,
        supported_payload_formats,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
        ) as run:
            response = requests.get(run.url_server_address + "/capabilities/")

            assert response.ok
            assert response.json() == {
                "supported_payload_formats": supported_payload_formats
            }
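
For orientation, here is a minimal standalone sketch of the same /capabilities/ check outside the test fixtures. The import path for DrumServerRun and the model directory are assumptions (adjust them to your checkout); the endpoint and response shape follow the example above.

# Minimal sketch: query /capabilities/ on a DRUM server started via DrumServerRun.
# The import path and MODEL_DIR below are assumptions, not taken from the example.
import requests

from datarobot_drum.resource.drum_server_utils import DrumServerRun  # assumed location

MODEL_DIR = "/path/to/custom_model_dir"  # hypothetical model directory

with DrumServerRun("regression", None, MODEL_DIR) as run:
    response = requests.get(run.url_server_address + "/capabilities/")
    response.raise_for_status()
    print(response.json()["supported_payload_formats"])
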
Example #2
    def assert_drum_server_run_failure(self,
                                       server_run_args,
                                       with_error_server,
                                       error_message,
                                       with_nginx=False,
                                       docker=None):
        drum_server_run = DrumServerRun(**server_run_args,
                                        with_error_server=with_error_server,
                                        nginx=with_nginx,
                                        docker=docker)

        if with_error_server or with_nginx:
            # assert that the error server is up and the message is propagated via the API
            with drum_server_run as run:
                # check /health/ route
                response = requests.get(run.url_server_address + "/health/")
                assert response.status_code == 513
                assert error_message in response.json()["message"]

                # check /predict/ route
                response = requests.post(run.url_server_address + "/predict/")

                assert response.status_code == 513
                assert error_message in response.json()["message"]
        else:
            # DrumServerRun tries to ping the server.
            # assert that the process is already dead when it's done.
            with pytest.raises(ProcessLookupError), drum_server_run:
                pass

        # nginx test runs in docker; to stop the process we kill it, so don't check return code
        if with_nginx:
            return
        assert drum_server_run.process.returncode == 1
        assert error_message in drum_server_run.process.err_stream
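
The failure branch above relies on combining pytest.raises with a second context manager in one with statement. A self-contained sketch of that pattern, using a hypothetical failing context manager in place of DrumServerRun:

# Toy illustration of `with pytest.raises(...), ctx:`; failing_server is hypothetical.
import contextlib

import pytest


@contextlib.contextmanager
def failing_server():
    # Stand-in for a server whose startup ping finds the process already dead.
    raise ProcessLookupError("server process exited before becoming ready")
    yield  # never reached; keeps this function a generator


def test_startup_failure_is_surfaced():
    # Context managers are entered left to right, so the error raised while
    # entering failing_server() is caught and asserted by pytest.raises.
    with pytest.raises(ProcessLookupError), failing_server():
        pass
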
Example #3
    def test_unstructured_mode_prediction_server_wrong_endpoint(
        self,
        resources,
        framework,
        problem,
        language,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
        ) as run:
            for endpoint in ["/predict/", "/predictions/"]:
                response = requests.post(url=run.url_server_address + endpoint)
                assert response.status_code == HTTP_422_UNPROCESSABLE_ENTITY
                expected_msg = "ERROR: This model has target type 'unstructured', use the /predictUnstructured/ or /predictionsUnstructured/ endpoint."
                assert response.json()["message"] == expected_msg
Example #4
    def check_transform_server(self, target_temp_location=None):

        with DrumServerRun(
                self.target_type.value,
                self.resolve_labels(self.target_type, self.options),
                self.options.code_dir,
                verbose=self._verbose,
        ) as run:
            endpoint = "/transform/"
            payload = {"X": open(self.options.input)}
            if self.options.sparse_column_file:
                payload.update(
                    {SPARSE_COLNAMES: open(self.options.sparse_column_file)})

            # there is a known bug in urllib3 that needlessly emits a header warning;
            # suppress it for a better user experience when running performance tests
            filter_urllib3_logging()
            if self.options.target:
                target_location = target_temp_location.name
                payload.update({"y": open(target_location)})
            elif self.options.target_csv:
                target_location = self.options.target_csv
                payload.update({"y": open(target_location)})

            response = requests.post(run.url_server_address + endpoint,
                                     files=payload)
            if not response.ok:
                raise DrumCommonException("Failure in {} server: {}".format(
                    endpoint[1:-1], response.text))
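
check_transform_server sends the input (and optional target) as multipart form data through the files= argument of requests.post. A hedged sketch of the same client call against an already-running server, with explicit file handling; BASE_URL and the file paths are placeholders:

# Sketch: POST CSV files as multipart form-data to the /transform/ endpoint.
import requests

BASE_URL = "http://localhost:6789"  # placeholder for run.url_server_address
INPUT_CSV = "input.csv"             # placeholder input path
TARGET_CSV = "target.csv"           # placeholder target path

with open(INPUT_CSV, "rb") as x_file, open(TARGET_CSV, "rb") as y_file:
    # requests builds the multipart body; each value may be an open file object.
    response = requests.post(BASE_URL + "/transform/", files={"X": x_file, "y": y_file})

if not response.ok:
    raise RuntimeError("Failure in transform server: {}".format(response.text))
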
    def test_custom_models_with_drum_nginx_prediction_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
                nginx=True,
        ) as run:
            input_dataset = resources.datasets(framework, problem)

            # do predictions
            for endpoint in ["/predict/", "/predictions/"]:
                for post_args in [
                    {
                        "files": {
                            "X": open(input_dataset)
                        }
                    },
                    {
                        "data": open(input_dataset, "rb")
                    },
                ]:
                    response = requests.post(run.url_server_address + endpoint,
                                             **post_args)

                    assert response.ok
                    actual_num_predictions = len(
                        json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                    in_data = pd.read_csv(input_dataset)
                    assert in_data.shape[0] == actual_num_predictions

            # test model info
            response = requests.get(run.url_server_address + "/info/")

            assert response.ok
            response_dict = response.json()
            for key in ModelInfoKeys.REQUIRED:
                assert key in response_dict
            assert response_dict[
                ModelInfoKeys.TARGET_TYPE] == resources.target_types(problem)
            assert response_dict[ModelInfoKeys.DRUM_SERVER] == "nginx + uwsgi"
            assert response_dict[ModelInfoKeys.DRUM_VERSION] == drum_version

            assert ModelInfoKeys.MODEL_METADATA in response_dict
Example #6
    def test_custom_transform_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
        use_arrow,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:
            input_dataset = resources.datasets(framework, problem)
            # do predictions
            files = {"X": open(input_dataset)}
            if use_arrow:
                files["arrow_version"] = ".2"

            response = requests.post(run.url_server_address + "/transform/",
                                     files=files)
            print(response.text)
            assert response.ok

            in_data = pd.read_csv(input_dataset)

            if framework == SKLEARN_TRANSFORM_DENSE:
                if use_arrow:
                    transformed_out = read_arrow_payload(eval(response.text))
                    assert eval(response.text)["out.format"] == "arrow"
                else:
                    transformed_out = read_csv_payload(eval(response.text))
                    assert eval(response.text)["out.format"] == "csv"
                actual_num_predictions = transformed_out.shape[0]
            else:
                transformed_out = read_mtx_payload(eval(response.text))
                actual_num_predictions = transformed_out.shape[0]
                assert eval(response.text)["out.format"] == "sparse"
            validate_transformed_output(
                transformed_out,
                should_be_sparse=framework == SKLEARN_TRANSFORM)
            assert in_data.shape[0] == actual_num_predictions
    def test_predictions_r_mtx(
        self,
        resources,
        framework,
        problem,
        language,
        nginx,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                nginx=nginx,
        ) as run:
            input_dataset = resources.datasets(framework, SPARSE)

            # do predictions
            for endpoint in ["/predict/", "/predictions/"]:
                for post_args in [
                    {
                        "files": {
                            "X": ("X.mtx", open(input_dataset))
                        }
                    },
                    {
                        "data": open(input_dataset),
                        "headers": {
                            "Content-Type":
                            "{};".format(PredictionServerMimetypes.TEXT_MTX)
                        },
                    },
                ]:
                    response = requests.post(run.url_server_address + endpoint,
                                             **post_args)

                    assert response.ok
                    actual_num_predictions = len(
                        json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                    in_data = StructuredInputReadUtils.read_structured_input_file_as_df(
                        input_dataset)
                    assert in_data.shape[0] == actual_num_predictions
Example #8
    def test_e2e_predict_fails(self, resources, params, with_error_server,
                               with_nginx, docker):
        """
        Verify that when the drum server is started and an error occurs on the /predict/ route,
        the 'error server' is not started, regardless of the '--with-error-server' flag.
        """
        framework, problem, custom_model_dir, server_run_args = params

        # remove a module required during processing of /predict/ request
        os.remove(os.path.join(custom_model_dir, "custom.py"))

        drum_server_run = DrumServerRun(**server_run_args,
                                        with_error_server=with_error_server,
                                        nginx=with_nginx,
                                        docker=docker)

        with drum_server_run as run:
            input_dataset = resources.datasets(framework, problem)

            response = requests.post(run.url_server_address + "/predict/",
                                     files={"X": open(input_dataset)})

            assert response.status_code == 500  # error occurs

            # assert that the 'error server' is not started.
            # since the 'error server' propagates errors with a 513 status code,
            # assert that after the error occurred, the next request does not return 513

            # check /health/ route
            response = requests.get(run.url_server_address + "/health/")
            assert response.status_code == 200

            # check /predict/ route
            response = requests.post(run.url_server_address + "/predict/")

            error_message = (
                "ERROR: Samples should be provided as: "
                "  - a csv, mtx, or arrow file under `X` form-data param key."
                "  - binary data")
            assert response.status_code == 422
            assert response.json()["message"] == error_message

        # nginx test runs in docker; to stop the process we kill it, so don't check return code
        if with_nginx:
            return
        assert drum_server_run.process.returncode == 0
Example #9
    def test_custom_models_with_drum_nginx_prediction_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
                nginx=True,
        ) as run:
            input_dataset = resources.datasets(framework, problem)

            # do predictions
            for endpoint in ["/predict/", "/predictions/"]:
                for post_args in [
                    {
                        "files": {
                            "X": open(input_dataset)
                        }
                    },
                    {
                        "data": open(input_dataset, "rb")
                    },
                ]:
                    response = requests.post(run.url_server_address + endpoint,
                                             **post_args)

                    assert response.ok
                    actual_num_predictions = len(
                        json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                    in_data = pd.read_csv(input_dataset)
                    assert in_data.shape[0] == actual_num_predictions
Example #10
    def test_custom_models_with_drum_prediction_server(
        self,
        resources,
        framework,
        problem,
        language,
        nginx,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
                nginx=nginx,
        ) as run:
            input_dataset = resources.datasets(framework, problem)

            for ret_mode in ["text", "binary"]:
                for endpoint in [
                        "/predictUnstructured/", "/predictionsUnstructured/"
                ]:
                    # do predictions
                    url = run.url_server_address + endpoint
                    data = open(input_dataset, "rb").read()
                    params = {"ret_mode": ret_mode}
                    response = requests.post(url=url, data=data, params=params)

                    assert response.ok
                    if ret_mode == "text":
                        assert response.text == "10"
                    else:
                        assert 10 == int.from_bytes(response.content,
                                                    byteorder="big")
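
The binary branch above decodes the response body with int.from_bytes using big-endian byte order; a quick standalone check of that round trip:

# Round trip for the binary return mode: an int encoded big-endian and decoded back.
value = 10
payload = value.to_bytes(4, byteorder="big")  # what the unstructured hook could return
assert int.from_bytes(payload, byteorder="big") == value
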
    def test_ping_endpoints(self, params, with_error_server, with_nginx, docker):
        _, _, custom_model_dir, server_run_args = params

        # remove a module required during processing of /predict/ request
        os.remove(os.path.join(custom_model_dir, "custom.py"))

        drum_server_run = DrumServerRun(
            **server_run_args, with_error_server=with_error_server, nginx=with_nginx, docker=docker
        )

        with drum_server_run as run:
            response = requests.get(run.url_server_address + "/")
            assert response.status_code == 200
            response = requests.get(run.url_server_address + "/ping/")
            assert response.status_code == 200

        # nginx test runs in docker; to stop the process we kill it, so don't check return code
        if with_nginx:
            return
        assert drum_server_run.process.returncode == 0
    def test_custom_model_with_custom_java_predictor(
        self,
        resources,
        class_labels,
        problem,
    ):
        unset_drum_supported_env_vars()
        cur_file_dir = os.path.dirname(os.path.abspath(__file__))
        # have to point the model dir to a folder with a jar, so drum can detect the language
        model_dir = os.path.join(cur_file_dir, "custom_java_predictor")
        os.environ[
            EnvVarNames.
            DRUM_JAVA_CUSTOM_PREDICTOR_CLASS] = "com.datarobot.test.TestCustomPredictor"
        os.environ[EnvVarNames.DRUM_JAVA_CUSTOM_CLASS_PATH] = os.path.join(
            model_dir, "*")
        with DrumServerRun(
                resources.target_types(problem),
                class_labels,
                model_dir,
        ) as run:
            input_dataset = resources.datasets(None, problem)
            # do predictions
            post_args = {"data": open(input_dataset, "rb")}
            response = requests.post(run.url_server_address + "/predict",
                                     **post_args)
            print(response.text)
            assert response.ok
            predictions = json.loads(response.text)[RESPONSE_PREDICTIONS_KEY]
            actual_num_predictions = len(predictions)
            in_data = pd.read_csv(input_dataset)
            assert in_data.shape[0] == actual_num_predictions
            if problem == REGRESSION:
                assert list(range(1,
                                  actual_num_predictions + 1)) == predictions
            else:
                single_prediction = {"yes": 0.7, "no": 0.3}
                assert [single_prediction
                        ] * actual_num_predictions == predictions

        unset_drum_supported_env_vars()
    def test_custom_transforms_with_drum_nginx_prediction_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
                nginx=True,
        ) as run:
            input_dataset = resources.datasets(framework, problem)
            # do predictions
            response = requests.post(run.url_server_address + "/transform/",
                                     files={"X": open(input_dataset)})

            assert response.ok

            in_data = pd.read_csv(input_dataset)

            parsed_response = parse_multi_part_response(response)

            transformed_mat = read_mtx_payload(parsed_response,
                                               X_TRANSFORM_KEY)
            actual_num_predictions = transformed_mat.shape[0]
            assert in_data.shape[0] == actual_num_predictions
Example #14
    def test_response_one_var_return(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:
            url = run.url_server_address + "/predictUnstructured/"

            for one_or_two in ["one", "one-with-none"]:
                input_dataset = resources.datasets(framework, problem)
                data_bytes = open(input_dataset, "rb").read()
                params = {"ret_one_or_two": one_or_two}

                # Send None or text data encoded with utf8; by default text files are opened using utf8.
                # Content-Type is not used in the hook, but is used by drum to decode.
                # The expected response content type is the default: "text/plain; charset=UTF-8"
                for data in [None, data_bytes]:
                    for ct in [
                            "text/plain; charset=UTF-8", "text/some_other;"
                    ]:
                        for endpoint in [
                                "/predictUnstructured/",
                                "/predictionsUnstructured/"
                        ]:
                            url = run.url_server_address + endpoint
                            headers = {"Content-Type": ct}
                            response = requests.post(url=url,
                                                     data=data,
                                                     params=params,
                                                     headers=headers)
                            assert response.ok
                            content_type_header = response.headers[
                                "Content-Type"]
                            mimetype, content_type_params_dict = werkzeug.http.parse_options_header(
                                content_type_header)
                            assert mimetype == "text/plain"
                            assert content_type_params_dict["charset"] == UTF8
                            if data is None:
                                assert len(response.content) == 0
                            else:
                                assert response.content == data_bytes

                # Send text data encoded with utf16.
                # Content-Type is not used in the hook, but is used by drum to decode.
                # The expected response content type is the default: "text/plain; charset=UTF-8"
                data_text = u"some text текст"
                data_bytes = u"some text текст".encode(UTF16)
                for data in [data_bytes]:
                    for ct in ["text/plain; charset={}".format(UTF16)]:
                        for endpoint in [
                                "/predictUnstructured/",
                                "/predictionsUnstructured/"
                        ]:
                            url = run.url_server_address + endpoint
                            headers = {"Content-Type": ct}
                            response = requests.post(url=url,
                                                     data=data,
                                                     params=params,
                                                     headers=headers)
                            assert response.ok
                            content_type_header = response.headers[
                                "Content-Type"]
                            mimetype, content_type_params_dict = werkzeug.http.parse_options_header(
                                content_type_header)
                            assert mimetype == "text/plain"
                            assert content_type_params_dict["charset"] == UTF8
                            if data is None:
                                assert len(response.content) == 0
                            else:
                                assert response.content == data_text.encode(
                                    UTF8)

                # sending binary data
                headers = {"Content-Type": "application/octet-stream;"}
                response = requests.post(url=url,
                                         data=data_bytes,
                                         params=params,
                                         headers=headers)
                assert response.ok
                content_type_header = response.headers["Content-Type"]
                mimetype, content_type_params_dict = werkzeug.http.parse_options_header(
                    content_type_header)
                assert "application/octet-stream" == mimetype
                # the params dict should be empty
                assert not content_type_params_dict
                assert response.content == data_bytes
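
Several of these assertions split the Content-Type header with werkzeug.http.parse_options_header; a short sketch of what that call returns for the header values used here:

# parse_options_header splits a Content-Type header into (mimetype, params dict).
import werkzeug.http

mimetype, params = werkzeug.http.parse_options_header("text/plain; charset=UTF-8")
assert mimetype == "text/plain"
assert params["charset"] == "UTF-8"

mimetype, params = werkzeug.http.parse_options_header("application/octet-stream;")
assert mimetype == "application/octet-stream"
assert not params  # no parameters follow the bare mimetype
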
Example #15
    def test_response_content_type(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:

            text_data = u"my text, мой текст"

            # Fixtures are not used, as we don't want to spin up a server for each test case.
            # The "application/octet-stream" case is not strictly correct, as the data is returned as text.
            # In this test, data is sent with mimetype=text/plain, so score_unstructured receives it as text.
            # The hook returns text with ret_charset, so the response data will be encoded with that charset.
            for request_charset in [None, UTF8, UTF16]:
                for ret_charset in [None, UTF8, UTF16]:
                    for ret_mimetype in [
                            "application/octet-stream", "text/plain_drum_test"
                    ]:
                        for endpoint in [
                                "/predictUnstructured/",
                                "/predictionsUnstructured/"
                        ]:
                            params = {}
                            params["ret_one_or_two"] = "two"
                            charset_to_encode = UTF8 if request_charset is None else request_charset
                            # do predictions
                            url = run.url_server_address + endpoint
                            headers = {
                                "Content-Type":
                                "text/plain; charset={}".format(
                                    charset_to_encode)
                            }
                            if ret_charset is not None:
                                params["ret_charset"] = ret_charset
                            if ret_mimetype is not None:
                                params["ret_mimetype"] = ret_mimetype
                            response = requests.post(
                                url=url,
                                data=text_data.encode(charset_to_encode),
                                params=params,
                                headers=headers,
                            )

                            expected_charset = UTF8 if ret_charset is None else ret_charset
                            assert response.ok
                            content_type_header = response.headers[
                                "Content-Type"]
                            assert ret_mimetype in content_type_header
                            assert "charset={}".format(
                                expected_charset) in content_type_header
                            assert text_data == response.content.decode(
                                expected_charset)
    def test_predictions_python_arrow_mtx(
        self,
        resources,
        framework,
        problem,
        language,
        nginx,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                nginx=nginx,
        ) as run:
            input_dataset = resources.datasets(framework, problem)
            df = pd.read_csv(input_dataset)
            arrow_dataset_buf = pyarrow.ipc.serialize_pandas(
                df, preserve_index=False).to_pybytes()

            sink = io.BytesIO()
            scipy.io.mmwrite(sink, scipy.sparse.csr_matrix(df.values))
            mtx_dataset_buf = sink.getvalue()

            # do predictions
            for endpoint in ["/predict/", "/predictions/"]:
                for post_args in [
                    {
                        "files": {
                            "X": ("X.arrow", arrow_dataset_buf)
                        }
                    },
                    {
                        "files": {
                            "X": ("X.mtx", mtx_dataset_buf)
                        }
                    },
                    {
                        "data": arrow_dataset_buf,
                        "headers": {
                            "Content-Type":
                            "{};".format(PredictionServerMimetypes.
                                         APPLICATION_X_APACHE_ARROW_STREAM)
                        },
                    },
                    {
                        "data": mtx_dataset_buf,
                        "headers": {
                            "Content-Type":
                            "{};".format(PredictionServerMimetypes.TEXT_MTX)
                        },
                    },
                ]:
                    response = requests.post(run.url_server_address + endpoint,
                                             **post_args)

                    assert response.ok
                    actual_num_predictions = len(
                        json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                    in_data = pd.read_csv(input_dataset)
                    assert in_data.shape[0] == actual_num_predictions
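
The Arrow and MTX request bodies above are built in memory from a DataFrame; a minimal sketch of just that serialization step, assuming pyarrow and scipy are installed (serialize_pandas is the call used in the example, though newer pyarrow releases may deprecate it):

# Build in-memory Arrow IPC and MatrixMarket payloads from a small DataFrame.
import io

import pandas as pd
import pyarrow
import scipy.io
import scipy.sparse

df = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})

# Arrow IPC stream bytes, mirroring the example above.
arrow_dataset_buf = pyarrow.ipc.serialize_pandas(df, preserve_index=False).to_pybytes()

# MatrixMarket (.mtx) bytes for the same values as a CSR sparse matrix.
sink = io.BytesIO()
scipy.io.mmwrite(sink, scipy.sparse.csr_matrix(df.values))
mtx_dataset_buf = sink.getvalue()

print(len(arrow_dataset_buf), len(mtx_dataset_buf))
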
Example #17
    def check_prediction_side_effects(self):
        rtol = 2e-02
        atol = 1e-06
        input_extension = os.path.splitext(self.options.input)
        is_sparse = input_extension[1] == ".mtx"

        if is_sparse:
            columns = [
                column.strip() for column in open(
                    self.options.sparse_column_file).readlines()
            ]
            df = pd.DataFrame.sparse.from_spmatrix(mmread(self.options.input),
                                                   columns=columns)
            samplesize = min(1000, max(int(len(df) * 0.1), 10))
            data_subset = df.sample(n=samplesize, random_state=42)
            subset_payload, colnames = make_mtx_payload(data_subset)
            subset_payload = ("X.mtx", subset_payload)
            files = {
                "X":
                subset_payload,
                SPARSE_COLNAMES: (
                    SPARSE_COLNAMES,
                    colnames,
                    PredictionServerMimetypes.APPLICATION_OCTET_STREAM,
                ),
            }
        else:
            df = pd.read_csv(self.options.input)
            samplesize = min(1000, max(int(len(df) * 0.1), 10))
            data_subset = df.sample(n=samplesize, random_state=42)
            subset_payload = make_csv_payload(data_subset)
            files = {"X": subset_payload}

        labels = self.resolve_labels(self.target_type, self.options)

        with DrumServerRun(self.target_type.value,
                           labels,
                           self.options.code_dir,
                           verbose=self._verbose) as run:
            endpoint = "/predict/"
            payload = {"X": open(self.options.input)}
            if is_sparse:
                payload.update({
                    SPARSE_COLNAMES: (
                        SPARSE_COLNAMES,
                        open(self.options.sparse_column_file),
                        PredictionServerMimetypes.APPLICATION_OCTET_STREAM,
                    )
                })

            response_full = requests.post(run.url_server_address + endpoint,
                                          files=payload)
            if not response_full.ok:
                raise DrumCommonException("Failure in {} server: {}".format(
                    endpoint[1:-1], response_full.text))

            response_sample = requests.post(run.url_server_address + endpoint,
                                            files=files)
            if not response_sample.ok:
                raise DrumCommonException("Failure in {} server: {}".format(
                    endpoint[1:-1], response_sample.text))

            preds_full = pd.DataFrame(
                json.loads(response_full.text)[RESPONSE_PREDICTIONS_KEY])
            preds_sample = pd.DataFrame(
                json.loads(response_sample.text)[RESPONSE_PREDICTIONS_KEY])

            preds_full_subset = preds_full.iloc[data_subset.index]

            if self._schema_validator:
                # Validate that the predictions are of the type and shape the user specified in the schema
                self._schema_validator.validate_outputs(preds_sample)

            matches = np.isclose(preds_full_subset,
                                 preds_sample,
                                 rtol=rtol,
                                 atol=atol)
            if not np.all(matches):
                if is_sparse:
                    _, __tempfile_sample = mkstemp(suffix=".mtx")
                    sparse_mat = vstack(x[0] for x in data_subset.values)
                    mmwrite(__tempfile_sample, sparse_mat.sparse.to_coo())
                else:
                    _, __tempfile_sample = mkstemp(suffix=".csv")
                    data_subset.to_csv(__tempfile_sample, index=False)

                message = """
                            Warning: Your predictions were different when we tried to predict twice.
                            The last 10 predictions from the main predict run were: {}
                            However when we reran predictions on the same data, we got: {}.
                            The sample used to calculate prediction reruns can be found in this file: {}""".format(
                    preds_full_subset[~matches][:10].to_string(index=False),
                    preds_sample[~matches][:10].to_string(index=False),
                    __tempfile_sample,
                )
                raise DrumPredException(message)
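
check_prediction_side_effects reruns predictions on a reproducible sample and compares them against the matching rows of the full run; a small sketch of that align-and-compare step on plain DataFrames, with made-up numbers:

# Sketch: sample rows reproducibly, align the full results by position, compare with tolerances.
import numpy as np
import pandas as pd

rtol, atol = 2e-02, 1e-06

preds_full = pd.DataFrame({"prediction": [0.1, 0.2, 0.3, 0.4, 0.5]})
data_subset = preds_full.sample(n=3, random_state=42)        # reproducible subset
preds_sample = preds_full.iloc[data_subset.index] + 1e-07    # stand-in for the rerun

# .iloc on the sampled index lines the full run up with the rerun, as in the example.
preds_full_subset = preds_full.iloc[data_subset.index]
matches = np.isclose(preds_full_subset, preds_sample, rtol=rtol, atol=atol)
assert np.all(matches)
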
    def test_custom_models_drum_prediction_server_response(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:
            input_dataset = resources.datasets(framework, problem)

            # do predictions
            for endpoint in ["/predict/", "/predictions/"]:
                for post_args in [
                    {
                        "files": {
                            "X": open(input_dataset)
                        }
                    },
                    {
                        "data": open(input_dataset, "rb")
                    },
                ]:
                    response = requests.post(run.url_server_address + endpoint,
                                             **post_args)

                    assert response.ok
                    response_json = json.loads(response.text)
                    assert isinstance(response_json, dict)
                    assert RESPONSE_PREDICTIONS_KEY in response_json
                    predictions_list = response_json[RESPONSE_PREDICTIONS_KEY]
                    assert isinstance(predictions_list, list)
                    assert len(predictions_list)
                    prediction_item = predictions_list[0]
                    if problem in [BINARY, MULTICLASS]:
                        assert isinstance(prediction_item, dict)
                        assert len(prediction_item) == len(
                            resources.class_labels(framework, problem))
                        assert all([
                            isinstance(x, str) for x in prediction_item.keys()
                        ])
                        assert all([
                            isinstance(x, float)
                            for x in prediction_item.values()
                        ])
                    elif problem == REGRESSION:
                        assert isinstance(prediction_item, float)
    def test_custom_transform_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
        use_arrow,
        pass_target,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:
            input_dataset = resources.datasets(framework, problem)
            in_data = pd.read_csv(input_dataset)

            files = {"X": open(input_dataset)}
            if pass_target:
                target_dataset = resources.targets(problem)
                files["y"] = open(target_dataset)

            if use_arrow:
                files["arrow_version"] = ".2"

            response = requests.post(run.url_server_address + "/transform/",
                                     files=files)
            assert response.ok

            parsed_response = parse_multi_part_response(response)

            if framework == SKLEARN_TRANSFORM_DENSE:
                if use_arrow:
                    transformed_out = read_arrow_payload(
                        parsed_response, X_TRANSFORM_KEY)
                    if pass_target:
                        target_out = read_arrow_payload(
                            parsed_response, Y_TRANSFORM_KEY)
                    assert parsed_response["X.format"] == "arrow"
                    if pass_target:
                        assert parsed_response["y.format"] == "arrow"
                else:
                    transformed_out = read_csv_payload(parsed_response,
                                                       X_TRANSFORM_KEY)
                    if pass_target:
                        target_out = read_csv_payload(parsed_response,
                                                      Y_TRANSFORM_KEY)
                    assert parsed_response["X.format"] == "csv"
                    if pass_target:
                        assert parsed_response["y.format"] == "csv"
                actual_num_predictions = transformed_out.shape[0]
            else:
                transformed_out = read_mtx_payload(parsed_response,
                                                   X_TRANSFORM_KEY)
                colnames = parsed_response["X.colnames"].decode("utf-8").split(
                    "\n")
                assert len(colnames) == transformed_out.shape[1]
                if pass_target:
                    # this shouldn't be sparse even though features are
                    if use_arrow:
                        target_out = read_arrow_payload(
                            parsed_response, Y_TRANSFORM_KEY)
                        if pass_target:
                            assert parsed_response["y.format"] == "arrow"
                    else:
                        target_out = read_csv_payload(parsed_response,
                                                      Y_TRANSFORM_KEY)
                        if pass_target:
                            assert parsed_response["y.format"] == "csv"
                actual_num_predictions = transformed_out.shape[0]
                assert parsed_response["X.format"] == "sparse"
            validate_transformed_output(
                transformed_out,
                should_be_sparse=framework == SKLEARN_TRANSFORM)
            if pass_target:
                assert all(pd.read_csv(target_dataset) == target_out)
            assert in_data.shape[0] == actual_num_predictions
    def test_custom_models_with_drum_prediction_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        pass_args_as_env_vars,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        unset_drum_supported_env_vars()
        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
                pass_args_as_env_vars=pass_args_as_env_vars,
        ) as run:
            input_dataset = resources.datasets(framework, problem)
            # do predictions
            for endpoint in ["/predict/", "/predictions/"]:
                for post_args in [
                    {
                        "files": {
                            "X": open(input_dataset)
                        }
                    },
                    {
                        "data": open(input_dataset, "rb")
                    },
                ]:
                    response = requests.post(run.url_server_address + endpoint,
                                             **post_args)

                    print(response.text)
                    assert response.ok
                    actual_num_predictions = len(
                        json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                    in_data = pd.read_csv(input_dataset)
                    assert in_data.shape[0] == actual_num_predictions
            # test model info
            response = requests.get(run.url_server_address + "/info/")

            assert response.ok
            response_dict = response.json()
            for key in ModelInfoKeys.REQUIRED:
                assert key in response_dict
            assert response_dict[
                ModelInfoKeys.TARGET_TYPE] == resources.target_types(problem)
            # Don't verify the code dir when running with Docker.
            # The local code dir is mapped into a user-defined location within the container.
            if docker is None:
                assert response_dict[ModelInfoKeys.CODE_DIR] == str(
                    custom_model_dir)
            assert response_dict[ModelInfoKeys.DRUM_SERVER] == "flask"
            assert response_dict[ModelInfoKeys.DRUM_VERSION] == drum_version

            if resources.target_types(problem) == TargetType.BINARY.value:
                assert ModelInfoKeys.POSITIVE_CLASS_LABEL in response_dict
                assert ModelInfoKeys.NEGATIVE_CLASS_LABEL in response_dict
            elif resources.target_types(
                    problem) == TargetType.MULTICLASS.value:
                assert ModelInfoKeys.CLASS_LABELS in response_dict

            if framework == SKLEARN and problem == REGRESSION:
                assert ModelInfoKeys.MODEL_METADATA in response_dict

        unset_drum_supported_env_vars()
Example #21
    def check_prediction_side_effects(self):
        rtol = 2e-02
        atol = 1e-06
        input_extension = os.path.splitext(self.options.input)
        is_sparse = input_extension[1] == ".mtx"

        if is_sparse:
            df = pd.DataFrame(mmread(self.options.input).tocsr())
            samplesize = min(1000, max(int(len(df) * 0.1), 10))
            data_subset = df.sample(n=samplesize, random_state=42)
            _, __tempfile_sample = mkstemp(suffix=".mtx")
            sparse_mat = vstack(x[0] for x in data_subset.values)
            mmwrite(__tempfile_sample, sparse_mat)
        else:
            df = pd.read_csv(self.options.input)
            samplesize = min(1000, max(int(len(df) * 0.1), 10))
            data_subset = df.sample(n=samplesize, random_state=42)
            _, __tempfile_sample = mkstemp(suffix=".csv")
            data_subset.to_csv(__tempfile_sample, index=False)

        if self.target_type == TargetType.BINARY:
            labels = [self.options.negative_class_label, self.options.positive_class_label]
        elif self.target_type == TargetType.MULTICLASS:
            labels = self.options.class_labels
        else:
            labels = None

        with DrumServerRun(
            self.target_type.value,
            labels,
            self.options.code_dir,
        ) as run:
            response_key = (
                X_TRANSFORM_KEY
                if self.target_type == TargetType.TRANSFORM
                else RESPONSE_PREDICTIONS_KEY
            )
            endpoint = "/transform/" if self.target_type == TargetType.TRANSFORM else "/predict/"

            response_full = requests.post(
                run.url_server_address + endpoint, files={"X": open(self.options.input)}
            )

            response_sample = requests.post(
                run.url_server_address + endpoint, files={"X": open(__tempfile_sample)}
            )

            if self.target_type == TargetType.TRANSFORM:
                if is_sparse:
                    preds_full = pd.DataFrame(read_mtx_payload(eval(response_full.text)))
                    preds_sample = pd.DataFrame(read_mtx_payload(eval(response_sample.text)))
                else:
                    preds_full = read_csv_payload(eval(response_full.text))
                    preds_sample = read_csv_payload(eval(response_sample.text))
            else:
                preds_full = pd.DataFrame(json.loads(response_full.text)[response_key])
                preds_sample = pd.DataFrame(json.loads(response_sample.text)[response_key])

            preds_full_subset = preds_full.iloc[data_subset.index]

            matches = np.isclose(preds_full_subset, preds_sample, rtol=rtol, atol=atol)
            if not np.all(matches):
                message = """
                            Error: Your predictions were different when we tried to predict twice.
                            No randomness is allowed.
                            The last 10 predictions from the main predict run were: {}
                            However when we reran predictions on the same data, we got: {}.
                            The sample used to calculate prediction reruns can be found in this file: {}""".format(
                    preds_full_subset[~matches][:10], preds_sample[~matches][:10], __tempfile_sample
                )
                raise ValueError(message)
            else:
                os.remove(__tempfile_sample)
Example #22
    def test_r2d2_drum_prediction_server(
        self,
        resources,
        tmp_path,
    ):
        print("current dir: {}".format(os.getcwd()))

        custom_model_dir = "tools/r2d2"

        with DrumServerRun(
                target_type=resources.target_types(REGRESSION_INFERENCE),
                labels=None,
                custom_model_dir=custom_model_dir,
                docker=DOCKER_PYTHON_SKLEARN,
                memory="500m",
                fail_on_shutdown_error=False,
        ) as run:
            print("r2d2 is running")
            cmd = "python tools/r2d2/custom.py memory 200 --server {}".format(
                run.server_address)
            print(cmd)

            p, stdout, stderr = _exec_shell_cmd(cmd, "Error running r2d2 main")
            print("CMD result: {}".format(p.returncode))
            print(stdout)
            print(stderr)
            assert p.returncode == 0

            data = pd.DataFrame(
                {
                    "cmd": ["memory"],
                    "arg": [100]
                },
                columns=["cmd", "arg"],
            )
            print("Sending the following data:")
            print(data)

            csv_data = data.to_csv(index=False)
            url = "{}/predict/".format(run.url_server_address)
            response = requests.post(url, files={"X": csv_data})
            print(response)
            assert response.ok

            # Send the exception command; we should get a failed response
            data = pd.DataFrame(
                {
                    "cmd": ["exception"],
                    "arg": [100]
                },
                columns=["cmd", "arg"],
            )
            print("Sending the following data:")
            print(data)

            csv_data = data.to_csv(index=False)
            response = requests.post(url, files={"X": csv_data})
            print(response)
            assert response.status_code == 500

            # The server should be alive before we kill it by exhausting memory
            response = requests.get(run.url_server_address)
            print(response)
            assert response.ok

            # Kill the docker container by making it allocate too much memory
            data = pd.DataFrame(
                {
                    "cmd": ["memory"],
                    "arg": [1000]
                },
                columns=["cmd", "arg"],
            )

            print("Sending 1000m data:")
            print(data)
            csv_data = data.to_csv(index=False)

            try:
                response = requests.post(url, files={"X": csv_data})
                print(response)
                assert response.status_code == 500
            except Exception:
                print("Expected connection error")
    def test_drum_prediction_server_pps_response(
        self,
        resources,
        framework,
        problem,
        language,
        deployment_config,
        deployment_config_as_env_var,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        append_cmd = None
        if deployment_config_as_env_var:
            os.environ[
                ArgumentOptionsEnvVars.DEPLOYMENT_CONFIG] = deployment_config
        else:
            append_cmd = " --deployment-config {}".format(deployment_config)

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                append_cmd=append_cmd,
        ) as run:
            input_dataset = resources.datasets(framework, problem)

            # do predictions
            for endpoint in ["/predict/", "/predictions/"]:
                for post_args in [
                    {
                        "files": {
                            "X": open(input_dataset)
                        }
                    },
                    {
                        "data": open(input_dataset, "rb")
                    },
                ]:
                    response = requests.post(run.url_server_address + endpoint,
                                             **post_args)

                    assert response.ok
                    response_json = json.loads(response.text)
                    assert isinstance(response_json, dict)
                    assert "data" in response_json
                    predictions_list = response_json["data"]
                    assert isinstance(predictions_list, list)
                    assert len(predictions_list)

                    prediction_item = predictions_list[0]
                    assert "rowId" in prediction_item
                    assert "prediction" in prediction_item
                    assert "predictionValues" in prediction_item

                    assert pd.read_csv(input_dataset).shape[0] == len(
                        predictions_list)

        unset_drum_supported_env_vars()