def test_predictors_supported_payload_formats(
    self, resources, framework, problem, language, supported_payload_formats, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
    ) as run:
        response = requests.get(run.url_server_address + "/capabilities/")
        assert response.ok
        assert response.json() == {"supported_payload_formats": supported_payload_formats}
def assert_drum_server_run_failure(
    self, server_run_args, with_error_server, error_message, with_nginx=False, docker=None
):
    drum_server_run = DrumServerRun(
        **server_run_args, with_error_server=with_error_server, nginx=with_nginx, docker=docker
    )

    if with_error_server or with_nginx:
        # Assert that the error server is up and the message is propagated via the API.
        with drum_server_run as run:
            # Check the /health/ route.
            response = requests.get(run.url_server_address + "/health/")
            assert response.status_code == 513
            assert error_message in response.json()["message"]

            # Check the /predict/ route.
            response = requests.post(run.url_server_address + "/predict/")
            assert response.status_code == 513
            assert error_message in response.json()["message"]
    else:
        # DrumServerRun tries to ping the server;
        # assert that the process is already dead when it's done.
        with pytest.raises(ProcessLookupError), drum_server_run:
            pass

    # The nginx test runs in docker; to stop the process we kill it, so don't check the return code.
    if with_nginx:
        return

    assert drum_server_run.process.returncode == 1
    assert error_message in drum_server_run.process.err_stream
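# A minimal usage sketch for the helper above, not part of the original suite.
# The `server_run_args` keys mirror the keyword names DrumServerRun accepts
# elsewhere in this file, but the concrete values and the error message are
# illustrative assumptions.
def example_assert_drum_server_run_failure(self, tmp_path):
    server_run_args = dict(
        target_type="regression",        # assumed target type
        labels=None,
        custom_model_dir=str(tmp_path),  # assumed-broken model dir
    )
    # With the error server enabled, a startup failure should surface as
    # HTTP 513 on /health/ and /predict/ rather than as a dead process.
    self.assert_drum_server_run_failure(
        server_run_args,
        with_error_server=True,
        error_message="Failed to load model",  # hypothetical message
    )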
def test_unstructured_mode_prediction_server_wrong_endpoint(
    self, resources, framework, problem, language, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        "unstructured",
        resources.class_labels(framework, problem),
        custom_model_dir,
    ) as run:
        for endpoint in ["/predict/", "/predictions/"]:
            response = requests.post(url=run.url_server_address + endpoint)
            assert response.status_code == HTTP_422_UNPROCESSABLE_ENTITY
            expected_msg = "ERROR: This model has target type 'unstructured', use the /predictUnstructured/ or /predictionsUnstructured/ endpoint."
            assert response.json()["message"] == expected_msg
def check_transform_server(self, target_temp_location=None):
    with DrumServerRun(
        self.target_type.value,
        self.resolve_labels(self.target_type, self.options),
        self.options.code_dir,
        verbose=self._verbose,
    ) as run:
        endpoint = "/transform/"
        payload = {"X": open(self.options.input)}
        if self.options.sparse_column_file:
            payload.update({SPARSE_COLNAMES: open(self.options.sparse_column_file)})

        # There is a known bug in urllib3 that needlessly emits a header warning;
        # suppress it for a better user experience when running the performance test.
        filter_urllib3_logging()

        if self.options.target:
            target_location = target_temp_location.name
            payload.update({"y": open(target_location)})
        elif self.options.target_csv:
            target_location = self.options.target_csv
            payload.update({"y": open(target_location)})

        response = requests.post(run.url_server_address + endpoint, files=payload)
        if not response.ok:
            raise DrumCommonException(
                "Failure in {} server: {}".format(endpoint[1:-1], response.text)
            )
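# A standalone sketch of the multipart request that check_transform_server
# builds, assuming a DRUM server is already listening. The URL and file names
# are illustrative assumptions, not taken from the code above.
def example_transform_request(server_address="http://localhost:6789"):
    files = {"X": open("features.csv")}  # hypothetical feature file
    # An optional target rides along under the "y" form-data key.
    files["y"] = open("target.csv")      # hypothetical target file
    response = requests.post(server_address + "/transform/", files=files)
    if not response.ok:
        raise RuntimeError("transform failed: {}".format(response.text))
    return response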
def test_custom_models_with_drum_nginx_prediction_server(
    self, resources, framework, problem, language, docker, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
        nginx=True,
    ) as run:
        input_dataset = resources.datasets(framework, problem)

        # Do predictions.
        for endpoint in ["/predict/", "/predictions/"]:
            for post_args in [
                {"files": {"X": open(input_dataset)}},
                {"data": open(input_dataset, "rb")},
            ]:
                response = requests.post(run.url_server_address + endpoint, **post_args)
                assert response.ok
                actual_num_predictions = len(json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                in_data = pd.read_csv(input_dataset)
                assert in_data.shape[0] == actual_num_predictions

        # Test model info.
        response = requests.get(run.url_server_address + "/info/")
        assert response.ok
        response_dict = response.json()

        for key in ModelInfoKeys.REQUIRED:
            assert key in response_dict
        assert response_dict[ModelInfoKeys.TARGET_TYPE] == resources.target_types(problem)
        assert response_dict[ModelInfoKeys.DRUM_SERVER] == "nginx + uwsgi"
        assert response_dict[ModelInfoKeys.DRUM_VERSION] == drum_version
        assert ModelInfoKeys.MODEL_METADATA in response_dict
def test_custom_transform_server(
    self, resources, framework, problem, language, docker, tmp_path, use_arrow,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
    ) as run:
        input_dataset = resources.datasets(framework, problem)

        # Do predictions.
        files = {"X": open(input_dataset)}
        if use_arrow:
            files["arrow_version"] = ".2"

        response = requests.post(run.url_server_address + "/transform/", files=files)
        print(response.text)
        assert response.ok

        in_data = pd.read_csv(input_dataset)

        if framework == SKLEARN_TRANSFORM_DENSE:
            if use_arrow:
                transformed_out = read_arrow_payload(eval(response.text))
                assert eval(response.text)["out.format"] == "arrow"
            else:
                transformed_out = read_csv_payload(eval(response.text))
                assert eval(response.text)["out.format"] == "csv"
            actual_num_predictions = transformed_out.shape[0]
        else:
            transformed_out = read_mtx_payload(eval(response.text))
            actual_num_predictions = transformed_out.shape[0]
            assert eval(response.text)["out.format"] == "sparse"

        validate_transformed_output(
            transformed_out, should_be_sparse=framework == SKLEARN_TRANSFORM
        )
        assert in_data.shape[0] == actual_num_predictions
def test_predictions_r_mtx(
    self, resources, framework, problem, language, nginx, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        nginx=nginx,
    ) as run:
        input_dataset = resources.datasets(framework, SPARSE)

        # Do predictions.
        for endpoint in ["/predict/", "/predictions/"]:
            for post_args in [
                {"files": {"X": ("X.mtx", open(input_dataset))}},
                {
                    "data": open(input_dataset),
                    "headers": {
                        "Content-Type": "{};".format(PredictionServerMimetypes.TEXT_MTX)
                    },
                },
            ]:
                response = requests.post(run.url_server_address + endpoint, **post_args)
                assert response.ok
                actual_num_predictions = len(json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                in_data = StructuredInputReadUtils.read_structured_input_file_as_df(input_dataset)
                assert in_data.shape[0] == actual_num_predictions
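# A small sketch of how an in-memory MTX payload like the one posted above can
# be built. The DataFrame contents are illustrative; the mmwrite-to-BytesIO
# pattern mirrors the one used in test_predictions_python_arrow_mtx below.
def example_mtx_payload():
    import io

    import pandas as pd
    import scipy.io
    import scipy.sparse

    df = pd.DataFrame({"a": [0.0, 1.0], "b": [2.0, 0.0]})
    sink = io.BytesIO()
    scipy.io.mmwrite(sink, scipy.sparse.csr_matrix(df.values))
    return sink.getvalue()  # bytes suitable for the "X" form-data part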
def test_e2e_predict_fails(self, resources, params, with_error_server, with_nginx, docker):
    """
    Verify that when the drum server is started and an error occurs on the /predict/ route,
    the 'error server' is not started, regardless of the '--with-error-server' flag.
    """
    framework, problem, custom_model_dir, server_run_args = params

    # Remove a module required during processing of the /predict/ request.
    os.remove(os.path.join(custom_model_dir, "custom.py"))

    drum_server_run = DrumServerRun(
        **server_run_args, with_error_server=with_error_server, nginx=with_nginx, docker=docker
    )

    with drum_server_run as run:
        input_dataset = resources.datasets(framework, problem)
        response = requests.post(
            run.url_server_address + "/predict/", files={"X": open(input_dataset)}
        )
        assert response.status_code == 500  # error occurs

        # Assert that the 'error server' is not started:
        # since the 'error server' propagates errors with a 513 status code,
        # check that the next request after the error is not a 513.

        # Check the /health/ route.
        response = requests.get(run.url_server_address + "/health/")
        assert response.status_code == 200

        # Check the /predict/ route.
        response = requests.post(run.url_server_address + "/predict/")
        error_message = (
            "ERROR: Samples should be provided as: "
            " - a csv, mtx, or arrow file under `X` form-data param key."
            " - binary data"
        )
        assert response.status_code == 422
        assert response.json()["message"] == error_message

    # The nginx test runs in docker; to stop the process we kill it, so don't check the return code.
    if with_nginx:
        return

    assert drum_server_run.process.returncode == 0
def test_custom_models_with_drum_nginx_prediction_server(
    self, resources, framework, problem, language, docker, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
        nginx=True,
    ) as run:
        input_dataset = resources.datasets(framework, problem)

        # Do predictions.
        for endpoint in ["/predict/", "/predictions/"]:
            for post_args in [
                {"files": {"X": open(input_dataset)}},
                {"data": open(input_dataset, "rb")},
            ]:
                response = requests.post(run.url_server_address + endpoint, **post_args)
                assert response.ok
                actual_num_predictions = len(json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                in_data = pd.read_csv(input_dataset)
                assert in_data.shape[0] == actual_num_predictions
def test_custom_models_with_drum_prediction_server(
    self, resources, framework, problem, language, nginx, docker, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        "unstructured",
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
        nginx=nginx,
    ) as run:
        input_dataset = resources.datasets(framework, problem)
        for ret_mode in ["text", "binary"]:
            for endpoint in ["/predictUnstructured/", "/predictionsUnstructured/"]:
                # Do predictions.
                url = run.url_server_address + endpoint
                data = open(input_dataset, "rb").read()
                params = {"ret_mode": ret_mode}
                response = requests.post(url=url, data=data, params=params)

                assert response.ok
                if ret_mode == "text":
                    assert response.text == "10"
                else:
                    assert 10 == int.from_bytes(response.content, byteorder="big")
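# A tiny sketch of the binary return-mode decoding asserted above: the hook
# returns an integer encoded big-endian. The 4-byte width here is an
# illustrative assumption; int.from_bytes accepts any length.
def example_binary_ret_mode_round_trip():
    payload = (10).to_bytes(4, byteorder="big")
    assert int.from_bytes(payload, byteorder="big") == 10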
def test_ping_endpoints(self, params, with_error_server, with_nginx, docker):
    _, _, custom_model_dir, server_run_args = params

    # Remove a module required during processing of the /predict/ request.
    os.remove(os.path.join(custom_model_dir, "custom.py"))

    drum_server_run = DrumServerRun(
        **server_run_args, with_error_server=with_error_server, nginx=with_nginx, docker=docker
    )

    with drum_server_run as run:
        response = requests.get(run.url_server_address + "/")
        assert response.status_code == 200
        response = requests.get(run.url_server_address + "/ping/")
        assert response.status_code == 200

    # The nginx test runs in docker; to stop the process we kill it, so don't check the return code.
    if with_nginx:
        return

    assert drum_server_run.process.returncode == 0
def test_custom_model_with_custom_java_predictor(
    self, resources, class_labels, problem,
):
    unset_drum_supported_env_vars()
    cur_file_dir = os.path.dirname(os.path.abspath(__file__))
    # Point the model dir to a folder with a jar, so drum can detect the language.
    model_dir = os.path.join(cur_file_dir, "custom_java_predictor")
    os.environ[
        EnvVarNames.DRUM_JAVA_CUSTOM_PREDICTOR_CLASS
    ] = "com.datarobot.test.TestCustomPredictor"
    os.environ[EnvVarNames.DRUM_JAVA_CUSTOM_CLASS_PATH] = os.path.join(model_dir, "*")

    with DrumServerRun(
        resources.target_types(problem),
        class_labels,
        model_dir,
    ) as run:
        input_dataset = resources.datasets(None, problem)

        # Do predictions.
        post_args = {"data": open(input_dataset, "rb")}
        response = requests.post(run.url_server_address + "/predict", **post_args)
        print(response.text)
        assert response.ok

        predictions = json.loads(response.text)[RESPONSE_PREDICTIONS_KEY]
        actual_num_predictions = len(predictions)
        in_data = pd.read_csv(input_dataset)
        assert in_data.shape[0] == actual_num_predictions
        if problem == REGRESSION:
            assert list(range(1, actual_num_predictions + 1)) == predictions
        else:
            single_prediction = {"yes": 0.7, "no": 0.3}
            assert [single_prediction] * actual_num_predictions == predictions

    unset_drum_supported_env_vars()
def test_custom_transforms_with_drum_nginx_prediction_server(
    self, resources, framework, problem, language, docker, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
        nginx=True,
    ) as run:
        input_dataset = resources.datasets(framework, problem)

        # Do predictions.
        response = requests.post(
            run.url_server_address + "/transform/", files={"X": open(input_dataset)}
        )
        assert response.ok

        in_data = pd.read_csv(input_dataset)

        parsed_response = parse_multi_part_response(response)

        transformed_mat = read_mtx_payload(parsed_response, X_TRANSFORM_KEY)
        actual_num_predictions = transformed_mat.shape[0]
        assert in_data.shape[0] == actual_num_predictions
def test_response_one_var_return(
    self, resources, framework, problem, language, docker, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        "unstructured",
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
    ) as run:
        url = run.url_server_address + "/predictUnstructured/"
        for one_or_two in ["one", "one-with-none"]:
            input_dataset = resources.datasets(framework, problem)
            data_bytes = open(input_dataset, "rb").read()
            params = {"ret_one_or_two": one_or_two}

            # Sending None or text data encoded with utf8; by default text files are opened using utf8.
            # Content-Type is not used in the hook, but is used by drum to decode.
            # The expected response content type is the default: "text/plain; charset=UTF-8".
            for data in [None, data_bytes]:
                for ct in ["text/plain; charset=UTF-8", "text/some_other;"]:
                    for endpoint in ["/predictUnstructured/", "/predictionsUnstructured/"]:
                        url = run.url_server_address + endpoint
                        headers = {"Content-Type": ct}
                        response = requests.post(url=url, data=data, params=params, headers=headers)
                        assert response.ok
                        content_type_header = response.headers["Content-Type"]
                        mimetype, content_type_params_dict = werkzeug.http.parse_options_header(
                            content_type_header
                        )
                        assert mimetype == "text/plain"
                        assert content_type_params_dict["charset"] == UTF8
                        if data is None:
                            assert len(response.content) == 0
                        else:
                            assert response.content == data_bytes

            # Sending text data encoded with utf16.
            # Content-Type is not used in the hook, but is used by drum to decode.
            # The expected response content type is the default: "text/plain; charset=UTF-8".
            data_text = u"some text текст"
            data_bytes = data_text.encode(UTF16)
            for data in [data_bytes]:
                for ct in ["text/plain; charset={}".format(UTF16)]:
                    for endpoint in ["/predictUnstructured/", "/predictionsUnstructured/"]:
                        url = run.url_server_address + endpoint
                        headers = {"Content-Type": ct}
                        response = requests.post(url=url, data=data, params=params, headers=headers)
                        assert response.ok
                        content_type_header = response.headers["Content-Type"]
                        mimetype, content_type_params_dict = werkzeug.http.parse_options_header(
                            content_type_header
                        )
                        assert mimetype == "text/plain"
                        assert content_type_params_dict["charset"] == UTF8
                        if data is None:
                            assert len(response.content) == 0
                        else:
                            assert response.content == data_text.encode(UTF8)

            # Sending binary data.
            headers = {"Content-Type": "application/octet-stream;"}
            response = requests.post(url=url, data=data_bytes, params=params, headers=headers)
            assert response.ok
            content_type_header = response.headers["Content-Type"]
            mimetype, content_type_params_dict = werkzeug.http.parse_options_header(content_type_header)
            assert "application/octet-stream" == mimetype
            # Check that the params dict is empty.
            assert not content_type_params_dict
            assert response.content == data_bytes
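# A minimal sketch of the Content-Type parsing used above:
# werkzeug.http.parse_options_header splits a header value into its mimetype
# and a dict of parameters. The header string here is an illustrative example.
def example_parse_content_type():
    import werkzeug.http

    mimetype, params = werkzeug.http.parse_options_header("text/plain; charset=UTF-8")
    assert mimetype == "text/plain"
    assert params["charset"] == "UTF-8"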
def test_response_content_type(
    self, resources, framework, problem, language, docker, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        "unstructured",
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
    ) as run:
        text_data = u"my text, мой текст"

        # Fixtures are not used here, as we don't want to spin up a server for each test case.
        # The "application/octet-stream" case is not entirely correct, as the data is returned as text.
        # In this test the data is sent with mimetype=text/plain, so score_unstructured receives it as text.
        # The hook returns the data as text with ret_charset, so the response data is encoded with that charset.
        for request_charset in [None, UTF8, UTF16]:
            for ret_charset in [None, UTF8, UTF16]:
                for ret_mimetype in ["application/octet-stream", "text/plain_drum_test"]:
                    for endpoint in ["/predictUnstructured/", "/predictionsUnstructured/"]:
                        params = {}
                        params["ret_one_or_two"] = "two"
                        charset_to_encode = UTF8 if request_charset is None else request_charset

                        # Do predictions.
                        url = run.url_server_address + endpoint
                        headers = {
                            "Content-Type": "text/plain; charset={}".format(charset_to_encode)
                        }
                        if ret_charset is not None:
                            params["ret_charset"] = ret_charset
                        if ret_mimetype is not None:
                            params["ret_mimetype"] = ret_mimetype

                        response = requests.post(
                            url=url,
                            data=text_data.encode(charset_to_encode),
                            params=params,
                            headers=headers,
                        )

                        expected_charset = UTF8 if ret_charset is None else ret_charset
                        assert response.ok
                        content_type_header = response.headers["Content-Type"]
                        assert ret_mimetype in content_type_header
                        assert "charset={}".format(expected_charset) in content_type_header
                        assert text_data == response.content.decode(expected_charset)
def test_predictions_python_arrow_mtx(
    self, resources, framework, problem, language, nginx, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        nginx=nginx,
    ) as run:
        input_dataset = resources.datasets(framework, problem)

        df = pd.read_csv(input_dataset)
        arrow_dataset_buf = pyarrow.ipc.serialize_pandas(df, preserve_index=False).to_pybytes()

        sink = io.BytesIO()
        scipy.io.mmwrite(sink, scipy.sparse.csr_matrix(df.values))
        mtx_dataset_buf = sink.getvalue()

        # Do predictions.
        for endpoint in ["/predict/", "/predictions/"]:
            for post_args in [
                {"files": {"X": ("X.arrow", arrow_dataset_buf)}},
                {"files": {"X": ("X.mtx", mtx_dataset_buf)}},
                {
                    "data": arrow_dataset_buf,
                    "headers": {
                        "Content-Type": "{};".format(
                            PredictionServerMimetypes.APPLICATION_X_APACHE_ARROW_STREAM
                        )
                    },
                },
                {
                    "data": mtx_dataset_buf,
                    "headers": {
                        "Content-Type": "{};".format(PredictionServerMimetypes.TEXT_MTX)
                    },
                },
            ]:
                response = requests.post(run.url_server_address + endpoint, **post_args)
                assert response.ok
                actual_num_predictions = len(json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                in_data = pd.read_csv(input_dataset)
                assert in_data.shape[0] == actual_num_predictions
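# A minimal sketch of the Arrow payload round trip behind the buffer above:
# serialize_pandas produces an IPC stream that deserialize_pandas can read
# back. The DataFrame contents are illustrative.
def example_arrow_round_trip():
    import pandas as pd
    import pyarrow

    df = pd.DataFrame({"a": [1, 2, 3]})
    buf = pyarrow.ipc.serialize_pandas(df, preserve_index=False).to_pybytes()
    restored = pyarrow.ipc.deserialize_pandas(buf)
    assert restored.equals(df)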
def check_prediction_side_effects(self):
    rtol = 2e-02
    atol = 1e-06
    input_extension = os.path.splitext(self.options.input)
    is_sparse = input_extension[1] == ".mtx"

    if is_sparse:
        columns = [
            column.strip() for column in open(self.options.sparse_column_file).readlines()
        ]
        df = pd.DataFrame.sparse.from_spmatrix(mmread(self.options.input), columns=columns)
        samplesize = min(1000, max(int(len(df) * 0.1), 10))
        data_subset = df.sample(n=samplesize, random_state=42)
        subset_payload, colnames = make_mtx_payload(data_subset)
        subset_payload = ("X.mtx", subset_payload)
        files = {
            "X": subset_payload,
            SPARSE_COLNAMES: (
                SPARSE_COLNAMES,
                colnames,
                PredictionServerMimetypes.APPLICATION_OCTET_STREAM,
            ),
        }
    else:
        df = pd.read_csv(self.options.input)
        samplesize = min(1000, max(int(len(df) * 0.1), 10))
        data_subset = df.sample(n=samplesize, random_state=42)
        subset_payload = make_csv_payload(data_subset)
        files = {"X": subset_payload}

    labels = self.resolve_labels(self.target_type, self.options)

    with DrumServerRun(
        self.target_type.value, labels, self.options.code_dir, verbose=self._verbose
    ) as run:
        endpoint = "/predict/"
        payload = {"X": open(self.options.input)}
        if is_sparse:
            payload.update(
                {
                    SPARSE_COLNAMES: (
                        SPARSE_COLNAMES,
                        open(self.options.sparse_column_file),
                        PredictionServerMimetypes.APPLICATION_OCTET_STREAM,
                    )
                }
            )

        response_full = requests.post(run.url_server_address + endpoint, files=payload)
        if not response_full.ok:
            raise DrumCommonException(
                "Failure in {} server: {}".format(endpoint[1:-1], response_full.text)
            )

        response_sample = requests.post(run.url_server_address + endpoint, files=files)
        if not response_sample.ok:
            raise DrumCommonException(
                "Failure in {} server: {}".format(endpoint[1:-1], response_sample.text)
            )

        preds_full = pd.DataFrame(json.loads(response_full.text)[RESPONSE_PREDICTIONS_KEY])
        preds_sample = pd.DataFrame(json.loads(response_sample.text)[RESPONSE_PREDICTIONS_KEY])
        preds_full_subset = preds_full.iloc[data_subset.index]

        if self._schema_validator:
            # Validate that the predictions are of the type and shape the user specified in the schema.
            self._schema_validator.validate_outputs(preds_sample)

        matches = np.isclose(preds_full_subset, preds_sample, rtol=rtol, atol=atol)
        if not np.all(matches):
            if is_sparse:
                _, __tempfile_sample = mkstemp(suffix=".mtx")
                sparse_mat = vstack(x[0] for x in data_subset.values)
                mmwrite(__tempfile_sample, sparse_mat.sparse.to_coo())
            else:
                _, __tempfile_sample = mkstemp(suffix=".csv")
                data_subset.to_csv(__tempfile_sample, index=False)

            message = """
                Warning: Your predictions were different when we tried to predict twice.
                The last 10 predictions from the main predict run were: {}
                However when we reran predictions on the same data, we got: {}.
                The sample used to calculate prediction reruns can be found in this file: {}""".format(
                preds_full_subset[~matches][:10].to_string(index=False),
                preds_sample[~matches][:10].to_string(index=False),
                __tempfile_sample,
            )
            raise DrumPredException(message)
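# A minimal sketch of the tolerance comparison used above: np.isclose marks
# elementwise matches within rtol/atol. The values are illustrative; the
# tolerances mirror the ones defined at the top of check_prediction_side_effects.
def example_tolerance_check():
    import numpy as np

    preds_full = np.array([0.500, 0.250, 0.125])
    preds_rerun = np.array([0.505, 0.250, 0.125])  # small drift, within rtol
    matches = np.isclose(preds_full, preds_rerun, rtol=2e-02, atol=1e-06)
    assert np.all(matches)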
def test_custom_models_drum_prediction_server_response(
    self, resources, framework, problem, language, docker, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
    ) as run:
        input_dataset = resources.datasets(framework, problem)

        # Do predictions.
        for endpoint in ["/predict/", "/predictions/"]:
            for post_args in [
                {"files": {"X": open(input_dataset)}},
                {"data": open(input_dataset, "rb")},
            ]:
                response = requests.post(run.url_server_address + endpoint, **post_args)
                assert response.ok
                response_json = json.loads(response.text)
                assert isinstance(response_json, dict)
                assert RESPONSE_PREDICTIONS_KEY in response_json
                predictions_list = response_json[RESPONSE_PREDICTIONS_KEY]
                assert isinstance(predictions_list, list)
                assert len(predictions_list)

                prediction_item = predictions_list[0]
                if problem in [BINARY, MULTICLASS]:
                    assert isinstance(prediction_item, dict)
                    assert len(prediction_item) == len(resources.class_labels(framework, problem))
                    assert all([isinstance(x, str) for x in prediction_item.keys()])
                    assert all([isinstance(x, float) for x in prediction_item.values()])
                elif problem == REGRESSION:
                    assert isinstance(prediction_item, float)
def test_custom_transform_server(
    self, resources, framework, problem, language, docker, tmp_path, use_arrow, pass_target,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
    ) as run:
        input_dataset = resources.datasets(framework, problem)
        in_data = pd.read_csv(input_dataset)

        files = {"X": open(input_dataset)}
        if pass_target:
            target_dataset = resources.targets(problem)
            files["y"] = open(target_dataset)
        if use_arrow:
            files["arrow_version"] = ".2"

        response = requests.post(run.url_server_address + "/transform/", files=files)
        assert response.ok

        parsed_response = parse_multi_part_response(response)

        if framework == SKLEARN_TRANSFORM_DENSE:
            if use_arrow:
                transformed_out = read_arrow_payload(parsed_response, X_TRANSFORM_KEY)
                if pass_target:
                    target_out = read_arrow_payload(parsed_response, Y_TRANSFORM_KEY)
                assert parsed_response["X.format"] == "arrow"
                if pass_target:
                    assert parsed_response["y.format"] == "arrow"
            else:
                transformed_out = read_csv_payload(parsed_response, X_TRANSFORM_KEY)
                if pass_target:
                    target_out = read_csv_payload(parsed_response, Y_TRANSFORM_KEY)
                assert parsed_response["X.format"] == "csv"
                if pass_target:
                    assert parsed_response["y.format"] == "csv"
            actual_num_predictions = transformed_out.shape[0]
        else:
            transformed_out = read_mtx_payload(parsed_response, X_TRANSFORM_KEY)
            colnames = parsed_response["X.colnames"].decode("utf-8").split("\n")
            assert len(colnames) == transformed_out.shape[1]
            if pass_target:
                # This shouldn't be sparse, even though the features are.
                if use_arrow:
                    target_out = read_arrow_payload(parsed_response, Y_TRANSFORM_KEY)
                    assert parsed_response["y.format"] == "arrow"
                else:
                    target_out = read_csv_payload(parsed_response, Y_TRANSFORM_KEY)
                    assert parsed_response["y.format"] == "csv"
            actual_num_predictions = transformed_out.shape[0]
            assert parsed_response["X.format"] == "sparse"

        validate_transformed_output(
            transformed_out, should_be_sparse=framework == SKLEARN_TRANSFORM
        )
        if pass_target:
            assert all(pd.read_csv(target_dataset) == target_out)
        assert in_data.shape[0] == actual_num_predictions
def test_custom_models_with_drum_prediction_server(
    self, resources, framework, problem, language, docker, pass_args_as_env_vars, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    unset_drum_supported_env_vars()

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        docker,
        pass_args_as_env_vars=pass_args_as_env_vars,
    ) as run:
        input_dataset = resources.datasets(framework, problem)

        # Do predictions.
        for endpoint in ["/predict/", "/predictions/"]:
            for post_args in [
                {"files": {"X": open(input_dataset)}},
                {"data": open(input_dataset, "rb")},
            ]:
                response = requests.post(run.url_server_address + endpoint, **post_args)
                print(response.text)
                assert response.ok
                actual_num_predictions = len(json.loads(response.text)[RESPONSE_PREDICTIONS_KEY])
                in_data = pd.read_csv(input_dataset)
                assert in_data.shape[0] == actual_num_predictions

        # Test model info.
        response = requests.get(run.url_server_address + "/info/")
        assert response.ok
        response_dict = response.json()

        for key in ModelInfoKeys.REQUIRED:
            assert key in response_dict
        assert response_dict[ModelInfoKeys.TARGET_TYPE] == resources.target_types(problem)
        # Don't verify the code dir when running with Docker:
        # the local code dir is mapped into a user-defined location within docker.
        if docker is None:
            assert response_dict[ModelInfoKeys.CODE_DIR] == str(custom_model_dir)
        assert response_dict[ModelInfoKeys.DRUM_SERVER] == "flask"
        assert response_dict[ModelInfoKeys.DRUM_VERSION] == drum_version

        if resources.target_types(problem) == TargetType.BINARY.value:
            assert ModelInfoKeys.POSITIVE_CLASS_LABEL in response_dict
            assert ModelInfoKeys.NEGATIVE_CLASS_LABEL in response_dict
        elif resources.target_types(problem) == TargetType.MULTICLASS.value:
            assert ModelInfoKeys.CLASS_LABELS in response_dict

        if framework == SKLEARN and problem == REGRESSION:
            assert ModelInfoKeys.MODEL_METADATA in response_dict

    unset_drum_supported_env_vars()
def check_prediction_side_effects(self):
    rtol = 2e-02
    atol = 1e-06
    input_extension = os.path.splitext(self.options.input)
    is_sparse = input_extension[1] == ".mtx"

    if is_sparse:
        df = pd.DataFrame(mmread(self.options.input).tocsr())
        samplesize = min(1000, max(int(len(df) * 0.1), 10))
        data_subset = df.sample(n=samplesize, random_state=42)
        _, __tempfile_sample = mkstemp(suffix=".mtx")
        sparse_mat = vstack(x[0] for x in data_subset.values)
        mmwrite(__tempfile_sample, sparse_mat)
    else:
        df = pd.read_csv(self.options.input)
        samplesize = min(1000, max(int(len(df) * 0.1), 10))
        data_subset = df.sample(n=samplesize, random_state=42)
        _, __tempfile_sample = mkstemp(suffix=".csv")
        data_subset.to_csv(__tempfile_sample, index=False)

    if self.target_type == TargetType.BINARY:
        labels = [self.options.negative_class_label, self.options.positive_class_label]
    elif self.target_type == TargetType.MULTICLASS:
        labels = self.options.class_labels
    else:
        labels = None

    with DrumServerRun(
        self.target_type.value,
        labels,
        self.options.code_dir,
    ) as run:
        response_key = (
            X_TRANSFORM_KEY if self.target_type == TargetType.TRANSFORM else RESPONSE_PREDICTIONS_KEY
        )
        endpoint = "/transform/" if self.target_type == TargetType.TRANSFORM else "/predict/"

        response_full = requests.post(
            run.url_server_address + endpoint, files={"X": open(self.options.input)}
        )
        response_sample = requests.post(
            run.url_server_address + endpoint, files={"X": open(__tempfile_sample)}
        )

        if self.target_type == TargetType.TRANSFORM:
            if is_sparse:
                preds_full = pd.DataFrame(read_mtx_payload(eval(response_full.text)))
                preds_sample = pd.DataFrame(read_mtx_payload(eval(response_sample.text)))
            else:
                preds_full = read_csv_payload(eval(response_full.text))
                preds_sample = read_csv_payload(eval(response_sample.text))
        else:
            preds_full = pd.DataFrame(json.loads(response_full.text)[response_key])
            preds_sample = pd.DataFrame(json.loads(response_sample.text)[response_key])

        preds_full_subset = preds_full.iloc[data_subset.index]

        matches = np.isclose(preds_full_subset, preds_sample, rtol=rtol, atol=atol)
        if not np.all(matches):
            message = """
                Error: Your predictions were different when we tried to predict twice.
                No randomness is allowed.
                The last 10 predictions from the main predict run were: {}
                However when we reran predictions on the same data, we got: {}.
                The sample used to calculate prediction reruns can be found in this file: {}""".format(
                preds_full_subset[~matches][:10], preds_sample[~matches][:10], __tempfile_sample
            )
            raise ValueError(message)
        else:
            os.remove(__tempfile_sample)
def test_r2d2_drum_prediction_server(
    self, resources, tmp_path,
):
    print("current dir: {}".format(os.getcwd()))
    custom_model_dir = "tools/r2d2"

    with DrumServerRun(
        target_type=resources.target_types(REGRESSION_INFERENCE),
        labels=None,
        custom_model_dir=custom_model_dir,
        docker=DOCKER_PYTHON_SKLEARN,
        memory="500m",
        fail_on_shutdown_error=False,
    ) as run:
        print("r2d2 is running")
        cmd = "python tools/r2d2/custom.py memory 200 --server {}".format(run.server_address)
        print(cmd)

        p, stdout, stderr = _exec_shell_cmd(cmd, "Error running r2d2 main")
        print("CMD result: {}".format(p.returncode))
        print(stdout)
        print(stderr)
        assert p.returncode == 0

        data = pd.DataFrame(
            {"cmd": ["memory"], "arg": [100]},
            columns=["cmd", "arg"],
        )
        print("Sending the following data:")
        print(data)

        csv_data = data.to_csv(index=False)
        url = "{}/predict/".format(run.url_server_address)
        response = requests.post(url, files={"X": csv_data})
        print(response)
        assert response.ok

        # Sending the exception command; we should get a failed response.
        data = pd.DataFrame(
            {"cmd": ["exception"], "arg": [100]},
            columns=["cmd", "arg"],
        )
        print("Sending the following data:")
        print(data)

        csv_data = data.to_csv(index=False)
        response = requests.post(url, files={"X": csv_data})
        print(response)
        assert response.status_code == 500

        # The server should be alive before we kill it with memory.
        response = requests.get(run.url_server_address)
        print(response)
        assert response.ok

        # Kill the docker container by allocating too much memory.
        data = pd.DataFrame(
            {"cmd": ["memory"], "arg": [1000]},
            columns=["cmd", "arg"],
        )
        print("Sending 1000m data:")
        print(data)
        csv_data = data.to_csv(index=False)

        try:
            response = requests.post(url, files={"X": csv_data})
            print(response)
            assert response.status_code == 500
        except Exception:
            print("Expected connection error")
def test_drum_prediction_server_pps_response(
    self, resources, framework, problem, language, deployment_config,
    deployment_config_as_env_var, tmp_path,
):
    custom_model_dir = _create_custom_model_dir(
        resources, tmp_path, framework, problem, language,
    )

    append_cmd = None
    if deployment_config_as_env_var:
        os.environ[ArgumentOptionsEnvVars.DEPLOYMENT_CONFIG] = deployment_config
    else:
        append_cmd = " --deployment-config {}".format(deployment_config)

    with DrumServerRun(
        resources.target_types(problem),
        resources.class_labels(framework, problem),
        custom_model_dir,
        append_cmd=append_cmd,
    ) as run:
        input_dataset = resources.datasets(framework, problem)

        # Do predictions.
        for endpoint in ["/predict/", "/predictions/"]:
            for post_args in [
                {"files": {"X": open(input_dataset)}},
                {"data": open(input_dataset, "rb")},
            ]:
                response = requests.post(run.url_server_address + endpoint, **post_args)
                assert response.ok
                response_json = json.loads(response.text)
                assert isinstance(response_json, dict)
                assert "data" in response_json
                predictions_list = response_json["data"]
                assert isinstance(predictions_list, list)
                assert len(predictions_list)

                prediction_item = predictions_list[0]
                assert "rowId" in prediction_item
                assert "prediction" in prediction_item
                assert "predictionValues" in prediction_item

                assert pd.read_csv(input_dataset).shape[0] == len(predictions_list)

    unset_drum_supported_env_vars()
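# A hedged example of the PPS-style response shape asserted above. Only the
# presence of the keys is checked by the test; the row values and the
# predictionValues structure shown here are illustrative assumptions.
EXAMPLE_PPS_RESPONSE = {
    "data": [
        {
            "rowId": 0,
            "prediction": 1.0,
            "predictionValues": [{"label": 1.0, "value": 1.0}],  # assumed shape
        }
    ]
}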