def test_custom_transforms_with_drum_nginx_prediction_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
                nginx=True,
        ) as run:
            input_dataset = resources.datasets(framework, problem)
            # do predictions
            response = requests.post(run.url_server_address + "/transform/",
                                     files={"X": open(input_dataset)})

            assert response.ok

            in_data = pd.read_csv(input_dataset)

            parsed_response = parse_multi_part_response(response)

            transformed_mat = read_mtx_payload(parsed_response,
                                               X_TRANSFORM_KEY)
            actual_num_predictions = transformed_mat.shape[0]
            assert in_data.shape[0] == actual_num_predictions
    def test_custom_transform_server(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
        use_arrow,
        pass_target,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                resources.target_types(problem),
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:
            input_dataset = resources.datasets(framework, problem)
            in_data = pd.read_csv(input_dataset)

            files = {"X": open(input_dataset)}
            if pass_target:
                target_dataset = resources.targets(problem)
                files["y"] = open(target_dataset)

            if use_arrow:
                files["arrow_version"] = ".2"

            response = requests.post(run.url_server_address + "/transform/",
                                     files=files)
            assert response.ok

            parsed_response = parse_multi_part_response(response)

            if framework == SKLEARN_TRANSFORM_DENSE:
                if use_arrow:
                    transformed_out = read_arrow_payload(
                        parsed_response, X_TRANSFORM_KEY)
                    if pass_target:
                        target_out = read_arrow_payload(
                            parsed_response, Y_TRANSFORM_KEY)
                    assert parsed_response["X.format"] == "arrow"
                    if pass_target:
                        assert parsed_response["y.format"] == "arrow"
                else:
                    transformed_out = read_csv_payload(parsed_response,
                                                       X_TRANSFORM_KEY)
                    if pass_target:
                        target_out = read_csv_payload(parsed_response,
                                                      Y_TRANSFORM_KEY)
                    assert parsed_response["X.format"] == "csv"
                    if pass_target:
                        assert parsed_response["y.format"] == "csv"
                actual_num_predictions = transformed_out.shape[0]
            else:
                transformed_out = read_mtx_payload(parsed_response,
                                                   X_TRANSFORM_KEY)
                colnames = parsed_response["X.colnames"].decode("utf-8").split(
                    "\n")
                assert len(colnames) == transformed_out.shape[1]
                if pass_target:
                    # this shouldn't be sparse even though features are
                    if use_arrow:
                        target_out = read_arrow_payload(
                            parsed_response, Y_TRANSFORM_KEY)
                        if pass_target:
                            assert parsed_response["y.format"] == "arrow"
                    else:
                        target_out = read_csv_payload(parsed_response,
                                                      Y_TRANSFORM_KEY)
                        if pass_target:
                            assert parsed_response["y.format"] == "csv"
                actual_num_predictions = transformed_out.shape[0]
                assert parsed_response["X.format"] == "sparse"
            validate_transformed_output(
                transformed_out,
                should_be_sparse=framework == SKLEARN_TRANSFORM)
            if pass_target:
                assert all(pd.read_csv(target_dataset) == target_out)
            assert in_data.shape[0] == actual_num_predictions
Example #3
0
 def load_transform_output(response, is_sparse, request_key):
     parsed_response = parse_multi_part_response(response)
     if is_sparse:
         return pd.DataFrame(read_mtx_payload(parsed_response, request_key))
     else:
         return pd.DataFrame(read_csv_payload(parsed_response, request_key))