Beispiel #1
0
    def test_response_content_type(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        ret_charset,
        ret_mimetype,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:

            text_data = u"my text, мой текст"
            params = {}
            params["ret_one_or_two"] = "two"

            # do predictions
            url = run.url_server_address + "/predictUnstructured/"
            headers = {"Content-Type": "text/plain; charset=UTF-8"}
            if ret_charset is not None:
                params["ret_charset"] = ret_charset
            if ret_mimetype is not None:
                params["ret_mimetype"] = ret_mimetype
            response = requests.post(url=url,
                                     data=text_data.encode("utf8"),
                                     params=params,
                                     headers=headers)

            assert response.ok
            content_type_header = response.headers["Content-Type"]
            assert ret_mimetype in content_type_header
            assert ("charset=utf8" if ret_charset is None else
                    "charset={}".format(ret_charset) in content_type_header)

            charset_to_decode = "utf8" if ret_charset is None else ret_charset
            assert text_data == response.content.decode(charset_to_decode)
Beispiel #2
0
    def test_unstructured_models_batch(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        mimetype,
        ret_mode,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        input_dataset = resources.datasets(framework, problem)

        output = tmp_path / "output"

        content_type = "--content-type '{};'".format(
            mimetype) if mimetype is not None else ""
        cmd = "{} score --code-dir {} --input {} --output {} --target-type unstructured {} --query 'ret_mode={}'".format(
            ArgumentsOptions.MAIN_COMMAND,
            custom_model_dir,
            input_dataset,
            output,
            content_type,
            ret_mode,
        )

        if docker:
            cmd += " --docker {} --verbose ".format(docker)

        _exec_shell_cmd(
            cmd, "Failed in {} command line! {}".format(
                ArgumentsOptions.MAIN_COMMAND, cmd))
        if ret_mode == "binary":
            with open(output, "rb") as f:
                out_data = f.read()
                assert 10 == int.from_bytes(out_data, byteorder="big")
        else:
            with open(output) as f:
                out_data = f.read()
                assert "10" in out_data
    def test_custom_models_with_drum_prediction_server(
        self,
        resources,
        framework,
        problem,
        language,
        nginx,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
                nginx=nginx,
        ) as run:
            input_dataset = resources.datasets(framework, problem)

            for ret_mode in ["text", "binary"]:
                # do predictions
                url = run.url_server_address + "/predictUnstructured/"
                data = open(input_dataset, "rb").read()
                params = {"ret_mode": ret_mode}
                response = requests.post(url=url, data=data, params=params)

                assert response.ok
                if ret_mode == "text":
                    assert response.text == "10"
                else:
                    assert 10 == int.from_bytes(response.content,
                                                byteorder="big")
Beispiel #4
0
    def test_response_one_var_return(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        one_or_two,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:
            input_dataset = resources.datasets(framework, problem)

            params = {"ret_one_or_two": one_or_two}
            url = run.url_server_address + "/predictUnstructured/"

            # sending None data
            data = None
            headers = {"Content-Type": "text/plain; charset=UTF-8"}
            response = requests.post(url=url,
                                     data=data,
                                     params=params,
                                     headers=headers)
            content_type_header = response.headers["Content-Type"]
            assert response.ok
            assert "text/plain" in content_type_header
            assert "charset=utf8" in content_type_header
            assert len(response.content) == 0

            # sending text data
            data = open(input_dataset, "rb").read()
            response = requests.post(url=url,
                                     data=data,
                                     params=params,
                                     headers=headers)
            assert response.ok
            content_type_header = response.headers["Content-Type"]
            assert "text/plain" in content_type_header
            assert "charset=utf8" in content_type_header
            assert response.content == data

            # sending binary data
            headers = {"Content-Type": "application/octet-stream;"}
            response = requests.post(url=url,
                                     data=data,
                                     params=params,
                                     headers=headers)
            content_type_header = response.headers["Content-Type"]
            assert "application/octet-stream" in content_type_header
            assert "charset=utf8" in content_type_header
            assert response.content == data
    def test_response_one_var_return(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:
            url = run.url_server_address + "/predictUnstructured/"

            for one_or_two in ["one", "one-with-none"]:
                input_dataset = resources.datasets(framework, problem)
                data_bytes = open(input_dataset, "rb").read()
                params = {"ret_one_or_two": one_or_two}

                # Sending None or text_data encoded with utf8, by default text files are opened using utf8
                # Content-Type is not used in the hook, but used by drum to decode
                # Expected response content type is default: "text/plain; charset=UTF-8"
                for data in [None, data_bytes]:
                    for ct in [
                            "text/plain; charset=UTF-8", "text/some_other;"
                    ]:
                        headers = {"Content-Type": ct}
                        response = requests.post(url=url,
                                                 data=data,
                                                 params=params,
                                                 headers=headers)
                        assert response.ok
                        content_type_header = response.headers["Content-Type"]
                        mimetype, content_type_params_dict = werkzeug.http.parse_options_header(
                            content_type_header)
                        assert mimetype == "text/plain"
                        assert content_type_params_dict["charset"] == UTF8
                        if data is None:
                            assert len(response.content) == 0
                        else:
                            assert response.content == data_bytes

                # Sending text_data encoded with utf16.
                # Content-Type is not used in the hook, but used by drum to decode.
                # Expected response content type is default: "text/plain; charset=UTF-8"
                data_text = u"some text текст"
                data_bytes = u"some text текст".encode(UTF16)
                for data in [data_bytes]:
                    for ct in ["text/plain; charset={}".format(UTF16)]:
                        headers = {"Content-Type": ct}
                        response = requests.post(url=url,
                                                 data=data,
                                                 params=params,
                                                 headers=headers)
                        assert response.ok
                        content_type_header = response.headers["Content-Type"]
                        mimetype, content_type_params_dict = werkzeug.http.parse_options_header(
                            content_type_header)
                        assert mimetype == "text/plain"
                        assert content_type_params_dict["charset"] == UTF8
                        if data is None:
                            assert len(response.content) == 0
                        else:
                            assert response.content == data_text.encode(UTF8)

                # sending binary data
                headers = {"Content-Type": "application/octet-stream;"}
                response = requests.post(url=url,
                                         data=data_bytes,
                                         params=params,
                                         headers=headers)
                assert response.ok
                content_type_header = response.headers["Content-Type"]
                mimetype, content_type_params_dict = werkzeug.http.parse_options_header(
                    content_type_header)
                assert "application/octet-stream" == mimetype
                # check params dict is empty
                assert any(content_type_params_dict) == False
                assert response.content == data_bytes
    def test_response_content_type(
        self,
        resources,
        framework,
        problem,
        language,
        docker,
        tmp_path,
    ):
        custom_model_dir = _create_custom_model_dir(
            resources,
            tmp_path,
            framework,
            problem,
            language,
        )

        with DrumServerRun(
                "unstructured",
                resources.class_labels(framework, problem),
                custom_model_dir,
                docker,
        ) as run:

            text_data = u"my text, мой текст"

            # Fixtures are not used as don't want to spin up server for each test case
            # Test case with "application/octet-stream" is not very correct as data is returned as text.
            # In this test data is sent with mimetype=text/plain, so score_unstructured receives data as text.
            # Hook returns data as text with ret_charset, so response data will be encoded with this charset.
            for request_charset in [None, UTF8, UTF16]:
                for ret_charset in [None, UTF8, UTF16]:
                    for ret_mimetype in [
                            "application/octet-stream", "text/plain_drum_test"
                    ]:
                        params = {}
                        params["ret_one_or_two"] = "two"
                        charset_to_encode = UTF8 if request_charset is None else request_charset
                        # do predictions
                        url = run.url_server_address + "/predictUnstructured/"
                        headers = {
                            "Content-Type":
                            "text/plain; charset={}".format(charset_to_encode)
                        }
                        if ret_charset is not None:
                            params["ret_charset"] = ret_charset
                        if ret_mimetype is not None:
                            params["ret_mimetype"] = ret_mimetype
                        response = requests.post(
                            url=url,
                            data=text_data.encode(charset_to_encode),
                            params=params,
                            headers=headers,
                        )

                        expected_charset = UTF8 if ret_charset is None else ret_charset
                        assert response.ok
                        content_type_header = response.headers["Content-Type"]
                        assert ret_mimetype in content_type_header
                        assert "charset={}".format(
                            expected_charset) in content_type_header
                        assert text_data == response.content.decode(
                            expected_charset)