Esempio n. 1
0
 def test_do_not_crash_on_utf8_encoded_content_disposition_header(self):
     """A UTF-8-encoded header value is read back as latin1, not repaired."""
     # If the server responded with a UTF-8-encoded header, that's a bug
     # on the server: the author didn't realize all headers are
     # latin1-encoded, so the header is actually double-encoded.
     #
     # The result: if a developer unwittingly utf8-encodes a filename, then
     # the result is _unambiguously_ something else. For instance, "café"
     # encodes to "cafÃ©" (bytes C3 A9, each read as one latin1 character).
     #
     # We're spec-compliant here. We will correctly return "cafÃ©". The
     # caller can second-guess us if it sees fit.
     #
     # https://www.pivotaltracker.com/story/show/174715741
     with tempfile.NamedTemporaryFile() as tf:
         path = Path(tf.name)
         path.write_bytes(
             gzip.compress(b"".join([
                 b'{"url":"http://example.com/hello"}\r\n',
                 b"200 OK\r\n",
                 b"content-disposition: attachment; filename=caf\xc3\xa9\r\n",
                 b"\r\n",
                 b"Some text",
             ])))
         with httpfile.read(path) as (parameters, status_line, headers,
                                      body_path):
             # Spelled with escapes so the expectation cannot be mangled by
             # an editor or tool re-encoding this source file:
             # \u00c3 is "Ã" and \u00a9 is "©".
             assert headers == [("content-disposition",
                                 "attachment; filename=caf\u00c3\u00a9")]
Esempio n. 2
0
def _render_file(path: Path, output_path: Path, params: Dict[str, Any]):
    """Parse the body stored in the httpfile at ``path`` into ``output_path``.

    The MIME type is guessed from the stored Content-Type header, the
    Content-Disposition header and the originally requested URL. When no
    MIME type can be guessed, a one-element list holding an
    "error.unhandledContentType" message is returned instead of parsing.
    Otherwise the return value is whatever ``parse_file()`` returns.

    ``params["has_header"]`` is forwarded to ``parse_file()`` unchanged.
    """
    with httpfile.read(path) as (parameters, status_line, headers, body_path):
        raw_content_type = httpfile.extract_first_header(
            headers, "Content-Type")
        # A missing Content-Type header is treated as an empty string.
        content_type = raw_content_type or ""
        content_disposition = httpfile.extract_first_header(
            headers, "Content-Disposition")

        mime_type = guess_mime_type_or_none(
            content_type,
            content_disposition,
            parameters["url"],
        )
        if mime_type:
            return parse_file(
                body_path,
                output_path=output_path,
                encoding=guess_charset_or_none(content_type),
                mime_type=mime_type,
                has_header=params["has_header"],
            )

        # Nothing we know how to parse: report it to the user.
        unhandled_message = trans(
            "error.unhandledContentType",
            "Server responded with unhandled Content-Type {content_type}. "
            "Please use a different URL.",
            {"content_type": content_type},
        )
        return [unhandled_message]
Esempio n. 3
0
 def test_fetch_xlsx_file(self):
     """Fetching an .xlsx file stores body and headers; request uses ?alt=media."""
     xlsx_mime_type = (
         "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
     body = b"abcd"
     self.mock_http_response = MockHttpResponse.ok(
         body, [("Content-Type", xlsx_mime_type)])
     with self.fetch(
             P(file={**default_file, "mimeType": xlsx_mime_type}),
             secrets(DEFAULT_SECRET),
     ) as result:
         self.assertEqual(result.errors, [])
         with httpfile.read(result.path) as (_, __, headers, body_path):
             self.assertEqual(body_path.read_bytes(), body)
             self.assertEqual(
                 headers,
                 [
                     ("content-type", xlsx_mime_type),
                     ("content-length", "4"),
                 ],
             )
     # Escape "?" so it matches the literal query-string separator. Left
     # unescaped, ".*?" is just a lazy quantifier and no "?" is required.
     self.assertRegex(self.last_http_requestline, r"/files/.*\?alt=media")
Esempio n. 4
0
 def test_fetch_chunked_csv(self):
     """A body sent as several chunks is stored as one contiguous body."""
     chunks = [b"A,B\nx", b",y\nz,", b"a"]
     response_headers = [("Content-Type", "text/csv; charset=utf-8")]
     self.mock_http_response = MockHttpResponse.ok(chunks, response_headers)
     url = self.build_url("/path/to.csv.chunks")
     with call_fetch(url) as result:
         self.assertEqual(result.errors, [])
         with httpfile.read(result.path) as (_, __, headers, body_path):
             stored_body = body_path.read_bytes()
             self.assertEqual(stored_body, b"A,B\nx,y\nz,a")
Esempio n. 5
0
 def test_fetch_gzip_encoded_csv(self):
     """A gzip Content-Encoding response is stored with its decoded body."""
     body = b"A,B\nx,y\nz,a"
     url = self.build_url("/path/to.csv.gz")
     response_headers = [
         ("Content-Type", "text/csv; charset=utf-8"),
         ("Content-Encoding", "gzip"),
     ]
     self.mock_http_response = MockHttpResponse.ok(
         gzip.compress(body), response_headers)
     with call_fetch(url) as result:
         self.assertEqual(result.errors, [])
         with httpfile.read(result.path) as (_, __, headers, body_path):
             stored_body = body_path.read_bytes()
             self.assertEqual(stored_body, body)
Esempio n. 6
0
 def test_fetch_native_sheet(self):
     """Fetching with default params requests a CSV export and stores it."""
     body = b"A,B\nx,y\nz,a"
     self.mock_http_response = MockHttpResponse.ok(
         body, [("Content-Type", "text/csv")])
     with self.fetch(P(), secrets(DEFAULT_SECRET)) as result:
         self.assertEqual(result.errors, [])
         with httpfile.read(result.path) as (_, __, headers, body_path):
             self.assertEqual(body_path.read_bytes(), body)
             self.assertEqual(headers, [("content-type", "text/csv"),
                                        ("content-length", "11")])
     # Raw string: "\?" in a plain string literal is an invalid escape
     # sequence that Python warns about. The regex itself is unchanged.
     self.assertRegex(self.last_http_requestline,
                      r"/files/.*/export\?mimeType=text%2Fcsv")
Esempio n. 7
0
def _render_file(path: Path, params: Dict[str, Any], output_path: Path):
    """Parse the body stored in the httpfile at ``path`` into ``output_path``.

    The MIME type is derived from the stored Content-Type header via
    ``_calculate_mime_type()``. Returns whatever ``parse_file()`` returns.
    ``params["has_header"]`` is forwarded to ``parse_file()`` unchanged.
    """
    with httpfile.read(path) as (parameters, status_line, headers, body_path):
        mime_type = _calculate_mime_type(
            httpfile.extract_first_header(headers, "Content-Type"))
        # The charset Google reports is deliberately not used: its headers
        # imply latin-1 even when the data is utf-8. Passing encoding=None
        # leaves the choice to parse_file().
        parsed = parse_file(
            body_path,
            encoding=None,
            mime_type=mime_type,
            has_header=params["has_header"],
            output_path=output_path,
        )
        return parsed
Esempio n. 8
0
 def test_fetch_csv_file(self):
     """Fetching a CSV file stores body and headers; request uses ?alt=media."""
     body = b"A,B\nx,y\nz,a"
     self.mock_http_response = MockHttpResponse.ok(
         body, [("Content-Type", "text/csv")])
     with self.fetch(P(file={
             **default_file, "mimeType": "text/csv"
     }), secrets(DEFAULT_SECRET)) as result:
         self.assertEqual(result.errors, [])
         with httpfile.read(result.path) as (_, __, headers, body_path):
             self.assertEqual(body_path.read_bytes(), body)
             self.assertEqual(headers, [("content-type", "text/csv"),
                                        ("content-length", "11")])
     # Escape "?" so it matches the literal query-string separator. Left
     # unescaped, ".*?" is just a lazy quantifier and no "?" is required.
     self.assertRegex(self.last_http_requestline, r"/files/.*\?alt=media")
Esempio n. 9
0
 def test_latin1_headers(self):
     """A latin1 byte (0xE9) in a header value is read back as "é"."""
     raw_httpfile = b"".join([
         b'{"url":"http://example.com/hello"}\r\n',
         b"200 OK\r\n",
         b"content-disposition: attachment; filename=caf\xe9\r\n",
         b"\r\n",
         b"Some text",
     ])
     expected_headers = [
         ("content-disposition", "attachment; filename=café"),
     ]
     with tempfile.NamedTemporaryFile() as tf:
         path = Path(tf.name)
         # httpfile contents are stored gzip-compressed on disk.
         path.write_bytes(gzip.compress(raw_httpfile))
         with httpfile.read(path) as (parameters, status_line, headers,
                                      body_path):
             assert headers == expected_headers
Esempio n. 10
0
 def test_special_headers(self):
     """A "Cjw-Original-content-length" line is read back as content-length."""
     # The httpfile format keeps the *decoded* body but the headers as they
     # were sent over HTTP, so a plain Content-Length would be ambiguous: it
     # does not give the number of body bytes stored. The file therefore
     # carries the value under a "Cjw-Original-" prefixed name.
     raw_httpfile = b"".join([
         b'{"url":"http://example.com/hello"}\r\n',
         b"200 OK\r\n",
         b"Cjw-Original-content-length: 9\r\n",
         b"\r\n",
         b"Some text",
     ])
     with tempfile.NamedTemporaryFile() as tf:
         path = Path(tf.name)
         path.write_bytes(gzip.compress(raw_httpfile))
         with httpfile.read(path) as (parameters, status_line, headers,
                                      body_path):
             expected_headers = [("content-length", "9")]
             assert headers == expected_headers
Esempio n. 11
0
 def test_fetch_deflate_encoded_csv(self):
     """A deflate Content-Encoding response is stored with its decoded body."""
     body = b"A,B\nx,y\nz,a"
     # Negative wbits makes zlib emit a raw deflate stream.
     compressor = zlib.compressobj(wbits=-zlib.MAX_WBITS)
     deflated = compressor.compress(body) + compressor.flush()
     url = self.build_url("/path/to.csv.gz")
     response_headers = [
         ("Content-Type", "text/csv; charset=utf-8"),
         ("Content-Encoding", "deflate"),
     ]
     self.mock_http_response = MockHttpResponse.ok(deflated, response_headers)
     with call_fetch(url) as result:
         self.assertEqual(result.errors, [])
         with httpfile.read(result.path) as (_, __, headers, body_path):
             stored_body = body_path.read_bytes()
             self.assertEqual(stored_body, body)
Esempio n. 12
0
 def test_fetch_csv(self):
     """A plain CSV response is stored with its body and lowercased headers."""
     body = b"A,B\nx,y\nz,a"
     url = self.build_url("/path/to.csv")
     response_headers = [("Content-Type", "text/csv; charset=utf-8")]
     self.mock_http_response = MockHttpResponse.ok(body, response_headers)
     expected_headers = [
         ("content-type", "text/csv; charset=utf-8"),
         ("content-length", "11"),
     ]
     with call_fetch(url) as result:
         self.assertEqual(result.errors, [])
         with httpfile.read(result.path) as (_, __, headers, body_path):
             self.assertEqual(body_path.read_bytes(), body)
             self.assertEqual(headers, expected_headers)
Esempio n. 13
0
 async def test_decode_chunked_csv(self, http_server):
     """A chunked response is stored de-chunked, with the header preserved."""
     chunked_response = MockHttpResponse.ok(
         [b"A,B\nx", b",y\nz,", b"a"],
         [("content-type", "text/csv; charset=utf-8")],
     )
     http_server.mock_response(chunked_response)
     url = http_server.build_url("/path/to.csv.chunks")
     async with self.download(url) as path:
         # On disk, transfer-encoding is written under the Cjw-Original-
         # prefix.
         stored_bytes = gzip.decompress(path.read_bytes())
         assert (b"\r\nCjw-Original-transfer-encoding: chunked\r\n"
                 in stored_bytes)
         with httpfile.read(path) as (parameters, status_line, headers,
                                      body_path):
             assert body_path.read_bytes() == b"A,B\nx,y\nz,a"
             expected_headers = [
                 ("content-type", "text/csv; charset=utf-8"),
                 ("transfer-encoding", "chunked"),
             ]
             assert headers == expected_headers
Esempio n. 14
0
 def test_fetch_follow_redirect(self):
     """Two 302 redirects are followed; stored parameters keep the first URL."""
     url1 = self.build_url("/url1.csv")
     url2 = self.build_url("/url2.csv")
     url3 = self.build_url("/url3.csv")
     responses = [
         MockHttpResponse(302, [("Location", url2)]),
         MockHttpResponse(302, [("Location", url3)]),
         MockHttpResponse.ok(b"A,B\n1,2", [("Content-Type", "text/csv")]),
     ]
     # An iterator hands out one response per request, in order.
     self.mock_http_response = iter(responses)
     with call_fetch(url1) as result:
         self.assertEqual(result.errors, [])
         with httpfile.read(result.path) as (parameters, __, headers,
                                             body_path):
             self.assertEqual(body_path.read_bytes(), b"A,B\n1,2")
             self.assertEqual(parameters, {"url": url1})
     requestlines = self.http_requestlines
     self.assertIn("/url1.csv", requestlines[0])
     self.assertIn("/url2.csv", requestlines[1])
     self.assertIn("/url3.csv", requestlines[2])
Esempio n. 15
0
 async def test_follow_redirect(self, http_server):
     """Two 302 redirects are followed and the final body is stored."""
     url1 = http_server.build_url("/url1.csv")
     url2 = http_server.build_url("/url2.csv")
     url3 = http_server.build_url("/url3.csv")
     responses = [
         MockHttpResponse(302, [("location", url2)]),
         MockHttpResponse(302, [("location", url3)]),
         MockHttpResponse.ok(b"A,B\n1,2", [("content-type", "text/csv")]),
     ]
     # An iterator hands out one response per request, in order.
     http_server.mock_response(iter(responses))
     async with self.download(url1) as path:
         with httpfile.read(path) as (parameters, status_line, headers,
                                      body_path):
             assert body_path.read_bytes() == b"A,B\n1,2"
     expected_paths = ["/url1.csv", "/url2.csv", "/url3.csv"]
     assert http_server.requested_paths == expected_paths
Esempio n. 16
0
def _render_file(path: Path, output_path: Path, params: Dict[str, Any]):
    """Parse the body stored in the httpfile at ``path`` into ``output_path``.

    The MIME type is guessed from the stored Content-Type header and the
    originally requested URL. When no MIME type can be guessed, a
    ``RenderResult`` carrying a single ``RenderError`` is returned instead
    of parsing. Otherwise the return value is whatever ``parse_file()``
    returns.

    ``params["has_header"]`` is forwarded to ``parse_file()`` unchanged.
    """
    with httpfile.read(path) as (parameters, status_line, headers, body_path):
        content_type = httpfile.extract_first_header(headers, "Content-Type")

        mime_type = guess_mime_type_or_none(content_type, parameters["url"])
        if mime_type:
            return parse_file(
                body_path,
                output_path=output_path,
                encoding=guess_charset_or_none(content_type),
                mime_type=mime_type,
                has_header=params["has_header"],
            )

        # Nothing we know how to parse: report it to the user.
        message_text = ("Server responded with unhandled Content-Type %r. "
                        "Please use a different URL.") % content_type
        unhandled_error = RenderError(I18nMessage.TODO_i18n(message_text))
        return RenderResult(errors=[unhandled_error])
Esempio n. 17
0
 def test_happy_path(self):
     """A well-formed httpfile yields its parameters, status, headers, body."""
     raw_httpfile = b"".join([
         b'{"url":"http://example.com/hello"}\r\n',
         b"200 OK\r\n",
         b"content-type: text/plain; charset=utf-8\r\n",
         b"content-disposition: inline\r\n",
         b"\r\n",
         b"Some text",
     ])
     expected_headers = [
         ("content-type", "text/plain; charset=utf-8"),
         ("content-disposition", "inline"),
     ]
     with tempfile.NamedTemporaryFile() as tf:
         path = Path(tf.name)
         # httpfile contents are stored gzip-compressed on disk.
         path.write_bytes(gzip.compress(raw_httpfile))
         with httpfile.read(path) as (parameters, status_line, headers,
                                      body_path):
             assert parameters == {"url": "http://example.com/hello"}
             assert status_line == "200 OK"
             assert headers == expected_headers
             assert body_path.read_bytes() == b"Some text"
Esempio n. 18
0
 async def test_gunzip_encoded_body(self, http_server):
     """A gzip-encoded response is stored decoded, original headers kept."""
     body = b"A,B\nx,y\nz,a"
     gzbody = gzip.compress(body)
     url = http_server.build_url("/path/to.csv.gz")
     response_headers = [
         ("content-type", "text/csv; charset=utf-8"),
         ("content-encoding", "gzip"),
     ]
     http_server.mock_response(MockHttpResponse.ok(gzbody, response_headers))
     async with self.download(url) as path:
         # On disk, content-encoding is written under the Cjw-Original-
         # prefix.
         stored_bytes = gzip.decompress(path.read_bytes())
         assert b"\r\nCjw-Original-content-encoding: gzip\r\n" in stored_bytes
         with httpfile.read(path) as (parameters, status_line, headers,
                                      body_path):
             assert body_path.read_bytes() == body
             # content-length reflects the compressed bytes on the wire.
             expected_headers = [
                 ("content-type", "text/csv; charset=utf-8"),
                 ("content-encoding", "gzip"),
                 ("content-length", str(len(gzbody))),
             ]
             assert headers == expected_headers
Esempio n. 19
0
 async def test_deflate_encoded_body(self, http_server):
     """A deflate-encoded response is stored decoded, original headers kept."""
     body = b"A,B\nx,y\nz,a"
     # Negative wbits makes zlib emit a raw deflate stream.
     compressor = zlib.compressobj(wbits=-zlib.MAX_WBITS)
     zbody = compressor.compress(body) + compressor.flush()
     url = http_server.build_url("/path/to.csv.gz")
     response_headers = [
         ("content-type", "text/csv; charset=utf-8"),
         ("content-encoding", "deflate"),
     ]
     http_server.mock_response(MockHttpResponse.ok(zbody, response_headers))
     async with self.download(url) as path:
         # On disk, content-encoding is written under the Cjw-Original-
         # prefix.
         stored_bytes = gzip.decompress(path.read_bytes())
         assert (b"\r\nCjw-Original-content-encoding: deflate\r\n"
                 in stored_bytes)
         with httpfile.read(path) as (parameters, status_line, headers,
                                      body_path):
             assert body_path.read_bytes() == body
             # content-length reflects the compressed bytes on the wire.
             expected_headers = [
                 ("content-type", "text/csv; charset=utf-8"),
                 ("content-encoding", "deflate"),
                 ("content-length", str(len(zbody))),
             ]
             assert headers == expected_headers