Example #1
0
 def test_render_deprecated_process_result(self):
     # fetch_result may still arrive as a (deprecated) ProcessResult rather
     # than a FetchResult; with has_header=True the wrapped table is
     # expected to come back unchanged.
     legacy_fetch = ProcessResult(pd.DataFrame({"A": [1, 2]}))
     params = P(has_header=True)
     result = render(pd.DataFrame(), params, fetch_result=legacy_fetch)
     # Build the expectation from scratch so it cannot alias the input.
     expected = pd.DataFrame({"A": [1, 2]})
     assert_process_result_equal(result, expected)
Example #2
0
 def test_render_deprecated_process_result_and_has_header_false(self):
     # The deprecated loadurl loaded the original data, so turning the
     # header back into a data row is lossy. This exercises the "happy
     # path": values went text => number => text and their formatting
     # happened to survive the round trip.
     legacy_fetch = ProcessResult(pd.DataFrame({"A": [1, 2]}))
     params = P(has_header=False)
     result = render(pd.DataFrame(), params, fetch_result=legacy_fetch)
     # Deprecated behavior: columns are numbered, and the old header "A"
     # becomes the first row of text.
     expected = pd.DataFrame({"0": ["A", "1", "2"]})
     assert_process_result_equal(result, expected)
Example #3
0
 def test_render_json_invalid_json(self):
     # Stored fetch payload as laid out below: a JSON metadata line, a
     # status line, headers, a blank line, then the body -- CRLF-joined
     # and gzipped. The body claims to be JSON but is not.
     http_log = b"\r\n".join([
         b'{"url":"http://example.com/x.json"}',
         b"200 OK",
         b"Content-Type: application/json",
         b"",
         b"not json",
     ])
     expected_error = "JSON lexical error: invalid string in json text."
     with tempfile_context("fetch-") as tf:
         tf.write_bytes(gzip.compress(http_log))
         fetched = FetchResult(tf)
         result = render(pd.DataFrame(), P(), fetch_result=fetched)
         # Must be checked while the temp file still exists.
         assert_process_result_equal(result, expected_error)
Example #4
0
 def test_render_xlsx_bad_content(self):
     # The response is labelled with the XLSX MIME type but the body is
     # plain UTF-8 text, so render() should report an Excel read error.
     content_type_header = b"Content-Type: " + XLSX_MIME_TYPE.encode("latin1")
     bogus_body = "ceçi n'est pas une .xlsx".encode("utf-8")
     http_log = b"\r\n".join([
         b'{"url":"http://example.org/bad-xlsx"}',
         b"200 OK",
         content_type_header,
         b"",
         bogus_body,
     ])
     # The message quotes the first bytes of the body as a bytes repr.
     expected_error = (
         "Error reading Excel file: Unsupported format, or corrupt "
         'file: Expected BOF record; found b"ce\\xc3\\xa7i n\'"'
     )
     with tempfile_context("fetch-") as tf:
         tf.write_bytes(gzip.compress(http_log))
         fetched = FetchResult(tf)
         result = render(pd.DataFrame(), P(), fetch_result=fetched)
         assert_process_result_equal(result, expected_error)
Example #5
0
    def test_render_xlsx(self):
        # Reference table: whatever pandas itself reads from the fixture.
        xlsx_table = pd.read_excel(mock_xlsx_path)
        # Raw bytes of the same fixture, used as the fetched response body.
        with open(mock_xlsx_path, "rb") as fixture:
            xlsx_bytes = fixture.read()

        content_type_header = b"Content-Type: " + XLSX_MIME_TYPE.encode("latin1")
        http_log = b"\r\n".join([
            b'{"url":"http://example.org/xlsx"}',
            b"200 OK",
            content_type_header,
            b"",
            xlsx_bytes,
        ])

        with tempfile_context("fetch-") as tf:
            tf.write_bytes(gzip.compress(http_log))
            fetched = FetchResult(tf)
            result = render(pd.DataFrame(), P(), fetch_result=fetched)
            assert_process_result_equal(result, xlsx_table)
Example #6
0
 def test_render_csv_use_ext_given_bad_content_type(self):
     # The server says text/plain (as https://raw.githubusercontent.com/
     # does), so the format has to be detected from the ".csv" filename
     # in the URL instead.
     http_log = b"\r\n".join([
         b'{"url":"http://raw.githubusercontent.com/user/project/x.csv"}',
         b"200 OK",
         b"Content-Type: text/plain",
         b"",
         b"A\n1\n2",
     ])
     with tempfile_context("fetch-") as tf:
         tf.write_bytes(gzip.compress(http_log))
         fetched = FetchResult(tf)
         params = P(has_header=True)
         result = render(pd.DataFrame(), params, fetch_result=fetched)
         expected = pd.DataFrame({"A": [1, 2]})
         assert_process_result_equal(result, expected)
Example #7
0
 def test_render_csv_handle_nonstandard_mime_type(self):
     # Sysadmins sometimes invent MIME types. Fake-but-unambiguous ones
     # seen in the wild are hard-coded to be rewritten (e.g.
     # 'application/csv' into 'text/csv'). Here the made-up type is
     # 'application/x-csv' and the URL has no ".csv" extension.
     http_log = b"\r\n".join([
         b'{"url":"http://example.com/the.data?format=csv&foo=bar"}',
         b"200 OK",
         b"Content-Type: application/x-csv",
         b"",
         b"A\n1\n2",
     ])
     with tempfile_context("fetch-") as tf:
         tf.write_bytes(gzip.compress(http_log))
         fetched = FetchResult(tf)
         params = P(has_header=True)
         result = render(pd.DataFrame(), params, fetch_result=fetched)
         expected = pd.DataFrame({"A": [1, 2]})
         assert_process_result_equal(result, expected)
Example #8
0
 def test_render_csv(self):
     # Plain text/csv response. The quoted field "y,z" contains the
     # delimiter and must stay one cell; column B therefore holds text,
     # so its second value is the string "2" rather than a number.
     http_log = b"\r\n".join([
         b'{"url":"http://example.org/x"}',
         b"200 OK",
         b"Content-Type: text/csv; charset=utf-8",
         b"",
         b'A,B\n0,"y,z"\n1,2',
     ])
     with tempfile_context("fetch-") as tf:
         tf.write_bytes(gzip.compress(http_log))
         fetched = FetchResult(tf)
         params = P(has_header=True)
         result = render(pd.DataFrame(), params, fetch_result=fetched)
         expected = pd.DataFrame({
             "A": [0, 1],
             "B": ["y,z", "2"],
         })
         assert_process_result_equal(result, expected)
Example #9
0
 def test_render_json(self):
     # An application/json response holding an array of records should
     # render as a table with one column per key.
     http_log = b"\r\n".join([
         b'{"url":"http://example.com/api/foo"}',
         b"200 OK",
         b"Content-Type: application/json",
         b"",
         b'[{"A":1},{"A": 2}]',
     ])
     with tempfile_context("fetch-") as tf:
         tf.write_bytes(gzip.compress(http_log))
         fetched = FetchResult(tf)
         # has_header is ignored for JSON: False must not change the result.
         params = P(has_header=False)
         result = render(pd.DataFrame(), params, fetch_result=fetched)
         expected = pd.DataFrame({"A": [1, 2]})
         assert_process_result_equal(result, expected)
Example #10
0
 def test_render_empty_process_result(self):
     # A default-constructed (deprecated) ProcessResult should render as
     # an empty table, even with has_header=False.
     empty_fetch = ProcessResult()
     params = P(has_header=False)
     result = render(pd.DataFrame(), params, fetch_result=empty_fetch)
     expected = pd.DataFrame()
     assert_process_result_equal(result, expected)
Example #11
0
 def test_render_error_process_result(self):
     # An error carried by a (deprecated) ProcessResult fetch_result is
     # expected to be passed through by render() unchanged.
     failed_fetch = ProcessResult(error="hi")
     result = render(pd.DataFrame(), P(), fetch_result=failed_fetch)
     expected_error = "hi"
     assert_process_result_equal(result, expected_error)