def test_render_deprecated_process_result(self):
    # A fetch result stored as a (deprecated) ProcessResult is rendered as
    # its table when has_header=True.
    stored = ProcessResult(pd.DataFrame({"A": [1, 2]}))
    params = P(has_header=True)

    result = render(pd.DataFrame(), params, fetch_result=stored)

    expected = pd.DataFrame({"A": [1, 2]})
    assert_process_result_equal(result, expected)
def test_render_deprecated_process_result_and_has_header_false(self):
    # The deprecated loadurl loaded the original data, so this conversion is
    # lossy. This test covers the "happy path": the values went from
    # text => number => text and their formatting happened to stay intact.
    stored = ProcessResult(pd.DataFrame({"A": [1, 2]}))
    params = P(has_header=False)

    result = render(pd.DataFrame(), params, fetch_result=stored)

    # Deprecated: columns are numbered, and the old header becomes a data row.
    expected = pd.DataFrame({"0": ["A", "1", "2"]})
    assert_process_result_equal(result, expected)
def test_render_json_invalid_json(self):
    # A response declared as application/json whose body is not JSON should
    # render as an error message rather than a table.
    http_lines = [
        b'{"url":"http://example.com/x.json"}',
        b"200 OK",
        b"Content-Type: application/json",
        b"",
        b"not json",
    ]
    with tempfile_context("fetch-") as tf:
        payload = gzip.compress(b"\r\n".join(http_lines))
        tf.write_bytes(payload)
        result = render(pd.DataFrame(), P(), fetch_result=FetchResult(tf))
        assert_process_result_equal(
            result, "JSON lexical error: invalid string in json text."
        )
def test_render_xlsx_bad_content(self):
    # The response claims the XLSX MIME type but the body is plain UTF-8
    # text; render should report the Excel reader's error message.
    http_lines = [
        b'{"url":"http://example.org/bad-xlsx"}',
        b"200 OK",
        b"Content-Type: " + XLSX_MIME_TYPE.encode("latin1"),
        b"",
        "ceçi n'est pas une .xlsx".encode("utf-8"),
    ]
    expected_error = (
        "Error reading Excel file: Unsupported format, or corrupt "
        'file: Expected BOF record; found b"ce\\xc3\\xa7i n\'"'
    )
    with tempfile_context("fetch-") as tf:
        payload = gzip.compress(b"\r\n".join(http_lines))
        tf.write_bytes(payload)
        result = render(pd.DataFrame(), P(), fetch_result=FetchResult(tf))
        assert_process_result_equal(result, expected_error)
def test_render_xlsx(self):
    # Serve the mock workbook's raw bytes as an XLSX response and expect the
    # same table pandas reads directly from that file.
    with open(mock_xlsx_path, "rb") as workbook:
        workbook_bytes = workbook.read()
    expected = pd.read_excel(mock_xlsx_path)

    http_lines = [
        b'{"url":"http://example.org/xlsx"}',
        b"200 OK",
        b"Content-Type: " + XLSX_MIME_TYPE.encode("latin1"),
        b"",
        workbook_bytes,
    ]
    with tempfile_context("fetch-") as tf:
        payload = gzip.compress(b"\r\n".join(http_lines))
        tf.write_bytes(payload)
        result = render(pd.DataFrame(), P(), fetch_result=FetchResult(tf))
        assert_process_result_equal(result, expected)
def test_render_csv_use_ext_given_bad_content_type(self):
    # The server says text/plain (as https://raw.githubusercontent.com/
    # does), so the ".csv" in the URL is what identifies the format.
    http_lines = [
        b'{"url":"http://raw.githubusercontent.com/user/project/x.csv"}',
        b"200 OK",
        b"Content-Type: text/plain",
        b"",
        b"A\n1\n2",
    ]
    expected = pd.DataFrame({"A": [1, 2]})
    with tempfile_context("fetch-") as tf:
        payload = gzip.compress(b"\r\n".join(http_lines))
        tf.write_bytes(payload)
        result = render(
            pd.DataFrame(), P(has_header=True), fetch_result=FetchResult(tf)
        )
        assert_process_result_equal(result, expected)
def test_render_csv_handle_nonstandard_mime_type(self):
    # Transform 'application/csv' into 'text/csv', etc.
    #
    # Sysadmins sometimes invent MIME types. Fake MIME types seen in the wild
    # that seem unambiguous are rewritten; here 'application/x-csv' must be
    # parsed as CSV even though the URL path has no ".csv" extension.
    http_lines = [
        b'{"url":"http://example.com/the.data?format=csv&foo=bar"}',
        b"200 OK",
        b"Content-Type: application/x-csv",
        b"",
        b"A\n1\n2",
    ]
    expected = pd.DataFrame({"A": [1, 2]})
    with tempfile_context("fetch-") as tf:
        payload = gzip.compress(b"\r\n".join(http_lines))
        tf.write_bytes(payload)
        result = render(
            pd.DataFrame(), P(has_header=True), fetch_result=FetchResult(tf)
        )
        assert_process_result_equal(result, expected)
def test_render_csv(self):
    # Plain text/csv response with a header row and a quoted field that
    # contains the delimiter.
    http_lines = [
        b'{"url":"http://example.org/x"}',
        b"200 OK",
        b"Content-Type: text/csv; charset=utf-8",
        b"",
        b'A,B\n0,"y,z"\n1,2',
    ]
    expected = pd.DataFrame({"A": [0, 1], "B": ["y,z", "2"]})
    with tempfile_context("fetch-") as tf:
        payload = gzip.compress(b"\r\n".join(http_lines))
        tf.write_bytes(payload)
        result = render(
            pd.DataFrame(), P(has_header=True), fetch_result=FetchResult(tf)
        )
        assert_process_result_equal(result, expected)
def test_render_json(self):
    # An application/json response holding an array of records should render
    # as a table with one column per key.
    http_lines = [
        b'{"url":"http://example.com/api/foo"}',
        b"200 OK",
        b"Content-Type: application/json",
        b"",
        b'[{"A":1},{"A": 2}]',
    ]
    expected = pd.DataFrame({"A": [1, 2]})
    with tempfile_context("fetch-") as tf:
        payload = gzip.compress(b"\r\n".join(http_lines))
        tf.write_bytes(payload)
        # has_header is ignored for JSON input
        params = P(has_header=False)
        result = render(pd.DataFrame(), params, fetch_result=FetchResult(tf))
        assert_process_result_equal(result, expected)
def test_render_empty_process_result(self):
    # A default-constructed (deprecated) ProcessResult renders as an empty
    # table, even with has_header=False.
    stored = ProcessResult()
    params = P(has_header=False)

    result = render(pd.DataFrame(), params, fetch_result=stored)

    assert_process_result_equal(result, pd.DataFrame())
def test_render_error_process_result(self):
    # An error stored in a (deprecated) ProcessResult is passed through as
    # the render error.
    stored = ProcessResult(error="hi")

    result = render(pd.DataFrame(), P(), fetch_result=stored)

    assert_process_result_equal(result, "hi")