async def test_not_found(self):
    # Register a 404 response for the file id in export_path_responses.
    file_id = "aushwyhtbndh7365YHALsdfsdf987IBHJB98uc9uisdj"
    self.export_path_responses[file_id] = web.Response(status=404)

    outcome = await fetch(P(), secrets=secrets(DEFAULT_SECRET))

    expected_message = "File not found. Please choose a different file."
    assert_process_result_equal(outcome, expected_message)
def test_render_deprecated_process_result(self):
    # The stored fetch result here is a ProcessResult wrapping a DataFrame,
    # not a FetchResult.
    input_table = pd.DataFrame()
    params = P(has_header=True)
    stored = ProcessResult(pd.DataFrame({"A": [1, 2]}))

    rendered = render(input_table, params, fetch_result=stored)

    assert_process_result_equal(rendered, pd.DataFrame({"A": [1, 2]}))
async def test_invalid_auth_error(self):
    # Register a 401 response for the file id in export_path_responses.
    file_id = "aushwyhtbndh7365YHALsdfsdf987IBHJB98uc9uisdj"
    self.export_path_responses[file_id] = web.Response(status=401)

    outcome = await fetch(P(), secrets=secrets(DEFAULT_SECRET))

    expected_message = "Invalid credentials. Please reconnect to Google Drive."
    assert_process_result_equal(outcome, expected_message)
async def test_no_access_error(self):
    # Register a 403 response for the file id in export_path_responses.
    file_id = "aushwyhtbndh7365YHALsdfsdf987IBHJB98uc9uisdj"
    self.export_path_responses[file_id] = web.Response(status=403)

    outcome = await fetch(P(), secrets=secrets(DEFAULT_SECRET))

    expected_message = "You chose a file your logged-in user cannot access. Please reconnect to Google Drive or choose a different file."
    assert_process_result_equal(outcome, expected_message)
async def test_unhandled_http_error(self):
    # The response claims gzip encoding, but the body b"hi" is not gzip data.
    file_id = "aushwyhtbndh7365YHALsdfsdf987IBHJB98uc9uisdj"
    broken_response = web.Response(
        body=b"hi", headers={"Content-Encoding": "gzip"}
    )
    self.export_path_responses[file_id] = broken_response

    outcome = await fetch(P(), secrets=secrets(DEFAULT_SECRET))

    # googlesheet should pass through aiohttp's message
    expected_message = "Error during GDrive request: 400, message='Can not decode content-encoding: gzip'"
    assert_process_result_equal(outcome, expected_message)
def test_accept_and_return_dataframe(self):
    # The user script is written as module-level source: blank first line,
    # `def` at column 0 on line 2, indented body from line 3.
    code = """
def process(table):
    return table * 2
"""
    result = eval_process(code, pd.DataFrame({"A": [1, 2]}))

    # Expect the doubled table and an empty error string.
    assert_process_result_equal(
        result, (pd.DataFrame({"A": [2, 4]}), "", EMPTY_OUTPUT)
    )
async def test_fetch_native_sheet(self):
    # Register a CSV body for the file id in export_path_responses.
    file_id = "aushwyhtbndh7365YHALsdfsdf987IBHJB98uc9uisdj"
    self.export_path_responses[file_id] = web.Response(body=b"A,B\nx,y\nz,a")

    outcome = await fetch(P(), secrets=secrets(DEFAULT_SECRET))

    expected_table = pd.DataFrame({"A": ["x", "z"], "B": ["y", "a"]})
    assert_process_result_equal(outcome, expected_table)
def test_has_numpy_as_np(self):
    # The script uses `np` (and `pd`) without importing them itself.
    # Script layout: blank first line, `def` on line 2, body on line 3.
    code = """
def process(table):
    return pd.DataFrame({'A': np.array([1, 2])})
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    assert_process_result_equal(
        result, (pd.DataFrame({"A": [1, 2]}), "", EMPTY_OUTPUT)
    )
def test_has_pandas_as_pd(self):
    # The script uses `pd` without importing it itself.
    # Script layout: blank first line, `def` on line 2, body on line 3.
    code = """
def process(table):
    return pd.DataFrame({'A': [1, 2]})
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    assert_process_result_equal(
        result, (pd.DataFrame({"A": [1, 2]}), "", EMPTY_OUTPUT)
    )
def test_return_str_for_error(self):
    # Script layout: blank first line, `def` on line 2, body on line 3.
    code = """
def process(table):
    return 'hi'
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    # The returned string is expected both as the error text and as the
    # "output" entry of the JSON part.
    assert_process_result_equal(
        result, (EMPTY_DATAFRAME, "hi", {"output": "hi"})
    )
def test_has_math(self):
    # The script uses `math` without importing it itself.
    # Script layout: blank first line, `def` on line 2, body on line 3.
    code = """
def process(table):
    return str(math.sqrt(4))
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    assert_process_result_equal(
        result, (EMPTY_DATAFRAME, "2.0", {"output": "2.0"})
    )
def test_bad_process_signature(self):
    # process() here takes two parameters instead of one.
    # Script layout: blank first line, `def` on line 2, body on line 3.
    code = """
def process(table, params):
    return table
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    text = "Please make your process(table) function accept exactly 1 argument"
    assert_process_result_equal(
        result, (EMPTY_DATAFRAME, text, {"output": text})
    )
def test_missing_process(self):
    # The script defines `xprocess`, so no function named `process` exists.
    # Script layout: blank first line, `def` on line 2, body on line 3.
    code = """
def xprocess(table):
    return table
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    text = 'Please define a "process(table)" function'
    assert_process_result_equal(
        result, (EMPTY_DATAFRAME, text, {"output": text})
    )
def test_syntax_error(self):
    # The unclosed call sits on line 3 of the script (blank first line,
    # `def` on line 2), which is the line the expected message cites.
    code = """
def process(table):
    return ta(
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    text = "Line 3: unexpected EOF while parsing (your code, line 3)"
    assert_process_result_equal(
        result, (EMPTY_DATAFRAME, text, {"output": text})
    )
def test_render_deprecated_process_result_and_has_header_false(self):
    # Per the original note, the deprecated loadurl path is lossy.
    # This covers the "happy path" where a text => number => text round
    # trip miraculously left the formatting intact.
    input_table = pd.DataFrame()
    params = P(has_header=False)
    stored = ProcessResult(pd.DataFrame({"A": [1, 2]}))

    rendered = render(input_table, params, fetch_result=stored)

    # Deprecated: columns are numbered
    expected_table = pd.DataFrame({"0": ["A", "1", "2"]})
    assert_process_result_equal(rendered, expected_table)
def test_builtins(self):
    # spot-check: do `list`, `sum` and `str` work the way we expect?
    # Script layout: blank first line, `def` on line 2, body on line 3.
    code = """
def process(table):
    return str(sum(list([1, 2, 3])))
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    assert_process_result_equal(
        result, (EMPTY_DATAFRAME, "6", {"output": "6"})
    )
def test_invalid_retval(self):
    # process() returns None, which is neither a DataFrame nor a string.
    # Script layout: blank first line, `def` on line 2, body on line 3.
    code = """
def process(table):
    return None
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    text = (
        "Please make process(table) return a pd.DataFrame. "
        "(Yours returned a NoneType.)"
    )
    assert_process_result_equal(
        result, (EMPTY_DATAFRAME, text, {"output": text})
    )
def test_render_json_invalid_json(self):
    # Stored fetch file: URL header line, status line, Content-Type header,
    # blank line, then the body -- CRLF-joined and gzip-compressed.
    http_lines = [
        b'{"url":"http://example.com/x.json"}',
        b"200 OK",
        b"Content-Type: application/json",
        b"",
        b"not json",
    ]
    payload = gzip.compress(b"\r\n".join(http_lines))

    with tempfile_context("fetch-") as tf:
        tf.write_bytes(payload)
        rendered = render(pd.DataFrame(), P(), fetch_result=FetchResult(tf))
        expected_message = "JSON lexical error: invalid string in json text."
        assert_process_result_equal(rendered, expected_message)
def test_print_is_captured(self):
    # Each print() is its own statement on its own line of the script
    # (blank first line, `def` on line 2, body from line 3).
    code = """
def process(table):
    print('hello')
    print('world')
    return table
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    # Both printed lines are expected in the "output" entry, in order.
    assert_process_result_equal(
        result, {"json": {"output": "hello\nworld\n"}}
    )
def test_import(self):
    # The script has a module-level import, then process() on later lines.
    code = """
from typing import Dict
def process(table):
    x: Dict[str, str] = {"x": "y"}
    return list(x.keys())[0]
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    assert_process_result_equal(
        result, (EMPTY_DATAFRAME, "x", {"output": "x"})
    )
async def test_fetch_tsv_file(self):
    # Register a tab-separated body for the file id in file_path_responses.
    file_id = "aushwyhtbndh7365YHALsdfsdf987IBHJB98uc9uisdj"
    self.file_path_responses[file_id] = web.Response(
        body=b"A\tB\nx\ty\nz\tb"
    )

    # Same as default_file, but with a TSV MIME type.
    tsv_file = {**default_file, "mimeType": "text/tab-separated-values"}
    outcome = await fetch(P(file=tsv_file), secrets=secrets(DEFAULT_SECRET))

    expected_table = pd.DataFrame({"A": ["x", "z"], "B": ["y", "b"]})
    assert_process_result_equal(outcome, expected_table)
async def test_fetch_xls_file(self):
    # Serve the example.xls fixture stored next to this test module.
    file_id = "aushwyhtbndh7365YHALsdfsdf987IBHJB98uc9uisdj"
    fixture_path = Path(__file__).parent / "test_data" / "example.xls"
    self.file_path_responses[file_id] = web.FileResponse(fixture_path)

    # Same as default_file, but with the legacy Excel MIME type.
    xls_file = {**default_file, "mimeType": "application/vnd.ms-excel"}
    outcome = await fetch(P(file=xls_file), secrets=secrets(DEFAULT_SECRET))

    expected_table = pd.DataFrame({"foo": [1, 2], "bar": [2, 3]})
    assert_process_result_equal(outcome, expected_table)
def test_error_during_process(self):
    # The failing call sits on line 3 of the script (blank first line,
    # `def` on line 2), which is the line the expected output cites.
    code = """
def process(table):
    return ta()
"""
    result = eval_process(code, EMPTY_DATAFRAME)

    # Expected traceback text, one entry per line.
    # NOTE(review): the two-space indent before `File` follows the standard
    # Python traceback layout -- confirm against eval_process's real output.
    trace = """Traceback (most recent call last):
  File "your code", line 3, in process
NameError: name 'ta' is not defined
"""
    text = "Line 3: NameError: name 'ta' is not defined"
    assert_process_result_equal(
        result, (EMPTY_DATAFRAME, text, {"output": trace + text})
    )
def test_render_csv_use_ext_given_bad_content_type(self):
    # Use text/plain type and rely on filename detection, as
    # https://raw.githubusercontent.com/ does
    http_lines = [
        b'{"url":"http://raw.githubusercontent.com/user/project/x.csv"}',
        b"200 OK",
        b"Content-Type: text/plain",
        b"",
        b"A\n1\n2",
    ]
    payload = gzip.compress(b"\r\n".join(http_lines))

    with tempfile_context("fetch-") as tf:
        tf.write_bytes(payload)
        rendered = render(
            pd.DataFrame(), P(has_header=True), fetch_result=FetchResult(tf)
        )
        assert_process_result_equal(rendered, pd.DataFrame({"A": [1, 2]}))
def test_render_xlsx_bad_content(self):
    # The headers declare the XLSX MIME type, but the body is plain UTF-8
    # text rather than a spreadsheet.
    content_type_header = b"Content-Type: " + XLSX_MIME_TYPE.encode("latin1")
    bogus_body = "ceçi n'est pas une .xlsx".encode("utf-8")
    http_lines = [
        b'{"url":"http://example.org/bad-xlsx"}',
        b"200 OK",
        content_type_header,
        b"",
        bogus_body,
    ]
    payload = gzip.compress(b"\r\n".join(http_lines))

    with tempfile_context("fetch-") as tf:
        tf.write_bytes(payload)
        rendered = render(pd.DataFrame(), P(), fetch_result=FetchResult(tf))
        expected_message = (
            "Error reading Excel file: Unsupported format, or corrupt "
            'file: Expected BOF record; found b"ce\\xc3\\xa7i n\'"'
        )
        assert_process_result_equal(rendered, expected_message)
def test_render_has_header_false(self):
    # TODO "no first row" should be a parse option. Fetch should store
    # raw data, and render should parse.
    input_table = pd.DataFrame()
    params = P(has_header=False)
    stored = ProcessResult(pd.DataFrame({"A": [1], "B": [2]}))

    rendered = render(input_table, params, fetch_result=stored)

    # Expect numbered columns, with the former header as the first row.
    expected_table = pd.DataFrame({"0": ["A", "1"], "1": ["B", "2"]})
    assert_process_result_equal(rendered, expected_table)
def test_render_xlsx(self):
    # Read the fixture twice: as raw bytes for the fake HTTP body, and via
    # pandas for the expected table.
    with open(mock_xlsx_path, "rb") as f:
        xlsx_bytes = f.read()
    xlsx_table = pd.read_excel(mock_xlsx_path)

    content_type_header = b"Content-Type: " + XLSX_MIME_TYPE.encode("latin1")
    http_lines = [
        b'{"url":"http://example.org/xlsx"}',
        b"200 OK",
        content_type_header,
        b"",
        xlsx_bytes,
    ]
    payload = gzip.compress(b"\r\n".join(http_lines))

    with tempfile_context("fetch-") as tf:
        tf.write_bytes(payload)
        rendered = render(pd.DataFrame(), P(), fetch_result=FetchResult(tf))
        assert_process_result_equal(rendered, xlsx_table)
def test_render_csv(self):
    # The CSV body includes a quoted field that itself contains a comma.
    http_lines = [
        b'{"url":"http://example.org/x"}',
        b"200 OK",
        b"Content-Type: text/csv; charset=utf-8",
        b"",
        b'A,B\n0,"y,z"\n1,2',
    ]
    payload = gzip.compress(b"\r\n".join(http_lines))

    with tempfile_context("fetch-") as tf:
        tf.write_bytes(payload)
        rendered = render(
            pd.DataFrame(), P(has_header=True), fetch_result=FetchResult(tf)
        )
        expected_table = pd.DataFrame({"A": [0, 1], "B": ["y,z", "2"]})
        assert_process_result_equal(rendered, expected_table)
def test_render_csv_handle_nonstandard_mime_type(self):
    # Transform 'application/csv' into 'text/csv', etc.
    #
    # Sysadmins sometimes invent MIME types. We hard-code to rewrite fake
    # MIME types we've seen in the wild that seem unambiguous.
    http_lines = [
        b'{"url":"http://example.com/the.data?format=csv&foo=bar"}',
        b"200 OK",
        b"Content-Type: application/x-csv",
        b"",
        b"A\n1\n2",
    ]
    payload = gzip.compress(b"\r\n".join(http_lines))

    with tempfile_context("fetch-") as tf:
        tf.write_bytes(payload)
        rendered = render(
            pd.DataFrame(), P(has_header=True), fetch_result=FetchResult(tf)
        )
        assert_process_result_equal(rendered, pd.DataFrame({"A": [1, 2]}))
def test_render_json(self):
    # The body is a JSON array of records served as application/json.
    http_lines = [
        b'{"url":"http://example.com/api/foo"}',
        b"200 OK",
        b"Content-Type: application/json",
        b"",
        b'[{"A":1},{"A": 2}]',
    ]
    payload = gzip.compress(b"\r\n".join(http_lines))

    with tempfile_context("fetch-") as tf:
        tf.write_bytes(payload)
        # has_header is ignored
        params = P(has_header=False)
        rendered = render(pd.DataFrame(), params, fetch_result=FetchResult(tf))
        assert_process_result_equal(rendered, pd.DataFrame({"A": [1, 2]}))