def _test(self, table: pd.DataFrame, params: Dict[str, Any] = None,
          expected_table: pd.DataFrame = None, expected_error: str = ''):
    """Render Formula on `table` and assert the sanitized outcome.

    Args:
        table: input dataframe passed to ``Formula.render``.
        params: keyword arguments used to build ``P``; ``None`` (the
            default) means no parameters.
        expected_table: dataframe the render should produce; ``None`` (the
            default) means an empty ``pd.DataFrame()``.
        expected_error: error message the render should produce.
    """
    # Build the defaults per call. A default written in the signature is
    # evaluated once and shared by every call, and the expected table is
    # handed to sanitize_in_place() below.
    if params is None:
        params = {}
    if expected_table is None:
        expected_table = pd.DataFrame()

    result = ProcessResult.coerce(Formula.render(P(**params), table))
    result.sanitize_in_place()

    # Sanitize the expectation the same way as the actual result so both
    # sides of the comparison went through identical processing.
    expected = ProcessResult(expected_table, expected_error)
    expected.sanitize_in_place()

    self.assertEqual(result.error, expected.error)
    assert_frame_equal(result.dataframe, expected.dataframe)
def _module_dispatch_render_static(dispatch, params, table, fetch_result):
    """Render through `dispatch` and normalize whatever comes back.

    Calls ``dispatch.render(params, table, fetch_result=fetch_result)``. If
    that raises any ``Exception``, the traceback is printed and an error
    result reading ``Internal error: <exception>`` stands in for the render
    output. Either way the value is coerced to a ``ProcessResult``, truncated
    if too big, sanitized in place (in that order) and returned.
    """
    try:
        rendered = dispatch.render(params, table, fetch_result=fetch_result)
    except Exception as exc:
        # Broad catch: any render failure becomes an error result instead
        # of propagating to the caller.
        traceback.print_exc()
        rendered = ProcessResult(error=f'Internal error: {exc}')

    outcome = ProcessResult.coerce(rendered)
    outcome.truncate_in_place_if_too_big()
    outcome.sanitize_in_place()
    return outcome
def test_render(in_table, patch_json, out_table=None, out_error=''):
    """Render EditCells on `in_table` and assert the sanitized outcome.

    Args:
        in_table: input dataframe; it is sanitized in place before rendering.
        patch_json: value passed to ``MockParams`` as ``celledits``.
        out_table: dataframe the render should produce; ``None`` (the
            default) means an empty ``pd.DataFrame()``.
        out_error: error message the render should produce.
    """
    # Build the default per call. A ``pd.DataFrame()`` written in the
    # signature would be one shared object that every call sanitizes in
    # place.
    if out_table is None:
        out_table = pd.DataFrame()

    sanitize_dataframe(in_table)

    result = EditCells.render(MockParams(celledits=patch_json), in_table)
    result = ProcessResult.coerce(result)
    result.sanitize_in_place()

    expected = ProcessResult(out_table, out_error)
    expected.sanitize_in_place()

    assert result.error == expected.error
    assert_frame_equal(result.dataframe, expected.dataframe)
def test_render(in_table, patch_json, out_table=None, out_error=''):
    """Render EditCells on `in_table` and assert the sanitized outcome.

    Args:
        in_table: input dataframe; it is sanitized in place before rendering.
        patch_json: value used to construct the ``MockWfModule``.
        out_table: dataframe the render should produce; ``None`` (the
            default) means an empty ``pd.DataFrame()``.
        out_error: error message the render should produce.
    """
    # Build the default per call. A ``pd.DataFrame()`` written in the
    # signature would be one shared object that every call sanitizes in
    # place.
    if out_table is None:
        out_table = pd.DataFrame()

    wfm = MockWfModule(patch_json)
    sanitize_dataframe(in_table)

    result = ProcessResult.coerce(EditCells.render(wfm, in_table))
    result.sanitize_in_place()

    expected = ProcessResult(out_table, out_error)
    expected.sanitize_in_place()

    assert result.error == expected.error
    assert_frame_equal(result.dataframe, expected.dataframe)
def _test_render(self, in_table: pd.DataFrame, column: str,
                 edits_json: str, expected_out: pd.DataFrame = None,
                 expected_error: str = '') -> None:
    """Test that the render method works (kinda an integration test).

    Args:
        in_table: input dataframe passed to ``Refine.render``.
        column: first argument used to construct the ``MockWfModule``.
        edits_json: second argument used to construct the ``MockWfModule``.
        expected_out: dataframe the render should produce; ``None`` (the
            default) means an empty ``pd.DataFrame()``.
        expected_error: error message the render should produce.
    """
    # Build the default per call. A ``pd.DataFrame()`` written in the
    # signature would be one shared object that every call sanitizes in
    # place.
    if expected_out is None:
        expected_out = pd.DataFrame()

    wf_module = MockWfModule(column, edits_json)
    result = Refine.render(wf_module, in_table)
    result.sanitize_in_place()

    expected = ProcessResult(expected_out, expected_error)
    expected.sanitize_in_place()

    self.assertEqual(result.error, expected.error)
    assert_frame_equal(result.dataframe, expected.dataframe)
def _test_refine_spec_apply(self, in_table: pd.DataFrame, column: str,
                            spec: RefineSpec,
                            expected_out: pd.DataFrame = None,
                            expected_error: str = '') -> None:
    """Render and assert the output is as expected.

    Args:
        in_table: input dataframe passed to ``spec.apply``.
        column: column argument passed to ``spec.apply``.
        spec: the ``RefineSpec`` under test.
        expected_out: dataframe ``spec.apply`` should produce; ``None`` (the
            default) means an empty ``pd.DataFrame()``.
        expected_error: error message ``spec.apply`` should produce.
    """
    # Build the default per call. A ``pd.DataFrame()`` written in the
    # signature would be one shared object that every call sanitizes in
    # place.
    if expected_out is None:
        expected_out = pd.DataFrame()

    result = ProcessResult.coerce(spec.apply(in_table, column))

    # Sanitize result+expected, so if sanitize changes these tests may
    # break (which is what we want).
    result.sanitize_in_place()
    expected = ProcessResult(expected_out, expected_error)
    expected.sanitize_in_place()

    self.assertEqual(result.error, expected.error)
    assert_frame_equal(result.dataframe, expected.dataframe)
def test_load_json(self):
    # Run the module event against a patched requests.get that serves the
    # fixture JSON, then check commit_result was called with the sanitized
    # table parsed directly from that same fixture.
    self.wf_module.url = 'http://test.com/the.json'

    # use a complex example with nested data
    fname = os.path.join(settings.BASE_DIR,
                         'server/tests/test_data/sfpd.json')
    # Read through a context manager so the file handle is closed right
    # away instead of being left for the garbage collector.
    with open(fname) as f:
        sfpd_json = f.read()

    # OrderedDict otherwise cols get sorted
    sfpd_table = pd.DataFrame(json.loads(sfpd_json,
                                         object_pairs_hook=OrderedDict))
    expected = ProcessResult(sfpd_table)
    expected.sanitize_in_place()

    with patch('requests.get', respond(sfpd_json, 'application/json')):
        run_event(self.wf_module)
        self.commit_result.assert_called_with(self.wf_module, expected)
def test_render_truncate_and_sanitize(self):
    # LoadedModule.render() is expected to call truncate first and
    # sanitize second on the result its render_impl returns.
    invocations = []

    def record_truncate():
        invocations.append('truncate')

    def record_sanitize():
        invocations.append('sanitize')

    stub_result = ProcessResult(pd.DataFrame({'A': [1]}))
    # Shadow both post-processing methods on this one instance so each call
    # is recorded rather than executed.
    stub_result.truncate_in_place_if_too_big = record_truncate
    stub_result.sanitize_in_place = record_sanitize

    loaded_module = LoadedModule('int', '1', False,
                                 render_impl=lambda _a, _b: stub_result)
    with self.assertLogs():
        loaded_module.render(MockParams(), pd.DataFrame(),
                             fetch_result=None)

    self.assertEqual(invocations, ['truncate', 'sanitize'])
def test_load_json(self):
    # fetch() of a JSON URL should equal the sanitized table built by
    # parsing the same fixture bytes directly.
    fixture_path = os.path.join(settings.BASE_DIR,
                                'server/tests/test_data/sfpd.json')
    with open(fixture_path, 'rb') as fixture:
        # TODO nix this big file and use a sensible unit test. This extra
        # computation merely tests that the code uses the same JSON-parsing
        # logic as the test.
        raw_json = fixture.read()

    # OrderedDict otherwise cols get sorted
    parsed = json.loads(raw_json, object_pairs_hook=OrderedDict)
    expected = ProcessResult(pd.DataFrame(parsed))
    expected.sanitize_in_place()

    # Replace the download helper with a fake built from the fixture bytes.
    fake_download = fake_spooled_data_from_url(raw_json, 'application/json',
                                               'utf-8')
    with patch('server.modules.utils.spooled_data_from_url', fake_download):
        fetch_result = fetch(url='http://test.com/the.json')

    self.assertEqual(fetch_result, expected)
def test_load_json(self):
    # Press the fetch button with requests.get patched to return the
    # fixture JSON, then check the executed module output equals the
    # sanitized table parsed directly from that same fixture.
    url = 'http://test.com/the.json'
    self.url_pval.set_value(url)
    self.url_pval.save()

    # use a complex example with nested data
    fname = os.path.join(settings.BASE_DIR,
                         'server/tests/test_data/sfpd.json')
    # Read through a context manager so the file handle is closed right
    # away instead of being left for the garbage collector.
    with open(fname) as f:
        sfpd_json = f.read()

    # OrderedDict otherwise cols get sorted
    sfpd_table = pd.DataFrame(
        json.loads(sfpd_json, object_pairs_hook=OrderedDict))
    expected = ProcessResult(sfpd_table)
    expected.sanitize_in_place()

    with patch('requests.get') as get:
        get.return_value = mock_text_response(sfpd_json, 'application/json')
        self.press_fetch_button()
        result = execute_wfmodule(self.wfmodule)
        self.assertEqual(result, expected)
def table_to_result(table):
    """Wrap `table` in a ProcessResult, sanitize it in place, return it."""
    wrapped = ProcessResult(table)
    # Sanitizing alters the dataframe.equals() result.
    wrapped.sanitize_in_place()
    return wrapped
class URLScraperTests(SimpleTestCase):
    """Fetch/render tests for URLScraper with scrape_urls() mocked out."""

    def setUp(self):
        super().setUp()

        self.urls = list(simple_result_table['url'])

        sanitized_urls = ProcessResult(url_table)
        sanitized_urls.sanitize_in_place()
        self.expected_url_table_result = sanitized_urls

    @staticmethod
    def _fake_scrape_urls(scraped_table):
        """Build an async stand-in for scrape_urls().

        The returned coroutine function modifies the table it is handed in
        place, copying the 'status' and 'html' columns from `scraped_table`,
        just like the real thing adds its results.
        """
        async def mock_scrapeurls(urls, table):
            table['status'] = scraped_table['status']
            table['html'] = scraped_table['html']

        return mock_scrapeurls

    # Checks that fetch + render on an input column produce the scraped
    # table. scrape_urls() itself is replaced by a mock here, so its error
    # cases (invalid urls etc.) are not exercised by this test.
    def test_scrape_column(self):
        wf_module = MockWfModule('Input column', 'url')
        scraped_table = simple_result_table.copy()
        fake_scraper = self._fake_scrape_urls(scraped_table)

        with patch('django.utils.timezone.now') as now:
            now.return_value = testnow

            with patch('server.modules.urlscraper.scrape_urls') as scrape:
                # call the mock function instead of the real scrape_urls()
                scrape.side_effect = fake_scraper

                # The URLs come from the previous module's cached output.
                upstream = MockWfModule('', '')
                upstream.cached_render_result = MockCachedRenderResult(
                    pd.DataFrame({'url': self.urls})
                )
                wf_module.previous = upstream

                fetch(wf_module)
                rendered = URLScraper.render(wf_module, pd.DataFrame())
                rendered = ProcessResult.coerce(rendered)
                self.assertEqual(rendered, ProcessResult(scraped_table))

    # Tests scraping from a list of URLs
    def test_scrape_list(self):
        url_list = '\n'.join([
            'http://a.com/file',
            'https://b.com/file2',
            'c.com/file/dir'  # Removed 'http://' to test the URL-fixing part
        ])
        wf_module = MockWfModule('List', '', url_list)
        scraped_table = simple_result_table.copy()
        fake_scraper = self._fake_scrape_urls(scraped_table)

        with patch('django.utils.timezone.now') as now:
            now.return_value = testnow

            with patch('server.modules.urlscraper.scrape_urls') as scrape:
                # call the mock function instead of the real scrape_urls()
                scrape.side_effect = fake_scraper

                fetch(wf_module)
                rendered = URLScraper.render(wf_module, pd.DataFrame())
                rendered = ProcessResult.coerce(rendered)
                self.assertEqual(rendered, ProcessResult(scraped_table))
class URLScraperTests(LoggedInTestCase):
    """Workflow-level tests for the urlscraper module, scrape_urls() mocked."""

    def setUp(self):
        super().setUp()  # log in

        self.scraped_table = simple_result_table
        self.urls = list(self.scraped_table['url'])

        # create a workflow that feeds our urls via PasteCSV into a URLScraper
        self.url_table = pd.DataFrame(self.urls, columns=['url'])
        sanitized_urls = ProcessResult(self.url_table)
        sanitized_urls.sanitize_in_place()
        self.expected_url_table_result = sanitized_urls

        url_csv = 'url\n' + '\n'.join(self.urls)
        self.wfmodule = load_and_add_module(
            'urlscraper',
            workflow=create_testdata_workflow(url_csv),
        )

    def press_fetch_button(self):
        version_param = get_param_by_id_name('version_select')
        self.client.post(f'/api/parameters/{version_param.id}/event')
        self.wfmodule.refresh_from_db()  # new last_relevant_workflow_id

    def _choose_url_source(self, menu, choice):
        """Save 'urlsource' as the index of `choice` in the '|'-split `menu`."""
        source_options = menu.split('|')
        source_pval = get_param_by_id_name('urlsource')
        source_pval.value = source_options.index(choice)
        source_pval.save()

    def _press_fetch_and_check_scraped(self):
        """Press fetch with mocked scraping and assert the scraped table.

        Both ``django.utils.timezone.now`` and ``scrape_urls`` are patched
        while the fetch button is pressed and the module is executed.
        """
        # modifies the table in place to add results, just like the real thing
        async def mock_scrapeurls(urls, table):
            table['status'] = self.scraped_table['status']
            table['html'] = self.scraped_table['html']

        with mock.patch('django.utils.timezone.now') as now:
            now.return_value = testnow

            with mock.patch(
                'server.modules.urlscraper.scrape_urls'
            ) as scrape:
                # call the mock function instead of the real scrape_urls()
                scrape.side_effect = mock_scrapeurls

                self.press_fetch_button()
                outcome = execute_wfmodule(self.wfmodule)
                self.assertEqual(outcome,
                                 ProcessResult(self.scraped_table))

    def test_initial_nop(self):
        outcome = execute_wfmodule(self.wfmodule)
        self.assertEqual(outcome, self.expected_url_table_result)

    def test_nop_with_initial_col_selection(self):
        # When a column is first selected and no scraping is performed, the
        # initial table should be returned
        self._choose_url_source("List of URLs|Load from column",
                                'Load from column')

        column_pval = get_param_by_id_name('urlcol')
        column_pval.value = 'url'
        column_pval.save()

        outcome = execute_wfmodule(self.wfmodule)
        self.assertEqual(outcome, self.expected_url_table_result)

    # Checks that pressing fetch with an input column selected produces the
    # scraped table. scrape_urls() itself is replaced by a mock here, so its
    # error cases (invalid urls etc.) are not exercised by this test.
    def test_scrape_column(self):
        self._choose_url_source("List|Input column", 'Input column')
        get_param_by_id_name('urlcol').set_value('url')

        self._press_fetch_and_check_scraped()

    # Tests scraping from a list of URLs
    def test_scrape_list(self):
        self._choose_url_source("List|Input column", 'List')

        url_list = '\n'.join([
            'http://a.com/file',
            'https://b.com/file2',
            'c.com/file/dir'  # Removed 'http://' to test the URL-fixing part
        ])
        get_param_by_id_name('urllist').set_value(url_list)

        self._press_fetch_and_check_scraped()
def test_sanitize(self):
    # After sanitize_in_place(), the list-valued cells are expected to
    # compare equal to their string forms ('[1]', '[2]').
    actual = ProcessResult(DataFrame({'foo': [[1], [2]]}))
    actual.sanitize_in_place()

    wanted = ProcessResult(DataFrame({'foo': ['[1]', '[2]']}))
    self.assertEqual(actual, wanted)