def test_scrape_list(self):
    """Fetch and render a newline-separated URL list with scraping mocked."""
    url_lines = [
        'http://a.com/file',
        'https://b.com/file2',
        # Scheme deliberately left off to exercise the URL-fixing part
        'c.com/file/dir',
    ]
    wf_module = MockWfModule('List', '', '\n'.join(url_lines))
    expected_table = simple_result_table.copy()

    # Stand-in for scrape_urls(): writes the result columns into the table
    # it is handed, in place.
    async def fake_scrape_urls(urls, table):
        table['status'] = expected_table['status']
        table['html'] = expected_table['html']

    with patch('django.utils.timezone.now') as now:
        now.return_value = testnow

        with patch('server.modules.urlscraper.scrape_urls') as scrape:
            # The real scrape_urls() is tested separately; swap in the fake
            scrape.side_effect = fake_scrape_urls

            fetch(wf_module)
            rendered = ProcessResult.coerce(
                URLScraper.render(wf_module, pd.DataFrame())
            )
            self.assertEqual(rendered, ProcessResult(expected_table))
def test_scrape_column(self):
    """Fetch and render URLs taken from an input column, scraping mocked."""
    wf_module = MockWfModule('Input column', 'url')
    expected_table = simple_result_table.copy()

    # Stand-in for scrape_urls(): adds the result columns to the table in
    # place, just like the real thing.
    async def fake_scrape_urls(urls, table):
        table['status'] = expected_table['status']
        table['html'] = expected_table['html']

    with patch('django.utils.timezone.now') as now:
        now.return_value = testnow

        with patch('server.modules.urlscraper.scrape_urls') as scrape:
            # The real scrape_urls() is tested separately; swap in the fake
            scrape.side_effect = fake_scrape_urls

            # Give the module an upstream step whose cached output holds
            # the 'url' column.
            upstream_table = pd.DataFrame({'url': self.urls})
            wf_module.previous = MockWfModule('', '')
            wf_module.previous.cached_render_result = \
                MockCachedRenderResult(upstream_table)

            fetch(wf_module)
            rendered = ProcessResult.coerce(
                URLScraper.render(wf_module, pd.DataFrame())
            )
            self.assertEqual(rendered, ProcessResult(expected_table))
def render(has_header, table, fetch_result):
    """Run UploadFile.render() and return a coerced, sanitized result."""
    params = MockParams(has_header=has_header)
    result = ProcessResult.coerce(
        UploadFile.render(params, table, fetch_result=fetch_result)
    )
    result.sanitize_in_place()
    return result
def test_coerce_dict_with_quickfix_dict(self):
    """A quick fix supplied as a plain dict coerces to a QuickFix."""
    table = DataFrame({'A': [1, 2]})
    raw = {
        'dataframe': table,
        'error': 'an error',
        'json': {'foo': 'bar'},
        'quick_fixes': [
            {
                'text': 'Hi',
                'action': 'prependModule',
                'args': ['texttodate', {'column': 'created_at'}],
            },
        ],
    }
    wanted_fix = QuickFix('Hi', 'prependModule',
                          ['texttodate', {'column': 'created_at'}])

    actual = ProcessResult.coerce(raw)

    self.assertEqual(
        actual,
        ProcessResult(table, 'an error', json={'foo': 'bar'},
                      quick_fixes=[wanted_fix])
    )
def _call_method(self, method_name: str, *args, **kwargs) -> ProcessResult:
    """
    Call module.method_name(*args, **kwargs) and coerce the result.

    Exceptions raised by the called method become error messages on the
    returned ProcessResult instead of propagating.

    Returns a ProcessResult that has been coerced, truncated if too big
    and sanitized.
    """
    method = getattr(self.module, method_name)
    try:
        out = method(*args, **kwargs)
    except Exception as e:
        from server.importmodulefromgithub import original_module_lineno

        # Catch exceptions in the module render function, and return error
        # message + line number to user
        exc_name = type(e).__name__
        frames = traceback.extract_tb(e.__traceback__)
        if len(frames) > 1:
            # frames[0] is the call just above; frames[1] is where the
            # exception occurred inside the module.
            frame = frames[1]
            fname = os.path.split(frame[0])[1]
            lineno = original_module_lineno(frame[1])
            error = f'{exc_name}: {str(e)} at line {lineno} of {fname}'
        else:
            # The call itself failed before entering module code (e.g. a
            # TypeError from mismatched arguments), so there is no module
            # frame to point at. Report the error without a location
            # rather than raising IndexError from this handler.
            error = f'{exc_name}: {str(e)}'
        return ProcessResult(error=error)

    out = ProcessResult.coerce(out)
    out.truncate_in_place_if_too_big()
    out.sanitize_in_place()
    return out
def test_coerce_dict_bad_quickfix_dict(self):
    """A quick-fix dict with an unexpected key makes coerce() raise."""
    malformed_fix = {
        'text': 'Hi',
        'action': 'prependModule',
        # 'arguments' instead of 'args' is what makes this dict invalid
        'arguments': ['texttodate', {'column': 'created_at'}],
    }
    raw = {
        'error': 'an error',
        'json': {'foo': 'bar'},
        'quick_fixes': [malformed_fix],
    }

    with self.assertRaises(ValueError):
        ProcessResult.coerce(raw)
def test_first_row_is_header(self):
    """With first_row_is_header set, row one becomes the column names."""
    # TODO make fetch_result _not_ a pd.DataFrame, so we don't lose info
    # when converting types here
    fetched = ProcessResult(pd.DataFrame(a_table.copy()))
    params = P(first_row_is_header=True)

    rendered = ProcessResult.coerce(
        ScrapeTable.render(params, pd.DataFrame(), fetch_result=fetched)
    )

    wanted_table = pd.DataFrame({'1': [2], '2': [3]})
    self.assertEqual(rendered, ProcessResult(pd.DataFrame(wanted_table)))
def test_render_empty_without_columns(self):
    """A zero-column fetch result still renders the tweet-table columns."""
    # An empty table might be stored as zero-column. This is a bug, but we
    # must handle it because we have actual data like this. We want to
    # output all the same columns as a tweet table.
    zero_column_fetch = ProcessResult(pd.DataFrame())
    params = P(querytype=1, query='cat')

    rendered = ProcessResult.coerce(
        Twitter.render(params, pd.DataFrame(),
                       fetch_result=zero_column_fetch)
    )

    assert_frame_equal(rendered.dataframe, mock_tweet_table[0:0])
def _module_dispatch_render_static(dispatch, params, table, fetch_result):
    """
    Call dispatch.render() and return a cleaned-up ProcessResult.

    Any exception from render() is printed and replaced by an
    'Internal error' result. Either way the outcome is coerced,
    truncated if too big and sanitized before being returned.
    """
    try:
        raw = dispatch.render(params, table, fetch_result=fetch_result)
    except Exception as err:
        traceback.print_exc()
        raw = ProcessResult(error=f'Internal error: {err}')

    processed = ProcessResult.coerce(raw)
    processed.truncate_in_place_if_too_big()
    processed.sanitize_in_place()
    return processed
def render(wf_module):
    """Run ScrapeTable.render() for wf_module and coerce the output.

    wf_module.fetch_result is passed along when the attribute exists;
    otherwise None is passed.
    """
    fetched = getattr(wf_module, 'fetch_result', None)
    raw = ScrapeTable.render(wf_module.get_params(), pd.DataFrame(),
                             fetch_result=fetched)
    return ProcessResult.coerce(raw)
def _test(self, table: pd.DataFrame, params: Dict[str, Any]=None,
          expected_table: pd.DataFrame=None, expected_error: str=''):
    """
    Render Formula on `table` and compare against the expected outcome.

    params: keyword arguments for P(); defaults to no parameters.
    expected_table: expected output; defaults to an empty DataFrame.
    expected_error: expected error message; defaults to ''.

    Both the actual and the expected result are sanitized before the
    comparison.
    """
    # Build the defaults per call: sanitize_in_place() below is handed the
    # expected table, so a default object shared across calls could leak
    # state from one test into the next.
    if params is None:
        params = {}
    if expected_table is None:
        expected_table = pd.DataFrame()

    result = ProcessResult.coerce(Formula.render(P(**params), table))
    result.sanitize_in_place()

    expected = ProcessResult(expected_table, expected_error)
    expected.sanitize_in_place()

    self.assertEqual(result.error, expected.error)
    assert_frame_equal(result.dataframe, expected.dataframe)
def test_render(in_table, patch_json, out_table=None, out_error=''):
    """
    Render EditCells on `in_table` and compare with the expected outcome.

    in_table: input DataFrame; it is sanitized in place before rendering.
    patch_json: value passed as the `celledits` parameter.
    out_table: expected output; defaults to an empty DataFrame.
    out_error: expected error message; defaults to ''.
    """
    # Build the default per call: sanitize_in_place() below is handed the
    # expected table, so a default object shared across calls could leak
    # state from one test into the next.
    if out_table is None:
        out_table = pd.DataFrame()

    sanitize_dataframe(in_table)

    result = EditCells.render(MockParams(celledits=patch_json), in_table)
    result = ProcessResult.coerce(result)
    result.sanitize_in_place()

    expected = ProcessResult(out_table, out_error)
    expected.sanitize_in_place()

    assert result.error == expected.error
    assert_frame_equal(result.dataframe, expected.dataframe)
def test_render(in_table, patch_json, out_table=None, out_error=''):
    """
    Render EditCells on `in_table` and compare with the expected outcome.

    in_table: input DataFrame; it is sanitized in place before rendering.
    patch_json: value used to build the MockWfModule given to render().
    out_table: expected output; defaults to an empty DataFrame.
    out_error: expected error message; defaults to ''.
    """
    # Build the default per call: sanitize_in_place() below is handed the
    # expected table, so a default object shared across calls could leak
    # state from one test into the next.
    if out_table is None:
        out_table = pd.DataFrame()

    wfm = MockWfModule(patch_json)
    sanitize_dataframe(in_table)

    result = ProcessResult.coerce(EditCells.render(wfm, in_table))
    result.sanitize_in_place()

    expected = ProcessResult(out_table, out_error)
    expected.sanitize_in_place()

    assert result.error == expected.error
    assert_frame_equal(result.dataframe, expected.dataframe)
def _test_refine_spec_apply(self, in_table: pd.DataFrame, column: str,
                            spec: RefineSpec,
                            expected_out: pd.DataFrame=None,
                            expected_error: str='') -> None:
    """
    Render and assert the output is as expected.

    in_table / column: arguments handed to spec.apply().
    expected_out: expected output; defaults to an empty DataFrame.
    expected_error: expected error message; defaults to ''.
    """
    # Build the default per call: sanitize_in_place() below is handed the
    # expected table, so a default object shared across calls could leak
    # state from one test into the next.
    if expected_out is None:
        expected_out = pd.DataFrame()

    result = ProcessResult.coerce(spec.apply(in_table, column))
    # Sanitize result+expected, so if sanitize changes these tests may
    # break (which is what we want).
    result.sanitize_in_place()

    expected = ProcessResult(expected_out, expected_error)
    expected.sanitize_in_place()

    self.assertEqual(result.error, expected.error)
    assert_frame_equal(result.dataframe, expected.dataframe)
def render(reorder_history_json, table):
    """Run ReorderFromTable.render() on a copy of table; coerce the output."""
    wf_module = MockWfModule(reorder_history_json)
    # Render against a copy so the caller's table is not the one passed in
    raw = ReorderFromTable.render(wf_module, table.copy())
    return ProcessResult.coerce(raw)
def render(wf_module, table):
    """Run CountByDate.render() and coerce its output to a ProcessResult."""
    raw = CountByDate.render(wf_module, table)
    return ProcessResult.coerce(raw)
def render(wf_module, table):
    """Run DuplicateColumn.render() and return a coerced, sanitized result."""
    processed = ProcessResult.coerce(DuplicateColumn.render(wf_module, table))
    # important: duplicate makes colname conflicts
    processed.sanitize_in_place()
    return processed
def test_coerce_tuple_dataframe_none_dict(self):
    """(dataframe, None, dict) coerces to a table with '' error and json."""
    table = DataFrame({'foo': ['bar']})
    wanted = ProcessResult(table, '', json={'a': 'b'})

    actual = ProcessResult.coerce((table, None, {'a': 'b'}))

    self.assertEqual(actual, wanted)
def test_coerce_tuple_dataframe_str(self):
    """(dataframe, str) coerces to a table plus that error message."""
    table = DataFrame({'foo': ['bar']})
    wanted = ProcessResult(dataframe=table, error='hi')

    actual = ProcessResult.coerce((table, 'hi'))

    self.assertEqual(actual, wanted)
def test_coerce_tuple_none_str_dict(self):
    """(None, str, dict) coerces to an error message plus json."""
    wanted = ProcessResult(error='hi', json={'a': 'b'})

    actual = ProcessResult.coerce((None, 'hi', {'a': 'b'}))

    self.assertEqual(actual, wanted)
def test_coerce_tuple_dataframe_none_none(self):
    """(dataframe, None, None) coerces to a result holding only the table."""
    table = DataFrame({'foo': ['bar']})
    wanted = ProcessResult(table)

    actual = ProcessResult.coerce((table, None, None))

    self.assertEqual(actual, wanted)
def render(colnames, table):
    """Run DuplicateColumn.render() and return a coerced, sanitized result."""
    params = MockParams(colnames=colnames)
    processed = ProcessResult.coerce(DuplicateColumn.render(params, table))
    # important: duplicate makes colname conflicts
    processed.sanitize_in_place()
    return processed
def render(params, table):
    """Run CountByDate.render() and coerce its output to a ProcessResult."""
    raw = CountByDate.render(params, table)
    return ProcessResult.coerce(raw)
def test_coerce_tuple_none_none_dict(self):
    """(None, None, dict) coerces to a result holding only json."""
    wanted = ProcessResult(json={'a': 'b'})

    actual = ProcessResult.coerce((None, None, {'a': 'b'}))

    self.assertEqual(actual, wanted)
def test_coerce_tuple_none_str_none(self):
    """(None, str, None) coerces to a result holding only the error."""
    wanted = ProcessResult(error='hi')

    actual = ProcessResult.coerce((None, 'hi', None))

    self.assertEqual(actual, wanted)
def test_coerce_tuple_none_none_none(self):
    """(None, None, None) coerces to a default-constructed ProcessResult."""
    wanted = ProcessResult()

    actual = ProcessResult.coerce((None, None, None))

    self.assertEqual(actual, wanted)
def test_coerce_bad_tuple(self):
    """A tuple with too many elements coerces to a result with an error."""
    result = ProcessResult.coerce(('foo', 'bar', 'baz', 'moo'))
    # assertIsNotNone() would also accept the empty string, which is what
    # an error-free result carries; require a non-empty message instead.
    self.assertTrue(result.error)
def _module_dispatch_render_static(dispatch, wf_module, table):
    """
    Call dispatch.render(wf_module, table) and clean up its output.

    The output is coerced to a ProcessResult, truncated if too big and
    sanitized before being returned.
    """
    processed = ProcessResult.coerce(dispatch.render(wf_module, table))
    processed.truncate_in_place_if_too_big()
    processed.sanitize_in_place()
    return processed
def test_coerce_2tuple_no_dataframe(self):
    """A 2-tuple whose first item is not a dataframe yields an error."""
    result = ProcessResult.coerce(('foo', 'bar'))
    # assertIsNotNone() would also accept the empty string, which is what
    # an error-free result carries; require a non-empty message instead.
    self.assertTrue(result.error)
def render(reorder_history, table):
    """Run ReorderFromTable.render() on a copy of table; coerce the output."""
    # Render against a copy so the caller's table is not the one passed in
    raw = ReorderFromTable.render(
        MockParams(reorder_history=reorder_history),
        table.copy(),
    )
    return ProcessResult.coerce(raw)