async def fetch(params, *, get_stored_dataframe):
    """Fetch tweets for the configured query and wrap them in a result.

    Returns ``None`` when both the query and the access token are missing
    (so no version is created), an ``Err`` describing the problem when
    input is incomplete or Twitter reports a failure, and otherwise a
    truncated and sanitized ``ProcessResult`` holding the tweets.

    When ``params['accumulate']`` is truthy, previously stored tweets are
    loaded through ``get_stored_dataframe`` and merged with the new ones.
    """
    querytype = QueryType(params['querytype'])
    query: str = params[querytype.query_param_name]
    credentials = params['twitter_credentials'] or {}
    access_token = credentials.get('secret')

    if not query.strip():
        if not access_token:
            return None  # Don't create a version
        return Err('Please enter a query')
    if not access_token:
        return Err('Please sign in to Twitter')

    try:
        accumulating = bool(params['accumulate'])
        stored = (
            await get_stored_tweets(get_stored_dataframe)
            if accumulating
            else None
        )
        fetched = await get_new_tweets(access_token, querytype, query, stored)
        tweets = merge_tweets(stored, fetched) if accumulating else fetched
    except ValueError as err:
        return Err(str(err))
    except ClientResponseError as err:
        status = err.status
        if not status:
            # No HTTP status available: report the exception text itself.
            return Err('Error fetching tweets: %s' % str(err))

        on_user_timeline = querytype == QueryType.USER_TIMELINE
        if on_user_timeline and status == 401:
            return Err("User %s's tweets are private" % query)
        if on_user_timeline and status == 404:
            return Err('User %s does not exist' % query)
        if status == 429:
            return Err(
                'Twitter API rate limit exceeded. '
                'Please wait a few minutes and try again.'
            )
        return Err('Error from Twitter: %d %s' % (status, err.message))

    outcome = ProcessResult(dataframe=tweets)
    outcome.truncate_in_place_if_too_big()
    outcome.sanitize_in_place()
    return outcome
def _test_render(self, in_table: pd.DataFrame, column: str,
                 edits_json: Dict[str, Any],
                 expected_out: pd.DataFrame = pd.DataFrame(),
                 expected_error: str = '') -> None:
    """Test that the render method works (kinda an integration test).

    Builds params from ``column`` and ``edits_json``, renders ``in_table``,
    sanitizes both the actual and the expected result, then asserts that
    their error strings and dataframes match.

    ``expected_out`` defaults to an empty DataFrame and ``expected_error``
    to the empty string.
    """
    params = P(column, edits_json)
    result = render(in_table, params)
    result.sanitize_in_place()

    # Sanitize a copy rather than ``expected_out`` itself: the default value
    # is one DataFrame object shared by every call, so an in-place sanitize
    # must not be allowed to touch it (or a frame the caller still holds).
    expected = ProcessResult(expected_out.copy(), expected_error)
    expected.sanitize_in_place()

    self.assertEqual(result.error, expected.error)
    assert_frame_equal(result.dataframe, expected.dataframe)
def _test_refine_spec_apply(self, in_table: pd.DataFrame, column: str,
                            spec: RefineSpec,
                            expected_out: pd.DataFrame = pd.DataFrame(),
                            expected_error: str = '') -> None:
    """Render and assert the output is as expected.

    Applies ``spec`` to ``column`` of ``in_table``, coerces the outcome to a
    ``ProcessResult`` and compares its error and dataframe with the
    expected ones.

    ``expected_out`` defaults to an empty DataFrame and ``expected_error``
    to the empty string.
    """
    result = ProcessResult.coerce(spec.apply(in_table, column))

    # Sanitize result+expected, so if sanitize changes these tests may
    # break (which is what we want).
    result.sanitize_in_place()

    # Sanitize a copy rather than ``expected_out`` itself: the default value
    # is one DataFrame object shared by every call, so an in-place sanitize
    # must not be allowed to touch it (or a frame the caller still holds).
    expected = ProcessResult(expected_out.copy(), expected_error)
    expected.sanitize_in_place()

    self.assertEqual(result.error, expected.error)
    assert_frame_equal(result.dataframe, expected.dataframe)
async def fetch(params):
    """Download a web page and return one of its HTML tables.

    Reads ``params['url']`` (whitespace-stripped) and ``params['tablenum']``
    (1-based, as shown to the user). The page is downloaded, every
    ``<table>`` is parsed with pandas, and the requested table is returned
    in a ``ProcessResult`` after merging colspan headers, truncating and
    sanitizing.

    Every failure handled here (bad table number, timeout, invalid URL,
    HTTP/client error, no tables, table without columns) is reported as a
    ``ProcessResult`` carrying only an error message.
    """
    url: str = params['url'].strip()
    tablenum: int = params['tablenum'] - 1  # 1-based for user

    if tablenum < 0:
        return ProcessResult(error='Table number must be at least 1')

    try:
        async with utils.spooled_data_from_url(url) as (spool, headers,
                                                        charset):
            # TODO use charset for encoding detection
            tables = pd.read_html(spool, encoding=charset, flavor='html5lib')
    except asyncio.TimeoutError:
        return ProcessResult(error=f'Timeout fetching {url}')
    except aiohttp.InvalidURL:
        return ProcessResult(error='Invalid URL')
    except aiohttp.ClientResponseError as err:
        # Handled ahead of the broader ClientError so that the HTTP status
        # and message make it into the error text.
        return ProcessResult(error=('Error from server: %d %s'
                                    % (err.status, err.message)))
    except aiohttp.ClientError as err:
        return ProcessResult(error=str(err))
    except ValueError:
        return ProcessResult(
            error='Did not find any <table> tags on that page')
    except IndexError:
        # pandas.read_html() gives this unhelpful error message....
        return ProcessResult(error='Table has no columns')

    if not tables:
        return ProcessResult(
            error='Did not find any <table> tags on that page')

    if tablenum >= len(tables):
        return ProcessResult(
            error=(f'The maximum table number on this page is {len(tables)}'))

    table = tables[tablenum]
    merge_colspan_headers_in_place(table)
    result = ProcessResult(dataframe=table)
    result.truncate_in_place_if_too_big()
    result.sanitize_in_place()
    return result
def test_render_truncate_and_sanitize(self):
    """render() must truncate and then sanitize the module's result.

    The render implementation hands back a ProcessResult whose
    ``truncate_in_place_if_too_big`` and ``sanitize_in_place`` methods are
    replaced by recorders; afterwards the recorded call order is checked.
    """
    recorded = []

    stubbed = ProcessResult(pd.DataFrame({'A': [1]}))
    stubbed.truncate_in_place_if_too_big = lambda: recorded.append('truncate')
    stubbed.sanitize_in_place = lambda: recorded.append('sanitize')

    module = LoadedModule('int', '1', render_impl=lambda _a, _b: stubbed)

    with self.assertLogs():
        module.render(ProcessResult(), {}, tab_name='x', fetch_result=None)

    self.assertEqual(recorded, ['truncate', 'sanitize'])
def test_sanitize(self):
    """sanitize_in_place() turns list cells into their string form."""
    actual = ProcessResult(DataFrame({'foo': [[1], [2]]}))
    actual.sanitize_in_place()

    wanted = ProcessResult(DataFrame({'foo': ['[1]', '[2]']}))
    self.assertEqual(actual, wanted)
def table_to_result(table):
    """Wrap ``table`` in a ProcessResult and sanitize it in place."""
    wrapped = ProcessResult(table)
    # Sanitizing alters what dataframe.equals() reports for the result.
    wrapped.sanitize_in_place()
    return wrapped