Example 1
0
async def fetch(params, *, get_stored_dataframe):
    """Fetch tweets described by *params* and return a sanitized result.

    Returns ``None`` when there is nothing to do (no query and no
    credentials — don't create a version), an ``Err`` for user-facing
    problems, or a ``ProcessResult`` holding the fetched tweets.
    """
    querytype = QueryType(params['querytype'])
    query: str = params[querytype.query_param_name]
    access_token = (params['twitter_credentials'] or {}).get('secret')

    has_query = bool(query.strip())

    # Guard clauses: bail out before hitting the network.
    if not has_query and not access_token:
        return None  # Don't create a version
    if not has_query:
        return Err('Please enter a query')
    if not access_token:
        return Err('Please sign in to Twitter')

    try:
        if params['accumulate']:
            # Merge newly-fetched tweets into the previously stored ones.
            old_tweets = await get_stored_tweets(get_stored_dataframe)
            fetched = await get_new_tweets(access_token, querytype, query,
                                           old_tweets)
            tweets = merge_tweets(old_tweets, fetched)
        else:
            tweets = await get_new_tweets(access_token, querytype, query,
                                          None)
    except ValueError as err:
        return Err(str(err))
    except ClientResponseError as err:
        # Translate HTTP failures into user-facing messages.
        if not err.status:
            return Err('Error fetching tweets: %s' % str(err))
        if querytype == QueryType.USER_TIMELINE and err.status == 401:
            return Err("User %s's tweets are private" % query)
        if querytype == QueryType.USER_TIMELINE and err.status == 404:
            return Err('User %s does not exist' % query)
        if err.status == 429:
            return Err(
                'Twitter API rate limit exceeded. '
                'Please wait a few minutes and try again.'
            )
        return Err('Error from Twitter: %d %s'
                   % (err.status, err.message))

    result = ProcessResult(dataframe=tweets)
    result.truncate_in_place_if_too_big()
    result.sanitize_in_place()
    return result
Example 2
0
    def _test_render(self,
                     in_table: pd.DataFrame,
                     column: str,
                     edits_json: Dict[str, Any],
                     expected_out: pd.DataFrame = None,
                     expected_error: str = '') -> None:
        """Test that the render method works (kinda an integration test).

        ``expected_out`` defaults to an empty DataFrame. It must not be a
        literal ``pd.DataFrame()`` default: that instance would be created
        once at definition time and then mutated by
        ``expected.sanitize_in_place()`` below, leaking state into later
        calls (the classic mutable-default-argument bug).
        """
        if expected_out is None:
            expected_out = pd.DataFrame()

        params = P(column, edits_json)
        result = render(in_table, params)
        result.sanitize_in_place()

        expected = ProcessResult(expected_out, expected_error)
        expected.sanitize_in_place()

        self.assertEqual(result.error, expected.error)
        assert_frame_equal(result.dataframe, expected.dataframe)
Example 3
0
    def _test_refine_spec_apply(self,
                                in_table: pd.DataFrame,
                                column: str,
                                spec: RefineSpec,
                                expected_out: pd.DataFrame = None,
                                expected_error: str = '') -> None:
        """Render and assert the output is as expected.

        ``expected_out`` defaults to an empty DataFrame. A literal
        ``pd.DataFrame()`` default would be shared across calls and mutated
        by ``expected.sanitize_in_place()`` below (mutable default
        argument), so we use a ``None`` sentinel instead.
        """
        if expected_out is None:
            expected_out = pd.DataFrame()

        result = ProcessResult.coerce(spec.apply(in_table, column))
        # Sanitize result+expected, so if sanitize changes these tests may
        # break (which is what we want).
        result.sanitize_in_place()

        expected = ProcessResult(expected_out, expected_error)
        expected.sanitize_in_place()

        self.assertEqual(result.error, expected.error)
        assert_frame_equal(result.dataframe, expected.dataframe)
Example 4
0
async def fetch(params):
    """Scrape the ``params['tablenum']``-th ``<table>`` from ``params['url']``.

    Returns a ``ProcessResult`` holding the parsed, truncated, sanitized
    table, or one holding a user-facing error message.

    Fixes: dropped dead ``table = None`` / ``result = None`` initializations
    (both names were unconditionally rebound before first read) and the
    no-placeholder f-string ``f'Invalid URL'`` (the runtime string is
    unchanged).
    """
    url: str = params['url'].strip()
    tablenum: int = params['tablenum'] - 1  # 1-based for user

    if tablenum < 0:
        return ProcessResult(error='Table number must be at least 1')

    try:
        async with utils.spooled_data_from_url(url) as (spool, headers,
                                                        charset):
            # TODO use charset for encoding detection
            tables = pd.read_html(spool, encoding=charset, flavor='html5lib')
    except asyncio.TimeoutError:
        return ProcessResult(error=f'Timeout fetching {url}')
    except aiohttp.InvalidURL:
        return ProcessResult(error='Invalid URL')
    except aiohttp.ClientResponseError as err:
        return ProcessResult(error=('Error from server: %d %s' %
                                    (err.status, err.message)))
    except aiohttp.ClientError as err:
        return ProcessResult(error=str(err))
    except ValueError:
        return ProcessResult(
            error='Did not find any <table> tags on that page')
    except IndexError:
        # pandas.read_html() gives this unhelpful error message....
        return ProcessResult(error='Table has no columns')

    if not tables:
        return ProcessResult(
            error='Did not find any <table> tags on that page')

    if tablenum >= len(tables):
        return ProcessResult(
            error=(f'The maximum table number on this page is {len(tables)}'))

    table = tables[tablenum]
    merge_colspan_headers_in_place(table)

    result = ProcessResult(dataframe=table)
    result.truncate_in_place_if_too_big()
    result.sanitize_in_place()
    return result
Example 5
0
    def test_render_truncate_and_sanitize(self):
        """render() must truncate first, then sanitize, its module output."""
        call_order = []

        module_output = ProcessResult(pd.DataFrame({'A': [1]}))
        # Stub both mutators so we can record the order they are invoked in.
        module_output.truncate_in_place_if_too_big = \
            lambda: call_order.append('truncate')
        module_output.sanitize_in_place = lambda: call_order.append('sanitize')

        loaded = LoadedModule('int', '1',
                              render_impl=lambda _a, _b: module_output)
        with self.assertLogs():
            loaded.render(ProcessResult(), {}, tab_name='x', fetch_result=None)

        self.assertEqual(call_order, ['truncate', 'sanitize'])
Example 6
0
 def test_sanitize(self):
     """sanitize_in_place() stringifies list cells ([1] -> '[1]')."""
     actual = ProcessResult(DataFrame({'foo': [[1], [2]]}))
     actual.sanitize_in_place()
     expected = ProcessResult(DataFrame({'foo': ['[1]', '[2]']}))
     self.assertEqual(actual, expected)
Example 7
0
def table_to_result(table):
    """Wrap *table* in a ProcessResult and sanitize it.

    Sanitizing matters here because it alters dataframe.equals() results.
    """
    wrapped = ProcessResult(table)
    wrapped.sanitize_in_place()
    return wrapped