Exemple #1
0
async def _fetch_story(reader, feed_id, offset, url, use_proxy):
    """Fetch a story page, following at most one HTML-level redirect.

    Args:
        reader: Object whose awaitable ``read(url, use_proxy=...)`` yields a
            response exposing ``url``, ``status``, ``ok``, ``content`` and
            ``encoding``, or a falsy value.
        feed_id: Feed identifier; only used in log messages.
        offset: Story offset; only used in log messages.
        url: Address to fetch first.
        use_proxy: Forwarded unchanged to ``reader.read``.

    Returns:
        ``(url, content)`` where ``content`` is the decoded body, or ``None``
        when a read produces no usable response (falsy, not ok, or empty).
        If the second page also contains an HTML redirect, the returned URL
        is that redirect target while the content is the second page's.
    """
    # Two attempts: the original URL plus at most one HTML redirect target.
    for _ in range(2):
        response = await reader.read(url, use_proxy=use_proxy)
        if response and response.url:
            # Prefer the URL reported by the response over the requested one.
            url = str(response.url)
        # The response may be falsy (the checks around this line allow for
        # it), so read the status defensively instead of crashing in the log.
        status = getattr(response, 'status', None)
        LOG.info(
            f'fetch story#{feed_id},{offset} url={unquote(url)} status={status} finished')
        if not (response and response.ok and response.content):
            return None
        try:
            content = response.content.decode(response.encoding)
        except UnicodeDecodeError as ex:
            LOG.warning('fetch story unicode decode error=%s url=%r', ex, url)
            # Best effort: drop undecodable bytes rather than fail the fetch.
            content = response.content.decode(response.encoding, errors='ignore')
        html_redirect = get_html_redirect_url(content)
        if (not html_redirect) or html_redirect == url:
            return url, content
        LOG.info('story#%s,%s resolve html redirect to %r', feed_id, offset, html_redirect)
        url = html_redirect
    return url, content
Exemple #2
0
def test_get_html_redirect_url(filename):
    """Check the redirect URL extracted from the text of *filename*.

    The text returned by ``_read_text(filename)`` is passed to
    ``get_html_redirect_url`` with ``https://blog.example.com`` as the base
    URL; the result must equal the absolute ``/html-redirect/`` address.
    """
    page_text = _read_text(filename)
    redirect_url = get_html_redirect_url(
        page_text,
        base_url='https://blog.example.com',
    )
    assert redirect_url == 'https://blog.example.com/html-redirect/'