Exemple #1
0
    def test_request_retries(self):
        # Every attempt fails with a 503, so the opener is expected to be
        # invoked once per allowed retry.
        attempts = 5
        unavailable = error.HTTPError('some url', 503, 'some msg', '', BytesIO())
        self.mock_open = MagicMock(side_effect=unavailable)

        request.make_request(
            'http://fauxurl.com',
            max_retries=attempts,
            open_func=self.mock_open,
        )

        self.assertEqual(self.mock_open.call_count, attempts)
Exemple #2
0
def save_from_url(url, filename):
    """
    Downloads the file at `url` and stores it
    under `filename` via `save_from_file`.

    Returns whatever `save_from_file` returns (the file's S3 URL,
    per the original documentation), or `None` when the request
    fails with one of the handled errors.
    """
    try:
        response = make_request(url)

    except error.HTTPError as err:
        # Wikimedia images go out of date so often that a 404 from there
        # is routine: a warning is enough. Any other HTTP error is more
        # remarkable and is logged together with its traceback.
        routine_miss = err.code == 404 and 'wikimedia' in url
        report = logger.warning if routine_miss else logger.exception
        report('Error requesting {0} : {1}'.format(url, err))
        return None

    except (ConnectionResetError, BadStatusLine, ValueError) as err:
        logger.exception('Error requesting {0} : {1}'.format(url, err))
        return None

    return save_from_file(BytesIO(response.read()), filename)
Exemple #3
0
def save_from_url(url, filename):
    """
    Requests `url`, buffers the response body in memory and
    passes it to `save_from_file` together with `filename`.

    The result of `save_from_file` is returned (documented
    originally as the S3 URL of the saved file). If the request
    raises one of the handled errors, it is logged and `None`
    is returned instead.
    """
    template = 'Error requesting {0} : {1}'

    try:
        remote = make_request(url)

    except error.HTTPError as http_err:
        if http_err.code != 404 or 'wikimedia' not in url:
            # Anything other than a Wikimedia 404 is remarkable
            # and should be brought up, traceback included.
            logger.exception(template.format(url, http_err))
        else:
            # Wikimedia 404 errors are very common, since images
            # may go out of date, so they only get a warning.
            logger.warning(template.format(url, http_err))
        return None

    except (ConnectionResetError, BadStatusLine, ValueError) as conn_err:
        logger.exception(template.format(url, conn_err))
        return None

    else:
        payload = BytesIO(remote.read())
        return save_from_file(payload, filename)
Exemple #4
0
def _request(endpoint, url, format='json'):
    """
    Requests `endpoint` + `url` and parses the response body.

    `format` selects the parser: 'json' decodes the body as UTF-8
    and loads it as JSON, 'xml' parses it with `xmltodict`. Any
    other value yields `None` (the body is still fetched and read).
    """
    body = make_request('{0}{1}'.format(endpoint, url)).read()

    if format == 'json':
        text = body.decode('utf-8')
        return json.loads(text)

    if format == 'xml':
        return xmltodict.parse(body)

    return None
Exemple #5
0
def _request(endpoint, url, format="json"):
    """
    Fetches the resource at `endpoint` joined with `url` and
    returns its parsed content.

    With `format="json"` the raw bytes are decoded as UTF-8 and
    parsed as JSON; with `format="xml"` they go through
    `xmltodict.parse`. Unrecognised formats return `None` after
    the response has been read.
    """
    target = "{0}{1}".format(endpoint, url)
    response = make_request(target)
    raw = response.read()

    if format == "json":
        return json.loads(raw.decode("utf-8"))
    if format == "xml":
        return xmltodict.parse(raw)
    return None
Exemple #6
0
def _get_html(url):
    """
    Returns the raw body fetched from `url`.

    If the read is cut short with `IncompleteRead`, the partial
    data received so far is returned instead.
    """
    # Some sites, such as NYTimes, use cookies to track which articles
    # have been viewed; without cookies you get thrown into an
    # infinite loop, so requests go through a cookie-aware opener.
    jar = CookieJar()
    cookie_opener = request.build_opener(request.HTTPCookieProcessor(jar))

    # Spoofing a user agent can help get around 403 (forbidden) errors.
    spoofed_headers = {'User-Agent': 'Chrome'}

    try:
        response = make_request(url, open_func=cookie_opener.open, headers=spoofed_headers)
        return response.read()
    except IncompleteRead as err:
        return err.partial
Exemple #7
0
def _get_html(url):
    """
    Fetches `url` with a cookie-enabled opener and a spoofed
    user agent, and returns the raw response body.

    An `IncompleteRead` is not treated as fatal: whatever partial
    data the exception carries is returned.
    """
    # Sites such as NYTimes track viewed articles with cookies, and
    # without cookies you get thrown into an infinite loop.
    cookie_handler = request.HTTPCookieProcessor(CookieJar())
    open_with_cookies = request.build_opener(cookie_handler).open

    try:
        # The fake user agent can help get around 403 (forbidden) errors.
        raw_html = make_request(
            url,
            open_func=open_with_cookies,
            headers={'User-Agent': 'Chrome'},
        ).read()
    except IncompleteRead as truncated:
        raw_html = truncated.partial

    return raw_html