Example #1
    def __init__(self):
        self.settings = EsiSettings.getInstance()
        self.server_base: ApiBase = supported_servers[self.settings.get("server")]

        # session request stuff
        self._session = Session()
        self._basicHeaders = {
            'Accept': 'application/json',
            'User-Agent': (
                'pyfa v{}'.format(config.version)
            )
        }
        self._session.headers.update(self._basicHeaders)
        self._session.proxies = NetworkSettings.getInstance().getProxySettingsInRequestsFormat()

        # Set up cached session. This is only used for SSO metadata for now, but can be expanded to actually handle
        # various ESI caching (using ETag, for example) in the future
        cached_session = CachedSession(
            os.path.join(config.savePath, config.ESI_CACHE),
            backend="sqlite",
            cache_control=True,                # Use Cache-Control headers for expiration, if available
            expire_after=timedelta(days=1),    # Otherwise expire responses after one day
            stale_if_error=True,               # In case of request errors, use stale cache data if possible
        )
        cached_session.headers.update(self._basicHeaders)
        cached_session.proxies = NetworkSettings.getInstance().getProxySettingsInRequestsFormat()

        meta_call = cached_session.get("https://%s/.well-known/oauth-authorization-server" % self.server_base.sso)
        meta_call.raise_for_status()
        self.server_meta = meta_call.json()

        jwks_call = cached_session.get(self.server_meta["jwks_uri"])
        jwks_call.raise_for_status()
        self.jwks = jwks_call.json()
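
The comment above notes that the cached session could be extended to handle general ESI caching via ETags. A minimal hedged sketch of that direction, assuming requests-cache 0.8+ (where cache_control=True already drives conditional revalidation); the cache name and endpoint below are placeholders, not taken from the example:

from datetime import timedelta
from requests_cache import CachedSession

session = CachedSession(
    'esi_cache',                     # hypothetical cache name
    backend='sqlite',
    cache_control=True,              # honor Cache-Control and ETag headers
    expire_after=timedelta(days=1),  # fallback when the server sends neither
)
# The first request populates the cache; after expiry, requests-cache sends
# If-None-Match with the stored ETag and reuses the body on a 304 reply.
response = session.get('https://esi.evetech.net/latest/status/')
response = session.get('https://esi.evetech.net/latest/status/')
print(response.from_cache)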
Example #2
 def __init__(self):
     super(AsteroidsSkill, self).__init__(name="AsteroidsSkill")
     if "nasa_key" not in self.settings:
         self.settings["nasa_key"] = "DEMO_KEY"
     _expire_after = timedelta(hours=1)
     self._session = CachedSession(backend='memory',
                                   expire_after=_expire_after)
Example #3
    def test_return_old_data_on_error(self, datetime_mock):
        datetime_mock.utcnow.return_value = datetime.utcnow()
        expire_after = 100
        url = httpbin("get")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, old_data_on_error=True, expire_after=expire_after)
        header = "X-Tst"

        def get(n):
            return s.get(url, headers={header: n}).json()["headers"][header]

        get("expired")
        self.assertEquals(get("2"), "expired")
        datetime_mock.utcnow.return_value = datetime.utcnow() + timedelta(seconds=expire_after * 2)

        with mock.patch.object(s.cache, "save_response", side_effect=Exception):
            self.assertEquals(get("3"), "expired")

        with mock.patch("requests_cache.core.OriginalSession.send") as send_mock:
            resp_mock = requests.Response()
            request = requests.Request("GET", url)
            resp_mock.request = request.prepare()
            resp_mock.status_code = 400
            resp_mock._content = '{"other": "content"}'
            send_mock.return_value = resp_mock
            self.assertEquals(get("3"), "expired")

            resp_mock.status_code = 200
            self.assertIs(s.get(url).content, resp_mock.content)

        # default behaviour
        datetime_mock.utcnow.return_value = datetime.utcnow() + timedelta(seconds=expire_after * 2)
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, old_data_on_error=False, expire_after=100)
        with mock.patch.object(s.cache, "save_response", side_effect=Exception):
            with self.assertRaises(Exception):
                s.get(url)
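
The test above exercises old_data_on_error; in application code it is just a constructor flag. A hedged mini-sketch under that assumption (cache name and URL are placeholders):

session = CachedSession('app_cache', 'sqlite', old_data_on_error=True, expire_after=300)
resp = session.get('https://example.org/data')
# After the 300 s expiry, a failed refresh (request error or bad status)
# falls back to the stale cached response instead of raising.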
Example #4
    def __init__(self,
                 source=None,
                 filter=None,
                 reverse_geocode=False,
                 progressbar=False,
                 quick_mode=False,
                 dry_run=False):
        self.source = source
        self.reverse_geocode = reverse_geocode
        self.dry_run = dry_run
        self.progressbar = progressbar
        self.filter = filter

        # Quick mode only imports the first few datasets to speed things up.
        self.quick_mode = quick_mode

        # Cache responses from the luftdaten.info API for five minutes.
        # TODO: Make backend configurable.

        # Configure User-Agent string.
        user_agent = APP_NAME + '/' + APP_VERSION

        # Configure cached requests session.
        self.session = CachedSession(cache_name='api.luftdaten.info',
                                     backend='redis',
                                     expire_after=300,
                                     user_agent=user_agent)

        # Probe Redis for availability.
        try:
            self.session.cache.responses.get('test')
        except redis.exceptions.ConnectionError as ex:
            log.error('Unable to connect to Redis: %s', ex)
            sys.exit(2)
Example #5
 def __init__(self):
     super().__init__("XKCDSkill")
     if not self.settings.get("idle_random"):
         self.settings["idle_random"] = True
     self.session = CachedSession(backend='memory',
                                  expire_after=timedelta(hours=6))
     self.current_comic = 0
Example #6
 def __init__(
     self,
     client_id,
     client_secret,
     cache_name: str,
     token=None,
     token_secret=None,
     redirect_uri=None,
     rsa_key=None,
     verifier=None,
     signature_method=SIGNATURE_HMAC_SHA1,
     signature_type=SIGNATURE_TYPE_HEADER,
     force_include_body=False,
     backend='redis',
     expire_after=300,
     **kwargs,
 ):
     CachedSession.__init__(self, cache_name, backend, expire_after)
     OAuth1Client.__init__(
         self,
         session=self,
         client_id=client_id,
         client_secret=client_secret,
         token=token,
         token_secret=token_secret,
         redirect_uri=redirect_uri,
         rsa_key=rsa_key,
         verifier=verifier,
         signature_method=signature_method,
         signature_type=signature_type,
         force_include_body=force_include_body,
         **kwargs,
     )
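
Example #6 merges CachedSession with authlib's OAuth1Client into a single session object. A hedged usage sketch, assuming the enclosing class is named OAuth1CachedSession (the class name is cut off in the snippet) and overriding the Redis default backend for a local test; the credentials and URL are placeholders:

client = OAuth1CachedSession(
    client_id='consumer-key',
    client_secret='consumer-secret',
    cache_name='oauth1_cache',
    token='access-token',
    token_secret='access-token-secret',
    backend='sqlite',
    expire_after=300,
)
r = client.get('https://api.example.com/resource')  # signed and cached
print(r.from_cache)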
Example #7
def main():
    session = CachedSession('example_cache', backend='sqlite')

    # By default, cached responses never expire
    response = session.get('https://httpbin.org/get')
    assert not response.from_cache
    response = session.get('https://httpbin.org/get')
    assert response.from_cache
    assert not response.expires

    # We can set default expiration for the session using expire_after
    session = CachedSession('example_cache', backend='sqlite', expire_after=60)
    session.cache.clear()
    response = session.get('https://httpbin.org/get')
    response = session.get('https://httpbin.org/get')
    print('Expiration time:', response.expires)

    # This can also be overridden for individual requests
    session.cache.clear()
    response = session.get('https://httpbin.org/get', expire_after=1)
    response = session.get('https://httpbin.org/get')
    assert response.from_cache
    print('Expiration time:', response.expires)

    # After 1 second, the cached value will have expired
    time.sleep(1.2)
    assert response.is_expired
    response = session.get('https://httpbin.org/get')
    assert not response.from_cache
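
Example #7 sets expiration at the session and per-request level; note that expired entries stay in the backend until overwritten. A hedged companion sketch for purging them, using the remove_expired_responses() call that also appears in Example #42:

session = CachedSession('example_cache', backend='sqlite', expire_after=60)
session.get('https://httpbin.org/get')
# ... later: drop everything past its expiration from the sqlite backend
session.remove_expired_responses()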
Example #8
def url_get(url):
    """
    Download a URL using a cache and return the response object
    :param url:
    :return:
    """
    s = None
    info = dict()

    log.debug("GET URL {!s}".format(url))

    if 'file://' in url:
        s = requests.session()
        s.mount('file://', FileAdapter())
    else:
        s = CachedSession(cache_name="pyff_cache",
                          backend=config.request_cache_backend,
                          expire_after=config.request_cache_time,
                          old_data_on_error=True)
    headers = {'User-Agent': "pyFF/{}".format(__version__), 'Accept': '*/*'}
    r = s.get(url,
              headers=headers,
              verify=False,
              timeout=config.request_timeout)
    if config.request_override_encoding is not None:
        r.encoding = config.request_override_encoding

    return r
Example #9
def url_get(url):
    """
    Download a URL using a cache and return the response object
    :param url:
    :return:
    """
    s = None
    info = dict()

    if 'file://' in url:
        s = requests.session()
        s.mount('file://', FileAdapter())
    else:
        s = CachedSession(cache_name="pyff_cache",
                          backend=config.request_cache_backend,
                          expire_after=config.request_cache_time,
                          old_data_on_error=True)
    headers = {'User-Agent': "pyFF/{}".format(__version__), 'Accept': '*/*'}
    try:
        r = s.get(url, headers=headers, verify=False, timeout=config.request_timeout)
    except IOError as ex:
        s = requests.Session()
        r = s.get(url, headers=headers, verify=False, timeout=config.request_timeout)

    if six.PY2:
        r.encoding = "utf-8"

    log.debug("url_get({}) returns {} chrs encoded as {}".format(url, len(r.content), r.encoding))

    if config.request_override_encoding is not None:
        r.encoding = config.request_override_encoding

    return r
Example #10
def get_page_url():
    """
    Initial function to get the list of page URLs for each letter of the alphabet.
    :return: list of urls
    """
    page_url_list = []
    url = "http://devri.bzh/dictionnaire/a/"
    session = CachedSession()
    page = session.get(url)
    soup = BeautifulSoup(page.content, "html.parser")
    letter_list = soup.find_all("a", class_="enfant")
    for i in tqdm(letter_list):
        url_letter = "http://devri.bzh" + i["href"]
        page_letter = session.get(url_letter)
        soup_letter = BeautifulSoup(page_letter.content, "html.parser")
        page_num = soup_letter.find("li", class_=["MarkupPagerNavLast MarkupPagerNavLastNum", "MarkupPagerNavLastNum"])
        try:
            page_num = page_num.a.text
            url_list = [url_letter]
            for j in range(2, int(page_num) + 1):
                url_list.append(url_letter + f"page{j}")
            page_url_list += url_list
        except AttributeError:
            pass
    # add the page for the letter z manually
    page_url_list += ["http://devri.bzh/dictionnaire/z/"]
    return page_url_list
Example #11
    def session_obj(self):
        session = CachedSession(allowable_methods=('GET', 'POST'),
                                ignored_parameters=['smpbss'])

        if not IceDaily.RECAPTCHA:
            with session.cache_disabled():
                response = session.get(
                    url=
                    'https://www.theice.com/marketdata/reports/datawarehouse/ConsolidatedEndOfDayReportPDF.shtml',
                    headers={
                        'User-Agent': 'Mozilla/5.0',
                        'X-Requested-With': 'XMLHttpRequest'
                    },
                    params={
                        'selectionForm': '',
                        'exchangeCode': 'IFEU',
                        'optionRequest': self.flavor['optionRequest']
                    })

            bs = BeautifulSoup(response.text)

            df = pd.DataFrame([(opt['value'], opt.text)
                               for opt in bs.find_all('option')],
                              columns=["WebActiveCode", "ActiveName"])

            df['ActiveCode'] = df.WebActiveCode.apply(
                lambda s: s.split('|', 1)[1] if '|' in s else None)
            df = df.dropna(how='any')

            self.update_actives(df)

        return session
Example #12
def get_services(**options):
    """
    This function builds the services dictionary, which is a simple dict of names-to-implementation used by bonobo
    for runtime injection.

    It will be used on top of the defaults provided by bonobo (fs, http, ...). You can override those defaults, or just
    let the framework define them. You can also define your own services and naming is up to you.

    :return: dict
    """

    if options['use_cache']:
        from requests_cache import CachedSession
        servicenow = CachedSession('http.cache')
    else:
        servicenow = requests.Session()

    servicenow.headers = {'User-Agent': 'Mozilla/ETL/v1'}
    servicenow.auth = HTTPBasicAuth(options['sn_username'],
                                    options['sn_password'])
    servicenow.headers.update({'Accept-encoding': 'text/json'})

    return {
        'servicenow':
        servicenow,
        'db':
        create_engine('sqlite:///test.sqlite', echo=False),
        'vertica':
        create_engine(options['vertica'].format(
            username=options['vertica_username'],
            password=options['vertica_password']),
                      echo=False)
    }
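
A hedged sketch of calling get_services(); the option keys mirror the function above, while the credentials, instance URL, and connection strings are placeholders:

services = get_services(
    use_cache=True,  # ServiceNow calls go through CachedSession('http.cache')
    sn_username='svc-account',
    sn_password='hunter2',
    vertica='vertica+vertica_python://{username}:{password}@vertica.local:5433/dwh',
    vertica_username='etl',
    vertica_password='secret',
)
servicenow = services['servicenow']
response = servicenow.get('https://instance.service-now.com/api/now/table/incident')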
Example #13
def imdb_search(query):
    session = CachedSession(expire_after=60 * 60 * 24)
    stripped_query = "".join([x for x in query
                              if x.isalnum() or x == " "]).lower()
    r = session.get("https://www.imdb.com/find",
                    params={
                        "q": stripped_query,
                        "s": "tt",
                        "ttype": "ft"
                    })
    page_html = r.text
    soup = BeautifulSoup(page_html, "html.parser")
    results = []
    for result in soup.select(".result_text"):
        title = result.a.get_text()
        imdb_id = result.a.get("href").split("/")
        try:
            imdb_id = result.a.get("href").split("/title/")[1].split("/")[0]
        except IndexError:
            imdb_id = None
            continue
        result.a.decompose()
        try:
            year = result.get_text().split("(")[1].split(")")[0]
            int(year)
        except:
            year = None
            continue

        results.append((title, year, imdb_id))

    return results
Example #14
def test_urls_expire_after__evaluation_order(url, expected_expire_after):
    """If there are multiple matches, the first match should be used in the order defined"""
    session = CachedSession(urls_expire_after={
        '*.site_1.com/resource': 60 * 60 * 2,
        '*.site_1.com': 60 * 60,
        '*': 1,
    }, )
    assert session._url_expire_after(url) == expected_expire_after
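
The parametrized url/expected_expire_after pairs are not shown in the snippet; an illustrative reading of the first-match-wins rule the docstring describes (the URLs below are invented, not taken from the test data):

session = CachedSession(urls_expire_after={
    '*.site_1.com/resource': 60 * 60 * 2,  # most specific pattern listed first
    '*.site_1.com': 60 * 60,
    '*': 1,                                # catch-all fallback
})
# e.g. a URL like 'img.site_1.com/resource' should take the first pattern
# (7200 s), other site_1.com URLs the second (3600 s), and anything else
# falls through to the catch-all (1 s).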
Example #15
def test_urls_expire_after(url, expected_expire_after):
    session = CachedSession(urls_expire_after={
        '*.site_1.com': 60 * 60,
        'site_2.com/resource_1': 60 * 60 * 2,
        'site_2.com/resource_2': 60 * 60 * 24,
        'site_2.com/static': -1,
    }, )
    assert session._url_expire_after(url) == expected_expire_after
Example #16
 def wrapper(page_html=None, imdb_id=None, soup=None, *args, **kwargs):
     if not (page_html or imdb_id or soup):
         return None
     if imdb_id:
         session = CachedSession(expire_after=60 * 60 * 24)
         r = session.get("https://www.imdb.com/title/{}".format(imdb_id))
         page_html = r.text
     soup = soup or BeautifulSoup(page_html, "html.parser")
     return f(soup, *args, **kwargs)
Example #17
 def test_post_parameters_normalization(self):
     params = {"a": "a", "b": ["1", "2", "3"], "c": "4"}
     url = httpbin("post")
     s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                       allowable_methods=('GET', 'POST'))
     self.assertFalse(s.post(url, data=params).from_cache)
     self.assertTrue(s.post(url, data=params).from_cache)
     self.assertTrue(s.post(url, data=sorted(params.items())).from_cache)
     self.assertFalse(s.post(url, data=sorted(params.items(), reverse=True)).from_cache)
Example #18
 def __init__(self):
     super(HelioViewerSkill, self).__init__(name="HelioViewerSkill")
     self.session = CachedSession(backend='memory',
                                  expire_after=timedelta(hours=6))
     self.translate_cache = {}  # save calls to avoid IP banning
     self.img_cache = {}  # don't re-parse, for speed
     self.current_date = datetime.now()
     self.current_camera = "sunspots"
     create_daemon(self.bootstrap)
Example #19
 def test_throttle_cache(self):
     url = httpbin('get')
     s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.06)
     s.throttle(url, 0.5) # one every 2 seconds
     r = s.get(url)
     time.sleep(0.6)
     t = time.time()
     r = s.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, 1)
Example #20
def get_data(leaderboard, year):
    lb_path = LEADERBOARDS.get(leaderboard, leaderboard)
    url = f"https://adventofcode.com/{year}/leaderboard/private/view/{lb_path}"
    cache_options = {"backend": "filesystem", "expire_after": 86400}
    session = CachedSession("../../site_cache", **cache_options)
    cookies = dict([Path("../.session-cookie").read_text().strip().split("=")])
    response = session.get(url, cookies=cookies)
    if response.status_code != 200:
        pdb.set_trace()
    return response.json()
Example #21
def get_session() -> CachedSession:
    """Make a cached session."""
    path = settings.STATE_PATH.joinpath("http").as_posix()
    session = CachedSession(cache_name=path,
                            expire_after=settings.CACHE_EXPIRE)
    session.headers.update(HEADERS)
    # weird monkey-patch: default timeout for requests sessions
    session.request = functools.partial(session.request,
                                        timeout=settings.HTTP_TIMEOUT)
    return session
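
The functools.partial line above bakes a default timeout into every request without subclassing Session; call-site keywords still override partial keywords. A short hedged usage note (the URL is a placeholder):

session = get_session()
session.get('https://example.org/')               # inherits settings.HTTP_TIMEOUT
session.get('https://example.org/', timeout=2.0)  # explicit timeout still wins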
Example #22
def get_page_content(url: str) -> str:
    """
    Utility function to get the raw page content
    :param url: string
    :return: string
    """
    session = CachedSession()
    page = session.get(url)
    soup = BeautifulSoup(page.content, "html.parser")
    content = soup.prettify()
    return content
Example #23
 def fetch_df(url):
     r = requests.get(url)
     js = r.json()
     if "features" not in js:
         s = CachedSession()
         with s.cache_disabled():
             r = s.get(url)
             js = r.json()
     features = [feature["attributes"] for feature in js["features"]]
     df = pd.DataFrame(features)
     return df
Example #24
    def get(self, resource: str, params=None):
        client = BackendApplicationClient(client_id=self.client_id)
        oauth = OAuth2Session(client=client)
        token = oauth.fetch_token(token_url='https://us.battle.net/oauth/token', client_id=self.client_id, client_secret=self.client_secret)

        if params is None:
            params = {}
        root = 'https://us.api.blizzard.com/'
        params.update({'locale': 'en_US', 'namespace': 'static-us', 'access_token': token['access_token']})

        # Only this GET is cached; the token fetch above goes out on every call.
        session = CachedSession(expire_after=timedelta(hours=1))
        return session.get(root + resource, params=params).json()
Example #25
 def __init__(self, url, cache=False, cachetime=300):
     super(PlexServer, self).__init__(url)
     session = None  # use the default session
     cachetime = cachetime or None
     if CachedSession and cache and isinstance(cache, six.string_types):
         if cache == 'memory':
             session = CachedSession(cache, cache, cachetime)
         elif cache == 'sqlite':
             session = CachedSession('request-cache',
                                     'sqlite',
                                     cachetime,
                                     fast_save=True)
     self._config = RequestConfig(session=session)
Example #26
 def wrapper(title, year, *args, **kwargs):
     session = CachedSession(expire_after=60 * 60 * 24)
     # https://www.rottentomatoes.com/napi/search/?query=parasite&offset=0&limit=10
     stripped_title = "".join([x for x in title
                               if x.isalnum() or x == " "]).lower().strip()
     response = session.get("https://www.rottentomatoes.com/napi/search",
                            params={
                                "query": stripped_title,
                                "offset": 0,
                                "limit": 10
                            })
     json = response.json()
     return f(stripped_title, year, json, *args, **kwargs)
Example #27
 def __init__(self,
              registry_config="app-registry.yaml",
              product="common",
              stub=False):
     self.registry = self._get_registry(registry_config, product=product)
     """ Uncomment this and related lines when this code goes live,. 
     Use a timeout on the API so the unit tests are not slowed down. """
     if not os.environ.get('DEV_PHASE') == 'stub':
         self.client = TychoClient(
             url=os.environ.get('TYCHO_URL', "http://localhost:5000"))
     self.product = product
     self.apps = self._grok()
     self.http_session = CachedSession(cache_name='tycho-registry')
Example #28
 def test_expire_cache(self):
     delay = 1
     url = httpbin('delay/%s' % delay)
     s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.06)
     t = time.time()
     r = s.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, delay)
     time.sleep(0.5)
     t = time.time()
     r = s.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, delay)
Example #29
    def __init__(self, data_dir: Optional[str] = None):
        super().__init__(data_dir)

        env_var = f'{__name__:s}.requests_cache'.replace('.', '_').upper()
        fn = os.environ.get(env_var, None)
        if fn is None:
            logging.debug(f'Environment variable {env_var} not found.')
            fn = os.path.join(self.get_data_dir(), 'requests_cache.sqlite')
        logging.warning(f'Using requests cache at {os.path.abspath(fn)}')

        # CachedSession appends the .sqlite extension itself, so strip it here.
        fn = fn[:-len('.sqlite')] if fn.endswith('.sqlite') else fn
        self.__cached_session = CachedSession(cache_name=fn)
        self.__hash_constructors: Dict[str, callable] = dict()
Example #30
 def test_expire_cache(self):
     delay = 1
     url = httpbin('delay/%s' % delay)
     s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.06)
     t = time.time()
     r = s.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, delay)
     time.sleep(0.5)
     t = time.time()
     r = s.get(url)
     delta = time.time() - t
     self.assertGreaterEqual(delta, delay)
Example #31
    def __init__(
            self,
            allowed_licenses=None,
            excluded_packages=None,
            includes=None,
            cache_name='~/.cache/pre-commit-license-check.sqlite') -> None:

        self.allowed_licenses = allowed_licenses if allowed_licenses else []
        self.excluded_packages = excluded_packages if excluded_packages else []
        self.includes = includes if includes else []

        self.session = CachedSession(cache_name,
                                     expire_after=timedelta(days=1))
Example #32
    def test_ignore_parameters_post_raw(self):
        url = httpbin("post")
        ignored_param = "ignored"
        raw_data = "raw test data"

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('POST',),
                          ignored_parameters=[ignored_param])

        self.assertFalse(s.post(url, data=raw_data).from_cache)
        self.assertTrue(s.post(url, data=raw_data).from_cache)

        raw_data = "new raw data"
        self.assertFalse(s.post(url, data=raw_data).from_cache)
Example #33
    def test_post_params(self):
        # issue #2
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))

        d = {'param1': 'test1'}
        for _ in range(2):
            self.assertEqual(self.post(d)['form'], d)
            d = {'param1': 'test1', 'param3': 'test3'}
            self.assertEqual(self.post(d)['form'], d)

        self.assertTrue(self.s.post(httpbin('post'), data=d).from_cache)
        d.update({'something': 'else'})
        self.assertFalse(self.s.post(httpbin('post'), data=d).from_cache)
Example #34
 def __init__(self):
     super(HubbleTelescopeSkill, self).__init__(name="HubbleTelescopeSkill")
     if "random" not in self.settings:
         # idle screen, random or latest
         self.settings["random"] = True
     if "include_james_webb" not in self.settings:
         self.settings["include_james_webb"] = False
     if "exclude_long" not in self.settings:
         self.settings["exclude_long"] = True
     self.session = CachedSession(backend='memory',
                                  expire_after=timedelta(hours=6))
     self.translate_cache = {}
     # bootstrap - cache image data
     create_daemon(self.latest_hubble)
Example #35
def get_profile(request, provider_name):
    provider = SOCIAL_AUTH_PROVIDERS[provider_name]()

    # Prepare the session for fetching the token.
    session = OAuth2Session(
        client_id=provider.client_id,
        scope=provider.scope,
        state=request.session.get('social_login_state', ''),
        redirect_uri=request.build_absolute_uri(
            reverse('social_login_callback',
                    kwargs={'provider_name': provider_name})),
    )

    # Clear the session state data before continuing.
    if 'social_login_state' in request.session:
        del request.session['social_login_state']

    # Go and fetch the oauth token.
    token = session.fetch_token(
        token_url=provider.token_uri,
        client_secret=provider.client_secret,
        authorization_response=request.build_absolute_uri())

    # Get the id_token from the oauth token.
    unparsed_id_token = token['id_token']

    # Retrieve the certificates from the provider, this is in json format and is cached for 1 hour.
    expire_after = datetime.timedelta(hours=1)
    cached_session = CachedSession(backend='memory', expire_after=expire_after)
    provider_certificates = cached_session.get(
        provider.jwks_uri).json().get('keys')

    # Put the certificates in a dict, with the identifier as key.
    certificate_set = {cert['kid']: cert for cert in provider_certificates}

    # Look up which certificate was used to sign the id_token.
    kid = jwt.get_unverified_header(unparsed_id_token).get('kid')

    # Convert the certificate from json to something the jwt library can use.
    certificate = RSAAlgorithm.from_jwk(json.dumps(certificate_set[kid]))

    # Now finally do the actual decoding and verifying.
    id_token = jwt.decode(unparsed_id_token,
                          certificate,
                          audience=provider.client_id)

    # Put the parsed id_token in the response.
    token['id_token'] = id_token

    return provider.parse_profile(session, token)
Example #36
    def test_ignore_parameters_post_raw(self):
        url = httpbin("post")
        ignored_param = "ignored"
        raw_data = "raw test data"

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('POST',),
                          ignored_parameters=[ignored_param])

        self.assertFalse(s.post(url, data=raw_data).from_cache)
        self.assertTrue(s.post(url, data=raw_data).from_cache)

        raw_data = "new raw data"
        self.assertFalse(s.post(url, data=raw_data).from_cache)
Example #37
def api_get(endpoint, query, cache=False):
    if not cache:
        s = CachedSession()
        with s.cache_disabled():
            r = s.get(
                app.config['API_URL'] + endpoint + '?q=' + urllib.parse.quote_plus(json.dumps(query)),
                headers=gen_api_header(),
                verify=app.config['VERIFY_SSL'])
    else:
        r = requests.get(
            app.config['API_URL'] + endpoint + '?q=' + urllib.parse.quote_plus(json.dumps(query)),
            headers=gen_api_header(),
            verify=app.config['VERIFY_SSL'])
    if r.status_code == 200:
        # If created then it returns the object data
        return json.loads(r.text).get('objects')
    else:
        return {}
Example #38
    def test_ignore_parameters_get(self):
        url = httpbin("get")
        ignored_param = "ignored"
        usual_param = "some"
        params = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          ignored_parameters=[ignored_param])

        r = s.get(url, params=params)
        self.assertIn(ignored_param, r.json()['args'].keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.get(url, params=params).from_cache)

        params[ignored_param] = "new"
        self.assertTrue(s.get(url, params=params).from_cache)

        params[usual_param] = "new"
        self.assertFalse(s.get(url, params=params).from_cache)
Example #39
    def test_headers_in_get_query(self):
        url = httpbin("get")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, include_get_headers=True)
        headers = {"Accept": "text/json"}
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        headers["Accept"] = "text/xml"
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        headers["X-custom-header"] = "custom"
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        self.assertFalse(s.get(url).from_cache)
        self.assertTrue(s.get(url).from_cache)
Example #40
    def test_ignore_parameters_post_json(self):
        url = httpbin("post")
        ignored_param = "ignored"
        usual_param = "some"
        d = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('POST',),
                          ignored_parameters=[ignored_param])

        r = s.post(url, json=d)
        self.assertIn(ignored_param, json.loads(r.json()['data']).keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.post(url, json=d).from_cache)

        d[ignored_param] = "new"
        self.assertTrue(s.post(url, json=d).from_cache)

        d[usual_param] = "new"
        self.assertFalse(s.post(url, json=d).from_cache)
Example #41
    def test_post_params(self):
        # issue #2
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))

        d = {'param1': 'test1'}
        for _ in range(2):
            self.assertEqual(self.post(d)['form'], d)
            d = {'param1': 'test1', 'param3': 'test3'}
            self.assertEqual(self.post(d)['form'], d)

        self.assertTrue(self.s.post(httpbin('post'), data=d).from_cache)
        d.update({'something': 'else'})
        self.assertFalse(self.s.post(httpbin('post'), data=d).from_cache)
Example #42
    def test_remove_expired_entries(self, datetime_mock, datetime_mock2):
        expire_after = timedelta(minutes=10)
        start_time = datetime.utcnow().replace(year=2010, minute=0)
        datetime_mock.utcnow.return_value = start_time
        datetime_mock2.utcnow.return_value = start_time

        s = CachedSession(CACHE_NAME, CACHE_BACKEND, expire_after=expire_after)
        s.get(httpbin('get'))
        s.get(httpbin('relative-redirect/3'))
        datetime_mock.utcnow.return_value = start_time + expire_after * 2
        datetime_mock2.utcnow.return_value = datetime_mock.utcnow.return_value

        ok_url = 'get?x=1'
        s.get(httpbin(ok_url))
        self.assertEqual(len(s.cache.responses), 3)
        self.assertEqual(len(s.cache.keys_map), 3)
        s.remove_expired_responses()
        self.assertEqual(len(s.cache.responses), 1)
        self.assertEqual(len(s.cache.keys_map), 0)
        self.assertIn(ok_url, list(s.cache.responses.values())[0][0].url)
Example #43
    def __init__(self,
                 url=None,
                 max_file_size=None,
                 allowed_content_types=(),
                 jsfetcher=None,
                 marker=r'<meta.*name="Generator".*>'):
        logger.info('Running downloader for url: %s' % url)
        self.url = str(url)
        self.max_file_size = max_file_size or settings.RFDOCS.get('MAX_FILE_SIZE')
        self.allowed_content_types = allowed_content_types or settings.RFDOCS.get('ALLOWED_CONTENT_TYPES')

        self.s = CachedSession()
        self.r = None
        self.error = {}
        self.jsfetcher = jsfetcher
        self.marker = marker
Example #44
 def test_ignore_cgi_parameter(self):
     s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=5)
     s.ignore_cgi('foo') 
     url = httpbin('get?foo=1')
     r = s.get(url)
     self.assertFalse(r.from_cache)
     r = s.get(url)
     self.assertTrue(r.from_cache)
     url = httpbin('get')
     r = s.get(url)
     self.assertTrue(r.from_cache)
Example #45
    def test_ignore_parameters(self):
        url = httpbin("get")
        ignored_param = "ignored"
        usual_param = "some"
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, ignored_parameters=[ignored_param])

        params = {ignored_param: "1", usual_param: "1"}
        s.get(url, params=params)
        self.assertTrue(s.get(url, params=params).from_cache)

        params[ignored_param] = "new"
        self.assertTrue(s.get(url, params=params).from_cache)

        params[usual_param] = "new"
        self.assertFalse(s.get(url, params=params).from_cache)
Example #46
    def test_post_data(self):
        # issue #2, raw payload
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))
        d1 = json.dumps({'param1': 'test1'})
        d2 = json.dumps({'param1': 'test1', 'param2': 'test2'})
        d3 = str('some unicode data')
        if is_py3:
            bin_data = bytes('some binary data', 'utf8')
        else:
            bin_data = bytes('some binary data')

        for d in (d1, d2, d3):
            self.assertEqual(self.post(d)['data'], d)
            r = self.s.post(httpbin('post'), data=d)
            self.assert_(hasattr(r, 'from_cache'))

        self.assertEqual(self.post(bin_data)['data'],
                         bin_data.decode('utf8'))
        r = self.s.post(httpbin('post'), data=bin_data)
        self.assert_(hasattr(r, 'from_cache'))
Example #47
class CacheTestCase(unittest.TestCase):

    def setUp(self):
        self.s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
        self.s.cache.clear()
        requests_cache.uninstall_cache()

    def test_expire_cache(self):
        delay = 1
        url = httpbin('delay/%s' % delay)
        s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.06)
        t = time.time()
        r = s.get(url)
        delta = time.time() - t
        self.assertGreaterEqual(delta, delay)
        time.sleep(0.5)
        t = time.time()
        r = s.get(url)
        delta = time.time() - t
        self.assertGreaterEqual(delta, delay)

    def test_delete_urls(self):
        url = httpbin('relative-redirect/3')
        r = self.s.get(url)
        for i in range(1, 4):
            self.assert_(self.s.cache.has_url(httpbin('relative-redirect/%s' % i)))
        self.s.cache.delete_url(url)
        self.assert_(not self.s.cache.has_url(url))

    def test_unregistered_backend(self):
        with self.assertRaises(ValueError):
            CachedSession(CACHE_NAME, backend='nonexistent')

    def test_hooks(self):
        state = defaultdict(int)
        for hook in ('response',):  # TODO it's only one hook here

            def hook_func(r, *args, **kwargs):
                state[hook] += 1
                return r
            n = 5
            for i in range(n):
                r = self.s.get(httpbin('get'), hooks={hook: hook_func})
            self.assertEqual(state[hook], n)

    def test_attr_from_cache_in_hook(self):
        state = defaultdict(int)
        hook = 'response'

        def hook_func(r, *args, **kwargs):
            if state[hook] > 0:
                self.assert_(r.from_cache, True)
            state[hook] += 1
            return r
        n = 5
        for i in range(n):
            r = self.s.get(httpbin('get'), hooks={hook: hook_func})
        self.assertEqual(state[hook], n)

    def test_post(self):
        url = httpbin('post')
        r1 = json.loads(self.s.post(url, data={'test1': 'test1'}).text)
        r2 = json.loads(self.s.post(url, data={'test2': 'test2'}).text)
        self.assertIn('test2', r2['form'])
        req = Request('POST', url).prepare()
        self.assert_(not self.s.cache.has_key(self.s.cache.create_key(req)))

    def test_disabled(self):

        url = httpbin('get')
        requests_cache.install_cache(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
        requests.get(url)
        with requests_cache.disabled():
            for i in range(2):
                r = requests.get(url)
                self.assertFalse(getattr(r, 'from_cache', False))
        with self.s.cache_disabled():
            for i in range(2):
                r = self.s.get(url)
                self.assertFalse(getattr(r, 'from_cache', False))
        r = self.s.get(url)
        self.assertTrue(getattr(r, 'from_cache', False))

    def test_enabled(self):
        url = httpbin('get')
        options = dict(cache_name=CACHE_NAME, backend=CACHE_BACKEND,
                       fast_save=FAST_SAVE)
        with requests_cache.enabled(**options):
            r = requests.get(url)
            self.assertFalse(getattr(r, 'from_cache', False))
            for i in range(2):
                r = requests.get(url)
                self.assertTrue(getattr(r, 'from_cache', False))
        r = requests.get(url)
        self.assertFalse(getattr(r, 'from_cache', False))

    def test_content_and_cookies(self):
        requests_cache.install_cache(CACHE_NAME, CACHE_BACKEND)
        s = requests.session()
        def js(url):
            return json.loads(s.get(url).text)
        r1 = js(httpbin('cookies/set/test1/test2'))
        with requests_cache.disabled():
            r2 = js(httpbin('cookies'))
        self.assertEqual(r1, r2)
        r3 = js(httpbin('cookies'))
        with requests_cache.disabled():
            r4 = js(httpbin('cookies/set/test3/test4'))
        # from cache
        self.assertEqual(r3, js(httpbin('cookies')))
        # updated
        with requests_cache.disabled():
            self.assertEqual(r4, js(httpbin('cookies')))

    def test_response_history(self):
        r1 = self.s.get(httpbin('relative-redirect/3'))
        def test_redirect_history(url):
            r2 = self.s.get(url)
            self.assertTrue(r2.from_cache)
            for r11, r22 in zip(r1.history, r2.history):
                self.assertEqual(r11.url, r22.url)
        test_redirect_history(httpbin('relative-redirect/3'))
        test_redirect_history(httpbin('relative-redirect/2'))
        r3 = requests.get(httpbin('relative-redirect/1'))
        self.assertEqual(len(r3.history), 1)

    def test_response_history_simple(self):
        r1 = self.s.get(httpbin('relative-redirect/2'))
        r2 = self.s.get(httpbin('relative-redirect/1'))
        self.assertTrue(r2.from_cache)

    def post(self, data):
        return json.loads(self.s.post(httpbin('post'), data=data).text)

    def test_post_params(self):
        # issue #2
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))

        d = {'param1': 'test1'}
        for _ in range(2):
            self.assertEqual(self.post(d)['form'], d)
            d = {'param1': 'test1', 'param3': 'test3'}
            self.assertEqual(self.post(d)['form'], d)

        self.assertTrue(self.s.post(httpbin('post'), data=d).from_cache)
        d.update({'something': 'else'})
        self.assertFalse(self.s.post(httpbin('post'), data=d).from_cache)

    def test_post_data(self):
        # issue #2, raw payload
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))
        d1 = json.dumps({'param1': 'test1'})
        d2 = json.dumps({'param1': 'test1', 'param2': 'test2'})
        d3 = str('some unicode data')
        if is_py3:
            bin_data = bytes('some binary data', 'utf8')
        else:
            bin_data = bytes('some binary data')

        for d in (d1, d2, d3):
            self.assertEqual(self.post(d)['data'], d)
            r = self.s.post(httpbin('post'), data=d)
            self.assert_(hasattr(r, 'from_cache'))

        self.assertEqual(self.post(bin_data)['data'],
                         bin_data.decode('utf8'))
        r = self.s.post(httpbin('post'), data=bin_data)
        self.assert_(hasattr(r, 'from_cache'))

    def test_get_params_as_argument(self):
        for _ in range(5):
            p = {'arg1': 'value1'}
            r = self.s.get(httpbin('get'), params=p)
            self.assert_(self.s.cache.has_url( httpbin('get?arg1=value1')))

    def test_https_support(self):
        n = 10
        delay = 1
        url = 'https://httpbin.org/delay/%s?ar1=value1' % delay
        t = time.time()
        for _ in range(n):
            r = self.s.get(url, verify=False)
        self.assertLessEqual(time.time() - t, delay * n / 2)

    def test_from_cache_attribute(self):
        url = httpbin('get?q=1')
        self.assertFalse(self.s.get(url).from_cache)
        self.assertTrue(self.s.get(url).from_cache)
        self.s.cache.clear()
        self.assertFalse(self.s.get(url).from_cache)

    def test_gzip_response(self):
        url = httpbin('gzip')
        self.assertFalse(self.s.get(url).from_cache)
        self.assertTrue(self.s.get(url).from_cache)

    def test_close_response(self):
        for _ in range(3):
            r = self.s.get(httpbin("get"))
            r.close()

    def test_get_parameters_normalization(self):
        url = httpbin("get")
        params = {"a": "a", "b": ["1", "2", "3"], "c": "4"}

        self.assertFalse(self.s.get(url, params=params).from_cache)
        r = self.s.get(url, params=params)
        self.assertTrue(r.from_cache)
        self.assertEquals(r.json()["args"], params)
        self.assertFalse(self.s.get(url, params={"a": "b"}).from_cache)
        self.assertTrue(self.s.get(url, params=sorted(params.items())).from_cache)

        class UserSubclass(dict):
            def items(self):
                return sorted(super(UserSubclass, self).items(), reverse=True)

        params["z"] = "5"
        custom_dict = UserSubclass(params)
        self.assertFalse(self.s.get(url, params=custom_dict).from_cache)
        self.assertTrue(self.s.get(url, params=custom_dict).from_cache)

    def test_post_parameters_normalization(self):
        params = {"a": "a", "b": ["1", "2", "3"], "c": "4"}
        url = httpbin("post")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('GET', 'POST'))
        self.assertFalse(s.post(url, data=params).from_cache)
        self.assertTrue(s.post(url, data=params).from_cache)
        self.assertTrue(s.post(url, data=sorted(params.items())).from_cache)
        self.assertFalse(s.post(url, data=sorted(params.items(), reverse=True)).from_cache)

    def test_stream_requests_support(self):
        n = 100
        url = httpbin("stream/%s" % n)
        r = self.s.get(url, stream=True)
        lines = list(r.iter_lines())
        self.assertEquals(len(lines), n)

        for i in range(2):
            r = self.s.get(url, stream=True)
            self.assertTrue(r.from_cache)
            cached_lines = list(r.iter_lines())
            self.assertEquals(cached_lines, lines)

    def test_headers_in_get_query(self):
        url = httpbin("get")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, include_get_headers=True)
        headers = {"Accept": "text/json"}
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        headers["Accept"] = "text/xml"
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        headers["X-custom-header"] = "custom"
        self.assertFalse(s.get(url, headers=headers).from_cache)
        self.assertTrue(s.get(url, headers=headers).from_cache)

        self.assertFalse(s.get(url).from_cache)
        self.assertTrue(s.get(url).from_cache)

    def test_str_and_repr(self):
        s = repr(CachedSession(CACHE_NAME, CACHE_BACKEND, expire_after=10))
        self.assertIn(CACHE_NAME, s)
        self.assertIn("10", s)

    @mock.patch("requests_cache.core.datetime")
    def test_return_old_data_on_error(self, datetime_mock):
        datetime_mock.utcnow.return_value = datetime.utcnow()
        expire_after = 100
        url = httpbin("get")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, old_data_on_error=True, expire_after=expire_after)
        header = "X-Tst"

        def get(n):
            return s.get(url, headers={header: n}).json()["headers"][header]

        get("expired")
        self.assertEquals(get("2"), "expired")
        datetime_mock.utcnow.return_value = datetime.utcnow() + timedelta(seconds=expire_after * 2)

        with mock.patch.object(s.cache, "save_response", side_effect=Exception):
            self.assertEquals(get("3"), "expired")

        with mock.patch("requests_cache.core.OriginalSession.send") as send_mock:
            resp_mock = requests.Response()
            request = requests.Request("GET", url)
            resp_mock.request = request.prepare()
            resp_mock.status_code = 400
            resp_mock._content = '{"other": "content"}'
            send_mock.return_value = resp_mock
            self.assertEquals(get("3"), "expired")

            resp_mock.status_code = 200
            self.assertIs(s.get(url).content, resp_mock.content)

        # default behaviour
        datetime_mock.utcnow.return_value = datetime.utcnow() + timedelta(seconds=expire_after * 2)
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, old_data_on_error=False, expire_after=100)
        with mock.patch.object(s.cache, "save_response", side_effect=Exception):
            with self.assertRaises(Exception):
                s.get(url)

    def test_ignore_parameters_get(self):
        url = httpbin("get")
        ignored_param = "ignored"
        usual_param = "some"
        params = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          ignored_parameters=[ignored_param])

        r = s.get(url, params=params)
        self.assertIn(ignored_param, r.json()['args'].keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.get(url, params=params).from_cache)

        params[ignored_param] = "new"
        self.assertTrue(s.get(url, params=params).from_cache)

        params[usual_param] = "new"
        self.assertFalse(s.get(url, params=params).from_cache)

    def test_ignore_parameters_post(self):
        url = httpbin("post")
        ignored_param = "ignored"
        usual_param = "some"
        d = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('POST',),
                          ignored_parameters=[ignored_param])

        r = s.post(url, data=d)
        self.assertIn(ignored_param, r.json()['form'].keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.post(url, data=d).from_cache)

        d[ignored_param] = "new"
        self.assertTrue(s.post(url, data=d).from_cache)

        d[usual_param] = "new"
        self.assertFalse(s.post(url, data=d).from_cache)

    def test_ignore_parameters_post_json(self):
        url = httpbin("post")
        ignored_param = "ignored"
        usual_param = "some"
        d = {ignored_param: "1", usual_param: "1"}

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('POST',),
                          ignored_parameters=[ignored_param])

        r = s.post(url, json=d)
        self.assertIn(ignored_param, json.loads(r.json()['data']).keys())
        self.assertFalse(r.from_cache)

        self.assertTrue(s.post(url, json=d).from_cache)

        d[ignored_param] = "new"
        self.assertTrue(s.post(url, json=d).from_cache)

        d[usual_param] = "new"
        self.assertFalse(s.post(url, json=d).from_cache)

    def test_ignore_parameters_post_raw(self):
        url = httpbin("post")
        ignored_param = "ignored"
        raw_data = "raw test data"

        s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                          allowable_methods=('POST',),
                          ignored_parameters=[ignored_param])

        self.assertFalse(s.post(url, data=raw_data).from_cache)
        self.assertTrue(s.post(url, data=raw_data).from_cache)

        raw_data = "new raw data"
        self.assertFalse(s.post(url, data=raw_data).from_cache)

    @mock.patch("requests_cache.backends.base.datetime")
    @mock.patch("requests_cache.core.datetime")
    def test_remove_expired_entries(self, datetime_mock, datetime_mock2):
        expire_after = timedelta(minutes=10)
        start_time = datetime.utcnow().replace(year=2010, minute=0)
        datetime_mock.utcnow.return_value = start_time
        datetime_mock2.utcnow.return_value = start_time

        s = CachedSession(CACHE_NAME, CACHE_BACKEND, expire_after=expire_after)
        s.get(httpbin('get'))
        s.get(httpbin('relative-redirect/3'))
        datetime_mock.utcnow.return_value = start_time + expire_after * 2
        datetime_mock2.utcnow.return_value = datetime_mock.utcnow.return_value

        ok_url = 'get?x=1'
        s.get(httpbin(ok_url))
        self.assertEqual(len(s.cache.responses), 3)
        self.assertEqual(len(s.cache.keys_map), 3)
        s.remove_expired_responses()
        self.assertEqual(len(s.cache.responses), 1)
        self.assertEqual(len(s.cache.keys_map), 0)
        self.assertIn(ok_url, list(s.cache.responses.values())[0][0].url)
Example #48
class DownloadExternalResource(object):
    def __init__(self,
                 url=None,
                 max_file_size=None,
                 allowed_content_types=(),
                 jsfetcher=None,
                 marker=r'<meta.*name="Generator".*>'):
        logger.info('Running downloader for url: %s' % url)
        self.url = str(url)
        self.max_file_size = max_file_size or settings.RFDOCS.get('MAX_FILE_SIZE')
        self.allowed_content_types = allowed_content_types or settings.RFDOCS.get('ALLOWED_CONTENT_TYPES')

        self.s = CachedSession()
        self.r = None
        self.error = {}
        self.jsfetcher = jsfetcher
        self.marker = marker

    def _set_error(self, key, value):
        self.error[key] = value
        return self.error

    def _no_error(self):
        self.error = {}
        return self.error

    def send_request(self):
        logger.info('Download external resource: %s' % self.url)
        try:
            self.r = self.s.get(self.url)
        except (requests.exceptions.MissingSchema,
                requests.exceptions.InvalidSchema,
                requests.exceptions.ConnectionError,) as error:
            logger.warn("Failed to download resource. Error: %s" % error)
            self._set_error('error', error)

    def get_response(self):
        # Workaround for Robot Framework libraries with version >= 2.8 (or even 2.7).
        # Libraries are generated with the jQuery templates system.
        # Python's `requests` module fetches raw content, of course not rendered HTML.
        # Thus the parser will fail in such cases.
        # For this reason the awesome PhantomJS is used. Unfortunately the `requests_cache` module
        # won't help here because PhantomJS has to request the URL by its
        # own methods to be able to render HTML.
        # One more note: I don't use PhantomJS's native features to actually write data to the filesystem
        # (PhantomJS has an `fs` module to work with the filesystem).
        # Why? I don't know yet :)

        # Instead, use `subprocess.Popen` to execute the JavaScript code,
        # let PhantomJS do its job, and give the output back to Python
        # (a sketch of this hand-off follows the example).

        # Anyway, I want the resource to be validated by Django's validators before we proceed.
        # So if we got here, the validation has passed.

        # First, check if the response content has a meta tag with name="Generator".
        # If so, the document uses the jQuery templates plugin system and we need the help of PhantomJS.
        content = self.r.content
        mo = re.search(self.marker, content, re.DOTALL | re.M | re.I)
        if not mo:
            return content
        logger.info('Using phantomjs to download resource')
        alternate_downloader = PhantomJSHelper(url=self.url, error_callback=self._set_error)
        return alternate_downloader.get_content()

    def validate_response(self):
        logger.info('Validate external resource: %s' % self.url)
        if not self.r:
            self.send_request()
        if self.error:
            return self.error
        try:
            self.r.raise_for_status()
        except requests.exceptions.HTTPError as error:
            logger.warn("Failed to fetch resource. Error: %s" % error)
            return self._set_error('error', error)
        if self.r.status_code == requests.codes.ok:
            content_len = self.r.headers.get('content-length', None)
            if content_len:
                csize = int(content_len)
            else:
                csize = len(self.r.content)
            if csize < self.max_file_size:
                ctype_header = self.r.headers.get('content-type')
                if not ctype_header:
                    return self._set_error('content_type',
                                           'Response does not contain the \'Content-Type\' header. Rejected.')
                ctype = ctype_header.split(';')[0].lower()
                if ctype in [ct.lower() for ct in self.allowed_content_types]:
                    # all checks passed; we succeeded
                    return self._no_error()
                else:
                    logger.warn("Failed to fetch resource. "
                                "Allowed content types are: %s."
                                "The content type is: %s" % (', '.join(self.allowed_content_types), ctype,))
                    return self._set_error('content_type', ctype)

            else:
                logger.warn("Failed to fetch resource. "
                            "The content size \'%s\' exceeds maximum allowed size: %s" % (self.max_file_size, csize))
                return self._set_error('content_size', csize)
        else:
            error = requests.exceptions.HTTPError(self.r.status_code)
            logger.warn("Failed to fetch resource. Error: %s" % error)
            return self._set_error('error', error)

    def get_response_from_cache(self):
        if not self.s.cache.has_url(self.url):
            self.send_request()
            self.validate_response()
            return self.get_response()
        self.r = self.s.get(self.url)
        return self.get_response()

    def get_response_from_cache_or_raise_error(self):
        response = self.get_response_from_cache()
        if self.error:
            err = self.error.get('error')
            # these are model's `clean` ValidationError (not the same as forms.ValidationError)
            if err:
                raise exceptions.ValidationError(err)
            else:
                raise exceptions.ValidationError(self.error)
        return response
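
PhantomJSHelper itself is not shown; the comments in get_response describe handing the URL to PhantomJS via subprocess.Popen and reading the rendered HTML back. A minimal hedged sketch of that hand-off, assuming a phantomjs binary on PATH and a hypothetical render.js script that prints the rendered page to stdout:

import subprocess

def render_with_phantomjs(url, script='render.js', timeout=30):
    # Let PhantomJS fetch and render the page, then collect the HTML it
    # writes to stdout; stderr is kept separately for diagnostics.
    proc = subprocess.Popen(
        ['phantomjs', script, url],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    out, err = proc.communicate(timeout=timeout)
    if proc.returncode != 0:
        raise RuntimeError('phantomjs failed: %s' % err.decode('utf-8', 'replace'))
    return out.decode('utf-8', 'replace')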
Example #49
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
from requests_cache import CachedSession

cs = CachedSession(allowable_methods=('GET', 'POST'))
cs.cache.clear()
for i in range(2):
    r = cs.get("http://httpbin.org/get?p1=v1", params={'p2': 'v2', 'p3': 'cyrЯЯ'})
    print(r)
    print(r.from_cache)

Example #50
import argparse
import sys
import time

from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from requests_cache import CachedSession

HEADER = ['url', 'link', 'title', 'description', 'content', 'topics', 'organisations']

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('input_file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
parser.add_argument('--environment', '-e', dest='root_url', default='https://www-origin.staging.publishing.service.gov.uk', help='the environment used to query the search API')
parser.add_argument('--skip', '-s', dest='skip', type=int, default=0, help='Number of input rows to skip. Can be used to resume a partially completed import')
parser.add_argument('--skip-redirects', '-r', dest='skip_redirects', action='store_true', help="Don't test URLs on GOV.UK to resolve redirected links.")
parser.add_argument('--wait-time', '-w', dest='wait_time', type=float, default=0.1, help='Time to wait between each link, to work around rate limiting.')
args = parser.parse_args()

session = CachedSession(cache_name='govuk_cache', backend='sqlite')
retries = Retry(total=5, backoff_factor=args.wait_time, status_forcelist=[ 429 ])
session.mount('http://', HTTPAdapter(max_retries=retries))
session.mount('https://', HTTPAdapter(max_retries=retries))


def test_base_path(original_base_path, args):
    """
    Given a base path, try and classify it as valid, redirected, or gone,
    so that we can fetch data even when the link has been redirected.

    If it can't be retrieved, return None, otherwise return the ultimate base path.

    We might include the same document multiple times in our analysis, but
    this should only happen for a small number of links, and we can strip
    out duplicates later.
    """
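
The function body is cut off here. A hedged reconstruction of how such a redirect check could look (a guess at the intent, not the original implementation), reusing the retry-mounted session and args from the setup above:

def test_base_path_sketch(original_base_path, args):
    # Hypothetical sketch: follow redirects and report the ultimate path.
    response = session.get(args.root_url + original_base_path, allow_redirects=True)
    time.sleep(args.wait_time)           # be polite between links
    if not response.ok:                  # covers 410 Gone and other failures
        return None
    # requests resolves the redirect chain, so response.url is the final URL.
    return response.url.replace(args.root_url, '', 1)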
Beispiel #51
0
class CacheTestCase(unittest.TestCase):

    def setUp(self):
        self.s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
        self.s.cache.clear()
        requests_cache.uninstall_cache()

    def test_expire_cache(self):
        delay = 1
        url = httpbin('delay/%s' % delay)
        s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.06)
        t = time.time()
        r = s.get(url)
        delta = time.time() - t
        self.assertGreaterEqual(delta, delay)
        time.sleep(0.5)
        t = time.time()
        r = s.get(url)
        delta = time.time() - t
        self.assertGreaterEqual(delta, delay)

    def test_delete_urls(self):
        url = httpbin('redirect/3')
        r = self.s.get(url)
        for i in range(1, 4):
            self.assertTrue(self.s.cache.has_url(httpbin('redirect/%s' % i)))
        self.s.cache.delete_url(url)
        self.assertFalse(self.s.cache.has_url(url))

    def test_unregistered_backend(self):
        with self.assertRaises(ValueError):
            CachedSession(CACHE_NAME, backend='nonexistent')

#    def test_async_compatibility(self):
#        try:
#            import grequests
#        except Exception:
#            self.skipTest('gevent is not installed')
#        n = 3
#        def long_running():
#            t = time.time()
#            rs = [grequests.get(httpbin('delay/%s' % i)) for i in range(n + 1)]
#            grequests.map(rs)
#            return time.time() - t
#        # cache it
#        delta = long_running()
#        self.assertGreaterEqual(delta, n)
#        # fast from cache
#        delta = 0
#        for i in range(n):
#            delta += long_running()
#        self.assertLessEqual(delta, 1)

    def test_hooks(self):
        state = defaultdict(int)
        for hook in ('response',):  # TODO it's only one hook here

            def hook_func(r, *args, **kwargs):
                state[hook] += 1
                return r
            n = 5
            for i in range(n):
                r = self.s.get(httpbin('get'), hooks={hook: hook_func})
            self.assertEqual(state[hook], n)

    def test_attr_from_cache_in_hook(self):
        state = defaultdict(int)
        hook = 'response'

        def hook_func(r, *args, **kwargs):
            if state[hook] > 0:
                self.assertTrue(r.from_cache)
            state[hook] += 1
            return r
        n = 5
        for i in range(n):
            r = self.s.get(httpbin('get'), hooks={hook: hook_func})
        self.assertEqual(state[hook], n)

    def test_post(self):
        url = httpbin('post')
        r1 = json.loads(self.s.post(url, data={'test1': 'test1'}).text)
        r2 = json.loads(self.s.post(url, data={'test2': 'test2'}).text)
        self.assertIn('test2', r2['form'])
        req = Request('POST', url).prepare()
        self.assertFalse(self.s.cache.has_key(self.s.cache.create_key(req)))

    def test_disabled(self):

        url = httpbin('get')
        requests_cache.install_cache(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
        requests.get(url)
        with requests_cache.disabled():
            for i in range(2):
                r = requests.get(url)
                self.assertFalse(getattr(r, 'from_cache', False))
        with self.s.cache_disabled():
            for i in range(2):
                r = self.s.get(url)
                self.assertFalse(getattr(r, 'from_cache', False))
        r = self.s.get(url)
        self.assertTrue(getattr(r, 'from_cache', False))

    def test_enabled(self):
        url = httpbin('get')
        options = dict(cache_name=CACHE_NAME, backend=CACHE_BACKEND,
                       fast_save=FAST_SAVE)
        with requests_cache.enabled(**options):
            r = requests.get(url)
            self.assertFalse(getattr(r, 'from_cache', False))
            for i in range(2):
                r = requests.get(url)
                self.assertTrue(getattr(r, 'from_cache', False))
        r = requests.get(url)
        self.assertFalse(getattr(r, 'from_cache', False))

    def test_content_and_cookies(self):
        requests_cache.install_cache(CACHE_NAME, CACHE_BACKEND)
        s = requests.session()
        def js(url):
            return json.loads(s.get(url).text)
        r1 = js(httpbin('cookies/set/test1/test2'))
        with requests_cache.disabled():
            r2 = js(httpbin('cookies'))
        self.assertEqual(r1, r2)
        r3 = js(httpbin('cookies'))
        with requests_cache.disabled():
            r4 = js(httpbin('cookies/set/test3/test4'))
        # from cache
        self.assertEqual(r3, js(httpbin('cookies')))
        # updated
        with requests_cache.disabled():
            self.assertEqual(r4, js(httpbin('cookies')))

    def test_response_history(self):
        r1 = self.s.get(httpbin('redirect/3'))
        def test_redirect_history(url):
            r2 = self.s.get(url)
            self.assertTrue(r2.from_cache)
            for r11, r22 in zip(r1.history, r2.history):
                self.assertEqual(r11.url, r22.url)
        test_redirect_history(httpbin('redirect/3'))
        test_redirect_history(httpbin('redirect/2'))
        r3 = requests.get(httpbin('redirect/1'))
        self.assertEqual(len(r3.history), 1)

    def test_response_history_simple(self):
        r1 = self.s.get(httpbin('redirect/2'))
        r2 = self.s.get(httpbin('redirect/1'))
        self.assertTrue(r2.from_cache)

    def post(self, data):
        return json.loads(self.s.post(httpbin('post'), data=data).text)

    def test_post_params(self):
        # issue #2
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))

        d = {'param1': 'test1'}
        for _ in range(2):
            self.assertEqual(self.post(d)['form'], d)
            d = {'param1': 'test1', 'param3': 'test3'}
            self.assertEqual(self.post(d)['form'], d)

        self.assertTrue(self.s.post(httpbin('post'), data=d).from_cache)
        d.update({'something': 'else'})
        self.assertFalse(self.s.post(httpbin('post'), data=d).from_cache)

    def test_post_data(self):
        # issue #2, raw payload
        self.s = CachedSession(CACHE_NAME, CACHE_BACKEND,
                               allowable_methods=('GET', 'POST'))
        d1 = json.dumps({'param1': 'test1'})
        d2 = json.dumps({'param1': 'test1', 'param2': 'test2'})
        d3 = str('some unicode data')
        if is_py3:
            bin_data = bytes('some binary data', 'utf8')
        else:
            bin_data = bytes('some binary data')

        for d in (d1, d2, d3):
            self.assertEqual(self.post(d)['data'], d)
            r = self.s.post(httpbin('post'), data=d)
            self.assertTrue(hasattr(r, 'from_cache'))

        self.assertEqual(self.post(bin_data)['data'],
                         bin_data.decode('utf8'))
        r = self.s.post(httpbin('post'), data=bin_data)
        self.assertTrue(hasattr(r, 'from_cache'))

    def test_get_params_as_argument(self):
        for _ in range(5):
            p = {'arg1': 'value1'}
            r = self.s.get(httpbin('get'), params=p)
            self.assertTrue(self.s.cache.has_url(httpbin('get?arg1=value1')))

    def test_https_support(self):
        n = 10
        delay = 1
        url = 'https://httpbin.org/delay/%s?ar1=value1' % delay
        t = time.time()
        for _ in range(n):
            r = self.s.get(url, verify=False)
        self.assertLessEqual(time.time() - t, delay * n / 2)

    def test_from_cache_attribute(self):
        url = httpbin('get?q=1')
        self.assertFalse(self.s.get(url).from_cache)
        self.assertTrue(self.s.get(url).from_cache)
        self.s.cache.clear()
        self.assertFalse(self.s.get(url).from_cache)

    def test_gzip_response(self):
        url = httpbin('gzip')
        self.assertFalse(self.s.get(url).from_cache)
        self.assertTrue(self.s.get(url).from_cache)
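
The tests above also exercise the module-level API: install_cache() patches requests globally, disabled() and enabled() toggle caching temporarily, and uninstall_cache() restores plain requests. A minimal standalone sketch of that flow:

import requests
import requests_cache

requests_cache.install_cache('demo_cache', backend='sqlite')
r1 = requests.get('http://httpbin.org/get')      # fetched and stored
r2 = requests.get('http://httpbin.org/get')      # r2.from_cache is True
with requests_cache.disabled():
    r3 = requests.get('http://httpbin.org/get')  # bypasses the cache entirely
requests_cache.uninstall_cache()                 # back to plain requests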
Beispiel #52
0
	def request(self, *args, **kwargs):
		kwargs.update(self._init["kwargs"])
		return CachedSession.request(self, *args, **kwargs)
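
This fragment merges keyword arguments captured at construction time into every request; note that update() makes the stored defaults win over per-call arguments. A self-contained sketch of the same pattern (the subclass name and the timeout default are illustrative, not from the original):

from requests_cache import CachedSession

class DefaultKwargsSession(CachedSession):
    def __init__(self, *args, **kwargs):
        # Illustrative default; the original's _init contents are not shown.
        self._init = {"kwargs": {"timeout": 10}}
        super(DefaultKwargsSession, self).__init__(*args, **kwargs)

    def request(self, *args, **kwargs):
        kwargs.update(self._init["kwargs"])  # stored defaults override caller kwargs
        return super(DefaultKwargsSession, self).request(*args, **kwargs)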
Beispiel #53
0
    def test_expire_cache_override_value_error(self):
        url = httpbin('delay/1')
        s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, expire_after=1)
        with self.assertRaises(ValueError):
            s.expire_after(url, 2)
Beispiel #54
0
    def setUp(self):
        self.s = CachedSession(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
        self.s.cache.clear()
        requests_cache.uninstall_cache()