Beispiel #1
0
def main():
    """Demonstrate requests-cache expiration: never, per-session, per-request."""
    session = CachedSession('example_cache', backend='sqlite')

    # Cached responses have no expiration by default.
    resp = session.get('https://httpbin.org/get')
    assert not resp.from_cache
    resp = session.get('https://httpbin.org/get')
    assert resp.from_cache
    assert not resp.expires

    # A session-wide default expiration can be set with expire_after.
    session = CachedSession('example_cache', backend='sqlite', expire_after=60)
    session.cache.clear()
    resp = session.get('https://httpbin.org/get')
    resp = session.get('https://httpbin.org/get')
    print('Expiration time:', resp.expires)

    # Individual requests may override the session default.
    session.cache.clear()
    resp = session.get('https://httpbin.org/get', expire_after=1)
    resp = session.get('https://httpbin.org/get')
    assert resp.from_cache
    print('Expiration time:', resp.expires)

    # After one second the cached value has expired, so the next
    # request hits the network again.
    time.sleep(1.2)
    assert resp.is_expired
    resp = session.get('https://httpbin.org/get')
    assert not resp.from_cache
Beispiel #2
0
    def test_return_old_data_on_error(self, datetime_mock):
        """With old_data_on_error=True, expired cache data is served when a
        refresh fails (save error or non-OK HTTP status); with the default
        (False), the underlying error propagates instead.
        """
        datetime_mock.utcnow.return_value = datetime.utcnow()
        expire_after = 100
        url = httpbin("get")
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, old_data_on_error=True, expire_after=expire_after)
        header = "X-Tst"

        def get(n):
            return s.get(url, headers={header: n}).json()["headers"][header]

        # Prime the cache; the second call is served from it.
        # (assertEquals is a deprecated alias removed in Python 3.12 — use assertEqual.)
        get("expired")
        self.assertEqual(get("2"), "expired")
        # Jump the mocked clock past the expiration window.
        datetime_mock.utcnow.return_value = datetime.utcnow() + timedelta(seconds=expire_after * 2)

        # Saving the refreshed response fails -> stale data is returned.
        with mock.patch.object(s.cache, "save_response", side_effect=Exception):
            self.assertEqual(get("3"), "expired")

        with mock.patch("requests_cache.core.OriginalSession.send") as send_mock:
            resp_mock = requests.Response()
            request = requests.Request("GET", url)
            resp_mock.request = request.prepare()
            resp_mock.status_code = 400
            resp_mock._content = '{"other": "content"}'
            send_mock.return_value = resp_mock
            # A 400 refresh also falls back to the stale cached data...
            self.assertEqual(get("3"), "expired")

            # ...but a successful 200 refresh replaces it.
            resp_mock.status_code = 200
            self.assertIs(s.get(url).content, resp_mock.content)

        # Default behaviour: the save error propagates instead of serving
        # stale data. (Fixed: the original set datetime_mock.return_value,
        # which does not affect datetime_mock.utcnow(); utcnow itself must
        # be mocked, consistent with the lines above.)
        datetime_mock.utcnow.return_value = datetime.utcnow() + timedelta(seconds=expire_after * 2)
        s = CachedSession(CACHE_NAME, CACHE_BACKEND, old_data_on_error=False, expire_after=100)
        with mock.patch.object(s.cache, "save_response", side_effect=Exception):
            with self.assertRaises(Exception):
                s.get(url)
def add_default_services(services, options):
    """Populate *services* with the default database, filesystem, and HTTP
    clients, and derive environment-dependent suffixes in *options*.

    Mutates both dictionaries in place; returns None.
    """
    services['mysql'] = create_engine('mysql+mysqldb://localhost/aws',
                                      echo=False)

    services['redshift'] = create_engine(options['redshift'].format(
        host=options['redshift_host'],
        port=options['redshift_port'],
        name=options['redshift_name'],
        username=options['redshift_username'],
        password=options['redshift_password']),
                                         echo=False)

    services['vertica'] = create_engine(options['vertica'].format(
        host=options['vertica_host'],
        port=options['vertica_port'],
        name=options['vertica_name'],
        username=options['vertica_username'],
        password=options['vertica_password']),
                                        echo=False)

    if options['local']:
        # Local runs point both remote filesystems at a scratch directory.
        services['brickftp'] = fs.open_fs("file:///tmp/etl")
        services['centerstone'] = fs.open_fs("file:///tmp/etl")
    else:
        services['brickftp'] = fs.open_fs(
            "ssh://%s@%s" %
            (options['brickftp_username'], options['brickftp_host']))
        # Bug workaround to brickftp's sftp-only server
        services['brickftp']._platform = "Linux"
        # BUGFIX: the original line ended with a trailing comma, which made
        # services['centerstone'] a 1-tuple wrapping the filesystem object.
        services['centerstone'] = fs.open_fs(
            "ssh://[email protected]:/Out/")

    if options['use_cache']:
        from requests_cache import CachedSession
        services['servicenow'] = CachedSession('http.cache')
        services['workday'] = CachedSession('http.cache')
    else:
        services['servicenow'] = requests.Session()
        services['workday'] = requests.Session()

    services['servicenow'].headers = {'User-Agent': 'Mozilla/ETL/v1'}
    services['servicenow'].auth = HTTPBasicAuth(options['sn_username'],
                                                options['sn_password'])
    services['servicenow'].headers.update({'Accept-encoding': 'text/json'})

    services['workday'].headers = {'User-Agent': 'Mozilla/ETL/v1'}
    services['workday'].auth = HTTPBasicAuth(options['wd_username'],
                                             options['wd_password'])
    services['workday'].headers.update({'Accept-encoding': 'text/json'})

    # Set a file suffix for non-prod jobs
    if options['environment'] == "prod":
        options['suffix'] = ""
        options['table_suffix'] = ""
    else:
        options['suffix'] = '.' + options['environment']
        options['table_suffix'] = '_' + options['environment']

    return
Beispiel #4
0
 def __init__(self, url, cache=False, cachetime=300):
     """Connect to the server at *url*, optionally with a cached HTTP session.

     *cache* selects the backend ('memory' or 'sqlite'); anything else
     (including the default False) uses a plain, uncached session.
     """
     super(PlexServer, self).__init__(url)
     cachetime = cachetime or None
     session = None  # default: no caching
     if CachedSession and cache and isinstance(cache, six.string_types):
         if cache == 'memory':
             session = CachedSession(cache, cache, cachetime)
         elif cache == 'sqlite':
             session = CachedSession('request-cache', 'sqlite', cachetime,
                                     fast_save=True)
     self._config = RequestConfig(session=session)
Beispiel #5
0
 def test_missing_backend_dependency(self, mocked_registry):
     """A backend whose package is not installed should raise ImportError.

     The backend registry is mocked so that looking up 'redis' fails,
     simulating the `redis` package being absent.
     """
     mocked_registry.__getitem__.side_effect = KeyError
     with self.assertRaises(ImportError):
         CachedSession(CACHE_NAME, backend='redis')
Beispiel #6
0
 def __init__(self):
     """Set up the XKCD skill: default settings and an in-memory cached session."""
     super().__init__("XKCDSkill")
     # Enable idle_random when unset. NOTE(review): the truthiness check also
     # overwrites an explicitly stored False — confirm that is intended.
     if not self.settings.get("idle_random"):
         self.settings["idle_random"] = True
     self.current_comic = 0
     # API responses are cached in memory for six hours.
     self.session = CachedSession(backend='memory',
                                  expire_after=timedelta(hours=6))
Beispiel #7
0
def imdb_search(query):
    """Search IMDb for feature films matching *query*.

    Returns a list of (title, year, imdb_id) tuples parsed from IMDb's
    "find" results page; responses are cached for 24 hours. Results
    without a parseable IMDb id or a numeric year are skipped.
    """
    session = CachedSession(expire_after=60 * 60 * 24)
    # Keep only alphanumerics and spaces, lowercased.
    stripped_query = "".join([x for x in query
                              if x.isalnum() or x == " "]).lower()
    r = session.get("https://www.imdb.com/find",
                    params={
                        "q": stripped_query,
                        "s": "tt",
                        "ttype": "ft"
                    })
    soup = BeautifulSoup(r.text, "html.parser")
    results = []
    for result in soup.select(".result_text"):
        title = result.a.get_text()
        try:
            # href looks like "/title/tt0111161/..."; take the id segment.
            # (Removed a dead assignment that was immediately overwritten.)
            imdb_id = result.a.get("href").split("/title/")[1].split("/")[0]
        except IndexError:
            continue
        result.a.decompose()
        try:
            # Remaining text looks like " (1994)"; require a numeric year.
            # Narrowed the original bare except to what can actually raise
            # here: IndexError from the splits, ValueError from int().
            year = result.get_text().split("(")[1].split(")")[0]
            int(year)
        except (IndexError, ValueError):
            continue

        results.append((title, year, imdb_id))

    return results
Beispiel #8
0
    def session_obj(self):
        """Build the cached session used for ICE report requests.

        When no captcha is required, also fetches the option list once
        (bypassing the cache) and refreshes the actives table from it.
        """
        session = CachedSession(allowable_methods=('GET', 'POST'),
                                ignored_parameters=['smpbss'])

        if not IceDaily.RECAPTCHA:
            # Always fetch the option list fresh, outside the cache.
            with session.cache_disabled():
                response = session.get(
                    url=
                    'https://www.theice.com/marketdata/reports/datawarehouse/ConsolidatedEndOfDayReportPDF.shtml',
                    headers={
                        'User-Agent': 'Mozilla/5.0',
                        'X-Requested-With': 'XMLHttpRequest'
                    },
                    params={
                        'selectionForm': '',
                        'exchangeCode': 'IFEU',
                        'optionRequest': self.flavor['optionRequest']
                    })

            markup = BeautifulSoup(response.text)
            rows = [(opt['value'], opt.text) for opt in markup.find_all('option')]
            frame = pd.DataFrame(rows, columns=["WebActiveCode", "ActiveName"])

            # WebActiveCode is "<prefix>|<code>"; keep the code part only.
            frame['ActiveCode'] = frame.WebActiveCode.apply(
                lambda code: code.split('|', 1)[1] if '|' in code else None)
            frame = frame.dropna(how='any')

            self.update_actives(frame)

        return session
Beispiel #9
0
 def __init__(self):
     """Initialize the skill with a NASA API key and an in-memory cached session."""
     super(AsteroidsSkill, self).__init__(name="AsteroidsSkill")
     # Fall back to NASA's public demo key when none is configured.
     if "nasa_key" not in self.settings:
         self.settings["nasa_key"] = "DEMO_KEY"
     # Cache API responses in memory for one hour.
     self._session = CachedSession(backend='memory',
                                   expire_after=timedelta(hours=1))
def get_services(**options):
    """
    Build the services dictionary: a simple mapping of names to
    implementations that bonobo injects at runtime.

    Services defined here are layered on top of bonobo's defaults
    (fs, http, ...); you can override those, let the framework define
    them, or add your own names.

    :return: dict
    """
    if options['use_cache']:
        from requests_cache import CachedSession
        servicenow = CachedSession('http.cache')
    else:
        servicenow = requests.Session()

    servicenow.headers = {'User-Agent': 'Mozilla/ETL/v1'}
    servicenow.headers.update({'Accept-encoding': 'text/json'})
    servicenow.auth = HTTPBasicAuth(options['sn_username'],
                                    options['sn_password'])

    vertica_url = options['vertica'].format(
        username=options['vertica_username'],
        password=options['vertica_password'])

    return {
        'servicenow': servicenow,
        'db': create_engine('sqlite:///test.sqlite', echo=False),
        'vertica': create_engine(vertica_url, echo=False),
    }
Beispiel #11
0
    def __init__(self):
        """Set up plain and cached HTTP sessions and load SSO metadata."""
        self.settings = EsiSettings.getInstance()
        self.server_base: ApiBase = supported_servers[self.settings.get("server")]

        self._basicHeaders = {
            'Accept': 'application/json',
            'User-Agent': 'pyfa v{}'.format(config.version)
        }

        # Plain session for regular requests.
        self._session = Session()
        self._session.headers.update(self._basicHeaders)
        self._session.proxies = NetworkSettings.getInstance().getProxySettingsInRequestsFormat()

        # Cached session: only used for SSO meta data for now, but can be
        # expanded to actually handle various ESI caching (using ETag, for
        # example) in the future.
        cached_session = CachedSession(
            os.path.join(config.savePath, config.ESI_CACHE),
            backend="sqlite",
            cache_control=True,                # Honor Cache-Control expiration headers when present
            expire_after=timedelta(days=1),    # Otherwise expire responses after one day
            stale_if_error=True,               # Serve stale cache data on request errors if possible
        )
        cached_session.headers.update(self._basicHeaders)
        cached_session.proxies = NetworkSettings.getInstance().getProxySettingsInRequestsFormat()

        # OAuth authorization-server metadata for the configured SSO host.
        meta_call = cached_session.get("https://%s/.well-known/oauth-authorization-server" % self.server_base.sso)
        meta_call.raise_for_status()
        self.server_meta = meta_call.json()

        # JSON Web Key Set referenced by the metadata document.
        jwks_call = cached_session.get(self.server_meta["jwks_uri"])
        jwks_call.raise_for_status()
        self.jwks = jwks_call.json()
Beispiel #12
0
 def __init__(self, path, ttl):
     """Create a sqlite-backed response cache at *path*, expiring after *ttl*.

     Only successful (HTTP 200) responses are cached.
     """
     self.cache = CachedSession(cache_name=path, backend="sqlite",
                                expire_after=ttl, extension="",
                                fast_save=True, allowable_codes=(200, ))
Beispiel #13
0
def get_page_url():
    """
    Initial function to get the list of page urls with respect to different alphabet.
    :return: list of urls
    """
    page_url_list = []
    url = "http://devri.bzh/dictionnaire/a/"
    session = CachedSession()
    page = session.get(url)
    soup = BeautifulSoup(page.content, "html.parser")
    letter_list = soup.find_all("a", class_="enfant")
    for i in tqdm(letter_list):
        url_letter = "http://devri.bzh" + i["href"]
        # BUGFIX: always include the letter's first page. The original only
        # appended it inside the try block, so a letter without a pager
        # element (a single-page letter) was dropped entirely — which is
        # presumably why "z" had to be added manually below.
        page_url_list.append(url_letter)
        page_letter = session.get(url_letter)
        soup_letter = BeautifulSoup(page_letter.content, "html.parser")
        page_num = soup_letter.find("li", class_=["MarkupPagerNavLast MarkupPagerNavLastNum", "MarkupPagerNavLastNum"])
        try:
            last_page = int(page_num.a.text)
        except (AttributeError, ValueError):
            # No pager (single page) or non-numeric pager text.
            continue
        page_url_list.extend(url_letter + f"page{j}" for j in range(2, last_page + 1))
    # Keep the manual workaround for "z", but avoid duplicating it now that
    # single-page letters are handled by the loop.
    z_url = "http://devri.bzh/dictionnaire/z/"
    if z_url not in page_url_list:
        page_url_list.append(z_url)
    return page_url_list
Beispiel #14
0
def url_get(url):
    """
    Download an URL using a cache and return the response object
    :param url: the URL to fetch (http(s):// or file://)
    :return: the requests response object
    """
    log.debug("GET URL {!s}".format(url))

    # (Removed unused locals: `s = None` and `info = dict()`.)
    if 'file://' in url:
        # Local files bypass the cache; requests needs an adapter for file://.
        s = requests.session()
        s.mount('file://', FileAdapter())
    else:
        s = CachedSession(cache_name="pyff_cache",
                          backend=config.request_cache_backend,
                          expire_after=config.request_cache_time,
                          old_data_on_error=True)
    headers = {'User-Agent': "pyFF/{}".format(__version__), 'Accept': '*/*'}
    # SECURITY NOTE(review): verify=False disables TLS certificate
    # validation; presumably metadata authenticity is checked elsewhere
    # (e.g. XML signatures) — confirm before relying on this.
    r = s.get(url,
              headers=headers,
              verify=False,
              timeout=config.request_timeout)
    if config.request_override_encoding is not None:
        r.encoding = config.request_override_encoding

    return r
Beispiel #15
0
    def __init__(self,
                 source=None,
                 filter=None,
                 reverse_geocode=False,
                 progressbar=False,
                 quick_mode=False,
                 dry_run=False):
        """Configure the importer and its Redis-backed cached API session."""
        self.source = source
        self.filter = filter
        self.reverse_geocode = reverse_geocode
        self.progressbar = progressbar
        self.dry_run = dry_run

        # Quick mode only imports the first few datasets to speed things up.
        self.quick_mode = quick_mode

        # Cache responses from the luftdaten.info API for five minutes,
        # identifying ourselves with an application User-Agent.
        # TODO: Make backend configurable.
        self.session = CachedSession(cache_name='api.luftdaten.info',
                                     backend='redis',
                                     expire_after=300,
                                     user_agent=APP_NAME + '/' + APP_VERSION)

        # Probe Redis for availability and bail out early when unreachable.
        try:
            self.session.cache.responses.get('test')
        except redis.exceptions.ConnectionError as ex:
            log.error('Unable to connect to Redis: %s', ex)
            sys.exit(2)
Beispiel #16
0
	def __init__(self):
		"""Initialize search-result accumulators and the RadioBrowser client.

		The list attributes are filled in by other methods of this class;
		RadioBrowser API responses are cached on disk for three days.
		"""
		# Per-country station names/URLs and country search results
		# (by name, by station count, by abbreviation).
		self.nombre_radios_pais = []
		self.url_radios_pais = []
		self.resultado_pais_busqueda_nombre = []
		self.resultado_pais_busqueda_numero = []
		self.resultado_pais_busqueda_abreviado = []

		# Global station search results (names and URLs).
		self.nombre_busqueda_general_radio = []
		self.url_busqueda_general_radio = []

		# Languages: full lists plus filtered search results.
		self.nombre_idioma = []
		self.cantidad_idioma = []
		self.busqueda_nombre_idioma = []
		self.busqueda_cantidad_idioma = []

		# Tags: full lists plus filtered search results.
		self.nombre_tag = []
		self.cantidad_tag = []
		self.busqueda_nombre_tag = []
		self.busqueda_cantidad_tag = []

		# Countries: Spanish names, abbreviations, and station counts.
		self.paises_radio_español = []
		self.paises_radio_abreviado = []
		self.paises_numero_emisoras = []
		self.paises_numero_total_emisoras = ""

		# Sqlite-backed HTTP cache under the add-on's config directory.
		expire_after = timedelta(days=3)
		session = CachedSession(
			cache_name=os.path.join(globalVars.appArgs.configPath, "zRadio", "cache"),
			backend='sqlite',
			expire_after=expire_after)
		self.rb = RadioBrowser(session=session)
		self.datos_pais = self.rb.countries()
Beispiel #17
0
class MLSession:
    """Thin wrapper around a filesystem-backed requests-cache session.

    NOTE(review): the presence of __post_init__ suggests this class is
    decorated with @dataclass above this chunk — confirm; without the
    decorator, __post_init__ is never invoked.
    """
    # Single CachedSession created at class-definition time and shared as
    # the default by all instances.
    session: CachedSession = CachedSession(cache_name="api_cache",
                                           backend="filesystem")

    def __post_init__(self):
        # Post-construction tuning: request timeout, per-URL expiration
        # rules, and honoring Cache-Control headers.
        self.session.timeout = 30
        self.session.urls_expire_after = urls_expiry_config
        self.session.cache_control = True

    def get(self, url, **kwargs):
        """Log and perform a (cached) GET request."""
        logger.info(
            f"MLSession GET: cached: {self.is_cached} url: {url}, kwargs: {kwargs}"
        )
        return self.session.get(url, **kwargs)

    def head(self, url, **kwargs):
        """Log and perform a (cached) HEAD request."""
        logger.info(
            f"MLSession HEAD: cached: {self.is_cached} url: {url}, kwargs: {kwargs}"
        )
        return self.session.head(url, **kwargs)

    @property
    def is_cached(self) -> bool:
        """Is this session cached?"""
        if isinstance(self.session, CachedSession):
            return True
        return False
Beispiel #18
0
def test_get_expiration_precedence():
    """Request-level expiration beats URL patterns, which beat the session default."""
    cached = CachedSession(expire_after=1, urls_expire_after={'*.site_1.com': 60 * 60})
    # Session default applies when no URL pattern matches.
    assert cached._get_expiration() == 1
    assert cached._get_expiration('site_2.com') == 1
    # A matching URL pattern overrides the session default.
    assert cached._get_expiration('img.site_1.com/image.jpg') == 60 * 60
    # A per-request value overrides everything while the context is active.
    with cached.request_expire_after(30):
        assert cached._get_expiration() == 30
        assert cached._get_expiration('img.site_1.com/image.jpg') == 30
Beispiel #19
0
def test_urls_expire_after(url, expected_expire_after):
    """Each URL should resolve to the expiration configured for its pattern."""
    patterns = {
        '*.site_1.com': 60 * 60,
        'site_2.com/resource_1': 60 * 60 * 2,
        'site_2.com/resource_2': 60 * 60 * 24,
        'site_2.com/static': -1,
    }
    session = CachedSession(urls_expire_after=patterns)
    assert session._url_expire_after(url) == expected_expire_after
Beispiel #20
0
    def session(self):
        """Return a CachedSession backed by the configured trakt cache path."""
        from requests_cache import CachedSession

        cache_path = self.config()["cache"]["path"]
        return CachedSession(cache_path)
Beispiel #21
0
def test_urls_expire_after__evaluation_order(url, expected_expire_after):
    """If there are multiple matches, the first match should be used in the order defined"""
    patterns = {
        '*.site_1.com/resource': 60 * 60 * 2,
        '*.site_1.com': 60 * 60,
        '*': 1,
    }
    session = CachedSession(urls_expire_after=patterns)
    assert session._url_expire_after(url) == expected_expire_after
Beispiel #22
0
 def wrapper(page_html=None, imdb_id=None, soup=None, *args, **kwargs):
     """Resolve page_html / imdb_id / soup into a parsed soup and call *f*."""
     # Nothing to work with: bail out.
     if not (page_html or imdb_id or soup):
         return None
     # An IMDb id wins: fetch the title page (cached for 24 hours).
     if imdb_id:
         cached = CachedSession(expire_after=60 * 60 * 24)
         page_html = cached.get("https://www.imdb.com/title/{}".format(imdb_id)).text
     soup = soup or BeautifulSoup(page_html, "html.parser")
     return f(soup, *args, **kwargs)
Beispiel #23
0
 def __init__(self):
     """Initialize the skill with a cached API session and in-memory caches."""
     super(HelioViewerSkill, self).__init__(name="HelioViewerSkill")
     # API responses are cached in memory for six hours.
     self.session = CachedSession(backend='memory',
                                  expire_after=timedelta(hours=6))
     self.translate_cache = {}  # save calls to avoid ip banning
     self.img_cache = {}  # dont re-parse for speed
     self.current_date = datetime.now()
     self.current_camera = "sunspots"
     # presumably runs self.bootstrap on a background daemon thread — confirm
     create_daemon(self.bootstrap)
Beispiel #24
0
 def test_post_parameters_normalization(self):
     """Equivalent POST bodies should normalize to the same cache entry."""
     params = {"a": "a", "b": ["1", "2", "3"], "c": "4"}
     url = httpbin("post")
     session = CachedSession(CACHE_NAME, CACHE_BACKEND,
                             allowable_methods=('GET', 'POST'))
     # First request populates the cache; the repeat is served from it.
     self.assertFalse(session.post(url, data=params).from_cache)
     self.assertTrue(session.post(url, data=params).from_cache)
     # The same parameters as a sorted item list hit the same entry...
     self.assertTrue(session.post(url, data=sorted(params.items())).from_cache)
     # ...but a different ordering does not.
     self.assertFalse(session.post(url, data=sorted(params.items(), reverse=True)).from_cache)
Beispiel #25
0
def test_cache_signing():
    """Responses are pickled unsigned by default and signed with a secret key."""
    # No secret key: plain pickle serializer.
    unsigned = CachedSession()
    assert unsigned.cache.responses._serializer == pickle

    # A secret key switches to an itsdangerous Serializer.
    secret_key = str(uuid4())
    signed = CachedSession(secret_key=secret_key)
    assert isinstance(signed.cache.responses._serializer, Serializer)

    # Simple serialize/deserialize round trip works with the signing key.
    signed.cache.responses['key'] = 'value'
    assert signed.cache.responses['key'] == 'value'

    # A session with a different key must refuse to deserialize the item.
    other = CachedSession(secret_key='a different key')
    with pytest.raises(BadSignature):
        other.cache.responses['key']
def get_data(leaderboard, year):
    """Fetch the private-leaderboard JSON for *leaderboard* and *year*.

    Responses are cached on disk for 24 hours; the session cookie is read
    from ../.session-cookie (a single "session=..." line).
    """
    lb_path = LEADERBOARDS.get(leaderboard, leaderboard)
    url = f"https://adventofcode.com/{year}/leaderboard/private/view/{lb_path}"
    cache_options = {"backend": "filesystem", "expire_after": 86400}
    session = CachedSession("../../site_cache", **cache_options)
    cookies = dict([Path("../.session-cookie").read_text().strip().split("=")])
    response = session.get(url, cookies=cookies)
    # Fail loudly on error responses. (The original dropped into the
    # debugger via pdb.set_trace() here — a debugging leftover.)
    response.raise_for_status()
    return response.json()
Beispiel #27
0
def test_repr():
    """Test session and cache string representations"""
    cache_name = 'requests_cache_test'
    session = CachedSession(cache_name=cache_name, backend='memory', expire_after=10)
    # Populate one response and two redirects so the counts show up.
    session.cache.responses['key'] = 'value'
    for redirect_key in ('key', 'key_2'):
        session.cache.redirects[redirect_key] = 'value'

    session_repr = repr(session)
    assert cache_name in session_repr
    assert '10' in session_repr
    cache_str = str(session.cache)
    assert 'redirects: 2' in cache_str
    assert 'responses: 1' in cache_str
Beispiel #28
0
def get_session() -> CachedSession:
    """Make a cached session."""
    cache_path = settings.STATE_PATH.joinpath("http").as_posix()
    session = CachedSession(cache_name=cache_path,
                            expire_after=settings.CACHE_EXPIRE)
    session.headers.update(HEADERS)
    # Weird monkey-patch: requests sessions have no default timeout, so
    # bake one into session.request via functools.partial.
    session.request = functools.partial(session.request,
                                        timeout=settings.HTTP_TIMEOUT)
    return session
Beispiel #29
0
    def test_passing_backend_instance_support(self):
        """A user-provided backend instance should be used as-is."""
        class MyCache(BaseCache):
            pass

        my_backend = MyCache()

        # install_cache should wire the instance into patched sessions...
        requests_cache.install_cache(name=CACHE_NAME, backend=my_backend)
        self.assertIs(requests.Session().cache, my_backend)

        # ...and CachedSession should accept it directly as well.
        self.assertIs(CachedSession(backend=my_backend).cache, my_backend)
Beispiel #30
0
def convert_cache(*args, **kwargs):
    """Rewrite any old-style (tuple) cached responses into the new format."""
    session = CachedSession(*args, **kwargs)
    responses = session.cache.responses
    print(f'Checking {len(responses)} cached responses')

    # Batch all rewrites into a single commit.
    with responses.bulk_commit():
        for key, cached in responses.items():
            # Old-format entries were stored as plain tuples.
            if isinstance(cached, tuple):
                print(f'Converting response {key}')
                responses[key] = convert_old_response(*cached)

    print('Conversion complete')