def __init__(self):
    self._API_SERVER = "https://data.nba.net"
    self._cache_control_adapter = CacheControlAdapter(heuristic=LastModified())
    self._requests_session = requests.Session()
    self._requests_session.mount('http://', CacheControlAdapter())
    self._requests_session.mount('https://', CacheControlAdapter())
    self._TEAM_TRICODES = frozenset((
        'CHA', 'ATL', 'IND', 'MEM', 'DET', 'UTA', 'CHI', 'TOR', 'CLE', 'OKC',
        'DAL', 'MIN', 'BOS', 'SAS', 'MIA', 'DEN', 'LAL', 'PHX', 'NOP', 'MIL',
        'HOU', 'NYK', 'ORL', 'SAC', 'PHI', 'BKN', 'POR', 'GSW', 'LAC', 'WAS'))
    self._STAT_CATEGORIES = frozenset((
        'ppg', 'trpg', 'apg', 'fgp', 'ftp', 'tpp', 'bpg', 'spg', 'tpg', 'pfpg'))
    self._CONFERENCES = frozenset(('west', 'east'))
    self._EASTERN_DIVISIONS = frozenset(('southeast', 'atlantic', 'central'))
    self._WESTERN_DIVISIONS = frozenset(('southwest', 'pacific', 'northwest'))
    self._DIVISIONS = {'west': self._WESTERN_DIVISIONS, 'east': self._EASTERN_DIVISIONS}
    # Cached dictionaries. Saving these copies avoids having to
    # re-parse JSONs when they are returned from the HTTP cache.
    self._person_ids = None
    self._team_ids_to_tricodes = None
    self._team_tricodes_to_ids = None
def cache_storage(self, value):
    if value is None:
        # No cache backend: fall back to plain HTTP adapters.
        self.mount('http://', requests.adapters.HTTPAdapter())
        self.mount('https://', requests.adapters.HTTPAdapter())
    else:
        adapter = CacheControlAdapter(cache=value)
        if self.cache_all:
            # Cache everything aggressively for up to a year.
            adapter.heuristic = ExpiresAfter(days=365)
        self.mount('http://', adapter)
        self.mount('https://', adapter)
def __init__(self, main_source_domain=None, start_page=None):
    if main_source_domain is None and start_page is None:
        raise ValueError('Need to specify main_source_domain or start_page.')
    if main_source_domain:
        self.MAIN_SOURCE_DOMAIN = main_source_domain.rstrip('/')
        self.START_PAGE = self.MAIN_SOURCE_DOMAIN
    if self.MAIN_SOURCE_DOMAIN is None:
        parsedurl = urlparse(start_page)
        self.MAIN_SOURCE_DOMAIN = parsedurl.scheme + '://' + parsedurl.netloc
    if self.MAIN_SOURCE_DOMAIN not in self.SOURCE_DOMAINS:
        self.SOURCE_DOMAINS.append(self.MAIN_SOURCE_DOMAIN)
    if start_page:
        self.START_PAGE = start_page

    # resolve any redirects on the starting URL
    # verdict, head_response = self.is_html_file(self.START_PAGE)
    is_new_url, content_type, content_length, return_url = self.get_url_type(self.START_PAGE)
    if content_type == 'text/html':
        self.START_PAGE = return_url
    else:
        raise ValueError('The starting URL ' + self.START_PAGE + ' did not return any HTML.')

    forever_adapter = CacheControlAdapter(heuristic=CacheForeverHeuristic(), cache=self.CACHE)
    for source_domain in self.SOURCE_DOMAINS:
        self.SESSION.mount(source_domain, forever_adapter)  # TODO: change to less aggressive in final version
def get_events_from_icalendars():
    global now, midnight
    now = localtz.localize(datetime.datetime.now())
    midnight = localtz.localize(datetime.datetime.combine(now, datetime.time(0, 0, 0)))
    cz = Calzone()

    session = FuturesSession()
    session.mount('https://',
                  CacheControlAdapter(cache=FileCache('.webcache'),
                                      heuristic=ForceCacheHeuristic()))
    cals = {k: session.get(u) for k, u in calendars.items()}
    concurrent.futures.wait(cals.values())

    for k, req in cals.items():
        try:
            cz.load(req.result().text)
        except Exception as err:
            print("Failed to load calendar '{}'".format(k))
            print(err)

    try:
        events = cz.get_events(midnight, midnight + datetime.timedelta(days=90))
    except Exception as e:
        print(e)

    events.sort(key=lambda e: e.start)
    return events
def __init__(self):
    super(ProjectHolder, self).__init__()
    app_name = __name__.split('.')[0]

    self.cache_dir = user_cache_dir(app_name)
    log.info("Using cache directory: {}.".format(self.cache_dir))
    self.cache = FileCache(self.cache_dir)
    cache_adapter = CacheControlAdapter(cache=self.cache)
    self.mount("http://", cache_adapter)
    self.mount("https://", cache_adapter)

    self.headers.update(
        {'User-Agent': '{}/{}'.format(app_name, __version__)})
    log.info('Created instance of {}'.format(type(self).__name__))
    self.branches = None
    self.only = None
    self.exclude = None
    self.having_asset = None
    self.hostname = None
    # identifies project on a given hostname
    self.repo = None
    # short name for "repo", useful in URLs
    self.name = None
    # in some cases we do not specify a repo; the feed is discovered, so no repo is given
    self.feed_url = None
def _build_session():
    """Builds a requests session that caches responses where possible, making redirects faster.

    Returns:
        requests.Session -- A shared session to use for the notebook
    """
    result = requests.session()

    # Set up caching. In particular, obey and cache 307 redirects to avoid duplicate
    # expensive calls when we already have a result.
    cache_adapter = CacheControlAdapter()
    cache_adapter.controller = CacheController(cache=cache_adapter.cache,
                                               status_codes=(200, 203, 300, 301, 307))

    result.mount('http://', cache_adapter)
    result.mount('https://', cache_adapter)
    return result
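# A minimal usage sketch for _build_session() above; the URL is a hypothetical
# placeholder. Because the session caches redirects (including 307s) alongside
# ordinary responses, a repeated GET of the same resource may be answered from
# the local cache instead of going back to the network.
shared_session = _build_session()
first = shared_session.get('https://example.com/report')   # goes to the network
second = shared_session.get('https://example.com/report')  # may be served from cache
print(first.status_code, second.status_code)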
def __init__(self, filename=""): super(BabelNet, self).__init__() if filename == "": filename = "babelnet_cache" self.mount('https://', CacheControlAdapter(cache=FileCache(filename))) self.headers.update({'Accept-Encoding': 'gzip'}) self.params.update({'key': cfg.babelnet_key}) self.endpoint = "https://babelnet.io/v4/"
def request(self, method, url, headers=None, params=None, proxies=None, cache=True, verify=False, *args, **kwargs):
    if headers is None:
        headers = {}
    if params is None:
        params = {}
    if proxies is None:
        proxies = {}

    headers['Accept-Encoding'] = 'gzip, deflate'
    headers["User-Agent"] = sickrage.srCore.USER_AGENT

    # request session ssl verify
    if sickrage.srCore.srConfig.SSL_VERIFY:
        try:
            verify = certifi.where()
        except:
            pass

    # request session proxies
    if 'Referer' not in headers and sickrage.srCore.srConfig.PROXY_SETTING:
        sickrage.srCore.srLogger.debug("Using global proxy: " + sickrage.srCore.srConfig.PROXY_SETTING)
        scheme, address = urllib2.splittype(sickrage.srCore.srConfig.PROXY_SETTING)
        # prefix the proxy address with http:// when no scheme was given
        address = ('http://{}'.format(sickrage.srCore.srConfig.PROXY_SETTING),
                   sickrage.srCore.srConfig.PROXY_SETTING)[scheme is not None]
        proxies.update({"http": address, "https": address})
        headers.update({'Referer': address})

    # setup caching adapter
    if cache:
        adapter = CacheControlAdapter(DBCache(os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sessions.db'))))
        self.mount('http://', adapter)
        self.mount('https://', adapter)

    # get web response
    response = super(srSession, self).request(
        method,
        url,
        headers=headers,
        params=params,
        verify=verify,
        proxies=proxies,
        *args, **kwargs
    )

    try:
        # check web response for errors
        response.raise_for_status()
    except requests.exceptions.SSLError as e:
        if ssl.OPENSSL_VERSION_INFO < (1, 0, 1, 5):
            sickrage.srCore.srLogger.info(
                "SSL Error requesting url: '{}' You have {}, try upgrading OpenSSL to 1.0.1e+".format(
                    e.request.url, ssl.OPENSSL_VERSION))
        if sickrage.srCore.srConfig.SSL_VERIFY:
            sickrage.srCore.srLogger.info(
                "SSL Error requesting url: '{}', try disabling cert verification in advanced settings".format(
                    e.request.url))
    except Exception:
        pass

    return response
def set_cache_expiration_delay(seconds):
    """ Set a cache for requests with a given expiration time. """
    adapter = CacheControlAdapter(
        heuristic=ExpiresAfter(seconds=seconds)
    )
    session.mount('http://', adapter)
    return session
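# A brief usage sketch for set_cache_expiration_delay() above, assuming the
# module-level `session` object the helper mounts onto already exists. Note that
# only 'http://' URLs go through the caching adapter here.
cached_session = set_cache_expiration_delay(seconds=300)
# response = cached_session.get('http://example.com/api')  # repeats within 5 minutes can hit the cache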
def session(self):
    if self._session is None:
        self._session = real_requests.Session()
        if CacheControlAdapter:
            adapter = CacheControlAdapter(cache=FileCache(".webcache"))
            self._session.mount("http://", adapter)
            self._session.mount("https://", adapter)
            print("Caching to .webcache")
    return self._session
def _get_requests_cache_adapter(heuristic):
    """
    Given a heuristic, constructs and returns a
    :class:`cachecontrol.CacheControlAdapter` attached to the instance's
    :data:`requests_cache`.
    """
    return CacheControlAdapter(cache=requests_cache,
                               heuristic=heuristic,
                               cache_etags=False)
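# A hypothetical usage sketch for _get_requests_cache_adapter() above: mount the
# returned adapter on a plain requests session so responses land in `requests_cache`
# and are treated as fresh for one hour.
import requests
from cachecontrol.heuristics import ExpiresAfter

_session = requests.Session()
_session.mount('https://', _get_requests_cache_adapter(ExpiresAfter(hours=1)))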
def get_session(*args, **kwargs):
    session = OAuth2Session(*args, **kwargs)
    cache_adapter = CacheControlAdapter(
        cache=FileCache(CACHE_FILE),
        pool_connections=config.http.connections,
        pool_maxsize=config.http.connections,
        max_retries=config.http.retries,
    )
    session.mount("http://", cache_adapter)
    return session
def __init__(self, headers=None, cookies=None, cache_name=None, delay=1, expire_hours=12, as_string=False):
    '''
    Base class for common scraping tasks

    Args:
        headers: dict of headers
        cookies: cookiejar object
        cache_name: should be full path
        delay: int (be polite!!!)
        expire_hours: int - default 12
        as_string: get string rather than parsed json
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())

    if not cookies:
        try:
            import cookielib
            cookies = cookielib.MozillaCookieJar()
        except (NameError, ImportError) as e:
            try:
                import http.cookiejar
                cookies = http.cookiejar.MozillaCookieJar()
            except Exception as e:
                pass

    _s = requests.Session()
    _s.cookies = cookies

    if headers:
        _s.headers.update(headers)
    else:
        _s.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'})

    if cache_name:
        if '/' not in cache_name:
            cache_name = os.path.join('/tmp', cache_name)
        try:
            from cachecontrol import CacheControlAdapter
            from cachecontrol.heuristics import ExpiresAfter
            from cachecontrol.caches import FileCache
            _s.mount('http://', CacheControlAdapter(cache=FileCache(cache_name),
                                                    cache_etags=False,
                                                    heuristic=ExpiresAfter(hours=expire_hours)))
        except ImportError as e:
            try:
                import requests_cache
                requests_cache.install_cache(cache_name)
            except:
                pass

    self.s = _s
    self.urls = []
    self.as_string = as_string

    if delay > 0:
        self.delay = delay
    else:
        self.delay = None
def getsession(timeframe):
    if timeframe in sessions:
        return sessions[timeframe]
    else:
        sess = requests.Session()
        sess.cookies = __cookie
        if timeframe == 0:
            sess.mount("http://", CacheControlAdapter(cache=__cache))
            sess.mount("https://", CacheControlAdapter(cache=__cache))
        else:
            sess.mount(
                "http://",
                CacheControlAdapter(cache=__cache, heuristic=timecache(timeframe)))
            sess.mount(
                "https://",
                CacheControlAdapter(cache=__cache, heuristic=timecache(timeframe)))
        sessions[timeframe] = sess
        return sess
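# A short usage sketch for getsession() above, assuming the module-level __cookie,
# __cache, and timecache heuristic behave as their names suggest: timeframe 0 keeps
# standard cache-control semantics, a positive timeframe pins responses for roughly
# that many seconds, and repeated calls reuse the session stored in `sessions`.
live_session = getsession(0)
hourly_session = getsession(3600)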
def add_cache_control(session, cache_control_config):
    """Add cache_control adapter to session object."""
    adapter = CacheControlAdapter(
        DictCache(),
        cache_etags=cache_control_config.get('cache_etags', True),
        serializer=cache_control_config.get('serializer', None),
        heuristic=cache_control_config.get('heuristic', None),
    )
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    session.cache_controller = adapter.controller
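# A short usage sketch for add_cache_control() above, with a hypothetical config
# dict: unspecified keys fall back to the defaults (ETag support on, no custom
# serializer or heuristic), and the controller is exposed on the session afterwards.
import requests

session = requests.Session()
add_cache_control(session, {'cache_etags': True})
# session.cache_controller is now the adapter's CacheController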
def _create_session(self, max_retries, proxies, backoff_factor, cache):
    sess = Session()

    # Retry only on idempotent methods and only when too many requests
    retries = Retry(total=max_retries,
                    backoff_factor=backoff_factor,
                    status_forcelist=[429],
                    method_whitelist=['GET', 'UPDATE', 'DELETE'])
    retries_adapter = HTTPAdapter(max_retries=retries)

    if cache:
        cache_adapter = CacheControlAdapter(cache_etags=True)
        sess.mount('http://', retries_adapter)
        sess.mount('http://', cache_adapter)

    sess.proxies.update(proxies)
    return sess
def __init__(self, proxies=None, cache=True):
    super(WebSession, self).__init__()

    # setup caching adapter
    if cache:
        adapter = CacheControlAdapter()
        self.mount('http://', adapter)
        self.mount('https://', adapter)

    # add proxies
    self.proxies = proxies or _add_proxies()

    # add hooks
    self.hooks['response'] += [WebHooks.log_url, WebHooks.cloudflare]
def __init__(self, url, max_retries, expires_after_sec):
    self.hasError = False
    self.fromCache = False
    self.url = url
    self.cacheEnabled = expires_after_sec > 0
    self.session = requests.Session()

    retryPolicy = urllib3.util.Retry(max_retries, status_forcelist=[400])

    if self.cacheEnabled:
        self.session.mount(
            url,
            CacheControlAdapter(
                max_retries=retryPolicy,
                heuristic=ExpiresAfter(seconds=expires_after_sec)))
    else:
        self.session.mount(
            url,
            requests.adapters.HTTPAdapter(max_retries=retryPolicy))
def __init__(self, proxies=None, cache=True):
    super(WebSession, self).__init__()

    # setup caching adapter
    if cache:
        adapter = CacheControlAdapter()
        self.mount('http://', adapter)
        self.mount('https://', adapter)

    # add proxies
    self.proxies = proxies or _add_proxies()

    # add hooks
    self.hooks['response'] += [WebHooks.log_url, WebHooks.cloudflare]

    # add headers
    self.headers.update({'Accept-Encoding': 'gzip, deflate',
                         'User-Agent': sickrage.app.user_agent})
def __init__(
    self,
    user=None,
    prefix_url=settings.SITE_URL,
    save_auth=None,
    verify=settings.CERN_SSL_CHAIN,
):
    super(Session, self).__init__()
    self.user = user if user else User(save_auth=save_auth)
    self.auth = self._authorize
    self.prefix_url = prefix_url
    self.verify = verify
    # store last call
    self._response = None
    # add caching
    super(Session, self).mount(
        self.prefix_url, CacheControlAdapter(cache=FileCache('.webcache'))
    )
def __init__(self, *args, **kwargs):
    retries = kwargs.pop("retries", 0)
    cache = kwargs.pop("cache", None)

    super(PypiSession, self).__init__(*args, **kwargs)

    # Attach our User Agent to the request
    self.headers["User-Agent"] = user_agent()

    # Attach our Authentication handler to the session
    self.auth = MultiDomainBasicAuth()

    # Create our urllib3.Retry instance which will allow us to customize
    # how we handle retries.
    retries = urllib3.Retry(
        # Set the total number of retries that a particular request can
        # have.
        total=retries,
        # A 503 error from PyPI typically means that the Fastly -> Origin
        # connection got interrupted in some way. A 503 error in general
        # is typically considered a transient error so we'll go ahead and
        # retry it.
        status_forcelist=[503],
        # Add a small amount of back off between failed requests in
        # order to prevent hammering the service.
        backoff_factor=0.25,
    )

    # We want to _only_ cache responses on securely fetched origins. We do
    # this because we can't validate the response of an insecurely fetched
    # origin, and we don't want someone to be able to poison the cache and
    # require manual eviction from the cache to fix it.
    if cache:
        secure_adapter = CacheControlAdapter(
            cache=SafeFileCache(cache, use_dir_lock=True),
            max_retries=retries,
        )
    else:
        secure_adapter = HTTPAdapter(max_retries=retries)

    self.mount("https://", secure_adapter)
    self.mount("file://", LocalFSAdapter())
def get_feed(feed_url):
    """ Return the parsed feed. """
    requests_timeout = getattr(settings, 'FEED_TIMOUT', 1)

    cache_adapter = CacheControlAdapter(
        cache=FileCache('.web_cache'),
        heuristic=ExpiresAfter(hours=1),
    )
    session = requests.Session()
    session.mount('http://', cache_adapter)
    session.mount('https://', cache_adapter)

    show_exceptions = getattr(settings, 'DEBUG', True)
    feed_request = session.get(feed_url, timeout=requests_timeout)
    return feedparser.parse(feed_request.text)
def _build_session(self, max_retries):
    from requests.adapters import HTTPAdapter

    if not isinstance(max_retries, int):
        raise ValueError(f'int expected, found {type(max_retries)}.')
    elif max_retries < 1:
        raise ValueError('max_retries should be greater or equal to 1.')

    session = requests.Session()

    # mount retries adapter
    session.mount(
        'http://',
        HTTPAdapter(max_retries=Retry(total=max_retries,
                                      method_whitelist=frozenset(['GET', 'POST']))))

    # mount cache adapter
    session.mount('http://', CacheControlAdapter(heuristic=ExpiresAfter(hours=1)))

    session.headers['User-Agent'] = USER_AGENT
    self.session = session
def __init__(self, main_source_domain=None, start_page=None):
    if main_source_domain is None and start_page is None:
        raise ValueError('Need to specify main_source_domain or start_page.')
    if main_source_domain:
        self.MAIN_SOURCE_DOMAIN = main_source_domain.rstrip('/')
        self.START_PAGE = self.MAIN_SOURCE_DOMAIN
    if self.MAIN_SOURCE_DOMAIN is None:
        parsedurl = urlparse(start_page)
        self.MAIN_SOURCE_DOMAIN = parsedurl.scheme + '://' + parsedurl.netloc
    if self.MAIN_SOURCE_DOMAIN not in self.SOURCE_DOMAINS:
        self.SOURCE_DOMAINS.append(self.MAIN_SOURCE_DOMAIN)
    if start_page:
        self.START_PAGE = start_page

    # keep track of broken links
    self.broken_links = []

    forever_adapter = CacheControlAdapter(heuristic=CacheForeverHeuristic(), cache=self.CACHE)
    for source_domain in self.SOURCE_DOMAINS:
        self.SESSION.mount(source_domain, forever_adapter)  # TODO: change to less aggressive in final version
import os
import shutil
import stat
import zipfile
from typing import Any, Dict, Optional

import aiohttp
import requests
from cachecontrol import CacheControl, CacheControlAdapter
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import ExpiresAfter

from shared import configuration, perf
from shared.pd_exception import OperationalException

SESSION = CacheControl(requests.Session(),
                       cache=FileCache(configuration.get('web_cache')))
SESSION.mount('http://whatsinstandard.com',
              CacheControlAdapter(heuristic=ExpiresAfter(days=14)))
AIOSESSION = aiohttp.ClientSession()


def unzip(url: str, path: str) -> str:
    location = '{scratch_dir}/zip'.format(scratch_dir=configuration.get('scratch_dir'))

    def remove_readonly(func, path, _):
        os.chmod(path, stat.S_IWRITE)
        func(path)

    shutil.rmtree(location, True, remove_readonly)
    os.mkdir(location)
    store(url, '{location}/zip.zip'.format(location=location))
def __init__(self, **kwargs):
    '''
    Base class for common scraping tasks

    Args:

    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    self.urls = []

    # use requests HTML to aid parsing
    # has all same methods as requests.Session
    _s = HTMLSession()

    # delay/expire
    if kwargs.get('delay'):
        self.delay = kwargs['delay']
    else:
        self.delay = 2

    if kwargs.get('expire_hours'):
        self.expire_hours = kwargs['expire_hours']
    else:
        self.expire_hours = 168

    # add cookies
    if kwargs.get('cookies'):
        _s.cookies = kwargs['cookies']
    else:
        try:
            import cookielib
            _s.cookies = cookielib.MozillaCookieJar()
        except (NameError, ImportError):
            import http.cookiejar
            _s.cookies = http.cookiejar.MozillaCookieJar()

    # add headers
    if kwargs.get('headers'):
        _s.headers = kwargs['headers']
    else:
        ua = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
        _s.headers = {'User-Agent': ua}

    # add proxies
    if kwargs.get('proxies'):
        _s.proxies = kwargs['proxies']

    # add cache
    if '/' not in kwargs.get('cache_name', ''):
        self.cache_name = os.path.join('/tmp', kwargs['cache_name'])
    try:
        from cachecontrol import CacheControlAdapter
        from cachecontrol.heuristics import ExpiresAfter
        from cachecontrol.caches import FileCache
        _s.mount('http://', CacheControlAdapter(cache=FileCache(self.cache_name),
                                                cache_etags=False,
                                                heuristic=ExpiresAfter(hours=self.expire_hours)))
    except ImportError as e:
        try:
            import requests_cache
            requests_cache.install_cache(self.cache_name)
        except:
            logging.exception('could not install cache')

    self.s = _s
@oauth_authorized.connect_via(github_bp)
def github_logged_in(blueprint, token):
    if not token:
        flash("Failed to log in with Github")
    if "error_reason" in token:
        msg = "Access denied. Reason={reason} error={error}".format(
            reason=request.args["error_reason"],
            error=request.args["error_description"],
        )
        flash(msg)
    else:
        flash("Successfully signed in with Github")

    # install CacheControl for github session, so we don't eat up API usage unnecessarily
    github_bp.session.mount(github_bp.session.base_url, CacheControlAdapter())


## UTILITY FUNCTIONS ##

def jira_get(*args, **kwargs):
    """
    JIRA sometimes returns an empty response to a perfectly valid GET request,
    so this will retry it a few times if that happens.
    """
    for _ in range(3):
        resp = jira_bp.session.get(*args, **kwargs)
        if resp.content:
            return resp
    return jira_bp.session.get(*args, **kwargs)
import os, datetime

import requests
from .data.rfeed import Item, Feed
from flask import Flask, jsonify, request as flask_request
from cachecontrol import CacheControlAdapter
from cachecontrol.heuristics import LastModified

app = Flask(__name__)

adapter = CacheControlAdapter(heuristic=LastModified())
sess = requests.Session()
sess.mount('http://', adapter)
sess.mount('https://', adapter)

SERVICE_NAME = os.path.splitext(os.path.basename(__file__))[0]


@app.route("/rss/summary", methods=['GET'])
def latest_articles():
    if flask_request.method == 'GET':
        response = sess.get('http://localhost/article/collect/10')
        article_collection = []
        if response.status_code == requests.codes.ok:
            articles = response.json()['success']
            for article in articles:
                article_collection.append(
                    Item(
                        title=article['title'],
                        author=article['author'],
                        pubDate=datetime.datetime.strptime(
def cli(url, repositories, search, table, rows, minstar, report, description, token):
    MODE = os.environ.get("GHTOPDEP_ENV")
    BASE_URL = 'https://437w61gcj1.execute-api.us-west-2.amazonaws.com/api'
    if MODE == "development":
        BASE_URL = 'http://127.0.0.1:8080'

    if report:
        try:
            result = requests.get('{}/repos?url={}'.format(BASE_URL, url))
            if result.status_code != 404:
                sorted_repos = sort_repos(result.json()['deps'], rows)
                repos = readable_stars(sorted_repos)
                click.echo(tabulate(repos, headers="keys", tablefmt="github"))
                sys.exit()
        except requests.exceptions.ConnectionError as e:
            click.echo(e)

    if (description or search) and token:
        gh = github3.login(token=token)
        CacheControl(gh.session, cache=FileCache(CACHE_DIR), heuristic=OneDayHeuristic())
    elif (description or search) and not token:
        click.echo("Please provide token")
        sys.exit()

    destination = "repository"
    destinations = "repositories"
    if not repositories:
        destination = "package"
        destinations = "packages"
    repos = []
    more_than_zero_count = 0
    total_repos_count = 0
    spinner = Halo(text="Fetching information about {0}".format(destinations), spinner="dots")
    spinner.start()

    sess = requests.session()
    retries = Retry(total=15, backoff_factor=15, status_forcelist=[429])
    adapter = CacheControlAdapter(max_retries=retries, cache=FileCache(CACHE_DIR), heuristic=OneDayHeuristic())
    sess.mount("http://", adapter)
    sess.mount("https://", adapter)

    page_url = get_page_url(sess, url, destination)

    while True:
        response = sess.get(page_url)
        parsed_node = HTMLParser(response.text)
        dependents = parsed_node.css(ITEM_SELECTOR)
        total_repos_count += len(dependents)
        for dep in dependents:
            repo_stars_list = dep.css(STARS_SELECTOR)
            # only ghost or private(?) packages lack a star count
            if repo_stars_list:
                repo_stars = repo_stars_list[0].text().strip()
                repo_stars_num = int(repo_stars.replace(",", ""))
            else:
                continue

            if repo_stars_num != 0:
                more_than_zero_count += 1
            if repo_stars_num >= minstar:
                relative_repo_url = dep.css(REPO_SELECTOR)[0].attributes["href"]
                repo_url = "{0}{1}".format(GITHUB_URL, relative_repo_url)

                # the same package can be listed more than once
                is_already_added = already_added(repo_url, repos)
                if not is_already_added and repo_url != url:
                    if description:
                        repo_description = fetch_description(gh, relative_repo_url)
                        repos.append({
                            "url": repo_url,
                            "stars": repo_stars_num,
                            "description": repo_description
                        })
                    else:
                        repos.append({
                            "url": repo_url,
                            "stars": repo_stars_num
                        })

        node = parsed_node.css(NEXT_BUTTON_SELECTOR)
        if len(node) == 2:
            page_url = node[1].attributes["href"]
        elif len(node) == 0 or node[0].text() == "Previous":
            spinner.stop()
            break
        elif node[0].text() == "Next":
            page_url = node[0].attributes["href"]

    if report:
        try:
            requests.post('{}/repos'.format(BASE_URL), json={"url": url, "deps": repos})
        except requests.exceptions.ConnectionError as e:
            click.echo(e)

    sorted_repos = sort_repos(repos, rows)

    if search:
        for repo in repos:
            repo_path = urlparse(repo["url"]).path[1:]
            for s in gh.search_code("{0} repo:{1}".format(search, repo_path)):
                click.echo("{0} with {1} stars".format(s.html_url, repo["stars"]))
    else:
        show_result(sorted_repos, total_repos_count, more_than_zero_count, destinations, table)
def __init__(self, **kwargs):
    """ """
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    self.urls = []

    # use requests HTML to aid parsing
    # has all same methods as requests.Session
    _s = HTMLSession()

    self.delay = kwargs.get("delay", 2)
    self.expire_hours = kwargs.get("expire_hours", 168)

    # add cookies
    if kwargs.get("cookies"):
        _s.cookies = kwargs["cookies"]
    else:
        import http.cookiejar
        _s.cookies = http.cookiejar.MozillaCookieJar()

    # add headers
    default_headers = {
        "User-Agent": random.choice(USER_AGENTS),
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "en-US,en;q=0.9",
        "accept": "application/json, text/plain, */*",
    }
    _s.headers.update(default_headers)
    if kwargs.get("headers"):
        _s.headers.update(kwargs["headers"])

    # add proxies
    if kwargs.get("proxies"):
        _s.proxies = kwargs["proxies"]

    # add cache
    if not kwargs.get("cache_name"):
        self.cache_name = os.path.join("/tmp", random_string(32))
    elif "/" not in kwargs.get("cache_name", ""):
        self.cache_name = os.path.join("/tmp", kwargs["cache_name"])
    else:
        self.cache_name = kwargs.get("cache_name")

    try:
        from cachecontrol import CacheControlAdapter
        from cachecontrol.heuristics import ExpiresAfter
        from cachecontrol.caches import FileCache

        _s.mount(
            "http://",
            CacheControlAdapter(
                cache=FileCache(self.cache_name),
                cache_etags=False,
                heuristic=ExpiresAfter(hours=self.expire_hours),
            ),
        )
    except ImportError:
        try:
            import requests_cache
            requests_cache.install_cache(self.cache_name)
        except BaseException:
            logging.exception("could not install cache")

    self.session = _s
TIMEOUT_SECONDS = 10  # Seconds before URL query timeout is raised

PROVIDERS_URLS = [
    "https://providers.optimade.org/v1/links",
    "https://raw.githubusercontent.com/Materials-Consortia/providers/master/src"
    "/links/v1/providers.json",
]

CACHE_DIR = Path(appdirs.user_cache_dir("optimade-client", "CasperWA"))
CACHE_DIR.mkdir(parents=True, exist_ok=True)
CACHED_PROVIDERS = CACHE_DIR / "cached_providers.json"

SESSION = requests.Session()
SESSION_ADAPTER = CacheControlAdapter(
    cache=FileCache(CACHE_DIR / ".requests_cache"), heuristic=ExpiresAfter(days=1)
)
SESSION_ADAPTER_DEBUG = CacheControlAdapter()
SESSION.mount("http://", SESSION_ADAPTER)
SESSION.mount("https://", SESSION_ADAPTER)
SESSION.mount("http://localhost", SESSION_ADAPTER_DEBUG)
SESSION.mount("http://127.0.0.1", SESSION_ADAPTER_DEBUG)

# Currently known providers' development OPTIMADE base URLs
DEVELOPMENT_PROVIDERS = {"mcloud": "https://dev-www.materialscloud.org/optimade"}

try:
    DEVELOPMENT_MODE = bool(int(os.getenv("OPTIMADE_CLIENT_DEVELOPMENT_MODE", "0")))
except ValueError:
    LOGGER.debug(
        (