def request(self, method, url, headers=None, params=None, proxies=None, cache=True,
            verify=False, *args, **kwargs):
    if headers is None:
        headers = {}
    if params is None:
        params = {}
    if proxies is None:
        proxies = {}

    url = self.normalize_url(url)

    headers.update({'Accept-Encoding': 'gzip, deflate'})
    headers.update(random.choice(USER_AGENTS))

    # request session ssl verify
    if sickrage.srCore.srConfig.SSL_VERIFY:
        try:
            verify = certifi.where()
        except Exception:
            pass

    # request session proxies
    if 'Referer' not in headers and sickrage.srCore.srConfig.PROXY_SETTING:
        sickrage.srCore.srLogger.debug(
            "Using global proxy: " + sickrage.srCore.srConfig.PROXY_SETTING)
        scheme, address = urllib2.splittype(sickrage.srCore.srConfig.PROXY_SETTING)
        # prepend a scheme if the configured proxy lacks one
        address = sickrage.srCore.srConfig.PROXY_SETTING if scheme \
            else 'http://{}'.format(sickrage.srCore.srConfig.PROXY_SETTING)
        proxies.update({"http": address, "https": address})
        headers.update({'Referer': address})

    # setup session caching (swaps in the CacheControl-wrapped class)
    if cache:
        cache_file = os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sessions.db'))
        self.__class__ = cachecontrol.CacheControl(self,
                                                   cache=DBCache(cache_file),
                                                   heuristic=ExpiresAfter(days=7)).__class__

    # get web response
    response = super(srSession, self).request(
        method, url, headers=headers, params=params, verify=verify, proxies=proxies,
        *args, **kwargs)

    try:
        # check web response for errors
        response.raise_for_status()
    except requests.exceptions.SSLError as e:
        if ssl.OPENSSL_VERSION_INFO < (1, 0, 1, 5):
            sickrage.srCore.srLogger.info(
                "SSL Error requesting url: '{}' You have {}, try upgrading OpenSSL to 1.0.1e+".format(
                    e.request.url, ssl.OPENSSL_VERSION))
        if sickrage.srCore.srConfig.SSL_VERIFY:
            sickrage.srCore.srLogger.info(
                "SSL Error requesting url: '{}', try disabling cert verification in advanced settings".format(
                    e.request.url))
    except Exception:
        pass

    return response
def __init__(self, config=None, cache=None):
    # avoid a mutable default argument for config
    self.config = config or {}
    if cache is None:
        # sticky local cache directory for testing
        cache = FileCache(".cache", forever=True)
    self.session = CacheControl(requests.Session(),
                                cache=cache,
                                heuristic=ExpiresAfter(days=30))
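A minimal standalone sketch of the pattern above (the surrounding class is not shown, so the session here is a stand-in): wrapping a plain requests.Session in CacheControl with a sticky FileCache means repeated runs replay responses for up to 30 days.

import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import ExpiresAfter

# every response is written to ./.cache and considered fresh for 30 days
session = CacheControl(requests.Session(),
                       cache=FileCache(".cache", forever=True),
                       heuristic=ExpiresAfter(days=30))
resp = session.get("https://example.com/")   # first call goes to the network
resp = session.get("https://example.com/")   # replayed from the file cache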
def request(self, method, url, headers=None, params=None, cache=True,
            raise_exceptions=True, *args, **kwargs):
    url = self.normalize_url(url)

    kwargs.setdefault('params', {}).update(params or {})
    kwargs.setdefault('headers', {}).update(headers or {})

    # if method == 'POST':
    #     self.session.headers.update({"Content-type": "application/x-www-form-urlencoded"})
    kwargs['headers'].update({'Accept-Encoding': 'gzip, deflate'})
    kwargs['headers'].update(random.choice(USER_AGENTS))

    # request session ssl verify
    kwargs['verify'] = False
    if sickrage.srCore.srConfig.SSL_VERIFY:
        try:
            kwargs['verify'] = certifi.where()
        except Exception:
            pass

    # request session proxies
    if 'Referer' not in kwargs['headers'] and sickrage.srCore.srConfig.PROXY_SETTING:
        sickrage.srCore.srLogger.debug(
            "Using global proxy: " + sickrage.srCore.srConfig.PROXY_SETTING)
        scheme, address = urllib2.splittype(sickrage.srCore.srConfig.PROXY_SETTING)
        # prepend a scheme if the configured proxy lacks one
        address = sickrage.srCore.srConfig.PROXY_SETTING if scheme \
            else 'http://{}'.format(sickrage.srCore.srConfig.PROXY_SETTING)
        kwargs.setdefault('proxies', {}).update({"http": address, "https": address})
        kwargs['headers'].update({'Referer': address})

    # setup session caching
    if cache:
        cache_file = os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sessions.db'))
        cachecontrol.CacheControl(self,
                                  cache=DBCache(cache_file),
                                  heuristic=ExpiresAfter(days=7))

    # get result
    response = super(srSession, self).request(method, url, *args, **kwargs).result()

    if raise_exceptions:
        response.raise_for_status()

    return response
def set_cache_expiration_delay(seconds):
    """Set a cache for requests with a given expiration time."""
    adapter = CacheControlAdapter(heuristic=ExpiresAfter(seconds=seconds))
    session.mount('http://', adapter)
    return session
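Usage sketch for set_cache_expiration_delay(), assuming the module-level `session` it mounts onto is a plain requests.Session (that global is not shown in the snippet):

import requests
session = requests.Session()   # assumed module-level session used by the function

cached = set_cache_expiration_delay(300)   # responses treated as fresh for 5 minutes
cached.get('http://example.com/api')       # fetched over the network
cached.get('http://example.com/api')       # served from the cache within 300 s
# note: the adapter is mounted on 'http://' only, so https traffic is not cached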
def __init__(self):
    self.s = requests.Session()
    # We cache ALL responses for 60 min. so e.g. inline lyrics requests don't make
    # two calls right after each other.
    # This MAY have unforeseen consequences, but hopefully we can deal with those.
    self.s = CacheControl(self.s, cache_etags=False, heuristic=ExpiresAfter(minutes=60))
    self.s.headers.update({
        'Accept': 'application/json',
        'User-Agent': VOCADB_USER_AGENT
    })
    self.opts = {'nameMatchMode': 'Auto', 'getTotalCount': 'true'}
    self._resources = {}
def __init__(self, place_id, cache=True, time_to_cache=SCHEDULE_CACHE_TIMING, **kwargs):
    """Initialize!"""
    if cache:
        session = CacheControl(Session(),
                               heuristic=ExpiresAfter(seconds=time_to_cache))
    else:
        session = None
    super(TrashClient, self).__init__(BASE_URL, session, **kwargs)
    self._place_id = place_id
def __init__(self, app_name: str,
             expires_after: datetime.timedelta = datetime.timedelta(days=28)):
    self.app_name: str = str(app_name)
    self.cache_dir = PathPlus(platformdirs.user_cache_dir(self.app_name))
    self.cache_dir.maybe_make(parents=True)
    self.session: requests.Session = CacheControl(
        sess=requests.Session(),
        cache=FileCache(self.cache_dir),
        heuristic=ExpiresAfter(
            days=expires_after.days,
            seconds=expires_after.seconds,
            microseconds=expires_after.microseconds,
        ),
        adapter_class=RateLimitAdapter,
    )
def get_http_session():
    global _http_session
    if _http_session is None:
        _http_session = requests.session()
        if cachecontrol:
            _http_session = cachecontrol.CacheControl(
                _http_session,
                cache=FileCache(
                    user_cache_dir(__appname__, __appauthor__),
                    forever=True,
                ),
                heuristic=ExpiresAfter(days=14),
            )
    return _http_session
def __init__(self, url, max_retries, expires_after_sec):
    self.hasError = False
    self.fromCache = False
    self.url = url
    self.cacheEnabled = expires_after_sec > 0
    self.session = requests.Session()
    retryPolicy = urllib3.util.Retry(max_retries, status_forcelist=[400])
    if self.cacheEnabled:
        self.session.mount(
            url,
            CacheControlAdapter(
                max_retries=retryPolicy,
                heuristic=ExpiresAfter(seconds=expires_after_sec)))
    else:
        self.session.mount(
            url,
            requests.adapters.HTTPAdapter(max_retries=retryPolicy))
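Usage sketch; `CachedClient` is a hypothetical name for the class owning this __init__ (the original class name is not shown). A positive expires_after_sec enables the caching adapter; zero or a negative value falls back to a plain HTTPAdapter with the same retry policy.

client = CachedClient('https://api.example.com', max_retries=3, expires_after_sec=600)
resp = client.session.get(client.url + '/data')   # retried on HTTP 400, cached for 10 min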
def includeme(config: Configurator) -> None:
    """
    Called automatically via config.include("zam_repondeur.services.fetch.http")
    """
    session = requests.session()
    http_cache_dir = config.registry.settings["zam.http_cache_dir"]
    http_cache_duration = int(config.registry.settings["zam.http_cache_duration"])
    cached_session = CacheControl(
        session,
        cache=FileCache(http_cache_dir),
        heuristic=ExpiresAfter(minutes=http_cache_duration),
        controller_class=CustomCacheController,
    )
    config.registry.registerUtility(component=cached_session, provided=IHTTPSession)
def fromParameters(cls, sessionFactory: Callable[[], requests.Session],
                   cachePath: str,
                   maxAgeDictionary: Mapping[str, int]) -> 'IntersphinxCache':
    """
    Construct an instance with the given parameters.

    @param sessionFactory: A zero-argument L{callable} that returns a
        L{requests.Session}.
    @param cachePath: Path of the cache directory.
    @param maxAgeDictionary: A mapping describing the maximum age of any
        cache entry.
    @see: L{parseMaxAge}
    """
    session = CacheControl(sessionFactory(),
                           cache=FileCache(cachePath),
                           heuristic=ExpiresAfter(**maxAgeDictionary))
    return cls(session)
def fromParameters(cls, sessionFactory, cachePath, maxAgeDictionary):
    """
    Construct an instance with the given parameters.

    @param sessionFactory: A zero-argument L{callable} that returns a
        L{requests.Session}.
    @param cachePath: Path of the cache directory.
    @type cachePath: L{str}
    @param maxAgeDictionary: A dictionary describing the maximum age of
        any cache entry.
    @type maxAgeDictionary: L{dict}
    @see: L{parseMaxAge}
    """
    session = CacheControl(sessionFactory(),
                           cache=FileCache(cachePath),
                           heuristic=ExpiresAfter(**maxAgeDictionary))
    return cls(session)
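Both variants expand maxAgeDictionary straight into ExpiresAfter, so its keys must be datetime.timedelta-style units (weeks, days, hours, minutes, seconds). A hedged usage sketch:

import requests

cache = IntersphinxCache.fromParameters(
    requests.Session,           # zero-argument session factory
    '/tmp/intersphinx-cache',   # FileCache directory
    {'hours': 2},               # every entry expires two hours after retrieval
)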
def get_feed(feed_url):
    """Return the parsed feed."""
    requests_timeout = getattr(settings, 'FEED_TIMOUT', 1)
    cache_adapter = CacheControlAdapter(
        cache=FileCache('.web_cache'),
        heuristic=ExpiresAfter(hours=1),
    )
    session = requests.Session()
    session.mount('http://', cache_adapter)
    session.mount('https://', cache_adapter)
    show_exceptions = getattr(settings, 'DEBUG', True)
    feed_request = session.get(feed_url, timeout=requests_timeout)
    return feedparser.parse(feed_request.text)
def get_session(target='http://', heuristic=None):
    """
    Gets a pre-configured :mod:`requests` session.

    This function configures the following behavior into the session:

    - Proxy settings are added to the session.
    - It is configured to use the instance's :data:`requests_cache`.
    - Permanent redirect caching is handled by :mod:`CacheControl`.
    - Temporary redirect caching is not supported.

    Each module / class instance which uses this should subsequently maintain
    its own session with whatever modifications it requires within a scope
    which makes sense for the use case (and probably close it when it's done).

    The session returned from here uses the instance's REQUESTS_CACHE with a
    single - though configurable - heuristic. If additional caches or
    heuristics need to be added, it's the caller's problem to set them up.

    .. note::
        The caching here seems to be pretty bad, particularly for digikey
        passive component search. I don't know why.

    :param target: Defaults to ``'http://'``. String containing a prefix for
        the targets that should be cached. Use this to set up site-specific
        heuristics.
    :param heuristic: The heuristic to use for the cache adapter.
    :type heuristic: :class:`cachecontrol.heuristics.BaseHeuristic`
    :rtype: :class:`requests.Session`
    """
    s = requests.session()
    if _proxy_dict is not None:
        s.proxies.update(_proxy_dict)
    if heuristic is None:
        heuristic = ExpiresAfter(seconds=MAX_AGE_DEFAULT)
    s.mount(target, _get_requests_cache_adapter(heuristic))
    return s
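Usage sketch for get_session(), setting a site-specific heuristic for one host prefix (the digikey host comes from the docstring's note; the URL path is illustrative):

from cachecontrol.heuristics import ExpiresAfter

s = get_session(target='http://www.digikey.com/',
                heuristic=ExpiresAfter(hours=6))
r = s.get('http://www.digikey.com/products/en')   # cached for six hours
s.close()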
def session_factory(cookie_string=None, max_workers=10, cache_dir=None,
                    cache_days=7, cache_forever=False):
    session = requests.Session()
    if cookie_string:
        session.cookies = cookiejar_from_str(cookie_string)
    if cache_dir:
        logger.debug('Using CacheControl: dir=%r, days=%r, forever=%r',
                     cache_dir, cache_days, cache_forever)
        session = CacheControl(session,
                               cache=FileCache(cache_dir, forever=cache_forever),
                               heuristic=ExpiresAfter(days=cache_days))
    session = ProgressFuturesSession(max_workers=max_workers, session=session)
    logger.debug('%s with cookies: %s', type(session).__name__, session.cookies)
    return session
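Usage sketch: with cache_dir set, responses pass through CacheControl's FileCache before the futures wrapper; either way the factory returns a futures-style session, so .get() yields a future rather than a response:

session = session_factory(cache_dir='/tmp/http-cache', cache_days=7)
future = session.get('https://example.com/feed')   # returns a future
response = future.result()                         # blocks until the response arrives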
def _build_session(self, max_retries):
    if not isinstance(max_retries, int):
        raise ValueError(f'int expected, found {type(max_retries)}.')
    elif max_retries < 1:
        raise ValueError('max_retries should be greater or equal to 1.')

    session = requests.Session()

    # mount a single adapter that both retries and caches; mounting two
    # adapters at the same 'http://' prefix (as the original did) makes the
    # second mount replace the first, silently dropping the retry policy
    session.mount(
        'http://',
        CacheControlAdapter(
            max_retries=Retry(total=max_retries,
                              method_whitelist=frozenset(['GET', 'POST'])),
            heuristic=ExpiresAfter(hours=1)))

    session.headers['User-Agent'] = USER_AGENT
    self.session = session
# language: python
# name: python3
# ---

# +
from gssutils import *
from requests import Session
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import ExpiresAfter

scraper = Scraper(
    'https://statswales.gov.wales/Catalogue/Housing/Social-Housing-Stock-and-Rents/averageweeklyrentsinstockatsocialrent-by-area-accommodation-providertype',
    session=CacheControl(Session(),
                         cache=FileCache('.cache'),
                         heuristic=ExpiresAfter(days=7)))
scraper
# -

if len(scraper.distributions) == 0:
    from gssutils.metadata import Distribution
    dist = Distribution(scraper)
    dist.title = 'Dataset'
    dist.downloadURL = 'http://open.statswales.gov.wales/dataset/hous0601'
    dist.mediaType = 'application/json'
    scraper.distributions.append(dist)

table = scraper.distribution(title='Dataset').as_pandas()
table
table.columns
def __init__(self, **kwargs):
    '''
    Base class for common scraping tasks

    Args:

    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    self.urls = []

    # use requests HTML to aid parsing
    # has all same methods as requests.Session
    _s = HTMLSession()

    # delay/expire
    if kwargs.get('delay'):
        self.delay = kwargs['delay']
    else:
        self.delay = 2
    if kwargs.get('expire_hours'):
        self.expire_hours = kwargs['expire_hours']
    else:
        self.expire_hours = 168

    # add cookies
    if kwargs.get('cookies'):
        _s.cookies = kwargs['cookies']
    else:
        try:
            import cookielib
            _s.cookies = cookielib.MozillaCookieJar()
        except (NameError, ImportError):
            import http.cookiejar
            _s.cookies = http.cookiejar.MozillaCookieJar()

    # add headers
    if kwargs.get('headers'):
        _s.headers = kwargs['headers']
    else:
        ua = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
        _s.headers = {'User-Agent': ua}

    # add proxies
    if kwargs.get('proxies'):
        _s.proxies = kwargs['proxies']

    # add cache (the 'scrapers' fallback name is a guess; the original raised
    # KeyError when no cache_name was passed and never set self.cache_name for
    # names containing '/')
    cache_name = kwargs.get('cache_name', 'scrapers')
    if '/' not in cache_name:
        self.cache_name = os.path.join('/tmp', cache_name)
    else:
        self.cache_name = cache_name
    try:
        from cachecontrol import CacheControlAdapter
        from cachecontrol.heuristics import ExpiresAfter
        from cachecontrol.caches import FileCache
        _s.mount('http://',
                 CacheControlAdapter(cache=FileCache(self.cache_name),
                                     cache_etags=False,
                                     heuristic=ExpiresAfter(hours=self.expire_hours)))
    except ImportError:
        try:
            import requests_cache
            requests_cache.install_cache(self.cache_name)
        except Exception:
            logging.exception('could not install cache')

    self.s = _s
import os
import shutil
import stat
import zipfile
from typing import Any, Dict, Optional

import aiohttp
import requests
from cachecontrol import CacheControl, CacheControlAdapter
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import ExpiresAfter

from shared import configuration, perf
from shared.pd_exception import OperationalException

SESSION = CacheControl(requests.Session(),
                       cache=FileCache(configuration.get('web_cache')))
SESSION.mount('http://whatsinstandard.com',
              CacheControlAdapter(heuristic=ExpiresAfter(days=14)))
AIOSESSION = aiohttp.ClientSession()


def unzip(url: str, path: str) -> str:
    location = '{scratch_dir}/zip'.format(scratch_dir=configuration.get('scratch_dir'))

    def remove_readonly(func, path, _):
        os.chmod(path, stat.S_IWRITE)
        func(path)

    shutil.rmtree(location, True, remove_readonly)
    os.mkdir(location)
    store(url, '{location}/zip.zip'.format(location=location))
def request(self, method, url, headers=None, params=None, cache=True, *args, **kwargs):
    url = self.normalize_url(url)

    kwargs.setdefault('params', {}).update(params or {})
    kwargs.setdefault('headers', {}).update(headers or {})

    # if method == 'POST':
    #     self.session.headers.update({"Content-type": "application/x-www-form-urlencoded"})
    kwargs['headers'].update({'Accept-Encoding': 'gzip, deflate'})
    kwargs['headers'].update(random.choice(USER_AGENTS))

    # request session ssl verify
    kwargs['verify'] = False
    if sickrage.srCore.srConfig.SSL_VERIFY:
        try:
            kwargs['verify'] = certifi.where()
        except Exception:
            pass

    # request session proxies
    if 'Referer' not in kwargs['headers'] and sickrage.srCore.srConfig.PROXY_SETTING:
        sickrage.srCore.srLogger.debug(
            "Using global proxy: " + sickrage.srCore.srConfig.PROXY_SETTING)
        scheme, address = urllib2.splittype(sickrage.srCore.srConfig.PROXY_SETTING)
        # prepend a scheme if the configured proxy lacks one
        address = sickrage.srCore.srConfig.PROXY_SETTING if scheme \
            else 'http://{}'.format(sickrage.srCore.srConfig.PROXY_SETTING)
        kwargs.setdefault('proxies', {}).update({"http": address, "https": address})
        kwargs['headers'].update({'Referer': address})

    # setup session caching
    if cache:
        cache_file = os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sessions.db'))
        cachecontrol.CacheControl(self,
                                  cache=DBCache(cache_file),
                                  heuristic=ExpiresAfter(days=7))

    # get web response
    response = super(srSession, self).request(method, url, *args, **kwargs).result()

    try:
        # check web response for errors
        response.raise_for_status()
    except requests.exceptions.SSLError as e:
        if ssl.OPENSSL_VERSION_INFO < (1, 0, 1, 5):
            sickrage.srCore.srLogger.info(
                "SSL Error requesting url: '{}' You have {}, try upgrading OpenSSL to 1.0.1e+"
                .format(e.request.url, ssl.OPENSSL_VERSION))
        if sickrage.srCore.srConfig.SSL_VERIFY:
            sickrage.srCore.srLogger.info(
                "SSL Error requesting url: '{}' Try disabling Cert Verification on the advanced tab of /config/general"
                .format(e.request.url))
    except Exception as e:
        sickrage.srCore.srLogger.debug(e.message)

    return response
import requests
from django.conf import settings
from django.http import HttpResponse, JsonResponse
from django.views.decorators.gzip import gzip_page
from cachecontrol import CacheControl
from cachecontrol.heuristics import ExpiresAfter
from cachecontrol_django import DjangoCache

cached_sess = CacheControl(
    requests.session(),
    cache=DjangoCache(),
    heuristic=ExpiresAfter(seconds=30)
)


@gzip_page
def variomes_single_ref(request):
    # proxy requests to variomes server
    response = cached_sess.get('%s/fetchLit.jsp' % settings.VARIOMES_BASE_URL,
                               params=request.GET,
                               verify=settings.VARIOMES_VERIFY_REQUESTS)
    if response.status_code != 200:
        return HttpResponse(response.content, status=response.status_code)
    return JsonResponse(response.json())


@gzip_page
def variomes_search(request):
def main(group_id, location, time_boundary, event_status, pandoc):
    key_path = os.path.normpath(os.path.expanduser('~/.meetup.com-key'))
    if os.path.exists(key_path):
        with open(key_path) as fh:
            key = fh.read().strip()
    else:
        key = None

    cache = FileCache('.web_cache', forever=True)
    requests = CacheControl(Session(),
                            cache,
                            cache_etags=False,
                            heuristic=ExpiresAfter(days=1))

    while True:
        resp = requests.get('https://api.meetup.com/status', params=dict(key=key))
        if resp.status_code == 200:
            break
        elif resp.status_code == 401:
            click.echo('Your meetup.com key is required. You can get it from '
                       'https://secure.meetup.com/meetup_api/key/\n')
            if click.confirm('Open https://secure.meetup.com/meetup_api/key/ in your web browser?'):
                click.launch('https://secure.meetup.com/meetup_api/key/')
            click.echo('')
            key = click.prompt('Key', hide_input=True)
        else:
            raise click.ClickException(
                'Failed to get meetup.com status. Response was {!r}'.format(resp.text))

    click.secho('For convenience your key is saved in `{}`.\n'.format(key_path), fg='magenta')
    with open(key_path, 'w') as fh:
        fh.write(key)

    while not location:
        location = location or get_input('Location: ', completer=WordCompleter(
            ['cluj', 'iasi', 'timisoara'], ignore_case=True))

    while True:
        group_id = group_id or get_input('Group ID: ', completer=WordCompleter(
            ['Cluj-py', 'RoPython-Timisoara'], ignore_case=True))
        resp = requests.get('https://api.meetup.com/2/events', params=dict(
            key=key,
            group_urlname=group_id,
            time=time_boundary,
            status=event_status,
        ))
        if resp.status_code == 200:
            json = resp.json()
            if json['results']:
                break
            else:
                click.secho('Invalid group `{}`. It has no events!'.format(group_id), fg='red')
                group_id = None
        elif resp.status_code == 400:
            raise click.ClickException(
                'Failed to make a correct request. Response was {!r}'.format(resp.text))
        else:
            click.secho('Invalid group `{}`. Response was [{}] {!r}'.format(
                group_id, resp.status_code, resp.text), fg='red')

    # click.echo(pformat(dict(resp.headers)))

    for event in json['results']:
        dt = datetime.fromtimestamp(event['time'] / 1000)
        click.echo("{}: {}".format(dt.strftime('%Y-%m-%d %H:%M:%S'), event['name']))
        existing_path = glob(os.path.join('content', '*', dt.strftime('%Y-%m-%d*'), 'index.rst'))
        if existing_path:
            if len(existing_path) > 1:
                click.secho('\tERROR: multiple paths matched: {}'.format(existing_path))
            else:
                click.secho('\t`{}` already exists. Not importing.'.format(*existing_path),
                            fg='yellow')
        else:
            target_dir = os.path.join(
                'content', location,
                '{}-{}'.format(dt.strftime('%Y-%m-%d'), slugify(event['name'])))
            target_path = os.path.join(target_dir, 'index.rst')
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            if pandoc:
                with tempfile.NamedTemporaryFile(delete=False) as fh:
                    fh.write(event['description'].encode('utf-8'))
                rst = subprocess.check_output(
                    ['pandoc', '--from=html', '--to=rst', fh.name]).decode('utf-8')
                os.unlink(fh.name)
            else:
                stream = StringIO()
                html2rest(event['description'].encode('utf-8'), writer=stream)
                rst = stream.getvalue().decode('utf-8')
            with io.open(target_path, 'w', encoding='utf-8') as fh:
                fh.write('''{name}
###############################################################

:tags: unknown
:registration: meetup.com: {event_url}

{rst}'''.format(rst=rst, **event))
            click.secho('\tWrote `{}`.'.format(target_path), fg='green')
from operator import itemgetter

import requests
from django.conf import settings
from django.http import HttpResponse, JsonResponse
from requests_futures.sessions import FuturesSession
from cachecontrol import CacheControl
from cachecontrol.heuristics import ExpiresAfter
from cachecontrol_django import DjangoCache

from api.models import Gene

cached_sess = CacheControl(requests.session(),
                           cache=DjangoCache(),
                           heuristic=ExpiresAfter(days=10))


# -------------------------------------------------------------
# --- SOCIBP acquisition endpoints
# -------------------------------------------------------------

def get_genes(request):
    # params: projection=SUMMARY&pageSize=100000&pageNumber=0&direction=ASC
    response = requests.get(settings.SOCIBP_API_URL + '/genes', params={
        'projection': 'SUMMARY',
        'pageSize': 100000,
        'pageNumber': '0',
        'direction': 'ASC'
    },
import selenium.webdriver.support.ui as selenium_ui
from distutils.dir_util import copy_tree

# CHEF and CONTENT DEBUG
################################################################################
DEBUG_MODE = False               # print extra-verbose info
DOWNLOAD_ONE_TO_webroot = False  # produce debug webroot/ and skip cheffing
DOWNLOAD_ONLY_N = False          # chef only first N books; set to False to disable

sess = requests.Session()
cache = FileCache('.webcache')
chefdev_adapter = CacheControlAdapter(heuristic=ExpiresAfter(days=1), cache=cache)
sess.mount('http://3asafeer.com/', chefdev_adapter)
sess.mount('http://fonts.googleapis.com/', chefdev_adapter)

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
}

# PAUSES and DELAYS
################################################################################
LOADING_WAIT_TIME = 10  # long delay
LOADING_WAIT_TIME_MS = LOADING_WAIT_TIME * 1000
LOADING_WAIT_TIME_SHORT = 7  # short delay
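A note on the mounts above: a CacheControlAdapter applies only to URLs matching its mounted prefix, so other hosts reached through `sess` bypass the cache entirely (the paths below are illustrative):

resp = sess.get('http://3asafeer.com/read')        # goes through the caching adapter
resp = sess.get('http://example.com/anything')     # plain, uncached request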
def __init__(self, **kwargs):
    """ """
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    self.urls = []

    # use requests HTML to aid parsing
    # has all same methods as requests.Session
    _s = HTMLSession()

    self.delay = kwargs.get("delay", 2)
    self.expire_hours = kwargs.get("expire_hours", 168)

    # add cookies
    if kwargs.get("cookies"):
        _s.cookies = kwargs["cookies"]
    else:
        import http.cookiejar
        _s.cookies = http.cookiejar.MozillaCookieJar()

    # add headers
    default_headers = {
        "User-Agent": random.choice(USER_AGENTS),
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "en-US,en;q=0.9",
        "accept": "application/json, text/plain, */*",
    }
    _s.headers.update(default_headers)
    if kwargs.get("headers"):
        _s.headers.update(kwargs["headers"])

    # add proxies
    if kwargs.get("proxies"):
        _s.proxies = kwargs["proxies"]

    # add cache
    if not kwargs.get("cache_name"):
        self.cache_name = os.path.join("/tmp", random_string(32))
    elif "/" not in kwargs.get("cache_name", ""):
        self.cache_name = os.path.join("/tmp", kwargs["cache_name"])
    else:
        self.cache_name = kwargs.get("cache_name")
    try:
        from cachecontrol import CacheControlAdapter
        from cachecontrol.heuristics import ExpiresAfter
        from cachecontrol.caches import FileCache

        _s.mount(
            "http://",
            CacheControlAdapter(
                cache=FileCache(self.cache_name),
                cache_etags=False,
                heuristic=ExpiresAfter(hours=self.expire_hours),
            ),
        )
    except ImportError:
        try:
            import requests_cache
            requests_cache.install_cache(self.cache_name)
        except BaseException:
            logging.exception("could not install cache")

    self.session = _s
# # WG Notifications of deaths of residents related to COVID-19 in adult care homes

from gssutils import *
import json
import numpy as np
import glob
from requests import Session
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.heuristics import ExpiresAfter

scrape = Scraper(seed="info.json",
                 session=CacheControl(Session(),
                                      cache=FileCache('.cache'),
                                      heuristic=ExpiresAfter(days=1)))
scraper = scrape
scraper

dist = scrape.distribution(
    latest=True,
    title=lambda x: x.startswith('Notifications of deaths of residents related to COVID-19')
)
dist

tabs = {tab.name: tab for tab in dist.as_databaker()}
list(tabs)

# +
# Check tab contents is what is expected before continuing.
expected_tabs = ['Contents', 'Information', 'Table_1', 'Table_2', 'Table_3', 'Table_4',
                 'Table_5', 'Table_6', 'Table_7', 'Table_8', 'Table_9']
whats_missing = [item for item in tabs if item not in expected_tabs]
import re

import requests
import discord
from discord.ext import commands
from discord.member import Member
from jikanpy import Jikan
from cachecontrol import CacheControl
from cachecontrol.heuristics import ExpiresAfter
from cachecontrol.caches.file_cache import FileCache

import config

expires = ExpiresAfter(days=1)
session = CacheControl(requests.Session(),
                       heuristic=expires,
                       cache=FileCache(config.cache_dir))
jikan = Jikan(session=session)


class JoinableMessage:
    def __init__(self, message: discord.message, bot):
        self.message = message
        self.bot = bot

    def is_joinable(self):
        if self.message.author.id != self.bot.user.id:
            return False
        if len(self.message.embeds) == 0:
            return False
        if self.get_field('channel') is None:
def setup(self):
    self.sess = Session()
    self.cache_sess = CacheControl(self.sess, heuristic=ExpiresAfter(days=1))
TIMEOUT_SECONDS = 10  # Seconds before URL query timeout is raised

PROVIDERS_URLS = [
    "https://providers.optimade.org/v1/links",
    "https://raw.githubusercontent.com/Materials-Consortia/providers/master/src"
    "/links/v1/providers.json",
]

CACHE_DIR = Path(appdirs.user_cache_dir("optimade-client", "CasperWA"))
CACHE_DIR.mkdir(parents=True, exist_ok=True)
CACHED_PROVIDERS = CACHE_DIR / "cached_providers.json"

SESSION = requests.Session()
SESSION_ADAPTER = CacheControlAdapter(
    cache=FileCache(CACHE_DIR / ".requests_cache"),
    heuristic=ExpiresAfter(days=1),
)
SESSION_ADAPTER_DEBUG = CacheControlAdapter()
SESSION.mount("http://", SESSION_ADAPTER)
SESSION.mount("https://", SESSION_ADAPTER)
SESSION.mount("http://localhost", SESSION_ADAPTER_DEBUG)
SESSION.mount("http://127.0.0.1", SESSION_ADAPTER_DEBUG)

# Currently known providers' development OPTIMADE base URLs
DEVELOPMENT_PROVIDERS = {"mcloud": "https://dev-www.materialscloud.org/optimade"}

try:
    DEVELOPMENT_MODE = bool(int(os.getenv("OPTIMADE_CLIENT_DEVELOPMENT_MODE", "0")))
except ValueError:
    LOGGER.debug(
        (
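A note on adapter resolution above: requests matches the longest mounted prefix, so localhost and 127.0.0.1 traffic uses the plain debug adapter (no file cache, no one-day heuristic) while all other http/https URLs go through the caching adapter (the localhost port below is illustrative):

SESSION.get("https://providers.optimade.org/v1/links")  # file-cached, fresh for one day
SESSION.get("http://localhost:5000/v1/links")           # debug adapter: nothing persisted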
def main(group_id, location, time_boundary, event_status, pandoc, force):
    key_path = os.path.normpath(os.path.expanduser('~/.meetup.com-key'))
    if os.path.exists(key_path):
        with io.open(key_path, encoding='utf8') as fh:
            key = fh.read().strip()
    else:
        key = None

    cache = FileCache('.web_cache', forever=True)
    requests = CacheControl(Session(),
                            cache,
                            cache_etags=False,
                            heuristic=ExpiresAfter(days=1))

    while True:
        resp = requests.get('https://api.meetup.com/status', params=dict(key=key))
        if resp.status_code == 200 and resp.json().get('status') == 'ok':
            break
        elif resp.status_code == 200 and any(
                'auth_fail' == e.code for e in resp.json().get('errors', [])):
            click.echo('Your meetup.com key is required. You can get it from '
                       'https://secure.meetup.com/meetup_api/key/\n')
            if click.confirm('Open https://secure.meetup.com/meetup_api/key/ in your web browser?'):
                click.launch('https://secure.meetup.com/meetup_api/key/')
            click.echo('')
            key = click.prompt('Key', hide_input=True)
        else:
            raise click.ClickException(
                'Failed to get meetup.com status. Response was {!r} {!r}'.format(
                    resp.status_code, resp.text))

    click.secho('For convenience your key is saved in `{}`.\n'.format(key_path), fg='magenta')
    with open(key_path, 'w') as fh:
        fh.write(key)

    while not location:
        location = location or get_input(
            u'Location: ',
            completer=WordCompleter([u'cluj', u'iasi', u'timisoara', u'bucuresti'],
                                    ignore_case=True))

    while True:
        group_id = group_id or get_input(
            u'Group ID: ',
            completer=WordCompleter([u'RoPython-Bucuresti', u'RoPython-Cluj',
                                     u'RoPython_Iasi', u'RoPython-Timisoara'],
                                    ignore_case=True))
        resp = requests.get('https://api.meetup.com/2/events', params=dict(
            key=key,
            group_urlname=group_id,
            time=time_boundary,
            status=event_status,
        ))
        if resp.status_code == 200:
            json = resp.json()
            if json['results']:
                break
            else:
                click.secho('Invalid group `{}`. It has no events!'.format(group_id), fg='red')
                group_id = None
        elif resp.status_code == 400:
            raise click.ClickException(
                'Failed to make a correct request. Response was {!r}'.format(resp.text))
        else:
            click.secho('Invalid group `{}`. Response was [{}] {!r}'.format(
                group_id, resp.status_code, resp.text), fg='red')

    # click.echo(pformat(dict(resp.headers)))

    for event in json['results']:
        dt = datetime.fromtimestamp(event['time'] / 1000)
        event['duration'] = format_duration(event.get('duration', 3600000) / 1000)
        event['time'] = dt.strftime('%Y-%m-%d %H:%M')
        if 'how_to_find_us' in event:
            address = event['how_to_find_us'],
        else:
            address = ()
        if 'venue' in event:
            address_1 = event['venue'].get('address_1')
            if address_1:
                address += address_1,
            event['venue']['address_1'] = ', '.join(address)
        else:
            event['venue'] = {'address_1': address}
        click.echo("{time}: {name}".format(**event))
        click.echo("\t{}".format(pformat(event)))

        existing_path = glob(os.path.join('content', '*', dt.strftime('%Y-%m-%d*'), 'index.rst'))
        if existing_path and not force:
            if len(existing_path) > 1:
                click.secho('\tERROR: multiple paths matched: {}'.format(existing_path))
            else:
                click.secho('\t`{}` already exists. Not importing.'.format(*existing_path),
                            fg='yellow')
        else:
            target_dir = os.path.join(
                'content', location,
                '{}-{}'.format(dt.strftime('%Y-%m-%d'), slugify(event['name'])))
            target_path = os.path.join(target_dir, 'index.rst')
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            if pandoc:
                with tempfile.NamedTemporaryFile(delete=False) as fh:
                    fh.write(event['description'].encode('utf-8'))
                rst = subprocess.check_output(
                    ['pandoc', '--from=html', '--to=rst', fh.name]).decode('utf-8')
                os.unlink(fh.name)
            else:
                rst = html2rest(event['description'])
            doc = u'''{name}
###############################################################

:tags: prezentari
:registration: meetup.com: {event_url}
:start: {time}
:duration: {duration}
:location: {venue[address_1]}, {venue[city]}, {venue[localized_country_name]}

{rst}'''.format(rst=rst, **event)
            with io.open(target_path, 'w', encoding='utf-8') as fh:
                fh.write(doc)
            click.secho('\tWrote `{}`.'.format(target_path), fg='green')