def __init__(self, url, payload=None, post=False):
    bundleID = bundle()
    cacheName = volatile(bundleID + "_requests_cache")
    requests_cache.configure(cacheName)
    if payload:
        self.request = requests.get(url, params=payload) if not post else requests.post(url, data=payload)
    else:
        self.request = requests.get(url)
def __init__(self, name=CACHE_DEFAULT_NAME, *args, **kwargs):
    self._name = name
    self._stats = {"fetched": 0}
    if "cache_time" in kwargs:
        cache_time = kwargs["cache_time"] / 60
    else:
        cache_time = CACHE_DEFAULT_TIMEOUT / 60
    requests_cache.configure(self._name, expire_after=cache_time)
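# Hedged sketch, not part of the snippet above: the division by 60 reflects the old
# configure() entry point, where expire_after was given in minutes. Newer requests-cache
# releases expose install_cache() and take seconds or a timedelta instead, so the
# equivalent one-minute cache would look roughly like this. The cache name below is a
# placeholder standing in for CACHE_DEFAULT_NAME.
from datetime import timedelta

import requests_cache

requests_cache.install_cache(cache_name="demo_cache", expire_after=timedelta(minutes=1))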
def __init__(self, timeout_secs=5, raise_for_status=True):
    self._raise_for_status = raise_for_status
    self.timeout_secs = timeout_secs
    self.is_offline = os.getenv("OFFLINE", "false").lower() == "true"
    if self.is_offline:
        logging.info("Offline mode: Setting up offline store")
        requests_cache.configure('../offline_data/data_cache', backend='sqlite',
                                 expire_after=10000, allowable_methods=('GET', 'POST'))
def __init__(self, username, password, caching=False):
    """This will create a new moodle handshake."""
    self.login(username, password)
    if caching:
        logging.info("caching enabled")
        try:
            import requests_cache
            requests_cache.configure('.moodle_cache')
        except ImportError:
            logging.warning("requests_cache cannot be imported")
            logging.warning("Moodle will be requested without caching")
def test_expire_cache(self):
    delay = 1
    url = 'http://httpbin.org/delay/%s' % delay
    requests_cache.configure(CACHE_NAME, backend=CACHE_BACKEND, expire_after=0.001)

    t = time.time()
    r = requests.get(url)
    delta = time.time() - t
    self.assertGreaterEqual(delta, delay)

    time.sleep(0.5)
    t = time.time()
    r = requests.get(url)
    delta = time.time() - t
    self.assertGreaterEqual(delta, delay)
def test_post_params(self):
    # issue #2
    requests_cache.configure(CACHE_NAME, CACHE_BACKEND, allowable_methods=('GET', 'POST'))

    for _ in range(3):
        d = {'param1': 'test1'}
        self.assertEqual(self.post(d)['form'], d)

        d = {'param1': 'test1', 'param3': 'test3'}
        self.assertEqual(self.post(d)['form'], d)

        d = {'param1': 'test1', 'param3': 'test3'}
        self.assertEqual(self.post(d)['form'], d)

        d = [('param1', 'test1'), ('param2', 'test2'), ('param3', 'test3')]
        res = sorted(self.post(d)['form'].items())
        self.assertEqual(res, d)
def work_in(dirname=None):
    """
    Context manager version of os.chdir. When exited,
    returns to the working directory prior to entering.
    """
    curdir = os.getcwd()
    try:
        if dirname is not None:
            os.chdir(dirname)
        requests_cache.configure(expire_after=60 * 10 * 10)
        changes.initialise()
        yield
    finally:
        os.chdir(curdir)
def do_tests_for(backend):
    requests_cache.configure(CACHE_NAME, backend)
    requests_cache.clear()
    n_threads = 10
    url = 'http://httpbin.org/get'

    def do_requests(url, params):
        for i in range(10):  # for testing write and read from cache
            requests.get(url, params=params)

    for _ in range(20):  # stress test
        threads = [Thread(target=do_requests, args=(url, {'param': i})) for i in range(n_threads)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        for i in range(n_threads):
            self.assert_(requests_cache.has_url('%s?param=%s' % (url, i)))
def test_post_data(self):
    # issue #2, raw payload
    requests_cache.configure(CACHE_NAME, CACHE_BACKEND, allowable_methods=('GET', 'POST'))
    d1 = json.dumps({'param1': 'test1'})
    d2 = json.dumps({'param1': 'test1', 'param2': 'test2'})
    d3 = str('some unicode data')
    if is_py3:
        bin_data = bytes('some binary data', 'utf8')
    else:
        bin_data = bytes('some binary data')

    for d in (d1, d2, d3):
        self.assertEqual(self.post(d)['data'], d)
        r = requests.post(httpbin('post'), data=d)
        self.assert_(hasattr(r, 'from_cache'))

    self.assertEqual(self.post(bin_data)['data'], bin_data.decode('utf8'))
    r = requests.post(httpbin('post'), data=bin_data)
    self.assert_(hasattr(r, 'from_cache'))
def get_stages(workflow_status, workflow_name, protocol_name, stage_name, use_cache):
    """Expands information about all stages in the filtered workflows"""
    workflow_pattern = re.compile(workflow_name)
    protocol_pattern = re.compile(protocol_name)
    stage_pattern = re.compile(stage_name)

    if use_cache:
        requests_cache.configure("workflow-info")

    session = ClaritySession.create(None)
    workflows = [workflow for workflow in session.api.get_workflows()
                 if workflow.status == workflow_status and workflow_pattern.match(workflow.name)]

    print("workflow\tprotocol\tstage\turi")
    for workflow in workflows:
        for stage in workflow.api_resource.stages:
            if not protocol_pattern.match(stage.protocol.name) or not stage_pattern.match(stage.name):
                continue
            try:
                print("\t".join([stage.workflow.name, stage.protocol.name, stage.name, stage.uri]))
            except AttributeError as e:
                print("# ERROR workflow={}: {}".format(workflow.uri, e.message))
def turn_on_request_caching():
    import requests_cache
    requests_cache.configure(os.path.join(os.path.expanduser('~'), '.fundamentals_test_requests'))
import argparse
import json
import requests
import requests_cache
from lassie import Lassie

parser = argparse.ArgumentParser(description='Separates URLs with 200 status code from those without.')
parser.add_argument('bmfile', help='Bookmarks file in JSON list format.')
args = parser.parse_args()

not_ok = []
bookmarks = []

requests_cache.configure('../cache/requests')

user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/43.0.2357.130 Chrome/43.0.2357.130 Safari/537.36'
headers = {'User-Agent': user_agent}

l = Lassie()
l.request_opts = {'headers': headers}

webclient = requests.Session()
webclient.headers.update(headers)

with open(args.bmfile, 'r') as f:
    data = json.load(f)

for i, b in enumerate(data['bookmarks']):
    url = b['url']
    if not url or not url.startswith(('http', 'https')):
        continue
#!/usr/bin/env python
# coding: utf-8
# Convert browser bookmarks file to json for further processing.

import sys, json, requests, requests_cache, extraction
from bs4 import BeautifulSoup


def get_extract(html, url):
    e = extraction.Extractor().extract(html, source_url=url)
    return {"title": e.title, "description": e.description,
            "image": e.image, "url": e.url if e.url else url}


requests_cache.configure("urls")

alltags = []
bmfile = sys.argv[1]
html = None
with open(bmfile, "r") as f:
    html = f.read()

not_ok = {}
links = {}
exceptions = {"request": [], "extract": []}

soup = BeautifulSoup(html)
for a in soup.find_all("a"):
    url = a.get("href")
    print("Processing url %s" % url)
    try:
        response = requests.get(url, verify=False, timeout=30)
    except Exception, err:
        exceptions["request"].append(url)
# -*- coding: utf-8 -*-
"""googleapis_translate

Refer to mgoogle_translate.py
"""
import logging

from nose.tools import (eq_, with_setup)
import requests
import requests_cache

requests_cache.configure(allowable_methods=(
    'GET',
    'POST',
))  # noqa POST does not seem to work
requests_cache.configure(
    cache_name='baidu_cache',
    expire_after=604800,
    allowable_methods=('GET', 'POST'))  # noqa a week: 7*24*3600 = 604800
requests_cache.install_cache('googleapis_cache')

HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, sdch, br",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "User-Agent": "Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1",
    "Referer": "http://wap.baidu.com"
}
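# Hedged sketch, not from the module above: the two configure() calls followed by
# install_cache() can usually be collapsed into a single install_cache() call that
# enables POST caching and the one-week expiry in one go, assuming a reasonably
# recent requests-cache release.
import requests_cache

requests_cache.install_cache(
    cache_name="googleapis_cache",
    expire_after=604800,                 # one week: 7 * 24 * 3600
    allowable_methods=("GET", "POST"),
)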
import gevent
from gevent import monkey; monkey.patch_socket()
from gevent.queue import Queue

from networkx import DiGraph, shortest_path

from util import pairwise, process_text, term_conditioner, gevent_throttle

import requests
import requests_cache

# request-level cache using HTTP Cache Control
requests_cache.configure('spotify_cache')

# term-level cache
_Q_CACHE = {}


def search_track_url(q, page):
    params = dict(q=q.encode('UTF-8'), page=page)
    return requests.Request("http://ws.spotify.com/search/1/track.json", params=params).full_url


# Spotify MetaAPI dictates max rate of 10 requests per second
throttle = gevent_throttle(10)(lambda: None)


# TODO timeout and exception handling
# TODO cache only useful stuff | if the track name is a substring of a poem
# TODO there could be multiple title matches - perhaps, should
# make them all available - maybe even tweak by popularity or genre
def search_track(q, page=1):
    q = term_conditioner(q)
    has_tracks = False
    if not _Q_CACHE.has_key(q):
import json

import requests, requests_cache

from geojson import GeoJSONCoder

# select basic sqlite caching backend
requests_cache.configure('http_cache')


class Resource(object):

    def __init__(self, uri, name=None, package=None):
        self.uri = uri
        self.name = name
        self.package = package
        self.lbl = ''.join([self.name, ' (', self.package, ')'])

    def __unicode__(self):
        # TODO not working?
        return self.lbl

    # TODO caching does not take into account Accept-headers!!!
    def csv(self):
        print self.uri
        r = requests.get(self.uri, headers={'Accept': 'text/csv'})
        return r.text

    def json(self, syntax='plain'):
        r = requests.get(self.uri, headers={'Accept': 'application/json'})
        data = r.json
        if syntax == 'geo':
            G = GeoJSONCoder()
def turn_on_request_caching():
    import requests_cache
    requests_cache.configure(cache_name='vector_cache_test')
def handle(self, *args, **options):
    import requests_cache
    requests_cache.configure("update-social")
    self.logger = logging.getLogger(__name__)
    self.updater = FeedUpdater(self.logger)
    self.updater.update_feeds()
            'rawdate': item.xpath("self::*//p[@class='date']/text()")[0].strip(),
            'summary': htmlize(''.join([lxml.html.tostring(x) for x in item.xpath("self::*//div[@class='item-summary']/p[not(@class)]")]))
        }
        builder.append(data)
    scraperwiki.sqlite.save(table_name='_index', data=builder, unique_keys=['url'])
    print len(builder)
    if len(builder) != 10:
        break

try:
    scraperwiki.sqlite.execute("alter table _index add column date")
    scraperwiki.sqlite.execute("alter table _index add column markdown")
except Exception, e:
    print repr(e)

requests_cache.configure()

for i in scraperwiki.sqlite.select("url, rawdate from _index"):
    print i['url']
    r = requests.get(i['url'])
    html = r.text
    root = lxml.html.fromstring(html)
    fragment = root.xpath("//div[@class='news-details-body']")[0]
    markdown = htmlize(lxml.html.tostring(fragment))
    scraperwiki.sqlite.execute("update _index set markdown = ? where url = ?", (markdown, i['url']))
    nicedate = parsedate(i['rawdate'])
    nicerdate = datetime.datetime.strftime(datetime.datetime.strptime(nicedate, '%Y-%m-%d'), '%d-%b-%Y')
    if nicerdate[0] == '0':
        nicerdate = nicerdate[1:]
    scraperwiki.sqlite.execute("update _index set date = ? where url = ?", (nicerdate, i['url']))

scraperwiki.sqlite.commit()
    register_extensions(app)
    register_blueprints(app)
    register_errorhandlers(app)
    return app


def register_extensions(app):
    assets.init_app(app)
    bcrypt.init_app(app)
    cache.init_app(app)
    debug_toolbar.init_app(app)
    return None


def register_blueprints(app):
    app.register_blueprint(public.views.blueprint)
    return None


def register_errorhandlers(app):
    def render_error(error):
        # If a HTTPException, pull the `code` attribute; default to 500
        error_code = getattr(error, 'code', 500)
        return render_template("{0}.html".format(error_code)), error_code
    for errcode in [401, 404, 500]:
        app.errorhandler(errcode)(render_error)
    return None


requests_cache.configure('/tmp/cache', expire_after=600)
def __init__(self, logger=None, use_cache=False):
    self.logger = logger or logging.getLogger(__name__)
    if use_cache:
        cache_name = "process-types"
        requests_cache.configure(cache_name)
DATA_TYPES = ("elections", "parties", "candidates", "prebudgets", "prebudgets_csv")

for imp in options.imports:
    if imp not in DATA_TYPES:
        sys.stderr.write("Unsupported data type '%s'.\nSupported data types:\n" % imp)
        for dt in DATA_TYPES:
            sys.stderr.write(" %s\n" % dt)
        exit(1)

http = HttpFetcher()
if not options.disable_cache:
    cache_dir = ".cache"
    http.set_cache_dir(cache_dir)
    requests_cache.configure("importers")

if options.django:
    from importers.backends.django import DjangoBackend
    # We need to start logging after Django initializes
    # because Django can be configured to reset logging.
    logger = init_logging(debug=options.verbose)
    backend = DjangoBackend(logger, replace=options.replace)
else:
    logger = init_logging(debug=options.verbose)
    backend = Backend(logger, replace=options.replace)

if not options.disable_cache:
    logger.debug("Setting up HTTP cache in %s" % cache_dir)
else:
""" import sys import os import pprint import argparse import requests import requests.auth import simplejson as json try: CACHE_NAME = __file__ + "-cache" # Use requests-cache if available. We cache successful # requests for 1 minute, because requests_cache only supports # cache duration multiples of a minute... import requests_cache requests_cache.configure(cache_name=CACHE_NAME, expire_after=1) except ImportError: pass def twitter_oauth(): """Return OAUTH authentication to be used in by requests library.""" # Must supply twitter account credentials in environment variables. consumer_key = unicode(os.environ['CONSUMER_KEY']) consumer_secret = unicode(os.environ['CONSUMER_SECRET']) oauth_token = unicode(os.environ['ACCESS_KEY']) oauth_token_secret = unicode(os.environ['ACCESS_SECRET']) return requests.auth.OAuth1(consumer_key, consumer_secret, oauth_token, oauth_token_secret,
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU Affero General Public License for more details.
##
## You should have received a copy of the GNU Affero General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Provides a simple abstraction against the OSM API"""

import requests
import requests_cache
import logging

logging.basicConfig(level=logging.DEBUG)

rs = requests.session(headers={'user-agent': 'changemonger/0.0.1'})
requests_cache.configure('osm_cache')

server = 'api.openstreetmap.org'


def getNode(id, version=None):
    id = str(id)
    if version:
        url = "http://%s/api/0.6/node/%s/%s" % (server, id, str(version))
    else:
        url = "http://%s/api/0.6/node/%s" % (server, id)
    logging.debug("Retrieving %s for node %s version %s" % (
        url, id, version))
    r = rs.get(url)
    r.raise_for_status()
    return r.content
def setUp(self):
    requests_cache.configure(CACHE_NAME, backend=CACHE_BACKEND, fast_save=FAST_SAVE)
    requests_cache.clear()
    print url
    r = requests.get(url)
    doc = html.fromstring(r.text)
    el_list = doc.xpath('//a[@name]')
    members = []
    for idx, el in enumerate(el_list):
        # The first council member is sometimes encoded differently...
        if idx == 0 and el.getnext() != None:
            name = el.getnext().text_content()
        else:
            name = el.tail
        name = name.strip()
        members.append((name, party))
    return members


requests_cache.configure('jyvaskyla')

members = []

BASE_URL = 'http://www.jyvaskyla.fi/hallinto/valtuusto/valtuusto09'
r = requests.get(BASE_URL)
doc = html.fromstring(r.text)
# We will be fetching linked pages, so relative paths must be
# converted into absolute URLs.
doc.make_links_absolute(BASE_URL)

# Find the p element that contains the text "Valtuustoryhmät"
el = doc.xpath(u"//h2[contains(., 'Valtuustoryhmät')]")[0]
# The links to the council groups follow
party_links = el.xpath("following-sibling::p/a")
for link_el in party_links:
def setUp(self):
    requests_cache.configure(CACHE_NAME, backend=CACHE_BACKEND)
    requests_cache.clear()
# -*- coding: utf-8 -*-
# Get programming languages data from freebase and save in json format
# appropriate for generating a network graph with sigma.js.

import requests, requests_cache, json

requests_cache.configure('freebase')

langs = []
paradigms = {}

with open('query.json') as f:
    query = f.read()

r = requests.get('https://www.googleapis.com/freebase/v1/mqlread', params={'query': query})
res = json.loads(r.text)['result']

for index, lang in enumerate(res):
    paras = []
    for i in lang['language_paradigms']:
        pid = i['id']
        name = i['name']
        paras.append({'id': pid, 'name': name})
        if pid not in paradigms:
            paradigms[pid] = {'count': 1, 'name': name, 'id': pid}
        else:
            paradigms[pid]['count'] += 1
    langs.append({
        'index': index,
        'size': len(lang['influenced']),
        'influenced': [{'id': i['id'], 'name': i['name']} for i in lang['influenced']],
        'paradigms': paras,
def test_unregistered_backend(self):
    with self.assertRaises(ValueError):
        requests_cache.configure(CACHE_NAME, backend='nonexistent')
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import requests
import requests_cache
from lxml import html

from utils import ScrapeError, submit_council_members
from finland import PARTIES

requests_cache.configure('helsinki')

r = requests.get('http://www.hel.fi/hki/helsinki/fi/P__t_ksenteko+ja+hallinto/P__t_ksenteko/Kaupunginvaltuusto/Valtuuston+j_senet')
doc = html.fromstring(r.text)

# Find the p element that contains the text "Kaupunginvaltuuston jäsenet"
el = doc.xpath(u"//p/strong[contains(., 'Kaupunginvaltuuston jäsenet')]")[0]
# Find the first table element following the p element
table_el = el.xpath("../following-sibling::table")[0]
rows = table_el.xpath("tr")
members = []
# The first row is header, skip it
for row in rows[1:]:
    el = row.xpath("td")[0]
    # Some of the elements have multiple lines (with email address
    # on the 2nd line. Take only the first line.
    s = el.text_content().split('\n')[0].strip()
    if not s:
        continue
def __init__(self, session, use_cache):
    self.session = session
    if use_cache:
        # TODO: The cache is being ignored
        cache_name = "reporting-svc-cache"
        requests_cache.configure(cache_name)
# -*- coding: utf-8 -*-

import requests, requests_cache, json, re
from bs4 import BeautifulSoup

requests_cache.configure('delicious_links')

urllist = []
re_ws = re.compile(r'\s+')


def cleanws(s):
    return re.sub(re_ws, ' ', s)


with open('data.json') as f:
    bookmarks = json.load(f)

for p in bookmarks['posts']:
    url = p['post']['href']
    r = requests.get(url)
    if 200 != r.status_code:
        continue
    soup = BeautifulSoup(r.text)
    if not soup.title:
        continue
    title = soup.title.text
    try:
        description = str(soup.find_all('meta', attrs={'name': 'description'})[0]['content'])
    except:
        if soup.p:
            description = soup.p.text
import requests
import requests_cache
import bs4
import datetime
import json
from icalendar import Calendar, Event  # icalendar==3.9.0

requests_cache.configure('cache_database', expire_after=60*60)

headers = {'Content-Type': 'text/calendar; charset=utf-8',
           'Content-Disposition': 'inline; filename=calendar.ics'}


def generate_calendar():
    req = requests.get('http://bezpieczna.um.warszawa.pl/imprezy-masowe/zgromadzenia')
    soup = bs4.BeautifulSoup(req.text)
    trs = soup.find('table', attrs={'class': 'ViewsTable'}).findAll('tr')
    label = [x.text.strip() for x in trs[0].findAll('th')]

    cal = Calendar()
    cal.add('prodid', '-//Zgromadzenia publiczne w Warszawie//jawne.info.pl//')
    cal.add('version', '0.1.0')

    for tr in trs[1:]:
        date_string = tr.find('td').text.strip()
        date = datetime.datetime.strptime(date_string, '%Y-%m-%d').date()
        values = [x.text.strip() for x in tr.findAll('td')]
        text = json.dumps(dict(zip(label, values)), indent=4)

        event = Event()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
import requests
from requests import async
import requests_cache

requests_cache.configure("example_cache")


def main():
    # Once cached, delayed page will be taken from cache
    # redirects also handled
    for i in range(5):
        requests.get("http://httpbin.org/delay/2")
    r = requests.get("http://httpbin.org/redirect/5")
    print(r.text)

    # What about async? It's also supported!
    rs = [async.get("http://httpbin.org/delay/%s" % i) for i in range(5)]
    for r in async.map(rs):
        print(r.text)

    # And if we need to get fresh page or don't want to cache it?
    with requests_cache.disabled():
        print(requests.get("http://httpbin.org/ip").text)

    # Debugging info about cache
    print(requests_cache.get_cache())
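# Hedged sketch, not from the example above: requests.async was removed from the
# requests library long ago, so the concurrent part of the example no longer runs as
# written. A standard-library ThreadPoolExecutor gives a similar effect while the same
# cache keeps serving repeated URLs (the httpbin URLs are reused from the example;
# configure() mirrors the old API used here, newer releases call it install_cache()).
from concurrent.futures import ThreadPoolExecutor

import requests
import requests_cache

requests_cache.configure("example_cache")

urls = ["http://httpbin.org/delay/%s" % i for i in range(5)]
with ThreadPoolExecutor(max_workers=5) as pool:
    for r in pool.map(requests.get, urls):
        print(r.url, getattr(r, "from_cache", False))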
A Python client for the New York Times Campaign Finance API
"""
__author__ = "Derek Willis ([email protected])"
__version__ = "0.4.0"

import os
import requests
import requests_cache

__all__ = ('NytCampfin', 'NytCampfinError', 'NytNotFoundError')

DEBUG = False

CURRENT_CYCLE = 2012

requests_cache.configure(expire_after=5)


# Error classes
class NytCampfinError(Exception):
    """
    Exception for New York Times Campaign Finance API errors
    """


class NytNotFoundError(NytCampfinError):
    """
    Exception for things not found
    """
""" import sys import os import pprint import argparse import requests import requests.auth import simplejson as json try: CACHE_NAME = __file__ + "-cache" # Use requests-cache if available. We cache successful # requests for 1 minute, because requests_cache only supports # cache duration multiples of a minute... import requests_cache requests_cache.configure(cache_name=CACHE_NAME, expire_after=1) except ImportError: pass def twitter_oauth(): """Return OAUTH authentication to be used in by requests library.""" # Must supply twitter account credentials in environment variables. consumer_key = unicode(os.environ['CONSUMER_KEY']) consumer_secret = unicode(os.environ['CONSUMER_SECRET']) oauth_token = unicode(os.environ['ACCESS_KEY']) oauth_token_secret = unicode(os.environ['ACCESS_SECRET']) return requests.auth.OAuth1( consumer_key, consumer_secret, oauth_token, oauth_token_secret,
def main():
    # argument parsing
    args = parse_args()
    if args.API_KEY and args.API_SECRET:
        network = lastfmconnect(api_key=args.API_KEY, api_secret=args.API_SECRET)
    else:
        network = lastfmconnect()

    if not args.columns:
        args.columns = args.rows

    # cache for python-requests
    if not args.disable_cache:
        cache_folder = os.path.expanduser("~/.local/share/lastfm_cg/")
        if not os.path.exists(cache_folder):
            logger.info("Cache folder not found. Creating %s", cache_folder)
            os.makedirs(cache_folder)
        if not os.path.isfile(cache_folder + "lastfm_cg_cache.sqlite"):
            original_folder = os.getcwd()
            os.chdir(cache_folder)
            requests_cache.install_cache("lastfm_cg_cache")
            os.chdir(original_folder)
        requests_cache.configure(os.path.expanduser(cache_folder + "lastfm_cg_cache"))

    if args.username:
        users = [x.strip() for x in args.username.split(",")]
    else:
        logger.error("Use the -u/--username flag to set an username.")
        exit()

    if args.timeframe not in TIMEFRAME_VALUES:
        logger.error(
            "Incorrect value %s for timeframe. Accepted values : %s",
            args.timeframe,
            TIMEFRAME_VALUES,
        )
        exit()

    for username in users:
        user = network.get_user(username)
        nb_covers = args.rows * args.columns if not args.top100 else 100
        list_covers = lastfm_utils.get_list_covers(
            user=user, nb_covers=nb_covers, timeframe=args.timeframe
        )

        img = (
            image_utils.create_image(list_covers=list_covers, nb_columns=args.columns)
            if not args.top100
            else image_utils.create_top100_image(list_covers=list_covers)
        )

        # export image
        if args.output_filename:
            export_filename = args.output_filename
        elif args.top100:
            export_filename = (
                f"{args.timeframe}_{username}_top100_{int(time.time())}.png"
            )
        else:
            export_filename = f"{args.timeframe}_{username}_{args.columns*args.rows:004}_{int(time.time())}.png"
        img.save(export_filename)

    logger.info("Runtime : %.2f seconds." % (time.time() - temps_debut))
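# Hedged sketch, not from the function above: instead of chdir-ing into the cache
# folder, the sqlite backend accepts a path-like cache name directly, which avoids
# touching the process working directory. The folder name is reused from the snippet.
import os

import requests_cache

cache_folder = os.path.expanduser("~/.local/share/lastfm_cg/")
os.makedirs(cache_folder, exist_ok=True)
requests_cache.install_cache(os.path.join(cache_folder, "lastfm_cg_cache"))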
import os
import requests_cache
import reversion
import time
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from gusregon import GUS
from tqdm import tqdm

from epuap_watchdog.institutions.models import Institution, REGON, REGONError
from epuap_watchdog.institutions.utils import normalize_regon

requests_cache.configure()


class Command(BaseCommand):
    help = "Command to import REGON database."

    def add_arguments(self, parser):
        parser.add_argument('--comment', required=True, help="Description of changes eg. data source description")
        parser.add_argument('--update', dest='update', action='store_true')
        parser.add_argument('--institutions_id', type=int, nargs='+', help="Institution IDs updated")
        parser.add_argument('--no-progress', dest='no_progress', action='store_false')

    def handle(self, comment, institutions_id, update, no_progress, *args, **options):
        gus = GUS(api_key=settings.GUSREGON_API_KEY, sandbox=settings.GUSREGON_SANDBOX)
        if settings.GUSREGON_SANDBOX is True:
            self.stderr.write("You are using sandbox mode for the REGON database. Data may be incorrect. "
                              "Set the environment variable GUSREGON_SANDBOX and GUSREGON_API_KEY correctly.")
from time import sleep
from random import random

import js2py  # type: ignore
import requests_cache  # type: ignore

LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.NullHandler())

HOME_FOLDER = Path.home()
__FILE__ = globals().get('__file__') or 'test'
CACHE_NAME = (Path(HOME_FOLDER) / (Path(__FILE__)).stem).as_posix()
EXPIRE_AFTER = 3600

requests_cache.configure(cache_name=CACHE_NAME, expire_after=36000)  # 10 hrs

URL = 'http://translate.google.cn/translate_a/single'

TL = \
    """function RL(a, b) {
    var t = "a";
    var Yb = "+";
    for (var c = 0; c < b.length - 2; c += 3) {
        var d = b.charAt(c + 2),
            d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
            d = b.charAt(c + 1) == Yb ? a >>> d : a << d;
        a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
    }
    return a
}