def __init__(self, api_domain, api_key=None, rate_limit=API_LIMIT):
    """
    Instantiates a new CallHub instance

    >>> callhub = CallHub("https://api-na1.callhub.io")

    With built-in rate limiting disabled:

    >>> callhub = CallHub("https://api-na1.callhub.io", rate_limit=False)

    Args:
        api_domain (``str``): Domain to access API (eg: api.callhub.io, api-na1.callhub.io),
            this varies by account. A single trailing ``/`` is stripped.
    Keyword Args:
        api_key (``str``, optional): Optional API key. If not provided,
            it will attempt to use ``os.environ['CALLHUB_API_KEY']``
        rate_limit (``dict``, optional): Enabled by default with settings that respect callhub's API limits.
            Setting this to false disables ratelimiting, or you can set your own limits by following the example
            below. Please don't abuse! :)

            >>> callhub = CallHub("https://api-na1.callhub.io",
            >>>                   rate_limit={"GENERAL": {"calls": 13, "period": 1},
            >>>                               "BULK_CREATE": {"calls": 1, "period": 70}})

            - Default limits bulk_create to 1 per 70 seconds (CallHub states their limit is every 60s but in
              practice a delay of 60s exactly can trip their rate limiter anyways)
            - Default limits all other API requests to 13 per second (CallHub support states their limit is
              20/s but this plays it on the safe side, because other rate limiters seem a little sensitive)

    Raises:
        ValueError: If ``api_domain`` is empty.
    """
    # Fail early with a clear message instead of an IndexError on api_domain[-1],
    # and before a thread pool is spun up for the session.
    if not api_domain:
        raise ValueError("api_domain must be a non-empty string")

    self.session = FuturesSession(max_workers=43)

    # Attempt 3 retries for failed connections
    adapter = requests.adapters.HTTPAdapter(max_retries=3)
    self.session.mount('https://', adapter)
    self.session.mount('http://', adapter)

    # Truncate final '/' off of API domain if it was provided
    if api_domain.endswith("/"):
        self.api_domain = api_domain[:-1]
    else:
        self.api_domain = api_domain

    if rate_limit:
        # Each limits(...) call below builds its own decorator, so GET and POST
        # are wrapped separately even though both read the "GENERAL" settings.

        # Apply general rate limit to self.session.get
        rate_limited_get = sleep_and_retry(limits(**rate_limit["GENERAL"])(FuturesSession.get))
        self.session.get = types.MethodType(rate_limited_get, self.session)

        # Apply general rate limit to self.session.post
        rate_limited_post = sleep_and_retry(limits(**rate_limit["GENERAL"])(FuturesSession.post))
        self.session.post = types.MethodType(rate_limited_post, self.session)

        # Apply bulk rate limit to self.bulk_create
        self.bulk_create = sleep_and_retry(limits(**rate_limit["BULK_CREATE"])(self.bulk_create))

    self.session.auth = CallHubAuth(api_key=api_key)

    # validate_api_key returns administrator email on success
    self.admin_email = self.validate_api_key()

    # cache for do-not-contact number/list to id mapping
    self.dnc_cache = {}
def __init__(
    self,
    user_agent: Optional[str] = None,
    user_agent_config_yaml: Optional[str] = None,
    user_agent_lookup: Optional[str] = None,
    use_env: bool = True,
    fail_on_missing_file: bool = True,
    rate_limit: Optional[Dict] = None,
    **kwargs: Any,
) -> None:
    """Create the session and pick the (optionally rate limited) setup callable.

    Args:
        user_agent: Forwarded to ``get_session``.
        user_agent_config_yaml: Forwarded to ``get_session``.
        user_agent_lookup: Forwarded to ``get_session``.
        use_env: Forwarded to ``get_session``.
        fail_on_missing_file: Forwarded to ``get_session``.
        rate_limit: Mapping with ``"calls"`` and ``"period"`` keys. When given,
            ``self.setup`` is ``self.normal_setup`` wrapped so that it sleeps
            and retries once the limit is hit. When None, ``self.setup`` is
            ``self.normal_setup`` unchanged.
        **kwargs: Forwarded to ``get_session``.
    """
    self.session = get_session(
        user_agent,
        user_agent_config_yaml,
        user_agent_lookup,
        use_env,
        fail_on_missing_file,
        **kwargs,
    )
    self.response = None

    if rate_limit is None:
        self.setup = self.normal_setup
    else:
        limiter = RateLimitDecorator(
            calls=rate_limit["calls"], period=rate_limit["period"]
        )
        self.setup = sleep_and_retry(limiter(self.normal_setup))
def __init__(self, client_key, client_secret, diskcache=None):
    """Initialize the client.

    Stores the credentials and builds ``self.requests_get``: ``requests.get``
    limited to one call per one-second period, sleeping and retrying when the
    limit is hit. If ``diskcache`` is given, the throttled getter is further
    wrapped with ``diskcache.memoize()``.
    """
    self.client_key = client_key
    self.client_secret = client_secret

    one_per_second = ratelimit.limits(calls=1, period=1)
    getter = ratelimit.sleep_and_retry(one_per_second(requests.get))

    # Memoization is only layered on when a cache object was supplied
    if diskcache is not None:
        getter = diskcache.memoize()(getter)

    self.requests_get = getter
def __init__(self, filename, logger_, program_name='corpusbuilder 1.0', user_agent=None, overwrite_warc=True,
             err_threshold=10, warcinfo_record_data=None, known_bad_urls=None, max_no_of_calls_in_period=2,
             limit_period=1, proxy_url=None, allow_cookies=False):
    """Open the target WARC file, set up the HTTP session and write the warcinfo record.

    :param filename: Path of the WARC file to create. When overwrite_warc is False and the
        path already exists, a numbered variant (``name-00000.warc.gz``, ``name-00001.warc.gz``, ...)
        is used instead.
    :param logger_: Object with a ``log(level, message)`` method.
    :param program_name: Value of the 'software' field in the generated warcinfo record.
    :param user_agent: Value of the 'User-agent' request header.
    :param overwrite_warc: If True the file is opened at filename as-is (mode 'wb').
    :param err_threshold: Stored as the error threshold (``self._error_threshold``).
    :param warcinfo_record_data: None to generate a fresh warcinfo record, else an indexable
        where item 0 is passed as the WARC headers and item 1 is a mapping of custom headers
        re-serialized as the record payload.
    :param known_bad_urls: Optional path of a UTF-8 text file, one URL per line, loaded into self.bad_urls.
    :param max_no_of_calls_in_period: Number of calls allowed per limit_period for the rate limited getter.
    :param limit_period: Rate limit period.
    :param proxy_url: Optional proxy URL set for both http and https on the session.
    :param allow_cookies: Stored as ``self._allow_cookies``.
    """
    if known_bad_urls is not None:  # Setup the list of cached bad URLs to prevent trying to download them again
        with open(known_bad_urls, encoding='UTF-8') as fh:
            self.bad_urls = {line.strip() for line in fh}
    else:
        self.bad_urls = set()

    if not overwrite_warc:  # Find out next nonexisting warc filename
        # Split the ORIGINAL name once. Re-splitting the candidate inside the loop would
        # stack suffixes (name-00000-00001.warc.gz) instead of counting up.
        base, ext = os.path.splitext(filename)  # Should be filename.warc.gz
        if ext == '.gz' and base.endswith('.warc'):
            base, ext2 = os.path.splitext(base)  # Should be filename.warc
            ext = ext2 + ext  # Should be .warc.gz
        num = 0
        while os.path.exists(filename):
            filename = '{0}-{1:05d}{2}'.format(base, num, ext)
            num += 1

    logger_.log('INFO', 'Creating archivefile: {0}'.format(filename))
    self._output_file = open(filename, 'wb')

    self._logger_ = logger_
    self._req_headers = {'Accept-Encoding': 'identity', 'User-agent': user_agent}

    self._session = Session()  # Setup session for speeding up downloads
    if proxy_url is not None:  # Set socks proxy if provided
        self._session.proxies['http'] = proxy_url
        self._session.proxies['https'] = proxy_url

    self._allow_cookies = allow_cookies

    # Setup rate limiting to prevent hammering the server
    self._requests_get = sleep_and_retry(limits(calls=max_no_of_calls_in_period,
                                                period=limit_period)(self._http_get_w_cookie_handling))
    self._error_count = 0
    self._error_threshold = err_threshold  # Set the error threshold which cause aborting to prevent denial

    self._writer = WARCWriter(self._output_file, gzip=True)
    if warcinfo_record_data is None:
        # INFO RECORD
        # Some custom information about the warc writer program and its settings
        info_headers = {'software': program_name,
                        'arguments': ' '.join(sys.argv[1:]),
                        'format': 'WARC File Format 1.0',
                        'conformsTo': 'http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf'}
        info_record = self._writer.create_warcinfo_record(filename, info_headers)
    else:  # Must recreate custom headers else they will not be copied
        custom_headers = ''.join('{0}: {1}\r\n'.format(k, v) for k, v in warcinfo_record_data[1].items()).\
            encode('UTF-8')
        info_record = self._writer.create_warc_record('', 'warcinfo', warc_headers=warcinfo_record_data[0],
                                                      payload=BytesIO(custom_headers),
                                                      length=len(custom_headers))
    self._writer.write_record(info_record)
def __init__(self, base_url, client_id, client_secret, api_json=None, token_expires_percent=5):
    # type: (str, str, str, str, int) -> None
    """[Init method used to create an Api Class for making api calls]

    :param base_url: Base URL of the API service
    :type base_url: str
    :param client_id: Client ID of the application
    :type client_id: str
    :param client_secret: Secret of the application
    :type client_secret: str
    :param api_json: API file defining all JSON limits and calls, defaults to None because it will use the default
    :type api_json: str, optional
    :param token_expires_percent: This is a percentage of time to take off the token renewal to ensure it
        doesn't run out. For instance 5(%) of 3600 is 180. Stored as the remaining fraction (5 -> 0.95).
    :type token_expires_percent: int, optional
    :raises AttributeError: If the API JSON file parses to an empty value

    Errors raised while opening or parsing the API file are not caught here.
    """
    self.base_url = base_url
    self.client_id = client_id
    self.client_secret = client_secret
    self.token_expires_percent = int(100 - token_expires_percent) / 100

    # This line is needed so pylint doesn't complain about this variable not existing.
    self.__log = self.__log

    # If the user doesn't pass an alternate API file use the included one
    if not api_json:
        api_json = pkg_resources.resource_filename(__name__, 'apis.json')

    with open(api_json, encoding='utf-8') as api_file:
        apis = json.loads(api_file.read())

    # If the parsed content is empty. Report the path: the file object's repr is not useful here.
    if not apis:
        raise AttributeError(f"File {api_json} loaded is empty")

    # Create a dict to hold details of scopes (from json)
    self.scopes = defaultdict(dict)
    # Create a dict to cache the tokens
    self.tokens = defaultdict(dict)

    # Setup all of the calls to the apis with the limits
    for (client_scope, api) in apis.items():
        self.scopes[client_scope]["api_call"] = sleep_and_retry(
            limits(calls=api.get('limits_calls'), period=api.get('limits_period'))(self._api_call))
        # Store the token url associated with this client scope for later
        self.scopes[client_scope]["token_url"] = api.get('token_url')
def test_separate_decorators(self, sleep):
    """Check that wrapping ``self.increment_a`` with ``sleep_and_retry`` sleeps only on the second call.

    The first call must complete without sleeping; the second must trigger
    exactly one sleep whose duration lies in (9.9, 10].
    """
    def forward_sleep(duration):
        # Route the patched sleep through the test's own sleep helper
        return self.sleep("main_limit", duration)

    sleep.side_effect = forward_sleep
    wrapped_increment = sleep_and_retry(self.increment_a)

    # First call: no waiting expected
    wrapped_increment()
    sleep.assert_not_called()
    self.assertEqual(self.a, 1)

    # Second call: one sleep expected
    wrapped_increment()
    sleep.assert_called_once()
    slept_for = sleep.call_args[0][0]
    assert 9.9 < slept_for <= 10
    self.assertEqual(self.a, 2)
def add_rate_limiting(self, f: Callable):
    """Return ``f`` wrapped with rate limiting, sleep-and-retry and exponential backoff.

    The limiter is built from ``self.ratelimit_params`` when that is truthy,
    otherwise from ``self.ratelimit_calls_per_min`` calls per 60 seconds.
    The outermost layer retries on ``RateLimitException`` and ``HTTPError``
    for up to ``self.backoff_timeout_seconds``.
    """
    if self.ratelimit_params:
        limiter = limits(**self.ratelimit_params)
    else:
        limiter = limits(calls=self.ratelimit_calls_per_min, period=60)

    throttled = sleep_and_retry(limiter(f))

    with_backoff = on_exception(
        expo,
        (RateLimitException, HTTPError),
        max_time=self.backoff_timeout_seconds,
        factor=4,
    )
    return with_backoff(throttled)
def auth(self, session: Session, username: str, password: str, calls: Optional[int]=None, period: Optional[int]=None):
    """Create session manager and optionally use rate limit.

    The manager is stored in ``self._manager``. When both ``calls`` and
    ``period`` are given, the stored manager is replaced by a rate limited,
    sleep-and-retry wrapper around it. ``session`` is not used here.
    """
    self._manager = sessionManager(
        keystoneURL=self.keystoneURL,
        username=username,
        password=password,
        headers={
            "Fiware-Service": self.service,
            "Fiware-ServicePath": self.subservice,
            "X-Auth-Token": "missing",  # Otherwise we get error 400 missing auth token
        }
    )

    # Rate limiting needs both values; otherwise keep the plain manager
    if calls is None or period is None:
        return

    limiter = limits(calls=calls, period=period)
    self._manager = sleep_and_retry(limiter(self._manager))
def construct_ratelimit_rows(row_generator_fx, max_rows_per_minute, blocking=True):
    '''
    Case 1:
    - wrap row_generator_fx in a rate limit of max_rows_per_minute calls per ONE_SEC period
    - when blocking is true, additionally make the wrapped function sleep and retry
      instead of surfacing the limit to the caller

    row_generator_fx releases 1 row per call (return, not yield)

    NOTE(review): the argument is named "per minute" but the period used is ONE_SEC;
    confirm which unit is intended.
    '''
    limited_fx = limits(calls=max_rows_per_minute, period=ONE_SEC)(row_generator_fx)
    return sleep_and_retry(limited_fx) if blocking else limited_fx
def __init__(self, url, timeout=60, requests_per_min=100000, retries=10, page_size=0,
             supports_bundled_mode=True, persistence_file_path="rate_limits.db", agent=__agent__):
    """Configuration of a SPARQL Endpoint.

    Args:
        url (str): URL of the endpoint.
        timeout (int, optional): Time the endpoint is given to respond (in seconds).
            Defaults to 60.
        requests_per_min (int, optional): Maximal number of requests per minute.
            Defaults to 100000.
        retries (int, optional): Number of times a query is retried.
            Defaults to 10.
        page_size (int, optional): Page size limit for results, since many endpoints
            have limitations. Defaults to 0.
        supports_bundled_mode (boolean, optional): If true, bundled mode will be used
            to query the endpoint. Defaults to True.
        persistence_file_path (str, optional): File path of the database that keeps
            track of past query activities (to comply with usage policies).
            Defaults to "rate_limits.db".
        agent (str, optional): The User-Agent for the HTTP request header.
            Defaults to SPARQLWrapper.__agent__.
    """
    self.url = url
    self.timeout = timeout
    self.requests_per_min = requests_per_min
    self.retries = retries
    self.page_size = page_size
    self.supports_bundled_mode = supports_bundled_mode
    self.persistence_file_path = persistence_file_path

    # The limiter is named after the quoted endpoint URL and backed by the persistence file
    limiter_name = '"' + url + '"'
    limiter = limits(calls=requests_per_min, period=60,
                     storage=self.persistence_file_path, name=limiter_name)
    self.query = sleep_and_retry(limiter(self._query))

    self.agent = agent
def __init__(self, plan, throttle, block):
    """Set up the throttling limiter for a plan.

    Args:
        plan: Passed through to ``Plan.__init__``.
        throttle: ``None`` to disable throttling, or one of ``"minute"``,
            ``"daily"``, ``"monthly"`` to use the attribute of the same name
            as the ``limits`` arguments (presumably set by ``Plan.__init__`` -
            confirm against Plan).
        block: Passed to ``limits`` as ``raise_on_limit`` and stored as ``self.block``.

    Raises:
        ValueError: If ``throttle`` is none of the accepted values.
    """
    Plan.__init__(self, plan)
    self.lock = Lock()

    scheme = (0, 0)
    self.throttling = True
    if throttle is None:
        self.throttling = False
    elif throttle == "minute":
        scheme = self.minute
    elif throttle == "daily":
        scheme = self.daily
    elif throttle == "monthly":
        scheme = self.monthly
    else:
        # Name the accepted values and the offending one so the caller can fix the call
        raise ValueError(
            "Argument throttle must be either None, 'minute', 'daily' or 'monthly', "
            "got {!r}".format(throttle)
        )

    # The limited callable does nothing itself; it only consumes the rate limit budget
    self.limit = limits(*scheme, raise_on_limit=block)(lambda: None)
    self.sleep = sleep_and_retry(self.limit)
    self.block = block
def __init__(self, config_dict):
    """Read the client settings from ``config_dict`` and wrap ``self.request``.

    ``api_key``, ``domain`` and ``start_date`` default to None when missing; the
    remaining settings fall back to the matching constants in ``const``.
    ``self.request`` is replaced by a version that is rate limited, sleeps and
    retries on the limit, and backs off exponentially on request exceptions.
    """
    self.session = requests.Session()

    read = config_dict.get
    self.api_key = read('api_key')
    self.domain = read('domain')
    self.start_date = read('start_date')
    self.user_agent = read('user_agent', const.USER_AGENT)
    self.rate_limit_requests = read('rate_limit_requests', const.RATE_LIMIT_REQUESTS)
    self.rate_limit_seconds = read('rate_limit_seconds', const.RATE_LIMIT_SECONDS)
    self.per_page = read('per_page', const.PER_PAGE)
    self.max_retries = read('max_retries', const.MAX_RETRIES)
    self.backoff_factor = read('backoff_factor', const.BACKOFF_FACTOR)

    def is_client_error(exc):
        # Give up immediately on 4xx responses; retrying would not help
        response = exc.response
        return response is not None and 400 <= response.status_code < 500

    # Decorator syntax cannot be used on the request method because the decorator
    # arguments come from the runtime config, so the method is wrapped here instead.
    limiter = limits(calls=self.rate_limit_requests, period=self.rate_limit_seconds)
    wrapped = sleep_and_retry(limiter(self.request))
    retrier = backoff.on_exception(
        backoff.expo,
        requests.exceptions.RequestException,
        max_tries=self.max_retries,
        giveup=is_client_error,
        factor=self.backoff_factor)
    self.request = retrier(wrapped)
def __init__(self, base_url, client_id, client_secret, client_scope, api_json=None):
    # type: (str, str, str, str, str) -> None
    """[Init method used to create an Api Class for making api calls]

    :param base_url: Base URL of the API service
    :type base_url: str
    :param client_id: Client ID of the application
    :type client_id: str
    :param client_secret: Secret of the application
    :type client_secret: str
    :param client_scope: Client scope, must be present in the api_json file
    :type client_scope: str
    :param api_json: API file defining all JSON limits and calls, defaults to None because it will use the default
    :type api_json: str, optional
    :raises Exception: If client_scope is not a key of the loaded API file
    """
    self.base_url = base_url
    self.client_id = client_id
    self.client_secret = client_secret
    self.client_scope = client_scope

    # Fall back to the bundled API description when no file was passed
    api_path = api_json if api_json else pkg_resources.resource_filename(__name__, 'apis.json')

    with open(api_path, encoding='utf-8') as api_file:
        apis = json.loads(api_file.read())

    # Unknown scopes cannot be configured
    if client_scope not in apis:
        raise Exception(f"Scope {client_scope} not in known API dict")

    scope_settings = apis.get(client_scope)
    self.token_url = scope_settings.get('token_url')

    limiter = limits(calls=scope_settings.get('limits_calls'),
                     period=scope_settings.get('limits_period'))
    self.api_call = sleep_and_retry(limiter(self._api_call))

    self.access_token = self.get_access_token(self.token_url)
import os
from ratelimit import sleep_and_retry, limits
from mp_api.core.settings import MAPISettings

# Endpoint taken from the MP_API_ENDPOINT environment variable, with the public API as fallback
DEFAULT_ENDPOINT = os.environ.get(
    "MP_API_ENDPOINT", "https://api.materialsproject.org/"
)


def check_limit():
    """
    No-op hook used to enable global rate limiting.
    """


# Only endpoints containing "api.materialsproject" get the limiter:
# MAPISettings().requests_per_min calls per 60 seconds, sleeping when exceeded.
if "api.materialsproject" in DEFAULT_ENDPOINT:
    check_limit = sleep_and_retry(
        limits(calls=MAPISettings().requests_per_min, period=60)(check_limit)
    )
def add_rate_limiting(self, f: Callable):
    """Return ``f`` limited to ``self.ratelimit_calls_per_min`` calls per 60 seconds.

    When the limit is hit the wrapper sleeps and retries rather than raising.

    ``sleep_and_retry`` has to be the OUTER wrapper: it works by catching the
    exception that the ``limits`` wrapper raises. With the order reversed it
    wraps the bare function and never sees that exception, so the limit would
    surface to the caller as an error.
    """
    limited = limits(calls=self.ratelimit_calls_per_min, period=60)(f)
    return sleep_and_retry(limited)
'xml': _xml,'df':_data_frame, 'csv':_csv,'numpy':_numpy, 'tab': _tab,'pipe': _pipe} else: dispatch = {'dict': _dict,'json': _json,'xml': _xml, } return dispatch[response_type] def _get_request(url_root,api_key,path,response_type,params, ssl_verify): """ Helper funcation that requests a get response from FRED. """ url = _url_builder(url_root,api_key,path,params) content = _fetch(url, ssl_verify) response = _dispatch(response_type)(content) return response if _USE_JOBLIB_CACHE: import joblib one_gb = 1000000000 location = '/tmp/joblib_cache' memory = joblib.Memory(location, verbose=1, bytes_limit=one_gb) if _THROTTLE_REQUESTS: from ratelimit import limits, sleep_and_retry period_seconds = 1 calls_per_second = 20 _get_request = memory.cache(sleep_and_retry(limits(calls=calls_per_second, period=period_seconds)(_get_request))) else: _get_request = memory.cache(_get_request)
def __init__(self, expected_filename, _logger, warcinfo_record_data=None, program_name='WebArticleCurator',
             user_agent=None, overwrite_warc=True, err_threshold=10, known_bad_urls=None,
             max_no_of_calls_in_period=2, limit_period=1, proxy_url=None, allow_cookies=False,
             verify_request=True, stay_offline=False):
    """Prepare the downloader: target WARC file, HTTP session, rate limiter and warcinfo record.

    The target filename is resolved by ``self._set_target_filename`` from
    expected_filename and overwrite_warc, then opened for binary writing.
    When stay_offline is true, ``self.download_url`` points at the dummy
    downloader instead of the real one. When warcinfo_record_data is None a
    default set of warcinfo fields is generated from program_name and the
    command-line arguments.
    """
    # Plain state
    self._logger = _logger
    self._req_headers = {'Accept-Encoding': 'identity', 'User-agent': user_agent}
    self._error_count = 0
    self._error_threshold = err_threshold  # Error threshold which causes aborting

    # Select the download function
    self.download_url = self._dummy_download_url if stay_offline else self._download_url

    # Cached bad URLs are loaded so they are not tried again
    if known_bad_urls is None:
        self.bad_urls = set()
    else:
        with open(known_bad_urls, encoding='UTF-8') as bad_urls_fh:
            self.bad_urls = {url_line.strip() for url_line in bad_urls_fh}
    self.good_urls = set()

    # Target file handle
    target_filename = self._set_target_filename(expected_filename, overwrite_warc)
    self._logger.log('INFO', 'Creating archivefile:', target_filename)
    self._output_file = open(target_filename, 'wb')

    # Session (reused across downloads), optionally routed through a proxy
    self._session = Session()
    if proxy_url is not None:
        for scheme in ('http', 'https'):
            self._session.proxies[scheme] = proxy_url

    self._allow_cookies = allow_cookies
    self._verify_request = verify_request
    if not self._verify_request:
        disable_warnings(InsecureRequestWarning)

    # Rate limiting to prevent hammering the server
    limiter = limits(calls=max_no_of_calls_in_period, period=limit_period)
    self._requests_get = sleep_and_retry(limiter(self._http_get_w_cookie_handling))

    self._writer = WARCWriter(self._output_file, gzip=True, warc_version='WARC/1.1')

    # Without caller-supplied data, describe the writer program and its settings
    if warcinfo_record_data is None:
        warcinfo_record_data = {
            'software': program_name,
            'arguments': ' '.join(sys.argv[1:]),
            'format': 'WARC File Format 1.1',
            'conformsTo': 'http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1-1_latestdraft.pdf',
        }

    info_record = self._writer.create_warcinfo_record(target_filename, warcinfo_record_data)
    self._writer.write_record(info_record)