def __init__(self, api_domain, api_key=None, rate_limit=API_LIMIT):
    """
    Instantiates a new CallHub instance

    >>> callhub = CallHub("https://api-na1.callhub.io")

    With built-in rate limiting disabled:

    >>> callhub = CallHub(rate_limit=False)

    Args:
        api_domain (``str``): Domain to access the API (eg: api.callhub.io, api-na1.callhub.io);
            this varies by account
    Keyword Args:
        api_key (``str``, optional): Optional API key. If not provided, it will attempt to use
            ``os.environ['CALLHUB_API_KEY']``
        rate_limit (``dict``, optional): Enabled by default with settings that respect CallHub's
            API limits. Setting this to False disables rate limiting, or you can set your own
            limits by following the example below. Please don't abuse! :)

            >>> callhub = CallHub(rate_limit={"GENERAL": {"calls": 13, "period": 1},
            ...                               "BULK_CREATE": {"calls": 1, "period": 70}})

            - The default limits bulk_create to 1 call per 70 seconds (CallHub states their
              limit is one per 60s, but in practice a delay of exactly 60s can trip their
              rate limiter anyway)
            - The default limits all other API requests to 13 per second (CallHub support
              states their limit is 20/s, but this plays it on the safe side, because other
              rate limiters seem a little sensitive)
    """
    self.session = FuturesSession(max_workers=43)

    # Attempt 3 retries for failed connections
    adapter = requests.adapters.HTTPAdapter(max_retries=3)
    self.session.mount('https://', adapter)
    self.session.mount('http://', adapter)

    # Truncate the final '/' off of the API domain if it was provided
    if api_domain[-1] == "/":
        self.api_domain = api_domain[:-1]
    else:
        self.api_domain = api_domain

    if rate_limit:
        # Apply the general rate limit to self.session.get
        rate_limited_get = sleep_and_retry(limits(**rate_limit["GENERAL"])(FuturesSession.get))
        self.session.get = types.MethodType(rate_limited_get, self.session)

        # Apply the general rate limit to self.session.post
        rate_limited_post = sleep_and_retry(limits(**rate_limit["GENERAL"])(FuturesSession.post))
        self.session.post = types.MethodType(rate_limited_post, self.session)

        # Apply the bulk rate limit to self.bulk_create
        self.bulk_create = sleep_and_retry(limits(**rate_limit["BULK_CREATE"])(self.bulk_create))

    self.session.auth = CallHubAuth(api_key=api_key)

    # validate_api_key returns the administrator email on success
    self.admin_email = self.validate_api_key()

    # Cache for the do-not-contact number/list to id mapping
    self.dnc_cache = {}
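# A minimal, self-contained sketch of the method-rebinding pattern above, using a
# plain requests.Session instead of FuturesSession; the 2-calls-per-second limit
# and the target URL are illustrative assumptions, not CallHub's documented values.
import types
import requests
from ratelimit import limits, sleep_and_retry

session = requests.Session()
# Wrap the unbound Session.get, then bind the wrapped function back onto this instance
rate_limited_get = sleep_and_retry(limits(calls=2, period=1)(requests.Session.get))
session.get = types.MethodType(rate_limited_get, session)

for _ in range(5):
    session.get("https://httpbin.org/get")  # blocks as needed to stay under the cap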
def add_rate_limiting(self, f: Callable):
    if self.ratelimit_params:
        g = limits(**self.ratelimit_params)(f)
    else:
        g = limits(calls=self.ratelimit_calls_per_min, period=60)(f)
    g = sleep_and_retry(g)
    g = on_exception(
        expo,
        (RateLimitException, HTTPError),
        max_time=self.backoff_timeout_seconds,
        factor=4,
    )(g)
    return g
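# Standalone sketch of the composition add_rate_limiting builds: limits raises
# RateLimitException past the cap, sleep_and_retry absorbs it, and backoff retries
# HTTP errors with exponential waits. The fetch function, URL handling, and all
# numbers here are illustrative assumptions.
import requests
from requests.exceptions import HTTPError
from ratelimit import limits, sleep_and_retry, RateLimitException
from backoff import on_exception, expo

def fetch(url):
    resp = requests.get(url)
    resp.raise_for_status()  # HTTPError raised here feeds the backoff decorator
    return resp

fetch = limits(calls=30, period=60)(fetch)
fetch = sleep_and_retry(fetch)
fetch = on_exception(expo, (RateLimitException, HTTPError), max_time=120, factor=4)(fetch)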
def __init__(self, search_string='', max_number_of_requests=30, rate_limit_timeout_period=60, proxies=None):
    """
    Usage Examples
    ----------

    >>> synonym = Synonyms('mother')
    >>> results = synonym.find_synonyms()

    >>> synonym = Synonyms(search_string='mother')
    >>> results = synonym.find_synonyms()

    Parameters
    ----------
    :param search_string: string containing the variable to obtain synonyms for
    :param max_number_of_requests: maximum number of requests for a specific timeout_period
    :param rate_limit_timeout_period: the time period before a session is placed in a temporary hibernation mode
    :param proxies: dictionary of proxies to use with Python Requests
    """
    self._word = search_string
    self._proxies = proxies

    ratelimit_status = False
    self._rate_limit_status = ratelimit_status

    # Retries the requests after a certain time period has elapsed
    handler = on_exception(expo, RateLimitException, max_time=60, on_backoff=self._backoff_handler)
    # Establishes a rate limit for making requests to the synonyms repositories
    limiter = limits(calls=max_number_of_requests, period=rate_limit_timeout_period)
    self.find_synonyms = handler(limiter(self.find_synonyms))
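# Generic sketch of the handler/limiter composition used here (and in the similar
# classes below): each backoff wait is reported through on_backoff. The lookup
# function and the 5-calls-per-10-seconds cap are illustrative assumptions.
from ratelimit import limits, RateLimitException
from backoff import on_exception, expo

def _backoff_handler(details):
    print("Backing off {wait:.1f}s after {tries} tries".format(**details))

def lookup(word):
    return ['synonym-of-' + word]  # stand-in for a real repository query

handler = on_exception(expo, RateLimitException, max_time=60, on_backoff=_backoff_handler)
limiter = limits(calls=5, period=10)
lookup = handler(limiter(lookup))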
def __init__(self, client_key, client_secret, diskcache=None):
    """Initialize the client"""
    self.client_key = client_key
    self.client_secret = client_secret
    throttled_get = ratelimit.sleep_and_retry(
        ratelimit.limits(calls=1, period=1)(requests.get))
    self.requests_get = throttled_get if diskcache is None else (
        diskcache.memoize()(throttled_get))
def __init__(self, search_string='', output_format='list', max_number_of_requests=30,
             rate_limit_timeout_period=60, user_agent=None, proxies=None):
    """
    Purpose
    ----------
    This Python class is used to query multiple online repositories for the definition
    associated with a specific word.

    Usage Examples
    ----------

    >>> definition = Definitions('mother')
    >>> results = definition.find_definitions()

    >>> definition = Definitions(search_string='mother')
    >>> results = definition.find_definitions()

    Parameters
    ----------
    :param search_string: string containing the variable to obtain a definition for
    :param output_format: format to use for returned results.
           Default value: list; acceptable values: dictionary or list
    :param max_number_of_requests: maximum number of requests for a specific timeout_period
    :param rate_limit_timeout_period: the time period before a session is placed in a temporary hibernation mode
    :param user_agent: string containing either a global user agent type or a specific user agent
    :param proxies: dictionary of proxies to use with Python Requests
    """
    self._proxies = proxies
    self._word = search_string
    self._user_agent = user_agent
    self._output_format = output_format

    rate_limit_status = False
    self._rate_limit_status = rate_limit_status

    # Retries the requests after a certain time period has elapsed
    handler = on_exception(expo, RateLimitException, max_time=60, on_backoff=self._backoff_handler)
    # Establishes a rate limit for making requests to the definitions repositories
    limiter = limits(calls=max_number_of_requests, period=rate_limit_timeout_period)
    self.find_definitions = handler(limiter(self.find_definitions))
def __init__(self, filename, logger_, program_name='corpusbuilder 1.0', user_agent=None, overwrite_warc=True,
             err_threshold=10, warcinfo_record_data=None, known_bad_urls=None, max_no_of_calls_in_period=2,
             limit_period=1, proxy_url=None, allow_cookies=False):
    if known_bad_urls is not None:  # Setup the list of cached bad URLs to prevent trying to download them again
        with open(known_bad_urls, encoding='UTF-8') as fh:
            self.bad_urls = {line.strip() for line in fh}
    else:
        self.bad_urls = set()

    if not overwrite_warc:  # Find the next nonexistent WARC filename
        num = 0
        while os.path.exists(filename):
            filename2, ext = os.path.splitext(filename)  # Should be filename.warc.gz
            if ext == '.gz' and filename2.endswith('.warc'):
                filename2, ext2 = os.path.splitext(filename2)  # Should be filename.warc
                ext = ext2 + ext  # Should be .warc.gz
            filename = '{0}-{1:05d}{2}'.format(filename2, num, ext)
            num += 1

    logger_.log('INFO', 'Creating archive file: {0}'.format(filename))
    self._output_file = open(filename, 'wb')
    self._logger_ = logger_
    self._req_headers = {'Accept-Encoding': 'identity', 'User-agent': user_agent}
    self._session = Session()  # Setup session for speeding up downloads

    if proxy_url is not None:  # Set a SOCKS proxy if provided
        self._session.proxies['http'] = proxy_url
        self._session.proxies['https'] = proxy_url

    self._allow_cookies = allow_cookies

    # Setup rate limiting to prevent hammering the server
    self._requests_get = sleep_and_retry(limits(calls=max_no_of_calls_in_period,
                                                period=limit_period)(self._http_get_w_cookie_handling))

    self._error_count = 0
    self._error_threshold = err_threshold  # Set the error threshold which causes aborting, to prevent denial of service

    self._writer = WARCWriter(self._output_file, gzip=True)
    if warcinfo_record_data is None:
        # INFO RECORD
        # Some custom information about the WARC writer program and its settings
        info_headers = {'software': program_name, 'arguments': ' '.join(sys.argv[1:]),
                        'format': 'WARC File Format 1.0',
                        'conformsTo': 'http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf'}
        info_record = self._writer.create_warcinfo_record(filename, info_headers)
    else:  # Must recreate the custom headers, else they will not be copied
        custom_headers = ''.join('{0}: {1}\r\n'.format(k, v) for k, v in warcinfo_record_data[1].items()).\
            encode('UTF-8')
        info_record = self._writer.create_warc_record('', 'warcinfo', warc_headers=warcinfo_record_data[0],
                                                      payload=BytesIO(custom_headers),
                                                      length=len(custom_headers))
    self._writer.write_record(info_record)
def __init__(self, base_url, client_id, client_secret, api_json=None, token_expires_percent=5):
    # type: (str, str, str, str, int) -> None
    """[Init method used to create an Api Class for making api calls]

    :param base_url: Base URL of the API service
    :type base_url: str
    :param client_id: Client ID of the application
    :type client_id: str
    :param client_secret: Secret of the application
    :type client_secret: str
    :param api_json: API file defining all JSON limits and calls, defaults to None because it will use the default
    :type api_json: str, optional
    :param token_expires_percent: This is a percentage of time to take off the token renewal to ensure it
        doesn't run out. For instance, 5(%) of 3600 is 180.
    :type token_expires_percent: int, optional
    :raises Exception: If this cannot be configured with the parameters used
    :raises AttributeError: If the apis.json file is empty
    """
    self.base_url = base_url
    self.client_id = client_id
    self.client_secret = client_secret
    self.token_expires_percent = int(100 - token_expires_percent) / 100

    # This line is needed so pylint doesn't complain about this variable not existing.
    self.__log = self.__log

    # If the user doesn't pass an alternate API file, use the included one
    if not api_json:
        api_json = pkg_resources.resource_filename(__name__, 'apis.json')

    with open(api_json, encoding='utf-8') as api_file:
        apis = json.loads(api_file.read())

    # If the string is empty
    if not apis:
        raise AttributeError(f"File {api_json} loaded is empty")

    # Create a dict to hold details of scopes (from the json)
    self.scopes = defaultdict(dict)
    # Create a dict to cache the tokens
    self.tokens = defaultdict(dict)

    # Setup all of the calls to the apis with the limits
    for (client_scope, api) in apis.items():
        self.scopes[client_scope]["api_call"] = sleep_and_retry(
            limits(calls=api.get('limits_calls'), period=api.get('limits_period'))(self._api_call))
        # Store the token url associated with this client scope for later
        self.scopes[client_scope]["token_url"] = api.get('token_url')
def auth(self, session: Session, username: str, password: str,
         calls: Optional[int] = None, period: Optional[int] = None):
    """Create the session manager and optionally apply a rate limit"""
    self._manager = sessionManager(
        keystoneURL=self.keystoneURL,
        username=username,
        password=password,
        headers={
            "Fiware-Service": self.service,
            "Fiware-ServicePath": self.subservice,
            # Otherwise we get error 400 missing auth token
            "X-Auth-Token": "missing",
        }
    )
    if calls is not None and period is not None:
        self._manager = sleep_and_retry(limits(calls=calls, period=period)(self._manager))
def construct_ratelimit_rows(row_generator_fx, max_rows_per_minute, blocking=True):
    '''
    Case 1:
    - set a rate limit (min 1 per second)
    - option to make the function blocking

    row_generator_fx releases 1 row per call (return, not yield)
    '''
    # Note: the period is ONE_SEC even though the parameter is named per-minute
    row_generator_fx = limits(calls=max_rows_per_minute, period=ONE_SEC)(row_generator_fx)
    if blocking:
        row_generator_fx = sleep_and_retry(row_generator_fx)
    return row_generator_fx
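# Hypothetical usage of construct_ratelimit_rows: wrap a one-row-per-call producer
# so callers block once the per-period cap is hit. next_row and the cap of 10 are
# illustrative assumptions.
import itertools

_counter = itertools.count()

def next_row():
    return {"id": next(_counter)}

limited_next_row = construct_ratelimit_rows(next_row, max_rows_per_minute=10, blocking=True)
rows = [limited_next_row() for _ in range(3)]  # sleeps between calls when over the cap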
def __init__(self, url, timeout=60, requests_per_min=100000, retries=10, page_size=0,
             supports_bundled_mode=True, persistence_file_path="rate_limits.db", agent=__agent__):
    """Configuration of a SPARQL Endpoint.

    Args:
        url (str): URL of the endpoint.
        timeout (int, optional): Defines the time which the endpoint is given to respond (in seconds).
            Defaults to 60.
        requests_per_min (int, optional): Defines the maximal number of requests per minute.
            Defaults to 100000.
        retries (int, optional): Defines the number of times a query is retried. Defaults to 10.
        page_size (int, optional): Limits the page size of the results, since many endpoints have
            limitations. Defaults to 0.
        supports_bundled_mode (boolean, optional): If true, bundled mode will be used to query the
            endpoint. Defaults to True.
        persistence_file_path (str, optional): Sets the file path for the database that keeps track
            of past query activities (to comply with usage policies). Defaults to "rate_limits.db".
        agent (str, optional): The User-Agent for the HTTP request header.
            Defaults to SPARQLWrapper.__agent__.
    """
    self.url = url
    self.timeout = timeout
    self.requests_per_min = requests_per_min
    self.retries = retries
    self.page_size = page_size
    self.supports_bundled_mode = supports_bundled_mode
    self.persistence_file_path = persistence_file_path
    self.query = sleep_and_retry(
        limits(calls=requests_per_min, period=60, storage=self.persistence_file_path,
               name='"' + url + '"')(self._query))
    self.agent = agent
def __init__(self, source_language='', str_to_translate='', api_key=''):
    self._source_language = source_language
    self._str_to_translate = str_to_translate
    self._api_key = api_key

    ratelimit_status = False
    self._rate_limit_status = ratelimit_status

    # Retries the requests after a certain time period has elapsed
    handler = on_exception(expo, RateLimitException, max_time=60, on_backoff=self._backoff_handler)
    # Establishes a rate limit for making requests to the Deep translation service
    limiter = limits(calls=60, period=60)
    self.translate_word = handler(limiter(self.translate_word))
    self.reverse_translate = handler(limiter(self.reverse_translate))
def __init__(self, plan, throttle, block):
    Plan.__init__(self, plan)
    self.lock = Lock()
    scheme = (0, 0)
    self.throttling = True
    if throttle is None:
        self.throttling = False
    elif throttle == "minute":
        scheme = self.minute
    elif throttle == "daily":
        scheme = self.daily
    elif throttle == "monthly":
        scheme = self.monthly
    else:
        raise ValueError("Argument throttle must be either 'minute', 'daily' or 'monthly'")
    self.limit = limits(*scheme, raise_on_limit=block)(lambda: None)
    self.sleep = sleep_and_retry(self.limit)
    self.block = block
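# Minimal sketch of the no-op gate pattern above: decorating `lambda: None` with
# limits produces a callable that only meters time, so invoking it before real work
# throttles that work without wrapping it. The 10-per-60s scheme and do_request are
# illustrative assumptions.
from ratelimit import limits, sleep_and_retry

gate = sleep_and_retry(limits(10, 60, raise_on_limit=True)(lambda: None))

def call_api(payload):
    gate()  # blocks here until a slot in the current period is free
    return do_request(payload)  # hypothetical request function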
def decorator(func: Callable) -> Callable:
    limited = limits(calls=requests, period=seconds)(func)
    if async_:
        @wraps(func)
        async def inner(*args, **kwargs):
            while True:
                try:
                    return await limited(*args, **kwargs)
                except RateLimitException:
                    await asyncio.sleep(1)
    else:
        @wraps(func)
        def inner(*args, **kwargs):
            while True:
                try:
                    return limited(*args, **kwargs)
                except RateLimitException:
                    time.sleep(1)
    return inner
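# Hypothetical usage of the decorator above, assuming the enclosing factory is
# something like rate_limited(requests, seconds, async_) -- only the inner
# `decorator` is shown, so the outer name and signature are assumptions.
@rate_limited(requests=5, seconds=1, async_=False)
def fetch_page(url):
    return url  # stand-in for a real request

@rate_limited(requests=5, seconds=1, async_=True)
async def fetch_page_async(url):
    return url  # stand-in for a real awaitable request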
def __init__(self, source_language='', str_to_translate='', email_address=None):
    self._source_language = source_language
    self._str_to_translate = str_to_translate
    self._url_to_query = 'http://api.mymemory.translated.net/get'
    self._email_address = email_address
    self._headers = http_headers

    ratelimit_status = False
    self._rate_limit_status = ratelimit_status

    # Retries the requests after a certain time period has elapsed
    handler = on_exception(expo, RateLimitException, max_time=60, on_backoff=self._backoff_handler)
    # Establishes a rate limit for making requests to the MyMemory translation service
    limiter = limits(calls=30, period=60)
    self.translate_word = handler(limiter(self.translate_word))
    self.reverse_translate = handler(limiter(self.reverse_translate))
def __init__(self, source_language='', str_to_translate='', proxies=None):
    self._source_language = source_language
    self._str_to_translate = str_to_translate
    self._url_to_query = 'https://translate.google.com/m'
    self._proxies = proxies

    rand_user_agent = get_random_user_agent()
    http_headers = {'user-agent': rand_user_agent}
    self._headers = http_headers

    ratelimit_status = False
    self._rate_limit_status = ratelimit_status

    # Retries the requests after a certain time period has elapsed
    handler = on_exception(expo, RateLimitException, max_time=60, on_backoff=self._backoff_handler)
    # Establishes a rate limit for making requests to the Google translation service
    limiter = limits(calls=60, period=60)
    self.translate_word = handler(limiter(self.translate_word))
    self.reverse_translate = handler(limiter(self.reverse_translate))
def __init__(self, config_dict):
    self.session = requests.Session()
    self.api_key = config_dict.get('api_key')
    self.domain = config_dict.get('domain')
    self.start_date = config_dict.get('start_date')
    self.user_agent = config_dict.get('user_agent', const.USER_AGENT)
    self.rate_limit_requests = config_dict.get('rate_limit_requests', const.RATE_LIMIT_REQUESTS)
    self.rate_limit_seconds = config_dict.get('rate_limit_seconds', const.RATE_LIMIT_SECONDS)
    self.per_page = config_dict.get('per_page', const.PER_PAGE)
    self.max_retries = config_dict.get('max_retries', const.MAX_RETRIES)
    self.backoff_factor = config_dict.get('backoff_factor', const.BACKOFF_FACTOR)

    # Usually we would use Python decorators on the request method, but since we want to change
    # the arguments for the decorators dynamically during runtime based on the provided config,
    # we have to override the request method here
    self.request = limits(calls=self.rate_limit_requests, period=self.rate_limit_seconds)(self.request)
    self.request = sleep_and_retry(self.request)
    self.request = backoff.on_exception(
        backoff.expo,
        requests.exceptions.RequestException,
        max_tries=self.max_retries,
        giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500,
        factor=self.backoff_factor)(self.request)
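# Condensed sketch of the runtime-decoration idea above: because the limits come
# from config, the method is wrapped in __init__ instead of with @-syntax at class
# definition time. The class and its 2-calls-per-second limit are illustrative.
import requests
from ratelimit import limits, sleep_and_retry

class Tap:
    def __init__(self, calls, period):
        # self.request resolves to the bound method; the wrapped version shadows it
        self.request = sleep_and_retry(limits(calls=calls, period=period)(self.request))

    def request(self, url):
        return requests.get(url)

tap = Tap(calls=2, period=1)  # limits chosen at runtime, per instance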
def __init__(
    self,
    username=None,
    password=None,
    base_uri=BASE_URI,
    auth_uri=AUTH_URI,
    timeout=TIMEOUT,
    loglevel=None,
    rate_limit=True,
):
    self.base_uri = base_uri
    self.auth_uri = auth_uri
    self.username = username
    self.password = password
    self._user_id = None
    self.timeout = timeout
    self.refresh_token = None
    self.session = requests.Session()
    self._set_tokens()

    if loglevel:  # pragma: no cover
        logging.basicConfig(
            level=loglevel,
            format="%(asctime)s %(levelname)-8s %(name)-25s %(message)s",
        )
    else:
        logger.addHandler(logging.NullHandler())

    self.documents = DocumentClient(self)
    self.projects = ProjectClient(self)
    self.users = UserClient(self)
    self.organizations = OrganizationClient(self)

    if rate_limit:
        self._request = ratelimit.limits(calls=RATE_LIMIT, period=RATE_PERIOD)(self._request)
def __init__(self, base_url, client_id, client_secret, client_scope, api_json=None):
    # type: (str, str, str, str, str) -> None
    """[Init method used to create an Api Class for making api calls]

    :param base_url: Base URL of the API service
    :type base_url: str
    :param client_id: Client ID of the application
    :type client_id: str
    :param client_secret: Secret of the application
    :type client_secret: str
    :param client_scope: Client scope, must be present in the api_json file
    :type client_scope: str
    :param api_json: API file defining all JSON limits and calls, defaults to None because it will use the default
    :type api_json: str, optional
    :raises Exception: If this cannot be configured with the parameters used
    """
    self.base_url = base_url
    self.client_id = client_id
    self.client_secret = client_secret
    self.client_scope = client_scope

    # If the user doesn't pass an alternate API file, use the included one
    if not api_json:
        api_json = pkg_resources.resource_filename(__name__, 'apis.json')

    with open(api_json, encoding='utf-8') as api_file:
        apis = json.loads(api_file.read())

    if client_scope in apis:
        api = apis.get(client_scope)
        self.token_url = api.get('token_url')
        self.api_call = sleep_and_retry(
            limits(calls=api.get('limits_calls'), period=api.get('limits_period'))(self._api_call))
        self.access_token = self.get_access_token(self.token_url)
    else:
        raise Exception(f"Scope {client_scope} not in known API dict")
def __init__(self, expected_filename, _logger, warcinfo_record_data=None, program_name='WebArticleCurator',
             user_agent=None, overwrite_warc=True, err_threshold=10, known_bad_urls=None,
             max_no_of_calls_in_period=2, limit_period=1, proxy_url=None, allow_cookies=False,
             verify_request=True, stay_offline=False):
    # Store variables
    self._logger = _logger
    self._req_headers = {
        'Accept-Encoding': 'identity',
        'User-agent': user_agent
    }
    self._error_count = 0
    self._error_threshold = err_threshold  # Set the error threshold which causes aborting, to prevent denial of service

    # Setup the download function
    if not stay_offline:
        self.download_url = self._download_url
    else:
        self.download_url = self._dummy_download_url

    if known_bad_urls is not None:  # Setup the list of cached bad URLs to prevent trying to download them again
        with open(known_bad_urls, encoding='UTF-8') as fh:
            self.bad_urls = {line.strip() for line in fh}
    else:
        self.bad_urls = set()

    self.good_urls = set()

    # Setup the target file handle
    filename = self._set_target_filename(expected_filename, overwrite_warc)
    self._logger.log('INFO', 'Creating archive file:', filename)
    self._output_file = open(filename, 'wb')

    self._session = Session()  # Setup session for speeding up downloads
    if proxy_url is not None:  # Set a SOCKS proxy if provided
        self._session.proxies['http'] = proxy_url
        self._session.proxies['https'] = proxy_url

    self._allow_cookies = allow_cookies
    self._verify_request = verify_request
    if not self._verify_request:
        disable_warnings(InsecureRequestWarning)

    # Setup rate limiting to prevent hammering the server
    self._requests_get = sleep_and_retry(
        limits(calls=max_no_of_calls_in_period, period=limit_period)(self._http_get_w_cookie_handling))

    self._writer = WARCWriter(self._output_file, gzip=True, warc_version='WARC/1.1')
    if warcinfo_record_data is None:  # Use the defaults, or else the parsed custom headers will not be copied
        # INFO RECORD
        # Some custom information about the WARC writer program and its settings
        warcinfo_record_data = {
            'software': program_name,
            'arguments': ' '.join(sys.argv[1:]),
            'format': 'WARC File Format 1.1',
            'conformsTo': 'http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1-1_latestdraft.pdf'
        }
    info_record = self._writer.create_warcinfo_record(filename, warcinfo_record_data)
    self._writer.write_record(info_record)
import os

from ratelimit import sleep_and_retry, limits

from mp_api.core.settings import MAPISettings

DEFAULT_ENDPOINT = os.environ.get(
    "MP_API_ENDPOINT", "https://api.materialsproject.org/"
)


def check_limit():
    """
    Empty function for enabling global rate limiting.
    """
    return


if "api.materialsproject" in DEFAULT_ENDPOINT:
    check_limit = limits(calls=MAPISettings().requests_per_min, period=60)(check_limit)
    check_limit = sleep_and_retry(check_limit)
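# Hypothetical call site for check_limit above: invoking it before every outbound
# request makes all callers share one global budget. _get is an illustrative helper.
import requests

def _get(url):
    check_limit()  # no-op that blocks once the shared per-minute cap is reached
    return requests.get(url)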
import re

from bs4 import BeautifulSoup
from ratelimit import limits, sleep_and_retry
from requests import get
from tqdm import tqdm
from yaml import dump
from toolz.curried import comp

data_path = 'data.yaml'
country_list_url = 'https://simple.wikipedia.org/wiki/List_of_European_countries'
border_list_url = 'https://en.wikipedia.org/wiki/List_of_countries_and_territories_by_land_borders'
base_url = 'https://en.wikipedia.org/wiki/'

# Compose sleep_and_retry with a 1-call-per-second limit into one reusable decorator
wikilimiter = comp(sleep_and_retry, limits(1, 1))


@wikilimiter
def download_countries():
    country_list_page = BeautifulSoup(get(country_list_url).text, features='html.parser')
    country_table = [row.find_all('a', href=re.compile(r'^/wiki/'))[1:]
                     for row in country_list_page.find('tbody').find_all('tr')[1:]]
    return {str(a[0].string): str(a[-1]['href']).replace('/wiki/', '') for a in country_table}


@wikilimiter
def download_borders():
    border_list_page = BeautifulSoup(get(border_list_url).text, features='html.parser')
    return dict(((q := [str(a.string) for a in row.find_all('a', href=re.compile(r'^/wiki/'))
                        if str(a.string)[0].isupper()])[0], q[1:])
                for row in border_list_page.find('tbody').find_all('tr')[2:]
                if not ('overseas' in str(row) and 'excluding' not in str(row)))


@wikilimiter
def download_coords(capital):
    capital_page = BeautifulSoup(get(base_url + capital).text, features='html.parser')
                    'xml': _xml, 'df': _data_frame,
                    'csv': _csv, 'numpy': _numpy,
                    'tab': _tab, 'pipe': _pipe}
    else:
        dispatch = {'dict': _dict, 'json': _json, 'xml': _xml}
    return dispatch[response_type]


def _get_request(url_root, api_key, path, response_type, params, ssl_verify):
    """
    Helper function that requests a get response from FRED.
    """
    url = _url_builder(url_root, api_key, path, params)
    content = _fetch(url, ssl_verify)
    response = _dispatch(response_type)(content)
    return response


if _USE_JOBLIB_CACHE:
    import joblib
    one_gb = 1000000000
    location = '/tmp/joblib_cache'
    memory = joblib.Memory(location, verbose=1, bytes_limit=one_gb)
    if _THROTTLE_REQUESTS:
        from ratelimit import limits, sleep_and_retry
        period_seconds = 1
        calls_per_second = 20
        _get_request = memory.cache(
            sleep_and_retry(limits(calls=calls_per_second, period=period_seconds)(_get_request)))
    else:
        _get_request = memory.cache(_get_request)
def add_rate_limiting(self, f: Callable):
    # limits must be applied first so that sleep_and_retry, as the outer wrapper,
    # can catch the RateLimitException it raises and sleep instead of propagating
    g = limits(calls=self.ratelimit_calls_per_min, period=60)(f)
    g = sleep_and_retry(g)
    return g
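# Quick demonstration of why the wrapping order fixed above matters: sleep_and_retry
# must be outermost to catch the RateLimitException that limits raises. The ping
# function and the 2-per-second cap are illustrative assumptions.
from ratelimit import limits, sleep_and_retry

def ping():
    return "pong"

ping = sleep_and_retry(limits(calls=2, period=1)(ping))  # correct order

for _ in range(5):
    ping()  # from the third call on, this sleeps instead of raising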