def _lookup_throttle(self, url): p_url = urlparse(url) a = self._cache_throttle.get(p_url.netloc, []) for path, requests_per_second in a: if p_url.path.startswith(path): return requests_per_second return None
def _lookup_expire_after(self, url): if not self._cache_expire_after: return None p_url = urlparse(url) a = self._cache_expire_after_override.get(p_url.netloc, []) for path, secs in a: if p_url.path.startswith(path): return secs return self._cache_expire_after
def throttle(self, url, requests_per_second):
    """Specify a throttle rate for requests to the given url.

    All urls below the given url (path-prefix match) will be throttled
    at this rate.  To throttle an entire domain, provide the root url.

    Args:
        url: the url prefix to register the throttle for.
        requests_per_second: allowed rate; coerced to float, must be
            in [0, 1000].

    Returns:
        The registered rate as a float.

    Raises:
        ValueError: if the rate is negative or above 1000.
    """
    requests_per_second = float(requests_per_second)
    if requests_per_second < 0 or requests_per_second > 1000:
        raise ValueError(
            "requests_per_second must be between 0 and 1000, got %r"
            % requests_per_second
        )
    p_url = urlparse(url)
    # _cache_throttle is a dict mapping netloc -> list of
    # (path_prefix, rate) tuples.  Accumulate entries per domain
    # instead of clobbering earlier registrations; an existing entry
    # for the same path is replaced.
    entries = self._cache_throttle.setdefault(p_url.netloc, [])
    entries[:] = [(path, rate) for path, rate in entries if path != p_url.path]
    entries.append((p_url.path, requests_per_second))
    # Longest prefixes first so the most specific registration wins
    # during lookup.
    entries.sort(key=lambda t: len(t[0]), reverse=True)
    return requests_per_second
def expire_after(self, url, expire_after=300):
    """Override the base expire_after setting by url prefix.

    The longest registered prefix for each domain is used when
    expiring matching requests.  Overrides MUST BE lower than (or
    equal to) the default setting.

    Args:
        url: the url prefix to register the override for.
        expire_after: expiry in seconds; must be in
            [0, self._cache_expire_after].

    Returns:
        The registered expiry, or ``None`` when no default expiry is
        configured (overrides are meaningless in that case).

    Raises:
        ValueError: if expire_after is negative or exceeds the default.
    """
    if not self._cache_expire_after:
        return None
    if expire_after > self._cache_expire_after or expire_after < 0:
        raise ValueError(
            "expire_after must be between 0 and %s, got %r"
            % (self._cache_expire_after, expire_after)
        )
    p_url = urlparse(url)
    # _cache_expire_after_override is a dict mapping netloc -> list of
    # (path_prefix, seconds) tuples.  Accumulate entries per domain
    # instead of clobbering earlier registrations; an existing entry
    # for the same path is replaced.
    entries = self._cache_expire_after_override.setdefault(p_url.netloc, [])
    entries[:] = [(path, secs) for path, secs in entries if path != p_url.path]
    entries.append((p_url.path, expire_after))
    # Longest prefixes first so the most specific registration wins
    # during lookup.
    entries.sort(key=lambda t: len(t[0]), reverse=True)
    return expire_after