def __init__(self, api_key=None, client_id=None, secret_key=None, reverse_cache_geohash=9):
    """Set up the Google geolocator, two rate limiters and the reverse cache.

    Args:
        api_key: Passed to ``geopy.GoogleV3(api_key=...)`` when truthy.
        client_id: Used together with ``secret_key`` when no ``api_key``
            is supplied.
        secret_key: Used together with ``client_id``.
        reverse_cache_geohash: Stored as the geohash length used for
            reverse-lookup cache keys.

    Raises:
        ValueError: If neither ``api_key`` nor both ``client_id`` and
            ``secret_key`` are given.
    """
    has_client_credentials = client_id and secret_key
    if not api_key and not has_client_credentials:
        raise ValueError('One of either the api_key or both client_id and secret_key must be provided.')

    # api_key takes precedence over the client_id/secret_key pair.
    if api_key:
        self._geolocator = geopy.GoogleV3(api_key=api_key)
    else:
        self._geolocator = geopy.GoogleV3(client_id=client_id, secret_key=secret_key)

    # Separate limiters for forward and reverse lookups.
    # NOTE(review): the meaning of the argument 10 depends on RateLimiter,
    # which is not visible here.
    self._geocode_limiter = RateLimiter(10)
    self._reverse_limiter = RateLimiter(10)

    self._reverse_cache = {}
    self._reverse_cache_geohash_length = reverse_cache_geohash
def test_ignore_equals(self):
    # Feeding the limiter the same (total, remaining) pair over and over
    # must leave exactly one entry in the collector.
    changes = HitChangesCollector()
    sensitivity = 1 / 60.0
    # Quota and remaining count are equal and never change in this test.
    quota = 3600
    unused = 3600
    limiter = RateLimiter(sensitivity, changes)

    # One update per hit the quota allows.
    for _ in range(quota):
        limiter.update(quota, unused)

    self.assertEqual(1, len(changes.changes))
async def test_rate_limiter3(self):
    # Acquire once, sleep 1.1 s, then acquire again; both acquisitions
    # must complete within 10 ms.
    # NOTE(review): RateLimiter(1, 1, ...) presumably means one call per
    # one-second window - confirm against the RateLimiter signature.
    limiter = RateLimiter(1, 1, self._logger)
    self._rl = limiter
    immediate = 0.01

    await asyncio.wait_for(self.acquire(limiter), immediate)
    await asyncio.sleep(1.1)
    await asyncio.wait_for(self.acquire(limiter), immediate)
class Geocoder(object):
    """Rate-limited wrapper around geopy's GoogleV3 geocoder.

    Reverse lookups can be cached in memory, keyed by the geohash of the
    requested coordinates, so points that fall in the same geohash cell share
    one cached address.
    """

    def __init__(self, api_key=None, client_id=None, secret_key=None, reverse_cache_geohash=9):
        """Create the geolocator, the rate limiters and the reverse cache.

        Args:
            api_key: Google API key; takes precedence when truthy.
            client_id: Used together with ``secret_key`` when no ``api_key``
                is given.
            secret_key: Used together with ``client_id``.
            reverse_cache_geohash: Geohash length for reverse-cache keys.
                The cache is only used when ``0 < length <= 12``.

        Raises:
            ValueError: If neither ``api_key`` nor both ``client_id`` and
                ``secret_key`` are provided.
        """
        if api_key:
            self._geolocator = geopy.GoogleV3(api_key=api_key)
        elif client_id and secret_key:
            self._geolocator = geopy.GoogleV3(client_id=client_id, secret_key=secret_key)
        else:
            raise ValueError('One of either the api_key or both client_id and secret_key must be provided.')

        self._geocode_limiter = RateLimiter(10)
        self._reverse_limiter = RateLimiter(10)

        self._reverse_cache_geohash_length = reverse_cache_geohash
        self._reverse_cache = {}

    def _using_cache(self):
        """Return True when the configured geohash length enables the cache."""
        return 0 < self._reverse_cache_geohash_length <= 12

    def geocode(self, address):
        """Resolve ``address`` to coordinates.

        Returns:
            ``[latitude, longitude]``, or ``None`` when the geocoder finds
            no match for the address.
        """
        self._geocode_limiter.wait()
        loc = self._geolocator.geocode(address)
        if loc is None:
            # The geocoder yields None when nothing matches; previously this
            # surfaced as an AttributeError on ``loc.latitude``.
            return None
        return [loc.latitude, loc.longitude]

    def reverse(self, latitude, longitude):
        """Resolve coordinates to an address string.

        The local cache is consulted first when it is enabled; on a miss the
        geocoder is queried (after waiting on the reverse rate limiter) and a
        non-empty result is stored in the cache.

        Returns:
            The address, or ``None`` when the geocoder finds no match.
        """
        use_cache = self._using_cache()
        ghash = None

        # Try the local cache first, if we are using it.
        if use_cache:
            ghash = geohash.encode(float(latitude), float(longitude), self._reverse_cache_geohash_length)
            cached = self._reverse_cache.get(ghash)
            if cached:
                return cached

        # Cache miss (or cache disabled): ask the geocoder.
        self._reverse_limiter.wait()
        loc = self._geolocator.reverse((latitude, longitude), exactly_one=True)
        # No match comes back as None; do not dereference it.
        addr = loc.address if loc is not None else None

        # Only remember real answers so a failed lookup can be retried later.
        if addr and use_cache:
            self._reverse_cache[ghash] = addr

        return addr
async def test_rate_limiter5(self):
    # Three background acquisitions (with arguments 1, 2, 3) are scheduled
    # first; three more acquisitions are then awaited and their sorted
    # results must be close to 3, 4 and 5.
    limiter = RateLimiter(3, 2, self._logger)
    self._rl = limiter

    for delay in (1, 2, 3):
        asyncio.ensure_future(self.acquire(limiter, delay))

    # In this scenario we have to wait 1 + 2, 2 + 2 and 3 + 2 seconds.
    pending = [asyncio.wait_for(self.acquire(limiter), 5.1) for _ in range(3)]
    times = list(await asyncio.gather(*pending))
    times.sort()

    self.assertRecursiveAlmostEqual(times, [3, 4, 5], delta=0.1)
def test_fast_limiter(self):
    # After the initial update, ten updates spaced one second apart must
    # produce exactly one further change, equal (after int truncation) to
    # the first recorded value plus the sensitivity.
    changes = HitChangesCollector()
    sensitivity = 1.0 / 60.0
    # Quota and remaining count both start at 36000.
    quota = 3600 * 10
    left = 3600 * 10
    limiter = RateLimiter(sensitivity, changes)

    limiter.update(quota, left)
    self.assertEqual(1, len(changes.changes))

    # Hit the API once a second.
    for _ in range(10):
        time.sleep(1)
        limiter.update(quota, left)
        left -= 1

    self.assertEqual(2, len(changes.changes))
    recorded = changes.changes
    self.assertEqual(int(recorded[1]), int(recorded[0] + sensitivity))
def test_slow_limiter(self):
    # After the initial update, one hundred updates spaced 1 ms apart must
    # produce exactly one further change, equal (after int truncation) to
    # the first recorded value minus the sensitivity.
    changes = HitChangesCollector()
    sensitivity = 1 / 60.0
    # Quota and remaining count both start at 3600.
    quota = 3600
    left = 3600
    limiter = RateLimiter(sensitivity, changes)

    limiter.update(quota, left)
    self.assertEqual(1, len(changes.changes))

    # Hit the API 100 times in roughly a tenth of a second.
    for _ in range(100):
        time.sleep(0.001)
        limiter.update(quota, left)
        left -= 1

    self.assertEqual(2, len(changes.changes))
    recorded = changes.changes
    self.assertEqual(int(recorded[1]), int(recorded[0] - sensitivity))
async def test_rate_limiter4(self):
    # Phase 1: three acquisitions with arguments 1, 2, 3 are validated
    # against elapsed times of 1.1, 2.1 and 3.1.
    # Phase 2: three plain acquisitions must return, once sorted, values
    # close to 0, 1 and 2.
    limiter = RateLimiter(3, 2, self._logger)
    self._rl = limiter

    # These won't register the call as having happened until 1, 2 and
    # 3 seconds afterwards.
    slow_calls = [
        self.validate_elapsed(self.acquire(limiter, delay), delay + 0.1)
        for delay in range(1, 4)
    ]
    await asyncio.gather(*slow_calls)

    # We have waited 3 s: the 1 s call is released, the second still has
    # 1 s to wait and the third has 2 s.
    follow_ups = [asyncio.wait_for(self.acquire(limiter), 3) for _ in range(3)]
    times = list(await asyncio.gather(*follow_ups))
    times.sort()

    self.assertRecursiveAlmostEqual(times, [0, 1, 2], delta=0.1)
async def munch_defendants(self, defendants):
    """Download data for every defendant, then for every collected link.

    All requests go through one aiohttp session wrapped in a RateLimiter
    built from ``self.rate`` and ``self.max_tokens``. ``self.links`` is read
    only after the defendant downloads have finished and is reset to an empty
    set afterwards.

    Args:
        defendants: Iterable of items handed one by one to
            ``self.download_defendant_data``.
    """
    self.log(f"Munching started")

    # total=None: no overall timeout is imposed on the session.
    no_timeout = aiohttp.ClientTimeout(total=None)
    client = aiohttp.ClientSession(headers={"Connection": "close"}, timeout=no_timeout)

    async with client as raw_session:
        session = RateLimiter(raw_session, self.rate, self.max_tokens)

        defendant_jobs = [
            self.download_defendant_data(session, defendant)
            for defendant in defendants
        ]
        await asyncio.gather(*defendant_jobs)

        # Built only now, so links gathered by the first phase are included.
        sheet_jobs = [
            self.download_sheet_data(session, link)
            for link in self.links
        ]
        await asyncio.gather(*sheet_jobs)

        self.links = set()
        self.log("Munching completed")
async def test_rate_limiter2(self):
    # Parallel scenario: three concurrent acquisitions must each finish
    # within 0.1 s; a fourth must time out at 0.1 s yet still complete
    # later, alongside two more acquisitions.
    limiter = RateLimiter(3, 2, self._logger)
    self._rl = limiter

    first_batch = [asyncio.wait_for(self.acquire(limiter), 0.1) for _ in range(3)]
    await asyncio.gather(*first_batch)  # calls 1, 2, 3

    fourth = asyncio.Task(self.acquire(limiter))  # call 4

    # This one should time out but still count; shield keeps the task
    # alive when wait_for gives up.
    with self.assertRaises(asyncio.TimeoutError):
        await asyncio.wait_for(asyncio.shield(fourth), 0.1)  # call 4 (fail)

    # These should take 2 seconds and pass.
    late_calls = [self.validate_elapsed(self.acquire(limiter), 1.95) for _ in range(2)]
    await asyncio.gather(self.validate_elapsed(fourth, 1.95), *late_calls)
async def test_rate_limiter1(self):
    # Sequential scenario: calls 1-3 complete within 10 ms, call 4 times
    # out first and completes later; the same pattern repeats for calls
    # 5-6 and call 7.
    limiter = RateLimiter(3, 2, self._logger)
    self._rl = limiter
    immediate = 0.01

    for _ in range(3):  # calls 1, 2, 3
        await asyncio.wait_for(self.acquire(limiter), immediate)

    blocked = asyncio.Task(self.acquire(limiter))  # call 4
    with self.assertRaises(asyncio.TimeoutError):
        # shield keeps the task alive when wait_for gives up
        await asyncio.wait_for(asyncio.shield(blocked), 0.1)  # call 4 (fail)
    await self.validate_elapsed(blocked, 1.95)  # call 4 (complete)

    for _ in range(2):  # calls 5, 6
        await asyncio.wait_for(self.acquire(limiter), immediate)

    blocked = asyncio.Task(self.acquire(limiter))  # call 7
    with self.assertRaises(asyncio.TimeoutError):
        await asyncio.wait_for(asyncio.shield(blocked), immediate)  # call 7 (fail)
    await self.validate_elapsed(blocked, 2)  # call 7 (complete)
import time

from rate_limiter import RateLimiter


class DefaultTimer(object):
    """Timer backed by the ``time`` module: wall-clock reads, blocking sleeps."""

    def time(self):
        """Return the current time as given by ``time.time()``."""
        return time.time()

    def wait(self, duration):
        """Block the calling thread for ``duration`` seconds."""
        time.sleep(duration)


limiter = RateLimiter(2, 2, timer=DefaultTimer())


def hello():
    """Sleep for two seconds, print a greeting and return ``"kek"``."""
    time.sleep(2)  # this sleep value was varied while observing the results
    print("Hello!")
    return "kek"


def test_call():
    """Run ``hello`` six times through the limiter and print the elapsed time."""
    started_at = time.time()
    for _ in range(6):
        outcome = limiter.call(hello)
        print(outcome)
    elapsed = time.time() - started_at
    print("Elapsed %.2f seconds" % elapsed)

# The point is that call(), as far as I understand, should run sequentially.
# For example, with sleep_time=2 and range=6 it should finish in 12 seconds.
except Exception as e: print(e) print('product callback exception') ch.basic_ack(delivery_tag=method.delivery_tag) if __name__ == '__main__': try: RABBITMQ_USER = os.environ.get('RABBITMQ_USER') RABBITMQ_PASSWORD = os.environ.get('RABBITMQ_PASSWORD') RABBITMQ_HOST = os.environ.get('RABBITMQ_HOST') credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASSWORD) connection = pika.BlockingConnection( pika.ConnectionParameters(host=RABBITMQ_HOST, credentials=credentials)) ch = connection.channel() ch.queue_declare(queue='products', durable=True) ch.basic_qos(prefetch_count=1) rate_limiter = RateLimiter() redis_cache = RedisCache() downloader = Downloader(rate_limiter, cache=redis_cache) product_crawler = ProductCrawler(downloader) ch.basic_consume( queue='products', on_message_callback=product_crawler.callback) ch.start_consuming() except: connection = None finally: if connection is not None: connection.close()
import json
from wsgiref import simple_server

import falcon

from rate_limiter import RateLimiter


class HelloResource(object):
    """Resource that answers GET requests with a fixed JSON greeting."""

    def on_get(self, req, resp):
        """Reply with HTTP 200 and the JSON body ``{"message": "hello"}``."""
        payload = {'message': 'hello'}
        resp.status = falcon.HTTP_200
        resp.content_type = 'application/json'
        # NOTE(review): `resp.body` and `falcon.API` look like the pre-3.0
        # Falcon spellings (later `resp.text` / `falcon.App`) - confirm the
        # pinned Falcon version before upgrading.
        resp.body = json.dumps(payload)


# The rate limiter is installed as middleware on the application.
app = falcon.API(middleware=[RateLimiter(limit=2)])
hello = HelloResource()
app.add_route('/hello', hello)

if __name__ == '__main__':
    # Serve with the reference WSGI server on all interfaces, port 8080.
    httpd = simple_server.make_server('0.0.0.0', 8080, app)
    httpd.serve_forever()