Example #2
    def test_ignore_equals(self):
        collector = HitChangesCollector()
        sensitivity = 1/60.0 # 1 hit a minute

        # 1 hit every second allowed
        total_hits = 3600
        remaining_hits = 3600

        limiter = RateLimiter(sensitivity, collector)

        # hit the api maximum times allowed
        for i in range(total_hits):
            limiter.update(total_hits, remaining_hits)

        self.assertEqual(1, len(collector.changes))
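The collector tests in this family assume a HitChangesCollector exposing a changes list that the limiter appends to whenever it recomputes its rate. A minimal stub consistent with that usage (the callback name is an assumption, not necessarily the project's actual API):

class HitChangesCollector(object):
    def __init__(self):
        # every rate value the limiter has reported, in order
        self.changes = []

    def collect(self, rate):  # hypothetical callback; the real name may differ
        self.changes.append(rate)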
Example #3
    async def test_rate_limiter3(self):
        self._rl = rl = RateLimiter(1, 1, self._logger)
        await asyncio.wait_for(self.acquire(rl), 0.01)

        await asyncio.sleep(1.1)

        await asyncio.wait_for(self.acquire(rl), 0.01)
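The async tests above assume a RateLimiter(limit, period, logger) whose acquire() returns immediately while fewer than limit acquisitions have occurred within the last period seconds, and otherwise sleeps until one expires. A minimal sliding-window sketch of that contract (not the tested library's actual implementation):

import asyncio
import time
from collections import deque


class SlidingWindowLimiter(object):
    def __init__(self, limit, period):
        self._limit = limit
        self._period = period
        self._hits = deque()  # monotonic timestamps of recent acquisitions

    async def acquire(self):
        while True:
            now = time.monotonic()
            # drop acquisitions that have fallen out of the window
            while self._hits and now - self._hits[0] >= self._period:
                self._hits.popleft()
            if len(self._hits) < self._limit:
                self._hits.append(now)
                return
            # wait until the oldest acquisition expires, then re-check
            await asyncio.sleep(self._period - (now - self._hits[0]))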
Example #4
class Geocoder(object):
    def __init__(self, api_key=None, client_id=None, secret_key=None, reverse_cache_geohash=9):
        if api_key:
            self._geolocator = geopy.GoogleV3(api_key=api_key)
        elif client_id and secret_key:
            self._geolocator = geopy.GoogleV3(client_id=client_id, secret_key=secret_key)
        else:
            raise ValueError('One of either the api_key or both client_id and secret_key must be provided.')

        self._geocode_limiter = RateLimiter(10)
        self._reverse_limiter = RateLimiter(10)

        self._reverse_cache_geohash_length = reverse_cache_geohash
        self._reverse_cache = {}

    def _using_cache(self):
        return 0 < self._reverse_cache_geohash_length <= 12

    def geocode(self, address):
        self._geocode_limiter.wait()
        loc = self._geolocator.geocode(address)
        return [loc.latitude, loc.longitude]

    def reverse(self, latitude, longitude):
        addr = None

        # try to get the address from the local cache, if we're using it
        if self._using_cache():
            ghash = geohash.encode(float(latitude), float(longitude), self._reverse_cache_geohash_length)
            addr = self._reverse_cache.get(ghash)

        # if we didn't get the address from the cache, or we're not using the cache
        # then get it from Google
        if not addr:
            self._reverse_limiter.wait()
            loc = self._geolocator.reverse((latitude, longitude), exactly_one=True)
            addr = loc.address

        # if we're using the cache, save the value we just got back
        if addr and self._using_cache():
            self._reverse_cache[ghash] = addr

        return addr
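A hypothetical usage of the Geocoder above (the key is a placeholder; with the default geohash length of 9 the cache cells are a few metres across, so nearby reverse lookups are served locally):

geocoder = Geocoder(api_key='YOUR_API_KEY')  # placeholder key
lat, lon = geocoder.geocode('1600 Amphitheatre Parkway, Mountain View, CA')

# both calls fall in the same geohash cell, so the second is a cache hit
print(geocoder.reverse(lat, lon))
print(geocoder.reverse(lat + 1e-7, lon + 1e-7))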
Example #5
    async def test_rate_limiter5(self):
        self._rl = rl = RateLimiter(3, 2, self._logger)

        for i in range(1, 4):
            asyncio.ensure_future(self.acquire(rl, i))

        # in this scenario we'll have to wait 1 + 2, 2 + 2, 3 + 2 seconds
        times = sorted(await asyncio.gather(
            *[asyncio.wait_for(self.acquire(rl), 5.1) for _ in range(3)]))
        self.assertRecursiveAlmostEqual(times, [3, 4, 5], delta=0.1)
Example #6
    def test_fast_limiter(self):
        collector = HitChangesCollector()
        sensitivity = 1.0/60.0 # 1 hit a minute

        # 10 hits every second allowed
        total_hits = 3600 * 10
        remaining_hits = 3600 * 10

        limiter = RateLimiter(sensitivity, collector)

        limiter.update(total_hits, remaining_hits)
        self.assertEqual(1, len(collector.changes))

        # hit the api once a second
        for i in range(10):
            time.sleep(1)
            limiter.update(total_hits, remaining_hits)
            remaining_hits -= 1

        self.assertEqual(2, len(collector.changes))
        self.assertEqual(int(collector.changes[1]), int(collector.changes[0] + sensitivity))
Example #7
    def test_slow_limiter(self):
        collector = HitChangesCollector()
        sensitivity = 1/60.0 # 1 hit a minute

        # 1 hit every second allowed
        total_hits = 3600
        remaining_hits = 3600

        limiter = RateLimiter(sensitivity, collector)

        limiter.update(total_hits, remaining_hits)
        self.assertEqual(1, len(collector.changes))

        # hit the api 100 times in 1/10 sec
        for i in range(100):
            time.sleep(0.001)
            limiter.update(total_hits, remaining_hits)
            remaining_hits -= 1

        self.assertEqual(2, len(collector.changes))
        self.assertEqual(int(collector.changes[1]), int(collector.changes[0] - sensitivity))
Example #8
    async def test_rate_limiter4(self):
        self._rl = rl = RateLimiter(3, 2, self._logger)

        # these won't register the call as having happened 1, 2, 3 seconds after
        await asyncio.gather(*[
            self.validate_elapsed(self.acquire(rl, i), i + 0.1)
            for i in range(1, 4)
        ])

        # we've waited 3s, so 1s one is released, 2nd has 1s wait, 3rd has 2s wait
        times = sorted(await asyncio.gather(
            *[asyncio.wait_for(self.acquire(rl), 3) for _ in range(3)]))
        self.assertRecursiveAlmostEqual(times, [0, 1, 2], delta=0.1)
Example #9
    async def munch_defendants(self, defendants):
        self.log("Munching started")
        timeout = aiohttp.ClientTimeout(total=None)
        async with aiohttp.ClientSession(headers={"Connection": "close"},
                                         timeout=timeout) as session:
            session = RateLimiter(session, self.rate, self.max_tokens)
            await asyncio.gather(*[
                self.download_defendant_data(session, defendant)
                for defendant in defendants
            ])

            await asyncio.gather(*[
                self.download_sheet_data(session, link) for link in self.links
            ])
            self.links = set()
            self.log("Munching completed")
Example #10
    async def test_rate_limiter2(self):
        # test parallel
        self._rl = rl = RateLimiter(3, 2, self._logger)

        await asyncio.gather(*[
            asyncio.wait_for(self.acquire(rl), 0.1) for _ in range(3)
        ])  # 1, 2, 3

        fut = asyncio.Task(self.acquire(rl))  # 4

        # this one should timeout but count
        with self.assertRaises(asyncio.TimeoutError):
            await asyncio.wait_for(asyncio.shield(fut), 0.1)  # 4 (fail)

        # these should take 2 seconds and pass
        await asyncio.gather(
            self.validate_elapsed(fut, 1.95),
            *[self.validate_elapsed(self.acquire(rl), 1.95) for _ in range(2)])
Example #11
    async def test_rate_limiter1(self):
        # test sequential
        self._rl = rl = RateLimiter(3, 2, self._logger)

        await asyncio.wait_for(self.acquire(rl), 0.01)  # 1
        await asyncio.wait_for(self.acquire(rl), 0.01)  # 2
        await asyncio.wait_for(self.acquire(rl), 0.01)  # 3

        fut = asyncio.Task(self.acquire(rl))  # 4

        with self.assertRaises(asyncio.TimeoutError):
            await asyncio.wait_for(asyncio.shield(fut), 0.1)  # 4 (fail)

        await self.validate_elapsed(fut, 1.95)  # 4 (complete)
        await asyncio.wait_for(self.acquire(rl), 0.01)  # 5
        await asyncio.wait_for(self.acquire(rl), 0.01)  # 6

        fut = asyncio.Task(self.acquire(rl))  # 7

        with self.assertRaises(asyncio.TimeoutError):
            await asyncio.wait_for(asyncio.shield(fut), 0.01)  # 7 (fail)

        await self.validate_elapsed(fut, 2)  # 7 (complete)
Example #12
import time

from rate_limiter import RateLimiter


class DefaultTimer(object):
    def time(self):
        return time.time()

    def wait(self, duration):
        time.sleep(duration)


limiter = RateLimiter(2, 2, timer=DefaultTimer())


def hello():
    time.sleep(2)  # this is where I changed the sleep value and checked the results
    print("Hello!")
    return "kek"


def test_call():
    start = time.time()
    for _ in range(6):
        print(limiter.call(hello))
    print("Elapsed %.2f seconds" % (time.time() - start))


# The point is that call(), as I understand it, should run sequentially.
# For example, with sleep_time=2 and range=6 it should take 12 seconds.
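Given the constructor RateLimiter(2, 2, timer=DefaultTimer()) and the expectation that six two-second calls finish in about twelve seconds, call() plausibly blocks via timer.wait() once the window is full. A sliding-window sketch of that behaviour (an assumption, not the module's actual source):

from collections import deque


class BlockingRateLimiter(object):
    def __init__(self, max_calls, period, timer):
        self._max_calls = max_calls
        self._period = period
        self._timer = timer
        self._calls = deque()  # timestamps of recent calls

    def call(self, fn, *args, **kwargs):
        now = self._timer.time()
        # drop calls that have fallen out of the window
        while self._calls and now - self._calls[0] >= self._period:
            self._calls.popleft()
        if len(self._calls) >= self._max_calls:
            # block until the oldest call expires
            self._timer.wait(self._period - (now - self._calls[0]))
            self._calls.popleft()
        self._calls.append(self._timer.time())
        return fn(*args, **kwargs)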
Example #13
        except Exception as e:
            print(e)
            print('product callback exception')

        ch.basic_ack(delivery_tag=method.delivery_tag)


if __name__ == '__main__':
    # initialise before the try block so finally can always check it;
    # the original cleared connection in the except clause, which meant an
    # already-open connection was never closed on error
    connection = None
    try:
        RABBITMQ_USER = os.environ.get('RABBITMQ_USER')
        RABBITMQ_PASSWORD = os.environ.get('RABBITMQ_PASSWORD')
        RABBITMQ_HOST = os.environ.get('RABBITMQ_HOST')
        credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASSWORD)
        connection = pika.BlockingConnection(
                pika.ConnectionParameters(host=RABBITMQ_HOST, credentials=credentials))
        ch = connection.channel()
        ch.queue_declare(queue='products', durable=True)
        ch.basic_qos(prefetch_count=1)
        rate_limiter = RateLimiter()
        redis_cache = RedisCache()
        downloader = Downloader(rate_limiter, cache=redis_cache)
        product_crawler = ProductCrawler(downloader)
        ch.basic_consume(
                queue='products', on_message_callback=product_crawler.callback)
        ch.start_consuming()
    except Exception as e:
        print(e)
    finally:
        if connection is not None:
            connection.close()
Example #14
import json
from wsgiref import simple_server

import falcon
from rate_limiter import RateLimiter


class HelloResource(object):
    def on_get(self, req, resp):
        resp.content_type = 'application/json'
        resp.status = falcon.HTTP_200
        resp.body = json.dumps({'message': 'hello'})


app = falcon.API(middleware=[RateLimiter(limit=2)])
hello = HelloResource()
app.add_route('/hello', hello)

if __name__ == '__main__':
    httpd = simple_server.make_server('0.0.0.0', 8080, app)
    httpd.serve_forever()
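To exercise the limit, fire a few requests in quick succession against the running server (assuming the middleware rejects over-limit requests with an HTTP error; the exact status code depends on the RateLimiter implementation):

import urllib.error
import urllib.request

for i in range(3):
    try:
        with urllib.request.urlopen('http://127.0.0.1:8080/hello') as resp:
            print(i, resp.status, resp.read().decode())
    except urllib.error.HTTPError as exc:
        # with limit=2, the third request in the window may be refused
        print(i, 'rejected with HTTP', exc.code)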