def test_chunks(self):
    """Check that ``chunks`` splits an iterator into lists of ``n`` items.

    When the input length is not a multiple of ``n`` the final list holds
    the remaining items; when it is, every list has exactly ``n`` items.
    """
    # NOTE: ``assertEqual`` replaces the deprecated ``assertEquals`` alias,
    # which no longer exists in recent unittest releases.

    # n == 2
    x = chunks(iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 2)
    self.assertEqual(list(x),
                     [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10]])

    # n == 3
    x = chunks(iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), 3)
    self.assertEqual(list(x),
                     [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10]])

    # n == 2 (exact)
    x = chunks(iter([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), 2)
    self.assertEqual(list(x),
                     [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
def test_chunks(self):
    """Exercise ``utils.chunks`` with uneven and exact splits.

    Each case is ``(input values, chunk length, expected lists)``; the
    cases run in order and the first mismatch fails the test.
    """
    cases = (
        # n == 2
        ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 2,
         [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10]]),
        # n == 3
        ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 3,
         [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10]]),
        # n == 2 (exact)
        ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 2,
         [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]),
    )
    for values, length, expected in cases:
        batches = utils.chunks(iter(values), length)
        self.assertListEqual(list(batches), expected)
def even_time_distribution(task, size, time_window, iterable, **apply_kwargs):
    r"""With an iterator yielding task args, kwargs tuples, evenly distribute
    the processing of its tasks throughout the time window available.

    The items are split into buckets of ``ceil(size / time_window)`` elements
    (never fewer than one). The first bucket is applied without a countdown
    and every following bucket is delayed by one more minute than the one
    before it, so at most ``time_window`` buckets are scheduled when ``size``
    matches the number of items actually yielded.

    :param task: The kind of task (a :class:`celery.task.base.Task`.)
    :param size: Total number of elements the iterator gives.
    :param time_window: Total time available, in minutes.
    :param iterable: Iterable yielding task args, kwargs tuples.
    :param \*\*apply_kwargs: Additional keyword arguments to be passed on to
        :func:`celery.execute.apply_async`.

    :raises ZeroDivisionError: If ``time_window`` is ``0``.

    Example

        >>> class RefreshAllFeeds(Task):
        ...
        ...     def run(self, **kwargs):
        ...         feeds = Feed.objects.all()
        ...         total = feeds.count()
        ...
        ...         time_window = REFRESH_FEEDS_EVERY_INTERVAL_MINUTES
        ...
        ...         def iter_feed_task_args(iterable):
        ...             for feed in iterable:
        ...                 yield ([feed.feed_url], {})  # args, kwargs tuple
        ...
        ...         it = iter_feed_task_args(feeds.iterator())
        ...
        ...         even_time_distribution(RefreshFeedTask, total,
        ...                                time_window, it)

    """
    # Ceiling division: rounding down would produce more buckets than there
    # are minutes in the window (and a bucket size of 0 whenever
    # ``size < time_window``), so round up and never go below one item.
    bucketsize = max(1, int(-(-size // time_window)))
    buckets = chunks(iterable, bucketsize)

    connection = task.establish_connection()
    try:
        for bucket_count, bucket in enumerate(buckets):
            # Each bucket is delayed one minute more than the previous one;
            # the first bucket gets no countdown at all.
            seconds_eta = (60 * bucket_count if bucket_count else None)

            for args, kwargs in bucket:
                task.apply_async(args=args, kwargs=kwargs,
                                 connection=connection,
                                 countdown=seconds_eta,
                                 **apply_kwargs)
    finally:
        # Always release the shared connection, even if publishing fails.
        connection.close()
def test_chunks(items, n, expected):
    """Assert that chunking ``items`` into lists of ``n`` gives ``expected``.

    ``items`` is copied into a list and handed to ``chunks`` as a plain
    iterator; the produced chunks are collected before comparing.
    """
    source = iter(list(items))
    produced = list(chunks(source, n))
    assert produced == expected
def collect_frequencies(chunksize=10, post_limit=10):
    """Dispatch ``update_frequency_chunk`` once per batch of feeds.

    :param chunksize: Chunk length passed to ``chunks`` when batching the
        feed iterator.
    :param post_limit: Forwarded unchanged to every
        ``update_frequency_chunk.delay`` call.
    """
    all_feeds = Feed.objects.all().iterator()
    for batch in chunks(all_feeds, chunksize):
        update_frequency_chunk.delay(batch, post_limit=post_limit)