def batch_delete_items(model, args=None, kwargs=None, skip_items=None, limit_items=None,
                       batch_number=500, chunk_size=10000, delay=20):
    """Queue delete tasks for every item of ``model`` selected by the filters.

    This is the task to reach for when something has gone wrong and the
    database is left holding a lot of orphaned entries.

    Be aware: PKs are pulled from the database in lists, which may cause
    increased memory use in the short term.

    * model is a string
    * args and kwargs are forwarded to create_queryset
    * skip_items and limit_items are forwarded to create_queryset
    * batch_number is the number of delete tasks that get sent off in one go
    * chunk_size is the number of PKs that are loaded into memory at once
    * delay is the number of seconds between each batch of delete tasks
    """
    queryset = create_queryset(
        model,
        args=args,
        kwargs=kwargs,
        skip_items=skip_items,
        limit_items=limit_items,
    )

    for chunk_index, pks in chunk_queryset(queryset, chunk_size):
        # One (model, pk) argument tuple per item to delete.
        task_args = [(model, pk) for pk in pks]
        delete_tasks = delete_inboxen_item.chunks(task_args, batch_number).group()

        # The start value grows with the chunk index, in multiples of ``delay``.
        first_start = (chunk_index + 1) * delay
        task_group_skew(delete_tasks, start=first_start, step=delay)
        delete_tasks.apply_async()
def user_suspended_delete_user(kwargs, batch_number=500, chunk_size=10000, delay=20):
    """Queue account-deletion tasks for every user matching ``kwargs``.

    * kwargs is forwarded to create_queryset to select users
    * batch_number is the number of delete tasks grouped into one batch
    * chunk_size is the number of PKs that are loaded into memory at once
    * delay is used as the step between batches and to scale the start
      value of each chunk
    """
    users = create_queryset(get_user_model(), kwargs=kwargs)
    for idx, chunk in chunk_queryset(users, chunk_size):
        user_tasks = delete_account.chunks([(pk,) for pk in chunk], batch_number).group()
        # Scale the start value by ``delay`` so chunks are spaced in the same
        # units as ``step``. This matches batch_delete_items, calculate_quota
        # and user_suspended_delete_emails; previously this was ``idx + 1``.
        # NOTE(review): assumes task_group_skew treats start/step as seconds
        # of countdown -- confirm against its definition.
        task_group_skew(user_tasks, start=(idx + 1) * delay, step=delay)
        user_tasks.delay()
def calculate_quota(batch_number=500, chunk_size=10000, delay=20):
    """Queue a quota calculation task for every user.

    Does nothing when ``settings.PER_USER_EMAIL_QUOTA`` is falsy.

    * batch_number is the number of tasks grouped into one batch
    * chunk_size is the number of PKs that are loaded into memory at once
    * delay is used as the step between batches and to scale the start
      value of each chunk
    """
    quota_enabled = bool(settings.PER_USER_EMAIL_QUOTA)
    if not quota_enabled:
        return

    all_users = get_user_model().objects.all()
    for chunk_index, pks in chunk_queryset(all_users, chunk_size):
        # Each task takes a single positional argument, hence the 1-tuples.
        task_args = [(pk,) for pk in pks]
        quota_tasks = calculate_user_quota.chunks(task_args, batch_number).group()

        first_start = (chunk_index + 1) * delay
        task_group_skew(quota_tasks, start=first_start, step=delay)
        quota_tasks.delay()
def user_suspended_delete_emails(kwargs, batch_number=500, chunk_size=10000, delay=20):
    """Queue delete tasks for emails selected by user-level filters.

    * kwargs holds filters written against the user; each key is prefixed
      with ``inbox__user__`` before being passed to create_queryset
    * batch_number is the number of delete tasks grouped into one batch
    * chunk_size is the number of PKs that are loaded into memory at once
    * delay is used as the step between batches and to scale the start
      value of each chunk
    """
    # Re-key the filters so they can be applied to the "email" queryset.
    email_filters = {}
    for field, value in kwargs.items():
        email_filters["inbox__user__%s" % field] = value

    matching_emails = create_queryset("email", kwargs=email_filters)
    for chunk_index, pks in chunk_queryset(matching_emails, chunk_size):
        task_args = [("email", pk) for pk in pks]
        delete_tasks = delete_inboxen_item.chunks(task_args, batch_number).group()

        first_start = (chunk_index + 1) * delay
        task_group_skew(delete_tasks, start=first_start, step=delay)
        delete_tasks.delay()
def test_over_chunk_size(self):
    # With a chunk size of 10, the inboxes should come back as ten
    # consecutive (index, pks) pairs.
    ordered_inboxes = models.Inbox.objects.all().order_by("pk")
    result = list(task_utils.chunk_queryset(ordered_inboxes, 10))

    # The first nine chunks are exact 10-item windows...
    expected = [
        (index, self.inbox_pks[index * 10:(index + 1) * 10])
        for index in range(9)
    ]
    # ...and the last chunk takes whatever remains from offset 90.
    expected.append((9, self.inbox_pks[90:]))

    self.assertEqual(result, expected)
def test_empty(self):
    # An empty queryset should produce no chunks at all.
    empty_inboxes = models.Inbox.objects.none()
    result = list(task_utils.chunk_queryset(empty_inboxes, 100))
    self.assertEqual(result, [])
def test_at_chunk_size(self):
    # With a chunk size of 100, every inbox PK should arrive in a single
    # chunk with index 0.
    ordered_inboxes = models.Inbox.objects.all().order_by("pk")
    result = list(task_utils.chunk_queryset(ordered_inboxes, 100))

    expected = [(0, self.inbox_pks)]
    self.assertEqual(result, expected)