def batch_delete_items(model, args=None, kwargs=None, skip_items=None, limit_items=None,
                       batch_number=500, chunk_size=10000, delay=20):
    """Queue deletion of every ``model`` row selected by the given filters.

    Meant for cleaning up when something has gone wrong and a lot of orphaned
    entries are left in the database.

    Be aware: a list of PKs is pulled from the database, which may cause
    increased memory use in the short term.

    * model is a string
    * args, kwargs, skip_items and limit_items are handed to create_queryset
      to select the rows
    * batch_number is the number of delete tasks that get sent off in one go
    * chunk_size is the number of PKs that are loaded into memory at once
    * delay is the number of seconds between each batch of delete tasks
    """
    queryset = create_queryset(
        model,
        args=args,
        kwargs=kwargs,
        skip_items=skip_items,
        limit_items=limit_items,
    )

    for chunk_index, pks in chunk_queryset(queryset, chunk_size):
        task_args = [(model, pk) for pk in pks]
        delete_group = delete_inboxen_item.chunks(task_args, batch_number).group()
        # the start value grows with the chunk index, in multiples of delay
        start_offset = (chunk_index + 1) * delay
        task_group_skew(delete_group, start=start_offset, step=delay)
        delete_group.apply_async()
def user_suspended_delete_user(kwargs, batch_number=500, chunk_size=10000, delay=20):
    """Queue account deletion for every user selected by ``kwargs``.

    * kwargs is handed to create_queryset to filter the user model
    * batch_number is the size passed to delete_account.chunks
    * chunk_size is the size passed to chunk_queryset
    * delay is used as the skew step and to scale the start offset of each
      chunk's task group
    """
    users = create_queryset(get_user_model(), kwargs=kwargs)
    for idx, chunk in chunk_queryset(users, chunk_size):
        user_tasks = delete_account.chunks([(i, ) for i in chunk], batch_number).group()
        # Scale the start offset by delay, the same way batch_delete_items and
        # user_suspended_delete_emails do; previously this was just idx + 1,
        # so the offset ignored delay entirely.
        task_group_skew(user_tasks, start=(idx + 1) * delay, step=delay)
        user_tasks.delay()
def user_suspended_delete_emails(kwargs, batch_number=500, chunk_size=10000, delay=20):
    """Queue deletion of every email whose inbox's user is selected by ``kwargs``.

    * kwargs holds user lookups; each key is prefixed with ``inbox__user__``
      before being handed to create_queryset for the "email" model
    * batch_number is the size passed to delete_inboxen_item.chunks
    * chunk_size is the size passed to chunk_queryset
    * delay is used as the skew step and to scale the start offset of each
      chunk's task group
    """
    # rewrite the user lookups so they apply from the email side
    email_filters = {}
    for key, value in kwargs.items():
        email_filters["inbox__user__%s" % key] = value

    queryset = create_queryset("email", kwargs=email_filters)

    for chunk_index, pks in chunk_queryset(queryset, chunk_size):
        task_args = [("email", pk) for pk in pks]
        delete_group = delete_inboxen_item.chunks(task_args, batch_number).group()
        start_offset = (chunk_index + 1) * delay
        task_group_skew(delete_group, start=start_offset, step=delay)
        delete_group.delay()
def batch_mark_as_deleted(model, app="inboxen", args=None, kwargs=None, skip_items=None, limit_items=None):
    """Mark the selected rows as deleted without actually deleting them.

    The rows are selected by handing model, args, kwargs, skip_items and
    limit_items to create_queryset; ``deleted=True`` is then set on them in a
    single update. The ``app`` parameter is not used by this function.
    """
    selected = create_queryset(
        model,
        args=args,
        kwargs=kwargs,
        skip_items=skip_items,
        limit_items=limit_items,
    )

    # cannot slice and update at the same time, so the selection is used as a
    # subquery against the model's default manager instead
    manager = selected.model.objects
    manager.filter(pk__in=selected).update(deleted=True)
def batch_set_new_flags(user_id=None, args=None, kwargs=None, batch_number=500):
    """Queue inbox_new_flag tasks for the selected inboxes and their users.

    * args and kwargs are handed to create_queryset to select inboxes
    * batch_number is the size passed to inbox_new_flag.chunks; a tenth of it
      is used as the skew step
    * user_id, when given, gets a single inbox_new_flag task of its own
      instead of one being queued per user found among the selected inboxes
    """
    rows = create_queryset("inbox", args=args, kwargs=kwargs).distinct().values_list(
        "pk", "user_id")

    inbox_args = []
    user_args = set()  # a set, so each user is only queued once
    for inbox_pk, owner_pk in rows.iterator():
        inbox_args.append((owner_pk, inbox_pk))
        user_args.add((owner_pk, ))

    # per-inbox tasks are always sent, and sent first
    inbox_group = inbox_new_flag.chunks(inbox_args, batch_number).group()
    skew_step = batch_number / 10.0
    task_group_skew(inbox_group, step=skew_step)
    inbox_group.apply_async()

    if user_id is not None:
        inbox_new_flag.delay(user_id)
        return

    if user_args:
        user_group = inbox_new_flag.chunks(user_args, batch_number).group()
        task_group_skew(user_group, step=batch_number / 10.0)
        user_group.apply_async()
def testcreate_queryset_exception(self):
    # calling create_queryset with neither args nor kwargs is expected to raise
    self.assertRaises(Exception, task_utils.create_queryset, "email")
def test_skip_and_limit_items(self):
    # expects self.emails with the first `skip` entries dropped and only the
    # following `limit` entries kept
    skip, limit = 1, 2
    queryset = task_utils.create_queryset(
        "email",
        kwargs={"pk__isnull": False},
        skip_items=skip,
        limit_items=limit,
    )
    found_pks = list(queryset.values_list("pk", flat=True))
    self.assertEqual(found_pks, self.emails[skip:skip + limit])
def test_args(self):
    # a positional Q filter that matches every row should give back all of
    # self.emails
    match_all = Q(pk__isnull=False)
    queryset = task_utils.create_queryset("email", args=(match_all,))
    found_pks = list(queryset.values_list("pk", flat=True))
    self.assertEqual(found_pks, self.emails)
def test_kwargs(self):
    # a keyword filter that matches every row should give back all of
    # self.emails
    match_all = {"pk__isnull": False}
    queryset = task_utils.create_queryset("email", kwargs=match_all)
    found_pks = list(queryset.values_list("pk", flat=True))
    self.assertEqual(found_pks, self.emails)
def user_suspended_disable_emails(kwargs):
    """Set ``receiving_emails=False`` on the profiles of users selected by ``kwargs``.

    kwargs holds user lookups; each key is prefixed with ``user__`` before
    being handed to create_queryset for the "userprofile" model.
    """
    # rewrite the user lookups so they apply from the profile side
    profile_filters = {}
    for key, value in kwargs.items():
        profile_filters["user__%s" % key] = value

    profiles = create_queryset("userprofile", kwargs=profile_filters)
    profiles.update(receiving_emails=False)