def disambiguation_daemon():
    """Dispatch one clustering job per phonetic block, then empty the table.

    Invoked by the Celery beat schedule (configurable from 'config.py').

    Every distinct record listed in ``DisambiguationRecord`` is read, the
    phonetic blocks of those records are gathered into a set (so each block
    is dispatched only once), a ``disambiguation_clustering`` task is sent
    for every block and finally all rows of the table are deleted.
    """
    # Records that were classified for clustering.
    pending_records = DisambiguationRecord.query.distinct(
        "disambiguation_records.record_id").all()

    # Union of the phonetic blocks of all pending records.
    blocks = {
        block
        for pending in pending_records
        for block in get_blocks_from_record(pending.record_id)
    }

    clustering_task = ('inspirehep.modules.disambiguation.tasks.'
                       'disambiguation_clustering')

    # One clustering job per phonetic block.
    while blocks:
        current_app.send_task(clustering_task, args=(blocks.pop(),))

    # Clear the database table.
    DisambiguationRecord.query.delete()
    db.session.commit()
    db.session.close()
def push_task(org, queue, task, args, priority=DEFAULT_PRIORITY):
    """
    Stores ``args`` as pending work for ``task`` on behalf of ``org`` and, when a
    celery queue is given, fires the celery task that will consume it.

    Ex: push_task(nyaruka, 'flows', 'start_flow', [1,2,3,4,5,6,7,8,9,10])
    """
    redis_conn = get_redis_connection('default')

    # The score mixes the current time with the priority. This could get us in
    # trouble if things stay queued for more than ~100 days, but otherwise work
    # is ordered first by priority and then by insertion order.
    rank = time.time() + priority

    org_queue_key = "%s:%d" % (task, org.id)
    active_set_key = "%s:active" % task

    # Push the work item and register its queue as active in one pipeline.
    with redis_conn.pipeline() as pipeline:
        pipeline.zadd(org_queue_key, dict_to_json(args), rank)
        # make sure this key is in our list of queues so the job gets worked on
        pipeline.sadd(active_set_key, org_queue_key)
        pipeline.execute()

    # Without a queue there is nothing to schedule on celery.
    if not queue:
        return

    # The fired task takes no arguments: it is expected to use pop_task with
    # the task name to find out what to work on.
    if getattr(settings, 'CELERY_ALWAYS_EAGER', False):
        lookup_task_function(task)()
    else:
        current_app.send_task(task, args=[], kwargs={}, queue=queue)
def mutate_and_get_payload(root, info, **input):
    """Flag an upload as uploaded and queue its processing task.

    The upload is looked up from the global id found under the ``id`` key of
    the mutation input. Returns an ``UploadFinished`` payload wrapping the
    updated upload.
    """
    upload_pk = Node.gid2id(input.get('id'))
    finished_upload = Upload.objects.get(id=upload_pk)

    finished_upload.status = UploadStatus.UPLOADED
    finished_upload.save()

    # Processing happens asynchronously; the task is addressed by name.
    current_app.send_task(
        'backend.contrib.tasks.process_upload',
        (finished_upload.id, ),
    )
    return UploadFinished(upload=finished_upload)
def compute_checksums(app_label, module_name, pk, min_size=104857600, formats=None,
                      sendtask_callback=None, sendtask_tasksetid=None):
    '''
    Compute the sha256 checksum of the resource files of a model object.

    Only resources that are regular files of at least ``min_size`` bytes are
    hashed. If a list of ``formats`` is given, only those checksums are
    recomputed and they are merged into the checksums already stored on the
    object; otherwise the stored checksums are replaced by the new ones.

    The result is written with ``QuerySet.update()`` (not ``save()``). When
    ``settings.USE_I18N`` is set, the same checksums are also written to the
    objects whose ``source`` is ``pk``.

    If ``sendtask_callback`` (an ``(args, kwargs)`` pair) is given, it is
    passed on to ``current_app.send_task`` once the checksums are stored.
    Nothing is stored and no callback is sent if the object does not exist.
    '''
    logger = get_task_logger(__name__)

    cls = apps.get_model(app_label, module_name)

    try:
        instance = cls.objects.get(pk=pk)
    except ObjectDoesNotExist:
        logger.warning("Could not find model object %s.%s:%s",
                       app_label, module_name, pk)
        return

    # With an explicit list of formats we extend the stored checksums;
    # guard against the stored value being empty/None.
    checksums = (instance.checksums or {}) if formats else {}

    for r_name in dir(instance.Archive):
        if not isinstance(getattr(instance.Archive, r_name), ResourceManager):
            continue

        if formats and r_name not in formats:
            continue

        resource = getattr(instance, 'resource_%s' % r_name)

        # Skip non-file resources (e.g. zoomable):
        if not resource or not os.path.isfile(resource.path):
            continue

        # Skip files smaller than min_size
        if resource.size < min_size:
            continue

        h = hashlib.sha256()
        # Binary mode is required: hashlib works on bytes, and text mode
        # would try to decode (or newline-translate) arbitrary file content.
        with open(resource.path, 'rb') as f:
            # Read 1MB at a time until EOF
            for buf in iter(lambda: f.read(2 ** 20), b''):
                h.update(buf)

        checksums[r_name] = h.hexdigest()
        logger.info('Computed checksum for "%s (%s)": %s', pk, r_name,
                    checksums[r_name])

    cls.objects.filter(pk=pk).update(checksums=checksums)
    if settings.USE_I18N:
        # Normally this would be done automatically in TranslationModel.save()
        # But we use update to avoid an infinite task loop
        cls.objects.filter(source=pk).update(checksums=checksums)

    # send_task callback
    if sendtask_callback:
        args, kwargs = sendtask_callback  # pylint: disable=W0633
        current_app.send_task(*args, **str_keys(kwargs))
def disambiguation_daemon():
    """Celery task that fans out clustering jobs for the queued records.

    Triggered by the Celery beat schedule (see 'config.py').

    Steps:

    1. load the distinct records stored in ``DisambiguationRecord``;
    2. collect the phonetic blocks of each record, without duplicates;
    3. send one ``disambiguation_clustering`` task per phonetic block;
    4. delete every row of the table and commit.
    """
    queued_records = DisambiguationRecord.query.distinct(
        "disambiguation_records.record_id").all()

    # A set guarantees that a block shared by several records runs once.
    blocks_to_cluster = set()
    for entry in queued_records:
        blocks_to_cluster |= set(get_blocks_from_record(entry.record_id))

    while blocks_to_cluster:
        next_block = blocks_to_cluster.pop()
        current_app.send_task(
            'inspirehep.modules.disambiguation.tasks.'
            'disambiguation_clustering',
            args=(next_block, ))

    # The table only acts as a work list: wipe it once jobs are dispatched.
    DisambiguationRecord.query.delete()
    db.session.commit()
    db.session.close()
def push_task(org, queue, task_name, args, priority=DEFAULT_PRIORITY):
    """
    Records ``args`` as a pending job of ``task_name`` for ``org`` in redis and,
    if ``queue`` is given, triggers the celery task named ``task_name`` on it.

    Ex: push_task(nyaruka, 'flows', 'start_flow', [1,2,3,4,5,6,7,8,9,10])
    """
    connection = get_redis_connection('default')

    # Score = now + priority: jobs are ordered by priority first and insertion
    # order second. This could get us in trouble if things are queued for more
    # than ~100 days.
    score = time.time() + priority

    with connection.pipeline() as batch:
        queue_key = "%s:%d" % (task_name, org.id)
        batch.zadd(queue_key, dict_to_json(args), score)

        # register the key among the active queues so that it gets worked on
        batch.sadd("%s:active" % task_name, queue_key)
        batch.execute()

    if not queue:
        return

    # The celery task is fired without arguments: it should rely on pop_task
    # with the task name to determine what to work on.
    run_inline = getattr(settings, 'CELERY_ALWAYS_EAGER', False)
    if not run_inline:
        current_app.send_task(task_name, args=[], kwargs={}, queue=queue)
        return

    task_function = lookup_task_function(task_name)
    task_function()
def delete_resources(app_label, module_name, object_id, resources, sendtask_callback=None, sendtask_tasksetid=None):
    """
    Remove the files or directories of the given resources of a model object.

    ``resources`` is an iterable of resource names; for each name the
    ``resource_<name>`` attribute of the object is looked up and, if set, its
    path is removed from disk. Failures are logged and do not stop the loop.
    The ``sendtask_callback`` (an ``(args, kwargs)`` pair) is sent even when
    the object could not be found.
    """
    log = delete_resources.get_logger()

    try:
        target = apps.get_model(app_label, module_name).objects.get(id=object_id)
    except ObjectDoesNotExist:
        target = None
        log.warning("Could not find model object %s.%s:%s" % (app_label, module_name, object_id))
    else:
        log.debug("Found %s.%s with id %s " % (app_label, module_name, object_id))

    if target:
        for name in resources:
            attribute = 'resource_%s' % name
            if not hasattr(target, attribute):
                continue

            res = getattr(target, attribute)
            try:
                if res is None:
                    continue
                # Directories need a recursive removal, plain files do not.
                remove = shutil.rmtree if os.path.isdir(res.path) else os.remove
                remove(res.path)
            except Exception:
                log.error("Error while deleting resource %s for model object %s.%s:%s" % (name, app_label, module_name, object_id))

    # send_task callback
    if sendtask_callback:
        args, kwargs = sendtask_callback  # pylint: disable=W0633
        current_app.send_task(*args, **str_keys(kwargs))
def handle_sync_permissions():
    """Publish every permission that has not been sent yet.

    Deleted permissions are announced with ``PERMISSIONS_DELETE`` (uuid only),
    all others with ``PERMISSIONS_NEW`` (uuid, service name, permission).
    Each processed permission is flagged as sent and the datastore is
    committed at the end.
    """
    # TODO @shipperizer move it to an internal function in the datastore
    model = datastore.permission_model
    unsent = datastore.db_session.query(model).filter(
        model.is_sent == False)  # noqa: E712 - builds a SQL expression

    for perm in unsent:
        if perm.is_deleted:
            log_template = 'deleting permission {}'
            task_name = PERMISSIONS_DELETE
            payload = (perm.uuid, )
        else:
            log_template = 'adding permission {}'
            task_name = PERMISSIONS_NEW
            payload = (perm.uuid, perm.service_name, perm.permission)

        logger.info(log_template.format(perm.uuid))
        # The task name doubles as the routing key.
        current_app.send_task(
            task_name,
            payload,
            exchange=MESSAGING_EXCHANGE,
            routing_key=task_name,
        )
        perm.is_sent = True

    datastore.commit()
def create_bulk_enrollment_job(cls, enqueuing_user_id, enterprise_customer_uuid, user_emails, course_run_keys,
                               notify_learners, subscription_uuid=None):
    """
    Saves a new job record and enqueues an asynchronous
    ``enterprise_enrollment_license_subsidy_task`` for the given batch of
    (users, courses).

    Returns the saved job instance.
    """
    job = cls(
        lms_user_id=enqueuing_user_id,
        enterprise_customer_uuid=enterprise_customer_uuid,
    )
    job.save()

    job_uuid = str(job.uuid)
    logger.info(
        'enqueuing enterprise_enrollment_license_subsidy_task '
        f'for bulk_enrollment_job_uuid={job_uuid}'
    )

    # The task is addressed by name to avoid a circular dependency
    # https://stackoverflow.com/a/26382812
    task_args = (
        job_uuid,
        str(enterprise_customer_uuid),
        user_emails,
        course_run_keys,
        notify_learners,
        subscription_uuid,
    )
    current_app.send_task(
        'license_manager.apps.api.tasks.enterprise_enrollment_license_subsidy_task',
        task_args,
    )
    return job
def image_color(image_id, sendtask_callback=None, sendtask_tasksetid=None):
    """
    Celery task that stores the dominant colors of an image.

    Colors are only computed (from the 'resource_medium' resource) when the
    image has none yet. A missing image is logged; the ``sendtask_callback``
    (an ``(args, kwargs)`` pair) is sent in either case.
    """
    from djangoplicity.media.models import Image, Color

    try:
        image = Image.objects.get(id=image_id)
        logger.debug("Found image %s " % (image.id))

        # Only images without colors need processing.
        if not image.colors.all():
            dominant = Color.create_dominant_colors(image, 'resource_medium')
            for color in dominant or ():
                color.save()
    except Image.DoesNotExist:
        logger.warning("Could not find image %s." % image_id)

    # send_task callback
    if sendtask_callback:
        # pylint: disable=W0633
        callback_args, callback_kwargs = sendtask_callback
        current_app.send_task(*callback_args, **str_keys(callback_kwargs))
def save(self, *args, **kwargs):
    """Save the instance and, on first save, queue CSV generation.

    An instance without a primary key before saving is treated as new; only
    then is ``crs.tasks.generate_csv_task`` sent (on the 'bill' queue) with
    the new primary key.
    """
    # Must be evaluated before saving: saving assigns the primary key.
    is_new = not self.pk

    super().save(*args, **kwargs)

    if not is_new:
        return

    current_app.send_task(
        'crs.tasks.generate_csv_task',
        args=(self.pk, ),
        queue='bill',
    )
def init_scrape(scrape_id):
    '''
    Mark the scrape as in progress and queue the review and question scrapers.
    '''
    scrape = Scrape.objects.get(id=scrape_id)
    scrape.status = ScrapeStatus.SCRAPPING
    scrape.save()

    # Reviews first, then questions; both tasks receive the scrape id.
    for step in ('scrape_reviews', 'scrape_questions'):
        current_app.send_task(f'{TASKS}.{step}', (scrape_id, ))
def scrape_answers(scrape_id, question_id):
    '''
    Flag the answers of a scrape as in progress, then signal completion.

    The question is only loaded and printed here; no answers are fetched by
    this function itself.
    '''
    current_scrape = Scrape.objects.get(id=scrape_id)
    current_scrape.status_answers = ScrapeStatus.SCRAPPING
    current_scrape.save()

    print(Question.objects.get(id=question_id))

    completed_task = f'{TASKS}.scrape_completed'
    current_app.send_task(completed_task, (scrape_id, ))
def generate_thumbnail(app_label, model_name, pk, sendtask_callback=None, sendtask_tasksetid=None):
    '''
    Generate a thumbnail if no original format is available

    The first format of ``cls.UPLOAD_FORMATS`` for which the video has a
    resource is used as input; a single frame taken at 5 seconds is written
    by ffmpeg to ``<MEDIA_ROOT>/<archive root>/original/<pk>.tif``.

    Nothing is generated (and no callback is sent) if the video does not
    exist or has no usable format. If the video already has an original
    resource, only the ``sendtask_callback`` (an ``(args, kwargs)`` pair) is
    sent.
    '''
    cls = apps.get_model(app_label, model_name)

    try:
        # Load video
        v = cls.objects.get(pk=pk)
    except cls.DoesNotExist:
        logger.warning('Could not find video %s.%s %s.', app_label, model_name, pk)
        return

    if not v.resource_original:
        # Identify the largest useable format
        path = ''
        for fmt in cls.UPLOAD_FORMATS:
            try:
                r = getattr(v, 'resource_' + fmt)
            except AttributeError:
                continue
            if not r:
                continue
            logger.info('Will use format: %s', fmt)
            path = r.path
            break
        else:
            logger.warning('Couldn\'t find valid format to upload for "%s"', pk)
            return

        output = os.path.join(settings.MEDIA_ROOT, cls.Archive.Meta.root, 'original', pk + '.tif')

        # Takes the screenshot at 5s
        position = 5

        # Pass an argument list and no shell: paths containing spaces or
        # shell metacharacters are handed to ffmpeg untouched.
        cmd = [
            'ffmpeg',
            '-ss', str(position),
            '-i', path,
            '-vframes', '1',
            '-q:v', '2',
            output,
        ]
        logger.info(' '.join(cmd))

        returncode = call(cmd)
        if returncode == 0:
            add_admin_history(v, 'Generated thumbnail from %s at %d seconds' % (fmt, position))  # pylint: disable=undefined-loop-variable
        else:
            # Do not record a thumbnail in the history if ffmpeg failed.
            logger.error('ffmpeg exited with code %s while generating thumbnail for "%s"', returncode, pk)

    # Send_task callback
    if sendtask_callback:
        args, kwargs = sendtask_callback
        current_app.send_task(*args, **str_keys(kwargs))
def request_groups_republish():
    """
    Ask for the groups to be republished when the datastore holds no group
    associations; do nothing otherwise.
    """
    if datastore.have_group_associations():
        return

    republish = 'complex-group.republish'
    # The task takes a single (empty) dict argument; its name is also the
    # routing key.
    current_app.send_task(
        republish,
        ({},),
        exchange='ts.messaging',
        routing_key=republish,
    )
def resend_failed_task(task, filtered_user=None):
    """
    Re-send a failed task that was recorded in the database.

    The stored ``args`` / ``kwargs`` strings of the task are parsed with
    ``ast.literal_eval`` and the task is sent again under the same name.
    Nothing is sent when the ``user_id`` keyword argument of the task is one
    of ``filtered_user``.

    :param task: djcelery.models.TaskState instance
    :param filtered_user: user ids to skip, e.g. [123, 4456]; ``None``
        (the default) filters nobody
    :return: None
    """
    task_args = ast.literal_eval(task.args)
    task_kwargs = ast.literal_eval(task.kwargs)

    # ``None`` replaces the former mutable ``[]`` default.
    if filtered_user and task_kwargs.get('user_id') in filtered_user:
        return

    current_app.send_task(task.name, args=task_args, kwargs=task_kwargs)
def problem_capture_invoice():
    """Suspend enterprise organizations whose invoice could not be captured.

    For every non-suspended organization on the enterprise plan whose billing
    reports ``problem_capture_invoice``, the organization is suspended, its
    billing is deactivated and an ``update_suspend_project`` task is sent for
    each of its projects.
    """
    enterprise_orgs = Organization.objects.filter(
        organization_billing__plan=BillingPlan.PLAN_ENTERPRISE,
        is_suspended=False)

    for org in enterprise_orgs:
        billing = org.organization_billing
        if not billing.problem_capture_invoice:
            continue

        org.is_suspended = True
        org.save(update_fields=["is_suspended"])

        billing.is_active = False
        billing.save(update_fields=["is_active"])

        for project in org.project.all():
            current_app.send_task(  # pragma: no cover
                name="update_suspend_project",
                args=[project.flow_organization, True],
            )
def reicever(*args, **kwargs):
    """
    Post-save hook of Source objects: queue the extraction of the source
    content.

    The task is only queued for saves made without ``update_fields``. This
    check prevents the event from triggering again and causing an endless
    loop.
    """
    saved_instance = kwargs['instance']

    if kwargs['update_fields'] is not None:
        return

    current_app.send_task(
        'sources_main.tasks.contentreader_task',
        (saved_instance.pk, ),
    )
def handle(self, *args, **options):
    """Upload an artifact file and queue its processing for an analysis.

    Reads ``register_provenance``, ``analysispk`` and ``artifact`` from the
    command options (the first element of the latter two is used), stores
    the artifact as an upload of type "A" owned by the first user profile
    and sends ``db.tasks.react_to_file`` for the new upload.
    """
    register_provenance = options['register_provenance']
    analysis_pk = options['analysispk'][0]
    artifact_path = options['artifact'][0]

    analysis = Analysis.objects.get(pk=analysis_pk)

    # The context manager closes the artifact once the upload is saved;
    # the handle used to stay open for the rest of the process.
    with open(artifact_path, 'rb') as artifact:
        upfile = File(upload_type="A", userprofile=UserProfile.objects.first())
        upfile.upload_file = files.File(artifact,
                                        name=artifact.name.split("/")[-1])
        upfile.save()

    current_app.send_task(
        'db.tasks.react_to_file',
        (upfile.pk, ),
        kwargs={
            'analysis_pk': analysis.pk,
            'register_provenance': register_provenance,
        })
def get(self, request):
    """Run a periodic task immediately.

    The task is identified by the ``id`` and ``name`` query parameters. Its
    stored args, kwargs, queue and routing key are reused; a missing queue
    (``None`` or the string 'None') falls back to 'celery'.

    Returns a JSON response with ``status`` 'success' and the id of the sent
    task, or ``status`` 'fail' when no such periodic task exists.
    """
    # ``task_pk`` instead of ``id`` to avoid shadowing the builtin.
    task_pk = self.request.GET.get('id', None)
    name = self.request.GET.get('name', None)

    try:
        periodic_task = PeriodicTask.objects.get(id=task_pk, name=name)
    except ObjectDoesNotExist:
        response = {
            'status': 'fail',
            'message': 'task name %s doesn\'t exits' % (name)
        }
        return HttpResponse(json.dumps(response),
                            content_type="application/json")

    args = json.loads(periodic_task.args)
    kwargs = json.loads(periodic_task.kwargs)
    queue = periodic_task.queue
    routing_key = periodic_task.routing_key
    task_name = periodic_task.task

    if queue is None or queue == 'None':
        queue = 'celery'

    res = current_app.send_task(task_name,
                                args=args,
                                kwargs=kwargs,
                                queue=queue,
                                routing_key=routing_key)
    response = {
        'status': 'success',
        'message': 'task name %s has been running,task id is %s' % (name, res.id)
    }
    return HttpResponse(json.dumps(response),
                        content_type="application/json")
def ingest_artifact_directory(directory, analysis, userprofile, sleep=1):
    """Upload every ``.qza`` / ``.qzv`` file of a directory and queue it.

    ``directory`` is resolved relative to ``settings.BASE_DIR``. Each
    matching file is saved as an upload of type "A" owned by ``userprofile``
    and ``db.tasks.react_to_file`` is sent for it with the pk of
    ``analysis``. The function sleeps ``sleep`` seconds after each file.

    The working directory is changed while ingesting and is restored
    afterwards, even if an upload fails.
    """
    directory = os.path.relpath(directory, settings.BASE_DIR)
    prev_dir = os.getcwd()
    os.chdir(settings.BASE_DIR + "/" + directory)
    try:
        for filename in os.listdir("."):
            if not filename.endswith((".qza", ".qzv")):
                continue

            print("Ingesting %s via celery" % (filename, ))
            # Close the file as soon as the upload has been saved.
            with open(filename, 'rb') as handle:
                upf = UploadFile(upload_file=File(handle),
                                 userprofile=userprofile,
                                 upload_type="A")
                upf.save()

            current_app.send_task('db.tasks.react_to_file', (upf.pk, ),
                                  kwargs={'analysis_pk': analysis.pk})
            # Give this file a headstart to process in a concurrency scenario
            time.sleep(sleep)
    finally:
        os.chdir(prev_dir)
def message(request):
    """
    Create a new message.

    Reads ``text``, ``modelType`` and ``modelId`` from the POST data and
    queues a channel message when ``modelType`` is 'channel', a private
    message otherwise. Always answers with an empty JSON object.
    """
    post = request.POST
    text = post.get("text")
    model_type = post.get("modelType")
    target_uuid = post.get("modelId")

    if model_type == 'channel':
        task_name = "core.tasks.create_message_4_web"
    else:
        task_name = "core.tasks.create_pm_4_web"

    # NOTE(review): request.user is passed to the task as-is; this presumably
    # relies on a serializer able to handle that object - confirm.
    current_app.send_task(task_name, args=[request.user, target_uuid, text])

    return JsonResponse({})
def audio_extras(app_label, module_name, pks, formats=None, sendtask_callback=None, sendtask_tasksetid=None):
    '''
    Run ``get_audio_duration`` for each of the given archives, then send the
    optional ``sendtask_callback`` (an ``(args, kwargs)`` pair).

    ``formats`` is accepted but not used by this function.
    '''
    for archive_pk in pks:
        get_audio_duration(app_label, module_name, archive_pk)

    # send_task callback
    if not sendtask_callback:
        return

    callback_args, callback_kwargs = sendtask_callback  # pylint: disable=W0633
    current_app.send_task(*callback_args, **str_keys(callback_kwargs))
def retry_billing_tasks():
    """Re-send the failed sync tasks that have not been retried yet.

    Every such task is flagged as retried and saved; tasks of type
    "count_contacts" or "sync_contacts" are then sent again under that name
    with their ``before``, ``after`` and ``uuid`` values. Always returns True.
    """
    resendable_types = ("count_contacts", "sync_contacts")

    for failed in SyncManagerTask.objects.filter(status=False, retried=False):
        # Flag first so that the task is retried once only.
        failed.retried = True
        failed.save()

        if failed.task_type in resendable_types:
            current_app.send_task(  # pragma: no cover
                name=failed.task_type,
                args=[failed.before, failed.after, failed.uuid])

    return True
def resubmit(self):
    """Send a copy of this task with the same args and kwargs, then delete
    this record. Returns the result object of the newly sent task."""
    from celery import current_app

    # NOTE(review): eval() executes whatever expression is stored in
    # task_args / task_kwargs. Only safe if those fields can never hold
    # untrusted data - confirm, or consider ast.literal_eval.
    stored_args = eval(self.task_args)
    stored_kwargs = eval(self.task_kwargs)

    new_result = current_app.send_task(
        self.task_name,
        args=stored_args,
        kwargs=stored_kwargs,
    )
    self.delete()
    return new_result
def save_model(self, request, user, form, change):
    """Save a user from the admin, hashing a changed password.

    When the form reports 'password' as changed and the user has a password,
    it is replaced by its hash before saving. After saving, a 'verify' email
    notification task is queued for users that are not verified.
    """
    if 'password' in form.changed_data and user.password:
        raw_password = user.password.encode('utf-8')
        user.password = hashpw(raw_password, gensalt(10))

    user.save()

    if user.is_verified:
        return

    notification_task = 'api.tasks.email_notifications'
    # Queue and routing key are named after the task itself.
    current_app.send_task(
        notification_task,
        (
            user.id,
            'verify',
        ),
        queue=notification_task,
        routing_key=notification_task,
        serializer='json',
    )
def send_task(self):
    """
    Send the collected tasks so that they run one after the other.

    Only one task is actually sent from here: every other task is attached
    to the task popped after it, under the ``sendtask_callback`` key of that
    task's ``kwargs`` (together with ``sendtask_tasksetid``), so that each
    task can send the next one when it finishes.

    Raises ``Exception`` if the task set has already been applied.
    """
    if not self.ran:
        if len(self.taskset) > 0:
            # The (args, kwargs) of the task popped in the previous iteration.
            next_args, next_kwargs = None, None

            # Entries are taken with pop(); presumably taskset is a list, so
            # they come out last-to-first - TODO confirm.
            while len(self.taskset) > 0:
                args, kwargs = self.taskset.pop()

                # Every task but the first one popped gets the previously
                # popped task as its callback.
                if next_args is not None and next_kwargs is not None:
                    if 'kwargs' not in kwargs:
                        kwargs['kwargs'] = {}
                    kwargs['kwargs']['sendtask_callback'] = ( next_args, next_kwargs )
                    kwargs['kwargs']['sendtask_tasksetid'] = self.id

                next_args, next_kwargs = args, kwargs

            # args/kwargs now belong to the entry popped last; it carries
            # the rest of the chain in its callback.
            current_app.send_task( *args, **kwargs )
            self.ran = True
    else:
        raise Exception( "SerialSendTaskSet already applied" )
def sync_content_server(module_path, cls_name, instance_id, formats=None,
                        delay=False, prefetch=True, purge=True,
                        sendtask_callback=None, sendtask_tasksetid=None):
    '''
    Task that synchronises the resources of the given archive onto its
    content server.

    Django classes can't be serialized easily, so the module path and class
    name are passed instead and the class is imported dynamically.

    The task returns early (without sending the callback) outside of
    production and when the archive does not exist. An archive without a
    content server is left alone; the ``sendtask_callback`` (an
    ``(args, kwargs)`` pair) is still sent in that case.
    '''
    if settings.SITE_ENVIRONMENT != 'prod':
        logger.info('%s only runs on a production system', sync_content_server.__name__)
        return

    # Dynamically import the class
    cls = getattr(import_module(module_path), cls_name)

    try:
        archive = cls.objects.get(id=instance_id)
    except cls.DoesNotExist:
        logger.warning('Could not find archive "%s" (%s)', instance_id, cls)
        return

    server_name = getattr(archive, 'content_server', None)
    if server_name:
        try:
            server = MEDIA_CONTENT_SERVERS[server_name]
            server.sync_resources(archive, formats, delay, prefetch, purge)
        except KeyError:
            logger.warning('Unkown content server: "%s" for %s: "%s"',
                           server_name, cls, archive.id)

    # send_task callback
    if sendtask_callback:
        callback_args, callback_kwargs = sendtask_callback  # pylint: disable=W0633
        current_app.send_task(*callback_args, **str_keys(callback_kwargs))
def start_task(self, task: Task, **kwargs):
    """Send ``task`` to celery under its own id and return it.

    Success and failure callbacks are linked on the 'system' queue. An
    optional ``countdown`` keyword argument is forwarded to ``send_task``.
    """
    success_callback = on_task_success.s(task.dict()).set(queue='system')
    failure_callback = on_task_failure.s().set(queue='system')

    async_result = current_app.send_task(
        task.task_name,
        args=[task.args],
        task_id=task.id,
        link=success_callback,
        link_error=failure_callback,
        countdown=kwargs.get('countdown'),
    )

    # Results are saved on task success!
    async_result.forget()
    return task
def save(self, *args, **kwargs):
    """Save the meeting, refresh the study group dates and update reminders.

    After saving, the start and end date of the study group are set from
    its first and last meeting (when it has active meetings). When the
    meeting is new, rescheduled or deleted, its reminders are updated: a
    reminder that was already sent is disassociated and a change
    notification may be queued, otherwise existing reminders are deleted.
    A new reminder is generated for future meetings that are not deleted.
    """
    # Must be read before saving: saving assigns the primary key.
    created = not self.pk
    super().save(*args, **kwargs)

    # Keep the study group dates in line with its active meetings.
    if self.study_group.meeting_set.active().count():
        self.study_group.start_date = self.study_group.first_meeting().meeting_date
        self.study_group.end_date = self.study_group.last_meeting().meeting_date
        self.study_group.save()

    # _original_meeting_time/_original_meeting_date are presumably the values
    # the instance was loaded with - TODO confirm where they are set.
    rescheduled = any([
        self._original_meeting_time != self.meeting_time,
        self._original_meeting_date != self.meeting_date,
    ])
    deleted = self.deleted_at

    if rescheduled or created or deleted:
        if self.reminder_set.filter(sent_at__isnull=False).count() == 1:
            # a reminder has been sent, disassociate it
            self.reminder_set.update(study_group_meeting=None)

            # Original meeting date/time, localized to the study group timezone.
            tz = pytz.timezone(self.study_group.timezone)
            original_meeting_datetime = tz.localize(
                datetime.datetime.combine(self._original_meeting_date,
                                          self._original_meeting_time))

            # the previous date was in the future
            if original_meeting_datetime > timezone.now():
                # send meeting change notification
                current_app.send_task(
                    'studygroups.tasks.send_meeting_change_notification',
                    (self.pk, original_meeting_datetime))
        else:
            # no reminder has been sent
            # delete reminder if any
            self.reminder_set.all().delete()

        # generate a reminder if the meeting is in the future
        if self.meeting_datetime() > timezone.now() and not self.deleted_at:
            generate_meeting_reminder(self)
def method_delay(self, *args, **kwargs):
    """Send the task ``task_name`` for this instance.

    The keyword arguments are extended with the class name, the instance id
    and the instantiation arguments registered for the instance in
    ``instances`` of its class. Returns the result of ``send_task``.
    """
    owner = self.__class__
    instance_id = self.__instance_id__

    # The registry is keyed by (instance id, class); the first element of
    # the stored pair is not needed here.
    _, instantiation_args = owner.instances[(instance_id, owner)]

    kwargs["__class_name__"] = owner.__name__
    kwargs["__instance_id__"] = instance_id
    kwargs["__instantiation_args__"] = instantiation_args

    return current_app.send_task(task_name, args, kwargs)
def retry_task(modeladmin, request, queryset):
    """Admin site action that sends the selected tasks again.

    Each selected task is re-sent under its stored name with its
    deserialized args and kwargs; the ids of the new tasks are reported to
    the user in a single message.
    """
    sent_ids = [
        current_app.send_task(
            record.task,
            args=serialization.loads(record.args),
            kwargs=serialization.loads(record.kwargs),
        ).task_id
        for record in queryset.all()
    ]
    modeladmin.message_user(request, "Tasks sent: {0}".format(sent_ids))
def dispatch_run_task(sender, instance, created, **kwargs):
    """Queue the execution of newly created code and remember the task id.

    Nothing happens for instances that were not just created. Otherwise the
    'tasks.sandbox.run_user_code' task is sent with the language code, code
    and stdin of the instance, chained to ``save_run_output``; the id of the
    sent task is stored on the instance, which is then saved.
    """
    if not created:
        return

    # Without the .set() this request goes to the default queue
    # instead of the queue defined in settings. WHY?!
    store_output = save_run_output.s(instance.id).set(queue='callbacks')

    run_args = (instance.language.code, instance.code, instance.stdin)
    async_result = current_app.send_task(
        'tasks.sandbox.run_user_code',
        run_args,
        chain=[store_output],
    )

    instance.celery_task_id = async_result.task_id
    instance.save()
def send_scheduled_task():
    """Task that sends due scheduled tasks for execution.

    Every DB operation must be in its own commit, so that a task is sent
    **at most once** even if send_scheduled_task runs several times
    concurrently. If a crash occurs while sending a task, that task stays
    indefinitely in the SCHEDULED state while having a schedule id.

    1. Due tasks are marked with a schedule id generated for this run. This
       prevents a task from being sent for execution twice.
    2. Each task carrying that schedule id is sent for execution without an
       ETA.
    3. After a task has been sent, its status changes from SCHEDULED to
       SCHEDULED SENT and the id of the new task is stored as its result.

    **IMPORTANT: ** Never call this task inside an atomic block or you could
    end up sending tasks more than once. Always use autocommit (each SQL
    query executed in its own transaction).
    """
    limit = now()
    schedule_id = uuid4().hex

    # Mark tasks ready to be scheduled
    due_tasks = TaskResultMeta.objects.filter(
        scheduled_id__isnull=True, eta__lt=limit, status=SCHEDULED)
    due_tasks.update(scheduled_id=schedule_id)

    # Fetch and apply by removing eta
    claimed_tasks = TaskResultMeta.objects.filter(scheduled_id=schedule_id).all()
    for scheduled in claimed_tasks:
        sent = current_app.send_task(
            scheduled.task,
            args=serialization.loads(scheduled.args),
            kwargs=serialization.loads(scheduled.kwargs))

        scheduled.status = SCHEDULED_SENT
        scheduled.result = {"new_task_id": sent.task_id}
        scheduled.save()
def fibonacci(n):
    """Send one 'tasks.calcula_fibonacci' task per value in ``range(n)``.

    Each task is sent to the 'remote-worker' queue with the index as its
    only argument; the function waits for each result and prints it before
    sending the next task. Nothing is sent when ``n`` is zero or negative.
    """
    for i in range(n):
        task = app.send_task('tasks.calcula_fibonacci', args=(i,),
                             queue='remote-worker')
        # print() as a function call is valid on both Python 2 and 3;
        # the former print statement is a syntax error on Python 3.
        print(task.get())