def after_return(self, status, retval, task_id, args, kwargs, einfo):
    # This will only run in the PCF environment to shut down unused workers.
    super(EventKitBaseTask, self).after_return(status, retval, task_id, args, kwargs, einfo)
    pcf_scaling = os.getenv("PCF_SCALING", False)
    if pcf_scaling:
        from eventkit_cloud.tasks.util_tasks import shutdown_celery_workers

        queue_type, hostname = self.request.hostname.split("@")
        # In our current setup the queue name always mirrors the routing_key; if this changes, this logic will break.
        queue_name = self.request.delivery_info["routing_key"]
        if not getattr(settings, "CELERY_SCALE_BY_RUN", False):
            logger.info(f"{self.name} has completed, sending shutdown_celery_workers task to queue {queue_name}.")
            if os.getenv("CELERY_TASK_APP"):
                app_name = os.getenv("CELERY_TASK_APP")
            else:
                app_name = json.loads(os.getenv("VCAP_APPLICATION", "{}")).get("application_name")
            if os.getenv("PCF_SCALING"):
                client = Pcf()
                client.login()
            else:
                client = Docker()
                app_name = settings.DOCKER_IMAGE_NAME
            # The message was a generic shutdown sent to a specific queue_name.
            if not (hostname or queue_type):
                queue_type, hostname = self.request.hostname.split("@")
            workers = [f"{queue_type}@{hostname}", f"priority@{hostname}"]
            if queue_type in ["run", "scale"]:
                return {"action": "skip_shutdown", "workers": workers}
            messages = get_message_count(queue_name)
            running_tasks_by_queue = client.get_running_tasks(app_name, queue_name)
            logger.info(f"Running tasks by queue: {running_tasks_by_queue}")
            running_tasks_by_queue_count = running_tasks_by_queue["pagination"]["total_results"]
            export_tasks = ExportTaskRecord.objects.filter(
                worker=hostname,
                status__in=[task_state.value for task_state in TaskState.get_not_finished_states()],
            )
            if not export_tasks:
                # Shut down when more app instances are running than there are queued messages, or the queue is idle.
                if running_tasks_by_queue_count > messages or (running_tasks_by_queue_count == 0 and messages == 0):
                    shutdown_celery_workers.s().apply_async(queue=queue_name, routing_key=queue_name)
                    # The return value is unused but useful for storing in the celery result.
                    return {"action": "shutdown", "workers": workers}
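# A minimal sketch, not part of the original task: the shutdown condition above reduces to the
# predicate below. The function and argument names are illustrative assumptions, shown only to
# make the decision easier to read in isolation.
def should_shutdown_queue(running_tasks_count, queued_messages):
    """Shut a queue's workers down only when nothing is left for them to do."""
    # Either more app instances are running than there are queued messages, or the queue is idle.
    return running_tasks_count > queued_messages or (running_tasks_count == 0 and queued_messages == 0)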
def rerun_data_provider_records(run_uid, user_id, data_provider_slugs):
    from eventkit_cloud.tasks.task_factory import Error, InvalidLicense, Unauthorized, create_run

    with transaction.atomic():
        old_run: ExportRun = ExportRun.objects.select_related("job__user", "parent_run__job__user").get(uid=run_uid)
        user: User = User.objects.get(pk=user_id)

        while old_run and old_run.is_cloning:
            # Find pending providers and add them to the list.
            for dptr in old_run.data_provider_task_records.all():
                if dptr.status == TaskState.PENDING.value:
                    data_provider_slugs.append(dptr.provider.slug)
            old_run = old_run.parent_run

        # Remove any duplicates.
        data_provider_slugs = list(set(data_provider_slugs))

        try:
            new_run_uid = create_run(job=old_run.job, user=user, clone=old_run, download_data=False)
        except Unauthorized:
            raise PermissionDenied(
                code="permission_denied", detail="ADMIN permission is required to run this DataPack."
            )
        except (InvalidLicense, Error) as err:
            return Response([{"detail": _(str(err))}], status.HTTP_400_BAD_REQUEST)

        run: ExportRun = ExportRun.objects.get(uid=new_run_uid)

        # Reset the old data provider task records for the providers we're recreating.
        data_provider_task_record: DataProviderTaskRecord
        run.data_provider_task_records.filter(slug="run").delete()
        for data_provider_task_record in run.data_provider_task_records.all():
            if data_provider_task_record.provider is not None:
                # Have to clean out the tasks that were finished and request the ones that weren't.
                if (
                    data_provider_task_record.provider.slug in data_provider_slugs
                    or TaskState[data_provider_task_record.status] in TaskState.get_not_finished_states()
                ):
                    data_provider_task_record.status = TaskState.PENDING.value
                    # Delete the associated tasks so that they can be recreated.
                    data_provider_task_record.tasks.all().delete()
                    data_provider_task_record.save()

        run.status = TaskState.SUBMITTED.value
        run.save()
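# Hypothetical usage sketch: the run, user, and slug values below are placeholders, not part of
# the original code. It shows how a view or task might request that only selected providers be
# recreated; providers with unfinished task records on the cloned run are re-run regardless.
rerun_data_provider_records(
    run_uid=old_run.uid,          # UID of the ExportRun to clone
    user_id=requesting_user.id,   # user must have ADMIN permission on the DataPack's job
    data_provider_slugs=["osm"],  # providers whose task records should be reset to PENDING
)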
def after_return(self, status, retval, task_id, args, kwargs, einfo):
    # This will only run in the PCF environment to shut down unused workers.
    super(EventKitBaseTask, self).after_return(status, retval, task_id, args, kwargs, einfo)
    pcf_scaling = settings.PCF_SCALING  # type: ignore  # issue with django-stubs
    if pcf_scaling:
        from eventkit_cloud.tasks.scheduled_tasks import kill_worker  # type: ignore

        queue_type, hostname = self.request.hostname.split("@")
        # In our current setup the queue name always mirrors the routing_key; if this changes, this logic will break.
        queue_name = self.request.delivery_info["routing_key"]
        if not getattr(settings, "CELERY_SCALE_BY_RUN", False):
            logger.info(f"{self.name} has completed, shutting down queue {queue_name}.")
            client, app_name = get_scale_client()
            # The message was a generic shutdown sent to a specific queue_name.
            if not (hostname or queue_type):
                queue_type, hostname = self.request.hostname.split("@")
            workers = [f"{queue_type}@{hostname}", f"priority@{hostname}"]
            if queue_type in ["run", "scale"]:
                return {"action": "skip_shutdown", "workers": workers}
            messages = get_message_count(queue_name)
            running_tasks_by_queue = client.get_running_tasks(app_name, queue_name)
            logger.info(f"Running tasks by queue: {running_tasks_by_queue}")
            running_tasks_by_queue_count = running_tasks_by_queue["pagination"]["total_results"]
            export_tasks = ExportTaskRecord.objects.filter(
                worker=hostname,
                status__in=[task_state.value for task_state in TaskState.get_not_finished_states()],
            )
            if not export_tasks:
                # Shut down when more app instances are running than there are queued messages, or the queue is idle.
                if running_tasks_by_queue_count > messages or (running_tasks_by_queue_count == 0 and messages == 0):
                    kill_worker(task_name=queue_name, client=client)
                    # The return value is unused but useful for storing in the celery result.
                    return {"action": "shutdown", "workers": workers}
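# Sketch with assumed example values: Celery worker hostnames have the form "<queue_type>@<host>",
# so splitting on "@" yields the queue the worker consumes and the machine it runs on. Both that
# worker and its "priority" sibling on the same host are the shutdown targets.
queue_type, hostname = "osm@runner-1".split("@")
workers = [f"{queue_type}@{hostname}", f"priority@{hostname}"]
assert workers == ["osm@runner-1", "priority@runner-1"]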