def get_active_user_tasks(self) -> QuerySet:
    execution_delay = now() - datetime.timedelta(seconds=settings.USER_TASK_EXECUTION_DELAY)
    start_date_limit = now() - datetime.timedelta(seconds=3 * 24 * 60 * 60)
    return self \
        .filter(Q(main_task__isnull=True) | Q(main_task_id=F('id'))) \
        .filter(status__in=UNREADY_STATES) \
        .exclude(name__in=settings.EXCLUDE_FROM_TRACKING) \
        .filter(Q(date_start__isnull=True) | Q(date_start__gt=start_date_limit)) \
        .filter(Q(date_work_start__isnull=True) | Q(date_work_start__gt=execution_delay))

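# The state constants used throughout these methods (PENDING, SUCCESS, FAILURE,
# READY_STATES, UNREADY_STATES, PROPAGATE_STATES) appear to follow Celery's
# state vocabulary. A sketch of the likely imports (an assumption: the repo may
# define its own equivalents instead):
from celery.states import (PENDING, SUCCESS, FAILURE,
                           READY_STATES, UNREADY_STATES, PROPAGATE_STATES)
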
def mark_main_task_completed():
    # TODO: support the "propagating exception" flag properly.
    # The parent task should not crash if the child was not marked as
    # "propagating exception" (important/critical).
    # For now the parent crashes whenever any sub-task crashes.
    completed = total_status in READY_STATES
    date_done = total_date_done or (now() if completed else None)
    self.filter(id=parent_task_id).update(date_done=date_done,
                                          status=total_status,
                                          completed=completed,
                                          progress=total_progress)
    try:
        main_task = self.get(id=parent_task_id)  # type: Task
        if main_task.parent_task_id is not None:
            from apps.task.tasks import update_parent_task
            update_parent_task.apply_async((main_task.parent_task_id,))
        if total_status != SUCCESS:
            main_task.write_log(
                '{0} #{1}: some/all of the sub-tasks have crashed'
                .format(main_task.name, parent_task_id),
                level='error')
    except Exception:  # a bare "except:" would also swallow SystemExit/KeyboardInterrupt
        import logging
        logging.error('Unable to log SUCCESS/FAILURE to the task log. Task id: {0}'
                      .format(parent_task_id))
    if total_status in PROPAGATE_STATES:
        revoke_task(AsyncResult(parent_task_id))

def report_on_failed_tasks(cls) -> None:
    """
    Find top-level tasks that have not completed for a long time or are in a
    failed state, and report on each of these tasks in a single email.
    """
    if not ENABLE_ALERTS.val:
        return
    configs = list(TaskConfig.objects.filter(notify_on_fail=True))
    if not configs:
        return
    all_names = [c.name for c in configs]
    now_time = now()
    # wait N seconds for the log records to be stored
    failed_before = now_time - datetime.timedelta(seconds=cls.MESSAGE_COOLDOWN_SECONDS)
    failed_tasks = list(Task.objects.filter(status=FAILURE,
                                            parent_task_id__isnull=True,
                                            failure_reported=False,
                                            name__in=all_names,
                                            date_done__lt=failed_before))  # type: List[Task]
    default_watch_mins = ALERT_DEFAULT_INTERVAL.val
    name_by_interval = {c.watchdog_minutes or default_watch_mins: c.name
                        for c in configs}
    for interval, task_name in name_by_interval.items():
        max_start = now_time - datetime.timedelta(minutes=interval)
        failed_tasks += Task.objects.filter(status=PENDING,
                                            parent_task_id__isnull=True,
                                            failure_reported=False,
                                            name=task_name,
                                            date_work_start__lt=max_start)
    if not failed_tasks:
        return
    # notify the admins about the failed tasks
    cls.send_emails_on_failed_tasks(failed_tasks)

def on_task_completed(self, succeeded: bool):
    if self.has_sub_tasks:
        self.own_progress = 100
        self.own_status = SUCCESS if succeeded else FAILURE
        self.own_date_done = self.own_date_done or now()
        self.save()
        return
    self.update_progress(100, succeeded)

def init_task(self,
              task_id: str,
              task_name: str,
              main_task_id: str,
              parent_task_id: str,
              description: str = None,
              args: Tuple = None,
              source_data=None,
              run_after_sub_tasks_finished=False,
              run_if_parent_task_failed=False):
    if description and len(description) > 1020:
        description = description[:1020]
    obj, created = self.get_or_create(id=task_id, defaults={
        'name': task_name,
        'description': description,
        'main_task_id': main_task_id,
        'parent_task_id': parent_task_id,
        'date_start': now(),
        'args': args,
        'source_data': source_data,
        'status': PENDING,
        'own_status': PENDING,
        'progress': 0,
        'own_progress': 0,
        'failure_reported': False,
        'run_after_sub_tasks_finished': run_after_sub_tasks_finished,
        'run_if_parent_task_failed': run_if_parent_task_failed
    })
    if not created:
        # If the task model already exists then this is a retry of a failed task:
        # clear its status and progress.
        obj.date_done = None
        obj.own_date_done = None
        obj.status = PENDING
        obj.own_status = PENDING
        obj.progress = 0
        obj.own_progress = 0
        obj.save()
    return obj

def report_on_failed_tasks(cls) -> None:
    """
    Find top-level tasks that have not completed for a long time or are in a
    failed state, and report on each of these tasks in a single email.
    """
    from apps.project.notifications import notify_failed_load_document
    from apps.task.tasks import LoadDocuments
    from apps.project.tasks import LoadArchive

    recent_failed_load_document_tasks = Task.objects \
        .filter(name__in=[LoadDocuments.name, LoadArchive.name], status=FAILURE) \
        .exclude(metadata__failed_load_document_sent__isnull=False)
    for task in recent_failed_load_document_tasks:
        if not task.metadata:
            continue
        # send a WS notification (include task data for debugging)
        file_name = task.metadata.get('file_name')
        session_id = task.metadata.get('session_id')
        if file_name and session_id:
            notify_failed_load_document(
                file_name=file_name,
                session_id=session_id,
                directory_path=task.kwargs.get('directory_path') if task.kwargs else None)
            task.metadata['failed_load_document_sent'] = True
            task.save()

    from apps.task.app_vars import ENABLE_ALERTS
    if not ENABLE_ALERTS.val:
        return
    configs = list(TaskConfig.objects.filter(notify_on_fail=True))
    if not configs:
        return
    all_names = [c.name for c in configs]
    now_time = now()
    # wait N seconds for the log records to be stored
    failed_before = now_time - datetime.timedelta(seconds=cls.MESSAGE_COOLDOWN_SECONDS)
    failed_tasks = list(Task.objects.filter(status=FAILURE,
                                            parent_task_id__isnull=True,
                                            failure_reported=False,
                                            name__in=all_names,
                                            date_done__lt=failed_before))  # type: List[Task]
    from apps.task.app_vars import ALERT_DEFAULT_INTERVAL
    default_watch_mins = ALERT_DEFAULT_INTERVAL.val
    name_by_interval = {c.watchdog_minutes or default_watch_mins: c.name
                        for c in configs}
    for interval, task_name in name_by_interval.items():
        max_start = now_time - datetime.timedelta(minutes=interval)
        failed_tasks += Task.objects.filter(status=PENDING,
                                            parent_task_id__isnull=True,
                                            failure_reported=False,
                                            name=task_name,
                                            date_work_start__lt=max_start)
    if not failed_tasks:
        return
    # notify the admins about the failed tasks
    cls.send_emails_on_failed_tasks(failed_tasks)

def save(self, *args, **kwargs):
    if not self.display_name:
        self.display_name = TASK_FRIENDLY_NAME.get(self.name) or self.name
    if self.progress > 100:
        self.progress = 100
    if self.status in FAIL_READY_STATES:
        self.progress = 100
        self.own_status = self.status
        self.date_done = self.date_done or now()
    if self.status in READY_STATES:
        self.own_status = self.status
        self.progress = 100
        self.date_done = self.date_done or now()
    if self.own_status in READY_STATES:
        self.own_progress = 100
        self.own_date_done = self.own_date_done or now()
    super().save(*args, **kwargs)

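# A minimal usage sketch (hypothetical values; requires a configured database)
# of the normalization save() performs above: a task saved in a ready state
# gets its progress forced to 100 and its completion timestamps filled in.
t = Task(name='clean_tasks', status=SUCCESS, progress=40)
t.save()
assert t.progress == 100 and t.own_progress == 100
assert t.date_done is not None and t.own_date_done is not None
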
def init_task(self,
              task_id: str,
              task_name: str,
              main_task_id: str,
              description: str = None,
              args: Tuple = None,
              source_data=None,
              run_after_sub_tasks_finished=False):
    try:
        main_task = self.get(id=main_task_id) \
            if main_task_id and main_task_id != task_id else None
    except Exception:
        print('Bad sub-task: task_id={0}, main_task_id={1}, task_name={2}'
              .format(task_id, main_task_id, task_name))
        raise
    if description and len(description) > 1020:
        description = description[:1020]
    obj, created = self.get_or_create(id=task_id, defaults={
        'name': task_name,
        'description': description,
        'main_task_id': main_task.id if main_task else None,
        'date_start': now(),
        'args': args,
        'source_data': source_data,
        'run_after_sub_tasks_finished': run_after_sub_tasks_finished
    })
    if not created:
        obj.name = task_name
        obj.description = description
        obj.main_task_id = main_task.id if main_task else None
        obj.date_done = None
        obj.run_after_sub_tasks_finished = run_after_sub_tasks_finished
        obj.save()
    return obj

def update_progress(self, new_progress: int, succeeded: bool = True):
    new_progress = min(new_progress, 100)
    self.own_progress = new_progress
    if self.own_progress == 100:
        self.own_status = SUCCESS if succeeded else FAILURE
        now_time = now()
        self.own_date_done = self.own_date_done or now_time
    try:
        if self.has_sub_tasks:
            self.save(update_fields=['own_progress', 'own_status', 'own_date_done'])
        else:
            self.status = self.own_status
            self.progress = self.own_progress
            self.date_done = self.own_date_done
            self.save(update_fields=['progress', 'own_progress', 'status', 'own_status',
                                     'date_done', 'own_date_done'])
    except DatabaseError:
        # the task itself might have been deleted
        pass

def update_progress(self, new_progress: int, succeeded: bool = True):
    new_progress = self.ensure_correct_progress(new_progress)
    if new_progress == self.progress:
        return
    self.progress = min(new_progress, 100)
    if self.progress == 100:
        self.status = SUCCESS if succeeded else FAILURE
        if self.own_status not in READY_STATES:
            self.own_status = self.status
        now_time = now()
        self.own_date_done = self.own_date_done or now_time
        self.date_done = self.date_done or now_time
    try:
        self.save(update_fields=['progress', 'own_progress', 'status', 'own_status',
                                 'date_done', 'own_date_done'])
    except DatabaseError:
        # the task itself might have been deleted
        pass
    # propagate the changes to the parent tasks
    parent_task = self.parent_task  # type: Task
    if not parent_task:
        return
    progresses = list(Task.objects.filter(parent_task_id=parent_task.pk)
                      .values_list('progress', flat=True))
    if progresses:
        parent_progress = int((sum(progresses) + parent_task.own_progress) /
                              (len(progresses) + 1))
        parent_task.update_progress(parent_progress)

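# A worked example (hypothetical numbers) of the parent-progress arithmetic in
# update_progress() above: the parent's progress is the mean over its own
# progress and all of its children's progresses.
child_progresses = [50, 100]  # two sub-tasks
parent_own_progress = 100     # the parent's own work is finished
parent_progress = int((sum(child_progresses) + parent_own_progress) /
                      (len(child_progresses) + 1))
assert parent_progress == 83  # int(250 / 3)
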
def duration(self):
    if not self.date_start:
        return None
    date_done = self.date_done or now()
    return date_done - self.date_start

def get_all_expired(self, expires):
    """Get all expired task results."""
    return self.filter(own_date_done__lt=now() - maybe_timedelta(expires))

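# A minimal usage sketch of get_all_expired(). maybe_timedelta (as in
# celery.utils.time.maybe_timedelta) accepts either a number of seconds or a
# ready-made timedelta, so the two calls below should be equivalent
# (assumption: this method lives on Task's default manager):
Task.objects.get_all_expired(3600).delete()
Task.objects.get_all_expired(datetime.timedelta(hours=1)).delete()
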
def store_result(self,
                 task_id: str,
                 main_task_id: str,
                 task_name: str,
                 result,
                 status: str,
                 traceback=None,
                 metadata=None):
    date_now = now()
    result = TaskManager._prepare_task_result(result)
    initial_values = {
        'name': task_name,
        'main_task_id': main_task_id,
        'own_status': status,
        'date_start': date_now,
        'own_date_done': date_now if status in READY_STATES else None,
        'result': result,
        'traceback': traceback,
        'celery_metadata': metadata,
    }
    if initial_values['own_status'] in READY_STATES:
        initial_values['own_progress'] = 100
    if main_task_id and main_task_id != task_id:
        # this is a sub-task
        initial_values['status'] = initial_values.get('own_status')
        initial_values['date_done'] = initial_values.get('own_date_done')
        initial_values['progress'] = initial_values.get('own_progress')
    try:
        obj, created = self.get_or_create(id=task_id, defaults=initial_values)
        if not created:
            if task_name and not obj.name:
                obj.name = task_name
            # The main task id should be assigned in init_task or on the initial
            # store_result. If the task was initialized with main_task_id = None,
            # Celery itself could overwrite it here with some other value.
            obj.set_own_status(status)
            if obj.own_date_done is None:
                obj.own_date_done = now() if status in READY_STATES else None
            obj.result = result
            obj.traceback = traceback
            obj.celery_metadata = metadata
            if not obj.has_sub_tasks:
                obj.status = obj.own_status
                obj.date_done = obj.own_date_done
                obj.progress = obj.own_progress
            obj.save()
    except IntegrityError:
        print('Orphan sub-task detected: {0}'.format(initial_values))
        obj = self.model(**initial_values)
    return obj

def start_processing(self, task_id, worker):
    self.filter(id=task_id).update(date_work_start=now(), worker=worker)

def init_task(self,
              task_id: str,
              task_name: str,
              main_task_id: str,
              parent_task_id: str,
              description: str = None,
              args: Tuple = None,
              kwargs: Dict = None,
              queue: str = None,
              priority: int = None,
              source_data=None,
              run_after_sub_tasks_finished=False,
              run_if_parent_task_failed=False):
    if description and len(description) > 1020:
        description = description[:1020]
    display_name = TASK_FRIENDLY_NAME.get(task_name) or task_name
    obj, created = self.get_or_create(id=task_id, defaults={
        'name': task_name,
        'display_name': display_name,
        'description': description,
        'main_task_id': main_task_id,
        'parent_task_id': parent_task_id,
        'date_start': now(),
        'args': args,
        'kwargs': kwargs,
        'queue': queue,
        'priority': priority,
        'restart_count': 0,
        'bad_health_check_num': 0,
        'source_data': source_data,
        'status': PENDING,
        'own_status': PENDING,
        'progress': 0,
        'own_progress': 0,
        'failure_reported': False,
        'run_after_sub_tasks_finished': run_after_sub_tasks_finished,
        'run_if_parent_task_failed': run_if_parent_task_failed
    })
    if not created:
        # If the task model already exists then this is a retry of a failed task:
        # clear its status and progress.
        obj.description = description
        obj.date_done = None
        obj.own_date_done = None
        obj.status = PENDING
        obj.own_status = PENDING
        obj.progress = 0
        obj.own_progress = 0
        # Instead of an unconditional obj.save() we update only the specified
        # fields, and only if they differ in the database.
        self.filter(id=task_id) \
            .exclude(date_done__isnull=True,
                     own_date_done__isnull=True,
                     status=PENDING,
                     own_status=PENDING,
                     progress=0,
                     own_progress=0,
                     description=description) \
            .update(date_done=None,
                    own_date_done=None,
                    status=PENDING,
                    own_status=PENDING,
                    progress=0,
                    own_progress=0,
                    description=description)
    else:
        if settings.DEBUG_LOG_TASK_RUN_COUNT:
            with connection.cursor() as cursor:
                # Parameterized query: interpolating task_name into the SQL
                # string (as the original f-string did) is open to SQL injection
                # and breaks on names containing quotes.
                cursor.execute('''
                    insert into task_taskstatentry (task_name, run_counter)
                    values (%s, 1)
                    on conflict (task_name)
                    do update set run_counter = task_taskstatentry.run_counter + 1;
                ''', [task_name])
    return obj

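# A minimal sketch (hypothetical values) of the "exclude-then-update" idiom
# used in init_task() above: filter to the target row, exclude rows that
# already hold the desired values, and bulk-update the rest. This replaces a
# read-modify-write with a single conditional UPDATE and skips the write
# entirely when nothing changed.
Task.objects.filter(id=task_id) \
    .exclude(status=PENDING, progress=0) \
    .update(status=PENDING, progress=0)
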
def report_on_failed_tasks(cls) -> None:
    """
    Find top-level tasks that have not completed for a long time or are in a
    failed state, and report on each of these tasks in a single email.
    """
    from apps.project.notifications import notify_failed_load_document
    from apps.task.tasks import LoadDocuments
    from apps.project.tasks import LoadArchive

    # args for the notify_failed_load_document() calls
    notification_messages = []

    # We could also lock the table, e.g.
    # with ModelLock(None, Task, ModelLock.LOCK_MODE_ACCESS_EXCLUSIVE):
    # but that would be overkill.
    recent_failed_load_document_tasks = Task.objects \
        .filter(name__in=[LoadDocuments.name, LoadArchive.name], status=FAILURE) \
        .exclude(metadata__failed_load_document_sent__isnull=False)
    for task in recent_failed_load_document_tasks:
        if not task.metadata:
            continue
        # compose the WS notification messages (include task data for debugging)
        file_name = task.metadata.get('file_name')
        session_id = task.upload_session.pk if task.upload_session \
            else task.metadata.get('session_id')
        if file_name and session_id:
            notification_messages.append({
                'file_name': file_name,
                'session_id': session_id,
                'directory_path': task.kwargs.get('directory_path') if task.kwargs else None
            })
            task.metadata['failed_load_document_sent'] = True
            task.save(update_fields=['metadata'])

    # send the WS notifications
    for msg in notification_messages:
        notify_failed_load_document(**msg)

    from apps.task.app_vars import ENABLE_ALERTS
    if not ENABLE_ALERTS.val:
        return
    configs = list(TaskConfig.objects.filter(notify_on_fail=True))
    if not configs:
        return
    all_names = [c.name for c in configs]
    now_time = now()
    # wait N seconds for the log records to be stored
    failed_before = now_time - datetime.timedelta(seconds=cls.MESSAGE_COOLDOWN_SECONDS)
    failed_tasks = list(Task.objects.filter(status=FAILURE,
                                            parent_task_id__isnull=True,
                                            failure_reported=False,
                                            name__in=all_names,
                                            date_done__lt=failed_before))  # type: List[Task]
    from apps.task.app_vars import ALERT_DEFAULT_INTERVAL
    default_watch_mins = ALERT_DEFAULT_INTERVAL.val
    name_by_interval = {c.watchdog_minutes or default_watch_mins: c.name
                        for c in configs}
    for interval, task_name in name_by_interval.items():
        max_start = now_time - datetime.timedelta(minutes=interval)
        failed_tasks += Task.objects.filter(status=PENDING,
                                            parent_task_id__isnull=True,
                                            failure_reported=False,
                                            name=task_name,
                                            date_work_start__lt=max_start)
    if not failed_tasks:
        return
    # notify the admins about the failed tasks
    cls.send_emails_on_failed_tasks(failed_tasks)