def get_active_user_tasks(self) -> QuerySet:
    """
    Build a queryset of unfinished tasks that are still worth tracking.

    A task is selected when all of the following hold:
      * it has no main task or it is its own main task;
      * its status is one of ``UNREADY_STATES``;
      * its name is not listed in ``settings.EXCLUDE_FROM_TRACKING``;
      * ``date_start`` is empty or lies within the last three days;
      * ``date_work_start`` is empty or lies within the last
        ``settings.USER_TASK_EXECUTION_DELAY`` seconds.

    :return: the filtered queryset
    """
    work_started_after = now() - datetime.timedelta(
        seconds=settings.USER_TASK_EXECUTION_DELAY)
    started_after = now() - datetime.timedelta(days=3)

    is_main_task = Q(main_task__isnull=True) | Q(main_task_id=F('id'))
    started_recently = Q(date_start__isnull=True) | Q(date_start__gt=started_after)
    work_started_recently = (Q(date_work_start__isnull=True)
                             | Q(date_work_start__gt=work_started_after))

    tasks = self.filter(is_main_task)
    tasks = tasks.filter(status__in=UNREADY_STATES)
    tasks = tasks.exclude(name__in=settings.EXCLUDE_FROM_TRACKING)
    tasks = tasks.filter(started_recently)
    return tasks.filter(work_started_recently)
            def mark_main_task_completed():
                """
                Store the aggregated state on the parent task row and pass it further up.

                Reads ``parent_task_id``, ``total_status``, ``total_date_done`` and
                ``total_progress`` from the enclosing scope. After updating the row it
                schedules ``update_parent_task`` for the parent's own parent (if any),
                writes an error record to the task log when the total status is not
                SUCCESS, and revokes the parent task when the total status is one of
                ``PROPAGATE_STATES``.
                """
                # TODO Support "propagating exception" flag properly
                # Parent task should not crash if the child was not marked as "propagating exception"
                # (important/critical)
                # For now it is made crashing always if any sub-task crashes.
                completed = total_status in READY_STATES
                date_done = total_date_done or (now() if completed else None)
                self.filter(id=parent_task_id).update(date_done=date_done,
                                                      status=total_status,
                                                      completed=completed,
                                                      progress=total_progress)
                try:
                    main_task = self.get(id=parent_task_id)  # type: Task
                    if main_task.parent_task_id is not None:
                        # imported here, as in the original, rather than at module level
                        from apps.task.tasks import update_parent_task
                        update_parent_task.apply_async(
                            (main_task.parent_task_id, ))

                    if total_status != SUCCESS:
                        main_task.write_log(
                            '{0} #{1}: some/all of sub-tasks have been crashed'
                            .format(main_task.name, parent_task_id),
                            level='error')
                except Exception:
                    # Best-effort step: a failure here must not break the status update.
                    # Only ``Exception`` is caught so that KeyboardInterrupt/SystemExit
                    # still propagate, and the traceback is kept in the log.
                    import logging
                    logging.exception(
                        'Was unable to log SUCCESS/FAILURE to task log. Task id: {0}'
                        .format(parent_task_id))

                if total_status in PROPAGATE_STATES:
                    revoke_task(AsyncResult(parent_task_id))
    def report_on_failed_tasks(cls) -> None:
        """
        Find top-level tasks that are not completed for a long time or are in failed state
        and report on each of these tasks by one email.

        Does nothing when ``ENABLE_ALERTS`` is off or when no ``TaskConfig`` has
        ``notify_on_fail`` set. Two groups of not-yet-reported top-level tasks are
        collected and passed to ``cls.send_emails_on_failed_tasks``:

          * FAILURE tasks finished more than ``cls.MESSAGE_COOLDOWN_SECONDS`` ago;
          * PENDING tasks whose ``date_work_start`` is older than the watchdog
            interval of their config (``ALERT_DEFAULT_INTERVAL`` when not set).
        """
        if not ENABLE_ALERTS.val:
            return
        configs = list(TaskConfig.objects.filter(notify_on_fail=True))
        if not configs:
            return

        all_names = [c.name for c in configs]
        now_time = now()
        # we wait N seconds for the log records to be stored
        failed_before = now_time - datetime.timedelta(seconds=cls.MESSAGE_COOLDOWN_SECONDS)
        failed_tasks = list(Task.objects.filter(status=FAILURE,
                                                parent_task_id__isnull=True,
                                                failure_reported=False,
                                                name__in=all_names,
                                                date_done__lt=failed_before))  # type: List[Task]

        # Group task names by watchdog interval. Several configs may share one
        # interval (e.g. all those relying on the default), so every interval maps
        # to a list of names; a plain interval -> name dict would silently keep
        # only the last config per interval.
        default_watch_mins = ALERT_DEFAULT_INTERVAL.val
        names_by_interval = {}
        for config in configs:
            interval = config.watchdog_minutes or default_watch_mins
            names_by_interval.setdefault(interval, []).append(config.name)

        for interval, task_names in names_by_interval.items():
            max_start = now_time - datetime.timedelta(minutes=interval)
            failed_tasks += Task.objects.filter(
                status=PENDING,
                parent_task_id__isnull=True,
                failure_reported=False,
                name__in=task_names,
                date_work_start__lt=max_start)

        if not failed_tasks:
            return
        # notify admin on failed tasks
        cls.send_emails_on_failed_tasks(failed_tasks)
Esempio n. 4
0
 def on_task_completed(self, succeeded: bool):
     """
     Record that this task's own work has finished.

     Without sub-tasks the call is delegated to ``update_progress(100, succeeded)``.
     With sub-tasks only the task's "own" fields are finalized and saved.

     :param succeeded: True to mark the own status as SUCCESS, False for FAILURE
     """
     if not self.has_sub_tasks:
         self.update_progress(100, succeeded)
         return

     self.own_progress = 100
     if succeeded:
         self.own_status = SUCCESS
     else:
         self.own_status = FAILURE
     # keep an already stored completion time
     self.own_date_done = self.own_date_done or now()
     self.save()
    def init_task(self,
                  task_id: str,
                  task_name: str,
                  main_task_id: str,
                  parent_task_id: str,
                  description: str = None,
                  args: Tuple = None,
                  source_data=None,
                  run_after_sub_tasks_finished=False,
                  run_if_parent_task_failed=False):
        """
        Fetch the task record with the given id, creating it in PENDING state if missing.

        The description is cut to 1020 characters. When the record already exists
        (treated as a retry of a failed task) its status, progress and completion
        dates are reset and the record is saved.

        :return: the created or existing task object
        """
        max_description_len = 1020
        if description and len(description) > max_description_len:
            description = description[:max_description_len]

        fresh_state = dict(
            name=task_name,
            description=description,
            main_task_id=main_task_id,
            parent_task_id=parent_task_id,
            date_start=now(),
            args=args,
            source_data=source_data,
            status=PENDING,
            own_status=PENDING,
            progress=0,
            own_progress=0,
            failure_reported=False,
            run_after_sub_tasks_finished=run_after_sub_tasks_finished,
            run_if_parent_task_failed=run_if_parent_task_failed,
        )
        task, is_new = self.get_or_create(id=task_id, defaults=fresh_state)
        if is_new:
            return task

        # If the task model is already created then this is a retry of a failed task
        # We need to clear its status and progress
        reset_values = (
            ('date_done', None),
            ('own_date_done', None),
            ('status', PENDING),
            ('own_status', PENDING),
            ('progress', 0),
            ('own_progress', 0),
        )
        for field_name, value in reset_values:
            setattr(task, field_name, value)
        task.save()
        return task
    def report_on_failed_tasks(cls) -> None:
        """
        Find top-level tasks that are not completed for a long time or are in failed state
        and report on each of these tasks by one email.

        First, for every failed LoadDocuments / LoadArchive task not yet flagged with
        ``failed_load_document_sent`` in its metadata, ``notify_failed_load_document``
        is called (only when the metadata holds both a file name and a session id)
        and the task is flagged and saved.

        Then, if ``ENABLE_ALERTS`` is on and at least one ``TaskConfig`` has
        ``notify_on_fail`` set, two groups of not-yet-reported top-level tasks are
        collected and passed to ``cls.send_emails_on_failed_tasks``:

          * FAILURE tasks finished more than ``cls.MESSAGE_COOLDOWN_SECONDS`` ago;
          * PENDING tasks whose ``date_work_start`` is older than the watchdog
            interval of their config (``ALERT_DEFAULT_INTERVAL`` when not set).
        """

        from apps.project.notifications import notify_failed_load_document
        from apps.task.tasks import LoadDocuments
        from apps.project.tasks import LoadArchive
        recent_failed_load_document_tasks = Task.objects \
            .filter(name__in=[LoadDocuments.name, LoadArchive.name], status=FAILURE) \
            .exclude(metadata__failed_load_document_sent__isnull=False)
        for task in recent_failed_load_document_tasks:
            if not task.metadata:
                continue
            # send ws notification (include task data for debugging)
            file_name = task.metadata.get('file_name')
            session_id = task.metadata.get('session_id')
            if file_name and session_id:
                notify_failed_load_document(file_name=file_name,
                                            session_id=session_id,
                                            directory_path=task.kwargs.get('directory_path') if task.kwargs else None)
                task.metadata['failed_load_document_sent'] = True
                task.save()

        from apps.task.app_vars import ENABLE_ALERTS
        if not ENABLE_ALERTS.val:
            return
        configs = list(TaskConfig.objects.filter(notify_on_fail=True))
        if not configs:
            return

        all_names = [c.name for c in configs]
        now_time = now()
        # we wait N seconds for the log records to be stored
        failed_before = now_time - datetime.timedelta(seconds=cls.MESSAGE_COOLDOWN_SECONDS)
        failed_tasks = list(Task.objects.filter(status=FAILURE,
                                                parent_task_id__isnull=True,
                                                failure_reported=False,
                                                name__in=all_names,
                                                date_done__lt=failed_before))  # type: List[Task]

        from apps.task.app_vars import ALERT_DEFAULT_INTERVAL
        # Group task names by watchdog interval. Several configs may share one
        # interval (e.g. all those relying on the default), so every interval maps
        # to a list of names; a plain interval -> name dict would silently keep
        # only the last config per interval.
        default_watch_mins = ALERT_DEFAULT_INTERVAL.val
        names_by_interval = {}
        for config in configs:
            interval = config.watchdog_minutes or default_watch_mins
            names_by_interval.setdefault(interval, []).append(config.name)

        for interval, task_names in names_by_interval.items():
            max_start = now_time - datetime.timedelta(minutes=interval)
            failed_tasks += Task.objects.filter(
                status=PENDING,
                parent_task_id__isnull=True,
                failure_reported=False,
                name__in=task_names,
                date_work_start__lt=max_start)

        if not failed_tasks:
            return
        # notify admin on failed tasks
        cls.send_emails_on_failed_tasks(failed_tasks)
Esempio n. 7
0
    def save(self, *args, **kwargs):
        """
        Normalize derived fields and then delegate to the parent ``save``.

        * an empty ``display_name`` falls back to the friendly name registered for
          ``name`` in ``TASK_FRIENDLY_NAME``, or to ``name`` itself;
        * ``progress`` is capped at 100;
        * a status in ``FAIL_READY_STATES`` or ``READY_STATES`` forces progress to
          100, copies the status to ``own_status`` and fills ``date_done`` if empty;
        * an ``own_status`` in ``READY_STATES`` forces ``own_progress`` to 100 and
          fills ``own_date_done`` if empty.
        """
        self.display_name = (self.display_name
                             or TASK_FRIENDLY_NAME.get(self.name)
                             or self.name)
        self.progress = min(self.progress, 100)

        for finished_states in (FAIL_READY_STATES, READY_STATES):
            if self.status in finished_states:
                self.progress = 100
                self.own_status = self.status
                self.date_done = self.date_done or now()

        if self.own_status in READY_STATES:
            self.own_progress = 100
            self.own_date_done = self.own_date_done or now()

        super().save(*args, **kwargs)
Esempio n. 8
0
    def init_task(self,
                  task_id: str,
                  task_name: str,
                  main_task_id: str,
                  description: str = None,
                  args: Tuple = None,
                  source_data=None,
                  run_after_sub_tasks_finished=False):
        """
        Fetch the task record with the given id, creating it if it does not exist.

        When ``main_task_id`` is set and differs from ``task_id`` the main task is
        loaded first; a failure of that lookup is printed and re-raised. The
        description is cut to 1020 characters. An already existing record gets its
        name, description, main task and ``run_after_sub_tasks_finished`` flag
        overwritten and its ``date_done`` cleared before being saved.

        :return: the created or updated task object
        """
        main_task = None
        try:
            if main_task_id and main_task_id != task_id:
                main_task = self.get(id=main_task_id)
        except BaseException:
            # report which sub-task points to a bad main task, then propagate
            message = 'Bad sub-task: task_id={0}, main_task_id={1}, task_name={2}'
            print(message.format(task_id, main_task_id, task_name))
            raise

        max_description_len = 1020
        if description and len(description) > max_description_len:
            description = description[:max_description_len]

        resolved_main_id = main_task.id if main_task else None
        fresh_state = dict(
            name=task_name,
            description=description,
            main_task_id=resolved_main_id,
            date_start=now(),
            args=args,
            source_data=source_data,
            run_after_sub_tasks_finished=run_after_sub_tasks_finished,
        )
        task, is_new = self.get_or_create(id=task_id, defaults=fresh_state)
        if is_new:
            return task

        task.name = task_name
        task.description = description
        task.main_task_id = resolved_main_id
        task.date_done = None
        task.run_after_sub_tasks_finished = run_after_sub_tasks_finished
        task.save()
        return task
    def update_progress(self,
                        new_progress: int,
                        succeeded: bool = True):
        """
        Store the task's own progress (capped at 100) and save the affected fields.

        On reaching 100 the own status becomes SUCCESS or FAILURE depending on
        ``succeeded`` and ``own_date_done`` is filled if it was empty. For a task
        without sub-tasks the own values are also copied to the overall
        ``status`` / ``progress`` / ``date_done``. A ``DatabaseError`` raised
        while saving is ignored.
        """
        self.own_progress = min(new_progress, 100)
        if self.own_progress == 100:
            self.own_status = SUCCESS if succeeded else FAILURE
            finished_at = now()
            if not self.own_date_done:
                self.own_date_done = finished_at

        try:
            if self.has_sub_tasks:
                changed_fields = ['own_progress', 'own_status', 'own_date_done']
            else:
                # no sub-tasks: the overall state mirrors the own state
                self.status = self.own_status
                self.progress = self.own_progress
                self.date_done = self.own_date_done
                changed_fields = ['progress', 'own_progress',
                                  'status', 'own_status',
                                  'date_done', 'own_date_done']
            self.save(update_fields=changed_fields)
        except DatabaseError:
            # task itself might have been deleted
            pass
Esempio n. 10
0
    def update_progress(self, new_progress: int, succeeded: bool = True):
        """
        Set the task progress and pass the change on to the parent task.

        The value goes through ``ensure_correct_progress`` first; nothing happens
        if it equals the current progress. On reaching 100 the status becomes
        SUCCESS or FAILURE depending on ``succeeded``, ``own_status`` follows
        unless it is already in ``READY_STATES``, and empty completion dates are
        filled. A ``DatabaseError`` raised while saving is ignored. If there is a
        parent task, its progress is recomputed as the integer mean of its
        children's progresses and its own progress, and updated recursively.
        """
        new_progress = self.ensure_correct_progress(new_progress)
        if new_progress == self.progress:
            return

        self.progress = min(new_progress, 100)
        if self.progress == 100:
            self.status = SUCCESS if succeeded else FAILURE
            if self.own_status not in READY_STATES:
                self.own_status = self.status

            finished_at = now()
            self.own_date_done = self.own_date_done or finished_at
            self.date_done = self.date_done or finished_at

        changed_fields = [
            'progress', 'own_progress', 'status', 'own_status',
            'date_done', 'own_date_done'
        ]
        try:
            self.save(update_fields=changed_fields)
        except DatabaseError:
            # task itself might have been deleted
            pass

        # propagate changes to parent tasks
        parent = self.parent_task  # type: Task
        if not parent:
            return

        child_progresses = list(
            Task.objects.filter(parent_task_id=parent.pk)
            .values_list('progress', flat=True))
        if not child_progresses:
            return

        total = sum(child_progresses) + parent.own_progress
        share_count = len(child_progresses) + 1
        parent.update_progress(int(total / share_count))
Esempio n. 11
0
 def duration(self):
     """
     Return the time elapsed between ``date_start`` and ``date_done``.

     The current time stands in for a missing ``date_done``; ``None`` is returned
     when the task has no ``date_start``.
     """
     if self.date_start:
         finished_at = self.date_done or now()
         return finished_at - self.date_start
     return None
 def get_all_expired(self, expires):
     """Return the task results whose ``own_date_done`` is older than ``expires``."""
     # ``expires`` is converted by ``maybe_timedelta`` before the subtraction
     expired_before = now() - maybe_timedelta(expires)
     return self.filter(own_date_done__lt=expired_before)
    def store_result(self,
                     task_id: str,
                     main_task_id: str,
                     task_name: str,
                     result,
                     status: str,
                     traceback=None,
                     metadata=None):
        """
        Store the result and own status of a task, creating its record if needed.

        For a new record the initial values are derived from the arguments; a
        sub-task (``main_task_id`` set and different from ``task_id``) also gets
        its overall status, completion date and progress from the own values.
        For an existing record the own status, result, traceback and metadata are
        refreshed, and the overall fields mirror the own ones when the task has no
        sub-tasks. On ``IntegrityError`` a message is printed and an unsaved model
        instance built from the initial values is returned instead.

        :return: the stored (or, on IntegrityError, unsaved) task object
        """
        started_at = now()
        result = TaskManager._prepare_task_result(result)
        is_finished = status in READY_STATES

        initial_values = {
            'name': task_name,
            'main_task_id': main_task_id,
            'own_status': status,
            'date_start': started_at,
            'own_date_done': started_at if is_finished else None,
            'result': result,
            'traceback': traceback,
            'celery_metadata': metadata,
        }
        if is_finished:
            initial_values['own_progress'] = 100

        is_sub_task = bool(main_task_id) and main_task_id != task_id
        if is_sub_task:
            initial_values.update(
                status=initial_values.get('own_status'),
                date_done=initial_values.get('own_date_done'),
                progress=initial_values.get('own_progress'))

        try:
            task, is_new = self.get_or_create(id=task_id,
                                              defaults=initial_values)
            if not is_new:
                if task_name and not task.name:
                    task.name = task_name

                # Main task id should be assigned in init_task or on initial store result.
                # If the task is already initialized with main_task_id = None - here it can be rewritten
                # with some value by Celery itself.

                task.set_own_status(status)

                if task.own_date_done is None:
                    task.own_date_done = now() if is_finished else None

                task.result = result
                task.traceback = traceback
                task.celery_metadata = metadata

                if not task.has_sub_tasks:
                    # no sub-tasks: the overall state mirrors the own state
                    task.status = task.own_status
                    task.date_done = task.own_date_done
                    task.progress = task.own_progress

                task.save()
        except IntegrityError:
            print('Orphan sub-task detected: {0}'.format(initial_values))
            task = self.model(**initial_values)

        return task
 def start_processing(self, task_id, worker):
     """Stamp the task row with the current time as work start and with its worker."""
     task_rows = self.filter(id=task_id)
     task_rows.update(date_work_start=now(), worker=worker)
Esempio n. 15
0
    def init_task(self,
                  task_id: str,
                  task_name: str,
                  main_task_id: str,
                  parent_task_id: str,
                  description: str = None,
                  args: Tuple = None,
                  kwargs: Dict = None,
                  queue: str = None,
                  priority: int = None,
                  source_data=None,
                  run_after_sub_tasks_finished=False,
                  run_if_parent_task_failed=False):
        """
        Fetch the task record with the given id, creating it in PENDING state if missing.

        The description is cut to 1020 characters. When the record already exists
        (treated as a retry of a failed task) its status, progress, completion
        dates and description are reset, both on the returned object and, through
        a conditional UPDATE, in the database. When a new record is created and
        ``settings.DEBUG_LOG_TASK_RUN_COUNT`` is on, the run counter for
        ``task_name`` in ``task_taskstatentry`` is inserted or incremented.

        :return: the created or existing task object
        """

        if description and len(description) > 1020:
            description = description[:1020]

        display_name = TASK_FRIENDLY_NAME.get(task_name) or task_name

        obj, created = self.get_or_create(
            id=task_id,
            defaults={
                'name': task_name,
                'display_name': display_name,
                'description': description,
                'main_task_id': main_task_id,
                'parent_task_id': parent_task_id,
                'date_start': now(),
                'args': args,
                'kwargs': kwargs,
                'queue': queue,
                'priority': priority,
                'restart_count': 0,
                'bad_health_check_num': 0,
                'source_data': source_data,
                'status': PENDING,
                'own_status': PENDING,
                'progress': 0,
                'own_progress': 0,
                'failure_reported': False,
                'run_after_sub_tasks_finished': run_after_sub_tasks_finished,
                'run_if_parent_task_failed': run_if_parent_task_failed,
            })
        if not created:
            # If the task model is already created then this is a retry of a failed task
            # We need to clear its status and progress
            obj.description = description
            obj.date_done = None
            obj.own_date_done = None
            obj.status = PENDING
            obj.own_status = PENDING
            obj.progress = 0
            obj.own_progress = 0
            # Instead of unconditional .save() we update only the specified fields
            # and only if they are different in the database.
            self.filter(id=task_id) \
                .exclude(date_done__isnull=True,
                         own_date_done__isnull=True,
                         status=PENDING,
                         own_status=PENDING,
                         progress=0,
                         own_progress=0,
                         description=description) \
                .update(date_done=None,
                        own_date_done=None,
                        status=PENDING,
                        own_status=PENDING,
                        progress=0,
                        own_progress=0,
                        description=description)

        elif settings.DEBUG_LOG_TASK_RUN_COUNT:
            # The task name is passed as a query parameter rather than interpolated
            # into the SQL text, so quotes in the name can neither break nor alter
            # the statement.
            with connection.cursor() as cursor:
                cursor.execute(
                    '''
    insert into task_taskstatentry (task_name, run_counter)
    values (%s, 1)
    on conflict (task_name) do update set run_counter = task_taskstatentry.run_counter + 1;
                    ''',
                    [task_name])

        return obj
Esempio n. 16
0
    def report_on_failed_tasks(cls) -> None:
        """
        Find top-level tasks that are not completed for a long time or are in failed state
        and report on each of these tasks by one email.

        First, every failed LoadDocuments / LoadArchive task not yet flagged with
        ``failed_load_document_sent`` in its metadata is flagged and saved, and a
        notification message is queued for it (only when a file name and a session
        id are available). The queued messages are sent with
        ``notify_failed_load_document`` after the loop.

        Then, if ``ENABLE_ALERTS`` is on and at least one ``TaskConfig`` has
        ``notify_on_fail`` set, two groups of not-yet-reported top-level tasks are
        collected and passed to ``cls.send_emails_on_failed_tasks``:

          * FAILURE tasks finished more than ``cls.MESSAGE_COOLDOWN_SECONDS`` ago;
          * PENDING tasks whose ``date_work_start`` is older than the watchdog
            interval of their config (``ALERT_DEFAULT_INTERVAL`` when not set).
        """

        from apps.project.notifications import notify_failed_load_document
        from apps.task.tasks import LoadDocuments
        from apps.project.tasks import LoadArchive
        # args for notify_failed_load_document() call
        notification_messages = []

        # we can also lock the table like
        # with ModelLock(None, Task, ModelLock.LOCK_MODE_ACCESS_EXCLUSIVE):
        # but this would be an overkill
        recent_failed_load_document_tasks = Task.objects \
            .filter(name__in=[LoadDocuments.name, LoadArchive.name], status=FAILURE) \
            .exclude(metadata__failed_load_document_sent__isnull=False)
        for task in recent_failed_load_document_tasks:
            if not task.metadata:
                continue
            # compose ws notification messages (include task data for debugging)
            file_name = task.metadata.get('file_name')
            session_id = task.upload_session.pk if task.upload_session else task.metadata.get(
                'session_id')
            if file_name and session_id:
                notification_messages.append({
                    'file_name':
                    file_name,
                    'session_id':
                    session_id,
                    'directory_path':
                    task.kwargs.get('directory_path') if task.kwargs else None
                })
                task.metadata['failed_load_document_sent'] = True
                task.save(update_fields={'metadata'})
        # send ws notification
        for msg in notification_messages:
            notify_failed_load_document(**msg)

        from apps.task.app_vars import ENABLE_ALERTS
        if not ENABLE_ALERTS.val:
            return
        configs = list(TaskConfig.objects.filter(notify_on_fail=True))
        if not configs:
            return

        all_names = [c.name for c in configs]
        now_time = now()
        # we wait N seconds for the log records to be stored
        failed_before = now_time - datetime.timedelta(
            seconds=cls.MESSAGE_COOLDOWN_SECONDS)
        failed_tasks = list(
            Task.objects.filter(
                status=FAILURE,
                parent_task_id__isnull=True,
                failure_reported=False,
                name__in=all_names,
                date_done__lt=failed_before))  # type: List[Task]

        from apps.task.app_vars import ALERT_DEFAULT_INTERVAL
        # Group task names by watchdog interval. Several configs may share one
        # interval (e.g. all those relying on the default), so every interval maps
        # to a list of names; a plain interval -> name dict would silently keep
        # only the last config per interval.
        default_watch_mins = ALERT_DEFAULT_INTERVAL.val
        names_by_interval = {}
        for config in configs:
            interval = config.watchdog_minutes or default_watch_mins
            names_by_interval.setdefault(interval, []).append(config.name)

        for interval, task_names in names_by_interval.items():
            max_start = now_time - datetime.timedelta(minutes=interval)
            failed_tasks += Task.objects.filter(status=PENDING,
                                                parent_task_id__isnull=True,
                                                failure_reported=False,
                                                name__in=task_names,
                                                date_work_start__lt=max_start)

        if not failed_tasks:
            return
        # notify admin on failed tasks
        cls.send_emails_on_failed_tasks(failed_tasks)