def usage_stats(self):
    """
    Post deployment statistics to the configured stats endpoints.

    Does nothing when the deployment record says a report was made less than
    12 hours ago. Otherwise the payload (installation id, deployment date,
    version number and version commit) is POSTed to each URL from
    ``settings.STATS_URLS`` in turn until one of them answers with HTTP 200.

    On success ``last_report_date`` of the deployment record is set to the
    current time and saved. If no endpoint answered with HTTP 200 the task is
    re-scheduled through ``usage_stats.retry(countdown=60)``.
    """
    TaskUtils.prepare_task_execution()
    d, _created = Deployment.objects.get_or_create(pk=1)  # type: Deployment
    if d.last_report_date and (
            now() - d.last_report_date) < datetime.timedelta(hours=12):
        # Already reported within the last 12 hours.
        return

    data = {
        'installation_id':
        str(d.installation_id) if d.installation_id else None,
        'deployment_date':
        d.deployment_date.isoformat() if d.deployment_date else None,
        'version_number': settings.VERSION_NUMBER,
        'version_commit': settings.VERSION_COMMIT
    }

    reported = False
    for url in settings.STATS_URLS:
        try:
            resp = requests.post(url, json=data)
        except Exception:
            # Only ordinary errors are swallowed here: a bare ``except:`` would
            # also trap SystemExit / KeyboardInterrupt and keep the loop going.
            logging.exception('Unable to run usage_stats try')
            continue
        if resp.status_code == 200:
            reported = True
            break

    if not reported:
        raise usage_stats.retry(countdown=60)

    d.last_report_date = now()
    d.save()
예제 #2
0
def track_session_completed(_celery_task):
    """
    Re-check upload sessions for completion and send the related notifications.

    1. Every session that is not flagged as completed is re-checked with
       ``check_and_set_completed()``; the whole batch is then passed to
       ``notify_active_upload_sessions`` (WS notification).
    2. Sessions from that batch which are now completed and belong to a
       project of the generic document type are passed to
       ``ProjectViewSet._cluster``.
    3. Sessions where the "upload started" notification was sent but the
       "upload completed" one was not are checked again and, once completed,
       ``notify_upload_completed()`` is called on them (notification email).
    """
    TaskUtils.prepare_task_execution()

    open_sessions = list(UploadSession.objects.exclude(completed=True))

    if open_sessions:
        for open_session in open_sessions:
            open_session.check_and_set_completed()
        notify_active_upload_sessions(open_sessions)

    # Imported at call time rather than at module level.
    from apps.project.api.v1 import ProjectViewSet
    open_session_pks = [open_session.pk for open_session in open_sessions]
    completed_generic_sessions = UploadSession.objects.filter(
        pk__in=open_session_pks,
        completed=True,
        project__type__code=DocumentType.GENERIC_TYPE_CODE)
    for completed_session in completed_generic_sessions:
        ProjectViewSet._cluster(project=completed_session.project,
                                user_id=completed_session.created_by.id)

    not_yet_notified = UploadSession.objects.filter(
        notified_upload_started=True,
        notified_upload_completed=False)
    for candidate in not_yet_notified:
        if not candidate.check_and_set_completed():
            continue
        candidate.notify_upload_completed()
예제 #3
0
    def process(self, **kwargs):
        """
        Apply an annotation status to a set of field annotations.

        Expected keyword arguments: ``ids`` (annotation uids), ``status_id``
        (pk of a FieldAnnotationStatus) and ``user_id`` (pk of a User).

        When the status is a rejecting one, each matching FieldAnnotation is
        removed through the field repository. Otherwise the status is written
        to the annotations by uid and every matching FieldAnnotationFalseMatch
        is restored through the repository. Finally the status of all
        documents referenced by the affected annotations is reset.
        """
        annotation_uids = kwargs.get('ids')
        new_status_id = kwargs.get('status_id')

        # for preventing "connection already closed"
        TaskUtils.prepare_task_execution()
        new_status = FieldAnnotationStatus.objects.get(pk=new_status_id)
        acting_user = User.objects.get(pk=kwargs.get('user_id'))

        # Imported at call time rather than at module level.
        from apps.document.repository.document_field_repository import DocumentFieldRepository
        repository = DocumentFieldRepository()

        if new_status.is_rejected:
            for annotation in FieldAnnotation.objects.filter(uid__in=annotation_uids):
                repository.delete_field_annotation_and_update_field_value(
                    annotation, acting_user)
        else:
            repository.update_field_annotations_by_ant_ids(
                annotation_uids, [(f'{FIELD_CODE_STATUS_ID}', new_status_id)])

            # The false matches are fetched only after the status update above.
            for false_match in FieldAnnotationFalseMatch.objects.filter(
                    uid__in=annotation_uids):
                repository.restore_field_annotation_and_update_field_value(
                    false_match, new_status_id, acting_user)

        # Collect the documents touched by either kind of annotation.
        affected_document_ids = set(
            FieldAnnotation.objects
            .filter(uid__in=annotation_uids)
            .values_list('document_id', flat=True))
        affected_document_ids |= set(
            FieldAnnotationFalseMatch.objects
            .filter(uid__in=annotation_uids)
            .values_list('document_id', flat=True))

        Document.reset_status_from_annotations(
            ann_status=new_status,
            document_ids=list(affected_document_ids))
예제 #4
0
def retrain_dirty_fields(self):
    """
    Start the dirty-field detector retraining task when it is needed.

    Nothing happens unless ``DocumentTypeField.objects.has_dirty_fields()`` is
    true. The retraining task is then launched via ``call_task`` only if
    ``Task.objects.active_tasks_exist`` reports no such task for the moment
    ``now() - RETRAINING_TASK_EXECUTION_DELAY_IN_SEC`` seconds.
    """
    TaskUtils.prepare_task_execution()
    if not DocumentTypeField.objects.has_dirty_fields():
        return

    retrain_task_name = TrainDirtyDocumentFieldDetectorModel.name
    delay_threshold = now() - datetime.timedelta(
        seconds=settings.RETRAINING_TASK_EXECUTION_DELAY_IN_SEC)
    if Task.objects.active_tasks_exist(retrain_task_name, delay_threshold):
        return

    call_task(retrain_task_name, module_name='apps.document.tasks')
    def trace_task_patched(uuid, *args1, **kwargs1):
        """
        Mark the task as being processed, then delegate to ``old_trace_task``.

        ``worker`` and ``old_trace_task`` are not defined in this function;
        they come from a scope that is not part of this snippet.
        """
        # Imported at call time rather than at module level.
        from apps.task.utils.task_utils import TaskUtils
        from apps.task.models import Task

        TaskUtils.prepare_task_execution()
        Task.objects.start_processing(task_id=uuid, worker=worker)

        trace_result = old_trace_task(uuid, *args1, **kwargs1)
        return trace_result
예제 #6
0
 def on_failure(*args, **kwargs):
     """
     Failure hook: re-prepare task execution after a closed-connection error.

     Reads the failure from the ``exception`` keyword argument. Only an
     InterfaceError whose first argument mentions 'connection already closed'
     triggers ``TaskUtils.prepare_task_execution()``; anything else is ignored.
     Positional arguments are accepted but not used.
     """
     exc = kwargs.get('exception')
     if not exc or not isinstance(exc, InterfaceError):
         return
     # The first argument is not guaranteed to be a string; coerce it so the
     # substring test cannot raise TypeError inside the failure handler.
     if exc.args and 'connection already closed' in str(exc.args[0]):
         print('on_failure(InterfaceError): shutting down DB connection')
         # clear the DB connection
         TaskUtils.prepare_task_execution()
예제 #7
0
def retrain_dirty_fields(self):
    """
    Start the dirty-field detector retraining task when it is needed.

    Nothing happens unless ``DocumentTypeField.objects.has_dirty_fields()`` is
    true. The retraining task is launched via ``call_task`` only when no Task
    row with the same name, ``own_status='PENDING'`` and a ``date_start``
    later than ``now() - RETRAINING_TASK_EXECUTION_DELAY_IN_SEC`` exists.
    """
    TaskUtils.prepare_task_execution()
    if not DocumentTypeField.objects.has_dirty_fields():
        return

    retrain_task_name = TrainDirtyDocumentFieldDetectorModel.name
    started_after = now() - datetime.timedelta(
        seconds=settings.RETRAINING_TASK_EXECUTION_DELAY_IN_SEC)
    recent_pending_tasks = Task.objects.filter(
        name=retrain_task_name,
        own_status='PENDING',
        date_start__gt=started_after)
    if recent_pending_tasks.exists():
        return

    call_task(retrain_task_name, module_name='apps.document.tasks')
예제 #8
0
def track_session_completed(*args, **kwargs):
    """
    Send the "upload completed" notification for finished upload sessions.

    Looks at sessions where the "upload started" notification was sent (the
    user set the "send email notifications" flag) but the "upload completed"
    one was not, and calls ``notify_upload_completed()`` on each of them whose
    ``is_completed()`` is true. The arguments are accepted but not used.
    """
    TaskUtils.prepare_task_execution()

    awaiting_completion_notice = UploadSession.objects.filter(
        notified_upload_started=True, notified_upload_completed=False)
    for upload_session in awaiting_completion_notice:
        if not upload_session.is_completed():
            continue
        upload_session.notify_upload_completed()
    def send_task(self,
                  name,
                  args=None,
                  kwargs=None,
                  countdown=None,
                  eta=None,
                  task_id=None,
                  producer=None,
                  connection=None,
                  router=None,
                  result_cls=None,
                  expires=None,
                  publisher=None,
                  link=None,
                  link_error=None,
                  add_to_parent=True,
                  group_id=None,
                  retries=0,
                  chord=None,
                  reply_to=None,
                  time_limit=None,
                  soft_time_limit=None,
                  root_id=None,
                  parent_id=None,
                  source_data=None,
                  run_after_sub_tasks_finished=False,
                  route_name=None,
                  shadow=None,
                  chain=None,
                  task_type=None,
                  main_task_id=None,
                  **options):
        """
        Register the task through ``Task.objects.init_task`` and then send it.

        A task id is generated with ``fast_uuid()`` when none is given. The
        main task id falls back to ``parent_id`` and then ``root_id``.
        ``source_data``, ``run_after_sub_tasks_finished`` and ``main_task_id``
        are used only for the registration and are not forwarded to the parent
        ``send_task``.

        Returns whatever the parent class ``send_task`` returns.
        """
        if not task_id:
            task_id = str(fast_uuid())

        TaskUtils.prepare_task_execution()

        main_task_id = main_task_id or parent_id or root_id
        task_description = 'Args: {0}\nKwargs: {1}'.format(str(args), str(kwargs))
        Task.objects.init_task(
            task_id, name, main_task_id, task_description,
            args, source_data, run_after_sub_tasks_finished)  # type: Task

        # Positional order is deliberately kept as is: it has to line up with
        # the parent signature.
        return super().send_task(
            name, args, kwargs, countdown, eta, task_id,
            producer, connection, router, result_cls,
            expires, publisher, link, link_error,
            add_to_parent, group_id, retries, chord,
            reply_to, time_limit, soft_time_limit,
            root_id, parent_id, route_name, shadow, chain,
            task_type, **options)
예제 #10
0
def track_pdf2pdfa_status(_celery_task):
    """
    Report the state of unfinished CreateSearchablePDF tasks.

    For every such task that is not flagged as completed, a dict with pk,
    name, progress, user_id and status is built. When the task kwargs carry
    ``document_ids``, ``details`` lists those documents (name, id) with a
    ``done`` flag that is true when ``alt_source_path`` is set; otherwise
    ``details`` is empty. The list is passed to
    ``notify_active_pdf2pdfa_tasks`` (WS notification), after which tasks
    with progress 100 are flagged as completed.
    """
    TaskUtils.prepare_task_execution()

    unfinished_tasks = Task.objects.filter(
        name=CreateSearchablePDF.name).exclude(completed=True)
    if not unfinished_tasks.exists():
        return

    report = list(unfinished_tasks.values(
        'pk', 'name', 'progress', 'user_id', 'status', 'kwargs'))
    for entry in report:
        # kwargs are needed only to find the documents; they are not reported.
        task_kwargs = entry.pop('kwargs')
        if task_kwargs and 'document_ids' in task_kwargs:
            is_done = Case(
                When(alt_source_path__isnull=False, then=Value(True)),
                default=Value(False),
                output_field=BooleanField())
            documents = Document.objects.filter(
                pk__in=task_kwargs['document_ids']).annotate(done=is_done)
            entry['details'] = list(documents.values('name', 'id', 'done'))
        else:
            entry['details'] = []

    if report:
        notify_active_pdf2pdfa_tasks(report)
        unfinished_tasks.filter(progress=100).update(completed=True)
예제 #11
0
 def _inner(*args, **kwargs):
     """
     Call ``fun`` and retry it when it raises.

     The retry limit is taken from the ``exception_retry_count`` keyword
     argument (removed before ``fun`` is called) and defaults to
     ``max_retries``. Every failure is logged with ``log_task_failure``; once
     the number of retries reaches the limit the exception is re-raised.
     """
     retry_limit = kwargs.pop('exception_retry_count', max_retries)
     for attempt in count(0):
         try:
             return fun(*args, **kwargs)
         except (SchedulingError, InterfaceError, OperationalError) as err:
             # For these errors task execution is prepared again before the
             # next attempt.
             log_task_failure(err, *args, **kwargs)
             TaskUtils.prepare_task_execution()
             if attempt >= retry_limit:
                 raise
         except Exception as err:  # pragma: no cover
             # Depending on the database backend used we can experience
             # various exceptions. E.g. psycopg2 raises an exception
             # if some operation breaks the transaction, so saving
             # the task result won't be possible until we rollback
             # the transaction.
             log_task_failure(err, *args, **kwargs)
             if attempt >= retry_limit:
                 raise
예제 #12
0
def track_session_completed(_celery_task):
    """
    Re-check upload sessions for completion and send the related notifications.

    First, every session that is not flagged as completed is re-checked with
    ``check_and_set_completed()`` and the whole batch is passed to
    ``notify_active_upload_sessions`` (WS notification).

    Then sessions where the "upload started" notification was sent (the user
    set the "send email notifications" flag) but the "upload completed" one
    was not are checked again and, once completed,
    ``notify_upload_completed()`` is called on them (notification email).
    """
    TaskUtils.prepare_task_execution()

    open_sessions = list(UploadSession.objects.exclude(completed=True))

    if open_sessions:
        for open_session in open_sessions:
            open_session.check_and_set_completed()
        notify_active_upload_sessions(open_sessions)

    not_yet_notified = UploadSession.objects.filter(
        notified_upload_started=True,
        notified_upload_completed=False)
    for candidate in not_yet_notified:
        if not candidate.check_and_set_completed():
            continue
        candidate.notify_upload_completed()
    def update_main_task(self, main_task_id: str):
        """
        Ask the Task manager to update the given main task.

        The update is skipped when the currently running task (``self.request.id``)
        is the main task itself.
        """
        TaskUtils.prepare_task_execution()

        if self.request.id == main_task_id:
            return
        Task.objects.update_main_task(main_task_id)
    def send_task(self,
                  name,
                  args=None,
                  kwargs=None,
                  countdown=None,
                  eta=None,
                  task_id=None,
                  producer=None,
                  connection=None,
                  router=None,
                  result_cls=None,
                  expires=None,
                  publisher=None,
                  link=None,
                  link_error=None,
                  add_to_parent=True,
                  group_id=None,
                  retries=0,
                  chord=None,
                  reply_to=None,
                  time_limit=None,
                  soft_time_limit=None,
                  root_id=None,
                  parent_id=None,
                  source_data=None,
                  run_after_sub_tasks_finished=False,
                  run_if_parent_task_failed=False,
                  route_name=None,
                  shadow=None,
                  chain=None,
                  task_type=None,
                  main_task_id=None,
                  **options):
        """
        Register the task through ``Task.objects.init_task`` and then send it.

        A task id is generated with ``fast_uuid()`` when none is given and the
        main task id falls back to ``parent_id`` and then ``root_id``. The
        stored description is the comma-separated positional arguments followed
        by one ``key: value`` line per keyword argument. When ``parent_id`` is
        given, the parent row is flagged with ``has_sub_tasks=True`` in the
        same transaction.

        ``source_data``, ``run_after_sub_tasks_finished``,
        ``run_if_parent_task_failed`` and ``main_task_id`` are used only for
        the registration and are not forwarded to the parent ``send_task``.

        Returns whatever the parent class ``send_task`` returns.
        """
        if not task_id:
            task_id = str(fast_uuid())
        if not main_task_id:
            main_task_id = parent_id or root_id

        args_text = ', '.join(map(str, args)) if args else ''
        kwargs_text = '\n'.join(
            f'{f}: {str(v)}' for f, v in kwargs.items()) if kwargs else ''
        # Empty parts are left out so the description has no blank lines.
        task_description = '\n'.join(filter(None, (args_text, kwargs_text)))

        TaskUtils.prepare_task_execution()
        with transaction.atomic():
            Task.objects.init_task(
                task_id=task_id,
                task_name=name,
                main_task_id=main_task_id,
                parent_task_id=parent_id,
                description=task_description,
                args=args,
                source_data=source_data,
                run_after_sub_tasks_finished=run_after_sub_tasks_finished,
                run_if_parent_task_failed=run_if_parent_task_failed
            )  # type: Task
            if parent_id is not None:
                Task.objects.filter(id=parent_id).update(has_sub_tasks=True)

        # Positional order is deliberately kept as is: it has to line up with
        # the parent signature.
        return super().send_task(
            name, args, kwargs, countdown, eta, task_id,
            producer, connection, router, result_cls,
            expires, publisher, link, link_error,
            add_to_parent, group_id, retries, chord,
            reply_to, time_limit, soft_time_limit,
            root_id, parent_id, route_name, shadow, chain,
            task_type, **options)
    def send_task(self,
                  name,
                  args=None,
                  kwargs=None,
                  countdown=None,
                  eta=None,
                  task_id=None,
                  producer=None,
                  connection=None,
                  router=None,
                  result_cls=None,
                  expires=None,
                  publisher=None,
                  link=None,
                  link_error=None,
                  add_to_parent=True,
                  group_id=None,
                  retries=0,
                  chord=None,
                  reply_to=None,
                  time_limit=None,
                  soft_time_limit=None,
                  root_id=None,
                  parent_id=None,
                  source_data=None,
                  run_after_sub_tasks_finished=False,
                  run_if_parent_task_failed=False,
                  route_name=None,
                  shadow=None,
                  chain=None,
                  task_type=None,
                  main_task_id=None,
                  **options):
        """
        Custom Celery send_task() which also stores the task details in the DB.

        The stored record is what the task re-sending feature of this class
        relies on: tasks lost by workers (e.g. after an unexpectedly fast
        restart) are re-sent from the DB because the information in RabbitMQ
        is usually gone by then. For that reason args, kwargs, queue and
        priority must be saved in the same form in which they can later be
        loaded and re-sent.

        A task id is generated with ``fast_uuid()`` when none is given and the
        main task id falls back to ``parent_id`` and then ``root_id``. When
        ``parent_id`` is given, the parent row is flagged with
        ``has_sub_tasks=True`` (unless already flagged) in the same
        transaction.

        Returns whatever the parent class ``send_task`` returns.
        """
        if not task_id:
            task_id = str(fast_uuid())
        if not main_task_id:
            main_task_id = parent_id or root_id

        args_text = ', '.join(map(str, args)) if args else ''
        kwargs_text = '\n'.join(
            f'{f}: {str(v)}' for f, v in kwargs.items()) if kwargs else ''
        # Empty parts are left out so the description has no blank lines.
        task_description = '\n'.join(filter(None, (args_text, kwargs_text)))

        TaskUtils.prepare_task_execution()
        with transaction.atomic():
            # it is important to save args, kwargs, queue, priority and other task fields
            # because they can be used for the task re-sending
            Task.objects.init_task(
                task_id=task_id,
                task_name=name,
                main_task_id=main_task_id,
                parent_task_id=parent_id,
                description=task_description,
                args=args,
                kwargs=kwargs,
                queue=options.get('queue'),
                priority=options.get('priority'),
                source_data=source_data,
                run_after_sub_tasks_finished=run_after_sub_tasks_finished,
                run_if_parent_task_failed=run_if_parent_task_failed
            )  # type: Task
            if parent_id is not None:
                # Rows that already carry the flag are excluded from the update.
                parent_rows = Task.objects.filter(id=parent_id)
                parent_rows.exclude(has_sub_tasks=True).update(has_sub_tasks=True)

        # Positional order is deliberately kept as is: it has to line up with
        # the parent signature.
        return super().send_task(
            name, args, kwargs, countdown, eta, task_id,
            producer, connection, router, result_cls,
            expires, publisher, link, link_error,
            add_to_parent, group_id, retries, chord,
            reply_to, time_limit, soft_time_limit,
            root_id, parent_id, route_name, shadow, chain,
            task_type, **options)