def usage_stats(self):
    TaskUtils.prepare_task_execution()
    d, _created = Deployment.objects.get_or_create(pk=1)  # type: Deployment
    if d.last_report_date and (now() - d.last_report_date) < datetime.timedelta(hours=12):
        return
    resp = None
    data = {
        'installation_id': str(d.installation_id) if d.installation_id else None,
        'deployment_date': d.deployment_date.isoformat() if d.deployment_date else None,
        'version_number': settings.VERSION_NUMBER,
        'version_commit': settings.VERSION_COMMIT
    }
    for url in settings.STATS_URLS:
        try:
            resp = requests.post(url, json=data)
        except Exception:
            logging.exception('Unable to run usage_stats try')
        if resp and resp.status_code == 200:
            break
    if not resp or resp.status_code != 200:
        raise usage_stats.retry(countdown=60)
    else:
        d.last_report_date = now()
        d.save()

def track_session_completed(_celery_task):
    """
    Filter sessions whose users asked to be notified (the "send email notifications" flag)
    and were notified that the upload job started but not yet that it completed;
    if the upload job has completed, send the notification email.
    Also track uncompleted sessions and send a WS notification for them.
    """
    TaskUtils.prepare_task_execution()

    sessions_for_ws_notification = list(UploadSession.objects.exclude(completed=True))
    if sessions_for_ws_notification:
        for session in sessions_for_ws_notification:
            session.check_and_set_completed()
        notify_active_upload_sessions(sessions_for_ws_notification)

    from apps.project.api.v1 import ProjectViewSet
    for session in UploadSession.objects.filter(
            pk__in=[i.pk for i in sessions_for_ws_notification],
            completed=True,
            project__type__code=DocumentType.GENERIC_TYPE_CODE):
        ProjectViewSet._cluster(project=session.project, user_id=session.created_by.id)

    for session in UploadSession.objects.filter(
            notified_upload_started=True,
            notified_upload_completed=False):
        if session.check_and_set_completed():
            session.notify_upload_completed()

def process(self, **kwargs):
    ant_uids = kwargs.get('ids')
    status_id = kwargs.get('status_id')

    # for preventing "connection already closed"
    TaskUtils.prepare_task_execution()

    ann_status = FieldAnnotationStatus.objects.get(pk=status_id)
    user = User.objects.get(pk=kwargs.get('user_id'))
    true_annotations = FieldAnnotation.objects.filter(uid__in=ant_uids)
    false_annotations = FieldAnnotationFalseMatch.objects.filter(uid__in=ant_uids)

    if ann_status.is_rejected:
        from apps.document.repository.document_field_repository import DocumentFieldRepository
        field_repo = DocumentFieldRepository()
        for ant in true_annotations:
            field_repo.delete_field_annotation_and_update_field_value(ant, user)
    else:
        import apps.document.repository.document_field_repository as dfr
        field_repo = dfr.DocumentFieldRepository()
        field_repo.update_field_annotations_by_ant_ids(
            ant_uids, [(f'{FIELD_CODE_STATUS_ID}', status_id)])
        if false_annotations:
            for false_ant in false_annotations:
                field_repo.restore_field_annotation_and_update_field_value(
                    false_ant, status_id, user)

    ant_docs = set(FieldAnnotation.objects.filter(
        uid__in=ant_uids).values_list('document_id', flat=True))
    false_ant_docs = set(FieldAnnotationFalseMatch.objects.filter(
        uid__in=ant_uids).values_list('document_id', flat=True))
    ant_docs.update(false_ant_docs)
    Document.reset_status_from_annotations(ann_status=ann_status, document_ids=list(ant_docs))

def retrain_dirty_fields(self):
    TaskUtils.prepare_task_execution()
    if DocumentTypeField.objects.has_dirty_fields():
        task_name = TrainDirtyDocumentFieldDetectorModel.name
        execution_delay = now() - datetime.timedelta(
            seconds=settings.RETRAINING_TASK_EXECUTION_DELAY_IN_SEC)
        if not Task.objects.active_tasks_exist(task_name, execution_delay):
            call_task(task_name, module_name='apps.document.tasks')

def trace_task_patched(uuid, *args1, **kwargs1):
    # patched Celery trace_task: make sure the DB connection is usable and mark
    # the Task row as being processed by this worker before running the original
    from apps.task.utils.task_utils import TaskUtils
    from apps.task.models import Task
    TaskUtils.prepare_task_execution()
    Task.objects.start_processing(task_id=uuid, worker=worker)
    return old_trace_task(uuid, *args1, **kwargs1)

def on_failure(*args, **kwargs):
    exc = kwargs.get('exception')
    if not exc:
        return
    if isinstance(exc, InterfaceError):
        if exc.args and 'connection already closed' in exc.args[0]:
            print('on_failure(InterfaceError): shutting down DB connection')
            # clear the DB connection
            TaskUtils.prepare_task_execution()

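# Hedged sketch (assumption): what a connection-reset helper such as
# TaskUtils.prepare_task_execution() typically boils down to in Django --
# dropping stale or broken DB connections so the next ORM query opens a fresh
# one. The real project helper may do more than this.
def _reset_db_connections():
    from django.db import connections
    # closes every registered connection; Django reconnects lazily on the next query
    connections.close_all()
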
def retrain_dirty_fields(self):
    TaskUtils.prepare_task_execution()
    if DocumentTypeField.objects.has_dirty_fields():
        task_name = TrainDirtyDocumentFieldDetectorModel.name
        execution_delay = now() - datetime.timedelta(
            seconds=settings.RETRAINING_TASK_EXECUTION_DELAY_IN_SEC)
        if not Task.objects.filter(name=task_name,
                                   own_status='PENDING',
                                   date_start__gt=execution_delay).exists():
            call_task(TrainDirtyDocumentFieldDetectorModel.name,
                      module_name='apps.document.tasks')

def track_session_completed(*args, **kwargs):
    """
    Filter sessions whose users asked to be notified (the "send email notifications" flag)
    and were notified that the upload job started but not yet that it completed;
    if the upload job has completed, send the notification email.
    """
    TaskUtils.prepare_task_execution()
    for session in UploadSession.objects.filter(
            notified_upload_started=True,
            notified_upload_completed=False):
        if session.is_completed():
            session.notify_upload_completed()

def send_task(self, name, args=None, kwargs=None, countdown=None, eta=None, task_id=None,
              producer=None, connection=None, router=None, result_cls=None, expires=None,
              publisher=None, link=None, link_error=None, add_to_parent=True, group_id=None,
              retries=0, chord=None, reply_to=None, time_limit=None, soft_time_limit=None,
              root_id=None, parent_id=None, source_data=None,
              run_after_sub_tasks_finished=False,
              route_name=None, shadow=None, chain=None, task_type=None, main_task_id=None,
              **options):
    task_id = task_id or str(fast_uuid())
    TaskUtils.prepare_task_execution()
    main_task_id = main_task_id or parent_id or root_id
    Task.objects.init_task(task_id, name, main_task_id,
                           'Args: {0}\nKwargs: {1}'.format(str(args), str(kwargs)),
                           args, source_data, run_after_sub_tasks_finished)  # type: Task
    return super().send_task(name, args, kwargs, countdown, eta, task_id, producer, connection,
                             router, result_cls, expires, publisher, link, link_error,
                             add_to_parent, group_id, retries, chord, reply_to, time_limit,
                             soft_time_limit, root_id, parent_id, route_name, shadow, chain,
                             task_type, **options)

def track_pdf2pdfa_status(_celery_task):
    """
    Track uncompleted CreateSearchablePDF tasks and send WS notification.
    """
    TaskUtils.prepare_task_execution()
    qs = Task.objects.filter(name=CreateSearchablePDF.name).exclude(completed=True)
    if not qs.exists():
        return
    data = list(qs.values('pk', 'name', 'progress', 'user_id', 'status', 'kwargs'))
    for task_data in data:
        task_data['details'] = []
        if task_data['kwargs'] and 'document_ids' in task_data['kwargs']:
            document_ids = task_data['kwargs']['document_ids']
            task_data['details'] = list(
                Document.objects.filter(pk__in=document_ids).annotate(
                    done=Case(When(alt_source_path__isnull=False, then=Value(True)),
                              default=Value(False),
                              output_field=BooleanField())).values('name', 'id', 'done'))
        del task_data['kwargs']
    # for task_data in data:
    #     task_data['details'] = []
    #     subtasks = Task.objects.get(pk=task_data['pk']).subtasks
    #     if subtasks:
    #         subtasks_data = subtasks.values('status', 'args')
    #         for subtask_data in subtasks_data:
    #             task_data['details'].append({
    #                 'status': subtask_data['status'],
    #                 'file_name': subtask_data['args'][0] if subtask_data['args'] else None,
    #                 'document_id': subtask_data['args'][1] if subtask_data['args'] else None,
    #             })
    if data:
        notify_active_pdf2pdfa_tasks(data)
    qs.filter(progress=100).update(completed=True)

def _inner(*args, **kwargs):
    _max_retries = kwargs.pop('exception_retry_count', max_retries)
    for retries in count(0):
        try:
            return fun(*args, **kwargs)
        except (SchedulingError, InterfaceError, OperationalError) as e:
            # broken or closed DB connection: log, reset connections and retry
            log_task_failure(e, *args, **kwargs)
            TaskUtils.prepare_task_execution()
            if retries >= _max_retries:
                raise
        except Exception as e:  # pragma: no cover
            # Depending on the database backend used we can experience
            # various exceptions. E.g. psycopg2 raises an exception
            # if some operation breaks the transaction, so saving
            # the task result won't be possible until we rollback
            # the transaction.
            log_task_failure(e, *args, **kwargs)
            if retries >= _max_retries:
                raise

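# Self-contained sketch of the retry-wrapper pattern that _inner() above implements:
# a decorator factory that retries the wrapped callable on DB-level errors and resets
# connections between attempts. The django.db.utils exceptions and close_old_connections()
# are standard Django; everything else here is illustrative, not project code.
from functools import wraps
from itertools import count

from django.db import close_old_connections
from django.db.utils import InterfaceError, OperationalError


def retry_on_db_errors(max_retries=3):
    def decorator(fun):
        @wraps(fun)
        def _inner(*args, **kwargs):
            _max_retries = kwargs.pop('exception_retry_count', max_retries)
            for retries in count(0):
                try:
                    return fun(*args, **kwargs)
                except (InterfaceError, OperationalError):
                    # drop broken connections so the next attempt reconnects
                    close_old_connections()
                    if retries >= _max_retries:
                        raise
        return _inner
    return decorator
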
def track_session_completed(_celery_task):
    """
    Filter sessions whose users asked to be notified (the "send email notifications" flag)
    and were notified that the upload job started but not yet that it completed;
    if the upload job has completed, send the notification email.
    Also track uncompleted sessions and send a WS notification for them.
    """
    TaskUtils.prepare_task_execution()

    sessions_for_ws_notification = list(UploadSession.objects.exclude(completed=True))
    if sessions_for_ws_notification:
        for session in sessions_for_ws_notification:
            session.check_and_set_completed()
        notify_active_upload_sessions(sessions_for_ws_notification)

    for session in UploadSession.objects.filter(
            notified_upload_started=True,
            notified_upload_completed=False):
        if session.check_and_set_completed():
            session.notify_upload_completed()

def update_main_task(self, main_task_id: str):
    TaskUtils.prepare_task_execution()
    if self.request.id != main_task_id:
        Task.objects.update_main_task(main_task_id)

def send_task(self, name, args=None, kwargs=None, countdown=None, eta=None, task_id=None,
              producer=None, connection=None, router=None, result_cls=None, expires=None,
              publisher=None, link=None, link_error=None, add_to_parent=True, group_id=None,
              retries=0, chord=None, reply_to=None, time_limit=None, soft_time_limit=None,
              root_id=None, parent_id=None, source_data=None,
              run_after_sub_tasks_finished=False, run_if_parent_task_failed=False,
              route_name=None, shadow=None, chain=None, task_type=None, main_task_id=None,
              **options):
    task_id = task_id or str(fast_uuid())
    main_task_id = main_task_id or parent_id or root_id

    args_str = ', '.join([str(arg) for arg in args]) if args else ''
    kwargs_str = '\n'.join([f'{f}: {str(v)}' for f, v in kwargs.items()]) if kwargs else ''
    description = list()
    if args_str:
        description.append(args_str)
    if kwargs_str:
        description.append(kwargs_str)

    TaskUtils.prepare_task_execution()
    with transaction.atomic():
        Task.objects.init_task(
            task_id=task_id,
            task_name=name,
            main_task_id=main_task_id,
            parent_task_id=parent_id,
            description='\n'.join(description),
            args=args,
            source_data=source_data,
            run_after_sub_tasks_finished=run_after_sub_tasks_finished,
            run_if_parent_task_failed=run_if_parent_task_failed)  # type: Task
        if parent_id is not None:
            Task.objects.filter(id=parent_id).update(has_sub_tasks=True)

    return super().send_task(name, args, kwargs, countdown, eta, task_id, producer, connection,
                             router, result_cls, expires, publisher, link, link_error,
                             add_to_parent, group_id, retries, chord, reply_to, time_limit,
                             soft_time_limit, root_id, parent_id, route_name, shadow, chain,
                             task_type, **options)

def send_task(self, name, args=None, kwargs=None, countdown=None, eta=None, task_id=None,
              producer=None, connection=None, router=None, result_cls=None, expires=None,
              publisher=None, link=None, link_error=None, add_to_parent=True, group_id=None,
              retries=0, chord=None, reply_to=None, time_limit=None, soft_time_limit=None,
              root_id=None, parent_id=None, source_data=None,
              run_after_sub_tasks_finished=False, run_if_parent_task_failed=False,
              route_name=None, shadow=None, chain=None, task_type=None, main_task_id=None,
              **options):
    """
    Custom Celery send_task() method which stores additional required task info in the DB.

    This class also has a task re-sending method used to restart hung tasks lost by workers,
    e.g. because of an unexpectedly fast restart or a similar reason. Re-sending requires all
    of the task info to be stored in the DB, because the corresponding info in RabbitMQ is
    usually lost together with the task when the worker loses it.

    Note that args, kwargs, queue and priority must be saved in the DB in the same form in
    which they can be loaded and used for re-sending.
    """
    task_id = task_id or str(fast_uuid())
    main_task_id = main_task_id or parent_id or root_id

    args_str = ', '.join([str(arg) for arg in args]) if args else ''
    kwargs_str = '\n'.join([f'{f}: {str(v)}' for f, v in kwargs.items()]) if kwargs else ''
    description = list()
    if args_str:
        description.append(args_str)
    if kwargs_str:
        description.append(kwargs_str)

    TaskUtils.prepare_task_execution()
    with transaction.atomic():
        # it is important to save args, kwargs, queue, priority and other task fields
        # because they can be used for the task re-sending
        Task.objects.init_task(
            task_id=task_id,
            task_name=name,
            main_task_id=main_task_id,
            parent_task_id=parent_id,
            description='\n'.join(description),
            args=args,
            kwargs=kwargs,
            queue=options.get('queue'),
            priority=options.get('priority'),
            source_data=source_data,
            run_after_sub_tasks_finished=run_after_sub_tasks_finished,
            run_if_parent_task_failed=run_if_parent_task_failed)  # type: Task
        if parent_id is not None:
            Task.objects.filter(id=parent_id).exclude(
                has_sub_tasks=True).update(has_sub_tasks=True)

    return super().send_task(name, args, kwargs, countdown, eta, task_id, producer, connection,
                             router, result_cls, expires, publisher, link, link_error,
                             add_to_parent, group_id, retries, chord, reply_to, time_limit,
                             soft_time_limit, root_id, parent_id, route_name, shadow, chain,
                             task_type, **options)

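# Hedged usage sketch for the custom send_task() above: the extra keyword arguments
# (source_data, run_after_sub_tasks_finished, run_if_parent_task_failed) are consumed
# by this override and recorded on the Task row; queue and priority are picked up from
# **options and stored for re-sending; everything else goes to Celery as usual.
# 'apps.document.tasks.DetectFieldValues' is an assumed example task name and `app` is
# assumed to be the Celery application instance that uses this class.
app.send_task(
    'apps.document.tasks.DetectFieldValues',   # hypothetical task name
    kwargs={'document_ids': [1, 2, 3]},        # hypothetical arguments
    queue='default',                           # stored in the Task row for re-sending
    priority=5,
    source_data='manual trigger from admin UI',
    run_after_sub_tasks_finished=False,
    run_if_parent_task_failed=False,
)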