def process(self, **kwargs):
    """
    Clean up a single project together with its documents.

    Keyword arguments read:
        _project_id: primary key of the project to clean up.
        delete: coerced to ``bool`` and forwarded to ``project.cleanup``.

    The documents' source file paths are handed to the
    ``DeleteDocumentFiles`` task, the documents are bulk-deleted, the
    project is cleaned up, and finally this task's metadata is overwritten
    with a record of the cleanup.
    """
    target_pk = kwargs.get('_project_id')
    delete_flag = bool(kwargs.get('delete'))
    project = Project.all_objects.get(pk=target_pk)

    # collect the project's document ids and their source file paths
    repository = self.document_repository
    document_ids = repository.get_project_document_ids(target_pk)
    source_paths = repository.get_all_document_source_paths(document_ids)

    # pass the source file paths to the file-removing task
    from apps.document.tasks import DeleteDocumentFiles
    call_task(DeleteDocumentFiles, metadata=source_paths)

    # remove the documents themselves
    from apps.document.repository.document_bulk_delete \
        import get_document_bulk_delete
    bulk_deleter = get_document_bulk_delete()
    bulk_deleter.delete_documents(document_ids)

    # clean up the project itself
    project.cleanup(delete=delete_flag)

    # record the cleanup in this task's metadata; the project id key is
    # prefixed with "_" to avoid detecting the task as a project task
    cleanup_record = {
        'task_name': 'clean-project',
        '_project_id': target_pk,
    }
    task_record = self.task
    task_record.metadata = cleanup_record
    task_record.save()
def process(self, **kwargs):
    """
    Clean up a single project together with its documents.

    Keyword arguments read:
        _project_id: primary key of the project to clean up.
        delete: coerced to ``bool`` and forwarded to ``project.cleanup``.
        safe_delete: forwarded to ``get_document_bulk_delete``; a missing
            or ``None`` value is treated as ``True``.
        skip_task_updating: when truthy, this task's metadata is left
            untouched.

    A failure while removing the documents' source files is logged and
    does not stop the rest of the cleanup.
    """
    target_pk = kwargs.get('_project_id')
    delete_flag = bool(kwargs.get('delete'))
    safe_mode = kwargs.get('safe_delete')
    safe_mode = True if safe_mode is None else safe_mode
    project = Project.all_objects.get(pk=target_pk)

    # collect the project's document ids and their source file paths
    repository = self.document_repository
    document_ids = repository.get_project_document_ids(target_pk)
    source_paths = repository.get_all_document_source_paths(document_ids)

    # remove the source files; errors are only logged
    try:
        from apps.document.sync_tasks.document_files_cleaner import DocumentFilesCleaner
        DocumentFilesCleaner.delete_document_files(source_paths)
    except Exception as cleanup_error:
        self.log_error(f'Unable to clean document files: {source_paths}',
                       exc_info=cleanup_error)

    # remove the documents themselves
    from apps.document.repository.document_bulk_delete \
        import get_document_bulk_delete
    bulk_deleter = get_document_bulk_delete(safe_mode)
    bulk_deleter.delete_documents(document_ids)

    # clean up the project itself
    project.cleanup(delete=delete_flag)

    if kwargs.get('skip_task_updating'):
        return

    # record the cleanup in this task's metadata; the project id key is
    # prefixed with "_" to avoid detecting the task as a project task
    cleanup_record = {
        'task_name': 'clean-project',
        '_project_id': target_pk,
    }
    task_record = self.task
    task_record.metadata = cleanup_record
    task_record.save()
def process(self, **kwargs):
    """
    Bulk-delete documents and then remove their source files.

    Keyword arguments read:
        _document_ids: ids of the documents to delete.

    The source file paths are looked up before the documents are deleted.
    Removing the files is best effort: any exception raised by the file
    cleaner is logged (with the affected paths and the exception attached)
    and is not re-raised.
    """
    doc_ids = kwargs.get('_document_ids')
    # resolve the paths first, while the documents still exist
    file_paths = self.document_repository.get_all_document_source_paths(doc_ids)
    get_document_bulk_delete().delete_documents(doc_ids)
    try:
        DocumentFilesCleaner.delete_document_files(file_paths)
    except Exception as e:
        # log a descriptive message and pass the exception via exc_info
        # instead of using the bare exception object as the message
        self.log_error(f'Unable to clean document files: {file_paths}',
                       exc_info=e)
def confirm_delete_view(self, request):
    """
    Confirmation step for deleting the documents stored in the session.

    The document ids are read from ``request.session['_doc_ids']``.

    GET renders the confirmation page with a list of
    ``(name, count, flag)`` rows: a leading ``('Documents', <n>, True)``
    row followed by per-model counts sorted by name.
    Any other method starts the ``DeleteDocuments`` task for those ids and
    redirects to ``"../"``.
    """
    from apps.document.repository.document_bulk_delete \
        import get_document_bulk_delete

    selected_ids = request.session.get('_doc_ids')

    if request.method != 'GET':
        # POST: actual delete
        from apps.task.tasks import call_task
        call_task(
            task_name='DeleteDocuments',
            module_name='apps.document.tasks',
            _document_ids=selected_ids,
            user_id=request.user.id)
        from django.http import HttpResponseRedirect
        return HttpResponseRedirect("../")

    counts_by_table = get_document_bulk_delete().calculate_deleting_count(selected_ids)
    class_dictionary = ModelClassDictionary()

    # keep only tables known to the model dictionary, keyed by readable name
    counts_by_name = {}
    for table in counts_by_table:
        if table in class_dictionary.model_by_table:
            readable_name = class_dictionary.get_model_class_name_hr(table)
            counts_by_name[readable_name] = counts_by_table[table]

    detail_rows = sorted(
        ((name, counts_by_name[name], False) for name in counts_by_name),
        key=lambda row: row[0])
    rows = [('Documents', len(selected_ids), True)] + detail_rows

    context = {
        'deleting_count': rows,
        'return_url': 'admin:document_softdeletedocument_changelist'
    }
    from django.shortcuts import render
    return render(request, "admin/common/confirm_delete_view.html", context)
def process(self, **kwargs):
    """
    Clean up a single project, deleting its documents in chunks.

    Keyword arguments read:
        _project_id: primary key of the project to clean up.
        delete: coerced to ``bool`` and forwarded to ``project.cleanup``.
        safe_delete: forwarded to ``get_document_bulk_delete``; a missing
            or ``None`` value is treated as ``True``.
        skip_task_updating: when truthy, this task's metadata is left
            untouched.

    Documents are bulk-deleted in slices of ``self.document_chunk_size``
    ids, with a progress message logged after every slice. A failure while
    removing the documents' source files is logged and does not stop the
    rest of the cleanup.
    """
    target_pk = kwargs.get('_project_id')
    delete_flag = bool(kwargs.get('delete'))
    safe_mode = kwargs.get('safe_delete')
    safe_mode = True if safe_mode is None else safe_mode
    project = Project.all_objects.get(pk=target_pk)

    # collect the project's document ids and their source file paths
    repository = self.document_repository
    document_ids = repository.get_project_document_ids(target_pk)
    source_paths = repository.get_all_document_source_paths(document_ids)

    # remove the source files; errors are only logged
    try:
        from apps.document.sync_tasks.document_files_cleaner import DocumentFilesCleaner
        DocumentFilesCleaner.delete_document_files(source_paths)
    except Exception as cleanup_error:
        self.log_error(f'Unable to clean document files: {source_paths}',
                       exc_info=cleanup_error)

    # remove the documents themselves, one chunk at a time
    from apps.document.repository.document_bulk_delete \
        import get_document_bulk_delete
    bulk_deleter = get_document_bulk_delete(safe_mode)
    total_docs = len(document_ids)
    chunk_size = self.document_chunk_size
    for chunk_start in range(0, total_docs, chunk_size):
        chunk_end = min(total_docs, chunk_start + chunk_size)
        bulk_deleter.delete_documents(document_ids[chunk_start:chunk_end])
        progress = 'Deleted {}-{} documents from total {} from project #{}'.format(
            chunk_start, chunk_end, total_docs, target_pk)
        self.log_info(progress)

    # clean up the project itself
    project.cleanup(delete=delete_flag)
    self.log_info('Cleaned up project #{} itself'.format(target_pk))

    if kwargs.get('skip_task_updating'):
        return

    # record the cleanup in this task's metadata; the project id key is
    # prefixed with "_" to avoid detecting the task as a project task
    cleanup_record = {
        'task_name': 'clean-project',
        '_project_id': target_pk,
    }
    task_record = self.task
    task_record.metadata = cleanup_record
    task_record.save()
def confirm_delete_view(self, request):
    """
    Confirmation step for deleting the projects stored in the session.

    The project ids are read from ``request.session['_project_ids']``.

    GET renders the confirmation page. The row list always starts with
    ``('Projects', <n>, True)`` and ``('Documents', <n>, True)``; when the
    query string contains ``details=true`` it is followed by per-model
    counts sorted by name.
    Any other method starts the ``CleanProjects`` task with ``delete=True``
    and redirects to ``"../"``.
    """
    selected_projects = request.session.get('_project_ids')

    if request.method != 'GET':
        # POST: actual delete
        from apps.task.tasks import _call_task
        _call_task(
            task_name='CleanProjects',
            module_name='apps.project.tasks',
            _project_ids=selected_projects,
            user_id=request.user.id,
            delete=True)
        return HttpResponseRedirect("../")

    project_documents = Document.all_objects.filter(
        project_id__in=selected_projects).values_list('id', flat=True)
    show_details = request.GET.get('details') == 'true'

    detail_rows = []
    if show_details:
        from apps.document.repository.document_bulk_delete \
            import get_document_bulk_delete
        counts_by_table = get_document_bulk_delete().calculate_deleting_count(
            project_documents)
        class_dictionary = ModelClassDictionary()

        # keep only tables known to the model dictionary, keyed by readable name
        counts_by_name = {}
        for table in counts_by_table:
            if table in class_dictionary.model_by_table:
                readable_name = class_dictionary.get_model_class_name_hr(table)
                counts_by_name[readable_name] = counts_by_table[table]

        detail_rows = sorted(
            ((name, counts_by_name[name], False) for name in counts_by_name),
            key=lambda row: row[0])

    documents_row = ('Documents', len(project_documents), True)
    projects_row = ('Projects', len(selected_projects), True)
    rows = [projects_row, documents_row] + detail_rows

    context = {
        'deleting_count': rows,
        'return_url': 'admin:project_softdeleteproject_changelist',
        'details': show_details
    }
    return render(
        request,
        "admin/project/softdeleteproject/confirm_delete_view.html",
        context)
def process(self, **kwargs):
    """
    Remove an upload session and everything attached to it.

    Keyword arguments read:
        session_id: primary key of the upload session (required; a missing
            key raises ``KeyError``).

    Steps, each followed by a ``track_timelog`` entry:
        1. purge the main tasks whose metadata references the session;
        2. bulk-delete the session's documents, retrying up to 3 times
           with a 60 second pause between attempts;
        3. remove the session's files from file storage;
        4. delete the upload session;
        5. reindex the documents of the session's project.

    A failure of every delete attempt in step 2 is logged but does not
    abort the remaining steps.
    """
    session_id = kwargs['session_id']
    session = UploadSession.objects.get(pk=session_id)

    # 1. Purge Tasks
    self.track_timelog('')
    session_tasks = Task.objects.main_tasks().filter(metadata__session_id=session_id)
    self.log_info(f'Purge {session_tasks.count()} session tasks.')
    for a_task in session_tasks:
        try:
            purge_task(a_task.id)
        except Exception:
            # deliberate best effort: the task may already have been
            # deleted as a subtask of another purged task
            pass
    self.track_timelog('1 - purge tasks')

    # 2. Remove Documents
    document_ids = list(
        Document.objects.filter(upload_session_id=session_id).values_list('pk', flat=True))
    self.log_info(f'Remove {len(document_ids)} documents')

    from apps.document.repository.document_bulk_delete import get_document_bulk_delete

    # TODO: WHY it fails with
    # psycopg2.errors.ForeignKeyViolation: update or delete
    # on table "document_textunit" violates foreign key constraint
    attempts = 3
    delay = 60
    attempts_made = 0
    delete_manager = get_document_bulk_delete()
    error_logged = False
    for attempt in range(1, attempts + 1):
        attempts_made = attempt
        try:
            delete_manager.delete_documents(document_ids)
            break
        except Exception as e:
            # log the full error only once to avoid repeating the traceback
            if not error_logged:
                self.log_error('Error while deleting documents', exc_info=e)
                error_logged = True
            if attempt < attempts:
                self.log_info(f'Attempt #{attempt} of {attempts} to delete documents failed, retry')
                time.sleep(delay)
            else:
                # no retry follows the last attempt, so do not wait
                self.log_info(f'Attempt #{attempt} of {attempts} to delete documents failed')
    self.track_timelog(f'2 - bulk delete for {len(document_ids)} documents')
    if attempts_made > 1:
        self.log_error(f'{attempts_made} of {attempts} tried to delete documents')

    # 3. Remove files
    file_storage_exists = file_storage.document_exists(session_id)
    self.log_info(f'File Storage exists: {file_storage_exists}')

    files_removed, failed_removing = (0, 0)
    if file_storage_exists:
        files = file_storage.list_documents(session_id)
        self.log_info(f'Remove {len(files)} files from File Storage.')
        for file_path in files:
            file_storage.delete_document(file_path)
        # the counters below reflect only this removal of the session entry
        try:
            file_storage.delete_document(session_id)
            files_removed += 1
        except Exception:
            # TODO: removing folders through LocalStorage is not implemented
            failed_removing += 1
    self.track_timelog(f'3 - remove files ({files_removed} removed, {failed_removing} failed)')

    # 4. Remove Upload Session
    # NOTE(review): presumably unreachable, since objects.get() above is
    # expected to raise when the session does not exist - confirm
    if not session:
        raise Exception(f"Couldn't find session by id ({session_id})")

    self.log_info(f'Remove session uid="{session_id}".')
    # keep a reference to the project before the session row is deleted
    project = session.project
    session.delete()
    self.track_timelog('4 - delete session')

    # 5. Reindex Project
    self.log_info(f'Reindex project id="{project.id}" documents.')
    from apps.rawdb.tasks import reindex_all_project_documents
    call_task_func(reindex_all_project_documents, (project.pk,), None)
    self.track_timelog('5 - reindex project')
def process(self, **kwargs):
    """
    Bulk-delete documents and then remove their source files.

    Keyword arguments read:
        _document_ids: ids of the documents to delete.

    Exceptions raised by any step propagate to the caller.
    """
    document_ids = kwargs.get('_document_ids')

    # look the source paths up before the documents are deleted
    repository = self.document_repository
    source_paths = repository.get_all_document_source_paths(document_ids)

    bulk_deleter = get_document_bulk_delete()
    bulk_deleter.delete_documents(document_ids)

    DocumentFilesCleaner.delete_document_files(source_paths)
def process(self, **kwargs):
    """
    Bulk-delete documents and hand their source files over for removal.

    Keyword arguments read:
        _document_ids: ids of the documents to delete.

    After the documents are deleted, their source file paths are passed
    as ``metadata`` to the ``DeleteDocumentFiles`` task via ``call_task``.
    """
    document_ids = kwargs.get('_document_ids')

    # look the source paths up before the documents are deleted
    repository = self.document_repository
    source_paths = repository.get_all_document_source_paths(document_ids)

    bulk_deleter = get_document_bulk_delete()
    bulk_deleter.delete_documents(document_ids)

    call_task(DeleteDocumentFiles, metadata=source_paths)