예제 #1
0
    def process(self, **kwargs):
        """
        Remove a project's documents and then clean up the project.

        The source file paths of the project's documents are handed to the
        ``DeleteDocumentFiles`` task through ``call_task``, the documents are
        bulk-deleted, ``project.cleanup`` is called and finally the cleanup
        is recorded in this task's metadata.

        Keyword arguments read:
            _project_id: primary key used to load the project.
            delete: coerced to ``bool`` and forwarded to ``project.cleanup``.
        """
        target_id = kwargs.get('_project_id')
        delete_flag = bool(kwargs.get('delete'))
        target_project = Project.all_objects.get(pk=target_id)

        # collect the project's document ids and the paths of their source
        # files, then pass the paths on to the file-deleting task
        doc_ids = self.document_repository.get_project_document_ids(target_id)
        source_paths = self.document_repository.get_all_document_source_paths(doc_ids)
        from apps.document.tasks import DeleteDocumentFiles
        call_task(DeleteDocumentFiles, metadata=source_paths)

        # bulk-delete the documents
        from apps.document.repository.document_bulk_delete import get_document_bulk_delete
        bulk_deleter = get_document_bulk_delete()
        bulk_deleter.delete_documents(doc_ids)

        # clean up the project itself
        target_project.cleanup(delete=delete_flag)

        # remember which project was cleaned in this task's metadata;
        # the id key carries a leading "_" to avoid detecting task as project task
        this_task = self.task
        cleanup_info = {'task_name': 'clean-project'}
        cleanup_info['_project_id'] = target_id
        this_task.metadata = cleanup_info
        this_task.save()
예제 #2
0
    def process(self, **kwargs):
        """
        Remove a project's documents (source files first, then the document
        records) and clean up the project.

        Keyword arguments read:
            _project_id: primary key used to load the project.
            delete: coerced to ``bool`` and forwarded to ``project.cleanup``.
            safe_delete: passed to ``get_document_bulk_delete``; a missing or
                ``None`` value is treated as ``True``.
            skip_task_updating: when truthy, this task's metadata is not
                updated.
        """
        target_id = kwargs.get('_project_id')
        delete_flag = bool(kwargs.get('delete'))
        safe_flag = kwargs.get('safe_delete')
        safe_flag = True if safe_flag is None else safe_flag
        target_project = Project.all_objects.get(pk=target_id)

        # collect document ids and the paths of the documents' source files
        doc_ids = self.document_repository.get_project_document_ids(target_id)
        source_paths = self.document_repository.get_all_document_source_paths(doc_ids)
        try:
            from apps.document.sync_tasks.document_files_cleaner import DocumentFilesCleaner
            DocumentFilesCleaner.delete_document_files(source_paths)
        except Exception as cleanup_error:
            # a failed file cleanup is only logged; the remaining steps still run
            self.log_error(f'Unable to clean document files: {source_paths}',
                           exc_info=cleanup_error)

        # bulk-delete the documents
        from apps.document.repository.document_bulk_delete import get_document_bulk_delete
        bulk_deleter = get_document_bulk_delete(safe_flag)
        bulk_deleter.delete_documents(doc_ids)

        # clean up the project itself
        target_project.cleanup(delete=delete_flag)

        if kwargs.get('skip_task_updating'):
            return

        # remember which project was cleaned in this task's metadata;
        # the id key carries a leading "_" to avoid detecting task as project task
        this_task = self.task
        cleanup_info = {'task_name': 'clean-project'}
        cleanup_info['_project_id'] = target_id
        this_task.metadata = cleanup_info
        this_task.save()
예제 #3
0
 def process(self, **kwargs):
     """
     Bulk-delete the given documents and then remove their source files.

     Keyword arguments read:
         _document_ids: ids of the documents to delete.

     A failure while removing the source files is logged with its
     traceback and does not propagate to the caller.
     """
     doc_ids = kwargs.get('_document_ids')
     # the paths are collected before the bulk delete - presumably because
     # they cannot be looked up once the documents are gone (TODO confirm)
     file_paths = self.document_repository.get_all_document_source_paths(doc_ids)
     get_document_bulk_delete().delete_documents(doc_ids)
     try:
         DocumentFilesCleaner.delete_document_files(file_paths)
     except Exception as e:
         # log a readable message plus the exception instead of passing the
         # bare exception object as the message
         self.log_error(f'Unable to clean document files: {file_paths}', exc_info=e)
예제 #4
0
    def confirm_delete_view(self, request):
        """
        Confirmation step for deleting the documents whose ids are stored in
        the session under ``_doc_ids``.

        A GET request renders the confirmation template with the number of
        records to delete per model. Any other request starts the
        ``DeleteDocuments`` task for these ids and redirects to ``"../"``.
        """
        from apps.document.repository.document_bulk_delete \
            import get_document_bulk_delete
        selected_ids = request.session.get('_doc_ids')

        if request.method != 'GET':
            # POST: actual delete
            from apps.task.tasks import call_task
            call_task(
                task_name='DeleteDocuments',
                module_name='apps.document.tasks',
                _document_ids=selected_ids,
                user_id=request.user.id)
            from django.http import HttpResponseRedirect
            return HttpResponseRedirect("../")

        counts_by_table = get_document_bulk_delete().calculate_deleting_count(selected_ids)
        model_dict = ModelClassDictionary()

        # keep only the tables known to the model dictionary, keyed by the
        # name returned from get_model_class_name_hr
        counts_by_model = {}
        for table in counts_by_table:
            if table in model_dict.model_by_table:
                model_name = model_dict.get_model_class_name_hr(table)
                counts_by_model[model_name] = counts_by_table[table]

        # one row per model ordered by name, preceded by the documents row
        rows = sorted(
            [(model_name, counts_by_model[model_name], False)
             for model_name in counts_by_model],
            key=lambda row: row[0])
        rows.insert(0, ('Documents', len(selected_ids), True))

        from django.shortcuts import render
        return render(
            request,
            "admin/common/confirm_delete_view.html",
            {
                'deleting_count': rows,
                'return_url': 'admin:document_softdeletedocument_changelist'
            })
예제 #5
0
    def process(self, **kwargs):
        """
        Remove a project's documents in chunks and clean up the project.

        The documents' source files are removed first, then the document
        records are bulk-deleted ``self.document_chunk_size`` ids at a time
        with a log line per chunk, and finally ``project.cleanup`` is called.

        Keyword arguments read:
            _project_id: primary key used to load the project.
            delete: coerced to ``bool`` and forwarded to ``project.cleanup``.
            safe_delete: passed to ``get_document_bulk_delete``; a missing or
                ``None`` value is treated as ``True``.
            skip_task_updating: when truthy, this task's metadata is not
                updated.
        """
        target_id = kwargs.get('_project_id')
        delete_flag = bool(kwargs.get('delete'))
        safe_flag = kwargs.get('safe_delete')
        safe_flag = True if safe_flag is None else safe_flag
        target_project = Project.all_objects.get(pk=target_id)

        # collect document ids and the paths of the documents' source files
        doc_ids = self.document_repository.get_project_document_ids(target_id)
        source_paths = self.document_repository.get_all_document_source_paths(doc_ids)
        try:
            from apps.document.sync_tasks.document_files_cleaner import DocumentFilesCleaner
            DocumentFilesCleaner.delete_document_files(source_paths)
        except Exception as cleanup_error:
            # a failed file cleanup is only logged; the remaining steps still run
            self.log_error(f'Unable to clean document files: {source_paths}',
                           exc_info=cleanup_error)

        # bulk-delete the documents chunk by chunk
        from apps.document.repository.document_bulk_delete import get_document_bulk_delete

        bulk_deleter = get_document_bulk_delete(safe_flag)
        total_docs = len(doc_ids)
        for chunk_start in range(0, total_docs, self.document_chunk_size):
            # the last chunk may be shorter than document_chunk_size
            chunk_end = min(total_docs, chunk_start + self.document_chunk_size)
            bulk_deleter.delete_documents(doc_ids[chunk_start:chunk_end])
            progress_message = 'Deleted {}-{} documents from total {} from project #{}'.format(
                chunk_start, chunk_end, total_docs, target_id)
            self.log_info(progress_message)

        # clean up the project itself
        target_project.cleanup(delete=delete_flag)
        self.log_info('Cleaned up project #{} itself'.format(target_id))

        if kwargs.get('skip_task_updating'):
            return

        # remember which project was cleaned in this task's metadata;
        # the id key carries a leading "_" to avoid detecting task as project task
        this_task = self.task
        cleanup_info = {'task_name': 'clean-project'}
        cleanup_info['_project_id'] = target_id
        this_task.metadata = cleanup_info
        this_task.save()
예제 #6
0
    def confirm_delete_view(self, request):
        """
        Confirmation step for deleting the projects whose ids are stored in
        the session under ``_project_ids``.

        A GET request renders the confirmation template; the per-model
        counts are only calculated when the ``details`` query parameter
        equals ``'true'``. Any other request starts the ``CleanProjects``
        task with ``delete=True`` and redirects to ``"../"``.
        """
        selected_ids = request.session.get('_project_ids')

        if request.method != 'GET':
            # POST: actual delete
            from apps.task.tasks import _call_task
            _call_task(
                task_name='CleanProjects',
                module_name='apps.project.tasks',
                _project_ids=selected_ids,
                user_id=request.user.id,
                delete=True)
            return HttpResponseRedirect("../")

        project_docs = Document.all_objects.filter(project_id__in=selected_ids)
        doc_ids = project_docs.values_list('id', flat=True)

        show_details = request.GET.get('details') == 'true'
        rows = []
        if show_details:
            from apps.document.repository.document_bulk_delete \
                import get_document_bulk_delete
            counts_by_table = get_document_bulk_delete().calculate_deleting_count(doc_ids)
            model_dict = ModelClassDictionary()

            # keep only the tables known to the model dictionary, keyed by
            # the name returned from get_model_class_name_hr
            counts_by_model = {}
            for table in counts_by_table:
                if table in model_dict.model_by_table:
                    model_name = model_dict.get_model_class_name_hr(table)
                    counts_by_model[model_name] = counts_by_table[table]

            # rows per model ordered by name; the documents and projects
            # rows are then put in front of them (projects first)
            rows = sorted(
                [(model_name, counts_by_model[model_name], False)
                 for model_name in counts_by_model],
                key=lambda row: row[0])
            rows.insert(0, ('Documents', len(doc_ids), True))
            rows.insert(0, ('Projects', len(selected_ids), True))

        template_context = {
            'deleting_count': rows,
            'return_url': 'admin:project_softdeleteproject_changelist',
            'details': show_details
        }
        return render(
            request,
            "admin/project/softdeleteproject/confirm_delete_view.html",
            template_context)
예제 #7
0
    def process(self, **kwargs):
        """
        Delete an upload session together with the data attached to it.

        Steps, in order:
            1. purge the main tasks whose metadata references the session;
            2. bulk-delete the session's documents (up to 3 attempts, with a
               60 second pause between attempts);
            3. remove the session's files from the file storage;
            4. delete the UploadSession record;
            5. start reindexing of the documents of the session's project.

        Keyword arguments read:
            session_id: primary key of the UploadSession; required, a
                missing key raises ``KeyError``.

        If every document deletion attempt fails, the failure is logged and
        the remaining steps are still executed.
        """
        session_id = kwargs['session_id']
        session = UploadSession.objects.get(pk=session_id)

        # 1. Purge Tasks
        self.track_timelog('')
        session_tasks = Task.objects.main_tasks().filter(metadata__session_id=session_id)
        self.log_info(f'Purge {session_tasks.count()} session tasks.')
        for a_task in session_tasks:
            try:
                purge_task(a_task.id)
            except Exception:
                # case when task is already deleted as subtask;
                # only Exception is swallowed so that KeyboardInterrupt /
                # SystemExit still propagate
                pass
        self.track_timelog('1 - purge tasks')

        # 2. Remove Documents+
        document_ids = \
            list(Document.objects.filter(upload_session_id=session_id).values_list('pk', flat=True))
        self.log_info(f'Remove {len(document_ids)} documents')
        from apps.document.repository.document_bulk_delete import get_document_bulk_delete

        # TODO: WHY it fails with
        # psycopg2.errors.ForeignKeyViolation: update or delete
        # on table "document_textunit" violates foreign key constraint
        attempts = 3
        delay = 60
        attempts_made = 0
        delete_manager = get_document_bulk_delete()
        error_logged = False

        for attempt in range(1, attempts + 1):
            attempts_made = attempt
            try:
                delete_manager.delete_documents(document_ids)
                break
            except Exception as e:
                # the full error is logged once, later failures only get a short note
                if not error_logged:
                    self.log_error('Error while deleting documents', exc_info=e)
                    error_logged = True
                self.log_info(f'Attempt #{attempt} of {attempts} to delete documents failed, retry')

                # wait only when another attempt follows
                if attempt < attempts:
                    time.sleep(delay)
        self.track_timelog(f'2 - bulk delete for {len(document_ids)} documents')
        if attempts_made > 1:
            self.log_error(f'{attempts_made} of {attempts} tried to delete documents')

        # 3. Remove files
        file_storage_exists = file_storage.document_exists(session_id)
        self.log_info(f'File Storage exists: {file_storage_exists}')

        files_removed, failed_removing = (0, 0)
        if file_storage_exists:
            files = file_storage.list_documents(session_id)
            self.log_info(f'Remove {len(files)} files from File Storage.')
            for file_path in files:
                file_storage.delete_document(file_path)
            # the counters below only reflect this final delete_document(session_id)
            # call, not the per-file deletions above
            try:
                file_storage.delete_document(session_id)
                files_removed += 1
            except Exception:
                # TODO: removing folders through LocalStorage is not implemented
                failed_removing += 1
        self.track_timelog(f'3 - remove files ({files_removed} removed, {failed_removing} failed)')

        # 4. Remove Upload Session
        # NOTE(review): objects.get() above raises when no row matches, so
        # this guard looks unreachable - confirm before removing it
        if not session:
            raise Exception(f"Couldn't find session by id ({session_id})")

        self.log_info(f'Remove session uid="{session_id}".')
        # keep the project reference, it is needed after the session is deleted
        project = session.project
        session.delete()
        self.track_timelog('4 - delete session')

        # 5. Reindex Project
        self.log_info(f'Reindex project id="{project.id}" documents.')
        from apps.rawdb.tasks import reindex_all_project_documents
        call_task_func(reindex_all_project_documents, (project.pk,), None)
        self.track_timelog('5 - reindex project')
예제 #8
0
 def process(self, **kwargs):
     """
     Bulk-delete the documents listed in ``_document_ids`` and then remove
     their source files through ``DocumentFilesCleaner``.
     """
     ids_to_delete = kwargs.get('_document_ids')
     # the source paths are collected before the documents are deleted
     source_paths = self.document_repository.get_all_document_source_paths(ids_to_delete)
     bulk_deleter = get_document_bulk_delete()
     bulk_deleter.delete_documents(ids_to_delete)
     DocumentFilesCleaner.delete_document_files(source_paths)
 def process(self, **kwargs):
     """
     Bulk-delete the documents listed in ``_document_ids`` and pass the
     paths of their source files to the ``DeleteDocumentFiles`` task.
     """
     ids_to_delete = kwargs.get('_document_ids')
     # the source paths are collected before the documents are deleted
     source_paths = self.document_repository.get_all_document_source_paths(ids_to_delete)
     bulk_deleter = get_document_bulk_delete()
     bulk_deleter.delete_documents(ids_to_delete)
     call_task(DeleteDocumentFiles, metadata=source_paths)