def test_file_deletion():
    """Deleting a StagedAjaxFile removes its chunk files and upload directory."""
    content = b"HelloWorld" * 5
    file_uuid = create_uploaded_file(
        content,
        list(range(1, len(content) + 1)),
        init_total_size=False,
    )
    staged = StagedAjaxFile(file_uuid)
    assert staged.exists
    assert staged.is_complete
    assert staged.size == len(content)

    chunks = StagedFile.objects.filter(file_id=file_uuid).all()
    # NOTE(review): JQFILEUPLOAD_UPLOAD_SUBIDRECTORY looks misspelled but must
    # match the setting name defined elsewhere in the project — verify there.
    upload_dir = os.path.join(
        settings.MEDIA_ROOT,
        settings.JQFILEUPLOAD_UPLOAD_SUBIDRECTORY,
        str(file_uuid),
    )
    assert os.path.isdir(upload_dir)

    chunk_paths = [
        os.path.join(settings.MEDIA_ROOT, chunk.file.name) for chunk in chunks
    ]
    for chunk_path in chunk_paths:
        assert os.path.exists(chunk_path)

    staged.delete()

    assert not staged.exists
    assert not staged.is_complete
    assert not os.path.isdir(upload_dir)
    for chunk_path in chunk_paths:
        assert not os.path.exists(chunk_path)
def _delete_session_files(*, session_files, upload_session):
    """
    Delete the staged files backing *session_files*, keeping selected ones.

    Two classes of unconsumed files are retained (their staging timeout is
    extended by 90 days instead of deleting them):

    - files belonging to an upload session attached to an archive, and
    - ``.dcm`` files whose creator is in the DICOM data creators group.

    All other files have their staged data deleted and the owning
    ``RawImageFile`` unlinked from it. Missing staged files are ignored.

    Parameters
    ----------
    session_files
        Iterable of RawImageFile-like objects to process.
    upload_session
        The session the files belong to; only its ``archive`` attribute
        is inspected.
    """
    dicom_group = Group.objects.get(
        name=settings.DICOM_DATA_CREATORS_GROUP_NAME
    )
    # Materialize once as a set: the original lazy values_list queryset was
    # re-evaluated (one DB query) for every membership test in the loop below.
    dicom_usernames = set(
        dicom_group.user_set.values_list("username", flat=True)
    )
    # Hoisted loop invariant: one consistent timeout for all retained files.
    extended_timeout = timezone.now() + timedelta(days=90)

    for file in session_files:
        try:
            if file.staged_file_id:
                saf = StagedAjaxFile(file.staged_file_id)
                if not file.consumed and upload_session.archive:
                    # Keep unconsumed archive files
                    saf.staged_files.update(timeout=extended_timeout)
                    continue
                if (
                    not file.consumed
                    and Path(file.filename).suffix == ".dcm"
                    and getattr(file.creator, "username", None)
                    in dicom_usernames
                ):
                    # Keep unconsumed DICOM files from authorized creators
                    saf.staged_files.update(timeout=extended_timeout)
                    continue
                file.staged_file_id = None
                saf.delete()
                file.save()
        except NotFoundError:
            # Staged data already gone; nothing to clean up for this file.
            pass
def _populate_tmp_dir(tmp_dir, upload_session):
    """
    Provision *tmp_dir* with the session's uniquely-named raw files and
    extract any archives found among them.

    Files whose names collide are flagged with an error, their staged data
    is deleted, and they are marked unconsumed before saving.
    """
    all_files = upload_session.rawimagefile_set.all()
    unique_files, duplicates = remove_duplicate_files(all_files)

    for dup in duplicates:  # type: RawImageFile
        dup.error = "Filename not unique"
        staged = StagedAjaxFile(dup.staged_file_id)
        dup.staged_file_id = None
        staged.delete()
        dup.consumed = False
        dup.save()

    populate_provisioning_directory(unique_files, tmp_dir)
    extract_files(tmp_dir)
def test_missing_file():
    """Once the backing chunks are deleted, every accessor raises appropriately."""
    content = b"HelloWorld" * 5
    file_uuid = create_uploaded_file(content, [len(content)])
    staged = StagedAjaxFile(file_uuid)
    assert staged.exists
    assert staged.is_complete

    # Remove the chunk rows out from under the StagedAjaxFile wrapper.
    StagedFile.objects.filter(file_id=staged.uuid).all().delete()

    assert not staged.exists
    assert not staged.is_complete
    with pytest.raises(NotFoundError):
        staged.name
    with pytest.raises(NotFoundError):
        staged.size
    with pytest.raises(NotFoundError):
        staged.delete()
    with pytest.raises(IOError):
        staged.open()
def test_file_deletion():
    """Deleting a StagedAjaxFile removes every chunk from private S3 storage."""
    content = b"HelloWorld" * 5
    file_uuid = create_uploaded_file(
        content,
        list(range(1, len(content) + 1)),
        init_total_size=False,
    )
    staged = StagedAjaxFile(file_uuid)
    assert staged.exists
    assert staged.is_complete
    assert staged.size == len(content)

    chunk_names = [
        chunk.file.name
        for chunk in StagedFile.objects.filter(file_id=file_uuid).all()
    ]
    for name in chunk_names:
        assert private_s3_storage.exists(name)

    staged.delete()

    assert not staged.exists
    assert not staged.is_complete
    for name in chunk_names:
        assert not private_s3_storage.exists(name)
def test_missing_file():
    """Accessors of a StagedAjaxFile fail once its chunk records are gone."""
    payload = b"HelloWorld" * 5
    uuid = create_uploaded_file(payload, [len(payload)])
    ajax_file = StagedAjaxFile(uuid)
    assert ajax_file.exists
    assert ajax_file.is_complete

    chunk_qs = StagedFile.objects.filter(file_id=ajax_file.uuid).all()
    chunk_qs.delete()

    assert not ajax_file.exists
    assert not ajax_file.is_complete
    with pytest.raises(NotFoundError):
        ajax_file.name
    with pytest.raises(NotFoundError):
        ajax_file.size
    with pytest.raises(NotFoundError):
        ajax_file.delete()
    with pytest.raises(IOError):
        ajax_file.open()
def build_images(upload_session_uuid: UUID):
    """
    Task which analyzes an upload session and attempts to extract and store
    detected images assembled from files uploaded in the image session.

    The task updates the state-field of the associated
    :class:`RawImageUploadSession` to indicate if it is running or has
    finished computing.

    Results are stored in:
    - `RawImageUploadSession.error_message` if a general error occurred
      during processing.
    - The `RawImageFile.error` field of associated `RawImageFile` objects,
      in case files could not be processed.

    The operation of building images will delete associated `StagedAjaxFile`s
    of analyzed images in order to free up space on the server (only done if
    the function does not error out).

    If a job fails due to a RawImageUploadSession.DoesNotExist error, the
    job is queued for a retry (max 15 times).

    Parameters
    ----------
    upload_session_uuid: UUID
        The uuid of the upload sessions that should be analyzed.
    """
    upload_session = RawImageUploadSession.objects.get(
        pk=upload_session_uuid
    )  # type: RawImageUploadSession

    if upload_session.session_state == UPLOAD_SESSION_STATE.queued:
        tmp_dir = Path(mkdtemp(prefix="construct_image_volumes-"))
        try:
            try:
                upload_session.session_state = UPLOAD_SESSION_STATE.running
                upload_session.save()

                session_files = RawImageFile.objects.filter(
                    upload_session=upload_session.pk
                ).all()  # type: Tuple[RawImageFile]

                session_files, duplicates = remove_duplicate_files(
                    session_files
                )
                for duplicate in duplicates:  # type: RawImageFile
                    duplicate.error = "Filename not unique"
                    saf = StagedAjaxFile(duplicate.staged_file_id)
                    duplicate.staged_file_id = None
                    saf.delete()
                    duplicate.save()

                populate_provisioning_directory(session_files, tmp_dir)

                filename_lookup = {
                    StagedAjaxFile(
                        raw_image_file.staged_file_id
                    ).name: raw_image_file
                    for raw_image_file in session_files
                }
                unconsumed_filenames = set(filename_lookup.keys())

                collected_images = []
                collected_associated_files = []
                for algorithm in IMAGE_BUILDER_ALGORITHMS:
                    algorithm_result = algorithm(
                        tmp_dir
                    )  # type: ImageBuilderResult

                    collected_images += list(algorithm_result.new_images)
                    collected_associated_files += list(
                        algorithm_result.new_image_files
                    )

                    for filename in algorithm_result.consumed_files:
                        if filename in unconsumed_filenames:
                            unconsumed_filenames.remove(filename)
                    for (
                        filename,
                        msg,
                    ) in algorithm_result.file_errors_map.items():
                        if filename in unconsumed_filenames:
                            unconsumed_filenames.remove(filename)
                            raw_image = filename_lookup[
                                filename
                            ]  # type: RawImageFile
                            # Truncate to fit the error field's max length.
                            raw_image.error = str(msg)[:256]
                            raw_image.save()

                for image in collected_images:
                    image.origin = upload_session
                    store_image(image, collected_associated_files)

                for unconsumed_filename in unconsumed_filenames:
                    raw_file = filename_lookup[unconsumed_filename]
                    raw_file.error = (
                        "File could not be processed by any image builder"
                    )
                    # Bug fix: persist the error. Previously it was only set
                    # in memory and never written to the database.
                    raw_file.save()

                if upload_session.imageset:
                    upload_session.imageset.images.add(*collected_images)

                if upload_session.annotationset:
                    upload_session.annotationset.images.add(*collected_images)

                if upload_session.algorithm:
                    for image in collected_images:
                        Job.objects.create(
                            algorithm=upload_session.algorithm, image=image
                        )

                if upload_session.algorithm_result:
                    upload_session.algorithm_result.images.add(
                        *collected_images
                    )

                # Delete any touched file data
                for file in session_files:
                    try:
                        saf = StagedAjaxFile(file.staged_file_id)
                        file.staged_file_id = None
                        saf.delete()
                        file.save()
                    except NotFoundError:
                        pass
            except Exception as e:
                upload_session.error_message = str(e)
        finally:
            if tmp_dir is not None:
                shutil.rmtree(tmp_dir)

            upload_session.session_state = UPLOAD_SESSION_STATE.stopped
            upload_session.save()
def build_images(upload_session_uuid: UUID):
    """
    Task which analyzes an upload session and attempts to extract and store
    detected images assembled from files uploaded in the image session.

    The task updates the state-field of the associated
    :class:`RawImageUploadSession` to indicate if it is running or has
    finished computing.

    Results are stored in:
    - `RawImageUploadSession.error_message` if a general error occurred
      during processing.
    - The `RawImageFile.error` field of associated `RawImageFile` objects,
      in case files could not be processed.

    The operation of building images will delete associated `StagedAjaxFile`s
    of analyzed images in order to free up space on the server (only done if
    the function does not error out).

    If a job fails due to a RawImageUploadSession.DoesNotExist error, the
    job is queued for a retry (max 15 times).

    Parameters
    ----------
    upload_session_uuid: UUID
        The uuid of the upload sessions that should be analyzed.
    """
    upload_session = RawImageUploadSession.objects.get(
        pk=upload_session_uuid
    )  # type: RawImageUploadSession

    if upload_session.session_state == UPLOAD_SESSION_STATE.queued:
        tmp_dir = Path(mkdtemp(prefix="construct_image_volumes-"))
        try:
            try:
                upload_session.session_state = UPLOAD_SESSION_STATE.running
                upload_session.save()

                session_files = RawImageFile.objects.filter(
                    upload_session=upload_session.pk
                ).all()  # type: Tuple[RawImageFile]

                session_files, duplicates = remove_duplicate_files(
                    session_files
                )
                for duplicate in duplicates:  # type: RawImageFile
                    duplicate.error = "Filename not unique"
                    saf = StagedAjaxFile(duplicate.staged_file_id)
                    duplicate.staged_file_id = None
                    saf.delete()
                    duplicate.save()

                populate_provisioning_directory(session_files, tmp_dir)

                filename_lookup = {
                    StagedAjaxFile(
                        raw_image_file.staged_file_id
                    ).name: raw_image_file
                    for raw_image_file in session_files
                }
                unconsumed_filenames = set(filename_lookup.keys())

                collected_images = []
                collected_associated_files = []
                for algorithm in IMAGE_BUILDER_ALGORITHMS:
                    algorithm_result = algorithm(
                        tmp_dir
                    )  # type: ImageBuilderResult

                    collected_images += list(algorithm_result.new_images)
                    collected_associated_files += list(
                        algorithm_result.new_image_files
                    )

                    for filename in algorithm_result.consumed_files:
                        if filename in unconsumed_filenames:
                            unconsumed_filenames.remove(filename)
                    for (
                        filename,
                        msg,
                    ) in algorithm_result.file_errors_map.items():
                        if filename in unconsumed_filenames:
                            unconsumed_filenames.remove(filename)
                            raw_image = filename_lookup[
                                filename
                            ]  # type: RawImageFile
                            # Truncate to fit the error field's max length.
                            raw_image.error = str(msg)[:256]
                            raw_image.save()

                for image in collected_images:
                    image.origin = upload_session
                    store_image(image, collected_associated_files)

                for unconsumed_filename in unconsumed_filenames:
                    raw_file = filename_lookup[unconsumed_filename]
                    raw_file.error = (
                        "File could not be processed by any image builder"
                    )
                    # Bug fix: persist the error. Previously it was only set
                    # in memory and never written to the database.
                    raw_file.save()

                if upload_session.imageset:
                    upload_session.imageset.images.add(*collected_images)

                if upload_session.annotationset:
                    upload_session.annotationset.images.add(*collected_images)

                if upload_session.algorithm:
                    for image in collected_images:
                        Job.objects.create(
                            algorithm=upload_session.algorithm, image=image
                        )

                if upload_session.algorithm_result:
                    upload_session.algorithm_result.images.add(
                        *collected_images
                    )

                # Delete any touched file data
                for file in session_files:
                    try:
                        saf = StagedAjaxFile(file.staged_file_id)
                        file.staged_file_id = None
                        saf.delete()
                        file.save()
                    except NotFoundError:
                        pass
            except Exception as e:
                upload_session.error_message = str(e)
        finally:
            if tmp_dir is not None:
                shutil.rmtree(tmp_dir)

            upload_session.session_state = UPLOAD_SESSION_STATE.stopped
            upload_session.save()