def _delete_session_files(*, session_files, upload_session):
    """
    Remove the staged data behind the raw image files of a session.

    An unconsumed file keeps its staged data (the staged chunks get a new
    timeout 90 days from now) when the session has an archive, or when it
    is a ``.dcm`` file whose creator belongs to the group named by
    ``settings.DICOM_DATA_CREATORS_GROUP_NAME``. Every other file is
    unlinked from its staged file, and the staged file is deleted.
    Files whose staged data cannot be found are skipped silently.
    """
    dicom_creators = Group.objects.get(
        name=settings.DICOM_DATA_CREATORS_GROUP_NAME
    ).user_set.values_list("username", flat=True)

    for raw_file in session_files:
        try:
            if not raw_file.staged_file_id:
                continue

            staged = StagedAjaxFile(raw_file.staged_file_id)

            # Unconsumed archive files and unconsumed DICOM files from a
            # DICOM data creator are retained rather than deleted.
            keep = not raw_file.consumed and (
                upload_session.archive
                or (
                    Path(raw_file.filename).suffix == ".dcm"
                    and getattr(raw_file.creator, "username", None)
                    in dicom_creators
                )
            )
            if keep:
                staged.staged_files.update(
                    timeout=timezone.now() + timedelta(days=90)
                )
                continue

            raw_file.staged_file_id = None
            staged.delete()
            raw_file.save()
        except NotFoundError:
            pass
def test_file_deletion():
    """Deleting a staged file removes its chunk files and upload directory."""
    payload = b"HelloWorld" * 5
    file_uuid = create_uploaded_file(
        payload, list(range(1, len(payload) + 1)), init_total_size=False
    )
    staged = StagedAjaxFile(file_uuid)

    assert staged.exists
    assert staged.is_complete
    assert staged.size == len(payload)

    chunks = StagedFile.objects.filter(file_id=file_uuid).all()
    upload_dir = os.path.join(
        settings.MEDIA_ROOT,
        settings.JQFILEUPLOAD_UPLOAD_SUBIDRECTORY,
        str(file_uuid),
    )
    assert os.path.isdir(upload_dir)

    # Collect the on-disk paths before deleting so they can be re-checked
    chunk_paths = []
    for chunk in chunks:
        chunk_paths.append(os.path.join(settings.MEDIA_ROOT, chunk.file.name))
    assert all(os.path.exists(p) for p in chunk_paths)

    staged.delete()

    assert not staged.exists
    assert not staged.is_complete
    assert not os.path.isdir(upload_dir)
    assert not any(os.path.exists(p) for p in chunk_paths)
def validate_docker_image_async(*, pk: uuid.UUID, app_label: str, model_name: str):
    """
    Validate the container image of a model instance and mark it as ready.

    If the instance has no image yet, the image is first created from the
    staged upload referenced by ``instance.staged_image_uuid``. The image is
    then opened as a tar archive and ``manifest.json`` is read from it. On
    success ``image_sha256`` and ``ready`` are updated on the instance.

    Parameters
    ----------
    pk
        Primary key of the instance to validate.
    app_label, model_name
        Identify the model class, resolved through ``apps.get_model``.

    Raises
    ------
    ValidationError
        If ``manifest.json`` cannot be read from the archive, or if the
        manifest does not describe exactly one image. In both cases the
        instance's ``status`` is updated with an explanation first.
    """
    model = apps.get_model(app_label=app_label, model_name=model_name)
    instance = model.objects.get(pk=pk)

    if not instance.image:
        # Create the image from the staged file
        uploaded_image = StagedAjaxFile(instance.staged_image_uuid)
        with uploaded_image.open() as f:
            instance.image.save(uploaded_image.name, File(f))

    try:
        with instance.image.open(mode="rb") as im, tarfile.open(
            fileobj=im, mode="r"
        ) as t:
            # getmember raises KeyError when the entry is absent
            manifest_file = t.extractfile(t.getmember("manifest.json"))
            if manifest_file is None:
                # Entry exists but is not a regular file (e.g. a directory)
                raise KeyError("manifest.json")
            manifest = manifest_file.read()
    except (KeyError, tarfile.ReadError) as e:
        model.objects.filter(pk=pk).update(
            status=(
                "manifest.json not found at the root of the container image file. "
                "Was this created with docker save?"
            )
        )
        raise ValidationError("Invalid Dockerfile") from e

    manifest = json.loads(manifest)

    if len(manifest) != 1:
        model.objects.filter(pk=pk).update(
            status=(
                f"The container image file should only have 1 image. "
                f"This file contains {len(manifest)}."
            )
        )
        raise ValidationError("Invalid Dockerfile")

    # Only the first 64 characters of the Config entry are stored as the hash
    model.objects.filter(pk=pk).update(
        image_sha256=f"sha256:{manifest[0]['Config'][:64]}", ready=True
    )
def test_staged_file_to_django_file():
    """A staged file can be wrapped in a Django ``File`` and read to the end."""
    expected = b"HelloWorld" * 5
    staged = StagedAjaxFile(
        create_uploaded_file(expected, client_filename="bla")
    )
    assert staged.name == "bla"

    with staged.open() as handle:
        wrapped = files.File(handle)
        everything = wrapped.read()
        assert everything == expected
        # Nothing is left once the whole content was consumed
        leftover = wrapped.read(1)
        assert leftover == b""
def test_staged_file_to_django_file():
    """Reading a staged file through ``files.File`` yields the uploaded bytes."""
    payload = b"HelloWorld" * 5
    file_uuid = create_uploaded_file(payload, client_filename="bla")

    staged_file = StagedAjaxFile(file_uuid)
    assert staged_file.name == "bla"

    with staged_file.open() as stream:
        django_wrapper = files.File(stream)
        assert django_wrapper.read() == payload
        # A further read hits end-of-file
        assert django_wrapper.read(1) == b""
def copy_to_tmpdir(image_file: RawImageFile):
    """
    Copy the staged file behind ``image_file`` into ``provisioning_dir``.

    The destination file is named after the staged file and the content is
    copied in 64 KiB blocks until the source reports end-of-file.

    Raises
    ------
    ValueError
        If the staged file referenced by ``image_file`` does not exist.
    """
    staged_file = StagedAjaxFile(image_file.staged_file_id)
    if not staged_file.exists:
        raise ValueError(
            f"staged file {image_file.staged_file_id} does not exist"
        )

    BUFFER_SIZE = 0x10000
    with open(provisioning_dir / staged_file.name, "wb") as dest_file:
        with staged_file.open() as src_file:
            # Stop only on an empty read: a short read does not necessarily
            # mean end-of-file, so stopping on it could truncate the copy.
            while True:
                buffer = src_file.read(BUFFER_SIZE)
                if not buffer:
                    break
                dest_file.write(buffer)
def test_rfc7233_implementation_api(client):
    """Upload a file in three ranged parts through the API and read it back."""
    content = load_test_data()
    upload_id = generate_new_upload_id(
        test_rfc7233_implementation_api, content
    )
    url = reverse("api:staged-file-list")
    _, token = AuthToken.objects.create(user=UserFactory())

    midpoint = len(content) // 2
    byte_ranges = ((0, 10), (10, midpoint), (midpoint, len(content)))

    response = None
    for start, end in byte_ranges:
        response = create_partial_upload_file_request(
            client,
            upload_id,
            content,
            start,
            end,
            url=url,
            extra_headers={"HTTP_AUTHORIZATION": f"Bearer {token}"},
        )
        assert response.status_code == 201

    # The response to the final part describes the assembled file
    parsed_json = response.json()
    staged_file = StagedAjaxFile(uuid.UUID(parsed_json[0]["uuid"]))
    with staged_file.open() as stream:
        staged_content = stream.read()

    assert len(staged_content) == len(content)
    assert hash(staged_content) == hash(content)
    assert staged_content == content
def _populate_tmp_dir(tmp_dir, upload_session):
    """
    Fill ``tmp_dir`` with the unique raw image files of ``upload_session``.

    Files reported as duplicates by ``remove_duplicate_files`` are flagged
    with an error, detached from their staged file (which is deleted) and
    marked as not consumed. The remaining files are provisioned into
    ``tmp_dir`` and then passed through ``extract_files``.
    """
    unique_files, rejected = remove_duplicate_files(
        upload_session.rawimagefile_set.all()
    )

    for dup in rejected:  # type: RawImageFile
        dup.error = "Filename not unique"
        # Grab the staged file before the reference to it is cleared
        stale = StagedAjaxFile(dup.staged_file_id)
        dup.staged_file_id = None
        stale.delete()
        dup.consumed = False
        dup.save()

    populate_provisioning_directory(unique_files, tmp_dir)
    extract_files(tmp_dir)
def download_view(self, request, object_id, **kwargs):
    """
    Serve the staged file of an object as a DICOM attachment.

    Raises
    ------
    PermissionDenied
        If the requesting user may neither view nor change the object.
    Http404
        If anything goes wrong while reading the staged file or building
        the response.
    """
    obj = self.get_object(request, unquote(object_id), None)
    if not self.has_view_or_change_permission(request, obj):
        raise PermissionDenied

    try:
        # The context manager closes the staged file once it has been read
        with StagedAjaxFile(obj.staged_file_id).open() as saf:
            response = HttpResponse(
                saf.read(), content_type="application/dicom"
            )
        response[
            "Content-Disposition"
        ] = f'attachment; filename="{obj.filename}"'
        return response
    except Exception as e:
        # Deliberately broad: any failure is reported as a missing file
        raise Http404("File not found") from e
def copy_to_tmpdir(image_file: RawImageFile):
    """
    Copy the staged file behind ``image_file`` into ``provisioning_dir``.

    The destination file is named after the staged file and the content is
    copied in 64 KiB blocks until the source reports end-of-file.

    Raises
    ------
    ValueError
        If the staged file referenced by ``image_file`` does not exist.
    """
    staged_file = StagedAjaxFile(image_file.staged_file_id)
    if not staged_file.exists:
        raise ValueError(
            f"staged file {image_file.staged_file_id} does not exist"
        )

    BUFFER_SIZE = 0x10000
    with open(provisioning_dir / staged_file.name, "wb") as dest_file:
        with staged_file.open() as src_file:
            # Stop only on an empty read: a short read does not necessarily
            # mean end-of-file, so stopping on it could truncate the copy.
            while True:
                buffer = src_file.read(BUFFER_SIZE)
                if not buffer:
                    break
                dest_file.write(buffer)
def test_file_cleanup():
    """Timed-out staged chunks are removed by ``cleanup_stale_files``."""
    payload = b"HelloWorld" * 5
    file_uuid = create_uploaded_file(
        payload,
        [len(payload)],
        client_filename="bla",
        timeout=timedelta(milliseconds=100),
    )
    staged = StagedAjaxFile(file_uuid)
    assert staged.exists

    chunks = StagedFile.objects.filter(file_id=staged.uuid).all()
    assert len(chunks) > 0
    assert all(
        private_s3_storage.exists(name=chunk.file.name) for chunk in chunks
    )

    # Force timeout and clean
    now = timezone.now()
    for chunk in chunks:
        chunk.timeout = now - timedelta(hours=1)
        chunk.save()
    cleanup_stale_files()

    assert not staged.exists
    remaining = StagedFile.objects.filter(file_id=staged.uuid).all()
    assert len(remaining) == 0
    assert not any(
        private_s3_storage.exists(name=chunk.file.name) for chunk in chunks
    )
def create_file_with_content(
    filename: str, content: bytes, chunk_size=None
) -> StagedAjaxFile:
    """
    Create a StagedAjaxFile from a filename and fill it with the provided
    contents.

    Tests using this function must specify the pytest.mark.django_db marker!

    Parameters
    ----------
    filename: str
        The filename to store for the newly created uploaded file.
    content: bytes
        The contents to put into the newly uploaded file.
    chunk_size: None or int (optional)
        If chunking should be tested, this function allows to control the
        chunk size that the file is split up. Default is None and will lead
        to the file being stored as a single chunk. Content shorter than two
        bytes cannot be split and is handled like ``chunk_size=None``.

    Returns
    -------
    StagedAjaxFile representing the newly created AjaxFile object
    """
    if chunk_size is None or len(content) < 2:
        # No split requested, or nothing to split. Previously empty content
        # without a chunk size hit range(..., None), and very short content
        # with a chunk size indexed into an empty boundary list.
        chunks = None
    else:
        chunks = list(range(1, len(content), chunk_size))
        if chunks[-1] != len(content) - 1:
            chunks.append(len(content) - 1)

    # Named file_uuid so the ``uuid`` module name is not shadowed
    file_uuid = create_uploaded_file(
        content, client_filename=filename, chunks=chunks
    )
    return StagedAjaxFile(file_uuid)
def test_single_chunk(rf: RequestFactory):
    """A single-request upload through the widget stores the full content."""
    filename = "test.bin"

    widget = AjaxUploadWidget(ajax_target_path="/ajax")
    widget.timeout = timedelta(seconds=1)

    response = widget.handle_ajax(
        create_upload_file_request(rf, filename=filename)
    )
    assert isinstance(response, JsonResponse)

    entries = json.loads(response.content)
    assert len(entries) == 1
    entry = entries[0]
    assert entry["filename"] == filename
    for required_key in ("uuid", "extra_attrs"):
        assert required_key in entry

    staged_file = StagedAjaxFile(uuid.UUID(entry["uuid"]))
    with staged_file.open() as stream:
        stored = stream.read()
    assert stored == load_test_data()
def _handle_raw_image_files(tmp_dir, upload_session):
    """
    Import the images found in ``tmp_dir`` and record the per-file outcome.

    Every file below ``tmp_dir`` gets (or reuses) a ``RawImageFile`` of the
    session, keyed by its path relative to ``tmp_dir``. The importer result
    is then handed to ``_handle_raw_files`` and the session files are
    passed to ``_delete_session_files``.
    """
    input_files = set()
    for dirpath, _dirnames, filenames in os.walk(tmp_dir):
        for name in filenames:
            input_files.add(Path(dirpath).joinpath(name))

    session_files = []
    for path in input_files:
        raw_image_file, _created = RawImageFile.objects.get_or_create(
            filename=str(path.relative_to(tmp_dir)),
            upload_session=upload_session,
        )
        session_files.append(raw_image_file)

    # Map absolute paths in tmp_dir to their RawImageFile. The staged file's
    # name is used when there is one, otherwise the stored filename.
    filepath_lookup: Dict[str, RawImageFile] = {}
    for raw_image_file in session_files:
        staged_path = raw_image_file.staged_file_id and os.path.join(
            tmp_dir, StagedAjaxFile(raw_image_file.staged_file_id).name
        )
        lookup_key = staged_path or os.path.join(
            tmp_dir, raw_image_file.filename
        )
        filepath_lookup[lookup_key] = raw_image_file

    importer_result = import_images(
        input_directory=tmp_dir, origin=upload_session
    )

    _handle_raw_files(
        input_files=input_files,
        consumed_files=importer_result.consumed_files,
        file_errors=importer_result.file_errors,
        filepath_lookup=filepath_lookup,
        upload_session=upload_session,
    )

    _delete_session_files(session_files=session_files)
def test_single_chunk(rf: RequestFactory):
    """Uploading a file in one request yields one entry with matching content."""
    widget = AjaxUploadWidget(ajax_target_path="/ajax")
    widget.timeout = timedelta(seconds=1)

    filename = 'test.bin'
    upload_request = create_upload_file_request(rf, filename=filename)

    response = widget.handle_ajax(upload_request)
    assert isinstance(response, JsonResponse)

    payload = json.loads(response.content)
    assert len(payload) == 1

    described_file = payload[0]
    assert described_file["filename"] == filename
    assert "uuid" in described_file
    assert "extra_attrs" in described_file

    staged_uuid = uuid.UUID(described_file["uuid"])
    with StagedAjaxFile(staged_uuid).open() as handle:
        staged_content = handle.read()

    assert staged_content == load_test_data()
def create_raw_upload_image_session(
    images: List[str], delete_file=False, imageset=None, annotationset=None
) -> Tuple[RawImageUploadSession, Dict[str, RawImageFile]]:
    """
    Build an upload session holding one raw image file per resource name.

    Each name in ``images`` is staged from ``RESOURCE_PATH`` and attached to
    the session. When ``delete_file`` is set, the staged data of
    ``image10x10x10.zraw`` is deleted before the session is saved.

    Returns the saved session and a mapping of staged file name to
    ``RawImageFile``.
    """
    upload_session = RawImageUploadSession(
        imageset=imageset, annotationset=annotationset
    )

    uploaded_images = {}
    for resource_name in images:
        staged_file = create_file_from_filepath(RESOURCE_PATH / resource_name)
        raw_file = RawImageFile.objects.create(
            upload_session=upload_session,
            filename=staged_file.name,
            staged_file_id=staged_file.uuid,
        )
        uploaded_images[staged_file.name] = raw_file

    if delete_file:
        doomed_id = uploaded_images["image10x10x10.zraw"].staged_file_id
        StagedAjaxFile(doomed_id).delete()

    upload_session.save()

    return upload_session, uploaded_images
def create_raw_upload_image_session(
    *, images: List[str], delete_file=False, user=None, linked_task=None,
) -> Tuple[RawImageUploadSession, Dict[str, RawImageFile]]:
    """
    Build, save and process an upload session for the given resource names.

    The session is created for ``user``, or for a freshly made user when
    none is given. Each name in ``images`` is staged from ``RESOURCE_PATH``
    and attached to the session. When ``delete_file`` is set, the staged
    data of ``image10x10x10.zraw`` is deleted before the session is saved.
    Processing is triggered with on-commit callbacks executed.

    Returns the session and a mapping of staged file name to
    ``RawImageFile``.
    """
    creator = user or UserFactory(email="*****@*****.**")
    upload_session = RawImageUploadSession(creator=creator)

    uploaded_images = {}
    for resource_name in images:
        staged_file = create_file_from_filepath(RESOURCE_PATH / resource_name)
        raw_file = RawImageFile.objects.create(
            upload_session=upload_session,
            filename=staged_file.name,
            staged_file_id=staged_file.uuid,
        )
        uploaded_images[staged_file.name] = raw_file

    if delete_file:
        doomed_id = uploaded_images["image10x10x10.zraw"].staged_file_id
        StagedAjaxFile(doomed_id).delete()

    upload_session.save()

    with capture_on_commit_callbacks(execute=True):
        upload_session.process_images(linked_task=linked_task)

    return upload_session, uploaded_images
def _populate_tmp_dir(tmp_dir, upload_session):
    """
    Provision ``tmp_dir`` with every file that belongs to ``upload_session``.

    Both the staged files behind the session's raw image files and the
    session's user uploads are provisioned, after which ``extract_files``
    is run on the directory.
    """
    session_files = []
    for raw_image_file in upload_session.rawimagefile_set.all():
        session_files.append(StagedAjaxFile(raw_image_file.staged_file_id))
    session_files.extend(upload_session.user_uploads.all())

    populate_provisioning_directory(session_files, tmp_dir)
    extract_files(source_path=tmp_dir)
def test_uploaded_single_chunk_file():
    """A file uploaded without explicit chunking is stored as one chunk."""
    payload = b"HelloWorld" * 5
    staged = StagedAjaxFile(
        create_uploaded_file(payload, client_filename="bla")
    )

    assert staged.name == "bla"
    stored_chunks = StagedFile.objects.filter(file_id=staged.uuid)
    assert stored_chunks.count() == 1

    do_default_content_tests(staged, payload)
def test_file_session_creation():
    """A new upload session references a staged file that exists."""
    images = ["image10x10x10.zraw"]
    _, uploaded_images = create_raw_upload_image_session(images=images)
    assert len(uploaded_images) == 1

    raw_file = uploaded_images[images[0]]
    assert raw_file.staged_file_id is not None

    # Look the entry up again, as the staged id is read a second time
    staged = StagedAjaxFile(uploaded_images[images[0]].staged_file_id)
    assert staged.exists
def test_rfc7233_implementation_client_api(client):
    """Upload a file in fixed-size ranged chunks and read it back intact."""
    content = load_test_data()
    upload_id = generate_new_upload_id(
        test_rfc7233_implementation_client_api, content
    )
    token = Token.objects.create(user=UserFactory())
    filename = "whatever.bin"

    max_chunk_length = 2**15
    stream = BytesIO(content)
    # Guarantees that the upload really spans several requests
    assert len(content) > 3 * max_chunk_length

    offset = 0
    for chunk in iter(lambda: stream.read(max_chunk_length), b""):
        last_byte = offset + len(chunk) - 1
        response = client.post(
            path=reverse("api:staged-file-list"),
            data={filename: BytesIO(chunk), "X-Upload-ID": upload_id},
            format="multipart",
            HTTP_CONTENT_RANGE=f"bytes {offset}-{last_byte}/{len(content)}",
            HTTP_AUTHORIZATION=f"Token {token}",
        )
        assert response.status_code == 201
        offset += len(chunk)

    # The response to the last chunk describes the assembled file
    parsed_json = response.json()
    staged_file = StagedAjaxFile(uuid.UUID(parsed_json[0]["uuid"]))
    with staged_file.open() as handle:
        staged_content = handle.read()

    assert len(staged_content) == len(content)
    assert hash(staged_content) == hash(content)
    assert staged_content == content
def _copy_output_files(self, *, container, base_dir: Path):
    """
    Turn the files below ``base_dir`` in ``container`` into an upload session.

    The files are listed with ``find`` inside the container. Each one is
    stored as a single staged chunk and wrapped in a ``RawImageFile``. The
    session is attached to a new ``Result`` for this job, saved with
    processing skipped, and processed once all files exist. Nothing is
    created when the listing fails or comes back empty; a warning is
    logged instead.
    """
    listing = container.exec_run(f"find {base_dir} -type f")
    if listing.exit_code != 0:
        logger.warning(f"Error listing {base_dir}")
        return

    output_files = []
    for line in listing.output.decode().splitlines():
        output_files.append(base_dir / Path(line))

    if not output_files:
        logger.warning("Output directory is empty")
        return

    # TODO: This thing should not interact with the database
    result = Result.objects.create(job_id=self._job_id)

    # Create the upload session but do not save it until we have the
    # files
    upload_session = RawImageUploadSession(algorithm_result=result)

    images = []
    for output_file in output_files:
        file_id = uuid.uuid4()
        django_file = File(get_file(container=container, src=output_file))

        # The whole file goes into one chunk covering bytes 0..size-1
        chunk = StagedFile(
            csrf="staging_conversion_csrf",
            client_id=self._job_id,
            client_filename=output_file.name,
            file_id=file_id,
            timeout=timezone.now() + timedelta(hours=24),
            start_byte=0,
            end_byte=django_file.size - 1,
            total_size=django_file.size,
        )
        chunk.file.save(f"{uuid.uuid4()}", django_file)
        chunk.save()

        staged_ajax_file = StagedAjaxFile(file_id)
        raw_image_file = RawImageFile(
            upload_session=upload_session,
            filename=staged_ajax_file.name,
            staged_file_id=staged_ajax_file.uuid,
        )
        images.append(raw_image_file)

    upload_session.save(skip_processing=True)
    RawImageFile.objects.bulk_create(images)
    upload_session.process_images()
def test_file_deletion():
    """Deleting a staged file removes every chunk from private storage."""
    payload = b"HelloWorld" * 5
    file_uuid = create_uploaded_file(
        payload, list(range(1, len(payload) + 1)), init_total_size=False
    )
    staged = StagedAjaxFile(file_uuid)

    assert staged.exists
    assert staged.is_complete
    assert staged.size == len(payload)

    # Remember the storage names up front; the chunks vanish on delete
    stored_names = []
    for chunk in StagedFile.objects.filter(file_id=file_uuid).all():
        stored_names.append(chunk.file.name)
    assert all(private_s3_storage.exists(name) for name in stored_names)

    staged.delete()

    assert not staged.exists
    assert not staged.is_complete
    assert not any(private_s3_storage.exists(name) for name in stored_names)
def validate_docker_image(*, pk: uuid.UUID, app_label: str, model_name: str):
    """
    Validate the container image of a model instance and mark it as ready.

    If the instance has no image yet, it is first created from the staged
    upload referenced by ``instance.staged_image_uuid``. When manifest
    validation raises ``ValidationError``, an invalid-dockerfile email is
    sent for the instance and the error is re-raised. Otherwise
    ``image_sha256`` and ``ready`` are updated.
    """
    model = apps.get_model(app_label=app_label, model_name=model_name)
    instance = model.objects.get(pk=pk)

    if not instance.image:
        # Create the image from the staged file
        staged_image = StagedAjaxFile(instance.staged_image_uuid)
        with staged_image.open() as handle:
            instance.image.save(staged_image.name, File(handle))

    try:
        digest = _validate_docker_image_manifest(
            model=model, instance=instance
        )
    except ValidationError:
        send_invalid_dockerfile_email(container_image=instance)
        raise
    else:
        model.objects.filter(pk=instance.pk).update(
            image_sha256=f"sha256:{digest}", ready=True
        )
def test_missing_file():
    """Once its chunks are gone, a staged file reports missing and errors."""
    payload = b"HelloWorld" * 5
    staged = StagedAjaxFile(create_uploaded_file(payload, [len(payload)]))

    assert staged.exists
    assert staged.is_complete

    # Remove the backing chunks behind the staged file's back
    StagedFile.objects.filter(file_id=staged.uuid).all().delete()

    assert not staged.exists
    assert not staged.is_complete

    for attribute in ("name", "size"):
        with pytest.raises(NotFoundError):
            getattr(staged, attribute)

    with pytest.raises(NotFoundError):
        staged.delete()

    with pytest.raises(OSError):
        with staged.open() as handle:
            handle.read()
def test_missing_file():
    """A staged file without chunks is absent and its accessors raise."""
    payload = b"HelloWorld" * 5
    file_uuid = create_uploaded_file(payload, [len(payload)])
    staged = StagedAjaxFile(file_uuid)

    assert staged.exists
    assert staged.is_complete

    # Drop every chunk so that only the handle remains
    backing_chunks = StagedFile.objects.filter(file_id=staged.uuid).all()
    backing_chunks.delete()

    assert not staged.exists
    assert not staged.is_complete

    for attribute in ("name", "size"):
        with pytest.raises(NotFoundError):
            getattr(staged, attribute)

    with pytest.raises(NotFoundError):
        staged.delete()

    with pytest.raises(IOError):
        staged.open()
def test_uploaded_multi_chunk_file():
    """A file uploaded with five chunk boundaries is stored as five chunks."""
    payload = b"HelloWorld" * 5
    boundaries = [4, 8, 10, 11, len(payload)]

    staged = StagedAjaxFile(
        create_uploaded_file(
            payload, chunks=boundaries, client_filename="splittered"
        )
    )

    assert staged.name == "splittered"
    stored_chunks = StagedFile.objects.filter(file_id=staged.uuid)
    assert stored_chunks.count() == 5

    do_default_content_tests(staged, payload)
def _delete_session_files(*, upload_session):
    """
    Delete all raw image files and user uploads of ``upload_session``.

    The staged data behind each raw image file is deleted once the current
    transaction commits. The database rows are deleted immediately. Staged
    files that are already gone are ignored.
    """

    def _make_deleter(staged_file):
        # Returns the deferred callable for one staged file.
        def _delete():
            # Runs at commit time, outside the loop's try block, so a
            # missing staged file has to be tolerated here.
            try:
                staged_file.delete()
            except NotFoundError:
                pass

        return _delete

    for file in upload_session.rawimagefile_set.all():
        try:
            if file.staged_file_id:
                saf = StagedAjaxFile(file.staged_file_id)
                on_commit(_make_deleter(saf))
        except NotFoundError:
            pass

        file.delete()

    upload_session.user_uploads.all().delete()
def test_file_deletion():
    """After ``delete()`` neither the staged file nor its chunks remain."""
    file_content = b"HelloWorld" * 5
    chunk_ends = list(range(1, len(file_content) + 1))
    uploaded_file_uuid = create_uploaded_file(
        file_content, chunk_ends, init_total_size=False
    )
    tested_file = StagedAjaxFile(uploaded_file_uuid)

    assert tested_file.exists
    assert tested_file.is_complete
    assert tested_file.size == len(file_content)

    chunk_rows = StagedFile.objects.filter(file_id=uploaded_file_uuid).all()
    # Names are captured now, while the chunk rows can still be queried
    storage_names = [row.file.name for row in chunk_rows]
    for name in storage_names:
        assert private_s3_storage.exists(name)

    tested_file.delete()

    assert not tested_file.exists
    assert not tested_file.is_complete
    for name in storage_names:
        assert not private_s3_storage.exists(name)
def test_single_chunk_api(client):
    """A single-request API upload is stored with the right name and data."""
    filename = "test.bin"
    token = Token.objects.create(user=UserFactory())

    response = create_upload_file_request(
        rf=client,
        filename=filename,
        url=reverse("api:staged-file-list"),
        extra_headers={"HTTP_AUTHORIZATION": f"Token {token}"},
    )
    assert response.status_code == 201

    entry = response.json()[0]
    assert entry["filename"] == filename
    for required_key in ("uuid", "extra_attrs"):
        assert required_key in entry

    staged_file = StagedAjaxFile(uuid.UUID(entry["uuid"]))
    with staged_file.open() as handle:
        stored = handle.read()
    assert stored == load_test_data()
def test_rfc7233_implementation(rf: RequestFactory):
    """Three ranged uploads through the widget assemble the original file."""
    content = load_test_data()
    upload_id = generate_new_upload_id(test_rfc7233_implementation, content)

    midpoint = len(content) // 2
    byte_ranges = ((0, 10), (10, midpoint), (midpoint, len(content)))
    # All part requests are prepared before the widget handles any of them
    parts = [
        create_partial_upload_file_request(rf, upload_id, content, start, end)
        for start, end in byte_ranges
    ]

    widget = AjaxUploadWidget(ajax_target_path="/ajax")
    widget.timeout = timedelta(seconds=1)

    response = None
    for part in parts:
        response = widget.handle_ajax(part)
        assert isinstance(response, JsonResponse)

    parsed_json = json.loads(response.content)
    staged_file = StagedAjaxFile(uuid.UUID(parsed_json[0]["uuid"]))
    with staged_file.open() as handle:
        staged_content = handle.read()

    assert staged_content == content
def test_rfc7233_implementation(rf: RequestFactory):
    """A file sent as three consecutive byte ranges is stored unchanged."""
    content = load_test_data()
    upload_id = generate_new_upload_id(test_rfc7233_implementation, content)

    half = len(content) // 2
    requests_in_order = []
    for first_byte, end_byte in ((0, 10), (10, half), (half, len(content))):
        requests_in_order.append(
            create_partial_upload_file_request(
                rf, upload_id, content, first_byte, end_byte
            )
        )

    widget = AjaxUploadWidget(ajax_target_path="/ajax")
    widget.timeout = timedelta(seconds=1)

    response = None
    for partial_request in requests_in_order:
        response = widget.handle_ajax(partial_request)
        assert isinstance(response, JsonResponse)

    # Only the response to the last part is inspected
    described = json.loads(response.content)
    staged_uuid = uuid.UUID(described[0]["uuid"])
    with StagedAjaxFile(staged_uuid).open() as stream:
        staged_content = stream.read()

    assert staged_content == content
def test_file_overlapping_chunk():
    """A chunk overlapping others leaves the file present but incomplete."""
    payload = b"HelloWorld" * 5
    file_uuid = create_uploaded_file(
        payload, list(range(1, len(payload) + 1))
    )
    staged = StagedAjaxFile(file_uuid)

    # Re-save the fifth chunk with its pk cleared and a byte range of 0-10
    chunk_rows = StagedFile.objects.filter(file_id=file_uuid).all()
    overlapping = chunk_rows[4]
    overlapping.pk = None
    overlapping.start_byte = 0
    overlapping.end_byte = 10
    overlapping.save()

    assert staged.exists
    assert not staged.is_complete
    assert staged.size is None
def validate_docker_image_async(
    *, pk: uuid.UUID, app_label: str, model_name: str
):
    """
    Validate the container image of a model instance and mark it as ready.

    An instance without an image first gets one created from the staged
    upload referenced by ``instance.staged_image_uuid``. If manifest
    validation raises ``ValidationError``, an invalid-dockerfile email is
    sent for the instance and the error propagates. On success
    ``image_sha256`` and ``ready`` are updated.
    """
    model = apps.get_model(app_label=app_label, model_name=model_name)
    instance = model.objects.get(pk=pk)

    if not instance.image:
        # Create the image from the staged file
        staged_upload = StagedAjaxFile(instance.staged_image_uuid)
        with staged_upload.open() as stream:
            instance.image.save(staged_upload.name, File(stream))

    try:
        sha = _validate_docker_image_manifest(model=model, instance=instance)
    except ValidationError:
        send_invalid_dockerfile_email(container_image=instance)
        raise
    else:
        matching = model.objects.filter(pk=instance.pk)
        matching.update(image_sha256=f"sha256:{sha}", ready=True)
def test_file_missing_last_chunk():
    """Removing the final chunk makes a complete file incomplete."""
    payload = b"HelloWorld" * 5
    file_uuid = create_uploaded_file(
        payload, list(range(1, len(payload) + 1))
    )
    staged = StagedAjaxFile(file_uuid)

    assert staged.exists
    assert staged.is_complete
    assert staged.size == len(payload)

    # delete chunk
    chunk_rows = StagedFile.objects.filter(file_id=file_uuid).all()
    last_index = len(chunk_rows) - 1
    chunk_rows[last_index].delete()

    assert staged.exists
    assert not staged.is_complete
    assert staged.size is None
def test_single_chunk_client_api(client):
    """A multipart POST of the whole file is staged with identical content."""
    filename = "test.bin"
    content = load_test_data()
    token = Token.objects.create(user=UserFactory())

    response = client.post(
        path=reverse("api:staged-file-list"),
        data={filename: BytesIO(content)},
        format="multipart",
        HTTP_AUTHORIZATION=f"Token {token}",
    )
    assert response.status_code == 201

    entries = response.json()
    assert len(entries) == 1
    entry = entries[0]
    assert entry["filename"] == filename
    for required_key in ("uuid", "extra_attrs"):
        assert required_key in entry

    staged_file = StagedAjaxFile(uuid.UUID(entry["uuid"]))
    with staged_file.open() as handle:
        stored = handle.read()
    assert stored == load_test_data()
def validate_staged_files(self, *, staged_files):
    """
    Check that the given files can form one upload.

    Raises ``ValidationError`` when a file has no staged file id, when a
    staged file does not exist, when two files share a name, or when the
    combined size exceeds ``settings.UPLOAD_SESSION_MAX_BYTES``.
    """
    file_ids = [entry.staged_file_id for entry in staged_files]

    for file_id in file_ids:
        if file_id is None:
            raise ValidationError("File has not been staged")

    files = [StagedAjaxFile(file_id) for file_id in file_ids]

    if any(not candidate.exists for candidate in files):
        raise ValidationError("File does not exist")

    distinct_names = {candidate.name for candidate in files}
    if len(distinct_names) != len(files):
        raise ValidationError("Filenames must be unique")

    total_bytes = sum(candidate.size for candidate in files)
    if total_bytes > settings.UPLOAD_SESSION_MAX_BYTES:
        raise ValidationError(
            "Total size of all files exceeds the upload limit")
def build_images(upload_session_uuid: UUID):
    """
    Task which analyzes an upload session and attempts to extract and store
    detected images assembled from files uploaded in the image session.

    The task updates the state field of the associated
    :class:`RawImageUploadSession` to indicate if it is running or has
    finished computing. Nothing happens unless the session is in the
    ``queued`` state.

    Results are stored in:
    - `RawImageUploadSession.error_message` if a general error occurred during
        processing.
    - The `RawImageFile.error` field of associated `RawImageFile` objects,
        in case files could not be processed.

    The operation of building images will delete associated `StagedAjaxFile`s
    of analyzed images in order to free up space on the server (only done if the
    function does not error out).

    If a job fails due to a RawImageUploadSession.DoesNotExist error, the
    job is queued for a retry (max 15 times).

    Parameters
    ----------
    upload_session_uuid: UUID
        The uuid of the upload sessions that should be analyzed.
    """
    upload_session = RawImageUploadSession.objects.get(
        pk=upload_session_uuid
    )  # type: RawImageUploadSession

    if upload_session.session_state == UPLOAD_SESSION_STATE.queued:
        # Scratch directory that the image builders read from; it is removed
        # in the ``finally`` clause below.
        tmp_dir = Path(mkdtemp(prefix="construct_image_volumes-"))
        try:
            try:
                upload_session.session_state = UPLOAD_SESSION_STATE.running
                upload_session.save()

                session_files = RawImageFile.objects.filter(
                    upload_session=upload_session.pk
                ).all()  # type: Tuple[RawImageFile]

                session_files, duplicates = remove_duplicate_files(
                    session_files
                )

                # Duplicates are flagged and their staged data is dropped;
                # they take no part in the image building below.
                for duplicate in duplicates:  # type: RawImageFile
                    duplicate.error = "Filename not unique"
                    saf = StagedAjaxFile(duplicate.staged_file_id)
                    duplicate.staged_file_id = None
                    saf.delete()
                    duplicate.save()

                populate_provisioning_directory(session_files, tmp_dir)

                # Staged file name -> RawImageFile, used to attribute builder
                # results (which are reported per filename) to database rows.
                filename_lookup = {
                    StagedAjaxFile(
                        raw_image_file.staged_file_id
                    ).name: raw_image_file
                    for raw_image_file in session_files
                }
                unconsumed_filenames = set(filename_lookup.keys())

                collected_images = []
                collected_associated_files = []
                # Every builder runs on the same directory; a file counts as
                # handled once any builder consumed it or reported an error.
                for algorithm in IMAGE_BUILDER_ALGORITHMS:
                    algorithm_result = algorithm(
                        tmp_dir
                    )  # type: ImageBuilderResult

                    collected_images += list(algorithm_result.new_images)
                    collected_associated_files += list(
                        algorithm_result.new_image_files
                    )

                    for filename in algorithm_result.consumed_files:
                        if filename in unconsumed_filenames:
                            unconsumed_filenames.remove(filename)
                    for (
                        filename,
                        msg,
                    ) in algorithm_result.file_errors_map.items():
                        if filename in unconsumed_filenames:
                            unconsumed_filenames.remove(filename)
                            raw_image = filename_lookup[
                                filename
                            ]  # type: RawImageFile
                            # The message is cut to its first 256 characters
                            raw_image.error = str(msg)[:256]
                            raw_image.save()

                for image in collected_images:
                    image.origin = upload_session
                    store_image(image, collected_associated_files)
                for unconsumed_filename in unconsumed_filenames:
                    raw_file = filename_lookup[unconsumed_filename]
                    # NOTE(review): not saved here; presumably persisted by
                    # file.save() in the cleanup loop below, provided it
                    # iterates these same objects - verify.
                    raw_file.error = (
                        "File could not be processed by any image builder"
                    )

                # Attach the new images to whichever targets the session has
                if upload_session.imageset:
                    upload_session.imageset.images.add(*collected_images)

                if upload_session.annotationset:
                    upload_session.annotationset.images.add(*collected_images)

                if upload_session.algorithm:
                    for image in collected_images:
                        Job.objects.create(
                            algorithm=upload_session.algorithm, image=image
                        )

                if upload_session.algorithm_result:
                    upload_session.algorithm_result.images.add(
                        *collected_images
                    )

                # Delete any touched file data
                for file in session_files:
                    try:
                        saf = StagedAjaxFile(file.staged_file_id)
                        file.staged_file_id = None
                        saf.delete()
                        file.save()
                    except NotFoundError:
                        pass
            except Exception as e:
                # Any failure is recorded on the session instead of being
                # propagated to the caller.
                upload_session.error_message = str(e)
        finally:
            if tmp_dir is not None:
                shutil.rmtree(tmp_dir)

            # The session always ends up stopped, whether or not an error
            # was recorded above.
            upload_session.session_state = UPLOAD_SESSION_STATE.stopped
            upload_session.save()