def test_roles(app, settings, fake_files):
    """
    Check that every fake file reports exactly the role flags matching the
    role declared in its fixture entry.

    Entries whose role is none of the known ones must report no role at all.
    """
    # Role predicates, in the order the flags are listed below.
    predicates = (
        "has_upload_role",
        "has_original_role",
        "has_spatial_role",
        "has_spectral_role",
    )
    # Declared role -> expected (upload, original, spatial, spectral) flags.
    # Note that the "visualisation" role is the one reported by
    # `has_spatial_role`.
    expected_by_role = {
        "upload": (True, False, False, False),
        "original": (False, True, False, False),
        "visualisation": (False, False, True, False),
        "spectral": (False, False, False, True),
    }
    no_role = (False, False, False, False)

    root = Path(settings.root)
    for entry in fake_files.values():
        name = entry['filepath']
        role = entry['role']
        path = root / Path(name)

        expected_flags = expected_by_role.get(role, no_role)
        for predicate, expected in zip(predicates, expected_flags):
            assert bool(getattr(path, predicate)()) == expected
def test_collection(app, settings, fake_files):
    """
    Check that `is_collection()` matches the fixture's `collection` flag and
    that `is_single()` is its exact opposite, for every fake file.
    """
    root = Path(settings.root)
    for entry in fake_files.values():
        name, expected = entry['filepath'], entry['collection']
        path = root / Path(name)

        assert path.is_collection() == expected
        assert path.is_single() == (not expected)
def deploy_histogram(self, image: Image) -> Histogram:
    """
    Deploy a histogram representation of the image so that it can be used
    for efficient histogram requests.

    Parameters
    ----------
    image : Image
        The image from which the histogram file is built.

    Returns
    -------
    histogram : Histogram
        The histogram built at `processed_dir / HISTOGRAM_STEM`.

    Raises
    ------
    FileErrorProblem
        If building the histogram file raises `FileNotFoundError` or
        `FileExistsError`.
    """
    hist_path = self.processed_dir / Path(HISTOGRAM_STEM)
    self.histogram_path = hist_path
    self.notify(ImportEventType.START_HISTOGRAM_DEPLOY, hist_path, image)

    try:
        built = build_histogram_file(image, hist_path, HistogramType.FAST)
    except (FileNotFoundError, FileExistsError) as e:
        # Tell the listeners before turning the error into a problem response.
        self.notify(
            ImportEventType.ERROR_HISTOGRAM, hist_path, image,
            exception=e
        )
        raise FileErrorProblem(hist_path)
    else:
        self.histogram = built

    assert self.histogram.has_histogram_role()
    self.notify(ImportEventType.END_HISTOGRAM_DEPLOY, hist_path, image)
    return self.histogram
def export_upload(
    background: BackgroundTasks,
    path: Path = Depends(imagepath_parameter),
):
    """
    Export the upload representation of an image.
    """
    original = path.get_original()
    check_representation_existence(original)

    upload = original.get_upload().resolve()
    media_type = original.media_type

    if not upload.is_dir():
        # The upload is a regular file: serve it as is.
        return FileResponse(upload, media_type=media_type, filename=path.name)

    # The upload is a directory (the archive has been deleted): zip it into
    # a temporary file, and schedule that file for removal as a background
    # task.
    zipped = Path(f"/tmp/{unique_name_generator()}")
    make_zip_archive(zipped, upload)

    def remove_export(target):
        target.unlink(missing_ok=True)

    background.add_task(remove_export, zipped)
    return FileResponse(zipped, media_type="application/zip", filename=path.name)
def test_extensions(app, settings):
    """
    Check `extension` and `true_stem` on paths with single and compound
    extensions.
    """
    # Relative file path -> extension expected for it.
    cases = {
        "upload0/myfile.svs": ".svs",
        "upload2/processed/myfile.ome.tiff": ".ome.tiff",
        "upload5/processed/visualisation.mrxs.format": ".mrxs.format",
    }

    for relative, expected_ext in cases.items():
        path = Path(settings.root, relative)
        basename = relative.split("/")[-1]

        assert path.extension == expected_ext
        # The true stem is the file name stripped of its whole extension.
        assert path.true_stem == basename.replace(expected_ext, "")
def run_import(
    filepath: str, name: str, extra_listeners: Optional[List[ImportListener]] = None,
    prefer_copy: bool = False
):
    """
    Import the file at `filepath` with a `FileImporter`.

    Parameters
    ----------
    filepath : str
        Path of the pending file to import.
    name : str
        Name given to the importer and to the stdout listener.
    extra_listeners : list of ImportListener, optional
        Listeners notified in addition to the `StdoutListener`, which is
        always registered first.
    prefer_copy : bool
        Forwarded to `FileImporter.run`.
    """
    pending_file = Path(filepath)

    # Any non-None iterable of listeners is accepted and turned into a list.
    extras = [] if extra_listeners is None else list(extra_listeners)
    listeners = [StdoutListener(name), *extras]

    importer = FileImporter(pending_file, name, listeners)
    importer.run(prefer_copy)
def filepath2path(filepath, config):
    """
    Build a path from a filepath relative to the configured root.

    Parameters
    ----------
    filepath: str
        Relative filepath
    config
        Settings object whose `root` attribute is used as base directory.

    Returns
    -------
    path: Path
        `config.root` joined with `filepath`
    """
    # Function-scope import kept from the original
    # (presumably to avoid a circular import - TODO confirm).
    from pims.files.file import Path

    root = config.root
    return Path(root, filepath)
def compute_histogram(
    response: Response,
    background: BackgroundTasks,
    path: Path = Depends(imagepath_parameter),
    # companion_file_id: Optional[int] = Body(None, description="Cytomine ID for the histogram")
    sync: bool = True,
    overwrite: bool = True
):
    """
    Ask for histogram computation
    """
    spatial = path.get_spatial()
    check_representation_existence(spatial)

    # TODO: allow to build complete histograms
    hist_type = HistogramType.FAST
    destination = spatial.processed_root() / Path(HISTOGRAM_STEM)
    build_args = (spatial, destination, hist_type, overwrite)

    if not sync:
        # Asynchronous request: defer the build to a background task
        # and answer 202.
        background.add_task(build_histogram_file, *build_args)
        response.status_code = status.HTTP_202_ACCEPTED
        return

    # Synchronous request: build now and answer 201.
    build_histogram_file(*build_args)
    response.status_code = status.HTTP_201_CREATED
def deploy_spatial(self, format: AbstractFormat) -> Image:
    """
    Deploy a spatial representation of the image so that it can be used for
    efficient spatial requests.

    If `format.need_conversion` is set, the image is converted to
    `format.conversion_format()`, the format of the converted file is
    detected and its integrity is checked. Otherwise, the spatial
    representation is a symlink to the original representation.

    Parameters
    ----------
    format : AbstractFormat
        The format of the original image.

    Returns
    -------
    spatial : Image
        The spatial representation (also stored in `self.spatial`).

    Raises
    ------
    FormatConversionProblem
        If the conversion raises, reports a failure or does not produce
        the expected file.
    NoMatchingFormatProblem
        If no spatial readable format matches the converted file.
    ImageParsingProblem
        If the converted file does not pass the integrity check.
    """
    self.notify(ImportEventType.START_SPATIAL_DEPLOY, self.original_path)
    if format.need_conversion:
        # Do the spatial conversion
        try:
            ext = format.conversion_format().get_identifier()
            spatial_filename = Path(f"{SPATIAL_STEM}.{ext}")
            self.spatial_path = self.processed_dir / spatial_filename
            self.notify(
                ImportEventType.START_CONVERSION,
                self.spatial_path, self.upload_path
            )

            r = format.convert(self.spatial_path)
            converted = bool(r) and self.spatial_path.exists()
        except Exception as e:
            self.notify(
                ImportEventType.ERROR_CONVERSION,
                self.spatial_path, exception=e
            )
            raise FormatConversionProblem() from e

        # This check is done outside the `try` block on purpose: raising
        # from inside it would be caught by the broad handler above and
        # the listeners would be notified twice for the same failure.
        if not converted:
            self.notify(
                ImportEventType.ERROR_CONVERSION,
                self.spatial_path
            )
            raise FormatConversionProblem()

        self.notify(ImportEventType.END_CONVERSION, self.spatial_path)

        # Check format of converted file
        self.notify(ImportEventType.START_FORMAT_DETECTION, self.spatial_path)
        spatial_format = SpatialReadableFormatFactory().match(self.spatial_path)
        if not spatial_format:
            self.notify(ImportEventType.ERROR_NO_FORMAT, self.spatial_path)
            raise NoMatchingFormatProblem(self.spatial_path)
        self.notify(
            ImportEventType.END_FORMAT_DETECTION,
            self.spatial_path, spatial_format
        )

        self.spatial = Image(self.spatial_path, format=spatial_format)

        # Check spatial image integrity
        self.notify(ImportEventType.START_INTEGRITY_CHECK, self.spatial_path)
        errors = self.spatial.check_integrity(check_metadata=True)
        if len(errors) > 0:
            self.notify(
                ImportEventType.ERROR_INTEGRITY_CHECK, self.spatial_path,
                integrity_errors=errors
            )
            raise ImageParsingProblem(self.spatial)
        self.notify(ImportEventType.END_INTEGRITY_CHECK, self.spatial)
    else:
        # Create spatial role
        spatial_filename = Path(f"{SPATIAL_STEM}.{format.get_identifier()}")
        self.spatial_path = self.processed_dir / spatial_filename
        self.mksymlink(self.spatial_path, self.original_path)
        self.spatial = Image(self.spatial_path, format=format)

    assert self.spatial.has_spatial_role()
    self.notify(ImportEventType.END_SPATIAL_DEPLOY, self.spatial)
    return self.spatial
def test_upload_root(app, settings, fake_files):
    """
    Check that the upload root of every fake file is the first component of
    its relative path, directly under the settings root.
    """
    root = Path(settings.root)
    for relative in fake_files:
        candidate = root / Path(relative)
        top_level = relative.partition("/")[0]
        assert candidate.upload_root() == root / Path(top_level)
)
from pims.files.histogram import Histogram
from pims.files.image import Image
from pims.formats import AbstractFormat
from pims.formats.utils.factories import FormatFactory, SpatialReadableFormatFactory
from pims.importer.listeners import (
    CytomineListener, ImportEventType, ImportListener,
    StdoutListener
)
from pims.processing.histograms.utils import build_histogram_file
from pims.tasks.queue import BG_TASK_MAPPING, CELERY_TASK_MAPPING, Task, func_from_str
from pims.utils.strings import unique_name_generator

log = logging.getLogger("pims.app")

# Paths read once from the settings, when this module is imported.
PENDING_PATH = Path(get_settings().pending_path)
FILE_ROOT_PATH = Path(get_settings().root)


class FileErrorProblem(BadRequestException):
    """
    Raised by the importer when an archive cannot be unpacked or when the
    histogram file cannot be written.
    """
    pass


class ImageParsingProblem(BadRequestException):
    """
    Raised by the importer when an image integrity check reports errors.
    """
    pass


class FormatConversionProblem(BadRequestException):
    """
    Raised by the importer when the conversion to the spatial format fails.
    """
    pass
def run(self, prefer_copy: bool = False):
    """
    Import the pending file.
    It moves a pending file to PIMS root path, tries to identify the file
    format, converts it if needed and checks its integrity.

    Parameters
    ----------
    prefer_copy : bool
        Prefer copy the pending file instead of moving it. Useful for tests.

    Returns
    -------
    imported : list
        A list holding the upload path of the imported file or, when the
        pending file is an archive which is not a multi-file format, the
        result of `import_collection`.

    Raises
    ------
    FilepathNotFoundProblem
        If pending file is not found.
    NoMatchingFormatProblem
        If neither an image format nor an archive format matches the file.
    FileErrorProblem
        If the archive cannot be unpacked.
    ImageParsingProblem
        If the original image does not pass the integrity check.
    NotImplementedError
        If the detected format is not spatial.
    """
    try:
        self.notify(ImportEventType.START_DATA_EXTRACTION, self.pending_file)

        # Check the file is in pending area,
        # or comes from a extracted collection
        if (not self.pending_file.is_extracted()
                and self.pending_file.parent != PENDING_PATH) \
                or not self.pending_file.exists():
            self.notify(ImportEventType.FILE_NOT_FOUND, self.pending_file)
            raise FilepathNotFoundProblem(self.pending_file)

        # Move the file to PIMS root path
        upload_dir_name = Path(
            f"{UPLOAD_DIR_PREFIX}"
            f"{str(unique_name_generator())}"
        )
        self.upload_dir = FILE_ROOT_PATH / upload_dir_name
        self.mkdir(self.upload_dir)

        if self.pending_name:
            name = self.pending_name
        else:
            name = self.pending_file.name
        self.upload_path = self.upload_dir / name

        self.move(self.pending_file, self.upload_path, prefer_copy)

        # If the pending file comes from an archive
        if not prefer_copy and self.pending_file.is_extracted():
            # Create symlink in processed to keep track of parent archive
            self.mksymlink(self.pending_file, self.upload_path)

        self.notify(
            ImportEventType.MOVED_PENDING_FILE,
            self.pending_file, self.upload_path
        )
        self.notify(ImportEventType.END_DATA_EXTRACTION, self.upload_path)

        # Identify format
        self.notify(ImportEventType.START_FORMAT_DETECTION, self.upload_path)

        format_factory = FormatFactory()
        format = format_factory.match(self.upload_path)
        archive = None
        if format is None:
            # Not an image format: maybe an archive
            archive = Archive.from_path(self.upload_path)
            if archive:
                format = archive.format

        if format is None:
            self.notify(ImportEventType.ERROR_NO_FORMAT, self.upload_path)
            raise NoMatchingFormatProblem(self.upload_path)
        self.notify(
            ImportEventType.END_FORMAT_DETECTION,
            self.upload_path, format
        )

        # Create processed dir
        self.processed_dir = self.upload_dir / Path(PROCESSED_DIR)
        self.mkdir(self.processed_dir)

        # Create original role
        original_filename = Path(
            f"{ORIGINAL_STEM}.{format.get_identifier()}"
        )
        self.original_path = self.processed_dir / original_filename
        if archive:
            try:
                self.notify(
                    ImportEventType.START_UNPACKING, self.upload_path
                )
                archive.extract(self.original_path)
            except ArchiveError as e:
                self.notify(
                    ImportEventType.ERROR_UNPACKING, self.upload_path,
                    exception=e
                )
                raise FileErrorProblem(self.upload_path) from e

            # Now the archive is extracted, check if it's a multi-file format
            format = format_factory.match(self.original_path)
            if format:
                # It is a multi-file format
                original_filename = Path(
                    f"{ORIGINAL_STEM}.{format.get_identifier()}"
                )
                new_original_path = self.processed_dir / original_filename
                self.move(self.original_path, new_original_path)
                self.original_path = new_original_path
                format = format.__class__(self.original_path)

                self.notify(
                    ImportEventType.END_UNPACKING, self.upload_path,
                    self.original_path, format=format, is_collection=False
                )
                self.upload_path = self.original_path
            else:
                # It is a collection: every extracted file is imported
                # by `import_collection`.
                self.extracted_dir = self.processed_dir / Path(EXTRACTED_DIR)
                self.mksymlink(self.extracted_dir, self.original_path)

                collection = self.import_collection(
                    self.original_path, prefer_copy
                )
                self.notify(
                    ImportEventType.END_UNPACKING, self.upload_path,
                    self.original_path, is_collection=True
                )
                return collection
        else:
            self.mksymlink(self.original_path, self.upload_path)
            assert self.original_path.has_original_role()

        # Check original image integrity
        self.notify(ImportEventType.START_INTEGRITY_CHECK, self.original_path)
        self.original = Image(self.original_path, format=format)
        errors = self.original.check_integrity(check_metadata=True)
        if len(errors) > 0:
            self.notify(
                ImportEventType.ERROR_INTEGRITY_CHECK, self.original_path,
                integrity_errors=errors
            )
            raise ImageParsingProblem(self.original)
        self.notify(ImportEventType.END_INTEGRITY_CHECK, self.original)

        if format.is_spatial():
            self.deploy_spatial(format)
        else:
            raise NotImplementedError()

        self.deploy_histogram(self.original.get_spatial())

        # Finished
        self.notify(
            ImportEventType.END_SUCCESSFUL_IMPORT,
            self.upload_path, self.original
        )
        return [self.upload_path]
    except Exception as e:
        # The keyword has to be `exception`, as in every other error
        # notification, so that listeners actually receive the error.
        # NOTE(review): `self.upload_path` is only assigned partway through
        # this method; presumably it is initialised in `__init__` - confirm.
        self.notify(
            ImportEventType.FILE_ERROR,
            self.upload_path, exception=e
        )
        raise
from pims.files.file import Path from pims.importer.importer import PENDING_PATH, run_import logging.basicConfig() logger = logging.getLogger("upload") logger.setLevel(logging.INFO) # Run me with: CONFIG_FILE=/path/to/config.env python import_local_images.py --path /my/folder if __name__ == '__main__': parser = ArgumentParser( prog="Import images sequentially to PIMS root from a local folder.") parser.add_argument( '--path', help="A directory with images to import, or an image path.") params, _ = parser.parse_known_args(sys.argv[1:]) path = Path(params.path) if not path.exists(): exit(-1) if path.is_file(): image_paths = [path] else: image_paths = [p for p in path.recursive_iterdir() if p.is_file()] for image_path in image_paths: # We have to copy to file to pending path first to pass importer validation. tmp_path = Path(PENDING_PATH) / image_path.name shutil.copy(image_path, tmp_path) try: run_import(tmp_path, image_path.name, prefer_copy=False)
def walk(path):
    """
    Recursively yield the resolved path of every entry under `path` that is
    not a directory. Directories themselves are never yielded.
    """
    for entry in Path(path).iterdir():
        if not entry.is_dir():
            yield entry.resolve()
        else:
            yield from walk(entry)
def test_path2filepath(app, settings, rootpath):
    """
    Check that `path2filepath` gives back the filepath relative to the root
    found in the settings it receives.
    """
    relative = "dir/file"

    # Work on a copy of the settings, so that the shared ones keep their root.
    custom_settings = settings.copy()
    custom_settings.root = rootpath

    absolute = Path(rootpath) / relative
    assert path2filepath(absolute, custom_settings) == "dir/file"
def test_basic_file(app, settings):
    """
    Check existence, size and creation date of a fake uploaded file.
    """
    path = Path(settings.root, "upload0/myfile.svs")
    assert path.exists()
    assert path.size == 0

    # The file has to be less than one day old.
    age = datetime.today() - path.creation_datetime
    assert age.days == 0
def build_histogram_file(in_image, dest, hist_type: HistogramType, overwrite: bool = False):
    """
    Build a histogram for an image and save it as zarr file.

    The file is first written next to `dest`, under the name
    `tmp_<dest name>`, and moved to `dest` once complete. It holds per-plane,
    per-channel and per-image histograms together with their bounds; groups
    considered redundant are removed before the move.

    Parameters
    ----------
    in_image : Image
        The image from which histogram has to be extracted.
    dest : Path
        The path where the histogram file will be saved.
    hist_type : HistogramType
        The type of histogram to build (FAST or COMPLETE). It is forced to
        COMPLETE when the image has at most `MAX_PIXELS_COMPLETE_HISTOGRAM`
        pixels.
    overwrite : bool (default: False)
        Whether overwrite existing histogram file at `dest` if any

    Returns
    -------
    histogram : Histogram
        The zarr histogram file in read-only mode

    Raises
    ------
    NotImplementedError
        If a histogram other than FAST is requested for an image with more
        than `MAX_PIXELS_COMPLETE_HISTOGRAM` pixels.
    FileExistsError
        If `dest` exists and `overwrite` is False.
    """
    # Number of histogram bins: one per value, capped to 16 bits.
    n_values = 2**min(in_image.significant_bits, 16)

    if in_image.n_pixels <= MAX_PIXELS_COMPLETE_HISTOGRAM:
        # Image with few enough pixels: the histogram is stored as COMPLETE,
        # whatever type was requested.
        extract_fn = _extract_np_thumb
        hist_type = HistogramType.COMPLETE
    else:
        if hist_type == HistogramType.FAST:
            extract_fn = _extract_np_thumb
        else:
            # Not supported yet: the assignment below is a placeholder.
            extract_fn = in_image.tile
            raise NotImplementedError()  # TODO

    if not overwrite and dest.exists():
        raise FileExistsError(dest)

    # While the file is not fully built, we save it at a temporary location
    tmp_dest = dest.parent / Path(f"tmp_{dest.name}")
    zroot = zarr.open_group(str(tmp_dest), mode='w')
    zroot.attrs[ZHF_ATTR_TYPE] = hist_type
    zroot.attrs[ZHF_ATTR_FORMAT] = "PIMS-1.0"

    # Create the group for plane histogram
    # TODO: use Dask to manipulate Zarr arrays (for bounds)
    #  so that we can fill the zarr array incrementally
    # https://github.com/zarr-developers/zarr-python/issues/446
    shape = (in_image.duration, in_image.depth, in_image.n_channels)
    zplane = zroot.create_group(ZHF_PER_PLANE)
    # In-memory accumulator of shape (duration, depth, n_channels, n_values).
    npplane_hist = np.zeros(shape=shape + (n_values, ), dtype=np.uint64)
    for data, c_range, z, t, ratio in extract_fn(in_image):
        # `read` is the index in the last axis of the extracted data,
        # `c` the index of the matching channel in the image.
        for read, c in enumerate(c_range):
            h, _ = histogram(data[:, :, read], source_range='dtype')
            # Counts are scaled by `ratio` and rounded to integers
            # (presumably to compensate for extracting a downscaled
            # thumbnail - TODO confirm).
            npplane_hist[t, z, c, :] += np.rint(h * ratio).astype(np.uint64)
    zplane.array(ZHF_HIST, npplane_hist)
    # Bounds are stored as (argmin_nonzero, argmax_nonzero) pairs along a
    # new last axis.
    zplane.array(
        ZHF_BOUNDS,
        np.stack((argmin_nonzero(npplane_hist), argmax_nonzero(npplane_hist)), axis=-1))

    # Create the group for channel histogram
    # (plane histograms summed over the first two axes)
    zchannel = zroot.create_group(ZHF_PER_CHANNEL)
    npchannel_hist = np.sum(npplane_hist, axis=(0, 1))
    zchannel.array(ZHF_HIST, npchannel_hist)
    zchannel.array(
        ZHF_BOUNDS,
        np.stack(
            (argmin_nonzero(npchannel_hist), argmax_nonzero(npchannel_hist)),
            axis=-1))

    # Create the group for image histogram
    # (channel histograms summed over the channel axis)
    zimage = zroot.create_group(ZHF_PER_IMAGE)
    npimage_hist = np.sum(npchannel_hist, axis=0)
    zimage.array(ZHF_HIST, npimage_hist)
    zimage.array(ZHF_BOUNDS, [argmin_nonzero(npimage_hist), argmax_nonzero(npimage_hist)])

    # Remove redundant data
    if in_image.duration == 1 and in_image.depth == 1:
        del zroot[ZHF_PER_PLANE]
        if in_image.n_channels == 1:
            del zroot[ZHF_PER_CHANNEL]

    # Move the zarr file (directory) to final location
    if overwrite and dest.exists():
        shutil.rmtree(dest)
    tmp_dest.replace(dest)
    return Histogram(dest, format=ZarrHistogramFormat)