def from_json(cls, url_or_path: str) -> "TransformsList":
    """
    Build a TransformsList from a JSON document stored at a path or URL.

    The slicedimage backend settings used to open the document are read from
    :py:class:`starfish.config.StarfishConfig`.

    Parameters
    ----------
    url_or_path : str
        Either an absolute URL or a filesystem path to a transformsList.

    Returns
    -------
    TransformsList
        Instance built from every ``(selectors, transform type, matrix)`` entry found in
        the document.
    """
    slicedimage_config = StarfishConfig().slicedimage
    backend, name, _ = resolve_path_or_url(url_or_path, backend_config=slicedimage_config)
    with backend.read_contextmanager(name) as handle:
        raw_entries = json.load(handle)

    parsed: List[Tuple[Mapping[Axes, int], TransformType, GeometricTransform]] = []
    for raw_selectors, raw_type, raw_matrix in raw_entries:
        # Selector keys come back from JSON as raw values; turn them into Axes members.
        axes_selector = {Axes(axis): index for axis, index in raw_selectors.items()}
        kind = TransformType(raw_type)
        # The mapping supplies the callable that wraps a matrix for this transform type.
        build_transform = transformsTypeMapping[kind]
        parsed.append((axes_selector, kind, build_transform(np.array(raw_matrix))))
    return cls(parsed)
def transform(
        self,
        func: Callable,
        group_by: Set[Axes] = None,
        verbose=False,
        n_processes: Optional[int] = None,
        **kwargs) -> List[Any]:
    """Apply a function to every sub-array obtained by splitting along a set of axes.

    The work is distributed over a multiprocessing ``Pool`` whose workers are initialised
    with ``SharedMemory.initializer`` and the stack's backing array, shape and dtype.

    Parameters
    ----------
    func : Callable
        Function to apply. must expect a first argument which is a numpy array (see group_by)
        but may return any object type.
    group_by : Set[Axes]
        Axes to split the data along. For instance, splitting a 2D array (axes: X, Y;
        size: 3, 4) by X results in 3 arrays of size 4.
        (default {Axes.ROUND, Axes.CH, Axes.ZPLANE})
    verbose : bool
        If True (and the StarfishConfig verbose flag is also set), wrap the work items in a
        progress bar (default = False)
    n_processes : Optional[int]
        Forwarded to ``Pool(processes=...)``; None lets the pool choose (default = None).
    kwargs : dict
        Additional arguments bound to func before it is applied

    Returns
    -------
    List[Any] :
        One ``(result, selector)`` pair per group, in selector order.
    """
    # With no explicit grouping, each work item is a single (x, y) tile.
    if group_by is None:
        group_by = {Axes.ROUND, Axes.CH, Axes.ZPLANE}

    selectors = list(self._iter_axes(group_by))
    slices_per_selector = [self._build_slice_list(selector)[0] for selector in selectors]
    work_items = zip(selectors, slices_per_selector)
    if verbose and StarfishConfig().verbose:
        work_items = tqdm(work_items)

    coords = self.xarray.coords
    coordinates = {dim: coords[dim] for dim in coords.dims}

    bound_func = partial(func, **kwargs)
    mp_applyfunc: Callable = partial(
        self._processing_workflow, bound_func, self.xarray.dims, coordinates)

    # Everything a worker needs to re-attach to the shared backing array.
    shared_array_spec = (
        self._data._backing_mp_array,
        self._data._data.shape,
        self._data._data.dtype,
    )
    with Pool(
            processes=n_processes,
            initializer=SharedMemory.initializer,
            initargs=(shared_array_spec,)) as pool:
        lazy_results = pool.imap(mp_applyfunc, work_items)
        # Note: the results are [None, ...] when executing an in-place workflow.
        # Note: the results must be drained while the pool is still open; leaving the
        # context manager first would deadlock the Pool.
        return list(zip(lazy_results, selectors))
def from_url(cls, url: str, baseurl: Optional[str], aligned_group: int = 0):
    """
    Build an ImageStack from a URL, optionally resolved against a base URL.

    The following examples will all load from the same location:

    - url: https://www.example.com/images/primary_images.json  baseurl: None
    - url: https://www.example.com/images/primary_images.json  baseurl: I_am_ignored
    - url: primary_images.json  baseurl: https://www.example.com/images
    - url: images/primary_images.json  baseurl: https://www.example.com

    Parameters
    ----------
    url : str
        Either an absolute URL or a relative URL referring to the image to be read.
    baseurl : Optional[str]
        If url is a relative URL, then this must be provided.  If url is an absolute URL,
        then this parameter is ignored.
    aligned_group: int
        Index into the aligned tile groups parsed from the tileset; selects which group is
        loaded.  Only matters if the tileset is unaligned.  Default 0 (the first group)

    Returns
    -------
    ImageStack :
        An ImageStack encapsulating the data from the TileSet.
    """
    slicedimage_config = StarfishConfig().slicedimage
    tileset = Reader.parse_doc(url, baseurl, backend_config=slicedimage_config)
    # One set of crop parameters per aligned group; pick the requested one.
    aligned_groups = CropParameters.parse_aligned_groups(tileset)
    return cls.from_tileset(tileset, crop_parameters=aligned_groups[aligned_group])
def transform(
        self,
        func: Callable,
        *args,
        group_by: Set[Axes]=None,
        verbose=False,
        n_processes: Optional[int]=None,
        **kwargs
) -> List[Any]:
    """Split the image along a set of axes, and apply a function across all the components.

    The work is fanned out over a ``ThreadPoolExecutor``.

    Parameters
    ----------
    func : Callable
        Function to apply. must expect a first argument which is a numpy array (see group_by)
        but may return any object type.
    args : tuple
        Additional positional arguments handed to the processing workflow for func.
    group_by : Set[Axes]
        Axes to split the data along. For instance, splitting a 2D array (axes: X, Y;
        size: 3, 4) by X results in 3 arrays of size 4.
        (default {Axes.ROUND, Axes.CH, Axes.ZPLANE})
    verbose : bool
        If True (and the StarfishConfig verbose flag is also set), report progress as
        results complete (default = False)
    n_processes : Optional[int]
        The number of worker threads to use. If None, uses the output of os.cpu_count()
        (default = None).
    kwargs : dict
        Additional keyword arguments handed to the processing workflow for func.

    Returns
    -------
    List[Any] :
        One ``(result, selector)`` pair per group, in selector order.
    """
    self._ensure_data_loaded()

    # default grouping is by (x, y) tile
    if group_by is None:
        group_by = {Axes.ROUND, Axes.CH, Axes.ZPLANE}

    if n_processes is None:
        n_processes = os.cpu_count()

    # Keep the selectors as a plain list: it is consumed twice (once to submit the work and
    # once to pair results with their selector), so it must not be wrapped in a progress bar.
    selectors = list(self._iter_axes(group_by))
    show_progress = verbose and StarfishConfig().verbose

    mp_applyfunc: Callable = partial(
        self._processing_workflow,
        func,
        self.xarray,
        args,
        kwargs,
    )

    with ThreadPoolExecutor(max_workers=n_processes) as tpe:
        results = tpe.map(mp_applyfunc, selectors)
        if show_progress:
            # Executor.map submits every task up front, so wrapping its *input* would only
            # measure submission.  Wrapping the result iterator tracks completed work.
            results = tqdm(results, total=len(selectors))

        # Note: results is [None, ...] if executing an in-place workflow
        # Note: the results are collected inside the context manager so that progress is
        # reported while the workers are still running.
        return list(zip(results, selectors))
def run(self, stack: ImageStack, transforms_list: TransformsList,
        in_place: bool = False, verbose: bool = False, *args, **kwargs) -> Optional[ImageStack]:
    """Warp the tiles of an ImageStack with the transforms in a TransformsList.

    Parameters
    ----------
    stack : ImageStack
        Stack whose tiles are warped.
    transforms_list : TransformsList
        Provides ``transforms``, an iterable of ``(selector, transform type, transform
        object)`` entries.  It is only read; neither the list nor its selectors are modified.
    in_place : bool
        If False (default), a deep copy of ``stack`` is warped and returned and ``stack`` is
        left untouched.  If True, ``stack`` itself is modified.
    verbose : bool
        If True (and the StarfishConfig verbose flag is also set), show a progress bar over
        the transforms.
    args : tuple
        Extra positional arguments; only forwarded to the recursive in-place call.
    kwargs : dict
        Extra keyword arguments forwarded to ``warp``.

    Returns
    -------
    Optional[ImageStack] :
        The warped copy when ``in_place`` is False, otherwise None.
    """
    if not in_place:
        # create a copy of the ImageStack, call apply on that stack with in_place=True
        image_stack = deepcopy(stack)
        # in_place and verbose are passed positionally so that *args can follow them.
        self.run(image_stack, transforms_list, True, verbose, *args, **kwargs)
        return image_stack

    # Wrap a local reference for progress reporting instead of overwriting the caller's
    # transforms_list.transforms with a tqdm object.
    transforms = transforms_list.transforms
    if verbose and StarfishConfig().verbose:
        transforms = tqdm(transforms)

    all_axes = {Axes.ROUND, Axes.CH, Axes.ZPLANE}
    for selector, _, transformation_object in transforms:
        other_axes = all_axes - set(selector.keys())
        # iterate through remaining axes
        for axes in stack._iter_axes(other_axes):
            # Combine the transform's selector with the remaining axes to address one tile.
            # A fresh dict is built so the selector stored in transforms_list keeps only its
            # original keys and can be reused on a later call.
            tile_selector = {**selector, **axes}
            selected_image, _ = stack.get_slice(tile_selector)
            warped_image = warp(selected_image, transformation_object, **kwargs
                                ).astype(np.float32)
            stack.set_slice(tile_selector, warped_image)
    return None
def _compute_num_spots_per_threshold(
        self, img: np.ndarray) -> Tuple[np.ndarray, List[int]]:
    """Computes the number of detected spots for each threshold

    100 evenly spaced thresholds between the image minimum and maximum are tried in
    increasing order.  The search stops at the first threshold whose spot count is less than
    or equal to ``self.min_num_spots_detected``.

    Parameters
    ----------
    img : np.ndarray
        The image in which to count spots

    Returns
    -------
    np.ndarray :
        thresholds that were searched before the search stopped
    List[int] :
        spot counts, one per returned threshold
    """
    # thresholds to search over
    thresholds = np.linspace(img.min(), img.max(), num=100)

    # number of spots detected at each threshold
    spot_counts: List[int] = []

    # Both the instance flag and the global config must request output; the same gate is
    # used for the progress bar and for the early-stop message.
    verbose = self.verbose and StarfishConfig().verbose

    if verbose:
        threshold_iter = tqdm(thresholds)
        print('Determining optimal threshold ...')
    else:
        threshold_iter = thresholds

    for stop_index, threshold in enumerate(threshold_iter):
        spots = peak_local_max(
            img,
            min_distance=self.min_distance,
            threshold_abs=threshold,
            exclude_border=False,
            num_peaks=np.inf,
            footprint=None,
            labels=None,
        )

        # stop spot finding when the number of detected spots falls below
        # min_num_spots_detected
        if len(spots) <= self.min_num_spots_detected:
            if verbose:
                print(
                    f'Stopping early at threshold={threshold}. Number of spots fell below: '
                    f'{self.min_num_spots_detected}')
            break
        spot_counts.append(len(spots))

    # Trim both sequences to the thresholds examined before the stop.
    # NOTE(review): if the loop never breaks, stop_index is the final index, so the last
    # threshold/count pair is dropped as well — confirm whether that is intended.
    if len(thresholds) > 1:
        thresholds = thresholds[:stop_index]
        spot_counts = spot_counts[:stop_index]

    return thresholds, spot_counts
def run(
        self,
        stack: ImageStack,
        in_place: bool = False,
        verbose=False,
        n_processes: Optional[int] = None,
        *args,
) -> Optional[ImageStack]:
    """Perform filtering of an image stack

    For every round, the L2 norm across channels is computed per pixel.  Pixels whose norm is
    below ``self.thresh`` are set to zero in every channel; if ``self.normalize`` is set, the
    remaining pixels are divided by that norm.

    Parameters
    ----------
    stack : ImageStack
        Stack to be filtered.
    in_place : bool
        if True, process ImageStack in-place, otherwise return a new stack
    verbose : bool
        if True, report on the percentage completed during processing (default = False).
        The StarfishConfig verbose flag must be set as well.
    n_processes : Optional[int]: None
        Not implemented. Number of processes to use when applying filter.

    Returns
    -------
    Optional[ImageStack] :
        If in_place is False, the filtered copy of the stack.  Otherwise None (the input
        stack has been modified).
    """
    if not in_place:
        new_stack = deepcopy(stack)
        # Forward every option positionally so that *args can follow them.
        self.run(new_stack, True, verbose, n_processes, *args)
        return new_stack

    # The default is False, so even if code requests True require config to be True as well
    verbose = verbose and StarfishConfig().verbose

    channels_per_round = stack.xarray.groupby(Axes.ROUND.value)
    if verbose:
        channels_per_round = tqdm(channels_per_round)

    # compute channel magnitude mask
    for r, dat in channels_per_round:
        # nervous about how xarray orders dimensions so i put this here explicitly ....
        dat = dat.transpose(Axes.CH.value,
                            Axes.ZPLANE.value,
                            Axes.Y.value,
                            Axes.X.value
                            )
        # ... to account for this line taking the norm across axis 0, or the channel axis
        ch_magnitude = np.linalg.norm(dat, ord=2, axis=0)
        magnitude_mask = ch_magnitude >= self.thresh

        if self.normalize:
            # Divide by 1 wherever the mask is False.  Those pixels have already been zeroed
            # by the mask, so they stay zero.  np.divide(..., where=mask) without ``out``
            # would leave them uninitialised instead.
            safe_magnitude = np.where(magnitude_mask, ch_magnitude, 1)

        # apply mask and optionally, normalize by channel magnitude
        for c in stack.axis_labels(Axes.CH):
            ind = {Axes.ROUND.value: r, Axes.CH.value: c}
            stack._data[ind] = stack._data[ind] * magnitude_mask

            if self.normalize:
                stack._data[ind] = stack._data[ind] / safe_magnitude

    return None
def validate(experiment_json: str, fuzz: bool=False) -> bool:
    """validate a spaceTx formatted experiment.
    Accepts local filepaths or files hosted at http links.
    Loads configuration from StarfishConfig.

    The experiment document is validated first, then every image manifest it lists, every
    field of view listed in each valid manifest, and finally the codebook if one is named.

    Parameters
    ----------
    experiment_json : str
        path or URL to the experiment json document to validate
    fuzz : bool
        whether or not to perform element-by-element fuzzing; forwarded unchanged to every
        ``validate_file`` call.  (Per the original documentation, fuzzing returns true and
        does *not* use warnings — confirm against ``validate_file``.)

    Returns
    -------
    bool :
        True only if every ``validate_file`` call succeeded, else False

    Raises
    ------
    Exception
        If the path or URL cannot be resolved; the underlying ValueError is attached as the
        cause.

    Examples
    --------
    The following will read the experiment json file provided, downloading it if necessary,
    and begin recursively validating it and all referenced json files (e.g. codebook.json):

        >>> from starfish.core.spacetx_format import validate_sptx
        >>> valid = validate_sptx.validate(json_url)
    """

    config = StarfishConfig()
    valid = True

    # use slicedimage to read the top-level experiment json file passed by the user
    try:
        backend, name, _ = resolve_path_or_url(
            experiment_json, backend_config=config.slicedimage)
    except ValueError as exception:
        raise Exception(f"could not load {experiment_json}:\n{exception}") from exception

    with backend.read_contextmanager(name) as fh:
        experiment = json.load(fh)

    # validate experiment.json
    valid &= validate_file(name, "experiment.json", fuzz, backend)

    # loop over all the manifests that are stored in images. Disallowed names will have already
    # been excluded by experiment validation.
    for manifest in experiment['images'].values():
        # validate_file receives this dict as an extra argument; its 'contents' entry is read
        # below once the manifest has validated.
        obj: Dict = dict()
        if not validate_file(manifest, "fov_manifest.json", fuzz, backend, obj):
            valid = False
        else:
            for fov in obj['contents'].values():
                valid &= validate_file(fov, 'field_of_view/field_of_view.json', fuzz, backend)

    codebook_file = experiment.get('codebook')
    if codebook_file is not None:
        valid &= validate_file(codebook_file, "codebook/codebook.json", fuzz, backend)

    return valid
def _create_spot_attributes(
        self,
        region_properties: List[_RegionProperties],
        decoded_image: np.ndarray,
        target_map: TargetsMap,
        n_processes: Optional[int] = None
) -> Tuple[SpotAttributes, np.ndarray]:
    """Measure every connected component and collect the results as spot attributes.

    Parameters
    ----------
    region_properties : List[_RegionProperties]
        Properties of the each connected component. Output of skimage.measure.regionprops
    decoded_image : np.ndarray
        Image whose pixels correspond to the targets that the given position in the
        ImageStack decodes to.
    target_map : TargetsMap
        Unique mapping between string target names and int target IDs.
    n_processes : Optional[int]=None
        Number of worker threads used to measure spot properties.  None is passed through
        to ``ThreadPoolExecutor``, which then applies its own default.

    Returns
    -------
    SpotAttributes :
        One record per connected component, with ``Features.SPOT_ID`` set to the running
        index of the record.  Empty (with a ``target`` field) when no components were given.
    np.ndarray[bool] :
        An array with length equal to the number of features. If zero, indicates that a
        feature has failed area filters.
    """
    applyfunc = partial(
        self._single_spot_attributes,
        decoded_image=decoded_image,
        target_map=target_map,
        min_area=self._min_area,
        max_area=self._max_area
    )

    with ThreadPoolExecutor(max_workers=n_processes) as tpe:
        iterable = tqdm(region_properties, disable=(not StarfishConfig().verbose))
        # Executor.map returns a lazy iterator, which is always truthy.  Materialise it so
        # the emptiness check below actually detects "no spots".
        results = list(tpe.map(applyfunc, iterable))

    if not results:
        # no spots found
        warnings.warn("No spots found, please adjust threshold parameters")
        # Zero features -> zero-length filter array, matching the documented return shape.
        return SpotAttributes.empty(extra_fields=['target']), np.zeros(0, dtype=bool)

    spot_attrs, passes_area_filter = zip(*results)

    # update passes filter
    passes_filter = np.array(passes_area_filter, dtype=bool)

    spot_attributes = SpotAttributes(pd.DataFrame.from_records(spot_attrs))
    spot_attributes.data[Features.SPOT_ID] = np.arange(0, len(spot_attributes.data))
    return spot_attributes, passes_filter
def run_recipe(ctx, recipe, input, output):
    """Runs a recipe with a given set of inputs and outputs."""
    # Resolve the recipe location with the slicedimage settings from StarfishConfig.
    slicedimage_config = StarfishConfig().slicedimage
    backend, relativeurl, _ = resolve_path_or_url(recipe, backend_config=slicedimage_config)

    # Read the whole recipe source before handing it to Recipe.
    with backend.read_contextmanager(relativeurl) as recipe_file:
        recipe_source = recipe_file.read()

    Recipe(recipe_source, input, output).run_and_save()
def from_json(cls, url_or_path: str) -> "TransformsList":
    """
    Read a transforms document from a path or URL and build a TransformsList from it.

    The slicedimage backend settings used to open the document are read from
    :py:class:`starfish.config.StarfishConfig`; the parsed JSON is handed to
    ``cls.from_dict``.

    Parameters
    ----------
    url_or_path : str
        Either an absolute URL or a filesystem path to a transformsList.

    Returns
    -------
    TransformsList
    """
    slicedimage_config = StarfishConfig().slicedimage
    backend, name, _ = resolve_path_or_url(url_or_path, backend_config=slicedimage_config)
    with backend.read_contextmanager(name) as handle:
        document = json.load(handle)
    return cls.from_dict(transforms_document=document)
def from_path_or_url(cls, url_or_path: str, aligned_group: int = 0) -> "ImageStack":
    """
    Build an ImageStack from an absolute URL or a filesystem path.

    The location is split into a relative URL and a base URL, which are then passed to
    ``cls.from_url``.

    The following examples will all load from the same location:

    - url_or_path: file:///Users/starfish-user/images/primary_images.json
    - url_or_path: /Users/starfish-user/images/primary_images.json

    Parameters
    ----------
    url_or_path : str
        Either an absolute URL or a filesystem path to an imagestack.
    aligned_group: int
        Which aligned tile group to load into the Imagestack, only applies if the
        tileset is unaligned. Default 0 (the first group)

    Returns
    -------
    ImageStack :
        Whatever ``cls.from_url`` builds for the resolved location.
    """
    slicedimage_config = StarfishConfig().slicedimage
    # The backend itself is not needed here, only the split location.
    _backend, relative_part, base_part = resolve_path_or_url(
        url_or_path, backend_config=slicedimage_config)
    return cls.from_url(relative_part, base_part, aligned_group)
def open_json(
        cls,
        json_codebook: str,
        n_round: Optional[int] = None,
        n_channel: Optional[int] = None,
) -> "Codebook":
    """
    Load a codebook from a SpaceTx Format json file or a url pointing to such a file.

    Parameters
    ----------
    json_codebook : str
        Path or url to json file containing a spaceTx codebook.
    n_round : Optional[int]
        The number of imaging rounds used in the codes. Will be inferred if not provided.
    n_channel : Optional[int]
        The number of channels used in the codes. Will be inferred if not provided.

    Raises
    ------
    Exception
        If StarfishConfig is in strict mode and the document fails codebook validation.
    ValueError
        If the document's top level is a list rather than a dictionary.

    Examples
    --------
    NOTE(review): the example below writes a bare list to disk, which this method rejects
    with a ValueError; it appears to predate the versioned document layout and should be
    updated once the expected document keys are confirmed.

    Create a codebook from in-memory data
    ::

        >>> from starfish.types import Axes
        >>> from starfish import Codebook
        >>> import tempfile
        >>> import json
        >>> import os
        >>> dir_ = tempfile.mkdtemp()

        >>> codebook = [
        >>>     {
        >>>         Features.CODEWORD: [
        >>>             {Axes.ROUND.value: 0, Axes.CH.value: 3, Features.CODE_VALUE: 1},
        >>>             {Axes.ROUND.value: 1, Axes.CH.value: 3, Features.CODE_VALUE: 1},
        >>>         ],
        >>>         Features.TARGET: "ACTB_human"
        >>>     },
        >>>     {
        >>>         Features.CODEWORD: [
        >>>             {Axes.ROUND.value: 0, Axes.CH.value: 3, Features.CODE_VALUE: 1},
        >>>             {Axes.ROUND.value: 1, Axes.CH.value: 1, Features.CODE_VALUE: 1},
        >>>         ],
        >>>         Features.TARGET: "ACTB_mouse"
        >>>     },
        >>> ]
        >>> # make a fake file
        >>> json_codebook = os.path.join(dir_, 'codebook.json')
        >>> with open(json_codebook, 'w') as f:
        >>>     json.dump(codebook, f)
        >>> # read codebook from file
        >>> Codebook.open_json(json_codebook)
        <xarray.Codebook (target: 2, c: 4, r: 2)>
        array([[[0, 0],
                [0, 0],
                [0, 0],
                [1, 1]],

               [[0, 0],
                [0, 1],
                [0, 0],
                [1, 0]]], dtype=uint8)
        Coordinates:
          * target   (target) object 'ACTB_human' 'ACTB_mouse'
          * c        (c) int64 0 1 2 3
          * r        (r) int64 0 1

    Returns
    -------
    Codebook :
        Codebook with shape (targets, channels, imaging_rounds)

    """
    config = StarfishConfig()

    backend, name, _ = resolve_path_or_url(json_codebook, backend_config=config.slicedimage)
    with backend.read_contextmanager(name) as fh:
        codebook_doc = json.load(fh)

    # Schema validation only runs when the configuration asks for strict mode.
    if config.strict:
        codebook_validator = CodebookValidator(codebook_doc)
        if not codebook_validator.validate_object(codebook_doc):
            raise Exception("validation failed")

    if isinstance(codebook_doc, list):
        # The trailing space keeps the two literals from running together in the message.
        raise ValueError(
            "codebook is a list and not a dictionary. It is highly likely that you are using "
            "a codebook formatted for a previous version of starfish.")

    version_str = codebook_doc[DocumentKeys.VERSION_KEY]
    cls._verify_version(version_str)

    return cls.from_code_array(codebook_doc[DocumentKeys.MAPPINGS_KEY], n_round, n_channel)
def from_json(cls, json_url: str) -> "Experiment":
    """
    Build an Experiment from an experiment.json document.

    Configuration is read from StarfishConfig.  In strict mode the document is validated
    first.  Documents older than version 5.0.0 are read through their ``primary_images`` and
    ``auxiliary_images`` entries; newer ones through their ``images`` entry.

    Parameters
    ----------
    json_url : str
        file path or web link to an experiment.json file

    Returns
    -------
    Experiment :
        Experiment object serving the requested experiment data

    Raises
    ------
    Exception
        If strict mode is enabled and validation of the document fails.
    """
    config = StarfishConfig()

    if config.strict and not validate_sptx.validate(json_url):
        raise Exception("validation failed")

    backend, name, baseurl = resolve_path_or_url(json_url, config.slicedimage)
    with backend.read_contextmanager(name) as handle:
        experiment_document = json.load(handle)

    version = cls.verify_version(experiment_document['version'])

    # The codebook reference is resolved relative to the experiment document's base URL.
    _, codebook_name, codebook_baseurl = resolve_url(
        experiment_document['codebook'], baseurl, config.slicedimage)
    codebook = Codebook.open_json(pathjoin(codebook_baseurl, codebook_name))

    extras = experiment_document['extras']

    fovs: MutableSequence[FieldOfView] = list()
    fov_tilesets: MutableMapping[str, TileSet]
    if version < Version("5.0.0"):
        # Older layout: one primary collection plus named auxiliary collections.
        primary_image: Collection = Reader.parse_doc(
            experiment_document['primary_images'], baseurl, config.slicedimage)
        auxiliary_images: MutableMapping[str, Collection] = {
            aux_type: Reader.parse_doc(aux_url, baseurl, config.slicedimage)
            for aux_type, aux_url in experiment_document['auxiliary_images'].items()
        }

        # Fields of view are defined by the tilesets of the primary collection.
        for fov_name, primary_tileset in primary_image.all_tilesets():
            fov_tilesets = {FieldOfView.PRIMARY_IMAGES: primary_tileset}
            for aux_type, aux_collection in auxiliary_images.items():
                aux_tileset = aux_collection.find_tileset(fov_name)
                if aux_tileset is None:
                    continue
                fov_tilesets[aux_type] = aux_tileset
            fovs.append(FieldOfView(fov_name, image_tilesets=fov_tilesets))
    else:
        # Newer layout: every image type is a collection in its own right.
        images: MutableMapping[str, Collection] = dict()
        all_fov_names: MutableSet[str] = set()
        for image_type, image_url in experiment_document['images'].items():
            collection = Reader.parse_doc(image_url, baseurl, config.slicedimage)
            images[image_type] = collection
            all_fov_names.update(tileset_name for tileset_name, _ in collection.all_tilesets())

        # A field of view exists for every name seen in any collection.
        for fov_name in all_fov_names:
            fov_tilesets = dict()
            for image_type, image_collection in images.items():
                matching_tileset = image_collection.find_tileset(fov_name)
                if matching_tileset is None:
                    continue
                fov_tilesets[image_type] = matching_tileset
            fovs.append(FieldOfView(fov_name, image_tilesets=fov_tilesets))

    return Experiment(fovs, codebook, extras, src_doc=experiment_document)