def get_un_aligned_tileset():
    """Build a TileSet whose tile coordinates vary from round to round.

    One zero-filled ``(HEIGHT, WIDTH)`` tile is added for every combination of
    round, channel and z-plane given by ``NUM_ROUND``, ``NUM_CH`` and ``NUM_Z``.

    Returns
    -------
    TileSet :
        the populated tileset.
    """
    dimensions = [Axes.X, Axes.Y, Axes.CH, Axes.ZPLANE, Axes.ROUND]
    axis_sizes = {Axes.CH: NUM_CH, Axes.ROUND: NUM_ROUND, Axes.ZPLANE: NUM_Z}
    tile_shape = {Axes.Y: HEIGHT, Axes.X: WIDTH}
    tileset = TileSet(dimensions, axis_sizes, tile_shape)

    for r in range(NUM_ROUND):
        for ch in range(NUM_CH):
            for z in range(NUM_Z):
                # The round_to helpers derive the coordinates from the round
                # number, so the coordinates differ across the tileset.
                coordinates = {
                    Coordinates.X: round_to_x(r),
                    Coordinates.Y: round_to_y(r),
                    Coordinates.Z: round_to_z(r),
                }
                indices = {Axes.ROUND: r, Axes.CH: ch, Axes.ZPLANE: z}
                tile = Tile(coordinates, indices)
                tile.numpy_array = np.zeros((HEIGHT, WIDTH))
                tileset.add_tile(tile)

    return tileset
def get_aligned_tileset():
    """Build a TileSet in which every tile has the same physical coordinates.

    One zero-filled tile is added for every combination of round, channel and
    z-plane given by ``NUM_ROUND``, ``NUM_CH`` and ``NUM_Z``. Every tile uses
    the fixed coordinates X=1, Y=4, Z=3.

    Returns
    -------
    TileSet :
        the populated tileset.
    """
    aligned_tileset = TileSet(
        [Axes.X, Axes.Y, Axes.CH, Axes.ZPLANE, Axes.ROUND],
        {Axes.CH: NUM_CH, Axes.ROUND: NUM_ROUND, Axes.ZPLANE: NUM_Z},
        {Axes.Y: HEIGHT, Axes.X: WIDTH},
    )

    for r in range(NUM_ROUND):
        for ch in range(NUM_CH):
            for z in range(NUM_Z):
                tile = Tile(
                    {
                        Coordinates.X: 1,
                        Coordinates.Y: 4,
                        Coordinates.Z: 3,
                    },
                    {
                        Axes.ROUND: r,
                        Axes.CH: ch,
                        Axes.ZPLANE: z,
                    },
                )
                # Size the data from the same constants used for the declared
                # default tile shape, so the two cannot drift apart (the data
                # was previously hard-coded to 100 x 100).
                tile.numpy_array = np.zeros((HEIGHT, WIDTH))
                aligned_tileset.add_tile(tile)

    return aligned_tileset
def synthetic_stack(
        cls,
        num_hyb: int = 4,
        num_ch: int = 4,
        num_z: int = 12,
        tile_height: int = 50,
        tile_width: int = 40,
        tile_data_provider: Callable[[int, int, int, int, int], np.ndarray] = None,
        tile_extras_provider: Callable[[int, int, int], Any] = None,
) -> "ImageStack":
    """Generate a synthetic ImageStack.

    Parameters
    ----------
    num_hyb, num_ch, num_z : int
        Number of hybridization rounds, channels and z-planes; one tile is
        created for every (hyb, ch, z) combination.
    tile_height, tile_width : int
        Used as the tileset's default tile shape and passed to
        ``tile_data_provider``.
    tile_data_provider : Callable[[int, int, int, int, int], np.ndarray]
        Called as ``(hyb, ch, z, tile_height, tile_width)`` to produce each
        tile's data. ``cls._default_tile_data_provider`` is used when None.
    tile_extras_provider : Callable[[int, int, int], Any]
        Called as ``(hyb, ch, z)`` to produce each tile's extras.
        ``cls._default_tile_extras_provider`` is used when None.

    Returns
    -------
    ImageStack :
        ``cls`` constructed from the generated tileset.
    """
    data_provider = (
        cls._default_tile_data_provider
        if tile_data_provider is None else tile_data_provider)
    extras_provider = (
        cls._default_tile_extras_provider
        if tile_extras_provider is None else tile_extras_provider)

    index_sizes = {
        Indices.HYB: num_hyb,
        Indices.CH: num_ch,
        Indices.Z: num_z,
    }
    tileset = TileSet(
        {Coordinates.X, Coordinates.Y, Indices.HYB, Indices.CH, Indices.Z},
        index_sizes,
        default_tile_shape=(tile_height, tile_width),
    )

    for hyb in range(num_hyb):
        for ch in range(num_ch):
            for z in range(num_z):
                # Every tile gets the same placeholder physical extent.
                coordinates = {
                    Coordinates.X: (0.0, 0.001),
                    Coordinates.Y: (0.0, 0.001),
                    Coordinates.Z: (0.0, 0.001),
                }
                indices = {Indices.HYB: hyb, Indices.CH: ch, Indices.Z: z}
                tile = Tile(
                    coordinates,
                    indices,
                    extras=extras_provider(hyb, ch, z),
                )
                tile.numpy_array = data_provider(
                    hyb, ch, z, tile_height, tile_width)
                tileset.add_tile(tile)

    return cls(tileset)
def synthetic_stack(
        num_hyb: int=DEFAULT_NUM_HYB,
        num_ch: int=DEFAULT_NUM_CH,
        num_z: int=DEFAULT_NUM_Z,
        tile_height: int=DEFAULT_HEIGHT,
        tile_width: int=DEFAULT_WIDTH,
        tile_data_provider: Callable[[int, int, int, int, int], np.ndarray]=default_tile_data_provider,
        tile_extras_provider: Callable[[int, int, int], Any]=default_tile_extras_provider
) -> ImageStack:
    """Generate a synthetic ImageStack.

    Parameters
    ----------
    num_hyb, num_ch, num_z : int
        Number of hybridization rounds, channels and z-planes; one tile is
        created for every (hyb, ch, z) combination.
    tile_height, tile_width : int
        Used as the tileset's default tile shape and passed to
        ``tile_data_provider``.
    tile_data_provider : Callable[[int, int, int, int, int], np.ndarray]
        Called as ``(hyb, ch, z, tile_height, tile_width)`` to produce each
        tile's data.
    tile_extras_provider : Callable[[int, int, int], Any]
        Called as ``(hyb, ch, z)`` to produce each tile's extras.

    Returns
    -------
    ImageStack :
        stack constructed from the generated tileset.
    """
    index_sizes = {
        Indices.HYB: num_hyb,
        Indices.CH: num_ch,
        Indices.Z: num_z,
    }
    tileset = TileSet(
        {Coordinates.X, Coordinates.Y, Indices.HYB, Indices.CH, Indices.Z},
        index_sizes,
        default_tile_shape=(tile_height, tile_width),
    )

    for hyb in range(num_hyb):
        for ch in range(num_ch):
            for z in range(num_z):
                # Every tile gets the same placeholder physical extent.
                coordinates = {
                    Coordinates.X: (0.0, 0.001),
                    Coordinates.Y: (0.0, 0.001),
                    Coordinates.Z: (0.0, 0.001),
                }
                indices = {Indices.HYB: hyb, Indices.CH: ch, Indices.Z: z}
                tile = Tile(
                    coordinates,
                    indices,
                    extras=tile_extras_provider(hyb, ch, z),
                )
                tile.numpy_array = tile_data_provider(hyb, ch, z, tile_height, tile_width)
                tileset.add_tile(tile)

    return ImageStack(tileset)
def set_aux(self, key, img):
    """Store ``img`` as the auxiliary image registered under ``key``.

    If ``key`` already exists, ``img`` must have the same shape as the stored
    image and replaces its ``numpy_array``. Otherwise a single-tile ImageStack
    is built around ``img`` and registered under ``key``.

    Raises
    ------
    AttributeError
        If ``key`` exists and the stored image's shape differs from ``img``'s.
    """
    if key in self.auxiliary_images:
        current = self.auxiliary_images[key]
        if current.shape != img.shape:
            raise AttributeError(
                "Shape mismatch. Current data shape: {}, new data shape: {}".format(
                    current.shape, img.shape))
        self.auxiliary_images[key].numpy_array = img
        return

    # TODO: (ttung) major hack alert. we don't have a convenient mechanism to build an ImageStack from a single
    # numpy array, which we probably should.
    dimensions = {
        Indices.HYB,
        Indices.CH,
        Indices.Z,
        Coordinates.X,
        Coordinates.Y,
    }
    index_sizes = {Indices.HYB: 1, Indices.CH: 1, Indices.Z: 1}
    tileset = TileSet(dimensions, index_sizes)

    tile = Tile(
        {
            Coordinates.X: (0.000, 0.001),
            Coordinates.Y: (0.000, 0.001),
        },
        {Indices.HYB: 0, Indices.CH: 0, Indices.Z: 0},
        img.shape,
    )
    tile.numpy_array = img
    tileset.add_tile(tile)

    self.auxiliary_images[key] = ImageStack(tileset)
    # NOTE(review): the organization entry is recorded only when a new
    # auxiliary image is created -- confirm against the original layout.
    self.org['auxiliary_images'][key] = f"{key}.json"
def export(self, filepath: str, tile_opener=None, tile_format: ImageFormat = ImageFormat.NUMPY) -> None:
    """Write the image tensor to disk in spaceTx format.

    Parameters
    ----------
    filepath : str
        Path + prefix for the images and primary_images.json written by this
        function. ``.json`` is appended when the path does not end with it.
    tile_opener :
        Callable invoked as ``(tileset_path, tile, ext)`` that returns the file
        object a tile is written to. When None, a default is used that opens
        ``<base>-H<round>-C<ch>[-Z<zplane>].<ext>`` for binary writing, where
        ``<base>`` is the tileset path without its suffix.
    tile_format : ImageFormat
        Format in which each 2D plane should be written.
    """
    # Add log data to extras
    encoded_log = logging.LogEncoder().encode({LOG: self.log})
    self._tile_data.extras[STARFISH_EXTRAS_KEY] = encoded_log

    axis_sizes = {
        Axes.ROUND: self.num_rounds,
        Axes.CH: self.num_chs,
        Axes.ZPLANE: self.num_zplanes,
    }
    tileset = TileSet(
        dimensions={Axes.ROUND, Axes.CH, Axes.ZPLANE, Axes.Y, Axes.X},
        shape=axis_sizes,
        default_tile_shape={Axes.Y: self.tile_shape[0], Axes.X: self.tile_shape[1]},
        extras=self._tile_data.extras,
    )

    for axis_val_map in self._iter_axes({Axes.ROUND, Axes.CH, Axes.ZPLANE}):
        tilekey = TileKey(
            round=axis_val_map[Axes.ROUND],
            ch=axis_val_map[Axes.CH],
            zplane=axis_val_map[Axes.ZPLANE],
        )
        round_ = tilekey.round
        ch = tilekey.ch
        zplane = tilekey.z

        extras: dict = self._tile_data[tilekey]
        selector = {Axes.ROUND: round_, Axes.CH: ch, Axes.ZPLANE: zplane}

        # X and Y are recorded as (first, last) values of the coordinate arrays.
        x_values = self.xarray[Coordinates.X.value]
        x_coordinates = (float(x_values[0]), float(x_values[-1]))
        y_values = self.xarray[Coordinates.Y.value]
        y_coordinates = (float(y_values[0]), float(y_values[-1]))
        coordinates: MutableMapping[Coordinates, Union[Tuple[Number, Number], Number]] = {
            Coordinates.X: x_coordinates,
            Coordinates.Y: y_coordinates,
        }
        if Coordinates.Z in self.xarray.coords:
            # set the z coord to the calculated value from the associated z plane
            coordinates[Coordinates.Z] = float(self.xarray[Coordinates.Z.value][zplane])

        tile = Tile(coordinates=coordinates, indices=selector, extras=extras)
        # A separate selector dict is handed to get_slice so the tile's
        # indices mapping is not shared with it.
        plane, _ = self.get_slice(
            selector={Axes.ROUND: round_, Axes.CH: ch, Axes.ZPLANE: zplane})
        tile.numpy_array = plane
        tileset.add_tile(tile)

    if tile_opener is None:
        def _open_tile_file(tileset_path: Path, tile, ext):
            base = tileset_path.parent / tileset_path.stem
            zstr = (
                "-Z{}".format(tile.indices[Axes.ZPLANE])
                if Axes.ZPLANE in tile.indices else "")
            tile_path = "{}-H{}-C{}{}.{}".format(
                str(base),
                tile.indices[Axes.ROUND],
                tile.indices[Axes.CH],
                zstr,
                ext,
            )
            return open(tile_path, "wb")

        tile_opener = _open_tile_file

    if not filepath.endswith('.json'):
        filepath = filepath + '.json'
    Writer.write_to_path(
        tileset,
        filepath,
        pretty=True,
        tile_opener=tile_opener,
        tile_format=tile_format,
    )
def export(self, filepath: str, tile_opener=None, tile_format: ImageFormat = ImageFormat.NUMPY) -> None:
    """Write the image tensor to disk in spaceTx format.

    Parameters
    ----------
    filepath : str
        Path + prefix for the images and primary_images.json written by this
        function. ``.json`` is appended when the path does not end with it.
    tile_opener :
        Callable invoked as ``(tileset_path, tile, ext)`` that returns the file
        object a tile is written to. When None, a default is used that opens
        ``<basename>-H<round>-C<ch>[-Z<z>].<ext>`` for binary writing, where
        ``<basename>`` is the tileset path without its extension.
    tile_format : ImageFormat
        Format in which each 2D plane should be written.
    """
    tileset = TileSet(
        dimensions={
            Indices.ROUND,
            Indices.CH,
            Indices.Z,
            Indices.Y,
            Indices.X,
        },
        shape={
            Indices.ROUND: self.num_rounds,
            Indices.CH: self.num_chs,
            Indices.Z: self.num_zlayers,
        },
        default_tile_shape=self._tile_shape,
        extras=self._tile_metadata.extras,
    )
    for round_ in range(self.num_rounds):
        for ch in range(self.num_chs):
            for zlayer in range(self.num_zlayers):
                tilekey = TileKey(round=round_, ch=ch, z=zlayer)
                extras: dict = self._tile_metadata[tilekey]

                tile_indices = {
                    Indices.ROUND: round_,
                    Indices.CH: ch,
                    Indices.Z: zlayer,
                }

                coordinates: MutableMapping[Coordinates, Tuple[Number, Number]] = dict()
                x_coordinates = self.tile_coordinates(tile_indices, Coordinates.X)
                y_coordinates = self.tile_coordinates(tile_indices, Coordinates.Y)
                z_coordinates = self.tile_coordinates(tile_indices, Coordinates.Z)

                coordinates[Coordinates.X] = x_coordinates
                coordinates[Coordinates.Y] = y_coordinates
                # NaN never compares equal to anything (itself included), so a
                # `!= np.nan` check is always True and would export NaN z
                # coordinates. np.isnan is the reliable test.
                if not (np.isnan(z_coordinates[0]) or np.isnan(z_coordinates[1])):
                    coordinates[Coordinates.Z] = z_coordinates

                tile = Tile(
                    coordinates=coordinates,
                    indices=tile_indices,
                    extras=extras,
                )
                tile.numpy_array, _ = self.get_slice(indices={
                    Indices.ROUND: round_,
                    Indices.CH: ch,
                    Indices.Z: zlayer,
                })
                tileset.add_tile(tile)

    if tile_opener is None:
        def tile_opener(tileset_path, tile, ext):
            tile_basename = os.path.splitext(tileset_path)[0]
            if Indices.Z in tile.indices:
                zval = tile.indices[Indices.Z]
                zstr = "-Z{}".format(zval)
            else:
                zstr = ""
            # The handle is returned open; closing it is left to the caller.
            return open(
                "{}-H{}-C{}{}.{}".format(
                    tile_basename,
                    tile.indices[Indices.ROUND],
                    tile.indices[Indices.CH],
                    zstr,
                    ext,
                ),
                "wb")

    if not filepath.endswith('.json'):
        filepath += '.json'
    Writer.write_to_path(
        tileset,
        filepath,
        pretty=True,
        tile_opener=tile_opener,
        tile_format=tile_format)
def export(self,
           filepath: str,
           tile_opener: Optional[Callable[[PurePath, Tile, str], BinaryIO]] = None,
           tile_format: ImageFormat=ImageFormat.NUMPY) -> None:
    """Write the image tensor to disk in spaceTx format.

    Parameters
    ----------
    filepath : str
        Path + prefix for the images and primary_images.json written by this
        function. ``.json`` is appended when the path does not end with it.
    tile_opener : Optional[Callable[[PurePath, Tile, str], BinaryIO]]
        A callable responsible for opening the file that a tile's data is to
        be written to. It receives the path of the tileset, the tile, and the
        expected file extension. When omitted, a default is used that opens
        ``<base>-H<round>-C<ch>[-Z<zplane>].<ext>`` for binary writing.
    tile_format : ImageFormat
        Format in which each 2D plane should be written.
    """
    # Add log data to extras
    if self._tile_data:
        tileset_extras = self._tile_data.extras
    else:
        tileset_extras = {}
    tileset_extras[STARFISH_EXTRAS_KEY] = self.log.encode()

    axis_sizes = {
        Axes.ROUND: self.num_rounds,
        Axes.CH: self.num_chs,
        Axes.ZPLANE: self.num_zplanes,
    }
    tileset = TileSet(
        dimensions={Axes.ROUND, Axes.CH, Axes.ZPLANE, Axes.Y, Axes.X},
        shape=axis_sizes,
        default_tile_shape={Axes.Y: self.tile_shape[0], Axes.X: self.tile_shape[1]},
        extras=tileset_extras,
    )

    for selector in self._iter_axes({Axes.ROUND, Axes.CH, Axes.ZPLANE}):
        tilekey = TileKey(
            round=selector[Axes.ROUND],
            ch=selector[Axes.CH],
            zplane=selector[Axes.ZPLANE],
        )
        extras: dict = self._tile_data[tilekey] if self._tile_data else {}

        # X and Y are recorded as (first, last) values of the coordinate arrays.
        x_values = self.xarray[Coordinates.X.value]
        x_coordinates = (float(x_values[0]), float(x_values[-1]))
        y_values = self.xarray[Coordinates.Y.value]
        y_coordinates = (float(y_values[0]), float(y_values[-1]))
        coordinates: MutableMapping[Coordinates, Union[Tuple[Number, Number], Number]] = {
            Coordinates.X: x_coordinates,
            Coordinates.Y: y_coordinates,
        }
        if Coordinates.Z in self.xarray.coords:
            # set the z coord to the calculated value from the associated z plane
            coordinates[Coordinates.Z] = float(
                self.xarray[Coordinates.Z.value][selector[Axes.ZPLANE]])

        tile = Tile(coordinates=coordinates, indices=selector, extras=extras)
        plane, _ = self.get_slice(selector)
        tile.numpy_array = plane
        tileset.add_tile(tile)

    if tile_opener is None:
        def _open_tile_file(tileset_path: PurePath, tile: Tile, ext: str):
            base = tileset_path.parent / tileset_path.stem
            zstr = (
                "-Z{}".format(tile.indices[Axes.ZPLANE])
                if Axes.ZPLANE in tile.indices else "")
            tile_path = "{}-H{}-C{}{}.{}".format(
                str(base),
                tile.indices[Axes.ROUND],
                tile.indices[Axes.CH],
                zstr,
                ext,
            )
            return open(tile_path, "wb")

        tile_opener = _open_tile_file

    if not filepath.endswith('.json'):
        filepath = filepath + '.json'
    Writer.write_to_path(
        tileset,
        filepath,
        pretty=True,
        tile_opener=tile_opener,
        tile_format=tile_format,
    )
def build_image(
        fov_count: int,
        round_count: int,
        ch_count: int,
        z_count: int,
        image_fetcher: TileFetcher,
        default_shape: Optional[Tuple[int, int]] = None,
) -> Collection:
    """
    Build and return an image set with the following characteristics:

    Parameters
    ----------
    fov_count : int
        Number of fields of view in this image set.
    round_count : int
        Number of rounds in this image set.
    ch_count : int
        Number of channels in this image set.
    z_count : int
        Number of z-layers in this image set.
    image_fetcher : TileFetcher
        Instance of TileFetcher that provides the data for the tile.
    default_shape : Optional[Tuple[int, int]]
        Default shape of the individual tiles in this image set.

    Returns
    -------
    The slicedimage collection representing the image, holding one partition
    named ``fov_NNN`` per field of view.
    """
    dimensions = [
        Coordinates.X,
        Coordinates.Y,
        Coordinates.Z,
        Indices.Z,
        Indices.ROUND,
        Indices.CH,
        Indices.X,
        Indices.Y,
    ]

    collection = Collection()
    for fov_ix in range(fov_count):
        index_sizes = {
            Indices.ROUND: round_count,
            Indices.CH: ch_count,
            Indices.Z: z_count,
        }
        fov_images = TileSet(dimensions, index_sizes, default_shape, ImageFormat.TIFF)

        # Tiles are fetched z-plane first, then round, then channel.
        for z_ix in range(z_count):
            for round_ix in range(round_count):
                for ch_ix in range(ch_count):
                    image = image_fetcher.get_tile(fov_ix, round_ix, ch_ix, z_ix)
                    tile_indices = {
                        Indices.Z: z_ix,
                        Indices.ROUND: round_ix,
                        Indices.CH: ch_ix,
                    }
                    tile = Tile(
                        image.coordinates,
                        tile_indices,
                        image.shape,
                        extras=image.extras,
                    )
                    tile.numpy_array = image.tile_data
                    fov_images.add_tile(tile)

        partition_name = "fov_{:03}".format(fov_ix)
        collection.add_partition(partition_name, fov_images)

    return collection
def synthesize() -> Tuple[Stack, list]:
    """Synthesize synthetic spatial image-based transcriptomics data

    Both the ``random`` module and ``np.random`` are seeded with 2, so repeated
    calls produce the same data.

    Returns
    -------
    Stack :
        starfish Stack containing synthetic spots, with the max projection of
        the data attached as the 'dots' auxiliary image
    list :
        codebook matching the synthetic data; one entry per generated spot,
        each a dict with 'codeword' and 'gene_name' keys
    """
    # set random seed so that data is consistent across tests
    random.seed(2)
    np.random.seed(2)

    NUM_HYB = 4
    NUM_CH = 2
    NUM_Z = 1
    HEIGHT = 100
    WIDTH = 100

    assert WIDTH == HEIGHT  # for compatibility with the parameterization of the code

    def choose(n, k):
        # all length-n 0/1 lists containing exactly k ones
        if n == k:
            return [[1] * k]
        subsets = [[0] + a for a in choose(n - 1, k)]
        if k > 0:
            subsets += [[1] + a for a in choose(n - 1, k - 1)]
        return subsets

    def graham_sloane_codes(n):
        # n is length of codeword
        # number of on bits is 4
        def code_sum(codeword):
            return sum(i * c for i, c in enumerate(codeword)) % n
        return [c for c in choose(n, 4) if code_sum(c) == 0]

    p = {
        # number of on bits (not used with current codebook)
        'N_high': 4,
        # length of barcode
        'N_barcode': NUM_CH * NUM_HYB,
        # mean number of fluorophores per transcripts - depends on amplification strategy (e.g HCR, bDNA)
        'N_flour': 200,
        # mean number of photons per fluorophore - depends on exposure time, bleaching rate of dye
        'N_photons_per_flour': 50,
        # mean number of background photons per pixel - depends on tissue clearing and autofluorescence
        'N_photon_background': 1000,
        # quantum efficiency of the camera detector units number of electrons per photon
        'detection_efficiency': .25,
        # camera read noise per pixel in units electrons
        'N_background_electrons': 1,
        # number of RNA puncta; keep this low to reduce overlap probability
        'N_spots': 20,
        # height and width of image in pixel units
        'N_size': WIDTH,
        # standard deviation of gaussian in pixel units
        'psf': 2,
        # dynamic range of camera sensor 37,000 assuming a 16-bit AD converter
        'graylevel': 37000.0 / 2 ** 16,
        # 16-bit AD converter
        'bits': 16
    }

    # NOTE: `codebook` is rebound further down to the list that is returned;
    # generate_spot only runs before that rebinding and so sees these codes.
    codebook = graham_sloane_codes(p['N_barcode'])

    def generate_spot(p):
        # The order of the random draws below determines the seeded output.
        position = rand(2)
        gene = random.choice(range(len(codebook)))
        barcode = array(codebook[gene])
        photons = [poisson(p['N_photons_per_flour']) * poisson(p['N_flour']) * b for b in barcode]
        return DataFrame({'position': [position], 'barcode': [barcode], 'photons': [photons], 'gene': gene})

    # right now there is no jitter on x-y positions of the spots, we might want to make it a vector
    spots = concat([generate_spot(p) for _ in range(p['N_spots'])])  # type: ignore

    image = zeros((p['N_barcode'], p['N_size'], p['N_size'],))

    for s in spots.itertuples():
        image[:, int(p['N_size'] * s.position[0]), int(p['N_size'] * s.position[1])] = s.photons

    image_with_background = image + poisson(p['N_photon_background'], size=image.shape)
    filtered = array([gaussian(im, p['psf']) for im in image_with_background])
    filtered = filtered * p['detection_efficiency'] + normal(scale=p['N_background_electrons'], size=filtered.shape)
    signal = np.array([(x / p['graylevel']).astype(int).clip(0, 2 ** p['bits']) for x in filtered])

    def select_uint_dtype(values):
        """choose appropriate dtype based on values of an array"""
        max_val = np.max(values)
        for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]:
            # np.iinfo gives the largest representable value directly. The
            # earlier `dtype(-1)` wrap-around trick raises OverflowError on
            # NumPy >= 2, which rejects out-of-range Python integers.
            if max_val <= np.iinfo(dtype).max:
                return values.astype(dtype)
        raise ValueError('value exceeds dynamic range of largest numpy type')

    corrected_signal = select_uint_dtype(signal)
    rescaled_signal: np.ndarray = rescale_intensity(corrected_signal)

    # set up the tile set
    image_data = TileSet(
        {Coordinates.X, Coordinates.Y, Indices.HYB, Indices.CH, Indices.Z},
        {
            Indices.HYB: NUM_HYB,
            Indices.CH: NUM_CH,
            Indices.Z: NUM_Z,
        },
        default_tile_shape=(HEIGHT, WIDTH),
    )

    # fill the TileSet
    experiment_indices = list(product(range(NUM_HYB), range(NUM_CH), range(NUM_Z)))
    for i, (hyb, ch, z) in enumerate(experiment_indices):
        tile = Tile(
            {
                Coordinates.X: (0.0, 0.001),
                Coordinates.Y: (0.0, 0.001),
                Coordinates.Z: (0.0, 0.001),
            },
            {
                Indices.HYB: hyb,
                Indices.CH: ch,
                Indices.Z: z,
            }
        )
        tile.numpy_array = rescaled_signal[i]
        image_data.add_tile(tile)

    data_stack = ImageStack(image_data)

    # make a max projection and pretend that's the dots image; a second
    # ImageStack is created to hold it
    dots_data = TileSet(
        {Coordinates.X, Coordinates.Y, Indices.HYB, Indices.CH, Indices.Z},
        {
            Indices.HYB: 1,
            Indices.CH: 1,
            Indices.Z: 1,
        },
        default_tile_shape=(HEIGHT, WIDTH),
    )
    tile = Tile(
        {
            Coordinates.X: (0.0, 0.001),
            Coordinates.Y: (0.0, 0.001),
            Coordinates.Z: (0.0, 0.001),
        },
        {
            Indices.HYB: 0,
            Indices.CH: 0,
            Indices.Z: 0,
        }
    )
    tile.numpy_array = np.max(rescaled_signal, 0)
    dots_data.add_tile(tile)
    dots_stack = ImageStack(dots_data)

    # TODO can we mock up a nuclei image somehow?

    # put the data together into a top-level Stack
    results = Stack.from_data(data_stack, aux_dict={'dots': dots_stack})

    # make the codebook(s)
    codebook = []
    for _, code_record in spots.iterrows():
        codeword = []
        for code_value, (hyb, ch, z) in zip(code_record['barcode'], experiment_indices):
            if code_value != 0:
                codeword.append({
                    Indices.HYB: hyb,
                    Indices.CH: ch,
                    Indices.Z: z,
                    "v": code_value
                })
        codebook.append(
            {
                'codeword': codeword,
                'gene_name': code_record['gene']
            }
        )

    return results, codebook