def from_image_stack(cls, image_stack, crop_x: int = 0, crop_y: int = 0,
                     crop_z: int = 0) -> "IntensityTable":
    """Generate an IntensityTable from all the pixels in the ImageStack

    Parameters
    ----------
    crop_x : int
        number of pixels to crop from each end of the x axis
    crop_y : int
        number of pixels to crop from each end of the y axis
    crop_z : int
        number of pixels to crop from each end of the z axis
    image_stack : ImageStack
        ImageStack containing pixels to be treated as intensities

    Returns
    -------
    IntensityTable :
        IntensityTable containing one intensity per pixel (across channels and rounds)

    Raises
    ------
    ValueError :
        if any crop value is negative
    AssertionError :
        if cropping would remove an entire axis of the image

    """
    # a negative crop would make the sliced data and the coordinate table below disagree in
    # length (slicing wraps around, np.arange does not), so reject it up front
    if min(crop_x, crop_y, crop_z) < 0:
        raise ValueError(
            "crop values must be non-negative, got crop_x={}, crop_y={}, crop_z={}".format(
                crop_x, crop_y, crop_z))

    # verify the image is large enough to crop
    assert crop_z * 2 < image_stack.shape['z'], "crop_z removes every z-plane"
    assert crop_y * 2 < image_stack.shape['y'], "crop_y removes every row"
    assert crop_x * 2 < image_stack.shape['x'], "crop_x removes every column"

    zmin = crop_z
    ymin = crop_y
    xmin = crop_x
    zmax = image_stack.shape['z'] - crop_z
    ymax = image_stack.shape['y'] - crop_y
    xmax = image_stack.shape['x'] - crop_x
    data = image_stack.numpy_array.transpose(2, 3, 4, 1, 0)  # (z, y, x, ch, round)

    # crop and reshape imagestack to create IntensityTable data
    cropped_data = data[zmin:zmax, ymin:ymax, xmin:xmax, :, :]
    # (pixels, ch, round)
    intensity_data = cropped_data.reshape(-1, image_stack.num_chs, image_stack.num_rounds)

    # IntensityTable pixel coordinates. 'ij' indexing followed by a C-order reshape lists
    # the coordinates with z varying slowest and x fastest, which is the order in which
    # the reshape above flattens the pixels.
    z = np.arange(zmin, zmax)
    y = np.arange(ymin, ymax)
    x = np.arange(xmin, xmax)
    coordinate_grid = np.stack(np.meshgrid(z, y, x, indexing='ij'), axis=-1)
    feature_attribute_data = pd.DataFrame(
        data=coordinate_grid.reshape(-1, 3), columns=['z', 'y', 'x'])
    feature_attribute_data[Features.SPOT_RADIUS] = np.full(
        feature_attribute_data.shape[0], fill_value=0.5)

    pixel_coordinates = SpotAttributes(feature_attribute_data)

    return IntensityTable.from_spot_data(intensity_data, pixel_coordinates)
def spot_attribute_factory(n: int) -> SpotAttributes:
    """
    Construct SpotAttributes with n synthetic attributes.

    Each attribute has radius 1 and z, y, x coordinates equal to its index i in [0, n).

    Parameters
    ----------
    n : int
        number of synthetic spots to create; with ``n == 0`` the underlying frame has
        zero rows and the four expected columns

    Returns
    -------
    SpotAttributes :
        SpotAttributes wrapping a frame with columns (z, y, x, radius) and n rows

    """
    # reshape(-1, 4) keeps the array two-dimensional when n == 0; without it the empty
    # list becomes a 1-d array that pandas cannot pair with the four column labels
    data = np.array([[i, i, i, 1] for i in np.arange(n)], dtype=int).reshape(-1, 4)
    return SpotAttributes(
        pd.DataFrame(
            data=data,
            columns=[Axes.ZPLANE, Axes.Y, Axes.X, Features.SPOT_RADIUS]))
def image_to_spots(
        self, data_image: Union[np.ndarray, xr.DataArray]) -> SpotAttributes:
    """Locate spots in an image and return their attributes

    Parameters
    ----------
    data_image : Union[np.ndarray, xr.DataArray]
        two- or three-dimensional image containing spots to be detected

    Returns
    -------
    SpotAttributes :
        spot attributes table for all detected spots

    """
    data_image = np.asarray(data_image)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', FutureWarning)  # trackpy numpy indexing warning
        warnings.simplefilter('ignore', UserWarning)  # yielded if black images
        attributes = locate(
            data_image,
            diameter=self.diameter,
            minmass=self.minmass,
            maxsize=self.maxsize,
            separation=self.separation,
            noise_size=self.noise_size,
            smoothing_size=self.smoothing_size,
            threshold=self.threshold,
            percentile=self.percentile,
            preprocess=self.preprocess)

    # when zero spots are detected, 'ep' is missing from the trackpy locate results.
    if attributes.shape[0] == 0:
        attributes['ep'] = []

    # TODO ambrosejcarr: data should always be at least pseudo-3d, this may not be necessary
    # TODO ambrosejcarr: this is where max vs. sum vs. mean would be parametrized.
    # here, total_intensity = sum, intensity = max
    new_colnames = [
        'y', 'x', 'total_intensity', 'radius', 'eccentricity', 'intensity', 'raw_mass', 'ep'
    ]
    if len(data_image.shape) == 3:
        attributes.columns = ['z'] + new_colnames
    else:
        attributes.columns = new_colnames

    attributes['spot_id'] = np.arange(attributes.shape[0])

    # convert the coordinates to int so they can be used to index. Two-dimensional input
    # has no 'z' column, so only the coordinate columns that are present are converted.
    for axis in ('x', 'y', 'z'):
        if axis in attributes.columns:
            attributes[axis] = attributes[axis].astype(int)

    return SpotAttributes(attributes)
def intensity_table_factory() -> IntensityTable:
    """Build an IntensityTable holding one feature measured over 2 channels and 2 rounds."""
    column_labels = [Axes.ZPLANE, Axes.Y, Axes.X, Features.SPOT_RADIUS]
    # a single spot at the origin (z=0, y=0, x=0) with radius 1
    single_spot = pd.DataFrame(data=[[0, 0, 0, 1]], columns=column_labels)

    feature_intensities = np.array([[[0, 3], [4, 0]]], dtype=float)
    return IntensityTable.from_spot_data(feature_intensities, SpotAttributes(single_spot))
def intensity_table_factory(data: np.ndarray=np.array([[[0, 3], [4, 0]]])) -> IntensityTable:
    """IntensityTable built from `data`, with one synthetic spot per feature.

    Parameters
    ----------
    data : np.ndarray
        intensity values; the first axis indexes features. The default holds a single
        feature with a 2 x 2 block of values. Note that the default array is created once,
        when the function is defined, and is handed to IntensityTable.from_spot_data as-is,
        so it should be treated as read-only.

    Returns
    -------
    IntensityTable :
        IntensityTable whose spot attributes have coordinates (z, y, x) equal to the
        feature index, and radius 1.

    """
    n_features = data.shape[0]

    # generates spot attributes equal in size to the number of passed features.
    # reshape(-1, 4) keeps the array two-dimensional when there are zero features; without
    # it the empty list becomes a 1-d array that cannot be paired with four column labels.
    attribute_values = np.array(
        [[i, i, i, 1] for i in np.arange(n_features)], dtype=int).reshape(-1, 4)
    spot_attributes_data = pd.DataFrame(
        data=attribute_values,
        columns=[Axes.ZPLANE, Axes.Y, Axes.X, Features.SPOT_RADIUS]
    )

    spot_attributes = SpotAttributes(spot_attributes_data)
    intensity_table = IntensityTable.from_spot_data(data, spot_attributes)
    return intensity_table
def _create_spot_attributes(
        self,
        region_properties: List[_RegionProperties],
        decoded_image: np.ndarray,
        target_map: TargetsMap,
        n_processes: Optional[int]=None
) -> Tuple[SpotAttributes, np.ndarray]:
    """Measure the attributes of every connected component, in parallel

    Parameters
    ----------
    region_properties : List[_RegionProperties]
        Properties of the each connected component. Output of skimage.measure.regionprops
    decoded_image : np.ndarray
        Image whose pixels correspond to the targets that the given position in the
        ImageStack decodes to.
    target_map : TargetsMap
        Unique mapping between string target names and int target IDs.
    n_processes : Optional[int]=None
        number of processes to devote to measuring spot properties. If None, the
        multiprocessing.Pool default is used (the number of CPUs reported by the OS).

    Returns
    -------
    SpotAttributes :
        built from the records returned by self._single_spot_attributes, one per
        connected component feature.
    np.ndarray[bool] :
        An array with length equal to the number of features. If False, indicates that a
        feature has failed area filters.

    """
    # the context manager shuts the worker processes down once the (blocking) map call has
    # returned, so the pool is not leaked
    with Pool(processes=n_processes) as pool:
        applyfunc = partial(
            self._single_spot_attributes,
            decoded_image=decoded_image,
            target_map=target_map,
            min_area=self._min_area,
            max_area=self._max_area
        )

        iterable = tqdm(region_properties, disable=(not StarfishConfig().verbose))
        results = pool.map(applyfunc, iterable)

    # each result is a (spot attributes record, passes-area-filter flag) pair
    spot_attrs, passes_area_filter = zip(*results)

    # update passes filter; the builtin bool is used because the np.bool alias no longer
    # exists in current NumPy releases
    passes_filter = np.array(passes_area_filter, dtype=bool)

    spot_attributes = SpotAttributes(pd.DataFrame.from_records(spot_attrs))
    return spot_attributes, passes_filter
def image_to_spots(
        self, data_image: Union[np.ndarray, xr.DataArray]) -> SpotAttributes:
    """
    Detect blobs with the configured detector method and describe each one as a spot

    Parameters
    ----------
    data_image : Union[np.ndarray, xr.DataArray]
        image containing the blobs to be detected

    Returns
    -------
    SpotAttributes :
        table holding the integer coordinates, radius, intensity and spot_id of each spot;
        an empty table (with the 'intensity' and 'spot_id' fields) when nothing is found

    """
    blobs: np.ndarray = self.detector_method(
        data_image,
        self.min_sigma,
        self.max_sigma,
        self.num_sigma,
        self.threshold,
        self.overlap)

    # nothing detected: hand back an empty table that still carries the extra fields
    if blobs.shape[0] == 0:
        return SpotAttributes.empty(extra_fields=['intensity', 'spot_id'])

    radius_column = Features.SPOT_RADIUS
    blob_table = pd.DataFrame(
        data=blobs,
        columns=[Axes.ZPLANE.value, Axes.Y.value, Axes.X.value, radius_column])

    # the detector reports the standard deviation of the gaussian kernel that identified
    # the spot; scale it by sqrt(3) and round to express it as a spot radius
    blob_table[radius_column] = np.round(blob_table[radius_column] * np.sqrt(3))

    # integer values are needed so the coordinates can be used to index the image
    spots = SpotAttributes(blob_table.astype(int))
    spots.data['intensity'] = measure_spot_intensity(
        data_image, spots, self.measurement_function)
    spots.data['spot_id'] = np.arange(spots.data.shape[0])

    return spots
def image_to_spots(self, image: np.ndarray) -> SpotAttributes:
    """Locate spots in an image and return their attributes

    Parameters
    ----------
    image : np.ndarray
        two- or three-dimensional numpy array containing spots to detect

    Returns
    -------
    SpotAttributes :
        spot attributes table for all detected spots

    """
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', FutureWarning)  # trackpy numpy indexing warning
        attributes = locate(
            image,
            diameter=self.diameter,
            minmass=self.minmass,
            maxsize=self.maxsize,
            separation=self.separation,
            noise_size=self.noise_size,
            smoothing_size=self.smoothing_size,
            threshold=self.threshold,
            percentile=self.percentile,
            preprocess=self.preprocess)

    # when zero spots are detected, 'ep' is missing from the trackpy locate results, which
    # would make the column rename below fail on a length mismatch. Assigning an empty
    # column is harmless if 'ep' is already present.
    if attributes.shape[0] == 0:
        attributes['ep'] = []

    # TODO ambrosejcarr: data should always be at least pseudo-3d, this may not be necessary
    # TODO ambrosejcarr: this is where max vs. sum vs. mean would be parametrized.
    # here, total_intensity = sum, intensity = max
    new_colnames = [
        'y', 'x', 'total_intensity', 'radius', 'eccentricity', 'intensity', 'raw_mass', 'ep'
    ]
    if len(image.shape) == 3:
        attributes.columns = ['z'] + new_colnames
    else:
        attributes.columns = new_colnames

    attributes['spot_id'] = np.arange(attributes.shape[0])
    return SpotAttributes(attributes)
def test_intensity_table_can_be_created_from_spot_attributes():
    """
    Build an empty IntensityTable from a two-spot SpotAttributes object and check that it
    has the requested number of channels and rounds, one feature per spot, and only zeros.
    """
    # two spots, each row is (z, y, x, radius)
    two_spots = pd.DataFrame(
        data=np.array([[1, 1, 1, 1], [2, 2, 2, 1]]),
        columns=[Indices.Z, Indices.Y, Indices.X, Features.SPOT_RADIUS])

    table = IntensityTable.empty_intensity_table(SpotAttributes(two_spots), n_ch=1, n_round=3)

    expected_sizes = [(Indices.CH, 1), (Indices.ROUND, 3), (Features.AXIS, 2)]
    for dimension, size in expected_sizes:
        assert table.sizes[dimension] == size

    assert (table.values == 0).all()
def concatenate_spot_attributes_to_intensities(
        spot_attributes: Sequence[Tuple[SpotAttributes, Dict[Indices, int]]]
) -> IntensityTable:
    """
    Merge multiple spot attributes frames into a single IntensityTable without merging across
    channels and imaging rounds

    Parameters
    ----------
    spot_attributes : Sequence[Tuple[SpotAttributes, Dict[Indices, int]]]
        A sequence of SpotAttribute objects and the Indices (channel, round) that each object
        is associated with.

    Returns
    -------
    IntensityTable :
        concatenated input SpotAttributes, converted to an IntensityTable object. Each spot
        is its own feature, with its intensity stored at the (channel, round) of the frame it
        came from.

    Raises
    ------
    ValueError :
        if `spot_attributes` is empty

    """
    if not spot_attributes:
        raise ValueError("spot_attributes must contain at least one SpotAttributes object")

    n_ch: int = max(inds[Indices.CH] for _, inds in spot_attributes) + 1
    n_round: int = max(inds[Indices.ROUND] for _, inds in spot_attributes) + 1

    all_spots = pd.concat([sa.data for sa, _ in spot_attributes])
    # this drop call ensures only x, y, z, radius, and quality, are passed to the IntensityTable
    features_coordinates = all_spots.drop(['spot_id', 'intensity'], axis=1)

    intensity_table = IntensityTable.empty_intensity_table(
        SpotAttributes(features_coordinates), n_ch, n_round,
    )

    # features are laid out in concatenation order, so every input frame owns one contiguous
    # run of features; fill each run with a single slice assignment instead of row by row
    start = 0
    for attrs, inds in spot_attributes:
        stop = start + attrs.data.shape[0]
        if stop > start:
            intensity_table[start:stop, inds[Indices.CH], inds[Indices.ROUND]] = (
                attrs.data['intensity'].values)
        start = stop

    return intensity_table
def synthetic_intensities(cls, codebook, num_z: int = 12, height: int = 50, width: int = 40,
                          n_spots=10, mean_fluor_per_spot=200,
                          mean_photons_per_fluor=50) -> "IntensityTable":
    """Build an IntensityTable of synthetic spots placed at random positions

    Parameters
    ----------
    codebook : Codebook
        starfish codebook object
    num_z :
        exclusive upper bound of the randomly drawn z coordinates
    height :
        exclusive upper bound of the randomly drawn y coordinates
    width :
        exclusive upper bound of the randomly drawn x coordinates
    n_spots :
        number of spots to generate
    mean_fluor_per_spot :
        mean of the poisson draw for the number of fluorophores per spot
    mean_photons_per_fluor :
        mean of the poisson draw for the number of photons per fluorophore.

    Returns
    -------
    IntensityTable

    """
    # TODO nsofroniew: right now there is no jitter on x-y positions of the spots
    z_positions = np.random.randint(0, num_z, size=n_spots)
    y_positions = np.random.randint(0, height, size=n_spots)
    x_positions = np.random.randint(0, width, size=n_spots)

    # radius is a function of the point-spread gaussian size, so it is left undefined here
    radii = np.full(n_spots, np.nan)

    coordinates = pd.DataFrame({
        Axes.ZPLANE.value: z_positions,
        Axes.Y.value: y_positions,
        Axes.X.value: x_positions,
        Features.SPOT_RADIUS: radii
    })
    spot_attributes = SpotAttributes(coordinates)

    # start from an all-zero tensor, one codeword-shaped slab per spot
    data = np.zeros(shape=(n_spots, *codebook.shape[1:]))

    # assign each spot a random target and switch on the positions its codeword marks
    targets = np.random.choice(
        codebook.coords[Features.TARGET], size=n_spots, replace=True)
    data[np.where(codebook.loc[targets])] = 1

    # add physical properties of fluorescence: photons per fluorophore first, then
    # fluorophores per spot
    for poisson_mean in (mean_photons_per_fluor, mean_fluor_per_spot):
        data *= np.random.poisson(poisson_mean, size=data.shape)

    # convert data to float for consistency with starfish
    data = preserve_float_range(data)
    assert 0 < data.max() <= 1

    intensities = cls.from_spot_data(data, spot_attributes)
    intensities[Features.TARGET] = (Features.AXIS, targets)

    return intensities
def image_to_spots(
        self, data_image: Union[np.ndarray, xr.DataArray]) -> SpotAttributes:
    """measure attributes of spots detected by binarizing the image using the selected threshold

    If self.threshold is None it is computed from `data_image` and stored on the instance, so
    later calls reuse it. The labels, region properties and peak coordinates of the most
    recent call are stored on self._labels, self._spot_props and self._spot_coords.

    Parameters
    ----------
    data_image : Union[np.ndarray, xr.DataArray]
        two-dimensional image from which spots should be extracted

    Returns
    -------
    SpotAttributes
        Attributes for each detected spot

    """
    if self.threshold is None:
        self.threshold = self._compute_threshold(data_image)

    # work on a plain ndarray: indexing an xr.DataArray with two coordinate arrays selects
    # their outer product rather than the individual (row, col) points used below
    image = np.asarray(data_image)

    # identify each spot's size by binarizing and calculating regionprops
    masked_image = image > self.threshold
    labels = label(masked_image)[0]
    spot_props = regionprops(labels)

    # mask spots whose areas are too small or too large
    for spot_prop in spot_props:
        if spot_prop.area < self.min_obj_area or spot_prop.area > self.max_obj_area:
            masked_image[spot_prop.coords[:, 0], spot_prop.coords[:, 1]] = 0

    # store re-calculated regionprops and labels based on the area-masked image
    self._labels = label(masked_image)[0]
    self._spot_props = regionprops(self._labels)

    if self.verbose:
        print('computing final spots ...')

    # `indices=True` is omitted: it was the default (coordinates are returned) and the
    # argument no longer exists in newer scikit-image releases
    self._spot_coords = peak_local_max(
        image,
        min_distance=self.min_distance,
        threshold_abs=self.threshold,
        exclude_border=False,
        num_peaks=np.inf,
        footprint=None,
        labels=self._labels)

    # TODO how to get the radius? unlikely that this can be pulled out of
    # self._spot_props, since the last call to peak_local_max can find multiple
    # peaks per label
    res = {
        Indices.X.value: self._spot_coords[:, 1],
        Indices.Y.value: self._spot_coords[:, 0],
        Indices.Z.value: np.zeros(len(self._spot_coords)),
        Features.SPOT_RADIUS: 1,
        Features.SPOT_ID: np.arange(self._spot_coords.shape[0]),
        Features.INTENSITY: image[self._spot_coords[:, 0], self._spot_coords[:, 1]]
    }

    return SpotAttributes(pd.DataFrame(res))