def execute(self, namespace):
    """Compute Ripley's K (or L) function for a set of points.

    Reads the points named by ``self.inputPositions`` (and, if present in
    the namespace, the mask named by ``self.inputMask``) and stores a
    ``tabular.DictSource`` with columns ``'bins'`` and ``'vals'`` under
    ``self.outputName``.

    Parameters
    ----------
    namespace : mapping
        Recipe namespace; must contain ``self.inputPositions``.
    """
    from PYME.Analysis.points import ripleys
    from PYME.IO import MetaDataHandler

    points_real = namespace[self.inputPositions]
    mask = namespace.get(self.inputMask, None)

    # the data is treated as 3D as soon as any z coordinate is non-zero
    three_d = np.count_nonzero(points_real['z']) > 0

    try:
        origin_coords = MetaDataHandler.origin_nm(points_real.mdh)
    except Exception:
        # Best effort: without usable metadata fall back to a zero origin.
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are no longer swallowed.
        origin_coords = (0, 0, 0)

    # arguments shared by the 2D and 3D calls; z is only passed for 3D data
    k_kwargs = dict(x=points_real['x'], y=points_real['y'], mask=mask,
                    n_bins=self.nbins, bin_size=self.binSize,
                    sampling=self.sampling, threaded=self.threaded,
                    coord_origin=origin_coords)
    if three_d:
        k_kwargs['z'] = points_real['z']
    bb, K = ripleys.ripleys_k(**k_kwargs)

    if self.normalization == 'L':
        d = 3 if three_d else 2
        bb, L = ripleys.ripleys_l(bb, K, d)
        res = tabular.DictSource({'bins': bb, 'vals': L})
    else:
        res = tabular.DictSource({'bins': bb, 'vals': K})

    # propagate metadata, if present
    try:
        res.mdh = points_real.mdh
    except AttributeError:
        pass

    namespace[self.outputName] = res
def execute(self, namespace):
    """Reorder a point table along a chunked two-opt traveling-salesperson route.

    The route is computed by
    ``sectioned_two_opt.tsp_chunk_two_opt_multiproc`` from the
    ``'x_um'``/``'y_um'`` columns (or ``'x'``/``'y'`` divided by 1e3 when
    those are missing). Every column of the input is re-indexed by that
    route and stored under ``self.output``; the path lengths before and
    after reordering are written to the output metadata.
    """
    from PYME.Analysis.points.traveling_salesperson import sectioned_two_opt

    def _path_length(table, x_key, y_key):
        # summed euclidean distance between consecutive rows of `table`
        return np.sqrt((table[x_key][1:] - table[x_key][:-1]) ** 2
                       + (table[y_key][1:] - table[y_key][:-1]) ** 2).sum()

    points = namespace[self.input]

    try:
        xy = np.stack([points['x_um'], points['y_um']], axis=1)
    except KeyError:
        # presumably 'x'/'y' are in nm, hence the 1e3 -- confirm upstream
        xy = np.stack([points['x'], points['y']], axis=1) / 1e3

    final_route = sectioned_two_opt.tsp_chunk_two_opt_multiproc(
        xy, self.epsilon, self.points_per_chunk, self.n_processes)

    # note that we sorted the positions / sections once before, need to
    # propagate that through before sorting
    reordered = {}
    for column in points.keys():
        reordered[column] = points[column][final_route]
    out = tabular.DictSource(reordered)

    out.mdh = MetaDataHandler.NestedClassMDHandler()
    try:
        out.mdh.copyEntriesFrom(points.mdh)
    except AttributeError:
        pass

    # use the already sorted output to get the final distance
    try:
        og_distance = _path_length(points, 'x_um', 'y_um')
        final_distance = _path_length(out, 'x_um', 'y_um')
    except KeyError:
        og_distance = _path_length(points, 'x', 'y') / 1e3
        final_distance = _path_length(out, 'x', 'y') / 1e3

    out.mdh['TravelingSalesperson.OriginalDistance'] = og_distance
    out.mdh['TravelingSalesperson.Distance'] = final_distance

    namespace[self.output] = out
def execute(self, namespace):
    """Histogram the pairwise distances between two sets of points.

    ``self.inputPositions`` is compared against ``self.inputPositions2``,
    or against itself when ``inputPositions2`` is the empty string. The 2D
    ``DistHist`` routines are used when every z coordinate of both inputs
    is zero, the 3D ones otherwise. The result, a ``tabular.DictSource``
    with columns ``'bins'`` and ``'counts'``, is stored under
    ``self.outputName``.
    """
    from PYME.Analysis.points import DistHist

    pos0 = namespace[self.inputPositions]
    # Compare by value: the previous ``is not ''`` tested object identity,
    # which is implementation-dependent for strings (and a SyntaxWarning on
    # recent Python versions).
    second_name = (self.inputPositions2 if self.inputPositions2 != ''
                   else self.inputPositions)
    pos1 = namespace[second_name]

    is_2d = (np.count_nonzero(pos0['z']) == 0
             and np.count_nonzero(pos1['z']) == 0)

    if is_2d:
        hist_func = (DistHist.distanceHistogramThreaded if self.threaded
                     else DistHist.distanceHistogram)
        res = hist_func(pos0['x'], pos0['y'], pos1['x'], pos1['y'],
                        self.nbins, self.binSize)
    else:
        hist_func = (DistHist.distanceHistogram3DThreaded if self.threaded
                     else DistHist.distanceHistogram3D)
        res = hist_func(pos0['x'], pos0['y'], pos0['z'],
                        pos1['x'], pos1['y'], pos1['z'],
                        self.nbins, self.binSize)

    d = self.binSize*np.arange(self.nbins)

    res = tabular.DictSource({'bins': d, 'counts': res})

    # propagate metadata, if present (first input wins, second is fallback)
    try:
        res.mdh = pos0.mdh
    except AttributeError:
        try:
            res.mdh = pos1.mdh
        except AttributeError:
            pass

    namespace[self.outputName] = res
def execute(self, namespace):
    """Find the nearest-neighbour distance for each point.

    A k-d tree is built from the ``self.columns`` of ``self.inputChan0``
    and queried with the points of ``self.inputChan1``. When
    ``inputChan1`` is the empty string the first channel is queried
    against itself and the second-closest hit is used, so a point is not
    paired with itself. The distances are stored as column ``self.key`` of
    a ``tabular.DictSource`` under ``self.outputName``.
    """
    from scipy.spatial import cKDTree

    pos = namespace[self.inputChan0]

    # flag to not pair molecules with themselves
    single_channel = self.inputChan1 == ''
    if single_channel:
        query_source = pos
    else:
        query_source = namespace[self.inputChan1]

    # create a kdtree
    tree_points = np.vstack([pos[col] for col in self.columns]).T
    query_points = np.vstack([query_source[col] for col in self.columns]).T
    tree = cKDTree(tree_points)

    if single_channel:
        # query the two closest entries - the closest entry will be the
        # original point paired with itself, so ignore it
        distances, _ = tree.query(query_points, 2)
        distances = distances[:, 1]
    else:
        distances, _ = tree.query(query_points, 1)

    res = tabular.DictSource({self.key: distances})

    if 'mdh' in dir(pos):
        res.mdh = pos.mdh

    namespace[self.outputName] = res
def execute(self, namespace):
    """Sort 2D positions with ``sort.tsp_sort`` and store the ordered points.

    The output table holds only ``'x_um'`` and ``'y_um'`` columns of the
    sorted positions; the original and final path lengths returned by
    ``tsp_sort`` are written to the output metadata.
    """
    from PYME.Analysis.points.traveling_salesperson import sort

    points = namespace[self.input]

    try:
        xy = np.stack([points['x_um'], points['y_um']], axis=1)
    except KeyError:
        # units don't matter for these calculations, but we want to
        # preserve them on the other side
        xy = np.stack([points['x'], points['y']], axis=1) / 1e3

    if self.start_from_corner:
        # start at the point with the smallest x + y
        start_index = np.argmin(xy.sum(axis=1))
    else:
        start_index = 0

    sorted_xy, ogd, final_distance = sort.tsp_sort(
        xy, start_index, self.epsilon, return_path_length=True)

    out = tabular.DictSource({'x_um': sorted_xy[:, 0],
                              'y_um': sorted_xy[:, 1]})

    out.mdh = MetaDataHandler.NestedClassMDHandler()
    try:
        out.mdh.copyEntriesFrom(points.mdh)
    except AttributeError:
        pass
    out.mdh['TravelingSalesperson.Distance'] = final_distance
    out.mdh['TravelingSalesperson.OriginalDistance'] = ogd

    namespace[self.output] = out
def test_TravelingSalesperson():
    """Route through points on two near-concentric rings stays near the circumference."""
    r = 10
    theta = np.linspace(0, 2 * np.pi, 5)
    half_step = 0.5 * (theta[1] - theta[0])

    # ring at radius r, the same ring rotated by half a step, and a third
    # ring at 1.1 * r -- concatenated in that order
    x_um = np.concatenate([
        np.concatenate([r * np.cos(theta), r * np.cos(theta + half_step)]),
        1.1 * r * np.cos(theta),
    ])
    y_um = np.concatenate([
        np.concatenate([r * np.sin(theta), r * np.sin(theta + half_step)]),
        1.1 * r * np.sin(theta),
    ])
    points = tabular.DictSource({'x_um': x_um, 'y_um': y_um})

    recipe = base.ModuleCollection()
    module = measurement.TravelingSalesperson(output='output', epsilon=0.001)
    recipe.add_module(module)
    recipe.namespace['input'] = points

    ordered = recipe.execute()

    # should be not too much more than the rough circumference.
    circumference = 2 * np.pi * r
    assert ordered.mdh['TravelingSalesperson.Distance'] < 1.25 * (circumference)
def execute(self, namespace):
    """Average the values of one image within bins defined by another image.

    Voxels of ``self.inputImage`` are grouped by the corresponding voxel
    value of ``self.binBy`` (optionally restricted to ``self.inputMask > 0``
    and always excluding NaN values) and passed to ``binAvg.binAvg``. The
    bin centres, counts and means are stored as a ``tabular.DictSource``
    under ``self.outputName``.
    """
    from PYME.Analysis import binAvg

    v = namespace[self.inputImage]
    vals = v.data[:, :, :].ravel()
    binby = namespace[self.binBy].data[:, :, :].ravel()

    if self.inputMask != '':
        in_mask = namespace[self.inputMask].data[:, :, :].ravel() > 0
        vals, binby = vals[in_mask], binby[in_mask]

    # mask out NaNs
    not_nan = ~np.isnan(vals)
    vals, binby = vals[not_nan], binby[not_nan]

    # nbins edges, i.e. nbins - 1 bins
    edges = np.linspace(self.left, self.right, self.nbins)

    bn, bm, bs = binAvg.binAvg(binby, vals, edges)

    centres = 0.5*(edges[:-1] + edges[1:])
    res = tabular.DictSource({'bins': centres, 'counts': bn, 'means': bm})

    if 'mdh' in dir(v):
        res.mdh = v.mdh

    namespace[self.outputName] = res
def execute(self, namespace):
    """Compute per-label temporal variance-over-mean of an image series.

    For every pixel, the variance and mean along axis 2 are taken over the
    frames from ``self.excitation_start_frame`` onwards, and the mean over
    the frames before it. These maps are then averaged within each
    positive label of the mask (``self.mask``; the whole image is treated
    as label 1 when no mask is given).

    Outputs
    -------
    ``self.output_name`` : tabular.DictSource
        Columns ``'variance_over_mean'``,
        ``'mean_intensity_over_first_10_frames'`` and ``'labels'``. The
        second column name is fixed; it holds the mean over the frames
        before ``excitation_start_frame``, whatever that value is.
    ``self.output_vom``, ``self.output_mean_pre_excitation`` : ImageStack
        The per-pixel maps, only stored when the respective name is not
        the empty string.
    """
    series = namespace[self.input_name]

    # squeeze down from 4D
    data = series.data[:, :, :].squeeze()
    if self.mask == '':  # not the most memory efficient, but make a mask
        logger.debug(
            'No mask provided to ClusteringByLabel, analyzing full image')
        mask = np.ones((data.shape[0], data.shape[1]), int)
    else:
        mask = namespace[self.mask].data[:, :, :].squeeze()

    # toss any negative labels, as well as the zero label (per PYME
    # clustering schema).
    labels = sorted(set(np.clip(np.unique(mask), 0, None)) - {0})
    # was a bare print(); library code should not write to stdout
    logger.debug('%s', labels)

    n_labels = len(labels)

    # calculate the Variance_t over Mean_t
    excited = data[:, :, self.excitation_start_frame:]
    var = np.var(excited, axis=2)
    mean = np.mean(excited, axis=2)

    variance_over_mean = var / mean
    if np.isnan(variance_over_mean).any():
        logger.error('Variance over mean contains NaN, see %s',
                     series.filename)

    mean_pre_excitation = np.mean(
        data[:, :, :self.excitation_start_frame], axis=2)

    cluster_metric_mean = np.zeros(n_labels)
    mean_before_excitation = np.zeros(n_labels)

    for li, label in enumerate(labels):
        # everything is 2D at this point
        label_mask = mask == label
        cluster_metric_mean[li] = np.mean(variance_over_mean[label_mask])
        mean_before_excitation[li] = np.mean(
            mean_pre_excitation[label_mask])

    res = tabular.DictSource({
        'variance_over_mean': cluster_metric_mean,
        'mean_intensity_over_first_10_frames': mean_before_excitation,
        'labels': np.array(labels)
    })
    try:
        res.mdh = series.mdh
    except AttributeError:
        res.mdh = None

    namespace[self.output_name] = res

    if self.output_vom != '':
        namespace[self.output_vom] = image.ImageStack(
            data=variance_over_mean, mdh=res.mdh)

    if self.output_mean_pre_excitation != '':
        namespace[self.output_mean_pre_excitation] = image.ImageStack(
            data=mean_pre_excitation, mdh=res.mdh)
def execute(self, namespace):
    """Histogram one column of a measurement table.

    Column ``self.key`` of ``self.inputMeasurements`` is histogrammed over
    ``self.nbins`` linearly spaced edges between ``self.left`` and
    ``self.right`` (i.e. ``nbins - 1`` bins). Bin centres and counts are
    stored as a ``tabular.DictSource`` under ``self.outputName``.
    """
    measurements = namespace[self.inputMeasurements]
    v = measurements[self.key]

    edges = np.linspace(self.left, self.right, self.nbins)

    # ``normed`` no longer exists in current NumPy; ``density`` is its
    # replacement and gives the same result for these equal-width bins.
    counts = np.histogram(v, edges, density=self.normalize)[0]

    res = tabular.DictSource({'bins': 0.5*(edges[:-1] + edges[1:]),
                              'counts': counts})

    # Propagate metadata, if present. It lives on the table, not on the
    # extracted column, so look there first; the column is kept as a
    # fallback to preserve the previous behaviour.
    try:
        res.mdh = measurements.mdh
    except AttributeError:
        if 'mdh' in dir(v):
            res.mdh = v.mdh

    namespace[self.outputName] = res
def test_random_selection():
    """RandomSubset returns the requested count, capped at the table size when not strict."""
    from PYME.IO import tabular
    import numpy as np

    source = tabular.DictSource({'test': np.arange(100)})

    # fewer rows requested than available
    strict_filter = tablefilters.RandomSubset(num_to_select=5, strict=True)
    out = strict_filter.apply_simple(input=source)
    assert len(out) == 5

    # more rows requested than available, non-strict
    lenient_filter = tablefilters.RandomSubset(num_to_select=150, strict=False)
    out = lenient_filter.apply_simple(input=source)
    assert len(out) == 100
def execute(self, namespace):
    """Compute neighbour distances from a Delaunay triangulation of the points.

    The x/y positions of ``self.inputPositions`` are jittered with normal
    noise (standard deviation 0.1), triangulated, and passed to
    ``visHelpers.calcNeighbourDists``. The result is stored as column
    ``self.key`` of a ``tabular.DictSource`` under ``self.outputName``.
    """
    # NOTE(review): recent matplotlib releases do not appear to ship the
    # `delaunay` module any more -- confirm the supported matplotlib version.
    from matplotlib import delaunay
    from PYME.LMVis import visHelpers

    pos = namespace[self.inputPositions]
    x, y = pos['x'], pos['y']
    n_points = len(x)

    # jitter x first, then y; presumably this avoids degenerate
    # (duplicate / collinear) input to the triangulation -- confirm
    x_jittered = x + .1*np.random.normal(size=n_points)
    y_jittered = y + .1*np.random.normal(size=n_points)

    # triangulate the data
    T = delaunay.Triangulation(x_jittered, y_jittered)

    # find the average edge lengths leading away from a given point
    neighbour_dists = np.array(visHelpers.calcNeighbourDists(T))

    res = tabular.DictSource({self.key: neighbour_dists})

    if 'mdh' in dir(pos):
        res.mdh = pos.mdh

    namespace[self.outputName] = res
def test_ChunkedTravelingSalesman():
    """Chunked TSP ordering of random points must shorten the path."""
    n = 500
    # x is drawn before y
    points = tabular.DictSource({
        'x_um': np.random.rand(n) * 4e3,
        'y_um': np.random.rand(n) * 4e3,
    })

    recipe = base.ModuleCollection()
    module = measurement.ChunkedTravelingSalesperson(output='output',
                                                     epsilon=0.001,
                                                     points_per_chunk=50)
    recipe.add_module(module)
    recipe.namespace['input'] = points

    ordered = recipe.execute()

    mdh = ordered.mdh
    assert (mdh['TravelingSalesperson.Distance']
            < mdh['TravelingSalesperson.OriginalDistance'])
def execute(self, namespace):
    """Histogram the voxel values of an image.

    All voxels of ``self.inputImage`` (restricted to ``self.inputMask > 0``
    when a mask name is given) are histogrammed over ``self.nbins``
    linearly spaced edges between ``self.left`` and ``self.right`` (i.e.
    ``nbins - 1`` bins). Bin centres and counts are stored as a
    ``tabular.DictSource`` under ``self.outputName``.
    """
    v = namespace[self.inputImage]
    vals = v.data[:, :, :].ravel()

    if self.inputMask != '':
        in_mask = namespace[self.inputMask].data[:, :, :].ravel() > 0
        vals = vals[in_mask]

    edges = np.linspace(self.left, self.right, self.nbins)

    # ``normed`` no longer exists in current NumPy; ``density`` is its
    # replacement and gives the same result for these equal-width bins.
    counts = np.histogram(vals, edges, density=self.normalize)[0]

    res = tabular.DictSource({'bins': 0.5*(edges[:-1] + edges[1:]),
                              'counts': counts})

    # propagate metadata, if present
    if 'mdh' in dir(v):
        res.mdh = v.mdh

    namespace[self.outputName] = res
def execute(self, namespace):
    """Merge several measurement tables into one, suffixing their column names.

    Every attribute of ``self`` whose name starts with ``'inputMeas'`` is
    taken as the namespace key of a table; its columns are copied into the
    output with the matching ``suffix<last character of the attribute
    name>`` appended. Empty names are skipped. Metadata is taken from
    ``self.inputMeasurements1``.
    """
    import collections

    # resolve all (table name, suffix) pairs up front
    input_attrs = [n for n in dir(self) if n.startswith('inputMeas')]
    sources = [(getattr(self, n), getattr(self, 'suffix' + n[-1]))
               for n in input_attrs]

    merged = collections.OrderedDict()
    for table_name, suffix in sources:
        if table_name == '':
            continue

        meas = namespace[table_name]
        for column in meas.keys():
            merged[column + suffix] = meas[column]

    meas1 = namespace[self.inputMeasurements1]

    res = tabular.DictSource(merged)
    if 'mdh' in dir(meas1):
        res.mdh = meas1.mdh

    namespace[self.outputName] = res
def execute(self, namespace):
    """Build the empirical cumulative distribution of an image's voxel values.

    The voxels of ``self.inputImage`` (restricted to ``self.inputMask > 0``
    when a mask name is given) are sorted and paired with evenly spaced
    values from 0 to 1. The sorted values are stored as ``'bins'`` and the
    cumulative fractions as ``'counts'`` under ``self.outputName``.
    """
    v = namespace[self.inputImage]
    vals = v.data[:, :, :].ravel()

    if self.inputMask != '':
        in_mask = namespace[self.inputMask].data[:, :, :].ravel() > 0
        vals = vals[in_mask]

    sorted_vals = np.sort(vals)
    fractions = np.linspace(0, 1.0, len(vals))

    res = tabular.DictSource({'bins': sorted_vals, 'counts': fractions})

    if 'mdh' in dir(v):
        res.mdh = v.mdh

    namespace[self.outputName] = res
def test_simple_distance_to_image_mask():
    """Points along x must get distances ``arange(size) - size / 2`` to a half-space mask."""
    from PYME.IO import tabular
    from PYME.IO.image import ImageStack
    from PYME.IO.MetaDataHandler import CachingMDHandler

    size = 10
    # only the x index grid is needed to build the mask
    x_index = np.mgrid[:size, :size, :size][0]

    points = tabular.DictSource({
        'x': np.arange(size),
        'y': np.zeros(size),
        'z': np.zeros(size),
    })

    # mdh voxelsize units are in um currently, while the voxelsize_nm
    # attributes are used in distance_to_image_mask
    voxel_metadata = {
        'voxelsize.x': 0.001,
        'voxelsize.y': 0.001,
        'voxelsize.z': 0.001,
        'voxelsize.units': 'um',
    }
    points.mdh = CachingMDHandler(voxel_metadata)

    # mask covers the half of the volume with x index below size / 2
    mask = ImageStack(x_index < 0.5 * size, mdh=points.mdh)

    distances = coordinate_tools.distance_to_image_mask(mask, points)

    expected = np.arange(size) - 0.5 * size
    np.testing.assert_array_equal(distances, expected)
def execute(self, namespace):
    """Compute per-label statistics of an image.

    The image ``self.input_name`` and label image ``self.input_labels`` are
    flattened; pixels are grouped by label (label 0 is dropped) and the
    variance, mean, median, mode, sum and pixel count of each group are
    stored as a ``tabular.DictSource`` under ``self.output_name``.

    Raises
    ------
    ValueError
        If the label image contains negative labels.
    """
    from scipy import stats

    series = namespace[self.input_name]
    data = np.stack([
        series.data[:, :, t, 0].squeeze()
        for t in range(series.data.shape[2])
    ], axis=2)

    labels = namespace[self.input_labels].data
    labels = np.stack(
        [labels[:, :, t, 0].squeeze() for t in range(labels.shape[2])],
        axis=2)

    # drop zero label
    zero_counts = 0
    uni, n = np.unique(labels, return_counts=True)
    if np.any(uni < 0):
        raise ValueError(
            'statistics by label does not support negative labels')
    if 0 in uni:
        zind = np.where(uni == 0)[0][0]
        zero_counts = n[zind]
        uni = np.delete(uni, zind)
        n = np.delete(n, zind)

    logger.debug('labels: %s', uni)

    n_labels = len(uni)
    var = np.empty(n_labels, dtype=float)
    mean = np.empty_like(var)
    median = np.empty_like(var)
    mode = np.empty_like(var)
    sum_ = np.empty_like(var)
    label = np.empty(n_labels, dtype=int)

    # Sort the pixels by label so each label occupies one contiguous run.
    # With no negative labels the zero-label pixels come first, so the
    # first real label starts at offset `zero_counts`.
    order = np.argsort(labels.ravel())
    data = data.ravel()[order]

    start = zero_counts
    for li in range(n_labels):
        label_data = data[start:start + n[li]]

        var[li] = np.var(label_data)
        mean[li] = np.mean(label_data)
        median[li] = np.median(label_data)
        # Depending on the SciPy version the mode comes back as a length-1
        # array or as a scalar; atleast_1d handles both (plain ``[0][0]``
        # fails on a scalar).
        mode[li] = np.atleast_1d(stats.mode(label_data, axis=None)[0])[0]
        sum_[li] = label_data.sum()
        label[li] = uni[li]

        start += n[li]

    # package up and ship-out results
    res = tabular.DictSource({
        'variance': var,
        'mean': mean,
        'median': median,
        'mode': mode,
        'sum': sum_,
        'n_pixels': n,  # counts from np.unique == length of each run
        'label': label
    })
    # propagate metadata, if present
    try:
        res.mdh = series.mdh
    except AttributeError:
        pass

    namespace[self.output_name] = res
epsilon=0.001, points_per_chunk=50)) recipe.namespace['input'] = points ordered = recipe.execute() assert ordered.mdh['TravelingSalesperson.Distance'] < ordered.mdh[ 'TravelingSalesperson.OriginalDistance'] if __name__ == '__main__': import time n = 10000 x = np.random.rand(n) * 4e3 y = np.random.rand(n) * 4e3 points = tabular.DictSource({'x_um': x, 'y_um': y}) recipe = base.ModuleCollection() recipe.add_module( measurement.ChunkedTravelingSalesperson(output='output', epsilon=0.001, points_per_chunk=500)) recipe.namespace['input'] = points t = time.time() ordered = recipe.execute() print('n_points: %d, runtime: %f' % (n, time.time() - t)) print('og distance: %f, distance: %f' % (ordered.mdh['TravelingSalesperson.OriginalDistance'], ordered.mdh['TravelingSalesperson.Distance']))
def execute(self, namespace):
    """Compute Ripley's K function, optionally normalised and with MC statistics.

    Reads the points named by ``self.inputPositions`` (and, if present in
    the namespace, the mask named by ``self.inputMask``). The result is a
    ``tabular.DictSource`` stored under ``self.outputName`` with columns
    ``'bins'`` and ``'vals'``, plus ``'min'``, ``'max'``, ``'pc'`` and
    ``'pd'`` when ``self.statistics`` is set.

    Raises
    ------
    RuntimeError
        If the dimensionality of the data or mask does not match
        ``self.three_d``, if statistics are requested without a mask, or
        if ``self.nsim`` is too small for ``self.significance``.
    """
    from PYME.Analysis.points import ripleys
    from PYME.IO import MetaDataHandler

    points_real = namespace[self.inputPositions]
    mask = namespace.get(self.inputMask, None)
    # test for presence explicitly rather than relying on the truthiness
    # of the mask object
    has_mask = mask is not None

    if self.three_d:
        if np.count_nonzero(points_real['z']) == 0:
            raise RuntimeError('Need a 3D dataset')
        if has_mask and mask.data.shape[2] < 2:
            raise RuntimeError(
                'Need a 3D mask to run in 3D. Generate a 3D mask or select 2D.'
            )
    elif has_mask and mask.data.shape[2] > 1:
        raise RuntimeError('Need a 2D mask.')

    if self.statistics and not has_mask:
        raise RuntimeError('Mask is needed to calculate statistics.')

    if self.statistics and 1.0 / self.nsim > self.significance:
        raise RuntimeError(
            'Need at least {} simulations to achieve a significance of {}'.
            format(int(np.ceil(1.0 / self.significance)),
                   self.significance))

    try:
        ox, oy, _ = MetaDataHandler.origin_nm(points_real.mdh)
        origin_coords = (ox, oy, 0)  # see origin_nm docs
    except Exception:
        # Best effort: without usable metadata fall back to a zero origin.
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are no longer swallowed.
        origin_coords = (0, 0, 0)

    # arguments shared by the 2D and 3D calls; z is only passed in 3D mode
    k_kwargs = dict(x=points_real['x'], y=points_real['y'], mask=mask,
                    n_bins=self.nbins, bin_size=self.binSize,
                    sampling=self.sampling, threaded=self.threaded,
                    coord_origin=origin_coords)
    if self.three_d:
        k_kwargs['z'] = points_real['z']
    bb, K = ripleys.ripleys_k(**k_kwargs)

    # Run MC simulations
    if self.statistics:
        K_min, K_max, p_clustered, p_dispersed = ripleys.mc_sampling_statistics(
            K, mask=mask, n_points=len(points_real['x']),
            n_bins=self.nbins, three_d=self.three_d,
            bin_size=self.binSize, significance=self.significance,
            n_sim=self.nsim, sampling=self.sampling,
            threaded=self.threaded, coord_origin=origin_coords)

    # Check for alternate Ripley's normalization. The function is looked
    # up by name so only the selected one has to exist.
    truncating = ('dL', 'dH')  # results are 2 shorter than other results
    norm_names = {'L': 'ripleys_l', 'dL': 'ripleys_dl',
                  'H': 'ripleys_h', 'dH': 'ripleys_dh'}
    norm_name = norm_names.get(self.normalization)
    norm_func = getattr(ripleys, norm_name) if norm_name is not None else None

    # Apply normalization if present
    if norm_func is not None:
        d = 3 if self.three_d else 2
        bb0, K = norm_func(bb, K, d)  # bb0 in case we use dL/dH
        if self.statistics:
            _, K_min = norm_func(bb, K_min, d)
            _, K_max = norm_func(bb, K_max, d)
            if self.normalization in truncating:
                # Truncate p_clustered for dL and dH to match size
                p_clustered = p_clustered[1:-1]
                p_dispersed = p_dispersed[1:-1]
        bb = bb0

    if self.statistics:
        res = tabular.DictSource({
            'bins': bb,
            'vals': K,
            'min': K_min,
            'max': K_max,
            'pc': p_clustered,
            'pd': p_dispersed
        })
    else:
        res = tabular.DictSource({'bins': bb, 'vals': K})

    # propagate metadata, if present
    try:
        res.mdh = points_real.mdh
    except AttributeError:
        pass

    namespace[self.outputName] = res
def execute(self, namespace):
    """Estimate the radial density, volume and anisotropy of a spherical-harmonic shell.

    A regular grid with spacing ``self.sampling_nm`` is laid over the
    shell's approximate bounds. For each of ``self.jitter_iterations``
    iterations the grid is jittered uniformly by up to half a sample, the
    points inside the shell are selected, and their radius normalised by
    the shell radius in the same direction is histogrammed.

    The averaged, sum-normalised histogram is stored as a
    ``tabular.DictSource`` (columns ``'bin_centers'``, ``'counts'``) under
    ``self.output``. The volume, the standard deviations from the singular
    values of the enclosed points, and an anisotropy measure are written
    to its metadata.
    """
    from PYME.Analysis.points import spherical_harmonics
    from PYME.IO import MetaDataHandler

    shell = namespace[self.input_shell]
    if isinstance(shell, tabular.TabularBase):
        shell = spherical_harmonics.ScaledShell.from_tabular(shell)

    bin_edges = np.arange(0, 1.0 + self.r_bin_spacing, self.r_bin_spacing)
    bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])

    out_hist = np.zeros(len(bin_centers), float)

    # get shell bounds, make grid within
    shell_bounds = shell.approximate_image_bounds()
    xv = np.arange(shell_bounds.x0, shell_bounds.x1 + self.sampling_nm[0],
                   self.sampling_nm[0])
    yv = np.arange(shell_bounds.y0, shell_bounds.y1 + self.sampling_nm[1],
                   self.sampling_nm[1])
    zv = np.arange(shell_bounds.z0, shell_bounds.z1 + self.sampling_nm[2],
                   self.sampling_nm[2])
    x, y, z = np.meshgrid(xv, yv, zv, indexing='ij')
    grid_shape = (len(xv), len(yv), len(zv))

    v_estimates = []
    sdev_estimates = []
    n_choose = 10000
    for _ in range(self.jitter_iterations):
        # uniform jitter of +/- half a sample around each grid node
        # (drawn in x, y, z order)
        xr = np.random.rand(*grid_shape)
        yr = np.random.rand(*grid_shape)
        zr = np.random.rand(*grid_shape)
        xr = (xr - 0.5) * self.sampling_nm[0] + x
        yr = (yr - 0.5) * self.sampling_nm[1] + y
        zr = (zr - 0.5) * self.sampling_nm[2] + z

        azi, zen, r = shell.shell_coordinates((xr, yr, zr))
        r_shell = spherical_harmonics.reconstruct_shell(
            shell.modes, shell.coefficients, azi, zen)
        inside = r < r_shell
        N = np.sum(inside)
        r_norm = r[inside] / r_shell[inside]

        # sum-normalize this iteration and add to output
        out_hist += np.histogram(r_norm, bins=bin_edges)[0] / N

        # record volume estimate
        v_estimates.append(N)

        # estimate spread along principle axes of the shell
        X = np.vstack([xr[inside], yr[inside], zr[inside]])
        if N > n_choose:  # downsample to avoid memory error
            X = X[:, np.random.choice(N, n_choose, replace=False)]
        # TODO - do we need to be mean-centered?
        X = X - X.mean(axis=1)[:, None]
        # Only the singular values are used; skipping U and V avoids
        # allocating an (n_points x n_points) matrix.
        s = np.linalg.svd(X.T, compute_uv=False)
        # svd cov is not normalized, handle that
        sdev_estimates.append(
            s / np.sqrt(X.shape[1] - 1))  # with bessel's correction

    # finish the average
    out_hist = out_hist / self.jitter_iterations
    # finish the volume calculation, convert from nm^3 to um^3
    volume = np.mean(v_estimates) * (np.prod(self.sampling_nm) / (1e9))
    # average the standard deviation estimates
    standard_deviations = np.mean(np.stack(sdev_estimates), axis=0)
    # similar to Basser, P. J., et al. doi.org/10.1006/jmrb.1996.0086
    # note that singular values are square roots of the eigenvalues. Use
    # the sample standard deviation rather than pop.
    anisotropy = np.sqrt(np.var(standard_deviations**2, ddof=1)) / (
        np.sqrt(3) * np.mean(standard_deviations**2))

    res = tabular.DictSource({
        'bin_centers': bin_centers,
        'counts': out_hist
    })

    # start from the shell's metadata when it has any
    try:
        res.mdh = MetaDataHandler.DictMDHandler(shell.mdh)
    except AttributeError:
        res.mdh = MetaDataHandler.DictMDHandler()

    res.mdh['SHShellRadiusDensityEstimate.Volume'] = float(volume)
    res.mdh[
        'SHShellRadiusDensityEstimate.StdDeviations'] = standard_deviations.tolist(
        )
    res.mdh['SHShellRadiusDensityEstimate.Anisotropy'] = float(anisotropy)

    namespace[self.output] = res