def conf_mat(
    self,
    true_set_size=None,
    pred_set_size=None,
    mask=None,
):
    """
    Build a confusion matrix from the call bag.
    If the set_size parameters are not given, the sizes are determined
    by asking the prep_result.
    """
    true = self.df["true_pep_iz"].values
    pred = self.df["pred_pep_iz"].values

    # Compute true_set_size and pred_set_size if they are not specified
    if true_set_size is None:
        true_set_size = self._prep_result.n_peps
    if pred_set_size is None:
        pred_set_size = self._prep_result.n_peps

    n_rows = len(self.df)
    if mask is not None:
        if isinstance(mask, pd.Series):
            mask = mask.values
        check.array_t(mask, shape=(n_rows,), dtype=np.bool_)
        pred = np.copy(pred)
        pred[~mask] = 0

    return ConfMat.from_true_pred(true, pred, true_set_size, pred_set_size)
def filter_im(im, low_inflection, low_sharpness, high_inflection, high_sharpness):
    """
    Use a band-pass filter to remove background and "bloom", which is the
    light that scatters from the foreground into the background.

    Note: A low_inflection of -10 effectively removes the low-pass filter
    and a high_inflection of +10 effectively removes the high-pass filter.

    Values of sharpness = 50.0 are usually fine.

    Returns:
        Filtered image
    """
    # These numbers were hand-tuned to Abbe (512x512) and might be wrong for other
    # sizes/instruments; they will need to be derived and/or calibrated.
    im = im.astype(np.float64)
    check.array_t(im, ndim=2, is_square=True, dtype=np.float64)

    low_cut = imops.generate_center_weighted_tanh(
        im.shape[0], inflection=low_inflection, sharpness=low_sharpness
    )
    high_cut = 1 - imops.generate_center_weighted_tanh(
        im.shape[0], inflection=high_inflection, sharpness=high_sharpness
    )

    filtered_im = imops.fft_filter_with_mask(im, mask=low_cut * high_cut)

    # The filters do not necessarily produce a zero-centered background, so
    # remove the median to pull the background to zero.
    filtered_im -= np.median(filtered_im)

    return filtered_im
def _pixel_to_subpixel_one_im(im, peak_dim, locs):
    """
    This is a subtle calculation.

    locs is given as an *integer* position (only has pixel accuracy).
    We then extract out a sub-image using an *integer* half width.

    Peak_dim is typically odd. Suppose it is (11, 11).
    That makes half_peak_mea_i be 11 // 2 = 5.

    Suppose that a peak is at (17.5, 17.5).

    Suppose that peak was found at (integer) location (17, 17),
    which is within 1 pixel of its center as expected.

    We extract the sub-image at (17 - 5, 17 - 5) = (12:23, 12:23).

    The center-of-mass calculation should return (5.5, 5.5) because that is
    relative to the sub-image that was extracted.

    We wish to return (17.5, 17.5). So that's the lower-left corner of the
    sub-image (17 - 5) plus the COM found.
    """
    check.array_t(locs, dtype=int)
    assert peak_dim[0] == peak_dim[1]
    half_peak_mea_i = peak_dim[0] // 2
    lower_left_locs = locs - half_peak_mea_i
    com_per_loc = np.zeros(locs.shape)
    for loc_i, loc in enumerate(lower_left_locs):
        peak_im = imops.crop(im, off=YX(loc), dim=peak_dim, center=False)
        com_per_loc[loc_i] = imops.com(peak_im ** 2)
    return lower_left_locs + com_per_loc
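# A minimal numeric sketch (not part of the original module) of the offset
# arithmetic described in the docstring above: a peak centered at (17.5, 17.5),
# detected at integer location (17, 17), is recovered as the lower-left corner
# of the cropped sub-image plus the center-of-mass within that sub-image.
import numpy as np

peak_dim = (11, 11)
half_peak_mea_i = peak_dim[0] // 2              # 5
found_loc = np.array([17, 17])                  # integer-accuracy detection
lower_left = found_loc - half_peak_mea_i        # (12, 12)
com_in_subimage = np.array([5.5, 5.5])          # what imops.com() would report
assert np.allclose(lower_left + com_in_subimage, [17.5, 17.5])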
def channel_aln_offsets(self, ch_aln):
    """
    TODO: This probably should move to Peaks so that it follows the
    same pattern as channel_scale_factor()
    """
    check.array_t(ch_aln, shape=(self.n_channels, 2))
    self.ch_aln = ch_aln
def multichannel_dyt_random_choice(self, dyts, probs):
    """
    dyts: (n_options, n_channels, n_cycles)
        [
            [[1, 1, 1], [0, 0, 0]],  # On in ch 0
            [[0, 0, 0], [1, 1, 1]],  # On in ch 1
            [[1, 1, 1], [1, 1, 1]],  # Shared
        ],
    probs: (n_options)
        [0.80, 0.18, 0.02],  # Choices
    """
    dyts = np.array(dyts)
    check.array_t(dyts, ndim=3)
    _n_options, _n_channels, _n_cycles = dyts.shape
    assert _n_channels == self.n_channels
    assert _n_cycles == self.n_cycles

    probs = np.array(probs)
    check.array_t(probs, ndim=1)
    assert probs.shape[0] == _n_options

    self.counts = np.zeros((self.n_peaks, self.n_cycles, self.n_channels))
    self.dyt_iz = np.random.choice(_n_options, size=self.n_peaks, p=probs)
    for ch_i in range(self.n_channels):
        self.counts[:, :, ch_i] = dyts[self.dyt_iz, ch_i]
    return self
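# A usage sketch (the `peaks` receiver object here is hypothetical) showing the
# call with the two-channel, three-cycle options from the docstring above:
# ~80% of peaks are on in channel 0, ~18% in channel 1, ~2% in both.
peaks.multichannel_dyt_random_choice(
    dyts=[
        [[1, 1, 1], [0, 0, 0]],  # On in ch 0
        [[0, 0, 0], [1, 1, 1]],  # On in ch 1
        [[1, 1, 1], [1, 1, 1]],  # Shared
    ],
    probs=[0.80, 0.18, 0.02],
)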
def scale_by_abundance(self, abundance):
    """
    DHW 9/28/2020 - I profiled the check.array_t and the assert and in
    practice the impact appears minimal (<1ms in my test case)
    """
    check.array_t(abundance, shape=(self.shape[1],))
    assert np.all((abundance >= 1.0) | (abundance == 0.0))
    return (self * abundance).astype(int)
def _do_predict(classifier, X):
    """
    The scikit-learn Random Forest classifier has both a predict() and a
    predict_proba() function, but the expensive part is predict_proba().
    Oddly, predict() calls predict_proba(), so I was previously doing the
    expensive part twice. Since I only want a single call per row, I
    re-implement predict(), which is nothing more than taking the best
    score on each row.

    I also extract both the winner (best score) and the runner-up under the
    theory that the distance between these might be informative.

    Note: this must be a module-level function so that it can be pickled.
    """
    check.array_t(X, ndim=2)
    classifier.n_jobs = 1

    # all_scores is the score for each row of X against EVERY classification class
    all_scores = classifier.predict_proba(X)

    # Sort by score along each row so that we can get the winner and runner-up
    sorted_iz = np.argsort(all_scores, axis=1)
    winner_iz = sorted_iz[:, -1]
    runnerup_iz = sorted_iz[:, -2]

    winner_scores = all_scores[np.arange(all_scores.shape[0]), winner_iz]
    runnerup_scores = all_scores[np.arange(all_scores.shape[0]), runnerup_iz]

    winner_y = classifier.classes_.take(winner_iz, axis=0)
    runnerup_y = classifier.classes_.take(runnerup_iz, axis=0)

    return winner_y, winner_scores, runnerup_y, runnerup_scores
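# A self-contained sketch (toy data, not part of the original module) of the same
# winner / runner-up extraction pattern on a small scikit-learn forest, calling
# predict_proba() only once per row batch.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
X_train = rng.normal(size=(300, 5))
y_train = rng.integers(0, 4, size=300)
clf = RandomForestClassifier(n_estimators=20, random_state=0).fit(X_train, y_train)

scores = clf.predict_proba(rng.normal(size=(10, 5)))
order = np.argsort(scores, axis=1)
winner_y = clf.classes_.take(order[:, -1])       # same labels predict() would give
runnerup_y = clf.classes_.take(order[:, -2])     # second-best class per row
winner_scores = scores[np.arange(scores.shape[0]), order[:, -1]]
runnerup_scores = scores[np.arange(scores.shape[0]), order[:, -2]]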
def classify(self, X, progress=None):
    check.array_t(X, ndim=2)
    n_rows = X.shape[0]
    if n_rows < 100:
        winner_y, winner_scores, runnerup_y, runnerup_scores = _do_predict(
            classifier=self.classifier, X=X
        )
    else:
        n_work_orders = n_rows // 100
        with zap.Context(progress=progress, trap_exceptions=False):
            results = zap.work_orders([
                Munch(classifier=self.classifier, X=X, fn=_do_predict)
                for X in np.array_split(X, n_work_orders, axis=0)
            ])
        winner_y = utils.listi(results, 0)
        winner_scores = utils.listi(results, 1)
        runnerup_y = utils.listi(results, 2)
        runnerup_scores = utils.listi(results, 3)
        winner_y = np.concatenate(winner_y)
        winner_scores = np.concatenate(winner_scores)
        runnerup_y = np.concatenate(runnerup_y)
        runnerup_scores = np.concatenate(runnerup_scores)
    return winner_y, winner_scores, runnerup_y, runnerup_scores
def radiometry_cy_ims(cy_ims, locs, reg_psf_samples, peak_mea):
    """
    Compute radiometry on the stack of cycle images for one field on one channel

    Returns:
        output_radmat: ndarray(n_peaks, n_cycles, (sig, noi, bg_med, bg_std))
    """
    with context(
        cy_ims=cy_ims,
        locs=locs,
        reg_psf_samples=reg_psf_samples,
        peak_mea=peak_mea,
    ) as ctx:
        check.array_t(locs, ndim=2, dtype=np.float64)
        n_peaks = locs.shape[0]
        if n_peaks > 0:
            batches = zap.make_batch_slices(n_rows=locs.shape[0], _batch_size=100)
            with zap.Context(trap_exceptions=False, mode="thread"):
                zap.work_orders([
                    dict(
                        fn=_do_radiometry_field_stack_peak_batch,
                        ctx=ctx,
                        peak_start_i=batch[0],
                        peak_stop_i=batch[1],
                    )
                    for batch in batches
                ])
        return ctx._out_radiometry
def start(self):
    rf_v2_params = RFV2Params(**self.config.parameters)

    radmat = None
    if self.inputs.get("sim_v2"):
        sim_result = SimV2Result.load_from_folder(self.inputs.sim_v2)
        radmat = sim_result.flat_test_radmat()
    elif self.inputs.get("sigproc_v2"):
        sigproc_v2_result = SigprocV2Result.load_from_folder(self.inputs.sigproc_v2)
        radmat = sigproc_v2_result.sig(flat_chcy=True)
    check.array_t(radmat, ndim=2)

    rf_train_v2_result = RFTrainV2Result.load_from_folder(self.inputs.rf_train_v2)

    rf_v2_result = rf_classify(
        rf_v2_params,
        rf_train_v2_result,
        radmat,
        progress=self.progress,
    )

    if self.inputs.get("sim_v2"):
        # Stuff the true value into the results to simplify downstream processing
        sim_result = SimV2Result.load_from_folder(self.inputs.sim_v2)
        rf_v2_result.true_pep_iz = sim_result.test_true_pep_iz

    rf_v2_result.save()
def estimate(self, samples):
    """
    samples is an array of spot radiometries with 3 columns: (y, x, val)
    """
    check.array_t(samples, ndim=2)
    assert samples.shape[1] == 3

    from scipy.optimize import curve_fit

    ys = samples[:, 0]
    xs = samples[:, 1]
    vals = samples[:, 2]

    cen = self.hyper_im_mea / 2

    def _fit_wrapper(*args, **kwargs):
        return self.fit_func(self.hyper_im_mea, *args, **kwargs)

    # I had previously seeded the initial_falloff parameter with 0.4 and found that
    # it failed to converge in some cases, for example, jim/jhm2021_06_17_01_tetraspec_3channel.
    # It seems happier to start at 0.0 so I'm setting it there for a while,
    # but I would not be shocked if that causes it to fail in some other case.
    # That said, jim/jhm2021_06_17_01_tetraspec_3channel is not even a single-count
    # experiment and is only being used in a self-calibration mode, so it should
    # definitely not be setting the standard for this.
    initial_falloff = 0.0

    popt, pcov = curve_fit(
        _fit_wrapper,
        (xs, ys),
        vals,
        p0=(cen, cen, initial_falloff, np.nanmean(vals)),
    )
    self.cen_x, self.cen_y, self.falloff, _ = popt
def _step_2_create_inverse_variances(dt_mat, channel_i_to_vpd):
    """
    Using the Variance Per Dye, find the inverse variance for each row of the dyemat.
    This deals with zero-dyes by assigning them a dye count of 0.5, i.e. half
    the standard deviation of a 1-count dye.

    vpd stands for variance per dye. Our models indicate that the standard
    deviation goes up roughly linearly with the number of dyes.

    The later code (_do_nn_and_gmm) needs the inverse variance, so we square
    the standard deviation to obtain the variance and take the inverse.

    Arguments:
        dt_mat: the unique dyetracks

    Returns:
        ndarray(n_rows, n_channels * n_cycles): inverse variance for each row (flattened)
    """
    check.array_t(dt_mat, ndim=3)
    check.array_t(channel_i_to_vpd, ndim=1)

    # Variances of zero would cause divide-by-zero, so all zeros
    # are set to 0.5, chosen arbitrarily because it is > 0 and < 1.
    dt_mat = dt_mat.astype(float)
    dt_mat[dt_mat == 0] = 0.5

    vpd_broadcast = channel_i_to_vpd[None, :, None]
    spd = np.sqrt(vpd_broadcast)

    # Scale the standard deviation per dye by channel
    return 1.0 / np.square(spd * dt_mat)
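# A toy numeric check (assumed values, not from the original module) of the
# inverse-variance construction above: one channel with vpd = 0.09 gives a
# per-dye std-dev of 0.3, so a dyetrack row of counts [0, 1, 2] becomes
# std-devs [0.15, 0.3, 0.6] after the 0.5 substitution for zeros.
import numpy as np

dt_mat = np.array([[[0.0, 1.0, 2.0]]])            # (n_rows=1, n_channels=1, n_cycles=3)
channel_i_to_vpd = np.array([0.09])

dt = dt_mat.copy()
dt[dt == 0] = 0.5
spd = np.sqrt(channel_i_to_vpd)[None, :, None]    # 0.3 std-dev per dye
inv_var = 1.0 / np.square(spd * dt)
assert np.allclose(inv_var, 1.0 / np.array([0.15, 0.3, 0.6]) ** 2)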
def set(self, ch_aln):
    check.array_t(ch_aln, ndim=2)
    assert ch_aln.shape[1] == 2
    # Everything is calibrated relative to channel 0
    assert np.all(ch_aln[0, :] == 0.0)
    self.ch_aln = ch_aln
    return self
def _mask_anomalies_im(im, den_threshold=300):
    """
    Operates on pre-balanced images.
    The den_threshold of 300 was found empirically on Val data

    Sets anomalies to nan
    """
    import skimage.transform  # Defer slow imports
    import cv2

    check.array_t(im, is_square=True)

    # SLICE into squares using numpy-foo: reshape the image into a
    # four-dimensional array so that np.mean can be taken over the inner dimensions.
    sub_mea = 4  # Size of the sub-sample region
    im_mea, _ = im.shape

    squares = im.reshape(im_mea // sub_mea, sub_mea, im_mea // sub_mea, sub_mea)
    # At this point, squares is 4-dimensional, e.g. (128, 4, 128, 4) for a 512-pixel image.
    # But we want the small dims next to each other for simplicity, so swap the inner axes
    squares = squares.swapaxes(1, 2)
    # Now squares is like (128, 128, 4, 4), so we take the mean of the last two axes
    squares = np.mean(squares, axis=(2, 3))

    bad_mask = (squares > den_threshold).astype(float)

    # EXPAND the bad areas by erosion and dilation.
    # Erosion gets rid of the single-pixel hits and dilation expands the bad areas
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.erode(bad_mask, kernel, iterations=1)
    mask = cv2.dilate(mask, kernel, iterations=3)

    scale = im.shape[0] // mask.shape[0]

    full_size_mask = skimage.transform.rescale(
        mask, scale=scale, multichannel=False, mode="constant", anti_aliasing=False
    ).astype(bool)

    # FIND rect contours of bad areas
    contours, hierarchy = cv2.findContours(
        full_size_mask.astype("uint8"), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    bad_rects = [cv2.boundingRect(cnt) for cnt in contours]
    im = im.copy()
    for rect in bad_rects:
        imops.fill(im, loc=XY(rect[0], rect[1]), dim=WH(rect[2], rect[3]), val=np.nan)

    return im
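# A tiny standalone demo (assumed 8x8 input, not part of the original module) of the
# reshape/swapaxes trick used above to compute block means over sub_mea x sub_mea tiles.
import numpy as np

im = np.arange(64, dtype=float).reshape(8, 8)
sub_mea = 4
blocks = im.reshape(8 // sub_mea, sub_mea, 8 // sub_mea, sub_mea).swapaxes(1, 2)
block_means = blocks.mean(axis=(2, 3))            # shape (2, 2), one mean per tile
assert np.isclose(block_means[0, 0], im[:4, :4].mean())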
def scale_im(im, scale):
    """Scale an image up or down"""
    check.array_t(im, ndim=2, dtype=float)
    rows, cols = im.shape
    M = np.array([[scale, 0.0, 0.0], [0.0, scale, 0.0]])
    return cv2.warpAffine(
        im, M, dsize=(int(scale * cols), int(scale * rows)), flags=cv2.INTER_CUBIC
    )
def locs_to_region(locs, n_divs, im_dim):
    """
    Convert a matrix of locs in (y, x) columns into the regional coords
    of im_dim divided into n_divs.
    """
    check.array_t(locs, shape=(None, 2))
    reg_locs = np.floor(n_divs * locs / im_dim).astype(int)
    assert np.all((0 <= reg_locs) & (reg_locs < n_divs))
    return reg_locs
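# A quick numeric illustration (assumed values) of locs_to_region: with a
# 512-pixel image split into 4 divs, the location (y=300, x=100) falls in
# regional cell (2, 0) and (y=0, x=511) falls in (0, 3).
import numpy as np

locs = np.array([[300.0, 100.0], [0.0, 511.0]])
reg = np.floor(4 * locs / 512).astype(int)
assert reg.tolist() == [[2, 0], [0, 3]]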
def _step_2_create_neighbors_lookup(dyemat, true_pep_iz):
    """
    The dyemat may have many duplicate rows, each from some number of peps.
    These duplicate rows are consolidated so that each coordinate in dyemat
    space is given a unique "dye_i".

    Returns:
        dt_mat: The unique (sorted) rows of dyemat
        dyetracks_df: DF(dye_i, weight). Where weight is the sum of all rows
            that pointed to this dyetrack
        dt_pep_sources_df: DF(dye_i, pep_i, n_rows)
            Records how many times each peptide generated dye_i where count > 0.
        flann: A fast Approximate Nearest Neighbors lookup using PYFLANN.
    """
    check.array_t(dyemat, ndim=3)
    dt_mat, true_dt_iz, dt_counts = np.unique(
        dyemat, return_inverse=True, return_counts=True, axis=0
    )

    prune_rare = False
    if prune_rare:
        keep_mask = dt_counts > 0
        keep_mask[0] = True
        dt_mat = dt_mat[keep_mask]
        dt_counts = dt_counts[keep_mask]
        n_old = len(keep_mask)
        n_new = keep_mask.sum()
        orig = np.arange(n_old)
        new_to_old = orig[keep_mask]
        old_to_new = np.zeros((n_old,))
        new_i = np.arange(n_new)
        old_to_new[new_to_old] = new_i
        true_dt_iz = old_to_new[true_dt_iz]

    _, n_channels, n_cycles = dt_mat.shape

    # PREPEND a zero element that represents the nul dyetrack
    dt_mat = np.vstack((np.zeros((1, n_channels, n_cycles)), dt_mat))
    dt_counts = np.concatenate(([0], dt_counts))
    true_dt_iz += 1

    dyetracks_df = (
        pd.DataFrame(dict(weight=dt_counts))
        .reset_index()
        .rename(columns=dict(index="dye_i"))
    )
    dt_pep_sources_df = (
        pd.DataFrame(dict(dye_i=true_dt_iz, pep_i=true_pep_iz))
        .groupby(["dye_i", "pep_i"])
        .size()
        .to_frame("n_rows")
        .reset_index()
    )

    flann = _create_flann(dt_mat)
    return dt_mat, dyetracks_df, dt_pep_sources_df, flann
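# A toy consolidation demo (fabricated dyetracks, not from the original module)
# of the np.unique(..., axis=0) call above: duplicate (n_channels, n_cycles)
# rows collapse to unique dyetracks with an inverse map and per-track counts.
import numpy as np

dyemat = np.array([
    [[1, 1, 0]],   # row 0
    [[1, 0, 0]],   # row 1
    [[1, 1, 0]],   # row 2 (duplicate of row 0)
])
dt_mat, true_dt_iz, dt_counts = np.unique(
    dyemat, return_inverse=True, return_counts=True, axis=0
)
assert dt_mat.shape == (2, 1, 3)
assert true_dt_iz.tolist() == [1, 0, 1]   # each input row's unique dyetrack index
assert dt_counts.tolist() == [1, 2]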
def sub_pixel_shift(im, offset):
    """
    Shift with offset in (y, x) array form.
    A positive x will shift right. A positive y will shift up.
    """
    check.array_t(im, ndim=2, dtype=float)
    rows, cols = im.shape
    # Note the reversal of the dimensions
    M = np.array([[1.0, 0.0, offset[1]], [0.0, 1.0, offset[0]]])
    return cv2.warpAffine(im, M, dsize=(cols, rows), flags=cv2.INTER_CUBIC)
def _dyemat_sim(sim_v2_params, pcbs, n_samples, progress=None):
    """
    Run a dyemat sim via the C fast_sim module.

    Arguments:
        sim_v2_params: SimV2Params
        pcbs: This is an encoding of flus. See SimV2Params.pcbs()
            Each peptide has a row per amino-acid and either a channel number
            or a np.nan to indicate a label at that position, plus a p_bright
            for that aa.
        n_samples: number of samples to try ... BUT NOT NEC. THE NUMBER RETURNED!
            -- because all-dark samples are not returned.
            See "Dealing with dark-rows" above

    Returns:
        dyemat: ndarray(n_uniq_dyetracks, n_channels, n_cycles)
        dyepeps: ndarray(dye_i, pep_i, count)
        pep_recalls: ndarray(n_peps)
    """
    check.t(sim_v2_params, SimV2Params)
    check.array_t(pcbs, shape=(None, 3), dtype=float)

    # TODO: Refactor to use priors correctly
    # The following is assuming that all dyes have the same p_bleach
    dyemat, dyepeps, pep_recalls = sim_v2_fast.sim(
        pcbs,
        n_samples,
        sim_v2_params.n_channels,
        len(sim_v2_params.labels),
        sim_v2_params.cycles_array(),
        # TODO: Needs to be per-channel and sampled correctly
        sim_v2_params.channel__priors().set_index("ch_i").iloc[0].p_bleach.sample(),
        # TODO: The following two need to be sampled correctly
        sim_v2_params.priors.get_mle("p_detach"),
        sim_v2_params.priors.get_mle("p_edman_failure"),
        sim_v2_params.allow_edman_cterm,
        n_threads=get_cpu_limit(),
        rng_seed=sim_v2_params.random_seed,
        progress=progress,
    )

    # Lexicographically sort the dyemat rows and then remap dyepeps accordingly
    n_rows, n_cols = dyemat.shape
    lex_cols = tuple(dyemat[:, n_cols - i - 1] for i in range(n_cols))
    sort_args = np.lexsort(lex_cols)
    lut = np.zeros((n_rows,), dtype=int)
    lut[sort_args] = np.arange(n_rows, dtype=int)
    dyepeps[:, 0] = lut[dyepeps[:, 0]]
    return dyemat[sort_args], dyepeps, pep_recalls
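# A small standalone demo (toy data, not from the original module) of the
# lexsort-then-remap step above: rows of a matrix are sorted lexicographically
# and an index column referencing the old row order is remapped through a LUT.
import numpy as np

mat = np.array([[1, 0], [0, 1], [0, 0]])
refs = np.array([0, 1, 2])                       # indices into mat's old row order

n_rows, n_cols = mat.shape
sort_args = np.lexsort(tuple(mat[:, n_cols - i - 1] for i in range(n_cols)))
lut = np.zeros(n_rows, dtype=int)
lut[sort_args] = np.arange(n_rows)

sorted_mat = mat[sort_args]                      # [[0,0],[0,1],[1,0]]
remapped_refs = lut[refs]                        # old row i is now row lut[i]
assert np.array_equal(sorted_mat[remapped_refs], mat[refs])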
def it_finds():
    _, pred, score, _ = _do_nn_and_gmm(
        x, dyerow, dt_mat, np.ones_like(dt_mat), dt_weights, flann, use_gmm=False
    )
    check.array_t(pred, shape=(1,))
    check.array_t(score, shape=(1,))
    assert pred.tolist() == [3]
    assert score[0] > 0.9
def from_psf_ims(cls, im_mea, psf_ims):
    """
    Fit a Gaussian for one channel
    """
    check.array_t(psf_ims, ndim=4)
    divs_y, divs_x, peak_mea_h, peak_mea_w = psf_ims.shape
    assert divs_y == divs_x
    assert peak_mea_h == peak_mea_w
    reg_psf = cls(im_mea, peak_mea_h, divs_y)
    reg_psf.estimate(psf_ims)
    return reg_psf
def align(im_stack, return_shifted_ims=False, bounds=None):
    """
    Align the image stack (1 pixel accuracy) relative to the first frame in the stack

    Arguments:
        im_stack (3 dimensions)
        return_shifted_ims:
            If True, also return the shifted images truncated to the common
            region of interest
        bounds: If not None limit the search space

    Returns:
        list of YX tuples
        shifted_ims (optional)
    """
    check.array_t(im_stack, ndim=3, dtype=np.float64)
    n_cycles, mea_h, mea_w = im_stack.shape
    assert mea_h == mea_w

    offsets = [YX(0, 0)]
    primary = im_stack[0]
    for im in im_stack[1:]:
        # TODO: This could be optimized by using fft instead of
        # cv2.filter2D() which would avoid the fft of the
        # unchanging primary.
        conv = convolve(src=primary, kernel=im)

        # conv is now zero-centered; that is, the peak is
        # an offset relative to the center of the image.
        if bounds is not None:
            edge_fill(conv, (mea_w - 2 * bounds) // 2, val=0)

        peak = YX(np.unravel_index(conv.argmax(), conv.shape))
        center = HW(conv.shape) // 2
        offsets += [center - peak]

    if return_shifted_ims:
        raw_dim = im_stack.shape[-2:]
        roi = intersection_roi_from_aln_offsets(offsets, raw_dim)
        roi_dim = (roi[0].stop - roi[0].start, roi[1].stop - roi[1].start)

        pixel_aligned_cy_ims = np.zeros((n_cycles, mea_h, mea_w))
        for cy_i, offset in zip(range(n_cycles), offsets):
            shifted_im = shift(im_stack[cy_i], offset * -1)
            pixel_aligned_cy_ims[cy_i, 0:roi_dim[0], 0:roi_dim[1]] = shifted_im[
                roi[0], roi[1]
            ]
        return np.array(offsets), pixel_aligned_cy_ims

    else:
        return np.array(offsets)
def synth_image(im, peak_mea, locs, amps, std_xs, std_ys):
    """
    Generate a synthetic image using the Gaussians in the parallel arrays
    and accumulate into im
    """
    lib = load_lib()

    n_locs = int(len(locs))
    check.array_t(amps, shape=(n_locs,))
    check.array_t(std_xs, shape=(n_locs,))
    check.array_t(std_ys, shape=(n_locs,))

    params = np.zeros((n_locs, Gauss2FitParams.N_FIT_PARAMS))
    params[:, Gauss2FitParams.AMP] = amps
    params[:, Gauss2FitParams.CENTER_Y] = locs[:, 0]
    params[:, Gauss2FitParams.CENTER_X] = locs[:, 1]
    params[:, Gauss2FitParams.SIGMA_X] = std_xs
    params[:, Gauss2FitParams.SIGMA_Y] = std_ys

    check.array_t(im, ndim=2)
    params = np.ascontiguousarray(params, dtype=np.float64)
    im = np.ascontiguousarray(im, dtype=np.float64)
    im_h, im_w = im.shape

    error = lib.synth_image(im, im_w, im_h, peak_mea, n_locs, params)
    if error is not None:
        raise CException(error)
def _step_4_gmm_classify(
    radmat,
    dyemat,
    dt_mat,
    dt_inv_var_mat,
    dt_weights,
    flann,
    n_neighbors,
    dt_score_mode,
    dt_filter_threshold,
    dt_score_metric,
    dt_score_bias,
    penalty_coefs,
    rare_penalty,
    radius,
    progress,
):
    """
    The dyemat is passed so that we can get the true_dt_iz for debugging
    """
    check.array_t(radmat, ndim=3)
    true_dt_iz, pred_dt_iz, scores, vdists = zap.arrays(
        _do_nn_and_gmm,
        dict(unit_radrow=radmat, dyerow=dyemat),
        dt_mat=dt_mat,
        dt_inv_var_mat=dt_inv_var_mat,
        dt_weights=dt_weights,
        flann=flann,
        n_neighbors=n_neighbors,
        dt_score_mode=dt_score_mode,
        dt_filter_threshold=dt_filter_threshold,
        dt_score_metric=dt_score_metric,
        dt_score_bias=dt_score_bias,
        penalty_coefs=penalty_coefs,
        rare_penalty=rare_penalty,
        radius=radius,
        _progress=progress,
        _stack=True,
    )

    # I use the dt counts as a weighting factor on the PDFs,
    # which means that the scores can be > 1.0.
    # To ensure that all rows get equal treatment in
    # normalization I simply divide them through by the
    # max value to put them into the 0-1 range.
    scores = scores.flatten()
    scores /= np.max(scores)

    return true_dt_iz.flatten(), pred_dt_iz.flatten(), scores, vdists
def _step_1_create_neighbors_lookup_singleprocess(dyemat, output_dt_mat):
    """
    The dyemat may have many duplicate rows, each from some number of peps.
    These duplicate rows are consolidated so that each coordinate in dyemat
    space is given a unique "dye_i".

    The unique (sorted) dyetracks are written to output_dt_mat which is
    expected to be large enough to hold them.

    Returns:
        dyetracks_df: DF(dye_i, weight). Where weight is the sum of all rows
            that pointed to this dyetrack
        dt_pep_sources_df: DF(dye_i, pep_i, n_rows)
            Records how many times each peptide generated dye_i where count > 0.
        flann: A fast Approximate Nearest Neighbors lookup using PYFLANN.
        n_dts: Number of actual unique dts
    """
    check.array_t(dyemat, ndim=4)  # (n_peps, n_samples, n_channels, n_cycles): uint8
    n_peps, n_samples, n_channels, n_cycles = dyemat.shape

    true_pep_iz = np.repeat(np.arange(n_peps), n_samples)

    # Example usage of unique
    #   b = np.array([1, 4, 3, 2, 1, 2])
    #   p = np.unique(b, return_inverse=True, return_counts=True)
    #   p == (array([1, 2, 3, 4]), array([0, 3, 2, 1, 0, 1]), array([2, 2, 1, 1]))
    _dyemat = dyemat.reshape(
        (dyemat.shape[0] * dyemat.shape[1], dyemat.shape[2] * dyemat.shape[3])
    )
    dt_mat, true_dt_iz, dt_counts = np.unique(
        _dyemat, return_inverse=True, return_counts=True, axis=0
    )
    dt_mat = dt_mat.reshape((dt_mat.shape[0], n_channels, n_cycles))

    n_dts, n_channels, n_cycles = dt_mat.shape
    output_dt_mat[0:n_dts] = dt_mat

    # Check that the nul row exists and is the first element
    if not np.all(dt_mat[0] == 0):
        raise ValueError("No null row was included in the dyemat")

    flann = _create_flann(dt_mat)

    dyetracks_df, dt_pep_sources_df, dye_to_best_pep_df = _setup_pep_source_dfs(
        true_dt_iz, true_pep_iz, dt_counts
    )

    return dyetracks_df, dt_pep_sources_df, dye_to_best_pep_df, flann, n_dts
def mat_lessflat(mat, dim1=None, dim2=None):
    """
    To unflatten you must know either dim1 or dim2

    Example, suppose mat is (2, 6)
        m = mat_lessflat(mat, dim2=3)
        assert m.shape == (2, 2, 3)
    """
    check.array_t(mat, ndim=2)
    check.affirm(dim1 is not None or dim2 is not None)
    if dim1 is None:
        dim1 = mat.shape[1] // dim2
    if dim2 is None:
        dim2 = mat.shape[1] // dim1
    return mat.reshape(mat.shape[0], dim1, dim2)
def do_field_cycle(ch_ims: np.ndarray, field_i: int, cycle_i: int, reg_psf_samples, peak_mea):
    lib = load_lib()

    check.array_t(ch_ims, ndim=3, is_square=True)

    with context(
        ch_ims=ch_ims,
        reg_psf_samples=reg_psf_samples,
        peak_mea=peak_mea,
        field_i=field_i,
    ) as ctx:
        error = lib.do_field_cycle(ctx, cycle_i)
        if error is not None:
            raise CException(error)

        return ctx._out_align, ctx._out_locs, ctx._out_radiometry
def psf_fields_one_channel(
    ims_import_result, sigproc_v2_params, field_iz, channel_i, progress=None
) -> priors.RegPSFPrior:
    """
    Build up a regional PSF for one channel on the RAW images.

    Implemented as a parallel zap over every field; the fields are then
    combined into a single RegPSF which stores: (divs, divs, peak_mea, peak_mea)
    """
    if ims_import_result.n_fields == 0:
        return None

    with zap.Context(progress=progress):
        region_to_psf_per_field = zap.arrays(
            _do_psf_one_field_one_channel,
            dict(field_i=field_iz),
            _stack=True,
            peak_mea=sigproc_v2_params.peak_mea,
            divs=sigproc_v2_params.divs,
            bandpass_kwargs=dict(
                low_inflection=sigproc_v2_params.low_inflection,
                low_sharpness=sigproc_v2_params.low_sharpness,
                high_inflection=sigproc_v2_params.high_inflection,
                high_sharpness=sigproc_v2_params.high_sharpness,
            ),
            ims_import_result=ims_import_result,
            channel_i=channel_i,
            n_cycles_limit=sigproc_v2_params.n_cycles_limit,
        )

    # SUM over fields
    psf_ims = np.sum(region_to_psf_per_field, axis=0)
    psf_ims = psf_normalize(psf_ims)

    # At this point psf_ims is a pixel image of the PSF at each reg div,
    # ie, 4 dimensional: (divs_y, divs_x, n_pixels_h, n_pixels_w)

    # Now we convert it to Gaussian parameters by fitting, so we don't have
    # to store the pixels anymore: just the 3 critical shape parameters:
    # sigma_x, sigma_y, and rho.

    # Use one frame of ims_import_result to sort out dimensions
    im = ims_import_result.ims[0, 0, 0]
    check.array_t(im, is_square=True)
    reg_psf = priors.RegPSFPrior.from_psf_ims(im.shape[-1], psf_ims)

    return reg_psf
def estimate(self, psf_ims):
    check.array_t(psf_ims, ndim=4)
    n_divs_y, n_divs_x, peak_mea_h, peak_mea_w = psf_ims.shape
    assert n_divs_y == n_divs_x and self.hyper_n_divs == n_divs_y
    assert peak_mea_h == peak_mea_w and self.hyper_peak_mea == peak_mea_h

    self.sigma_x = np.zeros((self.hyper_n_divs, self.hyper_n_divs))
    self.sigma_y = np.zeros((self.hyper_n_divs, self.hyper_n_divs))
    self.rho = np.zeros((self.hyper_n_divs, self.hyper_n_divs))

    for y in range(n_divs_y):
        for x in range(n_divs_x):
            im = psf_ims[y, x]
            if np.sum(im) > 0:
                fit_params, _ = imops.fit_gauss2(im)
                self.sigma_x[y, x] = fit_params[Gauss2Params.SIGMA_X]
                self.sigma_y[y, x] = fit_params[Gauss2Params.SIGMA_Y]
                self.rho[y, x] = fit_params[Gauss2Params.RHO]
def _any_identical_non_zero_rows(a, b):
    """
    Checks if two mats a and b are identical in ANY non-zero rows.
    """
    check.array_t(a, ndim=2)
    check.array_t(b, ndim=2)

    # Compare on a random subsample of up to 100 rows
    arg_sample = stats.arg_subsample(a, 100)
    a = a[arg_sample]
    b = b[arg_sample]

    # Drop rows where a is entirely zero
    zero_rows = np.all(a == 0, axis=1)
    a = a[~zero_rows]
    b = b[~zero_rows]

    if a.shape[0] > 0:
        # A row is "identical" only if every element of that row matches
        return np.any(np.all(a == b, axis=1))