def experiment(modelname, datasetname, datasetpath, num_trials, alpha, kreg,
               lamda, randomized, n_data_conf, n_data_val, bsz, predictor):
    ### Experiment logic
    naive_bool = predictor == 'Naive'
    if predictor in ['Naive', 'APS']:
        lamda = 0  # No regularization.

    ### Data Loading
    logits = get_logits_dataset(modelname, datasetname, datasetpath)

    ### Instantiate and wrap model
    model = get_model(modelname)

    ### Perform experiment
    top1s = np.zeros((num_trials,))
    top5s = np.zeros((num_trials,))
    coverages = np.zeros((num_trials,))
    sizes = np.zeros((num_trials,))
    for i in tqdm(range(num_trials)):
        top1_avg, top5_avg, cvg_avg, sz_avg = trial(
            model, logits, alpha, kreg, lamda, randomized, n_data_conf,
            n_data_val, bsz, naive_bool)
        top1s[i] = top1_avg
        top5s[i] = top5_avg
        coverages[i] = cvg_avg
        sizes[i] = sz_avg
        print(
            f'\n\tTop1: {np.median(top1s[0:i+1]):.3f}, Top5: {np.median(top5s[0:i+1]):.3f}, Coverage: {np.median(coverages[0:i+1]):.3f}, Size: {np.median(sizes[0:i+1]):.3f}\033[F',
            end='')
    print('')
    return np.median(top1s), np.median(top5s), np.median(coverages), np.median(
        sizes), mad(top1s), mad(top5s), mad(coverages), mad(sizes)
def mad_outlier(obj, nmads=3, verbose=False):
    """Outlier detection based on median absolute deviation

    Notes
    -----
    Removes cells that fall a number of median absolute deviations below
    the median of either of two quality metrics. The quality metrics are
    the log of the library size and the log of the number of detected
    genes. The principle is similar to Lun et al. Three MADs is the
    default.

    Parameters
    ----------
    obj : :class:`adobo.data.dataset`
        A data class object.
    nmads : `int`
        Number of median absolute deviations below the median for the
        cell to be considered an outlier. Default: 3
    verbose : `bool`
        Be verbose or not. Default: False

    References
    ----------
    .. [1] Lun et al. (2016) F1000Res, A step-by-step workflow for
           low-level analysis of single-cell RNA-seq data with
           Bioconductor,
           https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5112579/

    Returns
    -------
    Modifies the passed object.
    """
    # reset
    obj.meta_cells.status[obj.meta_cells.status != 'OK'] = 'OK'
    # lib size
    ls = obj.meta_cells.total_reads
    # detected genes
    dg = obj.meta_cells.detected_genes
    ls_log = np.log2(ls + 1)
    dg_log = np.log2(dg + 1)
    # only check below
    lower_ls = np.median(ls_log) - mad(ls_log) * nmads
    lower_dg = np.median(dg_log) - mad(dg_log) * nmads
    remove = np.logical_or(ls_log < lower_ls, dg_log < lower_dg)
    r = obj.meta_cells.index.isin(remove[remove].index)
    obj.meta_cells.status[r] = 'EXCLUDE'
    if verbose:
        print('Removed %s cells' % np.sum(r))
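# A minimal, self-contained sketch of the same MAD lower-bound rule on
# synthetic library sizes (the adobo dataset object above is assumed and
# not needed here): values below median - nmads * MAD on the log scale
# are flagged as outliers.
import numpy as np
from scipy.stats import median_abs_deviation as mad

rng = np.random.default_rng(0)
lib_sizes = rng.lognormal(mean=9, sigma=0.4, size=500)
lib_sizes[:5] = 50  # a few failed cells with tiny libraries

ls_log = np.log2(lib_sizes + 1)
lower = np.median(ls_log) - 3 * mad(ls_log)
outliers = ls_log < lower
print('flagged %d of %d cells' % (outliers.sum(), outliers.size))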
def noise_floor(sat_thresh, noi_thresh, power):
    """Computes the noise floor of a rf power array

    Exclude channels with signal above :samp:`sat_thresh` multiplied by the
    :samp:`standard deviation` of the power array. The Median Absolute
    Deviation :samp:`MAD` is used to quantify the noise level of the
    remaining channels. The noise floor is defined to be the :samp:`median`
    of the noisy data + :samp:`noi_thresh` multiplied by the :samp:`MAD`
    of the noisy data.

    :param sat_thresh: An integer multiple of the standard deviation of the rf power array, used to exclude channels with potential satellites. :class:`~int`
    :param noi_thresh: An integer multiple of the noisy data MAD, used to compute a noise floor. :class:`~int`
    :param power: Rf power array :class:`~numpy.ndarray`

    :returns:
        noise_threshold: The power level of the noise floor in dBm :class:`~int`

    """
    # compute the standard deviation of the data, and use it to identify occupied channels
    σ = np.std(power)

    # Any channel with a max power >= sat_thresh * σ has a satellite
    sat_cut = sat_thresh * σ
    chans_pow_max = np.amax(power, axis=0)

    # Exclude the channels with sats, to only have noise data
    noise_chans = np.where(chans_pow_max < sat_cut)[0]
    noise_data = power[:, noise_chans]

    # noise median, noise mad, noise threshold = μ + noi_thresh * σ_noise
    μ_noise = np.median(noise_data)
    σ_noise = mad(noise_data, axis=None)
    noise_threshold = μ_noise + noi_thresh * σ_noise

    return noise_threshold
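# Hedged usage sketch with a synthetic power array: 1000 spectra by 112
# channels of zero-centered Gaussian noise, plus one strong "satellite"
# channel. The thresholds (sat_thresh=3, noi_thresh=3) are illustrative,
# not package defaults.
import numpy as np

rng = np.random.default_rng(1)
power = rng.normal(loc=0, scale=2, size=(1000, 112))
power[:, 40] += 30  # occupied channel, excluded by the sat_thresh cut

print('noise threshold ~ %.2f' % noise_floor(3, 3, power))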
def get_gene_stats(xvals, col_idxs, tissues):
    """
    Compute summary stats across all samples for a given gene & tissue
    """
    xmin, xq1, xmed, xmean, xq3, xmax, xsd, xmad = [], [], [], [], [], [], [], []

    for tissue in tissues.keys():
        tidx = [col_idxs[s] for s in tissues[tissue] if s in col_idxs.keys()]
        if len(tidx) > 0:
            tvals = xvals[tidx]
            xmin.append(np.nanmin(tvals))
            xq1.append(np.nanquantile(tvals, q=0.25))
            xmed.append(np.nanmedian(tvals))
            xmean.append(np.nanmean(tvals))
            xq3.append(np.nanquantile(tvals, q=0.75))
            xmax.append(np.nanmax(tvals))
            xsd.append(np.nanstd(tvals))
            xmad.append(mad(tvals))
        else:
            xmin.append(np.nan)
            xq1.append(np.nan)
            xmed.append(np.nan)
            xmean.append(np.nan)
            xq3.append(np.nan)
            xmax.append(np.nan)
            xsd.append(np.nan)
            xmad.append(np.nan)

    return xmin, xq1, xmed, xmean, xq3, xmax, xsd, xmad
def mod_zscore(arr):
    """Modified z-score, as defined by Iglewicz and Hoaglin

    :param arr: Array
    :type arr: array-like
    :return: Modified z-scores of the elements of the input array
    :rtype: ndarray
    """
    return 0.6745 * (np.asarray(arr) - np.median(arr)) / mad(arr)
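# The 0.6745 factor is the 75th percentile of the standard normal
# distribution: for Gaussian data MAD ~= 0.6745 * sigma, so this score
# matches the ordinary z-score in scale while staying robust to outliers.
# Values with |score| > 3.5 are commonly flagged (Iglewicz & Hoaglin).
# A quick, hedged check on synthetic data:
import numpy as np
from scipy.stats import median_abs_deviation as mad

rng = np.random.default_rng(2)
x = rng.normal(size=10_000)
print(mad(x) / 0.6745, x.std())  # both ~1 for a standard normal sample

x_out = np.append(x[:50], 8.0)  # inject one gross outlier
print(np.abs(mod_zscore(x_out)) > 3.5)  # only the outlier should be flagged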
def _estimate_local_noise(x: np.ndarray, robust: bool = True) -> float:
    r"""
    Estimates noise in a 1D signal. Assumes that the noise is gaussian iid.

    aux function of estimate_noise

    Parameters
    ----------
    x : 1D array
        The size of x must be at least 4. If the size is smaller, the
        function will return 0.
    robust : bool
        If True, estimates the noise using the median absolute deviation.
        Else uses the standard deviation.

    Returns
    -------
    noise : non negative number.

    """
    d2x = np.diff(x, n=2)
    sorted_index = np.argsort(np.abs(d2x))
    d2x = d2x[sorted_index]
    # if d2x follows a normal distribution ~ N(0, 2*sigma), its sample mean
    # has a normal distribution ~ N(0, 2 * sigma / sqrt(n - 2)) where n is the
    # size of d2x.
    # d2x values with high absolute values are removed until the mean of d2x
    # is lower than its standard deviation.
    # start at the 90th percentile and decrease it in each iteration.
    # The loop stops at the 20th percentile even if this condition is not met.
    n_deviations = 3  # dummy value to initialize the loop
    percentile_counter = 9  # start at 90th percentile
    noise_std = 0
    while (n_deviations > 1.0) and (percentile_counter > 2):
        percentile_index = percentile_counter * d2x.size // 10
        # the minimum number of elements required to compute the MAD
        if percentile_index <= 2:
            break
        # dev_threshold = 2 / np.sqrt(percentile - 2)
        if robust:
            noise_std = mad(d2x[:percentile_index], scale="normal")
            noise_mean = np.median(d2x[:percentile_index])
        else:
            noise_std = d2x[:percentile_index].std()
            noise_mean = d2x[:percentile_index].mean()

        # if all the values in d2x are equal, noise_std is equal to zero
        if noise_std > 0:
            n_deviations = abs(noise_mean / noise_std)
        else:
            break
        percentile_counter -= 1
    noise = noise_std / 2
    return noise
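# Hedged sanity check on pure Gaussian noise: the code above treats the
# second difference of iid noise as having std ~ 2 * sigma (hence the
# final division by 2), so the returned value should land within a few
# tens of percent of the true sigma.
import numpy as np

rng = np.random.default_rng(3)
true_sigma = 0.5
x = rng.normal(scale=true_sigma, size=2000)
print(_estimate_local_noise(x), true_sigma)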
def _match_cats(xr, yr, er, xt, yt, et, errors=False):
    if len(xr) > len(xt):
        xs = np.array(xt)
        ys = np.array(yt)
        es = np.array(et)
        xl = np.array(xr)
        yl = np.array(yr)
        el = np.array(er)
    else:
        xs = np.array(xr)
        ys = np.array(yr)
        es = np.array(er)
        xl = np.array(xt)
        yl = np.array(yt)
        el = np.array(et)

    # simple closest match iterative
    dx = 0.0
    dy = 0.0
    for _i in range(3):
        i_sl = []
        i_ls = []
        r_sl = []
        for i in range(len(xs)):
            r = np.sqrt((xl - xs[i] - dx)**2 + (yl - ys[i] - dy)**2)
            r_sl.append(r.min())
            i_sl.append(i)
            i_ls.append(r.argmin())
        r_sl = np.array(r_sl)
        # combined error, minimum error should be 2
        e_c = np.sqrt(el[i_ls]**2 + es[i_sl]**2).clip(2)
        # only 3sigma matches
        ii = np.where(
            r_sl - np.median(r_sl) < 3.0 * 1.48 * mad(r_sl, axis=None))[0]
        dx = np.sum(
            (xl[i_ls] - xs[i_sl])[ii] / e_c[ii]**2) / np.sum(1.0 / e_c[ii]**2)
        dy = np.sum(
            (yl[i_ls] - ys[i_sl])[ii] / e_c[ii]**2) / np.sum(1.0 / e_c[ii]**2)

    if errors:
        ex = np.sqrt(1.0 / np.sum(1.0 / e_c**2))
        if len(xr) > len(xt):
            return dx, dy, ex, i_ls, i_sl
        else:
            return -dx, -dy, ex, i_sl, i_ls

    if len(xr) > len(xt):
        return dx, dy, i_ls, i_sl
    else:
        return -dx, -dy, i_sl, i_ls
def describe_data(data_set):
    """
    basic descriptive statistic calculation: describe() output with an
    added median absolute deviation row

    :param data_set: pandas data frame
    :return: pandas data frame with descriptive stats
    """
    describe_stats = data_set.describe().reset_index()
    describe_stats.loc[8] = ['mad'] + [mad(data_set[f'hist_{i}'])
                                       for i in range(27)] + [0]
    describe_stats.set_index('index', inplace=True, drop=True)
    assert isinstance(describe_stats, object)
    return describe_stats
def __getWeight(self, d, weight_type):
    if weight_type == 'uniform':
        return 1.0
    elif weight_type == 'PCC':
        return abs(np.corrcoef(self.X_[:, d], self.Y_)[0, 1])
    elif weight_type == 'MAD':
        if self.types_[d][0] == 'B':
            return (self.X_[:, d] * 1.4826).std()
        else:
            weight = mad(self.X_[:, d])
            if weight != 0.0:
                return weight**-1
            else:
                return (self.X_[:, d] * 1.4826).std()
def experiment(modelname, datasetname, datasetpath, num_trials, alpha,
               n_data_conf, n_data_val, bsz):
    ### Experiment logic
    ### Data Loading
    logits = get_logits_dataset(modelname, datasetname, datasetpath)

    ### Perform experiment
    top1s = np.zeros((num_trials,))
    top5s = np.zeros((num_trials,))
    coverages = np.zeros((num_trials,))
    sizes = np.zeros((num_trials,))
    for i in tqdm(range(num_trials)):
        top1_avg, top5_avg, cvg_avg, sz_avg = trial(logits, alpha,
                                                    n_data_conf, n_data_val,
                                                    bsz)
        top1s[i] = top1_avg
        top5s[i] = top5_avg
        coverages[i] = cvg_avg
        sizes[i] = sz_avg
        print(
            f'\n\tTop1: {np.median(top1s[0:i+1]):.3f}, Top5: {np.median(top5s[0:i+1]):.3f}, Coverage: {np.median(coverages[0:i+1]):.3f}, Size: {np.median(sizes[0:i+1]):.3f}\033[F',
            end='')
    print('')
    return np.median(top1s), np.median(top5s), np.median(coverages), np.median(
        sizes), mad(top1s), mad(top5s), mad(coverages), mad(sizes)
def mad_wavelet(image):
    """ Median absolute deviation of the first wavelet scale.
    (WARNING: sorry to disappoint, this is not a wavelet for mad scientists)

    Parameters
    ----------
    image: array
        An image or cube of images

    Returns
    -------
    mad: array
        median absolute deviation for each image in the cube
    """
    sigma = mad(Starlet(image, lvl=2).coefficients[:, 0, ...], axis=(-2, -1))
    return sigma
def nan_mad(ref_map):
    """Compute mad while ignoring nans"""
    ref_map_mad = []
    for j in ref_map:
        if j != []:
            j = np.asarray(j)
            j = j[~np.isnan(j)]
            ref_map_mad.append(mad(j))
        else:
            ref_map_mad.append(np.nan)

    ref_map_mad = np.asarray(ref_map_mad)
    # np.nan == np.nan is always False; use np.isnan to find the gaps
    ref_map_mad[np.isnan(ref_map_mad)] = np.nanmean(ref_map_mad)
    return ref_map_mad
def getFeatureWeight(self, cost_type='uniform'):
    weights = np.ones(self.D_)
    if cost_type == 'MAD':
        from scipy.stats import median_abs_deviation as mad
        for d in range(self.D_):
            weight = mad(self.X_[:, d], scale='normal')
            if self.feature_types_[d] == 'B' or abs(weight) < self.tol_:
                weights[d] = (self.X_[:, d] * 1.4826).std()
            else:
                weights[d] = weight**-1
    elif cost_type == 'standard':
        weights = np.std(self.X_, axis=0)**-1
    elif cost_type == 'normalize':
        weights = (self.X_.max(axis=0) - self.X_.min(axis=0))**-1
    elif cost_type == 'robust':
        weights = (np.quantile(self.X_, 0.75, axis=0) -
                   np.quantile(self.X_, 0.25, axis=0))**-1
    return weights
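# A small standalone sketch of the inverse-MAD weighting idea used above:
# features with a large robust spread get small weights, so distances
# along those features count less. The data matrix X is hypothetical.
import numpy as np
from scipy.stats import median_abs_deviation as mad

rng = np.random.default_rng(6)
X = np.column_stack([rng.normal(scale=1.0, size=200),
                     rng.normal(scale=10.0, size=200)])

weights = 1.0 / mad(X, scale='normal', axis=0)
print(weights)  # the noisier second feature is down-weighted ~10x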
def noise_floor(sat_thresh, noi_thresh, data=None):
    """Computes the noise floor of the data"""

    # compute the standard deviation of the data, and use it to identify occupied channels
    σ = np.std(data)

    # Any channel with a max power >= sat_thresh * σ has a satellite
    sat_cut = sat_thresh * σ
    chans_pow_max = np.amax(data, axis=0)

    # Exclude the channels with sats, to only have noise data
    noise_chans = np.where(chans_pow_max < sat_cut)[0]
    noise_data = data[:, noise_chans]

    # noise median, noise mad, noise threshold = μ + noi_thresh * σ_noise
    μ_noise = np.median(noise_data)
    σ_noise = mad(noise_data, axis=None)
    noise_threshold = μ_noise + noi_thresh * σ_noise

    return (data, noise_threshold)
def getFeatureWeight(self, cost_type='uniform'):
    weights = np.ones(self.D_)
    if cost_type == 'MAD':
        for d in range(self.D_):
            weight = mad(self.X_[:, d], scale='normal')
            if self.feature_types_[d] == 'B' or abs(weight) < self.tol_:
                weights[d] = (self.X_[:, d] * 1.4826).std()
            else:
                weights[d] = weight**-1
    elif cost_type == 'standard':
        weights = np.std(self.X_, axis=0)**-1
    elif cost_type == 'normalize':
        weights = (self.X_.max(axis=0) - self.X_.min(axis=0))**-1
    elif cost_type == 'robust':
        # np.percentile expects q in [0, 100]
        q25, q75 = np.percentile(self.X_, [25, 75], axis=0)
        for d in range(self.D_):
            if q75[d] - q25[d] == 0:
                weights[d] = self.tol_**-1
            else:
                weights[d] = (q75[d] - q25[d])**-1
    return weights
def nan_mad(good_ref_map):
    """Compute MAD of values in pixel of healpix map while ignoring nans.

    :param good_ref_map: Reference healpix map, output from :func:`~embers.tile_maps.beam_utils.good_ref_maps`

    :returns:
        - ref_map_mad - Median Absolute Deviation of the input healpix map pixels

    """
    ref_map_mad = []
    for j in good_ref_map:
        if j != []:
            j = np.asarray(j)
            j = j[~np.isnan(j)]
            ref_map_mad.append(mad(j))
        else:
            ref_map_mad.append(np.nan)

    ref_map_mad = np.asarray(ref_map_mad)
    # np.nan == np.nan is always False; use np.isnan to find the gaps
    ref_map_mad[np.isnan(ref_map_mad)] = np.nanmean(ref_map_mad)
    return ref_map_mad
def test_jacobian():
    # Compile the Jacobian
    _a = tt.dscalar()
    _b = tt.dscalar()
    log_jac = theano.function([_a, _b], StarryProcess(a=_a, b=_b).log_jac())

    # Log probability
    def log_prob(p):
        if np.any(p < 0):
            return -np.inf
        elif np.any(p > 1):
            return -np.inf
        else:
            return log_jac(*p)

    # Run the sampler
    ndim, nwalkers, nsteps = 2, 50, 10000
    p0 = np.random.random(size=(nwalkers, ndim))
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob)
    sampler.run_mcmc(p0, nsteps)

    # Transform to latitude params
    a, b = sampler.chain.T.reshape(2, -1)
    mu, sigma = beta2gauss(a, b)

    # Compute the 2d histogram
    m1, m2 = 0, 80
    s1, s2 = 0, 45
    hist, _, _ = np.histogram2d(mu, sigma, range=((m1, m2), (s1, s2)))
    hist /= np.max(hist)

    # Check that the variation is less than 10% across the domain
    std = 1.4826 * mad(hist.flatten())
    mean = np.mean(hist.flatten())
    assert std / mean < 0.1
def get_desc_stats(l):
    return max(l), min(l), mean(l), median(l), stdev(l), mad(l)
def find_bad_channels(inst, picks='eeg', method='correlation',
                      mad_threshold=1, std_threshold=1, r_threshold=0.4,
                      percent_threshold=0.1, time_step=1.0, sfreq=None,
                      return_z_scores=False, channels=None):
    # arguments to be passed to pick_types
    kwargs = {pick: True for pick in [picks]}

    # check that the input data can be handled by the function
    if isinstance(inst, BaseRaw):
        # only keep data from desired channels
        inst = inst.copy().pick_types(**kwargs)
        dat = inst.get_data() * 1e6  # to microvolt
        channels = inst.ch_names
        sfreq = inst.info['sfreq']
    elif isinstance(inst, np.ndarray):
        dat = inst
        if not channels:
            raise ValueError('If "inst" is not an instance of BaseRaw a list '
                             'of channel names must be provided')
    else:
        raise ValueError('inst must be an instance of BaseRaw or a numpy array')

    # save shape of data
    n_channels, n_samples = dat.shape
    if n_channels != len(channels):
        raise ValueError("Number of channels and data dimensions don't match")

    # make sure method arguments are in a list
    if not isinstance(method, list):
        method = [method]

    # place holder for results
    bad_channels = dict()

    # 1) find channels with zero or near zero activity
    if 'flat' in method:
        # compute estimates of channel activity
        mad_flats = mad(dat, scale=1, axis=1) < mad_threshold
        std_flats = np.std(dat, axis=1) < std_threshold

        # flat channels identified
        flats = np.argwhere(np.logical_or(mad_flats, std_flats))
        flats = np.asarray([channels[int(flat)] for flat in flats])

        # warn user if too many channels were identified as flat
        if flats.shape[0] > (n_channels / 2):
            warnings.warn('Too many channels have been identified as "flat"! '
                          'Make sure the input values in "inst" are provided '
                          'on a volt scale. '
                          'Otherwise try choosing another (meaningful) '
                          'threshold for identification.')

        bad_channels.update(flat=flats)

    # 2) find bad channels by deviation (high variability in amplitude)
    if 'deviation' in method:
        # median absolute deviation (MAD) scores for each channel
        mad_scores = [mad(dat[i, :], scale=1) for i in range(n_channels)]

        # compute robust z-scores for each channel
        rz_scores = 0.6745 * (mad_scores - np.nanmedian(mad_scores)) / \
            mad(mad_scores, scale=1)

        # channels identified by deviation criterion
        bad_deviation = \
            [channels[i] for i in np.where(np.abs(rz_scores) >= 5.0)[0]]

        bad_channels.update(deviation=np.asarray(bad_deviation))

        if return_z_scores:
            bad_channels.update(deviation_z_scores=rz_scores)

    # 3) find channels with low correlation to other channels
    if 'correlation' in method:
        # check that sampling frequency argument was provided
        if not sfreq:
            raise ValueError('If "inst" is not an instance of BaseRaw a '
                             'sampling frequency must be provided. Usually '
                             'the sampling frequency of the EEG recording in '
                             'question.')

        # based on the length of the provided data,
        # determine size and amount of time windows for analyses
        corr_frames = time_step * sfreq
        corr_window = np.arange(corr_frames)
        # sample index (i.e., time offsets) for each time window
        # to use for correlation analysis
        corr_offsets = np.arange(1, (n_samples - corr_frames), corr_frames)
        n_corr_steps = corr_offsets.shape[0]

        # place holders for correlation coefficients
        max_r = np.ones((n_channels, n_corr_steps))
        channel_r = np.ones((n_corr_steps, n_channels))

        # create time windows for analysis
        dat_t = np.transpose(dat)
        dat_windowed = np.reshape(
            np.transpose(dat_t[0: corr_window.shape[0] * n_corr_steps, :]),
            (n_channels, corr_window.shape[0], n_corr_steps),
            order="F",)

        # compute (pearson) correlation coefficient across channels
        # (for each channel and analysis time window)
        # take the absolute of the 98th percentile of the correlations with
        # the other channels as a measure of how well that channel is
        # correlated to other channels
        for k in range(0, n_corr_steps):
            eeg_portion = np.transpose(np.squeeze(dat_windowed[:, :, k]))
            window_correlation = np.corrcoef(np.transpose(eeg_portion))
            abs_corr = np.abs(
                np.subtract(
                    window_correlation,
                    np.diag(np.diag(window_correlation))))
            channel_r[k, :] = np.quantile(abs_corr, 0.98, axis=0)

        # fill in the actual correlations
        max_r[np.arange(0, n_channels), :] = np.transpose(channel_r)

        # check which channels correlate badly with the other channels (i.e.,
        # are below correlation threshold) in a certain fraction of windows
        # (bad_time_threshold)
        thresholded_correlations = max_r < r_threshold
        thresholded_correlations = thresholded_correlations.astype(int)
        frac_bad_corr_windows = np.mean(thresholded_correlations, axis=1)

        # find the corresponding channel names and return
        bad_idxs = np.argwhere(frac_bad_corr_windows > percent_threshold)
        uncorrelated_channels = [channels[int(bad)] for bad in bad_idxs]

        bad_channels.update(correlation=np.asarray(uncorrelated_channels))

    return bad_channels
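# Hedged usage sketch on a synthetic 8-channel recording, exercising the
# numpy-array code path above (no MNE Raw object needed). One channel is
# made nearly flat; the thresholds are the function defaults.
import numpy as np

rng = np.random.default_rng(4)
sfreq = 250.0
dat = rng.normal(scale=20, size=(8, int(60 * sfreq)))  # ~1 min, microvolts
dat[3] *= 1e-3  # near-flat channel
names = ['ch%d' % i for i in range(8)]

bads = find_bad_channels(dat, method=['flat', 'deviation'],
                         sfreq=sfreq, channels=names)
print(bads['flat'], bads['deviation'])  # ch3 should appear in both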
def baseline_noise_estimation(y: np.ndarray) -> Tuple[np.ndarray, float]:
    # Noise estimation
    # ----------------
    #
    # if y = s + b + e
    # where s is the peak signal, b is a baseline and e is an error term,
    # some assumptions:
    # 1. s is symmetric. This ensures that the cumulative sum of the
    #    difference of the peak is ~ 0.
    # 2. e ~ N(0, sigma) iid.
    # 3. The derivative of b, |db/dx|, is small. In particular, for two
    #    consecutive points |b[n + 1] - b[n]| << sigma
    #
    # From this we can say that for two consecutive points the following
    # approximation is valid:
    #
    # dy[n] = y[n + 1] - y[n] ~ s[n + 1] - s[n] + e
    #
    # If there's no peak signal, then:
    #
    # dy[n] ~= e ~ N(0, sqrt(2) * sigma)
    #
    # (The sqrt(2) term comes from adding two independent normal random
    # variables with std = sigma.)
    #
    # To remove zones with peaks we use an iterative approach, where we
    # remove values above the 90th percentile of the signal. The noise is
    # computed as the std of the remaining values of dy, using the MAD as
    # a robust estimator of std. Using this noise value, we find baseline
    # points and, using these points, we compute a new noise value from
    # the dy values. If the difference is greater than 50 percent, the
    # procedure is repeated, now removing values above the 80th percentile
    # of the signal, and so on.
    #
    # Baseline estimation
    # -------------------
    # The points where dy is smaller than three times the noise are
    # considered as baseline. The baseline is then interpolated in the
    # peak zones.

    quantiles = np.linspace(0.1, 0.9, 9)[::-1]
    dy = np.diff(y)
    dy_abs = np.abs(dy)
    noise_found = False
    noise = 0
    for q in quantiles:
        # initial noise estimation
        threshold = np.quantile(y, q)
        noise = mad(dy[y[1:] < threshold]) / np.sqrt(2)

        # prevent noise equal to zero or nan
        if np.isnan(noise) or np.isclose(noise, 0):
            noise = np.finfo(np.float64).eps

        # detect baseline points
        baseline_mask = (dy_abs <= (3 * noise)) & (y[1:] < threshold)
        baseline_index = np.where(baseline_mask)[0] + 1

        # compare the noise value obtained with the index selected as baseline
        new_noise = mad(dy[baseline_index - 1]) / np.sqrt(2)
        dnoise = np.abs(new_noise - noise) / noise

        # checks the difference with the new noise value
        if dnoise <= 0.5:
            noise = new_noise
            baseline_mask = (dy_abs <= (3 * noise)) & (y[1:] < threshold)
            baseline_index = np.where(baseline_mask)[0] + 1
            if baseline_index.size:
                baseline_index = _remove_non_consecutive_index(baseline_index)
                noise_found = True
                break

    # fallback to the noise value using q = 0.5 if there was no convergence
    if (not noise_found) or (baseline_index.size == 0):
        threshold = np.quantile(y, 0.5)
        noise = mad(dy[y[1:] < threshold]) / np.sqrt(2)
        baseline_index = np.where(dy_abs <= (3 * noise))[0] + 1
        # if baseline is still empty, return a constant baseline
        if baseline_index.size == 0:
            noise = max(np.finfo(np.float64).eps, mad(y[y < threshold]))
            baseline = np.ones_like(y) * y.min()
            return baseline, noise

    # append first and last elements if they are not part of the baseline;
    # this is a necessary step before interpolation.
    baseline_x, baseline_y = _get_baseline_points(y, baseline_index)

    # interpolate baseline to have the same size as y
    interpolator = interp1d(baseline_x, baseline_y)
    baseline = interpolator(np.arange(y.size))

    return baseline, noise
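# Hedged usage sketch: a Gaussian peak over a slowly drifting baseline
# plus iid noise. Helpers such as _get_baseline_points are assumed to be
# defined in this module (as above); the exact scale of the estimate
# depends on which mad normalization the module imports.
import numpy as np

rng = np.random.default_rng(5)
grid = np.arange(500)
true_sigma = 0.05
y = (np.exp(-0.5 * ((grid - 250) / 10) ** 2)  # peak
     + 1e-4 * grid                            # slow baseline drift
     + rng.normal(scale=true_sigma, size=grid.size))

baseline, noise = baseline_noise_estimation(y)
print('estimated noise: %.3f (true sigma %.3f)' % (noise, true_sigma))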
plt.plot(wavd, flux, alpha=0.4, label='raw')

# edge removal
ts = 10
te = -100
wavd = wavd[ts:te]
flux = flux[ts:te]

# outlier
md = medfilt(flux, kernel_size=17)  # IRD/MMF
# md = medfilt(flux, kernel_size=7)  # REACH
medf = flux - md
plt.plot(wavd, medf, color='gray', alpha=0.4, label='flux-median_filt')
sn = 5.0
plt.axhline(sn * mad(medf), color='gray', ls='dashed', alpha=0.4)
mask = np.abs(medf - np.median(medf)) < sn * mad(medf)
plt.plot(wavd[mask], medf[mask], '+', color='C5', alpha=0.4,
         label='flux-median_filt')

# Wavelength mask
mask = mask * (wavd < 15690.)  ###
flux = flux[mask]
wavd = wavd[mask]
plt.plot(wavd, flux, alpha=0.7, color='C2', label='cleaned')
def plot_phase_and_roc(fld, ncar, dgrid, xlamds, kernel_size=11, printQ=False):
    # ipeak = np.argmax(np.sum(np.sum(np.abs(fld)**2,axis=1),axis=1))
    imid = int(ncar / 2)
    dimid = int(ncar * 0.1)
    # ipeak = np.argmax(np.abs(fld[:,imid,imid])**2)
    ipeak = np.argmax(
        np.sum(np.sum(np.abs(fld[:, imid - dimid:imid + dimid,
                                 imid - dimid:imid + dimid])**2,
                      axis=1), axis=1))
    xs = dgrid * np.linspace(-1, 1, ncar)
    dx_m = xs[1] - xs[0]
    xs *= 1e6

    powx = rolling_average(np.abs(fld[ipeak, :, imid])**2,
                           kernel_size=kernel_size)
    powy = rolling_average(np.abs(fld[ipeak, imid])**2,
                           kernel_size=kernel_size)
    wx = fwhm(powx)[0] * dx_m
    wy = fwhm(powy)[0] * dx_m
    plt.plot(xs, powx, label='x')
    plt.plot(xs, powy, label='y')
    plt.xlabel('Transverse position (um)')
    plt.ylabel('Slice power (arb.)')
    plt.tight_layout()
    plt.legend()
    plt.show()

    xphase = np.unwrap(np.angle(fld[ipeak, :, imid]))
    yphase = np.unwrap(np.angle(fld[ipeak, imid]))
    # plt.plot(xs,xphase,label='x');
    # plt.plot(xs,yphase);
    yphase = rolling_average(yphase, kernel_size=kernel_size)
    xphase = rolling_average(xphase, kernel_size=kernel_size)
    plt.plot(xs, xphase, label='x')
    plt.plot(xs, yphase, label='y')
    plt.xlabel('Transverse position (um)')
    plt.ylabel('phase (rad)')
    plt.tight_layout()
    plt.legend()
    plt.show()

    phasefactor = -2. * np.pi / xlamds * dx_m**2
    rocy = np.diff(np.diff(yphase))
    rocx = np.diff(np.diff(xphase))
    rocy = rolling_average(rocy, kernel_size=kernel_size)
    rocx = rolling_average(rocx, kernel_size=kernel_size)
    # plt.plot(xs[1:-1],rocx); plt.plot(xs[1:-1],rocy); plt.show()
    rocy = 1. / phasefactor / rocy
    rocx = 1. / phasefactor / rocx
    plt.plot(xs[1:-1], rocx, label='x')
    plt.plot(xs[1:-1], rocy, label='y')
    rocs = np.reshape([rocy, rocx], -1)
    rocmean = np.median(rocs)
    rocstd = mad(rocs)
    try:
        ylim = np.array([-1, 1]) * rocstd + rocmean
        plt.ylim(ylim)
    except:
        pass
    plt.xlabel('Transverse position (um)')
    plt.ylabel('Radius of curvature (m)')
    plt.tight_layout()
    plt.legend()
    plt.show()

    Rx = rocx[imid]
    Ry = rocy[imid]
    pi = np.pi
    z0x = pi**2 * Rx * wx**4 / (pi**2 * wx**4 + Rx**2 * xlamds**2)
    z0y = pi**2 * Ry * wy**4 / (pi**2 * wy**4 + Ry**2 * xlamds**2)
    w0x = Rx * wx * xlamds / np.sqrt(pi**2 * wx**4 + Rx**2 * xlamds**2)
    w0y = Ry * wy * xlamds / np.sqrt(pi**2 * wy**4 + Ry**2 * xlamds**2)
    zrx = pi * w0x**2 / xlamds
    zry = pi * w0y**2 / xlamds
    # Solve[{R == z (1 + (zr/z)^2), w^2 == w0^2 (1 + (z/zr)^2)} /.
    #       zr -> \[Pi] w0^2/\[Lambda]0, {z, w0}]
    zwdic = {
        'Rx': Rx, 'Ry': Ry,
        'wx': wx, 'wy': wy,
        'z0x': z0x, 'z0y': z0y,
        'w0x': w0x, 'w0y': w0y,
        'zrx': zrx, 'zry': zry
    }
    if printQ:
        print(zwdic)
    return zwdic
def fitIntegratedIntensity(stack, line, outDir, fwhm=None,
                           maxAbsVel=250 * u.km / u.s, snThreshold=3.0):
    '''
    calculate integrated intensity via a gaussian fit

    input:
        stack: single stack
        outDir: output directory for plots and diagnostics
        fwhm: fwhm to use for upper limit estimate
        maxAbsVel: maximum velocity at which we expect emission.
        snThreshold: S/N threshold for peak finding

    Date        Programmer      Description of Changes
    ----------------------------------------------------------------------
    5/13/2021   A.A. Kepley     Original Code
    '''

    # default is to scale to normal distribution
    from scipy.stats import median_absolute_deviation as mad
    from astropy.modeling import models, fitting
    from scipy import integrate
    from scipy.stats import f

    spectral_axis = stack['spectral_axis']
    stack_profile = stack['stack_profile_' + line]

    chanwidth = spectral_axis[1] - spectral_axis[0]

    lineFreeChans = (spectral_axis > maxAbsVel) | (spectral_axis < -maxAbsVel)
    # mad is already scaled to gaussian distribution
    noisePerChan = mad(stack_profile[lineFreeChans]) * stack_profile.unit

    lineChans = (spectral_axis < maxAbsVel) & (spectral_axis > -maxAbsVel)

    plt.clf()
    fig, myax = plt.subplots(nrows=1, ncols=1, figsize=(8, 6))
    plt.plot(spectral_axis, stack_profile, label='data')
    plt.xlabel('Velocity - ' + spectral_axis.unit.to_string())
    plt.ylabel('Average Intensity - ' + stack_profile.unit.to_string())
    plt.title(stack['galaxy'] + ' ' + line + ' ' + stack['bin_type'] + ' ' +
              stack['bin_mean'].to_string())
    plt.text(0.07, 0.95, "Noise=" + noisePerChan.to_string(),
             transform=myax.transAxes)
    plt.axhspan(-3.0 * noisePerChan.value, 3.0 * noisePerChan.value,
                color='gray', alpha=0.2)

    if np.any(stack_profile[lineChans] > snThreshold * noisePerChan):
        # fit line using Gaussian

        # construct weight vector
        weights = np.ones(len(stack_profile)) / noisePerChan.value

        # start off by fitting one Gaussian
        amp_est = max(stack_profile)
        peak_cut = spectral_axis[stack_profile > amp_est * 0.5]
        fwhm_est = max(peak_cut) - min(peak_cut)
        sigma_est = fwhm_est / 2.355

        init_g = models.Gaussian1D(amplitude=amp_est.value,
                                   stddev=sigma_est.value)
        init_g.amplitude.min = 0.0
        init_g.stddev.min = 6.0
        init_g.stddev.max = 200.0

        fit_g = fitting.LevMarLSQFitter()
        result_g = fit_g(init_g, spectral_axis.value, stack_profile.value,
                         weights=weights)

        myax.plot(spectral_axis, result_g(spectral_axis.value),
                  label='1 Gauss')

        # Now fit two Gaussians
        init_g1 = models.Gaussian1D(amplitude=result_g.amplitude / 2.0,
                                    stddev=result_g.stddev / 2.0,
                                    mean=result_g.mean + result_g.stddev)
        init_g1.amplitude.min = 0.0
        init_g1.stddev.min = 6.0
        init_g1.stddev.max = 200.0

        init_g2 = models.Gaussian1D(amplitude=result_g.amplitude / 2.0,
                                    stddev=result_g.stddev / 2.0,
                                    mean=result_g.mean - result_g.stddev)
        init_g2.amplitude.min = 0.0
        init_g2.stddev.min = 6.0
        init_g2.stddev.max = 200.0

        init_g1_g2 = init_g1 + init_g2

        fit_g1_g2 = fitting.LevMarLSQFitter()
        result_g1_g2 = fit_g1_g2(init_g1_g2, spectral_axis.value,
                                 stack_profile.value, weights=weights)

        plt.plot(spectral_axis, result_g1_g2(spectral_axis.value),
                 label='2 Gauss')

        # calculate reduced chi-square for each fit.
        chisquare_g, chisquare_r_g = chiSquare(stack_profile.value,
                                               result_g(spectral_axis.value),
                                               1.0 / weights, nparams=3)
        chisquare_g1_g2, chisquare_r_g1_g2 = chiSquare(
            stack_profile.value, result_g1_g2(spectral_axis.value),
            1.0 / weights, nparams=6)

        # calculate f-value by taking the ratio of the chisquare values
        ## TODO: I think this is right. It's the definition, but the
        ## distribution to compare to is below.
        fval = chisquare_r_g / chisquare_r_g1_g2

        # sf = survival function = 1 - cdf
        # first parameter in f is the difference in the number of degrees
        # of freedom between the two fits (here 6-3). The second is the
        # number of degrees of freedom in the 2nd (2 gaussian
        # fit). Citation: wikipedia article on F statistics and
        # regression. It's consistent with the description in Bevington
        # and Robinson.
        pvalue = f.sf(fval, 6 - 3, (len(stack_profile) - 6.0))

        myax.text(0.07, 0.9, "Chi_1=" + str(chisquare_r_g),
                  transform=myax.transAxes)
        myax.text(0.07, 0.85, "Chi_2=" + str(chisquare_r_g1_g2),
                  transform=myax.transAxes)
        myax.text(0.07, 0.8, "F=" + str(fval), transform=myax.transAxes)
        myax.text(0.07, 0.75, "p=" + str(pvalue), transform=myax.transAxes)

        # For diagnostics purposes.
        # print(chisquare_r_g, chisquare_r_g1_g2, fval, pvalue)

        # calculate integrated intensity from fits. If the pvalue is
        # small, we reject the null hypothesis that the double-gaussian
        # fits as well as a single gaussian.
        if pvalue < 0.05:
            stack_int = integrate.quad(
                result_g1_g2, -maxAbsVel.value, maxAbsVel.value
            )[0] * stack_profile.unit * spectral_axis.unit
            stack_int_err = np.sqrt(
                fwhm_est / chanwidth) * chanwidth * noisePerChan
            fwhm = fwhm_est
            myax.text(0.07, 0.7, 'Best: 2 Gauss', transform=myax.transAxes)
        else:
            stack_int = integrate.quad(
                result_g, -maxAbsVel.value, maxAbsVel.value
            )[0] * stack_profile.unit * spectral_axis.unit
            stack_int_err = np.sqrt(
                fwhm_est / chanwidth) * chanwidth * noisePerChan
            fwhm = fwhm_est
            myax.text(0.07, 0.7, 'Best: 1 Gauss', transform=myax.transAxes)

        uplim = False

    elif fwhm:
        stack_int_err = np.sqrt(fwhm / chanwidth) * chanwidth * noisePerChan
        stack_int = snThreshold * stack_int_err
        uplim = True

    else:
        stack_int = np.nan * spectral_axis.unit * stack_profile.unit
        stack_int_err = np.nan * spectral_axis.unit * stack_profile.unit
        fwhm = np.nan * spectral_axis.unit
        uplim = True

    plt.legend(loc='upper right')
    plotname = stack['galaxy'] + '_' + line + '_' + stack['bin_type'] + \
        '_' + str(stack['bin_mean'].value) + '_fit.png'
    plt.savefig(os.path.join(outDir, plotname))
    plt.close()

    return stack_int, stack_int_err, fwhm, uplim
def dho_fit(
    t,
    y,
    yerr,
    init_func=None,
    neg_lp_func=None,
    optimizer_func=None,
    n_opt=20,
    user_bounds=None,
    scipy_opt_kwargs={},
    scipy_opt_options={},
    debug=False,
):
    """
    Fit DHO to time series

    The default settings are optimized for normalized LCs.

    Args:
        t (array(float)): Time stamps of the input time series (the default
            unit is day).
        y (array(float)): y values of the input time series.
        yerr (array(float)): Measurement errors for y values.
        init_func (object, optional): A user-provided function to generate
            initial guesses for the optimizer. Defaults to None.
        neg_lp_func (object, optional): A user-provided function to compute
            negative probability given an array of parameters, an array of
            time series values and a celerite GP instance. Defaults to None.
        optimizer_func (object, optional): A user-provided optimizer function.
            Defaults to None.
        n_opt (int, optional): Number of optimizers to run. Defaults to 20.
        user_bounds (list, optional): Parameter boundaries for the default
            optimizer and the default flat prior. Defaults to None.
        scipy_opt_kwargs (dict, optional): Keyword arguments for
            scipy.optimize.minimize. Defaults to {}.
        scipy_opt_options (dict, optional): "options" argument for
            scipy.optimize.minimize. Defaults to {}.
        debug (bool, optional): Turn on/off debug mode. Defaults to False.

    Raises:
        celerite.solver.LinAlgError: For non-positive definite autocovariance
            matrices.

    Returns:
        array(float): Best-fit DHO parameters
    """
    # determine user defined boundaries if any
    if user_bounds is not None and (len(user_bounds) == 4):
        bounds = user_bounds
    else:
        bounds = [(-15, 15)] * 4
        bounds[2:] = [(a[0] - 8, a[1] - 8) for a in bounds[2:]]

    # re-position/normalize lc
    t = t - t[0]
    y = y - np.median(y)
    y_std = mad(y) * 1.4826
    y = y / y_std
    yerr = yerr / y_std

    # determine negative log probability function
    if neg_lp_func is None:
        neg_lp = partial(neg_lp_flat, bounds=np.array(bounds), mode="param")
    else:
        neg_lp = neg_lp_func

    # initialize parameter, kernel and GP
    kernel = DHO_term(*dho_log_param_init())
    gp = GP(kernel, mean=0)
    gp.compute(t, yerr)

    # determine initialize function
    if init_func is None:
        init = partial(dho_log_param_init)
    else:
        init = init_func

    # determine the optimizer function
    if optimizer_func is None:
        scipy_opt_kwargs.update({"method": "L-BFGS-B", "bounds": bounds})
        opt = partial(
            scipy_opt,
            mode="param",
            opt_kwargs=scipy_opt_kwargs,
            opt_options=scipy_opt_options,
            debug=debug,
        )
    else:
        opt = optimizer_func

    # get best-fit solution & adjust MA params (multiply by y_std)
    best_fit_return = opt(y, gp, init, neg_lp, n_opt)
    best_fit_return[2:] = best_fit_return[2:] * y_std

    return best_fit_return
    resultArr[n, :-1] *= 60.0  # gps to gpm
    if args.print:
        printResult(result, model, full=False)
    if args.plot:
        plotResids(result, model, measFlows, testDateString, pp)
        plotScheds(result, model, measFlows, testDateString, pp)

if args.plot:
    pp.close()

if args.print:
    print(resultArr)

meds = np.median(resultArr, axis=0)
mads = mad(resultArr, axis=0)

for n in range(len(meds)):
    if flowData.get(activeFlowLabels[n]):
        flowData[activeFlowLabels[n]] = (meds[n], mads[n])

if args.updateSheet:
    updateSheet(testDate.timestamp(), flowData)

if args.dataOut:
    # check for existence. if exists just append line. if not, put out
    # header line first
    if os.path.exists(args.dataOut):
        df = open(args.dataOut, 'a')
    else:
        df = open(args.dataOut, 'w')
    if args.csv:
def good_maps(ref_map):
    """Creates a ref map with only good satellites"""

    pointings = ["0", "2", "4"]

    # load data from map .npz file
    f = Path(f"{map_dir}/{ref_map}")
    tile_data = np.load(f, allow_pickle=True)
    tile_data = {key: tile_data[key].item() for key in tile_data}
    ref_map = tile_data["ref_map"]

    # Good sats from which to make plots
    good_sats = [
        25338, 25982, 25984, 25985,
        28654, 40086, 40087, 40091,
        41179, 41180, 41182, 41183,
        41184, 41185, 41187, 41188,
        41189, 44387,
    ]
    orbcomm = [
        25982, 25984, 25985, 40086, 40087, 40091, 41179, 41180,
        41182, 41183, 41184, 41185, 41187, 41188, 41189,
    ]
    noaa = [25338, 28654]
    meteor = [44387]

    sat_types = [orbcomm, noaa, meteor]
    sat_types_names = ["orbcomm", "noaa", "meteor"]

    map_dict = {}

    for index, s_type in enumerate(sat_types):
        # Empty good map
        good_map = [[] for pixel in range(hp.nside2npix(nside))]

        for p in pointings:
            # append to good map from all good sat data
            for sat in s_type:
                for pix in range(hp.nside2npix(nside)):
                    good_map[pix].extend(ref_map[p][sat][pix])

        mad_map = []
        for j in good_map:
            if j != []:
                j = np.asarray(j)
                j = j[~np.isnan(j)]
                mad_map.append(mad(j))
            else:
                mad_map.append(np.nan)

        good_map = [np.nanmedian(pixel) for pixel in good_map]

        map_type = [good_map, mad_map]
        map_dict[sat_types_names[index]] = map_type

    return map_dict
def carma_fit(
    t,
    y,
    yerr,
    p,
    q,
    init_func=None,
    neg_lp_func=None,
    optimizer_func=None,
    n_opt=20,
    user_bounds=None,
    scipy_opt_kwargs={},
    scipy_opt_options={},
    debug=False,
):
    """
    Fit an arbitrary CARMA model

    The default settings are optimized for normalized LCs.

    Args:
        t (array(float)): Time stamps of the input time series (the default
            unit is day).
        y (array(float)): y values of the input time series.
        yerr (array(float)): Measurement errors for y values.
        p (int): The p order of a CARMA(p, q) model.
        q (int): The q order of a CARMA(p, q) model.
        init_func (object, optional): A user-provided function to generate
            initial guesses for the optimizer. Defaults to None.
        neg_lp_func (object, optional): A user-provided function to compute
            negative probability given an array of parameters, an array of
            time series values and a celerite GP instance. Defaults to None.
        optimizer_func (object, optional): A user-provided optimizer function.
            Defaults to None.
        n_opt (int, optional): Number of optimizers to run. Defaults to 20.
        user_bounds (array(float), optional): Parameter boundaries for the
            default optimizer. If p > 2, these are boundaries for the
            coefficients of the factored polynomial. Defaults to None.
        scipy_opt_kwargs (dict, optional): Keyword arguments for
            scipy.optimize.minimize. Defaults to {}.
        scipy_opt_options (dict, optional): "options" argument for
            scipy.optimize.minimize. Defaults to {}.
        debug (bool, optional): Turn on/off debug mode. Defaults to False.

    Raises:
        celerite.solver.LinAlgError: For non-positive definite autocovariance
            matrices.

    Returns:
        array(float): Best-fit parameters
    """
    # set core config
    dim = int(p + q + 1)
    mode = "fcoeff" if p > 2 else "param"

    # init bounds for fitting
    if user_bounds is not None and (len(user_bounds) == dim):
        bounds = user_bounds
    else:
        bounds = [(-15, 15)] * dim
        bounds[p:-1] = [(a[0] - 5, a[1] - 5) for a in bounds[p:-1]]
        bounds[-1] = (-15, 5)

    # re-position lc
    t = t - t[0]
    y = y - np.median(y)
    y_std = mad(y) * 1.4826
    y = y / y_std
    yerr = yerr / y_std

    # initialize parameter and kernel
    ARpars, MApars = sample_carma(p, q)
    kernel = CARMA_term(np.log(ARpars), np.log(MApars))
    gp = GP(kernel, mean=0)
    gp.compute(t, yerr)

    # determine/set init func
    if init_func is not None:
        init = init_func
    else:
        init = partial(carma_log_fcoeff_init, p, q)

    # determine/set negative log probability function
    if neg_lp_func is None:
        neg_lp = partial(neg_lp_flat, bounds=np.array(bounds), mode=mode)
    else:
        neg_lp = neg_lp_func

    # determine/set optimizer function
    if optimizer_func is None:
        scipy_opt_kwargs.update({"method": "L-BFGS-B", "bounds": bounds})
        opt = partial(
            scipy_opt,
            mode=mode,
            opt_kwargs=scipy_opt_kwargs,
            opt_options=scipy_opt_options,
            debug=debug,
        )
    else:
        opt = optimizer_func

    # get best-fit solution & adjust MA params (multiply by y_std)
    best_fit_return = opt(y, gp, init, neg_lp, n_opt)
    best_fit_return[p:] = best_fit_return[p:] * y_std

    return best_fit_return
def sumIntegratedIntensity(spectral_axis, stack_profile, fwhm=None,
                           maxAbsVel=250.0 * u.km / u.s, snThreshold=3.0):
    '''
    calculate the straight sum of the integrated intensity.

    Date        Programmer      Description of Changes
    ----------------------------------------------------------------------
    5/13/2021   A.A. Kepley     Original Code
    '''

    from astropy.modeling import models, fitting
    from scipy import integrate
    # default is to scale to normal distribution
    from scipy.stats import median_absolute_deviation as mad

    chanwidth = spectral_axis[1] - spectral_axis[0]

    lineFreeChans = (spectral_axis > maxAbsVel) | (spectral_axis < -maxAbsVel)
    # mad is already scaled to gaussian distribution
    noisePerChan = mad(stack_profile[lineFreeChans]) * stack_profile.unit

    lineChans = (spectral_axis < maxAbsVel) & (spectral_axis > -maxAbsVel)

    if np.any(stack_profile[lineChans] > snThreshold * noisePerChan):
        # sum line

        # start off by fitting one Gaussian
        amp_est = max(stack_profile)
        peak_cut = spectral_axis[stack_profile > amp_est * 0.5]
        fwhm = max(peak_cut) - min(peak_cut)
        sigma_est = fwhm / 2.355

        init_g = models.Gaussian1D(amplitude=amp_est, stddev=sigma_est)
        fit_g = fitting.LevMarLSQFitter()
        result_g = fit_g(init_g, spectral_axis, stack_profile)

        newLineChans = (
            (spectral_axis < (result_g.mean + 3.0 * result_g.stddev)) &
            (spectral_axis > (result_g.mean - 3.0 * result_g.stddev)))

        stack_sum = np.sum(stack_profile[newLineChans] * chanwidth)
        stack_sum_err = np.sqrt(fwhm / chanwidth) * chanwidth * noisePerChan
        uplim = False

    elif fwhm:
        stack_sum_err = np.sqrt(fwhm / chanwidth) * chanwidth * noisePerChan
        stack_sum = snThreshold * stack_sum_err
        uplim = True

    else:
        stack_sum_err = np.nan * stack_profile.unit * spectral_axis.unit
        stack_sum = np.nan * stack_profile.unit * spectral_axis.unit
        fwhm = np.nan * spectral_axis.unit
        uplim = True

    return stack_sum, stack_sum_err, fwhm, uplim
        timer = timeit.Timer(stmt=stmt, globals=globals())
        N = timer.autorange()[0]
        if N < 10:
            N *= 10
        vals = timer.repeat(N, 1)
        meas_times[i] = vals
        repeats[i] = N
        size[i] = np.log2(np.prod(grid))
        torch.cuda.empty_cache()
    except RuntimeError as ex:
        print(ex)
        break

# %%
median = np.array([np.median(times) for times in meas_times])
med_ab_dev = np.array([mad(times, scale='normal') for times in meas_times])

tag = COMPUTER + '_' + DEVICE + '_step'
np.savez('data\\' + tag, computer=COMPUTER, device=DEVICE, size=size,
         n_repeats=repeats, med=median, mad=med_ab_dev)
np.save(ps.paths['data'] + '..\\' + tag, np.array(meas_times, dtype='object'))
ax.set_yticks(np.arange(len(lbls)) + .5)
ax.set_yticklabels(np.flipud(np.asarray(lbls)), fontsize="x-small")
# plt.savefig(os.path.join(dst, "thal_glm.pdf"), bbox_inches = "tight")
ax.set_ylabel("Neocortical 'trisynaptic' timepoint", fontsize="x-small")
ax.yaxis.set_label_coords(-0.22, 0.5)
plt.savefig(os.path.join(dst, "nc_density_at_nc_timepoint.pdf"),
            bbox_inches="tight")

#%%
ratio_mean_density = np.array(mean_thal_density_per_brain / mean_nc_density_per_brain)
ratio_std_density = np.array(std_thal_density_per_brain / std_nc_density_per_brain)

# calculate median also
median_thal_density_per_brain = np.median(thal_density_per_brain, axis=0)
median_nc_density_per_brain = np.median(nc_density_per_brain, axis=0)
ratio_median_density = np.array(median_thal_density_per_brain / median_nc_density_per_brain)

from scipy.stats import median_absolute_deviation as mad
mad_thal_density_per_brain = mad(thal_density_per_brain, axis=0)
mad_nc_density_per_brain = mad(nc_density_per_brain, axis=0)
ratio_mad_density = np.array(mad_thal_density_per_brain / mad_nc_density_per_brain)

import pandas as pd
df = pd.DataFrame()
d = 4  # decimals to round to
df["mean_thal_density"] = np.round(mean_thal_density_per_brain, d)
df["mean_nc_density"] = np.round(mean_nc_density_per_brain, d)
df["std_thal_density"] = np.round(std_thal_density_per_brain, d)
df["std_nc_density"] = np.round(std_nc_density_per_brain, d)
df["median_thal_density"] = np.round(median_thal_density_per_brain, d)
df["median_nc_density"] = np.round(median_nc_density_per_brain, d)
df["mad_thal_density"] = np.round(mad_thal_density_per_brain, d)
df["mad_nc_density"] = np.round(mad_nc_density_per_brain, d)