def summarize(self):
    """ Summarizes a button as a dictionary of parameters mapped from
    their descriptors

    Arguments:
        None

    Returns:
        (dict) summary of button parameters

    """
    features_disk = ['median_button', 'summed_button', 'summed_button_BGsub',
                     'std_button', 'x_button_center', 'y_button_center',
                     'radius_button_disk']
    features_ann = ['median_button_annulus', 'summed_button_annulus_normed',
                    'std_button_annulus_localBG', 'inner_radius_button_annulus',
                    'outer_radius_button_annulus']

    if self.blankFlag:
        return dict(zip(features_disk + features_ann,
                        list(np.full(len(features_disk + features_ann), np.nan))))

    disk = self.get_disk()
    annulus = self.get_annulus()

    medI_disk = int(ma.median(disk))
    sumI_disk = int(disk.sum())
    sdI_disk = int(disk.std())

    medI_ann = int(ma.median(annulus))
    sumI_ann_normed = int(annulus.sum() / self.annulus_to_disk_ratio)
    sdI_ann = int(annulus.std())

    sumI_BGsub = sumI_disk - sumI_ann_normed

    vals_disk = [medI_disk, sumI_disk, sumI_BGsub, sdI_disk,
                 self.center[0], self.center[1], self.disk_radius]
    vals_ann = [medI_ann, sumI_ann_normed, sdI_ann,
                self.annulus_radii[0], self.annulus_radii[1]]

    return dict(zip(features_disk + features_ann, vals_disk + vals_ann))
def compare_medians_ms(group_1, group_2, axis=None):
    """
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.  Has to be of size >=7.
    group_2 : array_like
        Second dataset.  Has to be of size >=7.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    """
    (med_1, med_2) = (ma.median(group_1, axis=axis),
                      ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1**2 + std_2**2)
    return 1 - norm.cdf(W)
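# A quick sanity check of the routine above against the version shipped in
# SciPy (scipy.stats.mstats.compare_medians_ms); a value near zero means the
# two medians differ significantly. A minimal sketch, assuming SciPy is
# installed; the sample arrays are illustrative only.
import numpy as np
from scipy.stats import mstats

a = np.array([1, 2, 3, 4, 5, 6, 7])
b = np.array([8, 9, 10, 11, 12, 13, 14])
print(mstats.compare_medians_ms(a, b))  # ~1e-05: the medians clearly differ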
def transform(self, pfull, p, q):
    pfull2 = pfull.copy()           # copy
    p2 = p.copy()
    q2 = q.copy()
    q2mean = q2.mean()              # target mean

    pfull2 -= median(pfull2)        # center
    p2 -= median(p2)
    q2 -= median(q2)

    q5 = self.__percentile(q2, 5)
    q25 = self.__percentile(q2, 25)
    q75 = self.__percentile(q2, 75)
    q95 = self.__percentile(q2, 95)

    p25 = self.__percentile(p2, 25)     # 25th
    if p25:
        pfull2[pfull2 < 0] *= q25 / p25
    p75 = self.__percentile(p2, 75)     # 75th
    if p75:
        pfull2[pfull2 > 0] *= q75 / p75
    p5 = self.__percentile(p2, 5)       # 5th
    if p5:
        pfull2[pfull2 < q25] *= q5 / p5
    p95 = self.__percentile(p2, 95)     # 95th
    if p95:
        pfull2[pfull2 > q75] *= q95 / p95

    pfull2 += q2mean - pfull2.mean()    # reshift
    return pfull2
def get_targets(K1, K2, method='F', threshold=True):
    """
    returns a binary threshold (conservative)
    F -> frangi filter with default arguments (greater than mean)
    R -> blobness measure (less than median)
    S -> anisotropy measure (greater than median)
    """
    if method == 'R':
        R = (K1 / K2) ** 2
        if threshold:
            T = R < ma.median(R)
        else:
            T = R
    elif method == 'S':
        S = (K1**2 + K2**2) / 2
        if threshold:
            T = S > ma.median(S)
        else:
            T = S
    elif method == 'F':
        R = (K1 / K2) ** 2
        S = (K1**2 + K2**2) / 2
        beta, c = 0.5, 15
        F = np.exp(-R / (2 * beta**2))
        F *= 1 - np.exp(-S / (2 * c**2))
        T = (K2 < 0) * F
        if threshold:
            T = T > (T[T != 0]).mean()
    else:
        raise ValueError('Need to select method as "F", "S", or "R"')
    return T
def biweight(x, cst):
    """
    Computes the biweight average and midvariance for a given 1D array.
    Returns a tuple (biweight mean, biweight variance).

    Parameters
    ----------
    x : {ndarray}
        Input Array
    cst : {float}
        Parameter controlling how outliers are censored.

    Notes
    -----
    The function is restricted to 1D data only.
    """
    assert x.ndim == 1, "1D array only !"
    xmed = ma.median(x, 0)
    manom = x - xmed
    mad = ma.median(ma.absolute(manom))
    u_i = (manom / float(cst * mad))
    keep = ma.less_equal(ma.absolute(u_i), 1.).astype(float)
    u_i *= keep
    w_i = (1 - u_i**2) * keep  # points with |u| > 1 get zero weight (censored)
    if ma.count(w_i) > 0:
        biw_m = xmed + ma.sum(manom * w_i**2) / ma.sum(w_i**2)
    else:
        biw_m = xmed
    biw_sd = ma.sqrt(ma.count(x) * ma.sum(manom**2 * w_i**4))
    biw_sd *= 1. / ma.absolute(ma.sum(w_i * (1 - 5 * u_i**2)))
    return (biw_m, biw_sd.item())
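# Hypothetical usage sketch for biweight() above: with the censoring in
# place, one gross outlier barely moves the biweight location. cst=6 is a
# conventional tuning constant; all values here are illustrative.
import numpy.ma as ma

x = ma.array([9.8, 10.1, 10.0, 9.9, 10.2, 55.0])  # 55.0 is an outlier
biw_mean, biw_sd = biweight(x, cst=6)
print(biw_mean, biw_sd)  # location stays close to 10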
def MAD(a, c=0.6745, axis=None):
    """
    Median Absolute Deviation along given axis of an array:

    median(abs(a - median(a))) / c

    c = 0.6745 is the constant to convert from MAD to std; it is used by
    default
    """
    a = ma.masked_where(a != a, a)
    if a.ndim == 1:
        d = ma.median(a)
        m = ma.median(ma.fabs(a - d) / c)
    else:
        d = ma.median(a, axis=axis)
        # I don't want the array to change so I have to copy it?
        if axis > 0:
            aswp = ma.swapaxes(a, 0, axis)
        else:
            aswp = a
        m = ma.median(ma.fabs(aswp - d) / c, axis=0)
    return m
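# Minimal usage sketch for MAD() above: NaNs are masked out by the
# a != a trick, and the single large outlier barely affects the estimate.
import numpy as np

data = np.array([1.0, 2.0, 3.0, np.nan, 100.0])
print(MAD(data))  # robust spread estimate over the finite entries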
def combine_nights(combined_catalog, filterlist, refcat):
    header = [
        'BEGIN CATALOG HEADER',
        'nfields 13',
        ' ra 1 0 d degrees %10.6f',
        ' dec 2 0 d degrees %10.6f',
        ' id 3 0 c INDEF %3d'
    ]
    for filt in filterlist:
        header.append(' {} {:2d} 0 r INDEF %6.3f'.format(
            filt, len(header) - 1))
        header.append(' {}err {:2d} 0 r INDEF %6.3f'.format(
            filt, len(header) - 1))
    header += ['END CATALOG HEADER', '']
    catalog = Table([refcat['ra'], refcat['dec'], refcat['id']],
                    meta={'comments': header}, masked=True)
    for filt in filterlist:
        mags = combined_catalog['mag'][combined_catalog['filter'] == filt]
        median = np.median(mags, axis=0)
        absdev_mag = mags - median
        mad = np.median(np.abs(absdev_mag), axis=0) * np.sqrt(pi / 2)
        mags.mask |= np.abs(absdev_mag) > 5 * mad
        catalog[filt] = np.median(mags, axis=0)
        catalog[filt + 'err'] = np.median(np.abs(mags - catalog[filt]),
                                          axis=0) * np.sqrt(pi / 2)
    return catalog
def single_correlation_flags(tf_plane, threshold=5.0, max_iter=5,
                             previous_sums=[], verbose=False):
    flags = tf_plane.mask
    sum_flags = flags.sum()
    if verbose:
        print('sum(flags): %s' % (sum_flags,))
        print('%5.3f%s flagged\n' %
              ((sum_flags * 100.0 / product(tf_plane.shape)), '%'))
    if sum_flags == product(flags.shape):
        return flags
    if max_iter <= 0:
        return ndimage.binary_dilation(flags, iterations=2)
    med = ma.median(tf_plane.real) + 1j * ma.median(tf_plane.imag)
    sigma = sqrt(ma.std(tf_plane.real)**2 + ma.std(tf_plane.imag)**2)
    bad_vis = abs(tf_plane.data - med) > threshold * sigma
    new_flags = logical_or(flags, bad_vis)
    new_data = ma.array(tf_plane.data, mask=new_flags)
    sum_flags = new_flags.sum()
    if verbose:
        print('sum_flags: %s' % (sum_flags,))
        print('%5.3f%s flagged\nstd: %6.4f' %
              ((sum_flags * 100.0 / product(tf_plane.shape)), '%',
               ma.std(new_data)))
        print(sum_flags)
        print(previous_sums)
        print('------------------------------------------------------------')
    if sum_flags == reduce(max, previous_sums, 0):
        return single_correlation_flags(new_data, threshold=threshold,
                                        max_iter=0,
                                        previous_sums=previous_sums + [sum_flags])
    else:
        return single_correlation_flags(new_data, threshold=threshold,
                                        max_iter=max_iter - 1,
                                        previous_sums=previous_sums + [sum_flags])
def ma_mad(x, axis=None):
    """Median absolute deviation"""
    median_x = ma.median(x, axis=axis)
    if axis is not None:
        median_x = ma.expand_dims(median_x, axis=axis)
    return ma.median(ma.abs(x - median_x), axis=axis)
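# Axis-aware usage sketch for ma_mad() above; ma.expand_dims keeps the
# per-row medians broadcastable against the 2-D input. Values illustrative.
import numpy.ma as ma

x = ma.array([[1., 2., 3.], [4., 5., 60.]],
             mask=[[False, False, False], [False, False, True]])
print(ma_mad(x, axis=1))  # MAD of each row over its unmasked entries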
def statistics(numpy_array):
    return {'mean':    ma.mean(numpy_array),
            'median':  ma.median(numpy_array.real) + 1j * ma.median(numpy_array.imag),
            'max':     ma.max(abs(numpy_array)),
            'min':     ma.min(abs(numpy_array)),
            'std':     ma.std(numpy_array),
            'stdmean': ma.std(numpy_array) / sqrt(sum(logical_not(numpy_array.mask)) - 1)}
def _median(A, axis=0, keepdims=True, weights=None):
    if weights is None:
        return npm.median(A, axis=axis, keepdims=keepdims)
    else:
        sh = A.shape
        A = A.reshape((sh[0], -1))
        W = weights.reshape((sh[0], -1))
        if axis == 0:
            # Reduce Temporal (with an optimization for the common case of
            # all spatial weights being the same for each time step)
            med = npm.median(A, axis=0)
            M = np.not_equal(W[0, :], np.bitwise_and.reduce(W, axis=0))
            # Only loop through those that have different weights in each
            # time step
            for j in np.flatnonzero(M):
                med[j] = _weighted_median(A[:, j], W[:, j])
            med = med.reshape((1, sh[1], sh[2]))
        else:  # axis = (1, 2)
            # Reduce Spatial
            med = np.zeros((sh[0]))
            for i in range(sh[0]):
                med[i] = _weighted_median(A[i, :], W[i, :])
            med = med.reshape((sh[0], 1, 1))
        return med
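# _weighted_median is referenced above but not shown. A minimal sketch of a
# lower weighted median, assuming non-negative weights; the name and exact
# behavior are assumptions, not the original helper.
import numpy as np

def _weighted_median(values, weights):
    order = np.argsort(values)
    v, w = np.asarray(values)[order], np.asarray(weights)[order]
    cdf = np.cumsum(w)
    # smallest value whose cumulative weight reaches half the total weight
    return v[np.searchsorted(cdf, 0.5 * cdf[-1])]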
def get_replica_summary(replicas, telem_dir):
    n_temp_samples = 15   # ~41.2 seconds
    temp_pad = 10 * 60    # 10 minutes before and after
    r_lines = ['replica,datestart,datestop,ra,dec,pre_temp,post_temp']
    import mica.archive.aca_hdr3
    import numpy.ma as ma
    for replica in replicas:
        r_start = DateTime(replica['datestart']).secs - temp_pad
        r_stop = DateTime(replica['datestop']).secs + temp_pad
        ccd_temp = mica.archive.aca_hdr3.MSID('ccd_temp', r_start, r_stop)
        ccd_temp = filter_ccd_temp(ccd_temp)
        # only bother with unmasked temps
        ccd_temp_vals = ccd_temp.vals[~ccd_temp.vals.mask]
        ccd_temp_times = ccd_temp.times[~ccd_temp.vals.mask]
        pre_mask = ccd_temp_times < DateTime(replica['datestart']).secs
        post_mask = ccd_temp_times > DateTime(replica['datestop']).secs
        pre = ccd_temp_vals[pre_mask][-n_temp_samples:]
        post = ccd_temp_vals[post_mask][0:n_temp_samples]
        state = db.fetchone("""select * from cmd_states
                               where datestart <= '%s'
                               and datestop > '%s'"""
                            % (replica['datestart'], replica['datestart']))
        r_lines.append("%d,%s,%s,%.2f,%.2f,%.4f,%.4f"
                       % (replica['replica'],
                          replica['datestart'], replica['datestop'],
                          state['ra'], state['dec'],
                          ma.median(pre), ma.median(post)))
    return Ska.Table.read_ascii_table(r_lines)
def compare_medians_ms(group_1, group_2, axis=None):
    """Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : {sequence}
        First dataset.
    group_2 : {sequence}
        Second dataset.
    axis : {integer}
        Axis along which the medians are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    A (p,) array of comparison values.

    """
    (med_1, med_2) = (ma.median(group_1, axis=axis),
                      ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1**2 + std_2**2)
    return 1 - norm.cdf(W)
def search_noise(data, low_deviation, high_deviation, max_diff):
    global logger
    high_info = list()
    low_info = list()
    jitter_info = list()

    spec_median = ma.median(data, axis=2)
    spec_max = spec_median.max(axis=1)
    spec_min = spec_median.min(axis=1)
    ref_value = ma.median(data)
    ref_diff = ma.median(spec_max) - ma.median(spec_min)
    ref_std = ma.std(spec_median)

    limit = ref_value + min(max((ref_std * 3.0), 0.75), high_deviation)
    n_secs = data.shape[1]
    logger.debug("median-signal=%5.3fdB, median-fluctuation=%5.3fdB, std=%5.3f, high-limit=%5.3fdB" %
                 (ref_value, ref_diff, ref_std, limit))

    for rcu in range(data.shape[0]):
        peaks = cSearchPeak(data[rcu, 0, :])
        if not peaks.valid_data:
            return (low_info, high_info, jitter_info)
        peaks.search(delta=10.0)
        if peaks.nMaxPeaks() >= 30:
            logger.debug("RCU=%d: found %d peaks, skip noise test" % (rcu, peaks.nMaxPeaks()))
        else:
            n_bad_high_secs = 0
            n_bad_low_secs = 0
            n_bad_jitter_secs = 0

            rcu_max_diff = spec_max[rcu] - spec_min[rcu]
            for val in spec_median[rcu, :]:
                # logger.debug("RCU=%d: high-noise value=%5.3fdB max-ref-value=%5.3fdB" % (rcu, val, ref_val))
                if ((val > limit) and (rcu_max_diff > 1.0)) or (val > (ref_value + high_deviation)):
                    n_bad_high_secs += 1
                if val < (ref_value + low_deviation):
                    n_bad_low_secs += 1

            if n_bad_high_secs > 0:
                high_info.append((rcu, spec_max[rcu], n_bad_high_secs, limit, rcu_max_diff))
                logger.debug("RCU=%d: max-noise=%5.3f %d of %d seconds bad" %
                             (rcu, spec_max[rcu], n_bad_high_secs, n_secs))
            if n_bad_low_secs > 0:
                low_info.append((rcu, spec_min[rcu], n_bad_low_secs,
                                 (ref_value + low_deviation), rcu_max_diff))
                logger.debug("RCU=%d: min-noise=%5.3f %d of %d seconds bad" %
                             (rcu, spec_min[rcu], n_bad_low_secs, n_secs))

            if (n_bad_high_secs == 0) and (n_bad_low_secs == 0):
                if rcu_max_diff > (ref_diff + max_diff):
                    check_high_value = ref_value + (ref_diff / 2.0)
                    check_low_value = ref_value - (ref_diff / 2.0)
                    for val in spec_median[rcu, :]:
                        if val > check_high_value or val < check_low_value:
                            n_bad_jitter_secs += 1
                    jitter_info.append((rcu, rcu_max_diff, ref_diff, n_bad_jitter_secs))
                    logger.debug("RCU=%d: max spectrum fluctuation %5.3f dB" %
                                 (rcu, rcu_max_diff))
    return (low_info, high_info, jitter_info)
def test_subtracts_baseline(self):
    rebin_freq.rebin(self.Data, 1.0)
    cal_scale.scale_by_cal(self.Data, sub_med=True)
    data = self.Data.data
    self.assertTrue(ma.allclose(ma.median(data, 0), 0.))
    # The following fails if you get rid of the rebin line, but in the 7th
    # digit.  Numpy must have only single precision somewhere.
    # self.assertAlmostEqual(ma.median(data[:,0,0,753]), 0.)
    self.assertAlmostEqual(ma.median(data), 0.)
def compare_medians_ms(group_1, group_2, axis=None):
    """
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.  Has to be of size >=7.
    group_2 : array_like
        Second dataset.  Has to be of size >=7.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    Examples
    --------
    >>> from scipy import stats
    >>> a = [1, 2, 3, 4, 5, 6, 7]
    >>> b = [8, 9, 10, 11, 12, 13, 14]
    >>> stats.mstats.compare_medians_ms(a, b, axis=None)
    1.0693225866553746e-05

    The function is vectorized to compute along a given axis.

    >>> import numpy as np
    >>> rng = np.random.default_rng()
    >>> x = rng.random(size=(3, 7))
    >>> y = rng.random(size=(3, 8))
    >>> stats.mstats.compare_medians_ms(x, y, axis=1)
    array([0.36908985, 0.36092538, 0.2765313 ])

    References
    ----------
    .. [1] McKean, Joseph W., and Ronald M. Schrader. "A comparison of
       methods for studentizing the sample median." Communications in
       Statistics-Simulation and Computation 13.6 (1984): 751-773.

    """
    (med_1, med_2) = (ma.median(group_1, axis=axis),
                      ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1**2 + std_2**2)
    return 1 - norm.cdf(W)
def test_scale(self):
    hanning.hanning_smooth(self.Data)
    rebin_freq.rebin(self.Data, 2.)
    cal_scale.scale_by_cal(self.Data)
    data = self.Data.data
    self.assertTrue(ma.allclose(ma.median(data[:, 0, 0, :] -
                                          data[:, 0, 1, :], 0), 1.0))
    self.assertTrue(ma.allclose(ma.median(data[:, 3, 0, :] -
                                          data[:, 3, 1, :], 0), 1.0))
def simple_extraction(x, y, binNum, cube, verbose=False, type='sum'):
    """Extract a spectrum by simply summing all the pixels in a bin at
    each wavelength value

    Ignore nans by making a global 'nan' mask and using the bitwise and
    with that and the bin mask.

    Can either add spectra by summing or median combining (very simply!
    No continuum addition or anything)
    """
    binNum = binNum.astype(int)
    number_of_bins = len(np.unique(binNum)[np.unique(binNum) != -1])

    d1, d2, d3 = cube.shape
    spectra = np.empty((number_of_bins, d1))

    # Mask all nans in the cube
    nan_mask = np.zeros_like(cube).astype(bool)
    nan_values = np.where(~np.isfinite(cube))
    nan_mask[nan_values] = True

    for i in range(number_of_bins):
        if verbose:
            print("Extracting spectrum {} of {}".format(i, number_of_bins))

        # Create a mask with True where the bin indices are, false
        # everywhere else
        inds = np.where(binNum == (i))
        x_inds = x[inds].astype(int)
        y_inds = y[inds].astype(int)
        aperture_indices = [y_inds, x_inds]

        # True corresponds to masked
        mask = np.ones_like(cube[0, :, :]).astype(bool)
        mask[y_inds, x_inds] = False

        aperture_mask = np.repeat(mask[np.newaxis, :, :], d1, axis=0)

        final_mask = np.bitwise_or(aperture_mask, nan_mask)

        masked_cube = ma.array(cube, mask=final_mask)

        if type == 'sum':
            spectra[i, :] = ma.sum(ma.sum(masked_cube, axis=2), axis=1)
        elif type == 'median':
            spectra[i, :] = ma.median(ma.median(masked_cube, axis=2), axis=1)
        else:
            raise NameError('Type of combination not understood')

    return spectra, nan_mask
def test_mad_axis_none_mask():
    for i in range(25):
        size = np.random.randint(1, 10000)
        mean = np.random.uniform(-1000, 1000)
        sigma = np.random.uniform(0, 1000)
        a = np.random.normal(mean, sigma, size)
        value_to_mask = np.random.uniform(0, 0.8)
        mask = np.random.uniform(0, 1.0, size) < value_to_mask
        a_masked = ma.array(a, mask=mask, dtype=np.float32)
        expected = ma.median(ma.array(np.abs(a_masked - ma.median(a_masked)),
                                      dtype=np.float32, mask=mask))
        actual = stats.median_absolute_deviation(a, mask=mask)
        np.testing.assert_allclose(actual, np.float32(expected), atol=1e-4)
def test_mad_axis_none_mask(set_random_seed):
    for i in range(25):
        size = np.random.randint(1, 10000)
        mean = np.random.uniform(-1000, 1000)
        sigma = np.random.uniform(0, 1000)
        a = np.random.normal(mean, sigma, size)
        value_to_mask = np.random.uniform(0, 0.8)
        mask = np.random.uniform(0, 1.0, size) < value_to_mask
        a_masked = ma.array(a, mask=mask, dtype=np.float32)
        expected = ma.median(ma.array(np.abs(a_masked - ma.median(a_masked)),
                                      dtype=np.float32, mask=mask))
        actual = stats.median_absolute_deviation(a, mask=mask)
        np.testing.assert_allclose(actual, np.float32(expected), atol=1e-4)
def test_mad_2d_axis_1_mask(set_random_seed):
    for i in range(5):
        size1 = np.random.randint(1, 300)
        size2 = np.random.randint(5, 300)
        mean = np.random.uniform(-1000, 1000)
        sigma = np.random.uniform(0, 1000)
        a = np.random.normal(mean, sigma, size=(size1, size2))
        value_to_mask = np.random.uniform(0, 0.8)
        mask = np.random.uniform(0, 1.0, size=(size1, size2)) < value_to_mask
        a_masked = ma.array(a, mask=mask, dtype=np.float32)
        expected = ma.median(ma.array(np.abs(a.T - ma.median(a_masked, axis=1)).T,
                                      dtype=np.float32, mask=mask), axis=1)
        actual = stats.median_absolute_deviation(a, mask=mask, axis=1)
        np.testing.assert_allclose(actual, np.float32(expected), atol=1e-4)
def test_mad_2d_axis_1_mask():
    for i in range(5):
        size1 = np.random.randint(1, 300)
        size2 = np.random.randint(1, 300)
        mean = np.random.uniform(-1000, 1000)
        sigma = np.random.uniform(0, 1000)
        a = np.random.normal(mean, sigma, size=(size1, size2))
        value_to_mask = np.random.uniform(0, 0.8)
        mask = np.random.uniform(0, 1.0, size=(size1, size2)) < value_to_mask
        a_masked = ma.array(a, mask=mask, dtype=np.float32)
        expected = ma.median(ma.array(np.abs(a.T - ma.median(a_masked, axis=1)).T,
                                      dtype=np.float32, mask=mask), axis=1)
        actual = stats.median_absolute_deviation(a, mask=mask, axis=1)
        np.testing.assert_allclose(actual, np.float32(expected), atol=1e-4)
def find_lines(peaks, fwhm, y=None, verbose=False):
    if y is None:
        y = np.arange(len(peaks))
    # Make all rows the same length (padded to the longest)
    peaks = np.array(list(zip_longest(*peaks)), dtype='float')
    # if verbose:
    #     plt.plot(peaks.T, y, 'o')
    #     plt.show()
    msk = np.isnan(peaks)
    peaks = ma.array(peaks, mask=msk)
    col = ['C' + str(j) for j in range(9)]
    for i in range(len(peaks)):
        rest = peaks[i:]
        line = rest[0]
        # msk = np.logical_not(np.isnan(line))
        # k = ma.polyfit(y, line, 2)
        est = np.ones(len(y)) * ma.median(line)
        # est = np.polyval(k, y)
        err = est - line
        move_right = ma.filled((err > 5 * ma.median(ma.abs(err))), False)
        move_left = ma.filled((err < -5 * ma.median(ma.abs(err))), False)
        not_move = np.logical_not(move_right + move_left)
        # plt.plot(y[not_move], rest[0][not_move], '.' + col[i % 9])
        # plt.plot(y, est, col[i % 9], ls='--')
        # plt.plot(y[move_right], rest[0][move_right], 'x' + col[i % 9])
        # plt.plot(y[move_left], rest[0][move_left], '+' + col[i % 9])
        # plt.show()
        if np.sum(move_right) > 0:
            # Those that fall below the median (to the left)
            nonearray = ma.array([[None] * np.sum(move_right.astype('int'))],
                                 mask=[[True] * np.sum(move_right.astype('int'))])
            rest[:, move_right] = ma.append(rest[:, move_right][1:, :],
                                            nonearray, axis=0)
        if np.sum(move_left) > 0:
            nonearray = ma.array([[None] * np.sum(move_left.astype('int'))],
                                 mask=[[True] * np.sum(move_left.astype('int'))])
            rest[:, move_left] = ma.append(nonearray,
                                           rest[:, move_left][:-1, :], axis=0)
        # plt.plot(rest[0], col[i % 9])
        peaks[i:] = rest
    # plt.show()
    peaks = peaks.T
    msk = np.isnan(peaks)
    peaks = ma.array(peaks, mask=msk)
    good_lines = (np.sum(np.logical_not(msk), axis=0) > len(y) / 4.)
    peaks = peaks[:, good_lines]
    return peaks
def test_mad_3d_axis_2_mask(set_random_seed):
    for i in range(5):
        size1 = np.random.randint(1, 50)
        size2 = np.random.randint(1, 50)
        size3 = np.random.randint(5, 50)
        mean = np.random.uniform(-1000, 1000)
        sigma = np.random.uniform(0, 1000)
        a = np.random.normal(mean, sigma, size=(size1, size2, size3))
        value_to_mask = np.random.uniform(0, 0.8)
        mask = np.random.uniform(0, 1.0, size=(size1, size2, size3)) < value_to_mask
        a_masked = ma.array(a, mask=mask, dtype=np.float32)
        expected = ma.median(ma.array(np.abs(a - np.expand_dims(ma.median(a_masked, axis=2), axis=2)),
                                      dtype=np.float32, mask=mask), axis=2)
        actual = stats.median_absolute_deviation(a, mask=mask, axis=2)
        np.testing.assert_allclose(actual, expected.astype(np.float32), atol=1e-9)
def test_mad_3d_axis_2_mask():
    for i in range(5):
        size1 = np.random.randint(1, 50)
        size2 = np.random.randint(1, 50)
        size3 = np.random.randint(1, 50)
        mean = np.random.uniform(-1000, 1000)
        sigma = np.random.uniform(0, 1000)
        a = np.random.normal(mean, sigma, size=(size1, size2, size3))
        value_to_mask = np.random.uniform(0, 0.8)
        mask = np.random.uniform(0, 1.0, size=(size1, size2, size3)) < value_to_mask
        a_masked = ma.array(a, mask=mask, dtype=np.float32)
        expected = ma.median(ma.array(np.abs(a - np.expand_dims(ma.median(a_masked, axis=2), axis=2)),
                                      dtype=np.float32, mask=mask), axis=2)
        actual = stats.median_absolute_deviation(a, mask=mask, axis=2)
        np.testing.assert_allclose(actual, expected.astype(np.float32), atol=1e-9)
def get_noise_levels(ncfile):
    # ----------------
    # Open NetCDF file
    # ----------------
    print('Opening NetCDF file ' + ncfile)
    dataset = nc4.Dataset(ncfile, 'r+', format='NETCDF3_CLASSIC')

    nray = len(dataset.dimensions['time'])
    ngate = len(dataset.dimensions['range'])

    elv = np.transpose(np.tile(dataset.variables['elevation'][:], (ngate, 1)))
    rng = np.tile(dataset.variables['range'][:], (nray, 1))
    height = rng * np.sin(elv * np.pi / 180.)

    zh = dataset.variables['ZED_H'][:]
    zed = ma.masked_where(height < 14000, zh)
    rngkm = ma.masked_where(rng <= 0.0, rng / 1000.)
    range2 = 20. * ma.log10(rngkm)

    zh[:] = zed - range2
    zv = zh.copy()
    zv[:] = zh[:] - dataset.variables['ZDR'][:]
    zx = zh.copy()
    zx[:] = zh[:] + dataset.variables['LDR'][:]

    nezharr = ma.mean(zh, axis=1)
    nezherr = ma.std(zh, axis=1)
    nezvarr = ma.mean(zv, axis=1)
    nezverr = ma.std(zv, axis=1)
    nezxarr = ma.mean(zx, axis=1)
    nezxerr = ma.std(zx, axis=1)

    nezharr = ma.masked_where(nezherr > MAX_ERR, nezharr)
    nezvarr = ma.masked_where(nezverr > MAX_ERR, nezvarr)
    nezxarr = ma.masked_where(nezxerr > MAX_ERR, nezxarr)

    nezh = ma.median(nezharr)
    nezv = ma.median(nezvarr)
    nezx = ma.median(nezxarr)

    dataset.close()
    return np.round(nezh, 2), np.round(nezv, 2), np.round(nezx, 2)
def calculateMeans(self):
    self.synHist = ma.masked_values(self.synHist, -9999.0)
    self.synHistMean = ma.mean(self.synHist, axis=0)
    self.medSynHist = ma.median(self.synHist, axis=0)

    self.synHistUpper = percentile(self.synHist, per=95, axis=0)
    self.synHistLower = percentile(self.synHist, per=5, axis=0)
def median_(self):
    """
    calculates the median of the image over the binarised segmentation

    :return:
    """
    return ma.median(self.masked_img, 0)
def rbf_gamma_median(x: np.ndarray, pass_D_squared=False):
    """ 1/(2*(median distance**2)) """
    assert len(x.shape) == 2
    D_squared = euclidean_distances(x, squared=True)
    # masking upper triangle and the diagonal.
    mask = np.triu(np.ones(D_squared.shape), 0)
    median_squared_distance = ma.median(ma.array(D_squared, mask=mask))
    if median_squared_distance == 0:
        xx = np.array(list(set(D_squared.reshape(D_squared.size))))  # a bit slow?
        if len(xx) > 1:
            xx = np.sort(xx)  # TODO better find 2nd smallest item... (next to 0)
            median_squared_distance = xx[1]
            assert median_squared_distance > 0
        else:
            median_squared_distance = 1

    if pass_D_squared:
        if median_squared_distance == 0 or np.isinf(median_squared_distance):
            return 0.5, D_squared
        return 0.5 / median_squared_distance, D_squared
    else:
        if median_squared_distance == 0 or np.isinf(median_squared_distance):
            return 0.5
        return 0.5 / median_squared_distance
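# Usage sketch for rbf_gamma_median() above: the returned gamma implements
# the common 1 / (2 * median squared distance) bandwidth heuristic for RBF
# kernels. Assumes scikit-learn is available for euclidean_distances, as in
# the function's own imports; the data below is illustrative.
import numpy as np

X = np.random.default_rng(0).normal(size=(100, 5))
print(rbf_gamma_median(X))  # a single positive float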
def _get_fm(self):
    """Convenience function to return masked flux array"""
    fm = ma.masked_array(self.lc.f.copy(), self.lc.fmask.copy(), fill_value=0)
    fm -= ma.median(fm)
    return fm
def asym_sigmoidal(self, x, asym=1.0, mod_asym=1.0, xmid=None, lscale=1.0,
                   rscale=1.0):
    if xmid is None:
        xmid = ma.median(x)
    return np.where(x <= xmid,
                    (asym * mod_asym) / (1 + ma.exp((xmid - x) / lscale)),
                    asym / (1 + ma.exp((xmid - x) / rscale)))
def _calc_sensitivity() -> np.ndarray:
    """Returns sensitivity of radar as function of altitude."""
    mean_gas_atten = ma.mean(attenuations['radar_gas_atten'], axis=0)
    z_sensitivity = z_power_min + log_range + mean_gas_atten
    zc = ma.median(ma.array(z, mask=~classification.is_clutter), axis=0)
    z_sensitivity[~zc.mask] = zc[~zc.mask]
    return z_sensitivity
def update_background(fn):
    with fits.open(fn, mode='update') as hdu:
        im = hdu[0].data.copy()

        mask = ~np.isfinite(im) + (im < DATA_FLOOR)
        if 'MASK' in hdu:
            mask += hdu['MASK'].data > 0

        im = ma.MaskedArray(im, mask=mask, copy=True)
        scim = sigma_clip(im)

        mean = ma.mean(scim)
        mean = mean if mean is not ma.masked else 0

        median = ma.median(scim)
        median = median if median is not ma.masked else 0

        stdev = ma.std(scim)
        stdev = stdev if stdev is not ma.masked else 0

        hdu['SCI'].header['bgmean'] = (mean, 'background sigma-clipped mean')
        hdu['SCI'].header['bgmedian'] = (median, 'background sigma-clipped median')
        hdu['SCI'].header['bgstdev'] = (stdev, 'background sigma-clipped standard dev.')
        hdu['SCI'].header['nbg'] = (ma.sum(~scim.mask),
                                    'area considered in background stats.')
def set_fbackground(self):
    """
    Set flux in background

    Uses the current value of self.ap (which must be set) and constructs
    the median flux outside.
    """
    flux = ma.masked_array(self.flux, fill_value=0)
    ap_mask = self.ap.weights > 0
    ap_mask = ap_mask[np.newaxis, :, :]
    flux.mask = flux.mask | ap_mask  # Mask out if included in aperture
    flux.mask = flux.mask | np.isnan(flux.data)
    flux = flux.reshape(-1, self.npix)
    self.fbg = np.array(ma.median(flux, axis=1))
    fbgfit, bgmask = background_mask(self.cad, self.fbg)

    # Checks if every single cadence is a nan. If yes don't include at all
    is_all_nan = flux.mask.sum(1) == flux.mask.shape[1]
    bgmask = bgmask | is_all_nan
    self.bgmask = bgmask
    if ap_mask.sum() > 0.8 * flux.shape[1]:
        self.bgmask = np.zeros(flux.shape[0]).astype(bool)
        self.fbg = np.zeros(flux.shape[0])

    self.ts['fbg'] = self.fbg
    self.ts['bgmask'] = self.bgmask
def fit(self, data, y=None):
    """
    Fit MultipleImputer to the input data.

    Parameters
    ----------
    data : DataFrame, shape [n_samples, n_features]
        Input data.

    y : default None
        Ignore, argument required for constructing sklearn Pipeline.

    Returns
    -------
    self
    """
    mode_name = 'mode'
    mean_name = 'mean'
    median_name = 'median'
    allowed_strategies = {mode_name, mean_name, median_name}
    for k in self.strategies:
        if k not in allowed_strategies:
            msg = 'Can only use these strategies: {0} got strategy={1}'
            raise ValueError(msg.format(allowed_strategies, k))

    statistics = {}
    if mean_name in self.strategies:
        mean_cols = self.strategies[mean_name]
        X_masked = self._get_masked(data, mean_cols)
        mean_masked = ma.mean(X_masked, axis=0)
        statistics[mean_name] = mean_masked.data

    if median_name in self.strategies:
        median_cols = self.strategies[median_name]
        X_masked = self._get_masked(data, median_cols)
        median_masked = ma.median(X_masked, axis=0)
        statistics[median_name] = median_masked.data

    # numpy MaskedArray doesn't seem to support the .mode
    # method yet, thus we roll out our own
    # https://docs.scipy.org/doc/numpy-1.13.0/reference/maskedarray.baseclass.html#maskedarray-baseclass
    if mode_name in self.strategies:
        mode_cols = self.strategies[mode_name]
        X_masked = self._get_masked(data, mode_cols)
        mode_values = np.empty(len(mode_cols))

        # transpose to compute along each column instead of row.
        # TODO:
        # an embarrassingly parallel problem, needs to investigate
        # if this is a bottleneck
        zipped = zip(X_masked.data.T, X_masked.mask.T)
        for i, (col, col_mask) in enumerate(zipped):
            col_valid = col[~col_mask]
            values, _ = mode(col_valid)
            mode_values[i] = values[0]

        statistics[mode_name] = mode_values

    self.statistics_ = statistics
    return self
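# Sketch of the masked-statistics idea used by the imputer above, assuming a
# plain 2-D float array where NaNs mark missing values; names illustrative.
import numpy as np
import numpy.ma as ma

X = np.array([[1.0, np.nan], [3.0, 4.0], [np.nan, 8.0]])
X_masked = ma.masked_invalid(X)
print(ma.mean(X_masked, axis=0).data)    # column means, NaNs ignored
print(ma.median(X_masked, axis=0).data)  # column medians, NaNs ignored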
def run_numpy_asym_sigmoidal(data):
    xmid = ma.median(data)
    ndx = np.where(data <= xmid)
    # only those data points that satisfy the condition are passed to one
    # function then another, and the result of applying both functions to
    # each data point is stored in an array
    res = np.apply_along_axis(fnx2, 1, data[ndx, ])
    return res
def binner(x, y, w_sta, nbins, rang=None, ebar=False, per=None):
    from numpy import array, digitize, lexsort, linspace
    from numpy.ma import average, median

    ind = lexsort((y, x))
    xs, ys = x[ind], y[ind]

    if rang is None:
        mn, mx = min(xs), max(xs)
    else:
        mn, mx = rang

    bins = linspace(mn, mx, nbins + 1)
    x_cen = (bins[:-1] + bins[1:]) * 0.5
    bins = linspace(mn, mx, nbins)
    ibins = digitize(xs, bins)

    if w_sta == "median":
        y_sta = array([median(ys[ibins == i]) for i in range(1, bins.size + 1)])
    elif w_sta == "mean":
        y_sta = array([average(ys[ibins == i]) for i in range(1, bins.size + 1)])
    elif w_sta == "mode":
        y_sta = array([mode(ys[ibins == i])[0] for i in range(1, bins.size + 1)])

    if ebar is False:
        return x_cen, y_sta
    elif ebar is True and per is None:
        myer = abs(array([scoreatpercentile(ys[ibins == i], 15.8)
                          for i in range(1, bins.size + 1)]) - y_sta)
        pyer = abs(array([scoreatpercentile(ys[ibins == i], 84.0)
                          for i in range(1, bins.size + 1)]) - y_sta)
        yer = array([myer, pyer])
        return x_cen, y_sta, yer
    elif ebar is True and per is not None:
        myer = abs(array([scoreatpercentile(ys[ibins == i], per[0])
                          for i in range(1, bins.size + 1)]) - y_sta)
        pyer = abs(array([scoreatpercentile(ys[ibins == i], per[1])
                          for i in range(1, bins.size + 1)]) - y_sta)
        yer = array([myer, pyer])
        return x_cen, y_sta, yer
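# Usage sketch for binner() above, exercising the ebar=False path (which
# needs only numpy); x, y, and nbins are illustrative values.
import numpy as np

rng = np.random.default_rng(4)
x = rng.uniform(0, 10, 500)
y = 2.0 * x + rng.normal(size=500)
x_cen, y_med = binner(x, y, w_sta="median", nbins=10)
print(x_cen.shape, y_med.shape)  # (10,), (10,)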
def summarize(self):
    """ Summarizes a chamber as a dictionary of parameters mapped from
    their descriptors

    Arguments:
        None

    Returns:
        (dict) summary of chamber parameters

    """
    features = [
        'median_chamber',
        'sum_chamber',
        'std_chamber',
        'x_center_chamber',
        'y_center_chamber',
        'radius_chamber'
    ]

    if self.blankFlag:
        return dict(zip(features, list(np.full(len(features), np.nan))))

    disk = self.get_disk()
    medI = int(ma.median(disk))
    sumI = int(disk.sum())
    sdI = int(disk.std())

    vals = [medI, sumI, sdI, self.center[0], self.center[1], self.radius]

    return dict(zip(features, vals))
def bin_spike(x, l):
    """
        l is the number of points used for comparison, thus l=2 means that
          each point will be compared only against the previous and
          following measurements. l=2 is probably not a good choice, too
          small.

        Maybe use pstsd instead?

        Dummy way to avoid warnings when x[ini:fin] are all masked.
        Improve this in the future.
    """
    assert x.ndim == 1, "I'm not ready to deal with multidimensional x"

    assert l % 2 == 0, "l must be an even integer"

    N = len(x)
    bin = ma.masked_all(N)
    # bin_std = ma.masked_all(N)
    half_window = int(l / 2)
    idx = (i for i in range(half_window, N - half_window) if np.isfinite(x[i]))
    for i in idx:
        ini = max(0, i - half_window)
        fin = min(N, i + half_window)
        # At least 3 valid points
        if ma.compressed(x[ini:fin]).size >= 3:
            bin[i] = x[i] - ma.median(x[ini:fin])
            # bin_std[i] = (np.append(x[ini:i], x[i+1:fin+1])).std()
            bin[i] /= (np.append(x[ini:i], x[i+1:fin+1])).std()

    return bin
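# Usage sketch for bin_spike() above: a single injected spike shows up as a
# large normalized deviation from its local median. Values illustrative.
import numpy as np
import numpy.ma as ma

x = ma.array(np.sin(np.linspace(0, 10, 50)))
x[25] += 5.0  # inject a spike
print(bin_spike(x, l=6)[25])  # large compared to neighboring entries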
def aperture_spectrum(arr, x0=None, y0=None, radius=3, combine='sum'):
    y, x = np.indices(arr.shape[1:])
    if x0 is None:
        x0 = x.mean()
    if y0 is None:
        y0 = y.mean()
    x = x - x0
    y = y - y0
    r = np.sqrt(x**2 + y**2)

    new_arr = ma.masked_invalid(arr)
    new_arr.mask |= (r > radius)
    npix = np.sum(r < radius)

    if combine == 'sum':
        s = new_arr.sum(axis=(1, 2))
    elif combine == 'mean':
        s = new_arr.mean(axis=(1, 2))
    elif combine == 'median':
        s = ma.median(new_arr, axis=(1, 2))
    elif combine == 'sqrt_sum':
        s = np.sqrt((np.square(new_arr)).sum(axis=(1, 2)))
    else:
        raise Exception('Combination type not understood.')

    return s, npix
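# Usage sketch for aperture_spectrum() above with a synthetic
# (wavelength, y, x) cube; the 'median' branch exercises ma.median over
# both spatial axes at once. All values illustrative.
import numpy as np

cube = np.random.default_rng(1).normal(loc=10.0, size=(4, 20, 20))
spec, npix = aperture_spectrum(cube, x0=10, y0=10, radius=5, combine='median')
print(spec.shape, npix)  # (4,) and the pixel count inside the aperture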
def estimate_psf(self, index_range, weight=True, taper=True, clip=True):
    """Return an empirical estimate of the PSF based on a set of
    reference sources.

    :param array index_range: index array to look up reference sources
    :param bool weight=True: use a sqrt(flux)-weighted median
    :param bool taper=True: smoothly taper the PSF with a cosine bell kernel
    :param bool clip=True: clip negative values

    :rtype: array
    """
    psf_grid = self.psf_grid
    psf_size = psf_grid.psf_size
    flux = self.flux
    psf = ma.array([self.magnify(i) / flux[i] for i in index_range])
    if clip:
        psf = psf.clip(min=0)
    if weight:
        w = np.sqrt(flux[index_range])
        w = np.around(w / np.min(w)).astype("i")
        psf = np.repeat(psf, w, axis=0)
    psf = ma.median(psf, axis=0)
    if taper:
        psf *= CosineBellKernel(psf_size, 0.5, 1)
    return psf / psf.sum()
def get_targets(K1, K2):
    """
    calculate the frangi 'blobness' measure R for the given principal
    curvatures. return a binary filter with the conservative threshold of
    R < R_median
    """
    R = (K1 / K2) ** 2
    return (R < ma.median(R)).filled(0)
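# Usage sketch for get_targets() above: K1 and K2 stand in for principal
# curvature maps (e.g., Hessian eigenvalue images); the synthetic arrays
# here are illustrative only.
import numpy as np
import numpy.ma as ma

rng = np.random.default_rng(2)
K1 = ma.masked_invalid(rng.normal(size=(64, 64)))
K2 = ma.masked_invalid(rng.normal(loc=1.0, size=(64, 64)))
targets = get_targets(K1, K2)
print(targets.sum())  # number of pixels below the median blobness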
def tdpep(t, fm, PG0):
    """
    Transit-duration - Period - Epoch

    Parameters
    ----------
    fm  : Flux with bad data points masked out.  It is assumed that
          elements of f are evenly spaced in time.
    PG0 : Initial period grid.

    Returns
    -------
    epoch2d : Grid (twd,P) of best epoch
    df2d    : Grid (twd,P) of depth epoch
    count2d : number of filled data for particular (twd,P)
    noise   : Grid (twd) typical scatter
    PG      : The Period grid
    twd     : Grid of trial transit widths.
    """
    assert fm.fill_value == 0

    # Determine the grid of periods that corresponds to integer
    # multiples of cadence values
    PcadG, PG = P2Pcad(PG0)

    # Initialize tdur grid.
    twdMi = a2tdur(P2a(PG[0])) / keptoy.lc
    twdMa = a2tdur(P2a(PG[-1])) / keptoy.lc
    twdG = np.round(np.linspace(twdMi, twdMa, 4)).astype(int)

    rec2d = []
    noise = []
    for twd in twdG:
        dM = mtd(t, fm.filled(), twd)
        dM.mask = fm.mask | ~isfilled(t, fm, twd)
        rec2d.append(pep(t[0], dM, PcadG))

        # Noise per transit
        mad = ma.abs(dM)
        mad = ma.median(mad)
        noise.append(mad)

    rec2d = np.vstack(rec2d)

    make2d = lambda x: np.tile(np.vstack(x), (1, rec2d.shape[1]))
    rec2d = mlab.rec_append_fields(rec2d, 'noise', make2d(noise))
    rec2d = mlab.rec_append_fields(rec2d, 'twd', make2d(twdG))

    PG = np.tile(PG, (rec2d.shape[0], 1))
    rec2d = mlab.rec_append_fields(rec2d, 'PG', PG)

    s2n = rec2d['fom'] / rec2d['noise'] * rec2d['count']
    rec2d = mlab.rec_append_fields(rec2d, 's2n', s2n)
    return rec2d
def timeseries_station_page(ms, station_name, time_slots, data,
                            fn=abs, output_name=None):
    dpi = 50
    if output_name is None:
        fig = figure(figsize=(32, 24), dpi=dpi)
    else:
        fig = Figure(figsize=(32, 24), dpi=dpi)
    station_name_list = list(ms.tables['antennae']['NAME'])
    station_id = station_name_list.index(station_name)
    num_ant = len(ms.tables['antennae'])
    tsn = time_slots - time_slots[0]
    pol_names = corr_type(ms.tables['polarization']['CORR_TYPE'][0])
    ref_freq_mhz = ms.tables['spectral_windows'][0]['REF_FREQUENCY'] / 1.e6

    fig.suptitle(ms.msname + ': ' + fn.__name__ + '(vis) with ' + station_name +
                 ' at %3.2f MHz' % (ref_freq_mhz,), fontsize='large')

    median_amp = ma.median(ma.mean(ma.median(fn(data[station_id, :, 0::3, :]),
                                             axis=-1), axis=-1), axis=-1)

    for id2, name in enumerate(station_name_list):
        ax = fig.add_subplot(ceil(num_ant / 4.0), 4, id2 + 1)
        ax.plot(tsn, fn(data[station_id, id2, 0, :]), c='blue', label=pol_names[0])
        ax.plot(tsn, fn(data[station_id, id2, 1, :]), c='green', label=pol_names[1])
        ax.plot(tsn, fn(data[station_id, id2, 2, :]), c='purple', label=pol_names[2])
        ax.plot(tsn, fn(data[station_id, id2, 3, :]), c='red', label=pol_names[3])
        ax.grid()
        ax.set_ylabel(station_name_list[id2], rotation='horizontal')
        ax.set_ylim(0.0, 3 * median_amp)
        ax.set_yticklabels([])
        if id2 < len(station_name_list) - 4:
            ax.set_xticklabels([])
        else:
            ax.set_xlabel('Time [s]')

    fig.subplots_adjust(hspace=0.0, top=0.95, bottom=0.04)
    if output_name is not None:
        canvas = FigureCanvasAgg(fig)
        if output_name[-4:] in ['.jpg', '.JPG']:
            canvas.print_jpg(output_name, dpi=dpi, quality=55)
        else:
            canvas.print_figure(output_name, dpi=dpi)
def test_median_axis_none_mask():
    for i in range(25):
        size = np.random.randint(1, 10000)
        mean = np.random.uniform(-1000, 1000)
        sigma = np.random.uniform(0, 1000)
        a = np.random.normal(mean, sigma, size)
        value_to_mask = np.random.uniform(0, 1.0)
        mask = np.random.uniform(0, 1, size) < value_to_mask
        expected = ma.median(ma.array(a, mask=mask, dtype=np.float32))
        actual = stats.median(a, mask=mask)
        assert np.float32(expected) == actual
def _set_default_values_Flex(self, keywords):
    """
    """
    self._set_default_values(keywords)

    # if 'center' in keywords:
    #     self.center = keywords['center']
    # else:
    #     self.center = {}

    # Lat, Lon of the origin of the coordinate system, i.e. (x,y)
    if (not hasattr(self, 'lat_ref')) & (not hasattr(self, 'lon_ref')):
        try:
            self.lat_ref = self.center['lat']
            self.lon_ref = self.center['lon']
        # except AttributeError:
        except KeyError:
            self.lat_ref = ma.median(self.input['Lat'])
            self.lon_ref = ma.median(self.input['Lon'])
def s2n_fit(fdt, tdt, p):
    """
    Evaluate S/N taking the best fit depth as signal and the scatter about
    the residuals as the noise.
    """
    model = keptoy.P05(p, tdt)
    sig = p[2]
    resid = fdt - model
    noise = ma.median(abs(resid))
    s2n = sig / noise * np.sqrt(fdt.count())
    return s2n