def test_memory_leak() -> None:
    import resource

    arr = np.arange(1).reshape((1, 1))

    n_attempts = 3
    results = []

    for _ in range(n_attempts):
        starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        for _ in range(1000):
            for axis in [None, 0, 1]:
                bn.nansum(arr, axis=axis)
                bn.nanargmax(arr, axis=axis)
                bn.nanargmin(arr, axis=axis)
                bn.nanmedian(arr, axis=axis)
                bn.nansum(arr, axis=axis)
                bn.nanmean(arr, axis=axis)
                bn.nanmin(arr, axis=axis)
                bn.nanmax(arr, axis=axis)
                bn.nanvar(arr, axis=axis)

        ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        diff = ending - starting
        diff_bytes = diff * resource.getpagesize()

        # For 1.3.0 release, this had value of ~100kB
        if diff_bytes:
            results.append(diff_bytes)
        else:
            break

    assert len(results) < n_attempts
def mad(data, sigma=True, axis=None, force=False, medval=0.0):
    """
    Return the median absolute deviation (or the absolute deviation about a
    fixed value - default zero - if force is set to True). By default,
    returns the equivalent sigma.

    Axis functionality adapted from
    https://github.com/keflavich/agpy/blob/master/agpy/mad.py

    Automatically switches to a NaN-aware median when the data contain NaNs.

    Parameters
    ----------
    data : np.ndarray
        Data set.
    sigma : bool, optional
        Enables std estimation from MAD.
    axis : {int, None}, optional
        Axis to evaluate MAD along.
    force : bool, optional
        Force the median to be a given value.
    medval : float, optional
        Forced median value.

    Returns
    -------
    mad : float
        MAD estimation. If sigma is True, MAD*1.4826 is returned.
    """
    # Check for NaNs in the data
    nans = False
    if np.isnan(data).any():
        nans = True

    if axis is not None and axis > 0:
        if force:
            med = medval
        else:
            if nans:
                med = nanmedian(data.swapaxes(0, axis), axis=0)
            else:
                med = np.median(data.swapaxes(0, axis), axis=0)
        if nans:
            mad = nanmedian(np.abs(data.swapaxes(0, axis) - med), axis=0)
        else:
            mad = np.median(np.abs(data.swapaxes(0, axis) - med), axis=0)
    else:
        if force:
            med = medval
        else:
            if nans:
                med = nanmedian(data, axis=axis)
            else:
                med = np.median(data, axis=axis)
        if nans:
            mad = nanmedian(np.abs(data - med), axis=axis)
        else:
            mad = np.median(np.abs(data - med), axis=axis)

    if not sigma:
        return mad
    else:
        return mad * 1.4826
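# Illustrative usage sketch for mad() above. The array values are invented,
# and nanmedian() is assumed to be whatever implementation the surrounding
# module imports (e.g. bottleneck.nanmedian).
import numpy as np

data = np.array([[1.0, 2.0, np.nan],
                 [4.0, 5.0, 6.0]])

print(mad(data))                        # sigma-scaled MAD of the flattened array
print(mad(data, sigma=False, axis=1))   # raw MAD along axis 1, NaNs ignored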
def theil_sen(x, y, n_samples=100000):
    """
    Computes the Theil-Sen estimator for 2D data.

    This complexity is O(n**2), which can be poor for large n. We will perform
    a sampling of data points to get an unbiased, but larger variance estimator.
    The sampling will be done by picking two points at random, and computing
    the slope, up to n_samples times.

    Parameters:
        x (ndarray): 1-d np array, the control variate.
        y (ndarray): 1-d np.array, the ind variate.
        n_samples (int): how many points to sample.
    """

    if x.shape[0] != y.shape[0]:
        raise ValueError("x and y must be the same shape.")

    n = x.shape[0]

    i1 = np.random.randint(0, n, n_samples)
    i2 = np.random.randint(0, n, n_samples)

    slopes = _slope(x[i1], x[i2], y[i1], y[i2])
    slope_ = nanmedian(slopes)

    # Find the optimal b as the median of y_i - slope*x_i:
    intercepts = np.empty(n, dtype='float64')
    for i in range(n):
        intercepts[i] = y[i] - slope_ * x[i]
    intercept_ = nanmedian(intercepts)

    return np.array([slope_, intercept_])
def bottleneck_MAD(arr, c=0.6745, axis=None):
    """
    Median Absolute Deviation along given axis of an array:

        median(abs(a - median(a))) / c

    c = 0.6745 is the constant to convert from MAD to std; it is used by default.
    """
    from bottleneck import nanmedian
    import numpy as np
    import numpy.ma as ma

    if not arr.dtype.isnative:
        kind = str(arr.dtype.kind)
        sz = str(arr.dtype.itemsize)
        dt = '=' + kind + sz
        data = arr.astype(dt)
    else:
        data = arr

    if data.ndim == 1:
        d = nanmedian(data)
        m = nanmedian(ma.fabs(data - d) / c)
    else:
        d = nanmedian(data, axis=axis)
        if axis is not None and axis > 0:
            aswp = np.swapaxes(data, 0, axis)
        else:
            aswp = data
        m = nanmedian(ma.fabs(aswp - d) / c, axis=0)

    return m
def smear(self, img):
    """CCD dark current and smear correction.

    TODO:
     - Should we weight everything with the number of rows used in masked vs virtual regions?
     - Should we take self.frametransfer_time into account?
     - Cosmic ray rejection requires images before and after in time?
    """
    self.logger.info("Doing smear correction...")

    # Remove cosmic rays in collateral data:
    # TODO: Can cosmic rays also show up in virtual pixels? If so, also include img.virtual_smear
    #index_collateral_cosmicrays = cosmic_rays(img.masked_smear)
    index_collateral_cosmicrays = np.zeros_like(img.masked_smear, dtype='bool')
    img.masked_smear[index_collateral_cosmicrays] = np.nan

    # Average the masked and virtual smear across their rows:
    masked_smear = nanmedian(img.masked_smear, axis=0)
    virtual_smear = nanmedian(img.virtual_smear, axis=0)

    # Estimate dark current:
    # TODO: Should this be self.frametransfer_time?
    fdark = nanmedian(masked_smear - virtual_smear * (self.exposure_time + self.readout_time) / self.exposure_time)
    img.dark = fdark  # Save for later use
    self.logger.info('Dark current: %f', img.dark)
    if np.isnan(fdark):
        fdark = 0

    # Correct the smear regions for the dark current:
    masked_smear -= fdark
    virtual_smear -= fdark * (self.exposure_time + self.readout_time) / self.exposure_time

    # Weights from number of pixels in different regions:
    Nms = np.sum(~np.isnan(img.masked_smear), axis=0)
    Nvs = np.sum(~np.isnan(img.virtual_smear), axis=0)
    c_ms = Nms / np.maximum(Nms + Nvs, 1)
    c_vs = Nvs / np.maximum(Nms + Nvs, 1)

    # Weights as in Kepler where you only have one row in each sector:
    #g_ms = ~np.isnan(masked_smear)
    #g_vs = ~np.isnan(virtual_smear)
    #c_ms = g_ms/np.maximum(g_ms + g_vs, 1)
    #c_vs = g_vs/np.maximum(g_ms + g_vs, 1)

    # Estimate the smear for all columns, taking into account
    # that some columns could be missing:
    replace(masked_smear, np.nan, 0)
    replace(virtual_smear, np.nan, 0)
    fsmear = c_ms * masked_smear + c_vs * virtual_smear

    # Correct the science pixels for dark current and smear:
    img.target_data -= fdark
    for k, col in enumerate(img.collateral_columns):
        img.target_data[img.columns == col] -= fsmear[k]

    return img
def _median_central(x, width_points):
    y = move_median(x, width_points, min_count=1)
    yny = append(y[width_points // 2:], [NaN] * (width_points // 2))
    for k in range(width_points // 2):
        yny[k] = nanmedian(x[:(2 * k + 1)])
        yny[-(k + 1)] = nanmedian(x[-(2 * k + 1):])
    return yny
def _move_median_central_1d(x, width_points):
    y = move_median(x, width_points, min_count=1)
    y = np.roll(y, -width_points // 2 + 1)
    for k in range(width_points // 2 + 1):
        y[k] = nanmedian(x[:(k + 2)])
        y[-(k + 1)] = nanmedian(x[-(k + 2):])
    return y
def robust_median_filter(flux, size=375):
    if size % 2 == 0:  # Make even results odd
        size = size + 1
    half_sizes = np.array([-(size - 1) / 2, ((size - 1) / 2) + 1], dtype='int')
    if np.ndim(flux) == 2:  # For 2D spectrum
        ny, nx = np.shape(flux)  # Calculate npix in x and y
    else:  # Else for 1D spectrum
        nx = len(flux)  # Calculate npix
    # Create array that will store the smoothed median spectrum
    median_result = np.zeros(nx)
    if np.ndim(flux) == 2:  # Run this loop for 2D
        # This loop does the running of the median down the spectrum each pixel
        for i in range(nx):
            x_left, x_right = i + half_sizes
            if x_left < 0:
                x_left = 0
            elif x_right > nx:
                x_right = nx
            # Calculate median between x_left and x_right for a given pixel
            median_result[i] = bn.nanmedian(flux[:, x_left:x_right])
    else:  # Run this loop for 1D
        # This loop does the running of the median down the spectrum each pixel
        for i in range(nx):
            x_left, x_right = i + half_sizes
            if x_left < 0:
                x_left = 0
            elif x_right > nx:
                x_right = nx
            # Calculate median between x_left and x_right for a given pixel
            median_result[i] = bn.nanmedian(flux[x_left:x_right])
    return median_result
def rms_timescale(time, flux, timescale=3600/86400):
    """
    Compute robust RMS on specified timescale. Using MAD scaled to RMS.

    Parameters:
        time (ndarray): Timestamps in days.
        flux (ndarray): Flux to calculate RMS for.
        timescale (float, optional): Timescale to bin timeseries before
            calculating RMS. Default=1 hour.

    Returns:
        float: Robust RMS on specified timescale.

    .. codeauthor:: Rasmus Handberg <*****@*****.**>
    """

    # Construct the bin edges separated by the timescale:
    bins = np.arange(np.nanmin(time), np.nanmax(time), timescale)
    bins = np.append(bins, np.nanmax(time))

    # Bin the timeseries to one hour:
    indx = np.isfinite(flux)
    flux_bin, _, _ = binned_statistic(time[indx], flux[indx], nanmean, bins=bins)

    # Compute robust RMS value (MAD scaled to RMS)
    return mad_to_sigma * nanmedian(np.abs(flux_bin - nanmedian(flux_bin)))
def mad(a, c=0.6745, axis=None):
    """
    Compute the median absolute deviation along the specified axis.

        median(abs(a - median(a))) / c

    Returns the median absolute deviation of the array elements.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    axis : int, optional
        Axis along which the medians are computed. The default (axis=None)
        is to compute the median along a flattened version of the array.
    c : float, optional
        The scaling factor applied to the raw median absolute deviation.
        The default is to scale to match the standard deviation.

    Returns
    -------
    mad : ndarray
        A new array holding the result.
    """
    if axis is None:
        _shape = a.shape
        a.shape = np.product(a.shape, axis=0)
        m = nanmedian(np.fabs(a - nanmedian(a))) / c
        a.shape = _shape
    else:
        m = np.apply_along_axis(
            lambda x: nanmedian(np.fabs(x - nanmedian(x))) / c, axis, a)
    return m
def test_memory_leak():
    import resource

    arr = np.arange(1).reshape((1, 1))

    starting = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    for i in range(1000):
        for axis in [None, 0, 1]:
            bn.nansum(arr, axis=axis)
            bn.nanargmax(arr, axis=axis)
            bn.nanargmin(arr, axis=axis)
            bn.nanmedian(arr, axis=axis)
            bn.nansum(arr, axis=axis)
            bn.nanmean(arr, axis=axis)
            bn.nanmin(arr, axis=axis)
            bn.nanmax(arr, axis=axis)
            bn.nanvar(arr, axis=axis)

    ending = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    diff = ending - starting
    diff_bytes = diff * resource.getpagesize()
    print(diff_bytes)

    # For 1.3.0 release, this had value of ~100kB
    assert diff_bytes == 0
def crude_skycor(fitslist, ext, mask=None, nsimul=100, noisechisel_grad=False, bootmedian=True): if isinstance(fitslist, str): fitslist = [fitslist] if isinstance(fitslist, list): for fits_name in fitslist: print(fits_name) fits_image = fits.open(fits_name) if mask is not None: print("Input mask accepted: " + mask) mask_fits = fits.open(mask) shape_mask = mask_fits[0].data.shape shape_fits = fits_image[ext].data.shape mask_array = mask_fits[0].data if (shape_mask == (1014, 1014)) & (shape_fits == (1024, 1024)): mask_array = np.zeros(shape_fits) border = 5 mask_array[0+border:1024-border, 0+border:1024-border] = mask_fits[0].data if bootmedian: skylvl = bm.bootmedian(sample_input=fits_image[ext].data[~np.isnan(mask_array)], nsimul=nsimul, errors=1) if not bootmedian: median_sky = bn.nanmedian(fits_image[ext].data[~np.isnan(mask_array)]) #sigma_sky = bn.nanstd(fits_image[ext].data[~np.isnan(mask_array)]) sigma_sky = 0 skylvl = {"median": median_sky, "s1_up": median_sky+sigma_sky, "s1_down": median_sky-sigma_sky, "std1_down": sigma_sky, "std1_up": sigma_sky} else: if bootmedian: skylvl = bm.bootmedian(sample_input=fits_image[ext].data, nsimul=nsimul, errors=1) if not bootmedian: median_sky = bn.nanmedian(fits_image[ext].data) #sigma_sky = bn.nanstd(fits_image[ext].data) sigma_sky = 0 skylvl = {"median": median_sky, "s1_up": median_sky+sigma_sky, "s1_down": median_sky-sigma_sky, "std1_down": sigma_sky, "std1_up": sigma_sky} print(skylvl) print(np.abs(skylvl["median"] - skylvl["s1_up"])/2.) print("Skylvl: " + str(skylvl["median"]) + " +/- " + str(np.abs(skylvl["s1_up"] - skylvl["s1_down"])/2.)) fits_image[ext].data = fits_image[ext].data - skylvl["median"] fits_image[0].header['SKYSTD'] = skylvl["std1_down"] fits_image[0].header['SKYLVL'] = skylvl["median"] fits_image[ext].header['SKYSTD'] = skylvl["std1_down"] fits_image[ext].header['SKYLVL'] = skylvl["median"] os.system("rm " + fits_name) fits_image.verify("silentfix") fits_image.writeto(fits_name) fits_image.close()
def factor_outlierlimit(factor, n_extremum=5):
    x_m = factor.values
    median = bn.nanmedian(x_m, axis=1).reshape(-1, 1)
    Dmad = bn.nanmedian(abs(x_m - median), axis=1).reshape(-1, 1)
    upper = (median + n_extremum * Dmad)
    lower = (median - n_extremum * Dmad)
    with np.errstate(invalid='ignore'):
        res = np.clip(x_m, lower, upper)
    return pd.DataFrame(res, factor.index, factor.columns)
def _get_fluctuations(image_array, correction='median'):
    # Calculate the mean PV for each image
    if correction == 'mean':
        mean_values = bn.nanmean(bn.nanmean(image_array, axis=1), axis=1)
    elif correction == 'median':
        mean_values = bn.nanmedian(bn.nanmedian(image_array, axis=1), axis=1)
    return mean_values
def k2p2_saturated(SumImage, MASKS, idx):

    # Get logger for printing messages:
    logger = logging.getLogger(__name__)

    no_masks = MASKS.shape[0]

    column_mask = np.zeros_like(SumImage, dtype='bool')
    saturated_mask = np.zeros_like(MASKS, dtype='bool')
    pixels_added = 0

    # Loop through the different masks:
    for u in range(no_masks):
        # Create binary version of mask and extract
        # the rows and columns which it spans and
        # the highest value in it:
        mask = np.asarray(MASKS[u, :, :], dtype='bool')
        mask_rows, mask_columns = np.where(mask)
        mask_max = np.nanmax(SumImage[mask])

        # Loop through the columns of the mask:
        for c in set(mask_columns):

            column_mask[:, c] = True

            # Extract the pixels that are in this column and in the mask:
            pixels = SumImage[mask & column_mask]

            # Calculate ratio as defined in Lund & Handberg (2014):
            ratio = np.abs(nanmedian(np.diff(pixels))) / np.nanmax(pixels)
            if ratio < 0.01 and nanmedian(pixels) >= mask_max / 2:
                logger.debug("Column %d - RATIO = %f - Saturated", c, ratio)

                # Has significant flux and is in saturated column:
                add_to_mask = (idx & column_mask)

                # Make sure the pixels we add are directly connected to the highest flux pixel:
                new_mask_labels, numfeatures = ndimage.label(add_to_mask)
                imax = np.unravel_index(np.nanargmax(SumImage * mask * column_mask), SumImage.shape)
                add_to_mask &= (new_mask_labels == new_mask_labels[imax])

                # Modify the mask:
                pixels_added += np.sum(add_to_mask) - np.sum(mask[column_mask])
                logger.debug(" %d pixels should be added to column %d",
                             np.sum(add_to_mask) - np.sum(mask[column_mask]), c)
                saturated_mask[u][add_to_mask] = True
            else:
                logger.debug("Column %d - RATIO = %f", c, ratio)

            column_mask[:, c] = False

    return saturated_mask, pixels_added
def _nanmedian(array, axis=None):
    """Bottleneck nanmedian function that handles tuple axis."""
    if isinstance(axis, tuple):
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0

    if isinstance(array, Quantity):
        return array.__array_wrap__(bottleneck.nanmedian(array, axis=axis))
    else:
        return bottleneck.nanmedian(array, axis=axis)
def mad(x):
    """
    Median absolute deviation scaled to standard deviation.

    Parameters:
        x (ndarray): Array to calculate robust standard deviation for.

    Returns:
        float: Median absolute deviation scaled to standard deviation.
    """
    return mad_to_sigma * nanmedian(np.abs(x - nanmedian(x)))
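# The mad_to_sigma constant used above is presumably the standard MAD-to-sigma
# factor 1/Phi^-1(3/4) ~= 1.4826, consistent with the 1.4826 written out in
# several other snippets here. A self-contained, numpy-only sketch of the same
# estimator for comparison:
import numpy as np

MAD_TO_SIGMA = 1.482602218505602  # 1 / (75th percentile of the standard normal)

def mad_numpy(x):
    """NaN-aware MAD scaled to an estimate of the standard deviation."""
    x = np.asarray(x, dtype=float)
    return MAD_TO_SIGMA * np.nanmedian(np.abs(x - np.nanmedian(x)))

print(mad_numpy([1.0, 2.0, 2.5, 100.0, np.nan]))  # barely affected by the 100.0 outlier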
def bootfit(x, y, nsimul, errors=1): m_array = np.empty(nsimul) m_array[:] = np.nan b_array = np.empty(nsimul) b_array[:] = np.nan boot_polyfit_results = miniutils.parallel_progbar(boot_polyfit, zip([x]*nsimul, [y]*nsimul, np.random.randint(0,100*nsimul,nsimul)), nprocs=4, starmap=True) # boot_polyfit_results = miniutils.parallel_progbar(boot_polyfit, zip([muGaia]*nsimul, [muVis]*nsimul, np.random.randint(0,10*nsimul,nsimul)), # nprocs=4, starmap=True) # for i in tqdm(range(nsimul)): # index_resamp = bootstrap_resample(index_array) # m_temp, b_temp = np.polyfit(x[index_array], y[index_array], 1) m_array = np.array(boot_polyfit_results)[:,0] b_array = np.array(boot_polyfit_results)[:,1] print(m_array) print(b_array) #print(median_boot) m_median = bn.nanmedian(m_array) b_median = bn.nanmedian(b_array) if(errors == 1): m_s1_up = np.percentile(m_array, s1_up_q*100) m_s1_down = np.percentile(m_array, s1_down_q*100) m_s2_up = np.percentile(m_array, s2_up_q*100) m_s2_down = np.percentile(m_array, s2_down_q*100) m_s3_up = np.percentile(m_array, s3_up_q*100) m_s3_down = np.percentile(m_array, s3_down_q*100) b_s1_up = np.percentile(b_array, s1_up_q*100) b_s1_down = np.percentile(b_array, s1_down_q*100) b_s2_up = np.percentile(b_array, s2_up_q*100) b_s2_down = np.percentile(b_array, s2_down_q*100) b_s3_up = np.percentile(b_array, s3_up_q*100) b_s3_down = np.percentile(b_array, s3_down_q*100) if(errors == 0): s1_up = 0 s1_down = 0 s2_up = 0 s2_down = 0 s3_up = 0 s3_down = 0 output = {"m_median": m_median, "m_s1_up": m_s1_up, "m_s1_down": m_s1_down, "m_s2_up": m_s2_up, "m_s2_down": m_s2_down, "m_s3_up": m_s3_up, "m_s3_down": m_s3_down, "b_median": b_median, "b_s1_up": b_s1_up, "b_s1_down": b_s1_down, "b_s2_up": b_s2_up, "b_s2_down": b_s2_down, "b_s3_up": b_s3_up, "b_s3_down": b_s3_down, } return(output)
def fit(self, flux, Ncbvs=2, sigma_clip=4.0, maxiter=3):

    # Find the median flux, as it is used for
    # initial guesses later on:
    median_flux = nanmedian(flux)

    # Start looping over the number of CBVs to include:
    bic = np.empty(self.cbv.shape[1] + 1, dtype='float64')
    solutions = []
    for Ncbvs in range(self.cbv.shape[1] + 1):

        # Initial guesses for coefficients:
        coeffs0 = np.zeros(Ncbvs + 1, dtype='float64')
        coeffs0[-1] = median_flux

        iters = 0
        fluxi = np.copy(flux)
        while iters <= maxiter:
            iters += 1

            # Do the fit:
            res = minimize(self._lhood, coeffs0, args=(fluxi, ), method='Powell')
            flux_filter = self.mdl(res.x)

            # Do robust sigma clipping:
            absdev = np.abs(fluxi - flux_filter)
            mad = 1.4826 * nanmedian(absdev)
            indx = np.greater(absdev, sigma_clip * mad, where=np.isfinite(fluxi))
            if np.any(indx):
                fluxi[indx] = np.nan
            else:
                break

        # Calculate the Bayesian Information Criterion (BIC) and store the solution:
        bic[Ncbvs] = np.log(np.sum(np.isfinite(fluxi))) * len(coeffs0) + res.fun
        solutions.append(res)

    # Use the solution which minimizes the BIC:
    indx = np.argmin(bic)
    flux_filter = self.mdl(solutions[indx].x)

    #plt.figure()
    #plt.plot(bic, '.-')
    #plt.show()

    return flux_filter
def nanmedian(array, axis=None):
    """
    A nanmedian function that uses bottleneck if available.
    """
    if HAS_BOTTLENECK:
        if isinstance(axis, tuple):
            array = move_tuple_axes_first(array, axis=axis)
            axis = 0

        if isinstance(array, u.Quantity):
            return array.__array_wrap__(bn.nanmedian(array, axis=axis))
        else:
            return bn.nanmedian(array, axis=axis)
    else:
        return np.nanmedian(array, axis=axis)
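# Quick illustration of calling the wrapper above. It assumes HAS_BOTTLENECK
# and move_tuple_axes_first are defined in the same module, as the function
# implies; the array values are invented.
import numpy as np

arr = np.arange(24, dtype=float).reshape(2, 3, 4)
arr[0, 0, 0] = np.nan

print(nanmedian(arr))                     # scalar median of the flattened array, NaN ignored
print(nanmedian(arr, axis=(0, 1)).shape)  # tuple axis collapsed to a single leading axis -> (4,)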
def nanmad(data, sigma=True, axis=None):
    """
    Return the median absolute deviation. Axis functionality adapted from
    https://github.com/keflavich/agpy/blob/master/agpy/mad.py
    """
    if axis is not None and axis > 0:
        med = nanmedian(data.swapaxes(0, axis), axis=0)
        mad = nanmedian(np.abs(data.swapaxes(0, axis) - med), axis=0)
    else:
        med = nanmedian(data, axis=axis)
        mad = nanmedian(np.abs(data - med), axis=axis)

    if not sigma:
        return mad
    else:
        return mad * 1.4826
def entropy_cleaning(self, matrix, targ_limit=150):
    """
    Entropy-cleaning of lightcurve matrix using the SVD U-matrix.

    Parameters:
        matrix (:class:`numpy.ndarray`):
        targ_limit (int, optional): Maximum number of targets to remove during cleaning.

    .. codeauthor:: Mikkel N. Lund <*****@*****.**>
    """
    logger = logging.getLogger(__name__)

    # Calculate the principal components:
    pca = PCA(self.ncomponents, random_state=self.random_state)
    U, _, _ = pca._fit(matrix)

    ent = compute_entropy(U)
    logger.info('Entropy start: %s', ent)

    targets_removed = 0
    components = np.arange(self.ncomponents)

    with np.errstate(invalid='ignore'):
        while np.any(ent < self.threshold_entropy):
            com = components[ent < self.threshold_entropy][0]

            # Remove highest relative weight target
            m = nanmedian(U[:, com])
            s = mad_to_sigma * nanmedian(np.abs(U[:, com] - m))
            dev = np.abs(U[:, com] - m) / s

            idx0 = np.argmax(dev)

            # Remove the star from the lightcurve matrix:
            star_no = np.ones(U.shape[0], dtype=bool)
            star_no[idx0] = False
            matrix = matrix[star_no, :]

            targets_removed += 1
            if targets_removed >= targ_limit:
                break

            U, _, _ = pca._fit(matrix)
            ent = compute_entropy(U)

    logger.info('Entropy end: %s', ent)
    logger.info('Targets removed: %d', targets_removed)
    return matrix
def singleVar(content):
    return dict(min=nanmin(content),
                max=nanmax(content),
                mean=nanmean(content),
                median=nanmedian(content),
                valid=numpy.sum(numpy.isfinite(content)) * 100.0 / content.size)
def moving_nanmedian_cyclic(t, x, w, dt=None):
    """
    Calculate cyclic moving average of input with given window
    (in t-units) taking into account NaNs in the data.
    """
    if len(t) != len(x):
        raise ValueError("t and x must have the same length.")
    if dt is None:
        dt = median(np.diff(t))

    # Calculate width of filter:
    width_points = int(w / dt)
    if width_points <= 1:
        return x
    if width_points % 2 == 0:
        width_points += 1  # Filter is much faster when using an odd number of points!

    wh = width_points // 2
    N = len(x)
    if wh >= N:
        return np.zeros_like(x) + nanmedian(x)

    # Stitch ends onto the array:
    xny = np.concatenate((x[-wh - 1:N - 1], x, x[1:wh + 1]))

    # Run moving median on longer series:
    N = len(xny)
    y = _median_central(xny, width_points)

    # Cut out the central part again:
    y = y[wh:N - wh]

    return y
def weighted_mean(_line):
    max_weight = 50

    # print _line.shape
    median_2d = bottleneck.nanmedian(_line, axis=1).reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)

    std = bottleneck.nanstd(_line, axis=1)
    std_2d = std.reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)

    weight_2d = numpy.fabs(std_2d / (_line - median_2d))
    # weight_2d[weight_2d > max_weight] = max_weight
    weight_2d[numpy.isinf(weight_2d)] = max_weight

    for i in range(3):
        avg = bottleneck.nansum(_line * weight_2d, axis=1) / bottleneck.nansum(weight_2d, axis=1)
        avg_2d = avg.reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)

        std = numpy.sqrt(bottleneck.nansum(((_line - avg_2d)**2 * weight_2d), axis=1) / bottleneck.nansum(weight_2d, axis=1))
        std_2d = std.reshape(_line.shape[0], 1).repeat(_line.shape[1], axis=1)

        weight_2d = numpy.fabs(std_2d / (_line - avg_2d))
        # weight_2d[weight_2d > max_weight] = max_weight
        weight_2d[numpy.isinf(weight_2d)] = max_weight

    return bottleneck.nansum(_line * weight_2d, axis=1) / bottleneck.nansum(weight_2d, axis=1)
def _nanmedian(array, axis=None):
    """Bottleneck nanmedian function that handles tuple axis."""
    if isinstance(axis, tuple):
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0
    return bottleneck.nanmedian(array, axis=axis)
def theil_sen(x, y, sample="auto", n_samples=1e7):
    assert x.shape[0] == y.shape[0]

    n = x.shape[0]

    if n < 100 or not sample:
        ix = np.argsort(x)
        slopes = np.empty(int(n * (n - 1) * 0.5))
        for c, pair in enumerate(itertools.combinations(range(n), 2)):
            i, j = ix[pair[0]], ix[pair[1]]
            slopes[c] = slope(x[i], x[j], y[i], y[j])
    else:
        i1 = np.random.randint(int(0), int(n), int(n_samples))
        i2 = np.random.randint(int(0), int(n), int(n_samples))

        slopes = slope(x[i1], x[i2], y[i1], y[i2])

    slope_ = bottleneck.nanmedian(slopes)

    # Find the optimal b as the median of y_i - slope*x_i:
    intercepts = np.empty(n)
    for c in range(n):
        intercepts[c] = y[c] - slope_ * x[c]
    intercept_ = bottleneck.median(intercepts)

    return np.array([slope_, intercept_])
def demedian(arr, axis=None):
    """
    Subtract the median along the specified axis.

    Parameters
    ----------
    arr : ndarray
        Input array.
    axis : {int, None}, optional
        The axis along which to remove the median. The default (None) is
        to subtract the median of the flattened array.

    Returns
    -------
    y : ndarray
        A copy with the median along the specified axis removed.

    Examples
    --------
    >>> arr = np.array([1, np.nan, 2, 10])
    >>> demedian(arr)
    array([ -1.,  NaN,   0.,   8.])
    """
    marr = bn.nanmedian(arr, axis)
    if (axis != 0) and (axis is not None) and (not np.isscalar(marr)):
        ind = [slice(None)] * arr.ndim
        ind[axis] = np.newaxis
        marr = marr[tuple(ind)]
    return arr - marr
def is_guide_ota(primhdu, ext, w=20, debug=False): logger = logging.getLogger("IsGuideOTA") binning = primhdu.header['BINNING'] skylevel = primhdu.header['SKYLEVEL'] gain = primhdu.header['GAIN'] skynoise = primhdu.header['SKYNOISE'] logger.debug("Checking OTA %s (bin=%d, sky=%.1f, skynoise=%.2f)" % ( ext.name, binning, skylevel, skynoise)) if (not is_image_extension(ext)): logger.debug("extension is not a valid image extension") return False excesses = numpy.empty((8,8)) excesses[:,:] = numpy.NaN if (debug): center_hdu = [pyfits.PrimaryHDU()] corner_hdu = [pyfits.PrimaryHDU()] for cx, cy in itertools.product(range(8), repeat=2): # # Get pixel coord for this cell # x1,x2,y1,y2 = cell2ota__get_target_region(cx, cy, binning=binning, trimcell=0) x1,x2,y1,y2 = int(x1),int(x2),int(y1),int(y2) x21 = (x2-x1)//2 # extract the mean value in the bottom corner corner = bottleneck.nanmean(ext.data[y1:y1+w, x1:x1+w].astype(numpy.float32)) # also get the value in the bottom center center = bottleneck.nanmean(ext.data[y1:y1+w, x1+x21-w//2:x1+x21+w//2].astype(numpy.float32)) if (debug): print(cx,cy,corner, center) corner_hdu.append(pyfits.ImageHDU(data=ext.data[y1:y1+w, x1:x1+w])) center_hdu.append(pyfits.ImageHDU(data=ext.data[y1:y1+w, x1+x21-w//2:x1+x21+w//2])) excess = corner - center #print ext.name, cx, cy, corner, center, excess excesses[cx,cy] = excess _mean = bottleneck.nanmean(excesses) _median = bottleneck.nanmedian(excesses) is_guideota = (_median > 10*skynoise) logger.debug("Found corner excess mean=%.1f, median=%.1f --> guide-OTA: %s" % ( _mean, _median, "YES" if is_guideota else "NO")) if (debug): return is_guideota, excesses, _mean, _median, skynoise, corner_hdu, center_hdu return is_guideota
def euclid_normalize_mask(fits_list):
    """
    euclid.normalize_mask - formerly normalize_eur_mask
    """
    if not isinstance(fits_list, list):
        fits_list = [fits_list]

    corrected_files = np.array([])
    for fits_name in fits_list:
        fits_file = fits.open(fits_name)
        for i in np.linspace(1, 36, 36).astype("int"):
            # Translate to GNUASTRO
            # Normalize as a function of width and center
            print("Normalizing CCD " + str(i))
            # execute_cmd(cmd_text="astarithmetic -h " + str(j) + " " + outname + " -h" + str(j+2) + " " + outname + " 0 gt nan where")
            fits_file[i].data = np.divide(
                fits_file[i].data,
                bn.nanmedian(fits_file[i].data[1798:2298, 1818:2318]))

        if os.path.exists(fits_name):
            os.remove(fits_name)
        fits_file.verify("silentfix")
        fits_file.writeto(fits_name)
        fits_file.close()
        execute_cmd(cmd_text="astfits -h0 " + fits_name + " --update=NORMAL,True")
        corrected_files = np.append(corrected_files, fits_name)

    return corrected_files
def _estimate(self, dataset):
    """Estimate and save the displacements for the time series.

    Parameters
    ----------
    num_states_retained : int
        Number of states to retain at each time step of the HMM.
    max_displacement : array of int
        The maximum allowed displacement magnitudes in [y,x].

    Returns
    -------
    dict
        The estimated displacements and partial results of motion correction.
    """
    params = self._params
    if params['verbose']:
        print('Estimating model parameters.')
    shifts = self._estimate_shifts(dataset)
    references, variances = _whole_frame_shifting(dataset, shifts)
    if params['max_displacement'] is None:
        max_displacement = np.array(dataset.frame_shape[:3]) // 2
    else:
        max_displacement = np.array(params['max_displacement'])
    gains = nanmedian(
        (variances / references).reshape(-1, references.shape[-1]))
    if not (np.all(np.isfinite(gains)) and np.all(gains > 0)):
        raise Exception('Failed to estimate positive gains')
    pixel_means, pixel_variances = _pixel_distribution(dataset)
    movement_model = MovementModel.estimate(shifts)

    if shifts[0].shape[-1] == 2:
        shifts = [np.concatenate([np.zeros(s.shape[:-1] + (1,), dtype=int), s],
                                 axis=-1)
                  for s in shifts]
    min_shifts = np.nanmin(
        [np.nanmin(s.reshape(-1, s.shape[-1]), 0) for s in shifts], 0)
    max_shifts = np.nanmax(
        [np.nanmax(s.reshape(-1, s.shape[-1]), 0) for s in shifts], 0)

    # add a bit of extra room to move around
    if max_displacement.size == 2:
        max_displacement = np.hstack(([0], max_displacement))
    extra_buffer = ((max_displacement - max_shifts + min_shifts) // 2).astype(int)
    min_displacements = min_shifts - extra_buffer
    max_displacements = max_shifts + extra_buffer

    displacements = self._neighbor_viterbi(
        dataset, references, gains, movement_model, min_displacements,
        max_displacements, pixel_means, pixel_variances)

    return self._post_process(displacements)
def fit(self, X, y): X_y = self._check_params(X, y) self.X = X_y[0] self.y = X_y[1].reshape((-1, 1)) n, p = X.shape S = [] # list of selected features F = range(p) # list of unselected features if self.n_features != 'auto': feature_mi_matrix = np.zeros((self.n_features, p)) else: feature_mi_matrix = np.zeros((n, p)) feature_mi_matrix[:] = np.nan S_mi = [] # Find the first feature k_min = 3 range_k = 7 xy_MI = np.empty((range_k, p)) for i in range(range_k): xy_MI[i, :] = self._get_first_mi_vector(i + k_min) xy_MI = bn.nanmedian(xy_MI, axis=0) S, F = self._add_remove(S, F, bn.nanargmax(xy_MI)) S_mi.append(bn.nanmax(xy_MI)) if self.verbose > 0: self._info_print(S, S_mi) # Find the next features if self.n_features == 'auto': n_features = np.inf else: n_features = self.n_features while len(S) < n_features: s = len(S) - 1 feature_mi_matrix[s, F] = self._get_mi_vector(F, S[-1]) fmm = feature_mi_matrix[:len(S), F] if bn.allnan(bn.nanmean(fmm, axis=0)): break MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0) if np.isnan(MRMR).all(): break selected = F[bn.nanargmax(MRMR)] S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0))) S, F = self._add_remove(S, F, selected) if self.verbose > 0: self._info_print(S, S_mi) if self.n_features == 'auto' and len(S) > 10: MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1) if np.abs(np.mean(MI_dd[-5:])) < 1e-3: break self.n_features_ = len(S) self.ranking_ = S self.mi_ = S_mi return self
def med_over_images(masked_arr, axis=0):
    """
    Calculate median pixel value along specified axis

    Uses bottleneck.nanmedian for speed
    """
    dat = masked_arr.data.copy()
    dat[masked_arr.mask] = np.NaN
    return bn.nanmedian(dat, axis=axis)
def theil_sen(x,y, sample= "auto", n_samples = 1e7): """ Computes the Theil-Sen estimator for 2d data. parameters: x: 1-d np array, the control variate y: 1-d np.array, the ind variate. sample: if n>100, the performance can be worse, so we sample n_samples. Set to False to not sample. n_samples: how many points to sample. This complexity is O(n**2), which can be poor for large n. We will perform a sampling of data points to get an unbiased, but larger variance estimator. The sampling will be done by picking two points at random, and computing the slope, up to n_samples times. """ assert x.shape[0] == y.shape[0], "x and y must be the same shape." n = x.shape[0] if n < 100 or not sample: ix = np.argsort( x ) slopes = np.empty( n*(n-1)*0.5 ) for c, pair in enumerate(itertools.combinations( range(n),2 ) ): #it creates range(n) =( i,j = ix[pair[0]], ix[pair[1]] slopes[c] = slope( x[i], x[j], y[i],y[j] ) else: i1 = np.random.randint(0, n, n_samples) i2 = np.random.randint(0, n, n_samples) print '...checking for unwanted zeros...' zero_check=np.where(np.abs((x[i1]-x[i2])) != 0) i1=i1[zero_check] i2=i2[zero_check] print '...calculating slopes...' slopes = slope( x[i1], x[i2], y[i1], y[i2] ) print 'slope min and max are:',np.amin(slopes),np.amax(slopes) histogram,bin_limits=np.histogram(slopes,bins=10000,range=(-2,2)) #print histogram #c95=np.percentile(slopes,(5,95)) #pdb.set_trace() slope_ = bottleneck.nanmedian( slopes ) print '...done! Now finding intercepts...' #find the optimal b as the median of y_i - slope*x_i intercepts = np.empty( n ) for c in xrange(n): intercepts[c] = y[c] - slope_*x[c] histogram_i,bin_limits_i=np.histogram(intercepts,bins=10000,range=(-2,2)) #print histogram_i #c95i=np.percentile(intercepts,(5,95)) #print cumul_i intercept_ = bottleneck.median( intercepts ) return np.array( [slope_,intercept_]) #c95[0],c95[1],c95i[0],c95i[1]] )
def _estimate(self, dataset): """Estimate and save the displacements for the time series. Parameters ---------- num_states_retained : int Number of states to retain at each time step of the HMM. max_displacement : array of int The maximum allowed displacement magnitudes in [y,x]. Returns ------- dict The estimated displacements and partial results of motion correction. """ params = self._params if params.verbose: print 'Estimating model parameters.' if params.max_displacement is not None: params.max_displacement = np.array(params.max_displacement) shifts = VolumeTranslation(params.max_displacement).estimate(dataset) references, variances, offset = _whole_frame_shifting(dataset, shifts) assert np.all(offset == 0) gains = nanmedian( (variances / references).reshape(-1, references.shape[-1])) if not (np.all(np.isfinite(gains)) and np.all(gains > 0)): raise Exception('Failed to estimate positive gains') pixel_means, pixel_variances = _pixel_distribution(dataset) movement_model = MovementModel.estimate(shifts) # TODO: detect unreasonable shifts before doing this calculation min_shifts = np.nanmin(list(it.chain(*it.chain(*shifts))), 0) max_shifts = np.nanmax(list(it.chain(*it.chain(*shifts))), 0) # add a bit of extra room to move around if params.max_displacement is None: extra_buffer = 5 else: extra_buffer = ( (params.max_displacement - max_shifts + min_shifts) / 2 ).astype(int) min_displacements = (min_shifts - extra_buffer) max_displacements = (max_shifts + extra_buffer) return self._neighbor_viterbi( dataset, references, gains, movement_model, min_displacements, max_displacements, pixel_means, pixel_variances, params.num_states_retained, params.verbose)
def sample_background_using_ds9_regions(hdu, sky_regions):

    wcs = astWCS.WCS(hdu.header, mode='pyfits')
    pixelscale = wcs.getPixelSizeDeg() * 3600.
    data = hdu.data

    center_xy = wcs.wcs2pix(sky_regions[:, 0], sky_regions[:, 1])
    # print center_xy

    center_xy = numpy.array(center_xy)
    cx = center_xy[:, 0]
    cy = center_xy[:, 1]
    width = sky_regions[:, 2] / 2. / pixelscale
    height = sky_regions[:, 3] / 2. / pixelscale

    in_ota = ((cx + width) > 0) & ((cx - width) < data.shape[1]) & \
             ((cy + height) > 0) & ((cy - height) < data.shape[0])

    cx = cx[in_ota]
    cy = cy[in_ota]
    w = width[in_ota]
    h = height[in_ota]

    if (cx.size <= 0):
        # no boxes in this OTA
        return None

    left = numpy.floor(cx - w).astype(numpy.int)
    right = numpy.ceil(cx + w).astype(numpy.int)
    top = numpy.ceil(cy + h).astype(numpy.int)
    bottom = numpy.floor(cy - h).astype(numpy.int)

    left[left < 0] = 0
    bottom[bottom < 0] = 0

    results = []
    for box in range(cx.shape[0]):
        cutout = data[bottom[box]:top[box], left[box]:right[box]]
        median = bottleneck.nanmedian(cutout.astype(numpy.float32))
        if (numpy.isfinite(median)):
            results.append([cx[box], cy[box], median])

    #print results
    if (len(results) <= 0):
        return None

    return numpy.array(results)
def _estimate(self, dataset): """Estimate and save the displacements for the time series. Parameters ---------- num_states_retained : int Number of states to retain at each time step of the HMM. max_displacement : array of int The maximum allowed displacement magnitudes in [y,x]. Returns ------- dict The estimated displacements and partial results of motion correction. """ params = self._params if params.verbose: print 'Estimating model parameters.' if params.max_displacement is not None: params.max_displacement = np.array(params.max_displacement) else: params.max_displacement = np.array([-1, -1]) # TODO shifts = sima.motion.frame_align.PlaneTranslation2D( params.max_displacement, n_processes=params.n_processes ).estimate(dataset) references, variances, offset = _whole_frame_shifting(dataset, shifts) gains = nanmedian( (variances / references).reshape(-1, references.shape[-1])) assert np.all(np.isfinite(gains)) and np.all(gains > 0) pixel_means, pixel_variances = _pixel_distribution(dataset) cov_matrix_est, decay_matrix, log_transition_matrix, mean_shift = \ _estimate_movement_model(shifts, dataset.frame_shape[1]) # add a bit of extra room to move around min_shifts = np.nanmin(list(it.chain(*it.chain(*shifts))), 0) max_shifts = np.nanmax(list(it.chain(*it.chain(*shifts))), 0) extra_buffer = ((params.max_displacement - max_shifts + min_shifts) / 2 ).astype(int) extra_buffer[params.max_displacement < 0] = 5 min_displacements = (min_shifts - extra_buffer) max_displacements = (max_shifts + extra_buffer) return self._neighbor_viterbi( dataset, log_transition_matrix, references, gains, decay_matrix, cov_matrix_est, mean_shift, offset, min_displacements, max_displacements, pixel_means, pixel_variances, params.num_states_retained, params.verbose)
def monte_lines(numtrys):
    bigarr = np.zeros((1, 3))

    for i in range(numtrys):
        v, I = ADE.ADE_gauss(1000, 500, 50)
        I *= 55 / I.max()
        I += 3. * np.random.randn(I.size)
        # ADE.eplot(v,I)
        moments = ADE.ADE_moments(v, I, threshold=np.inf, err=np.abs(I)**0.5)
        bigarr = np.vstack((bigarr, moments))

    bigarr = bigarr[1:]
    # print bigarr
    return bn.nanmedian(bigarr, axis=0), bn.nanstd(bigarr, axis=0)
def bn_median(masked_array, axis=None):
    """
    https://github.com/astropy/ccdproc/blob/122cdbd5713140174f057eaa8fdb6f9ce03312df/docs/ccdproc/bottleneck_example.rst
    Perform fast median on masked array

    Parameters
    ----------
    masked_array : `numpy.ma.masked_array`
        Array of which to find the median.
    axis : int, optional
        Axis along which to perform the median. Default is to find the median
        of the flattened array.
    """
    import bottleneck as bn
    data = masked_array.filled(fill_value=np.NaN)
    med = bn.nanmedian(data, axis=axis)
    # construct a masked array result, setting the mask from any NaN entries
    return np.ma.array(med, mask=np.isnan(med))  # bn_median
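# Short usage sketch for bn_median() above; the values and mask are made up,
# and only numpy and bottleneck are required.
import numpy as np

values = np.ma.masked_array(
    data=[[1.0, 2.0, 3.0],
          [4.0, 5.0, 6.0]],
    mask=[[False, True, False],
          [False, False, False]])

# Column-wise medians; the masked 2.0 is filled with NaN and ignored.
print(bn_median(values, axis=0))   # -> masked values [2.5, 5.0, 4.5]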
def theil_sen(x, y, sample="auto", n_samples=1e7):
    """
    Computes the Theil-Sen estimator for 2d data.

    parameters:
        x: 1-d np array, the control variate
        y: 1-d np.array, the ind variate.
        sample: if n>100, the performance can be worse, so we sample n_samples.
                Set to False to not sample.
        n_samples: how many points to sample.

    This complexity is O(n**2), which can be poor for large n. We will perform
    a sampling of data points to get an unbiased, but larger variance estimator.
    The sampling will be done by picking two points at random, and computing
    the slope, up to n_samples times.
    """
    assert x.shape[0] == y.shape[0], "x and y must be the same shape."
    n = x.shape[0]

    if n < 100 or not sample:
        ix = np.argsort(x)
        slopes = np.empty(n * (n - 1) * 0.5)
        for c, pair in enumerate(itertools.combinations(range(n), 2)):  # it creates range(n) =(
            i, j = ix[pair[0]], ix[pair[1]]
            slopes[c] = slope(x[i], x[j], y[i], y[j])
    else:
        i1 = np.random.randint(0, n, n_samples)
        i2 = np.random.randint(0, n, n_samples)

        slopes = slope(x[i1], x[i2], y[i1], y[i2])
        # pdb.set_trace()

    slope_ = bottleneck.nanmedian(slopes)

    # find the optimal b as the median of y_i - slope*x_i
    intercepts = np.empty(n)
    for c in xrange(n):
        intercepts[c] = y[c] - slope_ * x[c]
    intercept_ = bottleneck.median(intercepts)

    return np.array([slope_, intercept_])
def group_median(x, groups, axis=0):
    """
    Median with groups along an axis.

    Parameters
    ----------
    x : ndarray
        Input data.
    groups : list
        List of group membership of each element along the given axis.
    axis : int, {default: 0}
        Axis along which the median is calculated.

    Returns
    -------
    idx : ndarray
        The group median of the data along axis 0.
    """

    # Find set of unique groups
    ugroups = unique_group(groups)

    # Convert groups to a numpy array
    groups = np.asarray(groups)

    # Loop through unique groups and normalize
    xmedian = np.nan * np.zeros(x.shape)
    for group in ugroups:
        idx = groups == group
        idxall = [slice(None)] * x.ndim
        idxall[axis] = idx
        if idx.sum() > 0:
            ns = bn.nanmedian(x[tuple(idxall)], axis=axis)
            xmedian[tuple(idxall)] = np.expand_dims(ns, axis)

    return xmedian
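# Small worked example for group_median() above; labels and values are
# invented, and unique_group() is assumed to return the distinct labels as in
# the host module.
import numpy as np

x = np.array([[1.0, 10.0],
              [3.0, 30.0],
              [np.nan, 50.0]])
groups = ['a', 'a', 'b']   # group membership along axis 0

print(group_median(x, groups, axis=0))
# Both 'a' rows become the per-column median of the 'a' rows ([2., 20.]);
# the single 'b' row maps to itself, with NaN remaining where the group has
# no finite value in that column.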
def stats(self, lmean=False, lmed=False, lskew=False, lvar=False, lstd=False, lcoefvar=False, lperc=False, p=0.95): """Calculate some statistics among every realisation. Each statistic is calculated node-wise along the complete number of realisations. Parameters ---------- lmean : boolean, default False Calculate the mean. lmed : boolean, default False Calculate the median. lskew : boolean, default False Calculate skewness. lvar : boolean, default False Calculate the variance. lstd : boolean, default False Calculate the standard deviation. lcoefvar : boolean, default False Calculate the coefficient of variation. lperc : boolean, default False Calculate the percentile `100 * (1 - p)`. p : number, default 0.95 Probability value. Returns ------- retdict : dict of GridArr Dictionary containing one GridArr for each calculated statistic. See Also -------- stats_area : same but considering a circular (and horizontal) area of a specified radius around a given point. """ # check if the map files are already opened or not if isinstance(self.files[0], file): opened_files = True else: opened_files = False if lmean: meanmap = np.zeros(self.cells) if lmed: medmap = np.zeros(self.cells) if lskew: skewmap = np.zeros(self.cells) if lvar: varmap = np.zeros(self.cells) if lstd: stdmap = np.zeros(self.cells) if lcoefvar: coefvarmap = np.zeros(self.cells) if lperc: percmap = np.zeros((self.cells, 2)) arr = np.zeros(self.nfiles) skip = True offset = os.SEEK_SET for cell in xrange(self.cells - self.header): for i, gridfile in enumerate(self.files): # deal with map files not open yet if opened_files: grid = gridfile else: grid = open(gridfile, 'rb') grid.seek(offset) if skip: skip_lines(grid, self.header) arr[i] = grid.readline() if not opened_files: offset = grid.tell() grid.close() skip = False # replace no data's with NaN bn.replace(arr, self.nodata, np.nan) if lmean: meanmap[cell] = bn.nanmean(arr) if lmed: medmap[cell] = bn.nanmedian(arr) if lskew: skewmap[cell] = pd.Series(arr).skew() if lvar: varmap[cell] = bn.nanvar(arr, ddof=1) if lstd: stdmap[cell] = bn.nanstd(arr, ddof=1) if lcoefvar: if lstd and lmean: coefvarmap[cell] = stdmap[cell] / meanmap[cell] * 100 else: std = bn.nanstd(arr, ddof=1) mean = bn.nanmean(arr) coefvarmap[cell] = std / mean * 100 if lperc: percmap[cell] = pd.Series(arr).quantile([(1 - p) / 2, 1 - (1 - p) / 2]) retdict = dict() if lmean: meangrid = GridArr(name='meanmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=meanmap) retdict['meanmap'] = meangrid if lmed: medgrid = GridArr(name='medianmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=medmap) retdict['medianmap'] = medgrid if lskew: skewgrid = GridArr(name='skewmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=skewmap) retdict['skewmap'] = skewgrid if lvar: vargrid = GridArr(name='varmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=varmap) retdict['varmap'] = vargrid if lstd: stdgrid = GridArr(name='stdmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=stdmap) retdict['stdmap'] = stdgrid if lcoefvar: coefvargrid = GridArr(name='coefvarmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=coefvarmap) retdict['coefvarmap'] = coefvargrid if lperc: percgrid = GridArr(name='percmap', dx=self.dx, dy=self.dy, dz=self.dz, nodata=self.nodata, val=percmap) retdict['percmap'] = percgrid return retdict
def stats_area(self, loc, tol=0, lmean=False, lmed=False, lskew=False, lvar=False, lstd=False, lcoefvar=False, lperc=False, p=0.95, save=False): """Calculate some statistics among every realisation, considering a circular (only horizontaly) area of radius `tol` around the point located at `loc`. Parameters ---------- loc : array_like Location of the vertical line [x, y]. tol : number, default 0 Tolerance radius used to search for neighbour nodes. lmean : boolean, default False Calculate the mean. lmed : boolean, default False Calculate the median. lskew : boolean, default False Calculate skewness. lvar : boolean, default False Calculate the variance. lstd : boolean, default False Calculate the standard deviation. lcoefvar : boolean, default False Calculate the coefficient of variation. lperc : boolean, default False Calculate the percentile `100 * (1 - p)`. p : number, default 0.95 Probability value. save : boolean, default False Write the points used to calculate the chosen statistics in PointSet format to a file named 'sim values at (x, y, line).prn'. Returns ------- statspset : PointSet PointSet instance containing the calculated statistics. .. TODO: checkar stats variance com geoms """ if lmean: meanline = np.zeros(self.dz) if lmed: medline = np.zeros(self.dz) if lskew: skewline = np.zeros(self.dz) if lvar: varline = np.zeros(self.dz) if lstd: stdline = np.zeros(self.dz) if lcoefvar: coefvarline = np.zeros(self.dz) if lperc: percline = np.zeros((self.dz, 2)) # convert the coordinates of the first point to grid nodes loc = coord_to_grid(loc, [self.cellx, self.celly, self.cellz], [self.xi, self.yi, self.zi])[:2] # find the nodes coordinates within a circle centred in the first point neighbours_nodes = circle(loc[0], loc[1], tol) # compute the lines numbers for each point in the neighbourhood, across # each grid layer. this yields a N*M matrix, with N equal to the number # of neighbour nodes, and M equal to the number of layers in the grid. 
neighbours_lines = [line_zmirror(node, [self.dx, self.dy, self.dz]) for node in neighbours_nodes] # sort the lines in ascending order neighbours_lines = np.sort(neighbours_lines, axis=0) # create an array to store the neighbour nodes in each grid file nnodes = neighbours_lines.shape[0] arr = np.zeros(self.nfiles * nnodes) skip = True curr_line = np.zeros(self.nfiles) for layer in xrange(neighbours_lines.shape[1]): for i, line in enumerate(neighbours_lines[:, layer]): for j, grid in enumerate(self.files): # skip header lines only once per grid file if skip and self.header: skip_lines(grid, self.header) # advance to the next line with a neighbour node skip_lines(grid, int(line - curr_line[j] - 1)) # read the line and store its value a = grid.readline() arr[i + j * nnodes] = float(a) curr_line[j] = line skip = False # replace no data's with NaN bn.replace(arr, self.nodata, np.nan) # compute the required statistics if lmean: meanline[layer] = bn.nanmean(arr) if lmed: medline[layer] = bn.nanmedian(arr) if lskew: skewline[layer] = pd.Series(arr).skew() if lvar: varline[layer] = bn.nanvar(arr, ddof=1) if lstd: stdline[layer] = bn.nanstd(arr, ddof=1) if lcoefvar: if lstd and lmean: coefvarline[layer] = stdline[layer] / meanline[layer] * 100 else: std = bn.nanstd(arr, ddof=1) mean = bn.nanmean(arr) coefvarline[layer] = std / mean * 100 if lperc: percline[layer] = pd.Series(arr).quantile([(1 - p) / 2, 1 - (1 - p) / 2]) if save and tol == 0: # FIXME: not working with the tolerance feature # need to adjust the arrpset or cherry-pick arr arrpset = PointSet('realisations at location ({0}, {1}, {2})'. format(loc[0], loc[1], layer * self.cellz + self.zi), self.nodata, 3, ['x', 'y', 'value'], values=np.zeros((self.nfiles, 3))) arrout = os.path.join(os.path.dirname(self.files[0].name), 'sim values at ({0}, {1}, {2}).prn'.format( loc[0], loc[1], layer * self.cellz + self.zi)) arrpset.values.iloc[:, 2] = arr arrpset.values.iloc[:, :2] = np.repeat(np.array(loc) [np.newaxis, :], self.nfiles, axis=0) arrpset.save(arrout, header=True) ncols = sum((lmean, lmed, lvar, lstd, lcoefvar, lskew)) if lperc: ncols += 2 statspset = PointSet(name='vertical line stats at (x,y) = ({0},{1})'. format(loc[0], loc[1]), nodata=self.nodata, nvars=3 + ncols, varnames=['x', 'y', 'z'], values=np.zeros((self.dz, 3 + ncols))) statspset.values.iloc[:, :3] = (np.column_stack (((np.repeat(np.array(loc) [np.newaxis, :], self.dz, axis=0)), np.arange(self.zi, self.zi + self.cellz * self.dz)))) j = 3 if lmean: statspset.varnames.append('mean') statspset.values.iloc[:, j] = meanline j += 1 if lmed: statspset.varnames.append('median') statspset.values.iloc[:, j] = medline j += 1 if lskew: statspset.varnames.append('skewness') statspset.values.iloc[:, j] = skewline j += 1 if lvar: statspset.varnames.append('variance') statspset.values.iloc[:, j] = varline j += 1 if lstd: statspset.varnames.append('std') statspset.values.iloc[:, j] = stdline j += 1 if lcoefvar: statspset.varnames.append('coefvar') statspset.values.iloc[:, j] = coefvarline j += 1 if lperc: statspset.varnames.append('lperc') statspset.varnames.append('rperc') statspset.values.iloc[:, -2:] = percline # reset the reading pointer in each grid file self.reset_read() # update varnames statspset.flush_varnames() return statspset
def _build_epsf_step(self, stars, epsf=None): """ A single iteration of improving an ePSF. Parameters ---------- stars : `EPSFStars` object The stars used to build the ePSF. epsf : `EPSFModel` object, optional The initial ePSF model. If not input, then the ePSF will be built from scratch. Returns ------- epsf : `EPSFModel` object The updated ePSF. """ if len(stars) < 1: raise ValueError('stars must contain at least one EPSFStar or ' 'LinkedEPSFStar object.') if epsf is None: # create an initial ePSF (array of zeros) epsf = self._create_initial_epsf(stars) else: # improve the input ePSF epsf = copy.deepcopy(epsf) # compute a 3D stack of 2D residual images residuals = self._resample_residuals(stars, epsf) self._residuals.append(residuals) # compute the sigma-clipped median along the 3D stack with warnings.catch_warnings(): warnings.simplefilter('ignore', category=RuntimeWarning) warnings.simplefilter('ignore', category=AstropyUserWarning) residuals = self.sigclip(residuals, axis=0, masked=False, return_bounds=False) if HAS_BOTTLENECK: residuals = bottleneck.nanmedian(residuals, axis=0) else: residuals = np.nanmedian(residuals, axis=0) self._residuals_sigclip.append(residuals) # interpolate any missing data (np.nan) mask = ~np.isfinite(residuals) if np.any(mask): residuals = _interpolate_missing_data(residuals, mask, method='cubic') # fill any remaining nans (outer points) with zeros residuals[~np.isfinite(residuals)] = 0. self._residuals_interp.append(residuals) # add the residuals to the previous ePSF image new_epsf = epsf.normalized_data + residuals # smooth the ePSF new_epsf = self._smooth_epsf(new_epsf) # recenter the ePSF new_epsf = self._recenter_epsf(new_epsf, epsf, centroid_func=self.recentering_func, box_size=self.recentering_boxsize, maxiters=self.recentering_maxiters, center_accuracy=1.0e-4) # normalize the ePSF data new_epsf /= np.sum(new_epsf, dtype=np.float64) # return the new ePSF object xcenter = (new_epsf.shape[1] - 1) / 2. ycenter = (new_epsf.shape[0] - 1) / 2. epsf_new = EPSFModel(data=new_epsf, origin=(xcenter, ycenter), normalize=False, oversampling=epsf.oversampling) return epsf_new
def nanmedian(array, axis=None):
    if isinstance(axis, tuple):
        array = _move_tuple_axes_first(array, axis=axis)
        axis = 0
    return bt.nanmedian(array, axis=axis)
def deltaconvert(series, visualize=False, max_adj_outliers=10): """Perform delta-conversion to given pd.Series. Delta-conversion returns 3 series as a tuple and possibly error message. First series (D) contains daily returns where all the data has been removed that could make comparison difficult with other assets. Second series (W) contains weekly returns where all the data has been removed that could make comparison difficult with other assets. Third series (DS) contains daily returns where all the outliers and erroneus data points have been removed but holes in data are not taken into account. This is more suitable for calculating performance scores etc. Arguments: series -- series to use visualize -- visualize results max_adj_outliers -- maximum number of adjancent outliers, if there are actually more adjancent outliers than this then they will not be considered outliers anymore. default value: 10 """ # MEDIAN_LEN = 50 ZSCORE_CUT_RATIO = 2 series = series.dropna() if len(series) < 50: raise DeltaConversionException("Not enough data") lines_taken = 0 if series.index[0] > series.index[-1]: raise DeltaConversionException("Wrong cronological order") # closes = [] # dates = [] # datesord = [] # for line in lines: # splitted = line.split(",") # closes.append(float(splitted[column])) # dt = datetime.strptime(splitted[0], "%Y-%m-%d").date() # dates.append(dt) # datesord.append(dt.toordinal()) # if datesord[-1] < datesord[0]: # closes.reverse() # dates.reverse() # datesord.reverse() # lines.reverse() closes = series dates = series.index num_invalid_prices = 0 deltapct = [np.nan] changescores = [np.nan] invalid_price_indices = [] for i in range(1, len(series)): if closes[i - 1] > 0 and closes[i] > 0: change = closes[i] / closes[i - 1] deltapct.append(change - 1) changescore = change_to_score(change) changescores.append(changescore) else: deltapct.append(np.nan) changescores.append(np.nan) num_invalid_prices += 1 invalid_price_indices.append(i) logging.debug("Cannot determine changescore at {} ({} / {})".format(dates[i], closes[i], closes[i - 1])) # # remove zeroes (data may only end with price zero if stock goes bankrupt...) 
    # first_nonzero_idx = [i for i, val in enumerate(closes[:-1]) if val == 0]
    # del closes[:first_nonzero_idx]
    # del dates[:first_nonzero_idx]
    # lines_taken += first_nonzero_idx
    # if first_nonzero_idx > 0:
    #     logging.debug("{}: removed {} zero-lines from the beginning.".format(filename, first_nonzero_idx))

    num_gaps = 0
    num_invalid_chrono_orders = 0
    gap_indices = []
    for i in range(len(dates) - 1, 0, -1):
        d = (dates[i] - dates[i - 1]).days
        # only standard weekends are allowed
        if d == 3:
            if dates[i].weekday() != 0:  # not monday
                # deltapct[i] = np.nan
                # changescores[i] = np.nan
                num_gaps += 1
                gap_indices.append(i)
                logging.log(5, "Non-weekend gap of 2 day(s) at {}".format(dates[i]))
        elif d > 1:
            # deltapct[i] = np.nan
            # changescores[i] = np.nan
            num_gaps += 1
            gap_indices.append(i)
            logging.log(5, "Non-weekend gap of {} day(s) at {}".format(d, dates[i]))
        elif d <= 0:
            del deltapct[i], dates[i], closes[closes.index[i]], changescores[i]
            logging.warning("Invalid chronological order ({} day(s)) at {}"
                            .format(d - 1, dates[i]))
            num_invalid_chrono_orders += 1

    deltapct = np.asarray(deltapct)
    changescores = np.asarray(changescores)
    std_score = bn.nanstd(changescores)
    zscores = np.abs(changescores) / std_score
    mean_z = bn.nanmean(zscores)

    zscores_set = list(set(zscores[(~np.isnan(zscores)) & (zscores > 0)]))
    zscores_set.sort()
    outlier_z = None
    maxpctdiff = 0
    for i in range(int(len(zscores_set) * .95), len(zscores_set)):
        pctdiff = zscores_set[i] / zscores_set[i - 1]
        maxpctdiff = pctdiff
        # logging.info("{}: {}".format(i / len(zscores_set), pctdiff))
        if pctdiff >= 2:
            outlier_z = zscores_set[i]
            second_highest_z = zscores_set[i - 1]
            break

    possible_outliers = []
    confirmed_outliers = []
    localmean_factors = []
    if outlier_z:
        logging.log(5, "Outlier z-score: {:.2f}, earlier z-score: {:.2f}, mean z-score: {:.5f}"
                    .format(outlier_z, second_highest_z, mean_z))
        for i in range(len(zscores)):
            if zscores[i] >= outlier_z:
                localmean = bn.nanmean(zscores[max(0, i - 50):min(len(zscores) + 1, i + 50)])
                localmean_factor = np.sqrt(mean_z / localmean)
                score = (zscores[i] / second_highest_z) * localmean_factor
                logging.log(5, "Possible outlier at {}: localmean_factor: {:.2f}, zscore: {:.2f}, score: {:.2f}"
                            .format(dates[i], localmean_factor, zscores[i], score))
                if score >= ZSCORE_CUT_RATIO:
                    logging.debug("Possible outlier at {} (z-score={:.2f}, deltapct={:.2%})"
                                  .format(dates[i], zscores[i], deltapct[i]))
                    # deltapct[i] = np.nan
                    possible_outliers.append(i)
                    localmean_factors.append(localmean_factor)

        if len(possible_outliers) == 1:
            confirmed_outliers = possible_outliers

        for i in range(1, len(possible_outliers)):
            firstidx = possible_outliers[i - 1]
            secondidx = possible_outliers[i]
            # opposite signs and not too far from each other
            if deltapct[firstidx] * deltapct[secondidx] < 0 \
                    and secondidx - firstidx + 1 <= max_adj_outliers:
                firstnonan = None
                for i2 in range(firstidx, -1, -1):
                    if not np.isnan(deltapct[i2]):
                        firstnonan = i2
                        break
                confirmed = False
                if not firstnonan:
                    confirmed = True
                if firstnonan:
                    if i == 1:
                        left_mean = bn.nanmedian(closes[max(0, firstnonan - (max_adj_outliers - 1)):firstnonan + 1])
                    else:
                        left_mean = bn.nanmedian(closes[max(0, possible_outliers[i - 2],
                                                            firstnonan - (max_adj_outliers - 1)):firstnonan + 1])
                    right_mean = bn.nanmedian(closes[firstidx:secondidx])
                    changescore = change_to_score(right_mean / left_mean)
                    zscore = abs(changescore) / std_score
                    score_left_vs_mid = (zscore / second_highest_z) * localmean_factors[i - 1]

                    left_mean = right_mean
                    right_mean = bn.nanmedian(closes[secondidx:min(secondidx + max_adj_outliers, len(closes))])
                    changescore = change_to_score(right_mean / left_mean)
                    zscore = abs(changescore) / std_score
                    score_mid_vs_right = (zscore / second_highest_z) * localmean_factors[i]

                    if score_left_vs_mid > ZSCORE_CUT_RATIO * .75 and score_mid_vs_right > ZSCORE_CUT_RATIO * .75:
                        confirmed = True
                if confirmed:
                    indices = [i2 for i2 in range(firstidx, secondidx + 1)]
                    deltapct[indices] = np.nan
                    confirmed_outliers += indices
    else:
        logging.debug("No possible outliers found based on initial z-score analysis (maxpctdiff: {})"
                      .format(maxpctdiff))

    if visualize:
        # TODO: make this work with DataFrame
        pass
        # closes_arr = np.asarray(closes.get_values())
        # datesord = np.asarray(datesord)
        # plt.subplot(2, 1, 1)
        # plt.plot(datesord - datesord[0], closes_arr, 'b*')
        # plt.plot(datesord[gap_indices] - datesord[0], closes_arr[gap_indices], 'ob')
        # plt.plot(datesord[confirmed_outliers] - datesord[0], closes_arr[confirmed_outliers], 'or')
        # plt.plot(datesord[invalid_price_indices] - datesord[0], closes_arr[invalid_price_indices], 'om')
        # plt.subplot(2, 1, 2)
        # plt.plot(datesord - datesord[0], zscores, 'o')
        # plt.show()

    logging.debug("Conversion result: lines = {}, invalid closes = {}, gaps = {}, invalid dates = {}, outliers = {}"
                  .format(len(series) - lines_taken, num_invalid_prices, num_gaps,
                          num_invalid_chrono_orders, len(confirmed_outliers)))

    indices_to_rem = list(set(gap_indices + confirmed_outliers + invalid_price_indices))
    # datesordmod = np.delete(datesord, indices_to_rem)
    datesmod = dates.copy()
    datesmod = datesmod.delete(indices_to_rem)
    deltapctmod = np.delete(deltapct, indices_to_rem)
    closesmod = closes.drop(closes.index[indices_to_rem])
    assert not np.any(np.isnan(deltapctmod[1:]))

    weeklydeltapct = []
    weeklydatesmod = []
    lastidx = -1
    # resample to W-FRI (could be done with pandas)
    for i in range(len(closesmod)):
        if datesmod[i].weekday() == 4:
            dd = (datesmod[i] - datesmod[lastidx]).days
            if lastidx >= 0 or dd == 7:
                if closesmod[lastidx] >= 0:
                    weeklydeltapct.append(closesmod[i] / closesmod[lastidx] - 1)
                    weeklydatesmod.append(datesmod[i])
            else:
                logging.log(5, "Weekly bar at {} skipped (delta: {} days)".format(datesmod[i], dd))
            lastidx = i

    res_daily = pd.Series(deltapctmod, datesmod)
    res_weekly = pd.Series(weeklydeltapct, weeklydatesmod)

    indices_to_rem = list(set(confirmed_outliers + invalid_price_indices))
    datesmod = dates.copy()
    datesmod = datesmod.delete(indices_to_rem)
    deltapctmod = np.delete(deltapct, indices_to_rem)
    assert not np.any(np.isnan(deltapctmod[1:]))
    res_dailyscore = pd.Series(deltapctmod, datesmod)

    return res_daily, res_weekly, res_dailyscore
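# Editor's note: the conversion routine above calls a change_to_score() helper
# that is not included in this excerpt. A minimal sketch follows, under the
# assumption that the score is a symmetric, sign-preserving measure of relative
# change (a log return behaves this way); the actual definition may differ.
def change_to_score(ratio):
    """Hypothetical stand-in: map a price ratio to a symmetric change score."""
    return np.log(ratio)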
def parallel_compute(queue, return_queue, shmem_buffer, shmem_results,
                     size_x, size_y, len_filelist, operation):
    #queue, shmem_buffer, shmem_results, size_x, size_y, len_filelist = worker_args

    # buffer = shmem_as_ndarray(shmem_buffer).reshape((size_x, size_y, len_filelist))
    buffer = shmem_buffer.to_ndarray()
    # result_buffer = shmem_as_ndarray(shmem_results).reshape((size_x, size_y))
    result_buffer = shmem_results.to_ndarray()

    logger = logging.getLogger("ParallelImcombine")
    logger.debug("Operation: %s, #samples/pixel: %d" % (operation, len_filelist))

    while (True):
        line = queue.get()
        if (line is None):
            queue.task_done()
            break

        if (operation == "median"):
            result_buffer[line,:] = numpy.median(buffer[line,:,:], axis=1)

        elif (operation == "medsigclip"):
            # Do not use (yet), is slow as hell
            # (maskedarrays are pure python, not C as all the rest)

            #print buffer[line,:,:].shape
            _sigma_plus = numpy.ones(shape=(buffer.shape[1], buffer.shape[2])) * 1e9
            _sigma_minus = numpy.ones(shape=(buffer.shape[1], buffer.shape[2])) * 1e9
            _median = numpy.median(buffer[line,:,:], axis=1)

            nrep = 3
            valid_pixels = numpy.ma.MaskedArray(buffer[line,:,:])
            for rep in range(nrep):
                _median_2d = _median.reshape(_median.shape[0],1).repeat(buffer.shape[2], axis=1)
                _min = _median_2d - 3 * _sigma_minus
                _max = _median_2d + 3 * _sigma_plus

                #valid_pixels = numpy.ma.masked_inside(buffer[line,:,:], _min, _max)
                valid = (buffer[line,:,:] > _min) & (buffer[line,:,:] < _max)

                valid_pixels = numpy.ma.array(buffer[line,:,:], mask=valid)
                #valid_pixels = numpy.ma.MaskedArray(buffer[line,:,:], valid)

                #print _min.shape, valid.shape, valid_pixels.shape
                #if (numpy.sum(valid, axis=1).any() <= 0):
                #    break

                #_median = numpy.median(buffer[line,:,:][valid], axis=1)
                _median = numpy.median(valid_pixels, axis=1)
                if (rep < nrep-1):
                    #_sigma_plus = scipy.stats.scoreatpercentile(buffer[line,:,:][valid], 84) - _median
                    #_sigma_minus = _median - scipy.stats.scoreatpercentile(buffer[line,:,:][valid], 16)
                    _sigma_plus = scipy.stats.scoreatpercentile(valid_pixels, 84) - _median
                    _sigma_minus = _median - scipy.stats.scoreatpercentile(valid_pixels, 16)

            result_buffer[line,:] = _median

        elif (operation == "sigclipx"):
            stdout_write(".")
            rep_count = 2

            _line = buffer[line,:,:].astype(numpy.float32)
            # print _line.shape

            mask = numpy.isfinite(_line)
            #print "line.shape=",_line.shape
            # numpy.savetxt("line_block_%d.dat" % (line), _line)

            def sigclip_pixel(pixelvalue):
                mask = numpy.isfinite(pixelvalue)
                old_mask = mask
                rep = 0
                while (rep < rep_count and numpy.sum(mask) > 3):
                    old_mask = mask

                    mss = scipy.stats.scoreatpercentile(pixelvalue[mask], [16,50,84])
                    lower = mss[1] - 3 * (mss[1] - mss[0])  # median - 3*sigma
                    upper = mss[1] + 3 * (mss[2] - mss[1])  # median + 3*sigma

                    mask = (pixelvalue > lower) & (pixelvalue < upper)
                    rep += 1
                    if (rep == rep_count or numpy.sum(mask) < 3):
                        mask = old_mask

                return numpy.mean(pixelvalue[mask])

            result_buffer[line,:] = [sigclip_pixel(_line[x,:]) for x in range(_line.shape[0])]

        elif (operation == "sigmaclipmean"):
            _line = buffer[line,:,:].astype(numpy.float64)
            output = numpy.zeros(shape=(_line.shape[0]))
            podi_cython.sigma_clip_mean(_line, output)
            result_buffer[line,:] = output

        elif (operation == "sigmaclipmedian"):
            _line = buffer[line,:,:].astype(numpy.float64)
            output = numpy.zeros(shape=(_line.shape[0]))
            podi_cython.sigma_clip_median(_line, output)
            result_buffer[line,:] = output

        elif (operation == "weightedmean"):
            _line = buffer[line,:,:].astype(numpy.float32)
            result_buffer[line,:] = weighted_mean(_line)

        elif (operation == "medclip"):
            intermediate = numpy.sort(buffer[line,:,:], axis=1)
            result_buffer[line,:] = numpy.median(intermediate[:,1:-2], axis=1)

        elif (operation == "min"):
            result_buffer[line,:] = numpy.min(buffer[line,:,:], axis=1)

        elif (operation == "max"):
            result_buffer[line,:] = numpy.max(buffer[line,:,:], axis=1)

        elif (operation == "nanmean"):
            result_buffer[line,:] = scipy.stats.nanmean(buffer[line,:,:], axis=1)

        elif (operation == "nanmedian"):
            result_buffer[line,:] = scipy.stats.nanmedian(buffer[line,:,:], axis=1)

        elif (operation == "nanmedian.bn"):
            x = numpy.array(buffer[line,:,:], dtype=numpy.float32)
            result_buffer[line,:] = bottleneck.nanmedian(x, axis=1)
            x = None
            del x

        elif (operation == "nanmean.bn"):
            x = numpy.array(buffer[line,:,:], dtype=numpy.float32)
            result_buffer[line,:] = bottleneck.nanmean(x, axis=1)
            x = None
            del x

        else:
            result_buffer[line,:] = numpy.mean(buffer[line,:,:], axis=1)

        return_queue.put(line)
        queue.task_done()

    buffer = None
    shmem_buffer = None
    del shmem_buffer
    del buffer

    sys.exit(0)

    return
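# Editor's illustration: a minimal driver sketch for the worker above, not part
# of the original module. It assumes shmem_buffer / shmem_results are shared-
# memory wrappers exposing to_ndarray() (as used inside parallel_compute) and
# that the work unit is one image row.
import multiprocessing

def imcombine_sketch(shmem_buffer, shmem_results, size_x, size_y, n_files,
                     operation="nanmedian.bn", n_workers=4):
    queue = multiprocessing.JoinableQueue()
    return_queue = multiprocessing.Queue()
    workers = []
    for _ in range(n_workers):
        p = multiprocessing.Process(
            target=parallel_compute,
            args=(queue, return_queue, shmem_buffer, shmem_results,
                  size_x, size_y, n_files, operation))
        p.start()
        workers.append(p)
    for line in range(size_x):
        queue.put(line)           # one work unit per image row
    for _ in workers:
        queue.put(None)           # poison pill shuts each worker down
    queue.join()
    return shmem_results.to_ndarray()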
def subtract_background(data, radius, angle, radius_range, binfac, logger=None):
    """
    This routine takes the input in polar coordinates and fits a straight line
    to the radial profile inside and outside of the allowed range. This is
    assumed to be the background level (in analogy to the algorithm used in
    the IRAF task mkpupil).

    Input data:
    - data (the actual intensity values for all pixels)
    - radius (the r in the polar coordinates)
    - angle (the phi in polar coordinates)
    - radius_range (r_inner, r_outer, d_radius)
    - binfac (the binning used for the data)
    """

    if (logger is None):
        logger = logging.getLogger("BGSub")

    # Compute the radial bin size in binned pixels
    logger.debug("subtracting background - binfac=%d" % (binfac))

    r_inner, r_outer, dr_full = radius_range
    dr = dr_full / binfac
    r_inner /= binfac
    r_outer /= binfac

    #
    # Compute the number of radial bins
    #
    # Here: Add some correction if the center position is outside the covered area
    max_radius = 1.3 * r_outer  #math.sqrt(data.shape[0] * data.shape[1])
    # Splitting up image into a number of rings
    n_radii = int(math.ceil(max_radius / dr))

    #
    # Compute the background level as a linear interpolation of the levels
    # inside and outside of the pupil ghost
    #
    logger.info("Computing background-level ...")

    # Define the background ring levels
    radii = numpy.arange(0, max_radius, dr)
    background_levels = numpy.zeros(shape=(n_radii))
    background_level_errors = numpy.ones(shape=(n_radii)) * 1e9
    background_levels[:] = numpy.NaN

    for i in range(n_radii):
        ri = i * dr
        ro = ri + dr

        if (ri < r_inner):
            ro = numpy.min([ro, r_inner])
        elif (ro > r_outer):
            ri = numpy.max([ri, r_outer])
        # else:
        #     # Skip the rings within the pupil ghost range for now
        #     continue

        #print i, ri, ro
        median, count = get_median_level(data, radius, ri, ro)

        background_levels[i] = median
        background_level_errors[i] = 1. / math.sqrt(count) if count > 0 else 1e9

    # Now fit a straight line to the continuum, assuming it varies
    # only linearly (if at all) with radius
    # define our (line) fitting function

    #print "XXXXXXX", radii.shape, background_levels.shape
    numpy.savetxt("radial__%s" % ("x"),
                  numpy.append(radii.reshape((-1,1)),
                               background_levels.reshape((-1,1)), axis=1))
    #print "saved"

    # Find average intensity at the largest radii
    avg_level = bottleneck.nanmedian(background_levels[radii > 4000])
    #print "avg_level=",avg_level

    #
    # Compute a profile without background interpolation to allow for easier
    # scaling of the pupilghost when subtracting the pupilghost from the data
    # frames
    #

    #
    # Normalize profile
    #
    normalize_region = ((radii < 1100) & (radii > 600)) | \
                       ((radii > 4000) & (radii < 4600))
    normalize_flux = numpy.mean(background_levels[normalize_region])
    logger.info("normalization flux = %f" % (normalize_flux))

    #
    # Subtract background and normalize all measurements
    #
    normalized_bgsub_profile = (background_levels - normalize_flux) / normalize_flux

    # fitfunc = lambda p, x: p[0] + p[1] * x
    # errfunc = lambda p, x, y, err: (y - fitfunc(p, x)) / err
    # bg_for_fit = background_levels
    # #bg_for_fit[numpy.isnan(background_levels)] = 0
    # bg_for_fit[((radii > ri) & (radii < ro))] = 0
    # pinit = [0.0, 0.0]  # Assume no slope and constant level of 0
    # out = scipy.optimize.leastsq(errfunc, pinit,
    #                              args=(radii, background_levels, background_level_errors),
    #                              full_output=1)
    # pfinal = out[0]
    # covar = out[1]
    # stdout_write(" best-fit: %.2e + %.3e * x\n" % (pfinal[0], pfinal[1]))

    #print pfinal
    #print covar

    # #
    # # Now we have the fit for the background, compute the 2d background
    # # image and subtract it out
    # #
    # x = numpy.linspace(0, max_radius, 100)
    # y_fit = radii * pfinal[1] + pfinal[0]
    # background = pfinal[0] + pfinal[1] * radius
    # bg_sub = ((data - normalize_flux) / normalize_flux) - background

    # bg_sub_profile = background_levels - (pfinal[0] + pfinal[1]*radii)
    # numpy.savetxt("radial__%s" % ("bgsub"),
    #               numpy.append(radii.reshape((-1,1)),
    #                            bg_sub_profile.reshape((-1,1)), axis=1))

    #
    # Use the profile and fit a spline to the underlying shape
    #
    spl = fit_spline_background(radii, background_levels, logger=logger)

    background_1d = spl(radius.flatten())
    background_2d = background_1d.reshape(radius.shape)

    bg_sub = (data - background_2d) / background_2d

    #if (write_intermediate):
    #    bgsub_hdu = pyfits.PrimaryHDU(data=bg_sub)
    #    bgsub_hdu.writeto("bgsub.fits", clobber=True)

    #
    # Combine all radial template profiles so we can store them in the output
    # file. This is required to allow for faster and more accurate scaling
    # of the pupilghost during subtraction from the data files
    #
    profiles = numpy.empty((radii.shape[0], 4))
    profiles[:,0] = radii[:]
    profiles[:,1] = normalized_bgsub_profile[:]
    profiles[:,2] = spl(radii[:])
    profiles[:,3] = (background_levels - profiles[:,2]) / profiles[:,2]

    peak_flux = numpy.max(profiles[:,3][numpy.isfinite(profiles[:,3])])

    # numpy.savetxt("radial__%s" % ("norm+bgsub"),
    #               numpy.append(radii.reshape((-1,1)),
    #                            normalized_bgsub_profile.reshape((-1,1)), axis=1))

    return bg_sub, profiles, peak_flux
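# Editor's sketch of the get_median_level() helper called above; it is not part
# of this excerpt. The assumption here is that it returns the median intensity
# of all finite pixels whose radius falls inside [ri, ro), plus the number of
# contributing pixels; the real implementation may apply additional clipping.
def get_median_level(data, radius, ri, ro):
    in_ring = (radius >= ri) & (radius < ro) & numpy.isfinite(data)
    pixels = data[in_ring].astype(numpy.float32)
    if (pixels.size <= 0):
        return numpy.NaN, 0
    return bottleneck.nanmedian(pixels), pixels.size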
def _fit(self, X, y):
    self.X, y = self._check_params(X, y)
    n, p = X.shape
    self.y = y.reshape((n, 1))

    # list of selected features
    S = []
    # list of all features
    F = range(p)

    if self.n_features != 'auto':
        feature_mi_matrix = np.zeros((self.n_features, p))
    else:
        feature_mi_matrix = np.zeros((n, p))
    feature_mi_matrix[:] = np.nan
    S_mi = []

    # ----------------------------------------------------------------------
    # FIND FIRST FEATURE
    # ----------------------------------------------------------------------

    # check a range of ks (3-10), and choose the one with the max median MI
    k_min = 3
    k_max = 11
    xy_MI = np.zeros((k_max - k_min, p))
    xy_MI[:] = np.nan
    for i, k in enumerate(range(k_min, k_max)):
        xy_MI[i, :] = mi.get_first_mi_vector(self, k)
    xy_MI = bn.nanmedian(xy_MI, axis=0)

    # choose the best, add it to S, remove it from F
    S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
    S_mi.append(bn.nanmax(xy_MI))

    # notify user
    if self.verbose > 0:
        self._print_results(S, S_mi)

    # ----------------------------------------------------------------------
    # FIND SUBSEQUENT FEATURES
    # ----------------------------------------------------------------------

    while len(S) < self.n_features:
        # loop through the remaining unselected features and calculate MI
        s = len(S) - 1
        feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, s)

        # make decision based on the chosen FS algorithm
        fmm = feature_mi_matrix[:len(S), F]
        if self.method == 'JMI':
            selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
        elif self.method == 'JMIM':
            selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
        elif self.method == 'MRMR':
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            selected = F[bn.nanargmax(MRMR)]

        # record the JMIM of the newly selected feature and add it to S
        S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
        S, F = self._add_remove(S, F, selected)

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # if n_features == 'auto', let's check the S_mi to stop
        if self.n_features == 'auto' and len(S) > 10:
            # smooth the 1st derivative of the MI values of previously sel
            MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
            # does the mean of the last 5 converge to 0?
            if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                break

    # ----------------------------------------------------------------------
    # SAVE RESULTS
    # ----------------------------------------------------------------------

    self.n_features_ = len(S)
    self.support_ = np.zeros(p, dtype=np.bool)
    self.support_[S] = 1
    self.ranking_ = S
    self.mi_ = S_mi

    return self
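# Editor's illustration of the "FIND FIRST FEATURE" reduction above: each row
# of mi_per_k holds the MI estimates of all features for one choice of k
# (k = 3..10 in _fit), and the first selected feature is the one with the
# largest per-feature median across k. The numbers below are made up.
mi_per_k = np.array([[0.10, 0.42, np.nan],
                     [0.12, 0.40, 0.05],
                     [0.09, 0.47, 0.07]])
median_mi = bn.nanmedian(mi_per_k, axis=0)   # -> array([0.10, 0.42, 0.06])
first_feature = bn.nanargmax(median_mi)      # -> 1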
def parallel_compute(queue, shmem_buffer, shmem_results, size_x, size_y, len_filelist, operation):
    #queue, shmem_buffer, shmem_results, size_x, size_y, len_filelist = worker_args

    buffer = shmem_as_ndarray(shmem_buffer).reshape((size_x, size_y, len_filelist))
    result_buffer = shmem_as_ndarray(shmem_results).reshape((size_x, size_y))

    while (True):
        cmd_quit, line = queue.get()
        if (cmd_quit):
            queue.task_done()
            return

        if (operation == "median"):
            result_buffer[line,:] = numpy.median(buffer[line,:,:], axis=1)

        elif (operation == "medsigclip"):
            # Do not use (yet), is slow as hell
            # (maskedarrays are pure python, not C as all the rest)

            #print buffer[line,:,:].shape
            _sigma_plus = numpy.ones(shape=(buffer.shape[1], buffer.shape[2])) * 1e9
            _sigma_minus = numpy.ones(shape=(buffer.shape[1], buffer.shape[2])) * 1e9
            _median = numpy.median(buffer[line,:,:], axis=1)

            nrep = 3
            valid_pixels = numpy.ma.MaskedArray(buffer[line,:,:])
            for rep in range(nrep):
                _median_2d = _median.reshape(_median.shape[0],1).repeat(buffer.shape[2], axis=1)
                _min = _median_2d - 3 * _sigma_minus
                _max = _median_2d + 3 * _sigma_plus

                #valid_pixels = numpy.ma.masked_inside(buffer[line,:,:], _min, _max)
                valid = (buffer[line,:,:] > _min) & (buffer[line,:,:] < _max)

                valid_pixels = numpy.ma.array(buffer[line,:,:], mask=valid)
                #valid_pixels = numpy.ma.MaskedArray(buffer[line,:,:], valid)

                #print _min.shape, valid.shape, valid_pixels.shape
                #if (numpy.sum(valid, axis=1).any() <= 0):
                #    break

                #_median = numpy.median(buffer[line,:,:][valid], axis=1)
                _median = numpy.median(valid_pixels, axis=1)
                if (rep < nrep-1):
                    #_sigma_plus = scipy.stats.scoreatpercentile(buffer[line,:,:][valid], 84) - _median
                    #_sigma_minus = _median - scipy.stats.scoreatpercentile(buffer[line,:,:][valid], 16)
                    _sigma_plus = scipy.stats.scoreatpercentile(valid_pixels, 84) - _median
                    _sigma_minus = _median - scipy.stats.scoreatpercentile(valid_pixels, 16)

            result_buffer[line,:] = _median

        elif (operation == "medclip"):
            intermediate = numpy.sort(buffer[line,:,:], axis=1)
            result_buffer[line,:] = numpy.median(intermediate[:,1:-2], axis=1)

        elif (operation == "min"):
            result_buffer[line,:] = numpy.min(buffer[line,:,:], axis=1)

        elif (operation == "max"):
            result_buffer[line,:] = numpy.max(buffer[line,:,:], axis=1)

        elif (operation == "nanmean"):
            result_buffer[line,:] = scipy.stats.nanmean(buffer[line,:,:], axis=1)

        elif (operation == "nanmedian"):
            #print "nanmedian"
            result_buffer[line,:] = scipy.stats.nanmedian(buffer[line,:,:], axis=1)

        elif (operation == "nanmean.bn"):
            x = numpy.array(buffer[line,:,:], dtype=numpy.float32)
            result_buffer[line,:] = bottleneck.nanmean(x, axis=1)

        elif (operation == "nanmedian.bn"):
            #print "nanmedian"
            x = numpy.array(buffer[line,:,:], dtype=numpy.float32)
            result_buffer[line,:] = bottleneck.nanmedian(x, axis=1)
            #result_buffer[line,:] = scipy.stats.nanmedian(buffer[line,:,:], axis=1)

        elif (operation == "nansum.bn"):
            x = numpy.array(buffer[line,:,:], dtype=numpy.float32)
            result_buffer[line,:] = bottleneck.nansum(x, axis=1)

        else:
            result_buffer[line,:] = numpy.mean(buffer[line,:,:], axis=1)

        queue.task_done()
xvals = np.arange(newweight.shape[1])
yvals = np.arange(newweight.shape[0])
X, Y = np.meshgrid(xvals, yvals)
badx = X[newweight == False]
bady = Y[newweight == False]
imagearr[bady, badx, :, i] = np.nan

#med_image = np.zeros(imagearr[:,:,:,-1].shape)
#med_image[:,:,0] = bn.nanmedian(imagearr[:,:,0,:i+1],axis=2)
#med_image[:,:,1] = bn.nanmedian(imagearr[:,:,1,:i+1],axis=2)
#med_image[:,:,2] = bn.nanmedian(imagearr[:,:,2,:i+1],axis=2)

avg_image = jointimage*weightimage.reshape(weightimage.shape+(1,)) \
    + unwarped_newimage*newweight.reshape(newweight.shape+(1,))
weightimage += newweight
avg_image /= weightimage.reshape(weightimage.shape+(1,))

#Replace the currframe with the avg_image:
currframe = frame(cap, -1, image=ski_util.img_as_ubyte(avg_image))
#currframe = frame(cap,-1,image=ski_util.img_as_ubyte(med_image))

ski_io.imsave('test_avg.jpg', currframe.rawimage)

medianed_image = np.zeros(imagearr[:,:,:,0].shape)
medianed_image[:,:,0] = bn.nanmedian(imagearr[:,:,0,:], axis=2)
medianed_image[:,:,1] = bn.nanmedian(imagearr[:,:,1,:], axis=2)
medianed_image[:,:,2] = bn.nanmedian(imagearr[:,:,2,:], axis=2)

ski_io.imsave('test_median.jpg', medianed_image)

for i in range(len(chronological_order)):
    curridx = chronological_order[i]
    print frameorder[curridx],
    currimage = insert_image(medianed_image, imagearr[:,:,:,curridx], adjustmask=1, overlap=20)
    ski_io.imsave('test_out_{0:d}.jpg'.format(frameorder[curridx]), currimage)
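# Editor's note: the three per-channel calls above are mathematically
# equivalent to a single reduction over the frame axis, e.g.
#
#     medianed_image = bn.nanmedian(imagearr, axis=3)
#
# although bottleneck may fall back to a slower code path for 4-D input.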
# skyvalue[:,:] = numpy.NaN
for cx, cy in itertools.product(range(8), repeat=2):
    #
    # Get pixel coord for this cell
    #
    #print binning
    x1, x2, y1, y2 = cell2ota__get_target_region(cx, cy, binning=binning, trimcell=0)
    x21 = (x2-x1)//2

    # extract the mean value in the bottom corner
    corner = bottleneck.nanmean(ext.data[y1:y1+w, x1:x1+w].astype(numpy.float32))

    # also get the value in the bottom center
    center = bottleneck.nanmean(ext.data[y1:y1+w, x1+x21-w//2:x1+x21+w//2].astype(numpy.float32))

    excess = corner - center
    #print ext.name, cx, cy, corner, center, excess
    excesses[cx, cy] = excess

_mean = bottleneck.nanmean(excesses)
_median = bottleneck.nanmedian(excesses)

guideota = (_median > 10*skynoise)

print(ext.name, _mean, _median, skylevel, guideota)
#break
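# Editor's illustration of why the cell excesses are summarized with nanmedian
# rather than nanmean: a single strongly deviating (guide) cell shifts the mean
# but leaves the median untouched. Made-up numbers:
#
#     excesses = numpy.zeros((8, 8)); excesses[0, 0] = 500.0
#     bottleneck.nanmean(excesses)     # ~7.8, pulled up by the one hot cell
#     bottleneck.nanmedian(excesses)   # 0.0, unaffected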