def mad_ultimi_12_diviso_mad_totale_per_unita(data, cons_dict):
    """
    Computes, for each id, the MAD of the last 12 months of consumption
    divided by the MAD of the full consumption history.

    Parameters
    ----------
    cons_dict: dict
        consumption data
    data: pandas.DataFrame
        metadata matrix, taken before the string values in some columns
        are replaced by consumption averages

    Returns
    -------
    The MAD of the last 12 months divided by the MAD of the full
    consumption history, for each id.
    """
    data = data.copy()
    mad = []
    for i in data.index:
        # the small constant in the denominator guards against division by zero
        mad.append(median_absolute_deviation(cons_dict[i][-12:], ignore_nan=True)
                   / (median_absolute_deviation(cons_dict[i], ignore_nan=True) + 0.001))
    return mad
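# A minimal usage sketch for the ratio above, assuming astropy's
# median_absolute_deviation and synthetic consumption series; the
# DataFrame index plays the role of the unit ids.
import numpy as np
import pandas as pd
from astropy.stats import median_absolute_deviation

rng = np.random.default_rng(0)
ids = ["unit_a", "unit_b"]
cons_dict = {i: rng.normal(100, 10, size=36) for i in ids}  # 36 months each
data = pd.DataFrame(index=ids)

ratios = mad_ultimi_12_diviso_mad_totale_per_unita(data, cons_dict)
# Values near 1 mean the last year is about as dispersed as the whole history.
print(dict(zip(ids, ratios)))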
def flux_variability_plot(flux, fluxchan, starflux=[], normalised=False,
                          stars=False):
    ''' Function to plot the variability vs mean flux plot using the MAD
    statistic. Optional input of fluxes of stars or normalising the plot.
    Inputs:
        flux = array of flux values for UDS objects
        fluxchan = array of flux values for chandra objects
        starflux = optional array of fluxes for stars
        normalised = True or False (default) depending if the fluxes should
                     be normalised to the objects average flux
        stars = True or False (default) depending on if stars should be
                added to the plot
    Output:
        fig = figure handle to allow clicking for light curves to be
              enabled if required
    '''
    fig = plt.figure()
    avgfluxperob = np.mean(flux, axis=1)  # for UDS
    avgfluxchanperob = np.mean(fluxchan, axis=1)  # for non-stellar chandra
    if stars == True:
        savgfluxperob = np.mean(starflux, axis=1)  # for stars

    ### Check if normalisation is true and normalise if necessary ###
    if normalised == True:
        flux = normalise(flux)
        fluxchan = normalise(fluxchan)
        if stars == True:
            starflux = normalise(starflux)

    ### Find out which plot type is specified and calculate appropriate statistic ###
    vary = median_absolute_deviation(flux, axis=1)
    varychan = median_absolute_deviation(fluxchan, axis=1)

    ### Plot the variability v mean as appropriate ###
    if stars == True:
        varystar = median_absolute_deviation(starflux, axis=1)
        plt.plot(savgfluxperob, varystar, 'm*', mfc='none', markersize=10,
                 label='Secure Star')
    line, = plt.plot(avgfluxperob, vary, 'b+', label='UDS Source', picker=2)
    plt.plot(avgfluxchanperob, varychan, 'ro', mfc='none', markersize=10,
             label='Chandra Source')  # no picker as will be selected in the UDS point

    ### Apply required plot characteristics ###
    plt.yscale('log')
    plt.xlabel('Mean Magnitude')
    plt.ylabel('MAD')
    plt.legend()
    return fig
def distance_metric(X_first, X_second):
    # NOTE: relies on a module-level DataFrame `X` for the MAD scale of
    # each feature.
    budget_weight = gauss_fct((X_first.budget_day - X_second.budget_day),
                              0, median_absolute_deviation(X['budget_day']))
    duration_weight = gauss_fct(
        (X_first.duration_event_days - X_second.duration_event_days),
        0, median_absolute_deviation(X['duration_event_days']))
    anticipation_weight = gauss_fct(
        (X_first.nbr_days_before_event - X_second.nbr_days_before_event),
        0, median_absolute_deviation(X['nbr_days_before_event']))
    if (X_first.event_type == X_second.event_type):
        event_type_weight = 1
    else:
        event_type_weight = 0
    print('budget_weight = ', budget_weight, '\n')
    print('duration_weight = ', duration_weight, '\n')
    print('anticipation_weight = ', anticipation_weight, '\n')
    print('event_type_weight = ', event_type_weight, '\n')
    score = (budget_weight + duration_weight + anticipation_weight +
             event_type_weight) / 4
    return score
def mad_acc(x, y, z, m):
    # mad = np.mean(np.absolute(x - np.mean(x)))  # Mean Absolute Deviation formula
    x_feat = [median_absolute_deviation(i) for i in x]
    y_feat = [median_absolute_deviation(i) for i in y]
    z_feat = [median_absolute_deviation(i) for i in z]
    m_feat = [median_absolute_deviation(i) for i in m]
    return x_feat, y_feat, z_feat, m_feat
def find_outliers(flux, tbdata, bins, threshold=6):
    '''Function used to find outliers when using MAD to select variables.
    It splits the data into flux bins, calculates all the MAD values in that
    bin and then uses the modified z score to find which objects had
    disproportionately high MAD values for that bin. Anything above a given
    threshold was said to be an outlier.
    Inputs:
        flux = a 2D array of flux values where each row is a lightcurve for
               a single object
        tbdata = original data table for those flux values
        bins = array of bin edges
        threshold = threshold at which everything with a mod-z score above
                    that value is said to be an outlier. Default is 6
    Outputs:
        outliers = bool array defining which objects are outliers
        tbnew = table of data in the same order as outliers and allmodz so
                easy to compare/apply the boolean
        allmodz = array of z values for all the objects
    '''
    ### Bin data ###
    allmodz = []
    tbnew = np.recarray([0], dtype=tbdata.dtype, formats=tbdata.formats)
    for n, binedge in enumerate(bins):
        if n == np.size(bins) - 1:
            break
        fluxbin1, tbbin1 = flux_funcs.fluxbin(binedge, bins[n+1], flux, tbdata)
        # calculate mad values in the bins
        mad1 = median_absolute_deviation(fluxbin1, axis=1)  # for UDS
        modz = mod_z_score(mad1)
        tbnew = np.hstack([tbnew, tbbin1])
        allmodz = np.append(allmodz, modz)
    outliers = allmodz >= threshold
    return outliers, tbnew, allmodz
def reg_RANSAC(X, y, thresh=0., main_path='', pl_name='', plots=False,
               log=False):
    ## Robustly fit linear model with RANSAC algorithm
    X = X.reshape((len(X), 1))
    model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression())
    if thresh != 0:
        threshold = median_absolute_deviation(y) * thresh
        model_ransac.set_params(residual_threshold=threshold)
    model_ransac.fit(X, y)
    inlier_mask = model_ransac.inlier_mask_
    outlier_mask = np.logical_not(inlier_mask)

    ## Predict data of estimated models
    line_X = np.linspace(np.min(X), np.max(X), 100)
    line_y_ransac = model_ransac.predict(line_X[:, np.newaxis])

    ## Plotting inliers and outliers + linear regression
    if plots:
        fig, ax = plt.subplots(1)
        fig.suptitle(pl_name, fontsize=15)
        ax.scatter(X[inlier_mask], y[inlier_mask], marker='.', color='b')
        ax.scatter(X[outlier_mask], y[outlier_mask], marker='.', color='r')
        ax.plot(line_X, line_y_ransac, color='c')
        if log:
            ax.set_xlabel('Ref (log)')
            ax.set_ylabel('Single epoch (log)')
        else:
            ax.set_xlabel('Ref')
            ax.set_ylabel('Single epoch')
        ax.grid(which='major', axis='x', linewidth=0.5, linestyle='-',
                color='0.8')
        ax.grid(which='major', axis='y', linewidth=0.5, linestyle='-',
                color='0.8')
        plt.savefig('%s/%s_ransac.png' % (main_path, pl_name), dpi=300)
        plt.clf()
        plt.close()

    ## Return the coef of the linear regression
    return model_ransac.estimator_.coef_[0][0], \
        model_ransac.estimator_.intercept_[0], line_X, line_y_ransac, \
        outlier_mask
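# A hedged usage sketch for reg_RANSAC on synthetic data. Note that
# `thresh` scales the MAD of y itself, which for strongly trending data is
# dominated by the trend, so the residual threshold it sets is generous.
import numpy as np
from sklearn import linear_model
from astropy.stats import median_absolute_deviation

rng = np.random.RandomState(42)
X = np.linspace(0, 10, 200)
y = (3.0 * X + 1.0 + rng.normal(0, 0.5, X.size)).reshape(-1, 1)
y[::20] += 50.0  # inject gross outliers

slope, inter, line_X, line_y, outliers = reg_RANSAC(X, y, thresh=2.0)
print('slope=%.3f intercept=%.3f outliers=%d' % (slope, inter, outliers.sum()))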
def feat_extract(sensor_name, path=intermediate_dataset_path, train=train):
    train_feat = []
    train_data = np.array(
        pd.read_csv(path + train + '_' + sensor_name + '.csv', header=None))
    logging.debug(train_data.shape)
    logging.debug(sensor_name + ' read done')
    train_data = [np.gradient(train_data[i]) for i in range(len(train_data))]
    train_data = np.array(train_data)
    np.savetxt(intermediate_dataset_path + train + '_' + sensor_name +
               '_jerk.csv', train_data, delimiter=",", fmt='%.3f')
    logging.debug(train_data.shape)
    train_feat.append(np.mean(train_data, axis=1))
    logging.info(sensor_name + ' mean')
    train_feat.append(np.std(train_data, axis=1))
    logging.info(sensor_name + ' std')
    train_feat.append(np.var(train_data, axis=1))
    logging.info(sensor_name + ' var')
    train_feat.append(np.max(train_data, axis=1))
    logging.info(sensor_name + ' max')
    train_feat.append(np.min(train_data, axis=1))
    logging.info(sensor_name + ' min')
    train_feat.append(median_absolute_deviation(train_data, axis=1))
    logging.info(sensor_name + ' mad')
    train_feat = np.transpose(train_feat)
    train_feat = np.c_[train_feat,
                       fft_feat_extract(np.array(train_data), sensor_name)]
    return train_feat
def plot_mcmc_diagnostic(chain):
    """
    Plot per-parameter MCMC diagnostics: the walker traces, the median
    over walkers, and 1.5 times the MAD over walkers at each step.
    """
    names = [r'$\ln P$',
             r'$\sqrt{K}\,\cos M_0$', r'$\sqrt{K}\,\sin M_0$',
             r'$\sqrt{e}\,\cos \omega$', r'$\sqrt{e}\,\sin \omega$',
             '$v_0$']

    ndim = chain.shape[-1]
    assert ndim == len(names)

    fig, axes = plt.subplots(ndim, 3, figsize=(12, 16), sharex=True)

    for k in range(ndim):
        axes[k, 0].set_ylabel(names[k])
        axes[k, 0].plot(chain[..., k].T, marker='', drawstyle='steps-mid',
                        alpha=0.1, rasterized=True)
        axes[k, 1].plot(np.median(chain[..., k], axis=0), marker='',
                        drawstyle='steps-mid')
        std = 1.5 * median_absolute_deviation(chain[..., k], axis=0)
        axes[k, 2].plot(std, marker='', drawstyle='steps-mid')

    axes[0, 0].set_title('walkers')
    axes[0, 1].set_title('med(walkers)')
    axes[0, 2].set_title('1.5 MAD(walkers)')

    fig.tight_layout()
    return fig
def line_reg_RANSAC(X, y, pl_name, log=False, plots=False):
    ## Robustly fit linear model with RANSAC algorithm
    X = X.reshape((len(X), 1))
    model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression())
    threshold = median_absolute_deviation(y) * 2.
    model_ransac.set_params(residual_threshold=threshold)
    model_ransac.fit(X, y)
    inlier_mask = model_ransac.inlier_mask_
    outlier_mask = np.logical_not(inlier_mask)

    ## Predict data of estimated models
    if log:
        line_X = np.logspace(np.min(X), np.max(X), 100)
    else:
        line_X = np.linspace(np.min(X), np.max(X), 100)
    line_y_ransac = model_ransac.predict(line_X[:, np.newaxis])

    ## Plotting inliers and outliers + linear regression
    if plots:
        fig, ax = plt.subplots(1)
        fig.suptitle(pl_name, fontsize=15)
        ax.scatter(X[inlier_mask], y[inlier_mask], marker='.', color='b')
        ax.scatter(X[outlier_mask], y[outlier_mask], marker='.', color='r')
        ax.plot(line_X, line_y_ransac, color='c')
        ax.set_xlabel('Stack')
        ax.set_ylabel('Single epoch')
        plt.savefig('%s/%s/%s/%s_ransac.png' % (webpath, field, epoch, pl_name),
                    dpi=300)
        plt.clf()
        plt.close()

    ## Return the coef of the linear regression
    return model_ransac.estimator_.coef_[0][0], \
        model_ransac.estimator_.intercept_[0], line_X, line_y_ransac, \
        outlier_mask
def get_decals(coordinates, size, band='g', pix_scale=0.262 * u.arcsec,
               clobber=True):
    size_pix = np.round((size / pix_scale), 0) + 1
    if size_pix > 512:
        raise Exception('Requested cutout exceeds the 512 pixel limit')

    base_url = 'http://legacysurvey.org/viewer/fits-cutout-decals-dr2?'
    query_string = 'ra={0}&dec={1}&pixscale={2}'\
                   '&size={3:.0f}&bands={4}'.format(
                       coordinates.ra.to(u.deg).value,
                       coordinates.dec.to(u.deg).value,
                       pix_scale.to(u.arcsec).value,
                       size_pix,
                       band)
    filename = 'test.fits'
    if os.path.isfile(filename) and clobber:
        os.remove(filename)
    print(base_url + query_string)
    test = urllib.request.urlretrieve(base_url + query_string,
                                      filename=filename)
    image = load_image(filename)
    mad = median_absolute_deviation(image)
    sigma = mad  # /np.sqrt(2)*erfinv(2*0.75-1)
    image = image.subtract(np.median(image) * image.unit)
    image = image.divide(sigma)
    return image  # , out
def calc_beam_offsets(AUT=None, subplot=None, normed_beam=None,
                      all_offsets=all_offsets):
    divided_map = load(
        '../rotate_ref_and_reformat/prerotated_AUT_%s_ref_rf0XX.npz'
        % AUT)['beammap']
    corrected_map_error = load(
        '../rotate_ref_and_reformat/prerotated_AUT_%s_ref_rf0XX_error.npz'
        % AUT)['beammap']
    reference = load(
        '../rotate_ref_and_reformat/rotated+flip_ung_western.npz')['beammap']
    corrected_map = divided_map + reference

    gain = fit_gain(map_data=corrected_map, map_error=corrected_map_error,
                    normed_beam=normed_beam)

    offsets = ((corrected_map[where(beam_zas <= 80.0 * (pi / 180.0))] - gain -
                normed_beam[where(beam_zas <= 80.0 * (pi / 180.0))]))

    ax = fig.add_subplot(2, 2, subplot)
    offsets = abs(offsets[(isnan(offsets) != True)])
    ax.hist(offsets, bins=15,
            label='Offsets (%.1f$\pm$%.1f)'
                  % (median(offsets), median_absolute_deviation(offsets)))
    # ax.plot(alts, chis, label=r'$\chi^2$ (sum=%d)' % int(round(chis[chis != nan].sum())))
    ax.legend()
    all_offsets = append(all_offsets, offsets)
    return all_offsets
def sample_curve(x, y, err, xsample_range, num_of_sample_curves, filename):
    clipped_fluxes = get_sigma_clipped_fluxes(y)
    background = np.ma.median(clipped_fluxes)
    noise = median_absolute_deviation(clipped_fluxes)
    ls = get_ls(x, y, err)
    var = noise
    # print('Background = ', background)
    # print('Noise = ', noise)
    # print('ls = ', ls)
    k = gpflow.kernels.RBF(1, lengthscales=ls, variance=var)
    m = gpflow.gpr.GPR(x.reshape(len(x), 1), y.reshape(len(y), 1), kern=k)
    m.kern.lengthscales.prior = gpflow.priors.Gaussian(ls, ls / 5)
    m.kern.variance.prior = gpflow.priors.Gaussian(5 * var, var)
    m.optimize()
    xx = np.linspace(min(x), max(x), 1000)[:, None]
    mean, var = m.predict_y(xx)
    pl.figure(figsize=(12, 6))
    pl.plot(x, y, 'kx', mew=2)
    pl.plot(xx, mean, 'b', lw=2)
    pl.fill_between(xx[:, 0],
                    mean[:, 0] - 2 * np.sqrt(var[:, 0]),
                    mean[:, 0] + 2 * np.sqrt(var[:, 0]),
                    color='blue', alpha=0.2)
    pl.xlabel('Days [JD]')
    pl.ylabel('Flux [Jy]')
    pl.savefig(filename + '.png')
    pl.close()
    xnew = xsample_range[:, None]
    ynew = m.predict_f_samples(xnew, num_of_sample_curves)
    return xnew, ynew
def median_combine(self, median_func=ma.median, scale_to=None):
    """Median combine a set of arrays.

    A `~ccdproc.CCDData` object is returned with the data property set to
    the median of the arrays. If the data was masked or any data have been
    rejected, those pixels will not be included in the median. A mask will
    be returned, and if a pixel has been rejected in all images, it will
    be masked.

    The uncertainty of the combined image is set by 1.4826 times the
    median absolute deviation of all input images.

    Parameters
    ----------
    median_func : function, optional
        Function that calculates median of a `~numpy.ma.masked_array`.
        Default is to use `numpy.ma.median` to calculate median.

    scale_to : float, optional
        Scaling factor used in the average combined image. If given,
        it overrides ``CCDData.scaling``. Defaults to None.

    Returns
    -------
    combined_image: `~ccdproc.CCDData`
        CCDData object based on the combined input of CCDData objects.

    Warnings
    --------
    The uncertainty currently calculated using the median absolute
    deviation does not account for rejected pixels.
    """
    if scale_to is not None:
        scalings = scale_to
    elif self.scaling is not None:
        scalings = self.scaling
    else:
        scalings = 1.0

    # set the data
    data = median_func(scalings * self.data_arr, axis=0)

    # set the mask
    mask = self.data_arr.mask.sum(axis=0)
    mask = (mask == len(self.data_arr))

    # set the uncertainty
    uncertainty = 1.4826 * median_absolute_deviation(self.data_arr.data,
                                                     axis=0)

    # create the combined image with a dtype matching the combiner
    combined_image = CCDData(np.asarray(data.data, dtype=self.dtype),
                             mask=mask, unit=self.unit,
                             uncertainty=StdDevUncertainty(uncertainty))

    # update the meta data
    combined_image.meta['NCOMBINE'] = len(self.data_arr)

    # return the combined image
    return combined_image
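# Why 1.4826: for Gaussian noise the MAD underestimates the standard
# deviation by the factor 1/Phi^{-1}(3/4). A quick numerical check,
# independent of the Combiner machinery above:
import numpy as np
from astropy.stats import median_absolute_deviation

rng = np.random.default_rng(1)
samples = rng.normal(0.0, 2.0, size=100_000)
print(np.std(samples))                               # ~2.0
print(1.4826 * median_absolute_deviation(samples))   # also ~2.0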
def calc_rms(spectrum):
    """
    Returns the spectral rms

    Parameters
    ----------
    Spectrum : spectral cube spectrum
        An individual spectrum taken from the spectral cube
    """
    # Find all negative values
    negative_indices = (spectrum < 0.0)
    spectrum_negative_values = spectrum[negative_indices]
    reflected_noise = np.concatenate((spectrum[negative_indices],
                                      abs(spectrum[negative_indices])))
    # Compute the median absolute deviation
    MAD = median_absolute_deviation(reflected_noise)

    # For pure noise you should have roughly half the spectrum negative. If
    # it isn't then you need to be a bit more conservative
    if len(spectrum_negative_values) < 0.47 * len(spectrum):
        maximum_value = 3.5 * MAD
    else:
        maximum_value = 4.0 * MAD
    noise = spectrum[spectrum < abs(maximum_value)]
    rms = np.sqrt(np.sum(noise**2) / np.size(noise))

    return rms
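# Hedged demo of the reflected-noise trick above: with zero-mean Gaussian
# noise plus a positive emission line, the rms recovered from the negative
# half of the spectrum tracks the injected noise level, while a plain
# standard deviation is inflated by the line.
import numpy as np
from astropy.stats import median_absolute_deviation

rng = np.random.default_rng(2)
spectrum = rng.normal(0.0, 0.1, size=2048)
spectrum[1000:1020] += 1.5  # a bright "line"
print(np.std(spectrum), calc_rms(spectrum))  # calc_rms stays near 0.1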
def __init__(self, im, x0, y0, profs, thetas):
    self.x0 = x0
    self.y0 = y0
    self.im = im

    # store the center image
    deltay = -(im.fovy() / 2. - y0 * eh.RADPERUAS) / im.psize
    deltax = (im.fovx() / 2. - x0 * eh.RADPERUAS) / im.psize
    self.im_center = im.shift(
        [int(np.round(deltay)), int(np.round(deltax))])

    # total flux and normalization
    self.flux = im.total_flux()
    self.parea = (im.psize / eh.RADPERUAS)**2

    # factor to convert to normalized brightness temperature (total flux of
    # 1 Jy)
    self.normfactor = NORMFLUX / im.total_flux()

    # image array and profiles
    # factor to convert to brightness temperature
    factor = 3.254e13 / (im.rf**2 * im.psize**2)
    self.imarr = im.imvec.reshape(im.ydim, im.xdim)[::-1] * factor  # in Tb
    self.xs = np.arange(im.xdim) * im.psize / eh.RADPERUAS
    self.ys = np.arange(im.ydim) * im.psize / eh.RADPERUAS
    # self.interp = scipy.interpolate.interp2d(self.ys, self.xs, self.imarr,
    #                                          kind='quintic')
    self.interp = scipy.interpolate.interp2d(self.ys, self.xs, self.imarr,
                                             kind='cubic')

    self.profiles = np.array(profs)
    self.thetas = np.array(thetas)
    self.nang = len(thetas)
    self.nrs = len(self.profiles[0])
    self.nthetas = len(self.thetas)
    self.rs = np.linspace(0, RMAX, self.nrs)
    self.dr = self.rs[-1] - self.rs[-2]

    self.pks = []
    self.pk_vals = []
    self.diameters = []
    for prof in self.profiles:
        pk, vpk = self.calc_pkrad_from_prof(prof)
        self.pks.append(pk)
        self.pk_vals.append(vpk)
        self.diameters.append(2 * np.abs(pk))
    self.pks = np.array(self.pks)
    self.pk_vals = np.array(self.pk_vals)
    self.diameters = np.array(self.diameters)

    # ring size
    self.RingSize1 = (np.mean(self.diameters), np.std(self.diameters))
    self.RingSize1_med = (np.median(self.diameters),
                          median_absolute_deviation(self.diameters))
def calc_dispersion_nearest_target(target_x, target_y, x, y, z, N):
    dispersions = np.zeros(len(target_x))
    for i in trange(len(target_x)):
        nx, ny, nz = n_nearest_points(target_x[i], target_y[i], x, y, z, N)
        dispersions[i] = 1.5 * aps.median_absolute_deviation(nz,
                                                             ignore_nan=True)
    return dispersions
def get_MAD(self, recompute=False):
    """
    Compute the median absolute deviation of the lightcurve
    """
    mad = getattr(self, 'MAD', None)
    if mad is not None:
        if not recompute:
            return mad

    mad = {}
    outlc = self.get_lc(recompute=recompute)
    for i, pb in enumerate(outlc):
        tlc = outlc.get(pb)
        ttime, tFlux, tFluxErr, tFluxUnred, tFluxErrUnred, tFluxRenorm, \
            tFluxErrRenorm, tphotflag, tzeropoint, tobsId = tlc

        photmask = tphotflag >= constants.GOOD_PHOTFLAG
        tFluxRenorm = tFluxRenorm[photmask]

        if len(tFluxRenorm) == 0:  # if tFlux is empty
            tmad = 0.
        else:
            tmad = median_absolute_deviation(tFluxRenorm)
        mad[pb] = tmad

    self.MAD = mad
    return mad
def median_filter(flx, chunk_size, coldpix):
    # Init new spectrum
    newspc = []
    # Chunk size *MUST* be a divisor of the spectrum length.
    if len(flx) % chunk_size != 0:
        raise ValueError("Chunk size not a factor of spectrum length")
    for i in range(int(len(flx) / chunk_size)):
        a = i * chunk_size
        b = (i + 1) * chunk_size
        ### Split the spectrum into chunk_size-pixel chunks
        subflx = flx[a:b]
        # Compute the median and MAD (robust stdev) for each chunk
        median = np.nanmedian(subflx)
        sigma = median_absolute_deviation(subflx)
        # Pixels more than 5 MAD above (or 7 MAD below, for cold pixels)
        # the median get masked
        if coldpix == True:
            cleanmsk = np.logical_or(
                np.array(subflx) > 5 * sigma + median,
                np.array(subflx) < median - 7 * sigma)
        else:
            cleanmsk = np.array(subflx) > 5 * sigma + median
        subflx[cleanmsk == True] = np.nan  # np.median(subflx[cleanmsk == False])
        # print("Threshold: %s, Max: %s" % (2*sigma + median, np.max(subflx)))
        # Rebuild the spectrum chunk by chunk
        newspc.extend(subflx)
    return np.array(newspc)
def compute_mad_estimates(x_FWHMs, y_FWHMs):
    """
    Return a robust estimate of the mean FWHM: per-axis values are clipped
    at 2 MAD about their medians, averaged, and scaled by 2.355 (the
    Gaussian sigma-to-FWHM factor).
    """
    # Computing median and mad estimates
    x_med = np.nanmedian(x_FWHMs)
    y_med = np.nanmedian(y_FWHMs)
    x_mad = median_absolute_deviation(x_FWHMs, ignore_nan=True)
    y_mad = median_absolute_deviation(y_FWHMs, ignore_nan=True)
    # Outlier resistant median: keep values within 2 MAD of the median
    x_med_clipped = np.median(np.array(x_FWHMs)[abs(x_FWHMs - x_med)
                                                <= 2.0 * x_mad])
    y_med_clipped = np.median(np.array(y_FWHMs)[abs(y_FWHMs - y_med)
                                                <= 2.0 * y_mad])
    return 0.5 * (x_med_clipped + y_med_clipped) * 2.355
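# The 2.355 above is the Gaussian sigma-to-FWHM conversion,
# FWHM = 2*sqrt(2*ln(2))*sigma; a one-line check:
import numpy as np
print(2 * np.sqrt(2 * np.log(2)))  # 2.3548...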
def calc_dispersion_bins_target(target_x, target_y, x, y, z, xrange, yrange):
    dispersions = np.zeros(len(target_x))
    for i in trange(len(target_x)):
        nx, ny, nz = make_bin(target_x[i], target_y[i], x, y, z,
                              xrange, yrange)
        dispersions[i] = 1.5 * aps.median_absolute_deviation(nz,
                                                             ignore_nan=True)
    return dispersions
def main(epic, field, cad, refcad, logging=True):
    targ = PixelTarget(epic, field, cad, logging)
    targ.read_fits()
    # logger.debug('Removing thrusters')
    targ.remove_thrust(refcad)
    # logger.debug('Finding aper')
    aperture = targ.find_aper()
    # logger.debug('Performing aperture photometry')
    ftot = targ.aper_phot(aperture)
    mad = median_absolute_deviation(ftot)
    best_rad = 'arbitrary'
    best_aper = aperture
    ftot_all = {'arbitrary': ftot}
    poisson_all = {'arbitrary': targ.poisson}

    if targ.start_aper > 2:
        rads = np.arange(targ.start_aper - 1, targ.start_aper + 3)
    else:
        rads = np.arange(2, 6)

    # logger.debug('Looping through all rads')
    for r in rads:
        # logger.debug('Rad=%s', r)
        circ_apers = targ.find_circ_aper(rad=r)
        ftot_circ = targ.aper_phot(circ_apers)
        ftot_all[str(r)] = ftot_circ
        poisson_all[str(r)] = targ.poisson
        mad_new = median_absolute_deviation(ftot_circ)
        if mad_new < mad:
            mad = mad_new
            best_rad = str(r)
            best_aper = circ_apers

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111)
    ax = draw_aper(targ, best_aper, ax)
    ax.set_title('Rad =' + best_rad)
    plt.savefig('outputs/' + targ.epic + '_aper.png', dpi=150)
    plt.pause(0.01)
    plt.close()
    return targ, ftot_all, poisson_all, best_rad
def normalize(data, cols=0, exclude=0.0, to_db=False, use_median=False):
    """
    Normalize data per frequency channel so that the noise level in data is
    controlled; using mean or median filter.

    Uses a sliding window to calculate mean and standard deviation to
    preserve non-drifted signals. Excludes a fraction of brightest pixels
    to better isolate noise.

    Parameters
    ----------
    data : ndarray
        Time-frequency data
    cols : int
        Number of columns on either side of the current frequency bin. The
        width of the sliding window is thus 2 * cols + 1
    exclude : float, optional
        Fraction of brightest samples in each frequency bin to exclude in
        calculating mean and standard deviation
    to_db : bool, optional
        Convert values to decibel equivalents *before* normalization
    use_median : bool, optional
        Use median and median absolute deviation instead of mean and
        standard deviation

    Returns
    -------
    normalized_data : ndarray
        Normalized data
    """
    # Width of normalization window = 2 * cols + 1
    t_len, f_len = data.shape
    mean = np.empty(f_len)
    std = np.empty(f_len)

    if to_db:
        data = db(data)

    for i in np.arange(f_len):
        if i < cols:
            start = 0
        else:
            start = i - cols
        if i > f_len - 1 - cols:
            end = f_len
        else:
            end = i + cols + 1
        temp = np.sort(data[:, start:end].flatten())
        noise = temp[0:int(np.ceil(t_len * (end - start) * (1 - exclude)))]
        if use_median:
            mean[i] = np.median(noise)
            std[i] = median_absolute_deviation(noise)
        else:
            mean[i] = np.mean(noise)
            std[i] = np.std(noise)

    return np.nan_to_num((data - mean) / std)
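# A minimal usage sketch for normalize() on a synthetic time-frequency
# array. With use_median=True each channel is centred and scaled using a
# robust estimate from its sliding window, so the injected narrowband
# signal ends up many sigma above zero in the normalized output.
import numpy as np
from astropy.stats import median_absolute_deviation

rng = np.random.default_rng(3)
data = rng.normal(10.0, 2.0, size=(256, 64))
data[:, 30] += 25.0  # a persistent narrowband signal

normed = normalize(data, cols=2, exclude=0.1, use_median=True)
print(normed.shape, normed[:, 30].mean())  # signal channel stands out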
def plot_median_line(fluxn, tbdata, statistic='MAD', createplot=True):
    ''' Function to find (and plot a line showing) the median value for a
    variety of statistics across the flux range of the sample - useful
    when calculating uncertainties.
    Inputs:
        fluxn = 2D array of flux values where each line is the light curve
                of an object
        tbdata = table of data that corresponds to the fluxes given (same
                 length)
        statistic = which statistic to find the median of. Options are MAD
                    ('MAD', default), excess variance ('excess'), standard
                    deviation ('std'), variance ('var')
        createplot = bool, defines whether or not to actually plot the
                     median line onto the most recent plot. Default is True
    Outputs:
        bins = array denoting what the bin edges were
        allmedstat = array of the median values for each bin
    '''
    bins = np.array([13, 15])
    bins = np.append(bins, np.arange(16, 24, 0.2))
    bins = np.append(bins, [24])

    bins = 10**((30 - bins) / 2.5)
    bins = np.flip(bins, axis=0)
    # bins = bins[16:44]  # because of flux limit

    ### Bin data ###
    allmedstat = np.array([])
    for n, binedge in enumerate(bins):
        # print(binedge)
        if n == np.size(bins) - 1:
            break
        mag, bindata = flux_funcs.fluxbin(binedge, bins[n + 1], fluxn,
                                          tbdata)  # bindata

        if statistic == 'excess':
            magerr = k_mag_flux.fluxerr5_stacks(bindata)  # make error array
            nmag, nmagerr = flux_funcs.normalise_flux_and_errors(mag, magerr)
        else:
            nmag = flux_funcs.normalise_flux(mag)

        if statistic == 'std':
            binstat = np.std(nmag, axis=1)
        elif statistic == 'excess':
            binstat = vary_stats.normsigmasq(nmag, nmagerr)
        elif statistic == 'MAD':
            binstat = median_absolute_deviation(nmag, axis=1)
        elif statistic == 'var':
            binstat = np.nanvar(nmag, axis=1, ddof=1)
        else:
            print('Unrecognised statistic entered')
            return

        statmed = np.nanmedian(binstat)
        allmedstat = np.append(allmedstat, statmed)

    if createplot == True:
        plt.plot(bins[0:42], allmedstat, 'k--')

    return bins, allmedstat
def cal_beam_MADMFD(infile):
    """
    Calculating the MAD of max flux density of each beam.
    """
    data = np.loadtxt(infile)
    maxfdensity = data[:, 8]
    mad_maxfdensity = round(median_absolute_deviation(maxfdensity), 3)
    return mad_maxfdensity
def mad_with_extrema(x, mnm=None, mxm=None, return_func=False):
    """Calculate a bounded Median Absolute Deviation,
    with a minimum and maximum allowed MAD.

    Parameters
    ----------
    x: array
        the data on which to calculate the MAD
    mnm: float
        lower bound
        if MAD(x) < mnm: return mnm
    mxm: float
        upper bound
        if MAD(x) > mxm: return mxm
    return_func: bool
        if True, returns a single-parameter function with set mnm & mxm

    Returns
    -------
    MAD: float
        if return_func is True, instead returns
        `mad_with_extrema(., mnm=mnm, mxm=mxm, return_func=False)`
    """
    if return_func is True:
        return lambda x: mad_with_extrema(
            x, mnm=mnm, mxm=mxm, return_func=False)

    # astropy MAD
    mad = np.nan_to_num(median_absolute_deviation(x))

    if mnm is not None:
        if issubclass(x.__class__, u.Quantity):
            mnm = mnm * x.unit
        try:  # array
            mad[mad < mnm] = mnm
        except TypeError:  # single value
            if mad < mnm:
                mad = mnm

    if mxm is not None:
        if issubclass(x.__class__, u.Quantity):
            mxm = mxm * x.unit
        try:
            mad[mad > mxm] = mxm
        except TypeError:
            if mad > mxm:
                mad = mxm

    return mad
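# A short usage sketch for mad_with_extrema: clamp the MAD into a range,
# or build a reusable clamped estimator via return_func=True. Assumes the
# module-level imports the function relies on (numpy, astropy units and
# median_absolute_deviation).
import numpy as np
from astropy import units as u
from astropy.stats import median_absolute_deviation

x = np.array([1.0, 1.1, 0.9, 1.05, 1.02])
print(mad_with_extrema(x, mnm=0.5))  # raw MAD ~0.03 is clamped up to 0.5
bounded_mad = mad_with_extrema(x, mnm=0.5, mxm=2.0, return_func=True)
print(bounded_mad(np.arange(100.0)))  # raw MAD 25 is clamped down to 2.0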
def plot_sigma_clipping_hist(dataset, ax):
    """
    Plot bi-color histogram highlighting data-range masked by
    sigma-clipping.

    Overplot gaussian PDF matched to median, std. dev of sigma-clipped
    data.
    """
    fluxes = dataset[DataCols.flux]
    # `density` replaces the `normed` keyword removed from modern numpy
    hist, bin_edges = np.histogram(fluxes,
                                   bins=int(max(len(fluxes) / 20, 15)),
                                   density=True)
    clipped_fluxes = get_sigma_clipped_fluxes(fluxes)

    bin_centres = []
    for bin_idx in range(len(bin_edges) - 1):
        bin_centres.append(0.5 * (bin_edges[bin_idx] + bin_edges[bin_idx + 1]))
    bin_centres = np.asarray(bin_centres)
    bin_width = bin_edges[1] - bin_edges[0]

    # Get a mask for the histogram that only shows flux values that are
    # inside the sigma-clipped range
    # (nb len(bin_edges) == len(bins) + 1)
    clip_mask = ((bin_centres > clipped_fluxes.min()) *
                 (bin_centres < clipped_fluxes.max()))

    # Plot the hist of all flux values, in red:
    ax.bar(bin_centres, hist, width=bin_width, color='r',
           label="All flux values")
    # Then overplot the flux values that are inside the sigma-clip range,
    # using the mask
    ax.bar(bin_centres[clip_mask], hist[clip_mask], width=bin_width,
           label="Fluxes after sigma-clipping")

    xlim = np.percentile(fluxes, 0.5), np.percentile(fluxes, 99.5)
    ax.set_xlim(xlim)

    # Overplot a gaussian curve with same median and std dev as clipped
    # data, for comparison. (In yellow)
    x = np.linspace(xlim[0], xlim[1], 1000)
    clip_pars_norm = scipy.stats.norm(
        np.ma.median(clipped_fluxes),
        # np.ma.std(clipped_fluxes),
        median_absolute_deviation(clipped_fluxes))
    ax.plot(x, clip_pars_norm.pdf(x), color='y',
            label="Normal dist. for comparison")
    ax.set_xlabel(dataset[DataCols.flux_units])
    ax.set_ylabel("Relative prob")
    ax.legend(loc='best')
    return ax
def plot_history(history, model_filename):
    # Plot loss vs epochs
    plt.figure()
    trainloss = history.history['loss']
    valloss = history.history['val_loss']
    plt.plot(trainloss)
    plt.plot(valloss)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.savefig(model_filename.replace('.hdf5', '.pdf'))

    # Plot zoomed figure
    lenloss = len(trainloss)
    zoomloss = int(lenloss / 2.)
    plt.figure()
    plt.plot(np.arange(zoomloss, lenloss), trainloss[zoomloss:])
    plt.plot(np.arange(zoomloss, lenloss), valloss[zoomloss:])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.ylim(bottom=min(valloss) - abs(0.1 * min(valloss)),
             top=1.1 * max(
                 np.array(valloss[zoomloss:])
                 [abs(valloss[zoomloss:] - np.median(valloss[zoomloss:])) <
                  5 * median_absolute_deviation(valloss[zoomloss:])]))
    plt.savefig(f"{model_filename.replace('.hdf5', '_zoomed.pdf')}")

    # Plot zoomed figure reduced y axis
    lenloss = len(trainloss)
    zoomloss = int(0.75 * lenloss)
    plt.figure()
    plt.plot(np.arange(zoomloss, lenloss), trainloss[zoomloss:])
    plt.plot(np.arange(zoomloss, lenloss), valloss[zoomloss:])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.ylim(bottom=min(valloss) - abs(0.1 * min(valloss)),
             top=1.1 * max(
                 np.array(valloss[zoomloss:])
                 [abs(valloss[zoomloss:] - np.median(valloss[zoomloss:])) <
                  5 * median_absolute_deviation(valloss[zoomloss:])]))
    plt.savefig(f"{model_filename.replace('.hdf5', '_zoomed2.pdf')}")
def assessrecovery(self):
    exists = self.FUVexists()

    # Exposure metric already computed in init (self.c_exposure)

    # Periodogram Metric
    time_seconds = self.df['t_mean'] * 60
    # ls = LombScargle(time_seconds, self.flux_injected)
    ls = LombScargle(self.df['t_mean'], self.flux_injected)
    freq, amp = ls.autopower(nyquist_factor=1)

    detrad = self.df['detrad']
    # ls_detrad = LombScargle(time_seconds, detrad)
    ls_detrad = LombScargle(self.df['t_mean'], detrad)
    freq_detrad, amp_detrad = ls_detrad.autopower(nyquist_factor=1)

    pgram_tup = WDranker_2.find_cPGRAM(ls, amp_detrad,
                                       exposure=self.exposure)
    # Return 0,1 result of recovery
    c_periodogram = pgram_tup.c
    ditherperiod_exists = pgram_tup.ditherperiod_exists

    # Welch Stetson Metric
    if exists:
        c_ws = WDranker_2.find_cWS(self.t_mean, self.t_mean_fuv,
                                   self.flux_injected,
                                   self.flux_injected_fuv,
                                   self.flux_err, self.flux_err_fuv,
                                   ditherperiod_exists, self.FUVexists())
    else:
        c_ws = WDranker_2.find_cWS(self.t_mean, None,
                                   self.flux_injected, None,
                                   self.flux_err, None,
                                   ditherperiod_exists, self.FUVexists())

    # RMS Metric --- have to 'unscale' the magnitudes
    converted_flux = [f * self.original_median
                      for f in self.flux_injected]
    injectedmags = [WDutils.flux_to_mag('NUV', f) for f in converted_flux]
    sigma_mag = median_absolute_deviation(injectedmags)
    c_magfit = WDranker_2.find_cRMS(self.mag, sigma_mag, 'NUV')

    # Weights:
    w_pgram = 1
    w_expt = .2
    w_WS = .3
    w_magfit = .25

    C = ((w_pgram * c_periodogram) + (w_expt * self.c_exposure) +
         (w_magfit * c_magfit) + (w_WS * c_ws))

    if C > self.cutoff:
        return 1
    else:
        return 0
def cal_mosaic_Stats(infile):
    """
    Calculating MAD RMS and median RMS for the mosaic cube.
    """
    data = np.loadtxt(infile)
    rms = data[:, 3]
    med_rms = np.median(rms)
    mad_rms = round(median_absolute_deviation(rms), 3)
    return mad_rms, med_rms
def mod_z_score(arr):
    '''Function to find the modified z score of a given array, used to find
    variables in my first pass at this project
    Inputs:
        arr = array to find mod-z of
    Outputs:
        zvalues = array of z-values for that array
    '''
    medx = np.median(arr)
    mad = median_absolute_deviation(arr)
    zvalues = np.array([(0.6745 * (x - medx)) / mad for x in arr])
    return zvalues
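# Quick demo of mod_z_score: the 0.6745 factor rescales MAD-based
# deviations so that Gaussian data gets |z| ~ 1 at one sigma; the injected
# outlier scores far above the threshold=6 used by find_outliers above.
import numpy as np
from astropy.stats import median_absolute_deviation

rng = np.random.default_rng(4)
arr = rng.normal(0, 1, 500)
arr[0] = 12.0
z = mod_z_score(arr)
print(z[0], np.abs(z[1:]).max())  # ~12 vs ~3 for the clean samples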
def get_bias_corrected_data_in_electrons(hdu):
    """
    Estimate the bias level of the image and subtract it from the image.
    If the bias estimate is negative, this means that the GAIN in the fits
    HDU is wrong. In this case, issue a warning and re-scale data after
    calculating what the gain should have been.
    Uses median_absolute_deviation (MAD) to compute standard deviation.
    Note: does not subtract background from data to compute noise, as was
    done prior to Sept. 2018 parameter tuning.
    :param hdu: fits hdu with the image in question
    :return: np.array data_e
    """
    gain = float(hdu[0].header['GAIN'])  # e-/count
    read_noise_e = float(hdu[0].header['RDNOISE'])  # e-/pixel
    data_e = gain * hdu[0].data  # counts to electrons

    # 1.48 here goes from median absolute deviation to standard deviation
    noise_e = 1.48 * median_absolute_deviation(data_e)
    estimated_bias_level_in_electrons = (np.median(data_e) -
                                         noise_e * noise_e +
                                         read_noise_e * read_noise_e)

    # If the bias is negative, that means the GAIN is probably wrong in
    # the HDU. Scaling the data here is a work-around for that.
    if estimated_bias_level_in_electrons < 0:
        # here, we're really figuring out what the gain should have been
        # and re-scaling the data
        noise_e = 1.48 * median_absolute_deviation(data_e)
        sqrt_median_e = np.sqrt(np.median(data_e))
        scale_factor = sqrt_median_e / noise_e
        msg = ('Negative bias {b:0.2f}. Scaling data by '
               '(sqrt(median)/noise): ({r:0.2f}/{n:0.2f})= {s:0.2f}')
        logger.warning(msg.format(b=estimated_bias_level_in_electrons,
                                  s=scale_factor,
                                  r=sqrt_median_e,
                                  n=noise_e))
        data_e *= scale_factor
    else:
        # bias corrected data in electrons
        data_e -= estimated_bias_level_in_electrons

    return data_e
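# Hedged sketch exercising the bias estimator on an in-memory HDU built
# with astropy.io.fits; the GAIN/RDNOISE values here are made up for the
# demo, not taken from any real instrument.
import numpy as np
from astropy.io import fits
from astropy.stats import median_absolute_deviation

rng = np.random.default_rng(5)
image = rng.normal(1000.0, 12.0, size=(128, 128))  # counts
hdu = fits.HDUList([fits.PrimaryHDU(data=image)])
hdu[0].header['GAIN'] = 2.0      # e-/count (assumed)
hdu[0].header['RDNOISE'] = 7.0   # e-/pixel (assumed)
data_e = get_bias_corrected_data_in_electrons(hdu)
print(np.median(data_e))  # median after the bias estimate is removed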
def testCalculate(self):
    """Test flux median absolute deviation calculation.
    """
    n_sources = 10
    objId = 0

    # Test expected MAD value.
    fluxes = np.linspace(-1, 1, n_sources)
    diaObjects = pd.DataFrame({"diaObjectId": [objId]})
    diaSources = pd.DataFrame(
        data={"diaObjectId": n_sources * [objId],
              "filterName": n_sources * ["u"],
              "diaSourceId": np.arange(n_sources, dtype=int),
              "psFlux": fluxes,
              "psFluxErr": np.ones(n_sources)})

    plug = MadDiaPsFlux(MadDiaPsFluxConfig(),
                        "ap_madFlux",
                        None)
    run_multi_plugin(diaObjects, diaSources, "u", plug)
    self.assertAlmostEqual(
        diaObjects.at[objId, "uPSFluxMAD"],
        median_absolute_deviation(fluxes, ignore_nan=True))

    # Test expected MAD value with a nan set.
    fluxes[4] = np.nan
    diaObjects = pd.DataFrame({"diaObjectId": [objId]})
    diaSources = pd.DataFrame(
        data={"diaObjectId": n_sources * [objId],
              "filterName": n_sources * ["r"],
              "diaSourceId": np.arange(n_sources, dtype=int),
              "psFlux": fluxes,
              "psFluxErr": np.ones(n_sources)})

    run_multi_plugin(diaObjects, diaSources, "r", plug)
    self.assertAlmostEqual(
        diaObjects.at[objId, "rPSFluxMAD"],
        median_absolute_deviation(fluxes, ignore_nan=True))
def median_combine(self, median_func=ma.median):
    """Median combine a set of arrays.

    A CCDData object is returned with the data property set to the median
    of the arrays. If the data was masked or any data have been rejected,
    those pixels will not be included in the median. A mask will be
    returned, and if a pixel has been rejected in all images, it will be
    masked.

    The uncertainty of the combined image is set by 1.4826 times the
    median absolute deviation of all input images.

    Parameters
    ----------
    median_func : function, optional
        Function that calculates median of a ``numpy.ma.masked_array``.
        Default is to use ``np.ma.median`` to calculate median.

    Returns
    -------
    combined_image: CCDData object
        CCDData object based on the combined input of CCDData objects.

    Warnings
    --------
    The uncertainty currently calculated using the median absolute
    deviation does not account for rejected pixels.
    """
    # set the data
    data = median_func(self.data_arr, axis=0)

    # set the mask
    mask = self.data_arr.mask.sum(axis=0)
    mask = (mask == len(self.data_arr))

    # set the uncertainty
    uncertainty = 1.4826 * median_absolute_deviation(self.data_arr.data,
                                                     axis=0)

    # create the combined image
    combined_image = CCDData(data.data, mask=mask, unit=self.unit,
                             uncertainty=StdDevUncertainty(uncertainty))

    # update the meta data
    combined_image.meta['NCOMBINE'] = len(self.data_arr)

    # return the combined image
    return combined_image
def sigma_func(arr):
    """
    Robust method for calculating the standard deviation of an array.
    ``sigma_func`` uses the median absolute deviation to estimate the
    standard deviation.

    Parameters
    ----------
    arr : `~ccdproc.ccddata.CCDData` or `~numpy.ndarray`
        Array whose deviation is to be calculated.

    Returns
    -------
    float
        standard deviation estimate for the array
    """
    return 1.4826 * stats.median_absolute_deviation(arr)
def calcJumpTwo(freq, curve, verbose=0):
    """
    Calculates the ratio in the overlap region.
    freq and curve must be dictionaries with keys 'SSW' and 'SLW'
    """
    overlap = [959.3, 989.4]  # GHz
    fqL = freq['SLW']
    fqS = freq['SSW']
    slwOver = (fqL >= overlap[0]) & (fqL <= overlap[1])
    sswOver = (fqS >= overlap[0]) & (fqS <= overlap[1])
    ratio = curve['SLW'][slwOver] / curve['SSW'][sswOver]
    #
    # Find the median and Median Absolute Deviation of the ratio
    med = np.median(ratio)
    mad = median_absolute_deviation(ratio)
    if (verbose):
        print("SLW/SSW median ratio in overlap = %5.3f +/- %5.3f"
              % (med, mad))
    return med, mad
def mad_std(data, axis=None):
    """
    Calculate a robust standard deviation using the `median absolute
    deviation (MAD)
    <http://en.wikipedia.org/wiki/Median_absolute_deviation>`_.

    The standard deviation estimator is given by:

    .. math::

        \\sigma \\approx \\frac{\\textrm{MAD}}{\Phi^{-1}(3/4)}
            \\approx 1.4826 \ \\textrm{MAD}

    where :math:`\Phi^{-1}(P)` is the normal inverse cumulative
    distribution function evaluated at probability :math:`P = 3/4`.

    Parameters
    ----------
    data : array-like
        Data array or object that can be converted to an array.
    axis : int, optional
        Axis along which the robust standard deviations are computed.
        The default (`None`) is to compute the robust standard deviation
        of the flattened array.

    Returns
    -------
    mad_std : float or `~numpy.ndarray`
        The robust standard deviation of the input data. If ``axis`` is
        `None` then a scalar will be returned, otherwise a
        `~numpy.ndarray` will be returned.

    Examples
    --------
    >>> import numpy as np
    >>> from photutils.extern.stats import mad_std
    >>> rand = np.random.RandomState(12345)
    >>> madstd = mad_std(rand.normal(5, 2, (100, 100)))
    >>> print(madstd)    # doctest: +FLOAT_CMP
    2.0232764659422626
    """
    # NOTE: 1. / scipy.stats.norm.ppf(0.75) = 1.482602218505602
    return median_absolute_deviation(data, axis=axis) * 1.482602218505602
def linear_reg_RANSAC(X, y, X_err=None, y_err=None, thresh=0., main_path='',
                      pl_name='', plots=False, log=False):
    ## Robustly fit linear model with RANSAC algorithm
    X_old = X
    X = X.reshape((len(X), 1))
    model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression())
    if thresh != 0:
        threshold = median_absolute_deviation(y) * thresh
        model_ransac.set_params(residual_threshold=threshold)
    model_ransac.fit(X, y)
    inlier_mask = model_ransac.inlier_mask_
    outlier_mask = np.logical_not(inlier_mask)

    ## Predict data of estimated models
    line_X = np.linspace(np.min(X), np.max(X), 100)
    line_y_ransac = model_ransac.predict(line_X[:, np.newaxis])
    y_predicted = model_ransac.predict(X)
    slope = model_ransac.estimator_.coef_[0][0]
    inter = model_ransac.estimator_.intercept_[0]
    score = np.sqrt(((y[inlier_mask] - y_predicted[inlier_mask])**2).sum() /
                    (np.std(X[inlier_mask]) * len(X[inlier_mask]) *
                     (len(X[inlier_mask]) - 2)))
    # `is not None` avoids the ambiguous element-wise comparison that
    # `X_err != None` would trigger on arrays
    if X_err is not None and y_err is not None:
        slope_err = (score * np.dot(X_err[inlier_mask], y_err[inlier_mask]) /
                     np.dot(X_old[inlier_mask], X_old[inlier_mask]))**2
    else:
        slope_err = score

    ## Plotting inliers and outliers + linear regression
    if plots:
        fig, ax = plt.subplots(1)
        fig.suptitle(pl_name, fontsize=15)
        ax.scatter(X[inlier_mask], y[inlier_mask], marker='.', c='b',
                   edgecolors='None')
        ax.scatter(X[outlier_mask], y[outlier_mask], marker='.', c='r',
                   edgecolors='None')
        ax.loglog(line_X, line_y_ransac, color='c', alpha=.7,
                  label='slope = %.5f\nerror = %.5f' % (slope, slope_err))
        ax.set_xlabel('Ref')
        ax.set_ylabel('Single epoch')
        ax.legend(loc='best', fontsize='x-small')
        if log:
            ax.set_xscale('log')
            ax.set_yscale('log')
        ax.grid(which='major', axis='x', linewidth=0.5, linestyle='-',
                color='0.8')
        ax.grid(which='major', axis='y', linewidth=0.5, linestyle='-',
                color='0.8')
        plt.savefig('%s/%s_ransac.png' % (main_path, pl_name), dpi=300)
        plt.clf()
        plt.close()

    ## Return the coef of the linear regression
    return slope, inter, line_X, line_y_ransac, outlier_mask, slope_err
def calcJump(spec, verbose=0):
    """
    Calculates the ratio in the overlap region.
    spec must be a dictionary; the jump is calculated only for the central
    detectors
    """
    overlap = [959.3, 989.4]  # GHz
    fx = {}
    for idet in ['SSWD4', 'SLWC3']:
        fqX = spec[idet]['wave']
        maskOver = (fqX >= overlap[0]) & (fqX <= overlap[1])
        fx[idet] = spec[idet]['flux'][maskOver]
    #
    ratio = fx['SLWC3'] / fx['SSWD4']
    #
    # Find the median and Median Absolute Deviation of the ratio
    med = np.median(ratio)
    mad = median_absolute_deviation(ratio)
    if (verbose):
        print("SLW/SSW median ratio in overlap = %5.3f +/- %5.3f"
              % (med, mad))
    return med, mad
def sigma_func(arr, axis=None):
    """
    Robust method for calculating the deviation of an array.
    ``sigma_func`` uses the median absolute deviation to determine the
    standard deviation.

    Parameters
    ----------
    arr : `~ccdproc.CCDData` or `~numpy.ndarray`
        Array whose deviation is to be calculated.

    axis : None or int or tuple of ints, optional
        Axis or axes along which the function is performed. If ``None``
        (the default) it is performed over all the dimensions of the input
        array. The axis argument can also be negative, in this case it
        counts from the last to the first axis.

    Returns
    -------
    float
        uncertainty of array estimated from median absolute deviation.
    """
    # pass axis through, as documented above
    return stats.median_absolute_deviation(arr, axis=axis) * 1.482602218505602
def get_wise(coordinates, ang_size, band=1, clobber=True):
    """
    Get cutouts of ALLWISE images

    Parameters
    ----------
    coordinates : astropy.coordinates object
        Central coordinates of desired cutout
    ang_size : astropy.quantity object
        Angular diameter of desired cutout (max = 5 degrees)
    band : integer, default = 1
        WISE band to get cutout for. WISE filters correspond to rest-frame
        central wavelengths of W1 = 3.4um, W2 = 4.6um, W3 = 12.0um and
        W4 = 22.0um.
    clobber : bool, default = True
        Overwrite existing filenames when generating cutouts

    Returns
    -------
    """
    # pass the caller's band and clobber through instead of hard-coding
    # band=1, clobber=True
    filename = dl_wise(coordinates, ang_size, band=band, clobber=clobber)
    image = load_image(filename)
    mad = median_absolute_deviation(image)
    sigma = mad  # /np.sqrt(2)*erfinv(2*0.75-1)
    image = image.subtract(np.median(image) * image.unit)
    image = image.divide(sigma)
    return image  # , out
def background(b_arr, niter=3):
    """Determine the background for an array

    Parameters
    ----------
    b_arr: numpy.ndarray
        Array for the determination of the background

    niter: int
        Number of iterations for sigma clipping

    Returns
    -------
    bkgrd: float
        median background value after sigma clipping

    bkstd: float
        Estimated standard deviation based on the median absolute
        deviation
    """
    cl_arr = stats.sigma_clip(b_arr, iters=niter, cenfunc=np.ma.median,
                              varfunc=stats.median_absolute_deviation)
    return np.ma.median(cl_arr), 1.48 * stats.median_absolute_deviation(cl_arr)
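# Equivalent sketch against the current astropy API, where sigma_clip's
# keywords are maxiters/cenfunc/stdfunc rather than the older iters/varfunc
# used above; behaviour is assumed comparable, not byte-identical.
import numpy as np
from astropy import stats

def background_modern(b_arr, niter=3):
    cl_arr = stats.sigma_clip(b_arr, maxiters=niter, cenfunc=np.ma.median,
                              stdfunc='mad_std')
    return np.ma.median(cl_arr), 1.48 * stats.median_absolute_deviation(cl_arr)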
def _patch_rlrps(array, array_ref, rank, low_rank_ref, low_rank_mode,
                 thresh, thresh_mode, max_iter, auto_rank_mode='noise',
                 cevr=0.9, residuals_tol=1e-2, random_seed=None, debug=False,
                 full_output=False):
    """ Patch decomposition based on GoDec/SSGoDec (Zhou & Tao 2011)
    """
    ############################################################################
    # Initializing L and S
    ############################################################################
    L = array
    if low_rank_ref:
        L_ref = array_ref.T
    else:
        L_ref = None
    S = np.zeros_like(L)
    random_state = np.random.RandomState(random_seed)
    itr = 0
    power = 0
    svdlib = 'lapack'

    while itr <= max_iter:
        ########################################################################
        # Updating L
        ########################################################################
        if low_rank_mode == 'brp':
            Y2 = random_state.randn(L.shape[1], rank)
            for _ in range(power + 1):
                Y1 = np.dot(L, Y2)
                Y2 = np.dot(L.T, Y1)
            Q, _ = qr(Y2, mode='economic')
            Lnew = np.dot(np.dot(L, Q), Q.T)

        elif low_rank_mode == 'svd':
            if itr == 0:
                PC = get_eigenvectors(rank, L, svdlib, mode=auto_rank_mode,
                                      cevr=cevr, noise_error=residuals_tol,
                                      data_ref=L_ref, debug=debug,
                                      collapse=True)
                rank = PC.shape[0]  # so we can use the optimized rank
                if low_rank_ref:
                    Lnew = np.dot(np.dot(PC, L).T, PC).T
                else:
                    Lnew = np.dot(np.dot(L, PC.T), PC)
            else:
                rank_i = min(rank, min(L.shape[0], L.shape[1]))
                PC = svd_wrapper(L, svdlib, rank_i, False, False,
                                 random_state=random_state)
                Lnew = np.dot(np.dot(L, PC.T), PC)

        else:
            raise RuntimeError('Low Rank estimation mode not recognized.')

        ########################################################################
        # Updating S
        ########################################################################
        T = L - Lnew + S
        threshold = np.sqrt(median_absolute_deviation(T.ravel())) * thresh

        # threshold = np.sqrt(median_absolute_deviation(T, axis=0)) * thresh
        # threshmat = np.zeros_like(T)
        # for i in range(threshmat.shape[0]):
        #     threshmat[i] = threshold
        # threshold = threshmat

        if debug:
            print('threshold = {:.3f}'.format(threshold))
        S = thresholding(T, threshold, thresh_mode)

        T -= S
        L = Lnew + T
        itr += 1

    G = array - L - S
    L = L.T
    S = S.T
    G = G.T

    if full_output:
        return L, S, G
    else:
        return S
def fit_wavelength_solution(sol_dict):
    """Determine the best fit solution and re-fit each line with that
    solution

    The following steps are used to determine the best wavelength solution:
    1. The coefficients of the solution to each row are fit by a line
    2. The coefficients for each row are then replaced by the best-fit
       values
    3. The wavelength zeropoint is then re-calculated for each row

    Parameters
    ----------
    sol_dict: dict
        A dictionary where the key is the y-position of each row and the
        value is a list that contains an array of x values of peaks, the
        corresponding wavelength array of the peaks, and a
        `~astropy.modeling.model` that transforms between the x positions
        and wavelengths

    Returns
    -------
    sol_dict: dict
        An updated dictionary with the new wavelength solution for each
        row
    """
    # determine the quality of each solution
    weights = np.zeros(len(sol_dict))
    yarr = np.zeros(len(sol_dict))
    ncoef = len(sol_dict[next(iter(sol_dict))][2].parameters)
    coef_list = []
    for i in range(ncoef):
        coef_list.append(np.zeros(len(sol_dict)))

    # populate the coefficient list with values
    for i, y in enumerate(sol_dict):
        yarr[i] = y
        mx, mw, ws = sol_dict[y]
        # MAD / 0.6745 is a robust estimate of the rms of the residuals
        weights[i] = stats.median_absolute_deviation(ws(mx) - mw) / 0.6745
        for j, p in enumerate(ws.parameters):
            coef_list[j][i] = p

    # fit each coefficient with a value
    coef_sol = []
    for coef in coef_list:
        fit_c = mod.fitting.LinearLSQFitter()
        c_init = mod.models.Polynomial1D(1)
        mask = (weights < 5 * np.median(weights))
        c = iterfit1D(yarr[mask], coef[mask], fit_c, c_init, niter=7)
        coef_sol.append(c)

    # refit, only allowing the zeropoint to change
    for i, y in enumerate(sol_dict):
        mx, mw, ws = sol_dict[y]
        for j, n in enumerate(ws.param_names):
            c = coef_sol[j]
            ws.parameters[j] = c(y)
        weights = calc_weights(mx, mw, ws)
        dw = np.average(mw - ws(mx), weights=weights)
        ws.c0 = ws.c0 + dw
        sol_dict[y] = [mx, mw, ws]
    return sol_dict
def find_ionfactor(parmdb_file, baseline_dict, t1, t2, target_rms_rad=0.2):
    """
    Finds ionospheric scaling factor
    """
    pdb_in = lofar.parmdb.parmdb(parmdb_file)
    parms = pdb_in.getValuesGrid('*')

    # Filter any stations not in both the instrument table and the ms
    stations_pbd = set([s.split(':')[-1] for s in pdb_in.getNames()])
    stations_ms = set([s for s in baseline_dict.values() if type(s) is str])
    stations = sorted(list(stations_pbd.intersection(stations_ms)))

    # Select long baselines only (BL > 10 km), as they will set the
    # ionfactor scaling
    ant1 = []
    ant2 = []
    dist = []
    min_length = 10.0
    for k, v in baseline_dict.items():
        if type(v) is not str and '-' in k:
            if v > min_length:
                s1 = k.split('-')[0]
                s2 = k.split('-')[1]
                s1_name = baseline_dict[s1]
                s2_name = baseline_dict[s2]
                if s1_name in stations and s2_name in stations:
                    ant1.append(s1_name)
                    ant2.append(s2_name)
                    dist.append(v)

    # Find correlation times
    rmstimes = []
    dists = []
    freq = None
    for a1, a2, d in zip(ant1, ant2, dist):
        if freq is None:
            freq = np.copy(parms['Gain:0:0:Phase:{}'.format(a1)]['freqs'])[0]
            times = np.copy(parms['Gain:0:0:Phase:{}'.format(a1)]['times'])
            time_ind = np.where((times >= t1) & (times < t2))[0]
            timepersolution = np.copy(
                parms['Gain:0:0:Phase:{}'.format(a1)]['timewidths'])[0]
        ph1 = np.copy(
            parms['Gain:0:0:Phase:{}'.format(a1)]['values'])[time_ind]
        ph2 = np.copy(
            parms['Gain:0:0:Phase:{}'.format(a2)]['values'])[time_ind]

        # Filter flagged solutions
        good = np.where((~np.isnan(ph1)) & (~np.isnan(ph2)))[0]
        if len(good) == 0:
            continue

        rmstime = None
        ph = unwrap_fft(ph2[good] - ph1[good])

        step = 1
        for i in range(1, len(ph) // 2, step):
            p1 = ph[i:]
            p2 = ph[:-i]
            rms = np.linalg.norm(p1 - p2) / np.sqrt(len(p1))
            mad = median_absolute_deviation(p1 - p2)
            mean = np.mean(p1 - p2)
            if rms + mean > target_rms_rad:
                rmstime = i
                break
        if rmstime is None:
            rmstime = len(ph) // 2
        rmstimes.append(rmstime)
        dists.append(d)

    # Find the mean ionfactor assuming that the correlation time goes as
    # t_corr ~ 1/sqrt(BL). The ionfactor is defined in BLavg() as:
    #
    #     ionfactor = (t_corr / 30.0 sec) / ( np.sqrt((25.0 / dist_km)) * (freq_hz / 60.e6) )
    #
    ionfactor = np.mean(np.array(rmstimes) / 30.0 /
                        (np.sqrt(25.0 / np.array(dists)) *
                         freq / 60.0e6)) * timepersolution
    return ionfactor
def xmatch_checkplots0(ra1, dec1, ra2, dec2,
                       width=10.0,
                       binsize=1.0,
                       saveplot=True,
                       markersize=1.0,
                       plotfile='',
                       suptitle='',
                       **kwargs):
    """
    Based on code by Chris Desira
    """
    import numpy as np
    import matplotlib.pyplot as plt

    from astropy import stats
    from astropy.coordinates import SkyCoord
    from astropy import units as u

    from librgm.plotid import plotid

    rmax = width

    print('RA1 range:', np.min(ra1), np.max(ra1))
    print('Dec1 range:', np.min(dec1), np.max(dec1))
    print('RA2 range:', np.min(ra2), np.max(ra2))
    print('Dec2 range:', np.min(dec2), np.max(dec2))

    # offsets in arc seconds
    difference_ra = (ra1 - ra2) * np.cos(np.radians(dec1)) * 3600.0
    difference_dec = (dec1 - dec2) * 3600.0

    itest = (np.abs(difference_ra) < rmax) & (np.abs(difference_dec) < rmax)
    difference_ra = difference_ra[itest]
    difference_dec = difference_dec[itest]

    skycoord_object1 = SkyCoord(ra1, dec1, unit=('degree', 'degree'),
                                frame='icrs')
    skycoord_object2 = SkyCoord(ra2, dec2, unit=('degree', 'degree'),
                                frame='icrs')
    skycoord_object1 = skycoord_object1[itest]
    skycoord_object2 = skycoord_object2[itest]

    separations = skycoord_object1.separation(skycoord_object2)

    med = np.median(separations.arcsec)
    ndata = len(separations)
    mad = stats.median_absolute_deviation(separations.arcsec)
    mad_std = stats.mad_std(separations.arcsec)

    fig = plt.figure(1, figsize=(10, 5))
    plt.suptitle(suptitle, size=10)

    ax1 = fig.add_subplot(1, 2, 1)
    xdata = separations.arcsec
    n, b, patches = ax1.hist(xdata, bins=int(rmax / binsize),
                             range=[0.0, rmax], color='green', alpha=0.5)
    bin_min = np.where(n == n.min())
    ax1.locator_params(axis='x', nbins=4)
    s04 = '# = %i' % ndata
    ax1.annotate(s04, (0.28, 0.90), xycoords='axes fraction', size=8)
    s01 = 'Median = %.2f' % med
    ax1.annotate(s01, (0.28, 0.85), xycoords='axes fraction', size=8)
    ax1.set_xlabel('Pairwise separation (arcsec)')
    ax1.set_ylabel('Frequency per bin')

    ax2 = fig.add_subplot(1, 2, 2, aspect='equal')
    alpha = 1.0
    ax2.plot(difference_ra, difference_dec, 'oc', markersize=markersize,
             markeredgewidth=0.0, alpha=alpha)  # 0.5 smallest size
    ax2.axis([-1.0 * rmax, rmax, -1.0 * rmax, rmax])
    ax2.locator_params(axis='x', nbins=4)
    ax2.set_xlabel('Delta RA')
    ax2.set_ylabel('Delta Dec')
    s11 = 'Self-xmatch'
    ax2.annotate(s11, (0.45, 0.95), xycoords='axes fraction', size=8)
    s1 = '# of Objects = %i' % ndata
    ax2.annotate(s1, (0.45, 0.90), xycoords='axes fraction', size=8)
    s7 = 'MAD = %.2f' % mad
    ax2.annotate(s7, (0.45, 0.85), xycoords='axes fraction', size=8)
    s3 = 'sigma_MAD = %.2f' % mad_std
    ax2.annotate(s3, (0.45, 0.80), xycoords='axes fraction', size=8)

    fig.tight_layout()
    ax2.grid()

    fig.subplots_adjust(top=0.88)
    # make room for the plotid on right edge
    fig.subplots_adjust(right=0.95)
    plotid()

    if plotfile:
        print('Saving plotfile:', plotfile)
        plt.savefig(plotfile)

    if 'save' in kwargs:
        path_to_save = str(kwargs['save'])
        plt.savefig(path_to_save, dpi=150)
    else:
        plt.show()
def biweight_midvariance(a, c=9.0, M=None, axis=None):
    """
    Compute the biweight midvariance.

    The biweight midvariance is a robust statistic for determining the
    midvariance (i.e. the standard deviation) of a distribution. It is
    given by:

    .. math::

      C_{bl}= (n')^{1/2} \\frac{[\Sigma_{|u_i|<1} (x_i-M)^2(1-u_i^2)^4]^{0.5}}
      {|\Sigma_{|u_i|<1} (1-u_i^2)(1-5u_i^2)|}

    where :math:`u_i` is given by

    .. math::

      u_{i} = \\frac{(x_i-M)}{c MAD}

    where :math:`c` is the tuning constant and :math:`MAD` is the median
    absolute deviation. The midvariance tuning constant ``c`` is typically
    9.0.

    :math:`n'` is the number of points for which :math:`|u_i| < 1` holds,
    while the summations are over all :math:`i` up to :math:`n`:

    .. math::

        n' = \Sigma_{|u_i|<1}^n 1

    This is slightly different than given in the reference below, but
    results in a value closer to the true midvariance.

    For more details, see `Beers, Flynn, and Gebhardt (1990); AJ 100, 32
    <http://adsabs.harvard.edu/abs/1990AJ....100...32B>`_.

    Parameters
    ----------
    a : array-like
        Input array or object that can be converted to an array.
    c : float, optional
        Tuning constant for the biweight estimator. Default value is 9.0.
    M : float or array-like, optional
        Initial guess for the biweight location. An array can be input
        when using the ``axis`` keyword.
    axis : int, optional
        Axis along which the biweight midvariances are computed. The
        default (`None`) is to compute the biweight midvariance of the
        flattened array.

    Returns
    -------
    biweight_midvariance : float or `~numpy.ndarray`
        The biweight midvariance of the input data. If ``axis`` is `None`
        then a scalar will be returned, otherwise a `~numpy.ndarray` will
        be returned.

    Examples
    --------
    Generate random variates from a Gaussian distribution and return the
    biweight midvariance of the distribution::

    >>> import numpy as np
    >>> from photutils.extern.stats import biweight_midvariance
    >>> rand = np.random.RandomState(12345)
    >>> from numpy.random import randn
    >>> bmv = biweight_midvariance(rand.randn(1000))
    >>> print(bmv)    # doctest: +FLOAT_CMP
    0.986726249291
    """
    a = np.asanyarray(a)

    if M is None:
        M = np.median(a, axis=axis)
    if axis is not None:
        M = np.expand_dims(M, axis=axis)

    # set up the differences
    d = a - M

    # set up the weighting
    mad = median_absolute_deviation(a, axis=axis)
    if axis is not None:
        mad = np.expand_dims(mad, axis=axis)
    u = d / (c * mad)

    # now remove the outlier points
    mask = np.abs(u) < 1
    u = u ** 2
    n = mask.sum(axis=axis)

    f1 = d * d * (1. - u)**4
    f1[~mask] = 0.
    f1 = f1.sum(axis=axis) ** 0.5

    f2 = (1. - u) * (1. - 5. * u)
    f2[~mask] = 0.
    f2 = np.abs(f2.sum(axis=axis))

    return (n ** 0.5) * f1 / f2
def biweight_location(a, c=6.0, M=None, axis=None):
    """
    Compute the biweight location.

    The biweight location is a robust statistic for determining the
    central location of a distribution. It is given by:

    .. math::

        C_{bl}= M+\\frac{\Sigma_{\|u_i\|<1} (x_i-M)(1-u_i^2)^2}
        {\Sigma_{\|u_i\|<1} (1-u_i^2)^2}

    where :math:`M` is the sample median (or the input initial guess) and
    :math:`u_i` is given by:

    .. math::

        u_{i} = \\frac{(x_i-M)}{c\ MAD}

    where :math:`c` is the tuning constant and :math:`MAD` is the median
    absolute deviation.

    For more details, see `Beers, Flynn, and Gebhardt (1990); AJ 100, 32
    <http://adsabs.harvard.edu/abs/1990AJ....100...32B>`_.

    Parameters
    ----------
    a : array-like
        Input array or object that can be converted to an array.
    c : float, optional
        Tuning constant for the biweight estimator. Default value is 6.0.
    M : float or array-like, optional
        Initial guess for the biweight location. An array can be input
        when using the ``axis`` keyword.
    axis : int, optional
        Axis along which the biweight locations are computed. The default
        (`None`) is to compute the biweight location of the flattened
        array.

    Returns
    -------
    biweight_location : float or `~numpy.ndarray`
        The biweight location of the input data. If ``axis`` is `None`
        then a scalar will be returned, otherwise a `~numpy.ndarray` will
        be returned.

    Examples
    --------
    Generate random variates from a Gaussian distribution and return the
    biweight location of the distribution::

    >>> import numpy as np
    >>> from photutils.extern.stats import biweight_location
    >>> rand = np.random.RandomState(12345)
    >>> from numpy.random import randn
    >>> loc = biweight_location(rand.randn(1000))
    >>> print(loc)    # doctest: +FLOAT_CMP
    -0.0175741540445
    """
    a = np.asanyarray(a)

    if M is None:
        M = np.median(a, axis=axis)
    if axis is not None:
        M = np.expand_dims(M, axis=axis)

    # set up the differences
    d = a - M

    # set up the weighting
    mad = median_absolute_deviation(a, axis=axis)
    if axis is not None:
        mad = np.expand_dims(mad, axis=axis)
    u = d / (c * mad)

    # now remove the outlier points
    mask = (np.abs(u) >= 1)
    u = (1 - u ** 2) ** 2
    u[mask] = 0

    return M.squeeze() + (d * u).sum(axis=axis) / u.sum(axis=axis)
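# Hedged sanity check for the two biweight estimators above: on a Gaussian
# sample with a handful of gross outliers, both should stay close to the
# clean values, unlike the plain mean and standard deviation.
import numpy as np
from astropy.stats import median_absolute_deviation

rng = np.random.RandomState(0)
a = rng.normal(10.0, 1.0, 1000)
a[:10] = 100.0  # contamination

print(np.mean(a), biweight_location(a))     # mean is dragged up; biweight ~10
print(np.std(a), biweight_midvariance(a))   # std is inflated; biweight ~1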
def wavelength_calibrate_order(hrs, slines, sfluxes, ws_init, fit_ws, y0=50,
                               npoints=30, xlimit=1.0, slimit=1.0,
                               wlimit=0.5, fixed=False):
    """Wavelength calibration of a single order from the HRS arc spectra

    The calibration proceeds through following steps:
    1. Curvature due to the optical distortion is removed from the spectra
       and a square representation of the 2D spectra is created. Only
       integer shifts are applied to the data
    2. A model of the spectrograph is created based on the order, camera,
       and xpos offset that are supplied.
    3. In each row of the data, peaks are extracted and matched with a
       line in the atlas of wavelengths that is provided (slines, sflux).
       For the details of the matching process, see the match_arc
       function.
    4. Once the first set of peaks and lines are matched up, a new
       solution is calculated for the given row. Then the processes of
       matching lines and determining a wavelength solution is repeated.
       The best result from each line is saved.
    5. Using all of the matched lines from all lines, a 'best' solution is
       determined. Everything but the zeroth order parameter of the fit is
       fixed to a slowly varying value based on the overall solution to
       all lines. See fit_solution for more details.
    6. Based on the best solution found, the process is repeated for each
       row but only determining the zeropoint.
    7. Based on the solution found, a wavelength is assigned to each pixel

    Parameters
    ----------
    hrs: ~HRSOrder
        Object describing a single HRS order. It should already contain
        the defined order and the flux from the arc for that order
    slines: numpy.ndarray
        wavelengths of known arc lines
    sfluxes: numpy.ndarray
        relative fluxes at those wavelengths
    ws_init: ~astropy.modeling.model
        An initial model describing the transformation from x-position to
        wavelength
    fit_ws: ~astropy.modeling.fitting
        Method to fit the model
    y0: int
        First row for determining the solution
    npoints: int
        The maximum number of bright points to fit.
    xlimit: float
        Maximum shift in line centroid when fitting
    slimit: float
        Minimum scale for line when fitting
    wlimit: float
        Minimum separation in wavelength between peak and line

    Returns
    -------
    hrs: ~HRSOrder
        An HRSOrder with a calibrated wavelength property
    """
    import pickle
    # create the box
    xmax = hrs.region[1].max()
    xmin = 0
    ymax = hrs.region[0].max()
    ymin = hrs.region[0].min()
    ys = ymax - ymin
    xs = xmax - xmin
    ydata = np.zeros((ys + 1, xs + 1))
    coef = np.polyfit(hrs.region[1], hrs.region[0], 3)
    xarr = np.arange(xs + 1)
    yarr = np.polyval(coef, xarr) - ymin
    x = hrs.region[1] - xmin
    y = hrs.region[0] - ymin - (np.polyval(coef, x) - ymin -
                                yarr.min()).astype(int)
    ys = y.max()
    data = np.zeros((ys + 1, xs + 1))
    data[y, x] = hrs.flux
    pickle.dump(data, open('box_%i.pkl' % hrs.order, 'wb'))

    # set the wavelength
    func_order = len(ws_init.parameters)
    warr = ws_init(xarr)

    # match the lines
    y = data[:, int(0.5 * len(xarr))]
    y = np.where(y > 0)[0]
    nmax = y.max()
    thresh = 3

    # find the best solution
    farr = 1.0 * data[y0, :]
    mx, mw = match_lines(xarr, farr, slines, sfluxes, ws_init,
                         npoints=npoints, xlimit=xlimit, slimit=slimit,
                         wlimit=1.0)
    if fixed:
        ws = ws_init.copy()
        dw = np.median(mw - ws(mx))
        ws.c0 -= dw
    else:
        if len(mx) == 0:
            return hrs, None
        ws = iterfit1D(mx, mw, fit_ws, ws_init)

    sol_dict = {}
    for y in range(0, nmax, 1):
        farr = 1.0 * data[y, :]  # flux for this row
        if farr.sum() > 0:
            mx, mw = match_lines(xarr, farr, slines, sfluxes, ws,
                                 npoints=npoints, xlimit=xlimit,
                                 slimit=slimit, wlimit=wlimit)
            if len(mx) > func_order + 1:
                if fixed:
                    nws = ws_init.copy()
                    dw = np.median(mw - nws(mx))
                    nws.c0 -= dw
                    sol_dict[y] = [mx, mw, nws]
                else:
                    nws = iterfit1D(mx, mw, fit_ws, ws_init, thresh=thresh)
                    sol_dict[y] = [mx, mw, nws]
    if len(sol_dict) == 0:
        return hrs, None
    pickle.dump(sol_dict, open('sol_%i.pkl' % hrs.order, 'wb'))
    sol_dict = fit_wavelength_solution(sol_dict)

    # update the wavelength values
    wdata = 0.0 * data
    edata = 0.0 * data
    for y in sol_dict:
        mx, mw, nws = sol_dict[y]
        wdata[y, :] = nws(xarr)
        # MAD / 0.6745 is a robust estimate of the rms of the residuals
        rms = stats.median_absolute_deviation(mw - nws(mx)) / 0.6745
        edata[y, :] += rms

    x = hrs.region[1]
    y = hrs.region[0] - ymin - (np.polyval(coef, hrs.region[1]) - ymin -
                                yarr.min()).astype(int)
    hrs.wavelength = wdata[y, x]
    hrs.wavelength_error = edata[y, x]

    # in case no solution was found for y0
    try:
        yt = sol_dict[y0][0]
    except KeyError:
        y0 = list(sol_dict.keys())[0]

    return hrs, sol_dict