def statistics(data):
    """Print summary statistics of ``data`` and check how Gaussian it is.

    The data are flattened by subtracting a two-axis moving mean (windows
    ``params.st_bp_window_t`` along axis 0 and ``params.st_bp_window_f``
    along axis 1).  Statistics of the flattened, unmasked, non-NaN values
    and of the raw data are printed, a histogram of the flattened values is
    written to ``hist_data.dat`` and a Gaussian is fitted to that histogram
    (written to ``hist_fit.dat`` when the fit succeeds).

    Parameters
    ----------
    data : 2-D array
        Passed through ``ensure_mask`` first (presumably so that it carries
        a full mask array -- confirm against that helper).

    Returns
    -------
    None.  Results are printed and saved to disk.  The process exits with
    status 1 if the number of unmasked flattened samples does not match the
    number of unmasked samples in ``data``.
    """
    # I think 8 is better for flattening

    def num2str(x):
        return "%.3f" % x

    # Define model function to be used to fit to the data
    def gauss(x, *p):
        A, mu, sigma = p
        return A * np.exp(-(x - mu) ** 2 / (2. * sigma ** 2))

    def gauss_fit(in_data):
        # p0 is the initial guess for the fitting coefficients
        # (A, mu and sigma above)
        p0 = [np.max(in_data), np.argmax(in_data), np.std(in_data)]
        coeff, var_matrix = scipy.optimize.curve_fit(
            gauss, np.arange(len(in_data)), in_data, p0=p0)
        return coeff, np.sqrt(np.mean(np.diag(var_matrix)))

    data = ensure_mask(data)

    # Now can use the demeaning.  `//` keeps the integer shift that the
    # original Python 2 `/` produced for integer window sizes.
    flat = bn.move_nanmean(data, params.st_bp_window_t, axis=0)
    flat = np.roll(flat, -params.st_bp_window_t // 2 + 1, axis=0)
    flat = bn.move_nanmean(flat, params.st_bp_window_f, axis=1)
    flat = np.roll(flat, -params.st_bp_window_f // 2 + 1, axis=1)
    flat = data - flat
    flat = np.ma.ravel(flat)
    flat = flat[np.logical_not(flat.mask)]
    n_unmasked = np.ma.MaskedArray.count(data)
    if len(flat) != n_unmasked:
        print("ERROR: mask not preserved in statistics %s %s"
              % (len(flat), n_unmasked))
        exit(1)
    flat = flat[np.logical_not(np.isnan(flat))]
    flat -= np.mean(flat)

    # Print stats. Some are from the data, others from the flattened data
    print("Gaussian statistics, from de-meaned data:")
    print(" Min %s Max %s Std %s Skewness %s Kurtosis %s" % (
        num2str(np.min(flat)), num2str(np.max(flat)), num2str(np.std(flat)),
        num2str(skew(flat)), num2str(kurtosis(flat, fisher=True))))
    print("Statistics from data, not de-meaned:")
    print(" Min %s Max %s Std %s" % (
        num2str(np.ma.min(data)), num2str(np.ma.max(data)),
        num2str(np.ma.std(data))))
    total = data.shape[0] * data.shape[1]
    num_in = np.ma.MaskedArray.count(data)
    # Float arithmetic: with integer operands Python 2's `/` floors the
    # percentage before it is formatted with three decimals.
    flagged_percent = 100.0 * (total - num_in) / total
    print("Flags: %s flagged (num:%s)"
          % ("%.3f%%" % flagged_percent, str(total - num_in)))

    # Get histogram for Gauss fit: column 0 holds the left bin edges,
    # column 1 the counts.
    histogram = np.zeros((params.histogram_length, 2))
    hist = np.histogram(flat, params.histogram_length)
    histogram[:, 0] = hist[1][:params.histogram_length]
    histogram[:, 1] = hist[0]
    np.savetxt("hist_data.dat", histogram)

    # See how Gaussian it is.  The fit is best-effort, so any ordinary error
    # is reported and swallowed; unlike a bare `except:` this no longer
    # traps KeyboardInterrupt or SystemExit.
    try:
        coeff, err = gauss_fit(histogram[:, 1])
        print("Gauss fit error %s (hoping for < 5)" % ("%.3f" % err))
        histogram[:, 1] = gauss(np.arange(len(histogram)),
                                coeff[0], coeff[1], coeff[2])
        np.savetxt("hist_fit.dat", histogram)
    except Exception:
        print("Gauss fit failed")
def clip(data):
    """Mask samples of ``data`` that deviate strongly from a running mean.

    The clipping level is ``params.sigma`` times the standard deviation of
    the flattened top third (along axis 1) of the data.  The whole array is
    then flattened the same way and every sample above/below that level is
    added to ``data.mask``.  ``data`` is modified in place; nothing is
    returned.
    """
    win_t = params.sc_bp_window_t
    win_f = params.sc_bp_window_f

    def running_mean(arr):
        # Moving mean along axis 0 then axis 1, each rolled back so the
        # window is (roughly) centred on the sample.
        smoothed = bn.move_nanmean(arr, win_t, axis=0)
        smoothed = np.roll(smoothed, -win_t // 2 + 1, axis=0)
        smoothed = bn.move_nanmean(smoothed, win_f, axis=1)
        return np.roll(smoothed, -win_f // 2 + 1, axis=1)

    # Standard deviation of the high (by frequency) third of the data.
    n_top = data.shape[1] // 3
    top = data[:, data.shape[1] - n_top:]
    residual = top - running_mean(top)
    # Drop the leading edge, whose values are NaN after move_nanmean.
    # NOTE(review): np.roll wraps around, so some of those NaNs may end up
    # at the far edge instead -- confirm np.std below is not NaN in practice.
    residual = residual[win_t:, win_f:]
    residual = np.ravel(residual)
    if np.ma.is_masked(residual):
        residual = residual[np.logical_not(residual.mask)]

    # Clipping values
    spread = np.std(residual)
    dmin = -params.sigma * spread
    dmax = params.sigma * spread

    # Mask the data. Have to flatten the data to find where to mask it
    flat = data - running_mean(data)
    centre = np.ma.mean(flat[win_t:, win_f:])
    # The edge values are NaN due to move_nanmean; overwrite them with the
    # mean so they become zero after the subtraction and are never clipped.
    flat[:win_t, :] = centre
    flat[:, :win_f] = centre
    flat -= centre

    data.mask = np.logical_or(data.mask, flat > dmax)
    data.mask = np.logical_or(data.mask, flat < dmin)
def clip1(data):
    """Sigma-clip each column of ``data`` independently, in place.

    For every column (axis-1 index) a moving mean of width
    ``params.sc_bp_window_t`` is subtracted, the standard deviation of the
    unmasked, non-NaN residual is measured, and samples further than
    ``params.sigma`` standard deviations from zero are added to the mask.

    Parameters
    ----------
    data : numpy.ma.MaskedArray, 2-D
        Modified in place (only its mask changes).

    Returns
    -------
    None.  The process exits with status 1 if the number of unmasked
    samples increases, which would indicate flags were lost.
    """
    nstart = np.ma.count(data)

    # With `nomask`, `data[:, i]` has no mask buffer to share, so writing to
    # the column's mask would not reach `data` and the clipping would be
    # silently discarded.  Give `data` a real mask array first.
    if np.ma.getmask(data) is np.ma.nomask:
        data.mask = np.ma.getmaskarray(data)

    window = params.sc_bp_window_t
    shift = -window // 2 + 1  # same integer shift as Python 2 `/`
    for i in range(data.shape[1]):
        channel = data[:, i]
        smooth = bn.move_nanmean(channel, window, axis=-1)
        smooth = np.roll(smooth, shift, axis=-1)
        flat = channel - smooth  # this will also insert the mask

        usable = np.logical_not(np.logical_or(
            np.isnan(np.ma.getdata(flat)), np.ma.getmaskarray(flat)))
        std = np.std(np.ma.getdata(flat)[usable])
        if np.isnan(float(std)):
            # Whole column masked / NaN: nothing to clip against.
            continue

        limit = params.sigma * std
        # NaN residuals compare False and masked ones are filled with
        # False; the latter stay flagged through the existing mask.
        outliers = np.ma.filled(
            np.logical_or(flat < -limit, limit < flat), False)
        # Assigning to the column view's mask writes through to `data`.
        data[:, i].mask = np.logical_or(
            np.ma.getmaskarray(data[:, i]), outliers)

    if np.ma.count(data) > nstart:
        print("ERROR: number of points flagged went DOWN after clipping!")
        exit(1)
def add_uncertainties(data):
    """Return the per-column RMS of ``data`` after removing a moving mean.

    Each column (axis-1 index) has a moving mean of width
    ``params.un_bp_window_t`` subtracted; the standard deviation of the
    remaining unmasked, non-NaN samples is that column's RMS.

    Parameters
    ----------
    data : 2-D array
        Passed through ``ensure_mask`` first (presumably so that it carries
        a full mask array -- confirm against that helper).

    Returns
    -------
    numpy.ndarray
        One value per column; NaN if the whole column is masked.  The
        process exits with status 1 if flattening changed the number of
        unmasked samples in a column.
    """
    data = ensure_mask(data)
    window = params.un_bp_window_t
    shift = -window // 2 + 1  # same integer shift as Python 2 `/`

    rms = np.zeros(data.shape[1])
    for i in range(data.shape[1]):
        channel = data[:, i]
        smooth = bn.move_nanmean(channel, window, axis=0)
        smooth = np.roll(smooth, shift, axis=0)
        flat = np.ma.ravel(channel - smooth)
        # getmaskarray also copes with a result that carries `nomask`.
        flat = flat[np.logical_not(np.ma.getmaskarray(flat))]

        n_unmasked = np.ma.MaskedArray.count(channel)
        if len(flat) != n_unmasked:
            # The message used to say "statistics" (copied from that
            # function), which pointed at the wrong place.
            print("ERROR: mask not preserved in add_uncertainties %s %s"
                  % (len(flat), n_unmasked))
            exit(1)

        flat = flat[np.logical_not(np.isnan(flat))]
        rms[i] = float(np.std(flat))  # Will be NaN if whole channel masked
    return rms
def filter(data, size, axis=0):
    """Smooth ``data`` along ``axis`` with a moving mean or median filter.

    ``params.median`` selects the filter: a moving mean
    (``bn.move_nanmean``, rolled back so the window is centred) when false,
    a median filter (``scipy.signal.medfilt``) when true.  An even ``size``
    is reduced by one so the window is odd.

    If the input is a masked array, the mask will be lost after filtering.

    Parameters
    ----------
    data : array
        The median branch requires a 2-D array.
    size : int
        Window length.
    axis : int
        Axis along which the window runs.  The median branch accepts only
        0 or 1.

    Returns
    -------
    numpy.ndarray
        The smoothed data, same shape as ``data``.  The process exits with
        status 1 for an invalid axis in the median branch.

    Notes
    -----
    The median branch used to filter each row when ``axis == 0`` and each
    column when ``axis == 1``, i.e. along the opposite axis from the mean
    branch.  Both branches now run the window along ``axis``.
    """
    if size % 2 == 0:
        size -= 1

    if not params.median:
        smoothed = bn.move_nanmean(data, size, axis=axis)
        # `//` keeps the integer shift Python 2's `/` gave for an int size.
        return np.roll(smoothed, -size // 2 + 1, axis=axis)

    if axis not in (0, 1):
        print("Invalid axis %s for filtering" % (axis,))
        exit(1)

    d = np.zeros((data.shape[0], data.shape[1]))
    if axis == 0:
        # Window runs down axis 0: filter every column.
        for i in range(data.shape[1]):
            d[:, i] = scipy.signal.medfilt(data[:, i], size)
    else:
        # Window runs along axis 1: filter every row.
        for i in range(data.shape[0]):
            d[i] = scipy.signal.medfilt(data[i], size)
    return d
len(indexT)) d_scrunched = scrunch(d[ant][indexT]) # Averages over 1MHz freq bins. # Masked entries are ignored data_averaged = np.ma.average(d_scrunched, axis=0) # averaging over time # seems like if channel is all masked it becomes 0 num_in = np.ma.MaskedArray.count(data_averaged) if num_in != len(data_averaged): raise RuntimeError("There are masked values in averaged array") if len(data_averaged[np.isnan(data_averaged)]) != 0: raise RuntimeError("There are NaN values in averaged array") av_data_dict["Bins"][ind][ant] = data_averaged flat = bn.move_nanmean(d_scrunched, bp_window_t, axis=0) flat = np.roll(flat, -bp_window_t / 2 + 1, axis=0) for indnu in range(d_scrunched.shape[1]): #print "indnu", d[ant][indexT,indnu].shape flat_channel = d_scrunched[:, indnu] - flat[:, indnu] rms_av[indnu] = np.std(flat_channel[bp_window_t:]) # print "rms",rms_av[ind][indnu] # plt.subplot(len(vecTbins)-1,2,flagsp-2*(len(vecTbins)-1)*indD+indD) # plt.subplot(1,2,flagsp-2*(len(vecTbins)-1)*indD+indD) av_data_dict["Bins"][ind][ant + "_RMS"] = rms_av av_data_dict["Bins"][ind][ant + "_SI"] = spectral_index( freq_scrunched[19:], data_averaged[19:]) #plt.subplot(2,2,flagsp)
def sum_threshold(data, thr_f, thr_t=None, scales=None, rho=1.5,
                  plot_progress=False, verbose=False):
    """Apply the Sum-Threshold method.

    This function applies a set of moving averages to the data along both
    the time and frequency axes, then checks whether the output is above a
    threshold.  This is the basic technique used in AOFlagger's algorithm.

    Parameters
    ----------
    data : np.ma.array
        Data to flag, 2D array (time, freq).  Modified in place: flagged
        samples are overwritten with a threshold-level value and the mask
        is updated.
    thr_f : int
        Threshold over which to flag on the frequency axis.
    thr_t : int, optional
        Threshold over which to flag on the time axis.  Defaults to
        ``thr_f``.
    scales : list, optional
        Window sizes (ints) to do the moving average over.  Defaults to
        ``[1, 2, 4, 8, 16, 32, 64]``.
    rho : float
        Threshold setting base.  From eqn 12 in Offringa et al. 2010::

                          thr_1
            thr_i = --------------
                    rho^(log_2(i))

        A value of 1.5 is suggested as being "empirically good".
    plot_progress : bool
        Show diagnostic images for every window size.
    verbose : bool
        Print the thresholds used for every window size.

    Returns
    -------
    The mask of ``data`` after flagging (``data.mask``).
    """
    if scales is None:
        scales = [1, 2, 4, 8, 16, 32, 64]
    if thr_t is None:
        thr_t = thr_f

    thr1_f = thr_f
    thr1_t = thr_t

    def exceeds(values, threshold):
        # Plain boolean array: True where |values - 1| >= threshold.
        # Already-masked samples are filled with False here; they stay
        # flagged through the existing mask that is OR-ed in below.
        return np.ma.filled(
            np.greater_equal(np.abs(values - 1), threshold), False)

    # do first stage of flagging:
    mask = np.logical_or(
        np.ma.getmaskarray(data),
        np.logical_or(exceeds(data, thr_f), exceeds(data, thr_t)))
    # NOTE(review): the later stages substitute 1 + sqrt(...); the missing
    # offset here looks like an oversight but is kept as it was -- confirm.
    data[mask] = np.sqrt(thr_f * thr_t)
    # Assigning values to a masked array clears the mask at those
    # positions, so it has to be restored.  Without this the input mask and
    # the first-stage flags were dropped.
    data.mask = mask

    for window in scales:
        thr_f = thr1_f / np.power(rho, np.log2(window))
        thr_t = thr1_t / np.power(rho, np.log2(window))

        if window > 1:
            summed_f = bn.move_nanmean(data, window, axis=1)
            summed_t = bn.move_nanmean(data, window, axis=0)
            mask = np.logical_or(
                np.ma.getmaskarray(data),
                np.logical_or(exceeds(summed_f, thr_f),
                              exceeds(summed_t, thr_t)))
            data[mask] = 1 + np.sqrt(thr_f * thr_t)
            data.mask = mask
        else:
            # Window of one sample: already handled by the first stage;
            # these are only used for plotting.
            summed_f = data
            summed_t = data

        if verbose:
            print("M: %i, Xi_f: %2.2e, Xi_t: %2.2e" % (window, thr_f, thr_t))

        if plot_progress:
            plt.figure()
            plt.subplot(221)
            plt.title("summed f: %i" % window)
            plt.imshow(summed_f, aspect='auto', interpolation='none',
                       rasterized=True)
            plt.colorbar()
            plt.subplot(222)
            plt.title("summed t: %i" % window)
            plt.imshow(summed_t, aspect='auto', interpolation='none',
                       rasterized=True)
            plt.colorbar()
            plt.subplot(223)
            plt.title("flagged: %i" % window)
            plt.imshow(data, aspect='auto', interpolation='none',
                       rasterized=True)
            plt.colorbar()

    if plot_progress:
        plt.show()

    return data.mask
def bin_to_1MHz(bottom_f, filt, variance, channel_indexes):
    """Bin channels into blocks of 42 with inverse-variance weighting.

    Consecutive entries whose channel index lies within ``nbin`` (42) of
    the first entry of the block are combined into one inverse-variance
    weighted mean (https://en.wikipedia.org/wiki/Inverse-variance_weighting).
    There may be gaps in the channels, so blocks can hold different numbers
    of channels.  Plots and text files of the binned result are written to
    the current directory and some diagnostic RMS values are printed.

    Parameters
    ----------
    bottom_f : float
        Frequency of the first channel (presumably MHz, as in the plot
        labels -- confirm with the caller).
    filt : sequence
        Data values, one per channel.
    variance : sequence
        Variance of each value in ``filt``.
    channel_indexes : sequence
        Channel number of each value; assumed ascending.

    Returns
    -------
    (nf, ndata, nvariance) : tuple of lists
        Bin centre frequencies, weighted means, and variances of the means.

    Raises
    ------
    RuntimeError
        If the three input sequences differ in length.
    """
    def calc_rms(x):
        return np.sqrt(np.mean(x ** 2))

    if len(filt) != len(variance) or len(filt) != len(channel_indexes):
        # Report all three lengths (the last one used to repeat
        # len(variance)).
        raise RuntimeError("Arrays of different length in bin_to_1MHz " +
                           str(len(filt)) + " " + str(len(variance)) + " " +
                           str(len(channel_indexes)))

    # We want to bin 1MHz of channels. That means from channel N to N+41
    # (inclusive). However, there may be gaps in the channels, so there may
    # be different numbers of channels binned.
    # The bin frequencies are calculated as if every block had no gaps.
    nbin = 42
    chan_width = .024

    ndata = []
    nvariance = []
    n_channels = len(channel_indexes)
    i = 0
    while i < n_channels:
        # Find blocks of channels and bin them. Blocks are defined by a
        # channel separation of 42 in the indexes.
        j = i
        weighted_mean = 0.0
        D_2 = 0.0
        block_end = channel_indexes[i] + nbin
        while j < n_channels and channel_indexes[j] < block_end:
            weighted_mean += filt[j] / variance[j]
            # 1.0 keeps the reciprocal a float even for integer variances
            # under Python 2.
            D_2 += 1.0 / variance[j]
            j += 1
        print("%s channels binned" % (j - i,))

        D_2 = 1.0 / D_2
        weighted_mean *= D_2
        ndata.append(weighted_mean)
        nvariance.append(D_2)
        i = j

    # Get frequencies for the bins, based on what was the starting frequency
    # originally: the centre of a gap-free block of `nbin` channels.
    bottom_freq = (bottom_f + bottom_f + (nbin - 1) * chan_width) / 2
    print("Bottom f %s -> %s" % (bottom_f, bottom_freq))
    nf = [bottom_freq + k * nbin * chan_width for k in range(len(ndata))]
    print("Scrunch to length %s" % (len(nf),))

    plt.figure(figsize=(8, 6))
    plt.plot(nf, ndata)
    plt.title("Binned to 1MHz")
    plt.xlabel("Frequency [MHz]")
    plt.ylabel("Temp [K]")
    plt.savefig("bin1MHz.png")
    plt.clf()

    plt.figure(figsize=(8, 6))
    plt.plot(nf, nvariance)
    plt.title("Variance binned")
    plt.xlabel("Frequency [MHz]")
    plt.ylabel("Temp [K$^2$]")
    plt.tight_layout()
    plt.savefig("bin1MHz_var.png")

    np.savetxt("binned_frequencies.dat", nf)
    np.savetxt("binned_data.dat", ndata)
    np.savetxt("binned_variance.dat", nvariance)

    # Diagnostics: residual RMS after removing a smooth component.
    mn = (ndata - bn.move_nanmean(ndata, 9))[4:-4]
    # `mn != np.nan` is True for every element (NaN never compares equal),
    # so it filtered nothing; test for NaN explicitly.
    mn = mn[np.logical_not(np.isnan(mn))]
    print(mn)
    # NOTE(review): `filter` is called with a single argument here; confirm
    # which `filter` is in scope in this module and that it accepts that.
    print("%s %s %s" % (
        calc_rms((ndata - scipy.signal.medfilt(ndata, 9))[4:-4]),
        calc_rms(mn[4:]),
        calc_rms(ndata - filter(ndata))))

    return nf, ndata, nvariance
plt.show() plt.savefig("variance.png") # Plot the damped sinusoid plt.clf() plt.plot(f2, rD, label="Data") #plt.plot(f2, rD_sin_model, label="Fit") plt.xlabel("Frequency [MHz]") plt.ylabel("Temperature [K]") plt.legend() plt.show() plt.savefig("residual.png") np.savetxt("residual.dat", np.array(list(zip(f2, rD)))) #filt = (rD-scipy.signal.medfilt(rD, 9))[9:-9] filt = (rD - bn.move_nanmean(rD, 9))[9:-9] filt = (rD - filter(rD))[9:-9] #f2, filt = bin_to_1MHz(f2[9:-9], filt) print "Noise again", np.std(filt[len(filt) / 2:]) plt.figure(figsize=(10, 10)) lw = 0.5 plt.clf() plt.plot(f2, rD, linewidth=lw) plt.title("Signal") plt.xlabel("Frequency [MHz]") plt.ylabel("Temperature [K]") plt.savefig("signal.png")