import numpy as np
from scipy.signal import savgol_filter as savgol


def countblinks(seqarr, framecount):
    farr = seqarr
    lear = farr[:, 8]
    rear = farr[:, 9]
    lsteady = savgol(lear, 31, 2)
    rsteady = savgol(rear, 31, 2)
    lthresh = lear.std()
    rthresh = rear.std()
    linds = np.where(lsteady - lear >= lthresh)[0]
    rinds = np.where(rsteady - rear >= rthresh)[0]
    carr = np.union1d(linds, rinds)
    blinks = 0
    cont = []
    frame = 1
    # count number of contiguous frames with eyes closed
    for i in range(1, len(carr)):
        if carr[i] <= carr[i - 1] + 20:  # allow 1 second gap
            frame += 1
        else:
            blinks += 1
            cont.append(frame)
            frame = 1
    if len(cont) > 0:
        c_avg = 1.0 * np.sum(cont) / len(cont)
    else:
        c_avg = 0
    return blinks, c_avg
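# A minimal usage sketch on made-up data (the array layout is an assumption
# for illustration): countblinks expects an (N x >=10) per-frame array whose
# columns 8 and 9 hold the left/right eye aspect ratios.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n_frames = 600
    demo = np.zeros((n_frames, 10))
    demo[:, 8] = 0.3 + 0.01 * rng.standard_normal(n_frames)  # left eye ratio
    demo[:, 9] = 0.3 + 0.01 * rng.standard_normal(n_frames)  # right eye ratio
    demo[100:105, 8:10] = 0.05  # simulated closed-eye run
    demo[300:305, 8:10] = 0.05  # a second run
    blinks, avg_duration = countblinks(demo, n_frames)
    print(blinks, avg_duration)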
def savgolTrendLine(y, window=101, degree=3):
    if window > len(y):
        window = len(y)
    # savgol requires an odd number for the window --- enforce that here
    if window % 2 == 0:
        window -= 1
    stage1trend = savgol(np.array(y), window, degree)
    stage2trend = savgol(stage1trend, window, degree)
    return stage2trend  # This is a numpy.ndarray
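# A short usage sketch (the sine-plus-noise input is illustrative only):
# smoothing twice with the same window gives a heavily damped trend line.
t = np.linspace(0, 10, 500)
noisy = np.sin(t) + 0.2 * np.random.randn(500)
trend = savgolTrendLine(noisy)  # window=101, degree=3 by default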
def apply_savgol(self) -> None:
    """Calculates and adds first and second derivatives using a
    Savitzky-Golay filter."""
    self.data['deriv1'] = savgol(self.data.soil_moisture, 3, 2, 1, mode='nearest')
    self.data['deriv2'] = savgol(self.data.soil_moisture, 3, 2, 2, mode='nearest')
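# For reference, the positional arguments above map onto
# savgol_filter(x, window_length=3, polyorder=2, deriv=...): deriv=1 returns
# the smoothed first derivative, deriv=2 the second. A standalone sketch on
# made-up soil-moisture values:
moisture = np.array([0.30, 0.31, 0.33, 0.36, 0.40, 0.45])
d1 = savgol(moisture, 3, 2, deriv=1, mode='nearest')
d2 = savgol(moisture, 3, 2, deriv=2, mode='nearest')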
def marsmodelorr(self, use_smY=True, slope_trunc=0.00001, savgol_window=151,
                 savgol_order=3, ex_order=51):
    Xf, Yf = self.Xf_, self.Yf_
    X, Y = self.X_, self.Y_
    fom = {}
    # smooth the data
    smY = savgol(Y, savgol_window, savgol_order)
    # perform mars
    model = MARS()
    if use_smY:
        model.fit(X, smY)
    else:
        model.fit(X, Y)
    Y_h = model.predict(X)
    # calculate dydx based on the mars model to get knots and intercepts,
    # as these are complicated to extract from the hinge functions
    diff1 = np.diff(Y_h) / np.diff(X)
    tdiff1 = diff1 - np.nanmin(diff1)
    tdiff1 = tdiff1 / np.nanmax(tdiff1)
    # calculate slopes of linear segments
    ID = [i for i in range(1, len(tdiff1))
          if np.abs(tdiff1[i] - tdiff1[i - 1]) > slope_trunc]
    ID.insert(0, 0)
    ID.append(np.argmax(X))  # this might cause an error
    slopes = [np.nanmean(diff1[ID[i - 1]:ID[i]]) for i in range(1, len(ID) - 1)]
    a = [Y_h[ID[i]] - slopes[i] * X[ID[i]] for i in range(len(ID) - 2)]
    IDM, IDm = np.argmax(slopes), np.argmin(np.abs(slopes))
    # intercept of the highest slope with zero, as well as of the highest
    # slope with the lowest slope
    fom['zinter'] = -a[IDM] / slopes[IDM]
    fom['lminter'] = (a[IDM] - a[IDm]) / (slopes[IDm] - slopes[IDM])
    fom['max_slope'] = slopes[IDM]
    fom['curr_lminter_model'] = fom['lminter'] * slopes[IDM] + a[IDM]
    fom['curr_lminter_data'] = np.mean(Y[np.where(np.abs(X - fom['lminter']) < 0.5)[0]])
    # calculate how the CV curves might look without the 'ORR part'
    srYs = smY - model.predict(X)
    srYf = savgol(Yf - model.predict(Xf), savgol_window, savgol_order)
    # calculate their derivative
    dsrYf = savgol(np.diff(srYf) / np.diff(Xf), savgol_window, savgol_order)
    # find the extrema in the derivatives for extraction of redox potentials
    redID_f = argrelextrema(srYf, np.less, order=ex_order)
    oxID_f = argrelextrema(srYf, np.greater, order=ex_order)
    # calc some more foms, like the positions of the redox waves
    fom['redpot_f'], fom['redpot_f_var'] = np.nanmean(Xf[redID_f]), np.nanstd(Xf[redID_f])
    fom['oxpot_f'], fom['oxpot_f_var'] = np.nanmean(Xf[oxID_f]), np.nanstd(Xf[oxID_f])
    fom['X'], fom['Xf'] = X, Xf
    fom['srYs'], fom['srYf'], fom['smY'] = srYs, srYf, smY
    fom['Y'], fom['Yf'], fom['Y_h'] = Y, Yf, Y_h
    fom['noise_lvl'] = np.sum((Y_h - Y) ** 2, axis=0)
    self.fom = fom
def average_component_spectra(zz):
    """Does the robust average of the component spectra."""
    # First read the data, throwing away the first row (which is k=0).
    iz = int(100 * zz + 0.01)
    fn = db + "pc_z{:03d}_R0_????.txt".format(iz)
    kk = read_column(fn, 0)[0, 1:]
    # Now we want to read in each of the remaining columns and average them.
    dd = np.loadtxt(db + "pc_z{:03d}_R0_{:04d}.txt".format(iz, seeds[1]))
    pk = np.zeros((kk.size, dd.shape[1]))
    pk[:, 0] = kk
    # Now we compute robust averages, and here it matters more.
    for i in range(1, pk.shape[1]):
        dd = read_column(fn, i)[:, 1:]
        mu, sig = robust_avg(dd)
        pk[:, i] = mu.copy()
    # Some of the spectra are still a bit noisy, so we smooth them.
    for i in [3, 4, 7, 8]:
        pk[:, i] = savgol(pk[:, i], 7, polyorder=3)
    # and write the summary file.
    fout = db + "pc_z{:03d}_R0.txt".format(iz)
    ff = open(fout, "w")
    ff.write("# LPT component spectra.\n")
    ff.write("# Robust average of {:d} spectra using sqrt(1+x^2).\n".
             format(dd.shape[0]))
    ff.write("# z={:.3f}\n".format(zz))
    for i in range(kk.size):
        outstr = "{:15.5e}".format(kk[i])
        for j in range(1, pk.shape[1]):
            outstr += " {:15.5e}".format(pk[i, j])
        ff.write(outstr + "\n")
    ff.close()
def smooth(self, method='sg'):
    N = self.getNodd(self._filterLength)
    if method == 'sg':
        try:
            y = savgol(self.f, N, 6, 0)
            self.f = y
        except Exception:
            # fall back to a basic median filter if Savitzky-Golay fails
            method = 'basic'
    if method == 'basic':
        self.f = medfilt(self.f, N)
def main(argv):
    showCritic = False
    try:
        opts, args = getopt.getopt(argv, "p:c", ['path='])
    except getopt.GetoptError:
        sys.exit(2)
    for opt, arg in opts:
        if opt in ['--path', '-p']:
            path = arg
        if opt == '-c':
            showCritic = True

    loss = None
    loss_d = None
    try:
        e, loss = np.loadtxt(path, unpack=True)
    except OSError:
        e, loss = np.loadtxt('weights/' + path, unpack=True)
    except ValueError:
        try:
            e, i, loss, loss_d = np.loadtxt(path, unpack=True)
        except OSError:
            e, i, loss, loss_d = np.loadtxt('weights/' + path, unpack=True)

    plt.plot(loss, label='raw')
    window = len(loss) // 6
    window += 1 if window % 2 == 0 else 0
    filtered_loss = savgol(loss, window, 2)
    plt.plot(filtered_loss, label='savgol')
    #N=len(loss)//4
    #plt.plot(np.convolve(loss, np.ones(N)/N, mode='valid'),label='convolve')
    plt.xlabel('#batches')
    plt.ylabel('loss')
    plt.title('loss over time')
    #plt.legend()
    plt.show()

    if loss_d is not None and showCritic:
        plt.plot(loss_d, label='raw')
        plt.plot(savgol(loss_d, window, 2), label='savgol')
        plt.xlabel('#batches')
        plt.ylabel('loss')
        plt.title('critic loss over time')
        plt.show()
def findLastEdge(self, bins, hist):
    nhist = hist / max(hist)
    diff1 = savgol(np.diff(nhist), 15, 3)
    #plt.plot(bins[1:-1], diff1)
    #plt.plot(bins[1:], nhist)
    found = False
    lastEdge = len(hist) - 10
    for i, val in reversed(list(enumerate(hist))):
        if i < len(hist) - 10:
            if (not found and sum(diff1[i - 5:i]) < 0
                    and sum(nhist[i - 5:i]) > 0.008):
                found = True
                lastEdge = i
    return lastEdge
import math


def opt_filter(x, n):
    """Iteratively estimate the optimal (odd) Savitzky-Golay window length
    for a polynomial of order n, stopping once the integer window length
    stops changing between iterations."""
    Nopt = 3
    N1 = 0
    while True:
        N = int(np.floor(Nopt))
        if N % 2 == 0:
            N += 1
        if N == N1:  # window length has converged
            break
        N1 = N
        print(N1)
        y = savgol(x, N1, n)
        dy = savgol(np.diff(y, 1), N1, n)
        y2 = np.diff(dy, 3)
        c1 = np.mean(np.power(y2, 2))
        # asymptotic MSE-optimal window length:
        # Nopt = [ 2(n+2) ((2n+3)!)^2 Var(x) / ( ((n+1)!)^2 c1 ) ]^(1/(2n+5))
        Nopt = np.power(
            (2 * (n + 2) * math.factorial(2 * n + 3) ** 2 * np.var(x))
            / (math.factorial(n + 1) ** 2 * c1),
            1 / (2 * n + 5))
    return Nopt
def getps(file, day):

    data = fits.open(file)
    head = data[0].data
    dat = data[1].data
    time = dat['TIME']
    qual = dat['SAP_QUALITY']
    flux = dat['PDCSAP_FLUX']

    good = np.where(qual == 0)[0]
    time = time[good]
    flux = flux[good]

    ndays = (time[-1] - time[0]) / 1.
    time_1 = time
    flux_1 = flux
    #third=time[0]+ndays
    #idx=np.where(time<third)[0]
    #time=time[idx]
    #flux=flux[idx]
    #time_1=time
    #flux_1=flux

    # Duty cycle:
    total_obs_time = ndays * 24. * 60  # mins
    cadence = 30.  # mins
    expected_points = total_obs_time / cadence
    observed_points = len(flux)

    # Only analyze stars with light curve duty cycle > 60%:
    # if observed_points/expected_points<0.5:
    #     continue

    res = sigclip(time, flux, 50, 3)
    good = np.where(res == 1)[0]
    time = time[good]
    flux = flux[good]
    time_2 = time
    flux_2 = flux

    width = day
    boxsize = width / (30. / 60. / 24.)
    box_kernel = Box1DKernel(boxsize)
    smoothed_flux = savgol(flux, int(boxsize) - 1, 1, mode='mirror')
    flux = flux / smoothed_flux
    time_3 = time
    flux_3 = smoothed_flux

    # Remove data points > 3*sigma:
    std = mad_std(flux, ignore_nan=True)
    med = np.median(flux)
    #idx =np.where(abs(flux-med)<3.*std)[0]
    #time=time[idx]
    #flux=flux[idx]

    # now let's calculate the fourier transform. the nyquist frequency is:
    nyq = 1. / (30. / 60. / 24.)
    fres = 1. / 90. / 0.0864
    fres_cd = 0.001
    fres_mhz = fres_cd / 0.0864

    freq = np.arange(0.001, 24., 0.001)
    #pdb.set_trace()

    # FT magic
    #freq, amp = LombScargle(time,flux).autopower(method='fast',samples_per_peak=10,maximum_frequency=nyq)
    amp = LombScargle(time, flux).power(freq)

    # unit conversions
    freq = 1000. * freq / 86.4
    bin = freq[1] - freq[0]
    amp = 2. * amp * np.var(flux * 1e6) / (np.sum(amp) * bin)

    # White noise correction:
    wnoise = getkp(file)
    amp1 = np.zeros(len(amp))
    for p in range(0, len(amp)):
        a = amp[p]
        if a - wnoise < 0.:
            amp1[p] = amp[p]
        if a - wnoise > 0.:
            amp1[p] = a - wnoise

    # smooth by 2 muHz
    n = int(2. / fres_mhz)
    n_wnoise = int(2. / fres_mhz)
    gauss_kernel = Gaussian1DKernel(n)
    gk_wnoise = Gaussian1DKernel(n_wnoise)
    pssm = convolve(amp, gauss_kernel)
    pssm_wnoise = convolve(amp1, gk_wnoise)

    timeseries = [time_1, flux_1, time_2, flux_2, time_3, flux_3]

    return timeseries, time, flux, freq, amp1, pssm, pssm_wnoise, wnoise
def findComptonEdges(self, bins, hist, key):
    print("calibrating spectrum energy lines: " +
          ' , '.join(str(e) for e in self.lines) + " MeV")
    print("Finding Compton Edges")

    splineWidth = int(math.ceil(self.bins / 15))
    if splineWidth % 2 == 0:
        splineWidth = splineWidth + 1

    nhist = savgol(hist / hist.max(), splineWidth, 3)
    diff1 = savgol(np.diff(hist / hist.max()), splineWidth, 3)
    diff2 = savgol(np.diff(np.diff(hist / hist.max())), splineWidth * 2 + 1, 3)
    #k = np.where(diff1 == diff1.min())
    #k2 = np.where(diff2 == diff2.min())

    peakw = int(math.ceil(self.bins / 20))
    peakid = peaks(nhist, [peakw, peakw + 1, peakw + 2])
    peakid2 = peaks(diff2, [peakw, peakw + 1, peakw + 2])
    #if len(peakid) < 3 or len(peakid2) < 3:
    #    raise NotImplementedError("Compton edges couldn't be found, and manual selection hasn't been implemented. Try again with more data!")

    if self.loud:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(bins[1:], nhist, label="smoothed spectrum")

    # find first line
    print("Looking for " + str(self.lines[0]) + " MeV Compton edge.")
    found = False
    for pk in peakid2:
        if not found and pk > peakid[1]:
            stopEdge = pk
            intermediate = nhist[peakid[1]:stopEdge] - 0.8 * nhist[peakid[1]]
            k = peakid[1] + np.where(intermediate**2 == (intermediate**2).min())
            if self.loud:
                ax.plot(bins[k], nhist[k - 1], '*r', label="Compton Edge")
            print("Got it!")
            found = True

    for i, line in reversed(list(enumerate(self.lines))):
        if i == 0:
            pass
        else:
            print("Looking for " + str(line) + " MeV Compton edge.")
            lastEdge = self.findLastEdge(bins, hist)
            try:
                peak2 = peakid[np.where(peakid < lastEdge)].max()
            except ValueError:
                raise NotImplementedError("Compton edges couldn't be found, and manual selection hasn't been implemented. Try again with more data!")
            intermediate = nhist[peak2:lastEdge] - 0.8 * nhist[peak2]
            k2 = peak2 + np.where(intermediate**2 == (intermediate**2).min())
            if self.loud:
                ax.plot(bins[k2], nhist[k2 - 1], '*r', label="Compton Edge")
            nhist = nhist[:peak2]
            print("Got it!")

    if self.loud:
        plt.xlabel("Pulse Integral [V ns]")
        plt.ylabel("Relative Frequency")
        plt.title(key)
        plt.legend()
        plt.show()
        correct = input("Are the compton edges marked correctly? [yes/no]")
        if "y" in correct:
            return [bins[k[0][0]]]
        else:
            raise NotImplementedError("Sorry! We haven't added manually selected compton edges yet...")
    else:
        return [bins[k[0][0]], bins[k2[0][0]]]
file = '/Users/nwinner/code/venv/dipoles.txt'

vel = pd.read_csv('/Users/nwinner/code/venv/vdatcar.csv')

x = ensemble_average(vel, power_spectrum, ['vx', 'vy', 'vz'], ['Li', 'Be', 'F'])
x = np.power(np.abs(x), 2) / (3 * 98 * 973 * constants.kB)

time = vel['Timestep'].drop_duplicates().values * .001
wavenumber = time * 100

plt.plot(wavenumber[0:10000], x[0:10000], label="")
y = savgol(x, 51, 2)
plt.plot(wavenumber[0:10000], y[0:10000], label="")
plt.legend()
plt.show()

exit()

with open(file) as f:
    lines = f.readlines()

ionic = []
electronic = []

for l in range(0, len(lines) - 1, 2):
def test_rtp_finder(signal, dic, plot=False):
    """
    Simulates detection of peaks/troughs in real time

    Parameters
    ----------
    signal : (N x 2) array_like
    dic : dictionary
        dic['log'] : str, logfile name
        dic['samplerate'] : list, samplerates for each channel
        dic['baseline'] : bool, whether a baseline session was run
        dic['channelloc'] : int, location of test channel from baseline data
    plot : bool, optional
        Whether to plot in real time w/threshold visualizations (WICKED
        SLOW). Default: False

    Returns
    -------
    (N x 3) np.ndarray
        Detected peaks and troughs
    """

    detected = []
    last_found = np.array([[0, 0, 0], [1, 0, 0], [-1, 0, 0]] * 2)

    if plot:
        fig, ax = plt.subplots(1)
        plt.show(block=False)
        ax.set(ylim=[signal[:, 1].min() - 2, signal[:, 1].max() + 2],
               xlim=[signal[0, 0], signal[-1, 0]])
        inds = np.arange(0, signal.shape[0],
                         np.ceil(1000. / dic['samplerate']), dtype='int64')
        whole_sig, part_sig, all_peaks, all_troughs, hline, vline = ax.plot(
            signal[inds, 0], signal[inds, 1], 'blue',
            np.array([0]), np.array([0]), 'oc',
            np.array([0]), np.array([0]), 'or',
            np.array([0]), np.array([0]), 'og',
            np.array([0]), np.array([0]), 'r',
            np.array([0]), np.array([0]), 'black')
        fig.canvas.draw()

    if dic['baseline']:
        out = get_baseline(op.join(os.getcwd(), 'data', dic['log']),
                           dic['channelloc'], dic['samplerate'])
        last_found = out.copy()
        t_thresh = gen_thresh(last_found[:-1])[0, 0]
        last_found[-1, 1] = signal[0, 0] - t_thresh

    thresh = gen_thresh(last_found[:-1])
    if plot:
        tdiff = thresh[0, 0] - thresh[0, 1]

    sig = np.atleast_2d(signal[0])
    st = np.ceil(1000. / dic['samplerate'])
    if plot:
        x = np.arange(signal[0, 0], signal[-1, 0], st)

    for i in signal[1:]:
        if i[0] < sig[-1, 0] + st:
            continue
        sig = np.vstack((sig, i))
        if len(sig) > 3:
            sig[:, 1] = savgol(sig[:, 1], 3, 1)

        peak, trough = peak_or_trough(sig, last_found, thresh, st)

        if plot:
            # if time since last det > upper bound of normal time interval
            # shrink height threshold by relative factor
            divide = ((sig[-1, 0] - last_found[-1, 1]) /
                      (thresh[0, 0] + thresh[0, 1]))
            divide = divide if divide > 1 else 1
            hdiff = (thresh[1, 0] - thresh[1, 1]) / divide

            # draw previously detected peaks and troughs
            m = last_found[last_found[:, 1] > signal[0, 0]]
            p, t = m[m[:, 0] == 1], m[m[:, 0] == 0]
            if len(t) > 0:
                all_troughs.set(xdata=t[:, 1], ydata=t[:, 2])
            if len(p) > 0:
                all_peaks.set(xdata=p[:, 1], ydata=p[:, 2])

            # set the moving blue dot denoting signal
            part_sig.set(xdata=np.array([sig[-1, 0]]),
                         ydata=np.array([sig[-1, 1]]))

            if last_found[-1, 0] != 1:  # if we're looking for a peak
                mult = last_found[-1, 2] + hdiff
                hline.set(color='r', xdata=x, ydata=np.ones(x.size) * mult)
            if last_found[-1, 0] != 0:  # if we're looking for a trough
                mult = last_found[-1, 2] - hdiff
                hline.set(color='g', xdata=x, ydata=np.ones(x.size) * mult)

            vline.set(xdata=np.array([last_found[-1, 1] + tdiff]),
                      ydata=np.arange(*ax.get_ylim()))

            ax.draw_artist(ax.patch)
            ax.draw_artist(whole_sig)
            ax.draw_artist(hline)
            ax.draw_artist(vline)
            if len(t) > 0:
                ax.draw_artist(all_troughs)
            if len(p) > 0:
                ax.draw_artist(all_peaks)
            ax.draw_artist(part_sig)
            fig.canvas.update()
            fig.canvas.flush_events()

        if peak is not None or trough is not None:
            # get index of extrema
            ex, l = peak or trough, int(bool(peak))
            # add to last_found
            last_found = np.vstack((last_found, np.append([l], sig[ex])))
            if (not dic['baseline'] and len(last_found) > 7 and
                    np.any(last_found[:, 1] == 0)):
                last_found = last_found[np.where(last_found[:, 1] != 0)[0]]
                last_found = np.vstack((last_found, last_found))
            thresh = gen_thresh(last_found[:-1])
            if plot:
                tdiff = thresh[0, 0] - thresh[0, 1]

            # if extrema was detected "immediately" then log detection
            if len(sig) - ex <= 3:
                detected.append(np.append(sig[-1], [l]))
            else:
                detected.append(np.append(sig[ex], [2]))

            # reset sig
            sig = np.atleast_2d(sig[-1])

    return np.array(detected)
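# A hypothetical invocation sketch; the helpers (gen_thresh, peak_or_trough,
# get_baseline) are assumed importable from the same module, and the dict
# values below are placeholders, not values from the original code:
# sig = np.loadtxt('run01_signal.txt')  # (N x 2): time [ms], amplitude
# dic = {'log': 'run01.log', 'samplerate': 1000.,
#        'baseline': False, 'channelloc': 0}
# peaks_troughs = test_rtp_finder(sig, dic, plot=False)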
def smoothedSolutionDistribution(
        dist: np.ndarray, nsamples: int) -> Tuple[np.ndarray, np.ndarray]:
    sampledX, sampledY = sampledCdf(dist, nsamples)
    smoothedY = savgol(sampledY, 21, 3)
    return np.array(sampledX[:-1]), np.diff(smoothedY)
labels = np.array(labels)[indSort]
values = np.array(values)[indSort]
indices = np.arange(len(labels))

NUM_OF_DISTINCT_BARCODES = len(indices)
print("NUM_OF_DISTINCT_BARCODES =", NUM_OF_DISTINCT_BARCODES)

# By default we look for a number of cells in a window of 500 to 5000.
# WINDOW = [500,5000]
WINDOW = parameter["WINDOW"]
print("CELL_WINDOW:", WINDOW)

from scipy.signal import savgol_filter as savgol

# The knee of the sorted barcode-count curve sits where the counts drop
# fastest, so smooth the first difference and take its most negative point
# inside the window.
valdiff = np.diff(values)
yhat = savgol(valdiff, 151, 1)
NUM_OF_BARCODES = np.argmax(-yhat[WINDOW[0]:WINDOW[1]]) + WINDOW[0]
print("Cell_barcodes_detected:", NUM_OF_BARCODES)

NUM_OF_READS_in_CELL_BARCODES = sum(values[:NUM_OF_BARCODES])
print("NUM_OF_READS_in_CELL_BARCODES =", NUM_OF_READS_in_CELL_BARCODES)

codewords = labels[:NUM_OF_BARCODES]

print("Calculating d_min...")
Ham_dist = np.zeros([len(codewords), len(codewords)])
for i in range(len(codewords)):
    codi = decode(codewords[i])
    for j in range(i + 1, len(codewords)):
def get_amp(file):
    '''Given a star's filename, get its frequency & white-noise-corrected
    power spectral density.'''
    file = file[0]
    file = file[0:-3]
    kicid = int(
        file.split('/')[-1].split('-')[0].split('kplr')[-1].lstrip('0'))

    data = fits.open(file)
    head = data[0].data
    dat = data[1].data
    time = dat['TIME']
    qual = dat['SAP_QUALITY']
    flux = dat['PDCSAP_FLUX']

    good = np.where(qual == 0)[0]
    time = time[good]
    flux = flux[good]
    time_1 = time
    flux_1 = flux

    res = sigclip(time, flux, 50, 3)
    good = np.where(res == 1)[0]
    time = time[good]
    flux = flux[good]
    time_2 = time
    flux_2 = flux

    # Check Duty Cycle:
    ndays = time[-1] - time[0]
    nmins = ndays * 24. * 60.
    expected_points = nmins / 30.
    observed_points = len(time)

    if kicid in np.array(kepler_catalogue['KIC']):
        row = kepler_catalogue.loc[kepler_catalogue['KIC'] == kicid]
        rad = row['iso_rad'].item()
        teff = row['iso_teff'].item()
    elif kicid in np.array(gaia['KIC']):
        um = np.where(gaia['KIC'] == kicid)[0]  # index of the star in the Gaia table
        teff = gaia['teff'][um[0]]
        rad = gaia['rad'][um[0]]

    closestrad = getclosest(rad, fit_radii)
    idx = np.where(fit_radii == closestrad)[0]
    best_fit_width = fit_width[idx][0]
    width = best_fit_width

    boxsize = int(width / (30. / 60. / 24.))
    box_kernel = Box1DKernel(boxsize)
    if boxsize % 2 == 0:
        smoothed_flux = savgol(flux, int(boxsize) - 1, 1, mode='mirror')
    else:
        smoothed_flux = savgol(flux, int(boxsize), 1, mode='mirror')
    flux = flux / smoothed_flux
    time_3 = time
    flux_3 = smoothed_flux

    # Remove data points > 5*sigma:
    std = mad_std(flux, ignore_nan=True)
    med = np.median(flux)
    idx = np.where(abs(flux - med) < 5. * std)[0]
    time = time[idx]
    flux = flux[idx]

    # now let's calculate the fourier transform. the nyquist frequency is:
    nyq = 0.5 / (30. / 60. / 24.)
    fres = 1. / 90. / 0.0864

    freq = np.arange(0.01, 24., 0.01)  # critically sampled

    (values, counts) = np.unique(np.diff(time), return_counts=True)
    cadence = values[np.argmax(counts)]
    #pdb.set_trace()

    # FT magic
    #freq, amp = LombScargle(time,flux).autopower(method='fast',samples_per_peak=10,maximum_frequency=nyq)
    time_interp = np.arange(time[0], time[-1], cadence)
    flux_interp = np.interp(time_interp, time, flux)
    time, flux = time_interp, flux_interp
    amp = LombScargle(time, flux).power(freq)

    # unit conversions
    freq = 1000. * freq / 86.4
    bin = freq[1] - freq[0]
    amp = 2. * amp * np.var(flux * 1e6) / (np.sum(amp) * bin)

    # White noise correction:
    wnoise = getkp(file)
    amp_wn = np.zeros(len(amp))
    power_more_than_wnoise = 0
    for p in range(0, len(amp)):
        a = amp[p]
        if a - wnoise < 0.:
            amp_wn[p] = amp[p]
        if a - wnoise > 0.:
            amp_wn[p] = a - wnoise
            power_more_than_wnoise += 1

    snr = power_more_than_wnoise / len(amp)

    # smooth by 2 muHz
    fres_cd = 0.01  # to check if it's smoothed by 2muHz: print(freq[0],freq[0+n]) difference should be 2
    fres_mhz = fres_cd / 0.0864
    n = int(2. / fres_mhz)
    n_wnoise = int(2. / fres_mhz)
    gauss_kernel = Gaussian1DKernel(n)
    gk_wnoise = Gaussian1DKernel(n_wnoise)
    pssm = convolve(amp, gauss_kernel)
    pssm_wnoise = convolve(amp_wn, gk_wnoise)

    timeseries = [time_1, flux_1, time_2, flux_2, time_3, flux_3]

    return freq, amp
# === END

#plt.plot(time,flux)

## if rads[i] <= 36.8.:  # from width_vs_radius_test2.txt
closestrad = getclosest(rads[i], fit_radii)
idx = np.where(fit_radii == closestrad)[0]
best_fit_width = fit_width[idx][0]
width = best_fit_width
print(i, kicid, width)

boxsize = int(width / (30. / 60. / 24.))
box_kernel = Box1DKernel(boxsize)
if boxsize % 2 == 0:
    smoothed_flux = savgol(flux, int(boxsize) - 1, 1, mode='mirror')
else:
    smoothed_flux = savgol(flux, int(boxsize), 1, mode='mirror')

# overplot this smoothed version, and then divide the light curve through it
flux = flux / smoothed_flux
#plt.plot(time,smoothed_flux)
#plt.axvspan(time[1000],time[1000+int(boxsize)],color='g',zorder=1,alpha=0.2)

#flux=flux+np.random.randn(len(flux))*0.01

# plot the filtered light curve
#plt.subplot(3,1,2)
#plt.plot(time,flux)
#plt.xlabel('Time (Days)')
inputs_seq[2, 4, :, :, :] = simple_inputs[2, :, :, :]
#
inputs_seq[1, 0, :, :, :] = simple_inputs[0, :, :, :]
inputs_seq[1, 1, :, :, :] = simple_inputs[0, :, :, :]
inputs_seq[1, 2, :, :, :] = simple_inputs[0, :, :, :]
inputs_seq[1, 3, :, :, :] = simple_inputs[0, :, :, :]
inputs_seq[1, 4, :, :, :] = simple_inputs[1, :, :, :]
#
inputs_seq[0, :, :, :, :] = simple_inputs[0, :, :, :]

# =============== Labels ===============
wx = wx - np.mean(wx[:60])
wy = wy - np.mean(wy[:60])
wz = wz - np.mean(wz[:60])

wxf = savgol(wx, 23, 2)
wyf = savgol(wy, 23, 2)
wzf = savgol(wz, 23, 2)

labels = np.transpose(np.vstack((wxf, wyf, wzf)))

# =============== Time analysis ===============
ts = [float(i) for i in ts]
ts = np.array(ts)
tn = [float(i) for i in tn]
tn = np.array(tn)
tn = tn * 10**(-9)
time = ts + tn

T = np.zeros(len(time))
for i in range(1, len(time)):
def savgol_filter(x, h_freq, axis=None, sfreq=None, polyorder=5,
                  verbose=None):
    """Filter the data using Savitzky-Golay polynomial method.

    This function is an adaptation of the mne-python one for
    xarray.DataArray.

    Parameters
    ----------
    x : array_like
        Multidimensional array or DataArray
    h_freq : float
        Approximate high cut-off frequency in Hz. Note that this is not an
        exact cutoff, since Savitzky-Golay filtering is done using
        polynomial fits instead of FIR/IIR filtering. This parameter is
        thus used to determine the length of the window
    axis : int, string | None
        Position of the time axis. Can either be an integer when `x` is a
        NumPy array or a string (e.g 'times') when using a DataArray
    polyorder : int | 5
        Polynomial order

    Returns
    -------
    x_filt : array_like
        Filtered data

    Notes
    -----
    For Savitzky-Golay low-pass approximation, see:
    https://gist.github.com/larsoner/bbac101d50176611136b

    See also
    --------
    kernel_smoothing
    """
    set_log_level(verbose)
    # inputs checking
    if isinstance(x, xr.DataArray):
        dims = list(x.dims)
        # get axis name
        if axis is None:
            axis = 'times'
        if isinstance(axis, str):
            axis = list(x.dims).index(axis)
        # get sfreq if possible
        if not isinstance(sfreq, (int, float)):
            assert 'times' in dims
            sfreq = 1. / (x['times'].data[1] - x['times'].data[0])
    assert isinstance(h_freq, (int, float))
    assert isinstance(axis, int)
    assert isinstance(sfreq, (int, float))
    if h_freq >= sfreq / 2.:
        raise ValueError('h_freq must be less than half the sample rate')

    # get window length
    window_length = (int(np.round(sfreq / h_freq)) // 2) * 2 + 1
    logger.info(f' Using savgol length {window_length}')

    # apply savgol depending on input type
    kw = dict(axis=axis, polyorder=polyorder, window_length=window_length)
    if isinstance(x, xr.DataArray):
        x.data = savgol(x.data, **kw)
        return x
    else:
        return savgol(x, **kw)
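# A brief usage sketch on hypothetical data: low-pass a DataArray along its
# 'times' dimension. The dimension names and 100 Hz sample rate below are
# assumptions for illustration; sfreq is inferred from the 'times' coords.
import numpy as np
import xarray as xr

times = np.arange(0, 10, 0.01)  # 100 Hz sampling
da = xr.DataArray(np.random.randn(8, times.size),
                  dims=('channels', 'times'),
                  coords={'times': times})
da_filt = savgol_filter(da, h_freq=5.)  # window_length chosen from sfreq/h_freq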
def get_smoothed(self):
    smoothed_data = savgol(self.dataset, self.window, self.order, self.deriv)
    return smoothed_data
def get_psd(FITSFILE, FACTOR):
    f = FITSFILE
    kicid = int(f.split('/')[-1].split('-')[0].split('kplr')[-1].lstrip('0'))

    s0 = TIME.time()
    data = fits.open(f)
    head = data[0].data
    dat = data[1].data
    time = dat['TIME']
    qual = dat['SAP_QUALITY']
    flux = dat['PDCSAP_FLUX']

    um = np.where(gaia['KIC'] == kicid)[0]
    try:
        row = kepler_catalogue.loc[kepler_catalogue['KIC'] == kicid]
        teff = row['iso_teff'].item()
        rad = row['iso_rad'].item()
    except Exception:
        idx = np.where(gaia['KIC'] == kicid)[0]
        teff = gaia['teff'][idx][0]
        rad = gaia['rad'][idx][0]
    if math.isnan(rad):
        idx = np.where(gaia['KIC'] == kicid)[0]
        teff = gaia['teff'][idx][0]
        rad = gaia['rad'][idx][0]

    # only keep data with good quality flags
    good = np.where(qual == 0)[0]
    time = time[good]
    flux = flux[good]

    # sigma-clip outliers from the light curve and overplot it
    res = sigclip(time, flux, 50, 3)
    good = np.where(res == 1)[0]
    time = time[good]
    flux = flux[good]

    # Check Duty Cycle:
    ndays = time[-1] - time[0]
    nmins = ndays * 24. * 60.
    expected_points = nmins / 30.
    observed_points = len(time)

    ## if rads[i] <= 36.8.:  # from width_vs_radius_test2.txt
    closestrad = getclosest(rad, fit_radii)
    idx = np.where(fit_radii == closestrad)[0]
    best_fit_width = fit_width[idx][0]
    width = best_fit_width
    #print(kicid,width)

    boxsize = int(width / (30. / 60. / 24.))
    box_kernel = Box1DKernel(boxsize)
    if boxsize % 2 == 0:
        smoothed_flux = savgol(flux, int(boxsize) - 1, 1, mode='mirror')
    else:
        smoothed_flux = savgol(flux, int(boxsize), 1, mode='mirror')
    flux = flux / smoothed_flux

    # Get Kp of star:
    row = kpdf.loc[kpdf['KIC'] == int(kicid)]
    kp = float(row['Kp'].item())

    # Remove data points > 5*sigma:
    std = mad_std(flux, ignore_nan=True)
    med = np.median(flux)
    idx = np.where(abs(flux - med) < 5. * std)[0]
    time = time[idx]
    flux = flux[idx]

    # Get expected time domain noise in photometry:
    # find nearest Kp in our PPM vs. Kp file given Kp of our stars
    idx2, nearest_kp = find_nearest(ppmkp_kp, kp)
    # find time domain noise corresponding to that Kp
    TD_noise = ppmkp_ppm[idx2]
    TD_noise = TD_noise / 1e6

    np.random.seed(0)
    factor = FACTOR * TD_noise
    flux = flux + np.random.randn(len(flux)) * factor
    #print(kicid,TD_noise,FACTOR)
    #plt.plot(time,flux)

    # now let's calculate the fourier transform. the nyquist frequency is:
    nyq = 0.5 / (30. / 60. / 24.)
    fres = 1. / 90. / 0.0864

    freq = np.arange(0.01, 24., 0.01)  # long-cadence critically sampled
    # freq = np.arange(0.01, 734.1, 0.01)  # short-cadence critically sampled
    freq0 = freq

    (values, counts) = np.unique(np.diff(time), return_counts=True)
    cadence = values[np.argmax(counts)]
    # cadence=30./60./24.
    # time_interp = np.arange(time[0],time[-1],30./(60.*24.))
    time_interp = np.arange(time[0], time[-1], cadence)
    flux_interp = np.interp(time_interp, time, flux)
    time0, flux0 = time, flux  # non-interpolated data
    time, flux = time_interp, flux_interp

    amp = LombScargle(time, flux).power(freq)

    # FT magic
    # freq, amp = LombScargle(time,flux).autopower(method='fast',samples_per_peak=10,maximum_frequency=nyq)

    # unit conversions
    freq = 1000. * freq / 86.4
    bin = freq[1] - freq[0]
    amp = 2. * amp * np.var(flux * 1e6) / (np.sum(amp) * bin)

    # White noise correction:
    # wnoise=getkp(f)
    idx = np.where(freq > 270)[0]
    wnoise = np.mean(amp[idx])
    amp_wn = np.zeros(len(amp))
    # amp_wn0=np.zeros(len(amp0))
    power_more_than_wnoise = 0
    for p in range(0, len(amp)):
        a = amp[p]
        if a - wnoise < 0.:
            amp_wn[p] = amp[p]
        if a - wnoise > 0.:
            amp_wn[p] = a - wnoise
            power_more_than_wnoise += 1

    snr = power_more_than_wnoise / len(amp)

    # smooth by 2 muHz
    fres_cd = 0.01  # to check if it's smoothed by 2muHz: print(freq[0],freq[0+n]) difference should be 2
    fres_mhz = fres_cd / 0.0864
    n = int(2. / fres_mhz)
    gauss_kernel = Gaussian1DKernel(n)
    pssm = convolve(amp, gauss_kernel)
    wnpssm = convolve(amp_wn, gauss_kernel)
    # wnpssm0 = convolve(amp_wn0, gauss_kernel)

    # Save frequency & power spectrum arrays:
    um = np.where(freq > 10.)[0]

    return freq[um], wnpssm[um], kicid, snr
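# Hypothetical call (the FITS filename and noise factor are placeholders,
# not values from the original code):
# freq, psd, kicid, snr = get_psd('kplr001234567-2009166043257_llc.fits', 1.0)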