def periods_lomb(t, f, e, n_per=3, f0=1.0 / 1000, df=1e-5, fn=1.0 / 50.0, plot=False):
    """Return candidate periods found by repeated Lomb-Scargle prewhitening.

    For each of ``n_per`` passes, ``lombr`` is run on the current residual
    series, the period ``1 / res["freq"]`` is recorded, and the fitted model
    ``res["model"]`` is subtracted before the next pass.

    Parameters
    ----------
    t, f, e :
        Passed to ``lombr`` as its first three positional arguments
        (presumably times, values and uncertainties -- confirm against
        ``lombr``).  ``f`` is copied, so the caller's array is not modified.
    n_per : int
        Number of prewhitening passes, i.e. number of periods returned.
    f0, df, fn : float
        Start frequency, frequency step and upper frequency limit of the
        search grid; the number of grid points is ``int((fn - f0) / df)``.
    plot : bool
        When true, plot the periodogram of every pass with ``plt``.

    Returns
    -------
    list
        ``n_per`` periods, in the order they were found.
    """
    numf = int((fn - f0) / df)
    # lombr is given the grid as (f0, df, numf); the other routines in this
    # file spell that grid out as f0 + df * arange(numf).  Build the plotting
    # axis the same way so the PSD is drawn against the frequencies it was
    # evaluated at (linspace(f0, fn, numf) has a slightly different step).
    freqgrid = f0 + df * np.arange(numf, dtype="float64")
    ftest = 1.0 * f  # working copy that gets prewhitened in place
    P_cand = []
    for _ in range(n_per):
        psd, res = lombr(t, ftest, e, f0, df, numf, detrend_order=1, nharm=1)
        if plot:
            plt.plot(freqgrid, psd)
            plt.show()
        P_cand.append(1.0 / res["freq"])
        # Remove the fitted signal so the next pass finds the next peak.
        ftest -= res["model"]
    return P_cand
def old_stuff(self):
    """Legacy scratch code: fold, plot and dump the residual light curve.

    NOTE(review): this body is not runnable as written.  ``res2`` is read
    on the first line but only assigned further down, and ``period``,
    ``new_y``, ``f0``, ``df`` and ``numf`` are never assigned here, so they
    would have to exist as module-level globals.  The statements are kept
    in their original order for reference only.

    Side effects (if it could run): draws on matplotlib figures 2 and 4 and
    writes ``lc.dat``, ``lc0.dat`` and ``lc2.dat`` in the current directory.
    """

    def _write_columns(path, columns):
        # One "%f %f %f" row per epoch; the context manager guarantees the
        # handle is closed even if formatting a row raises.
        with open(path, "w") as out:
            out.writelines("%f %f %f\n" % row for row in zip(*columns))

    print("%s %s" % (res2['chi2'], res2['chi0']))
    if self.verbose:
        print("New Period is %.8f day" % period)
    plt.figure(2)
    plt.cla()
    tt = (self.x0 / period) % 1.
    s = tt.argsort()
    plt.errorbar(tt[s], new_y[s], self.dy_orig[s], fmt='o', c="b")
    plt.plot(tt[s], res2['model'][s], c="r")
    # Phase-folded curve (phase shifted by -0.5), then the unfolded curve.
    _write_columns("lc.dat", (tt[s] - 0.5, new_y[s], self.dy_orig[s]))
    _write_columns("lc0.dat", (self.x0, new_y, self.dy_orig))

    # Second search starting from half the original start frequency.
    psdr, res2 = lombr(self.x0, new_y, self.dy0, f0 / 2., df, numf)
    period1 = 1. / res2['freq']
    if self.verbose:
        print("New Period is %.8f day" % period1)
    plt.figure(4)
    plt.cla()
    tt = (self.x0 / period1) % 1.
    s = tt.argsort()
    plt.errorbar(tt[s], new_y[s], self.dy_orig[s], fmt='o', c="b")
    plt.plot(tt[s], res2['model'][s], c="r")
    print("%s %s" % (res2['chi2'], res2['chi0']))
    _write_columns("lc2.dat", (tt[s] - 0.5, new_y[s], self.dy_orig[s]))
def _get_pulsational_period(self, min_freq=10.0, doplot=False, max_pulse_period=400.0):
    """Find the initial pulsational period with a Lomb-Scargle search.

    Sets ``self.x0``, ``self.y``, ``self.dy``, ``self.dy0``, ``self.nepochs``
    and ``self.rrlp``, and stores the period in
    ``self.features["p_pulse_initial"]``.

    Parameters
    ----------
    min_freq : float
        Upper end of the frequency grid (used as ``fe``) when the period is
        searched for.
    doplot : bool
        Plot the folded light curve on figure 3; only honoured when
        ``self.allow_plotting`` is also true.
    max_pulse_period : float
        Longest period searched; the grid starts at ``1 / max_pulse_period``.

    Returns
    -------
    dict
        The result dictionary returned by ``lombr``.
    """
    self.x0 = self.t
    self.y = self.m
    self.dy = self.merr
    # Photometric and systematic errors added in quadrature.
    self.dy0 = np.sqrt(self.dy ** 2 + self.sys_err ** 2)
    # NOTE(review): x0 is the same object as self.t, so if self.t is a NumPy
    # array this in-place shift also moves self.t to start at zero.  Left
    # unchanged because other code may rely on it -- confirm.
    self.x0 -= self.x0.min()
    self.nepochs = len(self.x0)

    # define the frequency grid
    Xmax = self.x0.max()
    if not self.fix_initial_period:
        f0 = 1.0 / max_pulse_period
        df = 0.1 / Xmax
        fe = min_freq
        numf = int((fe - f0) / df)
    else:
        # Evaluate a single grid point at the requested period.
        f0 = 1. / self.initial_period
        df = 1e-7
        numf = 1

    psdr, res2 = lombr(self.x0, self.y, self.dy0, f0, df, numf, detrend_order=1)
    period = 1. / res2['freq']
    self.rrlp = period
    if self.verbose:
        print("Initial pulstional Period is %.8f day" % self.rrlp)
    self.features.update({"p_pulse_initial": self.rrlp})

    if self.allow_plotting and doplot:
        # Plotting is best effort: a plotting failure must not lose the
        # period result.  Catch Exception (not a bare except) so Ctrl-C and
        # SystemExit still propagate.
        try:
            plt.figure(3)
            plt.cla()
            tt = (self.x0 / period) % 1.
            s = tt.argsort()
            plt.errorbar(tt, self.y, self.dy, fmt='o')
            plt.plot(tt[s], res2['model'][s])
            # y-limits are given max-first, which inverts the axis.
            plt.ylim(self.y.max() + 0.05, self.y.min() - 0.05)
            plt.title("P=%f" % (self.rrlp))
            plt.draw()
        except Exception:
            pass
    return res2
fp = open(fpath, "w") for i in range(len(x)): fp.write("%lf %lf %lf\n" % (x[i], y[i], dy[i])) fp.close() dy0 = sqrt(dy ** 2 + sys_err ** 2) Xmax = x.max() f0 = 1.0 / Xmax df = 0.1 / Xmax fe = 10.0 numf = int((fe - f0) / df) freqin = f0 + df * arange(numf, dtype="float64") # psd,res = lombr(x,y,dy0,f0,df,numf) psd, res = lombr(x, y, dy0, f0, df, numf, detrend_order=1) import pdb pdb.set_trace() print() psd1, res1 = lombr(x, y - res["model"], dy0, f0, df, numf, detrend_order=0) plot(freqin, psd) ### """ The default is to fit 8 harmonics to every initial lomb-scargle peak above 6, with 0th order detrending (fitting mean only). Dan, if you think I should, I can put the logic to define the frequency grid in the main code and not in a wrapper like this. res is a dictionary containing the stuff previously reported by
def lomb_code(self, y, dy, x, sys_err=0.05, srcid=0):
    """Compute the final Lomb-Scargle frequencies and the derived features.

    This function is used for final psd and final L-S freqs which are used
    as features.
    NOTE: lomb_extractor.py..lomb_extractor..extract() also generates psd,
          but its psd and objects not used for the final L.S. freqs.
    NOTE: currently (20101120) This is adapted from Nat's run_lomb14.py

    Parameters
    ----------
    y, dy, x :
        Values, their uncertainties and the sample times.  The code calls
        ``x.min()`` / ``x.max()`` and uses in-place array arithmetic, so
        these are assumed to be NumPy arrays -- TODO confirm with callers.
        ``x`` is shifted IN PLACE so that its minimum becomes zero; the
        caller's array is modified.
    sys_err : float
        Systematic error added in quadrature to ``dy``.
    srcid :
        Source identifier, forwarded to ``self.get_2P_modeled_features``.

    Returns
    -------
    (dict, array)
        ``out_dict`` with one ``"freqN"`` sub-dictionary per fitted
        frequency plus the scalar features computed below, and ``ytest``,
        the copy of ``y`` after all fitted models have been subtracted.
    """

    def _sorted_by_phase(phase, values):
        # Pair each value with its folded phase and order the pairs by phase
        # (ties fall back to the value, exactly as tuple sorting does).
        # sorted() is used instead of zip(...).sort() so this also works
        # where zip() returns an iterator rather than a list.
        pairs = sorted(zip(phase, values))
        sorted_phase, sorted_values = zip(*pairs)
        return numpy.array(sorted_phase), numpy.array(sorted_values)

    ### These are defaults found in run_lomb14.py::run_lomb() definition:
    nharm = 8  # nharm = 4
    num_freq_comps = 3
    do_models = True  # 20120720: dstarr changes from False -> True
    tone_control = 5.0  # 1.
    ##############

    dy0 = sqrt(dy**2 + sys_err**2)
    wt = 1. / dy0**2
    x -= x.min()  # needed for lomb() code to run in a fast amount of time

    chi0 = dot(y**2, wt)

    # Frequency grid: f0 + df * k for k in [0, numf).
    Xmax = x.max()
    f0 = 1. / Xmax
    df = 0.8 / Xmax  # 20120202 : 0.1/Xmax
    fe = 33.  # pre 20120126: 10. # 25
    numf = int((fe - f0) / df)
    freqin = f0 + df * arange(numf, dtype='float64')

    ytest = 1. * y  # makes a copy of the array
    dof = n0 = len(x)

    out_dict = {}
    # (A 'sigma_vary' feature derived from gammaincc(0.5*(n0-1), 0.5*chi0)
    #  used to be computed here; it was already disabled in the original.)

    lambda0_range = [-log10(n0), 8]  # these numbers "fix" the strange-amplitude effect

    for i in range(num_freq_comps):
        if i == 0:
            # Only the first pass fits a linear trend (detrend_order=1).
            psd, res = lombr(x, ytest, dy0, f0, df, numf,
                             tone_control=tone_control,
                             lambda0_range=lambda0_range,
                             nharm=nharm, detrend_order=1)
            ### I think it still makes sense to set these here, even though
            ### freq1 may be replaced by another non-alias freq.  This is
            ### because these are parameters that are derived from the first
            ### prewhitening application:
            out_dict['lambda'] = res['lambda0']  # 20120206 added
            out_dict['chi0'] = res['chi0']
            out_dict['time0'] = res['time0']
            out_dict['trend'] = res['trend_coef'][1]  # temp_b
            out_dict['trend_error'] = res['trend_coef_error'][1]  # temp_covar[1][1] # this is the stdev(b)**2
        else:
            psd, res = lombr(x, ytest, dy0, f0, df, numf,
                             tone_control=tone_control,
                             lambda0_range=lambda0_range,
                             nharm=nharm, detrend_order=0)

        # Prewhiten: remove this component before searching for the next.
        ytest -= res['model']
        if i == 0:
            out_dict['varrat'] = dot(ytest**2, wt) / chi0
            # pre20110426: out_dict['cn0'] -= res['trend']*res['time0']
        dof -= n0 - res['nu']

        dstr = "freq%i" % (i + 1)

        if do_models:
            # 20120720Commentout#raise # this needs to be moved below after alias stuff
            out_dict[dstr + '_model'] = res['model']
        out_dict[dstr] = {}
        freq_dict = out_dict[dstr]
        freq_dict["frequency"] = res['freq']
        freq_dict["signif"] = res['signif']
        freq_dict["psd"] = psd  # 20110804 added just for self.make_psd_plot() use.
        freq_dict["f0"] = f0
        freq_dict["df"] = df
        freq_dict["numf"] = numf

        freq_dict['harmonics_amplitude'] = res['amplitude']
        freq_dict['harmonics_amplitude_error'] = res['amplitude_error']
        freq_dict['harmonics_rel_phase'] = res['rel_phase']
        freq_dict['harmonics_rel_phase_error'] = res['rel_phase_error']
        freq_dict['harmonics_nharm'] = nharm
        freq_dict['harmonics_time_offset'] = res['time0']
        # 20110429: out_dict['cn0'] disabled since it was previously mean
        # subtracted and not useful, and not mean subtracted is avg-mag and
        # essentially survey biased.
        freq_dict['harmonics_y_offset'] = res['cn0']

    ### Here we check for "1-day" aliases in ASAS / Deboss sources
    dstr_alias = []
    dstr_all = ["freq%i" % (i + 1) for i in range(num_freq_comps)]
    ### 20120212 Joey alias re-analysis (replaces the fixed-threshold test
    ### that was commented out on 20120223):
    alias = [{'per': 1.,
              'p_low': 0.92,
              'p_high': 1.08,
              'alpha_1': 8.191855,
              'alpha_2': -7.976243},
             {'per': 0.5,
              'p_low': 0.48,
              'p_high': 0.52,
              'alpha_1': 2.438913,
              'alpha_2': 0.9837243},
             {'per': 0.3333333333,
              'p_low': 0.325,
              'p_high': 0.342,
              'alpha_1': 2.95749,
              'alpha_2': -4.285432},
             {'per': 0.25,
              'p_low': 0.245,
              'p_high': 0.255,
              'alpha_1': 1.347657,
              'alpha_2': 2.326338}]

    for dstr in dstr_all:
        period = 1. / out_dict[dstr]['frequency']
        for a in alias:
            if ((period >= a['p_low']) and
                    (period <= a['p_high']) and
                    (out_dict[dstr]['signif'] <
                     (a['alpha_1'] / numpy.power(numpy.abs(period - a['per']), 0.25) + a['alpha_2']))):
                dstr_alias.append(dstr)  # this frequency has a "1 day" alias (or 0.5 or 0.33
                break  # only need to do this once per period, if an alias is found.

    out_dict['n_alias'] = len(dstr_alias)
    if 0:
        # 20120624 comment out the code which replaces the aliased freq1
        # with the next non-aliased one:
        if len(dstr_alias) > 0:
            ### Here we set the next non-alias frequency to freq1, etc:
            dstr_diff = list(set(dstr_all) - set(dstr_alias))
            dstr_diff.sort()  # want to ensure that the lowest freq is first
            reorder = []
            for dstr in dstr_all:
                if len(dstr_diff) > 0:
                    reorder.append(out_dict[dstr_diff.pop(0)])
                else:
                    reorder.append(out_dict[dstr_alias.pop(0)])

            for i, dstr in enumerate(dstr_all):
                out_dict[dstr] = reorder[i]

    if 0:
        ### Write PSD vs freq .png plots for AllStars web visualization:
        self.make_psd_plot(psd=out_dict['freq1']['psd'], srcid=srcid, freqin=freqin)

    var0 = var(ytest) - median(dy0)**2
    out_dict['sigma0'] = 0.
    if var0 > 0.:
        out_dict['sigma0'] = sqrt(var0)
    out_dict['nu'] = dof
    # 20110512: res['chi2'] is the last freq (freq3)'s chi2, which is pretty
    # similar to the old dot(ytest**2,wt) calculation which uses the signal
    # removed ytest.  This must be read before the 2P section below
    # overwrites ``res``.
    out_dict['chi2'] = res['chi2']
    out_dict['freq_binwidth'] = df
    out_dict['freq_searched_min'] = min(freqin)
    out_dict['freq_searched_max'] = max(freqin)
    out_dict['mad_of_model_residuals'] = median(abs(ytest - median(ytest)))

    ##### This is used for p2p_scatter_2praw feature:
    t_2per_fold = x % (2 / out_dict['freq1']['frequency'])
    t_2fold_array, m_2fold_array = _sorted_by_phase(t_2per_fold, y)
    sumsqr_diff_folded = numpy.sum((m_2fold_array[1:] - m_2fold_array[:-1])**2)
    sumsqr_diff_unfold = numpy.sum((y[1:] - y[:-1])**2)
    p2p_scatter_2praw = sumsqr_diff_folded / sumsqr_diff_unfold
    out_dict['p2p_scatter_2praw'] = p2p_scatter_2praw

    mad = numpy.median(numpy.abs(y - median(y)))
    out_dict['p2p_scatter_over_mad'] = numpy.median(numpy.abs(y[1:] - y[:-1])) / mad

    ### eta feature from arXiv 1101.3316 Kim QSO paper:
    out_dict['p2p_ssqr_diff_over_var'] = sumsqr_diff_unfold / ((len(y) - 1) * numpy.var(y))

    t_1per_fold = x % (1. / out_dict['freq1']['frequency'])
    t_1fold_array, m_1fold_array = _sorted_by_phase(t_1per_fold, y)
    out_dict['p2p_scatter_pfold_over_mad'] = \
        numpy.median(numpy.abs(m_1fold_array[1:] - m_1fold_array[:-1])) / mad

    ######################## # # #
    ### This section is used to calculate Dubath (10. Percentile90:2P/P)
    ###     Which requires regenerating a model using 2P where P is the original found period
    ### NOTE: this essentially runs everything a second time, so makes feature
    ###     generation take roughly twice as long.

    model_vals = numpy.zeros(len(y))
    freq_2p = out_dict['freq1']['frequency'] * 0.5
    ytest_2p = 1. * y  # makes a copy of the array

    ### So here we force the freq to just 2*freq1_Period
    # - we also do not use linear detrending since we are not searching for freqs, and
    #   we want the resulting model to be smooth when in phase-space.  Detrending would result
    #   in non-smooth model when period folded
    psd, res = lombr(x, ytest_2p, dy0, freq_2p, df, 1,  # a single grid point at freq_2p
                     tone_control=tone_control,
                     lambda0_range=lambda0_range,
                     nharm=nharm, detrend_order=0)
    model_vals += res['model']
    ytest_2p -= res['model']
    for i in range(1, num_freq_comps):
        psd, res = lombr(x, ytest_2p, dy0, f0, df, numf,
                         tone_control=tone_control,
                         lambda0_range=lambda0_range,
                         nharm=nharm, detrend_order=0)
        ytest_2p -= res['model']

    out_dict['medperc90_2p_p'] = scoreatpercentile(numpy.abs(ytest_2p), 90) / \
                                 scoreatpercentile(numpy.abs(ytest), 90)

    some_feats = self.get_2P_modeled_features(x=x, y=y,
                                              freq1_freq=out_dict['freq1']['frequency'],
                                              srcid=srcid, ls_dict=out_dict)
    out_dict.update(some_feats)

    ### So the following uses the 2*Period model, and gets a time-sorted, folded t and m:
    ### - NOTE: if this is succesful, I think a lot of other features could characterize the
    ###   shapes of the 2P folded data (not P or 2P dependent).
    ### - the reason we choose 2P is that occasionally for eclipsing
    ###   sources the LS code chooses 0.5 of true period (but never 2x
    ###   the true period).  slopes are not dependent upon the actual
    ###   period so 2P is fine if it gives a high chance of correct fitting.
    ### - NOTE: we only use the model from freq1 because this with its harmonics seems to
    ###   adequately model shapes such as RRLyr skewed sawtooth, multi minima of rvtau
    ###   without getting the scatter from using additional LS found frequencies.
    t_2per_fold = x % (1 / freq_2p)
    t_2fold_array, m_2fold_array = _sorted_by_phase(t_2per_fold, model_vals)
    slopes = (m_2fold_array[1:] - m_2fold_array[:-1]) / (t_2fold_array[1:] - t_2fold_array[:-1])
    out_dict['fold2P_slope_10percentile'] = scoreatpercentile(slopes, 10)  # this gets the steepest negative slope?
    out_dict['fold2P_slope_90percentile'] = scoreatpercentile(slopes, 90)  # this gets the steepest positive slope?

    return out_dict, ytest
fp = open(fpath, 'w') for i in range(len(x)): fp.write("%lf %lf %lf\n" % (x[i], y[i], dy[i])) fp.close() dy0 = sqrt(dy**2+sys_err**2) Xmax = x.max() f0 = 1./Xmax; df = 0.1/Xmax; fe = 10. numf = int((fe-f0)/df) freqin = f0 + df*arange(numf,dtype='float64') #psd,res = lombr(x,y,dy0,f0,df,numf) psd,res = lombr(x,y,dy0,f0,df,numf, detrend_order=1) import pdb; pdb.set_trace() print psd1,res1 = lombr(x,y-res['model'],dy0,f0,df,numf, detrend_order=0) plot (freqin,psd) ### """ The default is to fit 8 harmonics to every initial lomb-scargle peak above 6, with 0th order detrending (fitting mean only). Dan, if you think I should, I can put the logic to define the frequency grid in the main code and not in a wrapper like this. res is a dictionary containing the stuff previously reported by pre_whiten: amplitudes, phases, the folded model, etc.
def gen_orbital_period(self, doplot=False, sig_features=[30,20,15,8,5], min_eclipses=4,
                       eclipse_shorter=False, dynamic=True, choose_largest_numf=False):
    """Search the pulsation-subtracted light curve for an orbital period.

    The pulsation model from ``gen_outlier_stat_features`` is subtracted, a
    Lomb-Scargle search is run on the residuals, the peak is refined with
    ``selectp``, and the result is stored in ``self.features`` under
    ``best_orb_period``, ``best_orb_chi2``, ``orb_signif``, ``is_suspect``
    and ``suspect_reason``.  ``self.last_res`` holds the ``lombr`` result.

    Parameters
    ----------
    doplot : bool
        Plot the best fit on figure 2 and save it as a PNG.
    sig_features : sequence
        Forwarded to ``gen_outlier_stat_features``.
    min_eclipses : number
        Sets the lowest searched frequency (``min_eclipses / Xmax``) and the
        "too long" threshold ``Xmax / (2 * min_eclipses)``.
    eclipse_shorter : bool
        When true, search above 0.98x the pulsation frequency (up to 10.0)
        instead of below it.  Ignored if ``choose_largest_numf`` is set.
    dynamic : bool
        Forwarded to ``selectp.selectp``.
    choose_largest_numf : bool
        Build both the low and high grids and use whichever has more points.

    Returns
    -------
    None
        Best effort: any failure is caught and the method returns early
        with the features left as they were at that point.
    """
    try:
        offs, res2 = self.gen_outlier_stat_features(doplot=doplot, sig_features=sig_features)

        ## subtract the model
        new_y = self.y - res2['model']

        # make new weights that penalize sources _near_ the model
        dy0 = np.sqrt(self.dy_orig**2 + res2['model_error']**2 +
                      (3*self.sys_err*np.exp(-1.0*abs(offs)/3))**2)  ## this downweights data near the model
        Xmax = self.x0.max()

        # Grid below the pulsation frequency ...
        f0_low = min_eclipses/Xmax
        df_low = 0.1/Xmax
        fe_low = res2['freq']*0.98  ## dont go near fundamental freq least we find it again
        # ... and grid above it.
        f0_high = res2['freq']*0.98
        fe_high = 10.0
        df_high = 0.1/Xmax

        if choose_largest_numf:
            f0, df = f0_low, df_low
            numf = int((fe_low - f0_low)/df_low)
            numf_high = int((fe_high - f0_high)/df_high)
            if numf < numf_high:
                f0, df, numf = f0_high, df_high, numf_high
        elif not eclipse_shorter:
            f0, df = f0_low, df_low
            numf = int((fe_low - f0_low)/df_low)
        else:
            f0, df = f0_high, df_high
            numf = int((fe_high - f0_high)/df_high)

        freqin = f0 + df*np.arange(numf, dtype='float64')
        periodin = 1/freqin

        if self.verbose:
            print("P min, max %s %s" % (min(periodin), max(periodin)))

        # Use the down-weighting errors computed above.  The original passed
        # self.dy0 here, which left ``dy0`` unused; the sibling method
        # gen_outlier_stat_features passes its local dy0 to lombr.
        # NOTE(review): this changes the numerical result -- confirm intent.
        psdr, res2 = lombr(self.x0, new_y, dy0, f0, df, numf)
        period = 1./res2['freq']
        if self.verbose:
            print("orb period = %f sigf = %f" % (period, res2['signif']))
        self.last_res = res2

        s = selectp.selectp(self.x0, new_y, self.dy_orig, period, mults=[1.0,2.0],
                            dynamic=dynamic, verbose=self.verbose, srcid=self.srcid)
        s.select()
        self.features.update({"best_orb_period": s.rez['best_period'],
                              "best_orb_chi2": s.rez['best_chi2'],
                              'orb_signif': res2['signif']})

        is_suspect = False
        reason = []
        best_period = self.features['best_orb_period']
        if abs(1.0 - best_period) < 0.01 or abs(2.0 - best_period) < 0.01 or \
           abs(0.5 - best_period) < 0.01:
            ## likely an alias
            is_suspect = True
            reason.append("alias")
        if self.features['best_orb_chi2'] > 10.0 or self.features['orb_signif'] < 4:
            is_suspect = True
            reason.append("low significance")
        if best_period > Xmax/(2*min_eclipses):
            ## probably too long
            is_suspect = True
            reason.append("too long")
        # Distance of P_orb / P_pulse from the nearest integer.
        if (0.5 - abs((best_period / self.features['p_pulse']) % 1.0 - 0.5)) < 0.01:
            ## probably an alias of the pulse period
            is_suspect = True
            reason.append("pulse alias")

        self.features.update({'is_suspect': is_suspect,
                              'suspect_reason': None if not is_suspect else "; ".join(reason)})

        if doplot:
            # Plotting is best effort and must not discard the features.
            try:
                plt.figure(2)
                plt.cla()
                s.plot_best(extra="suspect=%s %s" % (is_suspect, "" if not is_suspect else "(" + ",".join(reason) + ")"))
                # Build the name once so the message reports the file that
                # was actually written (the original printed "org-...").
                fname = "orb-%s-p=%f-sig=%f.png" % (os.path.basename(self.name), period, res2['signif'])
                plt.savefig(fname)
                if self.verbose:
                    print("saved... %s" % fname)
            except Exception:
                pass
    except Exception as err:
        # Deliberate best effort: give up quietly, but let KeyboardInterrupt
        # and SystemExit through and say why when verbose output is enabled.
        if getattr(self, 'verbose', False):
            print("gen_orbital_period failed: %s" % (err,))
        return
def gen_outlier_stat_features(self,doplot=False,sig_features=[30,20,15,8,5],\
                              min_freq=10.0,dosave=True,max_pulse_period=400.0):
    """Generate outlier features and refine the initial pulsational period.

    The initial period comes from ``_get_pulsational_period``.  Residuals
    from its model are normalised by ``self.dy0`` and median-subtracted;
    for every threshold in ``sig_features`` the difference between the
    fraction of epochs in the negative band and in the positive band is
    stored (x100) as ``"feature-<s>-ratio-diff"``.  Epochs more than 4
    sigma off then get inflated errors and the period search is repeated
    within +/-5% of the initial frequency; the refined period is stored as
    ``self.features["p_pulse"]``.  Also sets ``self.dy_orig``.

    Parameters
    ----------
    doplot : bool
        Plot the result; only honoured when ``self.allow_plotting`` is true.
    sig_features : sequence of numbers
        Band edges in units of sigma.  The first band is open-ended
        (``inf`` down to ``sig_features[0]``), each later band runs from
        the previous entry down to the current one.
    min_freq, max_pulse_period : float
        Forwarded to ``_get_pulsational_period``.
    dosave : bool
        Save the plot as a PNG when plotting.

    Returns
    -------
    (array, dict)
        The normalised, median-subtracted residuals and the ``lombr``
        result of the INITIAL search (not of the refined one).
    """
    # max_pulse_period used to be accepted but never passed on, so changing
    # it had no effect.  The defaults match, so default behaviour is the same.
    res2 = self._get_pulsational_period(doplot=doplot, min_freq=min_freq,
                                        max_pulse_period=max_pulse_period)

    ## now sigclip
    offs = (self.y - res2['model'])/self.dy0
    moffs = np.median(offs)
    offs -= moffs

    ## do some feature creation ... find the statistics of major outliers
    for i, s in enumerate(sig_features):
        rr = (np.inf, s) if i == 0 else (sig_features[i-1], s)
        # Positive band: s < offs < previous threshold.
        tmp = (offs < rr[0]) & (offs > rr[1])
        nlow = float(tmp.sum())/self.nepochs
        # Mirrored negative band.
        tmp = (offs > -1*rr[0]) & (offs < -1*rr[1])
        nhigh = float(tmp.sum())/self.nepochs
        if self.verbose:
            print("%i: low = %f high = %f  feature-%i-ratio-diff = %f" % (s, nlow, nhigh, s, nhigh - nlow))

        self.features.update({"feature-%i-ratio-diff" % s: (nhigh - nlow)*100.0})

    # Inflate the errors of epochs more than 4 sigma from the model so they
    # carry less weight in the refined search.
    tmp = np.where(abs(offs) > 4)
    self.dy_orig = copy.copy(self.merr)
    dy = copy.copy(self.merr)
    dy[tmp] = np.sqrt(dy[tmp]**2 + res2['model_error'][tmp]**2 +
                      (8.0*(1 - np.exp(-1.0*abs(offs[tmp])/4)))**2)
    dy0 = np.sqrt(dy**2 + self.sys_err**2)

    # refine around original period
    df = 0.1/self.x0.max()
    f0 = res2['freq']*0.95
    fe = res2['freq']*1.05
    numf = int((fe - f0)/df)
    if numf == 0:
        ## Josh's original calcs, which fail for sources like: 221205
        numf = 100  # kludge / fudge / magic number
        df = (fe - f0) / float(numf)

    psdr, res = lombr(self.x0, self.y, dy0, f0, df, numf, detrend_order=1)
    period = 1./res['freq']
    self.features.update({"p_pulse": period})

    if self.allow_plotting and doplot:
        # Best effort; Exception (not a bare except) lets Ctrl-C through.
        try:
            # Outlier epochs folded at the initial frequency ...
            tt = (self.x0*res2['freq']) % 1.
            s = tt.argsort()
            plt.errorbar(tt[tmp], self.y[tmp], self.dy_orig[tmp], fmt='o', c="r")
            # ... and the refined model folded at the refined frequency.
            tt = (self.x0*res['freq']) % 1.
            s = tt.argsort()
            plt.plot(tt[s], res['model'][s], c="r")
            if dosave:
                fname = "pulse-%s-p=%f.png" % (os.path.basename(self.name), period)
                plt.savefig(fname)
                if self.verbose:
                    print("saved... %s" % fname)
            plt.draw()
        except Exception:
            pass
    return offs, res2