def train_quadfit(
        trainingfilename=os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'cannon', 'training', 'training_apokasc_gc_ind_feh_fix.txt'),
        outfilename=os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'cannon', 'trained', 'trained_apokasc_gc_ind_feh_fix.txt'),
        baseline_labels=[4500., 2., -0.3, 0.05]):
    """
    NAME:
       train_quadfit
    PURPOSE:
       run the quadratic Cannon fit on a training set and write the result to disk
    INPUT:
       trainingfilename= name of the file that has the training data
       outfilename= name of the file that receives the first output of cannon.quadfit; the second output (scatter) goes to the same name with .txt replaced by _scatter.txt and the baseline labels to the name with .txt replaced by _baseline_labels.txt
       baseline_labels= baseline to subtract from the labels (indexed in the same order as the labels)
    OUTPUT:
       (none; just writes the output to files)
    HISTORY:
       2015-02-28 - Written - Bovy (IAS)
       2018-02-05 - Updated to account for changing detector ranges - Price-Jones (UofT)
    """
    def _sibling(suffix):
        # Name of a companion output file: '.txt' swapped for the suffix
        return outfilename.replace('.txt', suffix)

    # Read the training data
    loc_ids, ap_ids, labels = _read_training(trainingfilename)
    # Subtract the baseline from each label, keeping the result a tuple
    shifted = [labels[0] - baseline_labels[0]]
    shifted.extend(labels[jj] - baseline_labels[jj]
                   for jj in range(1, len(labels)))
    labels = tuple(shifted)
    # Only the total number of pixels is needed to size the arrays
    _, _, _, npix = _aspcapPixelLimits(dr=None)
    nstar = len(loc_ids)
    spec = numpy.empty((nstar, npix))
    specerr = numpy.empty((nstar, npix))
    # Load the spectrum (ext=1) and its error (ext=2) for each star
    for jj in range(nstar):
        spec[jj] = apread.aspcapStar(loc_ids[jj], ap_ids[jj], ext=1,
                                     header=False, aspcapWavegrid=True)
        specerr[jj] = apread.aspcapStar(loc_ids[jj], ap_ids[jj], ext=2,
                                        header=False, aspcapWavegrid=True)
    # Train
    fit = cannon.quadfit(spec, specerr, *labels)
    # Save to file
    numpy.savetxt(outfilename, fit[0])
    numpy.savetxt(_sibling('_scatter.txt'), fit[1])
    numpy.savetxt(_sibling('_baseline_labels.txt'), baseline_labels)
    return None
def train_quadfit(
        trainingfilename=os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'cannon', 'training', 'training_apokasc_gc_ind_feh_fix.txt'),
        outfilename=os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'cannon', 'trained', 'trained_apokasc_gc_ind_feh_fix.txt'),
        baseline_labels=[4500., 2., -0.3, 0.05]):
    """
    NAME:
       train_quadfit
    PURPOSE:
       train a quadratic polynomial fit (cannon.quadfit) to training data
    INPUT:
       trainingfilename= name of the file that has the training data
       outfilename= name of the file that will hold the output (scatter is in file with .txt replaced by _scatter.txt, baseline labels in file with .txt replaced by _baseline_labels.txt)
       baseline_labels= baseline to subtract from the labels
    OUTPUT:
       (none; just writes the output to a file)
    HISTORY:
       2015-02-28 - Written - Bovy (IAS)
       2018-02-05 - Updated to account for changing detector ranges - Price-Jones (UofT)
    """
    # Training set: location IDs, APOGEE IDs, and the labels
    loc_ids, ap_ids, labels = _read_training(trainingfilename)
    # Baseline-subtracted labels, as a tuple so they can be splatted below
    labels = (labels[0] - baseline_labels[0],) + tuple(
        labels[idx] - baseline_labels[idx] for idx in range(1, len(labels)))
    # Size of the spectra for the current data release; the detector start
    # indices are not needed here
    _blue, _green, _red, npixels = _aspcapPixelLimits(dr=None)
    shape = (len(loc_ids), npixels)
    spec = numpy.empty(shape)
    specerr = numpy.empty(shape)
    # Extension 1 fills the spectra, extension 2 the errors
    for row in range(shape[0]):
        for ext, target in ((1, spec), (2, specerr)):
            target[row] = apread.aspcapStar(loc_ids[row], ap_ids[row],
                                            ext=ext, header=False,
                                            aspcapWavegrid=True)
    # Train
    qout = cannon.quadfit(spec, specerr, *labels)
    # Save the fit, its scatter, and the baseline that was subtracted
    outputs = (
        (outfilename, qout[0]),
        (outfilename.replace('.txt', '_scatter.txt'), qout[1]),
        (outfilename.replace('.txt', '_baseline_labels.txt'),
         baseline_labels),
    )
    for fname, values in outputs:
        numpy.savetxt(fname, values)
    return None
def pixels_cannon(*args, **kwargs):
    """
    NAME:
       pixels_cannon
    PURPOSE:
       determine continuum pixels using a Cannon-like technique (Ness et al. 2015)
    INPUT:
       Either:
        a) Input for running the apogee.spec.cannon:
          spec - spectra to fit (nspec,nlambda)
          specerrs - errors on the spectra (nspec,nlambda); assume no covariances
          label1, label2, ... - labels (nspec); best to subtract reference values before running this
          type= ('lin') type of Cannon to run: 'lin' - linear Cannon
                                               'quad' - quadratic Cannon
        b) Output from a previous Cannon run:
          coefficients - coefficients from the fit (ncoeffs,nlambda)
          scatter - scatter from the fit (nlambda)
          type= ('lin') type of Cannon that produced the coefficients
        c) No positional input: the fit returned by apogee.spec._train_cannon.load_fit is used and treated as quadratic
    KEYWORDS:
       baseline_dev= (0.015) maximum deviation from baseline
       label1_max= (10.**-5.) maximum deviation in first linear coefficient
       label2_max= (0.006) similar for the second
       label3_max= (0.012) similar for the third
       labelN_max= same with default 0.03
       ...
       scatter_max= (0.015) maximum scatter of residuals
       dr= (module-wide default) data release
    OUTPUT:
       Boolean index into the wavelength range with True for continuum pixels
    RAISES:
       ValueError if type is not 'lin' or 'quad' (case-insensitive)
    HISTORY:
       2015-02-05 - Written - Bovy (IAS@KITP)
    """
    # Grab kwargs; local name avoids shadowing the builtin 'type'
    fit_type = kwargs.pop('type', 'lin')
    dr = kwargs.pop('dr', path._default_dr())
    # Parse input
    if len(args) == 0:
        # Use default fit
        from apogee.spec._train_cannon import load_fit
        coeffs, scatter, _ = load_fit()
        fit_type = 'quad'
    else:
        fit_type = fit_type.lower()
        if fit_type not in ('lin', 'quad'):
            raise ValueError("type= must be 'lin' or 'quad', got %r"
                             % (fit_type,))
        spec = args[0]
        specerr = args[1]
        # Determine the type of input: 2D errors mean spectra were given
        if len(specerr.shape) == 2:
            # Run the Cannon
            if fit_type == 'lin':
                coeffs, scatter = cannon.linfit(*args)
            else:
                coeffs, scatter = cannon.quadfit(*args)
        else:
            coeffs = spec
            scatter = specerr
    ncoeffs = coeffs.shape[0]
    if fit_type == 'lin':
        nlabels = ncoeffs - 1
    else:
        # Invert ncoeffs = 1 + nlabels + nlabels*(nlabels+1)/2
        nlabels = int((-3 + numpy.sqrt(9 + 8 * (ncoeffs - 1)))) // 2
    # Determine continuum pixels
    out = numpy.ones(len(scatter), dtype='bool')
    # Deviation from baseline
    out[numpy.fabs(coeffs[0] - 1.) > kwargs.get('baseline_dev', 0.015)] = False
    # Large dependence on labels; built per label so that fits with fewer
    # than three labels do not index past the end of the threshold array
    default_maxs = (10.**-5., 0.006, 0.012)
    maxs = numpy.array(
        [kwargs.get('label%i_max' % (ii + 1),
                    default_maxs[ii] if ii < len(default_maxs) else 0.03)
         for ii in range(nlabels)], dtype=float)
    for ii in range(nlabels):
        out[numpy.fabs(coeffs[ii + 1]) > maxs[ii]] = False
    # Large residuals
    out[scatter > kwargs.get('scatter_max', 0.015)] = False
    _, _, _, aspcapDR12length = _aspcapPixelLimits(dr='12')
    if int(dr) > 12 and coeffs.shape[1] == aspcapDR12length:
        # Want continuum pixels on >DR12 ASPCAP grid, but using coefficients
        # from <= DR12 grid
        dr_module = path._default_dr()
        try:
            path.change_dr(12)
            out = toApStarGrid(out)
            path.change_dr(dr)
            out = toAspcapGrid(out)
        finally:
            # Always restore the module-wide data release, even when the
            # regridding fails
            path.change_dr(dr_module)
    return out
def pixels_cannon(*args,**kwargs):
    """
    NAME:
       pixels_cannon
    PURPOSE:
       determine continuum pixels using a Cannon-like technique (Ness et al. 2015)
    INPUT:
       Either:
        a) Input for running the apogee.spec.cannon:
          spec - spectra to fit (nspec,nlambda)
          specerrs - errors on the spectra (nspec,nlambda); assume no covariances
          label1, label2, ... - labels (nspec); best to subtract reference values before running this
          type= ('lin') type of Cannon to run: 'lin' - linear Cannon
                                               'quad' - quadratic Cannon
        b) Output from a previous Cannon run:
          coefficients - coefficients from the fit (ncoeffs,nlambda)
          scatter - scatter from the fit (nlambda)
          type= ('lin') type of Cannon that produced the coefficients
        c) No positional input: the fit returned by apogee.spec._train_cannon.load_fit is used and treated as quadratic
    KEYWORDS:
       baseline_dev= (0.015) maximum deviation from baseline
       label1_max= (10.**-5.) maximum deviation in first linear coefficient
       label2_max= (0.006) similar for the second
       label3_max= (0.012) similar for the third
       labelN_max= same with default 0.03
       ...
       scatter_max= (0.015) maximum scatter of residuals
    OUTPUT:
       Boolean index into the wavelength range with True for continuum pixels
    RAISES:
       ValueError if type is not 'lin' or 'quad' (case-insensitive)
    HISTORY:
       2015-02-05 - Written - Bovy (IAS@KITP)
    """
    # Grab kwargs; local name avoids shadowing the builtin 'type'
    fit_type= kwargs.pop('type','lin')
    # Parse input
    if len(args) == 0:
        # Use default fit
        from apogee.spec._train_cannon import load_fit
        coeffs, scatter, _= load_fit()
        fit_type= 'quad'
    else:
        fit_type= fit_type.lower()
        if fit_type not in ('lin','quad'):
            raise ValueError("type= must be 'lin' or 'quad', got %r"
                             % (fit_type,))
        spec= args[0]
        specerr= args[1]
        # Determine the type of input: 2D errors mean spectra were given
        if len(specerr.shape) == 2:
            # Run the Cannon
            if fit_type == 'lin':
                coeffs, scatter= cannon.linfit(*args)
            else:
                coeffs, scatter= cannon.quadfit(*args)
        else:
            coeffs= spec
            scatter= specerr
    ncoeffs= coeffs.shape[0]
    if fit_type == 'lin':
        nlabels= ncoeffs-1
    else:
        # Invert ncoeffs = 1 + nlabels + nlabels*(nlabels+1)/2
        nlabels= int((-3+numpy.sqrt(9+8*(ncoeffs-1))))//2
    # Determine continuum pixels
    out= numpy.ones(len(scatter),dtype='bool')
    # Deviation from baseline
    out[numpy.fabs(coeffs[0]-1.) > kwargs.get('baseline_dev',0.015)]= False
    # Large dependence on labels; built per label so that fits with fewer
    # than three labels do not index past the end of the threshold array
    default_maxs= (10.**-5.,0.006,0.012)
    maxs= numpy.array(
        [kwargs.get('label%i_max' % (ii+1),
                    default_maxs[ii] if ii < len(default_maxs) else 0.03)
         for ii in range(nlabels)],dtype=float)
    for ii in range(nlabels):
        out[numpy.fabs(coeffs[ii+1]) > maxs[ii]]= False
    # Large residuals
    out[scatter > kwargs.get('scatter_max',0.015)]= False
    return out
def _afe_bin_index(fehdata,ii):
    """Boolean index of the entries of fehdata whose define_rcsample._AFETAG
    value lies in (ii*0.05-0.025, (ii+1)*0.05-0.025]"""
    return (fehdata[define_rcsample._AFETAG] > ii*0.05-0.025)\
        *(fehdata[define_rcsample._AFETAG] <= (ii+1)*0.05-0.025)

def _afe_bin_style(ii,yrange):
    """Plotting keywords shared by every bin: axis range, a random zorder in
    0..4, the cm.seismic color for bin ii, and overplotting for ii > 0"""
    return {'yrange':list(yrange),
            'ylabel':'',
            'cleanZero':False,
            'zorder':int(numpy.floor(numpy.random.uniform()*5)),
            'color':cm.seismic((ii*0.05)/0.25),
            'overplot':ii>0}

def _afe_pca_residuals(fehdata):
    """Load the spectra of fehdata, fit them with apcannon.quadfit, and smooth
    the residuals with EM-PCA; returns (smoothed residuals, residuals,
    fit coefficients)"""
    nspec= len(fehdata)
    # NOTE(review): 7214 is presumably the number of pixels returned by
    # apread.aspcapStar(...,aspcapWavegrid=True) for this data release --
    # confirm
    spec= numpy.zeros((nspec,7214))
    specerr= numpy.zeros((nspec,7214))
    for ii in range(nspec):
        sys.stdout.write('\r'+"Loading spectrum %i / %i ...\r" % (ii+1,nspec))
        sys.stdout.flush()
        spec[ii]= apread.aspcapStar(fehdata['LOCATION_ID'][ii],
                                    fehdata['APOGEE_ID'][ii],
                                    ext=1,header=False,aspcapWavegrid=True)
        specerr[ii]= apread.aspcapStar(fehdata['LOCATION_ID'][ii],
                                       fehdata['APOGEE_ID'][ii],
                                       ext=2,header=False,
                                       aspcapWavegrid=True)
    teffs= fehdata['FPARAM'][:,paramIndx('teff')]
    loggs= fehdata['FPARAM'][:,paramIndx('logg')]
    metals= fehdata[define_rcsample._FEHTAG]
    cf, _, r= apcannon.quadfit(spec,specerr,
                               teffs-4800.,loggs-2.85,metals+0.3,
                               return_residuals=True)
    pr= numpy.zeros_like(r)
    # Deal w/ bad data: zero weight for errors above _MAXERR and for NaNs
    _MAXERR= 0.02
    npca= 8
    # pca_input is the same array as r, so zeroing NaNs below also changes
    # the residuals that are returned
    pca_input= r
    pca_weights= (1./specerr**2.)
    pca_weights[pca_weights < 1./_MAXERR**2.]= 0.
    nanIndx= numpy.isnan(pca_input) + numpy.isnan(pca_weights)
    pca_weights[nanIndx]= 0.
    pca_input[nanIndx]= 0.
    # Run EM-PCA
    m= empca.empca(pca_input,pca_weights,nvec=npca,niter=25)
    # Reconstruct each residual spectrum from its npca components
    for jj in range(nspec):
        for kk in range(npca):
            pr[jj]+= m.coeff[jj,kk]*m.eigvec[kk]
    return pr, r, cf

def _plot_afe_windows(pr,fehdata,elem,yrange,fig_width,nbins=5):
    """Overplot, with splot.windows for element elem, the apstack.median of
    the first 12 rows of pr in each of nbins bins"""
    for ii in range(nbins):
        tindx= _afe_bin_index(fehdata,ii)
        stacked= apstack.median(pr[tindx][:12])
        kwargs= _afe_bin_style(ii,yrange)
        # Lines are only marked when plotting the last bin
        kwargs['markLines']= ii == nbins-1
        # The figure width is only passed when the figure is created
        if ii == 0: kwargs['fig_width']= fig_width
        splot.windows(stacked,elem,**kwargs)
    return None

def _plot_afe_waveregions(pr,fehdata,plotfile,label,regions,markwav,
                          fig_width,nbins=5):
    """Overplot, with splot.waveregions for the given regions, the
    apstack.median of the first 12 rows of pr in each of nbins bins, add the
    text label, and finish the figure with bovy_end_print(plotfile)"""
    for ii in range(nbins):
        tindx= _afe_bin_index(fehdata,ii)
        stacked= apstack.median(pr[tindx][:12])
        kwargs= _afe_bin_style(ii,[-0.05,0.02])
        # Fresh copies of the region limits for every call
        for key in regions:
            kwargs[key]= list(regions[key])
        # The figure width is only passed when the figure is created
        if ii == 0: kwargs['fig_width']= fig_width
        # Lines are only marked when plotting the last bin
        if ii == nbins-1:
            kwargs['_markwav']= markwav
            kwargs['markLines']= True
        kwargs['_startendskip']= 0
        kwargs['_noxticks']= True
        kwargs['_labelwav']= True
        splot.waveregions(stacked,**kwargs)
        # NOTE(review): the indentation of the original was lost; the label
        # is drawn after every bin here -- confirm whether it was meant to be
        # drawn once per figure
        bovy_plot.bovy_text(label,
                            top_left=True,fontsize=10,backgroundcolor='w')
    bovy_plot.bovy_end_print(plotfile)
    return None

def plot_afe_spectra(savename,plotname):
    """
    NAME:
       plot_afe_spectra
    PURPOSE:
       plot stacked, EM-PCA-smoothed residuals of a quadratic Cannon fit in five bins of define_rcsample._AFETAG, for the stars of define_rcsample.get_rcsample() with SNR > 200 and -0.45 < FE_H <= -0.35
    INPUT:
       savename - name of the pickle file that caches the smoothed residuals; computed and saved (together with the residuals and fit coefficients) when the file does not exist, otherwise the first pickled object is loaded
       plotname - name of the output plot; 'ELEM' in it is replaced by S, Ca1, Mg, Si2, O, and Ti for the respective figures
    OUTPUT:
       (none; just writes the plots and possibly the cache file)
    """
    # Load the data
    data= define_rcsample.get_rcsample()
    data= data[data['SNR'] > 200.]
    fehindx= (data['FE_H'] <= -0.35)*(data['FE_H'] > -0.45)
    fehdata= data[fehindx]
    # First compute the residuals and do the EM-PCA smoothing
    if not os.path.exists(savename):
        pr, r, cf= _afe_pca_residuals(fehdata)
        save_pickles(savename,pr,r,cf)
    else:
        # NOTE: unpickling executes arbitrary code; only point savename at
        # cache files written by this function
        with open(savename,'rb') as savefile:
            pr= pickle.load(savefile)
    # Now plot the various elements
    widths= [3.5,2.]
    yranges= [[-0.05,0.02],[-0.03,0.01]]
    for ee, elem in enumerate(['S','Ca1']):
        _plot_afe_windows(pr,fehdata,elem,yranges[ee],widths[ee])
        bovy_plot.bovy_end_print(plotname.replace('ELEM',
                                                  elem.lower().capitalize()))
    # Also do Mg
    _plot_afe_waveregions(pr,fehdata,plotname.replace('ELEM','Mg'),
                          r'$\mathrm{Mg}$',
                          {'startindxs':[3012,3120,3990],
                           'endindxs':[3083,3158,4012]},
                          [15745.017,15753.189,15770.055,15958.836],
                          4.5)
    # Also do Si
    _plot_afe_waveregions(pr,fehdata,plotname.replace('ELEM','Si2'),
                          r'$\mathrm{Si}$',
                          {'startindxs':[4469, 4624,5171, 7205, 7843],
                           'endindxs':[4488, 4644,5182, 7243, 7871]},
                          apwindow.lines('Si'),
                          6.)
    # Also do Oxygen
    _plot_afe_waveregions(pr,fehdata,plotname.replace('ELEM','O'),
                          r'$\mathrm{O}$',
                          {'startlams':[15558,16242,16536,16720],
                           'endlams':[15566,16250,16544,16728]},
                          [15562,16246,16539,16723.5],
                          5.)
    # Also do Ti
    _plot_afe_waveregions(pr,fehdata,plotname.replace('ELEM','Ti'),
                          r'$\mathrm{Ti}$',
                          {'startindxs':[1116,2100,2899],
                           'endindxs':[1146,2124,2922]},
                          apwindow.lines('Ti'),
                          3.5)
    return None