def get_PCA_model(QSOlist, niter, nvec, lmin=1040, lmax=1600):
    """Resample a set of spectra on a common grid and fit an EMPCA model.

    Parameters
    ----------
    QSOlist : sequence
        Objects exposing ``w``, ``flux`` and ``ivar`` attributes, which are
        handed to ``resample_flux`` together with the common grid.
    niter, nvec :
        Forwarded to ``empca.empca`` as ``niter`` and ``nvec``.
    lmin, lmax : float
        Bounds of the common grid (step 0.1, ``lmax`` excluded).

    Returns
    -------
    tuple
        ``(dmodel, pcawave, pcaflux, pcaivar, data_meanspec)``: the EMPCA
        model, the retained grid points, the mean-subtracted fluxes, the
        clipped weights and the weighted mean spectrum.
    """
    grid = sp.arange(lmin, lmax, .1)
    n_spectra = len(QSOlist)

    pcaflux = sp.zeros((n_spectra, grid.size))
    pcaivar = sp.zeros((n_spectra, grid.size))
    for idx in range(n_spectra):
        qso = QSOlist[idx]
        # Interpolate this spectrum onto the common grid.
        pcaflux[idx], pcaivar[idx] = resample_flux(
            grid, qso.w, qso.flux, qso.ivar)

    # Negative weights are treated as "no measurement".
    pcaivar[pcaivar < 0.] = 0.

    # Keep only the grid points measured by at least one spectrum.
    measured_cols = sp.sum(pcaivar, axis=0) > 0.
    pcawave = grid[measured_cols]
    pcaflux = pcaflux[:, measured_cols]
    pcaivar = pcaivar[:, measured_cols]

    # Cap the weights so no single pixel dominates.
    pcaivar[pcaivar > 100.] = 100.

    # Weighted mean spectrum, removed only where a spectrum has data.
    data_meanspec = sp.average(pcaflux, weights=pcaivar, axis=0)
    for flux_row, ivar_row in zip(pcaflux, pcaivar):
        has_data = ivar_row > 0.
        flux_row[has_data] -= data_meanspec[has_data]

    print('Starting EMPCA: ')
    dmodel = empca.empca(pcaflux, weights=pcaivar, niter=niter, nvec=nvec)
    return dmodel, pcawave, pcaflux, pcaivar, data_meanspec
def empca_(X, n_components=3, niter=25, showeigenvecs=False):
    """Standardise ``X`` and run EMPCA on it, ignoring missing (NaN) entries.

    Parameters
    ----------
    X : array
        Data matrix; NaN marks a missing value. The input is not modified.
    n_components : int
        Number of eigenvectors requested from ``empca`` (``nvec``).
    niter : int
        Number of EMPCA iterations.
    showeigenvecs : bool
        If true, draw each eigenvector as a filled contour plot reshaped to
        ``(len(TIME), len(LAMB))`` using the module-level ``TIME``/``LAMB``.

    Returns
    -------
    tuple
        ``(X_PC, m, scaler)``: the EMPCA coefficients, the fitted EMPCA model
        and the fitted ``StandardScaler``.
    """
    X = X.copy()

    # The weight mask must be built BEFORE the NaNs are overwritten;
    # otherwise every weight is 1 and missing data are treated as real zeros.
    missing = np.isnan(X)
    weights = ~missing + 0
    X[missing] = 0

    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    m = empca(X, weights, nvec=n_components, niter=niter)
    X_PC = m.coeff

    if showeigenvecs:
        for i in range(n_components):
            f = np.reshape(m.eigvec[i], (len(TIME), len(LAMB)))
            plt.contourf(TIME, LAMB, f.T, 100)
            plt.title('PC%s' % (i + 1))
            plt.xlabel('MJD - max ' + r'$m_B$')
            plt.ylabel(r'wavelength [$\AA$]')
            plt.colorbar(label=r'$f_\lambda$, scaled')
            plt.show()

    return X_PC, m, scaler
def calculate_empca_projections(derivative, weight, empca_dict_file=None):
    """Project one derivative vector onto previously trained EMPCA vectors.

    Parameters
    ----------
    derivative : array
        Vector to project; the trained mean is subtracted from it and any
        NaN in the difference is replaced by 0.
    weight : array
        Per-element weights passed to the EMPCA model with the data.
    empca_dict_file : str, optional
        Path to a pickled dictionary holding ``'mean_derivative'`` and
        ``'eigvec'``. When omitted, the dictionary bundled with the
        ``lc_predictor`` package is used.

    Returns
    -------
    array
        The EMPCA coefficients of the single input vector.
    """
    if empca_dict_file is None:
        empca_dict = pickle.loads(pkgutil.get_data(
            'lc_predictor', 'trained_data/trained_empca.dict'))
    else:
        # Pickles are binary: open in 'rb' and let the context manager close
        # the handle even if loading fails.
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(empca_dict_file, 'rb') as filehandler:
            empca_dict = pickle.load(filehandler)

    mean_derivative = empca_dict['mean_derivative']
    eigvec = empca_dict['eigvec']

    # Build an untrained model of the right width (niter=0), then inject the
    # trained eigenvectors before handing it the real data.
    empca_model = empca(np.zeros([1, np.size(mean_derivative)]), niter=0)
    empca_model.eigvec = eigvec

    centered_derivative = np.nan_to_num(derivative - mean_derivative)
    empca_model.set_data(np.array([centered_derivative]), np.array([weight]))
    return empca_model.coeff[0]
def calculate_empca_projections(derivative, weight, empca_dict_file=None):
    """Project one derivative vector onto previously trained EMPCA vectors.

    Parameters
    ----------
    derivative : array
        Vector to project; the trained mean is subtracted from it and any
        NaN in the difference is replaced by 0.
    weight : array
        Per-element weights passed to the EMPCA model with the data.
    empca_dict_file : str, optional
        Path to a pickled dictionary holding ``'mean_derivative'`` and
        ``'eigvec'``. When omitted, the dictionary bundled with the
        ``lc_predictor`` package is used.

    Returns
    -------
    array
        The EMPCA coefficients of the single input vector.
    """
    if empca_dict_file is None:
        empca_dict = pickle.loads(
            pkgutil.get_data('lc_predictor',
                             'trained_data/trained_empca.dict'))
    else:
        # Pickles are binary: open in 'rb' and let the context manager close
        # the handle even if loading fails.
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(empca_dict_file, 'rb') as filehandler:
            empca_dict = pickle.load(filehandler)

    mean_derivative = empca_dict['mean_derivative']
    eigvec = empca_dict['eigvec']

    # Build an untrained model of the right width (niter=0), then inject the
    # trained eigenvectors before handing it the real data.
    empca_model = empca(np.zeros([1, np.size(mean_derivative)]), niter=0)
    empca_model.eigvec = eigvec

    centered_derivative = np.nan_to_num(derivative - mean_derivative)
    empca_model.set_data(np.array([centered_derivative]), np.array([weight]))
    return empca_model.coeff[0]
def calculatePCA(type, immatrix, m, n, dim):
    """Run the requested PCA variant on ``immatrix`` and time it.

    Parameters
    ----------
    type : str
        One of ``'pca'``, ``'empca'`` or ``'rpca'``.
    immatrix, m, n, dim :
        Passed through to the selected implementation (``pca.pca``,
        ``empca.empca`` or ``rpca.rpca``); ``dim`` is not used by ``'pca'``.

    Returns
    -------
    tuple
        ``(V, S, mean)`` as produced by the selected implementation.

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported names.
    """
    # Compare by value: ``is`` on string literals only works by accident of
    # interning and fails for strings built at run time.
    if type not in ('pca', 'empca', 'rpca'):
        raise ValueError(
            "unknown PCA type %r; expected 'pca', 'empca' or 'rpca'" % (type,))

    timer = Timer()

    if type == 'pca':
        timer.tic()
        V, S, immean = pca.pca(immatrix, m, n)
        timer.toc()
        return V, S, immean

    timer.tic()
    if type == 'empca':
        # 100 and 1.e-6 are passed positionally; presumably iteration count
        # and tolerance -- TODO confirm against empca.empca's signature.
        empcaResult = empca.empca(immatrix, 100, 1.e-6, dim, m, n)
    else:
        empcaResult = rpca.rpca(immatrix, m, n, 100, 1.e-6, dim)
    timer.toc()

    return empcaResult.V, empcaResult.S, empcaResult.mean_A
def reduce_dim(drname, X, niter=2000, nfriends=5, ndim=2, weight=None):
    """Reduce the dimensionality of ``X`` with the named method.

    Parameters
    ----------
    drname : str
        ``'Isomap'``, ``'MDS'``, ``'TSNE'`` or ``'empca'``.
    X : array
        Data matrix, one sample per row.
    niter : int
        Iteration budget for MDS and TSNE.
    nfriends : int
        Number of neighbours for Isomap.
    ndim : int
        Target dimension for Isomap, MDS and TSNE.
    weight : array, optional
        Weights handed to ``empca``; only used when ``drname == 'empca'``.

    Returns
    -------
    array
        The reduced representation of ``X``.

    Raises
    ------
    ValueError
        If ``drname`` is not one of the supported names.
    """
    if drname == 'Isomap':
        # Keyword arguments: recent scikit-learn releases no longer accept
        # these positionally.
        return manifold.Isomap(
            n_neighbors=nfriends, n_components=ndim).fit_transform(X)

    if drname == 'MDS':
        return manifold.MDS(
            n_components=ndim, max_iter=niter, n_init=1).fit_transform(X)

    if drname == 'TSNE':
        return manifold.TSNE(
            n_components=ndim, learning_rate=100, n_iter=niter,
            perplexity=5, random_state=0).fit_transform(X)

    if drname == 'empca':
        # Centre the data and run EMPCA.
        # NOTE(review): nvec is fixed at 5 here and ignores ``ndim`` --
        # confirm this is intended.
        centered_der = X - mean(X, 0)
        m = empca(centered_der, weight, nvec=5, smooth=0, niter=50)
        return m.coeff

    raise ValueError(
        "unknown drname %r; expected 'Isomap', 'MDS', 'TSNE' or 'empca'"
        % (drname,))
def calculate_empca_projections(derivative, weight, empca_dict_file=None):
    """Return the EMPCA projections of a SN.

    Parameters
    ----------
    derivative : array
        Vector to project; the trained mean is subtracted from it and any
        NaN in the difference is replaced by 0.
    weight : array
        Per-element weights passed to the EMPCA model with the data.
    empca_dict_file : str, optional
        Path to a pickled dictionary with the value of the mean derivative
        (``'mean_derivative'``) and the eigenvectors of the PCA analysis
        (``'eigvec'``). When omitted, the dictionary bundled with the
        ``lc_predictor`` package is used.

    Returns
    -------
    array
        The EMPCA coefficients of the single input vector.
    """
    if empca_dict_file is None:
        empca_dict = pickle.loads(pkgutil.get_data(
            'lc_predictor', 'trained_data/trained_empca.dict'))
    else:
        # Pickles are binary: open in 'rb' and let the context manager close
        # the handle even if loading fails.
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(empca_dict_file, 'rb') as filehandler:
            empca_dict = pickle.load(filehandler)

    mean_derivative = empca_dict['mean_derivative']
    eigvec = empca_dict['eigvec']

    # Build an untrained model of the right width (niter=0), then inject the
    # trained eigenvectors before handing it the real data.
    empca_model = empca(np.zeros([1, np.size(mean_derivative)]), niter=0)
    empca_model.eigvec = eigvec

    centered_derivative = np.nan_to_num(derivative - mean_derivative)
    empca_model.set_data(np.array([centered_derivative]), np.array([weight]))
    return empca_model.coeff[0]
# Load the inputs. The commented-out lines are alternative inputs kept for
# reference.
#derivatives = loadtxt(os.path.join(data_dir,'derivatives.dat') )
derivatives = loadtxt(os.path.join(data_dir, 'derivatives_not_res.dat'))
#errors = loadtxt(os.path.join(data_dir,'errors.dat') )
errors = ones(derivatives.shape)  # unit errors -> uniform weights below
# Build the path with os.path.join like the other inputs, so data_dir works
# with or without a trailing separator.
labels = loadtxt(os.path.join(data_dir, 'labels.dat'))

# Centre each column and run EMPCA with inverse-variance weights.
centered_der = derivatives - mean(derivatives, 0)
#m = empca(centered_der, 1./(errors)**2, nvec=5,smooth=0, niter=50)
m = empca(centered_der, 1./(errors)**2, nvec=nvec, smooth=0, niter=niter)
X = m.coeff

## plot the results
# Label 1 -> red, label 0 -> white; any other label contributes no colour.
colors_vec = []
for i in labels:
    if i == 1:
        colors_vec.append('r')
    if i == 0:
        colors_vec.append('w')

# One figure per component, each plotted against the first component.
for indexes in [[0, i] for i in range(nvec)]:
    figure()
    scatter(X[:, indexes[0]], X[:, indexes[1]], c=colors_vec,
            marker='.', linewidth=.1)
    xlabel('PC %d' % (indexes[0]+1))
def plot_afe_spectra(savename,plotname):
    """Plot EM-PCA-smoothed residual spectra in bins of the _AFETAG column.

    The sample is restricted to SNR > 200 and -0.45 < FE_H <= -0.35. If
    ``savename`` does not exist, the spectra and their errors are loaded, a
    quadratic fit is removed with ``apcannon.quadfit``, the residuals are
    smoothed with an 8-vector EM-PCA reconstruction, and the result is
    written with ``save_pickles``. Otherwise the smoothed residuals are read
    back from ``savename``.

    For each element, five bins of width 0.05 in ``define_rcsample._AFETAG``
    are overplotted and the figure is saved to ``plotname`` with the
    substring ``'ELEM'`` replaced by an element label.

    Parameters
    ----------
    savename : str
        Path of the pickle cache for the smoothed residuals.
    plotname : str
        Output filename template containing ``'ELEM'``.

    Returns
    -------
    None
    """
    # Load the data
    data= define_rcsample.get_rcsample()
    data= data[data['SNR'] > 200.]
    fehindx= (data['FE_H'] <= -0.35)*(data['FE_H'] > -0.45)
    fehdata= data[fehindx]
    # First compute the residuals and do the EM-PCA smoothing
    if not os.path.exists(savename):
        nspec= len(fehdata)
        # 7214 is the hard-coded number of pixels per spectrum
        spec= numpy.zeros((nspec,7214))
        specerr= numpy.zeros((nspec,7214))
        for ii in range(nspec):
            sys.stdout.write('\r'+"Loading spectrum %i / %i ...\r" % (ii+1,nspec))
            sys.stdout.flush()
            # ext=1 fills the spectrum array, ext=2 the error array
            spec[ii]= apread.aspcapStar(fehdata['LOCATION_ID'][ii],
                                        fehdata['APOGEE_ID'][ii],
                                        ext=1,header=False,aspcapWavegrid=True)
            specerr[ii]= apread.aspcapStar(fehdata['LOCATION_ID'][ii],
                                           fehdata['APOGEE_ID'][ii],
                                           ext=2,header=False,
                                           aspcapWavegrid=True)
        teffs= fehdata['FPARAM'][:,paramIndx('teff')]
        loggs= fehdata['FPARAM'][:,paramIndx('logg')]
        metals= fehdata[define_rcsample._FEHTAG]
        # Quadratic fit in offset labels; r holds the residuals
        cf, s, r= apcannon.quadfit(spec,specerr,
                                   teffs-4800.,loggs-2.85,metals+0.3,
                                   return_residuals=True)
        pr= numpy.zeros_like(r)
        # Deal w/ bad data
        _MAXERR= 0.02
        npca= 8
        # pca_input aliases r, so the NaN zeroing below also modifies r
        pca_input= r
        pca_weights= (1./specerr**2.)
        # Zero every weight below 1/_MAXERR**2, i.e. pixels with error
        # larger than _MAXERR
        pca_weights[pca_weights < 1./_MAXERR**2.]= 0.
        # Adding boolean arrays acts as a logical OR
        nanIndx= numpy.isnan(pca_input) + numpy.isnan(pca_weights)
        pca_weights[nanIndx]= 0.
        pca_input[nanIndx]= 0.
        # Run EM-PCA
        m= empca.empca(pca_input,pca_weights,nvec=npca,niter=25)#,silent=False)
        # Reconstruct each spectrum from its npca coefficients
        for jj in range(nspec):
            for kk in range(npca):
                pr[jj]+= m.coeff[jj,kk]*m.eigvec[kk]
        save_pickles(savename,pr,r,cf)
    else:
        # Only the first pickled object (pr) is needed for plotting
        with open(savename,'rb') as savefile:
            pr= pickle.load(savefile)
    # Now plot the various elements
    colormap= cm.seismic
    # Maps the lower edge label ii*0.05 onto the colormap argument
    colorFunc= lambda afe: afe/0.25
    widths= [3.5,2.]
    yranges= [[-0.05,0.02],[-0.03,0.01]]
    for ee, elem in enumerate(['S','Ca1']):
        for ii in range(5):
            # Bin ii covers (ii*0.05-0.025, (ii+1)*0.05-0.025] in _AFETAG
            tindx= (fehdata[define_rcsample._AFETAG] > ii*0.05-0.025)\
                *(fehdata[define_rcsample._AFETAG] <= (ii+1)*0.05-0.025)
            # Median of at most the first 12 spectra in the bin
            args= (apstack.median(pr[tindx][:12]),elem,)
            kwargs= {'markLines':ii==4,
                     'yrange':yranges[ee],
                     'ylabel':'',
                     'cleanZero':False,
                     'zorder':int(numpy.floor(numpy.random.uniform()*5)),
                     'color':colormap(colorFunc(ii*0.05)),
                     'overplot':ii>0,
                     'fig_width':widths[ee]}
            # fig_width is only passed for the first curve of a figure
            if ii>0: kwargs.pop('fig_width')
            splot.windows(*args,**kwargs)
        bovy_plot.bovy_end_print(plotname.replace('ELEM',
                                                  elem.lower().capitalize()))
    # Also do Mg
    for ii in range(5):
        tindx= (fehdata[define_rcsample._AFETAG] > ii*0.05-0.025)\
            *(fehdata[define_rcsample._AFETAG] <= (ii+1)*0.05-0.025)
        args= (apstack.median(pr[tindx][:12]),)
        kwargs={'startindxs':[3012,3120,3990],
                'endindxs':[3083,3158,4012],
                'yrange':[-0.05,0.02],
                'ylabel':'',
                'cleanZero':False,
                '_markwav':[15745.017,15753.189,15770.055,15958.836],
                'zorder':int(numpy.floor(numpy.random.uniform()*5)),
                'color':colormap(colorFunc(ii*0.05)),
                'overplot':ii>0,
                'fig_width':4.5,
                'markLines':True}
        if ii>0: kwargs.pop('fig_width')
        # Line markers are only passed for the last curve (ii == 4)
        if ii != 4:
            kwargs.pop('_markwav')
            kwargs.pop('markLines')
        kwargs['_startendskip']= 0
        kwargs['_noxticks']= True
        kwargs['_labelwav']= True
        splot.waveregions(*args,**kwargs)
    # NOTE(review): the label is assumed to be drawn once per figure, after
    # the loop -- confirm against the upstream layout
    bovy_plot.bovy_text(r'$\mathrm{Mg}$',
                        top_left=True,fontsize=10,backgroundcolor='w')
    bovy_plot.bovy_end_print(plotname.replace('ELEM','Mg'))
    # Also do Si
    for ii in range(5):
        tindx= (fehdata[define_rcsample._AFETAG] > ii*0.05-0.025)\
            *(fehdata[define_rcsample._AFETAG] <= (ii+1)*0.05-0.025)
        args= (apstack.median(pr[tindx][:12]),)
        kwargs={'startindxs':[4469, 4624,5171, 7205, 7843],
                'endindxs':[4488, 4644,5182, 7243, 7871],
                'yrange':[-0.05,0.02],
                'ylabel':'',
                'cleanZero':False,
                '_markwav':apwindow.lines('Si'),
                'zorder':int(numpy.floor(numpy.random.uniform()*5)),
                'color':colormap(colorFunc(ii*0.05)),
                'overplot':ii>0,
                'fig_width':6.,
                'markLines':True}
        if ii>0: kwargs.pop('fig_width')
        if ii != 4:
            kwargs.pop('_markwav')
            kwargs.pop('markLines')
        kwargs['_startendskip']= 0
        kwargs['_noxticks']= True
        kwargs['_labelwav']= True
        splot.waveregions(*args,**kwargs)
    bovy_plot.bovy_text(r'$\mathrm{Si}$',
                        top_left=True,fontsize=10,backgroundcolor='w')
    # Si is saved under the label 'Si2'
    bovy_plot.bovy_end_print(plotname.replace('ELEM','Si2'))
    # Also do Oxygen
    for ii in range(5):
        tindx= (fehdata[define_rcsample._AFETAG] > ii*0.05-0.025)\
            *(fehdata[define_rcsample._AFETAG] <= (ii+1)*0.05-0.025)
        args= (apstack.median(pr[tindx][:12]),)
        # Oxygen regions are given as startlams/endlams rather than the
        # startindxs/endindxs used for the other elements
        kwargs={'startlams':[15558,16242,16536,16720],
                'endlams':[15566,16250,16544,16728],
                'yrange':[-0.05,0.02],
                'ylabel':'',
                'cleanZero':False,
                '_markwav':[15562,16246,16539,16723.5],
                'zorder':int(numpy.floor(numpy.random.uniform()*5)),
                'color':colormap(colorFunc(ii*0.05)),
                'overplot':ii>0,
                'fig_width':5.,
                'markLines':True}
        if ii>0: kwargs.pop('fig_width')
        if ii != 4:
            kwargs.pop('_markwav')
            kwargs.pop('markLines')
        kwargs['_startendskip']= 0
        kwargs['_noxticks']= True
        kwargs['_labelwav']= True
        splot.waveregions(*args,**kwargs)
    bovy_plot.bovy_text(r'$\mathrm{O}$',
                        top_left=True,fontsize=10,backgroundcolor='w')
    bovy_plot.bovy_end_print(plotname.replace('ELEM','O'))
    # Also do Ti
    for ii in range(5):
        tindx= (fehdata[define_rcsample._AFETAG] > ii*0.05-0.025)\
            *(fehdata[define_rcsample._AFETAG] <= (ii+1)*0.05-0.025)
        args= (apstack.median(pr[tindx][:12]),)
        kwargs={'startindxs':[1116,2100,2899],
                'endindxs':[1146,2124,2922],
                'yrange':[-0.05,0.02],
                'ylabel':'',
                'cleanZero':False,
                '_markwav':apwindow.lines('Ti'),
                'zorder':int(numpy.floor(numpy.random.uniform()*5)),
                'color':colormap(colorFunc(ii*0.05)),
                'overplot':ii>0,
                'fig_width':3.5,
                'markLines':True}
        if ii>0: kwargs.pop('fig_width')
        if ii != 4:
            kwargs.pop('_markwav')
            kwargs.pop('markLines')
        kwargs['_startendskip']= 0
        kwargs['_noxticks']= True
        kwargs['_labelwav']= True
        splot.waveregions(*args,**kwargs)
    bovy_plot.bovy_text(r'$\mathrm{Ti}$',
                        top_left=True,fontsize=10,backgroundcolor='w')
    bovy_plot.bovy_end_print(plotname.replace('ELEM','Ti'))
    return None
pcaivar[pcaivar > 0.] = 1. else: pcaivar[pcaivar > args.weight_max] = args.weight_max ### Get the mean tmeanspec = sp.zeros(pcaflux.shape[1], dtype='float32') for i in range(1): step = sp.average(pcaflux, weights=pcaivar, axis=0) #print('INFO: Removing mean at step: ',i,step.min(), step.max()) tmeanspec += step #pcaflux -= step ### PCA print('INFO: Starting EMPCA') model = empca.empca(pcaflux, weights=pcaivar, niter=args.niter, nvec=args.nvec) for i in range(model.coeff.shape[0]): model.coeff[i] /= sp.linalg.norm(model.coeff[i]) ### TODO: if not edge interpolate pcamodel = sp.zeros((model.eigvec.shape[0], pcawave.size)) meanspec = sp.zeros((1, pcawave.size)) meanspec[0][w] = tmeanspec for i in range(model.eigvec.shape[0]): #f = interp1d(pcawave[w],model.eigvec[i]) pcamodel[i, w] = model.eigvec[i] ### Save ### TODO: Add missing comments out = fitsio.FITS(args.out, 'rw', clobber=True)
def do_subtraction(image, template, cutoff=1.0, radius=10, boundary=None,
                   maskreg=None, kerneltype=None, method=None, sqrt=False,
                   rdnoise=False):
    """Subtract a kernel-matched template from an image.

    Parameters
    ----------
    image, template : fits objects
        Only ``[0].data`` and ``[0].header`` are used. The headers must
        provide ``PC001001``, ``SATCNTS`` and ``EXPTIME`` (and ``BSTDDEV``
        when ``rdnoise`` is true).
    cutoff : float
        Template pixels above ``cutoff * SATCNTS`` are treated as saturated.
    radius : int
        Half-size of the square masked around each saturated pixel.
    boundary : str, optional
        ``"xlo,xhi,ylo,yhi"`` sub-region to process; default is the full
        image.
    maskreg : str, optional
        ``"xlo,xhi,ylo,yhi"`` region of the difference image to replace with
        noise.
    kerneltype : str, optional
        ``"gauss_poly"`` (default), ``"gauss_hermite"`` or ``"delta"``.
    method : str, optional
        ``"PCA"`` selects the EMPCA-based model; anything else uses the
        weighted kernel fit.
    sqrt : bool
        Use square-root weighting (see the two branches below).
    rdnoise : bool
        Read the read noise from the ``BSTDDEV`` header keyword instead of
        assuming 3.

    Returns
    -------
    tuple
        ``(diffimage, diff_var, efftemplate, Zs, R_var, chisq)``.

    Raises
    ------
    ValueError
        If ``kerneltype`` is not one of the supported names.
    """
    if kerneltype is None:  #- Gaussian poly as default
        kerneltype = "gauss_poly"

    #- Check for kerneltype
    if kerneltype not in ["gauss_poly", "gauss_hermite", "delta"]:
        raise ValueError("Not a valid kernel type. Give a valid kernel type.")

    pixelsize = 11
    #- instantiate the base Kernel
    Kern = kernels.Kernels(kerneltype, pixelsize)

    #- get all kernels for this type
    if Kern.name == "gauss_poly":
        KK = kernels.Gauss_Poly(Kern)  #- using default sigmas and orders
        allk = KK.all_kernels()
    elif Kern.name == "gauss_hermite":
        KK = kernels.Gauss_Hermite(Kern)
        allk = KK.all_kernels()
    elif Kern.name == "delta":
        KK = kernels.Delta(Kern)
        allk = KK.all_kernels()

    # Single-argument print calls give the same output on Python 2 and 3.
    print("using kernel {}".format(kerneltype))

    # NOTE(review): ``mask`` is built and reported but not applied to the
    # data below -- confirm whether it is still needed.
    mask = np.ones_like(image[0].data)
    print("mask shape {}".format(mask.shape))
    if maskreg is not None:
        print("Mask reg. {}".format(maskreg))
        mxlo, mxhi, mylo, myhi = map(int, maskreg.split(','))
        mask[mxlo:mxhi, mylo:myhi] = 0

    if boundary is not None:
        xlo, xhi, ylo, yhi = map(int, boundary.split(','))
    else:
        xlo, xhi = 0, image[0].data.shape[0]
        ylo, yhi = 0, image[0].data.shape[1]

    #- image
    image_data = image[0].data[xlo:xhi, ylo:yhi]
    sky_image = util.get_background_level(image_data, sextractor=True)
    Zs = image_data - sky_image
    imghdr = image[0].header
    if imghdr["PC001001"] < 0:  #- Coordinate rotation matrix opposite!
        Zs = np.rot90(np.rot90(Zs))
        print("rotating image")

    #- template
    template_data = template[0].data[xlo:xhi, ylo:yhi]
    sky_template = util.get_background_level(template_data, sextractor=True)
    Zt = template_data - sky_template
    print(sky_template)
    temphdr = template[0].header
    if temphdr["PC001001"] < 0:  #- Coordinate rotation matrix opposite!
        Zt = np.rot90(np.rot90(Zt))
        print("rotating template")

    #- Get needed header parameters to propagate from ROTSE image file
    satcnts_t = temphdr["SATCNTS"]
    satcnts_i = imghdr["SATCNTS"]

    #- mask all pixels higher than cutoff of saturation level
    masked_value = 1.0e-30  # sentinel marking masked pixels
    print("SatCnts_i {}".format(satcnts_i))
    sat_rows, sat_cols = np.where(Zt > satcnts_t * cutoff)
    print("SatCnts_t {}".format(satcnts_t))
    for row, col in zip(sat_rows, sat_cols):
        # Clamp the lower bounds: a negative start would wrap around and
        # leave saturated pixels near the edge unmasked.
        rlo = max(row - radius, 0)
        clo = max(col - radius, 0)
        Zt[rlo:row + radius, clo:col + radius] = masked_value
        Zs[rlo:row + radius, clo:col + radius] = masked_value
    print("No. of pixels above cutoff {}".format(sat_rows.shape[0]))

    #- Add noise to the masked regions:
    rmsi = np.std(Zs[(Zs > np.percentile(Zs, 5)) &
                     (Zs < np.percentile(Zs, 95))])
    rmst = np.std(Zt[(Zt > np.percentile(Zt, 5)) &
                     (Zt < np.percentile(Zt, 95))])
    # Size each draw from its own array so the assignment cannot fail if the
    # two masked counts ever differ.
    masked_t = Zt == masked_value
    masked_s = Zs == masked_value
    Zt[masked_t] = np.random.normal(0, rmst, size=int(masked_t.sum()))
    Zs[masked_s] = np.random.normal(0, rmsi, size=int(masked_s.sum()))

    if rdnoise:
        readnoise_t = temphdr["BSTDDEV"]
        readnoise_i = imghdr["BSTDDEV"]
    else:
        #- assume a sparse noisy image. derive from the pixel counts.
        #- This is too large!
        #readnoise_i=np.median(Zs)-np.percentile(Zs,15.865)
        #readnoise_t=np.median(Zt)-np.percentile(Zt,15.865)
        readnoise_i = 3.
        readnoise_t = 3.
    print("Rdnoise image: {}".format(readnoise_i))
    print("Rdnoise template: {}".format(readnoise_t))

    exptime_t = temphdr["EXPTIME"]
    exptime_i = imghdr["EXPTIME"]
    print(readnoise_i)

    # variance should be before background subtraction
    sc_variance = image_data.clip(0) + readnoise_i**2
    # The template variance uses the template's own read noise.
    temp_variance = template_data.clip(0) + readnoise_t**2

    convtemp = util.get_convolve_kernel(Zt, allk)  #2D [nvec,nobs]
    efftemplate = np.zeros_like(Zs)

    if method == "PCA":  #- use empca
        print("using pca")
        #- this requires empca package https://github.com/sbailey/empca/
        import empca
        nx, ny = Zs.shape[0:2]
        n_im = convtemp.shape[0]

        #- list convolved template as data
        data = np.array([convtemp[i, :] for i in range(n_im)], 'f')
        # One copy of the template variance per convolved template (the
        # count was previously hard-coded to 31).
        var_data = np.tile(temp_variance.ravel(),
                           n_im).reshape(n_im, np.size(temp_variance))
        print(var_data.shape)
        wt = 1. / var_data
        print(data.shape)

        #- Add image to the data list
        all_data = np.vstack((data, Zs.ravel()))
        all_wt = np.vstack((wt, 1. / sc_variance.ravel()))
        if sqrt:
            all_wt = all_wt**0.5
        model = empca.empca(all_data, all_wt, niter=15, nvec=10, smooth=0)
        # The last row of coefficients belongs to the science image.
        efftemplate = model.eigvec.T.dot(model.coeff[-1]).reshape(nx, ny)
        chisq = model.rchi2()
        print("Full models dof {}".format(model.dof))
        print("Final chisq PCA {}".format(chisq))
        diff_var = sc_variance + temp_variance

    else:  #- kernel based
        print("kernel based")
        A = convtemp.T
        from scipy.sparse import spdiags
        diff_var = sc_variance + temp_variance
        if sqrt:
            diff_var = np.sqrt(diff_var)
            ivar = 1 / diff_var.ravel()
            wt = spdiags(np.sqrt(ivar), 0, ivar.size, ivar.size)
        else:
            ivar = 1 / diff_var.ravel()
            wt = spdiags(ivar, 0, ivar.size, ivar.size)
        b = Zs.ravel()
        xsol = util.solve_for_coeff(A, b, W=wt)
        efftemplate = convtemp.T.dot(xsol).reshape(Zs.shape)
        dof = Zs.size - convtemp.shape[0]
        chisq = np.sum((Zs - efftemplate)**2 / diff_var) / dof
        print("dof Kernel based {}".format(dof))
        print("Chisq Kernel based {}".format(chisq))

    #- diff image
    diffimage = (Zs - efftemplate)
    if maskreg is not None:
        diffrms = np.std(
            diffimage[(diffimage > np.percentile(diffimage, 5)) &
                      (diffimage < np.percentile(diffimage, 95))])
        print("diff rms {} rmsi {}".format(diffrms, rmsi))
        # Replace the masked region with noise at the difference-image rms.
        diffimage[mxlo:mxhi, mylo:myhi] = np.random.normal(
            scale=diffrms, size=(mxhi - mxlo, myhi - mylo))

    # mse estimate:
    mse = 1. / (np.size(diffimage)) * diffimage.ravel().T.dot(
        diffimage.ravel())
    print("mse {}".format(mse))

    #- fraction of the variance recovered.
    R_var = 1.0 - np.var(diffimage) / np.var(Zs)
    return diffimage, diff_var, efftemplate, Zs, R_var, chisq