import numpy as np
from TheCannon import dataset
from lamost import load_spectra


def prep_data(date):
    dir_files = "/home/annaho/xcalib_4labels/test_obj"
    dir_dat = "/home/share/LAMOST/DR2/DR2_release/"

    # Load the test-object IDs for this date and save them
    test_ID = np.loadtxt("%s/%s_test_obj.txt" % (dir_files, date), dtype=str)
    print("%s obj" % len(test_ID))
    np.savez("output/%s_ids.npz" % date, test_ID)

    # Load the spectra, then save the per-spectrum SNR and good-pixel fraction
    test_ID_long = np.array([dir_dat + f for f in test_ID])
    wl, test_flux, test_ivar, npix, SNRs = load_spectra(test_ID_long)
    np.savez("output/%s_SNRs.npz" % date, SNRs)
    np.savez("output/%s_frac_good_pix.npz" % date, npix)

    # Sort the LAMOST labels into the same order as the test IDs
    lamost_info = np.load("lamost_labels/lamost_labels_%s.npz" % date)['arr_0']
    inds = np.array([np.where(lamost_info[:, 0] == a)[0][0] for a in test_ID])
    nstars = len(test_ID)
    lamost_info_sorted = np.zeros((nstars, 4))
    lamost_label = lamost_info[inds, :][:, 1:].astype(float)
    lamost_info_sorted[:, 0:3] = lamost_label
    np.savez("output/%s_tr_label" % date, lamost_label)

    # Build the dataset; two test spectra stand in for the training block,
    # since only the test block is used here
    ds = dataset.Dataset(
        wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :], lamost_label,
        test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
    ds.diagnostics_SNR(figname="%s_SNRdist.png" % date)

    # Continuum normalize and save the result
    ds.continuum_normalize_gaussian_smoothing(L=50)
    np.savez("output/%s_norm.npz" % date, ds.test_flux, ds.test_ivar)
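A minimal usage sketch for the function above; the date tag is hypothetical and only needs to match the <date>_test_obj.txt naming convention in dir_files.

# Illustrative call; "20121006" is a hypothetical date tag chosen to match
# the <date>_test_obj.txt files expected by prep_data.
prep_data("20121006")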
import numpy as np
import matplotlib.pyplot as plt
import pickle
import random
import glob
import os
from lamost import load_spectra, load_labels
from TheCannon import dataset

plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# STEP 1: DATA MUNGING
allfiles = glob.glob("example_LAMOST/Data_All/*fits")
# keep just the file names (np.char.lstrip strips a character set, not a prefix)
allfiles = np.array([os.path.basename(f) for f in allfiles])
tr_ID = np.loadtxt("tr_files.txt", dtype=str)
test_ID = np.setdiff1d(allfiles, tr_ID)

dir_dat = "example_LAMOST/Data_All"
tr_IDs, wl, tr_flux, tr_ivar = load_spectra(dir_dat, tr_ID)
label_file = "reference_labels.csv"
tr_label = load_labels(label_file, tr_ID)
test_IDs, wl, test_flux, test_ivar = load_spectra(dir_dat, test_ID)

# Discard reference objects with unphysical labels
good = np.logical_and(tr_label[:, 0] > 0, tr_label[:, 2] > -5)
tr_IDs = tr_IDs[good]
tr_flux = tr_flux[good]
tr_ivar = tr_ivar[good]
tr_label = tr_label[good]

ds = dataset.Dataset(
    wl, tr_IDs, tr_flux, tr_ivar, tr_label, test_IDs, test_flux, test_ivar)
ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])

# Continuum normalize with a pre-computed continuum (pickles are binary files)
cont = pickle.load(open("cont.p", "rb"))
norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
    ds.continuum_normalize(cont)

# Where the continuum is zero, substitute the raw flux
tr_cont, test_cont = cont
tr_cont[tr_cont == 0] = ds.tr_flux[tr_cont == 0]
test_cont[test_cont == 0] = ds.test_flux[test_cont == 0]
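The run() pipelines below load a trained model from coeffs/scatter/chisqs/pivots .npz files. As a hedged sketch only, assuming TheCannon's documented CannonModel(2).fit(ds) training call and reusing np and the ds object from the script above, the training step that could produce those files might look like this:

# Sketch of the training step; assumes TheCannon's CannonModel.fit workflow
# and reuses np and ds from the script above. The saved arrays correspond to
# the model attributes that the run() scripts below load back in.
from TheCannon import model

md = model.CannonModel(2)   # quadratic in the labels
md.fit(ds)                  # train on the reference (tr_*) block of ds
np.savez("model_coeffs.npz", md.coeffs)
np.savez("model_scatter.npz", md.scatters)
np.savez("model_chisqs.npz", md.chisqs)
np.savez("model_pivots.npz", md.pivots)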
import numpy as np
import glob
from TheCannon import model
from TheCannon import dataset
from lamost import load_spectra


def run(date):
    print(date)

    # Training step has already been completed: load the model
    spectral_model = model.CannonModel(2)  # 2 = quadratic model
    spectral_model.coeffs = np.load("./model_coeffs.npz")['arr_0']
    spectral_model.scatters = np.load("./model_scatter.npz")['arr_0']
    spectral_model.chisqs = np.load("./model_chisqs.npz")['arr_0']
    spectral_model.pivots = np.load("./model_pivots.npz")['arr_0']

    # Load the wavelength array
    wl = np.load("wl.npz")['arr_0']

    # Load the test set, re-using the cached ID list if it exists
    dir_dat = "lamost_dr2/DR2_release"
    filename = "./%s_ids.npz" % date
    if glob.glob(filename):
        test_ID = np.load(filename)['arr_0']
    else:
        test_ID = np.loadtxt("lamost_dr2/%s_test_obj.txt" % date, dtype=str)
    test_IDs, wl, test_flux, test_ivar = load_spectra(dir_dat, test_ID)
    np.savez("./%s_ids" % date, test_IDs)
    print("%s test objects" % len(test_ID))

    # Load the corresponding LAMOST labels, sorted into the same order as the IDs
    labels = np.load("lamost_dr2/lamost_labels_%s.npz" % date)['arr_0']
    inds = np.array([np.where(labels[:, 0] == a)[0][0] for a in test_IDs])
    nstars = len(test_IDs)
    lamost_labels = np.zeros((nstars, 4))
    lamost_labels[:, 0:3] = labels[inds, :][:, 1:].astype(float)
    np.savez("./%s_lamost_label" % date, lamost_labels)

    # Set up the dataset object (the test spectra double as the training block)
    data = dataset.Dataset(
        wl, test_IDs, test_flux, test_ivar, lamost_labels,
        test_IDs, test_flux, test_ivar)

    # Set the headers for plotting
    data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])

    # Plot the SNR distribution
    data.diagnostics_SNR(figname="%s_SNRdist.png" % date)

    # Continuum normalize, re-using the cached result if it exists
    filename = "./%s_norm.npz" % date
    if glob.glob(filename):
        data.test_flux = np.load(filename)['arr_0']
        data.test_ivar = np.load(filename)['arr_1']
    else:
        data.tr_ID = data.tr_ID[0]
        data.tr_flux = data.tr_flux[0, :]
        data.tr_ivar = data.tr_ivar[0, :]
        data.continuum_normalize_gaussian_smoothing(L=50)
        np.savez("./%s_norm" % date, data.test_flux, data.test_ivar)

    # Infer labels
    errs, chisq = spectral_model.infer_labels(data)
    np.savez("./%s_cannon_labels.npz" % date, data.test_label_vals)
    np.savez("./%s_formal_errors.npz" % date, errs)
    np.savez("./%s_chisq.npz" % date, chisq)

    # Make plots (drop alpha from the 1-to-1 comparison)
    data.test_label_vals = data.test_label_vals[:, 0:3]
    data.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    data.diagnostics_1to1(figname="%s_1to1_label" % date)
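A hedged sanity check that could be run on the files saved by run() above; the date tag is hypothetical, and the file names follow the savez calls in the function (np.savez appends ".npz" when it is missing).

# Illustrative check of one date's output; "20120201" is a hypothetical tag.
import numpy as np

date = "20120201"
cannon = np.load("./%s_cannon_labels.npz" % date)['arr_0']
lamost = np.load("./%s_lamost_label.npz" % date)['arr_0']
# Column 0 is T_eff in both arrays, per the label ordering above
print("median Teff offset: %.1f K" % np.median(cannon[:, 0] - lamost[:, 0]))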
import numpy as np
import glob
import os
from lamost import load_spectra

allfiles = np.array(glob.glob("example_LAMOST/Data_All/*fits"))
# we want just the file names (np.char.lstrip strips a character set, not a prefix)
allfiles = np.array([os.path.basename(f) for f in allfiles])
dir_dat = "example_LAMOST/Data_All"
ID, wl, flux, ivar = load_spectra(dir_dat, allfiles)

# Fraction of pixels with nonzero inverse variance in each of the
# 11057 spectra (3626 pixels per spectrum in this example set)
npix = np.array([np.count_nonzero(ivar[jj, :]) for jj in range(ivar.shape[0])])
good_frac = npix / float(ivar.shape[1])

# Median SNR per spectrum, masking pixels where flux * sqrt(ivar) is zero
SNR_raw = flux * ivar**0.5
bad = SNR_raw == 0
SNR_raw = np.ma.array(SNR_raw, mask=bad)
SNR = np.ma.median(SNR_raw, axis=1)

# we want to have at least 94% of pixels, and SNR of at least 100
good = np.logical_and(good_frac > 0.94, SNR > 100)
tr_files = ID[good]  # 945 spectra

with open("tr_files.txt", "w") as outputf:
    for tr_file in tr_files:
        outputf.write(tr_file + '\n')
import numpy as np
import glob
from TheCannon import model
from TheCannon import dataset
from lamost import load_spectra


def run(date):
    # Training step has already been completed: load the model
    spectral_model = model.CannonModel(2)  # 2 = quadratic model
    spectral_model.coeffs = np.load("./coeffs.npz")['arr_0']
    spectral_model.scatters = np.load("./scatter.npz")['arr_0']
    spectral_model.chisqs = np.load("./chisqs.npz")['arr_0']
    spectral_model.pivots = np.load("./pivots.npz")['arr_0']

    # Load the wavelength array
    wl = np.load("wl.npz")['arr_0']

    # Load the test set
    test_ID = np.loadtxt("test_obj/%s_test_obj.txt" % date, dtype=str)
    print("%s test objects" % len(test_ID))
    dir_dat = "/home/share/LAMOST/DR2/DR2_release"
    test_IDs, wl, test_flux, test_ivar = load_spectra(dir_dat, test_ID)
    np.savez("output/%s_ids" % date, test_IDs)
    #np.savez("./%s_data_raw" %date, test_flux, test_ivar)

    # Load the corresponding LAMOST labels, sorted into the same order as the IDs
    labels = np.load("lamost_labels/lamost_labels_%s.npz" % date)['arr_0']
    inds = np.array([np.where(labels[:, 0] == a)[0][0] for a in test_IDs])
    nstars = len(test_IDs)
    lamost_labels = np.zeros((nstars, 4))
    lamost_labels[:, 0:3] = labels[inds, :][:, 1:].astype(float)
    np.savez("output/%s_lamost_label" % date, lamost_labels)

    # Set up the dataset object (the test spectra double as the training block)
    data = dataset.Dataset(
        wl, test_IDs, test_flux, test_ivar, lamost_labels,
        test_IDs, test_flux, test_ivar)

    # Set the headers for plotting
    data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])

    # Plot the SNR distribution
    data.diagnostics_SNR(figname="%s_SNRdist.png" % date)
    np.savez("output/%s_SNR" % date, data.test_SNR)

    # Continuum normalize, re-using the cached result if it exists
    filename = "output/%s_norm.npz" % date
    if glob.glob(filename):
        print("already cont normalized")
        data.test_flux = np.load(filename)['arr_0']
        data.test_ivar = np.load(filename)['arr_1']
    else:
        data.tr_ID = data.tr_ID[0]
        data.tr_flux = data.tr_flux[0, :]
        data.tr_ivar = data.tr_ivar[0, :]
        data.continuum_normalize_gaussian_smoothing(L=50)
        np.savez("output/%s_norm" % date, data.test_flux, data.test_ivar)

    # Infer labels
    errs, chisq = spectral_model.infer_labels(data)
    np.savez("output/%s_cannon_labels.npz" % date, data.test_label_vals)
    np.savez("./%s_formal_errors.npz" % date, errs)
    np.savez("./%s_chisq.npz" % date, chisq)

    # Make plots (drop alpha so the 1-to-1 plot doesn't try to include it)
    data.test_label_vals = data.test_label_vals[:, 0:3]
    data.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    data.diagnostics_1to1(figname="%s_1to1_label" % date)
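A hedged driver for the function above, reusing np and run from this script; the dates.txt listing of date tags is an assumption for illustration, not a file referenced in the original scripts.

# Hypothetical driver: process every date tag listed in "dates.txt"
# (an assumed file, one tag per line), skipping dates whose inputs are missing.
if __name__ == "__main__":
    dates = np.loadtxt("dates.txt", dtype=str)
    for date in dates:
        try:
            run(date)
        except IOError as err:
            print("skipping %s: %s" % (date, err))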
import numpy as np
from matplotlib import rc
from TheCannon import model
from TheCannon import dataset
from lamost import load_spectra, load_labels

rc('text', usetex=True)
rc('font', family='serif')

tr_ID = np.loadtxt("example_PS1/ps_colors_ts_overlap.txt",
                   usecols=(1,), dtype='str', delimiter=',')
dir_dat = "example_LAMOST/Data_All"
tr_IDs, wl, tr_flux, tr_ivar = load_spectra(dir_dat, tr_ID)

label_file = "apogee_dr12_labels.csv"
all_labels = load_labels(label_file, tr_IDs)
teff = all_labels[:, 0]
logg = all_labels[:, 1]
mh = all_labels[:, 2]
alpha = all_labels[:, 3]
tr_label = np.vstack((teff, logg, mh, alpha)).T

data = dataset.Dataset(
    wl, tr_IDs, tr_flux, tr_ivar, tr_label, tr_IDs, tr_flux, tr_ivar)
data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
data.continuum_normalize_gaussian_smoothing(L=50)

# get colors