예제 #1
0
def prep_data(date):
    """Load the test spectra for `date`, match their labels, and normalize.

    `date` is interpolated into every input and output file name.  The
    function reads the list of test objects, loads their spectra with
    `load_spectra`, looks up the matching rows of the LAMOST label table,
    builds a `dataset.Dataset`, plots the SNR distribution and runs Gaussian
    smoothing continuum normalization.

    Files written (all under ``output/`` except the figure):
    ``<date>_ids.npz``, ``<date>_SNRs.npz``, ``<date>_frac_good_pix.npz``,
    ``<date>_tr_label.npz``, ``<date>_norm.npz`` and ``<date>_SNRdist.png``.

    Raises
    ------
    IndexError
        If a test object ID has no matching row in the label table.
    """
    dir_files = "/home/annaho/xcalib_4labels/test_obj"
    dir_dat = "/home/share/LAMOST/DR2/DR2_release/"

    test_ID = np.loadtxt("%s/%s_test_obj.txt" % (dir_files, date), dtype=str)
    print("%s obj" % len(test_ID))
    np.savez("output/%s_ids.npz" % date, test_ID)

    # load_spectra is handed full paths here, so prefix the data directory.
    test_ID_long = np.array([dir_dat + f for f in test_ID])
    wl, test_flux, test_ivar, npix, SNRs = load_spectra(test_ID_long)
    np.savez("output/%s_SNRs.npz" % date, SNRs)
    np.savez("output/%s_frac_good_pix.npz" % date, npix)

    # Column 0 of the label table holds the object ID; the remaining columns
    # are the label values.  Reorder the rows to follow test_ID.
    lamost_info = np.load("lamost_labels/lamost_labels_%s.npz" % date)['arr_0']
    inds = np.array([np.where(lamost_info[:, 0] == a)[0][0] for a in test_ID])
    lamost_label = lamost_info[inds, :][:, 1:].astype(float)
    np.savez("output/%s_tr_label" % date, lamost_label)

    # Only the first two test spectra are passed in the flux/ivar slots that
    # precede the label array -- presumably placeholders for a training set
    # that is not needed here (TODO confirm against dataset.Dataset).
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    # Raw strings keep the LaTeX backslashes without invalid escape sequences.
    # NOTE(review): four label names are set; confirm lamost_label has a
    # matching number of columns.
    ds.set_label_names([r'T_{eff}', r'\log g', r'[M/H]', r'[\alpha/Fe]'])
    ds.diagnostics_SNR(figname="%s_SNRdist.png" % date)

    ds.continuum_normalize_gaussian_smoothing(L=50)
    np.savez("output/%s_norm.npz" % date, ds.test_flux, ds.test_ivar)
예제 #2
0
import glob
import os
import pickle
import random

from lamost import load_spectra, load_labels
from TheCannon import dataset

# Render figure text with LaTeX in a serif font.
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# STEP 1: DATA MUNGING
dir_dat = "example_LAMOST/Data_All"

# Keep only the file names.  os.path.basename is used instead of
# np.char.lstrip because lstrip removes any leading characters found in its
# argument (a character set), not a literal prefix, and could therefore eat
# the start of a file name.
allfiles = np.array(
    [os.path.basename(path)
     for path in glob.glob("example_LAMOST/Data_All/*fits")],
    dtype=str)

# Everything that is not a training object becomes a test object.
tr_ID = np.loadtxt("tr_files.txt", dtype=str)
test_ID = np.setdiff1d(allfiles, tr_ID)

tr_IDs, wl, tr_flux, tr_ivar = load_spectra(dir_dat, tr_ID)
label_file = "reference_labels.csv"
tr_label = load_labels(label_file, tr_ID)
test_IDs, wl, test_flux, test_ivar = load_spectra(dir_dat, test_ID)

# Keep training objects whose first label is positive and whose third label
# is above -5.
good = np.logical_and(tr_label[:, 0] > 0, tr_label[:, 2] > -5)
tr_IDs = tr_IDs[good]
tr_flux = tr_flux[good]
tr_ivar = tr_ivar[good]
tr_label = tr_label[good]

# NOTE: this rebinds the name `dataset` from the imported module to the
# Dataset instance; the name is kept because the lines below rely on it.
dataset = dataset.Dataset(
    wl, tr_IDs, tr_flux, tr_ivar, tr_label, test_IDs, test_flux, test_ivar)
# Raw strings keep the LaTeX backslashes without invalid escape sequences.
dataset.set_label_names([r'T_{eff}', r'\log g', r'[M/H]', r'[\alpha/Fe]'])

# Pickle files must be opened in binary mode; the context manager closes the
# handle.  NOTE: only unpickle files from a trusted source.
with open("cont.p", "rb") as cont_file:
    cont = pickle.load(cont_file)

norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
    dataset.continuum_normalize(cont)

# Where the continuum is zero, replace it in place with the raw flux.
tr_cont, test_cont = cont
tr_cont[tr_cont == 0] = dataset.tr_flux[tr_cont == 0]
test_cont[test_cont == 0] = dataset.test_flux[test_cont == 0]
예제 #3
0
def run(date):
    """Apply the already-trained Cannon model to the test objects of `date`.

    `date` is interpolated into every input and output file name.  The model
    arrays are read from ``./model_*.npz``, the test spectra are loaded with
    `load_spectra`, the matching LAMOST labels are looked up, the spectra are
    continuum normalized (or read back from ``./<date>_norm.npz`` when that
    file exists) and labels are inferred.

    Files written in the working directory: ``<date>_ids.npz`` (when absent),
    ``<date>_lamost_label.npz``, ``<date>_norm.npz`` (when absent),
    ``<date>_cannon_labels.npz``, ``<date>_formal_errors.npz``,
    ``<date>_chisq.npz`` plus the SNR and 1-to-1 diagnostic figures.

    Raises
    ------
    IndexError
        If a test object ID has no matching row in the label table.
    """
    print(date)

    # Training step has already been completed. Load the model,
    spectral_model = model.CannonModel(2)  # 2 = quadratic model
    spectral_model.coeffs = np.load("./model_coeffs.npz")['arr_0']
    spectral_model.scatters = np.load("./model_scatter.npz")['arr_0']
    spectral_model.chisqs = np.load("./model_chisqs.npz")['arr_0']
    spectral_model.pivots = np.load("./model_pivots.npz")['arr_0']

    # Load the wavelength array
    wl = np.load("wl.npz")['arr_0']

    # Load the test set.  The ID list comes from the cache when it exists,
    # otherwise from the text file; the spectra are loaded in both cases
    # because the flux and ivar arrays are needed below and are not cached.
    # NOTE(review): assumes the cached IDs are valid input for load_spectra
    # (they are the IDs it returned on an earlier run) -- confirm.
    dir_dat = "lamost_dr2/DR2_release"
    filename = "./%s_ids.npz" % date
    ids_cached = bool(glob.glob(filename))
    if ids_cached:
        test_ID = np.load(filename)['arr_0']
    else:
        test_ID = np.loadtxt("lamost_dr2/%s_test_obj.txt" % date, dtype=str)
    test_IDs, wl, test_flux, test_ivar = load_spectra(dir_dat, test_ID)
    if not ids_cached:
        np.savez("./%s_ids" % date, test_IDs)

    print("%s test objects" % len(test_IDs))

    # Load the corresponding LAMOST labels.  Column 0 of the table is the
    # object ID; the remaining columns fill the first three label slots and
    # the fourth slot stays zero.
    labels = np.load("lamost_dr2/lamost_labels_%s.npz" % date)['arr_0']
    inds = np.array([np.where(labels[:, 0] == a)[0][0] for a in test_IDs])
    nstars = len(test_IDs)
    lamost_labels = np.zeros((nstars, 4))
    lamost_labels[:, 0:3] = labels[inds, :][:, 1:].astype(float)
    np.savez("./%s_lamost_label" % date, lamost_labels)

    # Set dataset object; the test spectra are also passed in the leading
    # (training) slots.
    data = dataset.Dataset(
            wl, test_IDs, test_flux, test_ivar,
            lamost_labels, test_IDs, test_flux, test_ivar)

    # set the headers for plotting (raw strings keep the LaTeX backslashes
    # without invalid escape sequences)
    data.set_label_names([r'T_{eff}', r'\log g', r'[M/H]', r'[\alpha/Fe]'])

    # Plot SNR distribution
    data.diagnostics_SNR(figname="%s_SNRdist.png" % date)

    # Continuum normalize, reusing a previous result when available.
    # np.savez appends ".npz", so that is the name to look for.
    norm_file = "./%s_norm.npz" % date
    if glob.glob(norm_file):
        with np.load(norm_file) as cached:
            data.test_flux = cached['arr_0']
            data.test_ivar = cached['arr_1']
    else:
        # Shrink the training slots to a single object before normalizing --
        # presumably to avoid normalizing every spectrum twice (TODO confirm).
        data.tr_ID = data.tr_ID[0]
        data.tr_flux = data.tr_flux[0, :]
        data.tr_ivar = data.tr_ivar[0, :]
        data.continuum_normalize_gaussian_smoothing(L=50)
        np.savez("./%s_norm" % date, data.test_flux, data.test_ivar)

    # Infer labels
    errs, chisq = spectral_model.infer_labels(data)
    np.savez("./%s_cannon_labels.npz" % date, data.test_label_vals)
    np.savez("./%s_formal_errors.npz" % date, errs)
    np.savez("./%s_chisq.npz" % date, chisq)

    # Make plots
    data.test_label_vals = data.test_label_vals[:, 0:3]  # no alpha
    data.set_label_names([r'T_{eff}', r'\log g', r'[M/H]'])
    data.diagnostics_1to1(figname="%s_1to1_label" % date)
예제 #4
0
import glob
import os

import numpy as np

from lamost import load_spectra

dir_dat = "example_LAMOST/Data_All"

# We want just the file names.  os.path.basename is used instead of
# np.char.lstrip because lstrip removes any leading characters found in its
# argument (a character set), not a literal prefix, and could therefore eat
# the start of a file name.
allfiles = np.array(
    [os.path.basename(path)
     for path in glob.glob("example_LAMOST/Data_All/*fits")],
    dtype=str)
ID, wl, flux, ivar = load_spectra(dir_dat, allfiles)

# Count the pixels with non-zero inverse variance in every spectrum, however
# many spectra were loaded.
npix = np.count_nonzero(ivar, axis=1)
# NOTE(review): 3626 is presumably the number of pixels per spectrum -- confirm.
good_frac = npix / 3626.

# Per-spectrum SNR: median of flux * sqrt(ivar), ignoring pixels where that
# product is exactly zero.
SNR_raw = flux * ivar**0.5
bad = SNR_raw == 0
SNR_raw = np.ma.array(SNR_raw, mask=bad)
SNR = np.ma.median(SNR_raw, axis=1)

# we want to have at least 94% of pixels, and SNR of at least 100
good = np.logical_and(good_frac > 0.94, SNR > 100)
tr_files = ID[good]  # 945 spectra

# One selected file name per line; the context manager closes the file even
# if a write fails.
with open("tr_files.txt", "w") as outputf:
    for tr_file in tr_files:
        outputf.write(tr_file + '\n')
예제 #5
0
def run(date):
    """Apply the already-trained Cannon model to the test objects of `date`.

    `date` is interpolated into every input and output file name.  The model
    arrays are read from ``./*.npz``, the test spectra are loaded with
    `load_spectra`, the matching LAMOST labels are looked up, the spectra are
    continuum normalized (or read back from ``output/<date>_norm.npz`` when
    that file exists) and labels are inferred.

    Files written: ``output/<date>_ids.npz``,
    ``output/<date>_lamost_label.npz``, ``output/<date>_SNR.npz``,
    ``output/<date>_norm.npz`` (when absent),
    ``output/<date>_cannon_labels.npz``, ``./<date>_formal_errors.npz``,
    ``./<date>_chisq.npz`` plus the SNR and 1-to-1 diagnostic figures.

    Raises
    ------
    IndexError
        If a test object ID has no matching row in the label table.
    """
    # Training step has already been completed. Load the model,
    spectral_model = model.CannonModel(2)  # 2 = quadratic model
    spectral_model.coeffs = np.load("./coeffs.npz")['arr_0']
    spectral_model.scatters = np.load("./scatter.npz")['arr_0']
    spectral_model.chisqs = np.load("./chisqs.npz")['arr_0']
    spectral_model.pivots = np.load("./pivots.npz")['arr_0']

    # Load the wavelength array (rebound below by load_spectra)
    wl = np.load("wl.npz")['arr_0']

    # Load the test set,
    test_ID = np.loadtxt("test_obj/%s_test_obj.txt" % date, dtype=str)
    print("%s test objects" % len(test_ID))
    dir_dat = "/home/share/LAMOST/DR2/DR2_release"
    test_IDs, wl, test_flux, test_ivar = load_spectra(dir_dat, test_ID)
    np.savez("output/%s_ids" % date, test_IDs)

    # Load the corresponding LAMOST labels.  Column 0 of the table is the
    # object ID; the remaining columns fill the first three label slots and
    # the fourth slot stays zero.
    labels = np.load("lamost_labels/lamost_labels_%s.npz" % date)['arr_0']
    inds = np.array([np.where(labels[:, 0] == a)[0][0] for a in test_IDs])
    nstars = len(test_IDs)
    lamost_labels = np.zeros((nstars, 4))
    lamost_labels[:, 0:3] = labels[inds, :][:, 1:].astype(float)
    np.savez("output/%s_lamost_label" % date, lamost_labels)

    # Set dataset object; the test spectra are also passed in the leading
    # (training) slots.
    data = dataset.Dataset(
            wl, test_IDs, test_flux, test_ivar,
            lamost_labels, test_IDs, test_flux, test_ivar)

    # set the headers for plotting (raw strings keep the LaTeX backslashes
    # without invalid escape sequences)
    data.set_label_names([r'T_{eff}', r'\log g', r'[M/H]', r'[\alpha/Fe]'])

    # Plot SNR distribution
    data.diagnostics_SNR(figname="%s_SNRdist.png" % date)
    np.savez("output/%s_SNR" % date, data.test_SNR)

    # Continuum normalize, reusing a previous result when available.
    filename = "output/%s_norm.npz" % date
    if glob.glob(filename):
        print("already cont normalized")
        # Open the archive once and close it when both arrays are read.
        with np.load(filename) as cached:
            data.test_flux = cached['arr_0']
            data.test_ivar = cached['arr_1']
    else:
        # Shrink the training slots to a single object before normalizing --
        # presumably to avoid normalizing every spectrum twice (TODO confirm).
        data.tr_ID = data.tr_ID[0]
        data.tr_flux = data.tr_flux[0, :]
        data.tr_ivar = data.tr_ivar[0, :]
        data.continuum_normalize_gaussian_smoothing(L=50)
        np.savez("output/%s_norm" % date, data.test_flux, data.test_ivar)

    # Infer labels
    errs, chisq = spectral_model.infer_labels(data)
    np.savez("output/%s_cannon_labels.npz" % date, data.test_label_vals)
    # NOTE(review): these two go to the working directory while every other
    # product goes to output/ -- confirm this is intended.
    np.savez("./%s_formal_errors.npz" % date, errs)
    np.savez("./%s_chisq.npz" % date, chisq)

    # Make plots
    data.test_label_vals = data.test_label_vals[:, 0:3]  # so it doesn't try alpha
    data.set_label_names([r'T_{eff}', r'\log g', r'[M/H]'])
    data.diagnostics_1to1(figname="%s_1to1_label" % date)
예제 #6
0
import glob
import os

import numpy as np

from lamost import load_spectra

dir_dat = "example_LAMOST/Data_All"

# Reduce each globbed path to its file name.  np.char.lstrip is not suitable
# here: its second argument is a set of characters to strip, not a prefix, so
# it can also remove leading characters that belong to the file name.
found_paths = glob.glob("example_LAMOST/Data_All/*fits")
allfiles = np.array([os.path.basename(p) for p in found_paths], dtype=str)
ID, wl, flux, ivar = load_spectra(dir_dat, allfiles)

# Number of pixels with non-zero inverse variance, computed for every loaded
# spectrum rather than for a fixed count of rows.
npix = np.count_nonzero(ivar, axis=1)
# NOTE(review): 3626 is presumably the number of pixels per spectrum -- confirm.
good_frac = npix / 3626.

# Median over each spectrum of flux * sqrt(ivar); pixels where the product is
# exactly zero are masked out of the median.
SNR_raw = flux * ivar**0.5
bad = SNR_raw == 0
SNR_raw = np.ma.array(SNR_raw, mask=bad)
SNR = np.ma.median(SNR_raw, axis=1)

# we want to have at least 94% of pixels, and SNR of at least 100
good = np.logical_and(good_frac > 0.94, SNR > 100)
tr_files = ID[good]  # 945 spectra

# Write the selected file names, one per line; `with` guarantees the file is
# closed.
with open("tr_files.txt", "w") as outputf:
    for tr_file in tr_files:
        outputf.write(tr_file + '\n')
예제 #7
0
import numpy as np
from matplotlib import rc
from TheCannon import model
from TheCannon import dataset
from lamost import load_spectra, load_labels

# Render figure text with LaTeX in a serif font.
rc('text', usetex=True)
rc('font', family='serif')

# Training object IDs: column 1 of the comma-delimited overlap table.
tr_ID = np.loadtxt("example_PS1/ps_colors_ts_overlap.txt", 
                   usecols=(1,), dtype='str', delimiter=',')

dir_dat = "example_LAMOST/Data_All"
tr_IDs, wl, tr_flux, tr_ivar = load_spectra(dir_dat, tr_ID)

# Reference labels for the loaded objects; the first four columns are used as
# the training labels, in the order named in set_label_names below.
label_file = "apogee_dr12_labels.csv"
all_labels = load_labels(label_file, tr_IDs)
teff = all_labels[:,0]
logg = all_labels[:,1]
mh = all_labels[:,2]
alpha = all_labels[:,3]
tr_label = np.vstack((teff, logg, mh, alpha)).T

# The same spectra are passed in both the training and the test slots.
data = dataset.Dataset(
        wl, tr_IDs, tr_flux, tr_ivar, tr_label, 
        tr_IDs, tr_flux, tr_ivar)
# Raw strings keep the LaTeX backslashes without invalid escape sequences.
data.set_label_names([r'T_{eff}', r'\log g', r'[M/H]', r'[\alpha/Fe]'])
data.continuum_normalize_gaussian_smoothing(L=50)

# get colors