Ejemplo n.º 1
0
def test_step(date):
    """Run the Cannon test step for one LAMOST observing date.

    Loads the wavelength grid, the date's test spectra and LAMOST labels,
    fits Cannon labels from four different starting guesses, keeps the
    lowest-chi-squared solution per object, saves results to npz files,
    and writes diagnostic plots.

    Parameters
    ----------
    date: identifier used to locate the per-date input/output npz files
    """
    wl = np.load("../run_2_train_on_good/wl.npz")['arr_0']
    test_ID = np.load("%s_test_ids.npz" %date)['arr_0']
    test_flux = np.load("%s_test_flux.npz" %date)['arr_0']
    test_ivar = np.load("%s_test_ivar.npz" %date)['arr_0']

    nlabels = 4
    nobj = len(test_ID)

    lamost_label_3 = np.load("%s_lamost_label.npz" %date)['arr_0']
    # add extra column to make it symmetric with the inferred test labels
    toadd = np.ones(nobj)[...,None]
    lamost_label = np.hstack((lamost_label_3, toadd))

    # Only the first two spectra fill the "training" slot; the full arrays
    # are the test set (training happened in a separate run directory).
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2,:], test_ivar[0:2,:], lamost_label, 
            test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])

    # Rebuild the trained quadratic (order-2) model from the saved arrays.
    m = model.CannonModel(2)
    m.coeffs = np.load("../run_5_train_on_good/coeffs.npz")['arr_0']
    m.scatters = np.load("../run_5_train_on_good/scatters.npz")['arr_0']
    m.chisqs = np.load("../run_5_train_on_good/chisqs.npz")['arr_0']
    m.pivots = np.load("../run_5_train_on_good/pivots.npz")['arr_0']

    # Four starting guesses spanning hot/cool, high/low gravity, and
    # metal-rich/metal-poor corners, stored relative to the model pivots.
    nguesses = 4
    starting_guesses = np.zeros((nguesses,nlabels)) 
    hiT_hiG_hiM = np.array([  5.15273730e+03,   3.71762228e+00,   3.16861898e-01, 2.46907920e-02])
    hiT_hiG_loM = np.array([  5.16350098e+03,   3.45917511e+00,  -9.24426436e-01, 2.49296919e-01])
    loT_loG_hiM = np.array([  4.04936841e+03,   1.47109437e+00,   2.07210138e-01, 1.49733415e-02])
    loT_loG_loM = np.array([  4.00651318e+03,   8.35013509e-01,  -8.98257852e-01, 7.65705928e-02])
    starting_guesses[0,:] = hiT_hiG_hiM-m.pivots
    starting_guesses[1,:] = hiT_hiG_loM-m.pivots
    starting_guesses[2,:] = loT_loG_loM-m.pivots
    starting_guesses[3,:] = loT_loG_hiM-m.pivots

    labels = np.zeros((nguesses, nobj, nlabels)) # 4,10955,4
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    # Fit every object once per starting guess.
    for ii,guess in enumerate(starting_guesses):
        a,b,c = test_step_iteration(ds,m,starting_guesses[ii])
        labels[ii,:] = a
        chisq[ii,:] = b
        errs[ii,:] = c

    # For each object keep the solution with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj,val in enumerate(choose):
        best_labels[jj,:] = labels[:,jj,:][val]
        best_errs[jj,:] = errs[:,jj,:][val]

    np.savez("./%s_all_cannon_labels.npz" %date, best_labels)
    np.savez("./%s_cannon_label_chisq.npz" %date, best_chisq)
    np.savez("./%s_cannon_label_errs.npz" %date, best_errs)

    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %date)
    ds.diagnostics_1to1(figname = "%s_1to1_test_label.png" %date)
Ejemplo n.º 2
0
def test_step(date):
    """Cannon test step for one date: fit labels from 7 random APOGEE
    starting guesses and keep the lowest-chisq solution per object.

    Saves guess-level and best-fit labels/chisq/errors under output/ and
    writes a 1-to-1 diagnostic plot for the first three labels.

    Fix: the chisq-guesses file now saves ``chisq`` — it previously
    saved ``labels`` a second time.

    Parameters
    ----------
    date: identifier used to locate the per-date input/output npz files
    """
    direc = "../xcalib_4labels"
    wl = np.load("%s/wl.npz" % direc)['arr_0']
    test_ID = np.load("%s/output/%s_ids.npz" % (direc, date))['arr_0']
    print(str(len(test_ID)) + " objects")
    test_flux = np.load("%s/output/%s_norm.npz" % (direc, date))['arr_0']
    test_ivar = np.load("%s/output/%s_norm.npz" % (direc, date))['arr_1']

    lamost_label = np.load("%s/output/%s_tr_label.npz" %
                           (direc, date))['arr_0']
    apogee_label = np.load("./tr_label.npz")['arr_0']

    # Only two spectra fill the "training" slot; the full arrays form the
    # test set (training already happened in a separate run).
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(
        ['T_{eff}', '\log g', '[Fe/H]', '[\\alpha/Fe]', 'log M', 'A_k'])

    # Rebuild the trained quadratic model from saved arrays.
    m = model.CannonModel(2)
    m.coeffs = np.load("./coeffs.npz")['arr_0']
    m.scatters = np.load("./scatters.npz")['arr_0']
    m.chisqs = np.load("./chisqs.npz")['arr_0']
    m.pivots = np.load("./pivots.npz")['arr_0']

    nlabels = len(m.pivots)
    nobj = len(test_ID)

    # Starting guesses: labels of 7 random APOGEE objects, pivot-relative.
    nguesses = 7
    choose = np.random.randint(0, apogee_label.shape[0], size=nguesses)
    starting_guesses = apogee_label[choose, :] - m.pivots

    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c

    np.savez("output/%s_cannon_label_guesses.npz" % date, labels)
    # Bug fix: save the chisq array here, not the labels again.
    np.savez("output/%s_cannon_chisq_guesses.npz" % date, chisq)

    # Per object, keep the solution with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]

    np.savez("output/%s_all_cannon_labels.npz" % date, best_labels)
    np.savez("output/%s_cannon_label_chisq.npz" % date, best_chisq)
    np.savez("output/%s_cannon_label_errs.npz" % date, best_errs)

    ds.test_label_vals = best_labels
    #ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %date)
    # Plot only the first three labels in the 1-to-1 comparison.
    ds.test_label_vals = best_labels[:, 0:3]
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    ds.diagnostics_1to1(figname="%s_1to1_test_label" % date)
Ejemplo n.º 3
0
    def __init__(self, order):
        """Set up an (untrained) Cannon wrapper of the given polynomial order.

        order: polynomial order passed through to model.CannonModel.
        """
        # NOTE(review): 'm_oder' looks like a typo for 'm_order' — confirm
        # whether other methods read this exact attribute before renaming.
        self.m_oder = order
        # Training-data holders; populated later by other methods.
        self.m_train_dataset_newtype = None
        self.m_train_dataset = None

        # Underlying Cannon model; per-label errors disabled (useErrors=False).
        self.m_model = model.CannonModel(order, useErrors=False)
Ejemplo n.º 4
0
def train():
    """Train a quadratic Cannon model on the reference set (first 3 labels)
    and save coefficients/scatters/chisqs/pivots to the current directory.
    """
    wl = np.load("%s/../wl_cols.npz" % SPEC_DIR)['arr_0']
    tr_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    tr_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    # Keep only the first three label columns (Teff, logg, [Fe/H]).
    tr_label = tr_label[:, 0:3]
    tr_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    tr_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']

    # Reference objects double as the test set (self-consistency run).
    ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label, tr_id, tr_flux,
                         tr_ivar)
    # teff, logg, mh, cm, nm, am, ak
    ds.set_label_names(['T_{eff}', '\log g', '[Fe/H]'])  #, '[C/M]','[N/M]',
    #'[\\alpha/M]', 'A_k'])
    #ds.diagnostics_SNR()
    #ds.diagnostics_ref_labels()
    #np.savez("ref_snr.npz", ds.tr_SNR)

    print("Training model")
    nlab = ds.tr_label.shape[1]
    print(nlab)
    npix = len(ds.wl)
    print(npix)
    # All-ones filter: every label is fit at every pixel.
    filt = np.ones((nlab, npix), dtype=bool)
    print(filt)
    #filt[nlab-1,0:500] = 0
    m = model.CannonModel(2, wl_filter=filt)
    m.fit(ds)
    np.savez("./coeffs.npz", m.coeffs)
    np.savez("./scatters.npz", m.scatters)
    np.savez("./chisqs.npz", m.chisqs)
    np.savez("./pivots.npz", m.pivots)
    m.diagnostics_leading_coeffs(ds)
Ejemplo n.º 5
0
def test_step():
    """Self-test the culled Cannon model on the reference set.

    Fits labels from 10 random reference-label starting guesses, keeps the
    lowest-chisq solution per object, saves values/chisq/errors, and makes
    diagnostic plots.

    Fix: ``best_labels`` is now allocated as ``(nobj, nlabels)`` — matching
    the fitted label arrays and every sibling implementation — instead of
    ``tr_label.shape``, which breaks when the reference label table has a
    different number of columns than the model actually fits.
    """
    #wl = np.load("%s/wl.npz" %SPEC_DIR)['arr_0']
    wl = np.load("wl_cols.npz")['arr_0']
    test_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    tr_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    test_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    test_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']

    # Reference objects double as the test set (self-consistency run).
    ds = dataset.Dataset(wl, test_id, test_flux, test_ivar, tr_label, test_id,
                         test_flux, test_ivar)
    ds.set_label_names([
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/M]', 'A_k'
    ])

    # Rebuild the trained (culled) quadratic model from saved arrays.
    m = model.CannonModel(2)
    m.coeffs = np.load("./culled_coeffs.npz")['arr_0']
    m.scatters = np.load("./culled_scatters.npz")['arr_0']
    m.chisqs = np.load("./culled_chisqs.npz")['arr_0']
    m.pivots = np.load("./culled_pivots.npz")['arr_0']

    # Starting guesses: labels of 10 randomly chosen reference objects,
    # expressed relative to the model pivots.
    nguesses = 10
    nobj = len(ds.test_ID)
    nlabels = len(m.pivots)
    choose = np.random.randint(0, nobj, size=nguesses)
    starting_guesses = ds.tr_label[choose] - m.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c

    #np.savez("labels_all_starting_vals.npz", labels)
    #np.savez("chisq_all_starting_vals.npz", chisq)
    #np.savez("errs_all_starting_vals.npz", errs)

    # Per object, keep the solution with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    # Bug fix: allocate to match the fitted label arrays, not tr_label.
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]

    np.savez("./cannon_label_vals.npz", best_labels)
    np.savez("./cannon_label_chisq.npz", best_chisq)
    np.savez("./cannon_label_errs.npz", best_errs)

    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels()
    ds.diagnostics_1to1(figname="1to1_test_label")
Ejemplo n.º 6
0
def load_model(ii):
    """Reload the saved quadratic Cannon model for example *ii*.

    Reads the coefficient, scatter, chisq, and pivot arrays from the
    ./ex<ii>_*.npz files written by the training step and returns the
    reconstructed model object.
    """
    print("Loading model")
    loaded = model.CannonModel(2)
    templates = {"coeffs": "./ex%s_coeffs.npz",
                 "scatters": "./ex%s_scatters.npz",
                 "chisqs": "./ex%s_chisqs.npz",
                 "pivots": "./ex%s_pivots.npz"}
    for attr, template in templates.items():
        setattr(loaded, attr, np.load(template % ii)['arr_0'])
    return loaded
Ejemplo n.º 7
0
def train(ds, leave_out):
    """Fit a quadratic Cannon model to *ds* for one leave-one-out fold.

    Saves coeffs/scatters/chisqs/pivots together in a single npz bundle,
    writes the leading-coefficients plot, and returns the fitted model.
    """
    print("TRAINING")
    cannon = model.CannonModel(2)
    cannon.fit(ds)
    saved_arrays = (cannon.coeffs, cannon.scatters, cannon.chisqs,
                    cannon.pivots)
    np.savez("./model_%s.npz" % leave_out, *saved_arrays)
    fig = cannon.diagnostics_leading_coeffs(ds)
    plt.savefig("leading_coeffs_%s.png" % leave_out)
    plt.close()
    return cannon
Ejemplo n.º 8
0
def train(ds, ii):
    """ Run the training step, given a dataset object. """
    print("Loading model")
    cannon = model.CannonModel(2)
    print("Training...")
    cannon.fit(ds)
    # Persist each model array to its own per-example npz file.
    outputs = (("./ex%s_coeffs.npz", cannon.coeffs),
               ("./ex%s_scatters.npz", cannon.scatters),
               ("./ex%s_chisqs.npz", cannon.chisqs),
               ("./ex%s_pivots.npz", cannon.pivots))
    for template, array in outputs:
        np.savez(template % ii, array)
    fig = cannon.diagnostics_leading_coeffs(ds)
    plt.savefig("ex%s_leading_coeffs.png" % ii)
    return cannon
Ejemplo n.º 9
0
def load_model():
    """Load the saved Cannon model from the cn mass/age run directory.

    Returns
    -------
    m: CannonModel whose coeffs/scatters/chisqs are truncated to the first
       3626 pixels (the original comments say "no cols" — presumably the
       appended color columns are dropped; confirm against the training run).
    """
    direc = "/home/annaho/TheCannon/code/lamost/mass_age/cn"
    npix = 3626  # keep only the first 3626 pixels
    loaded = model.CannonModel(2)
    loaded.coeffs = np.load(direc + "/coeffs.npz")['arr_0'][0:npix, :]
    loaded.scatters = np.load(direc + "/scatters.npz")['arr_0'][0:npix]
    loaded.chisqs = np.load(direc + "/chisqs.npz")['arr_0'][0:npix]
    loaded.pivots = np.load(direc + "/pivots.npz")['arr_0']
    return loaded
Ejemplo n.º 10
0
def train():
    """Train a quadratic Cannon model on the normalized training spectra
    (first four labels) and save model arrays and diagnostic plots
    under DATA_DIR.
    """
    wl = np.load("%s/wl.npz" %DATA_DIR)['arr_0']
    tr_id = np.load("%s/tr_id.npz" %DATA_DIR)['arr_0']
    tr_flux = np.load("%s/tr_flux_norm.npz" %DATA_DIR)['arr_0']
    tr_ivar = np.load("%s/tr_ivar_norm.npz" %DATA_DIR)['arr_0']
    tr_label = np.load("%s/tr_label.npz" %DATA_DIR)['arr_0']
    val_id = np.load("%s/val_id.npz" %DATA_DIR)['arr_0']
    val_flux = np.load("%s/val_flux_norm.npz" %DATA_DIR)['arr_0']
    val_ivar = np.load("%s/val_ivar_norm.npz" %DATA_DIR)['arr_0']

    # Validation objects fill the test-set slot; only 4 labels are fit.
    ds = dataset.Dataset(
            wl, tr_id, tr_flux, tr_ivar, tr_label[:,0:4], 
            val_id, val_flux, val_ivar)
    ds.set_label_names(["Teff", "logg", "FeH", 'aFe'])

    np.savez("%s/tr_SNR.npz" %DATA_DIR, ds.tr_SNR)

    fig = ds.diagnostics_SNR()
    plt.savefig("%s/SNR_dist.png" %DATA_DIR)
    plt.close()

    fig = ds.diagnostics_ref_labels()
    plt.savefig("%s/ref_label_triangle.png" %DATA_DIR)
    plt.close()

    md = model.CannonModel(2)
    md.fit(ds)

    fig = md.diagnostics_leading_coeffs(ds)
    plt.savefig("%s/leading_coeffs.png" %DATA_DIR)
    plt.close()

    np.savez("%s/coeffs.npz" %DATA_DIR, md.coeffs)
    np.savez("%s/scatters.npz" %DATA_DIR, md.scatters)
    np.savez("%s/chisqs.npz" %DATA_DIR, md.chisqs)
    np.savez("%s/pivots.npz" %DATA_DIR, md.pivots)
Ejemplo n.º 11
0
def train():
    """Train a quadratic Cannon model on the 5-label training set and save
    coefficients, scatters, chisqs, and pivots to the current directory,
    plus diagnostic plots.
    """
    # Load training set
    wl = np.load("../data/wl.npz")['arr_0']
    tr_id = np.load("tr_id.npz")['arr_0']
    tr_label = np.load("tr_label.npz")['arr_0']
    tr_flux = np.load("tr_flux.npz")['arr_0']
    tr_ivar = np.load("tr_ivar.npz")['arr_0']

    # Training objects double as the test set (self-consistency run).
    ds = dataset.Dataset(
            wl, tr_id, tr_flux, tr_ivar, tr_label, tr_id, tr_flux, tr_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]', 'AKWISE'])
    ds.diagnostics_SNR()
    ds.diagnostics_ref_labels()
    np.savez("./tr_snr.npz", ds.tr_SNR)

    m = model.CannonModel(2)
    m.fit(ds)
    np.savez("./coeffs.npz", m.coeffs)
    np.savez("./scatters.npz", m.scatters)
    np.savez("./chisqs.npz", m.chisqs)
    np.savez("./pivots.npz", m.pivots)
    m.diagnostics_leading_coeffs(ds)
    m.diagnostics_leading_coeffs_triangle(ds)
    m.diagnostics_plot_chisq(ds)
Ejemplo n.º 12
0
# Use the basename of each spectrum path as its ID.
for ii,val in enumerate(files):
    ids.append(val.split("/")[-1])
wl, flux, ivar = load_spectra(files)

# normalize
norm_flux, norm_ivar = normalize(wl, flux, ivar, L=50)

# import model parameters
modeldir = "/Users/annaho/Github/TheCannon/data/LAMOST/Label_Transfer"
chisq = np.load(modeldir + "/chisqs.npz")['arr_0']
coeff = np.load(modeldir + "/coeffs.npz")['arr_0']
scat = np.load(modeldir + "/scatters.npz")['arr_0']
pivot = np.load(modeldir + "/pivots.npz")['arr_0']

# initialize dataset and model
m = model.CannonModel(2, useErrors=False)
m.coeffs = coeff
# NOTE(review): sibling scripts assign `m.chisqs` (plural); confirm which
# attribute name the model object actually reads.
m.chisq = chisq
m.scatters = scat
m.pivots = pivot
m.scales = np.ones(len(pivot))

# labels
labeldir = "/Users/annaho/Github/TheCannon/data/LAMOST/Label_Transfer"
inputf = pyfits.open("%s/Ho_et_all_catalog_resubmit.fits" %labeldir)
cat = inputf[1].data
inputf.close()

# Match each spectrum ID to its catalog row to pull coordinates.
inds = np.array([np.where(cat['LAMOST_ID']==val)[0][0] for val in ids])
ra = cat['RA'][inds]
dec = cat['Dec'][inds]
Ejemplo n.º 13
0
def test():
    """Cannon test step: infer labels for the test spectra.

    Loads normalized spectra and the saved model arrays, fits each test
    object from 7 random training-label starting guesses, keeps the
    lowest-chisq solution, and saves all arrays to DATA_DIR.
    """
    wl = np.load("%s/wl.npz" %DATA_DIR)['arr_0']
    tr_id = np.load("%s/tr_id.npz" %DATA_DIR)['arr_0']
    tr_flux = np.load("%s/tr_flux_norm.npz" %DATA_DIR)['arr_0']
    tr_ivar = np.load("%s/tr_ivar_norm.npz" %DATA_DIR)['arr_0']
    test_id = np.load("%s/test_id.npz" %DATA_DIR)['arr_0']
    test_flux = np.load("%s/test_flux_norm.npz" %DATA_DIR)['arr_0']
    test_ivar = np.load("%s/test_ivar_norm.npz" %DATA_DIR)['arr_0']
    tr_label = np.load("%s/tr_label.npz" %DATA_DIR)['arr_0']

    coeffs = np.load("%s/coeffs.npz" %DATA_DIR)['arr_0']
    scatters = np.load("%s/scatters.npz" %DATA_DIR)['arr_0']
    chisqs = np.load("%s/chisqs.npz" %DATA_DIR)['arr_0']
    pivots = np.load("%s/pivots.npz" %DATA_DIR)['arr_0']

    # Only the first four labels are fit.
    ds = dataset.Dataset(
            wl, tr_id, tr_flux, tr_ivar, tr_label[:,0:4],
            test_id, test_flux, test_ivar)

    np.savez("%s/test_SNR.npz" %DATA_DIR, ds.test_SNR)

    ds.set_label_names(["Teff", "logg", "FeH", "aFe"])
    # Rebuild the trained quadratic model from the saved arrays.
    md = model.CannonModel(2)
    md.coeffs = coeffs
    md.scatters = scatters
    md.chisqs = chisqs
    md.pivots = pivots
    md.diagnostics_leading_coeffs(ds)

    # Starting guesses: labels of 7 random training objects, pivot-relative.
    nguesses = 7
    nobj = len(ds.test_ID)
    nlabels = ds.tr_label.shape[1]
    choose = np.random.randint(0,nobj,size=nguesses)
    starting_guesses = ds.tr_label[choose]-md.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    # NOTE(review): tr_label is zeroed after drawing the guesses —
    # presumably so the test step cannot peek at reference labels; confirm.
    ds.tr_label = np.zeros((nobj, nlabels))

    for ii,guess in enumerate(starting_guesses):
        a,b,c = test_step_iteration(ds,md,starting_guesses[ii])
        labels[ii,:] = a
        chisq[ii,:] = b
        errs[ii,:] = c

    np.savez("%s/labels_all_starting_vals.npz" %DATA_DIR, labels)
    np.savez("%s/chisq_all_starting_vals.npz" %DATA_DIR, chisq)
    np.savez("%s/errs_all_starting_vals.npz" %DATA_DIR, errs)

    # Per object, keep the solution with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj,val in enumerate(choose):
        best_labels[jj,:] = labels[:,jj,:][val]
        best_errs[jj,:] = errs[:,jj,:][val]

    np.savez("%s/test_cannon_labels.npz" %DATA_DIR, best_labels)
    np.savez("%s/test_errs.npz" %DATA_DIR, best_errs)
    np.savez("%s/test_chisq.npz" %DATA_DIR, best_chisq)

    ds.test_label_vals = best_labels
Ejemplo n.º 14
0
# (2) IDENTIFY CONTINUUM PIXELS
pseudo_tr_flux, pseudo_tr_ivar = ds.continuum_normalize_training_q(
    q=0.90, delta_lambda=50)

ds.ranges = [[371, 3192], [3697, 5500], [5500, 5997], [6461, 8255]]
contmask = ds.make_contmask(pseudo_tr_flux, pseudo_tr_ivar, frac=0.07)

ds.set_continuum(contmask)
cont = ds.fit_continuum(3, "sinusoid")

norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
        ds.continuum_normalize(cont)

ds.tr_flux = norm_tr_flux
ds.tr_ivar = norm_tr_ivar
ds.test_flux = norm_test_flux
ds.test_ivar = norm_test_ivar

from TheCannon import model
md = model.CannonModel(2)
md.fit(ds)
md.diagnostics_contpix(ds)
md.diagnostics_leading_coeffs(ds)
md.diagnostics_plot_chisq(ds)

label_errs = md.infer_labels(ds)
test_labels = ds.test_label_vals
ds.diagnostics_test_step_flagstars()
ds.diagnostics_survey_labels()
dset.diagnostics_1to1()
Ejemplo n.º 15
0
def loop(num_sets):
    """Run num_sets-fold cross-validation over the reference set.

    Builds the culled reference arrays, then for each fold: splits into
    training/test by the saved assignments, writes per-fold npz sets and
    diagnostics, trains (or reloads) a model, and validates it.
    """
    wl = np.load("%s/wl_cols.npz" % SPEC_DIR)['arr_0']
    label_names = [
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/M]', 'A_k'
    ]
    ref_id = np.load("%s/ref_id_col.npz" % SPEC_DIR)['arr_0']
    ref_choose = np.load("%s/ref_id.npz" % DATA_DIR)['arr_0']
    # Cull the full reference set down to the chosen IDs.
    inds = np.array([np.where(ref_id == val)[0][0] for val in ref_choose])
    ref_id = ref_id[inds]
    ref_flux = np.load("%s/ref_flux_col.npz" % SPEC_DIR)['arr_0'][inds]
    ref_ivar = np.load("%s/ref_ivar_col.npz" % SPEC_DIR)['arr_0'][inds]
    np.savez("ref_id.npz", ref_id)
    np.savez("ref_flux.npz", ref_flux)
    np.savez("ref_ivar.npz", ref_ivar)
    # Temporary dataset over the first 3626 pixels, only to compute SNR.
    ds = dataset.Dataset(wl[0:3626], ref_id, ref_flux[:, 0:3626],
                         ref_ivar[:, 0:3626], [], [], [], [])
    np.savez("ref_snr.npz", ds.tr_SNR)
    ref_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0'][inds]
    #ref_label = np.load("%s/xval_cannon_label_vals.npz" %TR_LAB_DIR)['arr_0']
    np.savez("ref_label.npz", ref_label)
    assignments = np.load("%s/../assignments.npz" % DATA_DIR)['arr_0']

    print("looping through %s sets" % num_sets)
    for leave_out in range(0, num_sets):
        print("leaving out %s" % leave_out)
        # Fold leave_out is the test set; everything else trains.
        training = assignments != leave_out
        test = assignments == leave_out
        tr_id = ref_id[training]
        tr_flux = ref_flux[training]
        tr_ivar = ref_ivar[training]
        # Zero out NaN inverse variances so those pixels carry no weight.
        tr_ivar[np.isnan(tr_ivar)] = 0.0
        tr_label = ref_label[training]
        np.savez("tr_set_%s.npz" % leave_out, tr_id, tr_flux, tr_ivar,
                 tr_label)
        test_id = ref_id[test]
        test_flux = ref_flux[test]
        test_ivar = ref_ivar[test]
        test_ivar[np.isnan(test_ivar)] = 0.0
        test_label = ref_label[test]
        np.savez("test_set_%s.npz" % leave_out, test_id, test_flux, test_ivar,
                 test_label)
        ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label, test_id,
                             test_flux, test_ivar)
        print(ds.wl)
        ds.set_label_names(label_names)
        fig = ds.diagnostics_SNR()
        plt.savefig("SNRdist_%s.png" % leave_out)
        plt.close()
        fig = ds.diagnostics_ref_labels()
        plt.savefig("ref_label_triangle_%s.png" % leave_out)
        plt.close()
        np.savez("tr_snr_%s.npz" % leave_out, ds.tr_SNR)

        # Reuse an existing saved model for this fold if one exists.
        modelf = "model_%s.npz" % leave_out
        if glob.glob(modelf):
            print("model already exists")
            coeffs = np.load(modelf)['arr_0']
            scatters = np.load(modelf)['arr_1']
            chisqs = np.load(modelf)['arr_2']
            pivots = np.load(modelf)['arr_3']
            m = model.CannonModel(2)
            m.coeffs = coeffs
            m.scatters = scatters
            m.chisqs = chisqs
            m.pivots = pivots
        else:
            m = train(ds, leave_out)
        # Validate against the held-out fold's reference labels.
        ds.tr_label = test_label
        validate(ds, m, leave_out)
Ejemplo n.º 16
0
def plot(ii, wl, flux, ivar, model_all, coeffs, scatters, chisqs, pivots,
         start_wl, end_wl, highlights, figname):
    """Plot one observed spectrum against its Cannon model spectrum,
    with a residual panel on top, and save the figure.

    Parameters
    ----------
    ii: row index of the object in flux/ivar/model_all
    wl: wavelength grid
    flux, ivar: observed fluxes and inverse variances, one row per object
    model_all: Cannon model spectra, one row per object
    coeffs, scatters, chisqs, pivots: saved model arrays (only scatters
        is actually used here, for the shaded error band)
    start_wl, end_wl: wavelength window to display
    highlights: wavelengths to mark with dashed red vertical lines
    figname: filename for the saved figure
    """
    xmin = start_wl
    xmax = end_wl

    f = flux[ii, :]
    iv = ivar[ii, :]
    model_spec = model_all[ii, :]
    # Set y-limits from the flux range inside the plotted window.
    choose = np.logical_and(wl > xmin, wl < xmax)
    ymin = min(f[choose]) - 0.05
    ymax = max(f[choose]) + 0.05

    # err = scat ^2 + uncertainty^2
    m = model.CannonModel(2, useErrors=False)
    m.coeffs = coeffs
    m.scatters = scatters
    m.chisqs = chisqs
    m.pivots = pivots

    # Combine measurement ivar with the model scatter; pixels with zero
    # combined ivar get a large placeholder error (1000).
    scat = m.scatters
    iv_tot = (iv / (scat**2 * iv + 1))
    err = np.ones(len(iv_tot)) * 1000
    err[iv_tot > 0] = 1 / iv_tot[iv_tot > 0]**0.5

    #print("X2 is: " + str(sum((f - model_spec)**2 * iv_tot)))

    # Cinv = ivars / (1 + ivars*scatter**2)
    # lTCinvl = np.dot(lvec.T, Cinv[:, None] * lvec)
    # lTCinvf = np.dot(lvec.T, Cinv * fluxes)

    # Thanks to David Hogg / Andy Casey for this...
    # I stole it from the Annie's Lasso Github.
    # Narrow residual panel on top, wide spectrum panel below.
    gs = gridspec.GridSpec(2, 1, height_ratios=[1, 4])
    fig = plt.figure(figsize=(13.3, 4))
    ax_residual = plt.subplot(gs[0])
    ax_spectrum = plt.subplot(gs[1])

    ax_spectrum.plot(wl,
                     f,
                     c='k',
                     alpha=0.7,
                     drawstyle='steps-mid',
                     label="Data")
    #ax_spectrum.scatter(wl, f, c='k')
    ax_spectrum.plot(wl,
                     model_spec,
                     c='r',
                     alpha=0.7,
                     label="The Cannon Model")
    # Shaded band: model +/- combined error.
    ax_spectrum.fill_between(wl,
                             model_spec + err,
                             model_spec - err,
                             alpha=0.1,
                             color='r')
    ax_spectrum.set_ylim(ymin, ymax)
    ax_spectrum.set_xlim(xmin, xmax)
    ax_spectrum.axhline(1, c="k", linestyle=":", zorder=-1)
    ax_spectrum.legend(loc="lower right")

    # Residual panel: data minus model, with the same error band.
    resid = f - model_spec
    r_ymin = min(resid[choose]) - 0.01
    r_ymax = max(resid[choose]) + 0.01
    ax_residual.plot(wl, resid, c='k', alpha=0.8, drawstyle='steps-mid')
    ax_residual.fill_between(wl,
                             resid + err,
                             resid - err,
                             alpha=0.1,
                             color='k')
    ax_residual.set_ylim(r_ymin, r_ymax)
    ax_residual.set_xlim(ax_spectrum.get_xlim())
    ax_residual.axhline(0, c="k", linestyle=":", zorder=-1)
    for highlight in highlights:
        ax_residual.axvline(x=highlight, c='r', linewidth=2, linestyle='--')
    ax_residual.set_xticklabels([])

    ax_residual.yaxis.set_major_locator(MaxNLocator(3))
    ax_residual.xaxis.set_major_locator(MaxNLocator(6))
    ax_spectrum.xaxis.set_major_locator(MaxNLocator(6))
    ax_spectrum.yaxis.set_major_locator(MaxNLocator(4))
    ax_spectrum.set_xlabel(r"Wavelength $\lambda (\AA)$", fontsize=18)
    ax_spectrum.set_ylabel("Normalized flux", fontsize=18)
    ax_spectrum.tick_params(axis="both", labelsize=18)
    ax_residual.tick_params(axis="both", labelsize=18)

    fig.tight_layout()
    # Mark the highlighted wavelengths on the current (spectrum) axes too.
    for highlight in highlights:
        plt.axvline(x=highlight, c='r', linewidth=2, linestyle='--')
    #plt.show()
    plt.savefig(figname)
    plt.close()
Ejemplo n.º 17
0
def run(date):
    """Full Cannon test pipeline for one LAMOST date: load the trained
    model and spectra, attach LAMOST labels, continuum-normalize, infer
    Cannon labels, and save outputs and diagnostic plots.

    Parameters
    ----------
    date: identifier used to locate the per-date input/output files
    """
    # Training step has already been completed. Load the model,
    spectral_model = model.CannonModel(2) # 2 = quadratic model
    spectral_model.coeffs = np.load("./coeffs.npz")['arr_0']
    # NOTE(review): sibling scripts load "scatters.npz" (plural) — confirm
    # this run really saved the file as "scatter.npz".
    spectral_model.scatters = np.load("./scatter.npz")['arr_0']
    spectral_model.chisqs = np.load("./chisqs.npz")['arr_0']
    spectral_model.pivots = np.load("./pivots.npz")['arr_0']

    # Load the wavelength array
    wl = np.load("wl.npz")['arr_0']

    # Load the test set,
    test_ID = np.loadtxt("test_obj/%s_test_obj.txt" %date, dtype=str)
    print("%s test objects" %len(test_ID))
    dir_dat = "/home/share/LAMOST/DR2/DR2_release"
    test_IDs, wl, test_flux, test_ivar = load_spectra(dir_dat, test_ID)
    np.savez("output/%s_ids" %date, test_IDs)
    #np.savez("./%s_data_raw" %date, test_flux, test_ivar)

    # Load the corresponding LAMOST labels,
    labels = np.load("lamost_labels/lamost_labels_%s.npz" %date)['arr_0']
    # Column 0 of the saved label table is the object ID; match each
    # loaded spectrum to its label row.
    inds = np.array([np.where(labels[:,0]==a)[0][0] for a in test_IDs])
    nstars = len(test_IDs)
    # First three columns hold the labels; fourth column stays zero.
    lamost_labels = np.zeros((nstars,4))
    lamost_labels[:,0:3] = labels[inds,:][:,1:].astype(float)
    np.savez("output/%s_lamost_label" %date, lamost_labels)

    # Set dataset object
    data = dataset.Dataset(
            wl, test_IDs, test_flux, test_ivar, 
            lamost_labels, test_IDs, test_flux, test_ivar)

    # set the headers for plotting
    data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])

    # Plot SNR distribution
    data.diagnostics_SNR(figname="%s_SNRdist.png" %date)
    np.savez("output/%s_SNR" %date, data.test_SNR)

    # Continuum normalize, 
    filename = "output/%s_norm.npz" %date
    if glob.glob(filename):
        print("already cont normalized")
        data.test_flux = np.load(filename)['arr_0']
        data.test_ivar = np.load(filename)['arr_1']
    else:
        # Shrink the unused training slot to a single spectrum before the
        # (expensive) smoothing normalization.
        data.tr_ID = data.tr_ID[0]
        data.tr_flux = data.tr_flux[0,:]
        data.tr_ivar = data.tr_ivar[0,:]
        data.continuum_normalize_gaussian_smoothing(L=50)
        np.savez("output/%s_norm" %date, data.test_flux, data.test_ivar)

    # Infer labels 
    errs, chisq = spectral_model.infer_labels(data)
    np.savez("output/%s_cannon_labels.npz" %date, data.test_label_vals)
    np.savez("./%s_formal_errors.npz" %date, errs)
    np.savez("./%s_chisq.npz" %date, chisq)

    # Make plots
    data.test_label_vals = data.test_label_vals[:,0:3] # so it doesn't try alpha
    data.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    data.diagnostics_1to1(figname="%s_1to1_label" %date)
Ejemplo n.º 18
0
# Load the cached continuum fit if one exists; otherwise fit and cache it.
# Bug fix: pickle requires binary file modes ('rb'/'wb') — text mode
# raises under Python 3. `with` guarantees the handles are closed.
if glob.glob('cont.p'):
    with open('cont.p', 'rb') as fh:
        cont = pickle.load(fh)
else:
    cont = dataset.fit_continuum(3, "sinusoid")
    with open('cont.p', 'wb') as fh:
        pickle.dump(cont, fh)

# (3) CONTINUUM NORMALIZE
norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
        dataset.continuum_normalize(cont)

# replace with normalized values
dataset.tr_flux = norm_tr_flux
dataset.tr_ivar = norm_tr_ivar
dataset.test_flux = norm_test_flux
dataset.test_ivar = norm_test_ivar

# (4) TRAINING STEP

# learn the model from the reference_set
# NOTE(review): this rebinds `model` from the module to the instance, so
# the module is unreachable afterwards in this script.
model = model.CannonModel(dataset, 2)  # 2 = quadratic model
model.fit()  # model.train would work equivalently.
model.diagnostics()

# (5) TEST STEP

# infer labels with the new model for the test_set
label_errs = model.infer_labels(dataset)
dataset.diagnostics_test_step_flagstars()
dataset.diagnostics_survey_labels()
dataset.diagnostics_1to1()
Ejemplo n.º 19
0
def test_step(date):
    """Cannon test step for one date using 7 hand-picked starting guesses
    spanning the label space (hot/cool, metal-rich/poor, high/low alpha).

    Keeps the lowest-chisq fit per object, saves results under output/,
    and writes diagnostic plots.

    Fix: the chisq-guesses file now saves ``chisq`` — it previously
    saved ``labels`` a second time.

    Parameters
    ----------
    date: identifier used to locate the per-date input/output npz files
    """
    wl = np.load("wl.npz")['arr_0']
    test_ID = np.load("output/%s_ids.npz" % date)['arr_0']
    print(str(len(test_ID)) + " objects")
    test_flux = np.load("output/%s_norm.npz" % date)['arr_0']
    test_ivar = np.load("output/%s_norm.npz" % date)['arr_1']

    nlabels = 4
    nobj = len(test_ID)

    lamost_label = np.load("output/%s_tr_label.npz" % date)['arr_0']

    # Only two spectra fill the "training" slot; the full arrays are the
    # test set (training already happened in a separate run).
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])

    # Rebuild the trained quadratic model from saved arrays.
    m = model.CannonModel(2)
    m.coeffs = np.load("./coeffs.npz")['arr_0']
    m.scatters = np.load("./scatters.npz")['arr_0']
    m.chisqs = np.load("./chisqs.npz")['arr_0']
    m.pivots = np.load("./pivots.npz")['arr_0']

    # Seven starting guesses covering corners of label space, stored
    # relative to the model pivots.
    nguesses = 7
    starting_guesses = np.zeros((nguesses, nlabels))
    hiT_hiG_hiM = np.array(
        [5.15273730e+03, 3.71762228e+00, 3.16861898e-01, 2.46907920e-02])
    hiT_hiG_loM = np.array(
        [5.16350098e+03, 3.45917511e+00, -9.24426436e-01, 2.49296919e-01])
    loT_loG_hiM = np.array(
        [4.04936841e+03, 1.47109437e+00, 2.07210138e-01, 1.49733415e-02])
    loT_loG_loM = np.array(
        [4.00651318e+03, 8.35013509e-01, -8.98257852e-01, 7.65705928e-02])
    high_alpha = np.array([[4750, 2.6, -0.096, 0.25]])
    low_alpha = np.array([[4840, 2.67, -0.045, 0.049]])
    low_feh = np.array([[4500, 1.45, -1.54, 0.24]])
    starting_guesses[0, :] = hiT_hiG_hiM - m.pivots
    starting_guesses[1, :] = hiT_hiG_loM - m.pivots
    starting_guesses[2, :] = loT_loG_loM - m.pivots
    starting_guesses[3, :] = loT_loG_hiM - m.pivots
    starting_guesses[4, :] = high_alpha - m.pivots
    starting_guesses[5, :] = low_alpha - m.pivots
    starting_guesses[6, :] = low_feh - m.pivots

    labels = np.zeros((nguesses, nobj, nlabels))  # 4,10955,4
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    # Fit every object once per starting guess.
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c

    np.savez("output/%s_cannon_label_guesses.npz" % date, labels)
    # Bug fix: save the chisq array here, not the labels again.
    np.savez("output/%s_cannon_chisq_guesses.npz" % date, chisq)

    # Per object, keep the solution with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]

    np.savez("output/%s_all_cannon_labels.npz" % date, best_labels)
    np.savez("output/%s_cannon_label_chisq.npz" % date, best_chisq)
    np.savez("output/%s_cannon_label_errs.npz" % date, best_errs)

    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %
                                 date)
    # Plot only the first three labels in the 1-to-1 comparison.
    ds.test_label_vals = best_labels[:, 0:3]
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    ds.diagnostics_1to1(figname="%s_1to1_test_label" % date)
Ejemplo n.º 20
0
def test_step():
    """Fit Cannon labels for the excised objects with 7 random starting
    guesses drawn from their APOGEE labels; keep the lowest-chisq fit.

    Fix: the wavelength grid is now unpacked from the npz archive with
    ``['arr_0']`` — previously the raw NpzFile object was handed to
    Dataset, unlike every sibling implementation.
    """
    # Bug fix: extract the array rather than passing the NpzFile itself.
    wl = np.load(SPEC_DIR + "/wl_cols.npz")['arr_0']
    ref_id_all = np.load(SPEC_DIR + "/ref_id_col.npz")['arr_0']
    excised = np.load(SPEC_DIR + "/excised_obj/excised_ids.npz")['arr_0']
    # Locate each excised ID in the full reference list.
    inds = np.array([np.where(ref_id_all == val)[0][0] for val in excised])
    test_ID = ref_id_all[inds]
    print(str(len(test_ID)) + " objects")
    test_flux = np.load("%s/ref_flux_col.npz" % (SPEC_DIR))['arr_0'][inds]
    test_ivar = np.load("%s/ref_ivar_col.npz" % (SPEC_DIR))['arr_0'][inds]

    apogee_label = np.load("%s/ref_label.npz" % (SPEC_DIR))['arr_0'][inds]
    #np.savez("excised_label.npz", apogee_label)

    # Only two spectra fill the "training" slot; the full arrays are the
    # test set.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         apogee_label, test_ID, test_flux, test_ivar)
    ds.set_label_names([
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/Fe]', 'A_k'
    ])
    np.savez("excised_snr.npz", ds.test_SNR)
    print("DONE")

    # Rebuild the trained quadratic model from saved arrays.
    m = model.CannonModel(2)
    m.coeffs = np.load(MODEL_DIR + "/coeffs.npz")['arr_0']
    m.scatters = np.load(MODEL_DIR + "/scatters.npz")['arr_0']
    m.chisqs = np.load(MODEL_DIR + "/chisqs.npz")['arr_0']
    m.pivots = np.load(MODEL_DIR + "/pivots.npz")['arr_0']

    nlabels = len(m.pivots)
    nobj = len(test_ID)

    # Starting guesses: labels of 7 random excised objects, pivot-relative.
    nguesses = 7
    choose = np.random.randint(0, nobj, size=nguesses)
    print(apogee_label.shape)
    print(choose.shape)
    print(m.pivots.shape)
    starting_guesses = apogee_label[choose] - m.pivots

    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c

    # Per object, keep the solution with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]

    np.savez("excised_all_cannon_labels.npz", best_labels)
    np.savez("excised_cannon_label_chisq.npz", best_chisq)
    np.savez("excised_cannon_label_errs.npz", best_errs)

    ds.test_label_vals = best_labels
    ds.diagnostics_1to1(figname="excised_1to1_test_label")
Ejemplo n.º 21
0
colors = colors[np.argsort(tr_ID)]
ivars = ivars[np.argsort(tr_ID)]
ivars = ivars * 1e15

# add another column to the tr_flux, tr_ivar, test_flux, test_ivar

logwl = np.log(data.wl)
delta = logwl[1]-logwl[0]
toadd = logwl[-1]+delta*np.arange(1,5)
new_logwl = np.hstack((logwl, toadd))
data.wl = np.exp(new_logwl)
data.tr_flux = np.hstack((data.tr_flux, colors))
data.test_flux = data.tr_flux
data.tr_ivar = np.hstack((data.tr_ivar, ivars))
data.test_ivar = data.tr_ivar

# train model
m = model.CannonModel(2) # 2 = quadratic model
m.fit(data)
m.infer_labels(data)
# data.diagnostics_1to1()

def scatter(i):
    """Return the RMS difference between reference and inferred values of label i."""
    residual = data.tr_label[:, i] - data.test_label_vals[:, i]
    return np.std(residual)

def bias(i):
    """Return the mean offset between reference and inferred values of label i."""
    residual = data.tr_label[:, i] - data.test_label_vals[:, i]
    return np.mean(residual)

# Report scatter and bias for each of the four labels.
for label_index in range(4):
    print(scatter(label_index), bias(label_index))
# Ejemplo n.º 22
def loop(num_sets):
    """Run leave-one-out cross-validation over ``num_sets`` reference subsets.

    For each fold, the reference objects assigned to that fold become the
    test set and the remainder become the training set.  A Cannon model is
    loaded from disk if one was already trained for the fold, otherwise it
    is trained from scratch; the fold is then validated against the
    held-out labels.

    Parameters
    ----------
    num_sets : int
        Number of cross-validation folds (the values taken by the
        ``assignments`` array on disk).
    """
    wl = np.load("%s/wl_cols.npz" % SPEC_DIR)['arr_0']
    label_names = np.load("%s/label_names.npz" % DATA_DIR)['arr_0']
    ref_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    ref_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']
    ref_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    np.savez("ref_label.npz", ref_label)
    # assignments[i] is the fold index of reference object i.
    assignments = np.load("%s/assignments.npz" % DATA_DIR)['arr_0']

    print("looping through %s sets" % num_sets)
    for leave_out in range(num_sets):
        print("leaving out %s" % leave_out)
        training = assignments != leave_out
        test = assignments == leave_out

        tr_id = ref_id[training]
        tr_flux = ref_flux[training]
        tr_ivar = ref_ivar[training]
        tr_ivar[np.isnan(tr_ivar)] = 0.0  # NaN ivar means "no information"
        tr_label = ref_label[training]

        test_id = ref_id[test]
        test_flux = ref_flux[test]
        test_ivar = ref_ivar[test]
        test_ivar[np.isnan(test_ivar)] = 0.0
        test_label = ref_label[test]

        ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label, test_id,
                             test_flux, test_ivar)
        ds.set_label_names(label_names)

        # Signal-to-noise distribution diagnostic for this fold.
        ds.diagnostics_SNR()
        plt.savefig("SNRdist_%s.png" % leave_out)
        plt.close()

        modelf = "model_%s.npz" % leave_out
        if glob.glob(modelf):
            # Reuse the cached model.  Open the npz archive once and pull
            # all four arrays from it, instead of re-reading the file for
            # every array as before.
            print("model already exists")
            archive = np.load(modelf)
            m = model.CannonModel(2)
            m.coeffs = archive['arr_0']
            m.scatters = archive['arr_1']
            m.chisqs = archive['arr_2']
            m.pivots = archive['arr_3']
        else:
            m = train(ds, leave_out)

        # Swap in the held-out labels so validation compares against truth.
        ds.tr_label = test_label
        validate(ds, m, leave_out)
# Ejemplo n.º 23
    def __init__(self,
                 training_set,
                 label_names,
                 wavelength_arms=None,
                 censors=None,
                 progress_bar=False,
                 threads=None,
                 tolerance=None,
                 polynomial_order=2,
                 load_from_file=None,
                 debugging=False):
        """
        Instantiate the Cannon and train it on the spectra contained within a SpectrumArray.

        :param training_set:
            A SpectrumArray containing the spectra to train the Cannon on.

        :param label_names:
            A list of the names of the labels the Cannon is to estimate. We require that all of the training spectra
            have metadata fields defining all of these labels.

        :param wavelength_arms:
            A list of the wavelength break-points between arms which should have continuum fitted separately. For
            compatibility we accept this argument, but it is not used for continuum-normalised spectra.

        :param censors:
            Must be None: this Cannon implementation does not support censoring.

        :param progress_bar:
            Accepted for interface compatibility with other Cannon wrappers.

        :param threads:
            The number of CPU cores we should use. If None, we look up how many cores this computer has.

        :param tolerance:
            The tolerance xtol which the method <scipy.optimize.fmin_powell> uses to determine convergence.

        :param polynomial_order:
            The order of polynomials to use as fitting functions within the Cannon. Must be 2 (quadratic).

        :param load_from_file:
            The filename of the internal state of a pre-trained Cannon, which we should load rather than doing
            training from scratch.

        :param debugging:
            Boolean flag determining whether we produce debugging output

        :type debugging:
            bool
        """

        # This wrapper only supports the quadratic model implemented by the
        # underlying Cannon code.
        assert polynomial_order == 2, "Anna Ho's Cannon only supports quadratic polynomials. " \
                                      "You requested <polynomial_order={}>.".format(polynomial_order)

        # NOTE(review): `censors == None` should conventionally be
        # `censors is None` (identity test against the None singleton).
        assert censors == None, "Anna Ho's Cannon does not support censoring. " \
                                "But you requested that it should be enabled."

        self._debugging_output_counter = 0
        self._debugging = debugging
        self.cannon_version = "AnnaHo"
        self._label_names = label_names
        self._wavelength_arms = wavelength_arms
        logger.info("Wavelength arm breakpoints: {}".format(
            self._wavelength_arms))

        assert isinstance(training_set, fourgp_speclib.SpectrumArray), \
            "Training set for the Cannon should be a SpectrumArray."

        # Hook for normalising input spectra
        training_set = self.normalise(training_set)

        self._training_set = training_set

        # Turn error bars on fluxes into inverse variances.
        # Zero error bars produce non-finite values here; those pixels
        # are flagged and zero-weighted immediately below.
        inverse_variances = training_set.value_errors**(-2)

        # Flag bad data points: negative fluxes or non-finite inverse
        # variances get zero weight and a placeholder flux of 1.
        ignore = (training_set.values < 0) + ~np.isfinite(inverse_variances)
        inverse_variances[ignore] = 0
        training_set.values[ignore] = 1

        # Check that labels are correctly set in metadata: every requested
        # label must be present and finite on every training spectrum.
        for index in range(len(training_set)):
            metadata = training_set.get_metadata(index)
            for label in label_names:
                assert label in metadata, "Label <{}> not set on training spectrum number {}. " \
                                          "Labels on this spectrum are: {}.".format(
                    label, index, ", ".join(list(metadata.keys())))
                assert np.isfinite(metadata[label]), "Label <{}> is not finite on training spectrum number {}. " \
                                                     "Labels on this spectrum are: {}.".format(
                    label, index, metadata)

        # Compile table of training values of labels from metadata contained
        # in the SpectrumArray.  No test spectra are supplied at construction
        # time (test_ID / test_flux / test_ivar are empty).
        dataset = ho_dataset.Dataset(
            wl=training_set.wavelengths,
            tr_ID=range(len(training_set)),
            tr_flux=training_set.values,
            tr_ivar=inverse_variances,
            tr_label=np.array([
                np.array([
                    training_set.get_metadata(index)[label]
                    for label in label_names
                ]) for index in range(len(training_set))
            ]),
            test_ID=[],
            test_flux=[],
            test_ivar=[])

        dataset.set_label_names(names=label_names)
        self._model = ho_model.CannonModel(order=2, useErrors=False)

        # Either train from scratch or restore a pickled, pre-trained model.
        if load_from_file is None:
            logger.info("Starting to train the Cannon")
            self._model.train(ds=dataset)
            logger.info("Cannon training completed")
        else:
            logger.info("Loading Cannon from disk")
            # NOTE(review): the file handle opened here is never closed;
            # prefer `with open(load_from_file, "rb") as f: pickle.load(f)`.
            # Also note pickle.load on an untrusted file can execute
            # arbitrary code — only load trusted model files.
            self._model = pickle.load(file=open(load_from_file, "rb"))
            logger.info("Cannon loaded successfully")