def prep_data(date):
    """Prepare the test spectra and labels for one observation date.

    Reads the object list for `date`, loads the LAMOST spectra, matches
    each object to its row in the LAMOST label table, normalizes the
    spectra, and writes IDs, SNRs, good-pixel fractions, labels and
    normalized flux/ivar into output/.

    Parameters
    ----------
    date: str, date tag used in the input/output filenames
    """
    dir_files = "/home/annaho/xcalib_4labels/test_obj"
    dir_dat = "/home/share/LAMOST/DR2/DR2_release/"
    test_ID = np.loadtxt("%s/%s_test_obj.txt" %(dir_files, date), dtype=str)
    print("%s obj" %len(test_ID))
    np.savez("output/%s_ids.npz" %date, test_ID)
    test_ID_long = np.array([dir_dat + f for f in test_ID])
    wl, test_flux, test_ivar, npix, SNRs = load_spectra(test_ID_long)
    np.savez("output/%s_SNRs.npz" %date, SNRs)
    np.savez("output/%s_frac_good_pix.npz" %date, npix)
    # Match each test ID to its row in the LAMOST label table:
    # column 0 holds the ID, the remaining columns the label values.
    lamost_info = np.load("lamost_labels/lamost_labels_%s.npz" %date)['arr_0']
    inds = np.array([np.where(lamost_info[:,0]==a)[0][0] for a in test_ID])
    lamost_label = lamost_info[inds,:][:,1:].astype(float)
    # (Removed dead code: an nstars x 4 `lamost_info_sorted` scratch array
    # was filled here but never read afterwards.)
    # np.savez appends ".npz", so this writes output/<date>_tr_label.npz.
    np.savez("output/%s_tr_label" %date, lamost_label)
    # The training side of the dataset is a two-spectrum stub.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2,:], test_ivar[0:2,:],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
    ds.diagnostics_SNR(figname="%s_SNRdist.png" %date)
    ds.continuum_normalize_gaussian_smoothing(L=50)
    np.savez("output/%s_norm.npz" %date, ds.test_flux, ds.test_ivar)
def test_step(date):
    """Infer Cannon labels for one date's test objects.

    Rebuilds the model trained in ../run_5_train_on_good from its saved
    arrays, fits every object from four hard-coded starting guesses, and
    keeps the lowest-chisq solution per object.  Results and diagnostic
    plots are written to the current directory.

    Parameters
    ----------
    date: str, date tag used in the input/output filenames
    """
    wl = np.load("../run_2_train_on_good/wl.npz")['arr_0']
    test_ID = np.load("%s_test_ids.npz" %date)['arr_0']
    test_flux = np.load("%s_test_flux.npz" %date)['arr_0']
    test_ivar = np.load("%s_test_ivar.npz" %date)['arr_0']
    nlabels = 4
    nobj = len(test_ID)
    lamost_label_3 = np.load("%s_lamost_label.npz" %date)['arr_0']
    # add extra column to make it symmetric with the inferred test labels
    toadd = np.ones(nobj)[...,None]
    lamost_label = np.hstack((lamost_label_3, toadd))
    # Only two spectra are passed as the "training" flux/ivar: this
    # dataset is used purely for the test step.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2,:], test_ivar[0:2,:],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
    # Rebuild the trained model from its saved arrays.
    m = model.CannonModel(2)
    m.coeffs = np.load("../run_5_train_on_good/coeffs.npz")['arr_0']
    m.scatters = np.load("../run_5_train_on_good/scatters.npz")['arr_0']
    m.chisqs = np.load("../run_5_train_on_good/chisqs.npz")['arr_0']
    m.pivots = np.load("../run_5_train_on_good/pivots.npz")['arr_0']
    # Four starting guesses spanning the (Teff, logg, [M/H]) extremes,
    # expressed relative to the model pivots.
    nguesses = 4
    starting_guesses = np.zeros((nguesses,nlabels))
    hiT_hiG_hiM = np.array([ 5.15273730e+03, 3.71762228e+00, 3.16861898e-01, 2.46907920e-02])
    hiT_hiG_loM = np.array([ 5.16350098e+03, 3.45917511e+00, -9.24426436e-01, 2.49296919e-01])
    loT_loG_hiM = np.array([ 4.04936841e+03, 1.47109437e+00, 2.07210138e-01, 1.49733415e-02])
    loT_loG_loM = np.array([ 4.00651318e+03, 8.35013509e-01, -8.98257852e-01, 7.65705928e-02])
    starting_guesses[0,:] = hiT_hiG_hiM-m.pivots
    starting_guesses[1,:] = hiT_hiG_loM-m.pivots
    starting_guesses[2,:] = loT_loG_loM-m.pivots
    starting_guesses[3,:] = loT_loG_hiM-m.pivots
    labels = np.zeros((nguesses, nobj, nlabels)) # 4,10955,4
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    # Fit every object once per starting guess.
    for ii,guess in enumerate(starting_guesses):
        a,b,c = test_step_iteration(ds,m,starting_guesses[ii])
        labels[ii,:] = a
        chisq[ii,:] = b
        errs[ii,:] = c
    # For each object keep the solution with the lowest chisq.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj,val in enumerate(choose):
        best_labels[jj,:] = labels[:,jj,:][val]
        best_errs[jj,:] = errs[:,jj,:][val]
    np.savez("./%s_all_cannon_labels.npz" %date, best_labels)
    np.savez("./%s_cannon_label_chisq.npz" %date, best_chisq)
    np.savez("./%s_cannon_label_errs.npz" %date, best_errs)
    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %date)
    ds.diagnostics_1to1(figname = "%s_1to1_test_label.png" %date)
def test_step(date):
    """Infer Cannon labels for one date's objects (6-label model).

    Loads the normalized spectra and the trained model from disk, runs
    the label fit from 7 random APOGEE-label starting guesses, keeps the
    lowest-chisq solution per object, and writes results to output/.

    Parameters
    ----------
    date: str, date tag used in the input/output filenames
    """
    direc = "../xcalib_4labels"
    wl = np.load("%s/wl.npz" % direc)['arr_0']
    test_ID = np.load("%s/output/%s_ids.npz" % (direc, date))['arr_0']
    print(str(len(test_ID)) + " objects")
    test_flux = np.load("%s/output/%s_norm.npz" % (direc, date))['arr_0']
    test_ivar = np.load("%s/output/%s_norm.npz" % (direc, date))['arr_1']
    lamost_label = np.load("%s/output/%s_tr_label.npz" % (direc, date))['arr_0']
    apogee_label = np.load("./tr_label.npz")['arr_0']
    # Two-spectrum training stub: this dataset only runs the test step.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(
        ['T_{eff}', '\log g', '[Fe/H]', '[\\alpha/Fe]', 'log M', 'A_k'])
    # Rebuild the trained model from its saved arrays.
    m = model.CannonModel(2)
    m.coeffs = np.load("./coeffs.npz")['arr_0']
    m.scatters = np.load("./scatters.npz")['arr_0']
    m.chisqs = np.load("./chisqs.npz")['arr_0']
    m.pivots = np.load("./pivots.npz")['arr_0']
    nlabels = len(m.pivots)
    nobj = len(test_ID)
    # Draw random reference objects as starting guesses (pivot-relative).
    nguesses = 7
    choose = np.random.randint(0, apogee_label.shape[0], size=nguesses)
    starting_guesses = apogee_label[choose, :] - m.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c
    np.savez("output/%s_cannon_label_guesses.npz" % date, labels)
    # Bug fix: this file previously saved `labels` a second time; it is
    # meant to hold the per-guess chi-squared values.
    np.savez("output/%s_cannon_chisq_guesses.npz" % date, chisq)
    # Keep the lowest-chisq solution for each object.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]
    np.savez("output/%s_all_cannon_labels.npz" % date, best_labels)
    np.savez("output/%s_cannon_label_chisq.npz" % date, best_chisq)
    np.savez("output/%s_cannon_label_errs.npz" % date, best_errs)
    ds.test_label_vals = best_labels
    #ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" %date)
    # 1-to-1 comparison plotted for the first three labels only.
    ds.test_label_vals = best_labels[:, 0:3]
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    ds.diagnostics_1to1(figname="%s_1to1_test_label" % date)
def normalize_test_set():
    """Continuum-normalize the test spectra and save them to DATA_DIR.

    Uses the saved continuum-pixel mask.  Before fitting the continuum,
    any nominal continuum pixel whose flux deviates from unity by >= 3
    sigma has its inverse variance set to SMALL (module-level constant)
    so it does not drive the fit.
    """
    wl = np.load("%s/wl.npz" %DATA_DIR)['arr_0']
    test_id = np.load("%s/test_id.npz" %DATA_DIR)['arr_0']
    test_flux = np.load("%s/test_flux.npz" %DATA_DIR)['arr_0']
    test_ivar = np.load("%s/test_ivar_corr.npz" %DATA_DIR)['arr_0']
    test_scat = np.load("%s/test_spec_scat.npz" %DATA_DIR)['arr_0']
    contmask = np.load("%s/wl_contmask.npz" %DATA_DIR)['arr_0']
    # Two-object training stub; only the test side gets normalized here.
    ds = dataset.Dataset(
        wl, test_id[0:2], test_flux[0:2], test_ivar[0:2],
        wl, test_id, test_flux, test_ivar)
    ds.set_continuum(contmask)
    # For the sake of the normalization, no pixel with flux >= 3 sigma
    # should be continuum.
    for ii,spec in enumerate(ds.test_flux):
        err = test_scat[ii]  # per-object scatter used as the sigma
        bad = np.logical_and(
            ds.contmask == True, np.abs(1-spec) >= 3*err)
        ds.test_ivar[ii][bad] = SMALL
    cont = ds.fit_continuum(3, "sinusoid")
    np.savez("%s/test_cont.npz" %DATA_DIR, cont)
    norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
        ds.continuum_normalize(cont)
    # Zero the weight of pixels whose raw flux is unphysical.
    bad = np.logical_or(test_flux <= 0, test_flux > 1.1)
    norm_test_ivar[bad] = 0.0
    np.savez("%s/test_flux_norm.npz" %DATA_DIR, norm_test_flux)
    np.savez("%s/test_ivar_norm.npz" %DATA_DIR, norm_test_ivar)
def train():
    """Train a quadratic Cannon model on the reference set and save it.

    Reads the reference IDs, fluxes, inverse variances and labels from
    SPEC_DIR, keeps only the first three labels (Teff, logg, [Fe/H]),
    fits the model with an all-pass wavelength filter, and writes the
    model arrays to the current directory.
    """
    wavelength = np.load("%s/../wl_cols.npz" % SPEC_DIR)['arr_0']
    ref_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    ref_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0'][:, 0:3]
    ref_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']
    # The reference objects double as the "test" side of the dataset.
    ds = dataset.Dataset(wavelength, ref_id, ref_flux, ref_ivar, ref_label,
                         ref_id, ref_flux, ref_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[Fe/H]'])
    print("Training model")
    n_labels = ds.tr_label.shape[1]
    print(n_labels)
    n_pix = len(ds.wl)
    print(n_pix)
    # All-True filter: every label may use every pixel.
    keep = np.ones((n_labels, n_pix), dtype=bool)
    print(keep)
    cannon_model = model.CannonModel(2, wl_filter=keep)
    cannon_model.fit(ds)
    np.savez("./coeffs.npz", cannon_model.coeffs)
    np.savez("./scatters.npz", cannon_model.scatters)
    np.savez("./chisqs.npz", cannon_model.chisqs)
    np.savez("./pivots.npz", cannon_model.pivots)
    cannon_model.diagnostics_leading_coeffs(ds)
def test_step():
    """Re-fit Cannon labels for the reference objects (1-to-1 test).

    Loads the culled 7-label model, fits every reference spectrum from
    10 random reference-label starting guesses, and saves the
    lowest-chisq labels/chisq/errors per object.
    """
    #wl = np.load("%s/wl.npz" %SPEC_DIR)['arr_0']
    wl = np.load("wl_cols.npz")['arr_0']
    test_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    tr_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    test_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    test_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']
    # Reference objects serve as both the training and test side.
    ds = dataset.Dataset(wl, test_id, test_flux, test_ivar, tr_label,
                         test_id, test_flux, test_ivar)
    ds.set_label_names([
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/M]', 'A_k'
    ])
    # Rebuild the culled model from its saved arrays.
    m = model.CannonModel(2)
    m.coeffs = np.load("./culled_coeffs.npz")['arr_0']
    m.scatters = np.load("./culled_scatters.npz")['arr_0']
    m.chisqs = np.load("./culled_chisqs.npz")['arr_0']
    m.pivots = np.load("./culled_pivots.npz")['arr_0']
    nguesses = 10
    nobj = len(ds.test_ID)
    nlabels = len(m.pivots)
    # Random reference objects' labels, pivot-relative, as guesses.
    choose = np.random.randint(0, nobj, size=nguesses)
    starting_guesses = ds.tr_label[choose] - m.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c
    # Keep the lowest-chisq solution per object.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    # Bug fix: this was np.zeros(tr_label.shape), which breaks whenever
    # the label file has a different number of columns than the model
    # (nlabels = len(m.pivots)).  Size it to match `labels` instead.
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]
    np.savez("./cannon_label_vals.npz", best_labels)
    np.savez("./cannon_label_chisq.npz", best_chisq)
    np.savez("./cannon_label_errs.npz", best_errs)
    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels()
    ds.diagnostics_1to1(figname="1to1_test_label")
def get_normed_spectra():
    """Spectra to compare with models.

    Loads the LAMOST spectra for the module-level `lamost_id` onto the
    module-level `wl` grid, continuum-normalizes them with Gaussian
    smoothing, saves the normalized flux/ivar to DATA_DIR, and returns
    them.

    Returns
    -------
    (tr_flux, tr_ivar): the normalized flux and inverse variance arrays
    """
    # (Removed dead code: a `filenames` path array was built here but
    # never used.)
    grid, fluxes, ivars, npix, SNRs = lamost.load_spectra(
        lamost_id, input_grid=wl)
    # Two-object test stub; only the training side is normalized/saved.
    ds = dataset.Dataset(
        wl, lamost_id, fluxes, ivars, [1],
        lamost_id[0:2], fluxes[0:2], ivars[0:2])
    ds.continuum_normalize_gaussian_smoothing(L=50)
    np.savez(DATA_DIR + "/" + "norm_flux.npz", ds.tr_flux)
    np.savez(DATA_DIR + "/" + "norm_ivar.npz", ds.tr_ivar)
    return ds.tr_flux, ds.tr_ivar
def infer_labels_from_spectra(self, fluxes_in, flux_vars_in):
    """Run the Cannon test step on new spectra and return their labels.

    Builds a test dataset whose training side is the stored training
    data and whose test side is the supplied spectra (variances are
    inverted into ivars), then runs the model's label inference.

    Parameters
    ----------
    fluxes_in: 2-D array of fluxes, one row per object
    flux_vars_in: 2-D array of flux variances, same shape

    Returns
    -------
    The inferred test label values.
    """
    n_spectra = fluxes_in.shape[0]
    ids = ["%02d" % index for index in range(n_spectra)]
    train = self.m_train_dataset_newtype
    ds_test = dataset.Dataset(
        train.m_wavelength,
        ids,
        train.m_spectra,
        train.m_spectra_ivar,
        train.m_label,
        ids,
        fluxes_in,
        1.0 / flux_vars_in)
    ds_test.set_label_names(['L' + str(k) for k in range(1, 8)])
    # infer_labels stores its results on the dataset; the returned
    # (errs, chisq) pair is not needed here.
    self.m_model.infer_labels(ds_test)
    return ds_test.test_label_vals
def fit_spectrum(self, spectrum):
    """
    Fit stellar labels to a continuum-normalised spectrum.

    :param spectrum:
        A Spectrum object containing the spectrum for the Cannon to fit.

    :type spectrum:
        Spectrum

    :return:
        Tuple (labels, cov, meta).
    """
    assert isinstance(spectrum, fourgp_speclib.Spectrum), \
        "Supplied spectrum for the Cannon to fit is not a Spectrum object."
    assert spectrum.raster_hash == self._training_set.raster_hash, \
        "Supplied spectrum for the Cannon to fit is not sampled on the same raster as the training set."
    # Hook for normalising input spectra
    spectrum = self.normalise(spectrum)
    inverse_variances = spectrum.value_errors**(-2)
    # Ignore bad pixels.  `+` acts as logical OR on the boolean arrays:
    # negative errors or non-finite ivar/flux get zero weight.
    bad = (spectrum.value_errors < 0) + (~np.isfinite(inverse_variances * spectrum.values))
    inverse_variances[bad] = 0
    spectrum.values[bad] = np.nan
    # Compile table of training values of labels from metadata contained in SpectrumArray
    # The training side is left empty: only the test step runs here.
    dataset = ho_dataset.Dataset(wl=spectrum.wavelengths,
                                 tr_ID=[], tr_flux=[], tr_ivar=[], tr_label=[],
                                 test_ID=np.array((0, )),
                                 test_flux=np.array((spectrum.values, )),
                                 test_ivar=np.array((inverse_variances, )))
    dataset.set_label_names(names=self._label_names)
    errs_all, chisq_all = self._model.infer_labels(ds=dataset)
    labels = dataset.test_label_vals
    # NOTE(review): `cov` is the per-label errors array from
    # infer_labels, not a full covariance matrix — confirm with callers.
    cov = errs_all
    meta = None
    return labels, cov, meta
def xvalidate():
    """
    Train a model, leaving out a group corresponding to a random integer
    from 0 to 7, e.g. leave out 0.  Test on the remaining 1/8 of the
    sample.
    """
    print("Loading data")
    groups = np.load("ref_groups.npz")['arr_0']
    ref_label = np.load("%s/ref_label.npz" % direc_ref)['arr_0']
    ref_id = np.load("%s/ref_id.npz" % direc_ref)['arr_0']
    ref_flux = np.load("%s/ref_flux.npz" % direc_ref)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" % direc_ref)['arr_0']
    wl = np.load("%s/wl.npz" % direc_ref)['arr_0']
    num_models = 8
    # One model per held-out group.
    for ii in np.arange(num_models):
        print("Leaving out group %s" % ii)
        train_on = groups != ii
        test_on = groups == ii
        tr_label = ref_label[train_on]
        tr_id = ref_id[train_on]
        tr_flux = ref_flux[train_on]
        tr_ivar = ref_ivar[train_on]
        print("Training on %s objects" % len(tr_id))
        test_label = ref_label[test_on]
        test_id = ref_id[test_on]
        test_flux = ref_flux[test_on]
        test_ivar = ref_ivar[test_on]
        print("Testing on %s objects" % len(test_id))
        print("Loading dataset...")
        ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label,
                             test_id, test_flux, test_ivar)
        ds.set_label_names(
            ['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]', 'AKWISE'])
        # Per-split diagnostics: SNR distribution and label triangle.
        fig = ds.diagnostics_SNR()
        plt.savefig("ex%s_SNR.png" % ii)
        fig = ds.diagnostics_ref_labels()
        plt.savefig("ex%s_ref_label_triangle.png" % ii)
        np.savez("ex%s_tr_snr.npz" % ii, ds.tr_SNR)
        # train a model
        m = train(ds, ii)
        # test step
        ds.tr_label = test_label  # to compare the results
        test(ds, m, ii)
def train(self, train_dataset):
    """Fit the Cannon model to a training dataset.

    Wraps the incoming dataset into TheCannon's Dataset type (the
    training spectra are reused as the test side) and runs the fit.
    A deep copy of the input is kept for later label inference.

    Parameters
    ----------
    train_dataset: object exposing m_wavelength, m_spectra,
        m_spectra_ivar and m_label arrays
    """
    n_objects = train_dataset.m_label.shape[0]
    ids = ["{:02d}".format(x) for x in range(n_objects)]
    # Keep an untouched copy for infer_labels_from_spectra.
    self.m_train_dataset_newtype = deepcopy(train_dataset)
    self.m_train_dataset = dataset.Dataset(train_dataset.m_wavelength, ids,
                                           train_dataset.m_spectra,
                                           train_dataset.m_spectra_ivar,
                                           train_dataset.m_label, ids,
                                           train_dataset.m_spectra,
                                           train_dataset.m_spectra_ivar)
    # Bug fix: np.str was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin str is the exact equivalent.
    self.m_train_dataset.set_label_names(
        np.array(range(train_dataset.m_label.shape[1]), dtype=str))
    self.m_model.fit(self.m_train_dataset)
def normalize_ref_set():
    """Continuum-normalize the reference spectra and save them.

    Fits a 3rd-order sinusoid continuum using the saved continuum-pixel
    mask, normalizes the reference fluxes by it, zeroes the weight of
    unphysical pixels, and writes the results into DATA_DIR.
    """
    wl = np.load("%s/wl.npz" %DATA_DIR)['arr_0']
    ref_id = np.load("%s/ref_id.npz" %DATA_DIR)['arr_0']
    ref_flux = np.load("%s/ref_flux.npz" %DATA_DIR)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" %DATA_DIR)['arr_0']
    ref_label = np.load("%s/ref_label.npz" %DATA_DIR)['arr_0']
    # Reference objects fill both the training and test sides.
    ds = dataset.Dataset(wl, ref_id, ref_flux, ref_ivar, ref_label,
                         ref_id, ref_flux, ref_ivar)
    continuum_mask = np.load("%s/wl_contmask.npz" %DATA_DIR)['arr_0']
    ds.set_continuum(continuum_mask)
    cont = ds.fit_continuum(3, "sinusoid")
    np.savez("%s/ref_cont.npz" %DATA_DIR, cont)
    norm_tr_flux, norm_tr_ivar, _, _ = ds.continuum_normalize(cont)
    # Give zero weight to pixels whose raw flux is unphysical.
    unphysical = np.logical_or(ref_flux <= 0, ref_flux > 1.1)
    norm_tr_ivar[unphysical] = 0.0
    np.savez("%s/ref_flux_norm.npz" %DATA_DIR, norm_tr_flux)
    np.savez("%s/ref_ivar_norm.npz" %DATA_DIR, norm_tr_ivar)
def test_step(date):
    """Build the Cannon test dataset for one date (spectra + colors).

    Loads the normalized spectra, applies the pixel mask to the inverse
    variances, appends the photometric color terms (zero-weighting
    NaN/inf values), builds the dataset, and saves its SNRs and IDs.

    Parameters
    ----------
    date: str, date tag used in the input/output filenames
    """
    wl = np.load("%s/wl_cols.npz" % MODEL_DIR)['arr_0']
    test_ID = np.load("%s/output/%s_ids.npz" % (SPEC_DIR, date))['arr_0']
    print(str(len(test_ID)) + " objects")
    test_flux_temp = np.load("%s/output/%s_norm.npz" % (SPEC_DIR, date))['arr_0']
    test_ivar_temp = np.load("%s/output/%s_norm.npz" % (SPEC_DIR, date))['arr_1']
    # Mask bad pixels (spectral part of wl only, not the color columns).
    mask = np.load("mask.npz")['arr_0']
    test_ivar_masked = apply_mask(wl[0:3626], test_ivar_temp, mask)
    # Append colors, giving zero weight to NaN/inf values.
    col = np.load(COL_DIR + "/" + date + "_col.npz")['arr_0']
    col_ivar = np.load(COL_DIR + "/" + date + "_col_ivar.npz")['arr_0']
    bad_flux = np.logical_or(np.isnan(col), col == np.inf)
    col[bad_flux] = 1.0
    col_ivar[bad_flux] = 0.0
    bad_ivar = np.logical_or(np.isnan(col_ivar), col_ivar == np.inf)
    col_ivar[bad_ivar] = 0.0
    test_flux = np.hstack((test_flux_temp, col.T))
    # Bug fix: previously hstacked the unmasked test_ivar_temp, so the
    # mask computed above was silently discarded (the sibling
    # load_all_ref_spectra uses the masked ivars).
    test_ivar = np.hstack((test_ivar_masked, col_ivar.T))
    lamost_label = np.load("%s/output/%s_tr_label.npz" % (SPEC_DIR, date))['arr_0']
    apogee_label = np.load("./ref_label.npz")['arr_0']
    # Two-spectrum training stub; this dataset only runs the test step.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    #np.savez(COL_DIR + "/%s_test_flux.npz" %date, ds.test_flux)
    #np.savez(COL_DIR + "/%s_test_ivar.npz" %date, ds.test_ivar)
    np.savez(COL_DIR + "/%s_test_snr.npz" % date, ds.test_SNR)
    np.savez(COL_DIR + "/%s_test_id.npz" % date, ds.test_ID)
    ds.set_label_names([
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/Fe]', 'A_k'
    ])
def load_dataset(ii):
    """Build the cross-validation dataset that leaves out group `ii`.

    Splits the reference set into train (groups != ii) and test
    (groups == ii), attaches the previously saved Cannon labels for this
    group, writes SNR/label diagnostics, and returns the dataset.

    Parameters
    ----------
    ii: int, index of the held-out group

    Returns
    -------
    ds: the populated dataset object
    """
    # Bug fix: this line was a bare ("loading data") expression — the
    # print call had been lost.
    print("loading data")
    groups = np.load("ref_groups.npz")['arr_0']
    ref_label = np.load("%s/ref_label.npz" % direc_ref)['arr_0']
    ref_id = np.load("%s/ref_id.npz" % direc_ref)['arr_0']
    ref_flux = np.load("%s/ref_flux.npz" % direc_ref)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" % direc_ref)['arr_0']
    wl = np.load("%s/wl.npz" % direc_ref)['arr_0']
    print("Leaving out group %s" % ii)
    train_on = groups != ii
    test_on = groups == ii
    tr_label = ref_label[train_on]
    tr_id = ref_id[train_on]
    tr_flux = ref_flux[train_on]
    tr_ivar = ref_ivar[train_on]
    print("Training on %s objects" % len(tr_id))
    test_label = ref_label[test_on]
    test_id = ref_id[test_on]
    test_flux = ref_flux[test_on]
    test_ivar = ref_ivar[test_on]
    print("Testing on %s objects" % len(test_id))
    print("Loading dataset...")
    ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label,
                         test_id, test_flux, test_ivar)
    # Bug fix: `group` was undefined here (NameError); the group index
    # is the function argument `ii`.
    ds.test_label_vals = np.load(
        "./ex%s_cannon_label_vals.npz" % ii)['arr_0']
    print(ds.test_label_vals.shape)
    ds.set_label_names(
        ['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]', 'AKWISE'])
    fig = ds.diagnostics_SNR()
    plt.savefig("ex%s_SNR.png" % ii)
    fig = ds.diagnostics_ref_labels()
    plt.savefig("ex%s_ref_label_triangle.png" % ii)
    np.savez("ex%s_tr_snr.npz" % ii, ds.tr_SNR)
    return ds
def load_all_ref_spectra(ref_id):
    """Assemble reference spectra (with colors) for the given IDs.

    Looks up each requested ID in the full training set, applies the
    pixel mask to the inverse variances, appends photometric colors via
    find_colors, and saves the resulting arrays plus the training SNRs.

    Parameters
    ----------
    ref_id: iterable of LAMOST ID strings to select
    """
    DATA_DIR = "/Users/annaho/Data/LAMOST/Label_Transfer"
    wl = np.load(DATA_DIR + "/../Abundances/wl_cols.npz")['arr_0']
    all_ref_ivar = np.load("%s/tr_ivar.npz" %DATA_DIR)['arr_0']
    all_ref_flux = np.load("%s/tr_flux.npz" %DATA_DIR)['arr_0']
    all_id = np.load("%s/tr_id.npz" %DATA_DIR)['arr_0']
    all_id = np.array([val.decode('utf-8') for val in all_id])
    # Row index of each requested ID in the full table.
    inds = np.array([np.where(all_id==val)[0][0] for val in ref_id])
    ref_flux = all_ref_flux[inds]
    ref_ivar = all_ref_ivar[inds]
    mask = np.load("%s/../Abundances/mask.npz" %DATA_DIR)['arr_0']
    ref_ivar_masked = apply_mask(wl[0:3626], ref_ivar, mask)
    ref_id_col, ref_flux_col, ref_ivar_col = find_colors(
        ref_id, ref_flux, ref_ivar_masked)
    np.savez("ref_id_col.npz", ref_id_col)
    np.savez("ref_flux.npz", ref_flux_col)
    np.savez("ref_ivar.npz", ref_ivar_col)
    # Bug fix: the slices were written [:,3626] (one single pixel
    # column); the spectral block is the first 3626 pixels, [:,0:3626],
    # matching the wl[0:3626] grid passed alongside.
    ds = dataset.Dataset(
        wl[0:3626], ref_id_col, ref_flux_col[:,0:3626],
        ref_ivar_col[:,0:3626], [], [], [], [])
    np.savez("ref_snr.npz", ds.tr_SNR)
def train():
    """Train a 4-label quadratic Cannon model on the normalized training
    set, save SNR/label diagnostics, and write the fitted model arrays
    into DATA_DIR."""
    def _load(fname):
        # All inputs live in DATA_DIR as single-array .npz files.
        return np.load("%s/%s.npz" % (DATA_DIR, fname))['arr_0']

    wavelength = _load("wl")
    ds = dataset.Dataset(
        wavelength,
        _load("tr_id"), _load("tr_flux_norm"), _load("tr_ivar_norm"),
        _load("tr_label")[:, 0:4],
        _load("val_id"), _load("val_flux_norm"), _load("val_ivar_norm"))
    ds.set_label_names(["Teff", "logg", "FeH", "aFe"])
    np.savez("%s/tr_SNR.npz" % DATA_DIR, ds.tr_SNR)
    # Diagnostic figures: SNR distribution and reference-label triangle.
    ds.diagnostics_SNR()
    plt.savefig("%s/SNR_dist.png" % DATA_DIR)
    plt.close()
    ds.diagnostics_ref_labels()
    plt.savefig("%s/ref_label_triangle.png" % DATA_DIR)
    plt.close()
    cannon_model = model.CannonModel(2)
    cannon_model.fit(ds)
    cannon_model.diagnostics_leading_coeffs(ds)
    plt.savefig("%s/leading_coeffs.png" % DATA_DIR)
    plt.close()
    # Persist the fitted model arrays.
    for fname, arr in (("coeffs", cannon_model.coeffs),
                       ("scatters", cannon_model.scatters),
                       ("chisqs", cannon_model.chisqs),
                       ("pivots", cannon_model.pivots)):
        np.savez("%s/%s.npz" % (DATA_DIR, fname), arr)
def train():
    """Train a 5-label quadratic Cannon model.

    The training objects are reused as the test side (1-to-1 setup).
    Saves SNRs and the fitted model arrays, then produces the standard
    coefficient and chisq diagnostic plots.
    """
    # Load training set.
    wavelength = np.load("../data/wl.npz")['arr_0']
    ids = np.load("tr_id.npz")['arr_0']
    labels = np.load("tr_label.npz")['arr_0']
    flux = np.load("tr_flux.npz")['arr_0']
    ivar = np.load("tr_ivar.npz")['arr_0']
    ds = dataset.Dataset(wavelength, ids, flux, ivar, labels,
                         ids, flux, ivar)
    ds.set_label_names(
        ['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]', 'AKWISE'])
    ds.diagnostics_SNR()
    ds.diagnostics_ref_labels()
    np.savez("./tr_snr.npz", ds.tr_SNR)
    cannon = model.CannonModel(2)
    cannon.fit(ds)
    # Persist the fitted model arrays.
    np.savez("./coeffs.npz", cannon.coeffs)
    np.savez("./scatters.npz", cannon.scatters)
    np.savez("./chisqs.npz", cannon.chisqs)
    np.savez("./pivots.npz", cannon.pivots)
    cannon.diagnostics_leading_coeffs(ds)
    cannon.diagnostics_leading_coeffs_triangle(ds)
    cannon.diagnostics_plot_chisq(ds)
def load_dataset(date):
    """Load the dataset for a single date.

    Parameters
    ----------
    date: the date (string) for which to load the data & dataset

    Returns
    -------
    ds: the dataset object
    """
    lab_dir = "/home/annaho/TheCannon/data/lamost"
    wl_dir = "/home/annaho/TheCannon/code/lamost/mass_age/cn"
    spec_dir = "/home/annaho/TheCannon/code/apogee_lamost/xcalib_4labels/output"
    # Keep only the spectral pixels (drop the photometric color columns).
    wavelength = np.load(wl_dir + "/wl_cols.npz")['arr_0'][0:3626]
    ds = dataset.Dataset(wavelength, [], [], [], [], [], [], [])
    ds.test_label_vals = np.load(
        "%s/%s_all_cannon_labels.npz" % (lab_dir, date))['arr_0']
    norm = np.load("%s/%s_norm.npz" % (spec_dir, date))
    ds.test_flux = norm['arr_0']
    ds.test_ivar = norm['arr_1']
    ds.test_ID = np.load("%s/%s_ids.npz" % (spec_dir, date))['arr_0']
    return ds
def loop(num_sets):
    """Run leave-one-group-out cross-validation over `num_sets` groups.

    For each group: split the reference set into train/test using the
    saved group assignments, build a dataset, plot its SNR distribution,
    then either reload an existing model (model_<k>.npz) or train a new
    one, and validate on the held-out group.

    Parameters
    ----------
    num_sets: int, number of cross-validation groups
    """
    wl = np.load("%s/wl_cols.npz" % SPEC_DIR)['arr_0']
    label_names = np.load("%s/label_names.npz" % DATA_DIR)['arr_0']
    ref_id = np.load("%s/ref_id.npz" % SPEC_DIR)['arr_0']
    #ref_choose = np.load("%s/ref_id_culled.npz" %DATA_DIR)['arr_0']
    #inds = np.array([np.where(ref_id==val)[0][0] for val in ref_choose])
    #ref_id = ref_id[inds]
    ref_flux = np.load("%s/ref_flux.npz" % SPEC_DIR)['arr_0']
    ref_ivar = np.load("%s/ref_ivar.npz" % SPEC_DIR)['arr_0']
    ref_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0']
    np.savez("ref_label.npz", ref_label)
    assignments = np.load("%s/assignments.npz" % DATA_DIR)['arr_0']
    print("looping through %s sets" % num_sets)
    for leave_out in range(0, num_sets):
        print("leaving out %s" % leave_out)
        training = assignments != leave_out
        test = assignments == leave_out
        tr_id = ref_id[training]
        tr_flux = ref_flux[training]
        tr_ivar = ref_ivar[training]
        # NaN inverse variances would poison the fit; zero their weight.
        tr_ivar[np.isnan(tr_ivar)] = 0.0
        tr_label = ref_label[training]
        #np.savez(
        #    "tr_set_%s.npz" %leave_out,
        #    tr_id, tr_flux, tr_ivar, tr_label)
        test_id = ref_id[test]
        test_flux = ref_flux[test]
        test_ivar = ref_ivar[test]
        test_ivar[np.isnan(test_ivar)] = 0.0
        test_label = ref_label[test]
        #np.savez(
        #    "test_set_%s.npz" %leave_out,
        #    test_id, test_flux, test_ivar, test_label)
        ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label,
                             test_id, test_flux, test_ivar)
        ds.set_label_names(label_names)
        fig = ds.diagnostics_SNR()
        plt.savefig("SNRdist_%s.png" % leave_out)
        plt.close()
        #fig = ds.diagnostics_ref_labels()
        #plt.savefig("ref_label_triangle_%s.png" %leave_out)
        #plt.close()
        #np.savez("tr_snr_%s.npz" %leave_out, ds.tr_SNR)
        modelf = "model_%s.npz" % leave_out
        if glob.glob(modelf):
            # Reuse a previously trained model for this split.
            print("model already exists")
            coeffs = np.load(modelf)['arr_0']
            scatters = np.load(modelf)['arr_1']
            chisqs = np.load(modelf)['arr_2']
            pivots = np.load(modelf)['arr_3']
            m = model.CannonModel(2)
            m.coeffs = coeffs
            m.scatters = scatters
            m.chisqs = chisqs
            m.pivots = pivots
        else:
            m = train(ds, leave_out)
        # Hand the held-out labels to the dataset so validation can
        # compare inferred vs. reference values.
        ds.tr_label = test_label
        validate(ds, m, leave_out)
labeldir = "/Users/annaho/Github/TheCannon/data/LAMOST/Label_Transfer" inputf = pyfits.open("%s/Ho_et_all_catalog_resubmit.fits" %labeldir) cat = inputf[1].data inputf.close() inds = np.array([np.where(cat['LAMOST_ID']==val)[0][0] for val in ids]) ra = cat['RA'][inds] dec = cat['Dec'][inds] teff = cat['Teff'][inds] logg = cat['logg'][inds] mh = cat['FeH'][inds] alpham = cat['alphaM'][inds] ak = 0.05*np.ones(len(inds)) lab = np.vstack((teff,logg,mh,alpham,ak)) ds = dataset.Dataset( wl, ids, norm_flux, norm_ivar, lab, ids, norm_flux, norm_ivar) ds.test_label_vals = lab.T # generate model test spectra m.infer_spectra(ds) Cinv = ds.test_ivar / (1 + ds.test_ivar*m.scatters**2) #res = Cinv*(ds.test_flux - m.model_spectra)**2 res = (ds.test_flux - m.model_spectra) # get height above the plane c = SkyCoord(ra, dec, unit='deg') lat = np.abs(c.icrs.galactic.b) for ii in range(0, len(ids)):
def test_step(date):
    """Infer Cannon labels for one date from 7 starting guesses.

    Rebuilds the trained 4-label model from its saved arrays, fits each
    object from every hard-coded starting guess, keeps the lowest-chisq
    solution per object, and writes labels/chisq/errors plus diagnostic
    plots.

    Parameters
    ----------
    date: str, date tag used in the input/output filenames
    """
    wl = np.load("wl.npz")['arr_0']
    test_ID = np.load("output/%s_ids.npz" % date)['arr_0']
    print(str(len(test_ID)) + " objects")
    test_flux = np.load("output/%s_norm.npz" % date)['arr_0']
    test_ivar = np.load("output/%s_norm.npz" % date)['arr_1']
    nlabels = 4
    nobj = len(test_ID)
    lamost_label = np.load("output/%s_tr_label.npz" % date)['arr_0']
    # Two-spectrum training stub; this dataset only runs the test step.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         lamost_label, test_ID, test_flux, test_ivar)
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
    # Rebuild the trained model from its saved arrays.
    m = model.CannonModel(2)
    m.coeffs = np.load("./coeffs.npz")['arr_0']
    m.scatters = np.load("./scatters.npz")['arr_0']
    m.chisqs = np.load("./chisqs.npz")['arr_0']
    m.pivots = np.load("./pivots.npz")['arr_0']
    # Seven starting guesses spanning the label space, pivot-relative.
    nguesses = 7
    starting_guesses = np.zeros((nguesses, nlabels))
    hiT_hiG_hiM = np.array(
        [5.15273730e+03, 3.71762228e+00, 3.16861898e-01, 2.46907920e-02])
    hiT_hiG_loM = np.array(
        [5.16350098e+03, 3.45917511e+00, -9.24426436e-01, 2.49296919e-01])
    loT_loG_hiM = np.array(
        [4.04936841e+03, 1.47109437e+00, 2.07210138e-01, 1.49733415e-02])
    loT_loG_loM = np.array(
        [4.00651318e+03, 8.35013509e-01, -8.98257852e-01, 7.65705928e-02])
    high_alpha = np.array([[4750, 2.6, -0.096, 0.25]])
    low_alpha = np.array([[4840, 2.67, -0.045, 0.049]])
    low_feh = np.array([[4500, 1.45, -1.54, 0.24]])
    starting_guesses[0, :] = hiT_hiG_hiM - m.pivots
    starting_guesses[1, :] = hiT_hiG_loM - m.pivots
    starting_guesses[2, :] = loT_loG_loM - m.pivots
    starting_guesses[3, :] = loT_loG_hiM - m.pivots
    starting_guesses[4, :] = high_alpha - m.pivots
    starting_guesses[5, :] = low_alpha - m.pivots
    starting_guesses[6, :] = low_feh - m.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c
    np.savez("output/%s_cannon_label_guesses.npz" % date, labels)
    # Bug fix: this file previously saved `labels` a second time instead
    # of the per-guess chi-squared values.
    np.savez("output/%s_cannon_chisq_guesses.npz" % date, chisq)
    # Keep the lowest-chisq solution for each object.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]
    np.savez("output/%s_all_cannon_labels.npz" % date, best_labels)
    np.savez("output/%s_cannon_label_chisq.npz" % date, best_chisq)
    np.savez("output/%s_cannon_label_errs.npz" % date, best_errs)
    ds.test_label_vals = best_labels
    ds.diagnostics_survey_labels(figname="%s_survey_labels_triangle.png" % date)
    # 1-to-1 comparison plotted for the first three labels only.
    ds.test_label_vals = best_labels[:, 0:3]
    ds.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    ds.diagnostics_1to1(figname="%s_1to1_test_label" % date)
def loop(num_sets):
    """Leave-one-group-out cross-validation for the 7-label model
    (spectra + photometric colors).

    Restricts the color-augmented reference sample to the culled ID
    list, saves per-split train/test sets, and for each split either
    reloads model_<k>.npz or trains a new model, then validates on the
    held-out group.

    Parameters
    ----------
    num_sets: int, number of cross-validation groups
    """
    wl = np.load("%s/wl_cols.npz" % SPEC_DIR)['arr_0']
    label_names = [
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/M]', 'A_k'
    ]
    # Restrict the color-augmented reference set to the culled ID list.
    ref_id = np.load("%s/ref_id_col.npz" % SPEC_DIR)['arr_0']
    ref_choose = np.load("%s/ref_id.npz" % DATA_DIR)['arr_0']
    inds = np.array([np.where(ref_id == val)[0][0] for val in ref_choose])
    ref_id = ref_id[inds]
    ref_flux = np.load("%s/ref_flux_col.npz" % SPEC_DIR)['arr_0'][inds]
    ref_ivar = np.load("%s/ref_ivar_col.npz" % SPEC_DIR)['arr_0'][inds]
    np.savez("ref_id.npz", ref_id)
    np.savez("ref_flux.npz", ref_flux)
    np.savez("ref_ivar.npz", ref_ivar)
    # SNRs are computed from the spectral pixels only (first 3626).
    ds = dataset.Dataset(wl[0:3626], ref_id, ref_flux[:, 0:3626],
                         ref_ivar[:, 0:3626], [], [], [], [])
    np.savez("ref_snr.npz", ds.tr_SNR)
    ref_label = np.load("%s/ref_label.npz" % SPEC_DIR)['arr_0'][inds]
    #ref_label = np.load("%s/xval_cannon_label_vals.npz" %TR_LAB_DIR)['arr_0']
    np.savez("ref_label.npz", ref_label)
    assignments = np.load("%s/../assignments.npz" % DATA_DIR)['arr_0']
    print("looping through %s sets" % num_sets)
    for leave_out in range(0, num_sets):
        print("leaving out %s" % leave_out)
        training = assignments != leave_out
        test = assignments == leave_out
        tr_id = ref_id[training]
        tr_flux = ref_flux[training]
        tr_ivar = ref_ivar[training]
        # NaN inverse variances would poison the fit; zero their weight.
        tr_ivar[np.isnan(tr_ivar)] = 0.0
        tr_label = ref_label[training]
        np.savez("tr_set_%s.npz" % leave_out, tr_id, tr_flux, tr_ivar, tr_label)
        test_id = ref_id[test]
        test_flux = ref_flux[test]
        test_ivar = ref_ivar[test]
        test_ivar[np.isnan(test_ivar)] = 0.0
        test_label = ref_label[test]
        np.savez("test_set_%s.npz" % leave_out, test_id, test_flux, test_ivar, test_label)
        ds = dataset.Dataset(wl, tr_id, tr_flux, tr_ivar, tr_label,
                             test_id, test_flux, test_ivar)
        print(ds.wl)
        ds.set_label_names(label_names)
        fig = ds.diagnostics_SNR()
        plt.savefig("SNRdist_%s.png" % leave_out)
        plt.close()
        fig = ds.diagnostics_ref_labels()
        plt.savefig("ref_label_triangle_%s.png" % leave_out)
        plt.close()
        np.savez("tr_snr_%s.npz" % leave_out, ds.tr_SNR)
        modelf = "model_%s.npz" % leave_out
        if glob.glob(modelf):
            # Reuse a previously trained model for this split.
            print("model already exists")
            coeffs = np.load(modelf)['arr_0']
            scatters = np.load(modelf)['arr_1']
            chisqs = np.load(modelf)['arr_2']
            pivots = np.load(modelf)['arr_3']
            m = model.CannonModel(2)
            m.coeffs = coeffs
            m.scatters = scatters
            m.chisqs = chisqs
            m.pivots = pivots
        else:
            m = train(ds, leave_out)
        # Hand the held-out labels to the dataset so validation can
        # compare inferred vs. reference values.
        ds.tr_label = test_label
        validate(ds, m, leave_out)
def test():
    """Run the Cannon test step on the held-out test set.

    Rebuilds the trained 4-label model from its saved arrays, fits each
    test spectrum from 7 random training-label starting guesses, and
    saves the lowest-chisq labels/errors/chisq per object.
    """
    wl = np.load("%s/wl.npz" %DATA_DIR)['arr_0']
    tr_id = np.load("%s/tr_id.npz" %DATA_DIR)['arr_0']
    tr_flux = np.load("%s/tr_flux_norm.npz" %DATA_DIR)['arr_0']
    tr_ivar = np.load("%s/tr_ivar_norm.npz" %DATA_DIR)['arr_0']
    test_id = np.load("%s/test_id.npz" %DATA_DIR)['arr_0']
    test_flux = np.load("%s/test_flux_norm.npz" %DATA_DIR)['arr_0']
    test_ivar = np.load("%s/test_ivar_norm.npz" %DATA_DIR)['arr_0']
    tr_label = np.load("%s/tr_label.npz" %DATA_DIR)['arr_0']
    coeffs = np.load("%s/coeffs.npz" %DATA_DIR)['arr_0']
    scatters = np.load("%s/scatters.npz" %DATA_DIR)['arr_0']
    chisqs = np.load("%s/chisqs.npz" %DATA_DIR)['arr_0']
    pivots = np.load("%s/pivots.npz" %DATA_DIR)['arr_0']
    # Only the first four labels are used.
    ds = dataset.Dataset(
        wl, tr_id, tr_flux, tr_ivar, tr_label[:,0:4],
        test_id, test_flux, test_ivar)
    np.savez("%s/test_SNR.npz" %DATA_DIR, ds.test_SNR)
    ds.set_label_names(["Teff", "logg", "FeH", "aFe"])
    # Rebuild the trained model from its saved arrays.
    md = model.CannonModel(2)
    md.coeffs = coeffs
    md.scatters = scatters
    md.chisqs = chisqs
    md.pivots = pivots
    md.diagnostics_leading_coeffs(ds)
    nguesses = 7
    nobj = len(ds.test_ID)
    nlabels = ds.tr_label.shape[1]
    # Starting guesses: random training objects' labels, pivot-relative.
    # NOTE(review): indices are drawn from range(nobj) (test-set size)
    # but index ds.tr_label — confirm tr_label has at least nobj rows.
    choose = np.random.randint(0,nobj,size=nguesses)
    starting_guesses = ds.tr_label[choose]-md.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)
    ds.tr_label = np.zeros((nobj, nlabels))
    for ii,guess in enumerate(starting_guesses):
        a,b,c = test_step_iteration(ds,md,starting_guesses[ii])
        labels[ii,:] = a
        chisq[ii,:] = b
        errs[ii,:] = c
    np.savez("%s/labels_all_starting_vals.npz" %DATA_DIR, labels)
    np.savez("%s/chisq_all_starting_vals.npz" %DATA_DIR, chisq)
    np.savez("%s/errs_all_starting_vals.npz" %DATA_DIR, errs)
    # Keep the lowest-chisq solution per object.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj,val in enumerate(choose):
        best_labels[jj,:] = labels[:,jj,:][val]
        best_errs[jj,:] = errs[:,jj,:][val]
    np.savez("%s/test_cannon_labels.npz" %DATA_DIR, best_labels)
    np.savez("%s/test_errs.npz" %DATA_DIR, best_errs)
    np.savez("%s/test_chisq.npz" %DATA_DIR, best_chisq)
    ds.test_label_vals = best_labels
import numpy as np
import pickle
import glob
from matplotlib import rc
from lamost import load_spectra, load_labels
from TheCannon import continuum_normalization
from TheCannon import dataset
from TheCannon import model

# Use LaTeX serif text for all matplotlib output.
rc('text', usetex=True)
rc('font', family='serif')

# Load the raw test spectra saved by the preparation step.
with np.load("test_data_raw.npz") as data:
    test_IDs = data['arr_0']
    wl = data['arr_1']
    test_flux = data['arr_2']
    test_ivar = data['arr_3']

# Build a dataset with a 10-object training stub; only the test side is
# normalized.  NOTE(review): this rebinds `data` from the npz handle
# above to the Dataset object.
data = dataset.Dataset(
    wl, test_IDs[0:10], test_flux[0:10,:], test_ivar[0:10,:],
    [1], test_IDs, test_flux, test_ivar)
data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
data.continuum_normalize_gaussian_smoothing(L=50)
# np.savez appends ".npz": writes ./test_norm.npz.
np.savez("./test_norm", test_IDs, wl, data.test_flux, data.test_ivar)
# Script: APOGEE DR10 example — prepare data and identify continuum pixels.
from TheCannon import dataset
from TheCannon import model

# (1) PREPARE DATA
# Load training spectra and reference labels (apogee is imported elsewhere
# in this file).
tr_ID, wl, tr_flux, tr_ivar = apogee.load_spectra("example_DR10/Data")
tr_label = apogee.load_labels("example_DR10/reference_labels.csv")

# doing a 1-to-1 test for simplicity: the test set is the training set
test_ID = tr_ID
test_flux = tr_flux
test_ivar = tr_ivar
# (Fix: removed a second, redundant apogee.load_labels() call that re-read
# the same CSV into tr_label.)

ds = dataset.Dataset(wl, tr_ID, tr_flux, tr_ivar, tr_label,
                     test_ID, test_flux, test_ivar)

# set LaTeX label names for making diagnostic plots
ds.set_label_names(['T_{eff}', '\log g', '[Fe/H]'])

# Plot SNR distributions and triangle plot of reference labels
fig = ds.diagnostics_SNR()
fig = ds.diagnostics_ref_labels()

# (2) IDENTIFY CONTINUUM PIXELS
# Pseudo-continuum-normalize the training set (90th-percentile quantile,
# 50-pixel window), then pick continuum pixels chunk-by-chunk.
pseudo_tr_flux, pseudo_tr_ivar = ds.continuum_normalize_training_q(
        q=0.90, delta_lambda=50)
# Pixel index ranges of the detector chunks to treat independently.
ds.ranges = [[371, 3192], [3697, 5500], [5500, 5997], [6461, 8255]]
# Flag the 7% most continuum-like pixels in each range.
contmask = ds.make_contmask(pseudo_tr_flux, pseudo_tr_ivar, frac=0.07)
# Script: rebuild model spectra for the reference set and select outlier
# stars (large label residual + large radial velocity) for residual plots.
# NOTE: ref_flux and DIR are defined earlier in this file.
ref_ivar = np.load("%s/ref_ivar.npz" %DIR)['arr_0']
ref_label = np.load("%s/ref_label.npz" %DIR)['arr_0']
cannon_label = np.load("%s/xval_cannon_label_vals.npz" %DIR)['arr_0']
snr = np.load("%s/ref_snr.npz" %DIR)['arr_0']
rv = np.load("%s/ref_rvs.npz" %DIR)['arr_0']
chisq = np.load("%s/xval_cannon_label_chisq.npz" %DIR)['arr_0']
coeffs = np.load("%s/coeffs.npz" %DIR)['arr_0']
pivots = np.load("%s/pivots.npz" %DIR)['arr_0']
scatters = np.load("%s/scatters.npz" %DIR)['arr_0']

# Create model spectra from the trained quadratic Cannon model.
m = model.CannonModel(2)
m.coeffs = coeffs
m.pivots = pivots
m.scatters = scatters
# Only the test side of the Dataset is needed to synthesize spectra.
ds = dataset.Dataset([], [], [], [], [], [], ref_flux, ref_ivar)
ds.test_label_vals = cannon_label
m.infer_spectra(ds)

# Plot residuals of stars with large diff and large negative rvel
# (Neg rvel seems worse to me, in the scatterplot...)
# Residual in label column 5 between reference and Cannon values.
diff = ref_label[:,5] - cannon_label[:,5]
# Fix: the first two selections below were computed and then immediately
# overwritten (dead code); kept here as alternatives to toggle.
#choose_bad = np.logical_and(np.abs(diff) < 0.01, np.abs(rv) < 10)
#choose_bad = np.logical_and(diff < -0.05, rv < -50)
choose_bad = np.logical_and(diff > 0.05, rv > 50)
choose_snr = snr > 70
#choose_chisq = np.logical_and(chisq > 1000, chisq < 10000)
#choose_quality = np.logical_and(choose_snr, choose_chisq)
#choose = np.logical_and(choose_bad, choose_quality)
choose = np.logical_and(choose_bad, choose_snr)
# Script: load LAMOST training spectra plus PS1 photometric colors, then
# continuum-normalize the spectra.  The colors are later appended to the
# spectra as extra "pixels" — presumably; TODO confirm against downstream code.
tr_ID = np.loadtxt("example_PS1/ps_colors_ts_overlap.txt",
                   usecols=(1,), dtype='str', delimiter=',')
dir_dat = "example_LAMOST/Data_All"
tr_IDs, wl, tr_flux, tr_ivar = load_spectra(dir_dat, tr_ID)
label_file = "apogee_dr12_labels.csv"
all_labels = load_labels(label_file, tr_IDs)
# Columns: Teff, logg, [M/H], [alpha/Fe].
teff = all_labels[:,0]
logg = all_labels[:,1]
mh = all_labels[:,2]
alpha = all_labels[:,3]
tr_label = np.vstack((teff, logg, mh, alpha)).T

# 1-to-1 setup: training set doubles as the test set.
data = dataset.Dataset(
        wl, tr_IDs, tr_flux, tr_ivar, tr_label, tr_IDs, tr_flux, tr_ivar)
data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])
data.continuum_normalize_gaussian_smoothing(L=50)

# get colors and their uncertainties from the PS1 overlap file
colors = np.loadtxt("example_PS1/ps_colors_ts_overlap.txt",
                    usecols=(2,4,6,8), dtype='float', delimiter=',')
errors = np.loadtxt("example_PS1/ps_colors_ts_overlap.txt",
                    usecols=(3,5,7,9), dtype='float', delimiter=',')
ivars = 1./ errors**2
# NOTE(review): sorting photometry rows by argsort(tr_ID) assumes
# load_spectra returned tr_IDs in sorted-ID order — verify the alignment.
colors = colors[np.argsort(tr_ID)]
ivars = ivars[np.argsort(tr_ID)]
# Inflate photometric inverse variances — presumably to weight colors
# against spectral pixels; TODO confirm the 1e15 scale.
ivars = ivars * 1e15
def fitting_ve(name):
    """Fit one apStar FITS file and save spectra, parameters, and ve results.

    Opens the FITS file at *name*, masks bad pixels, continuum-normalizes
    with TheCannon, infers labels from the combined spectrum, fits the
    per-visit spectrum parameters, derives a velocity estimate (ve), and
    writes everything to a new FITS file.  Returns em=1 on success, em=0
    on failure.

    NOTE(review): this function reads several names from enclosing scope
    (i, N, keep, wl, model, ts, get_pixmask, MJD2BJD) — it is not
    self-contained; confirm those globals exist at call time.
    """
    image_path = name
    # NOTE(review): on a missing file this only prints and flags keep[i];
    # execution continues and fits.open below will raise anyway.
    if not os.path.exists(image_path):
        print("{}/{} could not be found: {}".format(i + 1, N, image_path))
        keep[i] = False
    # We only store flux, ivar, inf_flux, parameters, parameters_new,
    # parameters_sim, ve (n*3) (includes ve, ve_new, ve_sim)
    try:
        image = fits.open(image_path, ignore_missing_end=True)
        dat = Table.read(image_path)
        flux = image[1].data
        flux_err = image[2].data
        # Force 2-D so single-visit files look like (1, npix).
        flux = np.atleast_2d(flux)
        flux_err = np.atleast_2d(flux_err)
    except IOError:
        print("opts. This one fail")
        em = 0
    else:
        em = 1
    # NOTE(review): if the except branch ran, flux/flux_err are undefined
    # here and the next line raises NameError — the em=0 path never returns
    # cleanly.
    badpix = get_pixmask(flux, flux_err)
    ivar = 1.0 / flux_err ** 2
    error = flux_err
    # badpix is an array; its length matches the APOGEE pixel grid (8575)
    flux = np.array(flux, dtype=np.float64)
    ivar = np.array(ivar, dtype=np.float64)
    # Replace bad pixels with the median flux and zero their weight.
    flux[badpix] = np.median(flux)
    ivar[badpix] = 0.0
    flux = np.array(flux)
    ivar = np.array(ivar)
    # normalize flux: build a throwaway Dataset (labels are placeholders)
    tr_ID = image_path
    test_labels_all_i = np.array([5000, 1, 1])
    ds = dataset.Dataset(wl, tr_ID, flux, ivar,
                         test_labels_all_i, tr_ID, flux, ivar)
    # Detector chunk pixel ranges treated independently.
    ds.ranges = [[371, 3192], [3697, 5997], [6461, 8255]]
    # set pseudo-continuous spectrum (quantile normalization)
    pseudo_tr_flux, pseudo_tr_ivar = ds.continuum_normalize_training_q \
        (q=0.90, delta_lambda=50)
    # set mask: flag the 7% most continuum-like pixels
    contmask = ds.make_contmask(pseudo_tr_flux, pseudo_tr_ivar, frac=0.07)
    # get continuum mask
    ds.set_continuum(contmask)
    # fit the normalized spectrum in the continuum region (sinusoid, order 3)
    cont = ds.fit_continuum(3, "sinusoid")
    # Obtain the normalized flux
    norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
        ds.continuum_normalize(cont)
    norm_tr_flux = np.atleast_2d(norm_tr_flux)
    # NOTE(review): em=0 here flags <3 visits but execution continues.
    if len(norm_tr_flux[:,0])<3:
        em = 0
    else:
        nothing = 1
    # infer labels
    # inf_labels = model.fit(norm_tr_flux, norm_tr_ivar)
    # Use inferred labels from the combined spectra:
    inf_labels = model.fit(norm_tr_flux, norm_tr_ivar)
    # only use the inferred labels from the combined (row 0) spectrum,
    # replicated for every visit
    com = len(inf_labels[:, 0])
    inf_labels_com = inf_labels[0, :]
    inf_labels = []
    for z in range(0, com):
        inf_labels.append(inf_labels_com)
    inf_labels = np.array(inf_labels)
    # Synthesize model spectra from the label vector.
    v = model.vectorizer.get_label_vector(inf_labels)
    inf_flux = np.dot(v, model.theta.T)
    opt_flux, parameters = model.fitting_spectrum_parameters_single \
        (norm_tr_flux, norm_tr_ivar, inf_flux)
    # calculate chi-squared for the inferred and mixed model spectra
    chi_inf = (norm_tr_flux-inf_flux)**2*norm_tr_ivar
    chi_inf = np.sum(chi_inf,axis=1)
    chi_mix = (norm_tr_flux-opt_flux)**2*norm_tr_ivar
    chi_mix = np.sum(chi_mix,axis=1)
    # Velocity estimate from the fitted parameters; 4144.68 is presumably a
    # pixel-to-velocity conversion constant — TODO confirm its origin.
    ve = (parameters[:, 2] - parameters[:, 0]) / (parameters[:, 0] +
         parameters[:, 1] + parameters[:, 2]) * 4144.68
    ve_un = model.uncertainty
    # old
    a0 = parameters
    a1 = ve
    a2 = ve_un
    # covariance matrix for abc
    a3 = model.un_cov
    # spectra
    a4 = norm_tr_flux
    a5 = norm_tr_ivar
    a6 = inf_flux
    a7 = opt_flux
    # inf_labels are from the combined spectrum (replicated per visit)
    a8 = inf_labels
    a9 = chi_inf
    a10 = chi_mix
    # VHELIO (heliocentric velocity from the FITS table)
    a11 = np.array(dat[0]["VHELIO"])
    # Fiber number
    a12 = np.array(dat[0]["FIBER"])
    # Files
    # BJD: convert MJD to barycentric JD using the target coordinates
    RA = image[0].header["RA"]
    DEC = image[0].header["DEC"]
    SNR = image[0].header["SNR"]
    MJD = dat[0]["MJD"]
    c = SkyCoord(RA, DEC, frame='icrs', unit='deg')
    BJD = MJD2BJD(MJD, c)
    a13 = np.array(BJD)
    # save all products as a multi-extension FITS file
    try:
        # pay attention to the fits file saving path (input path is rewritten
        # to the local output directory)
        path_fits_i = image_path.replace(
            "/Volumes/Data_2TB/Data/DR13_rc/apStar-r6-",
            "/Users/caojunzhi/Desktop/Data/dr13_red_clump/")
        print("saving files" + path_fits_i)
        hdu = fits.PrimaryHDU(data=a0)
        hdu.header[
            'COMMENT'] = "Simple orange juice"
        # add header info
        hdu.header['SNR'] = SNR
        hdu.header['RA'] = RA
        hdu.header['DEC'] = DEC
        # NOTE(review): clobber= is deprecated in modern astropy (overwrite=).
        hdu.writeto(path_fits_i, clobber=True)
        # Append each product as its own extension, in fixed order a1..a13.
        ts.append(path_fits_i, a1)
        ts.append(path_fits_i, a2)
        ts.append(path_fits_i, a3)
        ts.append(path_fits_i, a4)
        ts.append(path_fits_i, a5)
        ts.append(path_fits_i, a6)
        ts.append(path_fits_i, a7)
        ts.append(path_fits_i, a8)
        ts.append(path_fits_i, a9)
        ts.append(path_fits_i, a10)
        ts.append(path_fits_i, a11)
        ts.append(path_fits_i, a12)
        ts.append(path_fits_i, a13)
    except OSError:
        print("fail")
        em = 0
    return em
def run(date):
    """Apply a trained Cannon model to one date's worth of LAMOST test objects.

    Loads a pre-trained quadratic model from the working directory, loads the
    test spectra listed in test_obj/<date>_test_obj.txt, attaches the
    corresponding LAMOST labels, continuum-normalizes (with caching), infers
    Cannon labels, and writes results and 1-to-1 diagnostic plots.
    """
    # Training step has already been completed. Load the model.
    spectral_model = model.CannonModel(2) # 2 = quadratic model
    spectral_model.coeffs = np.load("./coeffs.npz")['arr_0']
    # NOTE(review): filename is "scatter.npz" here but "scatters.npz"
    # elsewhere in this file — confirm which file the training step wrote.
    spectral_model.scatters = np.load("./scatter.npz")['arr_0']
    spectral_model.chisqs = np.load("./chisqs.npz")['arr_0']
    spectral_model.pivots = np.load("./pivots.npz")['arr_0']

    # Load the wavelength array
    wl = np.load("wl.npz")['arr_0']

    # Load the test set (IDs only; wl is overwritten by load_spectra below)
    test_ID = np.loadtxt("test_obj/%s_test_obj.txt" %date, dtype=str)
    print("%s test objects" %len(test_ID))
    dir_dat = "/home/share/LAMOST/DR2/DR2_release"
    test_IDs, wl, test_flux, test_ivar = load_spectra(dir_dat, test_ID)
    np.savez("output/%s_ids" %date, test_IDs)
    #np.savez("./%s_data_raw" %date, test_flux, test_ivar)

    # Load the corresponding LAMOST labels; column 0 holds the object ID,
    # columns 1.. hold the label values.
    labels = np.load("lamost_labels/lamost_labels_%s.npz" %date)['arr_0']
    inds = np.array([np.where(labels[:,0]==a)[0][0] for a in test_IDs])
    nstars = len(test_IDs)
    # 4 columns; only the first 3 are filled (last stays zero).
    lamost_labels = np.zeros((nstars,4))
    lamost_labels[:,0:3] = labels[inds,:][:,1:].astype(float)
    np.savez("output/%s_lamost_label" %date, lamost_labels)

    # Set dataset object (test set doubles as the training side)
    data = dataset.Dataset(
            wl, test_IDs, test_flux, test_ivar, lamost_labels,
            test_IDs, test_flux, test_ivar)

    # set the headers for plotting
    data.set_label_names(['T_{eff}', '\log g', '[M/H]', '[\\alpha/Fe]'])

    # Plot SNR distribution
    data.diagnostics_SNR(figname="%s_SNRdist.png" %date)
    np.savez("output/%s_SNR" %date, data.test_SNR)

    # Continuum normalize, reusing a cached result when it exists.
    filename = "output/%s_norm.npz" %date
    if glob.glob(filename):
        print("already cont normalized")
        data.test_flux = np.load(filename)['arr_0']
        data.test_ivar = np.load(filename)['arr_1']
    else:
        # Shrink the unused training side to one object to save work —
        # presumably only the test spectra are normalized; TODO confirm.
        data.tr_ID = data.tr_ID[0]
        data.tr_flux = data.tr_flux[0,:]
        data.tr_ivar = data.tr_ivar[0,:]
        data.continuum_normalize_gaussian_smoothing(L=50)
        np.savez("output/%s_norm" %date, data.test_flux, data.test_ivar)

    # Infer labels; results land in data.test_label_vals.
    errs, chisq = spectral_model.infer_labels(data)
    np.savez("output/%s_cannon_labels.npz" %date,
             data.test_label_vals)
    np.savez("./%s_formal_errors.npz" %date, errs)
    np.savez("./%s_chisq.npz" %date, chisq)

    # Make plots
    data.test_label_vals = data.test_label_vals[:,0:3]
    # so it doesn't try alpha
    data.set_label_names(['T_{eff}', '\log g', '[M/H]'])
    data.diagnostics_1to1(figname="%s_1to1_label" %date)
def test_step():
    """Run the Cannon test step on the excised (held-out) reference objects.

    Loads the excised subset of the reference spectra/labels from SPEC_DIR,
    evaluates a trained quadratic Cannon model (from MODEL_DIR) starting from
    several randomly chosen reference-label guesses, and saves the per-object
    best (lowest chi-squared) labels, errors, and chi-squared, plus a 1-to-1
    diagnostic plot.  Relies on module-level SPEC_DIR, MODEL_DIR, and
    test_step_iteration.
    """
    # Bug fix: np.load on an .npz returns an NpzFile archive, not the array;
    # index 'arr_0' as every other load in this file does.
    wl = np.load(SPEC_DIR + "/wl_cols.npz")['arr_0']
    ref_id_all = np.load(SPEC_DIR + "/ref_id_col.npz")['arr_0']
    excised = np.load(SPEC_DIR + "/excised_obj/excised_ids.npz")['arr_0']
    # Indices of the excised objects within the full reference set.
    inds = np.array([np.where(ref_id_all == val)[0][0] for val in excised])
    test_ID = ref_id_all[inds]
    print(str(len(test_ID)) + " objects")
    test_flux = np.load("%s/ref_flux_col.npz" % (SPEC_DIR))['arr_0'][inds]
    test_ivar = np.load("%s/ref_ivar_col.npz" % (SPEC_DIR))['arr_0'][inds]
    apogee_label = np.load("%s/ref_label.npz" % (SPEC_DIR))['arr_0'][inds]
    #np.savez("excised_label.npz", apogee_label)

    # Only two training rows are needed; the test side carries the real data.
    ds = dataset.Dataset(wl, test_ID, test_flux[0:2, :], test_ivar[0:2, :],
                         apogee_label, test_ID, test_flux, test_ivar)
    ds.set_label_names([
        'T_{eff}', '\log g', '[Fe/H]', '[C/M]', '[N/M]', '[\\alpha/Fe]',
        'A_k'
    ])
    np.savez("excised_snr.npz", ds.test_SNR)
    print("DONE")

    # Rehydrate the trained quadratic model.
    m = model.CannonModel(2)
    m.coeffs = np.load(MODEL_DIR + "/coeffs.npz")['arr_0']
    m.scatters = np.load(MODEL_DIR + "/scatters.npz")['arr_0']
    m.chisqs = np.load(MODEL_DIR + "/chisqs.npz")['arr_0']
    m.pivots = np.load(MODEL_DIR + "/pivots.npz")['arr_0']

    # Starting guesses: labels of randomly chosen excised objects, expressed
    # relative to the model pivots.
    nlabels = len(m.pivots)
    nobj = len(test_ID)
    nguesses = 7
    choose = np.random.randint(0, nobj, size=nguesses)
    print(apogee_label.shape)
    print(choose.shape)
    print(m.pivots.shape)
    starting_guesses = apogee_label[choose] - m.pivots
    labels = np.zeros((nguesses, nobj, nlabels))
    chisq = np.zeros((nguesses, nobj))
    errs = np.zeros(labels.shape)

    # Fit once per starting guess.
    for ii, guess in enumerate(starting_guesses):
        a, b, c = test_step_iteration(ds, m, starting_guesses[ii])
        labels[ii, :] = a
        chisq[ii, :] = b
        errs[ii, :] = c

    # Per object, keep the guess with the lowest chi-squared.
    choose = np.argmin(chisq, axis=0)
    best_chisq = np.min(chisq, axis=0)
    best_labels = np.zeros((nobj, nlabels))
    best_errs = np.zeros(best_labels.shape)
    for jj, val in enumerate(choose):
        best_labels[jj, :] = labels[:, jj, :][val]
        best_errs[jj, :] = errs[:, jj, :][val]
    np.savez("excised_all_cannon_labels.npz", best_labels)
    np.savez("excised_cannon_label_chisq.npz", best_chisq)
    np.savez("excised_cannon_label_errs.npz", best_errs)
    ds.test_label_vals = best_labels
    ds.diagnostics_1to1(figname="excised_1to1_test_label")