def betas_from_lower_subsample(lam_sub_hi, lam_sub_lo):
    """Warm-start betas on a fine wavelength grid from a coarser cached fit.

    Looks for a pickled basis fit computed on the coarse grid implied by
    ``lam_sub_lo``.  If the cache file exists, each beta row is linearly
    interpolated onto the finer grid implied by ``lam_sub_hi`` and the
    stacked result is returned; otherwise returns None.
    """
    coarse_grid, _ = ru.get_lam0(lam_subsample=lam_sub_lo)
    fine_grid, _ = ru.get_lam0(lam_subsample=lam_sub_hi)
    cache_path = BASIS_DIR + "basis_fit_K-%d_V-%d_split-%s.pkl" % \
        (NUM_BASES, len(coarse_grid), SPLIT_TYPE)
    # guard clause: no cached coarse fit means nothing to warm-start from
    if not os.path.exists(cache_path):
        return None
    print("grabbing file (%s) from CACHE, optimizing more!" % cache_path)
    th, lam0, lam0_delta, parser = qfb.load_basis_fit(cache_path)
    coarse_betas = parser.get(th, 'betas')
    # interpolate each basis row onto the finer wavelength grid
    return np.array([np.interp(fine_grid, coarse_grid, row)
                     for row in coarse_betas])
def betas_from_lower_subsample(lam_sub_hi, lam_sub_lo):
    """Interpolate cached low-resolution basis betas onto a finer grid.

    If a pickled basis fit for the coarse wavelength grid (``lam_sub_lo``)
    exists on disk, its beta rows are resampled onto the finer grid
    (``lam_sub_hi``) via linear interpolation and returned as an array;
    if no cache file is found, None is returned.
    """
    lam_coarse, lam_coarse_delta = ru.get_lam0(lam_subsample=lam_sub_lo)
    lam_fine, lam_fine_delta = ru.get_lam0(lam_subsample=lam_sub_hi)
    fname = "basis_fit_K-%d_V-%d_split-%s.pkl" % \
        (NUM_BASES, len(lam_coarse), SPLIT_TYPE)
    basis_file = BASIS_DIR + fname
    if os.path.exists(basis_file):
        print("grabbing file (%s) from CACHE, optimizing more!" % basis_file)
        th, lam0, lam0_delta, parser = qfb.load_basis_fit(basis_file)
        # resample each cached beta row onto the fine grid, one at a time
        resampled_rows = []
        for beta_row in parser.get(th, 'betas'):
            resampled_rows.append(np.interp(lam_fine, lam_coarse, beta_row))
        return np.array(resampled_rows)
    else:
        return None
def load_fit_params(num_bases, split_type, lam_subsample, basis_dir=""): ### load MLE basis lam0, lam0_delta = ru.get_lam0(lam_subsample=lam_subsample) basis_file = os.path.join(basis_dir, qfb.basis_filename(num_bases = num_bases, split_type = split_type, lam0 = lam0)) print "loading fit file: ", basis_file th, lam0, lam0_delta, parser = load_basis_fit(basis_file) mus = parser.get(th, 'mus') betas = parser.get(th, 'betas') omegas = parser.get(th, 'omegas') return mus, betas, omegas, th, lam0, lam0_delta, parser
def load_basis(num_bases, split_type, lam_subsample, basis_dir=""):
    """Load the cached MLE basis fit and return the normalized basis B_mle.

    Parameters
    ----------
    num_bases : number of basis vectors (K) in the cached fit
    split_type : train/test split identifier used in the fit filename
    lam_subsample : wavelength-grid subsampling factor
    basis_dir : directory holding the cached fit file (default: cwd)

    Returns
    -------
    B_mle : 2-d array; each row is exp(beta) normalized so that
        sum(row * lam0_delta) == 1, i.e. a density over the rest-frame
        wavelength grid.
    """
    ### load MLE basis
    lam0, lam0_delta = ru.get_lam0(lam_subsample=lam_subsample)
    basis_file = os.path.join(basis_dir,
                              qfb.basis_filename(num_bases=num_bases,
                                                 split_type=split_type,
                                                 lam0=lam0))
    th, lam0, lam0_delta, parser = load_basis_fit(basis_file)
    # Only betas contribute to the returned basis.  The original code also
    # loaded and exponentiated 'mus' and 'omegas' (W_mle, M_mle) but never
    # used them; that dead work has been removed.
    betas = parser.get(th, 'betas')
    B_mle = np.exp(betas)
    B_mle /= np.sum(B_mle * lam0_delta, axis=1, keepdims=True)
    return B_mle
train_idx_sub = train_idx[rand_idx[0:NUM_TRAIN_EXAMPLE]] #rand_idx = np.random.permutation(len(test_idx)) #test_idx_sub = test_idx[rand_idx[0:NUM_TEST_EXAMPLE]] ## only load in NUM_TRAIN spec files train_spec_files = np.array(spec_files)[train_idx_sub] #test_spec_files = np.array(spec_files)[test_idx] spec_grid, spec_ivar_grid, spec_mod_grid, unique_lams, spec_zs, spec_ids = \ load_cached_train_matrix(train_spec_files, train_idx) ## initialize a basis using existing eigenQuasar Model lam_subsample = 5 lam0, lam0_delta = ru.get_lam0( lam_subsample=lam_subsample, eigen_file='../../../../data/eigen_specs/spEigenQSO-55732.fits') # resample spectra and spectra inverse variance onto common rest frame spectra_resampled, spectra_ivar_resampled, lam_mat = \ ru.resample_rest_frame(spectra = spec_grid, spectra_ivar = spec_ivar_grid, zs = spec_zs, lam_obs = unique_lams, lam0 = lam0) ## construct smooth + spiky prior over betas print " Computing covariance cholesky " beta_kern = GPy.kern.Matern52(input_dim=1, variance=BETA_VARIANCE, lengthscale=BETA_LENGTHSCALE)
# Continue reading cached arrays from an already-open file handle
# (spec_grid / spec_ivar_grid were presumably read just before this chunk).
spec_mod_grid = np.load(handle)
unique_lams = np.load(handle)
spec_zs = np.load(handle)
spec_ids = np.load(handle)
handle.close()

## iterate over different lambda subsamples to get a quick starting
## point for more refined model
lam_schedule = [5]
for lam_idx, lam_subsample in enumerate(lam_schedule):
    print "========================================================="
    print " FITTING LAM SUBSAMPLE %d" % lam_subsample
    sys.stdout.flush()

    ## initialize a basis using existing eigenQuasar Model
    # empty eigen_file — presumably falls back to a default grid; confirm
    # against ru.get_lam0's handling of eigen_file="".
    lam0, lam0_delta = ru.get_lam0(lam_subsample=lam_subsample,
                                   eigen_file="")

    # resample spectra and spectra inverse variance onto common rest frame
    spectra_resampled, spectra_ivar_resampled, lam_mat = \
        ru.resample_rest_frame(spectra = spec_grid,
                               spectra_ivar = spec_ivar_grid,
                               zs = spec_zs,
                               lam_obs = unique_lams,
                               lam0 = lam0)

    ## construct smooth + spiky prior over betas
    # Matern-5/2 kernel over the rest-frame wavelength grid; the loop body
    # continues past this view.
    print " Computing covariance cholesky "
    beta_kern = GPy.kern.Matern52(input_dim=1, variance=BETA_VARIANCE,
                                  lengthscale=BETA_LENGTHSCALE)
    K_beta = beta_kern.K(lam0.reshape((-1, 1)))
# Randomly permute the training indices, then keep the first
# NUM_TRAIN_EXAMPLE as the training subsample.
rand_idx = np.random.permutation(len(train_idx))
train_idx_sub = train_idx[rand_idx[0:NUM_TRAIN_EXAMPLE]]
#rand_idx = np.random.permutation(len(test_idx))
#test_idx_sub = test_idx[rand_idx[0:NUM_TEST_EXAMPLE]]

## only load in NUM_TRAIN spec files
train_spec_files = np.array(spec_files)[train_idx_sub]
#test_spec_files = np.array(spec_files)[test_idx]
# NOTE(review): subsampled file list is paired with the full train_idx —
# verify load_cached_train_matrix expects that, not train_idx_sub.
spec_grid, spec_ivar_grid, spec_mod_grid, unique_lams, spec_zs, spec_ids = \
    load_cached_train_matrix(train_spec_files, train_idx)

## initialize a basis using existing eigenQuasar Model
lam_subsample = 5
lam0, lam0_delta = ru.get_lam0(lam_subsample=lam_subsample,
    eigen_file = '../../../../data/eigen_specs/spEigenQSO-55732.fits')

# resample spectra and spectra inverse variance onto common rest frame
spectra_resampled, spectra_ivar_resampled, lam_mat = \
    ru.resample_rest_frame(spectra = spec_grid,
                           spectra_ivar = spec_ivar_grid,
                           zs = spec_zs,
                           lam_obs = unique_lams,
                           lam0 = lam0)

## construct smooth + spiky prior over betas
# GP Matern-5/2 kernel over wavelength; its Gram matrix K_beta is the
# smooth part of the prior covariance over betas.
print " Computing covariance cholesky "
beta_kern = GPy.kern.Matern52(input_dim = 1,
                              variance = BETA_VARIANCE,
                              lengthscale = BETA_LENGTHSCALE)
K_beta = beta_kern.K(lam0.reshape((-1, 1)))
# Banner for this sampling run; chain_idx, Nsamps, length_scale, init_iter
# and K are defined earlier (outside this chunk).
print "==== SAMPLING CHAIN ID = %d ============== "%chain_idx
print " Nsamps = %d "%Nsamps
print " length_scale = %2.2f"%length_scale
print " num init_iters = %d "%init_iter
print " K = %d "%K

##################################################################
## load a handful of quasar spectra and resample
##################################################################
lam_obs, qtrain, qtest = \
    load_data_clean_split(spec_fits_file = 'quasar_data.fits',
                          Ntrain = 400)
N = qtrain['spectra'].shape[0]

## resample to lam0 => rest frame basis
lam0, lam0_delta = get_lam0(lam_subsample=10)
print " resampling de-redshifted data"
spectra_resampled, spectra_ivar_resampled, lam_mat = \
    resample_rest_frame(qtrain['spectra'],
                        qtrain['spectra_ivar'],
                        qtrain['Z'],
                        lam_obs,
                        lam0)

# clean nans
# Zero flux with zero inverse variance means "no information" for those
# pixels, so NaNs are effectively masked out of the likelihood.
X = spectra_resampled
X[np.isnan(X)] = 0
Lam = spectra_ivar_resampled
Lam[np.isnan(Lam)] = 0

###########################################################################
## Set prior variables (K_chol, sig2_omega, sig2_mu)
# assemble a few thousand samples B0 = parser.get(th_samples[0], 'betas') B_samps = np.zeros((len(chain_perm), B0.shape[0], B0.shape[1])) for i, idx in enumerate(chain_perm): betas = K_chol.dot(parser.get(th_samples[idx, :], 'betas').T).T B_samp = np.exp(betas) B_samp /= np.sum(B_samp * lam0_delta, axis=1, keepdims=True) B_samps[i, :, :] = B_samp B_chains.append(B_samps) B_samps = np.vstack(B_chains) B_samps = B_samps[npr.permutation(B_samps.shape[0]), :, :] B_mle = load_basis(num_bases = NUM_BASES, split_type = SPLIT_TYPE, lam_subsample = LAM_SUBSAMPLE) lam0, lam0_delta = ru.get_lam0(lam_subsample=LAM_SUBSAMPLE) def get_basis_sample(idx, mle = False): """ Method to return a basis sample to condition on (or the MLE if specified) """ if mle: return B_mle else: return B_samps[idx] ########################################################################## ## Load in spectroscopically measured quasars + fluxes ########################################################################## # DR10 qso dataset and spec files qso_psf_flux, qso_psf_flux_ivar, qso_psf_mags, qso_z, \ spec_files, train_idx, test_idx = \ ru.load_DR10QSO_train_test_idx(split_type = SPLIT_TYPE)
# Banner describing this sampling run's configuration (all values defined
# earlier, outside this chunk).
print "==== SAMPLING CHAIN ID = %d ============== " % chain_idx
print " Nsamps = %d " % Nsamps
print " length_scale = %2.2f" % length_scale
print " num init_iters = %d " % init_iter
print " K = %d " % K

##################################################################
## load a handful of quasar spectra and resample
##################################################################
lam_obs, qtrain, qtest = \
    load_data_clean_split(spec_fits_file = 'quasar_data.fits',
                          Ntrain = 400)
N = qtrain['spectra'].shape[0]

## resample to lam0 => rest frame basis
lam0, lam0_delta = get_lam0(lam_subsample=10)
print " resampling de-redshifted data"
spectra_resampled, spectra_ivar_resampled, lam_mat = \
    resample_rest_frame(qtrain['spectra'],
                        qtrain['spectra_ivar'],
                        qtrain['Z'],
                        lam_obs,
                        lam0)

# clean nans
# NaN pixels get zero flux AND zero inverse variance, i.e. they carry no
# weight in any downstream Gaussian likelihood.
X = spectra_resampled
X[np.isnan(X)] = 0
Lam = spectra_ivar_resampled
Lam[np.isnan(Lam)] = 0

###########################################################################
## Set prior variables (K_chol, sig2_omega, sig2_mu)
# Finish reading cached arrays from the open handle (earlier reads are
# outside this chunk), then close it.
spec_mod_grid = np.load(handle)
unique_lams = np.load(handle)
spec_zs = np.load(handle)
spec_ids = np.load(handle)
handle.close()

## iterate over different lambda subsamples to get a quick starting
## point for more refined model
lam_schedule = [5]
for lam_idx, lam_subsample in enumerate(lam_schedule):
    print "========================================================="
    print " FITTING LAM SUBSAMPLE %d"%lam_subsample
    sys.stdout.flush()

    ## initialize a basis using existing eigenQuasar Model
    # eigen_file is empty here — presumably ru.get_lam0 falls back to a
    # default grid; confirm.
    lam0, lam0_delta = ru.get_lam0(lam_subsample=lam_subsample,
                                   eigen_file = "")

    # resample spectra and spectra inverse variance onto common rest frame
    spectra_resampled, spectra_ivar_resampled, lam_mat = \
        ru.resample_rest_frame(spectra = spec_grid,
                               spectra_ivar = spec_ivar_grid,
                               zs = spec_zs,
                               lam_obs = unique_lams,
                               lam0 = lam0)

    ## construct smooth + spiky prior over betas
    # Matern-5/2 GP kernel Gram matrix over lam0; loop body continues past
    # this view.
    print " Computing covariance cholesky "
    beta_kern = GPy.kern.Matern52(input_dim = 1,
                                  variance = BETA_VARIANCE,
                                  lengthscale = BETA_LENGTHSCALE)
    K_beta = beta_kern.K(lam0.reshape((-1, 1)))