    kernel_results.is_accepted,
])

total_min = (time.time() - time_start) / 60.

print('Acceptance Rate: {}'.format(np.mean(is_accepted_)))
print('Total time: {:.2f} min'.format(total_min))

sess.close()

# plt.plot(orig_prob_train, np.mean(f_samples_val, 0), 'o')
# plt.plot(orig_prob_pred, np.mean(f_pred_samples_val, 0), 'o')

""" 7.3.3. prediction and visualization"""
# prediction over a regular grid between [0, 1]
df_pred_val = gp.sample_posterior_full(X_new=orig_prob_pred,
                                       X=orig_prob_derv,
                                       f_sample=f_deriv_samples_val.T,
                                       ls=DEFAULT_LS_CALIB_VAL,
                                       kernel_func=gpr_mono.rbf_hess_1d,
                                       ridge_factor=5e-2,
                                       return_mean=False)

calib_prob_pred_val = gp.sample_posterior_full(X_new=orig_prob_pred,
                                               X=orig_prob_train,
                                               f_sample=f_samples_val.T,
                                               ls=DEFAULT_LS_CALIB_VAL,
                                               kernel_func=gp.rbf,
                                               ridge_factor=5e-2,
                                               return_mean=False,
                                               return_vcov=False)

# sample f conditional on f_deriv
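# The calls above rely on the standard GP conditional identity: given draws
# f at inputs X, a draw at new inputs X_new is sampled from
#   N( K* K^-1 f,  K** - K* K^-1 K*^T ),
# with a small ridge on K for numerical stability, which is what the
# ridge_factor argument controls. A minimal self-contained sketch of that
# identity (helper names and the RBF kernel choice are hypothetical, not
# taken from this script):
import numpy as np

def _rbf_1d(xa, xb, ls=0.3):
    # squared-exponential kernel on 1-d input arrays
    return np.exp(-(xa[:, None] - xb[None, :]) ** 2 / (2. * ls ** 2))

def _sample_gp_conditional(x_new, x, f, ls=0.3, ridge=5e-2):
    # draw f_new | f from N(K* K^-1 f, K** - K* K^-1 K*^T)
    K = _rbf_1d(x, x, ls) + ridge * np.eye(x.size)
    K_s = _rbf_1d(x_new, x, ls)
    K_ss = _rbf_1d(x_new, x_new, ls)
    mean = K_s.dot(np.linalg.solve(K, f))
    cov = K_ss - K_s.dot(np.linalg.solve(K, K_s.T))
    return np.random.multivariate_normal(mean, cov + ridge * np.eye(x_new.size))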
])

print('Acceptance Rate: {}'.format(np.mean(is_accepted_)))

sess.close()

""" 2.3. prediction and visualization"""
# prediction by sample from posterior
f_sample_indices = np.asarray([
    np.argmax(np.random.multinomial(1, pvals=mix_prob))
    for mix_prob in mix_prob_sample_val
])
f_samples_val = np.asarray([f_samples_full_val[i, f_sample_indices[i]]
                            for i in range(num_results)])

f_valid_val = gp.sample_posterior_full(X_new=X_valid, X=X_train,
                                       f_sample=f_samples_val.T,
                                       ls=ls_val, kernel_func=gp.rbf)

# visualize
mu = np.mean(f_valid_val, axis=1)
cov = np.var(f_valid_val, axis=1)

visual_util.gpr_1d_visual(mu, cov,
                          pred_quantiles=[],
                          X_train=X_train, y_train=y_train,
                          X_test=X_valid, y_test=y_valid,
                          title="GP Mixture Posterior, {} Mixtures, Hamiltonian MC".format(n_mix_val),
                          save_addr=os.path.join(_SAVE_ADDR_PREFIX,
                                                 "gpr_hmc_mixture.png"))
visual_util.gpr_1d_visual(pred_mean=None, pred_cov=None,
                          pred_quantiles=[],
                          pred_samples=list(f_valid_val.T)[:10000],
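# A note on the component-index draw above: taking argmax over a single
# multinomial draw is just a categorical sample. A minimal equivalent
# (the toy probability vector below is hypothetical):
import numpy as np

mix_prob_toy = np.array([0.2, 0.5, 0.3])

idx_multinomial = np.argmax(np.random.multinomial(1, pvals=mix_prob_toy))
idx_choice = np.random.choice(mix_prob_toy.size, p=mix_prob_toy)
# both idx_multinomial and idx_choice are draws from Categorical(mix_prob_toy)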
    f_deriv_samples_val,
    sigma_sample_val,
    is_accepted_,
] = sess.run([
    gpf_sample,
    gpf_deriv_sample,
    sigma_sample,
    kernel_results.is_accepted,
])

print('Acceptance Rate: {}'.format(np.mean(is_accepted_)))

sess.close()

""" 2.3. prediction and visualization"""
# prediction
df_test_val = gp.sample_posterior_full(X_new=X_test, X=X_deriv,
                                       f_sample=f_deriv_samples_val.T,
                                       ls=DEFAULT_LS_VAL,
                                       kernel_func=gpr_mono.rbf_hess_1d)

# sample f conditional on f_deriv
f_test_val = gpr_mono.sample_posterior_predictive(
    X_new=X_test, X_obs=X_train, X_deriv=X_deriv,
    f_sample=f_samples_val.T,
    f_deriv_sample=f_deriv_samples_val.T,
    kernel_func_ff=gp.rbf,
    kernel_func_df=gpr_mono.rbf_grad_1d,
    kernel_func_dd=gpr_mono.rbf_hess_1d,
    ls=DEFAULT_LS_VAL,
)
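# For reference, the derivative cross-covariances used above follow from
# differentiating the RBF kernel k(x, x') = exp(-(x - x')^2 / (2 l^2)):
#   cov(f(x),  f'(x'))  =  k(x, x') * (x - x') / l^2
#   cov(f'(x), f'(x'))  =  k(x, x') * (1/l^2 - (x - x')^2 / l^4)
# A minimal 1-d sketch of these formulas (assumed to match the intent of
# gpr_mono.rbf_grad_1d / gpr_mono.rbf_hess_1d; verify against the package):
import numpy as np

def rbf_1d_sketch(x, xp, ls):
    return np.exp(-(x[:, None] - xp[None, :]) ** 2 / (2. * ls ** 2))

def rbf_grad_1d_sketch(x, xp, ls):
    d = x[:, None] - xp[None, :]
    return rbf_1d_sketch(x, xp, ls) * d / ls ** 2

def rbf_hess_1d_sketch(x, xp, ls):
    d = x[:, None] - xp[None, :]
    return rbf_1d_sketch(x, xp, ls) * (1. / ls ** 2 - d ** 2 / ls ** 4)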
X_valid = X_valid.reshape((X_valid.size, 1))
N_test, _ = X_valid.shape

feature_1_test = np.repeat(uniform_quantile, repeats=N_test, axis=-1).T
feature_2_test = np.tile(X_valid, reps=(1, n_test_cdf_eval))
X_test = np.stack([feature_1_test, feature_2_test], axis=-1)

# predict calibrated quantiles at test dataset
predicted_quantiles = []
for test_x_id in tqdm.tqdm(range(N_test)):
    X_test_val = X_test[test_x_id]
    f_test_val = gp.sample_posterior_full(X_new=X_test_val, X=X_train,
                                          f_sample=f_samples_val.T,
                                          ls=np.exp(_DEFAULT_LOG_LS_SCALE),
                                          kernel_func=gp.rbf)
    predicted_quantiles.append(f_test_val)

# dimension n_x_eval, n_cdf_eval, n_sample
predicted_quantiles = np.asarray(predicted_quantiles)

with open(os.path.join(_SAVE_ADDR_PREFIX,
                       '{}/calibration_local/ensemble_posterior_predicted_quantiles.pkl'.format(family_name)),
          'wb') as file:
    pk.dump(predicted_quantiles, file, protocol=pk.HIGHEST_PROTOCOL)

""" 4.5. sample from calibrated predictive posterior """
# load data
with open(os.path.join(_SAVE_ADDR_PREFIX,
                       '{}/ensemble_posterior_dist_sample.pkl'.format(family_name)),
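# Shape sketch for the feature construction above (toy sizes below are
# hypothetical): with n_test_cdf_eval quantile levels and N_test test
# locations,
#   feature_1_test: (N_test, n_test_cdf_eval)  quantile level, repeated per x
#   feature_2_test: (N_test, n_test_cdf_eval)  x location, repeated per level
#   X_test:         (N_test, n_test_cdf_eval, 2)
import numpy as np

n_cdf_toy, n_test_toy = 4, 3
uniform_quantile_toy = np.linspace(0., 1., n_cdf_toy).reshape(-1, 1)
X_valid_toy = np.random.rand(n_test_toy, 1)

f1 = np.repeat(uniform_quantile_toy, repeats=n_test_toy, axis=-1).T
f2 = np.tile(X_valid_toy, reps=(1, n_cdf_toy))
X_test_toy = np.stack([f1, f2], axis=-1)
assert X_test_toy.shape == (n_test_toy, n_cdf_toy, 2)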
with open(os.path.join(_SAVE_ADDR_PREFIX,
                       '{}/weight_sample.pkl'.format(family_name)), 'rb') as file:
    weight_sample_dict_val = pk.load(file)

with open(os.path.join(_SAVE_ADDR_PREFIX,
                       '{}/ensemble_resid_sample.pkl'.format(family_name)), 'rb') as file:
    resid_gp_sample_val = pk.load(file)

# compute GP prediction for residual GP
ensemble_resid_valid_sample = gp.sample_posterior_full(
    X_new=X_valid, X=X_test,
    f_sample=resid_gp_sample_val.T,
    ls=np.exp(DEFAULT_LOG_LS_RESID),
    kernel_func=gp.rbf).T

# compute residual noise
ensemble_noise_valid_sample = np.random.normal(
    loc=0, scale=np.exp(np.mean(sigma_sample_val)),
    size=ensemble_resid_valid_sample.shape)

# compute posterior samples for ensemble weight and outcome
base_weight_sample_val = list(weight_sample_dict_val.values())
temp_sample_val = np.squeeze(temp_sample_val)

with tf.Session() as sess:
    # W_ensemble = parametric_ensemble.sample_posterior_weight(
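# A note on the noise draw above: sigma_sample_val appears to hold posterior
# draws of the *log* noise scale, so exp(mean(.)) is a point summary of the
# noise standard deviation. A minimal sketch of turning residual-GP draws
# into noisy predictive draws (array names and sizes below are hypothetical):
import numpy as np

log_sigma_draws_toy = np.random.normal(-2., 0.1, size=500)  # toy log-scale draws
resid_draws_toy = np.random.normal(0., 1., size=(500, 20))  # toy GP residual draws

noise_sd = np.exp(np.mean(log_sigma_draws_toy))
noisy_pred_draws_toy = resid_draws_toy + np.random.normal(
    loc=0., scale=noise_sd, size=resid_draws_toy.shape)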
    is_accepted_,
] = sess.run([
    alpha_sample,
    features,
    outcomes_value,
    kernel_results.is_accepted,
])

print('Acceptance Rate: {}'.format(np.mean(is_accepted_)))

sess.close()

""" 2.4. prediction and visualization"""
# compute sample
gpf_sample_lr = feature_val.dot(alpha_sample_val.T)

f_test_val = gp.sample_posterior_full(X_new=X_test, X=X_train,
                                      f_sample=gpf_sample_lr,
                                      ls=ls_val, kernel_func=gp.rbf,
                                      ridge_factor=1e-4)

# visualize
mu = np.mean(f_test_val, axis=1)
cov = np.var(f_test_val, axis=1)

gpr_1d_visual(mu, cov, X_train, y_train, X_test, y_test,
              title="RBF, MCMC, Linear Regression Approximation",
              save_addr="./result/gpr/gpr_mcmc_lr.png")
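# The "linear regression approximation" above represents each GP draw as
# f = Phi @ alpha for some feature matrix Phi (here feature_val). One
# standard choice of Phi for an RBF kernel is random Fourier features; the
# sketch below illustrates that idea only, and is not necessarily the
# feature map this script uses:
import numpy as np

def random_fourier_features_sketch(X, n_feature=100, ls=0.3, seed=0):
    # Phi(x) such that Phi(x) @ Phi(x')^T approximates the RBF kernel
    rng = np.random.RandomState(seed)
    W = rng.normal(0., 1. / ls, size=(X.shape[1], n_feature))
    b = rng.uniform(0., 2. * np.pi, size=n_feature)
    return np.sqrt(2. / n_feature) * np.cos(X.dot(W) + b)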
with open(os.path.join(_SAVE_ADDR_PREFIX, 'temp_sample.pkl'), 'rb') as file:
    temp_sample_val = pk.load(file)
with open(os.path.join(_SAVE_ADDR_PREFIX, 'weight_sample.pkl'), 'rb') as file:
    weight_sample_val = pk.load(file)
with open(os.path.join(_SAVE_ADDR_PREFIX, 'ensemble_resid_sample.pkl'), 'rb') as file:
    resid_sample_val = pk.load(file)

""" 2.3.1. prediction """
# compute GP prediction for weight GP and residual GP
model_weight_valid_sample = []
for model_weight_sample in weight_sample_val:
    model_weight_valid_sample.append(
        gp.sample_posterior_full(X_new=X_valid, X=X_test,
                                 f_sample=model_weight_sample.T,
                                 ls=DEFAULT_LS_WEIGHT,
                                 kernel_func=gp.rbf).T.astype(np.float32))

ensemble_resid_valid_sample = gp.sample_posterior_full(
    X_new=X_valid, X=X_test,
    f_sample=resid_sample_val.T,
    ls=DEFAULT_LS_RESID,
    kernel_func=gp.rbf).T

# compute sample for posterior mean
with tf.Session() as sess:
    W_ensemble = adaptive_ensemble.sample_posterior_weight_flat(
        model_weight_valid_sample,
        temp_sample_val,
        link_func=sparse_softmax)
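# A note on the link function above: sparse_softmax maps the unconstrained
# weight-GP values onto the probability simplex. The sketch below shows a
# temperature-scaled softmax as one plausible form of such a link (an
# assumption; check the package for the exact definition of sparse_softmax):
import numpy as np

def softmax_with_temp_sketch(raw_weights, temp=1.0):
    # raw_weights: (n_model, ...) unconstrained values; temp > 0 sharpens
    # (temp < 1) or flattens (temp > 1) the resulting weights
    z = raw_weights / temp
    z = z - np.max(z, axis=0, keepdims=True)  # numerical stability
    w = np.exp(z)
    return w / np.sum(w, axis=0, keepdims=True)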
def prediction_tailfree(X_pred, X_train,
                        base_pred_dict, family_tree,
                        weight_sample_list, resid_sample, temp_sample,
                        default_log_ls_weight=None,
                        default_log_ls_resid=None):
    """Generates predictive samples for the adaptive ensemble.

    Args:
        X_pred: (np.ndarray of float32) testing locations, N_new x D.
        X_train: (np.ndarray of float32) training locations, N_train x D.
        base_pred_dict: (dict of np.ndarray) A dictionary of out-of-sample
            predictions from the base models.
        family_tree: (dict of list or None) A dictionary of lists of strings
            specifying the family tree between models; if None, assume
            there is no structure (i.e. a flat structure).
        weight_sample_list: (list of np.ndarray of float32) List of
            untransformed ensemble weights for each base model,
            shape (M, N_train).
        resid_sample: (np.ndarray of float32) GP samples for the residual
            process at the training locations X_train, shape (M, N_train).
        temp_sample: (np.ndarray of float32) Temperature random variables
            for each parent model.
        default_log_ls_weight: (float32) default value of the log
            length-scale parameter for the weight GPs.
        default_log_ls_resid: (float32) default value of the log
            length-scale parameter for the residual GP.

    Returns:
        ensemble_sample: (np.ndarray) Samples from the full posterior
            predictive.
        ensemble_mean: (np.ndarray) Samples from the posterior mean.
        ensemble_weights: (np.ndarray) Samples of the ensemble weights.
        cond_weights_dict: (dict of np.ndarray) Samples of the conditional
            weights for each non-root node in the family tree.
        ensemble_model_names: (list of str) Names of the base models, in
            the order used by ensemble_weights.
    """
    # use `is None` so that a legitimate value of 0. is not overwritten
    if default_log_ls_weight is None:
        default_log_ls_weight = np.log(0.35)
    if default_log_ls_resid is None:
        default_log_ls_resid = np.log(0.1)

    default_log_ls_weight = np.float32(default_log_ls_weight)
    default_log_ls_resid = np.float32(default_log_ls_resid)

    # compute GP prediction for weight GP and residual GP
    model_weight_valid_sample = []
    for model_weight_sample in weight_sample_list:
        model_weight_valid_sample.append(
            gp.sample_posterior_full(X_new=X_pred, X=X_train,
                                     f_sample=model_weight_sample.T,
                                     ls=np.exp(default_log_ls_weight),
                                     kernel_func=gp.rbf).T.astype(np.float32))

    ensemble_resid_valid_sample = (
        gp.sample_posterior_full(X_new=X_pred, X=X_train,
                                 f_sample=resid_sample.T,
                                 ls=np.exp(default_log_ls_resid),
                                 kernel_func=gp.rbf).T)

    # compute sample for posterior mean
    raw_weights_dict = dict(
        zip(tail_free.get_nonroot_node_names(family_tree),
            model_weight_valid_sample))
    parent_temp_dict = dict(
        zip(tail_free.get_parent_node_names(family_tree),
            temp_sample))

    (ensemble_sample_val, ensemble_mean_val,
     ensemble_weights_val, cond_weights_dict_val,
     ensemble_model_names) = (
        adaptive_ensemble.sample_posterior_tailfree(
            X=X_pred, base_pred_dict=base_pred_dict,
            family_tree=family_tree,
            weight_gp_dict=raw_weights_dict,
            temp_dict=parent_temp_dict,
            resid_gp_sample=ensemble_resid_valid_sample,
            log_ls_weight=default_log_ls_weight))

    return (ensemble_sample_val, ensemble_mean_val,
            ensemble_weights_val, cond_weights_dict_val,
            ensemble_model_names)
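# Example usage of prediction_tailfree (a sketch; every input name below is
# a hypothetical placeholder standing in for the MCMC output of the
# corresponding training script):
#
#     (ensemble_sample, ensemble_mean,
#      ensemble_weights, cond_weights_dict,
#      model_names) = prediction_tailfree(
#          X_pred=X_valid, X_train=X_train,
#          base_pred_dict=base_valid_pred_dict,
#          family_tree=DEFAULT_FAMILY_TREE,
#          weight_sample_list=weight_sample_list,
#          resid_sample=resid_sample_val,
#          temp_sample=temp_sample_val,
#          default_log_ls_weight=np.log(0.35),
#          default_log_ls_resid=np.log(0.1))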