def evaluate_random_numbers(model, split, n_marginals=5):
    """
    Evaluates the model by inspecting the distribution of its random numbers
    on a given data split. The closer that distribution is to a unit gaussian,
    the better the model fits the data.

    :param model: the model
    :param split: the data split, must be 'trn', 'val', or 'tst'
    :param n_marginals: number of marginal histograms of random numbers to plot
    """

    assert is_data_loaded(), 'Dataset hasn\'t been loaded'

    # pick the requested data split off the data module
    ds = getattr(data, split, None)
    if ds is None:
        raise ValueError('Invalid data split')

    # conditional models take [labels, inputs]
    inputs = [ds.y, ds.x] if is_conditional(model) else ds.x

    # push the data through the model to recover its random numbers
    rand_nums = model.calc_random_numbers(inputs)

    # report how far a gaussian fitted to the random numbers is from a
    # standard normal
    fitted = pdfs.fit_gaussian(rand_nums)
    unit = pdfs.Gaussian(m=np.zeros(data.n_dims), S=np.eye(data.n_dims))
    print('KL(q||p) = {0:.2f}'.format(fitted.kl(unit)))

    # visualize a few of the marginals
    util.plot_hist_marginals(rand_nums[:, :n_marginals])
    plt.show()
def fit_and_evaluate_gaussian(split, cond=False, use_image_space=False, return_avg=True):
    """
    Fits a gaussian to the train data and evaluates it on the given split.

    :param split: the data split to evaluate on, must be 'trn', 'val', or 'tst'
    :param cond: boolean, whether to fit a gaussian per conditional
    :param use_image_space: bool, whether to report log probability in [0, 1]
        image space (only for cifar and mnist)
    :param return_avg: bool, whether to return average log prob with std error,
        or all log probs
    :return: average log probability & standard error, or all log probs
    """

    assert is_data_loaded(), 'Dataset hasn\'t been loaded'

    # pick the split to evaluate on
    ds = getattr(data, split, None)
    if ds is None:
        raise ValueError('Invalid data split')

    if cond:
        # one gaussian per label, mixed with a uniform prior
        components = [pdfs.fit_gaussian(data.trn.x[data.trn.labels == label])
                      for label in range(data.n_labels)]
        uniform_prior = np.ones(data.n_labels, dtype=float) / data.n_labels
        model = pdfs.MoG(uniform_prior, xs=components)
    else:
        model = pdfs.fit_gaussian(data.trn.x)

    logprobs = model.eval(ds.x)

    if use_image_space:
        assert data_name in ['mnist', 'cifar10']
        # account for the logit preprocessing: add the log-determinant of the
        # jacobian of the inverse (logistic) transform
        z = util.logistic(ds.x)
        logprobs += data.n_dims * np.log(1 - 2 * data.alpha) - np.sum(
            np.log(z) + np.log(1 - z), axis=1)

    if not return_avg:
        return logprobs

    return logprobs.mean(), logprobs.std() / np.sqrt(ds.N)
def get_err_gaussian(sim_name):
    """
    Calculates the error for a gaussian fit, caching the result on disk so
    repeated calls reload it instead of recomputing.
    """

    res_file = os.path.join(root, 'results', translate_sim_name(sim_name), 'other', 'gaussian_lik_mmd')

    if not os.path.exists(res_file + '.pkl'):
        # no cached result yet: fit a gaussian to the true samples, score it,
        # and persist the result for next time
        _, true_samples, _ = get_truth()
        err = calc_mmd(pdfs.fit_gaussian(true_samples))
        util.io.save(err, res_file)
        return err

    return util.io.load(res_file)
def test_fit_gaussian():
    """
    Checks that pdfs.fit_gaussian recovers the mean and standard deviation of
    synthetic gaussian data.
    """

    # Fix the seed so the test is deterministic. Unseeded, it failed ~1% of
    # runs: with 10,000 samples the std error of the mean is 20/sqrt(10000)
    # = 0.2, so the 0.5 tolerance below is only ~2.5 sigma.
    rng = np.random.RandomState(42)

    # 10 * N(loc=1, scale=2) is distributed as N(mean=10, std=20)
    data = 10 * rng.normal(loc=1.0, scale=2.0, size=(100, 100))

    # NOTE(review): elsewhere in this file fit_gaussian returns a pdf object
    # (used as q.kl(p) and model.eval(...)); confirm it also supports
    # unpacking into (center, std_dev) as assumed here
    center, std_dev = pdfs.fit_gaussian(data)

    assert abs(center - 10) < 0.5
    assert abs(std_dev - 20) < 0.5