Example #1
0
def evaluate_random_numbers(model, split, n_marginals=5):
    """
    Inspects how well the model fits a data split by looking at the random numbers it produces for that split.

    The split is passed through model.calc_random_numbers, a gaussian is fitted to the result, and its KL divergence
    to a unit gaussian is printed. Histograms of the first few marginals are then plotted and shown. The more
    gaussian the random numbers look, the better the model fits the data.
    :param model: the model to evaluate
    :param split: name of the data split, expected to be 'trn', 'val', or 'tst'
    :param n_marginals: how many marginal histograms of the random numbers to plot
    :raises ValueError: if the loaded dataset has no non-None attribute named by split
    """

    assert is_data_loaded(), 'Dataset hasn\'t been loaded'

    # look the requested split up on the loaded dataset
    chosen = getattr(data, split, None)
    if chosen is None:
        raise ValueError('Invalid data split')

    # a conditional model is fed [y, x]; an unconditional one is fed x alone
    inputs = [chosen.y, chosen.x] if is_conditional(model) else chosen.x

    # random numbers the model assigns to the chosen split
    noise = model.calc_random_numbers(inputs)

    # compare a gaussian fitted to the random numbers against the unit gaussian
    fitted = pdfs.fit_gaussian(noise)
    unit = pdfs.Gaussian(m=np.zeros(data.n_dims), S=np.eye(data.n_dims))
    print('KL(q||p) = {0:.2f}'.format(fitted.kl(unit)))

    # histograms are drawn for the leading n_marginals columns only
    util.plot_hist_marginals(noise[:, :n_marginals])
    plt.show()
Example #2
0
def fit_and_evaluate_gaussian(split,
                              cond=False,
                              use_image_space=False,
                              return_avg=True):
    """
    Fits a gaussian baseline on the train data and scores the given split with it.

    With cond=True one gaussian is fitted per label and the gaussians are combined into a mixture with equal
    weights; otherwise a single gaussian is fitted to all of the train data.
    :param split: name of the data split to evaluate on, expected to be 'trn', 'val', or 'tst'
    :param cond: bool, whether to fit one gaussian per label instead of a single gaussian
    :param use_image_space: bool, whether to report log probability in [0, 1] image space (only for cifar and mnist)
    :param return_avg: bool, whether to return the average log prob with its std error, or all log probs
    :return: (average log probability, standard error) if return_avg, otherwise all log probs
    :raises ValueError: if the loaded dataset has no non-None attribute named by split
    """

    assert is_data_loaded(), 'Dataset hasn\'t been loaded'

    # look the requested split up on the loaded dataset
    target = getattr(data, split, None)
    if target is None:
        raise ValueError('Invalid data split')

    if cond:
        # one gaussian per label, each fitted to the train points carrying that label
        per_label = [
            pdfs.fit_gaussian(data.trn.x[data.trn.labels == label])
            for label in range(data.n_labels)
        ]
        weights = np.ones(data.n_labels, dtype=float) / data.n_labels
        model = pdfs.MoG(weights, xs=per_label)
    else:
        model = pdfs.fit_gaussian(data.trn.x)

    logprobs = model.eval(target.x)

    if use_image_space:
        assert data_name in ['mnist', 'cifar10']
        z = util.logistic(target.x)
        # correction added to every log prob: a constant term minus a per-datapoint sum over dimensions
        const_term = data.n_dims * np.log(1 - 2 * data.alpha)
        per_point_term = np.sum(np.log(z) + np.log(1 - z), axis=1)
        logprobs += const_term - per_point_term

    if not return_avg:
        return logprobs

    # standard error of the mean over the target.N datapoints of the split
    avg_logprob = logprobs.mean()
    std_err = logprobs.std() / np.sqrt(target.N)
    return avg_logprob, std_err
Example #3
0
def get_err_gaussian(sim_name):
    """
    Returns the error of a gaussian fit for the given simulator, reusing a saved result when one exists.

    If no saved result is found, a gaussian is fitted to the true samples returned by get_truth, its error is
    computed with calc_mmd, and the error is saved before being returned.
    :param sim_name: simulator name, passed through translate_sim_name to locate the results folder
    :return: the error, either loaded from disk or freshly computed
    """

    res_file = os.path.join(root, 'results', translate_sim_name(sim_name),
                            'other', 'gaussian_lik_mmd')

    # a saved result is detected by the presence of the '.pkl' file
    if os.path.exists(res_file + '.pkl'):
        return util.io.load(res_file)

    # nothing saved yet: compute the error from the true samples and store it
    _, true_samples, _ = get_truth()
    err = calc_mmd(pdfs.fit_gaussian(true_samples))
    util.io.save(err, res_file)

    return err
Example #4
0
def test_fit_gaussian():
    """Checks that pdfs.fit_gaussian recovers the centre and spread of scaled normal samples."""
    # normal draws with mean 1 and std 2, scaled by 10: the samples have mean 10 and std 20
    samples = np.random.normal(loc=1.0, scale=2.0, size=(100, 100)) * 10
    fitted_center, fitted_std = pdfs.fit_gaussian(samples)

    # NOTE(review): the RNG is not seeded. If the returned centre is the sample mean over all
    # 10,000 draws, its standard error is about 0.2, so the 0.5 tolerance is only ~2.5 standard
    # errors and this test may fail sporadically - consider seeding. TODO confirm.
    for estimate, expected in ((fitted_center, 10), (fitted_std, 20)):
        assert abs(estimate - expected) < 0.5