Beispiel #1
0
    def check_geweke(self, setting_idx, hypparam_dict):
        """Run a Geweke-style check for one hyperparameter setting.

        For each of ``self.geweke_ntrials`` trials, two populations of
        ``self.geweke_statistics`` values are collected:

        * forward: a fresh ``self.distribution_class(**hypparam_dict)`` is
          built for every sample and data is drawn from it with ``rvs``;
        * gibbs: one instance alternates ``resample(data, ...)`` and ``rvs``.

        Both populations are drawn onto a shared quantile plot and compared
        with ``testing.assert_populations_eq_moments`` on the slice returned
        by ``self.geweke_numerical_slice``. A trial whose comparison raises
        ``AssertionError`` is counted as a failure and its statistics are
        saved to an ``.npz`` file under ``figures/<ClassName>/`` beside this
        module.

        Args:
            setting_idx: Index of the hyperparameter setting; used for the
                figure path, the numerical slice and the dump file name.
            hypparam_dict: Keyword arguments for ``self.distribution_class``.

        Raises:
            AssertionError: If more than
                ``self.geweke_num_statistic_fails_to_tolerate`` trials fail.
        """
        import os
        from matplotlib import pyplot as plt

        plt.ioff()
        fig = plt.figure()
        try:
            figpath = self.geweke_figure_filepath(setting_idx)
            mkdir(os.path.dirname(figpath))

            nsamples = self.geweke_nsamples
            data_size = self.geweke_data_size
            ntrials = self.geweke_ntrials

            # Probe the statistic once to learn how many entries it has.
            d = self.distribution_class(**hypparam_dict)
            sample_dim = np.atleast_1d(
                self.geweke_statistics(d, d.rvs(size=10))).shape[0]

            num_statistic_fails = 0
            # Pre-bound so the final failure message can always be formatted,
            # even if the tolerance is configured below zero.
            example_violating_means = (None, None)

            for trial in range(ntrials):
                # collect forward-generated statistics
                forward_statistics = np.squeeze(
                    np.empty((nsamples, sample_dim)))
                for i in range(nsamples):
                    d = self.distribution_class(**hypparam_dict)
                    data = d.rvs(size=data_size)
                    forward_statistics[i] = self.geweke_statistics(d, data)

                # collect gibbs-generated statistics
                gibbs_statistics = np.squeeze(
                    np.empty((nsamples, sample_dim)))
                d = self.distribution_class(**hypparam_dict)
                data = d.rvs(size=data_size)
                for i in range(nsamples):
                    d.resample(data, **self.geweke_resample_kwargs)
                    data = d.rvs(size=data_size)
                    gibbs_statistics[i] = self.geweke_statistics(d, data)

                testing.populations_eq_quantile_plot(forward_statistics,
                                                     gibbs_statistics,
                                                     fig=fig)
                try:
                    sl = self.geweke_numerical_slice(d, setting_idx)
                    testing.assert_populations_eq_moments(
                        forward_statistics[..., sl],
                        gibbs_statistics[..., sl],
                        pval=self.geweke_pval)
                except AssertionError:
                    # Keep the raw statistics of the failing trial for
                    # offline inspection.
                    datapath = os.path.join(
                        os.path.dirname(__file__), 'figures',
                        self.__class__.__name__,
                        'setting_%d_trial_%d.npz' % (setting_idx, trial))
                    np.savez(datapath,
                             fwd=forward_statistics,
                             gibbs=gibbs_statistics)
                    example_violating_means = (forward_statistics.mean(0),
                                               gibbs_statistics.mean(0))
                    num_statistic_fails += 1

            # Save the figure we drew on, not whatever pyplot considers
            # current.
            fig.savefig(figpath)
        finally:
            # Release the figure so repeated checks do not accumulate open
            # figures.
            plt.close(fig)

        assert num_statistic_fails <= self.geweke_num_statistic_fails_to_tolerate, \
                'Geweke MAY have failed, check FIGURES in %s (e.g. %s vs %s)' \
                % ((os.path.dirname(figpath),) + example_violating_means)
Beispiel #2
0
def test_gaussian():
    """Compare Gibbs draws of a Gaussian's ``mu`` with parallel-tempering draws.

    A ``Gaussian`` and an ``AnnealedGaussianModel`` are both initialised via
    ``empirical_bayes`` on the same small random data set. 20000 values of
    ``mu`` are collected from repeated ``resample()`` calls on the former and
    from the samples returned by ``ParallelTempering.run`` on the latter. The
    two populations are plotted against each other (saved to
    ``gaussian_test.png``) and compared with
    ``testing.assert_populations_eq_moments``.

    Raises:
        AssertionError: If the moment comparison fails; the message points
            at the saved figure and includes the underlying failure text.
    """
    prior_data = 2 * np.random.randn(5, 2) + np.array([1., 3.])
    a = Gaussian().empirical_bayes(prior_data)

    # Variant with observed data (disabled in this test):
    # data = a.rvs(10)

    gibbs_statistics = []
    for itr in range(20000):
        a.resample()
        # a.resample(data)
        gibbs_statistics.append(a.mu)
    gibbs_statistics = np.array(gibbs_statistics)

    b = AnnealedGaussianModel().empirical_bayes(prior_data)
    # b.add_data(data)

    pt = ParallelTempering(b, [5.])
    pt_samples = pt.run(20000, 1)
    pt_statistics = np.array([m.mu for m in pt_samples])

    fig = plt.figure()
    try:
        testing.populations_eq_quantile_plot(gibbs_statistics,
                                             pt_statistics,
                                             fig=fig)
        # Save the figure that was drawn on and release it afterwards.
        fig.savefig('gaussian_test.png')
    finally:
        plt.close(fig)

    # The comparison raises AssertionError on its own; re-raise with the hint
    # that was previously a dangling tuple element and therefore never shown.
    try:
        testing.assert_populations_eq_moments(gibbs_statistics, pt_statistics)
    except AssertionError as err:
        raise AssertionError(
            'Annealing MAY have failed, check FIGURES (%s)' % (err,))
Beispiel #3
0
def test_gaussian():
    """Check that parallel tempering and plain Gibbs agree on ``mu``.

    Both a ``Gaussian`` and an ``AnnealedGaussianModel`` are set up with
    ``empirical_bayes`` on the same random data. ``mu`` is recorded after each
    of 20000 ``resample()`` calls on the first, and from every sample returned
    by ``ParallelTempering.run`` on the second. A quantile plot of the two
    populations is written to ``gaussian_test.png`` and their moments are
    compared with ``testing.assert_populations_eq_moments``.

    Raises:
        AssertionError: If the moment comparison fails; the message refers to
            the saved figure and carries the original failure text.
    """
    prior_data = 2*np.random.randn(5,2) + np.array([1.,3.])
    a = Gaussian().empirical_bayes(prior_data)

    # Variant with observed data (disabled in this test):
    # data = a.rvs(10)

    gibbs_statistics = []
    for itr in range(20000):
        a.resample()
        # a.resample(data)
        gibbs_statistics.append(a.mu)
    gibbs_statistics = np.array(gibbs_statistics)

    b = AnnealedGaussianModel().empirical_bayes(prior_data)
    # b.add_data(data)

    pt = ParallelTempering(b,[5.])
    pt_samples = pt.run(20000,1)
    pt_statistics = np.array([m.mu for m in pt_samples])

    fig = plt.figure()
    try:
        testing.populations_eq_quantile_plot(gibbs_statistics,pt_statistics,fig=fig)
        # Write out the figure we drew on, then free it.
        fig.savefig('gaussian_test.png')
    finally:
        plt.close(fig)

    # Previously the hint string was just the second element of a discarded
    # tuple; attach it to the AssertionError raised by the comparison instead.
    try:
        testing.assert_populations_eq_moments(gibbs_statistics,pt_statistics)
    except AssertionError as err:
        raise AssertionError(
            'Annealing MAY have failed, check FIGURES (%s)' % (err,))
Beispiel #4
0
def discrete_geweke_test(fig):
    """Geweke-style comparison of forward and Gibbs samples for a small HMM.

    A two-state HMM with two-symbol ``Categorical`` emissions is sampled in
    two ways: (1) repeatedly calling ``resample_model()`` with no data kept
    and generating a length-10 state sequence, and (2) keeping one generated
    sequence and alternating ``_generate_obs`` with ``resample_model()``.
    For 10 random pairs of time indices, the standard deviation of the
    "same state at both indices" indicator is compared between the two
    sample sets, and the first emission distribution's weights are compared
    in a quantile plot saved under ``figure_dir_path``.

    Args:
        fig: Figure passed through to ``testing.populations_eq_quantile_plot``.

    Raises:
        AssertionError: If any pair's indicator spread differs beyond the
            ``rtol=0.025, atol=0.025`` tolerance.
    """
    n_states, n_emissions = 2, 2
    transition_concentration = 3.
    initial_concentration = 3.
    seq_len = 10
    n_sweeps = 10000
    n_pair_checks = 10

    emission_distns = []
    for _ in range(n_states):
        emission_distns.append(
            distributions.Categorical(K=n_emissions, alpha_0=1.))

    model = models.HMM(
        alpha=transition_concentration,
        init_state_concentration=initial_concentration,
        obs_distns=emission_distns)

    # Forward pass: parameters from resample_model() with no data attached,
    # then a fresh state sequence that is not kept on the model.
    forward_seqs, forward_weights = [], []
    for _ in range(n_sweeps):
        model.resample_model()
        _obs, seq = model.generate(seq_len, keep=False)
        forward_seqs.append(seq)
        forward_weights.append(model.obs_distns[0].weights)
    forward_seqs = np.array(forward_seqs)
    forward_weights = np.array(forward_weights)

    # Gibbs pass: keep one sequence and alternate data / model resampling.
    model.generate(seq_len, keep=True)
    kept_states = model.states_list[0]

    chain_seqs, chain_weights = [], []
    for _ in range(n_sweeps):
        kept_states.data = None
        model._generate_obs(kept_states)  # new data given states and obs params
        model.resample_model()            # everything else, as usual
        chain_seqs.append(kept_states.stateseq)
        chain_weights.append(model.obs_distns[0].weights)
    chain_seqs = np.array(chain_seqs)
    chain_weights = np.array(chain_weights)

    # Compare co-assignment indicators at random pairs of time indices.
    # NOTE(review): .std() is compared although the idea reads like a
    # probability (.mean()) -- confirm which is intended.
    positions = np.arange(seq_len)
    for _ in range(n_pair_checks):
        i, j = np.random.choice(positions, replace=False, size=2)
        forward_same = forward_seqs[:, i] == forward_seqs[:, j]
        chain_same = chain_seqs[:, i] == chain_seqs[:, j]
        assert np.isclose(forward_same.std(), chain_same.std(),
                          rtol=0.025, atol=0.025)

    # Compare the sampled emission weights visually.
    testing.populations_eq_quantile_plot(forward_weights, chain_weights,
                                         fig=fig)
    plt.savefig(os.path.join(figure_dir_path,
                             'discrete_geweke_test_weights.pdf'))
Beispiel #5
0
    def check_geweke(self, setting_idx, hypparam_dict):
        """Run a Geweke-style check for one hyperparameter setting.

        Over ``self.geweke_ntrials`` trials, ``self.geweke_statistics`` is
        evaluated on two sample populations: a forward one, where a new
        ``self.distribution_class(**hypparam_dict)`` generates data for each
        sample, and a Gibbs one, where a single instance alternates
        ``resample(data, ...)`` with ``rvs``. Each trial adds both
        populations to a quantile plot and compares them with
        ``testing.assert_populations_eq_moments`` on the slice given by
        ``self.geweke_numerical_slice``. Failing trials are counted and
        their statistics written to an ``.npz`` file under
        ``figures/<ClassName>/`` beside this module.

        Args:
            setting_idx: Index of the hyperparameter setting; used for the
                figure path, the numerical slice and the dump file name.
            hypparam_dict: Keyword arguments for ``self.distribution_class``.

        Raises:
            AssertionError: If more than
                ``self.geweke_num_statistic_fails_to_tolerate`` trials fail.
        """
        import os
        from matplotlib import pyplot as plt

        plt.ioff()
        fig = plt.figure()
        try:
            figpath = self.geweke_figure_filepath(setting_idx)
            mkdir(os.path.dirname(figpath))

            nsamples, data_size, ntrials = self.geweke_nsamples, self.geweke_data_size, self.geweke_ntrials

            # One throwaway evaluation tells us the statistic's length.
            d = self.distribution_class(**hypparam_dict)
            sample_dim = np.atleast_1d(self.geweke_statistics(d, d.rvs(size=10))).shape[0]

            num_statistic_fails = 0
            # Bound up front so the failure message below can always be built.
            example_violating_means = (None, None)

            # range (not xrange) so the check runs on Python 2 and 3 alike.
            for trial in range(ntrials):
                # collect forward-generated statistics
                forward_statistics = np.squeeze(np.empty((nsamples, sample_dim)))
                for i in range(nsamples):
                    d = self.distribution_class(**hypparam_dict)
                    data = d.rvs(size=data_size)
                    forward_statistics[i] = self.geweke_statistics(d, data)

                # collect gibbs-generated statistics
                gibbs_statistics = np.squeeze(np.empty((nsamples, sample_dim)))
                d = self.distribution_class(**hypparam_dict)
                data = d.rvs(size=data_size)
                for i in range(nsamples):
                    d.resample(data, **self.geweke_resample_kwargs)
                    data = d.rvs(size=data_size)
                    gibbs_statistics[i] = self.geweke_statistics(d, data)

                testing.populations_eq_quantile_plot(forward_statistics, gibbs_statistics, fig=fig)
                try:
                    sl = self.geweke_numerical_slice(d, setting_idx)
                    testing.assert_populations_eq_moments(
                        forward_statistics[..., sl], gibbs_statistics[..., sl], pval=self.geweke_pval
                    )
                except AssertionError:
                    # Dump the failing trial's statistics for later inspection.
                    datapath = os.path.join(
                        os.path.dirname(__file__),
                        "figures",
                        self.__class__.__name__,
                        "setting_%d_trial_%d.npz" % (setting_idx, trial),
                    )
                    np.savez(datapath, fwd=forward_statistics, gibbs=gibbs_statistics)
                    example_violating_means = forward_statistics.mean(0), gibbs_statistics.mean(0)
                    num_statistic_fails += 1

            # Save the figure that was drawn on rather than pyplot's current one.
            fig.savefig(figpath)
        finally:
            # Close the figure so repeated checks do not pile up open figures.
            plt.close(fig)

        assert num_statistic_fails <= self.geweke_num_statistic_fails_to_tolerate, (
            "Geweke MAY have failed, check FIGURES in %s (e.g. %s vs %s)"
            % ((os.path.dirname(figpath),) + example_violating_means)
        )
Beispiel #6
0
def discrete_geweke_test(fig):
    """Compare prior-sampled and Gibbs-sampled HMM draws (Geweke-style).

    Builds a 2-state HMM with 2-symbol ``Categorical`` emissions and gathers
    10000 (state sequence, first-emission weights) pairs twice: once by
    calling ``resample_model()`` and generating an unkept length-10 sequence,
    and once by keeping a sequence and alternating ``_generate_obs`` with
    ``resample_model()``. Ten random index pairs are then checked: the
    standard deviation of the "states equal at both indices" indicator must
    match between the two collections within ``rtol=atol=0.025``. Finally
    the weights are drawn on a quantile plot and saved under
    ``figure_dir_path``.

    Args:
        fig: Figure handed to ``testing.populations_eq_quantile_plot``.

    Raises:
        AssertionError: If an index pair's indicator spread disagrees.
    """
    num_states = 2
    num_symbols = 2
    trans_conc = 3.
    init_conc = 3.
    length = 10
    draws = 10000
    pair_checks = 10

    def indicator_std(seqs, a, b):
        # Spread of the "same state at columns a and b" indicator.
        # NOTE(review): a probability would be .mean(); confirm .std() is
        # what the test intends.
        return (seqs[:, a] == seqs[:, b]).std()

    hmm = models.HMM(
        alpha=trans_conc,
        init_state_concentration=init_conc,
        obs_distns=[distributions.Categorical(K=num_symbols, alpha_0=1.)
                    for _ in range(num_states)],
    )

    # --- samples from the prior -------------------------------------------
    seqs_from_prior = []
    weights_from_prior = []
    for _ in range(draws):
        hmm.resample_model()  # no data is attached at this point
        _, drawn_seq = hmm.generate(length, keep=False)
        seqs_from_prior.append(drawn_seq)
        weights_from_prior.append(hmm.obs_distns[0].weights)
    seqs_from_prior = np.array(seqs_from_prior)
    weights_from_prior = np.array(weights_from_prior)

    # --- samples from the Gibbs chain -------------------------------------
    hmm.generate(length, keep=True)
    states_obj = hmm.states_list[0]

    seqs_from_gibbs = []
    weights_from_gibbs = []
    for _ in range(draws):
        states_obj.data = None
        hmm._generate_obs(states_obj)  # data given state sequence, obs params
        hmm.resample_model()           # everything else as usual
        seqs_from_gibbs.append(states_obj.stateseq)
        weights_from_gibbs.append(hmm.obs_distns[0].weights)
    seqs_from_gibbs = np.array(seqs_from_gibbs)
    weights_from_gibbs = np.array(weights_from_gibbs)

    # --- co-assignment comparison at random index pairs -------------------
    all_times = np.arange(length)
    for _ in range(pair_checks):
        i, j = np.random.choice(all_times, replace=False, size=2)
        prior_value = indicator_std(seqs_from_prior, i, j)
        gibbs_value = indicator_std(seqs_from_gibbs, i, j)
        assert np.isclose(prior_value, gibbs_value, rtol=0.025, atol=0.025)

    # --- parameter comparison ---------------------------------------------
    testing.populations_eq_quantile_plot(
        weights_from_prior, weights_from_gibbs, fig=fig)
    out_path = os.path.join(figure_dir_path, 'discrete_geweke_test_weights.pdf')
    plt.savefig(out_path)