Ejemplo n.º 1
0
    def plot_results(self, mc=False):
        """Plot kernel density estimates of the model outputs and save them as PDFs.

        With a single QoI (``self.models[0].n_qoi == 1``) the densities of all
        models are drawn into one figure. Otherwise a pair plot of the last
        model's predictions is saved, followed by one marginal-density figure
        per QoI. For exactly two QoIs, joint density plots of the first
        (low-fidelity) and last (high-fidelity) model are saved in addition.
        All files are written below ``output/``.

        Args:
            mc: If truthy, also draw the Monte Carlo reference
                (``self.mc_model``) into the figures.

        Raises:
            SystemExit: If ``mc`` is truthy but ``self.mc_model`` is ``None``.
                The check runs before anything is drawn, so no partial
                output is written in that case.
        """
        # Fail fast: every branch below needs the reference when mc is set.
        if mc and self.mc_model is None:
            print(
                'No Monte Carlo reference samples available. Call calculate_mc_reference() first.'
            )
            # Same exception the site-provided exit() helper raises, but
            # available even when the `site` module is not loaded.
            raise SystemExit

        lf_evals = self.models[0].model_evals_pred
        hf_evals = self.models[-1].model_evals_pred

        if self.models[0].n_qoi == 1:

            # Common plotting range spanned by the first and last model
            xmin = np.min([np.min(hf_evals), np.min(lf_evals)])
            xmax = np.max([np.max(hf_evals), np.max(lf_evals)])

            for i in range(self.n_models):
                self.models[i].distribution.plot_kde(fignum=1,
                                                     color='C' + str(i),
                                                     xmin=xmin,
                                                     xmax=xmax)

            if mc:
                self.mc_model.distribution.plot_kde(fignum=1,
                                                    color='k',
                                                    linestyle='--',
                                                    xmin=xmin,
                                                    xmax=xmax)

            plt.legend(loc='best')
            # Positional argument: the keyword was renamed from `b` to
            # `visible` in Matplotlib, so neither name works everywhere.
            plt.grid(True)
            plt.gcf().savefig('output/mfmc_densities.pdf', dpi=300)

        else:
            # Seaborn pairplot of the high-fidelity push-forward
            utils.plot_multi_qoi(samples=hf_evals)
            plt.grid(True)
            plt.gcf().savefig('output/mfmc_hf_pairplot.pdf', dpi=300)
            plt.clf()

            # Roman numerals used in the axis label of the first three QoIs
            roman = ('I', 'II', 'III')

            # Plot marginals
            for k in range(self.models[-1].n_qoi):
                # Common plotting range for the k-th QoI
                xmin = np.min(
                    [np.min(hf_evals[:, k]),
                     np.min(lf_evals[:, k])])
                xmax = np.max(
                    [np.max(hf_evals[:, k]),
                     np.max(lf_evals[:, k])])

                rms = roman[k] if k < len(roman) else ''
                # Raw string: '\m' is not a valid escape sequence
                rv_name = r'$Q^{\mathrm{(%s)}}$' % rms

                for i in range(self.n_models):
                    # Compare by value; identity (`is`) is only accidentally
                    # true for small built-in ints.
                    if i == 0:
                        label = 'Low-Fidelity'
                    elif i == self.n_models - 1:
                        label = 'High-Fidelity'
                    elif i == 1 and self.n_models == 3:
                        label = 'Mid-Fidelity'
                    else:
                        label = 'Mid-%d-Fidelity' % (i + 1)

                    # Wrap the k-th column as an (n, 1) sample set
                    samples = np.expand_dims(
                        self.models[i].distribution.samples[:, k], axis=1)
                    marginal = Distribution(samples,
                                            label=label,
                                            rv_name=rv_name)
                    marginal.plot_kde(fignum=1,
                                      color='C' + str(i),
                                      xmin=xmin,
                                      xmax=xmax)

                if mc:
                    samples = np.expand_dims(
                        self.mc_model.distribution.samples[:, k], axis=1)
                    marginal = Distribution(samples,
                                            label='MC reference',
                                            rv_name=rv_name)
                    marginal.plot_kde(fignum=1,
                                      color='k',
                                      linestyle='--',
                                      xmin=xmin,
                                      xmax=xmax)

                plt.grid(True)
                plt.gcf().savefig('output/mfmc_densities_q%d.pdf' % (k + 1),
                                  dpi=300)
                plt.clf()

        if self.models[0].n_qoi == 2:

            # Joint densities of the first and last model in one figure
            sns.kdeplot(self.models[0].distribution.samples[:, 0],
                        self.models[0].distribution.samples[:, 1],
                        shade=True,
                        shade_lowest=False,
                        cmap='Blues',
                        label='Low-fidelity',
                        color='C0')
            sns.kdeplot(self.models[-1].distribution.samples[:, 0],
                        self.models[-1].distribution.samples[:, 1],
                        shade=True,
                        shade_lowest=False,
                        cmap='Reds',
                        label='High-fidelity',
                        color='C3')

            if mc:
                sns.kdeplot(self.mc_model.distribution.samples[:, 0],
                            self.mc_model.distribution.samples[:, 1],
                            cmap='Greys',
                            alpha=1.0,
                            label='MC reference',
                            color='Black')

            plt.xlabel('$Q_1$')
            plt.ylabel('$Q_2$')
            plt.legend(loc='upper left')
            plt.grid(True)

            plt.gcf().savefig('output/mfmc_dists.pdf', dpi=300)
            # Remember the axis limits so the individual plots share them
            xmin, xmax = plt.xlim()
            ymin, ymax = plt.ylim()
            plt.clf()

            self.models[0].distribution.plot_kde()
            plt.xlim([xmin, xmax])
            plt.ylim([ymin, ymax])
            plt.gcf().savefig('output/mfmc_lf.pdf', dpi=300)
            plt.clf()

            self.models[-1].distribution.plot_kde()
            plt.xlim([xmin, xmax])
            plt.ylim([ymin, ymax])
            plt.gcf().savefig('output/mfmc_hf.pdf', dpi=300)

            if mc:
                # NOTE(review): unlike after mfmc_lf.pdf, the figure is not
                # cleared before this plot; whether the MC density ends up on
                # top of the high-fidelity one depends on plot_kde - confirm.
                self.mc_model.distribution.plot_kde()
                plt.xlim([xmin, xmax])
                plt.ylim([ymin, ymax])
                plt.gcf().savefig('output/mfmc_mc.pdf', dpi=300)

        plt.clf()
Ejemplo n.º 2
0
class CBayesPosterior:
    """Posterior obtained by accept/reject sampling of prior samples.

    The acceptance weights are the ratio between the observed density and
    the push-forward of the prior, both evaluated at the prior push-forward
    samples.

    Attributes:
        p_obs: the observed density
        p_prior: the prior
        p_prior_pf: the push-forward of the prior
        p_post: the posterior (``None`` until ``setup_posterior_and_pf``)
        p_post_pf: the push-forward of the posterior (``None`` until
            ``setup_posterior_and_pf``)
        r: the ratio between p_obs and p_prior_pf evaluations
        acc_rate: the acceptance rate of the sampling algorithm
        acc_idx: initialised to ``None``; not written by this class
    """

    def __init__(self, p_obs, p_prior, p_prior_pf):
        """Store the three input distributions.

        Args:
            p_obs: Observed density (a ``Distribution``).
            p_prior: Prior (a ``Distribution``).
            p_prior_pf: Push-forward of the prior (a ``Distribution``).

        Raises:
            AssertionError: If an argument is not a ``Distribution``.
            SystemExit: If ``p_obs.n_dim`` is larger than 3.
        """
        # `assert` is kept so callers still see AssertionError; isinstance
        # additionally accepts subclasses of Distribution.
        assert isinstance(
            p_obs,
            Distribution), "p_obs is not of type Distribution: %r" % p_obs
        assert isinstance(
            p_prior,
            Distribution), "p_prior is not of type Distribution: %r" % p_prior
        assert isinstance(
            p_prior_pf, Distribution
        ), "p_prior_pf is not of type Distribution: %r" % p_prior_pf

        if p_obs.n_dim > 3:
            print('Framework has only been tested with up to 3 QoIs.')
            # Same exception the site-provided exit() helper raises, but
            # available even when the `site` module is not loaded.
            raise SystemExit

        self.p_obs = p_obs
        self.p_prior = p_prior
        self.p_prior_pf = p_prior_pf
        self.p_post = None
        self.p_post_pf = None
        self.r = None
        self.acc_rate = None
        self.acc_idx = None

    def generate_posterior_samples(self):
        """Perform accept/reject sampling on the prior samples.

        The weights ``r`` are stored in ``self.r`` and the fraction of
        accepted samples in ``self.acc_rate``. A warning is issued when the
        acceptance rate drops below 1%.

        Returns:
            Tuple ``(prior samples, prior push-forward samples)`` restricted
            to the accepted indices.
        """
        # Evaluation points shared by both density evaluations
        pf_points = np.squeeze(self.p_prior_pf.samples).T

        # Calculate the weights (the small offset keeps them strictly
        # positive wherever the observed density evaluates to zero)
        r = np.divide(
            self.p_obs.kernel_density(pf_points) + 1e-10,
            self.p_prior_pf.kernel_density(pf_points))

        # Uniform draws to check the normalized weights against
        check = np.random.uniform(low=0, high=1, size=r.size)

        # Normalize weights
        r_scaled = r / np.max(r)

        # Evaluate criterion
        idx = np.where(r_scaled >= check)[0]

        self.r = r
        self.acc_rate = idx.size / r.shape[0]

        if self.acc_rate < 1.0e-2:
            warnings.warn('Small acceptance rate: %f / %d accepted samples.' %
                          (self.acc_rate, idx.size))

        return self.p_prior.samples[idx], self.p_prior_pf.samples[idx]

    def setup_posterior_and_pf(self):
        """Create the posterior and its push-forward from accepted samples."""
        # Sample the posterior
        post_samples, post_pf_samples = self.generate_posterior_samples()

        # Create a posterior distribution
        self.p_post = Distribution(samples=post_samples,
                                   rv_name=self.p_prior.rv_name,
                                   rv_transform=self.p_prior.rv_transform,
                                   label='Updated',
                                   kde=False)

        # Create the posterior push-forward distribution
        self.p_post_pf = Distribution(samples=post_pf_samples,
                                      rv_name=self.p_obs.rv_name,
                                      rv_transform=self.p_obs.rv_transform,
                                      label='PF Updated')

    def get_prior_post_kl(self):
        """Return the KL between prior and posterior as ``mean(r * log(r))``.

        Requires ``self.r`` to be set, i.e. ``generate_posterior_samples``
        must have been called.
        """
        return np.mean(self.r * np.log(self.r))

    def print_stats(self):
        """Print a bunch of output diagnostics to stdout."""
        print('')
        print('########### CBayes statistics ##########')
        print('')

        # The rejection sampling acceptance rate
        print('Acceptance rate:\t\t\t\t%f' % self.acc_rate)

        # The posterior push-forward mean and std
        # (these should match the observed density)
        print('Posterior push-forward mean:\t%s' % self.p_post_pf.mean())
        print('Posterior push-forward std:\t\t%s' % self.p_post_pf.std())

        # The KL between the push-forward of the posterior and the observed density
        # (this should be very close to zero)
        print('Posterior-PF-Obs KL:\t\t\t%f' %
              self.p_post_pf.calculate_kl_divergence(self.p_obs))

        # The posterior integral
        # (this should be very close to 1.0)
        print('Posterior integral:\t\t\t\t%f' % np.mean(self.r))

        # The KL between posterior and prior (i.e. how informative is the data?)
        # This is done via r / doing KDE for the prior and posterior densities is infeasible when the number of
        # random variables is large.
        print('Posterior-Prior KL:\t\t\t\t%f' % self.get_prior_post_kl())

        print('')
        print('########################################')
        print('')

    def plot_results(self, model_tag='hf'):
        """Plot prior push-forward, observed and posterior push-forward densities.

        Only one- and two-dimensional observed densities are plotted; for
        any other ``p_obs.n_dim`` the method returns without drawing. The
        figure is saved as ``output/cbayes_dists_<model_tag>.pdf``.

        Args:
            model_tag: Suffix of the output file name. For two dimensions
                and ``'hf'`` two additional bivariate plots are saved.
        """
        # Determine bounds
        xmin = np.min(
            [np.min(self.p_prior_pf.samples),
             np.min(self.p_obs.samples)])
        xmax = np.max(
            [np.max(self.p_prior_pf.samples),
             np.max(self.p_obs.samples)])

        # Plot
        if self.p_obs.n_dim == 1:
            self.p_prior_pf.plot_kde(color='C0', xmin=xmin, xmax=xmax)
            self.p_obs.plot_kde(color='C1', xmin=xmin, xmax=xmax)
            self.p_post_pf.plot_kde(color='C2',
                                    linestyle='--',
                                    xmin=xmin,
                                    xmax=xmax)

        elif self.p_obs.n_dim == 2:
            sns.kdeplot(self.p_prior_pf.samples[:, 0],
                        self.p_prior_pf.samples[:, 1],
                        shade=True,
                        shade_lowest=False,
                        cmap='Blues',
                        label='PF-initial',
                        color='C0')
            sns.kdeplot(self.p_obs.samples[:, 0],
                        self.p_obs.samples[:, 1],
                        shade=True,
                        shade_lowest=False,
                        cmap='Reds',
                        label='Observed density',
                        color='C3')
            sns.kdeplot(self.p_post_pf.samples[:, 0],
                        self.p_post_pf.samples[:, 1],
                        cmap='Greys',
                        alpha=1.0,
                        label='PF-updated',
                        color='Black')
            plt.legend(loc='upper right')
            plt.xlabel('$Q_1$')
            plt.ylabel('$Q_2$')
        else:
            return

        # Positional argument: the keyword was renamed from `b` to `visible`
        # in Matplotlib, so neither name works everywhere.
        plt.grid(True)
        plt.gcf().savefig('output/cbayes_dists_%s.pdf' % model_tag, dpi=300)

        # Plot some bivariate distributions
        if self.p_obs.n_dim == 2 and model_tag == 'hf':
            # NOTE(review): the figure is not cleared before this plot;
            # whether the previous contours remain visible depends on
            # plot_kde - confirm.
            self.p_obs.plot_kde()
            plt.grid(True)
            plt.gcf().savefig('output/cbayes_dists_obs.pdf', dpi=300)
            plt.clf()
            self.p_post_pf.plot_kde()
            plt.grid(True)
            plt.gcf().savefig('output/cbayes_dists_hf_post_pf.pdf', dpi=300)

        plt.clf()

    def plot_posterior(self,
                       fignum=1,
                       color='C0',
                       linestyle='-',
                       label='Posterior',
                       save_fig=False):
        """Plot the posterior density for one- or two-dimensional posteriors.

        Nothing is drawn for other dimensions. The kernel density of
        ``self.p_post`` is created here before plotting.

        Args:
            fignum: Figure number forwarded to ``plot_kde``.
            color: Line color forwarded to ``plot_kde``.
            linestyle: Line style forwarded to ``plot_kde``.
            label: Legend label forwarded to ``plot_kde``.
            save_fig: If truthy, save the figure as
                ``output/cbayes_post_densities.pdf``.
        """
        if self.p_obs.n_dim == 1 and self.p_post.n_dim == 1:
            self.p_post.create_kernel_density()
            # Plot over the range covered by the prior samples
            xmin = np.min(self.p_prior.samples, axis=0)
            xmax = np.max(self.p_prior.samples, axis=0)
            self.p_post.plot_kde(fignum=fignum,
                                 color=color,
                                 linestyle=linestyle,
                                 label=label,
                                 xmin=xmin,
                                 xmax=xmax)
            if save_fig:
                plt.grid(True)
                plt.gcf().savefig('output/cbayes_post_densities.pdf', dpi=300)

        elif self.p_post.n_dim == 2:
            self.p_post.create_kernel_density()
            self.p_post.plot_kde(fignum=fignum,
                                 color=color,
                                 linestyle=linestyle,
                                 label=label)
            if save_fig:
                # Axis limits taken from the range of the prior samples
                xmin = np.min(self.p_prior.samples[:, 0], axis=0)
                xmax = np.max(self.p_prior.samples[:, 0], axis=0)
                ymin = np.min(self.p_prior.samples[:, 1], axis=0)
                ymax = np.max(self.p_prior.samples[:, 1], axis=0)
                plt.xlim([xmin, xmax])
                plt.ylim([ymin, ymax])
                plt.grid(True)
                plt.gcf().savefig('output/cbayes_post_densities.pdf', dpi=300)
                plt.clf()