Exemple #1
0
def fit_beta_by_pt(nsteps):
    """Fit a beta distribution by parallel tempering.

    Builds a ``BetaFit(0.5, 0.5)`` model, runs an 8-temperature ``PT_MCMC``
    sampler on it and, for every chain, draws a histogram of the pruned
    chain positions overlaid with the beta pdf for the model's ``a`` and
    ``b``.

    Parameters
    ----------
    nsteps : int
        Number of MCMC steps (assigned to ``opts.nsteps``). ``nsteps // 10``
        is passed as the first argument of ``chain.prune`` before plotting
        (presumably the burn-in length -- confirm against ``prune``).

    Returns
    -------
    PT_MCMC
        The sampler, after ``estimate()`` has run and its chains have been
        pruned for plotting.
    """
    # Create the dummy model
    b = BetaFit(0.5, 0.5)

    # Create the options
    opts = MCMCOpts()
    opts.model = b
    opts.estimate_params = b.parameters
    opts.initial_values = [10 ** 0.5]
    opts.nsteps = nsteps
    opts.anneal_length = 0
    opts.T_init = 1
    opts.use_hessian = False
    opts.seed = 1
    opts.norm_step_size = 0.5
    opts.likelihood_fn = b.likelihood
    opts.step_fn = step

    # Create the MCMC object
    num_temps = 8
    pt = PT_MCMC(opts, num_temps, 10)
    pt.estimate()

    plt.ion()
    for chain in pt.chains:
        plt.figure()
        # Floor division keeps the argument an integer on Python 2 and 3.
        chain.prune(nsteps // 10, 1)
        # NOTE(review): `normed` was replaced by `density` in newer
        # Matplotlib releases; kept as-is for the Matplotlib version this
        # code was written against -- confirm before upgrading.
        (_, bin_edges, _) = plt.hist(chain.positions, bins=100,
                                     normed=True)
        # Overlay the target pdf, evaluated at the histogram bin edges
        plt.plot(bin_edges, beta.pdf(bin_edges, b.a, b.b), 'r')
        plt.ylim((0, 10))
        plt.xlim((0, 1))
    return pt
Exemple #2
0
def fit_twod_gaussians_by_pt(nsteps):
    """Sample a 3x3 grid of two-dimensional Gaussians by parallel tempering.

    Builds a ``TwoDGaussianFit`` whose means lie on the grid
    {0.1, 0.5, 0.9} x {0.1, 0.5, 0.9} with variance ``0.01 ** 2``, runs an
    8-temperature ``PT_MCMC`` sampler on it and, for every chain, shows a
    scatter plot of the pruned positions with a red circle of radius
    ``2 * sd`` around each mean.

    Parameters
    ----------
    nsteps : int
        Number of MCMC steps (assigned to ``opts.nsteps``). ``nsteps // 10``
        is passed as the first argument of ``chain.prune`` before plotting
        (presumably the burn-in length -- confirm against ``prune``).

    Returns
    -------
    PT_MCMC
        The sampler, after ``estimate()`` has run and its chains have been
        pruned for plotting.
    """
    means_x = [0.1, 0.5, 0.9,
               0.1, 0.5, 0.9,
               0.1, 0.5, 0.9]
    means_y = [0.1, 0.1, 0.1,
               0.5, 0.5, 0.5,
               0.9, 0.9, 0.9]
    sd = 0.01
    tdg = TwoDGaussianFit(means_x, means_y, sd ** 2)

    # Create the options
    opts = MCMCOpts()
    opts.model = tdg
    opts.estimate_params = tdg.parameters
    opts.initial_values = [1.001, 1.001]
    opts.nsteps = nsteps
    opts.anneal_length = 0  # necessary so cooling does not occur
    opts.T_init = 1
    opts.use_hessian = False
    opts.seed = 1
    opts.norm_step_size = 0.1
    opts.likelihood_fn = tdg.likelihood
    opts.step_fn = step

    # Create the PT object
    num_temps = 8
    pt = PT_MCMC(opts, num_temps, 100)
    pt.estimate()

    plt.ion()
    for chain in pt.chains:
        fig = plt.figure()
        # Floor division keeps the argument an integer on Python 2 and 3.
        chain.prune(nsteps // 10, 1)
        plt.scatter(chain.positions[:, 0], chain.positions[:, 1])
        ax = fig.gca()
        # Mark every Gaussian mean with a circle of radius 2 * sd
        for x, y in zip(means_x, means_y):
            circ = plt.Circle((x, y), radius=2*sd, color='r', fill=False)
            ax.add_patch(circ)
        plt.xlim((-0.5, 1.5))
        plt.ylim((-0.5, 1.5))
        plt.title('Temp = %.2f' % chain.options.T_init)
        plt.show()

    return pt
    def __init__(self, options, nbd_avgs, nbd_stds, nbd_sites, nbd_observables,
                 builder):
        """Store the data and the model builder, then build the PT sampler.

        ``options`` is updated in place: its ``likelihood_fn``, ``prior_fn``
        and ``step_fn`` fields are set before it is handed to ``PT_MCMC``.
        """
        # Keep the data description and the builder on the instance. These
        # must be in place before get_likelihood_function() is called below.
        self.builder = builder
        self.nbd_sites, self.nbd_observables = nbd_sites, nbd_observables
        self.nbd_avgs, self.nbd_stds = nbd_avgs, nbd_stds

        # Install the callbacks used by the sampler
        options.likelihood_fn = self.get_likelihood_function()
        options.prior_fn = builder.prior
        options.step_fn = self.step
        self.options = options

        # Parallel-tempering sampler built from the configured options
        self.pt = PT_MCMC(options, 10, max_temp=1e7, min_temp=1,
                          swap_period=5)
class NBD_MCMC_PT(object):
    """Fit mechanistic tBid/Bax models to NBD data by parallel tempering.

    The constructor installs the likelihood, prior and step callbacks on
    the supplied options object and builds a ``PT_MCMC`` sampler from it
    (stored as ``self.pt``).

    NOTE(review): ``generate_figures``, ``plot_data``,
    ``get_observable_timecourses`` and ``fit_plotting_function`` read
    ``self.mcmc``, which the constructor does not set (the assignment is
    disabled). They raise ``AttributeError`` unless the caller assigns an
    MCMC-like object to ``self.mcmc`` first -- confirm the intended source.

    Parameters
    ----------
    options : MCMCOpts
        Options for MCMC initialization. Modified in place: the
        ``likelihood_fn``, ``prior_fn`` and ``step_fn`` fields are set.
    nbd_avgs : numpy.array
        data mean; row 0 is used for site 'c3', row 1 for site 'c62'.
    nbd_stds : numpy.array
        data SD, indexed like ``nbd_avgs``.
    nbd_sites : list of strings
        Sites from data to fit.
    nbd_observables : list of strings
        Observables from model to fit to the sites in nbd_sites (paired
        with the sites position by position).
    builder : tbidbaxlipo.models.core.Builder
        Its ``prior`` attribute is installed as the prior function.
    num_temps : optional
        Passed to ``PT_MCMC`` as its second positional argument
        (default 10).
    max_temp, min_temp, swap_period : optional
        Forwarded to ``PT_MCMC`` as keyword arguments of the same names
        (defaults 1e7, 1 and 5).
    """

    def __init__(self, options, nbd_avgs, nbd_stds, nbd_sites, nbd_observables,
                 builder, num_temps=10, max_temp=1e7, min_temp=1,
                 swap_period=5):

        self.nbd_avgs = nbd_avgs
        self.nbd_stds = nbd_stds
        self.nbd_sites = nbd_sites
        self.nbd_observables = nbd_observables
        self.builder = builder

        # Set the MCMC functions. The likelihood factory reads nbd_sites and
        # nbd_observables, so it must run after the assignments above.
        options.likelihood_fn = self.get_likelihood_function()
        options.prior_fn = self.builder.prior
        options.step_fn = self.step

        self.options = options
        self.pt = PT_MCMC(options, num_temps, max_temp=max_temp,
                          min_temp=min_temp, swap_period=swap_period)

    # Pickling support (__getstate__/__setstate__) is currently disabled.
    # If it is re-enabled, the callbacks installed on the options
    # (likelihood_fn, prior_fn, step_fn) have to be dropped before pickling
    # and restored afterwards; they may also have to be reset explicitly in
    # the chains of the PT.

    # MCMC Functions
    # ==============
    def do_fit(self):
        """Runs MCMC on the given model by calling ``self.pt.estimate()``."""
        self.pt.estimate()

    def _format_param_values(self, header, values):
        """Return ``header`` followed by one ``name: value`` line per
        parameter of ``self.mcmc.options.model``."""
        names = [p.name for p in self.mcmc.options.model.parameters]
        return header + '\n' + '\n'.join(
                '%s: %g' % (name, value) for name, value in zip(names, values))

    def generate_figures(self, report_name='report', do_report=True,
                         mixed_start=None, num_samples=500):
        """Plots a series of useful visualizations of the walk, the
        quality of the fit, etc.

        Produces the fit at the initial position, the fit at the last
        accepted position at or after ``mixed_start``, and the parameter
        traces; the initial and final parameter values are printed.

        Parameters
        ----------
        report_name : string
            Name passed to ``Report.write_report`` when ``do_report`` is set.
        do_report : bool
            If True, figures and text are also added to a ``Report`` which
            is written at the end.
        mixed_start : int or None
            First step considered when choosing the final position.
            Defaults to half of ``self.mcmc.options.nsteps``.
        num_samples : int
            Currently unused.
        """
        plt.ion()

        mcmc = self.mcmc
        rep = Report() if do_report else None

        # Plot "Before" curves -------
        before_fig = self.fit_plotting_function(mcmc.initial_position)
        if do_report:
            rep.add_figure(before_fig)

        # Print initial parameter values
        init_vals_str = self._format_param_values(
                'Initial values:',
                mcmc.cur_params(position=mcmc.initial_position))
        print(init_vals_str)
        if do_report:
            rep.add_text(init_vals_str)

        # Plot "After" curves ------------
        # Set to last fit position
        if mixed_start is None:
            # Floor division keeps the index an integer on Python 2 and 3.
            mixed_start = mcmc.options.nsteps // 2
        mixed_positions = mcmc.positions[mixed_start:, :]
        # Keep only the rows whose step was accepted
        mixed_accepted_positions = mixed_positions[mcmc.accepts[mixed_start:]]
        last_position = mixed_accepted_positions[-1, :]

        after_fig = self.fit_plotting_function(last_position)
        if do_report:
            rep.add_figure(after_fig)

        # Print final parameter values
        last_vals_str = self._format_param_values(
                'Final values:', mcmc.cur_params(position=last_position))
        print(last_vals_str)
        if do_report:
            rep.add_text(last_vals_str)

        # TODO: plots of a sampling of fits (FIXME: should be a real
        # sampling) and of the marginal parameter distributions are not
        # implemented here.

        # Plot convergence traces of all parameters
        plt.figure()
        plt.plot(mcmc.positions)
        plt.title("Parameter traces")
        plt.legend([p.name for p in mcmc.options.estimate_params],
                   loc='lower left', prop={'size': 7})
        plt.show()
        if do_report:
            rep.add_current_figure()

        # Write the report
        if do_report:
            rep.write_report(report_name)

    def plot_data(self, axis):
        """Plots the current data into the given axis.

        Row 0 of ``nbd_avgs`` is drawn (red dots) when 'c3' is in
        ``nbd_sites`` and row 1 (green dots) when 'c62' is; both are plotted
        against ``self.mcmc.options.tspan``. Other sites are not plotted.
        """
        alpha = 0.5
        tspan = self.mcmc.options.tspan
        if 'c3' in self.nbd_sites:
            axis.plot(tspan, self.nbd_avgs[0], 'r.',
                      label='c3 data', alpha=alpha)
        if 'c62' in self.nbd_sites:
            axis.plot(tspan, self.nbd_avgs[1], 'g.',
                      label='c62 data', alpha=alpha)

    def get_observable_timecourses(self, position):
        """Gets the timecourses associated with the experimental observables.

        Parameters
        ----------
        position : numpy.array
            Values of parameters (in log10) at desired position.

        Returns
        -------
        dict
            Dict containing a timecourse for every observable in
            ``nbd_observables``. Keys are the observable names.
        """
        # TODO Need to be able to get the indices for scaling parameters
        # from the model so that they're not hardcoded

        # Run the simulation and scale the simulated timecourses
        yout = self.mcmc.simulate(position, observables=True)
        params = self.mcmc.cur_params(position)
        bax_0 = self.mcmc.options.model.parameters['Bax_0'].value
        scaling = params[3]  # hardcoded index of the scaling parameter
        timecourses = {}
        for obs in self.nbd_observables:
            timecourses[obs] = (yout[obs] / bax_0) * scaling

        return timecourses

    def fit_plotting_function(self, position):
        """Gets the observable timecourse and plots it against the data.

        Returns the ``Figure`` holding the plot, with an Agg canvas
        attached.
        """
        # Run the simulation at the given position
        timecourses = self.get_observable_timecourses(position)
        # Make the plot
        fig = Figure()
        ax = fig.gca()
        # Add the data to the plot
        self.plot_data(ax)
        # Add the simulations to the plot
        for obs_name, timecourse in timecourses.items():
            ax.plot(self.mcmc.options.tspan, timecourse, label=obs_name)
        # Label the plot
        ax.set_xlabel('Time')
        ax.set_ylabel('Concentration')
        fontP = FontProperties()
        fontP.set_size('small')
        ax.legend(loc='upper center', prop=fontP, ncol=5,
                  bbox_to_anchor=(0.5, 1.1), fancybox=True, shadow=True)
        canvas = FigureCanvasAgg(fig)
        fig.set_canvas(canvas)
        return fig

    # A function to generate the likelihood function
    def get_likelihood_function(self):
        """Returns a likelihood function for the specified NBD sites.

        The returned callable takes ``(mcmc, position)``, simulates the
        model and returns the sum over the fitted sites of
        ``(data - simulation)**2 / (2 * sd**2)``, where the simulated
        observable is divided by the model's ``Bax_0`` value and multiplied
        by the parameter at index 3.

        Site ``i`` of ``nbd_sites`` is paired with observable ``i`` of
        ``nbd_observables``. Only the sites 'c3' (data row 0) and 'c62'
        (data row 1) are recognised; other entries are ignored.

        Raises
        ------
        ValueError
            If none of the recognised sites is present, or if the number of
            recognised sites differs from the number of observables.
        """
        # Row of nbd_avgs / nbd_stds holding the data of each known site
        site_to_index = {'c3': 0, 'c62': 1}
        # Follow the order of nbd_sites so that every observable is compared
        # with the data of the site at the same list position.
        data_indices = [site_to_index[site] for site in self.nbd_sites
                        if site in site_to_index]
        # Make sure the list is not empty
        if not data_indices:
            raise ValueError('Failed to initialize data_indices!')

        # Make sure that there is a corresponding data trajectory for each
        # observable
        if len(data_indices) != len(self.nbd_observables):
            raise ValueError('Length of list of nbd_sites does not match the '
                             'list of nbd_observables!')

        def likelihood(mcmc, position):
            yout = mcmc.simulate(position, observables=True)
            # TODO Need to be able to get the indices from the model so that
            # they're not hardcoded
            params = mcmc.cur_params(position)
            bax_0 = mcmc.options.model.parameters['Bax_0'].value
            scaling = params[3]
            err = 0
            for data_index, obs_name in zip(data_indices,
                                            self.nbd_observables):
                timecourse = (yout[obs_name] / bax_0) * scaling
                err += np.sum((self.nbd_avgs[data_index] - timecourse)**2 /
                              (2 * self.nbd_stds[data_index]**2))
            return err

        return likelihood

    def get_basename(self):
        """A function for standardizing, in one place, the format for pickled
        NBD_MCMC objects.

        Returns ``<model name>_<sites>_<observables>_<nsteps>_s<seed>``, with
        the sites and the observables each joined by '-'.
        """
        return '%s_%s_%s_%d_s%d' % (self.options.model.name,
                                    '-'.join(self.nbd_sites),
                                    '-'.join(self.nbd_observables),
                                    self.options.nsteps,
                                    self.options.seed)

    @staticmethod
    def step(mcmc):
        """The function to call at every iteration. Currently just prints
        out a few progress indicators on every 20th iteration.

        The local acceptance rate covers the last
        ``mcmc.options.accept_window`` steps, or all steps so far when
        fewer have been taken.
        """
        if mcmc.iter % 20 != 0:
            return

        window = mcmc.options.accept_window
        # Clamp the start of the window: a negative start would be
        # interpreted as an offset from the end of the accepts array.
        start = max(mcmc.iter - window, 0)
        num_recent = mcmc.iter - start
        if num_recent > 0:
            local_acc = np.sum(mcmc.accepts[start:mcmc.iter]) / \
                                  float(num_recent)
        else:
            local_acc = 0.0

        print('iter=%-5d  sigma=%-.3f  T=%-.3f  loc_acc=%-.3f  '
              'glob_acc=%-.3f  lkl=%g  prior=%g  post=%g' %
              (mcmc.iter, mcmc.sig_value, mcmc.T,
               local_acc,
               mcmc.acceptance/(mcmc.iter+1.), mcmc.accept_likelihood,
               mcmc.accept_prior, mcmc.accept_posterior))