Beispiel #1
0
    def test_plot(self):
        """Write the plots for ``self.M`` into ``DIR``, or skip when ``PLOT`` is falsy."""
        if PLOT:
            # Plot samples
            plot(self.M, path=DIR, verbose=0)
        else:
            raise nose.SkipTest
Beispiel #2
0
    def test_plot(self):
        """Plot ``self.M`` with the default plot settings, or skip when ``PLOT`` is falsy."""
        if PLOT:
            # Plot samples
            plot(self.M)
        else:
            raise nose.SkipTest
Beispiel #3
0
    def test_plot(self):
        """Write the plots for ``self.M`` into ``DIR``, or skip when ``PLOT`` is falsy."""
        if PLOT:
            # Plot samples
            plot(self.M, path=DIR, verbose=0)
        else:
            raise nose.SkipTest
Beispiel #4
0
def fit_model():
    """Sample ``disaster_model`` with MCMC, print two results and plot the sampler."""
    sampler = MCMC(disaster_model)
    sampler.sample(iter=10000, burn=1000, thin=10)

    switchpoint_trace = sampler.trace('switchpoint')[:]
    print('switchpoint: ', switchpoint_trace)

    # The value returned by hist() is what gets printed here.
    late_mean_hist = hist(sampler.trace('late_mean')[:])
    print('hist: ', late_mean_hist)
    # show()
    plot(sampler)
Beispiel #5
0
    def plot(self):
        """Fit a MAP estimate, plot the sampler and save the model-fit figure.

        Prints the BIC of a MAP fit of ``self.mc``, calls the module-level
        ``plot`` on ``self.mc``, then draws the confirmed cases together with
        the simulated mean and its 2.5/97.5 quantile band.  The figure is saved
        as ``<last backslash-separated component of self.dir>_fit.png``.

        Relies on the module-level names ``months_list``, ``confirmed_cases``
        and ``mortalitysim``.
        """
        mc_map = pm.MAP(self.mc)
        mc_map.fit(tol=.01)
        # iterlim = 250,
        print("BIC score: {}".format(mc_map.BIC))
        plot(self.mc)

        # Style sheet and rc settings only affect artists created afterwards,
        # so they have to be in place before the figure is built.
        plt.style.use('ggplot')
        plt.rc('font', size=12)
        plt.rc('lines', linewidth=2)

        # Summarise the simulation once instead of once per plotted series.
        sim_stats = mortalitysim.stats()
        sim_quantiles = sim_stats['quantiles']

        # set years and months
        years = mdates.YearLocator()  # every year
        months = mdates.MonthLocator()  # every month
        years_fmt = mdates.DateFormatter('%Y')
        fig, ax = plt.subplots()

        # plot the data
        ax.plot(months_list,
                confirmed_cases,
                'o',
                mec='black',
                color='black',
                label='confirmed cases')
        ax.plot(months_list,
                sim_stats['mean'],
                color='red',
                linewidth=1,
                label='MIH (mean)')
        ax.fill_between(months_list,
                        sim_quantiles[2.5],
                        sim_quantiles[97.5],
                        color='r',
                        alpha=0.3,
                        label='BPL (95% CI)')

        # format the ticks
        ax.xaxis.set_major_locator(years)
        ax.xaxis.set_major_formatter(years_fmt)
        ax.xaxis.set_minor_locator(months)

        # set the axis limit: one unit of padding on either side of the data
        ax.set_xlim(min(months_list) - 1, max(months_list) + 1)

        # format the coords message box
        def price(x):
            return '$%1.2f' % x

        ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
        ax.format_ydata = price
        ax.grid(True)

        # rotates and right aligns the x labels, and moves the bottom of the
        # axes up to make room for them
        fig.autofmt_xdate()

        # some extra plot formatting
        plt.title("Plague model fit to laboratory confirmed cases")
        plt.xlabel('Time in months')
        plt.ylabel('Number of infecteds')
        # One legend is enough; a second call would only replace the first.
        ax.legend(loc='best')
        plt.savefig(self.dir.split("\\")[-1] + '_fit.png')
def run1():
    """Fit a line with outlier rejection to a small fake data set and plot it.

    Runs ``pymc_linear_fit_withoutliers`` on the data, samples the returned
    sampler again, saves the fit figure as ``test.pdf`` and finally calls
    ``plot`` on the sampler.
    """
    # fake data; columns are used below as x, y, y error, x error
    data = np.array([[201, 592, 61, 9],
                     [244, 401, 25, 4],
                     [47, 583, 58, 11],
                     [287, 402, 15, 7],
                     [203, 495, 21, 5],
                     [58, 173, 15, 9],
                     [210, 479, 27, 4],
                     [202, 504, 14, 4],
                     [198, 510, 30, 11],
                     [158, 416, 16, 7],
                     [165, 393, 14, 5],
                     [201, 442, 25, 5],
                     [157, 317, 52, 5],
                     [131, 311, 16, 6],
                     [166, 400, 34, 6],
                     [160, 337, 31, 5],
                     [186, 423, 42, 9],
                     [125, 334, 26, 8],
                     [218, 533, 16, 6],
                     [146, 344, 22, 5],
                     [150, 300, 23, 10],
                     [270, 620, 40, 15]])

    #rename columns
    xdata, ydata = data[:, 0], data[:, 1]
    xerr, yerr = data[:, 3], data[:, 2]

    #perform MCMC
    MC = pymc_linear_fit_withoutliers(xdata, ydata, data1err=xerr, data2err=yerr, return_MC=True)
    MC.sample(100000, burn=1000, verbose=0)

    # Summarise the chain once instead of once per plotted quantity.
    stats = MC.stats()
    slope, intercept = stats['slope'], stats['intercept']
    xgrid = np.linspace(20, 300)
    outliers = MC.badvals.value.astype('bool')

    #show the results
    plt.figure()

    #plot the confidence levels
    low25 = xgrid * slope['quantiles'][2.5] + intercept['quantiles'][2.5]
    top97 = xgrid * slope['quantiles'][97.5] + intercept['quantiles'][97.5]
    plt.fill_between(xgrid, low25, top97, color='k', alpha=0.1, label='2.5/97.5 quartile')

    #plot the average results
    plt.plot(xgrid, xgrid * slope['mean'] + intercept['mean'],
             color='k', linewidth=1, label='Average fit')

    #plot data
    plt.errorbar(xdata, ydata, xerr=xerr, yerr=yerr, color='b', label='data', fmt='o')

    #show likely outliers
    plt.plot(xdata[outliers], ydata[outliers], 'rs', label='likely outliers')

    plt.xlim(20, 300)
    plt.legend(shadow=True, fancybox=True, scatterpoints=1, numpoints=1, loc='upper left')
    plt.savefig('test.pdf')
    plt.close()

    #MCMC plot
    plot(MC)
Beispiel #7
0
 def run_mc(self, nsample=10000, interactive=False):
     """Build an MCMC sampler for this model, run it and plot the result.

     The sampler is stored on ``self.M``.  ``interactive`` selects
     ``isample`` instead of ``sample``; either way ``nsample`` iterations
     are drawn with ``burn=1000`` and ``thin=10``.
     """
     from pymc.Matplot import plot
     from pymc import MCMC

     self.M = MCMC(self)
     # Pick the sampling entry point once, then call it with the shared settings.
     draw = self.M.isample if interactive else self.M.sample
     draw(iter=nsample, burn=1000, thin=10)
     plot(self.M)
Beispiel #8
0
 def run_mc(self, nsample=10000, interactive=False, doplot=False, verbose=0):
     """Build an MCMC sampler for this model, run it and optionally plot it.

     The sampler is stored on ``self.M``.  ``interactive`` selects
     ``isample`` instead of ``sample``; either way ``nsample`` iterations
     are drawn with ``burn=1000`` and ``thin=10``, passing ``verbose``
     through.  Plots are produced only when ``doplot`` is true.
     """
     from pymc import MCMC

     self.M = MCMC(self)
     # Pick the sampling entry point once, then call it with the shared settings.
     draw = self.M.isample if interactive else self.M.sample
     draw(iter=nsample, burn=1000, thin=10, verbose=verbose)

     if not doplot:
         return
     # The plotting module is imported only when plots are requested.
     from pymc.Matplot import plot
     plot(self.M)
    def test_simple(self):
        """Check that ``pymc.geweke`` yields one score per requested interval for 'a'."""
        n_intervals = 20
        all_scores = pymc.geweke(S, intervals=n_intervals)
        assert_equal(len(all_scores['a']), n_intervals)

        # Plot diagnostics (if plotting is available)
        try:
            from pymc.Matplot import geweke_plot
            geweke_plot(all_scores, path=DIR, verbose=0)
        except ImportError:
            pass
    def test_simple(self):
        """Check that ``pymc.geweke`` (with ``maxlag=5``) yields one score per interval for 'a'."""
        n_intervals = 20
        all_scores = pymc.geweke(S, intervals=n_intervals, maxlag=5)
        assert_equal(len(all_scores['a']), n_intervals)

        # Plot diagnostics (if plotting is available)
        try:
            from pymc.Matplot import geweke_plot
            geweke_plot(all_scores, path=DIR)
        except ImportError:
            pass
Beispiel #11
0
def analizeMwm():
    """Sample the ``dmwm`` model with MCMC, printing intermediate values and plotting."""
    masked = np.ma.masked_equal(x, value=None)
    print("m v: ", masked)

    print("dmwm da: ", dmwm.disasters_array)

    sampler = MCMC(dmwm)
    sampler.sample(iter=10000, burn=1000, thin=10)

    switchpoint_trace = sampler.trace('switchpoint')[:]
    print("Mwm t: ", switchpoint_trace)

    hist(sampler.trace('late_mean')[:])
    # show()

    plot(sampler)
Beispiel #12
0
 def run_mc(self, nsample=10000, interactive=False, doplot=False, verbose=0):
     """Build an MCMC sampler for this model, run it and optionally plot it.

     The sampler is stored on ``self.M``.  ``interactive`` selects
     ``isample`` instead of ``sample``; either way ``nsample`` iterations
     are drawn with ``burn=1000`` and ``thin=10``, passing ``verbose``
     through.  Plots are produced only when ``doplot`` is true.
     """
     from pymc import MCMC

     self.M = MCMC(self)
     sampling_kwargs = dict(iter=nsample, burn=1000, thin=10, verbose=verbose)
     if interactive:
         self.M.isample(**sampling_kwargs)
     else:
         self.M.sample(**sampling_kwargs)

     if not doplot:
         return
     # The plotting module is imported only when plots are requested.
     from pymc.Matplot import plot
     plot(self.M)
Beispiel #13
0
    def test_simple(self):
        """Check the number and the spread of the Geweke scores computed for 'a'."""
        n_intervals = 20
        all_scores = pymc.geweke(S, intervals=n_intervals)
        scores_for_a = all_scores['a']
        assert_equal(len(scores_for_a), n_intervals)

        # If the model has converged, 95% the scores should lie
        # within 2 standard deviations of zero, under standard normal model
        score_values = np.array(scores_for_a)[:, 1]
        n_extreme = sum(np.abs(score_values) > 1.96)
        assert n_extreme < 2

        # Plot diagnostics (if plotting is available)
        try:
            from pymc.Matplot import geweke_plot
            geweke_plot(all_scores, path=DIR, verbose=0)
        except ImportError:
            pass
Beispiel #14
0
    def test_simple(self):
        """Check the number and the spread of the Geweke scores computed for 'a'."""
        n_intervals = 20
        all_scores = pymc.geweke(S, intervals=n_intervals)
        scores_for_a = all_scores['a']
        assert_equal(len(scores_for_a), n_intervals)

        # If the model has converged, 95% the scores should lie
        # within 2 standard deviations of zero, under standard normal model
        score_values = np.array(scores_for_a)[:, 1]
        n_extreme = sum(np.abs(score_values) > 1.96)
        assert n_extreme < 2

        # Plot diagnostics (if plotting is available)
        try:
            from pymc.Matplot import geweke_plot
            geweke_plot(all_scores, path=DIR, verbose=0)
        except ImportError:
            pass
Beispiel #15
0
def analizeM():
    """Sample the ``dm`` model, plot it, print its step methods and replace one of them."""
    sampler = MCMC(dm)
    print("M: ", sampler)

    sampler.sample(iter=10000, burn=1000, thin=10)
    switchpoint_trace = sampler.trace('switchpoint')[:]
    print("M t: ", switchpoint_trace)

    hist(sampler.trace('late_mean')[:])
    # show()

    plot(sampler)
    # show()

    # Print the step method registered for each of the three variables.
    assigned = sampler.step_method_dict
    print("M smd dm sp: ", assigned[dm.switchpoint])
    print("M smd dm em: ", assigned[dm.early_mean])
    print("M smd dm lm: ", assigned[dm.late_mean])

    # Use Metropolis with proposal_sd=2 for late_mean from here on.
    sampler.use_step_method(Metropolis, dm.late_mean, proposal_sd=2.)
Beispiel #16
0
def main():
    """Sample the model built by ``hierarchical_prior.set_flips`` and plot it as PDF.

    Prints the values of the ``kappa`` and ``mu`` nodes before and after
    sampling, then the sampler's variables, and finally calls ``plot`` with
    ``format='pdf'``.
    """
    Amu = 2
    Bmu = 2
    Skappa = 1
    Rkappa = 0.1
    data = [(12, 6), (7, 2)]

    model = pymc.MCMC(hierarchical_prior.set_flips(data,
                                                   Amu,
                                                   Bmu,
                                                   Skappa,
                                                   Rkappa))

    # Each message is formatted into one string, so the call prints the same
    # text whether ``print`` is the Python 2 statement or the Python 3 function.
    print("PRIOR KAPPA/MU %s %s" % (model.kappa.value, model.mu.value))
    model.sample(iter=10000, burn=9000, thin=2)
    print("POST KAPPA/MU %s %s" % (model.kappa.value, model.mu.value))

    print("MODEL %s" % (model.variables,))

    plot(model, format='pdf')
Beispiel #17
0
    #perform MCMC
    MC = pymc_linear_fit_withoutliers(xdata, ydata, data1err=xerr, data2err=yerr, return_MC=True)
    MC.sample(100000, burn=1000, verbose=0)

    #show the results
    fig = plt.figure()

    #plot the confidence levels
    low25 = np.linspace(20,300)*MC.stats()['slope']['quantiles'][2.5] + MC.stats()['intercept']['quantiles'][2.5]
    top97 = np.linspace(20,300)*MC.stats()['slope']['quantiles'][97.5] + MC.stats()['intercept']['quantiles'][97.5]
    plt.fill_between(np.linspace(20,300), low25, top97, color='k', alpha=0.1, label='2.5/97.5 quartile')

    #plot the average results
    plt.plot(np.linspace(20,300), np.linspace(20,300)*MC.stats()['slope']['mean'] + MC.stats()['intercept']['mean'],
             color='k', linewidth=1, label='Average fit')

    #plot data
    plt.errorbar(xdata, ydata, xerr=xerr, yerr=yerr, color='b', label='data', fmt='o')

    #show likely outliers
    plt.plot(xdata[MC.badvals.value.astype('bool')], ydata[MC.badvals.value.astype('bool')], 'rs',
             label='likely outliers')

    plt.xlim(20, 300)
    plt.legend(shadow=True, fancybox=True, scatterpoints=1, numpoints=1, loc='upper left')
    plt.savefig('test.pdf')
    plt.close()

    #MCMC plot
    plot(MC)
Beispiel #18
0
import mean_std
import pymc
from pymc.Matplot import plot
from pylab import show

# Sample the mean_std model with MCMC, then report and plot the result.
model = pymc.MCMC(mean_std)
model.sample(iter=10000)

summary = model.stats()
print(summary)

plot(model)
show()

Beispiel #19
0
def pymc_linear_fit_withoutliers(data1, data2, data1err=None, data2err=None,
                                 print_results=True, intercept=True, nsample=50000, burn=5000,
                                 thin=2, return_MC=False, guess=None, verbose=0):
    """
    Use pymc to fit a line to data with outliers, assuming outliers
    come from a broad, uniform distribution that cover all the data.

    The sampler is run inside this function and ``plot`` is called on it
    before the results are summarised.

    :param data1: xdata
    :param data2: ydata
    :param data1err: x errors (a precision of 1 is used when None)
    :param data2err: y errors (a precision of 1 is used when None)
    :param print_results: whether or not to print out the results
    :param intercept: whether or not to fit for intercept
    :param nsample: number of samples
    :param burn: number of burn-in samples
    :param thin: thinning value
    :param return_MC: whether or not to return the pymc MCMC instance
    :param guess: initial guesses for slope and intercept, (0, 0) when None
    :param verbose: verbosity level of MCMC sampler
    :return: the MCMC instance when ``return_MC`` is true; otherwise
             ``(slope, intercept)`` when ``intercept`` is true, else ``slope``
    """
    if guess is None:
        guess = (0, 0)

    # Array arithmetic is applied to all inputs below; asarray hands existing
    # arrays back unchanged and lets plain sequences work as well.
    data1 = np.asarray(data1)
    data2 = np.asarray(data2)
    if data1err is not None:
        data1err = np.asarray(data1err)
    if data2err is not None:
        data2err = np.asarray(data2err)

    xmu = pymc.distributions.Uninformative(name='x_observed', value=0)

    if data1err is None:
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True, value=data1, tau=1, trace=False)
    else:
        xtau = pymc.distributions.Uninformative(name='x_tau', value=1.0 / data1err ** 2, observed=True, trace=False)
        xdata = pymc.distributions.Normal('x', mu=xmu, observed=True, value=data1, tau=xtau, trace=False)

    # Range of the uniform outlier distribution.  Without y errors the range
    # of the data itself is used; subtracting None used to raise a TypeError.
    if data2err is None:
        bady_lower, bady_upper = np.min(data2), np.max(data2)
    else:
        bady_lower, bady_upper = np.min(data2 - data2err), np.max(data2 + data2err)

    d = {'slope': pymc.distributions.Uninformative(name='slope', value=guess[0], doc='Slope of the straight line'),
         'badvals': pymc.distributions.DiscreteUniform('bad', lower=0, upper=1, value=[False] * len(data2), doc='Outliers'),
         'bady': pymc.distributions.Uniform('bady', bady_lower, bady_upper, value=data2)}

    # ``1 - badvals`` selects the points that are not flagged as outliers.
    # It replaces ``True - badvals``, which NumPy rejects for boolean arrays.
    if intercept:
        d['intercept'] = pymc.distributions.Uninformative(name='intercept', value=guess[1])

        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope'], intercept=d['intercept'], badvals=d['badvals'], bady=d['bady']):
            return (x * slope + intercept) * (1 - badvals) + badvals * bady
    else:
        @pymc.deterministic(trace=False)
        def model(x=xdata, slope=d['slope'], badvals=d['badvals'], bady=d['bady']):
            return x * slope * (1 - badvals) + badvals * bady

    d['f'] = model

    if data2err is None:
        ydata = pymc.distributions.Normal('y', mu=model, observed=True, value=data2, tau=1, trace=False)
    else:
        ytau = pymc.distributions.Uninformative(name='y_tau', value=1.0 / data2err ** 2, observed=True, trace=False)
        ydata = pymc.distributions.Normal('y', mu=model, observed=True, value=data2, tau=ytau, trace=False)

    d['y'] = ydata

    MC = pymc.MCMC(d)
    MC.sample(nsample, burn=burn, thin=thin, verbose=verbose)

    #generate plots
    plot(MC)

    MCs = MC.stats()
    m, em = MCs['slope']['mean'], MCs['slope']['standard deviation']

    if intercept:
        b, eb = MCs['intercept']['mean'], MCs['intercept']['standard deviation']

    if print_results:
        # Residuals of the reported fit; the intercept is part of that fit
        # whenever it was fitted.
        residuals = data2 - data1 * m
        if intercept:
            residuals = residuals - b

        # Single-argument print calls behave the same on Python 2 and 3.  The
        # " " separator is the one the former two-statement print emitted.
        print("MCMC Best fit y = %g x" % m + " " + (" + %g" % b if intercept else ""))
        print("m = %g +/- %g" % (m, em))

        if intercept:
            print("b = %g +/- %g" % (b, eb))

        print("Chi^2 = %g, N = %i" % ((residuals ** 2).sum(), data1.shape[0] - 1))

    if return_MC:
        return MC

    if intercept:
        return m, b
    else:
        return m
Beispiel #20
0
    ent = 0
    accept = dbstate[dbstate.keys()[ent]]['_accepted']
    reject = dbstate[dbstate.keys()[ent]]['_rejected']

acceptancerate_theta = accept / (reject+accept)

if args.verbose:
    print '      with theta = (k,L*,N) acceptance rate =',acceptancerate_theta
    print '      No. of accepted steps = '+str(accept)
    print '      No. of rejected steps = '+str(reject)

MM.db.close()
if args.verbose: print ' - Saved chains to ',dbname
#-------------------------------------------------------------------------------------------------------------
if args.verbose: print ' - Plot logNobjuniverse, k and logLstar samples'
plot(MM)                                         # plotting results

if not os.path.isdir('./balff_plots/'):
    if args.verbose: print ' - Did not find "./balff_plots/" so creating it'
    os.mkdir('./balff_plots/')

thetafile = './balff_plots/'+dbname.split('balff_output/')[-1].replace('.pickle','_theta.png')
if args.verbose: print ' - Moving theta_2.png to',thetafile
out = commands.getoutput('mv theta_2.png '+thetafile)

if args.verbose: print '\n - Printing Summary'
statval = 1 # resetting stat indicator
ssval = MM.stats() # checking that stats can be created

if (ssval['theta'] == None): statval = 0
if statval == 1:
#!/usr/bin/env python
import two_normal_model
from pymc import MCMC
from pymc.Matplot import plot

# do posterior sampling
m = MCMC(two_normal_model)
m.sample(iter=100000, burn=1000)
print(m.stats())

# write the trace of every listed variable to "<name>.trace"
import numpy
for p in ('mean1', 'mean2', 'std_dev', 'theta'):
    trace_values = m.trace(p)[:]
    numpy.savetxt("%s.trace" % p, trace_values)

# draw some pictures
plot(m)
"""
A wrapper for the main tomography script.
"""

# Author : Sangeeta Bhatia

import pymc as pm
import tomography as tm
from pymc.Matplot import plot
from os import rename
import sys

# Resolve the output prefix before doing any work, so that a missing argument
# is reported immediately instead of after the whole simulation has run.
if len(sys.argv) < 2:
    sys.exit("usage: %s <output-prefix>" % sys.argv[0])
fname = sys.argv[1]

runs = int(input("Enter the number of iterations for the MCMC simulation: "))
burnin = int(input("Enter the burn in for the MCMC simulation: "))
thin = int(input("Enter the thining variable for the MCMC simulation: "))
S = pm.MCMC(tm)
S.sample(runs, burnin, thin)
stats = S.stats()
S.write_csv("summary.csv", variables=['Q', 'tau'])
plot(S)  # Automatically saves the output - one .png file for each variable.

# Finally, prefix the generated files with the name given on the command line.
sname = fname + '-summary.csv'
qname = fname + '-Q.png'
tname = fname + '-tau.png'
rename("summary.csv", sname)
rename("Q.png", qname)
rename("tau.png", tname)
from pymc import MCMC
from pymc.Matplot import plot
import numpy as np

import small_model as model

A = MCMC(model)
A.sample(iter=5000)
plot(A, suffix='-gamma')

# Summarise the chain once; every lookup below reads from this result.
stats = A.stats()

# Single-argument print calls give the same output on Python 2 and Python 3.
print('%s prior' % model.prior)
print([(x, stats[x]['mean']) for x in stats])
error = ((1 - stats['ABp']['mean']) * 400
         + stats['CAp']['mean'] * 600
         + stats['CBp']['mean'] * 1000
         - 200)
print('Error: %s' % error)
Beispiel #24
0
def run1():
    """Fit a line with outlier rejection to a small fake data set and plot it.

    Runs ``pymc_linear_fit_withoutliers`` on the data, samples the returned
    sampler again, saves the fit figure as ``test.pdf`` and finally calls
    ``plot`` on the sampler.
    """
    # fake data; columns are used below as x, y, y error, x error
    data = np.array([[201, 592, 61, 9], [244, 401, 25, 4], [47, 583, 58, 11],
                     [287, 402, 15, 7], [203, 495, 21, 5], [58, 173, 15, 9],
                     [210, 479, 27, 4], [202, 504, 14, 4], [198, 510, 30, 11],
                     [158, 416, 16, 7], [165, 393, 14, 5], [201, 442, 25, 5],
                     [157, 317, 52, 5], [131, 311, 16, 6], [166, 400, 34, 6],
                     [160, 337, 31, 5], [186, 423, 42, 9], [125, 334, 26, 8],
                     [218, 533, 16, 6], [146, 344, 22, 5], [150, 300, 23, 10],
                     [270, 620, 40, 15]])

    #rename columns
    xdata, ydata = data[:, 0], data[:, 1]
    xerr, yerr = data[:, 3], data[:, 2]

    #perform MCMC
    MC = pymc_linear_fit_withoutliers(xdata,
                                      ydata,
                                      data1err=xerr,
                                      data2err=yerr,
                                      return_MC=True)
    MC.sample(100000, burn=1000, verbose=0)

    # Summarise the chain once instead of once per plotted quantity.
    stats = MC.stats()
    slope, intercept = stats['slope'], stats['intercept']
    xgrid = np.linspace(20, 300)
    outliers = MC.badvals.value.astype('bool')

    #show the results
    plt.figure()

    #plot the confidence levels
    low25 = xgrid * slope['quantiles'][2.5] + intercept['quantiles'][2.5]
    top97 = xgrid * slope['quantiles'][97.5] + intercept['quantiles'][97.5]
    plt.fill_between(xgrid,
                     low25,
                     top97,
                     color='k',
                     alpha=0.1,
                     label='2.5/97.5 quartile')

    #plot the average results
    plt.plot(xgrid,
             xgrid * slope['mean'] + intercept['mean'],
             color='k',
             linewidth=1,
             label='Average fit')

    #plot data
    plt.errorbar(xdata,
                 ydata,
                 xerr=xerr,
                 yerr=yerr,
                 color='b',
                 label='data',
                 fmt='o')

    #show likely outliers
    plt.plot(xdata[outliers],
             ydata[outliers],
             'rs',
             label='likely outliers')

    plt.xlim(20, 300)
    plt.legend(shadow=True,
               fancybox=True,
               scatterpoints=1,
               numpoints=1,
               loc='upper left')
    plt.savefig('test.pdf')
    plt.close()

    #MCMC plot
    plot(MC)
import model
import pymc
import networkx as nx
import matplotlib.pyplot as plt
from pymc import MCMC
from pymc.Matplot import plot
# Sample the imported model, then write its plots under the given path.
M = MCMC(model)
M.sample(
    iter=10000,
    burn=1000,
    thin=10,
)
plot(
    M,
    path='./plots/test_soft_evidence',
)

#g = pymc.graph.graph(pymc.Model(model), path='.')
#g.write('model.dot')
#G = nx.drawing.nx_agraph.read_dot('model.dot')
#nx.draw(G)
#plt.draw()

#hist(M.trace('late_mean')[:])
#show()

Beispiel #26
0
class MCMCRunManager(object):
    """Manages a single MCMC result in disk.

    Everything belonging to one run lives under ``root_dir``: the model
    description (``model/``), the trace database (``db/``), plots
    (``plots/``), per-chain statistics (``stats/``), the input data
    (``data/``) and a ``DONE`` marker file.
    """

    def __init__(self, root_dir, name=None, backend='pickle'):
        """Set up the directory layout for one run.

        :param root_dir: directory of the run; when ``name`` is given the run
                         lives in the ``name`` subdirectory of ``root_dir``
        :param name: name of the run, defaults to the basename of ``root_dir``
        :param backend: name of the ``pymc.database`` backend to use
        """
        super(MCMCRunManager, self).__init__()

        # root-dir and name
        if name is None:
            name = op.basename(root_dir)
        else:
            root_dir = op.join(root_dir, name)
        # Stored in both cases: an explicitly passed name used to be dropped,
        # which made the file names below fail with an AttributeError.
        self.name = name
        self.root_dir = ensure_dir(root_dir)

        # Models storage
        self.model_dir = ensure_dir(op.join(root_dir, 'model'))
        self.model_pickle = op.join(self.model_dir, '%s.pickle' % self.name)
        self.model_txt = op.join(self.model_dir, '%s.py' % self.name)
        self.model_dot = op.join(self.model_dir, '%s.dot' % self.name)
        self.model_png = op.join(self.model_dir, '%s.png' % self.name)

        # Traces storage
        self.db_dir = ensure_dir(op.join(self.root_dir, 'db'))
        self.backend = backend
        self.db_file = op.join(self.db_dir,
                               '%s.pymc.%s' % (self.name, self.backend))
        self._db = None

        # Plots storage
        self.plots_dir = ensure_dir(op.join(self.root_dir, 'plots'))

        # Stats storage
        self.stats_dir = ensure_dir(op.join(self.root_dir, 'stats'))

        # Data storage - only if we really want to keep provenance clear
        self.data_dir = ensure_dir(op.join(self.root_dir, 'data'))
        self.data_file = op.join(self.data_dir, 'data.pickle')

        # Done file
        self.done_file = op.join(self.root_dir, 'DONE')

    def is_done(self):
        """Return True when the DONE marker file of this run exists."""
        return op.isfile(self.done_file)

    def save_model_txt(self, txt):
        """Write the model source text to the model ``.py`` file."""
        with open(self.model_txt, 'w') as writer:
            writer.write(txt)

    def load_model_txt(self):
        """Return the stored model source text, or None if it cannot be read."""
        try:
            with open(self.model_txt) as reader:
                return reader.read()
        except Exception:
            # Best effort, but do not swallow KeyboardInterrupt / SystemExit.
            return None

    def save_data(self, data, overwrite=True):
        """Store ``data`` in the data file of this run."""
        save_perturbation_record_data_to_hdf5(data,
                                              self.data_file,
                                              overwrite=overwrite)

    def load_data(self):
        """Return the stored data, or None if it cannot be loaded."""
        try:
            return load_perturbation_record_data_from_hdf5(self.data_file)
        except Exception:
            # Best effort, but do not swallow KeyboardInterrupt / SystemExit.
            return None

    def save_pymc_model_dict(self, model_dict):
        """Serialize ``model_dict`` with dill; any failure is reported and re-raised."""
        try:
            import dill
            # Binary mode: pickle protocols above 0 produce bytes.
            with open(self.model_pickle, 'wb') as writer:
                dill.dump(model_dict, writer, dill.HIGHEST_PROTOCOL)
        except Exception:
            print('Move to PyMC3 and hope that theano allows serialization...')
            raise  # No way to pickle these nasty fortrans, does PyMC allow to serialize models like this?

    def pymc_db(self):
        """Return the loaded trace database, reloading it if it was collected."""
        # WARNING: weakrefed cache
        traces = self._db() if self._db is not None else None
        if traces is None:
            backend = getattr(pymc.database, self.backend)
            traces = backend.load(self.db_file)
            self._db = weakref.ref(traces)
        return traces

    def traces(self, varname):
        """Returns a numpy array with the traces for the variable, one squeezed row per chain."""
        # TODO: Q&D to eliminate delays on reading, rethink...
        cache_file = op.join(self.db_dir, '%s.%s' % (varname, 'pickle'))
        if not op.isfile(cache_file):
            # We could instead combine all the chains into a long one with chain=None
            # See e.g. https://github.com/pymc-devs/pymc/issues/144
            traces = np.array([
                self.pymc_db().trace(varname, chain=chain)[:].squeeze()
                for chain in range(self.num_chains())
            ])
            joblib.dump(traces, cache_file, compress=3)
            return traces
        return joblib.load(cache_file)

    def pymctraces(self, varname):
        """Return the entry for ``varname`` in the database's ``_traces``, printing all of them."""
        traces = self.pymc_db()._traces
        print(traces)
        return traces[varname]

    def varnames(self):
        """Return the database's ``trace_names``, cached on disk after the first read."""
        # TODO: Q&D to eliminate delays on reading, rethink
        cache_file = op.join(self.db_dir, 'tracenames.pickled')
        if not op.isfile(cache_file):
            trace_names = self.pymc_db().trace_names
            joblib.dump(trace_names, cache_file, compress=3)
            return trace_names
        return joblib.load(cache_file)

    def num_chains(self):
        """Return the number of entries in ``varnames()``, used as the chain count."""
        # NOTE(review): this assumes trace_names holds one entry per chain - confirm.
        return len(self.varnames())

    def group_plots_in_dirs(self):
        """Symlink the PNG plots into subdirectories chosen by their file name."""
        def ensure_symlink(dest_dir, file_name, plot_file):
            dest_file = op.join(dest_dir, file_name)
            # Relative target: the link sits one level below the plot itself.
            plot_file = '../%s' % op.basename(plot_file)
            if not op.islink(dest_file):
                ensure_dir(dest_dir)
                os.symlink(plot_file, dest_file)

        for plot_file in glob(op.join(self.plots_dir, '*.png')):
            file_name = op.basename(plot_file)
            if 'group=' in file_name:
                ensure_symlink(op.join(self.plots_dir, 'posteriors-group'),
                               file_name, plot_file)
                # Assume only one group per model ATM
            elif 'summary__' in file_name:
                ensure_symlink(op.join(self.plots_dir, 'summaries'), file_name,
                               plot_file)
            elif 'fly=' in file_name:
                # The bare string 'fly=' was always true, so unrelated plots
                # ended up in a 'posteriors-fly=' directory.
                fly_name = file_name.partition('fly=')[2].partition('_')[0]
                ensure_symlink(
                    op.join(self.plots_dir, 'posteriors-fly=%s' % fly_name),
                    file_name, plot_file)

    def sample(self,
               model,
               mapstart=False,
               step_methods=None,
               iters=80000,
               burn=20000,
               num_chains=4,
               doplot=True,
               showplots=False,
               force=False,
               progress_bar=False):
        """Run the MCMC chains for ``model`` and write plots and statistics.

        :param model: the pymc model to sample
        :param mapstart: try to replace the model by the variables of a MAP fit first
        :param step_methods: optional iterable of ``(var, step_method, kwargs)``
                             triples passed to ``use_step_method``
        :param iters: iterations per chain
        :param burn: burn-in iterations per chain
        :param num_chains: number of chains to sample
        :param doplot: write the per-chain plots and group them in directories
        :param showplots: call ``plt.show()`` after each chain
        :param force: resample even if the run is already marked as done
        :param progress_bar: passed through to the sampler
        :return: ``self.db_file`` when the run was already done and is skipped
        """
        # NOTE(review): the code visible here neither writes the DONE marker
        # nor returns a value after sampling - confirm that nothing is missing.
        print('MCMC for %s' % self.name)

        if self.is_done():
            if not force:
                print('\tAlready done, skipping...')
                return self.db_file
            else:
                print(
                    '\tWARNING: recomputing, there might be spurious files from previous runs...'
                )  # Not a good idea
        # Let's graph the model
        graph = pymc.graph.dag(pymc.Model(model),
                               name=self.name,
                               path=self.model_dir)
        graph.write_png(op.join(self.model_dir, self.name + '.png'))

        start = time.time()

        if mapstart:
            # See http://stronginference.com/post/burn-in-and-other-mcmc-folklore
            # BUT WARNING, WOULD THIS MAKE MULTIPLE CHAIN START BE OVERLY CORRELATED?
            try:
                from pymc import MAP
                print('\tFinding MAP estimates...')
                M = MAP(model)
                M.fit()
                model = M.variables
                print('\tMAP estimates found...')
            except Exception as e:
                # One formatted string prints as text on Python 2 and 3 alike.
                print('\tMAP Failed... %s' % e)

        # Instantiate model
        M = pymc.MCMC(model,
                      db=self.backend,
                      dbname=self.db_file,
                      name=self.name)

        # Tune step methods
        if step_methods is not None:
            for var, step_method, sm_kwargs in step_methods:
                M.use_step_method(step_method, var, **sm_kwargs)

        # Sample!
        for chain in range(num_chains):
            print('\tChain %d of %d' % (chain + 1, num_chains))
            M.sample(iter=iters, burn=burn, progress_bar=progress_bar)
            try:
                if doplot:  # Summaries for the chain
                    plot(M,
                         suffix='__' + self.name + '__chain=%d' % chain,
                         path=self.plots_dir,
                         verbose=0)
                    summary_plot(M,
                                 name='summary__' + self.name +
                                 '__chain=%d' % chain,
                                 path=self.plots_dir + '/')
                    # TODO: report no op.join (+'/') bug to pymc people
                    self.group_plots_in_dirs()
                if showplots:
                    plt.show()
                chain_stats = M.stats(chain=chain)
                with open(op.join(self.stats_dir, 'stats__chain=%d' % chain),
                          'w') as writer:
                    pprint(chain_stats, writer)
            except Exception as e:
                # Plotting and summarizing are best effort; sampling goes on.
                print('\tError plotting or summarizing')
                print(str(e))
Beispiel #27
0
    ent = 0
    accept = dbstate[dbstate.keys()[ent]]['_accepted']
    reject = dbstate[dbstate.keys()[ent]]['_rejected']

acceptancerate_theta = accept / (reject + accept)

if args.verbose:
    print '      with theta = (k,L*,N) acceptance rate =', acceptancerate_theta
    print '      No. of accepted steps = ' + str(accept)
    print '      No. of rejected steps = ' + str(reject)

MM.db.close()
if args.verbose: print ' - Saved chains to ', dbname
#-------------------------------------------------------------------------------------------------------------
if args.verbose: print ' - Plot logNobjuniverse, k and logLstar samples'
plot(MM)  # plotting results

if not os.path.isdir('./balff_plots/'):
    if args.verbose: print ' - Did not find "./balff_plots/" so creating it'
    os.mkdir('./balff_plots/')

thetafile = './balff_plots/' + dbname.split('balff_output/')[-1].replace(
    '.pickle', '_theta.png')
if args.verbose: print ' - Moving theta_2.png to', thetafile
out = commands.getoutput('mv theta_2.png ' + thetafile)

if args.verbose: print '\n - Printing Summary'
statval = 1  # resetting stat indicator
ssval = MM.stats()  # checking that stats can be created

if (ssval['theta'] == None): statval = 0
Beispiel #28
0
def get_Bayes(measurements=None, chunksize=5, Ndp=5, iter=50000, burn=5000):
    """Sample a truncated stick-breaking mixture model for ``measurements``.

    The model is sampled ``chunksize`` times.  Each run is plotted and its
    'sc' and 'y_sim' traces are collected; histograms of the measurements and
    of the simulated values are drawn as well.

    :param measurements: observed values (an empty list when None)
    :param chunksize: number of independent sampling runs
    :param Ndp: number of mixture components
    :param iter: iterations per sampling run
    :param burn: burn-in iterations per sampling run
    :return: dict with the keys 'gelman_rubin' (NaN when ``chunksize <= 1``),
             'modes', 'simulations' and 'sc_samples'
    """
    if measurements is None:
        measurements = []

    sc = pymc.Uniform('sc', 0.1, 2.0, value=0.24)
    tau = pymc.Uniform('tau', 0.0, 1.0, value=0.5)

    # Not part of the model below; only the commented-out ``betas`` uses it.
    concinit = 1.0
    conclo = 0.1
    conchi = 10.0
    concentration = pymc.Uniform('concentration',
                                 lower=conclo,
                                 upper=conchi,
                                 value=concinit)

    # The stick-breaking construction: requires Ndp beta draws dependent on the
    # concentration, before the probability mass function is actually constructed.
    #betas = pymc.Beta('betas', alpha=1, beta=concentration, size=Ndp)
    betas = pymc.Beta('betas', alpha=1, beta=1, size=Ndp - 1)

    @pymc.deterministic
    def pmf(betas=betas):
        "Construct a probability mass function for the truncated Dirichlet process"
        # prod = lambda x: np.exp(np.sum(np.log(x))) # Slow but more accurate(?)
        prod = np.prod
        # A comprehension unpacks each (index, beta) pair; the former
        # two-argument lambda could not be called by map() with one tuple.
        value = [u * prod(1.0 - betas[:i]) for i, u in enumerate(betas)]
        value.append(1.0 - sum(value))  # force value to sum to 1
        return value

    # The cluster assignments: each data point's estimated cluster ID.
    # Remove idinit to allow clusterid to be randomly initialized:
    Ndata = len(measurements)
    idinit = np.zeros(Ndata, dtype=np.int64)
    clusterid = pymc.Categorical('clusterid', p=pmf, size=Ndata, value=idinit)

    @pymc.deterministic(name='clustermean')
    def clustermean(clusterid=clusterid, sc=sc, Ndp=Ndp):
        return sc * np.arange(1, Ndp + 1)[clusterid]

    @pymc.deterministic(name='clusterprec')
    def clusterprec(clusterid=clusterid, sc=sc, tau=tau, Ndp=Ndp):
        return 1.0 / (sc * sc * tau * tau * (np.arange(1, Ndp + 1)[clusterid]))

    y = pymc.Normal('y',
                    mu=clustermean,
                    tau=clusterprec,
                    observed=True,
                    value=measurements)

    ## for posterior predictive simulation
    @pymc.deterministic(name='y_sim')
    def y_sim(value=[0], sc=sc, tau=tau, clusterid=clusterid, Ndp=Ndp):
        n = np.arange(1, Ndp + 1)[np.random.choice(clusterid)]
        return np.random.normal(loc=sc * n, scale=sc * tau * n)

    m = pymc.Model({
        "scale": sc,
        "tau": tau,
        "betas": betas,
        "clusterid": clusterid,
        "normal": y,
        "pred": y_sim
    })

    sc_samples = []
    modes = []
    simulations = []

    # The measurements are the same for every run, so the mode of their
    # histogram is computed once and recorded once per run.
    counts, edges = np.histogram(
        measurements,
        bins=100,
        range=[np.min(measurements) - 0.25,
               np.max(measurements) + 0.25])
    argm = counts.argmax()
    mode = (edges[argm] + edges[argm + 1]) / 2

    for _ in range(chunksize):
        mc = pymc.MCMC(m)
        # Honour the caller's settings; they used to be ignored in favour of
        # hard-coded iter=50000, burn=10000.
        mc.sample(iter=iter, burn=burn)
        plot(mc)

        sc_sample = mc.trace('sc')[:]
        sc_samples.append(sc_sample)

        simulation = mc.trace('y_sim')[:]
        simulations.append(simulation)

        plt.hist(measurements,
                 50,
                 fc='gray',
                 histtype='stepfilled',
                 alpha=0.3,
                 normed=False)
        plt.hist(simulation,
                 30,
                 fc='blue',
                 histtype='stepfilled',
                 alpha=0.3,
                 normed=True)

        modes.append(mode)

    if chunksize <= 1:
        gr = np.nan
    else:
        # The result used to be discarded, leaving ``gr`` undefined below.
        gr = pymc.gelman_rubin(sc_samples)

    dic = {
        'gelman_rubin': gr,
        'modes': modes,
        'simulations': simulations,
        'sc_samples': sc_samples
    }
    return dic
Beispiel #29
0
             color='k',
             linewidth=1,
             label='Average fit')

    #plot data
    plt.errorbar(xdata,
                 ydata,
                 xerr=xerr,
                 yerr=yerr,
                 color='b',
                 label='data',
                 fmt='o')

    #show likely outliers
    plt.plot(xdata[MC.badvals.value.astype('bool')],
             ydata[MC.badvals.value.astype('bool')],
             'rs',
             label='likely outliers')

    plt.xlim(20, 300)
    plt.legend(shadow=True,
               fancybox=True,
               scatterpoints=1,
               numpoints=1,
               loc='upper left')
    plt.savefig('test.pdf')
    plt.close()

    #MCMC plot
    plot(MC)
Beispiel #30
0
# Ph21 Set 5
# Aritra Biswas

# coin_mcmc.py
# Run MCMC on coin_model.py

import coin_model
from pymc import MCMC
from pymc.Matplot import plot

M = MCMC(coin_model)
M.sample(iter = 10000, burn = 0, thin = 1)
# A bare ``print`` emits a blank line only as a Python 2 statement; under
# Python 3 it is an expression that does nothing.  print("") emits the blank
# line on both.
print("")
plot(M)
M.pheads.summary()
def bayesian_model():
    """Interactively sample ``disastermodel``, print the trace of 'l', plot and return the sampler."""
    model = MCMC(disastermodel)
    model.isample(iter=10000, burn=1000, thin=10)
    # A parenthesised single argument prints the same text as the Python 2
    # statement and is also valid Python 3.
    print(model.trace('l')[:])
    plot(model)
    return model