Example #1
    def test_simple(self):

        # Priors
        mu = Normal('mu', mu=0, tau=0.0001)
        s = Uniform('s', lower=0, upper=100, value=10)
        tau = s ** -2

        # Likelihood with missing data
        x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

        # Instantiate sampler
        M = MCMC([mu, s, tau, x])

        # Run sampler
        M.sample(10000, 5000, progress_bar=0)

        # Check length of value
        assert_equal(len(x.value), 100)
        # Check size of trace
        tr = M.trace('x')()
        assert_equal(shape(tr), (5000, 2))

        sd2 = [-2 < i < 2 for i in ravel(tr)]

        # Check for standard normal output
        assert_almost_equal(sum(sd2) / 10000., 0.95, decimal=1)
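Here `m` comes from the enclosing test fixture. A minimal sketch consistent with the assertions (100 standard-normal draws, two of them masked, which is what yields the (5000, 2) imputation trace):

from numpy import ma
from pymc import rnormal

fake_data = rnormal(0, 1, size=100)
fake_data[[2, 7]] = -999                      # two sentinel "missing" entries
m = ma.masked_values(fake_data, value=-999)   # masked array fed to the likelihood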
Example #2
    def test_simple(self):

        # Priors
        mu = Normal('mu', mu=0, tau=0.0001)
        s = Uniform('s', lower=0, upper=100, value=10)
        tau = s**-2

        # Likelihood with missing data
        x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

        # Instantiate sampler
        M = MCMC([mu, s, tau, x])

        # Run sampler
        M.sample(10000, 5000, progress_bar=0)

        # Check length of value
        assert_equal(len(x.value), 100)
        # Check size of trace
        tr = M.trace('x')()
        assert_equal(shape(tr), (5000, 2))

        sd2 = [-2 < i < 2 for i in ravel(tr)]

        # Check for standard normal output
        assert_almost_equal(sum(sd2) / 10000., 0.95, decimal=1)
Example #3
def test_interactive():
    S = MCMC(disaster_model)
    S.isample(200,
              100,
              2,
              out=open('testresults/interactive.log', 'w'),
              progress_bar=0)
Example #4
    def test_fit(self):
        p = self._build_parent()
        s = MyStochastic(self.STOCHASTIC_NAME, p)

        mcmc = MCMC({p, s})

        mcmc.sample(100, burn=10, thin=2)
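`MyStochastic` and `_build_parent` come from the enclosing test module. A hypothetical stand-in that makes the snippet self-contained (any pymc node that depends on the parent would do):

import pymc

def _build_parent():
    # a prior for the child to depend on
    return pymc.Uniform('parent', lower=0.0, upper=1.0)

def MyStochastic(name, parent):
    # a normally distributed child centred on the parent
    return pymc.Normal(name, mu=parent, tau=1.0)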
Example #5
 def test_nd(self):
     M = MCMC([self.NDstoch()], db=self.name, dbname=os.path.join(testdir, 'ND.'+self.name), dbmode='w')
     M.sample(10, progress_bar=0)
     a = M.trace('nd')[:]
     assert_equal(a.shape, (10,2,2))
     db = getattr(pymc.database, self.name).load(os.path.join(testdir, 'ND.'+self.name))
     assert_equal(db.trace('nd')[:], a)
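`self.NDstoch()` is defined elsewhere in the test class. Judging from the (10, 2, 2) trace shape, any stochastic named 'nd' with a 2x2 value fits; a hypothetical sketch:

import numpy as np
from pymc import Normal

def NDstoch():
    # array-valued node: each element is an independent standard normal
    return Normal('nd', mu=np.zeros((2, 2)), tau=1.0)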
Example #6
def test_interactive():
    if 'sqlite' not in dir(pymc.database):
        raise nose.SkipTest
    M=MCMC(disaster_model,db='sqlite',
           dbname=os.path.join(testdir, 'interactiveDisaster.sqlite'),
           dbmode='w')
    M.isample(10, out=open('testresults/interactivesqlite.log', 'w'), progress_bar=0)
Example #7
def test_interactive():
    if 'sqlite' not in dir(pymc.database):
        raise nose.SkipTest
    M = MCMC(DisasterModel,
             db='sqlite',
             dbname=os.path.join(testdir, 'interactiveDisaster.sqlite'),
             dbmode='w')
    M.isample(10, out=open('testresults/interactivesqlite.log', 'w'))
Example #8
    def test_fit_with_sibling(self):
        p = self._build_parent()
        s = MyStochastic(self.STOCHASTIC_NAME, p)
        sib = MyStochastic(self.SIBLING_NAME, p)

        mcmc = MCMC({p, s, sib})

        mcmc.sample(100, burn=10, thin=2)
Example #9
    def test_pymc_model(self):
        """ Tests sampler """

        sampler = MCMC(model_omm.pymc_parameters)
        self.assert_(isinstance(model_omm, TorsionFitModelOMM))
        self.assert_(isinstance(sampler, pymc.MCMC))

        sampler.sample(iter=1)
Example #11
def mcmc(prob, nsample=100, modulename='model'):
    try:
        mystr = "from " + modulename + " import model"
        exec(mystr)
    except:
        print('cannot import', modulename)
    M = MCMC(model(prob))
    M.sample(nsample)
    return M
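Note: under Python 3, the exec-based import cannot bind `model` in the function's local scope, so `model(prob)` would raise NameError. A sketch of a reliable equivalent using importlib:

import importlib

def mcmc(prob, nsample=100, modulename='model'):
    mod = importlib.import_module(modulename)   # replaces the exec dance
    M = MCMC(mod.model(prob))
    M.sample(nsample)
    return M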
Example #12
def mcmc(prob, nsample=100, modulename = 'model' ):
    try:
        mystr = "from " + modulename + " import model"
        exec(mystr)
    except:
        print('cannot import', modulename)
    M = MCMC( model(prob) )
    M.sample(nsample)
    return M
Example #13
 def test_zcompression(self):
     db = pymc.database.hdf5.Database(dbname=os.path.join(testdir, 'DisasterModelCompressed.hdf5'),
                                      dbmode='w',
                                      dbcomplevel=5)
     S = MCMC(DisasterModel, db=db)
     S.sample(45,10,1)
     assert_array_equal(S.e.trace().shape, (35,))
     S.db.close()
     db.close()
     del S
Example #14
def test_interactive():
    S = MCMC(disaster_model)
    S.isample(
        200,
        100,
        2,
        out=open(
            'testresults/interactive.log',
            'w'),
        progress_bar=0)
Example #15
 def test_nd(self):
     M = MCMC([self.NDstoch()],
              db=self.name,
              dbname=os.path.join(testdir, 'ND.' + self.name),
              dbmode='w')
     M.sample(10, progress_bar=0)
     a = M.trace('nd')[:]
     assert_equal(a.shape, (10, 2, 2))
     db = getattr(pymc.database,
                  self.name).load(os.path.join(testdir, 'ND.' + self.name))
     assert_equal(db.trace('nd')[:], a)
Example #16
 def test_zcompression(self):
     db = pymc.database.hdf5.Database(dbname=os.path.join(
         testdir, 'DisasterModelCompressed.hdf5'),
                                      dbmode='w',
                                      dbcomplevel=5)
     S = MCMC(DisasterModel, db=db)
     S.sample(45, 10, 1)
     assert_array_equal(S.e.trace().shape, (35, ))
     S.db.close()
     db.close()
     del S
Example #17
 def test_zcompression(self):
     with warnings.catch_warnings():
         warnings.simplefilter('ignore')
         db = pymc.database.hdf5.Database(dbname=os.path.join(testdir, 'disaster_modelCompressed.hdf5'),
                                          dbmode='w',
                                          dbcomplevel=5)                                 
         S = MCMC(disaster_model, db=db)
         S.sample(45,10,1, progress_bar=0)
         assert_array_equal(S.trace('early_mean')[:].shape, (35,))
         S.db.close()
         db.close()
         del S
Example #18
def compute(var_LB, var_UB, num_samples=10):
    from pymc import Uniform, MCMC

    X = Uniform('X', var_LB, var_UB)
    mc = MCMC([X])
    mc.sample(num_samples)

    #import matplotlib.pyplot as plt
    #plt.plot(X.trace()[:,0], X.trace()[:,1],',')
    #plt.show()

    return X.trace()
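A hypothetical call, matching the commented-out 2-D scatter plot (array-valued bounds make X two-dimensional):

import numpy as np

trace = compute(np.array([0.0, 0.0]), np.array([1.0, 1.0]), num_samples=500)
print(trace.shape)   # (500, 2)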
Example #19
def estimate_failures(samples, #samples from noisy labelers
                      n_samples=10000, #number of samples to run MCMC for
                      burn=None, #burn-in. Defaults to n_samples/2
                      thin=10, #thinning rate. Sample every k samples from markov chain 
                      alpha_p=1, beta_p=1, #beta parameters for true positive rate
                      alpha_e=1, beta_e=10 #beta parameters for noise rates
                      ):

  if burn is None:
    burn = n_samples / 2

  S,N = samples.shape
  p = Beta('p', alpha=alpha_p, beta=beta_p) #prior on true label
  l = Bernoulli('l', p=p, size=S)
  e_pos = Beta('e_pos', alpha_e, beta_e, size=N) # error rate if label = 1
  e_neg = Beta('e_neg', alpha_e, beta_e, size=N) # error rate if label = 0

  @deterministic(plot=False)
  def noise_rate(l=l, e_pos=e_pos, e_neg=e_neg):
    #probability that a noisy labeler puts a label 1
    return np.outer(l, 1-e_pos) + np.outer(1-l, e_neg)

  noisy_label = Bernoulli('noisy_label', p=noise_rate, size=samples.shape, value=samples, observed=True)
  variables = [l, e_pos, e_neg, p, noisy_label, noise_rate]
  model = MCMC(variables, verbose=3)
  model.sample(iter=n_samples, burn=burn, thin=thin)
  model.write_csv('out.csv', ['p', 'e_pos', 'e_neg'])
  p = np.median(model.trace('p')[:])
  e_pos = np.median(model.trace('e_pos')[:],0)
  e_neg = np.median(model.trace('e_neg')[:],0)
  return p, e_pos, e_neg
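Hypothetical usage on synthetic data, say 100 items each rated by 3 noisy labelers:

import numpy as np

samples = np.random.randint(0, 2, size=(100, 3))   # fake binary labels
p, e_pos, e_neg = estimate_failures(samples, n_samples=2000, thin=5)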
Example #20
    def MCMC( self, nruns=10000, burn=1000, init_error_std=1., max_error_std=100., verbose=1 ):
        ''' Perform Markov Chain Monte Carlo sampling using pymc package

            :param nruns: Number of MCMC iterations (samples)
            :type nruns: int
            :param burn: Number of initial samples to burn (discard)
            :type burn: int
            :param verbose: verbosity of output
            :type verbose: int
            :param init_error_std: Initial standard deviation of residuals
            :type init_error_std: fl64
            :param max_error_std: Maximum standard deviation of residuals that will be considered
            :type max_error_std: fl64
            :returns: pymc MCMC object
        '''
        if max_error_std < init_error_std:
            print "Error: max_error_std must be greater than or equal to init_error_std"
            return
        try:
            from pymc import Uniform, deterministic, Normal, MCMC, Matplot
        except ImportError as exc:
            sys.stderr.write("Warning: failed to import pymc module. ({})\n".format(exc))
            sys.stderr.write("If pymc is not installed, try installing:\n")
            sys.stderr.write("e.g. try using easy_install: easy_install pymc\n")
        def __mcmc_model( self, init_error_std=1., max_error_std=100. ):
            #priors
            variables = []
            sig = Uniform('error_std', 0.0, max_error_std, value=init_error_std)
            variables.append( sig )
            for nm,mn,mx in zip(self.parnames,self.parmins,self.parmaxs):
                evalstr = "Uniform( '" + str(nm) + "', " +  str(mn) + ", " +  str(mx) + ")"
                variables.append( eval(evalstr) )
            #model
            @deterministic()
            def residuals( pars = variables, p=self ):
                values = []
                for i in range(1,len(pars)):
                    values.append(float(pars[i]))
                pardict = dict(zip(p.parnames,values))
                p.forward(pardict=pardict, reuse_dirs=True)
                return numpy.array(p.residuals)*numpy.array(p.obsweights)
            #likelihood
            y = Normal('y', mu=residuals, tau=1.0/sig**2, observed=True, value=numpy.zeros(len(self.obs)))
            variables.append(y)
            return variables

        M = MCMC( __mcmc_model(self, init_error_std=init_error_std, max_error_std=max_error_std) )
        M.sample(iter=nruns,burn=burn,verbose=verbose)
        return M
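The string-building and eval used for the parameter priors above can be avoided; a behavior-equivalent sketch:

for nm, mn, mx in zip(self.parnames, self.parmins, self.parmaxs):
    variables.append(Uniform(str(nm), float(mn), float(mx)))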
Example #21
 def run_mc(self,
            nsample=10000,
            interactive=False,
            doplot=False,
            verbose=0):
     """run the model using mcmc"""
     from pymc import MCMC
     self.M = MCMC(self)
     if interactive:
         self.M.isample(iter=nsample, burn=1000, thin=10, verbose=verbose)
     else:
         self.M.sample(iter=nsample, burn=1000, thin=10, verbose=verbose)
     if doplot:
         from pymc.Matplot import plot
         plot(self.M)
Example #22
def analizeMwm():
	masked_values = np.ma.masked_equal(x, value=None)
	print("m v: ", masked_values)

	print("dmwm da: ", dmwm.disasters_array)

	Mwm = MCMC(dmwm)
	Mwm.sample(iter=10000, burn=1000, thin=10)

	print("Mwm t: ", Mwm.trace('switchpoint')[:])

	hist(Mwm.trace('late_mean')[:])
	# show()

	plot(Mwm)
Example #23
    def run(self, n_iter=110000, n_burn=10000, thin=1):
        """Run the Bayesian test.

        :param int n_iter: total number of MCMC iterations
        :param int n_burn: no tallying done during the first n_burn iterations - these samples will be forgotten
        :param int thin: variables will be tallied at intervals of this many iterations

        :return: None
        """
        self.model.setup((n_iter - n_burn) / thin)
        self.sampler = MCMC(self.model.stochastics)
        self.sampler.sample(iter=n_iter,
                            burn=n_burn,
                            thin=thin,
                            progress_bar=self.verbose)
Example #24
def imputeBayesian(row, dist):
    out = sys.stdout  #Save the stdout path for later, we're going to need it
    f = open('/dev/null', 'w')  #we're going to use this to redirect stdout

    # filling nan with 0 so everything works
    row.fillna(0, inplace=True)

    # Masked Values
    maskedValues = np.ma.masked_equal(row.values, value=0)

    # Choose between distributions, either normal or Poisson.
    if dist == "Normal":

        # Calculate tau
        if np.std(maskedValues) == 0:
            tau = np.square(1 / (np.mean(maskedValues) / 3))
        else:
            tau = np.square((1 / (np.std(maskedValues))))

        # Uses only mean
        x = Impute('x',
                   Normal,
                   maskedValues,
                   tau=tau,
                   mu=np.mean(maskedValues))

    # For Poisson
    elif dist == "Poisson":
        x = Impute('x', Poisson, maskedValues, mu=np.mean(maskedValues))

    # Fancy test
    sys.stdout = f  # Skipping stdout
    m = MCMC(x)
    m.sample(iter=1, burn=0, thin=1)
    sys.stdout = out  # coming back

    # Getting list of missing values
    missing = [i for i in range(len(row.values)) if row.values[i] == 0]

    # Getting the imputed values from the model
    for i in range(len(missing)):
        keyString = "x[" + str(missing[i]) + "]"
        imputedValue = m.trace(keyString)[:]
        row.iloc[missing[i]] = imputedValue[0]

    # Replace the zeros with NaNs again
    row.replace(0, np.nan, inplace=True)
    return row
Example #25
def run_trials(trials=0,
               iters=0,
               tau=10000,
               prior=None,
               errort_b=[],
               Linf=[],
               sparsity=[],
               logps=[]):
    for i in range(trials):
        # NOTE need to create new model per iteration, pymc might be using
        # the model instance to seed a random number generator somewhere...
        model = models.toy_model(tau=tau, prior=prior)
        A = MCMC(model)
        A, logp, errors_b, errors_x = simulation.sample_toy_save(model,A, \
                iters=iters,verbose=False)
        logps.append(logp[-1])
        errort_b.append(errors_b[-1])
        Linf.append(np.sum(1 /
                           errors_x[:, -1][errors_x[:, -1].argsort()[-3:]]))
        sparsity.append(np.sum(errors_x[:, -1] <= 0.02))

    print('')
    for (i, j) in [('Linf', Linf), ('Sparsity', sparsity),
                   ('Error_b', errort_b), ('logp', logps)]:
        print(i)
        if i == 'Sparsity':
            print([np.sum(np.array(j) == k) for k in range(4)])
        else:
            (H1, H2) = np.histogram(j)
            print(H1)
            print(H2)
        print("Median: %s" % (np.median(j)))
    return Linf, sparsity, errort_b, logps
Example #26
def test_identical_object_names():
    A = pymc.Uniform('a', 0, 10)
    B = pymc.Uniform('a', 0, 10)
    try:
        M = MCMC([A, B])
    except ValueError:
        pass
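As written, the test also passes when MCMC raises nothing. A tighter variant, using the assert_raises helper that appears in the other examples:

from numpy.testing import assert_raises

def test_identical_object_names_strict():
    A = pymc.Uniform('a', 0, 10)
    B = pymc.Uniform('a', 0, 10)
    assert_raises(ValueError, MCMC, [A, B])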
Example #27
    def test_zcompression(self):

        original_filters = warnings.filters[:]
        warnings.simplefilter("ignore")
        try:
            db = pymc.database.hdf5.Database(dbname=os.path.join(testdir, 'disaster_modelCompressed.hdf5'),
                                             dbmode='w',
                                             dbcomplevel=5)
            S = MCMC(disaster_model, db=db)
            S.sample(45,10,1, progress_bar=0)
            assert_array_equal(S.trace('early_mean')[:].shape, (35,))
            S.db.close()
            db.close()
            del S
        finally:
            warnings.filters = original_filters
Example #28
class test_MCMC(TestCase):

    # Instantiate samplers
    M = MCMC(DisasterModel)

    # Sample
    M.sample(4000, 2000, verbose=0)

    def test_instantiation(self):

        # Check stochastic arrays
        assert_equal(len(self.M.stochastics), 3)
        assert_equal(len(self.M.observed_stochastics), 1)
        assert_array_equal(self.M.D.value, DisasterModel.disasters_array)

    def test_plot(self):
        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        plot(self.M.e, path=DIR, verbose=0)

    def test_autocorrelation(self):
        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        autocorrelation(self.M.e, path=DIR, verbose=0)

    def test_stats(self):
        S = self.M.e.stats()
Example #29
    def test_zcompression(self):

        original_filters = warnings.filters[:]
        warnings.simplefilter("ignore")
        try:
            db = pymc.database.hdf5.Database(dbname=os.path.join(
                testdir, 'disaster_modelCompressed.hdf5'),
                                             dbmode='w',
                                             dbcomplevel=5)
            S = MCMC(disaster_model, db=db)
            S.sample(45, 10, 1, progress_bar=0)
            assert_array_equal(S.trace('early_mean')[:].shape, (35, ))
            S.db.close()
            db.close()
            del S
        finally:
            warnings.filters = original_filters
Example #30
class BABTest:
    def __init__(self, control, variant, model='student', verbose=True):
        """Init.

        :param np.array control: 1 dimensional array of observations for control group
        :param np.array variant: 1 dimensional array of observations for variant group
        :param string model: desired distribution to describe both groups, defaults to Student
        """
        assert control.ndim == 1
        assert variant.ndim == 1
        self.control = control
        self.variant = variant
        self.sampler = None
        if model not in models:
            raise KeyError(
                'Unknown model - please select a model from {}'.format(
                    models.keys()))
        self.model = models[model](self.control, self.variant)
        self.verbose = verbose

    def run(self, n_iter=110000, n_burn=10000, thin=1):
        """Run the Bayesian test.

        :param int n_iter: total number of MCMC iterations
        :param int n_burn: no tallying done during the first n_burn iterations - these samples will be forgotten
        :param int thin: variables will be tallied at intervals of this many iterations

        :return: None
        """
        self.model.setup((n_iter - n_burn) / thin)
        self.sampler = MCMC(self.model.stochastics)
        self.sampler.sample(iter=n_iter,
                            burn=n_burn,
                            thin=thin,
                            progress_bar=self.verbose)

    def plot(self, n_bins=30):
        """Display the results of the test.

        :param int n_bins: number of bins in the histograms

        :return: None
        """
        self.model.plot(n_bins=n_bins)
Example #31
def bimodal_gauss(data,pm):
    '''run MCMC to get regression on bimodal normal distribution'''
    m1 = np.mean(data[pm])/2.
    m2 = np.mean(data[pm])*2.
    dm = m2 - m1
    size = len(data[pm])

    ### set up model
    p = Uniform( "p", 0.2 , 0.8) #this is the fraction that come from mean1 vs mean2
    # p = distributions.truncated_normal_like('p', mu=0.5, tau=0.001, a=0., b=1.)
    # p = Normal( 'p', mu=(1.*sum(comp0==1))/size, tau=1./0.1**2 ) # attention: wings!, tau = 1/sig^2
    # p = Normal( 'p', mu=0.5, tau=1./0.1**2 ) # attention: wings!, tau = 1/sig^2
    
    ber = Bernoulli( "ber", p = p, size = size) # produces 1 with proportion p
    precision = Gamma('precision', alpha=0.01, beta=0.01)
    
    dmu = Normal( 'dmu', dm, tau=1./0.05**2 ) # [PS] give difference between means, finite
    # dmu = Lognormal( 'dmu', 0.3, tau=1./0.1**2)
    
    mean1 = Normal( "mean1", mu = m1,          tau = 1./0.1**2 ) # better to use Normals versus Uniforms,
                                                                 # if not truncated
    mean2 = Normal( "mean2", mu = mean1 + dmu, tau = 1./0.1**2 ) # tau is 1/sig^2
    
    @deterministic
    def mean( ber = ber, mean1 = mean1, mean2 = mean2):
        return ber*mean1 + (1-ber)*mean2

    
    obs = Normal( "obs", mean, precision, value = data[pm], observed = True)
    model = Model( {"p":p, "precision": precision, "mean1": mean1, "mean2":mean2, "obs":obs} )
    
    from pymc import MCMC, Matplot



    M = MCMC(locals(), db='pickle', dbname='metals.pickle')
    iter = 3000; burn = 2000; thin = 10
    M.sample(iter=iter, burn=burn, thin=thin)
    M.db.commit()

    mu1 = np.mean(M.trace('mean1')[:])
    mu2 = np.mean(M.trace('mean2')[:])
    p   = np.mean(M.trace('p')[:])
    return p, mu1, 0.1, mu2, 0.1, M
Example #32
class LeagueModel(object):
    """MCMC model of a football league."""
    def __init__(self, fname):
        super(LeagueModel, self).__init__()
        league = fuba.League(fname)

        N = len(league.teams)
        #dummy future games
        future_games = [[league.teams["Werder Bremen"],league.teams["Dortmund"]]]

        self.goal_rate = np.empty(N,dtype=object)
        self.match_rate = np.empty(len(league.games)*2,dtype=object)
        self.match_goals_future = np.empty(len(future_games)*2,dtype=object)
        self.home_adv = Normal(name = 'home_adv',mu=0,tau=10.)

        for t in league.teams.values():
            print(t.name, t.team_id)
            self.goal_rate[t.team_id] = Exponential('goal_rate_%i'%t.team_id,beta=1)

        for game in range(len(league.games)):
            self.match_rate[2*game] = Poisson('match_rate_%i'%(2*game),
                    mu=self.goal_rate[league.games[game].hometeam.team_id] + self.home_adv,
                    value=league.games[game].homescore, observed=True)
            self.match_rate[2*game+1] = Poisson('match_rate_%i'%(2*game+1),
                    mu=self.goal_rate[league.games[game].awayteam.team_id],
                    value=league.games[game].awayscore, observed=True)

        for game in range(len(future_games)):
            self.match_goals_future[2*game] = Poisson('match_goals_future_%i'%(2*game),
                    mu=self.goal_rate[future_games[game][0].team_id] + self.home_adv)
            self.match_goals_future[2*game+1] = Poisson('match_goals_future_%i'%(2*game+1),
                    mu=self.goal_rate[future_games[game][1].team_id])

    def run_mc(self,nsample = 10000,interactive=False):
        """run the model using mcmc"""
        from pymc.Matplot import plot
        from pymc import MCMC
        self.M = MCMC(self)
        if interactive:
            self.M.isample(iter=nsample, burn=1000, thin=10)
        else:
            self.M.sample(iter=nsample, burn=1000, thin=10)
        plot(self.M)
Example #33
 def run_mc(self,nsample = 10000,interactive=False):
     """run the model using mcmc"""
     from pymc.Matplot import plot
     from pymc import MCMC
     self.M = MCMC(self)
     if interactive:
         self.M.isample(iter=nsample, burn=1000, thin=10)
     else:
         self.M.sample(iter=nsample, burn=1000, thin=10)
     plot(self.M)
Example #34
def fit_std_curve_by_pymc(i_vals, i_sds, dpx_concs):
    import pymc
    from pymc import Uniform, stochastic, deterministic, MCMC
    from pymc import Matplot
    # Define prior distributions for both Ka and Kd
    ka = Uniform('ka', lower=0, upper=1000)
    kd = Uniform('kd', lower=0, upper=1000)

    @stochastic(plot=True, observed=True)
    def quenching_model(ka=ka, kd=kd, value=i_vals):
        pred_i = quenching_func(ka, kd, dpx_concs)
        # The first concentration in dpx_concs should always be zero
        # (that is, the first point in the titration should be the
        # unquenched fluorescence), so we assert that here:
        assert dpx_concs[0] == 0
        # The reason this is necessary is that in the likelihood calculation
        # we skip the error for the first point, since (when the std. err
        # is calculated by well) the error is 0 (the I / I_0 ratio is
        # always 1 for each well, so the variance/SD across the wells is 0).
        # If we don't skip this first point, we get nan for the likelihood.
        # In addition, the model always predicts 1 for the I / I_0 ratio
        # when the DPX concentration is 0, so it contributes nothing to
        # the overall fit.
        return -np.sum((value[1:] - pred_i[1:])**2 / (2 * i_sds[1:]**2))

    pymc_model = pymc.Model([ka, kd, quenching_model])
    mcmc = MCMC(pymc_model)
    mcmc.sample(iter=155000, burn=5000, thin=150)
    Matplot.plot(mcmc)

    plt.figure()
    num_to_plot = 1000
    ka_vals = mcmc.trace('ka')[:]
    kd_vals = mcmc.trace('kd')[:]
    if num_to_plot > len(ka_vals):
        num_to_plot = len(ka_vals)
    for i in range(num_to_plot):
        plt.plot(dpx_concs, quenching_func(ka_vals[i], kd_vals[i], dpx_concs),
                 alpha=0.01, color='r')
    plt.errorbar(dpx_concs, i_vals, yerr=i_sds, linestyle='', marker='o',
            color='k', linewidth=2)

    return (ka_vals, kd_vals)
Example #35
    def bayesian_regression(self, Methodology):
        
        fit_dict                = OrderedDict()
        
        fit_dict['methodology'] = r'Inference $\chi^{2}$ model'
        
        #Initial guess for the fitting:
        Np_lsf                  = polyfit(self.x_array, self.y_array, 1)
        m_0, n_0                = Np_lsf[0], Np_lsf[1]
                
        MCMC_dict               = self.lr_ChiSq(self.x_array, self.y_array, m_0, n_0)
        
        myMCMC                  = MCMC(MCMC_dict)
        
        myMCMC.sample(iter=10000, burn=1000)

        fit_dict['m']       = myMCMC.stats()['m']['mean']
        fit_dict['n']       = myMCMC.stats()['n']['mean']
        fit_dict['m_error'] = myMCMC.stats()['m']['standard deviation']
        fit_dict['n_error'] = myMCMC.stats()['n']['standard deviation']
        
        return fit_dict
Example #36
 def run_mc(self,nsample = 10000,interactive=False,doplot=False,verbose=0):
     """run the model using mcmc"""
     from pymc import MCMC
     self.M = MCMC(self)
     if interactive:
         self.M.isample(iter=nsample, burn=1000, thin=10,verbose=verbose)
     else:
         self.M.sample(iter=nsample, burn=1000, thin=10,verbose=verbose)
     if doplot:
         from pymc.Matplot import plot
         plot(self.M)
Example #37
def main():
    s1 = PoissonStudent("arnaud", 1)
    s2 = PoissonStudent("francois", 1)
    s3 = PoissonStudent("david", 0.5)

    students = [s1, s2, s3]
    env = Environment(students)
    statements = env.simulate(1000, verbose=True)

    student_names = set(s['actor'] for s in statements)
    lam = Uniform('lam', lower=0, upper=1)
    students = [PoissonStudent(name=name, lam=lam) for name in student_names]
    env = Environment(students, statements)
    params = [lam]
    for s in students:
        params.extend(s.params)
    m = MCMC(params)
    m.sample(iter=10000, burn=1000, thin=10)
    hist(m.trace('lambda_david')[:])
    show()
Example #38
    def load_pymc_database(self, Database_address):

        # In case the database is open from a previous use
        if self.pymc_database is not None:
            self.pymc_database.close()

        # Load the pymc output textfile database
        self.pymc_database = database.pickle.load(Database_address)

        # Create a dictionary holding the traces from the database
        self.Traces_dict = {}
        self.traces_list = self.pymc_database.trace_names[
            0]  # This variable contains all the traces from the MCMC (stochastic and deterministic)

        for trace in self.traces_list:
            self.Traces_dict[trace] = self.pymc_database.trace(trace)

        #Generate a MCMC object to recover all the data from the run
        self.dbMCMC = MCMC(self.Traces_dict, self.pymc_database)

        return
Example #39
def fit_model():
    M = MCMC(disaster_model)
    M.sample(iter=10000, burn=1000, thin=10)
    print('switchpoint: ', M.trace('switchpoint')[:])
    print('hist: ', hist(M.trace('late_mean')[:]))
    # show()
    plot(M)
Example #40
    def compute(
            self,
            observation,
            prediction,
            observation_name='observation',
            prediction_name='prediction',
            mcmc_iter=110000,
            mcmc_burn=10000,
            effect_size_type='mode',  # 'mean'
            assume_normal=False,
            **kwargs):
        if not pymc:
            raise ImportError('Module best or pymc could not be loaded!')

        data_dict = {
            observation_name: observation,
            prediction_name: prediction
        }
        best_model = self.make_model(data_dict, assume_normal)
        M = MCMC(best_model)
        M.sample(iter=mcmc_iter, burn=mcmc_burn)

        group1_data = M.get_node(observation_name).value
        group2_data = M.get_node(prediction_name).value

        N1 = len(group1_data)
        N2 = len(group2_data)

        posterior_mean1 = M.trace('group1_mean')[:]
        posterior_mean2 = M.trace('group2_mean')[:]
        diff_means = posterior_mean1 - posterior_mean2

        posterior_std1 = M.trace('group1_std')[:]
        posterior_std2 = M.trace('group2_std')[:]

        pooled_var = ((N1 - 1) * posterior_std1**2 +
                      (N2 - 1) * posterior_std2**2) / (N1 + N2 - 2)

        self.effect_size = diff_means / np.sqrt(pooled_var)

        stats = best.calculate_sample_statistics(self.effect_size)

        self.score = best_effect_size(stats[effect_size_type])
        self.score.mcmc_iter = mcmc_iter
        self.score.mcmc_burn = mcmc_burn
        self.score.data_size = [N1, N2]
        self.score.HDI = (stats['hdi_min'], stats['hdi_max'])
        self.HDI = self.score.HDI
        return self.score
Example #41
    def test_non_missing(self):
        """
        Test to ensure that masks without any missing values are not imputed.
        """

        fake_data = rnormal(0, 1, size=10)
        m = ma.masked_array(fake_data, fake_data == -999)

        # Priors
        mu = Normal('mu', mu=0, tau=0.0001)
        s = Uniform('s', lower=0, upper=100, value=10)
        tau = s**-2

        # Likelihood with missing data
        x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

        # Instantiate sampler
        M = MCMC([mu, s, tau, x])

        # Run sampler
        M.sample(20000, 19000, progress_bar=0)

        # Ensure likelihood does not have a trace
        assert_raises(AttributeError, x.__getattribute__, 'trace')
Example #42
    def test_non_missing(self):
        """
        Test to ensure that masks without any missing values are not imputed.
        """

        fake_data = rnormal(0, 1, size=10)
        m = ma.masked_array(fake_data, fake_data == -999)

        # Priors
        mu = Normal('mu', mu=0, tau=0.0001)
        s = Uniform('s', lower=0, upper=100, value=10)
        tau = s ** -2

        # Likelihood with missing data
        x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

        # Instantiate sampler
        M = MCMC([mu, s, tau, x])

        # Run sampler
        M.sample(20000, 19000, progress_bar=0)

        # Ensure likelihood does not have a trace
        assert_raises(AttributeError, x.__getattribute__, 'trace')
Example #43
 def Outliers_Krough(self):
     
     fit_dict                = OrderedDict()
     
     fit_dict['methodology'] = r'Outliers Krough'
     
     #Initial Guess for fitting
     Bces_guess              = self.bces_regression()
     m_0, n_0                = Bces_guess['m'][0], Bces_guess['n'][0]
             
     Spread_vector           = ones(len(self.x_array))
     
     #Model for outliers detection
     Outliers_dect_dict      = self.inference_outliers(self.x_array, self.y_array, m_0, n_0, Spread_vector)
     
     mcmc = MCMC(Outliers_dect_dict)
     mcmc.sample(100000, 20000)
     
     #Extract the data with the outliers coordinates
     probability_of_points           = mcmc.trace('inlier')[:].astype(float).mean(0)
     fit_dict['x_coords_outliers']   = self.x_array[probability_of_points < self.prob_threshold]
     fit_dict['y_coords_outliers']   = self.y_array[probability_of_points < self.prob_threshold]
             
     return fit_dict
Example #44
    def __init__(self, lattice=ngc.grid_graph( dim=[N,N] ), 
                 data=zeros((N,N)), tau_x = 1, tau_y = 1, phi = 0.1):
        
        # sanity test
        if lattice.number_of_nodes() != data.size:
            raise Exception('data and lattice sizes do not match',
                            '%d vs %d' % (data.size, lattice.number_of_nodes()))

        self.num_nodes = lattice.number_of_nodes()
        self.phi = phi
        self.tau_x = tau_x
        self.tau_y = tau_y

        # just in case the input decides to give us weights
        for e in lattice.edges_iter():
            if not lattice.get_edge_data(e[0], e[1]):
                # setting lattice
                lattice.edge[e[0]][e[1]] = {'weight': phi}
                lattice.edge[e[1]][e[0]] = {'weight': phi}
            else:
                # keep the data
                pass
        self.lattice, self.data = lattice, data
                
        # convert the lattice into a GMRF precision matrix
        self.Lambda = zeros((self.num_nodes, self.num_nodes))
       
        #set up the grid and the data
        #@stochastic(dtype=float)
        #@def X

        # this v is a tuple index of the grid; tau_y is the observation precision
        self.Y = [Normal('Y_' + str(v), mu=0.5, tau=self.tau_y,
                         value=data[v], observed=True)
                  for v in lattice.nodes_iter()]
        MCMC.__init__(self, [self.Y])
Example #45
class test_tiny_MCMC(TestCase):

    # Instantiate samplers
    M = MCMC(disaster_model)

    # Sample
    M.sample(10, progress_bar=False)

    def test_plot(self):

        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        plot(self.M, path=DIR, verbose=0)
Example #46
    def compute(
            self,
            observation,
            prediction,
            observation_name='observation',
            prediction_name='prediction',
            mcmc_iter=110000,
            mcmc_burn=10000,
            effect_size_type='mode',  # 'mean'
            **kwargs):
        self.mcmc_iter = mcmc_iter
        self.mcmc_burn = mcmc_burn
        data_dict = {
            observation_name: observation,
            prediction_name: prediction
        }
        best_model = self.make_model(data_dict)
        M = MCMC(best_model)
        M.sample(iter=mcmc_iter, burn=mcmc_burn)

        group1_data = M.get_node(observation_name).value
        group2_data = M.get_node(prediction_name).value

        N1 = len(group1_data)
        N2 = len(group2_data)
        self.data_size = [N1, N2]

        posterior_mean1 = M.trace('group1_mean')[:]
        posterior_mean2 = M.trace('group2_mean')[:]
        diff_means = posterior_mean1 - posterior_mean2

        posterior_std1 = M.trace('group1_std')[:]
        posterior_std2 = M.trace('group2_std')[:]

        pooled_var = ((N1 - 1) * posterior_std1**2 +
                      (N2 - 1) * posterior_std2**2) / (N1 + N2 - 2)

        self.effect_size = diff_means / np.sqrt(pooled_var)

        stats = best.calculate_sample_statistics(self.effect_size)

        self.HDI = (stats['hdi_min'], stats['hdi_max'])

        self.score = best_effect_size(stats[effect_size_type])

        return self.score
Example #47
def bimodal_gauss(data,pm,dmin=0.3):
    '''run MCMC to get regression on bimodal normal distribution'''
    size = len(data[pm])

    

    ### set up model
    p = Uniform( "p", 0.2 , 0.8) #this is the fraction that come from mean1 vs mean2
    # p = distributions.truncated_normal_like('p', mu=0.5, tau=0.001, a=0., b=1.)
    # p = Normal( 'p', mu=(1.*sum(comp0==1))/size, tau=1./0.1**2 ) # attention: wings!, tau = 1/sig^2
    # p = Normal( 'p', mu=0.5, tau=1./0.1**2 ) # attention: wings!, tau = 1/sig^2
    
    ber = Bernoulli( "ber", p = p, size = size) # produces 1 with proportion p
    precision = Gamma('precision', alpha=0.01, beta=0.01)
    
    mean1 = Uniform( "mean1", -0.5, 1.0) # if not truncated
    sig1  = Uniform( 'sig1',  0.01, 1.)
    mean2 = Uniform( "mean2", mean1 + dmin, 1.5)
    sig2  = Uniform( 'sig2',  0.01, 1.)

    pop1  = Normal( 'pop1', mean1, 1./sig1**2) # tau is 1/sig^2
    pop2  = Normal( 'pop2', mean2, 1./sig2**2)


    @deterministic
    def bimod(ber = ber, pop1 = pop1, pop2 = pop2): # value determined from parents completely
        return ber*pop1 + (1-ber)*pop2

    obs = Normal( "obs", bimod, precision, value = data[pm], observed = True)
    model = Model( {"p":p, "precision": precision, "mean1": mean1, 'sig1': sig1, "mean2":mean2, 'sig2':sig2, "obs":obs} )
    
    from pymc import MCMC, Matplot


    M = MCMC(locals(), db='pickle', dbname='metals.pickle')
    iter = 10000; burn = 9000; thin = 10
    M.sample(iter=iter, burn=burn, thin=thin)
    M.db.commit()

    mu1 = np.mean(M.trace('mean1')[:])
    sig1= np.mean(M.trace('sig1')[:])
    mu2 = np.mean(M.trace('mean2')[:])
    sig2= np.mean(M.trace('sig2')[:])
    p   = np.mean(M.trace('p')[:])
    return p, mu1, sig1, mu2, sig2, M
Example #48
class test_MCMC(TestCase):

    # Instantiate samplers
    M = MCMC(disaster_model, db='pickle')

    # Sample
    M.sample(2000, 100, thin=15, verbose=0, progress_bar=False)
    M.db.close()

    def test_instantiation(self):

        # Check stochastic arrays
        assert_equal(len(self.M.stochastics), 3)
        assert_equal(len(self.M.observed_stochastics), 1)
        assert_array_equal(self.M.disasters.value,
                           disaster_model.disasters_array)

    def test_plot(self):
        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        plot(self.M.early_mean, path=DIR, verbose=0)

    def test_autocorrelation(self):
        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        autocorrelation(self.M.early_mean, path=DIR, verbose=0)

    def test_stats(self):
        S = self.M.early_mean.stats()
        self.M.stats()

    def test_summary(self):
        self.M.rate.summary()

    def test_stats_after_reload(self):
        db = database.pickle.load('MCMC.pickle')
        M2 = MCMC(disaster_model, db=db)
        M2.stats()
        db.close()
        os.remove('MCMC.pickle')
Example #49
def analizeM():
	M = MCMC(dm)
	print("M: ", M)

	M.sample(iter=10000, burn=1000, thin=10)
	print("M t: ", M.trace('switchpoint')[:])

	hist(M.trace('late_mean')[:])
	# show()

	plot(M)
	# show()

	print("M smd dm sp: ", M.step_method_dict[dm.switchpoint])
	print("M smd dm em: ", M.step_method_dict[dm.early_mean])
	print("M smd dm lm: ", M.step_method_dict[dm.late_mean])

	M.use_step_method(Metropolis, dm.late_mean, proposal_sd=2.)
Example #50
 def load_pymc_database(self, Database_address):
     
     # In case the database is open from a previous use
     if self.pymc_database is not None:
         self.pymc_database.close()

     # Load the pymc output textfile database
     self.pymc_database = database.pickle.load(Database_address)

     # Create a dictionary holding the traces from the database
     self.Traces_dict = {}
     self.traces_list = self.pymc_database.trace_names[0]  # This variable contains all the traces from the MCMC (stochastic and deterministic)
     
     for trace in self.traces_list:
         self.Traces_dict[trace] = self.pymc_database.trace(trace)
 
     #Generate a MCMC object to recover all the data from the run
     self.dbMCMC      = MCMC(self.Traces_dict, self.pymc_database)
     
     return
Example #51
class test_MCMC(TestCase):

    dbname = DIR + 'test_MCMC'

    if not os.path.exists(DIR):
        os.mkdir(DIR)

    # Instantiate samplers
    M = MCMC(disaster_model, db='txt', dbname=dbname)

    # Sample
    M.sample(2000, 100, thin=15, verbose=0, progress_bar=False)

    def test_instantiation(self):

        # Check stochastic arrays
        assert_equal(len(self.M.stochastics), 3)
        assert_equal(len(self.M.observed_stochastics), 1)
        assert_array_equal(self.M.disasters.value,
                           disaster_model.disasters_array)

    def test_plot(self):
        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        plot(self.M.early_mean, path=DIR, verbose=0)

    def test_autocorrelation(self):
        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        autocorrelation(self.M.early_mean, path=DIR, verbose=0)

    def test_stats(self):
        S = self.M.early_mean.stats()
        self.M.stats()

    def test_float_iter(self):
        self.M.sample(10.5, verbose=0, progress_bar=False)
Example #52
def estimate_failures_from_counts(counts, #samples from noisy labelers
                      n_samples=10000, #number of samples to run MCMC for
                      burn=None, #burn-in. Defaults to n_samples/2
                      thin=10, #thinning rate. Sample every k samples from markov chain 
                      alpha_p=1, beta_p=1, #beta parameters for true positive rate
                      alpha_e=1, beta_e=10 #beta parameters for noise rates
                      ):

  if burn is None:
    burn = n_samples / 2

  S = counts.sum()
  N = len(counts.shape)

  p_label = Beta('p_label', alpha=alpha_p, beta=beta_p) #prior on true label
  e_pos = Beta('e_pos', alpha_e, beta_e, size=N) # error rate if label = 1
  e_neg = Beta('e_neg', alpha_e, beta_e, size=N) # error rate if label = 0

  print(counts)
  @deterministic(plot=False)
  def patterns(p_label=p_label, e_pos=e_pos, e_neg=e_neg):
    #probability that the noisy labelers output pattern p
    P = np.zeros((2,)*N)
    for pat in itertools.product([0,1], repeat=N):
      P[pat] = p_label*np.product([1-e_pos[i] if pat[i]==1 else e_pos[i] for i in range(N)])
      P[pat] += (1-p_label)*np.product([e_neg[i] if pat[i]==1 else 1-e_neg[i] for i in range(N)])
    assert np.abs(P.sum() - 1) < 1e-6
    return P.ravel()
    
  pattern_counts = Multinomial('pattern_counts',n=S, p=patterns, value=counts.ravel(), observed=True)
  variables = [p_label, e_pos, e_neg, patterns, pattern_counts]
  model = MCMC(variables, verbose=3)
  model.sample(iter=n_samples, burn=burn, thin=thin)
  model.write_csv('out.csv', ['p_label', 'e_pos', 'e_neg'])
  p = np.median(model.trace('p_label')[:])
  e_pos = np.median(model.trace('e_pos')[:],0)
  e_neg = np.median(model.trace('e_neg')[:],0)
  return p, e_pos, e_neg
Example #53
class test_MCMC(TestCase):

    # Instantiate samplers
    M = MCMC(DisasterModel, db='pickle')

    # Sample
    M.sample(4000, 2000, verbose=0)
    M.db.close()

    def test_instantiation(self):

        # Check stochastic arrays
        assert_equal(len(self.M.stochastics), 3)
        assert_equal(len(self.M.observed_stochastics), 1)
        assert_array_equal(self.M.D.value, DisasterModel.disasters_array)

    def test_plot(self):
        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        plot(self.M.e, path=DIR, verbose=0)

    def test_autocorrelation(self):
        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        autocorrelation(self.M.e, path=DIR, verbose=0)

    def test_stats(self):
        S = self.M.e.stats()
        self.M.stats()

    def test_stats_after_reload(self):
        db = database.pickle.load('MCMC.pickle')
        M2 = MCMC(DisasterModel, db=db)
        M2.stats()
        db.close()
        os.remove('MCMC.pickle')
Example #54
def differenceOfmeans(humanMean=4.5, sampleSize=50, variance=0.2):
    #note that tau is not sigma
    #sigma^2=1/tau
    t = 1 / variance
    #what is the probability that an analyst would give this image the same rating?
    mu = TruncatedNormal('mu', mu=humanMean, tau=t, a=1,
                         b=10)  #hypothetical ground truth
    botOutput = TruncatedNormal('botOutput', mu=mu, tau=t, a=1, b=10)
    humanOutput = TruncatedNormal('humanOutput', mu=mu, tau=t, a=1, b=10)
    #when we have data from the model we can use this here,
    #like this: d = pymc.Binomial('d', n=n, p=theta, value=np.array([0.,1.,3.,5.]), observed=True)

    sim = MCMC([mu, botOutput, humanOutput])

    sim.sample(sampleSize, 0, 1)
    botOutput = sim.trace("botOutput")[:]
    #if humans only give ratings at the 0.5 interval, not smaller
    #        humanOutput = round_to_half(sim.trace("humanOutput")[:])
    humanOutput = sim.trace("humanOutput")[:]
    #difference of the means
    #but what we care about is the mean of the human output for each image.
    difference = botOutput - humanOutput.mean()
    return difference
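Hypothetical usage, summarizing the sampled rating differences:

diff = differenceOfmeans(humanMean=4.5, sampleSize=500)
print(diff.mean(), diff.std())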
Example #55
# Ph21 Set 5
# Aritra Biswas

# coin_mcmc.py
# Run MCMC on coin_model.py

import coin_model
from pymc import MCMC
from pymc.Matplot import plot

M = MCMC(coin_model)
M.sample(iter = 10000, burn = 0, thin = 1)
print()
plot(M)
M.pheads.summary()
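A plausible coin_model module for this run (hypothetical; only the `pheads` name is fixed by the summary call above):

# coin_model.py
import numpy as np
from pymc import Uniform, Bernoulli

pheads = Uniform('pheads', lower=0, upper=1)                # prior on coin bias
flips = Bernoulli('flips', p=pheads, observed=True,
                  value=np.random.randint(0, 2, size=100))  # fake flip data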
Example #56
        coefs[tName] = Normal(tName,0,0.001,value=sp.rand()-0.5)
        termList.append(d*coefs[tName])

    # get individual edge probabilities
    @deterministic(trace=False,plot=False)
    def probs(termList=termList):
        probs = 1./(1+sp.exp(-1*sum(termList)))
        probs[sp.diag_indices_from(probs)]= 0
        return(probs)

    # define the outcome as 
    outcome = Bernoulli('outcome',probs,value=adjMat,observed=True)

    return(locals())


if __name__ == '__main__':
    # load the prison data
    with open('prison.dat','r') as f:
        rowList = list()
        for l in f:
            rowList.append([int(x) for x in l.strip().split(' ')])
        adjMat = sp.array(rowList)
    
    # make the model as an MCMC object
    m = makeModel(adjMat)
    mc = MCMC(m)

    # estimate
    mc.sample(30000,1000,50)
Example #57
from pylab import *
import numpy as np
from pymc import Lognormal, MCMC, Model

# The mu and tau are in log units; to get to log units,
# do the following
# (has mean around 1e2, with a variance of 9 logs in base 10)
mean_b10 = 2
var_b10 = 9

print("Setting mean (base 10) to %f, variance (base 10) to %f" % (mean_b10, var_b10))

# The lognormal variable
k = Lognormal('k', mu=np.log(10 ** mean_b10),
                   tau=1./(np.log(10) * np.log(10 ** var_b10)))

# Sample it
m = MCMC(Model([k]))
m.sample(iter=50000)

ion()

# Plot the distribution in base e
figure()
y = log(m.trace('k')[:])
y10 = log10(m.trace('k')[:])
hist(y, bins=100)
print()
print("Mean, base e: %f; Variance, base e: %f" % (mean(y), var(y)))

# Plot the distribution in base 10
figure()
hist(y10, bins=100)
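For reference, this parameterization implies mean(y) = ln(10**2) ~ 4.61 and var(y) = 1/tau = ln(10) * ln(10**9) = 9 * ln(10)**2 ~ 47.7, so the base-10 variance var(y10) = var(y) / ln(10)**2 recovers var_b10 = 9.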
Example #58
def test_regression_155():
    """thin > iter"""
    M = MCMC(disaster_model, db='ram')
    M.sample(10,0,100, progress_bar=0)