def test_simple(self):
    """Sample a normal model with masked observations and check the trace of ``x``."""
    # Priors on location and scale
    mu = Normal('mu', mu=0, tau=0.0001)
    s = Uniform('s', lower=0, upper=100, value=10)
    tau = s ** -2

    # Likelihood with missing data (``m`` is taken from the enclosing scope)
    x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

    # Build the sampler and run it
    sampler = MCMC([mu, s, tau, x])
    sampler.sample(10000, 5000, progress_bar=0)

    # The value of x keeps its full length
    assert_equal(len(x.value), 100)

    # The trace of x is expected to be 5000 samples by 2 entries
    samples = sampler.trace('x')()
    assert_equal(shape(samples), (5000, 2))

    # Check for standard normal output: about 95% of the 10000 draws in (-2, 2)
    inside = [-2 < draw < 2 for draw in ravel(samples)]
    assert_almost_equal(sum(inside) / 10000., 0.95, decimal=1)
def test_simple(self):
    """Check value length, trace shape and spread of an imputed normal likelihood."""
    # Priors
    mu = Normal('mu', mu=0, tau=0.0001)
    s = Uniform('s', lower=0, upper=100, value=10)
    tau = s**-2

    # Likelihood with missing data (``m`` is defined outside this method)
    x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

    # Instantiate and run the sampler
    nodes = [mu, s, tau, x]
    model = MCMC(nodes)
    model.sample(10000, 5000, progress_bar=0)

    # Check length of value
    assert_equal(len(x.value), 100)

    # Check size of trace
    trace_x = model.trace('x')()
    assert_equal(shape(trace_x), (5000, 2))

    # Check for standard normal output
    flags = [-2 < v < 2 for v in ravel(trace_x)]
    fraction_inside = sum(flags) / 10000.
    assert_almost_equal(fraction_inside, 0.95, decimal=1)
def test_interactive():
    """Run the interactive sampler on ``disaster_model``, logging to a file.

    The log file is opened in a ``with`` block so that its handle is closed
    when ``isample`` returns or raises, instead of being leaked.
    """
    S = MCMC(disaster_model)
    with open('testresults/interactive.log', 'w') as log:
        S.isample(200, 100, 2, out=log, progress_bar=0)
def test_fit(self):
    """Sample a model made of a parent node and one stochastic depending on it."""
    parent = self._build_parent()
    child = MyStochastic(self.STOCHASTIC_NAME, parent)
    sampler = MCMC({parent, child})
    sampler.sample(100, burn=10, thin=2)
def test_nd(self):
    """Store a multi-dimensional trace, reload the database and compare both."""
    db_path = os.path.join(testdir, 'ND.' + self.name)

    M = MCMC([self.NDstoch()], db=self.name, dbname=db_path, dbmode='w')
    M.sample(10, progress_bar=0)
    sampled = M.trace('nd')[:]
    assert_equal(sampled.shape, (10, 2, 2))

    # Load the same file through the backend module named by self.name
    backend = getattr(pymc.database, self.name)
    reloaded = backend.load(db_path)
    assert_equal(reloaded.trace('nd')[:], sampled)
def test_interactive():
    """Run the interactive sampler against the sqlite backend.

    Skipped when ``pymc.database`` exposes no ``sqlite`` attribute. The log
    file is opened in a ``with`` block so its handle is always closed.
    """
    if 'sqlite' not in dir(pymc.database):
        raise nose.SkipTest
    M = MCMC(disaster_model, db='sqlite',
             dbname=os.path.join(testdir, 'interactiveDisaster.sqlite'),
             dbmode='w')
    with open('testresults/interactivesqlite.log', 'w') as log:
        M.isample(10, out=log, progress_bar=0)
def test_interactive():
    """Run the interactive sampler on ``DisasterModel`` with the sqlite backend.

    Skipped when ``pymc.database`` exposes no ``sqlite`` attribute. The log
    file is opened in a ``with`` block so its handle is always closed.
    """
    if 'sqlite' not in dir(pymc.database):
        raise nose.SkipTest
    M = MCMC(DisasterModel, db='sqlite',
             dbname=os.path.join(testdir, 'interactiveDisaster.sqlite'),
             dbmode='w')
    with open('testresults/interactivesqlite.log', 'w') as log:
        M.isample(10, out=log)
def test_fit_with_sibling(self):
    """Sample a model in which two stochastics share the same parent node."""
    parent = self._build_parent()
    child = MyStochastic(self.STOCHASTIC_NAME, parent)
    sibling = MyStochastic(self.SIBLING_NAME, parent)
    sampler = MCMC({parent, child, sibling})
    sampler.sample(100, burn=10, thin=2)
def test_pymc_model(self):
    """Tests sampler.

    Builds an MCMC sampler from ``model_omm.pymc_parameters``, checks the
    types of the model and the sampler, and draws a single iteration.
    """
    sampler = MCMC(model_omm.pymc_parameters)
    # assertIsInstance replaces the deprecated ``assert_`` alias and reports
    # the offending object on failure.
    self.assertIsInstance(model_omm, TorsionFitModelOMM)
    self.assertIsInstance(sampler, pymc.MCMC)
    sampler.sample(iter=1)
def mcmc(prob, nsample=100, modulename='model'):
    """Build the model exposed by ``modulename`` for ``prob`` and sample it.

    The module named ``modulename`` must provide a callable ``model``; it is
    called with ``prob`` and the result is handed to ``MCMC``.

    :param prob: argument forwarded to ``model``
    :param nsample: number of iterations passed to ``MCMC.sample``
    :param modulename: name of the module to import ``model`` from
    :returns: the sampled ``MCMC`` object
    :raises ImportError: if the module cannot be imported
    :raises AttributeError: if the module has no ``model`` attribute
    """
    import importlib

    # exec("from ... import model") inside a function does not create a
    # usable local in Python 3, and a bare except hid the real failure; import
    # explicitly and let the error propagate after reporting it.
    try:
        model = getattr(importlib.import_module(modulename), 'model')
    except (ImportError, AttributeError):
        print('cannot import', modulename)
        raise

    M = MCMC(model(prob))
    M.sample(nsample)
    return M
def mcmc(prob, nsample=100, modulename='model'):
    """Build the model exposed by ``modulename`` for ``prob`` and sample it.

    The module named ``modulename`` must provide a callable ``model``; it is
    called with ``prob`` and the result is handed to ``MCMC``.

    :param prob: argument forwarded to ``model``
    :param nsample: number of iterations passed to ``MCMC.sample``
    :param modulename: name of the module to import ``model`` from
    :returns: the sampled ``MCMC`` object
    :raises ImportError: if the module cannot be imported
    :raises AttributeError: if the module has no ``model`` attribute
    """
    import importlib

    # Importing through exec of a concatenated string is fragile (and does
    # not bind a local under Python 3); the bare except also swallowed every
    # error only to fail later with a NameError. Import explicitly instead.
    try:
        model = getattr(importlib.import_module(modulename), 'model')
    except (ImportError, AttributeError):
        # Single-argument form prints the same text under Python 2 and 3.
        print('cannot import %s' % modulename)
        raise

    M = MCMC(model(prob))
    M.sample(nsample)
    return M
def test_zcompression(self):
    """Sample into an HDF5 database created with dbcomplevel=5 and check the trace shape."""
    target = os.path.join(testdir, 'DisasterModelCompressed.hdf5')
    db = pymc.database.hdf5.Database(dbname=target, dbmode='w', dbcomplevel=5)

    S = MCMC(DisasterModel, db=db)
    S.sample(45, 10, 1)
    # 45 iterations, 10 burned, thinning 1
    expected_shape = (35,)
    assert_array_equal(S.e.trace().shape, expected_shape)

    S.db.close()
    db.close()
    del S
def test_interactive():
    """Run the interactive sampler on ``disaster_model``, logging to a file.

    The log file is managed by a ``with`` block so the handle is released
    whether ``isample`` returns normally or raises.
    """
    S = MCMC(disaster_model)
    with open('testresults/interactive.log', 'w') as log:
        S.isample(200, 100, 2, out=log, progress_bar=0)
def test_nd(self):
    """Write an N-d trace with the backend named by ``self.name`` and read it back."""
    location = os.path.join(testdir, 'ND.' + self.name)

    sampler = MCMC([self.NDstoch()], db=self.name, dbname=location, dbmode='w')
    sampler.sample(10, progress_bar=0)

    a = sampler.trace('nd')[:]
    assert_equal(a.shape, (10, 2, 2))

    # Reload from disk and compare against the in-memory trace
    db = getattr(pymc.database, self.name).load(location)
    assert_equal(db.trace('nd')[:], a)
def test_zcompression(self):
    """Run a short chain against a compressed HDF5 database and check its length."""
    db = pymc.database.hdf5.Database(
        dbname=os.path.join(testdir, 'DisasterModelCompressed.hdf5'),
        dbmode='w',
        dbcomplevel=5,
    )
    sampler = MCMC(DisasterModel, db=db)
    sampler.sample(45, 10, 1)

    # 45 iterations, 10 burned, thinning 1
    trace_shape = sampler.e.trace().shape
    assert_array_equal(trace_shape, (35,))

    sampler.db.close()
    db.close()
    del sampler
def test_zcompression(self):
    """Sample into a compressed HDF5 database with all warnings silenced."""
    target = os.path.join(testdir, 'disaster_modelCompressed.hdf5')
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        db = pymc.database.hdf5.Database(dbname=target, dbmode='w',
                                         dbcomplevel=5)
        sampler = MCMC(disaster_model, db=db)
        sampler.sample(45, 10, 1, progress_bar=0)

        # 45 iterations, 10 burned, thinning 1
        early_mean = sampler.trace('early_mean')[:]
        assert_array_equal(early_mean.shape, (35,))

        sampler.db.close()
        db.close()
        del sampler
def compute(var_LB, var_UB, num_samples=10):
    """Sample a single uniform variable and return its trace object.

    ``X`` is a pymc ``Uniform`` built from ``var_LB`` and ``var_UB``; the
    sampler is run for ``num_samples`` iterations and ``X.trace()`` is
    returned as-is.
    """
    from pymc import MCMC, Uniform

    X = Uniform('X', var_LB, var_UB)
    sampler = MCMC([X])
    sampler.sample(num_samples)
    return X.trace()
def estimate_failures(samples, n_samples=10000, burn=None, thin=10,
                      alpha_p=1, beta_p=1, alpha_e=1, beta_e=10):
    """Fit a noisy-labeler model by MCMC and return posterior medians.

    :param samples: samples from noisy labelers (2-D, unpacked as ``S, N``)
    :param n_samples: number of samples to run MCMC for
    :param burn: burn-in; defaults to ``n_samples / 2``
    :param thin: thinning rate, sample every k samples from the Markov chain
    :param alpha_p, beta_p: beta parameters for the true positive rate
    :param alpha_e, beta_e: beta parameters for the noise rates
    :returns: tuple ``(p, e_pos, e_neg)`` of trace medians; the two error
        rates are medians taken along axis 0

    Side effect: writes ``out.csv`` through ``MCMC.write_csv``.
    """
    if burn is None:
        burn = n_samples / 2
    n_items, n_labelers = samples.shape

    # prior on true label
    p = Beta('p', alpha=alpha_p, beta=beta_p)
    l = Bernoulli('l', p=p, size=n_items)
    # error rate if label = 1 / if label = 0
    e_pos = Beta('e_pos', alpha_e, beta_e, size=n_labelers)
    e_neg = Beta('e_neg', alpha_e, beta_e, size=n_labelers)

    @deterministic(plot=False)
    def noise_rate(l=l, e_pos=e_pos, e_neg=e_neg):
        # probability that a noisy labeler puts a label 1
        return np.outer(l, 1-e_pos) + np.outer(1-l, e_neg)

    noisy_label = Bernoulli('noisy_label', p=noise_rate, size=samples.shape,
                            value=samples, observed=True)

    sampler = MCMC([l, e_pos, e_neg, p, noisy_label, noise_rate], verbose=3)
    sampler.sample(iter=n_samples, burn=burn, thin=thin)
    sampler.write_csv('out.csv', ['p', 'e_pos', 'e_neg'])

    median_p = np.median(sampler.trace('p')[:])
    median_e_pos = np.median(sampler.trace('e_pos')[:], 0)
    median_e_neg = np.median(sampler.trace('e_neg')[:], 0)
    return median_p, median_e_pos, median_e_neg
def MCMC(self, nruns=10000, burn=1000, init_error_std=1., max_error_std=100., verbose=1):
    ''' Perform Markov Chain Monte Carlo sampling using pymc package

        :param nruns: Number of MCMC iterations (samples)
        :type nruns: int
        :param burn: Number of initial samples to burn (discard)
        :type burn: int
        :param verbose: verbosity of output
        :type verbose: int
        :param init_error_std: Initial standard deviation of residuals
        :type init_error_std: fl64
        :param max_error_std: Maximum standard deviation of residuals that will be considered
        :type max_error_std: fl64
        :returns: pymc MCMC object, or None when max_error_std < init_error_std
        :raises ImportError: if the pymc package cannot be imported
    '''
    if max_error_std < init_error_std:
        print("Error: max_error_std must be greater than or equal to init_error_std")
        return
    try:
        from pymc import Uniform, deterministic, Normal, MCMC, Matplot
    except ImportError as exc:
        sys.stderr.write("Warning: failed to import pymc module. ({})\n".format(exc))
        sys.stderr.write("If pymc is not installed, try installing:\n")
        sys.stderr.write("e.g. try using easy_install: easy_install pymc\n")
        # Nothing below can run without pymc; re-raise the real cause instead
        # of failing a few lines later with a NameError.
        raise

    def _mcmc_model(self, init_error_std=1., max_error_std=100.):
        # priors: the residual standard deviation first, then one uniform
        # prior per parameter
        variables = []
        sig = Uniform('error_std', 0.0, max_error_std, value=init_error_std)
        variables.append(sig)
        for nm, mn, mx in zip(self.parnames, self.parmins, self.parmaxs):
            # Call Uniform directly rather than eval-ing a concatenated
            # string: no round trip of the bounds through str().
            variables.append(Uniform(str(nm), mn, mx))

        # model
        @deterministic()
        def residuals(pars=variables, p=self):
            # pars[0] is the error std; the remaining entries are parameters
            values = [float(pars[i]) for i in range(1, len(pars))]
            pardict = dict(zip(p.parnames, values))
            p.forward(pardict=pardict, reuse_dirs=True)
            return numpy.array(p.residuals) * numpy.array(p.obsweights)

        # likelihood: weighted residuals are compared against zeros
        y = Normal('y', mu=residuals, tau=1.0 / sig**2, observed=True,
                   value=numpy.zeros(len(self.obs)))
        variables.append(y)
        return variables

    M = MCMC(_mcmc_model(self, init_error_std=init_error_std, max_error_std=max_error_std))
    M.sample(iter=nruns, burn=burn, verbose=verbose)
    return M
def run_mc(self, nsample=10000, interactive=False, doplot=False, verbose=0):
    """Sample this model with MCMC and store the sampler on ``self.M``.

    Uses ``isample`` when ``interactive`` is true and ``sample`` otherwise,
    always with burn=1000 and thin=10; plots the sampler when ``doplot``.
    """
    from pymc import MCMC

    self.M = MCMC(self)
    draw = self.M.isample if interactive else self.M.sample
    draw(iter=nsample, burn=1000, thin=10, verbose=verbose)

    if not doplot:
        return
    from pymc.Matplot import plot
    plot(self.M)
def analizeMwm():
    """Sample the ``dmwm`` model and print / plot a few of its traces."""
    masked_values = np.ma.masked_equal(x, value=None)
    print("m v: ", masked_values)
    print("dmwm da: ", dmwm.disasters_array)

    Mwm = MCMC(dmwm)
    Mwm.sample(iter=10000, burn=1000, thin=10)

    switchpoint_trace = Mwm.trace('switchpoint')[:]
    print("Mwm t: ", switchpoint_trace)

    late_mean_trace = Mwm.trace('late_mean')[:]
    hist(late_mean_trace)
    plot(Mwm)
def run(self, n_iter=110000, n_burn=10000, thin=1):
    """Run the Bayesian test.

    :param int n_iter: total number of MCMC iterations
    :param int n_burn: no tallying done during the first n_burn iterations -
        these samples will be forgotten
    :param int thin: variables will be tallied at intervals of this many
        iterations
    :return: None
    """
    # NOTE(review): true division; under Python 3 this hands a float to
    # model.setup - confirm that is what setup expects.
    n_tallied = (n_iter - n_burn) / thin
    self.model.setup(n_tallied)

    self.sampler = MCMC(self.model.stochastics)
    sample_options = dict(iter=n_iter, burn=n_burn, thin=thin,
                          progress_bar=self.verbose)
    self.sampler.sample(**sample_options)
def imputeBayesian(row, dist):
    """Impute the missing entries of ``row`` with one MCMC draw.

    NaNs are temporarily replaced by 0 and every 0 is treated as missing
    (so genuine zeros are imputed as well, and any zero left at the end is
    turned back into NaN). ``row`` is modified in place and returned.

    :param row: pandas-like object supporting ``fillna``, ``values``,
        ``iloc`` and ``replace``
    :param dist: ``"Normal"`` or ``"Poisson"``
    :returns: ``row`` with imputed values
    :raises ValueError: if ``dist`` is not one of the supported names
    """
    import os

    # Validate before touching ``row``; previously an unknown name only
    # surfaced later as a NameError on ``x``.
    if dist not in ("Normal", "Poisson"):
        raise ValueError("dist must be 'Normal' or 'Poisson', got %r" % (dist,))

    # filling nan with 0 so everything works
    row.fillna(0, inplace=True)
    maskedValues = np.ma.masked_equal(row.values, value=0)

    if dist == "Normal":
        # Precision from the spread of the known values; when the spread is
        # zero fall back to a third of the mean as standard deviation.
        if np.std(maskedValues) == 0:
            tau = np.square(1 / (np.mean(maskedValues) / 3))
        else:
            tau = np.square(1 / np.std(maskedValues))
        x = Impute('x', Normal, maskedValues, tau=tau, mu=np.mean(maskedValues))
    else:
        x = Impute('x', Poisson, maskedValues, mu=np.mean(maskedValues))

    # Silence the sampler's console output. The null device is closed and
    # stdout restored even if sampling raises.
    saved_stdout = sys.stdout
    with open(os.devnull, 'w') as devnull:
        sys.stdout = devnull
        try:
            m = MCMC(x)
            m.sample(iter=1, burn=0, thin=1)
        finally:
            sys.stdout = saved_stdout

    # Positions that were missing, then their imputed values from the trace
    missing = [i for i, value in enumerate(row.values) if value == 0]
    for position in missing:
        imputedValue = m.trace("x[%d]" % position)[:]
        row.iloc[position] = imputedValue[0]

    # Returning to use nans
    row.replace(0, np.nan, inplace=True)
    return row
def run_trials(trials=0, iters=0, tau=10000, prior=None, errort_b=None,
               Linf=None, sparsity=None, logps=None):
    """Run ``trials`` independent toy-model simulations and summarise them.

    For every trial the last log-probability, the last ``b`` error, an
    inverse-error sum over the three largest final ``x`` errors and the
    number of final ``x`` errors <= 0.02 are appended to the result lists.
    A textual summary of each list is printed at the end.

    :param trials: number of simulations to run
    :param iters: iterations forwarded to ``simulation.sample_toy_save``
    :param tau: forwarded to ``models.toy_model``
    :param prior: forwarded to ``models.toy_model``
    :param errort_b, Linf, sparsity, logps: optional lists to append to;
        a fresh list is created for each one left as ``None``
    :returns: tuple ``(Linf, sparsity, errort_b, logps)``
    """
    # Mutable defaults would be shared (and keep growing) across calls.
    errort_b = [] if errort_b is None else errort_b
    Linf = [] if Linf is None else Linf
    sparsity = [] if sparsity is None else sparsity
    logps = [] if logps is None else logps

    for _ in range(trials):
        # NOTE need to create new model per iteration, pymc might be using
        # the model instance to seed a random number generator somewhere...
        model = models.toy_model(tau=tau, prior=prior)
        A = MCMC(model)
        A, logp, errors_b, errors_x = simulation.sample_toy_save(
            model, A, iters=iters, verbose=False)
        logps.append(logp[-1])
        errort_b.append(errors_b[-1])
        final_x = errors_x[:, -1]
        Linf.append(np.sum(1 / final_x[final_x.argsort()[-3:]]))
        sparsity.append(np.sum(final_x <= 0.02))

    # Single-argument print calls behave the same under Python 2 and 3.
    print('')
    summaries = [('Linf', Linf), ('Sparsity', sparsity),
                 ('Error_b', errort_b), ('logp', logps)]
    for label, values in summaries:
        print(label)
        if label == 'Sparsity':
            print([np.sum(np.array(values) == k) for k in range(4)])
        else:
            (H1, H2) = np.histogram(values)
            print(H1)
            print(H2)
            print("Median: %s" % (np.median(values)))
    return Linf, sparsity, errort_b, logps
def test_identical_object_names():
    """Build a sampler from two stochastics both named 'a'.

    A ``ValueError`` from the constructor is tolerated.
    NOTE(review): the test also passes when no error is raised - confirm
    whether the error is actually meant to be required.
    """
    first = pymc.Uniform('a', 0, 10)
    second = pymc.Uniform('a', 0, 10)
    try:
        M = MCMC([first, second])
    except ValueError:
        pass
def test_zcompression(self):
    """Sample into a compressed HDF5 database with warnings silenced.

    ``warnings.catch_warnings`` restores the warning state on exit, which
    replaces the manual save/restore of ``warnings.filters`` (rebinding that
    list by hand bypasses the module's own bookkeeping).
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        db = pymc.database.hdf5.Database(
            dbname=os.path.join(testdir, 'disaster_modelCompressed.hdf5'),
            dbmode='w', dbcomplevel=5)
        S = MCMC(disaster_model, db=db)
        S.sample(45, 10, 1, progress_bar=0)
        # 45 iterations, 10 burned, thinning 1
        assert_array_equal(S.trace('early_mean')[:].shape, (35,))
        S.db.close()
        db.close()
        del S
class test_MCMC(TestCase):
    """Checks on a ``DisasterModel`` sampler that is run once at class creation."""

    # Instantiate the shared sampler and draw from it
    M = MCMC(DisasterModel)
    M.sample(4000, 2000, verbose=0)

    def test_instantiation(self):
        """The sampler exposes the expected node counts and observed data."""
        sampler = self.M
        assert_equal(len(sampler.stochastics), 3)
        assert_equal(len(sampler.observed_stochastics), 1)
        assert_array_equal(sampler.D.value, DisasterModel.disasters_array)

    def test_plot(self):
        """Plot the samples of ``e`` (skipped without plotting support)."""
        if PLOT:
            plot(self.M.e, path=DIR, verbose=0)
        else:
            raise nose.SkipTest

    def test_autocorrelation(self):
        """Plot the autocorrelation of ``e`` (skipped without plotting support)."""
        if PLOT:
            autocorrelation(self.M.e, path=DIR, verbose=0)
        else:
            raise nose.SkipTest

    def test_stats(self):
        """Computing the statistics of ``e`` must not raise."""
        self.M.e.stats()
def test_zcompression(self):
    """Run a short chain against a compressed HDF5 database, warnings off.

    The warning state is scoped with ``warnings.catch_warnings`` instead of
    copying and re-assigning ``warnings.filters`` by hand, so it is restored
    through the module's own mechanism whatever happens in the body.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        db = pymc.database.hdf5.Database(
            dbname=os.path.join(testdir, 'disaster_modelCompressed.hdf5'),
            dbmode='w',
            dbcomplevel=5)
        S = MCMC(disaster_model, db=db)
        S.sample(45, 10, 1, progress_bar=0)
        # 45 iterations, 10 burned, thinning 1
        assert_array_equal(S.trace('early_mean')[:].shape, (35, ))
        S.db.close()
        db.close()
        del S
class BABTest:
    """Bayesian comparison of a control group and a variant group."""

    def __init__(self, control, variant, model='student', verbose=True):
        """Init.

        :param np.array control: 1 dimensional array of observations for
            control group
        :param np.array variant: 1 dimensional array of observations for
            variant group
        :param string model: desired distribution to describe both groups,
            defaults to Student
        :param verbose: passed as ``progress_bar`` to the sampler in ``run``
        :raises KeyError: if ``model`` is not a key of ``models``
        """
        assert control.ndim == 1
        assert variant.ndim == 1

        self.control, self.variant = control, variant
        self.sampler = None

        if model in models:
            self.model = models[model](self.control, self.variant)
        else:
            raise KeyError(
                'Unknown model - please select a model from {}'.format(
                    models.keys()))
        self.verbose = verbose

    def run(self, n_iter=110000, n_burn=10000, thin=1):
        """Run the Bayesian test.

        :param int n_iter: total number of MCMC iterations
        :param int n_burn: no tallying done during the first n_burn
            iterations - these samples will be forgotten
        :param int thin: variables will be tallied at intervals of this many
            iterations
        :return: None
        """
        # NOTE(review): true division; under Python 3 setup receives a float.
        n_tallied = (n_iter - n_burn) / thin
        self.model.setup(n_tallied)
        self.sampler = MCMC(self.model.stochastics)
        self.sampler.sample(iter=n_iter, burn=n_burn, thin=thin,
                            progress_bar=self.verbose)

    def plot(self, n_bins=30):
        """Display the results of the test.

        :param int n_bins: number of bins in the histograms
        :return: None
        """
        self.model.plot(n_bins=n_bins)
def bimodal_gauss(data,pm):
    '''Run MCMC to get a regression on a bimodal normal distribution.

    ``data[pm]`` is the observed sample. Returns the tuple
    ``(p, mu1, 0.1, mu2, 0.1, M)``: posterior means of ``p``, ``mean1`` and
    ``mean2``, two constant 0.1 entries, and the sampler ``M``.
    Side effect: traces are stored in the pickle database 'metals.pickle'.
    '''
    # Initial guesses for the two means: half and twice the sample mean
    m1 = np.mean(data[pm])/2.
    m2 = np.mean(data[pm])*2.
    dm = m2 - m1
    size = len(data[pm])
    ### set up model
    p = Uniform( "p", 0.2 , 0.8) #this is the fraction that come from mean1 vs mean2
    # p = distributions.truncated_normal_like('p', mu=0.5, tau=0.001, a=0., b=1.)
    # p = Normal( 'p', mu=(1.*sum(comp0==1))/size, tau=1./0.1**2 ) # attention: wings!, tau = 1/sig^2
    # p = Normal( 'p', mu=0.5, tau=1./0.1**2 ) # attention: wings!, tau = 1/sig^2
    ber = Bernoulli( "ber", p = p, size = size) # produces 1 with proportion p
    precision = Gamma('precision', alpha=0.01, beta=0.01)
    dmu = Normal( 'dmu', dm, tau=1./0.05**2 ) # [PS] give difference between means, finite
    # dmu = Lognormal( 'dmu', 0.3, tau=1./0.1**2)
    mean1 = Normal( "mean1", mu = m1, tau = 1./0.1**2 ) # better to use Normals versus Uniforms,
                                                         # if not truncated
    mean2 = Normal( "mean2", mu = mean1 + dmu, tau = 1./0.1**2 ) # tau is 1/sig^2
    # Per-observation mean: mean1 where ber is 1, mean2 where ber is 0
    @deterministic
    def mean( ber = ber, mean1 = mean1, mean2 = mean2):
        return ber*mean1 + (1-ber)*mean2
    obs = Normal( "obs", mean, precision, value = data[pm], observed = True)
    model = Model( {"p":p, "precision": precision, "mean1": mean1, "mean2":mean2, "obs":obs} )
    from pymc import MCMC, Matplot
    # The sampler is built from every local defined so far (locals()), not
    # from ``model``; adding, removing or renaming locals above changes its input.
    M = MCMC(locals(), db='pickle', dbname='metals.pickle')
    iter = 3000; burn = 2000; thin = 10
    M.sample(iter=iter, burn=burn, thin=thin)
    M.db.commit()
    mu1 = np.mean(M.trace('mean1')[:])
    mu2 = np.mean(M.trace('mean2')[:])
    p = np.mean(M.trace('p')[:])
    # NOTE(review): the two 0.1 entries presumably stand for the fixed sigma
    # implied by tau = 1/0.1**2 on the mean priors - confirm with callers.
    return p, mu1, 0.1, mu2, 0.1, M
class LeagueModel(object): """MCMC model of a football league.""" def __init__(self, fname): super(LeagueModel, self).__init__() league = fuba.League(fname) N = len(league.teams) #dummy future games future_games = [[league.teams["Werder Bremen"],league.teams["Dortmund"]]] self.goal_rate = np.empty(N,dtype=object) self.match_rate = np.empty(len(league.games)*2,dtype=object) self.match_goals_future = np.empty(len(future_games)*2,dtype=object) self.home_adv = Normal(name = 'home_adv',mu=0,tau=10.) for t in league.teams.values(): print t.name,t.team_id self.goal_rate[t.team_id] = Exponential('goal_rate_%i'%t.team_id,beta=1) for game in range(len(league.games)): self.match_rate[2*game] = Poisson('match_rate_%i'%(2*game), mu=self.goal_rate[league.games[game].hometeam.team_id] + self.home_adv, value=league.games[game].homescore, observed=True) self.match_rate[2*game+1] = Poisson('match_rate_%i'%(2*game+1), mu=self.goal_rate[league.games[game].hometeam.team_id], value=league.games[game].homescore, observed=True) for game in range(len(future_games)): self.match_goals_future[2*game] = Poisson('match_goals_future_%i'%(2*game), mu=self.goal_rate[future_games[game][0].team_id] + self.home_adv) self.match_goals_future[2*game+1] = Poisson('match_goals_future_%i'%(2*game+1), mu=self.goal_rate[future_games[game][1].team_id]) def run_mc(self,nsample = 10000,interactive=False): """run the model using mcmc""" from pymc.Matplot import plot from pymc import MCMC self.M = MCMC(self) if interactive: self.M.isample(iter=nsample, burn=1000, thin=10) else: self.M.sample(iter=nsample, burn=1000, thin=10) plot(self.M)
def run_mc(self, nsample=10000, interactive=False):
    """Sample this model with MCMC, keep the sampler on ``self.M`` and plot it.

    ``isample`` is used when ``interactive`` is true, ``sample`` otherwise;
    both run with burn=1000 and thin=10.
    """
    from pymc.Matplot import plot
    from pymc import MCMC

    self.M = MCMC(self)
    draw = self.M.isample if interactive else self.M.sample
    draw(iter=nsample, burn=1000, thin=10)
    plot(self.M)
def fit_std_curve_by_pymc(i_vals, i_sds, dpx_concs):
    """Fit ``ka`` and ``kd`` of the quenching curve by MCMC and plot the result.

    :param i_vals: observed values, one per concentration
    :param i_sds: standard deviations of the observed values
    :param dpx_concs: concentrations; the first one must be zero
    :returns: tuple ``(ka_vals, kd_vals)`` of sampled traces
    """
    import pymc
    from pymc import Uniform, stochastic, deterministic, MCMC
    from pymc import Matplot

    # Define prior distributions for both Ka and Kd
    ka = Uniform('ka', lower=0, upper=1000)
    kd = Uniform('kd', lower=0, upper=1000)

    @stochastic(plot=True, observed=True)
    def quenching_model(ka=ka, kd=kd, value=i_vals):
        pred_i = quenching_func(ka, kd, dpx_concs)
        # The first concentration should always be zero (the unquenched
        # point), so that is asserted here. The first point is skipped in the
        # likelihood: its error is 0 when computed by well (the ratio is
        # always 1), which would give nan, and the model predicts 1 there
        # anyway so it contributes nothing to the fit.
        assert dpx_concs[0] == 0
        deviation = value[1:] - pred_i[1:]
        return -np.sum(deviation**2 / (2 * i_sds[1:]**2))

    sampler = MCMC(pymc.Model([ka, kd, quenching_model]))
    sampler.sample(iter=155000, burn=5000, thin=150)
    Matplot.plot(sampler)

    plt.figure()
    ka_vals = sampler.trace('ka')[:]
    kd_vals = sampler.trace('kd')[:]
    # Overlay at most 1000 sampled curves
    n_curves = min(1000, len(ka_vals))
    for idx in range(n_curves):
        curve = quenching_func(ka_vals[idx], kd_vals[idx], dpx_concs)
        plt.plot(dpx_concs, curve, alpha=0.01, color='r')
    plt.errorbar(dpx_concs, i_vals, yerr=i_sds, linestyle='', marker='o',
                 color='k', linewidth=2)
    return (ka_vals, kd_vals)
def bayesian_regression(self, Methodology):
    """Fit a straight line to ``self.x_array`` / ``self.y_array`` by MCMC.

    A degree-1 ``polyfit`` provides the initial guess handed to
    ``self.lr_ChiSq``; the resulting model is sampled for 10000 iterations
    with 1000 burned.

    :param Methodology: not used by this method
    :returns: OrderedDict with keys 'methodology', 'm', 'n', 'm_error',
        'n_error' (posterior means and standard deviations)
    """
    fit_dict = OrderedDict()
    fit_dict['methodology'] = r'Inference $\chi^{2}$ model'

    # Initial guess for the fitting
    Np_lsf = polyfit(self.x_array, self.y_array, 1)
    m_0, n_0 = Np_lsf[0], Np_lsf[1]

    MCMC_dict = self.lr_ChiSq(self.x_array, self.y_array, m_0, n_0)
    myMCMC = MCMC(MCMC_dict)
    myMCMC.sample(iter=10000, burn=1000)

    # Compute the summary once instead of once per extracted field.
    stats = myMCMC.stats()
    fit_dict['m'] = stats['m']['mean']
    fit_dict['n'] = stats['n']['mean']
    fit_dict['m_error'] = stats['m']['standard deviation']
    fit_dict['n_error'] = stats['n']['standard deviation']
    return fit_dict
def run_mc(self, nsample=10000, interactive=False, doplot=False, verbose=0):
    """Run the model through MCMC; the sampler is kept on ``self.M``.

    :param nsample: number of iterations
    :param interactive: use ``isample`` instead of ``sample``
    :param doplot: plot the sampler after the run
    :param verbose: verbosity forwarded to the sampling call
    """
    from pymc import MCMC

    self.M = MCMC(self)
    options = dict(iter=nsample, burn=1000, thin=10, verbose=verbose)
    if interactive:
        self.M.isample(**options)
    else:
        self.M.sample(**options)

    if doplot:
        from pymc.Matplot import plot
        plot(self.M)
def main():
    """Simulate statements from three students, then sample a shared rate."""
    # Simulation with three students of known rates
    simulated = [
        PoissonStudent("arnaud", 1),
        PoissonStudent("francois", 1),
        PoissonStudent("david", 0.5),
    ]
    statements = Environment(simulated).simulate(1000, verbose=True)

    # Inference: one student per observed actor, all sharing the prior ``lam``
    student_names = set(entry['actor'] for entry in statements)
    lam = Uniform('lam', lower=0, upper=1)
    students = [PoissonStudent(name=name, lam=lam) for name in student_names]
    env = Environment(students, statements)

    params = [lam]
    for student in students:
        params.extend(student.params)

    m = MCMC(params)
    m.sample(iter=10000, burn=1000, thin=10)
    hist(m.trace('lambda_david')[:])
    show()
def load_pymc_database(self, Database_address):
    """Load a pickled pymc database and rebuild an MCMC object from it.

    Sets ``self.pymc_database``, ``self.traces_list`` (the first entry of
    the database's ``trace_names``), ``self.Traces_dict`` (name -> trace)
    and ``self.dbMCMC``.

    :param Database_address: location handed to ``database.pickle.load``
    :returns: None
    """
    # In case the database is open from a previous use. Identity test: a
    # ``!= None`` comparison would go through the object's own __ne__.
    if self.pymc_database is not None:
        self.pymc_database.close()

    # Load the pymc output database
    self.pymc_database = database.pickle.load(Database_address)

    # All the traces from the MCMC (stochastic and deterministic)
    self.traces_list = self.pymc_database.trace_names[0]
    self.Traces_dict = {
        name: self.pymc_database.trace(name) for name in self.traces_list
    }

    # Generate a MCMC object to recover all the data from the run
    self.dbMCMC = MCMC(self.Traces_dict, self.pymc_database)
    return
def fit_model():
    """Sample ``disaster_model`` and print / plot some of its traces."""
    sampler = MCMC(disaster_model)
    sampler.sample(iter=10000, burn=1000, thin=10)

    switchpoint_trace = sampler.trace('switchpoint')[:]
    print('switchpoint: ', switchpoint_trace)

    late_mean_hist = hist(sampler.trace('late_mean')[:])
    print('hist: ', late_mean_hist)

    plot(sampler)
def compute(self, observation, prediction, observation_name='observation',
            prediction_name='prediction', mcmc_iter=110000, mcmc_burn=10000,
            effect_size_type='mode',  # 'mean'
            assume_normal=False, **kwargs):
    """Sample the model for two groups and score their effect size.

    The effect size is the difference of the posterior group means divided
    by the square root of the pooled posterior variance. The score is built
    from the statistic named by ``effect_size_type`` and stored, together
    with the HDI bounds, on ``self``.

    :returns: ``self.score``
    :raises ImportError: when ``pymc`` is falsy
    """
    if not pymc:
        raise ImportError('Module best or pymc could not be loaded!')

    groups = {observation_name: observation, prediction_name: prediction}
    M = MCMC(self.make_model(groups, assume_normal))
    M.sample(iter=mcmc_iter, burn=mcmc_burn)

    N1 = len(M.get_node(observation_name).value)
    N2 = len(M.get_node(prediction_name).value)

    diff_means = M.trace('group1_mean')[:] - M.trace('group2_mean')[:]
    std1 = M.trace('group1_std')[:]
    std2 = M.trace('group2_std')[:]
    pooled_var = ((N1 - 1) * std1**2 + (N2 - 1) * std2**2) / (N1 + N2 - 2)
    self.effect_size = diff_means / np.sqrt(pooled_var)

    stats = best.calculate_sample_statistics(self.effect_size)
    score = best_effect_size(stats[effect_size_type])
    self.score = score
    score.mcmc_iter = mcmc_iter
    score.mcmc_burn = mcmc_burn
    score.data_size = [N1, N2]
    score.HDI = (stats['hdi_min'], stats['hdi_max'])
    self.HDI = score.HDI
    return self.score
def test_non_missing(self):
    """
    Test to ensure that masks without any missing values are not imputed.
    """
    # Masked array built with a sentinel test (-999) on random normal draws
    fake_data = rnormal(0, 1, size=10)
    m = ma.masked_array(fake_data, fake_data == -999)

    # Priors
    mu = Normal('mu', mu=0, tau=0.0001)
    s = Uniform('s', lower=0, upper=100, value=10)
    tau = s**-2

    # Likelihood with missing data
    x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

    # Instantiate and run the sampler
    sampler = MCMC([mu, s, tau, x])
    sampler.sample(20000, 19000, progress_bar=0)

    # Ensure likelihood does not have a trace
    assert_raises(AttributeError, x.__getattribute__, 'trace')
def test_non_missing(self):
    """
    Test to ensure that masks without any missing values are not imputed.
    """
    fake_data = rnormal(0, 1, size=10)
    mask = fake_data == -999
    m = ma.masked_array(fake_data, mask)

    # Priors
    mu = Normal('mu', mu=0, tau=0.0001)
    s = Uniform('s', lower=0, upper=100, value=10)
    tau = s ** -2

    # Likelihood with missing data
    x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

    # Build the sampler from all four nodes and run it
    nodes = [mu, s, tau, x]
    M = MCMC(nodes)
    M.sample(20000, 19000, progress_bar=0)

    # Ensure likelihood does not have a trace
    assert_raises(AttributeError, x.__getattribute__, 'trace')
def Outliers_Krough(self):
    """Flag outliers of ``self.x_array`` / ``self.y_array`` by MCMC.

    A BCES regression supplies the initial slope and intercept. Points whose
    mean 'inlier' trace value falls below ``self.prob_threshold`` are
    returned as outliers.

    :returns: OrderedDict with keys 'methodology', 'x_coords_outliers' and
        'y_coords_outliers'
    """
    fit_dict = OrderedDict()
    fit_dict['methodology'] = r'Outliers Krough'

    # Initial guess for fitting
    Bces_guess = self.bces_regression()
    m_0 = Bces_guess['m'][0]
    n_0 = Bces_guess['n'][0]
    Spread_vector = ones(len(self.x_array))

    # Model for outliers detection
    Outliers_dect_dict = self.inference_outliers(
        self.x_array, self.y_array, m_0, n_0, Spread_vector)
    mcmc = MCMC(Outliers_dect_dict)
    mcmc.sample(100000, 20000)

    # Extract the data with the outliers coordinates
    inlier_trace = mcmc.trace('inlier')[:]
    probability_of_points = inlier_trace.astype(float).mean(0)
    fit_dict['x_coords_outliers'] = self.x_array[probability_of_points < self.prob_threshold]
    fit_dict['y_coords_outliers'] = self.y_array[probability_of_points < self.prob_threshold]
    return fit_dict
def __init__(self, lattice=None, data=None, tau_x=1, tau_y=1, phi=0.1):
    """Set up observed nodes on a lattice and initialise the sampler.

    :param lattice: graph whose nodes index ``data``; defaults to a fresh
        ``N`` x ``N`` grid graph
    :param data: array with one entry per lattice node; defaults to an
        ``N`` x ``N`` array of zeros
    :param tau_x: stored on ``self.tau_x``
    :param tau_y: stored on ``self.tau_y``
    :param phi: weight given to every edge that carries no edge data
    :raises Exception: if the number of nodes differs from ``data.size``

    Note: edges of a caller-supplied ``lattice`` that carry no data are
    given a weight in place.
    """
    # Defaults are built per call: a default graph created once in the
    # signature would be shared by, and mutated across, all instances.
    if lattice is None:
        lattice = ngc.grid_graph(dim=[N, N])
    if data is None:
        data = zeros((N, N))

    # sanity test
    if lattice.number_of_nodes() != data.size:
        raise Exception('data and lattice sizes do not match',
                        '%d vs %d' % (data.size, lattice.number_of_nodes()))

    self.num_nodes = lattice.number_of_nodes()
    self.phi = phi
    # Keep the values that were passed in (they used to be overwritten by 1).
    self.tau_x = tau_x
    self.tau_y = tau_y

    # just in case the input decides to give us weights: only edges without
    # any data receive the default weight, in both directions
    for e in lattice.edges_iter():
        if not lattice.get_edge_data(e[0], e[1]):
            lattice.edge[e[0]][e[1]] = {'weight': phi}
            lattice.edge[e[1]][e[0]] = {'weight': phi}

    self.lattice, self.data = lattice, data

    # Square matrix reserved for the GMRF precision. The shape must be a
    # tuple: zeros(n, n) would pass n as the dtype argument.
    self.Lambda = zeros((self.num_nodes, self.num_nodes))

    # One observed normal node per lattice node; v is a tuple index of the grid.
    # NOTE(review): Sigma_Y is not defined in this block - presumably a
    # module-level name; confirm.
    self.Y = [Normal('Y_' + str(v), mu=0.5, tau=Sigma_Y**-1, value=data[v],
                     observed=True)
              for v in lattice.nodes_iter()]
    MCMC.__init__(self, [self.Y])
class test_tiny_MCMC(TestCase):
    """Plotting check on a very short ``disaster_model`` chain."""

    # Instantiate the sampler and draw only ten samples
    M = MCMC(disaster_model)
    M.sample(10, progress_bar=False)

    def test_plot(self):
        """Plot the whole sampler (skipped without plotting support)."""
        if PLOT:
            plot(self.M, path=DIR, verbose=0)
        else:
            raise nose.SkipTest
def compute(self, observation, prediction, observation_name='observation',
            prediction_name='prediction', mcmc_iter=110000, mcmc_burn=10000,
            effect_size_type='mode',  # 'mean'
            **kwargs):
    """Sample the model for two groups and score their effect size.

    Stores ``mcmc_iter``, ``mcmc_burn``, ``data_size``, ``effect_size``,
    ``HDI`` and ``score`` on ``self``. The effect size is the difference of
    the posterior group means over the square root of the pooled posterior
    variance.

    :returns: ``self.score``
    """
    self.mcmc_iter, self.mcmc_burn = mcmc_iter, mcmc_burn

    groups = {observation_name: observation, prediction_name: prediction}
    M = MCMC(self.make_model(groups))
    M.sample(iter=mcmc_iter, burn=mcmc_burn)

    N1 = len(M.get_node(observation_name).value)
    N2 = len(M.get_node(prediction_name).value)
    self.data_size = [N1, N2]

    diff_means = M.trace('group1_mean')[:] - M.trace('group2_mean')[:]
    std1 = M.trace('group1_std')[:]
    std2 = M.trace('group2_std')[:]
    pooled_var = ((N1 - 1) * std1**2 + (N2 - 1) * std2**2) / (N1 + N2 - 2)
    self.effect_size = diff_means / np.sqrt(pooled_var)

    stats = best.calculate_sample_statistics(self.effect_size)
    self.HDI = (stats['hdi_min'], stats['hdi_max'])
    self.score = best_effect_size(stats[effect_size_type])
    return self.score
def bimodal_gauss(data,pm,dmin=0.3):
    '''Run MCMC to get a regression on a bimodal normal distribution.

    ``data[pm]`` is the observed sample; ``dmin`` is added to ``mean1`` to
    form the lower bound of the prior on ``mean2``. Returns the tuple
    ``(p, mu1, sig1, mu2, sig2, M)`` of posterior means plus the sampler.
    Side effect: traces are stored in the pickle database 'metals.pickle'.
    '''
    size = len(data[pm])
    ### set up model
    p = Uniform( "p", 0.2 , 0.8) #this is the fraction that come from mean1 vs mean2
    # p = distributions.truncated_normal_like('p', mu=0.5, tau=0.001, a=0., b=1.)
    # p = Normal( 'p', mu=(1.*sum(comp0==1))/size, tau=1./0.1**2 ) # attention: wings!, tau = 1/sig^2
    # p = Normal( 'p', mu=0.5, tau=1./0.1**2 ) # attention: wings!, tau = 1/sig^2
    ber = Bernoulli( "ber", p = p, size = size) # produces 1 with proportion p
    precision = Gamma('precision', alpha=0.01, beta=0.01)
    mean1 = Uniform( "mean1", -0.5, 1.0) # if not truncated
    sig1 = Uniform( 'sig1', 0.01, 1.)
    mean2 = Uniform( "mean2", mean1 + dmin, 1.5)
    sig2 = Uniform( 'sig2', 0.01, 1.)
    pop1 = Normal( 'pop1', mean1, 1./sig1**2) # tau is 1/sig^2
    pop2 = Normal( 'pop2', mean2, 1./sig2**2)
    # Per-observation value: pop1 where ber is 1, pop2 where ber is 0
    @deterministic
    def bimod(ber = ber, pop1 = pop1, pop2 = pop2): # value determined from parents completely
        return ber*pop1 + (1-ber)*pop2
    obs = Normal( "obs", bimod, precision, value = data[pm], observed = True)
    model = Model( {"p":p, "precision": precision, "mean1": mean1, 'sig1': sig1, "mean2":mean2, 'sig2':sig2, "obs":obs} )
    from pymc import MCMC, Matplot
    # The sampler is built from every local defined so far (locals()), not
    # from ``model``; adding, removing or renaming locals above changes its input.
    M = MCMC(locals(), db='pickle', dbname='metals.pickle')
    iter = 10000; burn = 9000; thin = 10
    M.sample(iter=iter, burn=burn, thin=thin)
    M.db.commit()
    # Posterior means of the sampled traces
    mu1 = np.mean(M.trace('mean1')[:])
    sig1= np.mean(M.trace('sig1')[:])
    mu2 = np.mean(M.trace('mean2')[:])
    sig2= np.mean(M.trace('sig2')[:])
    p = np.mean(M.trace('p')[:])
    return p, mu1, sig1, mu2, sig2, M
class test_MCMC(TestCase):
    """Checks on a ``disaster_model`` sampler backed by a pickle database."""

    # Instantiate the shared sampler, sample once and close its database
    M = MCMC(disaster_model, db='pickle')
    M.sample(2000, 100, thin=15, verbose=0, progress_bar=False)
    M.db.close()

    def test_instantiation(self):
        """The sampler exposes the expected node counts and observed data."""
        sampler = self.M
        assert_equal(len(sampler.stochastics), 3)
        assert_equal(len(sampler.observed_stochastics), 1)
        assert_array_equal(sampler.disasters.value,
                           disaster_model.disasters_array)

    def test_plot(self):
        """Plot ``early_mean`` (skipped without plotting support)."""
        if PLOT:
            plot(self.M.early_mean, path=DIR, verbose=0)
        else:
            raise nose.SkipTest

    def test_autocorrelation(self):
        """Plot the autocorrelation of ``early_mean`` (skipped without plotting)."""
        if PLOT:
            autocorrelation(self.M.early_mean, path=DIR, verbose=0)
        else:
            raise nose.SkipTest

    def test_stats(self):
        """Node-level and sampler-level statistics must not raise."""
        self.M.early_mean.stats()
        self.M.stats()

    def test_summary(self):
        """Printing the summary of ``rate`` must not raise."""
        self.M.rate.summary()

    def test_stats_after_reload(self):
        """Statistics can be computed from a database reloaded from disk."""
        reloaded = database.pickle.load('MCMC.pickle')
        M2 = MCMC(disaster_model, db=reloaded)
        M2.stats()
        reloaded.close()
        os.remove('MCMC.pickle')
def analizeM():
    """Sample the ``dm`` model, show traces and report its step methods."""
    M = MCMC(dm)
    print("M: ", M)
    M.sample(iter=10000, burn=1000, thin=10)

    switchpoint_trace = M.trace('switchpoint')[:]
    print("M t: ", switchpoint_trace)
    hist(M.trace('late_mean')[:])
    plot(M)

    # Step methods assigned to each stochastic of the model
    methods = M.step_method_dict
    print("M smd dm sp: ", methods[dm.switchpoint])
    print("M smd dm em: ", methods[dm.early_mean])
    print("M smd dm lm: ", methods[dm.late_mean])

    # Switch late_mean to a Metropolis step with a fixed proposal sd
    M.use_step_method(Metropolis, dm.late_mean, proposal_sd=2.)
def load_pymc_database(self, Database_address):
    """Open a pickled pymc database and rebuild an MCMC object around it.

    Any database left open on ``self.pymc_database`` is closed first. Sets
    ``self.pymc_database``, ``self.traces_list``, ``self.Traces_dict``
    (trace name -> trace object) and ``self.dbMCMC``.

    :param Database_address: location handed to ``database.pickle.load``
    :returns: None
    """
    # ``is not None`` tests identity and does not depend on how the database
    # object implements comparison.
    if self.pymc_database is not None:
        self.pymc_database.close()

    self.pymc_database = database.pickle.load(Database_address)

    # First entry of trace_names: all the traces from the MCMC (stochastic
    # and deterministic)
    self.traces_list = self.pymc_database.trace_names[0]
    self.Traces_dict = {
        name: self.pymc_database.trace(name) for name in self.traces_list
    }

    # Generate a MCMC object to recover all the data from the run
    self.dbMCMC = MCMC(self.Traces_dict, self.pymc_database)
    return
class test_MCMC(TestCase):
    """Checks on a ``disaster_model`` sampler backed by a text database."""

    # The database lives under DIR, which is created when missing
    dbname = DIR + 'test_MCMC'
    if not os.path.exists(DIR):
        os.mkdir(DIR)

    # Instantiate the shared sampler and sample once
    M = MCMC(disaster_model, db='txt', dbname=dbname)
    M.sample(2000, 100, thin=15, verbose=0, progress_bar=False)

    def test_instantiation(self):
        """The sampler exposes the expected node counts and observed data."""
        sampler = self.M
        assert_equal(len(sampler.stochastics), 3)
        assert_equal(len(sampler.observed_stochastics), 1)
        assert_array_equal(sampler.disasters.value,
                           disaster_model.disasters_array)

    def test_plot(self):
        """Plot ``early_mean`` (skipped without plotting support)."""
        if PLOT:
            plot(self.M.early_mean, path=DIR, verbose=0)
        else:
            raise nose.SkipTest

    def test_autocorrelation(self):
        """Plot the autocorrelation of ``early_mean`` (skipped without plotting)."""
        if PLOT:
            autocorrelation(self.M.early_mean, path=DIR, verbose=0)
        else:
            raise nose.SkipTest

    def test_stats(self):
        """Node-level and sampler-level statistics must not raise."""
        self.M.early_mean.stats()
        self.M.stats()

    def test_float_iter(self):
        """Sampling accepts a non-integer iteration count."""
        self.M.sample(10.5, verbose=0, progress_bar=False)
def estimate_failures_from_counts(counts, n_samples=10000, burn=None, thin=10,
                                  alpha_p=1, beta_p=1, alpha_e=1, beta_e=10):
    """Fit a noisy-labeler model to pattern counts and return posterior medians.

    :param counts: array of counts with one axis of length 2 per labeler;
        entry ``[i0, i1, ...]`` counts the items on which labeler k
        output ``ik``
    :param n_samples: number of samples to run MCMC for
    :param burn: burn-in; defaults to ``n_samples // 2``
    :param thin: thinning rate, sample every k samples from the Markov chain
    :param alpha_p, beta_p: beta parameters for the true positive rate
    :param alpha_e, beta_e: beta parameters for the noise rates
    :returns: tuple ``(p, e_pos, e_neg)`` of trace medians; the two error
        rates are medians taken along axis 0

    Side effects: prints ``counts`` and writes ``out.csv``.
    """
    if burn is None:
        # Floor division keeps the burn-in an integer under Python 3 as well.
        burn = n_samples // 2
    S = counts.sum()
    N = len(counts.shape)

    p_label = Beta('p_label', alpha=alpha_p, beta=beta_p)  # prior on true label
    e_pos = Beta('e_pos', alpha_e, beta_e, size=N)  # error rate if label = 1
    e_neg = Beta('e_neg', alpha_e, beta_e, size=N)  # error rate if label = 0
    print(counts)

    @deterministic(plot=False)
    def patterns(p_label=p_label, e_pos=e_pos, e_neg=e_neg):
        # probability that the noisy labelers output pattern p
        P = np.zeros((2,)*N)
        for pat in itertools.product([0, 1], repeat=N):
            # np.prod: the np.product alias no longer exists in NumPy 2.
            P[pat] = p_label*np.prod(
                [1-e_pos[i] if pat[i] == 1 else e_pos[i] for i in range(N)])
            P[pat] += (1-p_label)*np.prod(
                [e_neg[i] if pat[i] == 1 else 1-e_neg[i] for i in range(N)])
        assert np.abs(P.sum() - 1) < 1e-6
        return P.ravel()

    # NOTE(review): pattern_counts is not listed in ``variables`` below -
    # confirm the sampler still picks it up as a child of ``patterns``.
    pattern_counts = Multinomial('pattern_counts', n=S, p=patterns,
                                 value=counts.ravel(), observed=True)

    variables = [p_label, e_pos, e_neg, patterns]
    model = MCMC(variables, verbose=3)
    model.sample(iter=n_samples, burn=burn, thin=thin)
    model.write_csv('out.csv', ['p_label', 'e_pos', 'e_neg'])

    p = np.median(model.trace('p_label')[:])
    e_pos = np.median(model.trace('e_pos')[:], 0)
    e_neg = np.median(model.trace('e_neg')[:], 0)
    return p, e_pos, e_neg
class test_MCMC(TestCase):
    """Checks on an MCMC sampler built from ``DisasterModel`` (``db='pickle'``).

    The sampler is created, run and its database closed once, in the class
    body; the resulting ``M`` is shared by every test method below.
    """

    # Instantiate samplers
    M = MCMC(DisasterModel, db='pickle')

    # Sample
    M.sample(4000, 2000, verbose=0)
    M.db.close()

    def test_instantiation(self):
        # Check stochastic arrays
        assert_equal(len(self.M.stochastics), 3)
        assert_equal(len(self.M.observed_stochastics), 1)
        assert_array_equal(self.M.D.value, DisasterModel.disasters_array)

    def test_plot(self):
        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        plot(self.M.e, path=DIR, verbose=0)

    def test_autocorrelation(self):
        if not PLOT:
            raise nose.SkipTest

        # Plot samples
        autocorrelation(self.M.e, path=DIR, verbose=0)

    def test_stats(self):
        # Only checks that both summaries run; results are not inspected.
        self.M.e.stats()
        self.M.stats()

    def test_stats_after_reload(self):
        # NOTE(review): 'MCMC.pickle' is presumably the file written by the
        # class-level sampler's pickle backend -- confirm.
        db = database.pickle.load('MCMC.pickle')
        try:
            M2 = MCMC(DisasterModel, db=db)
            M2.stats()
        finally:
            # Always release the database and remove the file, so a failing
            # stats() call does not leave an open handle or a stale pickle.
            db.close()
            os.remove('MCMC.pickle')
def differenceOfmeans(humanMean=4.5, sampleSize=50, variance=0.2):
    """Sample bot ratings and subtract the mean sampled human rating.

    Builds three TruncatedNormal variables bounded to [1, 10]: a ground-truth
    ``mu`` centred on ``humanMean``, and ``botOutput`` / ``humanOutput``
    centred on ``mu``. All three share the precision ``1 / variance``.

    Parameters
    ----------
    humanMean : number
        Centre of the prior on the ground-truth rating.
    sampleSize : int
        Number of MCMC iterations (no burn-in, no thinning).
    variance : number
        Variance shared by all three distributions; must be non-zero.

    Returns
    -------
    The ``botOutput`` trace minus the mean of the ``humanOutput`` trace.
    """
    # note that tau is not sigma: sigma^2 = 1 / tau
    # Float literal avoids integer truncation (e.g. 1 / 2 == 0) on Python 2
    # when an integer variance is passed.
    t = 1.0 / variance

    # what is the probability that an analyst would give this image the same rating?
    mu = TruncatedNormal('mu', mu=humanMean, tau=t, a=1, b=10)  # hypothetical ground truth
    botOutput = TruncatedNormal('botOutput', mu=mu, tau=t, a=1, b=10)
    humanOutput = TruncatedNormal('humanOutput', mu=mu, tau=t, a=1, b=10)

    # when we have data from the model we can use this here, like this:
    # d = pymc.Binomial('d', n=n, p=theta, value=np.array([0.,1.,3.,5.]), observed=True)

    sim = MCMC([mu, botOutput, humanOutput])
    sim.sample(sampleSize, 0, 1)

    # From here on the names refer to the sampled traces, not the variables.
    botOutput = sim.trace("botOutput")[:]
    # if humans only give ratings at the 0.5 interval, not smaller:
    # humanOutput = round_to_half(sim.trace("humanOutput")[:])
    humanOutput = sim.trace("humanOutput")[:]

    # difference of the means
    # but what we care about is the mean of the human output for each image.
    difference = botOutput - humanOutput.mean()
    return difference
# Ph21 Set 5
# Aritra Biswas

# coin_mcmc.py
# Run MCMC on coin_model.py

import coin_model
from pymc import MCMC
from pymc.Matplot import plot

# Wrap the whole coin_model module in a sampler.
M = MCMC(coin_model)

# 10000 iterations, keeping every draw (no burn-in, no thinning).
M.sample(iter=10000, burn=0, thin=1)

# Bare Python 2 print: emits an empty line before the plots are produced.
print

# Plot every traced variable, then summarise pheads.
plot(M)
M.pheads.summary()
coefs[tName] = Normal(tName,0,0.001,value=sp.rand()-0.5) termList.append(d*coefs[tName]) # get individual edge probabilities @deterministic(trace=False,plot=False) def probs(termList=termList): probs = 1./(1+sp.exp(-1*sum(termList))) probs[sp.diag_indices_from(probs)]= 0 return(probs) # define the outcome as outcome = Bernoulli('outcome',probs,value=adjMat,observed=True) return(locals()) if __name__ == '__main__': # load the prison data with open('prison.dat','r') as f: rowList = list() for l in f: rowList.append([int(x) for x in l.strip().split(' ')]) adjMat = sp.array(rowList) # make the model as an MCMC object m = makeModel(adjMat) mc = MCMC(m) # estimate mc.sample(30000,1000,50)
from pylab import *

# The mu and tau are in log units; to get to log units,
# do the following
# (has mean around 1e2, with a variance of 9 logs in base 10)
mean_b10 = 2
var_b10 = 9
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("Setting mean (base 10) to %f, variance (base 10) to %f"
      % (mean_b10, var_b10))

# The lognormal variable
k = Lognormal('k', mu=np.log(10 ** mean_b10),
              tau=1. / (np.log(10) * np.log(10 ** var_b10)))

# Sample it
m = MCMC(Model([k]))
m.sample(iter=50000)

ion()

# Pull the trace once and reuse it for both logarithms.
k_samples = m.trace('k')[:]
y = log(k_samples)
y10 = log10(k_samples)

# Plot the distribution in base e
figure()
hist(y, bins=100)
print("")
print("Mean, base e: %f; Variance, base e: %f" % (mean(y), var(y)))

# Plot the distribution in base 10
figure()
hist(y10, bins=100)
def test_regression_155():
    """thin > iter"""
    # The thinning interval (100) deliberately exceeds the number of
    # iterations (10); the call only has to complete without raising.
    sampler = MCMC(disaster_model, db='ram')
    sampler.sample(iter=10, burn=0, thin=100, progress_bar=0)