def setUp(self):
    # model a -> b
    bn = {}
    self.arr = []
    bn[0] = BN(domain=Domain(),
               new_domain_variables={'a': [0, 1], 'b': [0, 1]})
    bn[0].add_cpts([
        CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
        CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]), child='b')
    ])
    self.arr.append([('a', 'b')])
    bn[1] = BN(domain=Domain(),
               new_domain_variables={'a': [0, 1], 'b': [0, 1], 'c': [0, 1]})
    bn[1].add_cpts([
        CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
        CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]), child='b'),
        CPT(Factor(variables=['c', 'b'], data=[0.1, 0.9, 0.2, 0.8]), child='c')
    ])
    self.arr.append([('a', 'b'), ('b', 'c')])
    self.cbn = [CBN.from_bn(bn[i]) for i in bn.keys()]
def runTest(self):
    # results from R version 2.6.2 (2008-02-08) ISBN 3-900051-07-0
    r_dx2 = 3453.429
    r_dp = 2.2e-16
    r_ddf = 4
    # regression (NOT known correct! NOT from R)
    r_ix2 = 1.08094376504
    r_ip = 0.58247332853
    r_idf = 2
    vars = ['a', 'b']
    vals = {'a': [0, 1, 2], 'b': [0, 1, 2]}
    ddata = (vars, vals, vars,
             [(0, 0, 640), (0, 1, 1947), (0, 2, 648),
              (1, 0, 1709), (1, 1, 1364), (1, 2, 335),
              (2, 0, 0), (2, 1, 3357), (2, 2, 0)])
    iddata = (vars, vals, vars,
              [(0, 0, 994), (0, 1, 2359), (0, 2, 0),
               (1, 0, 1027), (1, 1, 2312), (1, 2, 0),
               (2, 0, 989), (2, 1, 2319), (2, 2, 0)])
    x2d = X2Separator(CompactFactor(ddata, domain=Domain()))
    x2i = X2Separator(CompactFactor(iddata, domain=Domain()))
    dp, dx2, dd = x2d.test_independ('a', 'b', set())
    self.assertEquals(dd, r_ddf)
    self.assertAlmostEquals(dx2, r_dx2, 3)
    self.assertAlmostEquals(dp, r_dp, 4)
    ip, ix2, id = x2i.test_independ('a', 'b', set())
    self.assertEquals(id, r_idf)
    self.assertAlmostEquals(ix2, r_ix2, 4)
    self.assertAlmostEquals(ip, r_ip, 4)
    # since there are more zero counts there should be fewer dof.
    self.assert_(id < dd)
    print ' x2 depend: p =', dp, 'x2 =', dx2, 'd =', dd
    print '!! x2 independ: p =', ip, 'x2 =', ix2, 'd =', id
    # regression (NOT known correct! NOT from R)
    r_dg2 = 4224.3017675
    r_dp = 0.0
    r_ddf = 4
    r_ig2 = 1.07917600592
    r_ip = 0.582988392395
    r_idf = 2
    g2d = G2Separator(CompactFactor(ddata, domain=Domain()))
    g2i = G2Separator(CompactFactor(iddata, domain=Domain()))
    dp, dg2, dd = g2d.test_independ('a', 'b', set())
    self.assertEquals(dd, r_ddf)
    self.assertAlmostEquals(dg2, r_dg2, 4)
    self.assertAlmostEquals(dp, r_dp, 4)
    ip, ig2, id = g2i.test_independ('a', 'b', set())
    self.assertEquals(id, r_idf)
    self.assertAlmostEquals(ig2, r_ig2, 4)
    self.assertAlmostEquals(ip, r_ip, 4)
    print '!! g2 depend: p =', dp, 'g2 =', dg2, 'd =', dd
    print '!! g2 independ: p =', ip, 'g2 =', ig2, 'd =', id
def runTest(self):
    # results from R version 2.6.2 (2008-02-08) ISBN 3-900051-07-0
    r_dx2 = 109.1547
    r_dp = 0
    r_ddf = 1
    r_ix2 = 0.1236
    r_ip = 0.7252
    r_idf = 1
    vars = ['a', 'b']
    vals = {'a': [0, 1], 'b': [0, 1]}
    ddata = (vars, vals, vars,
             [(0, 0, 1509), (0, 1, 3538), (1, 0, 1974), (1, 1, 2979)])
    iddata = (vars, vals, vars,
              [(0, 0, 1474), (0, 1, 3610), (1, 0, 1441), (1, 1, 3475)])
    x2d = X2Separator(CompactFactor(ddata, domain=Domain()))
    x2i = X2Separator(CompactFactor(iddata, domain=Domain()))
    dp, dx2, dd = x2d.test_independ('a', 'b', set())
    self.assertEquals(dd, r_ddf)
    self.assertAlmostEquals(dx2, r_dx2, 4)
    self.assertAlmostEquals(dp, r_dp, 4)
    ip, ix2, id = x2i.test_independ('a', 'b', set())
    self.assertEquals(id, r_idf)
    self.assertAlmostEquals(ix2, r_ix2, 4)
    self.assertAlmostEquals(ip, r_ip, 4)
    print ' x2 depend: p =', dp, 'x2 =', dx2, 'd =', dd
    print ' x2 independ: p =', ip, 'x2 =', ix2, 'd =', id
    # regression (NOT known correct! NOT from R)
    r_dg2 = 109.389800878
    r_dp = 0
    r_ddf = 1
    r_ig2 = 0.123551704118
    r_ip = 0.725213854926
    r_idf = 1
    g2d = G2Separator(CompactFactor(ddata, domain=Domain()))
    g2i = G2Separator(CompactFactor(iddata, domain=Domain()))
    dp, dg2, dd = g2d.test_independ('a', 'b', set())
    self.assertEquals(dd, r_ddf)
    self.assertAlmostEquals(dg2, r_dg2, 4)
    self.assertAlmostEquals(dp, r_dp, 4)
    ip, ig2, id = g2i.test_independ('a', 'b', set())
    self.assertEquals(id, r_idf)
    self.assertAlmostEquals(ig2, r_ig2, 4)
    self.assertAlmostEquals(ip, r_ip, 4)
    print '!! g2 depend: p =', dp, 'g2 =', dg2, 'd =', dd
    print '!! g2 independ: p =', ip, 'g2 =', ig2, 'd =', id
def setUp(self):
    from gPy.Variables import Domain
    self.bnm = BN(domain=Domain())  # don't use default domain
    self.bnm.from_dnet(read_dnet('Asia.dnet'))
    self.cptdict = self.bnm.factors
    # taken directly from Netica output
    self.marginals = [
        Factor((('VisitAsia'),), [0.99, 0.01]),
        Factor((('Tuberculosis'),), [0.9896, 0.0104]),
        Factor((('Smoking'),), [0.5, 0.5]),
        Factor((('Cancer'),), [0.945, 0.055]),
        Factor((('TbOrCa'),), [0.93517, 0.064828]),
        Factor((('XRay'),), [0.11029, 0.88971]),
        Factor((('Bronchitis'),), [0.55, 0.45]),
        Factor((('Dyspnea'),), [0.56403, 0.43597])
    ]
    # taken directly from Netica output
    self.cond_marginals = [
        Factor((('VisitAsia'),), [0.95192, 0.048077]),
        Factor((('Tuberculosis'),), [0, 1]),
        Factor((('Smoking'),), [0.52381, 0.47619]),
        # other marginals are conditional on these values
        #Factor((('Cancer'),),
        #       [1, 0]),
        #Factor((('TbOrCa'),),
        #       [0, 1]),
        Factor((('XRay'),), [0.98, 0.02]),
        Factor((('Bronchitis'),), [0.55714, 0.44286]),
        Factor((('Dyspnea'),), [0.21143, 0.78857])
    ]
def runTest(self):
    samples = GibbsSampler(self._minibn_do).samples(100000)
    data = CompactFactor(samples, domain=Domain())
    p = self._minibn_do.copy(copy_domain=True)
    p.estimate_parameters(data)
    self.failUnless(same_factor(distribution_of(p),
                                distribution_of(self._minibn_do),
                                dp=2, verbose=True))
def runTest(self):
    # construct factors of various sizes with no data
    for sz in xrange(6):
        vars = ['V' + str(i) for i in xrange(sz)]
        vals = dict([(v, [0, 1]) for v in vars])
        data = (vars, vals, vars, [])
        for v_on in subsetn(vars, sz):
            inst = []
            for v in vars:
                if v in v_on:
                    inst.append(1)
                else:
                    inst.append(0)
            data[3].append(tuple(inst + [0]))
        d = CompactFactor(data, domain=Domain())
        x2 = X2Separator(d)
        g2 = G2Separator(d)
        for a, b in pairs(vars):
            for s in powerset(set(vars) - set([a, b])):
                x2p, x2s, x2d = x2.test_independ(a, b, set(s))
                g2p, g2s, g2d = g2.test_independ(a, b, set(s))
                # no degrees of freedom (every count is zero)
                self.assertEquals(x2d, 0)
                self.assertEquals(g2d, 0)
                # default to independent
                self.assertEquals(x2p, 1)
                self.assertEquals(g2p, 1)
                # zero statistics (no data)
                self.assertEquals(x2s, 0)
                self.assertEquals(g2s, 0)
def rand_factor(vs):
    n = reduce(operator.mul, [len(vs[v]) for v in vs])
    f = Factor(variables=vs.keys(),
               data=rand_factor_data(n),
               domain=Domain(),
               check=True,
               new_domain_variables=vs)
    return f
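# Hedged usage sketch (added for illustration, not part of the original
# tests): rand_factor builds a Factor with its own fresh Domain from a dict
# mapping variable names to value lists. _example_rand_factor is a
# hypothetical name; rand_factor_data is assumed to be the helper used above.
def _example_rand_factor():
    vs = {'x': [0, 1], 'y': [0, 1, 2]}
    f = rand_factor(vs)
    # the factor should range over exactly the requested variables
    assert set(f.variables()) == set(vs.keys())
    return f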
def runTest(self):
    data = CompactFactor(read_csv(open('tetrad_asia.csv')), domain=Domain())
    ci = PCCI(G2Separator(data))
    g = ICPattern(ci)
    self.assertEquals(g.shd(self._asia_pdag), 5)
    self.assertEquals(self._tetrad_pdag.shd(self._asia_pdag), 4)
    # I think tetrad is wrong (in terms of implementation)
    self.assertEquals(g.shd(self._tetrad_pdag), 1)
def runTest(self):
    num_vars = 5
    num_vals = 10
    for x in xrange(num_runs):
        vs = dict([('V' + str(i), range(num_vals)) for i in xrange(num_vars)])
        # should get some random data with 0.
        f = Factor(variables = vs.keys()
                   ,data = [abs(randint(0,20)) for i in xrange(num_vals**num_vars)]
                   #,data = [5, 0, 1, 2]
                   #,data = [13,0,0,20]
                   #,data = [14,15,20,16]
                   ,domain = Domain()
                   ,new_domain_variables=vs
                   ,check = True)
        records = []
        for inst in f.insts():
            records.append(inst + (f[inst],))
        rawdata = (vs.keys(), vs, vs.keys(), records)
        #print records
        cf = IncrementalCompactFactor(rawdata, rmin=0)
        #print 'old tree:'
        #print cf
        # g = Factor(variables = vs.keys()
        #            #,data = [13,2,0,0]
        #            ,data = [14,15,0,0]
        #            ,domain = Domain()
        #            ,new_domain_variables=vs
        #            ,check = True)
        g = f.copy(copy_domain=True)

        def swap_some(x):
            r = random()
            if x == 0:
                if r <= 0.5:
                    return randint(1, 5)
                return 0
            elif r <= 0.5:
                return 0
            return x

        def invert_some(x):
            return 5 - x

        g.map(swap_some)
        #print g
        f += g
        records = []
        for inst in g.insts():
            records.append(inst + (g[inst],))
        rawdata = (vs.keys(), vs, vs.keys(), records)
        cf.update(rawdata)
        #print 'new tree:'
        #print cf
        for variables in powerset(vs.keys()):
            g = f.copy(copy_domain=True)
            g.marginalise_away(g.variables() - frozenset(variables))
            self.assert_(same_factor(g, cf.makeFactor(variables), verbose=True))
def testdnet(self):
    from gPy.IO import read_dnet
    from gPy.Models import BN
    from gPy.Variables import Domain
    bnm = BN(domain=Domain())
    bnm.from_dnet(read_dnet('Asia.dnet'))
    self.samegraph(bnm.adg(), self.asia_adg)
    for name, cpt_in_file in self.asia_cpts.items():
        cpt = bnm[name]
        self.samecpt(cpt, cpt_in_file, cpt.child())
def runTest(self):
    data = CompactFactor(read_csv(open('tetrad_xor.csv')), domain=Domain())
    ci = PCCI(G2Separator(data))
    print ci._ind
    for a, b in pairs(data.variables()):
        if (a == 'X1' and b == 'X2') or (a == 'X2' and b == 'X1'):
            self.assert_(ci.has_independence(a, b))
            self.assert_(not ci.has_independence_involving(a, b, 'X3'))
        else:
            print a, b
            self.assert_(not ci.has_independence(a, b))
    data = CompactFactor(read_csv(open('tetrad_xor.csv')), domain=Domain())
    ci = PCCI(G2Separator(data))
    for a, b in pairs(data.variables()):
        if (a == 'X1' and b == 'X2') or (a == 'X2' and b == 'X1'):
            self.assert_(ci.has_independence(a, b))
            self.assert_(not ci.has_independence_involving(a, b, 'X3'))
        else:
            print a, b
            self.assert_(not ci.has_independence(a, b))
def tryModel(self, model):
    # generate some samples
    cf = []
    sampler = ForwardSampler(model)
    samples = sampler.samples(100)
    cf.append(CompactFactor(samples, domain=Domain()))
    icf = IncrementalCompactFactor(samples, domain=Domain())
    for i in xrange(10):
        samples = sampler.samples(100)
        cf.append(CompactFactor(samples, domain=Domain()))
        icf.update(samples)
    # check that, for each family, the sum of the Factors from cf matches
    # the Factor from icf
    for child in model.variables():
        family = model.adg().parents(child) | set([child])
        a = icf.makeFactor(family)
        b = cf[0].makeFactor(family)
        for f in cf[1:]:
            b += f.makeFactor(family)
        self.assert_(same_factor(a, b))
def observe(self, num_samples, skip=0):
    """Draw C{num_samples} observational samples, separated by C{skip}
    steps of the sampler. These samples are appended to any existing
    observational samples and then returned.
    @return: L{IncrementalCompactFactor}
    """
    samples = self._pure_sampler.samples(num_samples, skip)
    if self._data is None:
        self._data = IncrementalCompactFactor(samples, domain=Domain())
    else:
        self._data.update(samples)
    return self._data
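# Hedged usage sketch (added; assuming this observe method sits on the
# CausalWorld class exercised elsewhere in these tests):
#
#     w = CausalWorld(some_bn)      # some_bn: any BN built as in the tests
#     d = w.observe(1000)           # first batch creates the data store
#     d = w.observe(1000, skip=1)   # later batches update and return the
#                                   # same IncrementalCompactFactor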
def __init__(self, bn, burnin=1000):
    """
    @param bn: A causal Bayesian network from which samples are drawn.
    @type bn: L{CBN}
    @param burnin: The burn-in for the L{GibbsSampler} used for
    generating interventional data.
    @type burnin: int
    """
    super(CausalWorld, self).__init__(domain=Domain.copy(bn),
                                      variables=bn.variables())
    self._pure_model = bn.copy(copy_domain=True)
    self._pure_sampler = ForwardSampler(self._pure_model)
    self._burnin = burnin
    self._data = None
    self._inter_sampler = {}
    self._inter_data = {}
def rand_bn(vs, max_potential_parents=15):
    model = BN(domain=Domain(), new_domain_variables=vs)
    for child in vs.keys():
        parents = list(model.variables())
        too_many = len(parents) - max_potential_parents
        if too_many > 0:
            for i in xrange(too_many):
                parents.remove(choice(parents))
        fv = rand_subset(parents) | set([child])
        n = reduce(operator.mul, [len(vs[v]) for v in fv])
        f = Factor(variables=fv, data=rand_factor_data(n),
                   domain=model, check=True)
        cpt = CPT(f, child, True, True)
        model *= cpt
    return model
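# Hedged usage sketch (added for illustration; _example_rand_bn is a
# hypothetical name, and rand_subset/rand_factor_data are assumed to be the
# helpers already used above): draw a small random BN over binary variables.
def _example_rand_bn():
    vs = dict([('V' + str(i), [0, 1]) for i in xrange(4)])
    model = rand_bn(vs, max_potential_parents=2)
    # every variable in vs should appear in the resulting model
    assert set(model.variables()) == set(vs.keys())
    return model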
def runTest(self):
    num_vars = 5
    num_vals = 10
    for i in xrange(num_runs):
        vs = dict([('V' + str(i), range(num_vals)) for i in xrange(num_vars)])
        # should get some random data with 0.
        f = Factor(variables = vs.keys()
                   ,data = [abs(randint(0,5)) for i in xrange(num_vals**num_vars)]
                   ,domain = Domain()
                   ,new_domain_variables=vs
                   ,check = True)
        records = []
        for inst in f.insts():
            records.append(inst + (f[inst],))
        rawdata = (vs.keys(), vs, vs.keys(), records)
        cf = IncrementalCompactFactor(rawdata)
        for variables in powerset(vs.keys()):
            g = f.copy(copy_domain=True)
            g.marginalise_away(g.variables() - frozenset(variables))
            self.assert_(same_factor(g, cf.makeFactor(variables), verbose=True))
def generate_dense_bn(density, num_vars=8, num_vals=3):
    if density > num_vars:
        raise RuntimeError('density must not exceed the number of variables')
    vars, parents = generate_dense_parents(density, num_vars)
    vals = dict([(var, frozenset([i for i in xrange(num_vals)])) for var in vars])
    bn = BN(domain=Domain(), new_domain_variables=vals)
    for child in vars:
        if child in parents:
            n = num_vals**(len(parents[child]) + 1)
        else:
            n = num_vals
            parents[child] = frozenset()
        f = Factor(variables=frozenset([child]) | parents[child],
                   data=rand_factor_data(n),
                   domain=bn, check=True)
        bn *= CPT(f, child, True, True)
    return bn
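# Hedged sketch (added; _example_dense_bn is a hypothetical name, and
# generate_dense_parents is assumed to live in the same module as above):
# build a small randomly parameterised BN whose structure density is
# controlled by the first argument.
def _example_dense_bn():
    return generate_dense_bn(density=2, num_vars=4, num_vals=2)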
def query(self, intervention, num_samples, skip=0):
    """Draw C{num_samples} interventional samples, separated by C{skip}
    steps of the sampler. These samples are appended to any existing
    interventional samples and then returned. The intervention made is
    that of C{intervention}.
    @param intervention: A dictionary mapping variables in the L{CBN}
    to a single value in the domain.
    @return: L{IncrementalCompactFactor}
    """
    # samplers and accumulated data are cached by the set of intervened
    # variables
    k = frozenset(intervention.keys())
    if not self._inter_sampler.has_key(k):
        do_model = CBN.from_bn(self._pure_model.copy(copy_domain=True))
        do_model.intervene(intervention)
        self._inter_sampler[k] = GibbsSampler(do_model, self._burnin)
    do_sampler = self._inter_sampler[k]
    samples = do_sampler.samples(num_samples, skip)
    if not self._inter_data.has_key(k):
        self._inter_data[k] = IncrementalCompactFactor(samples, domain=Domain())
    else:
        self._inter_data[k].update(samples)
    return self._inter_data[k]
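# Hedged usage sketch (added; assuming this query method sits on the same
# CausalWorld class as observe above): interventional data for do(a=0)
# could be accumulated like this:
#
#     w = CausalWorld(some_bn)
#     d = w.query({'a': 0}, 1000)   # builds a GibbsSampler for this intervention
#     d = w.query({'a': 0}, 1000)   # reuses it and appends to the same store
#
# Because the cache key is the *set* of intervened variables, interventions
# that set the same variables to different values share one cache entry.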
def rand_fr(vs, min_fact=1, max_fact=10, min_fact_vars=1, max_fact_vars=10):
    model = FR(domain=Domain(), new_domain_variables=vs)
    for i in xrange(randrange(min_fact, max_fact)):
        fv = []
        while len(fv) == 0:
            for j in xrange(randrange(min_fact_vars,
                                      min(max_fact_vars, len(vs.keys())))):
                v = choice(vs.keys())
                while v in fv:
                    v = choice(vs.keys())
                fv.append(v)
        fv = tuple(fv)
        n = reduce(operator.mul, [len(vs[v]) for v in fv])
        f = Factor(variables=fv, data=rand_factor_data(n),
                   domain=model, check=True)
        model *= f
    return model
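# Hedged usage sketch (added; _example_rand_fr is a hypothetical name):
# rand_fr builds a random factored model (FR) in the same way that rand_bn
# above builds a random BN.
def _example_rand_fr():
    vs = dict([('V' + str(i), [0, 1]) for i in xrange(4)])
    return rand_fr(vs, min_fact=1, max_fact=3,
                   min_fact_vars=1, max_fact_vars=2)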
def setUp(self):
    from gPy.Variables import Domain
    bnm = BN(domain=Domain())
    bnm.from_dnet(read_dnet('Asia.dnet'))
    self.hypergraph = bnm._hypergraph
    self.adg = bnm._adg
    self.tarjan = UGraph(range(1, 11),
                         ((1,2),(1,3),(2,3),(2,10),(3,10),(4,5),
                          (4,7),(5,6),(5,9),(5,7),(6,7),(6,9),
                          (7,8),(7,9),(8,9),(8,10),(9,10)))
    self.tarjan2 = UGraph(range(1, 10),
                          ((1,4),(1,3),(2,3),(2,7),(3,5),(3,6),
                           (4,5),(4,8),(5,6),(5,8),(6,7),(6,9),
                           (7,9),(8,9)))
    self.tarjan3 = UGraph(range(1, 10),
                          ((1,4),(1,3),(2,3),(2,7),(3,5),(3,6),
                           (4,5),(4,8),(5,6),(5,8),(6,7),(6,9),
                           (7,9),(8,9),
                           (3,4),(3,7),(4,6),(4,7),(5,7),(6,8),(7,8)))
    self.tarjanh1 = Hypergraph([[3,4],[2,4],[1,2,3]])
    self.tarjanh2 = Hypergraph([[3,4],[2,4],[1,2,3],[2,3,4]])
    self.graph1 = UGraph('ABCDEF', ('AB','AC','BD','CE','EF'))
    self.graph2 = UGraph('ABCDEF', ('AB','AC','BD','CE','EF','BC','CD','DE'))
from gPy.Examples import minibn, asia
from gPy.Models import FR, BN
from gPy.Parameters import Factor, CPT
from gPy.Variables import Domain
from random import choice, randrange, uniform, shuffle
import operator, unittest, pickle

xor = BN(domain=Domain(),
         new_domain_variables={'a': [0, 1], 'b': [0, 1], 'c': [0, 1]})
xor.add_cpts([
    CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
    CPT(Factor(variables=['b'], data=[0.5, 0.5]), child='b'),
    CPT(Factor(variables=['c', 'a', 'b'],
               data=[1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0]),
        child='c')
])

cbn_small_names = ['xor', 'minibn', 'asia']
cbn_small_test_cases = [xor, minibn, asia]
cbn_large_names = ['alarm', 'insurance', 'carpo']
try:
    # load the pickled large Bayes nets.
    cbn_large_test_cases = map(
        lambda fn: pickle.load(open('networks/' + fn + '_bn.pck', 'r')),
        cbn_large_names)
except:
    cbn_large_names = []
    cbn_large_test_cases = []
def disp(fn, samples):
    f = open(fn, 'w')
    fact = samples.makeFactor(samples.variables())
    for var in fact.variables():
        print >> f, var,
    print >> f, 'count'
    for inst in fact.insts():
        for i in inst:
            print >> f, i,
        print >> f, fact[inst]
    f.close()

bn0 = BN(domain=Domain(), new_domain_variables={'a': [0, 1], 'b': [0, 1]})
bn0.add_cpts([
    CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
    CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]), child='b')
])
w = CausalWorld(bn0)
samples = w.observe(10000)
disp('two_depend', samples)

bn1 = BN(domain=Domain(), new_domain_variables={'a': [0, 1], 'b': [0, 1]})
bn1.add_cpts([
    CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
    CPT(Factor(variables=['b'], data=[0.3, 0.7]), child='b')
])
w = CausalWorld(bn1)
samples = w.observe(10000)