def runTest(self):
    # Exercise X2Separator and G2Separator.test_independ on two 3x3 count
    # tables over the variables 'a' and 'b'.  `ddata` is the table reported
    # as "depend" and `iddata` the one reported as "independ" in the output.
    # test_independ is unpacked as (p-value, statistic, degrees of freedom).
    # (Comment, not docstring: a docstring would replace the test name in
    # unittest's verbose output.)
    variables = ['a', 'b']
    values = {'a': [0, 1, 2], 'b': [0, 1, 2]}
    ddata = (variables, values, variables,
             [(0, 0, 640), (0, 1, 1947), (0, 2, 648),
              (1, 0, 1709), (1, 1, 1364), (1, 2, 335),
              (2, 0, 0), (2, 1, 3357), (2, 2, 0)])
    iddata = (variables, values, variables,
              [(0, 0, 994), (0, 1, 2359), (0, 2, 0),
               (1, 0, 1027), (1, 1, 2312), (1, 2, 0),
               (2, 0, 989), (2, 1, 2319), (2, 2, 0)])

    # ---- chi-squared -----------------------------------------------------
    # results from R version 2.6.2 (2008-02-08) ISBN 3-900051-07-0
    r_dx2 = 3453.429
    r_dp = 2.2e-16
    r_ddf = 4
    # regression (NOT known correct! NOT from R)
    r_ix2 = 1.08094376504
    r_ip = 0.58247332853
    r_idf = 2

    x2d = X2Separator(CompactFactor(ddata, domain=Domain()))
    x2i = X2Separator(CompactFactor(iddata, domain=Domain()))

    dp, dx2, ddf = x2d.test_independ('a', 'b', set())
    self.assertEqual(ddf, r_ddf)
    self.assertAlmostEqual(dx2, r_dx2, 3)
    self.assertAlmostEqual(dp, r_dp, 4)

    ip, ix2, idf = x2i.test_independ('a', 'b', set())
    self.assertEqual(idf, r_idf)
    self.assertAlmostEqual(ix2, r_ix2, 4)
    self.assertAlmostEqual(ip, r_ip, 4)

    # since there are more zero counts there should be fewer dof.
    self.assertTrue(idf < ddf)

    # Single pre-formatted argument: parses as a statement on Python 2 and a
    # call on Python 3, and emits the same text as the comma-separated form.
    print(' x2 depend: p = %s x2 = %s d = %s' % (dp, dx2, ddf))
    print('!! x2 independ: p = %s x2 = %s d = %s' % (ip, ix2, idf))

    # ---- G2 ----------------------------------------------------------------
    # regression (NOT known correct! NOT from R)
    r_dg2 = 4224.3017675
    r_dp = 0.0
    r_ddf = 4
    r_ig2 = 1.07917600592
    r_ip = 0.582988392395
    r_idf = 2

    g2d = G2Separator(CompactFactor(ddata, domain=Domain()))
    g2i = G2Separator(CompactFactor(iddata, domain=Domain()))

    dp, dg2, ddf = g2d.test_independ('a', 'b', set())
    self.assertEqual(ddf, r_ddf)
    self.assertAlmostEqual(dg2, r_dg2, 4)
    self.assertAlmostEqual(dp, r_dp, 4)

    ip, ig2, idf = g2i.test_independ('a', 'b', set())
    self.assertEqual(idf, r_idf)
    self.assertAlmostEqual(ig2, r_ig2, 4)
    self.assertAlmostEqual(ip, r_ip, 4)

    print('!! g2 depend: p = %s g2 = %s d = %s' % (dp, dg2, ddf))
    print('!! g2 independ: p = %s g2 = %s d = %s' % (ip, ig2, idf))
def runTest(self):
    # Exercise X2Separator and G2Separator.test_independ on two 2x2 count
    # tables over the variables 'a' and 'b'.  `ddata` is the table reported
    # as "depend" and `iddata` the one reported as "independ" in the output.
    # test_independ is unpacked as (p-value, statistic, degrees of freedom).
    # (Comment, not docstring: a docstring would replace the test name in
    # unittest's verbose output.)
    variables = ['a', 'b']
    values = {'a': [0, 1], 'b': [0, 1]}
    ddata = (variables, values, variables,
             [(0, 0, 1509), (0, 1, 3538), (1, 0, 1974), (1, 1, 2979)])
    iddata = (variables, values, variables,
              [(0, 0, 1474), (0, 1, 3610), (1, 0, 1441), (1, 1, 3475)])

    # ---- chi-squared -----------------------------------------------------
    # results from R version 2.6.2 (2008-02-08) ISBN 3-900051-07-0
    r_dx2 = 109.1547
    r_dp = 0
    r_ddf = 1
    r_ix2 = 0.1236
    r_ip = 0.7252
    r_idf = 1

    x2d = X2Separator(CompactFactor(ddata, domain=Domain()))
    x2i = X2Separator(CompactFactor(iddata, domain=Domain()))

    dp, dx2, ddf = x2d.test_independ('a', 'b', set())
    self.assertEqual(ddf, r_ddf)
    self.assertAlmostEqual(dx2, r_dx2, 4)
    self.assertAlmostEqual(dp, r_dp, 4)

    ip, ix2, idf = x2i.test_independ('a', 'b', set())
    self.assertEqual(idf, r_idf)
    self.assertAlmostEqual(ix2, r_ix2, 4)
    self.assertAlmostEqual(ip, r_ip, 4)

    # Single pre-formatted argument: parses as a statement on Python 2 and a
    # call on Python 3, and emits the same text as the comma-separated form.
    print(' x2 depend: p = %s x2 = %s d = %s' % (dp, dx2, ddf))
    print(' x2 independ: p = %s x2 = %s d = %s' % (ip, ix2, idf))

    # ---- G2 ----------------------------------------------------------------
    # regression (NOT known correct! NOT from R)
    r_dg2 = 109.389800878
    r_dp = 0
    r_ddf = 1
    r_ig2 = 0.123551704118
    r_ip = 0.725213854926
    r_idf = 1

    g2d = G2Separator(CompactFactor(ddata, domain=Domain()))
    g2i = G2Separator(CompactFactor(iddata, domain=Domain()))

    dp, dg2, ddf = g2d.test_independ('a', 'b', set())
    self.assertEqual(ddf, r_ddf)
    self.assertAlmostEqual(dg2, r_dg2, 4)
    self.assertAlmostEqual(dp, r_dp, 4)

    ip, ig2, idf = g2i.test_independ('a', 'b', set())
    self.assertEqual(idf, r_idf)
    self.assertAlmostEqual(ig2, r_ig2, 4)
    self.assertAlmostEqual(ip, r_ip, 4)

    print('!! g2 depend: p = %s g2 = %s d = %s' % (dp, dg2, ddf))
    print('!! g2 independ: p = %s g2 = %s d = %s' % (ip, ig2, idf))
def runTest(self):
    # Draw 100000 Gibbs samples from self._minibn_do, re-estimate the
    # parameters of a copy of that model from the samples, and require the
    # copy's distribution to match the original's via same_factor(dp=2).
    # NOTE(review): dp presumably means decimal places -- confirm against
    # same_factor.
    # (Comment, not docstring: a docstring would replace the test name in
    # unittest's verbose output.)
    samples = GibbsSampler(self._minibn_do).samples(100000)
    data = CompactFactor(samples, domain=Domain())

    learned = self._minibn_do.copy(copy_domain=True)
    learned.estimate_parameters(data)

    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(same_factor(distribution_of(learned),
                                distribution_of(self._minibn_do),
                                dp=2, verbose=True))
def runTest(self):
    # With all-zero counts, both separators must report zero degrees of
    # freedom, a p-value of 1 and a zero statistic for every pair of
    # variables and every conditioning set.
    # (Comment, not docstring: a docstring would replace the test name in
    # unittest's verbose output.)

    # construct factors of various sizes with no data
    for sz in xrange(6):
        variables = ['V' + str(i) for i in xrange(sz)]
        values = dict([(v, [0, 1]) for v in variables])
        records = []
        data = (variables, values, variables, records)
        for v_on in subsetn(variables, sz):
            # 1 for variables in v_on, 0 otherwise; trailing 0 is the count.
            inst = [int(v in v_on) for v in variables]
            records.append(tuple(inst + [0]))

        d = CompactFactor(data, domain=Domain())
        x2 = X2Separator(d)
        g2 = G2Separator(d)
        for a, b in pairs(variables):
            for s in powerset(set(variables) - set([a, b])):
                x2p, x2s, x2d = x2.test_independ(a, b, set(s))
                g2p, g2s, g2d = g2.test_independ(a, b, set(s))
                # zero degrees of freedom (no data)
                self.assertEqual(x2d, 0)
                self.assertEqual(g2d, 0)
                # default to independent
                self.assertEqual(x2p, 1)
                self.assertEqual(g2p, 1)
                # zero statistics (no data)
                self.assertEqual(x2s, 0)
                self.assertEqual(g2s, 0)
def runTest(self):
    # Learn a pattern from 'tetrad_asia.csv' with PCCI over a G2Separator
    # and compare it, by shd(), with self._asia_pdag and self._tetrad_pdag.
    # (Comment, not docstring: a docstring would replace the test name in
    # unittest's verbose output.)
    csv_file = open('tetrad_asia.csv')
    try:
        # The factor is built while the file is still open; the handle is
        # then released instead of being left to the garbage collector.
        data = CompactFactor(read_csv(csv_file), domain=Domain())
    finally:
        csv_file.close()

    ci = PCCI(G2Separator(data))
    g = ICPattern(ci)
    self.assertEqual(g.shd(self._asia_pdag), 5)
    self.assertEqual(self._tetrad_pdag.shd(self._asia_pdag), 4)
    # I think tetrad is wrong (in terms of implementation)
    self.assertEqual(g.shd(self._tetrad_pdag), 1)
def runTest(self):
    # For random count tables over 5 ten-valued variables, check that
    # CompactFactor.makeFactor(variables) agrees with marginalising a plain
    # Factor down to the same variables, for every subset from powerset().
    # `num_runs` is not defined in this block; it must come from the
    # enclosing module.
    # (Comment, not docstring: a docstring would replace the test name in
    # unittest's verbose output.)
    num_vars = 5
    num_vals = 10
    # Distinct names for the run counter and the comprehension variables:
    # Python 2 list comprehensions leak their variable into this scope.
    for _run in xrange(num_runs):
        vs = dict([('V' + str(k), range(num_vals))
                   for k in xrange(num_vars)])
        # should get some random data with 0.
        f = Factor(variables=vs.keys(),
                   data=[abs(randint(0, 5))
                         for _ in xrange(num_vals ** num_vars)],
                   domain=Domain(),
                   new_domain_variables=vs,
                   check=True)

        # One record per instantiation: its values followed by its count.
        records = [inst + (f[inst],) for inst in f.insts()]
        rawdata = (vs.keys(), vs, vs.keys(), records)
        cf = CompactFactor(rawdata)

        for variables in powerset(vs.keys()):
            g = f.copy(copy_domain=True)
            g.marginalise_away(g.variables() - frozenset(variables))
            self.assertTrue(same_factor(g, cf.makeFactor(variables),
                                        verbose=True))
def tryModel(self, model):
    """Check IncrementalCompactFactor against a sum of CompactFactors.

    Eleven batches of 100 forward samples are drawn from `model`.  Each
    batch gets its own CompactFactor, and all of them are fed to a single
    IncrementalCompactFactor (the first through its constructor, the rest
    through update()).  For every family (a variable plus its parents in
    model.adg()), the incremental factor must equal the sum of the
    per-batch factors according to same_factor.
    """
    # generate some samples
    sampler = ForwardSampler(model)
    samples = sampler.samples(100)
    batch_factors = [CompactFactor(samples, domain=Domain())]
    icf = IncrementalCompactFactor(samples, domain=Domain())
    for _ in xrange(10):
        samples = sampler.samples(100)
        batch_factors.append(CompactFactor(samples, domain=Domain()))
        icf.update(samples)

    # see if the sum of the CPT Factors in batch_factors match that of icf
    for child in model.variables():
        family = model.adg().parents(child) | set([child])
        incremental = icf.makeFactor(family)
        summed = batch_factors[0].makeFactor(family)
        for batch in batch_factors[1:]:
            summed += batch.makeFactor(family)
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(same_factor(incremental, summed))
def runTest(self):
    # On 'tetrad_xor.csv', PCCI over a G2Separator must report X1 and X2 as
    # independent (with no independence of that pair involving X3) and
    # every other pair as not independent.
    # The check is run twice from scratch, as the original test did with two
    # copies of the same code; only the first pass dumps ci._ind.
    # (Comment, not docstring: a docstring would replace the test name in
    # unittest's verbose output.)
    for first_pass in (True, False):
        csv_file = open('tetrad_xor.csv')
        try:
            # Build the factor while the file is open, then release it.
            data = CompactFactor(read_csv(csv_file), domain=Domain())
        finally:
            csv_file.close()

        ci = PCCI(G2Separator(data))
        if first_pass:
            print(ci._ind)

        for a, b in pairs(data.variables()):
            if (a, b) in (('X1', 'X2'), ('X2', 'X1')):
                self.assertTrue(ci.has_independence(a, b))
                self.assertFalse(ci.has_independence_involving(a, b, 'X3'))
            else:
                # Same text as the Python 2 statement `print a,b`.
                print('%s %s' % (a, b))
                self.assertFalse(ci.has_independence(a, b))
"""Throwaway script to test BIC score search """ #from gPy.Data import Data from gPy.Data import CompactFactor import sys, gzip from gPy.IO import read_csv data = CompactFactor(read_csv(gzip.open('/home/jc/godot/research/icml08/data/insurance_100.data.gz'))) for v in data.variables(): print v print print data.bic_search(v)
import sys

from gPy.Examples import asia
from gPy.Data import CompactFactor
from gPy.IO import read_csv


def score_adg(adg, data):
    """Print the BDeu score of every family in `adg` computed from `data`.

    For each vertex of `adg`, a factor over the vertex and its parents is
    built with ``data.makeFactor``, turned into a CPT for the vertex, and
    the vertex is printed together with that CPT's ``bdeu_score()``.  The
    listing is framed by '^^^^' and 'vvvvv' lines and followed by a blank
    line.
    """
    print('^^^^')
    for child in adg.vertices():
        family = adg.parents(child) | set([child])
        data_cpt = data.makeFactor(family).makeCPT(child, False)
        # Same text as the Python 2 statement `print child, score`.
        print('%s %s' % (child, data_cpt.bdeu_score()))
    print('vvvvv')
    print('')


# Fail with a usage message instead of a bare IndexError when the data file
# argument is missing.
if len(sys.argv) < 2:
    sys.exit('usage: %s DATA.csv' % sys.argv[0])

csv_file = open(sys.argv[1])
try:
    # Build the factor while the file is open, then release the handle.
    data = CompactFactor(read_csv(csv_file))
finally:
    csv_file.close()

print(asia.bdeu_score(data))

adg = asia.adg()
score_adg(adg, data)