Esempio n. 1
0
    def runTest(self):
        # results from R version 2.6.2 (2008-02-08) ISBN 3-900051-07-0
        r_dx2 = 3453.429
        r_dp = 2.2e-16
        r_ddf = 4
        # regression (NOT known correct! NOT from R)
        r_ix2 = 1.08094376504
        r_ip = 0.58247332853
        r_idf = 2

        vars = ['a','b']
        vals = {'a': [0,1,2], 'b':[0,1,2]}
        ddata  = (vars,vals,vars,[(0,0,640),(0,1,1947),(0,2,648),(1,0,1709),(1,1,1364),(1,2,335),(2,0,0),(2,1,3357),(2,2,0)])
        iddata = (vars,vals,vars,[(0,0,994),(0,1,2359),(0,2,0),(1,0,1027),(1,1,2312),(1,2,0),(2,0,989),(2,1,2319),(2,2,0)])

        x2d = X2Separator(CompactFactor(ddata, domain=Domain()))
        x2i = X2Separator(CompactFactor(iddata, domain=Domain()))

        dp, dx2, dd = x2d.test_independ('a','b',set())
        self.assertEquals(dd,r_ddf)
        self.assertAlmostEquals(dx2,r_dx2,3)
        self.assertAlmostEquals(dp,r_dp,4)

        ip, ix2, id = x2i.test_independ('a','b',set())
        self.assertEquals(id,r_idf)
        self.assertAlmostEquals(ix2,r_ix2,4)
        self.assertAlmostEquals(ip,r_ip,4)
        # since there are more zero counts there should be fewer dof.
        self.assert_(id < dd)

        print '     x2 depend: p =',dp,'x2 =',dx2,'d =',dd
        print '!! x2 independ: p =',ip,'x2 =',ix2,'d =',id


        # regression (NOT known correct! NOT from R)
        r_dg2 = 4224.3017675
        r_dp = 0.0
        r_ddf = 4
        r_ig2 = 1.07917600592
        r_ip = 0.582988392395
        r_idf = 2

        g2d = G2Separator(CompactFactor(ddata, domain=Domain()))
        g2i = G2Separator(CompactFactor(iddata, domain=Domain()))
        dp, dg2, dd = g2d.test_independ('a','b',set())
        self.assertEquals(dd,r_ddf)
        self.assertAlmostEquals(dg2,r_dg2,4)
        self.assertAlmostEquals(dp,r_dp,4)

        ip, ig2, id = g2i.test_independ('a','b',set())
        self.assertEquals(id,r_idf)
        self.assertAlmostEquals(ig2,r_ig2,4)
        self.assertAlmostEquals(ip,r_ip,4)
        print '!!   g2 depend: p =',dp,'g2 =',dg2,'d =',dd
        print '!! g2 independ: p =',ip,'g2 =',ig2,'d =',id
Esempio n. 2
0
    def runTest(self):
        # results from R version 2.6.2 (2008-02-08) ISBN 3-900051-07-0
        r_dx2 = 109.1547
        r_dp  = 0
        r_ddf = 1
        r_ix2 = 0.1236
        r_ip  = 0.7252
        r_idf = 1

        vars = ['a','b']
        vals = {'a': [0,1], 'b':[0,1]}
        ddata = (vars,vals,vars,[(0,0,1509), (0,1,3538), (1,0,1974), (1,1,2979)])
        iddata = (vars,vals,vars,[(0,0,1474), (0,1,3610), (1,0,1441), (1,1,3475)])

        x2d = X2Separator(CompactFactor(ddata, domain=Domain()))
        x2i = X2Separator(CompactFactor(iddata, domain=Domain()))

        dp, dx2, dd = x2d.test_independ('a','b',set())

        self.assertEquals(dd,r_ddf)
        self.assertAlmostEquals(dx2,r_dx2,4)
        self.assertAlmostEquals(dp,r_dp,4)

        ip, ix2, id = x2i.test_independ('a','b',set())

        self.assertEquals(id,r_idf)
        self.assertAlmostEquals(ix2,r_ix2,4)
        self.assertAlmostEquals(ip,r_ip,4)
        print '     x2 depend: p =',dp,'x2 =',dx2,'d =',dd
        print '   x2 independ: p =',ip,'x2 =',ix2,'d =',id

        # regression (NOT known correct! NOT from R)
        r_dg2 = 109.389800878 
        r_dp  = 0
        r_ddf = 1
        r_ig2 = 0.123551704118
        r_ip  = 0.725213854926 
        r_idf = 1

        g2d = G2Separator(CompactFactor(ddata, domain=Domain()))
        g2i = G2Separator(CompactFactor(iddata, domain=Domain()))
        dp, dg2, dd = g2d.test_independ('a','b',set())
        self.assertEquals(dd,r_ddf)
        self.assertAlmostEquals(dg2,r_dg2,4)
        self.assertAlmostEquals(dp,r_dp,4)

        ip, ig2, id = g2i.test_independ('a','b',set())
        self.assertEquals(id,r_idf)
        self.assertAlmostEquals(ig2,r_ig2,4)
        self.assertAlmostEquals(ip,r_ip,4)
        print '!!   g2 depend: p =',dp,'g2 =',dg2,'d =',dd
        print '!! g2 independ: p =',ip,'g2 =',ig2,'d =',id
Esempio n. 3
0
    def runTest(self):
        """Re-estimate a model's parameters from Gibbs samples of itself.

        Draws 100000 samples from ``self._minibn_do``, fits a copy of that
        model to them, and requires the fitted and original distributions to
        agree according to ``same_factor(..., dp=2)``.
        """
        sampler = GibbsSampler(self._minibn_do)
        data = CompactFactor(sampler.samples(100000), domain=Domain())

        # fit a copy so the reference model itself is left untouched
        fitted = self._minibn_do.copy(copy_domain=True)
        fitted.estimate_parameters(data)

        fitted_dist = distribution_of(fitted)
        reference_dist = distribution_of(self._minibn_do)
        self.assertTrue(
            same_factor(fitted_dist, reference_dist, dp=2, verbose=True))
Esempio n. 4
0
 def runTest(self):
     """Check both separators on factors that hold no observations.

     For 0 to 5 binary variables, builds a CompactFactor whose records all
     carry a trailing count of zero, then tests every pair of variables
     against every conditioning subset of the remaining variables.  With no
     data both separators must report zero degrees of freedom, a p-value of
     1 and a statistic of 0.
     """
     for sz in xrange(6):
         names = ['V' + str(k) for k in xrange(sz)]
         values = dict((name, [0, 1]) for name in names)
         # one record per subset yielded by subsetn: 1 for members, 0 for
         # the rest, followed by a count of zero (i.e. no data)
         records = []
         for chosen in subsetn(names, sz):
             bits = [int(name in chosen) for name in names]
             records.append(tuple(bits + [0]))
         empty = CompactFactor((names, values, names, records), domain=Domain())
         x2 = X2Separator(empty)
         g2 = G2Separator(empty)
         for a, b in pairs(names):
             others = set(names) - set([a, b])
             for cond in powerset(others):
                 x2_p, x2_stat, x2_df = x2.test_independ(a, b, set(cond))
                 g2_p, g2_stat, g2_df = g2.test_independ(a, b, set(cond))
                 # zero degrees of freedom
                 for df in (x2_df, g2_df):
                     self.assertEqual(df, 0)
                 # default to independent
                 for p_value in (x2_p, g2_p):
                     self.assertEqual(p_value, 1)
                 # zero statistics (no data)
                 for stat in (x2_stat, g2_stat):
                     self.assertEqual(stat, 0)
0
 def runTest(self):
     """Learn a pattern from 'tetrad_asia.csv' and compare it to references.

     Builds an ICPattern from PCCI over a G2 separator and checks its
     ``shd`` distance (presumably structural Hamming distance) to the
     reference ``self._asia_pdag`` and to ``self._tetrad_pdag``.
     """
     # Close the CSV handle explicitly instead of leaving it to garbage
     # collection; it stays open until the CompactFactor has been built in
     # case read_csv reads lazily.
     csv_file = open('tetrad_asia.csv')
     try:
         data = CompactFactor(read_csv(csv_file),domain=Domain())
     finally:
         csv_file.close()
     ci = PCCI(G2Separator(data))
     g = ICPattern(ci)
     self.assertEquals(g.shd(self._asia_pdag),5)
     self.assertEquals(self._tetrad_pdag.shd(self._asia_pdag),4)
     # I think tetrad is wrong (in terms of implementation)
     self.assertEquals(g.shd(self._tetrad_pdag),1)
Esempio n. 6
0
    def runTest(self):
        """Compare CompactFactor marginals with those of an ordinary Factor.

        For each of ``num_runs`` rounds, fills a Factor over 5 ten-valued
        variables with random non-negative integers, feeds the same numbers
        to a CompactFactor as records, and checks that for every subset of
        the variables the CompactFactor's factor matches the marginalised
        Factor.
        """
        num_vars = 5
        num_vals = 10
        table_size = num_vals ** num_vars
        for _run in xrange(num_runs):
            vs = dict(('V' + str(k), range(num_vals)) for k in xrange(num_vars))
            # should get some random data with 0.
            counts = [abs(randint(0, 5)) for _cell in xrange(table_size)]
            f = Factor(variables=vs.keys(),
                       data=counts,
                       domain=Domain(),
                       new_domain_variables=vs,
                       check=True)
            # one record per instantiation: the instantiation plus its value
            records = [inst + (f[inst],) for inst in f.insts()]
            rawdata = (vs.keys(), vs, vs.keys(), records)

            cf = CompactFactor(rawdata)
            for variables in powerset(vs.keys()):
                marginal = f.copy(copy_domain=True)
                unwanted = marginal.variables() - frozenset(variables)
                marginal.marginalise_away(unwanted)
                from_compact = cf.makeFactor(variables)
                self.assertTrue(same_factor(marginal, from_compact, verbose=True))
Esempio n. 7
0
    def tryModel(self, model):
        """Check IncrementalCompactFactor against a sum of CompactFactors.

        Draws 11 batches of 100 forward samples from ``model``.  Each batch
        gets its own CompactFactor, while a single IncrementalCompactFactor
        is updated with every batch after the first.  After each update, the
        family factor of every variable from the incremental structure must
        equal the sum of the per-batch family factors.
        """
        sampler = ForwardSampler(model)
        # generate some samples
        first_batch = sampler.samples(100)
        per_batch = [CompactFactor(first_batch, domain=Domain())]
        incremental = IncrementalCompactFactor(first_batch, domain=Domain())

        for _round in xrange(10):
            batch = sampler.samples(100)
            per_batch.append(CompactFactor(batch, domain=Domain()))
            incremental.update(batch)

            # see if the sum of the per-batch family factors matches the
            # factor produced by the incremental structure
            for child in model.variables():
                family = model.adg().parents(child) | set([child])
                from_incremental = incremental.makeFactor(family)
                summed = per_batch[0].makeFactor(family)
                for later in per_batch[1:]:
                    summed += later.makeFactor(family)
                self.assertTrue(same_factor(from_incremental, summed))
Esempio n. 8
0
 def runTest(self):
     data = CompactFactor(read_csv(open('tetrad_xor.csv')),domain=Domain())
     ci = PCCI(G2Separator(data))
     print ci._ind
     for a,b in pairs(data.variables()):
         if a == 'X1' and b == 'X2' or a == 'X2' and b == 'X1':
             self.assert_(ci.has_independence(a, b))
             self.assert_(not ci.has_independence_involving(a,b,'X3'))
         else:
             print a,b
             self.assert_(not ci.has_independence(a,b))
     data = CompactFactor(read_csv(open('tetrad_xor.csv')),domain=Domain())
     ci = PCCI(G2Separator(data))
     for a,b in pairs(data.variables()):
         if a == 'X1' and b == 'X2' or a == 'X2' and b == 'X1':
             self.assert_(ci.has_independence(a, b))
             self.assert_(not ci.has_independence_involving(a,b,'X3'))
         else:
             print a,b
             self.assert_(not ci.has_independence(a,b))
Esempio n. 9
0
"""Throwaway script to test BIC score search
"""

#from gPy.Data import Data
from gPy.Data import CompactFactor
import sys, gzip
from gPy.IO import read_csv

data = CompactFactor(read_csv(gzip.open('/home/jc/godot/research/icml08/data/insurance_100.data.gz')))
for v in data.variables():
    print v
    print
    print data.bic_search(v)
Esempio n. 10
0
from gPy.Examples import asia
from gPy.Data import CompactFactor
from gPy.IO import read_csv
import sys

data = CompactFactor(read_csv(open(sys.argv[1])))
print asia.bdeu_score(data)


def score_adg(adg,data):
    print '^^^^'
    for child in adg.vertices():
        parents = adg.parents(child)
        family = parents | set([child])
        data_cpt = data.makeFactor(family).makeCPT(child,False)
        print child, data_cpt.bdeu_score()
    print 'vvvvv'
    print
    
# Break the score down family by family for the Asia network's graph.
adg = asia.adg()
score_adg(adg=adg, data=data)