コード例 #1
0
 def setUp(self):
     """Build two small Bayesian networks and their CBN counterparts.

     After this runs, C{self.arr[i]} holds the list of variable pairs
     recorded for the network that C{self.cbn[i]} was created from.
     """
     self.arr = []
     # Keep the networks in an ordered list (not a dict keyed by index) so
     # that self.cbn is guaranteed to be built in the same order in which
     # self.arr is filled, independent of dict iteration order.
     nets = []

     # model a -> b
     net = BN(domain=Domain(),
              new_domain_variables={
                  'a': [0, 1],
                  'b': [0, 1]
              })
     net.add_cpts([
         CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
         CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]),
             child='b')
     ])
     nets.append(net)
     self.arr.append([('a', 'b')])

     # model a -> b -> c
     net = BN(domain=Domain(),
              new_domain_variables={
                  'a': [0, 1],
                  'b': [0, 1],
                  'c': [0, 1]
              })
     net.add_cpts([
         CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
         CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]),
             child='b'),
         CPT(Factor(variables=['c', 'b'], data=[0.1, 0.9, 0.2, 0.8]),
             child='c')
     ])
     nets.append(net)
     self.arr.append([('a', 'b'), ('b', 'c')])

     self.cbn = [CBN.from_bn(net) for net in nets]
コード例 #2
0
    def runTest(self):
        """Check the X2 and G2 separators on two 3x3 count tables.

        C{ddata} is the table reported as 'depend' and C{iddata} the one
        reported as 'independ' in the printed output.  Only the X2
        expectations for C{ddata} come from R; all other expected values
        are regression values (NOT known correct).
        """
        names = ['a', 'b']
        values = {'a': [0, 1, 2], 'b': [0, 1, 2]}
        ddata = (names, values, names,
                 [(0, 0, 640), (0, 1, 1947), (0, 2, 648),
                  (1, 0, 1709), (1, 1, 1364), (1, 2, 335),
                  (2, 0, 0), (2, 1, 3357), (2, 2, 0)])
        iddata = (names, values, names,
                  [(0, 0, 994), (0, 1, 2359), (0, 2, 0),
                   (1, 0, 1027), (1, 1, 2312), (1, 2, 0),
                   (2, 0, 989), (2, 1, 2319), (2, 2, 0)])

        # ---- chi-squared ----
        # results from R version 2.6.2 (2008-02-08) ISBN 3-900051-07-0
        r_dx2 = 3453.429
        r_dp = 2.2e-16
        r_ddf = 4
        # regression (NOT known correct! NOT from R)
        r_ix2 = 1.08094376504
        r_ip = 0.58247332853
        r_idf = 2

        x2d = X2Separator(CompactFactor(ddata, domain=Domain()))
        x2i = X2Separator(CompactFactor(iddata, domain=Domain()))

        dp, dx2, ddf = x2d.test_independ('a', 'b', set())
        self.assertEqual(ddf, r_ddf)
        self.assertAlmostEqual(dx2, r_dx2, 3)
        self.assertAlmostEqual(dp, r_dp, 4)

        ip, ix2, idf = x2i.test_independ('a', 'b', set())
        self.assertEqual(idf, r_idf)
        self.assertAlmostEqual(ix2, r_ix2, 4)
        self.assertAlmostEqual(ip, r_ip, 4)
        # since there are more zero counts there should be fewer dof.
        self.assertTrue(idf < ddf)

        # Single-argument print(...) produces the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('     x2 depend: p = %s x2 = %s d = %s' % (dp, dx2, ddf))
        print('!! x2 independ: p = %s x2 = %s d = %s' % (ip, ix2, idf))

        # ---- G2 ----
        # regression (NOT known correct! NOT from R)
        r_dg2 = 4224.3017675
        r_dp = 0.0
        r_ddf = 4
        r_ig2 = 1.07917600592
        r_ip = 0.582988392395
        r_idf = 2

        g2d = G2Separator(CompactFactor(ddata, domain=Domain()))
        g2i = G2Separator(CompactFactor(iddata, domain=Domain()))

        dp, dg2, ddf = g2d.test_independ('a', 'b', set())
        self.assertEqual(ddf, r_ddf)
        self.assertAlmostEqual(dg2, r_dg2, 4)
        self.assertAlmostEqual(dp, r_dp, 4)

        ip, ig2, idf = g2i.test_independ('a', 'b', set())
        self.assertEqual(idf, r_idf)
        self.assertAlmostEqual(ig2, r_ig2, 4)
        self.assertAlmostEqual(ip, r_ip, 4)
        print('!!   g2 depend: p = %s g2 = %s d = %s' % (dp, dg2, ddf))
        print('!! g2 independ: p = %s g2 = %s d = %s' % (ip, ig2, idf))
コード例 #3
0
    def runTest(self):
        """Check the X2 and G2 separators on two 2x2 count tables.

        C{ddata} is the table reported as 'depend' and C{iddata} the one
        reported as 'independ' in the printed output.  The X2 expectations
        come from R; the G2 expectations are regression values (NOT known
        correct).
        """
        names = ['a', 'b']
        values = {'a': [0, 1], 'b': [0, 1]}
        ddata = (names, values, names,
                 [(0, 0, 1509), (0, 1, 3538), (1, 0, 1974), (1, 1, 2979)])
        iddata = (names, values, names,
                  [(0, 0, 1474), (0, 1, 3610), (1, 0, 1441), (1, 1, 3475)])

        # ---- chi-squared ----
        # results from R version 2.6.2 (2008-02-08) ISBN 3-900051-07-0
        r_dx2 = 109.1547
        r_dp = 0
        r_ddf = 1
        r_ix2 = 0.1236
        r_ip = 0.7252
        r_idf = 1

        x2d = X2Separator(CompactFactor(ddata, domain=Domain()))
        x2i = X2Separator(CompactFactor(iddata, domain=Domain()))

        dp, dx2, ddf = x2d.test_independ('a', 'b', set())
        self.assertEqual(ddf, r_ddf)
        self.assertAlmostEqual(dx2, r_dx2, 4)
        self.assertAlmostEqual(dp, r_dp, 4)

        ip, ix2, idf = x2i.test_independ('a', 'b', set())
        self.assertEqual(idf, r_idf)
        self.assertAlmostEqual(ix2, r_ix2, 4)
        self.assertAlmostEqual(ip, r_ip, 4)

        # Single-argument print(...) produces the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('     x2 depend: p = %s x2 = %s d = %s' % (dp, dx2, ddf))
        print('   x2 independ: p = %s x2 = %s d = %s' % (ip, ix2, idf))

        # ---- G2 ----
        # regression (NOT known correct! NOT from R)
        r_dg2 = 109.389800878
        r_dp = 0
        r_ddf = 1
        r_ig2 = 0.123551704118
        r_ip = 0.725213854926
        r_idf = 1

        g2d = G2Separator(CompactFactor(ddata, domain=Domain()))
        g2i = G2Separator(CompactFactor(iddata, domain=Domain()))

        dp, dg2, ddf = g2d.test_independ('a', 'b', set())
        self.assertEqual(ddf, r_ddf)
        self.assertAlmostEqual(dg2, r_dg2, 4)
        self.assertAlmostEqual(dp, r_dp, 4)

        ip, ig2, idf = g2i.test_independ('a', 'b', set())
        self.assertEqual(idf, r_idf)
        self.assertAlmostEqual(ig2, r_ig2, 4)
        self.assertAlmostEqual(ip, r_ip, 4)
        print('!!   g2 depend: p = %s g2 = %s d = %s' % (dp, dg2, ddf))
        print('!! g2 independ: p = %s g2 = %s d = %s' % (ip, ig2, idf))
コード例 #4
0
ファイル: test_Models.py プロジェクト: EJHortala/books-2
 def setUp(self):
     """Load the Asia network and the reference marginals it is checked against.

     Sets C{self.bnm} (the network read from 'Asia.dnet'), C{self.cptdict}
     (its C{factors} attribute) and two lists of single-variable factors,
     C{self.marginals} and C{self.cond_marginals}.
     """
     from gPy.Variables import Domain
     # don't use default domain
     self.bnm = BN(domain=Domain())
     self.bnm.from_dnet(read_dnet('Asia.dnet'))
     self.cptdict = self.bnm.factors

     # taken directly from Netica output
     unconditional = [
         ('VisitAsia', [0.99, 0.01]),
         ('Tuberculosis', [0.9896, 0.0104]),
         ('Smoking', [0.5, 0.5]),
         ('Cancer', [0.945, 0.055]),
         ('TbOrCa', [0.93517, 0.064828]),
         ('XRay', [0.11029, 0.88971]),
         ('Bronchitis', [0.55, 0.45]),
         ('Dyspnea', [0.56403, 0.43597]),
     ]
     self.marginals = [Factor((name,), probs)
                       for name, probs in unconditional]

     # taken directly from Netica output
     # 'Cancer' ([1, 0]) and 'TbOrCa' ([0, 1]) are deliberately left out:
     # the other marginals are conditional on these values.
     conditional = [
         ('VisitAsia', [0.95192, 0.048077]),
         ('Tuberculosis', [0, 1]),
         ('Smoking', [0.52381, 0.47619]),
         ('XRay', [0.98, 0.02]),
         ('Bronchitis', [0.55714, 0.44286]),
         ('Dyspnea', [0.21143, 0.78857]),
     ]
     self.cond_marginals = [Factor((name,), probs)
                            for name, probs in conditional]
コード例 #5
0
ファイル: test_CBN.py プロジェクト: EJHortala/books-2
    def runTest(self):
        """Re-estimate C{self._minibn_do} from Gibbs samples of itself.

        Draws 100000 samples, fits the parameters of a copy of the model to
        them and requires the two distributions to agree (C{dp=2}).
        """
        samples = GibbsSampler(self._minibn_do).samples(100000)
        data = CompactFactor(samples, domain=Domain())

        learnt = self._minibn_do.copy(copy_domain=True)
        learnt.estimate_parameters(data)
        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue(same_factor(distribution_of(learnt),
                                    distribution_of(self._minibn_do),
                                    dp=2, verbose=True))
コード例 #6
0
 def runTest(self):
     """Run the X2 and G2 tests on data in which every count is zero.

     For 0 to 5 binary variables, every pair of variables is tested given
     every subset of the remaining variables.  With no data both tests are
     expected to report p = 1, a statistic of 0 and 0 degrees of freedom.
     """
     # construct factors of various sizes with no data
     for sz in xrange(6):
         names = ['V' + str(i) for i in xrange(sz)]
         values = dict((v, [0, 1]) for v in names)
         records = []
         for v_on in subsetn(names, sz):
             inst = [1 if v in v_on else 0 for v in names]
             # the trailing 0 is the count for this instantiation
             records.append(tuple(inst + [0]))
         data = (names, values, names, records)
         d = CompactFactor(data, domain=Domain())
         x2 = X2Separator(d)
         g2 = G2Separator(d)
         for a, b in pairs(names):
             for s in powerset(set(names) - set([a, b])):
                 x2p, x2s, x2d = x2.test_independ(a, b, set(s))
                 g2p, g2s, g2d = g2.test_independ(a, b, set(s))
                 # zero degrees of freedom
                 self.assertEqual(x2d, 0)
                 self.assertEqual(g2d, 0)
                 # default to independent
                 self.assertEqual(x2p, 1)
                 self.assertEqual(g2p, 1)
                 # zero statistics (no data)
                 self.assertEqual(x2s, 0)
                 self.assertEqual(g2s, 0)
コード例 #7
0
ファイル: utils_test.py プロジェクト: EJHortala/books-2
def rand_factor(vs):
    """Build a checked L{Factor} over the variables of C{vs}.

    @param vs: Mapping from variable name to its collection of values.
    @return: A L{Factor} in a fresh L{Domain} whose data comes from
    C{rand_factor_data}, asked for as many entries as the product of the
    sizes of the value collections.
    """
    cell_count = reduce(operator.mul, [len(vs[name]) for name in vs])
    return Factor(variables=vs.keys(),
                  data=rand_factor_data(cell_count),
                  domain=Domain(),
                  check=True,
                  new_domain_variables=vs)
コード例 #8
0
 def runTest(self):
     """Learn an IC pattern from 'tetrad_asia.csv' and compare it by SHD.

     The learnt pattern is compared against C{self._asia_pdag} and
     C{self._tetrad_pdag} using their C{shd} method.
     """
     csv_file = open('tetrad_asia.csv')
     # The file stays open for the whole test in case the data is read
     # lazily, and is closed even if an assertion fails.
     try:
         data = CompactFactor(read_csv(csv_file), domain=Domain())
         ci = PCCI(G2Separator(data))
         g = ICPattern(ci)
         self.assertEqual(g.shd(self._asia_pdag), 5)
         self.assertEqual(self._tetrad_pdag.shd(self._asia_pdag), 4)
         # I think tetrad is wrong (in terms of implementation)
         self.assertEqual(g.shd(self._tetrad_pdag), 1)
     finally:
         csv_file.close()
コード例 #9
0
    def runTest(self):
        """Check that updating an IncrementalCompactFactor adds the new counts.

        Each run builds a random count factor C{f}, loads it into an
        L{IncrementalCompactFactor}, derives a perturbed copy C{g}, feeds
        C{g}'s records to C{update} and then requires every marginal of
        C{f + g} to match the marginal produced by C{makeFactor}.
        """
        num_vars = 5
        num_vals = 10

        def swap_some(x):
            # With probability 1/2 turn a zero count into a count in 1..5,
            # and with probability 1/2 turn a non-zero count into zero.
            # random() is always drawn first, as in the original code, so
            # the random stream is consumed identically.
            r = random()
            if x == 0:
                if r <= 0.5:
                    return randint(1, 5)
                return 0
            if r <= 0.5:
                return 0
            return x

        for _ in xrange(num_runs):
            vs = dict(('V' + str(i), range(num_vals))
                      for i in xrange(num_vars))
            # should get some random data with 0.
            f = Factor(variables=vs.keys(),
                       data=[randint(0, 20)
                             for _cell in xrange(num_vals ** num_vars)],
                       domain=Domain(),
                       new_domain_variables=vs,
                       check=True)
            # one record per instantiation: the values followed by the count
            records = [inst + (f[inst],) for inst in f.insts()]
            rawdata = (vs.keys(), vs, vs.keys(), records)
            cf = IncrementalCompactFactor(rawdata, rmin=0)

            g = f.copy(copy_domain=True)
            g.map(swap_some)
            f += g
            records = [inst + (g[inst],) for inst in g.insts()]
            rawdata = (vs.keys(), vs, vs.keys(), records)
            cf.update(rawdata)

            for variables in powerset(vs.keys()):
                marginal = f.copy(copy_domain=True)
                marginal.marginalise_away(
                    marginal.variables() - frozenset(variables))
                self.assertTrue(same_factor(marginal,
                                            cf.makeFactor(variables),
                                            verbose=True))
コード例 #10
0
 def testdnet(self):
     """Read 'Asia.dnet' into a BN and compare it with the stored fixtures.

     The graph is compared with C{self.asia_adg} via C{self.samegraph},
     and each CPT named in C{self.asia_cpts} via C{self.samecpt}.
     """
     from gPy.IO import read_dnet
     from gPy.Models import BN
     from gPy.Variables import Domain
     network = BN(domain=Domain())
     network.from_dnet(read_dnet('Asia.dnet'))
     self.samegraph(network.adg(), self.asia_adg)
     for var_name, expected in self.asia_cpts.items():
         loaded = network[var_name]
         self.samecpt(loaded, expected, loaded.child())
コード例 #11
0
 def runTest(self):
     """Check the independences PCCI finds in 'tetrad_xor.csv'.

     X1 and X2 must have an independence that does not involve X3; no
     other pair of variables may have one.  The check is run twice on
     freshly loaded data, as in the original code; only the first pass
     prints the C{_ind} attribute of the PCCI object.
     """
     for dump_found in (True, False):
         csv_file = open('tetrad_xor.csv')
         # Close the file even when an assertion fails.
         try:
             data = CompactFactor(read_csv(csv_file), domain=Domain())
             ci = PCCI(G2Separator(data))
             if dump_found:
                 print(ci._ind)
             for a, b in pairs(data.variables()):
                 if (a, b) in (('X1', 'X2'), ('X2', 'X1')):
                     self.assertTrue(ci.has_independence(a, b))
                     self.assertFalse(
                         ci.has_independence_involving(a, b, 'X3'))
                 else:
                     print('%s %s' % (a, b))
                     self.assertFalse(ci.has_independence(a, b))
         finally:
             csv_file.close()
コード例 #12
0
    def tryModel(self, model):
        """Compare incremental counting against summing separate batches.

        Samples are drawn from C{model} in batches of 100.  Each batch is
        stored as its own L{CompactFactor} and also fed to a single
        L{IncrementalCompactFactor}.  After every new batch, the factor
        over each variable's family (the variable and its parents) built
        from the incremental structure must equal the sum of the factors
        built from the individual batches.
        """
        # generate some samples
        sampler = ForwardSampler(model)
        samples = sampler.samples(100)
        batches = [CompactFactor(samples, domain=Domain())]
        icf = IncrementalCompactFactor(samples, domain=Domain())

        for _ in xrange(10):
            samples = sampler.samples(100)
            batches.append(CompactFactor(samples, domain=Domain()))
            icf.update(samples)

            # see if the sum of the CPT Factors in batches matches icf's
            for child in model.variables():
                family = model.adg().parents(child) | set([child])
                incremental = icf.makeFactor(family)
                summed = batches[0].makeFactor(family)
                for batch in batches[1:]:
                    summed += batch.makeFactor(family)
                self.assertTrue(same_factor(incremental, summed))
コード例 #13
0
ファイル: LearningUtils.py プロジェクト: EJHortala/books-2
 def observe(self, num_samples, skip=0):
     """Draw C{num_samples} observational samples from the unmodified
     model's sampler, passing C{skip} through to it, and add them to the
     observational data collected so far.
     @param num_samples: Number of samples to request from the sampler.
     @param skip: Forwarded unchanged to the sampler's C{samples} method.
     @return: The accumulated observational data; an
     L{IncrementalCompactFactor} created on the first call and updated in
     place on later calls.
     """
     drawn = self._pure_sampler.samples(num_samples, skip)
     if self._data is not None:
         # Data already exists: extend it with the new samples.
         self._data.update(drawn)
         return self._data
     # First call: start the store from this batch of samples.
     self._data = IncrementalCompactFactor(drawn, domain=Domain())
     return self._data
コード例 #14
0
ファイル: LearningUtils.py プロジェクト: EJHortala/books-2
 def __init__(self, bn,burnin=1000):
     """
     @param bn: A causal Bayesian network from which samples are drawn.
     @type bn: L{CBN}
     @param burnin: The burn in for the L{GibbsSampler} used for generating
     interventional data.
     @type burnin: int
     """
     own_domain = Domain.copy(bn)
     super(CausalWorld, self).__init__(domain=own_domain,
                                       variables=bn.variables())
     # Nothing has been sampled yet: no observational data, and no
     # samplers or data for any intervention.
     self._data = None
     self._inter_sampler = {}
     self._inter_data = {}
     self._burnin = burnin
     # Observational samples are drawn from a private copy of bn (with its
     # own copy of the domain), not from the caller's object.
     private_bn = bn.copy(copy_domain=True)
     self._pure_model = private_bn
     self._pure_sampler = ForwardSampler(private_bn)
コード例 #15
0
ファイル: utils_test.py プロジェクト: EJHortala/books-2
def rand_bn(vs, max_potential_parents=15):
    """Build a L{BN} over C{vs} with a randomly chosen CPT for every variable.

    @param vs: Mapping from variable name to its collection of values.
    @param max_potential_parents: Upper bound on the number of variables
    from which a child's CPT variables are randomly chosen.
    @return: The resulting L{BN}.
    """
    model = BN(domain=Domain(), new_domain_variables=vs)

    for child in vs.keys():
        # NOTE(review): the candidates are all of the model's variables,
        # including the child itself -- confirm this is intended.
        candidates = list(model.variables())
        # Randomly discard candidates until no more than the allowed
        # number remain.
        while len(candidates) > max_potential_parents:
            candidates.remove(choice(candidates))

        family = rand_subset(candidates) | set([child])
        size = reduce(operator.mul, [len(vs[v]) for v in family])
        table = Factor(variables=family,
                       data=rand_factor_data(size),
                       domain=model,
                       check=True)
        model *= CPT(table, child, True, True)
    return model
コード例 #16
0
    def runTest(self):
        """Check IncrementalCompactFactor marginals against a plain Factor.

        Each run builds a random count factor, loads its records into an
        L{IncrementalCompactFactor} and requires, for every subset of the
        variables, that C{makeFactor} agrees with marginalising the
        original factor down to that subset.
        """
        num_vars = 5
        num_vals = 10
        # The loop counter is unused; naming it `_` also stops the
        # comprehension variable `i` below from overwriting it.
        for _ in xrange(num_runs):
            vs = dict(('V' + str(i), range(num_vals))
                      for i in xrange(num_vars))
            # should get some random data with 0.
            f = Factor(variables=vs.keys(),
                       data=[randint(0, 5)
                             for _cell in xrange(num_vals ** num_vars)],
                       domain=Domain(),
                       new_domain_variables=vs,
                       check=True)
            # one record per instantiation: the values followed by the count
            records = [inst + (f[inst],) for inst in f.insts()]
            rawdata = (vs.keys(), vs, vs.keys(), records)

            cf = IncrementalCompactFactor(rawdata)
            for variables in powerset(vs.keys()):
                marginal = f.copy(copy_domain=True)
                marginal.marginalise_away(
                    marginal.variables() - frozenset(variables))
                self.assertTrue(same_factor(marginal,
                                            cf.makeFactor(variables),
                                            verbose=True))
コード例 #17
0
ファイル: utils_test.py プロジェクト: EJHortala/books-2
def generate_dense_bn(density, num_vars=8, num_vals=3):
    """Build a L{BN} whose parent sets come from C{generate_dense_parents}.

    @param density: Passed to C{generate_dense_parents}; must not exceed
    C{num_vars}.
    @param num_vars: Number of variables, passed to
    C{generate_dense_parents}.
    @param num_vals: Number of values of every variable (0 .. num_vals-1).
    @return: A L{BN} with one CPT per variable, each filled from
    C{rand_factor_data}.
    @raise RuntimeError: If C{density} is greater than C{num_vars}.
    """
    if density > num_vars:
        # NOTE(review): the message says "less than" although
        # density == num_vars passes this check -- confirm which is meant.
        raise RuntimeError('density must be less than number of variables')

    names, parents = generate_dense_parents(density, num_vars)
    vals = dict((var, frozenset(xrange(num_vals))) for var in names)
    bn = BN(domain=Domain(), new_domain_variables=vals)
    for child in names:
        if child not in parents:
            # a variable without an entry has no parents
            parents[child] = frozenset()
        # one table entry per joint value of the child and its parents
        n = num_vals ** (len(parents[child]) + 1)

        f = Factor(variables=frozenset([child]) | parents[child],
                   data=rand_factor_data(n),
                   domain=bn,
                   check=True)
        bn *= CPT(f, child, True, True)
    return bn
コード例 #18
0
ファイル: LearningUtils.py プロジェクト: EJHortala/books-2
 def query(self, intervention, num_samples,skip=0):
     """Draw C{num_samples} interventional samples, separated by C{skip}
     steps of the sampler.  These samples are appended to any existing
     interventional samples and then returned.  The intervention made is
     that of C{intervention}.
     @param intervention: A dictionary mapping variables in the L{CBN} to
     a single value in the domain.
     @param num_samples: Number of samples to request from the sampler.
     @param skip: Forwarded unchanged to the sampler's C{samples} method.
     @return: L{IncrementalCompactFactor}
     """
     # NOTE(review): the cache key is only the set of intervened
     # variables, not their values, so two interventions on the same
     # variables with different values share one sampler and one data
     # store -- confirm this is intended.
     k = frozenset(intervention.keys())
     if k not in self._inter_sampler:
         # First query for these variables: build the intervened model
         # from a private copy and give it its own Gibbs sampler.
         do_model = CBN.from_bn(self._pure_model.copy(copy_domain=True))
         do_model.intervene(intervention)
         self._inter_sampler[k] = GibbsSampler(do_model, self._burnin)
     samples = self._inter_sampler[k].samples(num_samples, skip)
     if k not in self._inter_data:
         self._inter_data[k] = IncrementalCompactFactor(samples,
                                                        domain=Domain())
     else:
         self._inter_data[k].update(samples)
     return self._inter_data[k]
コード例 #19
0
ファイル: utils_test.py プロジェクト: EJHortala/books-2
def rand_fr(vs, min_fact=1, max_fact=10, min_fact_vars=1, max_fact_vars=10):
    """Build an L{FR} over C{vs} containing a random number of random factors.

    @param vs: Mapping from variable name to its collection of values.
    @param min_fact: Lower bound (inclusive) on the number of factors.
    @param max_fact: Upper bound (exclusive) on the number of factors.
    @param min_fact_vars: Lower bound (inclusive) on the number of
    variables in a factor.
    @param max_fact_vars: Upper bound (exclusive) on the number of
    variables in a factor; also capped by the number of variables in
    C{vs}.
    @return: The resulting L{FR}.
    """
    model = FR(domain=Domain(), new_domain_variables=vs)
    names = list(vs)

    for _ in xrange(randrange(min_fact, max_fact)):
        # Retry until at least one variable has been drawn.  The list is
        # converted to a tuple only after the loop: the original converted
        # inside it, so any retry would have called append() on a tuple.
        fv = []
        while not fv:
            # NOTE(review): randrange raises ValueError when
            # min_fact_vars >= min(max_fact_vars, len(vs)), e.g. for a
            # single-variable vs with the defaults -- left unchanged.
            wanted = randrange(min_fact_vars,
                               min(max_fact_vars, len(names)))
            for _j in xrange(wanted):
                v = choice(names)
                # redraw until the variable is not already in the factor
                while v in fv:
                    v = choice(names)
                fv.append(v)
        fv = tuple(fv)

        n = reduce(operator.mul, [len(vs[v]) for v in fv])
        f = Factor(variables=fv,
                   data=rand_factor_data(n),
                   domain=model,
                   check=True)
        model *= f
    return model
コード例 #20
0
 def setUp(self):
     """Create the graph fixtures: Asia's graphs plus hand-written examples.

     C{self.hypergraph} and C{self.adg} are taken from the network read
     from 'Asia.dnet'.  C{tarjan3} uses the edges of C{tarjan2} plus seven
     more, and C{graph2} uses the edges of C{graph1} plus three more.
     """
     from gPy.Variables import Domain
     network = BN(domain=Domain())
     network.from_dnet(read_dnet('Asia.dnet'))
     self.hypergraph = network._hypergraph
     self.adg = network._adg

     self.tarjan = UGraph(range(1,11),
                          ((1,2),(1,3),(2,3),(2,10),(3,10),(4,5),
                           (4,7),(5,6),(5,9),(5,7),(6,7),(6,9),
                           (7,8),(7,9),(8,9),(8,10),(9,10)))

     # edges shared by tarjan2 and tarjan3
     tarjan2_edges = ((1,4),(1,3),(2,3),(2,7),(3,5),(3,6),
                      (4,5),(4,8),(5,6),(5,8),(6,7),(6,9),
                      (7,9),(8,9))
     tarjan3_extra = ((3,4),(3,7),(4,6),(4,7),(5,7),(6,8),(7,8))
     self.tarjan2 = UGraph(range(1,10), tarjan2_edges)
     self.tarjan3 = UGraph(range(1,10), tarjan2_edges + tarjan3_extra)

     self.tarjanh1 = Hypergraph([[3,4],[2,4],[1,2,3]])
     self.tarjanh2 = Hypergraph([[3,4],[2,4],[1,2,3],[2,3,4]])

     # edges shared by graph1 and graph2
     graph1_edges = ('AB','AC','BD','CE','EF')
     self.graph1 = UGraph('ABCDEF', graph1_edges)
     self.graph2 = UGraph('ABCDEF', graph1_edges + ('BC','CD','DE'))
コード例 #21
0
ファイル: utils_test.py プロジェクト: EJHortala/books-2
from gPy.Examples import minibn, asia
from gPy.Models import FR, BN
from gPy.Parameters import Factor, CPT
from gPy.Variables import Domain
from random import choice, randrange, uniform, shuffle
import operator, unittest, pickle

# A network over three binary variables: 'a' and 'b' each have a uniform
# CPT, and the CPT of 'c' (over c, a and b) contains only 0.0 and 1.0.
# Presumably c = a XOR b, as the name suggests; how the flat data maps to
# values depends on Factor's ordering -- TODO confirm.
xor = BN(domain=Domain(),
         new_domain_variables=dict((name, [0, 1]) for name in 'abc'))
xor.add_cpts([
    CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
    CPT(Factor(variables=['b'], data=[0.5, 0.5]), child='b'),
    CPT(Factor(variables=['c', 'a', 'b'],
               data=[1.0, 0.0, 0.0, 1.0,
                     0.0, 1.0, 1.0, 0.0]),
        child='c'),
])

# Small networks and their names, kept in matching order.
cbn_small_names = ['xor', 'minibn', 'asia']
cbn_small_test_cases = [xor, minibn, asia]
cbn_large_names = ['alarm', 'insurance', 'carpo']


def _load_pickled_bn(name):
    """Unpickle 'networks/<name>_bn.pck', always closing the file."""
    # NOTE: unpickling can execute arbitrary code; these files must only
    # ever be trusted, locally generated fixtures.
    # NOTE(review): text mode 'r' is kept from the original; it only suits
    # protocol-0 pickles written in text mode -- confirm how the files
    # were written before switching to 'rb'.
    pck_file = open('networks/' + name + '_bn.pck', 'r')
    try:
        return pickle.load(pck_file)
    finally:
        pck_file.close()


try:
    # load the pickled large Bayes nets.
    cbn_large_test_cases = list(map(_load_pickled_bn, cbn_large_names))
except Exception:
    # Best effort: if any network is missing or unreadable, run without the
    # large test cases.  Unlike a bare except, KeyboardInterrupt and
    # SystemExit still propagate.
    cbn_large_names = []
    cbn_large_test_cases = []
コード例 #22
0

def disp(fn, samples):
    """Write the counts held in C{samples} to the text file C{fn}.

    The first line lists the variables followed by the word 'count'; each
    later line lists one instantiation's values followed by its count.
    Fields are separated by single spaces.

    @param fn: Path of the file to (over)write.
    @param samples: Object providing C{variables()} and C{makeFactor()}.
    """
    f = open(fn, 'w')
    # try/finally closes the file even if building or writing the factor
    # fails part-way through.
    try:
        fact = samples.makeFactor(samples.variables())
        header = [str(var) for var in fact.variables()] + ['count']
        f.write(' '.join(header) + '\n')
        for inst in fact.insts():
            row = [str(value) for value in inst] + [str(fact[inst])]
            f.write(' '.join(row) + '\n')
    finally:
        f.close()


# Two binary variables; the CPT of 'b' is defined over both 'a' and 'b'.
# 10000 observational samples are written to the file 'two_depend'.
bn0 = BN(domain=Domain(),
         new_domain_variables=dict(a=[0, 1], b=[0, 1]))
bn0.add_cpts([
    CPT(Factor(variables=['a'],
               data=[0.5, 0.5]),
        child='a'),
    CPT(Factor(variables=['a', 'b'],
               data=[0.3, 0.7, 0.4, 0.6]),
        child='b'),
])
w = CausalWorld(bn0)
samples = w.observe(10000)
disp('two_depend', samples)

# Two binary variables; here the CPT of 'b' is defined over 'b' alone.
bn1 = BN(domain=Domain(),
         new_domain_variables=dict(a=[0, 1], b=[0, 1]))
bn1.add_cpts([
    CPT(Factor(variables=['a'],
               data=[0.5, 0.5]),
        child='a'),
    CPT(Factor(variables=['b'],
               data=[0.3, 0.7]),
        child='b'),
])
w = CausalWorld(bn1)
samples = w.observe(10000)