Example #1
1
from gPy.LearningUtils import *
from gPy.Examples import asia
from gPy.K2 import *
from gPy.Models import CBN

# generate 5000 samples of the Asia network
data = CausalWorld(asia)
data.observe(5000)

# keep the generating network and its ADG as the reference for comparison
true_model = asia
true_adg = asia.adg()

# search for the ADG with K2; a topological order of the true ADG is passed
# to the search alongside the data
found_adg = K2(max_parents_family=3).search(data, true_adg.topological_order())
# fit a model to the data using the ADG
found_model = CBN.from_adg_data(found_adg, data)

# report both graphs, then compare the found structure and fitted model
# against the reference
print 'True ADG\n', true_adg
print 'Found ADG\n', found_adg
print 'Structural Hamming distance:',shd(found_adg,true_adg)
print 'BDeu scores of fitted models of'
print 'found ADG:', bdeu(found_adg,data)
print ' true ADG:', bdeu(true_adg,data)
print 'KL-divergence of fitted found model from true model:',dkl(true_model,found_model)

Example #2
1
 def runTest(self):
     data = self._world.observe(num_samples)
     p = CBN.from_bn(self._skel.copy(copy_domain=True))
     print 'learning from',data.size(),'samples: BDeu score:',p.bdeu_score(data)
     p.estimate_parameters(data)
     self.failUnless(same_factor(distribution_of(p), distribution_of(self._skel), dp=2, verbose=True))
Example #3
1
from gPy.FKMCMC import fk_exp_graph
from gPy.Models import CBN

# NOTE(review): CausalWorld, asia, K2, shd, bdeu and dkl are not imported in
# the visible part of this example -- presumably imported elsewhere; confirm.

# generate 500 samples of the Asia network
data = CausalWorld(asia)
data.observe(500)

# keep the generating network and its ADG as the reference for comparison
true_model = asia
true_adg = asia.adg()

# produce a sample of ADGs using K2 to search
search = K2(max_parents_family=3)
# generate 5*2 = 10 ADGs, one from every 7th step of the MCMC, using 2
# `independent' order chains
found_adgs = fk_exp_graph(search, data,
                            num_samples = 5,
                            sample_every = 7,
                            num_orderings = 2)

# fit one model to the data per sampled ADG
found_models = [CBN.from_adg_data(found_adg, data) for found_adg in found_adgs]

# each report line below lists one value per sampled ADG / fitted model
print 'Structural Hamming distance:',[shd(found_adg,true_adg) for found_adg in found_adgs]
print 'BDeu scores of fitted models of'
print 'found ADG:', [bdeu(found_adg,data) for found_adg in found_adgs]
print ' true ADG:', bdeu(true_adg,data)
# the trailing comma keeps the label and the list on the same output line
print 'KL-divergence of fitted found model from true model:',
print [dkl(true_model,found_model) for found_model in found_models]

Example #4
0
    def setUp(self):
        """Prepare private copies of two example networks and intervened variants.

        For each of C{minibn} and C{asia} a copy (with its own domain) is
        stored, together with a L{CBN} built from a further copy on which a
        single-variable intervention has been made.
        """
        from gPy.Examples import minibn, asia

        # mini network, plus a causal copy with 'Smoking' fixed to 'smoker'
        plain_mini = minibn.copy(copy_domain=True)
        self._minibn = plain_mini
        self._minibn_do = mini_do = CBN.from_bn(plain_mini.copy(copy_domain=True))
        mini_do.intervene({'Smoking': frozenset(['smoker'])})

        # Asia network, plus a causal copy with 'Dyspnea' fixed to 'present'
        plain_asia = asia.copy(copy_domain=True)
        self._asia = plain_asia
        self._asia_do = asia_do = CBN.from_bn(plain_asia.copy(copy_domain=True))
        asia_do.intervene({'Dyspnea': frozenset(['present'])})
Example #5
0
    def tryModel(self, model):
        """Check that the ADG of C{model} is recovered from interventional CIs.

        Every intervention yielded by C{good_interventions()} of a causal copy
        of C{model} is applied to its own fresh copy, and the L{GraphCI} of the
        intervened ADG is stored under the set of intervened variables.  The
        L{InterventionalICPattern} built from these must report an empty
        C{lines()} and must orient to exactly C{model.adg()}.

        @param model: the model whose ADG should be recovered.
        """
        # this copy is only used to enumerate the interventions
        enumerator = CBN.from_bn(model.copy(copy_domain=True))
        # perform every interventional query
        ci = {}
        for q in enumerator.good_interventions():
            # generate the model: a fresh copy per query, so that
            # interventions never accumulate
            model_do = CBN.from_bn(model.copy(copy_domain=True))
            model_do.intervene(q)
            ci[frozenset(q.keys())] = GraphCI(model_do.adg())

        iic = InterventionalICPattern(ci)
        # assertEqual is the supported spelling of the deprecated assertEquals
        self.assertEqual(iic.lines(), [])
        self.assertEqual(iic.orient(), model.adg())
    def tryModel(self, model):
        """Check that interventional CIs identify the ADG of C{model}.

        A causal copy of C{model} supplies the interventions to try (via
        C{good_interventions()}).  Each one is applied to a new copy of the
        model and the L{GraphCI} of the resulting ADG is recorded, keyed by
        the frozenset of intervened variables.  The L{InterventionalICPattern}
        over all of them must have an empty C{lines()} and must orient to
        C{model.adg()}.

        @param model: the model whose ADG should be recovered.
        """
        model_do = CBN.from_bn(model.copy(copy_domain=True))
        # perform every interventional query
        ci = {}
        # the iterable is evaluated once, so rebinding model_do inside the
        # loop does not affect which interventions are visited
        for q in model_do.good_interventions():
            # generate the model
            model_do = CBN.from_bn(model.copy(copy_domain=True))
            model_do.intervene(q)
            ci[frozenset(q.keys())] = GraphCI(model_do.adg())

        iic = InterventionalICPattern(ci)
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(iic.lines(), [])
        self.assertEqual(iic.orient(), model.adg())
Example #7
0
 def setUp(self):
     """Set up a causal world, a reference network and an intervened query.

     C{minibn} is wrapped in a L{CausalWorld} and also kept as the
     reference skeleton.  The query model is a L{CBN} built from a copy of
     C{minibn} on which 'Smoking' has been fixed to 'nonsmoker'.
     """
     from gPy.Examples import minibn

     self._world = CausalWorld(minibn)
     self._skel = minibn

     do_nonsmoker = {'Smoking': frozenset(['nonsmoker'])}
     self._intervention = do_nonsmoker

     # intervene on a copy so that the shared example network stays untouched
     self._query = query = CBN.from_bn(minibn.copy(copy_domain=True))
     query.intervene(do_nonsmoker)
Example #8
0
 def tryModel(self, model):
     """Check the effect of intervening on each variable of C{model} in turn.

     For every variable a fresh causal copy of C{model} is intervened on at
     that variable with a single value.  The checks require that the
     intervened variable then has no parents, that its values equal the
     chosen value, that its factor holds exactly one positive datum, that
     the factors still correspond to the hypergraph's hyperedges, and that
     every other variable keeps its parents and its factor.

     @param model: the model to copy and intervene on.
     """
     for var in model.variables():
         # intervene at each variable in turn
         model_do = CBN.from_bn(model.copy(copy_domain=True))
         # check that the hyperedges correspond to all factors
         self.assertEqual(frozenset([v for v,x in model_do.items()]),
                          frozenset(model_do._hypergraph.hyperedges()))
         # fix var to one value taken from its set of values
         val = frozenset([set(model.values(var)).pop()])
         model_do.intervene({var: val})
         self.assertEqual(len(model_do.parents(var)), 0)
         self.assertEqual(model_do.values(var), val)
         self.assertEqual(len(model_do[var].data()), 1)
         # the factor/hyperedge correspondence must survive the intervention
         self.assertEqual(frozenset([v for v,x in model_do.items()]),
                          frozenset(model_do._hypergraph.hyperedges()))
         self.assertTrue(model_do[var].data()[0] > 0)
         for other_var in model.variables():
             if other_var == var:
                 continue
             self.assertEqual(model_do.parents(other_var), model.parents(other_var))
             if var in model.parents(other_var):
                 # children of var: same variables, and the entries that
                 # remain after the intervention must match the original
                 self.assertEqual(frozenset(model_do[other_var].variables()), frozenset(model[other_var].variables()))
                 for inst in model_do[other_var].insts():
                     self.assertAlmostEqual(model_do[other_var][inst], model[other_var][inst])
             else:
                 self.assertTrue(same_factor(model_do[other_var], model[other_var]))
Example #9
0
 def setUp(self):
     """Build two small networks as L{CBN}s alongside their arrow lists.

     C{self.arr} and C{self.cbn} are filled in the same order, so entry
     C{i} of one belongs with entry C{i} of the other.
     """
     # The networks are collected in a list rather than a dict keyed by
     # index: the order of self.cbn then matches self.arr by construction
     # instead of depending on dict key iteration order.
     networks = []
     self.arr = []

     # model a -> b
     two_var = BN(domain=Domain(),
                  new_domain_variables={
                      'a': [0, 1],
                      'b': [0, 1]
                  })
     two_var.add_cpts([
         CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
         CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]),
             child='b')
     ])
     networks.append(two_var)
     self.arr.append([('a', 'b')])

     # model a -> b -> c
     three_var = BN(domain=Domain(),
                    new_domain_variables={
                        'a': [0, 1],
                        'b': [0, 1],
                        'c': [0, 1]
                    })
     three_var.add_cpts([
         CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
         CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]),
             child='b'),
         CPT(Factor(variables=['c', 'b'], data=[0.1, 0.9, 0.2, 0.8]),
             child='c')
     ])
     networks.append(three_var)
     self.arr.append([('a', 'b'), ('b', 'c')])

     self.cbn = [CBN.from_bn(network) for network in networks]
Example #10
0
    def runTest(self):
        """Check that parameters estimated from Gibbs samples match the source.

        100000 samples are drawn from a L{GibbsSampler} over C{self._minibn}
        and packed into a L{CompactFactor}.  A L{CBN} built from a copy of
        C{self._minibn} has its parameters estimated from that data, and its
        distribution must then be the same factor as the distribution of
        C{self._minibn} (compared with C{dp=2}).
        """
        samples = GibbsSampler(self._minibn).samples(100000)
        data = CompactFactor(samples,domain=Domain())

        p = CBN.from_bn(self._minibn.copy(copy_domain=True))
        p.estimate_parameters(data)
        # assertTrue is the supported spelling of the deprecated failUnless alias
        self.assertTrue(same_factor(distribution_of(p), distribution_of(self._minibn), dp=2, verbose=True))
Example #11
0
 def setUp(self):
     """Build two small networks as L{CBN}s alongside their arrow lists.

     C{self.arr} and C{self.cbn} are filled in the same order, so entry
     C{i} of one belongs with entry C{i} of the other.
     """
     # A list (not a dict keyed by index) holds the networks, so that the
     # order of self.cbn follows self.arr by construction rather than by
     # dict key iteration order.
     networks = []
     self.arr = []

     # model a -> b
     ab = BN(domain=Domain(), new_domain_variables={'a': [0,1], 'b':[0,1]})
     ab.add_cpts([CPT(Factor(variables=['a'], data=[0.5, 0.5]),child='a')
                 ,CPT(Factor(variables=['a','b'], data=[0.3, 0.7, 0.4, 0.6]),child='b')
                 ])
     networks.append(ab)
     self.arr.append([('a','b')])

     # model a -> b -> c
     abc = BN(domain=Domain(), new_domain_variables={'a': [0,1], 'b':[0,1], 'c':[0,1]})
     abc.add_cpts([CPT(Factor(variables=['a'], data=[0.5, 0.5]),child='a')
                  ,CPT(Factor(variables=['a','b'], data=[0.3, 0.7, 0.4, 0.6]),child='b')
                  ,CPT(Factor(variables=['c','b'], data=[0.1, 0.9, 0.2, 0.8]),child='c')
                  ])
     networks.append(abc)
     self.arr.append([('a','b'),('b','c')])

     self.cbn = [CBN.from_bn(network) for network in networks]
Example #12
0
    def tryModel(self, model):
        """Check that the ADG of C{model} is recovered from interventional CIs.

        For every subset yielded by C{powerset(model.variables())} an
        intervention fixing each variable of the subset to one of its values
        is applied to a fresh causal copy of C{model}.  The L{GraphCI} of each
        intervened ADG is stored under the set of intervened variables, and
        the L{InterventionalICPattern} built from them must report an empty
        C{lines()} and orient to exactly C{model.adg()}.

        @param model: the model whose ADG should be recovered.
        """
        # perform every interventional query
        ci = {}
        for xs in powerset(model.variables()):
            # build the intervention q: one value per intervened variable
            q = {}
            for x in xs:
                q[x] = frozenset([set(model.values(x)).pop()])

            # generate the model
            model_do = CBN.from_bn(model.copy(copy_domain=True))
            model_do.intervene(q)
            ci[frozenset(q.keys())] = GraphCI(model_do.adg())

        iic = InterventionalICPattern(ci)
        # assertEqual is the supported spelling of the deprecated assertEquals
        self.assertEqual(iic.lines(), [])
        self.assertEqual(iic.orient(), model.adg())
Example #13
0
    def tryModel(self, model):
        """Check that interventional CIs over all variable subsets give the ADG.

        Each subset yielded by C{powerset(model.variables())} is turned into
        an intervention that fixes every variable of the subset to one of its
        values.  The intervention is applied to a new causal copy of C{model}
        and the L{GraphCI} of the resulting ADG is recorded under the set of
        intervened variables.  The L{InterventionalICPattern} over all of them
        must have an empty C{lines()} and must orient to C{model.adg()}.

        @param model: the model whose ADG should be recovered.
        """
        # perform every interventional query
        ci = {}
        for xs in powerset(model.variables()):
            # build the intervention q
            q = {}
            for x in xs:
                # one value taken from the variable's set of values
                q[x] = frozenset([set(model.values(x)).pop()])

            # generate the model on a fresh copy, so queries never accumulate
            model_do = CBN.from_bn(model.copy(copy_domain=True))
            model_do.intervene(q)
            ci[frozenset(q.keys())] = GraphCI(model_do.adg())

        iic = InterventionalICPattern(ci)
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(iic.lines(), [])
        self.assertEqual(iic.orient(), model.adg())
Example #14
0
    def tryModel(self, model):
        """Check that C{dkl} is zero on identical models and positive otherwise.

        C{dkl(model, model)} must be (almost) zero.  A causal copy of C{model}
        then has the factor of one randomly chosen variable replaced by random
        data, after which C{dkl} between the original and the altered model
        must be strictly positive in both directions.

        @param model: the model to compare against an altered copy of itself.
        """
        self.assertAlmostEqual(dkl(model,model),0)
        cbn = CBN.from_bn(model.copy(copy_domain=True))
        v = choice(tuple(cbn.variables()))
        f = cbn[v]
        dat = rand_factor_data(len(f.data()))
        # if a random entry coincides (to 4 decimal places) with the original
        # entry, push the first such entry away from it
        for i,(a,b) in enumerate(zip(f.data(),dat)):
            if round(a-b,4) == 0:
                dat[i] += 10.0
                break

        replacement = Factor(variables=f.variables(), data=dat, domain=cbn)
        cbn._replace_factor(v, CPT(replacement, v, cpt_force=True))
        kl = dkl(model,cbn)
        self.assertTrue(kl > 0)
        kl_ = dkl(cbn,model)
        self.assertTrue(kl_ > 0)
Example #15
0
 def query(self, intervention, num_samples, skip=0):
     """Draw C{num_samples} interventional samples, separated by C{skip}
     steps of the sampler.  These samples are appended to any existing
     interventional samples and then returned.  The intervention made is
     that of C{intervention}.
     @param intervention: A dictionary mapping variables in the L{CBN} to
     a single value in the domain.
     @param num_samples: The number of samples to draw from the sampler.
     @param skip: Passed to the sampler together with C{num_samples}
     (defaults to 0).
     @return: L{IncrementalCompactFactor}
     """
     # NOTE(review): samplers and data are cached by the set of intervened
     # variables only, not by the values they are set to, so a later query
     # on the same variables with other values reuses the first sampler --
     # confirm that this is intended.
     k = frozenset(intervention.keys())
     # `in` replaces dict.has_key, which no longer exists in Python 3
     if k not in self._inter_sampler:
         # first query for this set of variables: build its sampler
         do_model = CBN.from_bn(self._pure_model.copy(copy_domain=True))
         do_model.intervene(intervention)
         self._inter_sampler[k] = GibbsSampler(do_model, self._burnin)
     do_sampler = self._inter_sampler[k]
     samples = do_sampler.samples(num_samples, skip)
     if k not in self._inter_data:
         self._inter_data[k] = IncrementalCompactFactor(samples,domain=Domain())
     else:
         self._inter_data[k].update(samples)
     return self._inter_data[k]
Example #16
0
    def tryModel(self, model):
        """Check that C{dkl} stays finite and does not shrink for altered models.

        C{dkl(model, model)} must be finite.  A causal copy of C{model} then
        has the factor of one randomly chosen variable replaced by random
        data.  C{dkl} from the original to the altered model must be finite
        and at least C{dkl(model, model)}; C{dkl} of the altered model with
        itself must be finite, and C{dkl} from the altered model to the
        original must be at least that value.

        @param model: the model to compare against an altered copy of itself.
        """
        kl = dkl(model,model)
        self.assertTrue(is_finite(kl))

        cbn = CBN.from_bn(model.copy(copy_domain=True))
        v = choice(tuple(cbn.variables()))
        f = cbn[v]
        dat = rand_factor_data(len(f.data()))
        # push every random entry that coincides (to 4 decimal places) with
        # the original entry away from it by a random amount
        for i,(a,b) in enumerate(zip(f.data(),dat)):
            if round(a-b,4) == 0:
                dat[i] += uniform(1.0,100.0)

        replacement = Factor(variables=f.variables(), data=dat, domain=cbn)
        cbn._replace_factor(v, CPT(replacement, v, cpt_force=True))
        ikl = dkl(model,cbn)
        self.assertTrue(is_finite(ikl))
        self.assertTrue(ikl >= kl)
        # from here on kl is the self-divergence of the altered model
        kl = dkl(cbn,cbn)
        self.assertTrue(is_finite(kl))
        ikl_ = dkl(cbn,model)
        self.assertTrue(ikl_ >= kl)
Example #17
0
## independencies into a PDAG. Note that this may fail if the found conditional
## independencies are inconsistent with an ADG!

## NOTE: ci, data, true_adg and true_model are not defined in this part of the
## example; they are presumably set up earlier in the script.
pdag = ICPattern(ci)

## Meek's algorithm is used to resolve this PDAG into a particular ADG

found_adg = pdag.orient()

## or for short:
## found_adg = ICPattern(PCCI(G2Separator(data))).orient()
##
## Note that G2Separator can be replaced with X2Separator or GraphSeparator
## and PCCI can be replaced with GraphCI.  The PDAG is the essential graph
## representing the Markov equivalence class of the conditional independencies.
## (This perhaps goes some way to explaining the complexity --- it depends what
## you are interested in.)
##
## Note also that you may want to scale the p-value with the sample size, and
## that there is a BFSeparator, but this is likely mathematically unsound.

# fit a model to the data using the ADG
found_model = CBN.from_adg_data(found_adg, data)

# compare the found structure and the fitted model against the reference
print 'Structural Hamming distance:', shd(found_adg, true_adg)
print 'BDeu scores of fitted models of'
print 'found ADG:', bdeu(found_adg, data)
print ' true ADG:', bdeu(true_adg, data)
print 'KL-divergence of fitted found model from true model:', dkl(
    true_model, found_model)