from gPy.LearningUtils import *
from gPy.Examples import asia
from gPy.K2 import *
from gPy.Models import CBN

# generate 5000 samples of the Asia network
data = CausalWorld(asia)
data.observe(5000)

# for comparison
true_model = asia
true_adg = asia.adg()

# search for the ADG
found_adg = K2(max_parents_family=3).search(data, true_adg.topological_order())

# fit a model to the data using the ADG
found_model = CBN.from_adg_data(found_adg, data)

print 'True ADG\n', true_adg
print 'Found ADG\n', found_adg
print 'Structural Hamming distance:', shd(found_adg, true_adg)
print 'BDeu scores of fitted models of'
print 'found ADG:', bdeu(found_adg, data)
print ' true ADG:', bdeu(true_adg, data)
print 'KL-divergence of fitted found model from true model:', dkl(true_model, found_model)
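# K2's output depends on the variable ordering it is given; the example above
# hands it the true topological order, which is not normally available. A
# minimal sketch of the more realistic setting (assumption: search() accepts
# any ordering of the network's variables; here we simply shuffle the order
# used above):
from random import shuffle

order = list(true_adg.topological_order())
shuffle(order)
found_adg_rand = K2(max_parents_family=3).search(data, order)
print 'BDeu score of ADG found with a random ordering:', bdeu(found_adg_rand, data)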
def runTest(self):
    data = self._world.observe(num_samples)
    p = CBN.from_bn(self._skel.copy(copy_domain=True))
    print 'learning from', data.size(), 'samples: BDeu score:', p.bdeu_score(data)
    p.estimate_parameters(data)
    self.failUnless(same_factor(distribution_of(p), distribution_of(self._skel),
                                dp=2, verbose=True))
from gPy.LearningUtils import *
from gPy.Examples import asia
from gPy.K2 import *
from gPy.FKMCMC import fk_exp_graph
from gPy.Models import CBN

# generate 500 samples of the Asia network
data = CausalWorld(asia)
data.observe(500)

# for comparison
true_model = asia
true_adg = asia.adg()

# produce a sample of ADGs using K2 to search
search = K2(max_parents_family=3)

# generate 5*2 = 10 ADGs from every 7th step of the MCMC using 2
# `independent' order chains
found_adgs = fk_exp_graph(search, data, num_samples=5, sample_every=7,
                          num_orderings=2)

# fit models to the data using the ADGs
found_models = [CBN.from_adg_data(found_adg, data) for found_adg in found_adgs]

print 'Structural Hamming distance:', [shd(found_adg, true_adg) for found_adg in found_adgs]
print 'BDeu scores of fitted models of'
print 'found ADG:', [bdeu(found_adg, data) for found_adg in found_adgs]
print ' true ADG:', bdeu(true_adg, data)
print 'KL-divergence of fitted found model from true model:',
print [dkl(true_model, found_model) for found_model in found_models]
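# A possible follow-up (sketch, not part of the original example): select the
# highest-scoring ADG from the MCMC sample by BDeu before fitting a single
# model. Only functions already used above (bdeu, dkl, CBN.from_adg_data) are
# assumed.
best_adg = max(found_adgs, key=lambda adg: bdeu(adg, data))
best_model = CBN.from_adg_data(best_adg, data)
print 'Best sampled ADG by BDeu:', bdeu(best_adg, data)
print 'KL-divergence of its fitted model from the true model:', dkl(true_model, best_model)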
def setUp(self):
    from gPy.Examples import minibn, asia
    self._minibn = minibn.copy(copy_domain=True)
    self._minibn_do = CBN.from_bn(self._minibn.copy(copy_domain=True))
    self._minibn_do.intervene({'Smoking': frozenset(['smoker'])})
    self._asia = asia.copy(copy_domain=True)
    self._asia_do = CBN.from_bn(self._asia.copy(copy_domain=True))
    self._asia_do.intervene({'Dyspnea': frozenset(['present'])})
def tryModel(self, model):
    model_do = CBN.from_bn(model.copy(copy_domain=True))
    # perform every interventional query
    ci = {}
    for q in model_do.good_interventions():
        # generate the model
        model_do = CBN.from_bn(model.copy(copy_domain=True))
        model_do.intervene(q)
        ci[frozenset(q.keys())] = GraphCI(model_do.adg())
    iic = InterventionalICPattern(ci)
    self.assertEquals(iic.lines(), [])
    self.assertEquals(iic.orient(), model.adg())
def setUp(self):
    from gPy.Examples import minibn
    self._world = CausalWorld(minibn)
    self._skel = minibn
    self._intervention = {'Smoking': frozenset(['nonsmoker'])}
    self._query = CBN.from_bn(minibn.copy(copy_domain=True))
    self._query.intervene(self._intervention)
def tryModel(self, model):
    for var in model.variables():
        # intervene at each variable in turn
        model_do = CBN.from_bn(model.copy(copy_domain=True))
        # check that the hyperedges correspond to all factors
        self.assertEquals(frozenset([v for v, x in model_do.items()]),
                          frozenset(model_do._hypergraph.hyperedges()))
        val = frozenset([set(model.values(var)).pop()])
        model_do.intervene({var: val})
        self.assertEquals(len(model_do.parents(var)), 0)
        self.assertEquals(model_do.values(var), val)
        self.assertEquals(len(model_do[var].data()), 1)
        self.assertEquals(frozenset([v for v, x in model_do.items()]),
                          frozenset(model_do._hypergraph.hyperedges()))
        self.assert_(model_do[var].data()[0] > 0)
        for other_var in model.variables():
            if other_var == var:
                continue
            self.assertEquals(model_do.parents(other_var), model.parents(other_var))
            if var in model.parents(other_var):
                self.assertEquals(frozenset(model_do[other_var].variables()),
                                  frozenset(model[other_var].variables()))
                for inst in model_do[other_var].insts():
                    self.assertAlmostEquals(model_do[other_var][inst],
                                            model[other_var][inst])
            else:
                self.assert_(same_factor(model_do[other_var], model[other_var]))
def setUp(self):
    # model a -> b
    bn = {}
    self.arr = []
    bn[0] = BN(domain=Domain(),
               new_domain_variables={'a': [0, 1], 'b': [0, 1]})
    bn[0].add_cpts([
        CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
        CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]), child='b')
    ])
    self.arr.append([('a', 'b')])
    bn[1] = BN(domain=Domain(),
               new_domain_variables={'a': [0, 1], 'b': [0, 1], 'c': [0, 1]})
    bn[1].add_cpts([
        CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
        CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]), child='b'),
        CPT(Factor(variables=['c', 'b'], data=[0.1, 0.9, 0.2, 0.8]), child='c')
    ])
    self.arr.append([('a', 'b'), ('b', 'c')])
    self.cbn = [CBN.from_bn(bn[i]) for i in bn.keys()]
def runTest(self):
    samples = GibbsSampler(self._minibn).samples(100000)
    data = CompactFactor(samples, domain=Domain())
    p = CBN.from_bn(self._minibn.copy(copy_domain=True))
    p.estimate_parameters(data)
    self.failUnless(same_factor(distribution_of(p), distribution_of(self._minibn),
                                dp=2, verbose=True))
def setUp(self):
    # model a -> b
    bn = {}
    self.arr = []
    bn[0] = BN(domain=Domain(),
               new_domain_variables={'a': [0, 1], 'b': [0, 1]})
    bn[0].add_cpts([
        CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
        CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]), child='b')
    ])
    self.arr.append([('a', 'b')])
    bn[1] = BN(domain=Domain(),
               new_domain_variables={'a': [0, 1], 'b': [0, 1], 'c': [0, 1]})
    bn[1].add_cpts([
        CPT(Factor(variables=['a'], data=[0.5, 0.5]), child='a'),
        CPT(Factor(variables=['a', 'b'], data=[0.3, 0.7, 0.4, 0.6]), child='b'),
        CPT(Factor(variables=['c', 'b'], data=[0.1, 0.9, 0.2, 0.8]), child='c')
    ])
    self.arr.append([('a', 'b'), ('b', 'c')])
    self.cbn = [CBN.from_bn(bn[i]) for i in bn.keys()]
def tryModel(self, model):
    # perform every interventional query
    ci = {}
    for xs in powerset(model.variables()):
        # build the intervention q
        q = {}
        for x in xs:
            q[x] = frozenset([set(model.values(x)).pop()])
        # generate the model
        model_do = CBN.from_bn(model.copy(copy_domain=True))
        model_do.intervene(q)
        ci[frozenset(q.keys())] = GraphCI(model_do.adg())
    iic = InterventionalICPattern(ci)
    self.assertEquals(iic.lines(), [])
    self.assertEquals(iic.orient(), model.adg())
def tryModel(self, model):
    self.assertAlmostEquals(dkl(model, model), 0)
    cbn = CBN.from_bn(model.copy(copy_domain=True))
    v = choice(tuple(cbn.variables()))
    f = cbn[v]
    dat = rand_factor_data(len(f.data()))
    change_one = None
    for i, (a, b) in enumerate(zip(f.data(), dat)):
        if round(a - b, 4) == 0:
            dat[i] += 10.0
            break
    cbn._replace_factor(v, CPT(Factor(variables=f.variables(),
                                      data=dat,
                                      domain=cbn),
                               v, cpt_force=True))
    kl = dkl(model, cbn)
    self.assert_(kl > 0)
    kl_ = dkl(cbn, model)
    self.assert_(kl_ > 0)
def query(self, intervention, num_samples, skip=0):
    """Draw C{num_samples} interventional samples, separated by C{skip}
    steps of the sampler. These samples are appended to any existing
    interventional samples and then returned. The intervention made is
    that of C{intervention}.
    @param intervention: A dictionary mapping variables in the L{CBN} to
    a frozenset containing a single value from the variable's domain.
    @return: L{IncrementalCompactFactor}
    """
    k = frozenset(intervention.keys())
    if not self._inter_sampler.has_key(k):
        do_model = CBN.from_bn(self._pure_model.copy(copy_domain=True))
        do_model.intervene(intervention)
        self._inter_sampler[k] = GibbsSampler(do_model, self._burnin)
    do_sampler = self._inter_sampler[k]
    samples = do_sampler.samples(num_samples, skip)
    if not self._inter_data.has_key(k):
        self._inter_data[k] = IncrementalCompactFactor(samples, domain=Domain())
    else:
        self._inter_data[k].update(samples)
    return self._inter_data[k]
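# Hedged usage sketch: this assumes query() is a method of CausalWorld (the
# enclosing class is not shown here) and that interventions are given as
# variable -> frozenset-of-one-value mappings, as in the examples above.
world = CausalWorld(asia)
inter_data = world.query({'Smoking': frozenset(['smoker'])}, 1000, skip=10)
# a repeated call with the same intervention reuses the cached GibbsSampler
# and appends the new samples to the stored IncrementalCompactFactor
inter_data = world.query({'Smoking': frozenset(['smoker'])}, 1000, skip=10)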
def tryModel(self, model):
    kl = dkl(model, model)
    self.assert_(is_finite(kl))
    cbn = CBN.from_bn(model.copy(copy_domain=True))
    v = choice(tuple(cbn.variables()))
    f = cbn[v]
    dat = rand_factor_data(len(f.data()))
    change_one = None
    for i, (a, b) in enumerate(zip(f.data(), dat)):
        if round(a - b, 4) == 0:
            dat[i] += uniform(1.0, 100.0)
    cbn._replace_factor(v, CPT(Factor(variables=f.variables(),
                                      data=dat,
                                      domain=cbn),
                               v, cpt_force=True))
    ikl = dkl(model, cbn)
    self.assert_(is_finite(ikl))
    self.assert_(ikl >= kl)
    kl = dkl(cbn, cbn)
    self.assert_(is_finite(kl))
    ikl_ = dkl(cbn, model)
    self.assert_(ikl_ >= kl)
## ICPattern combines the found conditional independencies into a PDAG.
## Note that this may fail if the found conditional independencies are
## inconsistent with an ADG!
pdag = ICPattern(ci)
## Meek's algorithm is used to resolve this PDAG into a particular ADG
found_adg = pdag.orient()
## or for short:
## found_adg = ICPattern(PCCI(G2Separator(data))).orient()
##
## Note that G2Separator can be replaced with X2Separator or GraphSeparator
## and PCCI can be replaced with GraphCI. The PDAG is the essential graph
## representing the Markov equivalence class of the conditional independencies.
## (This perhaps goes some way to explaining the complexity --- it depends what
## you are interested in.)
##
## Note also that you may want to scale the p-value with the sample size and
## that there is a BFSeparator, but this is likely mathematically unsound.

# fit a model to the data using the ADG
found_model = CBN.from_adg_data(found_adg, data)

print 'Structural Hamming distance:', shd(found_adg, true_adg)
print 'BDeu scores of fitted models of'
print 'found ADG:', bdeu(found_adg, data)
print ' true ADG:', bdeu(true_adg, data)
print 'KL-divergence of fitted found model from true model:', dkl(true_model, found_model)
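# Sketch of the chi-squared variant mentioned in the comments above
# (assumption: X2Separator is constructed from the data exactly as
# G2Separator is); everything else reuses names from this example.
found_adg_x2 = ICPattern(PCCI(X2Separator(data))).orient()
found_model_x2 = CBN.from_adg_data(found_adg_x2, data)
print 'Structural Hamming distance with the X2 test:', shd(found_adg_x2, true_adg)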