def runTest(self):
    """Randomised test of L{IncrementalCompactFactor.update}.

    Builds a random Factor, wraps its records in an
    IncrementalCompactFactor, perturbs a copy (flipping some cells
    between zero and non-zero), feeds the perturbation in via update(),
    and then checks every marginal of the compact factor against direct
    marginalisation of the summed Factor.
    """
    num_vars = 5
    num_vals = 10
    for _ in xrange(num_runs):
        vs = dict([('V' + str(i), range(num_vals)) for i in xrange(num_vars)])
        # Random counts in [0, 20]; zeros occur, which exercises the
        # zero-suppression of the compact representation (rmin=0 below).
        # (randint(0, 20) is already non-negative, so no abs() is needed.)
        f = Factor(variables=vs.keys(),
                   data=[randint(0, 20) for i in xrange(num_vals ** num_vars)],
                   domain=Domain(),
                   new_domain_variables=vs,
                   check=True)
        records = [inst + (f[inst],) for inst in f.insts()]
        rawdata = (vs.keys(), vs, vs.keys(), records)
        cf = IncrementalCompactFactor(rawdata, rmin=0)

        # Perturb a copy: with probability 1/2 a zero cell becomes a
        # small positive count, and a non-zero cell becomes zero.
        g = f.copy(copy_domain=True)

        def swap_some(x):
            r = random()
            if x == 0:
                if r <= 0.5:
                    return randint(1, 5)
                return 0
            elif r <= 0.5:
                return 0
            return x

        g.map(swap_some)
        f += g

        # Push the perturbed data into the compact factor incrementally.
        records = [inst + (g[inst],) for inst in g.insts()]
        rawdata = (vs.keys(), vs, vs.keys(), records)
        cf.update(rawdata)

        # Every marginal of the updated compact factor must match the
        # corresponding marginal of the directly summed Factor.
        for variables in powerset(vs.keys()):
            g = f.copy(copy_domain=True)
            g.marginalise_away(g.variables() - frozenset(variables))
            self.assert_(same_factor(g, cf.makeFactor(variables), verbose=True))
def tryModel(self, model):
    """Sample C{model} in batches and check that an
    L{IncrementalCompactFactor} updated batch-by-batch agrees, on every
    family marginal, with the sum of one L{CompactFactor} per batch.
    """
    sampler = ForwardSampler(model)
    batch = sampler.samples(100)
    # One CompactFactor per batch, plus a single incrementally-updated one.
    per_batch = [CompactFactor(batch, domain=Domain())]
    incremental = IncrementalCompactFactor(batch, domain=Domain())
    for _ in xrange(10):
        batch = sampler.samples(100)
        per_batch.append(CompactFactor(batch, domain=Domain()))
        incremental.update(batch)
    # For each child's family, the incremental marginal should equal the
    # sum of the per-batch marginals.
    for child in model.variables():
        family = model.adg().parents(child) | set([child])
        actual = incremental.makeFactor(family)
        expected = per_batch[0].makeFactor(family)
        for other in per_batch[1:]:
            expected += other.makeFactor(family)
        self.assert_(same_factor(actual, expected))
def runTest(self):
    """Randomised test of L{IncrementalCompactFactor.makeFactor}.

    Builds a random Factor, constructs an IncrementalCompactFactor from
    its records, and checks every marginal against direct
    marginalisation of the Factor.
    """
    num_vars = 5
    num_vals = 10
    # NOTE: the outer loop variable is '_' — in the original it was 'i',
    # which the Python 2 list-comprehension variable below silently
    # clobbered (comprehension variables leak in Python 2).
    for _ in xrange(num_runs):
        vs = dict([('V' + str(i), range(num_vals)) for i in xrange(num_vars)])
        # Counts in [0, 5] so zero cells occur and exercise the compact
        # representation.  (randint(0, 5) is already non-negative, so the
        # original abs() was redundant.)
        f = Factor(variables=vs.keys(),
                   data=[randint(0, 5) for i in xrange(num_vals ** num_vars)],
                   domain=Domain(),
                   new_domain_variables=vs,
                   check=True)
        records = [inst + (f[inst],) for inst in f.insts()]
        rawdata = (vs.keys(), vs, vs.keys(), records)
        cf = IncrementalCompactFactor(rawdata)
        for variables in powerset(vs.keys()):
            g = f.copy(copy_domain=True)
            g.marginalise_away(g.variables() - frozenset(variables))
            self.assert_(same_factor(g, cf.makeFactor(variables), verbose=True))
class CausalWorld(SubDomain):
    """A container for experimental data derived from a causal Bayesian
    network.

    Observational samples are accumulated in C{_data}; interventional
    samples are accumulated separately in C{_inter_data}, keyed by the
    frozenset of intervened variables.  Both hold
    L{IncrementalCompactFactor}s.
    """

    def __init__(self, bn, burnin=1000):
        """
        @param bn: A causal Bayesian network from which samples are drawn.
        @type bn: L{CBN}
        @param burnin: The burn in for the L{GibbsSampler} used for
        generating interventional data.
        @type burnin: int
        """
        super(CausalWorld, self).__init__(domain=Domain.copy(bn), variables=bn.variables())
        # Keep a pristine copy of the model; interventions are applied to
        # further copies (see query), never to this one.
        self._pure_model = bn.copy(copy_domain=True)
        self._pure_sampler = ForwardSampler(self._pure_model)
        self._burnin = burnin
        # Observational data: created lazily on the first observe() call.
        self._data = None
        # Per-intervention samplers and data, keyed by the frozenset of
        # intervened variables.
        self._inter_sampler = {}
        self._inter_data = {}

    def copy(self):
        """Return a copy of this world with independently-copied data.

        NOTE(review): both samplers are set to C{None} rather than C{{}}
        or fresh samplers, so observe()/query() on the copy would fail
        until they are rebuilt — confirm this is intentional.
        """
        cpy = copy(self)
        cpy._pure_model = self._pure_model.copy(copy_domain=True)
        cpy._pure_sampler = None
        cpy._inter_sampler = None
        cpy._inter_data = {}
        for k in self._inter_data.keys():
            cpy._inter_data[k] = self._inter_data[k].copy()
        # merge _data into _inter_data!
        if self._data is not None:
            cpy._data = self._data.copy()
        return cpy

    def interventions(self):
        """
        @return: A list of interventional data sets available. A zero
        length element denotes observational data.
        """
        avail_data = self._inter_data.keys()
        if self._data is not None:
            # The empty frozenset stands for "no intervention", i.e.
            # observational data.
            avail_data.append(frozenset())
        return avail_data

    def intervention_data(self, inter):
        """Obtain the L{IncrementalCompactFactor} corresponding to the
        intervention C{inter}.  C{inter} is typically an element of the
        return value of L{interventions}.  A zero length C{inter}
        indicates observational data.
        """
        if len(inter) == 0:
            return self._data
        return self._inter_data[inter]

    def observe(self, num_samples, skip=0):
        """Draw C{num_samples} observational samples, separated by
        C{skip} steps of the sampler.  These samples are appended to any
        existing observational samples and then returned.

        @return: L{IncrementalCompactFactor}
        """
        samples = self._pure_sampler.samples(num_samples, skip)
        if self._data is None:
            self._data = IncrementalCompactFactor(samples, domain=Domain())
        else:
            self._data.update(samples)
        return self._data

    def query(self, intervention, num_samples, skip=0):
        """Draw C{num_samples} interventional samples, separated by
        C{skip} steps of the sampler.  These samples are appended to any
        existing interventional samples and then returned.  The
        intervention made is that of C{intervention}.

        @param intervention: A dictionary mapping variables in the
        L{CBN} to a single value in the domain.
        @return: L{IncrementalCompactFactor}
        """
        k = frozenset(intervention.keys())
        if not self._inter_sampler.has_key(k):
            # Build (once per intervened-variable set) a mutilated model
            # with the intervention applied, and a Gibbs sampler for it.
            do_model = CBN.from_bn(self._pure_model.copy(copy_domain=True))
            do_model.intervene(intervention)
            self._inter_sampler[k] = GibbsSampler(do_model, self._burnin)
        do_sampler = self._inter_sampler[k]
        samples = do_sampler.samples(num_samples, skip)
        if not self._inter_data.has_key(k):
            self._inter_data[k] = IncrementalCompactFactor(samples, domain=Domain())
        else:
            self._inter_data[k].update(samples)
        return self._inter_data[k]

    def makeFactor(self, variables):
        """Construct a factor from only observations (since cannot
        determine whether interventional evidence is admissible)."""
        return self._data.makeFactor(variables)

    def makeCPT(self, child, parents, force_cpt=False, prior=1.0, check=False):
        """Use all data applicable to C{child} to make its CPT.

        @param prior: the Dirichlet prior parameter (the same parameter
        value is used for all instances!)  Note there may be some
        problems with this method: a B{different} prior is used by the
        BDeu score.  However, in practice, for parameter estimation,
        this prior method seems to be ok.  I was lazy and it was simple
        to implement (cb).  If prior is zero, then the parameters are
        the maximum likelihood estimation solutions.
        """
        # child can use all observable data, and all interventional data where
        # child was not intervened.
        variables = set(parents) | set([child])
        f_child = self._data.makeFactor(variables)
        for k in self._inter_data.keys():
            if child in k:
                continue
            f = self._inter_data[k].makeFactor(variables)
            # fill in the missing zeros
            f.data_extend(dict([(var, f_child.values(var)) for var in k & f.variables()]))
            # domain monkeying
            for var in k:
                f.change_domain_variable(var, f_child.values(var))
            # eventually it looks like something edible.
            # NOTE(review): 'f' is prepared above but then discarded —
            # should the next line be 'f_child += f'?  Confirm.
            f_child += self._inter_data[k].makeFactor(variables)
            # chomp
        return CPT(f_child + prior, child, cpt_check=check, cpt_force=force_cpt)

    def family_score(self, child, parents):
        """Obtain the BDeu score of a particular family (consisting of a
        C{child} and its C{parents}) using all applicable experimental
        data."""
        return self.makeCPT(child, parents, prior=0.0, force_cpt=False, check=False).bdeu_score()