def runTest(self):
    """With no data at all, both the X2 and G2 tests must fall back to
    reporting independence: zero degrees of freedom, p-value 1, statistic 0,
    for every pair and every conditioning set, at every factor size up to 5."""
    for size in xrange(6):
        names = ['V' + str(i) for i in xrange(size)]
        values = dict((name, [0, 1]) for name in names)
        records = []
        data = (names, values, names, records)
        # enumerate the size-`size` subsets just to build instantiation
        # tuples; every record carries a trailing count of zero, so the
        # factor still holds no observations
        for on in subsetn(names, size):
            row = [1 if name in on else 0 for name in names]
            records.append(tuple(row + [0]))
        factor = CompactFactor(data, domain=Domain())
        chi = X2Separator(factor)
        gsq = G2Separator(factor)
        for a, b in pairs(names):
            for cond in powerset(set(names) - set([a, b])):
                x2_p, x2_stat, x2_dof = chi.test_independ(a, b, set(cond))
                g2_p, g2_stat, g2_dof = gsq.test_independ(a, b, set(cond))
                # no data: degrees of freedom collapse to zero
                self.assertEquals(x2_dof, 0)
                self.assertEquals(g2_dof, 0)
                # the tests default to declaring independence
                self.assertEquals(x2_p, 1)
                self.assertEquals(g2_p, 1)
                # and the statistics themselves are zero
                self.assertEquals(x2_stat, 0)
                self.assertEquals(g2_stat, 0)
def runTest(self):
    """In the XOR data set X1 and X2 are marginally independent (and no
    independence involving X3 separates them), while every other pair of
    variables is dependent.

    Fixes: the original body was duplicated verbatim (the whole test ran
    twice), left debug ``print`` statements behind, and never closed the
    CSV file handle.
    """
    # close the file handle once read_csv has consumed it
    with open('tetrad_xor.csv') as fh:
        data = CompactFactor(read_csv(fh), domain=Domain())
    ci = PCCI(G2Separator(data))
    for a, b in pairs(data.variables()):
        if (a == 'X1' and b == 'X2') or (a == 'X2' and b == 'X1'):
            self.assert_(ci.has_independence(a, b))
            self.assert_(not ci.has_independence_involving(a, b, 'X3'))
        else:
            self.assert_(not ci.has_independence(a, b))
def _ic_discovery(self, separator):
    """Prune the complete skeleton by searching for conditional
    independencies of increasing conditioning-set size.

    Starting from a complete graph over all vertices, for each still-adjacent
    pair (x, y) — where at least one endpoint is in ``self._must_have`` —
    every size-``n`` subset of the admissible neighbourhood is tested with
    ``separator``; a found separator removes the line x - y.  ``n`` grows by
    one each sweep until no vertex has more than ``n`` neighbours left.

    :param separator: object with a ``separates(x, y, s)`` predicate
        (e.g. a G2/X2 separator) used as the CI oracle.
    Side effects: mutates ``self._skel``, increments ``self.num_tests``,
    and records found independencies via ``self._add_independence``.
    """
    n = 0
    # begin with the complete graph; lines are only ever removed
    self._skel.complete(self._skel.vertices())
    skel = self._skel
    too_many_adjacent = True
    while too_many_adjacent:
        too_many_adjacent = False
        # find a pair (x,y) such that the cardinality of the neighbourhood
        # exceeds n
        for x, y in pairs(self.variables()):
            # only pairs touching a must-have variable are considered
            if x not in self._must_have and y not in self._must_have:
                continue
            n_x = set(skel.neighbours(x))
            n_y = set(skel.neighbours(y))
            # skip pairs that are no longer adjacent
            if y not in n_x:
                continue
            # adjacency must be symmetric; anything else is a bug upstream
            if x not in n_y:
                raise RuntimeError,'inconsistent neighbourhoods'
            # separators must be potential ancestors of both variables.
            # constrain the neighbourhood to contain only potential ancestors
            cond = n_x & self.potential_ancestors(x)
            cond |= n_y & self.potential_ancestors(y)
            cond -= frozenset([x,y])
            # if the neighbourhood is too small, try the next pair
            if n > len(cond):
                continue
            # find an untested subset s of neighbours of x of cardinality n
            for s in subsetn(tuple(cond), n):
                s = frozenset(s)
                self.num_tests += 1
                # test for x _|_ y | s
                if separator.separates(x, y, s):
                    # see if we can find a more probable separator
                    s = self.hill_climb_cond(separator,x,y,s,cond)
                    # record independence since found
                    self._add_independence(x,y,s)
                    skel.remove_line(x, y)
                    break
        # increment required neighbourhood minimum size
        n += 1
        # see if we've found all the CIs: loop again only while some vertex
        # still has more than n neighbours
        too_many_adjacent = False
        for x in self.variables():
            too_many_adjacent |= len(skel.neighbours(x)) > n
            if too_many_adjacent:
                break
def _add_undirected_independencies(self):
    """Record every independence implied by the global Markov property of
    an undirected graph: a _|_ b | s whenever s separates a and b."""
    for left, right in pairs(self._graph.vertices()):
        # adjacent vertices can never be separated
        if self._graph.is_neighbour(left, right):
            continue
        lset = frozenset([left])
        rset = frozenset([right])
        rest = self._graph.vertices() - (lset | rset)
        for subset in powerset(rest):
            cond = set(subset)
            if self.has_independence(left, right):
                # skip conditioning sets that merely enlarge one
                # already recorded for this pair
                known = self._ind[self._index(left, right)]
                if any(prior <= cond for prior in known):
                    continue
            if self._graph.separates(lset, rset, cond):
                self._add_independence(left, right, cond)
def _add_directed_independencies(self):
    """Record every independence implied by the directed global Markov
    property: a _|_ b | s whenever s separates a and b in the moralised
    graph of the smallest ancestral set containing {a, b} and s."""
    for left, right in pairs(self._graph.vertices()):
        # a parent-child pair can never be separated
        if self._graph.is_parent(left, right) or self._graph.is_parent(right, left):
            continue
        lset = frozenset([left])
        rset = frozenset([right])
        rest = self._graph.vertices() - (lset | rset)
        for subset in powerset(rest):
            cond = frozenset(subset)
            if self.has_independence(left, right):
                # skip conditioning sets that merely enlarge one
                # already recorded for this pair
                known = self._ind[self._index(left, right)]
                if any(prior <= cond for prior in known):
                    continue
            # moralise the ancestral graph of {a, b} union s and test
            # plain (undirected) separation there
            moral = self._graph.ancestral_adg(lset | rset | cond).moralise()
            if moral.separates(lset, rset, cond):
                self._add_independence(left, right, cond)
ensemble = OrderEnsemble(w,model.variables(), num_runs, burnin = burnin, max_potential_parents=20, max_parents_family=3, max_best_families=4000, best_family_scale=log(10)) print 'run','optimal', for i in xrange(num_runs): print 'order_'+str(i), print samples = [] for i in xrange(num_samples): esample = ensemble.sample(skip=sample_every) print i,optimal_score, for sample in esample: print sample.score(), print samples.append(esample[0:2]) print print 'blanket_score_x','blanket_score_y' for i, j in pairs(model.variables()): e_ij_x = sum([samples[k][0].markov_blanket_score(i,j) for k in xrange(num_samples)])/num_samples e_ij_y = sum([samples[k][1].markov_blanket_score(i,j) for k in xrange(num_samples)])/num_samples print e_ij_x, e_ij_y e_ji_x = sum([samples[k][0].markov_blanket_score(j,i) for k in xrange(num_samples)])/num_samples e_ji_y = sum([samples[k][1].markov_blanket_score(j,i) for k in xrange(num_samples)])/num_samples print e_ji_x, e_ji_y
for i in xrange(num_runs): print 'order_' + str(i), print samples = [] for i in xrange(num_samples): esample = ensemble.sample(skip=sample_every) print i, optimal_score, for sample in esample: print sample.score(), print samples.append(esample[0:2]) print print 'blanket_score_x', 'blanket_score_y' for i, j in pairs(model.variables()): e_ij_x = sum([ samples[k][0].markov_blanket_score(i, j) for k in xrange(num_samples) ]) / num_samples e_ij_y = sum([ samples[k][1].markov_blanket_score(i, j) for k in xrange(num_samples) ]) / num_samples print e_ij_x, e_ij_y e_ji_x = sum([ samples[k][0].markov_blanket_score(j, i) for k in xrange(num_samples) ]) / num_samples e_ji_y = sum([ samples[k][1].markov_blanket_score(j, i) for k in xrange(num_samples) ]) / num_samples print e_ji_x, e_ji_y
def _remove_indeps(self, ci):
    """Drop the skeleton line x - y for every pair that ``ci`` reports as
    independent given some conditioning set."""
    # queries go to ci, mutation goes to self, so the two phases
    # can be cleanly separated
    separable = [(x, y) for x, y in pairs(ci.variables())
                 if ci.has_independence(x, y)]
    for x, y in separable:
        self.remove_line(x, y)