def __init__(self, separator, constrain_order=None, hill_climb_cond=False,
             must_have=None):
    """Store the search options, then run the discovery pass at once.

    @param separator: available separator
    @type separator: HypotheticalSeparator
    @param constrain_order: optional ordering of the variables, kept for
        use while the search is running
    @param hill_climb_cond: flag kept for use while the search is running
    @param must_have: variables of interest; when omitted or empty, every
        variable reported by C{separator} is used instead
    """
    super(PCCI, self).__init__(variables=separator.variables())

    # Counter of tests issued; reset before the search starts.
    self.num_tests = 0
    self._hill_climb_cond = hill_climb_cond
    self._constrain_order = constrain_order

    # None and any empty collection both mean "use all variables".
    self._must_have = must_have if must_have else separator.variables()

    # The skeleton starts out with the variables as vertices; the
    # discovery pass below works on it.
    self._skel = UGraph(vertices=self.variables())
    self._ic_discovery(separator)
def setUp(self):
    """Create four fixture graphs and one expectation dict per graph.

    C{self.g[i]} is a graph and C{self.indeps[i]} maps a pair of its
    vertices to a list of frozensets (presumably the conditioning sets
    that separate the pair; confirm against the test methods).
    """
    super(TestGraphCI, self).setUp()

    # Each entry pairs a graph with its expectations; order matters because
    # the tests address fixtures by index.
    fixtures = [
        # 0: undirected graph
        (
            UGraph(vertices=['a', 'b', 'c', 'd', 'e'],
                   lines=[('a', 'b'), ('a', 'c'), ('b', 'd'),
                          ('b', 'c'), ('c', 'd'), ('c', 'e')]),
            {
                ('b', 'e'): [frozenset(['c'])],
                ('d', 'e'): [frozenset(['c'])],
                ('a', 'd'): [frozenset(['c', 'b'])],
                ('a', 'e'): [frozenset(['c'])]
            },
        ),
        # 1: six-vertex directed graph
        (
            ADG(vertices=['a', 'b', 'c', 'd', 'e', 'f'],
                arrows=[('a', 'b'), ('b', 'c'), ('d', 'a'), ('d', 'e'),
                        ('d', 'f'), ('e', 'c'), ('c', 'f')]),
            {
                ('e', 'f'): [frozenset(['c', 'd'])],
                ('a', 'f'): [frozenset(['b', 'd']), frozenset(['c', 'd'])],
                ('a', 'e'): [frozenset(['d'])],
                ('b', 'f'): [frozenset(['a', 'c', 'e']),
                             frozenset(['c', 'd'])],
                ('c', 'd'): [frozenset(['b', 'e']), frozenset(['a', 'e'])],
                ('b', 'e'): [frozenset(['a']), frozenset(['d'])],
                ('a', 'c'): [frozenset(['b', 'd']), frozenset(['b', 'e'])],
                ('b', 'd'): [frozenset(['a'])]
            },
        ),
        # 2: chain a -> b -> c -> d
        (
            ADG(vertices=['a', 'b', 'c', 'd'],
                arrows=[('a', 'b'), ('b', 'c'), ('c', 'd')]),
            {
                ('a', 'd'): [frozenset(['c']), frozenset(['b'])],
                ('a', 'c'): [frozenset(['b'])],
                ('b', 'd'): [frozenset(['c'])]
            },
        ),
        # 3: a -> c <- b, c -> d
        # NOTE(review): every pair keyed below is joined by an arrow in
        # this graph -- confirm these expectations are intended.
        (
            ADG(vertices=['a', 'b', 'c', 'd'],
                arrows=[('a', 'c'), ('b', 'c'), ('c', 'd')]),
            {
                ('a', 'c'): [frozenset([])],
                ('b', 'c'): [frozenset([])],
                ('d', 'c'): [frozenset([])]
            },
        ),
    ]

    self.g = [graph for graph, _ in fixtures]
    self.indeps = [expected for _, expected in fixtures]
class PCCI(CI):
    """Implementation of Spirtes, Glymour and Scheines' PC algorithm.

    Constructing an instance runs the search immediately: starting from a
    complete undirected graph over the variables, lines are removed
    whenever the supplied separator reports that a conditioning set
    separates their end points, and each such independence is recorded via
    C{_add_independence}.

    C{num_tests} counts the calls made to C{separator.separates} plus the
    candidate C{separator.confidence} evaluations made while hill
    climbing.
    """

    def __init__(self, separator, constrain_order=None, hill_climb_cond=False,
                 must_have=None):
        """
        @param separator: available separator
        @type separator: HypotheticalSeparator
        @param constrain_order: optional sequence of variables; when given,
            only the variables listed before a variable count as its
            potential ancestors (see L{potential_ancestors})
        @param hill_climb_cond: when true, a separating set that has been
            found is greedily enlarged for as long as that raises
            C{separator.confidence} (see L{hill_climb_cond})
        @param must_have: variables of interest; a pair is examined only if
            at least one of its members is in this collection.  Defaults to
            all variables of C{separator} when omitted or empty.
        """
        super(PCCI, self).__init__(variables=separator.variables())
        self._constrain_order = constrain_order
        self._hill_climb_cond = hill_climb_cond
        # ``not must_have`` already covers None as well as empty collections.
        if not must_have:
            self._must_have = separator.variables()
        else:
            self._must_have = must_have
        self.num_tests = 0
        self._skel = UGraph(vertices=self.variables())
        self._ic_discovery(separator)

    def potential_ancestors(self, child):
        """Return the variables allowed to appear in a separator for C{child}.

        Without a constraint order, or when C{child} is not part of it, all
        variables qualify (the result of C{self.variables()} is returned
        as is).  Otherwise the result is the set of variables that precede
        C{child} in the constraint order.
        """
        if self._constrain_order is None or child not in self._constrain_order:
            return self.variables()
        i = self._constrain_order.index(child)
        return set(self._constrain_order[:i])

    def skeleton(self):
        """Return the undirected graph the search operates on."""
        return self._skel

    def _ic_discovery(self, separator):
        """Remove lines from the complete graph using C{separator}.

        For n = 0, 1, 2, ... every still-adjacent pair (x, y) with at least
        one member in C{must_have} is tested against conditioning sets of
        size n.  The first set that separates the pair is (optionally)
        improved by L{hill_climb_cond}, recorded, and the line x-y is
        removed.  The search stops once no vertex has more than n
        neighbours.

        @raise RuntimeError: if y is a neighbour of x but x is not a
            neighbour of y in the skeleton.
        """
        skel = self._skel
        skel.complete(skel.vertices())
        n = 0
        while True:
            # find a pair (x,y) such that the cardinality of the neighbourhood
            # exceeds n
            for x, y in pairs(self.variables()):
                if x not in self._must_have and y not in self._must_have:
                    continue
                n_x = set(skel.neighbours(x))
                if y not in n_x:
                    # line already removed (or never present)
                    continue
                n_y = set(skel.neighbours(y))
                if x not in n_y:
                    # call form of raise: valid on both Python 2 and 3
                    raise RuntimeError('inconsistent neighbourhoods')
                # separators must be potential ancestors of both variables.
                # constrain the neighbourhood to contain only potential
                # ancestors
                cond = n_x & self.potential_ancestors(x)
                cond |= n_y & self.potential_ancestors(y)
                cond -= frozenset([x, y])
                # if the neighbourhood is too small, try the next pair
                if n > len(cond):
                    continue
                # find an untested subset s of the candidates of cardinality n
                for s in subsetn(tuple(cond), n):
                    s = frozenset(s)
                    self.num_tests += 1
                    # test for x _|_ y | s
                    if separator.separates(x, y, s):
                        # see if we can find a more probable separator
                        s = self.hill_climb_cond(separator, x, y, s, cond)
                        # record independence since found
                        self._add_independence(x, y, s)
                        skel.remove_line(x, y)
                        break
            # increment required neighbourhood minimum size
            n += 1
            # see if we've found all the CIs: carry on only while some vertex
            # still has more than n neighbours.
            # NOTE(review): candidates are drawn from the union of both
            # neighbourhoods, whereas this stop test looks at single
            # neighbourhood sizes -- confirm that is intended.
            if not any(len(skel.neighbours(v)) > n for v in self.variables()):
                break

    def hill_climb_cond(self, separator, x, y, s, cond):
        """Hill climb over s, trying other elements of the set of all
        possible conditions, cond, to maximise the belief.

        The idea is that x_|_y|s may just be within the threshold of
        believable, but there exists a slightly larger conditional s' which
        we won't find due to the greediness of PC search.  Finding the
        correct separator influences the collider/immorality recovery step
        of the IC algorithm (the first stage of orienting arrows).  Either
        way, by performing hill climbing search here we should not lose out.

        @param separator: object providing C{confidence(x, y, s)}
        @param x: first variable of the pair
        @param y: second variable of the pair
        @param s: separating set found so far
        @param cond: set of all candidate conditioning variables
        @return: C{s} unchanged when hill climbing is disabled, otherwise a
            frozenset holding the best separator found
        """
        if not self._hill_climb_cond:
            return s
        # start with the current separator s and its degree of belief
        s = set(s)
        best_score = separator.confidence(x, y, s)
        while True:
            best_new_s = None
            # consider each potential condition
            for new_s in cond - s:
                # test to see if adding just this one condition
                # gives a higher degree of belief
                self.num_tests += 1
                score_new = separator.confidence(x, y, s | set([new_s]))
                if score_new > best_score:
                    # if so, go with it
                    best_score = score_new
                    best_new_s = new_s
            # if we found no improvement, give up (at maximum)
            if best_new_s is None:
                break
            s.add(best_new_s)
        return frozenset(s)