def test_valid_delete_operators_5(self):
    """Check the delete operators found for the edge between X0 and X1.

    In ``A`` the edge is X0 -> X1, and X2, X3 are both undirected
    neighbors of X1 that are adjacent to X0, but not to each other.
    """
    A = np.array([[0, 1, 1, 1],
                  [0, 0, 1, 1],
                  [1, 1, 0, 0],
                  [1, 1, 0, 0]])
    cache = GaussObsL0Pen(self.obs_data)
    # Removing the edge X0 -> X1 should yield three valid operators:
    #   0. H = Ø is invalid, as NA_yx \ Ø = {X2,X3} is not a clique
    #   1. H = {X2}, as NA_yx \ H = {X3} is a clique
    #   2. H = {X3}, as NA_yx \ H = {X2} is a clique
    #   3. H = {X2,X3}, as NA_yx \ H = Ø is a clique
    output = ges.score_valid_delete_operators(0, 1, A, cache)
    self.assertEqual(3, len(output))
    # v-structure on X2, i.e. orient X0 -> X2, X1 -> X2
    A1 = np.array([[0, 0, 1, 1],
                   [0, 0, 1, 1],
                   [0, 0, 0, 0],
                   [1, 1, 0, 0]])
    # v-structure on X3, i.e. orient X0 -> X3, X1 -> X3
    A2 = np.array([[0, 0, 1, 1],
                   [0, 0, 1, 1],
                   [1, 1, 0, 0],
                   [0, 0, 0, 0]])
    # v-structures on X2 and X3
    A3 = np.array([[0, 0, 1, 1],
                   [0, 0, 1, 1],
                   [0, 0, 0, 0],
                   [0, 0, 0, 0]])
    # The second entry of every operator tuple is the resulting matrix;
    # each expected graph must be among them.
    resulting_graphs = [op[1] for op in output]
    for expected in (A1, A2, A3):
        self.assertIsNotNone(utils.member(resulting_graphs, expected))
def score_valid_turn_operators_undir(x, y, A, cache, debug=0):
    """Find and score the valid turn operators for the undirected edge x - y.

    Parameters
    ----------
    x : int
        the node that becomes the origin of the turned edge (x -> y)
    y : int
        the node that becomes the target of the turned edge
    A : np.array
        the current adjacency matrix
    cache : instance of ges.scores.DecomposableScore
        the score cache used to compute the score of the valid
        operators (only its `local_score` method is called)
    debug : int
        if larger than 0, debug traces are printed. Higher values
        correspond to increased verbosity

    Returns
    -------
    valid_operators : list of tuples
        one tuple `(score_change, new_A, x, y, C)` per valid operator,
        where `score_change` is the new score minus the old score,
        `new_A` is the matrix returned by `turn(x, y, C, A)` and `C`
        is the set of neighbors of y that defines the operator

    """
    # Proposition 31, condition (ii) in GIES paper (Hauser & Bühlmann
    # 2012) is violated if:
    #   1. all neighbors of y are adjacent to x, or
    #   2. y has no neighbors (besides x)
    # then there are no valid operators.
    non_adjacents = list(utils.neighbors(y, A) - utils.adj(x, A) - {x})
    if not non_adjacents:
        if debug > 1:
            print(" turn(%d,%d) : ne(y) \\ adj(x) = Ø => stopping" % (x, y))
        return []
    # Otherwise, construct all the possible subsets which will satisfy
    # condition (ii), i.e. all subsets of neighbors of y with at least
    # one which is not adjacent to x. Each row one-hot encodes a subset
    # (the extra last column is never set in this function).
    p = len(A)
    C0 = sorted(utils.neighbors(y, A) - {x})
    subsets = np.zeros((2 ** len(C0), p + 1), dtype=bool)
    subsets[:, C0] = utils.cartesian([np.array([False, True])] * len(C0), dtype=bool)
    # Remove all subsets which do not contain at least one non-adjacent node to x
    to_remove = ~subsets[:, non_adjacents].any(axis=1)
    subsets = utils.delete(subsets, to_remove, axis=0)
    # These depend only on (x, y, A), so compute them once instead of
    # once per subset. Assumes turn() returns a new matrix and leaves A
    # untouched, which the loop below already relies on.
    na_yx = utils.na(y, x, A)
    subgraph = utils.induced_subgraph(utils.chain_component(y, A), A)
    pa_x = utils.pa(x, A)
    pa_y = utils.pa(y, A)
    # With condition (ii) guaranteed, we now check conditions (i,iii)
    # for each subset
    valid_operators = []
    if debug > 1:
        print(" turn(%d,%d) C0=" % (x, y), set(C0))
    while len(subsets) > 0:
        if debug > 1:
            print(" len(subsets)=%d, len(valid_operators)=%d" %
                  (len(subsets), len(valid_operators)))
        # Access the next subset
        C = set(np.where(subsets[0, :])[0])
        subsets = subsets[1:]
        # Condition (i): C is a clique in the subgraph induced by the
        # chain component of y. Because C is composed of neighbors of
        # y, this is equivalent to C being a clique in A. NOTE: This
        # is also how it is described in Alg. 5 of the paper
        if not utils.is_clique(C, A):
            # Remove from consideration all other sets C' which
            # contain C, as the clique condition will also not hold
            supersets = subsets[:, list(C)].all(axis=1)
            subsets = utils.delete(subsets, supersets, axis=0)
            continue
        # Condition (iii): Note that condition (iii) from proposition
        # 31 appears to be wrong in the GIES paper; instead we use the
        # definition of condition (iii) from Alg. 5 of the paper:
        # Let na_yx (N in the GIES paper) be the neighbors of Y which
        # are adjacent to X. Then, {x,y} must separate C and na_yx \ C
        # in the subgraph induced by the chain component of y,
        # i.e. all the simple paths from one set to the other contain
        # a node in {x,y}.
        if not utils.separates({x, y}, C, na_yx - C, subgraph):
            continue
        # At this point C passes both conditions
        # Apply operator
        new_A = turn(x, y, C, A)
        # Compute the change in score
        new_score = (cache.local_score(y, pa_y | C | {x})
                     + cache.local_score(x, pa_x | (C & na_yx)))
        old_score = (cache.local_score(y, pa_y | C)
                     + cache.local_score(x, pa_x | (C & na_yx) | {y}))
        if debug > 1:
            print(" new score = %0.6f, old score = %0.6f, y=%d, C=%s" %
                  (new_score, old_score, y, C))
        # Add to the list of valid operators
        valid_operators.append((new_score - old_score, new_A, x, y, C))
        if debug:
            print(" turn(%d,%d,%s) -> %0.16f" % (x, y, C, new_score - old_score))
    # Return all valid operators
    return valid_operators
def score_valid_turn_operators_dir(x, y, A, cache, debug=0):
    """Find and score the valid turn operators for the directed edge x <- y.

    Parameters
    ----------
    x : int
        the node that becomes the origin of the turned edge (x -> y)
    y : int
        the node that becomes the target of the turned edge
    A : np.array
        the current adjacency matrix
    cache : instance of ges.scores.DecomposableScore
        the score cache used to compute the score of the valid
        operators (only its `local_score` method is called)
    debug : int
        if larger than 0, debug traces are printed. Higher values
        correspond to increased verbosity

    Returns
    -------
    valid_operators : list of tuples
        one tuple `(score_change, new_A, x, y, C)` per valid operator,
        where `score_change` is the new score minus the old score,
        `new_A` is the matrix returned by `turn(x, y, C, A)` and
        `C = NA_yx U T` is the set passed to the turn operator

    """
    # One-hot encode all subsets of T0, plus one extra column to mark
    # if they pass validity condition 2 (see below). The set C passed
    # to the turn operator will be C = NAyx U T.
    p = len(A)
    T0 = sorted(utils.neighbors(y, A) - utils.adj(x, A))
    if not T0:
        # Only the empty subset T = Ø has to be considered
        subsets = np.zeros((1, p + 1), dtype=bool)
    else:
        subsets = np.zeros((2 ** len(T0), p + 1), dtype=bool)
        subsets[:, T0] = utils.cartesian([np.array([False, True])] * len(T0), dtype=bool)
    # These depend only on (x, y, A), so compute them once instead of
    # once per subset. Assumes turn() returns a new matrix and leaves A
    # untouched, which the loop below already relies on.
    na_yx = utils.na(y, x, A)
    ne_x = utils.neighbors(x, A)
    pa_x = utils.pa(x, A)
    pa_y = utils.pa(y, A)
    valid_operators = []
    if debug > 1:
        print(" turn(%d,%d) T0=" % (x, y), set(T0))
    while len(subsets) > 0:
        if debug > 1:
            print(" len(subsets)=%d, len(valid_operators)=%d" %
                  (len(subsets), len(valid_operators)))
        # Access the next subset
        T = np.where(subsets[0, :-1])[0]
        passed_cond_2 = subsets[0, -1]
        subsets = subsets[1:]  # update the list of remaining subsets
        # Check that the validity conditions hold for T
        C = na_yx | set(T)
        # Condition 1: Test that C = NA_yx U T is a clique
        cond_1 = utils.is_clique(C, A)
        # Condition 2: Test that all semi-directed paths from y to x
        # contain a member from C U neighbors(x). It is only evaluated
        # when condition 1 holds: otherwise the operator is rejected
        # anyway and all supersets of T are discarded, so enumerating
        # the paths would be wasted work. In that case cond_2 stays
        # None (and shows up as such in the debug trace).
        cond_2 = None
        if not cond_1:
            # Remove from consideration all other sets T' which
            # contain T, as the clique condition will also not hold
            supersets = subsets[:, T].all(axis=1)
            subsets = utils.delete(subsets, supersets, axis=0)
        elif passed_cond_2:
            # If a subset of T satisfied condition 2, so does T. The
            # supersets of T were already marked by that subset.
            cond_2 = True
        else:
            # The path consisting only of the edge y -> x is exempt
            blockers = C | ne_x
            cond_2 = all(path == [y, x] or blockers & set(path)
                         for path in utils.semi_directed_paths(y, x, A))
            if cond_2:
                # If condition 2 holds for C U neighbors(x), that is,
                # for C = NAyx U T U neighbors(x), then it holds for
                # all supersets of T
                supersets = subsets[:, T].all(axis=1)
                subsets[supersets, -1] = True
        if debug > 1:
            print(" turn(%d,%d,%s)" % (x, y, C), "na_yx =", na_yx,
                  "T =", T, "validity:", cond_1, cond_2)
        # If both conditions hold, apply operator and compute its score
        if cond_1 and cond_2:
            # Apply operator
            new_A = turn(x, y, C, A)
            # Compute the change in score
            new_score = (cache.local_score(y, pa_y | C | {x})
                         + cache.local_score(x, pa_x - {y}))
            old_score = (cache.local_score(y, pa_y | C)
                         + cache.local_score(x, pa_x))
            if debug > 1:
                print(" new score = %0.6f, old score = %0.6f, y=%d, C=%s" %
                      (new_score, old_score, y, C))
            # Add to the list of valid operators
            valid_operators.append((new_score - old_score, new_A, x, y, C))
            if debug:
                print(" turn(%d,%d,%s) -> %0.16f" % (x, y, C, new_score - old_score))
    # Return all the valid operators
    return valid_operators
def score_valid_delete_operators(x, y, A, cache, debug=0):
    """Generate and score all valid delete(x,y,H) operators involving the
    edge x -> y or x - y, and all possible subsets H of neighbors of y
    which are adjacent to x.

    Parameters
    ----------
    x : int
        the "origin" node (i.e. x -> y or x - y)
    y : int
        the "target" node
    A : np.array
        the current adjacency matrix
    cache : instance of ges.scores.DecomposableScore
        the score cache used to compute the score of the valid
        operators (only its `local_score` method is called)
    debug : int
        if larger than 0, debug traces are printed. Higher values
        correspond to increased verbosity

    Returns
    -------
    valid_operators : list of tuples
        one tuple `(score_change, new_A, x, y, H)` per valid operator,
        where `score_change` is the new score minus the old score,
        `new_A` is the matrix returned by `delete(x, y, H, A)` and `H`
        is an array with the indices of the nodes in the subset

    Raises
    ------
    ValueError
        if `A[x, y] == 0`, i.e. there is no edge x -> y or x - y

    """
    # Check inputs
    if A[x, y] == 0:
        raise ValueError("There is no (un)directed edge from x=%d to y=%d" % (x, y))
    # One-hot encode all subsets of H0, plus one column to mark if
    # they have already passed the validity condition
    na_yx = utils.na(y, x, A)
    H0 = sorted(na_yx)
    p = len(A)
    if not H0:
        # Only the empty subset H = Ø has to be considered
        subsets = np.zeros((1, p + 1), dtype=bool)
    else:
        subsets = np.zeros((2 ** len(H0), p + 1), dtype=bool)
        subsets[:, H0] = utils.cartesian([np.array([False, True])] * len(H0), dtype=bool)
    # The parents of y depend only on (y, A): look them up once
    pa_y = utils.pa(y, A)
    valid_operators = []
    if debug > 1:
        print(" delete(%d,%d) H0=" % (x, y), set(H0))
    while len(subsets) > 0:
        if debug > 1:
            print(" len(subsets)=%d, len(valid_operators)=%d" %
                  (len(subsets), len(valid_operators)))
        # Access the next subset
        H = np.where(subsets[0, :-1])[0]
        cond_1 = subsets[0, -1]
        subsets = subsets[1:]
        # Check if the validity condition holds for H, i.e. that
        # NA_yx \ H is a clique.
        # If it has not been tested previously for a subset of H,
        # check it now
        remaining = na_yx - set(H)
        if not cond_1 and utils.is_clique(remaining, A):
            cond_1 = True
            # For all supersets H' of H, the validity condition will also hold
            supersets = subsets[:, H].all(axis=1)
            subsets[supersets, -1] = True
        if debug > 1:
            print(" delete(%d,%d,%s)" % (x, y, H), "na_yx - H = ",
                  remaining, "validity:", cond_1)
        # If the validity condition holds, apply operator and compute its score
        if cond_1:
            # Apply operator
            new_A = delete(x, y, H, A)
            # Compute the change in score: the local score of y with
            # and without x among the conditioning set
            aux = remaining | pa_y | {x}
            old_score = cache.local_score(y, aux)
            new_score = cache.local_score(y, aux - {x})
            if debug > 1:
                print(" new: s(%d, %s) = %0.6f old: s(%d, %s) = %0.6f" %
                      (y, aux - {x}, new_score, y, aux, old_score))
            # Add to the list of valid operators
            valid_operators.append((new_score - old_score, new_A, x, y, H))
            if debug:
                print(" delete(%d,%d,%s) -> %0.16f" % (x, y, H, new_score - old_score))
    # Return all the valid operators
    return valid_operators
def score_valid_insert_operators(x, y, A, cache, debug=0):
    """Generate and score all valid insert(x,y,T) operators involving the
    edge x -> y, and all possible subsets T of neighbors of y which
    are NOT adjacent to x.

    Parameters
    ----------
    x : int
        the origin node (i.e. x -> y)
    y : int
        the target node
    A : np.array
        the current adjacency matrix
    cache : instance of ges.scores.DecomposableScore
        the score cache used to compute the score of the valid
        operators (only its `local_score` method is called)
    debug : int
        if larger than 0, debug traces are printed. Higher values
        correspond to increased verbosity

    Returns
    -------
    valid_operators : list of tuples
        one tuple `(score_change, new_A, x, y, T)` per valid operator,
        where `score_change` is the new score minus the old score,
        `new_A` is the matrix returned by `insert(x, y, T, A)` and `T`
        is an array with the indices of the nodes in the subset

    Raises
    ------
    ValueError
        if `A[x, y]` or `A[y, x]` is non-zero, i.e. x and y are
        already connected

    """
    p = len(A)
    if A[x, y] != 0 or A[y, x] != 0:
        raise ValueError("x=%d and y=%d are already connected" % (x, y))
    # One-hot encode all subsets of T0, plus one extra column to mark
    # if they pass validity condition 2 (see below)
    T0 = sorted(utils.neighbors(y, A) - utils.adj(x, A))
    if not T0:
        # Only the empty subset T = Ø has to be considered
        subsets = np.zeros((1, p + 1), dtype=bool)
    else:
        subsets = np.zeros((2 ** len(T0), p + 1), dtype=bool)
        subsets[:, T0] = utils.cartesian([np.array([False, True])] * len(T0), dtype=bool)
    # These depend only on (x, y, A), so compute them once instead of
    # once per subset. Assumes insert() returns a new matrix and leaves
    # A untouched, which the loop below already relies on.
    na_yx = utils.na(y, x, A)
    pa_y = utils.pa(y, A)
    valid_operators = []
    if debug > 1:
        print(" insert(%d,%d) T0=" % (x, y), set(T0))
    while len(subsets) > 0:
        if debug > 1:
            print(" len(subsets)=%d, len(valid_operators)=%d" %
                  (len(subsets), len(valid_operators)))
        # Access the next subset
        T = np.where(subsets[0, :-1])[0]
        passed_cond_2 = subsets[0, -1]
        subsets = subsets[1:]
        # Check that the validity conditions hold for T
        na_yxT = na_yx | set(T)
        # Condition 1: Test that NA_yx U T is a clique
        cond_1 = utils.is_clique(na_yxT, A)
        # Condition 2: Test that all semi-directed paths from y to x
        # contain a member from NA_yx U T. It is only evaluated when
        # condition 1 holds: otherwise the operator is rejected anyway
        # and all supersets of T are discarded, so enumerating the
        # paths would be wasted work. In that case cond_2 stays None
        # (and shows up as such in the debug trace).
        cond_2 = None
        if not cond_1:
            # Remove from consideration all other sets T' which
            # contain T, as the clique condition will also not hold
            supersets = subsets[:, T].all(axis=1)
            subsets = utils.delete(subsets, supersets, axis=0)
        elif passed_cond_2:
            # If a subset of T satisfied condition 2, so does T. The
            # supersets of T were already marked by that subset.
            cond_2 = True
        else:
            cond_2 = all(na_yxT & set(path)
                         for path in utils.semi_directed_paths(y, x, A))
            if cond_2:
                # If condition 2 holds for NA_yx U T, then it holds
                # for all supersets of T
                supersets = subsets[:, T].all(axis=1)
                subsets[supersets, -1] = True
        if debug > 1:
            print(" insert(%d,%d,%s)" % (x, y, T), "na_yx U T = ",
                  na_yxT, "validity:", cond_1, cond_2)
        # If both conditions hold, apply operator and compute its score
        if cond_1 and cond_2:
            # Apply operator
            new_A = insert(x, y, T, A)
            # Compute the change in score: the local score of y with
            # and without x among the conditioning set
            aux = na_yxT | pa_y
            old_score = cache.local_score(y, aux)
            new_score = cache.local_score(y, aux | {x})
            if debug > 1:
                print(" new: s(%d, %s) = %0.6f old: s(%d, %s) = %0.6f" %
                      (y, aux | {x}, new_score, y, aux, old_score))
            # Add to the list of valid operators
            valid_operators.append((new_score - old_score, new_A, x, y, T))
            if debug:
                print(" insert(%d,%d,%s) -> %0.16f" % (x, y, T, new_score - old_score))
    # Return all the valid operators
    return valid_operators