def test_delete_operator_3(self):
    """Exhaustively check ``ges.delete`` on randomly generated CPDAGs.

    For each generated CPDAG, the operator is applied to every pair
    (x, y) with a non-zero entry ``cpdag[x, y]`` and to every subset H
    produced by ``utils.subsets(utils.na(y, x, cpdag))``. The output is
    checked both for the expected v-structures and for the complete
    expected adjacency matrix.
    """
    n_graphs = 100
    p = 20
    for i in range(n_graphs):
        dag = sempler.generators.dag_avg_deg(p, 3, 1, 1)
        cpdag = utils.dag_to_cpdag(dag)
        for x in range(p):
            # The operator only applies where there is an edge
            # x -> y or x - y
            targets = np.where(cpdag[x, :] != 0)[0]
            for y in targets:
                for H in utils.subsets(utils.na(y, x, cpdag)):
                    output = ges.delete(x, y, H, cpdag)
                    # Each h in H must show up in a v-structure with
                    # x and y (smaller endpoint listed first)
                    vstructs = utils.vstructures(output)
                    for h in H:
                        if x < y:
                            expected = (x, h, y)
                        else:
                            expected = (y, h, x)
                        self.assertIn(expected, vstructs)
                    # Build the expected adjacency from scratch
                    truth = cpdag.copy()
                    # the edge between x and y is gone
                    truth[x, y] = 0
                    truth[y, x] = 0
                    # edges between y and H lose the h -> y direction
                    truth[list(H), y] = 0
                    # same for edges that were undirected between x and H
                    undirected_with_x = utils.neighbors(x, cpdag) & H
                    truth[list(undirected_with_x), x] = 0
                    self.assertTrue((output == truth).all())
    print("\nExhaustively checked delete operator on %i CPDAGS" % (i + 1))
def test_insert_2(self):
    """Exhaustively check ``ges.insert`` on randomly generated CPDAGs.

    For each generated CPDAG and each node x, the operator is applied to
    every y outside ``utils.adj(x, cpdag)`` and to every subset T
    produced from the neighbors of y that are not in that set. The
    output is checked both for the expected v-structures and for the
    complete expected adjacency matrix.
    """
    n_graphs = 100
    p = 20
    for i in range(n_graphs):
        dag = sempler.generators.dag_avg_deg(p, 3, 1, 1)
        cpdag = utils.dag_to_cpdag(dag)
        for x in range(p):
            # The operator only applies to nodes not adjacent to x.
            # NOTE(review): unless utils.adj(x, .) contains x itself,
            # this candidate set also includes y == x -- confirm that
            # this is intended.
            adj_x = utils.adj(x, cpdag)
            candidates = set(range(p)) - adj_x
            for y in candidates:
                free_neighbors = utils.neighbors(y, cpdag) - adj_x
                for T in utils.subsets(free_neighbors):
                    output = ges.insert(x, y, T, cpdag)
                    # Each t in T must show up in a v-structure with
                    # x and y (smaller endpoint listed first)
                    vstructs = utils.vstructures(output)
                    for t in T:
                        if x < t:
                            expected = (x, y, t)
                        else:
                            expected = (t, y, x)
                        self.assertIn(expected, vstructs)
                    # Build the expected adjacency from scratch
                    truth = cpdag.copy()
                    # new edge x -> y
                    truth[x, y] = 1
                    # edges between T and y become t -> y
                    members = list(T)
                    truth[members, y] = 1
                    truth[y, members] = 0
                    self.assertTrue((output == truth).all())
    print("\nExhaustively checked insert operator on %i CPDAGS" % (i + 1))
def delete(x, y, H, A):
    """Apply the delete operator to the edge x -> y or x - y.

    The operator
      1) removes the edge between x and y, and
      2) for every node h in H
           * orients the edge between y and h as y -> h
           * orients the edge between x and h as x -> h, if that edge
             was undirected

    H has to be a subset of NA_yx, i.e. of the neighbors of y which are
    adjacent to x.

    Parameters
    ----------
    x : int
        the "origin" node (i.e. x -> y or x - y)
    y : int
        the "target" node
    H : iterable of ints
        a subset of the neighbors of y which are adjacent to x
    A : np.array
        the current adjacency matrix; it is not modified

    Returns
    -------
    new_A : np.array
        a copy of A with the operator applied

    Raises
    ------
    ValueError
        if A[x, y] is zero, or if H is not a subset of NA_yx

    """
    H = set(H)
    # --- Input validation ---
    if A[x, y] == 0:
        raise ValueError("There is no (un)directed edge from x=%d to y=%d" % (x, y))
    # NA_yx: neighbors of y which are adjacent to x
    na_yx = utils.na(y, x, A)
    # Any element of H outside NA_yx makes H invalid
    if H - na_yx:
        raise ValueError(
            "The given set H is not valid, H=%s is not a subset of NA_yx=%s" % (H, na_yx))
    # --- Apply the operator on a copy ---
    new_A = A.copy()
    # Drop the edge between x and y in both directions
    new_A[x, y] = 0
    new_A[y, x] = 0
    # Removing h -> y leaves the edges between y and H as y -> h
    new_A[list(H), y] = 0
    # Likewise for the members of H joined to x by an undirected edge
    undirected_with_x = H & utils.neighbors(x, A)
    new_A[list(undirected_with_x), x] = 0
    return new_A
def turn(x, y, C, A):
    """Apply the turning operator to the edge x - y or x <- y.

    The operator
      1) orients the edge as x -> y, and
      2) for all c in C, orients the previously undirected edge
         between c and y as c -> y

    Parameters
    ----------
    x : int
        the origin node (i.e. x -> y)
    y : int
        the target node
    C : iterable of ints
        a subset of the neighbors of y; must not contain x or y
    A : np.array
        the current adjacency matrix; it is not modified

    Returns
    -------
    new_A : np.array
        a copy of A with the operator applied

    Raises
    ------
    ValueError
        if the edge x -> y already exists, if x and y are not
        connected, if C is not a subset of the neighbors of y, or if C
        contains x or y

    """
    # Accept any iterable, as documented (and as delete does for H);
    # the set operations below would otherwise fail for lists/arrays.
    C = set(C)
    # Check inputs
    if A[x, y] != 0 and A[y, x] == 0:
        raise ValueError("The edge %d -> %d already exists" % (x, y))
    if A[x, y] == 0 and A[y, x] == 0:
        raise ValueError("x=%d and y=%d are not connected" % (x, y))
    if not C <= utils.neighbors(y, A):
        raise ValueError("Not all nodes in C=%s are neighbors of y=%d" % (C, y))
    if C & {x, y}:
        raise ValueError("C should not contain x or y")
    # Apply operator on a copy
    new_A = A.copy()
    # Turn the edge into x -> y
    new_A[y, x] = 0
    new_A[x, y] = 1
    # Removing y -> c leaves the edges between C and y as c -> y
    new_A[y, list(C)] = 0
    return new_A
def score_valid_turn_operators_undir(x, y, A, cache, debug=0):
    """Find and score the valid turn operators for the edge x - y.

    Candidate sets C are the subsets of the neighbors of y (other than
    x) which contain at least one node not adjacent to x. A candidate
    is kept if C is a clique and if {x, y} separates C from NA_yx \\ C
    in the subgraph induced by the chain component of y.

    Parameters
    ----------
    x : int
        the origin node (i.e. x -> y)
    y : int
        the target node
    A : np.array
        the current adjacency matrix
    cache : instance of ges.scores.DecomposableScore
        the score cache to compute the score of the
        operators that are valid
    debug : int
        if larger than 0, debug traces are printed. Values larger
        than 1 print additional traces

    Returns
    -------
    valid_operators : list of tuples
        one tuple `(score_change, new_A, x, y, C)` per valid operator,
        where `score_change` is the new score minus the old score and
        `new_A` is the adjacency matrix after applying turn(x, y, C)

    """
    # Proposition 31, condition (ii) in GIES paper (Hauser & Bühlmann
    # 2012) is violated if:
    #   1. all neighbors of y are adjacent to x, or
    #   2. y has no neighbors (besides x)
    # then there are no valid operators.
    ne_y = utils.neighbors(y, A)
    non_adjacents = list(ne_y - utils.adj(x, A) - {x})
    if len(non_adjacents) == 0:
        if debug > 1:
            print(" turn(%d,%d) : ne(y) \\ adj(x) = Ø => stopping" % (x, y))
        return []
    # Otherwise, construct all the possible subsets which will satisfy
    # condition (ii), i.e. all subsets of neighbors of y with at least
    # one which is not adjacent to x. Each row one-hot encodes a subset.
    # NOTE(review): the extra trailing column is never set in this
    # function; the shape is kept as in the original.
    p = len(A)
    C0 = sorted(ne_y - {x})
    subsets = np.zeros((2**len(C0), p + 1), dtype=bool)
    subsets[:, C0] = utils.cartesian([np.array([False, True])] * len(C0), dtype=bool)
    # Remove all subsets which do not contain at least one node
    # non-adjacent to x
    to_remove = ~subsets[:, non_adjacents].any(axis=1)
    subsets = utils.delete(subsets, to_remove, axis=0)
    # The following depend only on x, y and A (which is never modified
    # here), so they are computed once instead of once per subset
    subgraph = utils.induced_subgraph(utils.chain_component(y, A), A)
    na_yx = utils.na(y, x, A)
    pa_x = utils.pa(x, A)
    pa_y = utils.pa(y, A)
    # With condition (ii) guaranteed, we now check conditions (i,iii)
    # for each subset
    valid_operators = []
    if debug > 1:
        print(" turn(%d,%d) C0=" % (x, y), set(C0))
    while len(subsets) > 0:
        if debug > 1:
            print(" len(subsets)=%d, len(valid_operators)=%d" %
                  (len(subsets), len(valid_operators)))
        # Access the next subset
        C = set(np.where(subsets[0, :])[0])
        subsets = subsets[1:]
        # Condition (i): C is a clique in the subgraph induced by the
        # chain component of y. Because C is composed of neighbors of
        # y, this is equivalent to C being a clique in A. NOTE: This
        # is also how it is described in Alg. 5 of the paper
        if not utils.is_clique(C, A):
            # Remove from consideration all other sets C' which
            # contain C, as the clique condition will also not hold
            supersets = subsets[:, list(C)].all(axis=1)
            subsets = utils.delete(subsets, supersets, axis=0)
            continue
        # Condition (iii): Note that condition (iii) from proposition
        # 31 appears to be wrong in the GIES paper; instead we use the
        # definition of condition (iii) from Alg. 5 of the paper:
        # Let na_yx (N in the GIES paper) be the neighbors of Y which
        # are adjacent to X. Then, {x,y} must separate C and na_yx \ C
        # in the subgraph induced by the chain component of y,
        # i.e. all the simple paths from one set to the other contain
        # a node in {x,y}.
        if not utils.separates({x, y}, C, na_yx - C, subgraph):
            continue
        # At this point C passes both conditions
        #   Apply operator
        new_A = turn(x, y, C, A)
        #   Compute the change in score
        c_na = C & na_yx
        new_score = cache.local_score(y, pa_y | C | {x}) + \
            cache.local_score(x, pa_x | c_na)
        old_score = cache.local_score(y, pa_y | C) + \
            cache.local_score(x, pa_x | c_na | {y})
        if debug > 1:
            print(" new score = %0.6f, old score = %0.6f, y=%d, C=%s" %
                  (new_score, old_score, y, C))
        #   Add to the list of valid operators
        valid_operators.append((new_score - old_score, new_A, x, y, C))
        if debug:
            print(" turn(%d,%d,%s) -> %0.16f" % (x, y, C, new_score - old_score))
    # Return all valid operators
    return valid_operators
def score_valid_turn_operators_dir(x, y, A, cache, debug=0):
    """Find and score the valid turn operators for the edge x <- y.

    Candidate sets are C = NA_yx U T, for every subset T of the
    neighbors of y which are not adjacent to x. A candidate is kept if
    C is a clique and if every semi-directed path from y to x, other
    than the edge itself, contains a node in C U neighbors(x).

    Parameters
    ----------
    x : int
        the origin node (i.e. x -> y)
    y : int
        the target node
    A : np.array
        the current adjacency matrix
    cache : instance of ges.scores.DecomposableScore
        the score cache to compute the score of the
        operators that are valid
    debug : int
        if larger than 0, debug traces are printed. Values larger
        than 1 print additional traces

    Returns
    -------
    valid_operators : list of tuples
        one tuple `(score_change, new_A, x, y, C)` per valid operator,
        where `score_change` is the new score minus the old score and
        `new_A` is the adjacency matrix after applying turn(x, y, C)

    """
    # One-hot encode all subsets of T0, plus one extra column to mark
    # if they pass validity condition 2 (see below). The set C passed
    # to the turn operator will be C = NAyx U T.
    p = len(A)
    T0 = sorted(utils.neighbors(y, A) - utils.adj(x, A))
    if len(T0) == 0:
        # Only the empty subset is available
        subsets = np.zeros((1, p + 1), dtype=bool)
    else:
        subsets = np.zeros((2**len(T0), p + 1), dtype=bool)
        subsets[:, T0] = utils.cartesian([np.array([False, True])] * len(T0), dtype=bool)
    # The following depend only on x, y and A (which is never modified
    # here), so they are computed once instead of once per subset/path
    na_yx = utils.na(y, x, A)
    ne_x = utils.neighbors(x, A)
    pa_x = utils.pa(x, A)
    pa_y = utils.pa(y, A)
    valid_operators = []
    if debug > 1:
        print(" turn(%d,%d) T0=" % (x, y), set(T0))
    while len(subsets) > 0:
        if debug > 1:
            print(" len(subsets)=%d, len(valid_operators)=%d" %
                  (len(subsets), len(valid_operators)))
        # Access the next subset
        T = np.where(subsets[0, :-1])[0]
        passed_cond_2 = subsets[0, -1]
        subsets = subsets[1:]  # update the list of remaining subsets
        # Check that the validity conditions hold for T
        C = na_yx | set(T)
        # Condition 1: Test that C = NA_yx U T is a clique
        cond_1 = utils.is_clique(C, A)
        if not cond_1:
            # Remove from consideration all other sets T' which
            # contain T, as the clique condition will also not hold
            supersets = subsets[:, T].all(axis=1)
            subsets = utils.delete(subsets, supersets, axis=0)
        # Condition 2: Test that all semi-directed paths from y to x contain a
        # member from C U neighbors(x)
        # NOTE(review): when cond_1 failed, the supersets of T were
        # just removed and the operator is rejected below, so this
        # evaluation only feeds the debug trace; a `continue` above
        # would skip the path enumeration at the cost of that trace.
        if passed_cond_2:
            # If a subset of T satisfied condition 2, so does T
            cond_2 = True
        else:
            # otherwise, check condition 2
            blockers = C | ne_x
            cond_2 = True
            for path in utils.semi_directed_paths(y, x, A):
                if path == [y, x]:
                    # the edge being turned is exempt from the check
                    continue
                if len(blockers & set(path)) == 0:
                    cond_2 = False
                    break
            if cond_2:
                # If condition 2 holds for C U neighbors(x), that is,
                # for C = NAyx U T U neighbors(x), then it holds for
                # all supersets of T
                supersets = subsets[:, T].all(axis=1)
                subsets[supersets, -1] = True
        # If both conditions hold, apply operator and compute its score
        if debug > 1:
            print(" turn(%d,%d,%s)" % (x, y, C), "na_yx =", na_yx,
                  "T =", T, "validity:", cond_1, cond_2)
        if cond_1 and cond_2:
            # Apply operator
            new_A = turn(x, y, C, A)
            # Compute the change in score
            new_score = cache.local_score(y, pa_y | C | {x}) + \
                cache.local_score(x, pa_x - {y})
            old_score = cache.local_score(y, pa_y | C) + \
                cache.local_score(x, pa_x)
            if debug > 1:
                print(" new score = %0.6f, old score = %0.6f, y=%d, C=%s" %
                      (new_score, old_score, y, C))
            # Add to the list of valid operators
            valid_operators.append((new_score - old_score, new_A, x, y, C))
            if debug:
                print(" turn(%d,%d,%s) -> %0.16f" % (x, y, C, new_score - old_score))
    # Return all the valid operators
    return valid_operators
def score_valid_insert_operators(x, y, A, cache, debug=0):
    """Generate and score all valid insert(x,y,T) operators involving the
    edge x -> y, and all possible subsets T of neighbors of y which
    are NOT adjacent to x.

    A subset T is kept if NA_yx U T is a clique and if every
    semi-directed path from y to x contains a node in NA_yx U T.

    Parameters
    ----------
    x : int
        the origin node (i.e. x -> y)
    y : int
        the target node
    A : np.array
        the current adjacency matrix
    cache : instance of ges.scores.DecomposableScore
        the score cache to compute the score of the
        operators that are valid
    debug : int
        if larger than 0, debug traces are printed. Higher values
        correspond to increased verbosity

    Returns
    -------
    valid_operators : list of tuples
        one tuple `(score_change, new_A, x, y, T)` per valid operator,
        where `score_change` is the new score minus the old score,
        `new_A` is the adjacency matrix after applying insert(x, y, T)
        and T is an array of node indices

    Raises
    ------
    ValueError
        if x and y are already connected in A

    """
    p = len(A)
    if A[x, y] != 0 or A[y, x] != 0:
        raise ValueError("x=%d and y=%d are already connected" % (x, y))
    # One-hot encode all subsets of T0, plus one extra column to mark
    # if they pass validity condition 2 (see below)
    T0 = sorted(utils.neighbors(y, A) - utils.adj(x, A))
    if len(T0) == 0:
        # Only the empty subset is available
        subsets = np.zeros((1, p + 1), dtype=bool)
    else:
        subsets = np.zeros((2**len(T0), p + 1), dtype=bool)
        subsets[:, T0] = utils.cartesian([np.array([False, True])] * len(T0), dtype=bool)
    # The following depend only on x, y and A (which is never modified
    # here), so they are computed once instead of once per subset
    na_yx = utils.na(y, x, A)
    pa_y = utils.pa(y, A)
    valid_operators = []
    if debug > 1:
        print(" insert(%d,%d) T0=" % (x, y), set(T0))
    while len(subsets) > 0:
        if debug > 1:
            print(" len(subsets)=%d, len(valid_operators)=%d" %
                  (len(subsets), len(valid_operators)))
        # Access the next subset
        T = np.where(subsets[0, :-1])[0]
        passed_cond_2 = subsets[0, -1]
        subsets = subsets[1:]
        # Check that the validity conditions hold for T
        na_yxT = na_yx | set(T)
        # Condition 1: Test that NA_yx U T is a clique
        cond_1 = utils.is_clique(na_yxT, A)
        if not cond_1:
            # Remove from consideration all other sets T' which
            # contain T, as the clique condition will also not hold
            supersets = subsets[:, T].all(axis=1)
            subsets = utils.delete(subsets, supersets, axis=0)
        # Condition 2: Test that all semi-directed paths from y to x contain a
        # member from NA_yx U T
        # NOTE(review): when cond_1 failed, the supersets of T were
        # just removed and the operator is rejected below, so this
        # evaluation only feeds the debug trace; a `continue` above
        # would skip the path enumeration at the cost of that trace.
        if passed_cond_2:
            # If a subset of T satisfied condition 2, so does T
            cond_2 = True
        else:
            # Check condition 2
            cond_2 = True
            for path in utils.semi_directed_paths(y, x, A):
                if len(na_yxT & set(path)) == 0:
                    cond_2 = False
                    break
            if cond_2:
                # If condition 2 holds for NA_yx U T, then it holds for all supersets of T
                supersets = subsets[:, T].all(axis=1)
                subsets[supersets, -1] = True
        if debug > 1:
            print(" insert(%d,%d,%s)" % (x, y, T), "na_yx U T = ", na_yxT,
                  "validity:", cond_1, cond_2)
        # If both conditions hold, apply operator and compute its score
        if cond_1 and cond_2:
            # Apply operator
            new_A = insert(x, y, T, A)
            # Compute the change in score
            aux = na_yxT | pa_y
            old_score = cache.local_score(y, aux)
            new_score = cache.local_score(y, aux | {x})
            if debug > 1:
                print(" new: s(%d, %s) = %0.6f old: s(%d, %s) = %0.6f" %
                      (y, aux | {x}, new_score, y, aux, old_score))
            # Add to the list of valid operators
            valid_operators.append((new_score - old_score, new_A, x, y, T))
            if debug:
                print(" insert(%d,%d,%s) -> %0.16f" % (x, y, T, new_score - old_score))
    # Return all the valid operators
    return valid_operators