def directed_random_graph(nnodes: int, random_graph_model: Callable, size=1, as_list=False) -> Union[DAG, List[DAG]]:
    """
    Generate random DAG(s) by orienting the edges of a random undirected graph
    according to a uniformly shuffled node order.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph. Nodes are labelled 0, ..., nnodes - 1.
    random_graph_model:
        Callable taking `nnodes` and returning an object whose `.edges`
        attribute iterates over (node1, node2) pairs.
    size:
        Number of graphs.
    as_list:
        If True, always return as a list, even if only one DAG is generated.

    Returns
    -------
    A single DAG when `size == 1` and `as_list` is False, otherwise a list of DAGs.
    """
    if size != 1:
        return [directed_random_graph(nnodes, random_graph_model) for _ in range(size)]

    # generate a random undirected graph
    edges = random_graph_model(nnodes).edges

    # generate a random permutation
    random_permutation = np.arange(nnodes)
    np.random.shuffle(random_permutation)

    # position[v] is the index of node v in the permutation; computing the
    # inverse once avoids a linear scan of the permutation for every endpoint.
    position = np.empty(nnodes, dtype=int)
    position[random_permutation] = np.arange(nnodes)

    # orient each edge from the endpoint that comes first in the permutation
    arcs = [
        (node1, node2) if position[node1] < position[node2] else (node2, node1)
        for node1, node2 in edges
    ]
    d = DAG(nodes=set(range(nnodes)), arcs=arcs)
    return [d] if as_list else d
def to_dag(self):
    """
    Return a DAG that is consistent with this CPDAG.

    The DAG is built on a copy of this graph by repeatedly picking a node that
    has no children and whose undirected neighbors are each adjacent to all of
    its other neighbors, orienting every incident edge into that node, and then
    removing it. If no such node exists while edges remain, the loop stops and
    the arcs collected so far are returned.

    Returns
    -------
    d
        A DAG built from the collected arcs only (no explicit node set is passed).

    Examples
    --------
    TODO
    """
    from causaldag import DAG

    remaining = self.copy()

    def _is_removable_sink(node):
        # must have no outgoing arcs ...
        if len(remaining._children[node]) != 0:
            return False
        # ... and orienting its undirected edges inward must not create a v-structure
        adjacent = remaining._neighbors[node]
        for und_nbr in remaining._undirected_neighbors[node]:
            if not (adjacent - {und_nbr}).issubset(remaining._neighbors[und_nbr]):
                return False
        return True

    oriented = set()
    while len(remaining._edges) + len(remaining._arcs) != 0:
        sink = None
        for candidate in remaining._nodes:
            if _is_removable_sink(candidate):
                sink = candidate
                break
        if sink is None:
            break

        for nbr in remaining._neighbors[sink]:
            oriented.add((nbr, sink))
        remaining.remove_node(sink)

    return DAG(arcs=oriented)
def directed_erdos(nnodes, density, size=1, as_list=False) -> Union[DAG, List[DAG]]:
    """
    Generate random Erdos-Renyi DAG(s) on `nnodes` nodes with density `density`.

    Every candidate arc (i, j) with i < j is kept or dropped according to one
    draw from `_coin`, so 0, 1, ..., nnodes - 1 is a topological order.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph.
    density:
        Probability of any edge.
    size:
        Number of graphs.
    as_list:
        If True, always return as a list, even if only one DAG is generated.

    Examples
    --------
    >>> d = cd.rand.directed_erdos(5, .5)
    """
    if size != 1:
        return [directed_erdos(nnodes, density) for _ in range(size)]

    num_pairs = int(nnodes * (nnodes - 1) / 2)
    keep_flags = _coin(density, size=num_pairs)
    candidate_arcs = itr.combinations(range(nnodes), 2)
    arcs = {pair for pair, keep in zip(candidate_arcs, keep_flags) if keep}

    d = DAG(nodes=set(range(nnodes)), arcs=arcs)
    if as_list:
        return [d]
    return d
def is_icovered(
        setting_list: List[Dict],
        i: int,
        j: int,
        dag: DAG,
        invariance_tester: InvarianceTester,
):
    """
    Tell if an edge i->j is I-covered with respect to the invariance tests.

    True if, for all I s.t. i in I, the distribution of j given its parents
    varies between the observational and interventional data, i.e. the
    invariance tester does not report invariance in any setting that
    intervenes on i. Settings that do not intervene on i are never tested, so
    the result is True when no setting intervenes on i.

    Parameters
    ----------
    setting_list:
        A list of dictionaries that provide meta-information about each
        setting; each must have an 'interventions' entry supporting `in`.
        The first setting must be observational.
    i:
        Source of the edge being tested.
    j:
        Target of the edge being tested.
    dag:
        Graph providing `parents_of(j)`, used as the conditioning set.
    invariance_tester:
        Object providing `is_invariant(node, context=..., cond_set=...)`;
        `context` is the index of the setting in `setting_list`.

    Returns
    -------
    bool
    """
    parents_j = list(dag.parents_of(j))
    # any() short-circuits on the first invariant setting, like an early return
    return not any(
        invariance_tester.is_invariant(j, context=setting_num, cond_set=parents_j)
        for setting_num, setting in enumerate(setting_list)
        if i in setting['interventions']
    )
def directed_erdos(nnodes, density, size=1):
    """
    Generate random Erdos-Renyi DAG(s) on `nnodes` nodes with density `density`.

    Returns a single DAG when `size == 1`, otherwise a list of `size` DAGs.
    Only arcs (i, j) with i < j are ever created.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph.
    density:
        Probability of any edge.
    size:
        Number of graphs.

    Examples
    --------
    >>> d = cd.rand.directed_erdos(5, .5)
    """
    if size != 1:
        return [directed_erdos(nnodes, density) for _ in range(size)]

    n_pairs = int(nnodes * (nnodes - 1) / 2)
    flips = _coin(density, size=n_pairs)

    # walk the candidate arcs in lockstep with the coin flips
    arcs = set()
    for pair, keep in zip(itr.combinations(range(nnodes), 2), flips):
        if keep:
            arcs.add(pair)
    return DAG(nodes=set(range(nnodes)), arcs=arcs)
def directed_erdos(nnodes, density=None, exp_nbrs=None, size=1, as_list=False, random_order=True) -> Union[DAG, List[DAG]]:
    """
    Generate random Erdos-Renyi DAG(s) on `nnodes` nodes with density `density`.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph.
    density:
        Probability of any edge. If None, it is derived from `exp_nbrs`.
    exp_nbrs:
        Only used when `density` is None, in which case the density is
        `exp_nbrs / (nnodes - 1)`. At least one of `density` and `exp_nbrs`
        must be given.
    size:
        Number of graphs.
    as_list:
        If True, always return as a list, even if only one DAG is generated.
    random_order:
        If True, relabel the nodes of each generated DAG with a random
        permutation of 0, ..., nnodes - 1.

    Examples
    --------
    >>> import causaldag as cd
    >>> d = cd.rand.directed_erdos(5, .5)
    """
    assert density is not None or exp_nbrs is not None
    if density is None:
        density = exp_nbrs / (nnodes - 1)

    if size != 1:
        return [
            directed_erdos(nnodes, density, random_order=random_order)
            for _ in range(size)
        ]

    num_pairs = int(nnodes * (nnodes - 1) / 2)
    keep_flags = _coin(density, size=num_pairs)
    ordered_pairs = itr.combinations(range(nnodes), 2)
    arcs = {pair for pair, keep in zip(ordered_pairs, keep_flags) if keep}
    d = DAG(nodes=set(range(nnodes)), arcs=arcs)

    if random_order:
        # map each original label to its image under a random permutation
        nodes = list(range(nnodes))
        relabeling = {old: new for old, new in enumerate(np.random.permutation(nodes))}
        d = d.rename_nodes(relabeling)

    if as_list:
        return [d]
    return d
def directed_erdos_with_confounders(
        nnodes: int,
        density: Optional[float] = None,
        exp_nbrs: Optional[float] = None,
        num_confounders: int = 1,
        confounder_pervasiveness: float = 1,
        size=1,
        as_list=False,
        random_order=True
) -> Union[DAG, List[DAG]]:
    """
    Generate random Erdos-Renyi DAG(s) on `nnodes` nodes, plus
    `num_confounders` additional nodes that only have outgoing arcs.

    Before any relabeling, the confounders are 0, ..., num_confounders - 1 and
    the remaining nodes are num_confounders, ..., nnodes + num_confounders - 1.

    Parameters
    ----------
    nnodes:
        Number of non-confounder nodes in each graph.
    density:
        Probability of any edge between two non-confounder nodes. If None, it
        is derived from `exp_nbrs`.
    exp_nbrs:
        Only used when `density` is None, in which case the density is
        `exp_nbrs / (nnodes - 1)`. At least one of `density` and `exp_nbrs`
        must be given.
    num_confounders:
        Number of confounder nodes.
    confounder_pervasiveness:
        Probability of an arc from any confounder to any non-confounder node.
    size:
        Number of graphs.
    as_list:
        If True, always return as a list, even if only one DAG is generated.
    random_order:
        If True, relabel all nodes (confounders included) with a random
        permutation of 0, ..., nnodes + num_confounders - 1.
    """
    assert density is not None or exp_nbrs is not None
    density = density if density is not None else exp_nbrs / (nnodes - 1)

    if size != 1:
        return [
            directed_erdos_with_confounders(
                nnodes,
                density,
                num_confounders=num_confounders,
                confounder_pervasiveness=confounder_pervasiveness,
                random_order=random_order
            )
            for _ in range(size)
        ]

    total_nodes = nnodes + num_confounders
    confounders = list(range(num_confounders))
    nonconfounders = list(range(num_confounders, total_nodes))

    bools = _coin(confounder_pervasiveness, size=int(num_confounders * nnodes))
    confounder_arcs = {
        (i, j) for (i, j), b in zip(itr.product(confounders, nonconfounders), bools) if b
    }
    bools = _coin(density, size=int(nnodes * (nnodes - 1) / 2))
    local_arcs = {
        (i, j) for (i, j), b in zip(itr.combinations(nonconfounders, 2), bools) if b
    }

    # The node set must cover confounders AND non-confounders; using only
    # range(nnodes) would leave out high-index nodes that received no arcs.
    d = DAG(nodes=set(range(total_nodes)), arcs=confounder_arcs | local_arcs)

    if random_order:
        nodes = list(range(total_nodes))
        d = d.rename_nodes(dict(enumerate(np.random.permutation(nodes))))
    return [d] if as_list else d
def perm2dag2(perm, ci_tester, node2nbrs=None):
    """
    Build a DAG consistent with the ordering `perm` from conditional
    independence tests.

    For every pair with pi_i before pi_j in `perm`, the arc pi_i -> pi_j is
    added unless `ci_tester` reports pi_i and pi_j independent given the nodes
    that precede pi_j in `perm`, excluding pi_i.

    Parameters
    ----------
    perm:
        Sliceable sequence of nodes giving the ordering.
    ci_tester:
        Object providing `is_ci(node1, node2, cond_set)`.
    node2nbrs:
        Optional mapping from each node to a set of nodes. When given, the
        conditioning set is intersected with the union of the two tested
        nodes' sets.

    Returns
    -------
    A DAG on `set(perm)` with the selected arcs.
    """
    arcs = set()
    for (i, pi_i), (j, pi_j) in itr.combinations(enumerate(perm), 2):
        c = set(perm[:j]) - {pi_i}
        if node2nbrs is not None:
            c = c & (node2nbrs[pi_i] | node2nbrs[pi_j])
        if not ci_tester.is_ci(pi_i, pi_j, c):
            arcs.add((pi_i, pi_j))
    return DAG(nodes=set(perm), arcs=arcs)
def to_gauss_dag(self, perm):
    """
    Return a GaussDAG with the same mean and covariance as this GGM, and is a
    minimal IMAP of this GGM consistent with the node ordering `perm`.

    Parameters
    ----------
    perm:
        The desired permutation, or total order, of the nodes in the result.
        Nodes are used directly as indices into `self.means` and
        `self.covariance`, so they are expected to be 0, ..., num_nodes - 1.

    Returns
    -------
    GaussDAG
        Built on nodes 0, ..., num_nodes - 1, with arc weights, means and
        variances obtained by regressing each node on its parents.

    Examples
    --------
    TODO
    """
    from causaldag import DAG, GaussDAG

    # === STRUCTURE: add pi_i -> pi_j whenever the partial correlation is nonzero.
    # The pair order (all earlier nodes for each j, in increasing j) matters,
    # because the conditioning set depends on the arcs added so far.
    d = DAG(nodes=self.nodes)
    for j in range(len(perm)):
        for i in range(j):
            pi_i, pi_j = perm[i], perm[j]
            rho = self.partial_correlation(pi_i, pi_j, d.markov_blanket(pi_i))
            if not np.isclose(rho, 0):
                d.add_arc(pi_i, pi_j, unsafe=True)

    arcs = dict()
    Sigma = self.covariance
    # Stored by node index rather than appended in `perm` order, so that entry k
    # belongs to node k of the node list passed to GaussDAG below.
    # NOTE(review): assumes GaussDAG pairs means/variances positionally with its
    # node list - confirm against the GaussDAG constructor.
    means = [None] * self.num_nodes
    variances = [None] * self.num_nodes
    for i in perm:
        ps = list(d.parents_of(i))

        # === LINEAR REGRESSION TO FIND EDGE WEIGHTS
        S_xx = Sigma[np.ix_(ps, ps)]
        S_xy = Sigma[ps, i]
        coeffs = inv(S_xx) @ S_xy

        # === COMPUTE MEAN AND VARIANCE
        mean = self.means[i] - self.means[ps] @ coeffs.T
        variance = Sigma[i, i] - Sigma[i, ps] @ coeffs

        for p, coeff in zip(ps, coeffs):
            arcs[(p, i)] = coeff
        means[i] = mean
        variances[i] = variance

    return GaussDAG(list(range(self.num_nodes)), arcs, means=means, variances=variances)
def perm2dag(perm, ci_tester: CI_Tester, verbose=False, fixed_adjacencies=set(), fixed_gaps=set(), node2nbrs=None, older=False):
    """
    Build a DAG consistent with the ordering `perm` using conditional
    independence tests.

    Pairs are visited with the later node fixed and the earlier node running
    over all of its predecessors; an arc earlier -> later is added whenever the
    pair is a fixed adjacency or the CI test reports dependence.

    Parameters
    ----------
    perm
        Sliceable sequence of nodes giving the ordering.
    ci_tester
        Object providing `is_ci(node1, node2, cond_set)`.
    verbose
        If True, print the outcome of each CI test.
    fixed_adjacencies
        Collection of node pairs (tuples, checked in both orders) that always
        receive an arc, without testing.
    fixed_gaps
        Collection of node pairs (tuples, checked in both orders) that never
        receive an arc, without testing.
    node2nbrs
        Optional mapping from node to a set of nodes. If None, the conditioning
        set is the current Markov blanket of the earlier node; otherwise it is
        the predecessors of the later node (minus the earlier node) intersected
        with the union of the two nodes' sets.
    older
        If True, always condition on all predecessors of the later node,
        excluding the earlier node.

    Examples
    --------
    TODO
    """
    d = DAG(nodes=set(perm))
    for j in range(len(perm)):
        for i in range(j):
            pi_i, pi_j = perm[i], perm[j]
            forward, backward = (pi_i, pi_j), (pi_j, pi_i)

            # === IF FIXED, DON'T TEST
            if forward in fixed_adjacencies or backward in fixed_adjacencies:
                d.add_arc(pi_i, pi_j)
                continue
            if forward in fixed_gaps or backward in fixed_gaps:
                continue

            # === TEST MARKOV BLANKET
            if node2nbrs is None:
                mb = d.markov_blanket(pi_i)
            else:
                mb = (set(perm[:j]) - {pi_i}) & (node2nbrs[pi_i] | node2nbrs[pi_j])
            if older:
                mb = set(perm[:j]) - {pi_i}

            is_ci = ci_tester.is_ci(pi_i, pi_j, mb)
            if not is_ci:
                d.add_arc(pi_i, pi_j, unsafe=True)
            if verbose:
                print("%s indep of %s given %s: %s" % (pi_i, pi_j, mb, is_ci))
    return d
def perm2dag_subsets(perm, ci_tester, max_subset_size=None):
    """
    Build a DAG consistent with `perm` by searching, for each node, over
    candidate parent sets drawn from its predecessors.

    Not recommended unless max_subset_size set very small. Not thoroughly tested.

    For each node, candidate parent sets are taken from `powerset` in the order
    it yields them; the first candidate given which the node is reported
    independent of every remaining node becomes its parent set. A node for
    which no candidate qualifies gets no parents.

    Parameters
    ----------
    perm:
        Sliceable sequence of nodes giving the ordering.
    ci_tester:
        Object providing `is_ci(node1, node2, cond_set)`.
    max_subset_size:
        Forwarded to `powerset` as `r_max`.

    Returns
    -------
    A DAG on `set(perm)` with the selected arcs.
    """
    arcs = set()
    nodes = set(perm)
    for i, pi_i in enumerate(perm):
        for candidate_parent_set in powerset(perm[:i], r_max=max_subset_size):
            # coerce so the set difference works whatever container powerset yields
            candidate_parents = set(candidate_parent_set)
            # Test the node pi_i itself, not its position i in the permutation.
            # NOTE(review): the remaining nodes include those after pi_i in
            # `perm`, as in the original; confirm that this is intended.
            others = nodes - {pi_i} - candidate_parents
            if all(ci_tester.is_ci(pi_i, other, candidate_parents) for other in others):
                arcs.update((parent, pi_i) for parent in candidate_parents)
                break
    return DAG(nodes=nodes, arcs=arcs)
def rand_nn_functions(
        dag: DAG,
        num_layers=3,
        nonlinearity=_leaky_relu,
        noise=lambda: np.random.laplace(0, 1)
) -> SampleDAG:
    """
    Create a SampleDAG on the structure of `dag` in which each node's
    conditional is a small random feed-forward network of its parents' values.

    Parameters
    ----------
    dag:
        The DAG providing nodes and arcs.
    num_layers:
        Number of random square weight matrices applied per node.
    nonlinearity:
        Function applied after each matrix multiplication.
    noise:
        Zero-argument function whose return value is added to the output.

    Returns
    -------
    SampleDAG
        With one conditional set per node.
    """

    def _make_conditional(layer_mats):
        # Factory so each conditional keeps ITS OWN matrices. Defining the
        # closure directly in the loop would make every conditional see the
        # matrices of the last node (closures bind names late).
        def conditional(parent_vals):
            vals = parent_vals
            for a in layer_mats:
                vals = a @ vals
                vals = nonlinearity(vals)
            return vals + noise()

        return conditional

    s = SampleDAG(dag._nodes, arcs=dag._arcs)

    # for each node, create the conditional
    for node in dag._nodes:
        nparents = dag.indegree(node)
        # entries are uniform on [0, 2)
        layer_mats = [np.random.rand(nparents, nparents) * 2 for _ in range(num_layers)]
        s.set_conditional(node, _make_conditional(layer_mats))
    return s
def rand_additive_basis(
        dag: DAG,
        basis: list,
        snr_dict: Optional[dict] = None,
        rand_weight_fn: RandWeightFn = unif_away_zero,
        noise=lambda: np.random.normal(0, 1),
        internal_variance: int = 1,
        num_monte_carlo: int = 10000,
        progress=False
):
    """
    Generate a random structural causal model (SCM), using `dag` as the structure, and with each variable
    being a general additive model (GAM) of its parents.

    Parameters
    ----------
    dag:
        A DAG to use as the structure for the model.
    basis:
        Basis functions for the GAM.
    snr_dict:
        A dictionary mapping each number of parents to the desired signal-to-noise ratio (SNR) for nodes
        with that many parents. By default, 1/2 for any number of parents.
    rand_weight_fn:
        A function to generate random weights for each parent.
    noise:
        A function to generate random internal noise for each node.
    internal_variance:
        The variance of the above noise function.
    num_monte_carlo:
        The number of Monte Carlo samples used when computing coefficients to achieve the desired SNR.
    progress:
        If True, wrap the iteration over nodes in a `tqdm` progress bar.

    Raises
    ------
    KeyError
        If `snr_dict` has no entry for the number of parents of some node.

    Examples
    --------
    >>> import causaldag as cd
    >>> import numpy as np
    >>> d = cd.DAG(arcs={(1, 2), (2, 3), (1, 3)})
    >>> basis = [np.sin, np.cos, np.exp]
    >>> snr_dict = {1: 1/2, 2: 2/3}
    >>> g = cd.rand.rand_additive_basis(d, basis, snr_dict)
    """
    if snr_dict is None:
        snr_dict = {nparents: 1 / 2 for nparents in range(dag.nnodes)}

    sample_dag = SampleDAG(dag._nodes, arcs=dag._arcs)
    top_order = dag.topological_sort()
    # node -> list of Monte Carlo samples; parents are filled before children
    sample_dict = defaultdict(list)

    # for each node, create the conditional
    node_iterator = top_order if not progress else tqdm(top_order)
    for node in node_iterator:
        parents = dag.parents_of(node)
        nparents = dag.indegree(node)
        parent_bases = random.choices(basis, k=nparents)
        parent_weights = rand_weight_fn(size=nparents)

        c_node = None
        if nparents > 0:
            # A missing dictionary key raises KeyError (not ValueError); look it
            # up before the Monte Carlo loop so a bad `snr_dict` fails fast.
            try:
                desired_snr = snr_dict[nparents]
            except KeyError as err:
                raise KeyError(
                    f"`snr_dict` does not specify a desired SNR for nodes with {nparents} parents"
                ) from err

            values_from_parents = []
            for i in range(num_monte_carlo):
                val = sum([
                    weight * base(sample_dict[parent][i])
                    for weight, base, parent in zip(parent_weights, parent_bases, parents)
                ])
                values_from_parents.append(val)
            variance_from_parents = np.var(values_from_parents)

            # scale applied to the parents' contribution to reach the desired SNR
            c_node = internal_variance / variance_from_parents * desired_snr / (1 - desired_snr)

        conditional = partial(
            _cam_conditional,
            c_node=c_node,
            parent_weights=parent_weights,
            parent_bases=parent_bases,
            noise=noise
        )

        # sample this node so that its children can estimate their own variance
        for i in range(num_monte_carlo):
            val = conditional([sample_dict[parent][i] for parent in parents])
            sample_dict[node].append(val)
        sample_dag.set_conditional(node, conditional)

    return sample_dag
from causaldag import DAG cancer_network = DAG( arcs={('Pollution', 'Cancer'), ('Smoker', 'Cancer'), ('Cancer', 'Xmy'), ('Cancer', 'Dysponoea')}) earthquake_network = DAG( arcs={('Burglary', 'Alarm'), ('Earthquake', 'Alarm'), ('Alarm', 'JohnCalls'), ('Alarm', 'MaryCalls')}) sachs_network = DAG( arcs={ ('PKC', 'PKA'), ('PKC', 'Jnk'), ('PKC', 'P38'), ('PKC', 'Raf'), ('PKC', 'Mek'), ('PKA', 'Jnk'), ('PKA', 'P38'), ('PKA', 'Raf'), ('PKA', 'Mek'), ('PKA', 'Erk'), ('PKA', 'Akt'), ('Raf', 'Mek'), ('Mek', 'Erk'), ('Erk', 'Akt'), ('Plcg', 'PIP3'), ('Plcg', 'PIP2'),
def perm2dag(
        perm: list,
        ci_tester: CI_Tester,
        verbose=False,
        fixed_adjacencies: Set[UndirectedEdge] = set(),
        fixed_gaps: Set[UndirectedEdge] = set(),
        node2nbrs=None,
        older=False,
        progress=False
):
    """
    Given a permutation, find the minimal IMAP consistent with that permutation and the results of conditional
    independence tests from ci_tester.

    Parameters
    ----------
    perm:
        list of nodes representing the permutation.
    ci_tester:
        object for testing conditional independence.
    verbose:
        if True, log each CI test.
    fixed_adjacencies:
        set of frozensets of two nodes known to be adjacent; these pairs get an arc without testing.
    fixed_gaps:
        set of frozensets of two nodes known not to be adjacent; these pairs are skipped without testing.
    node2nbrs:
        optional mapping from node to a set of nodes. If None, each test conditions on the current Markov
        blanket of the earlier node; otherwise on the predecessors of the later node (minus the earlier
        node) intersected with the union of the two nodes' sets.
    older:
        if True, always condition on all predecessors of the later node, excluding the earlier node.
    progress:
        if True, wrap the iteration over node pairs in a `tqdm` progress bar.

    Raises
    ------
    ValueError
        If the first element drawn from `fixed_adjacencies` or `fixed_gaps` is not a frozenset.

    Examples
    --------
    >>> from causaldag.utils.ci_tests import MemoizedCI_Tester, gauss_ci_test, gauss_ci_suffstat
    >>> perm = [0,1,2]
    >>> suffstat = gauss_ci_suffstat(samples)
    >>> ci_tester = MemoizedCI_Tester(gauss_ci_test, suffstat)
    >>> perm2dag(perm, ci_tester, fixed_gaps={frozenset({1, 2})})
    """
    # spot-check one element of each collection for the expected type
    if fixed_adjacencies and not isinstance(next(iter(fixed_adjacencies)), frozenset):
        raise ValueError('fixed_adjacencies should contain frozensets')
    if fixed_gaps and not isinstance(next(iter(fixed_gaps)), frozenset):
        raise ValueError('fixed_gaps should contain frozensets')

    d = DAG(nodes=set(perm))
    # all index pairs (earlier, later), grouped by the later index
    ixs = [(first, second) for second in range(len(perm)) for first in range(second)]
    if progress:
        ixs = tqdm(ixs)

    for i, j in ixs:
        pi_i, pi_j = perm[i], perm[j]
        pair = frozenset({pi_i, pi_j})

        # === IF FIXED, DON'T TEST
        if pair in fixed_adjacencies:
            d.add_arc(pi_i, pi_j)
            continue
        if pair in fixed_gaps:
            continue

        # === TEST MARKOV BLANKET
        if node2nbrs is None:
            mb = d.markov_blanket(pi_i)
        else:
            mb = (set(perm[:j]) - {pi_i}) & (node2nbrs[pi_i] | node2nbrs[pi_j])
        if older:
            mb = set(perm[:j]) - {pi_i}

        is_ci = ci_tester.is_ci(pi_i, pi_j, mb)
        if not is_ci:
            d.add_arc(pi_i, pi_j, unsafe=True)
        if verbose:
            print(f"{pi_i} is independent of {pi_j} given {mb}: {is_ci}")

    return d
def rand_additive_basis(
        dag: DAG,
        basis: list,
        r2_dict: Optional[Union[Dict[int, float], float]] = None,
        rand_weight_fn: RandWeightFn = unif_away_zero,
        noise=lambda size: np.random.normal(0, 1, size=size),
        internal_variance: int = 1,
        num_monte_carlo: int = 10000,
        progress=False
):
    """
    Generate a random structural causal model (SCM), using `dag` as the structure, and with each variable
    being a general additive model (GAM) of its parents.

    Parameters
    ----------
    dag:
        A DAG to use as the structure for the model.
    basis:
        Basis functions for the GAM.
    r2_dict:
        A dictionary mapping each number of parents to the desired R^2 for nodes with that many parents,
        or a single float used for every number of parents. By default, 1/2 for any number of parents.
    rand_weight_fn:
        A function to generate random weights for each parent.
    noise:
        A function taking `size` and generating that many random internal noise values for a node.
    internal_variance:
        The variance of the above noise function.
    num_monte_carlo:
        The number of Monte Carlo samples used when computing coefficients to achieve the desired R^2.
    progress:
        If True, wrap the iteration over nodes in a `tqdm` progress bar.

    Raises
    ------
    KeyError
        If `r2_dict` has no entry for the number of parents of some node.
    ValueError
        If the scaling coefficient computed for a node is NaN.

    Examples
    --------
    >>> import causaldag as cd
    >>> import numpy as np
    >>> d = cd.DAG(arcs={(1, 2), (2, 3), (1, 3)})
    >>> basis = [np.sin, np.cos, np.exp]
    >>> r2_dict = {1: 1/2, 2: 2/3}
    >>> g = cd.rand.rand_additive_basis(d, basis, r2_dict)
    """
    if r2_dict is None:
        r2_dict = {nparents: 1 / 2 for nparents in range(dag.nnodes)}
    if isinstance(r2_dict, float):
        r2_dict = {nparents: r2_dict for nparents in range(dag.nnodes)}

    cam_dag = CamDAG(dag._nodes, arcs=dag._arcs)
    top_order = dag.topological_sort()
    # node -> array of Monte Carlo samples; parents are filled before children
    sample_dict = dict()

    # for each node, create the conditional
    node_iterator = top_order if not progress else tqdm(top_order)
    for node in node_iterator:
        parents = dag.parents_of(node)
        nparents = dag.indegree(node)
        parent2base = dict(zip(parents, random.choices(basis, k=nparents)))
        parent_weights = rand_weight_fn(size=nparents)
        # one row per Monte Carlo sample, one column per parent
        parent_vals = np.array([sample_dict[parent] for parent in parents]).T \
            if nparents > 0 else np.zeros([num_monte_carlo, 0])

        c_node = 1
        if nparents > 0:
            # A missing dictionary key raises KeyError (not ValueError)
            try:
                desired_r2 = r2_dict[nparents]
            except KeyError as err:
                raise KeyError(
                    f"`r2_dict` does not specify a desired R^2 for nodes with {nparents} parents"
                ) from err

            mean_function_no_c = partial(
                _cam_mean_function,
                c_node=1,
                parent_weights=parent_weights,
                parent2base=parent2base
            )
            values_from_parents = mean_function_no_c(parent_vals, parents)
            variance_from_parents = np.var(values_from_parents)

            # scale applied to the parents' contribution to reach the desired R^2
            c_node = internal_variance / variance_from_parents * desired_r2 / (1 - desired_r2)
            if np.isnan(c_node):
                raise ValueError(
                    f"Scaling coefficient for node {node} is NaN "
                    f"(variance from parents: {variance_from_parents}, desired R^2: {desired_r2})"
                )

        mean_function = partial(
            _cam_mean_function,
            c_node=c_node,
            parent_weights=parent_weights,
            parent2base=parent2base
        )

        # sample this node so that its children can estimate their own variance
        mean_vals = mean_function(parent_vals, parents)
        sample_dict[node] = mean_vals + noise(size=num_monte_carlo)

        cam_dag.set_mean_function(node, mean_function)
        cam_dag.set_noise(node, noise)

    return cam_dag