Esempio n. 1
0
def directed_random_graph(nnodes: int,
                          random_graph_model: Callable,
                          size=1,
                          as_list=False) -> Union[DAG, List[DAG]]:
    """
    Generate random DAG(s) by orienting the edges of a random undirected graph.

    An undirected graph is drawn from `random_graph_model`, a random ordering of the nodes is drawn, and
    every edge is directed from the endpoint that comes first in that ordering to the one that comes later.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph. Nodes are labelled `0, ..., nnodes - 1`.
    random_graph_model:
        Callable taking `nnodes` and returning an object whose `edges` attribute iterates over pairs
        of node labels.
    size:
        Number of graphs.
    as_list:
        If True, return a list even when only one DAG is generated.

    Returns
    -------
    A single DAG when `size == 1` and `as_list` is False, otherwise a list of DAGs.
    """
    if size != 1:
        return [
            directed_random_graph(nnodes, random_graph_model)
            for _ in range(size)
        ]

    # generate a random undirected graph
    edges = random_graph_model(nnodes).edges

    # generate a random permutation
    random_permutation = np.arange(nnodes)
    np.random.shuffle(random_permutation)

    # Look up each node's position once instead of scanning the permutation for every edge endpoint.
    position = {
        node: ix
        for ix, node in enumerate(random_permutation.tolist())
    }

    # Orient each edge from the earlier node in the permutation to the later one.
    arcs = [
        (node1, node2) if position[node1] < position[node2] else (node2, node1)
        for node1, node2 in edges
    ]
    d = DAG(nodes=set(range(nnodes)), arcs=arcs)
    return [d] if as_list else d
Esempio n. 2
0
    def to_dag(self):
        """
        Return a DAG that is consistent with this CPDAG.

        Works on a copy of this graph. Repeatedly picks a node that has no children and for which every
        undirected neighbor is adjacent to all of the node's other neighbors, orients every edge incident
        to that node towards it, and removes the node from the copy.

        Returns
        -------
        d
            A DAG over the same nodes as this graph. If at some step no eligible node exists, the loop
            stops and the result contains only the arcs oriented up to that point.
        """
        from causaldag import DAG

        pdag2 = self.copy()

        def is_sink(n):
            return len(pdag2._children[n]) == 0

        def no_vstructs(n):
            # Orienting all edges into n must not leave two parents of n non-adjacent.
            return all(
                (pdag2._neighbors[n] - {u_nbr}).issubset(pdag2._neighbors[u_nbr])
                for u_nbr in pdag2._undirected_neighbors[n]
            )

        arcs = set()
        while len(pdag2._edges) + len(pdag2._arcs) != 0:
            sink = next((n for n in pdag2._nodes if is_sink(n) and no_vstructs(n)), None)
            if sink is None:
                break
            arcs.update((nbr, sink) for nbr in pdag2._neighbors[sink])
            pdag2.remove_node(sink)

        # Pass the node set explicitly so nodes without any incident arc are kept in the result.
        return DAG(nodes=set(self._nodes), arcs=arcs)
Esempio n. 3
0
def directed_erdos(nnodes,
                   density,
                   size=1,
                   as_list=False) -> Union[DAG, List[DAG]]:
    """
    Sample Erdos-Renyi DAG(s) on `nnodes` nodes with density `density`.

    Every pair `(i, j)` with `i < j` is a candidate arc `i -> j`; one draw from `_coin` per pair decides
    whether it is kept.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph.
    density:
        Probability of any edge.
    size:
        Number of graphs.
    as_list:
        If True, always return as a list, even if only one DAG is generated.

    Examples
    --------
    >>> d = cd.rand.directed_erdos(5, .5)
    """
    if size != 1:
        return [directed_erdos(nnodes, density) for _ in range(size)]

    num_pairs = int(nnodes * (nnodes - 1) / 2)
    keep_flags = _coin(density, size=num_pairs)

    arcs = set()
    for pair, keep in zip(itr.combinations(range(nnodes), 2), keep_flags):
        if keep:
            arcs.add(pair)

    graph = DAG(nodes=set(range(nnodes)), arcs=arcs)
    if as_list:
        return [graph]
    return graph
Esempio n. 4
0
def is_icovered(
    setting_list: List[Dict],
    i: int,
    j: int,
    dag: DAG,
    invariance_tester: InvarianceTester,
) -> bool:
    """
    Tell if an edge i->j is I-covered with respect to the invariance tests.

    True if, for every setting whose interventions include i, the distribution of j given its parents
    varies between the observational and interventional data, i.e. the invariance tester does not
    report j as invariant in that setting.

    Parameters
    ----------
    setting_list:
        A list of dictionaries that provide meta-information about each setting. Each dictionary must
        have an 'interventions' entry supporting membership tests.
        The first setting must be observational.
    i:
        Source of the edge being tested.
    j:
        Target of the edge being tested.
    dag:
        Graph from which the parents of j are read.
    invariance_tester:
        Object whose `is_invariant(node, context=..., cond_set=...)` method is queried with the index
        of each relevant setting as the context.

    Returns
    -------
    False as soon as j is found invariant in some setting that intervenes on i, True otherwise
    (including when no setting intervenes on i).
    """
    parents_j = list(dag.parents_of(j))

    # `any` short-circuits, so testing stops at the first invariant setting.
    return not any(
        invariance_tester.is_invariant(j,
                                       context=setting_num,
                                       cond_set=parents_j)
        for setting_num, setting in enumerate(setting_list)
        if i in setting['interventions'])
Esempio n. 5
0
def directed_erdos(nnodes, density, size=1):
    """
    Sample Erdos-Renyi DAG(s) on `nnodes` nodes with density `density`.

    Candidate arcs are all pairs `(i, j)` with `i < j`; one draw from `_coin` per pair decides whether
    the arc is included.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph.
    density:
        Probability of any edge.
    size:
        Number of graphs. A single DAG is returned when this is 1, otherwise a list of DAGs.

    Examples
    --------
    >>> d = cd.rand.directed_erdos(5, .5)
    """
    if size != 1:
        return [directed_erdos(nnodes, density) for _ in range(size)]

    included = _coin(density, size=int(nnodes * (nnodes - 1) / 2))
    candidate_arcs = itr.combinations(range(nnodes), 2)
    chosen_arcs = {
        arc
        for arc, is_included in zip(candidate_arcs, included)
        if is_included
    }
    return DAG(nodes=set(range(nnodes)), arcs=chosen_arcs)
Esempio n. 6
0
def directed_erdos(nnodes,
                   density=None,
                   exp_nbrs=None,
                   size=1,
                   as_list=False,
                   random_order=True) -> Union[DAG, List[DAG]]:
    """
    Generate random Erdos-Renyi DAG(s) on `nnodes` nodes with density `density`.

    Parameters
    ----------
    nnodes:
        Number of nodes in each graph.
    density:
        Probability of any edge. Takes precedence over `exp_nbrs` when both are given.
    exp_nbrs:
        Expected number of neighbors of each node. Only used when `density` is None, in which case the
        density is set to `exp_nbrs / (nnodes - 1)`.
    size:
        Number of graphs.
    as_list:
        If True, always return as a list, even if only one DAG is generated.
    random_order:
        If True, relabel the nodes of each generated DAG with a random permutation of the labels;
        otherwise every arc `(i, j)` satisfies `i < j`.

    Raises
    ------
    ValueError
        If neither `density` nor `exp_nbrs` is given.

    Examples
    --------
    >>> import causaldag as cd
    >>> d = cd.rand.directed_erdos(5, .5)
    """
    if density is None:
        if exp_nbrs is None:
            raise ValueError("one of `density` or `exp_nbrs` must be specified")
        # With fewer than two nodes there are no node pairs, so the density is irrelevant;
        # avoid dividing by zero.
        density = exp_nbrs / (nnodes - 1) if nnodes > 1 else 0

    if size != 1:
        return [
            directed_erdos(nnodes, density, random_order=random_order)
            for _ in range(size)
        ]

    bools = _coin(density, size=nnodes * (nnodes - 1) // 2)
    arcs = {(i, j)
            for (i, j), b in zip(itr.combinations(range(nnodes), 2), bools)
            if b}
    d = DAG(nodes=set(range(nnodes)), arcs=arcs)
    if random_order:
        nodes = list(range(nnodes))
        d = d.rename_nodes(dict(enumerate(np.random.permutation(nodes))))
    return [d] if as_list else d
Esempio n. 7
0
def directed_erdos_with_confounders(
        nnodes: int,
        density: Optional[float] = None,
        exp_nbrs: Optional[float] = None,
        num_confounders: int = 1,
        confounder_pervasiveness: float = 1,
        size=1,
        as_list=False,
        random_order=True) -> Union[DAG, List[DAG]]:
    """
    Generate random Erdos-Renyi DAG(s) on `nnodes` nodes plus `num_confounders` additional confounder nodes.

    Before any relabelling, the confounders are nodes `0, ..., num_confounders - 1` and the remaining
    `nnodes` nodes are `num_confounders, ..., nnodes + num_confounders - 1`. Candidate arcs go from each
    confounder to each non-confounder, and between non-confounders from the lower to the higher label.

    Parameters
    ----------
    nnodes:
        Number of non-confounder nodes in each graph.
    density:
        Value passed to `_coin` as the inclusion probability of each arc between non-confounders.
        Takes precedence over `exp_nbrs` when both are given.
    exp_nbrs:
        Expected number of neighbors; only used when `density` is None, in which case the density is
        set to `exp_nbrs / (nnodes - 1)`.
    num_confounders:
        Number of confounder nodes.
    confounder_pervasiveness:
        Value passed to `_coin` as the inclusion probability of each confounder -> non-confounder arc.
    size:
        Number of graphs.
    as_list:
        If True, always return as a list, even if only one DAG is generated.
    random_order:
        If True, relabel all `nnodes + num_confounders` nodes with a random permutation of the labels.

    Raises
    ------
    ValueError
        If neither `density` nor `exp_nbrs` is given.
    """
    if density is None:
        if exp_nbrs is None:
            raise ValueError("one of `density` or `exp_nbrs` must be specified")
        # With fewer than two non-confounders there are no pairs between them; avoid dividing by zero.
        density = exp_nbrs / (nnodes - 1) if nnodes > 1 else 0

    if size != 1:
        return [
            directed_erdos_with_confounders(
                nnodes,
                density,
                num_confounders=num_confounders,
                confounder_pervasiveness=confounder_pervasiveness,
                random_order=random_order) for _ in range(size)
        ]

    total_nodes = nnodes + num_confounders
    confounders = list(range(num_confounders))
    nonconfounders = list(range(num_confounders, total_nodes))

    bools = _coin(confounder_pervasiveness, size=int(num_confounders * nnodes))
    confounder_arcs = {
        (i, j)
        for (i, j), b in zip(itr.product(confounders, nonconfounders), bools)
        if b
    }
    bools = _coin(density, size=nnodes * (nnodes - 1) // 2)
    local_arcs = {
        (i, j)
        for (i, j), b in zip(itr.combinations(nonconfounders, 2), bools)
        if b
    }

    # The node set must cover confounders and non-confounders alike, otherwise nodes with labels
    # >= nnodes that happen to have no arcs would be missing from the graph.
    d = DAG(nodes=set(range(total_nodes)), arcs=confounder_arcs | local_arcs)

    if random_order:
        nodes = list(range(total_nodes))
        d = d.rename_nodes(dict(enumerate(np.random.permutation(nodes))))

    return [d] if as_list else d
Esempio n. 8
0
def perm2dag2(perm, ci_tester, node2nbrs=None, verbose=False):
    """
    Build a DAG consistent with the ordering `perm` from conditional independence tests.

    For every pair of nodes `pi_i`, `pi_j` with `pi_i` before `pi_j` in `perm`, the arc `pi_i -> pi_j`
    is added unless `ci_tester` reports them conditionally independent given the conditioning set.

    Parameters
    ----------
    perm:
        Sequence of nodes giving the ordering.
    ci_tester:
        Object with an `is_ci(node1, node2, cond_set)` method.
    node2nbrs:
        Optional mapping from each node to a set of nodes. When given, the conditioning set is
        restricted to nodes in `node2nbrs[pi_i] | node2nbrs[pi_j]`.
    verbose:
        If True, print the two nodes and the conditioning set of every test.
    """
    arcs = set()
    for (i, pi_i), (j, pi_j) in itr.combinations(enumerate(perm), 2):
        # Condition on everything that precedes pi_j in the ordering, except pi_i itself.
        c = set(perm[:j]) - {pi_i}
        c = c if node2nbrs is None else c & (node2nbrs[pi_i] | node2nbrs[pi_j])
        if verbose:
            print(pi_i, pi_j, c)
        if not ci_tester.is_ci(pi_i, pi_j, c):
            arcs.add((pi_i, pi_j))
    return DAG(nodes=set(perm), arcs=arcs)
Esempio n. 9
0
    def to_gauss_dag(self, perm):
        """
        Return a GaussDAG with the same mean and covariance as this GGM, and is a minimal IMAP of this GGM
        consistent with the node ordering `perm`.

        The structure is found by testing, for each pair of nodes in `perm` order, whether their partial
        correlation given the current Markov blanket of the earlier node is close to zero. Edge weights,
        means and variances are then obtained by regressing each node on its parents using the
        covariance matrix.

        Parameters
        ----------
        perm:
            The desired permutation, or total order, of the nodes in the result. Node labels are used
            as indices into the covariance matrix and mean vector.

        Returns
        -------
        A GaussDAG over nodes `0, ..., num_nodes - 1`.
        """
        from causaldag import DAG, GaussDAG

        d = DAG(nodes=self.nodes)
        ixs = list(
            itr.chain.from_iterable(
                ((f, s) for f in range(s)) for s in range(len(perm))))
        for i, j in ixs:
            pi_i, pi_j = perm[i], perm[j]
            if not np.isclose(
                    self.partial_correlation(pi_i, pi_j,
                                             d.markov_blanket(pi_i)), 0):
                d.add_arc(pi_i, pi_j, unsafe=True)

        arcs = dict()
        Sigma = self.covariance
        # Keyed by node label: nodes are visited in `perm` order, but the result lists them as
        # 0..num_nodes-1, so the per-node parameters must be re-ordered before they are returned.
        node2mean = dict()
        node2variance = dict()
        for i in perm:
            ps = list(d.parents_of(i))

            # === LINEAR REGRESSION TO FIND EDGE WEIGHTS
            S_xx = Sigma[np.ix_(ps, ps)]
            S_xy = Sigma[ps, i]
            coeffs = inv(S_xx) @ S_xy

            # === COMPUTE MEAN AND VARIANCE
            node2mean[i] = self.means[i] - self.means[ps] @ coeffs.T
            node2variance[i] = Sigma[i, i] - Sigma[i, ps] @ coeffs

            for p, coeff in zip(ps, coeffs):
                arcs[(p, i)] = coeff

        # NOTE(review): assumes GaussDAG pairs `means`/`variances` positionally with its node list.
        nodes = list(range(self.num_nodes))
        return GaussDAG(nodes,
                        arcs,
                        means=[node2mean[node] for node in nodes],
                        variances=[node2variance[node] for node in nodes])
Esempio n. 10
0
def perm2dag(perm,
             ci_tester: CI_Tester,
             verbose=False,
             fixed_adjacencies=set(),
             fixed_gaps=set(),
             node2nbrs=None,
             older=False):
    """
    Build a DAG over the nodes of `perm` using one conditional independence test per pair of nodes.

    Pairs are visited grouped by the later node, in `perm` order. For each pair, the arc from the
    earlier node to the later one is added when `ci_tester` does not report them conditionally
    independent.

    Parameters
    ----------
    perm
        Sequence of nodes giving the ordering.
    ci_tester
        Object with an `is_ci(node1, node2, cond_set)` method.
    verbose
        If True, print the outcome of each test.
    fixed_adjacencies
        Collection of node pairs; a pair found in either orientation gets its arc added without testing.
    fixed_gaps
        Collection of node pairs; a pair found in either orientation is skipped without testing.
    node2nbrs
        Optional mapping from node to a set of nodes. When given, the conditioning set is the nodes
        preceding the later node (minus the earlier node), restricted to the union of the two nodes'
        entries. When None, the Markov blanket of the earlier node in the DAG built so far is used.
    older
        If True, the conditioning set is all nodes preceding the later node, minus the earlier node,
        regardless of `node2nbrs`.

    Examples
    --------
    TODO
    """

    def _listed(pairs, a, b):
        # a pair counts as listed in either orientation
        return (a, b) in pairs or (b, a) in pairs

    graph = DAG(nodes=set(perm))
    for later_ix in range(len(perm)):
        for earlier_ix in range(later_ix):
            pi_i = perm[earlier_ix]
            pi_j = perm[later_ix]

            # === IF FIXED, DON'T TEST
            if _listed(fixed_adjacencies, pi_i, pi_j):
                graph.add_arc(pi_i, pi_j)
                continue
            if _listed(fixed_gaps, pi_i, pi_j):
                continue

            # === TEST MARKOV BLANKET
            if node2nbrs is None:
                mb = graph.markov_blanket(pi_i)
            else:
                allowed = node2nbrs[pi_i] | node2nbrs[pi_j]
                mb = (set(perm[:later_ix]) - {pi_i}) & allowed
            if older:
                mb = set(perm[:later_ix]) - {pi_i}

            is_ci = ci_tester.is_ci(pi_i, pi_j, mb)
            if not is_ci:
                graph.add_arc(pi_i, pi_j, unsafe=True)
            if verbose:
                print("%s indep of %s given %s: %s" % (pi_i, pi_j, mb, is_ci))

    return graph
Esempio n. 11
0
def perm2dag_subsets(perm, ci_tester, max_subset_size=None, verbose=False):
    """
    Build a DAG consistent with `perm` by searching, for each node, over subsets of its predecessors.

    For each node, candidate parent sets are taken from `powerset` of the nodes preceding it in `perm`.
    The first candidate that renders the node conditionally independent of every other predecessor is
    used as its parent set. If no candidate qualifies, the node gets no parents.

    Not recommended unless max_subset_size set very small. Not thoroughly tested.

    Parameters
    ----------
    perm:
        Sequence of nodes giving the ordering.
    ci_tester:
        Object with an `is_ci(node1, node2, cond_set)` method.
    max_subset_size:
        Passed to `powerset` as `r_max`.
    verbose:
        If True, print each candidate parent set as it is tried.
    """
    arcs = set()
    nodes = set(perm)
    for i, pi_i in enumerate(perm):
        predecessors = set(perm[:i])
        for candidate_parent_set in powerset(perm[:i], r_max=max_subset_size):
            if verbose:
                print(candidate_parent_set)
            candidate_parents = set(candidate_parent_set)
            # Test the node pi_i itself (not its position i), and only against the predecessors
            # left out of the candidate set: nodes later in the ordering are never parents of pi_i.
            if all(
                    ci_tester.is_ci(pi_i, pi_j, candidate_parents)
                    for pi_j in predecessors - candidate_parents):
                arcs.update((parent, pi_i) for parent in candidate_parents)
                break
    return DAG(nodes=nodes, arcs=arcs)
Esempio n. 12
0
def _make_nn_conditional(layer_mats, nonlinearity, noise):
    """Return a conditional that pushes parent values through `layer_mats` and adds a noise draw."""

    def conditional(parent_vals):
        vals = parent_vals
        for a in layer_mats:
            vals = a @ vals
            vals = nonlinearity(vals)
        return vals + noise()

    return conditional


def rand_nn_functions(
    dag: DAG,
    num_layers=3,
    nonlinearity=_leaky_relu,
    noise=lambda: np.random.laplace(0, 1)) -> SampleDAG:
    """
    Attach a randomly weighted layered function to every node of `dag`.

    Parameters
    ----------
    dag:
        A DAG to use as the structure for the model.
    num_layers:
        Number of weight matrices applied in sequence in each node's conditional.
    nonlinearity:
        Function applied after each matrix multiplication.
    noise:
        Zero-argument function whose result is added to the output of the last layer.

    Returns
    -------
    A SampleDAG with the nodes and arcs of `dag` and one conditional set per node.
    """
    s = SampleDAG(dag._nodes, arcs=dag._arcs)

    # for each node, create the conditional
    for node in dag._nodes:
        nparents = dag.indegree(node)
        layer_mats = [
            np.random.rand(nparents, nparents) * 2 for _ in range(num_layers)
        ]

        # Build the conditional through a factory so it captures this node's matrices. A closure
        # defined directly in the loop would look up `layer_mats` at call time and every node would
        # end up using the matrices of the last node.
        s.set_conditional(
            node, _make_nn_conditional(layer_mats, nonlinearity, noise))

    return s
Esempio n. 13
0
def rand_additive_basis(dag: DAG,
                        basis: list,
                        snr_dict: Optional[dict] = None,
                        rand_weight_fn: RandWeightFn = unif_away_zero,
                        noise=lambda: np.random.normal(0, 1),
                        internal_variance: int = 1,
                        num_monte_carlo: int = 10000,
                        progress=False):
    """
    Generate a random structural causal model (SCM), using `dag` as the structure, and with each variable
    being a general additive model (GAM) of its parents.

    Parameters
    ----------
    dag:
        A DAG to use as the structure for the model.
    basis:
        Basis functions for the GAM.
    snr_dict:
        A dictionary mapping each number of parents to the desired signal-to-noise ratio (SNR) for nodes
        with that many parents. By default, 1/2 for any number of parents.
    rand_weight_fn:
        A function to generate random weights for each parent.
    noise:
        A function to generate random internal noise for each node.
    internal_variance:
        The variance of the above noise function.
    num_monte_carlo:
        The number of Monte Carlo samples used when computing coefficients to achieve the desired SNR.
    progress:
        If True, wrap the iteration over nodes in a `tqdm` progress bar.

    Raises
    ------
    KeyError
        If `snr_dict` has no entry for the number of parents of some node.

    Examples
    --------
    >>> import causaldag as cd
    >>> import numpy as np
    >>> d = cd.DAG(arcs={(1, 2), (2, 3), (1, 3)})
    >>> basis = [np.sin, np.cos, np.exp]
    >>> snr_dict = {1: 1/2, 2: 2/3}
    >>> g = cd.rand.rand_additive_basis(d, basis, snr_dict)
    """
    if snr_dict is None:
        snr_dict = {nparents: 1 / 2 for nparents in range(dag.nnodes)}

    sample_dag = SampleDAG(dag._nodes, arcs=dag._arcs)
    top_order = dag.topological_sort()
    # node -> list of Monte Carlo samples; parents are filled in before their children because
    # nodes are visited in topological order.
    sample_dict = defaultdict(list)

    # for each node, create the conditional
    node_iterator = top_order if not progress else tqdm(top_order)
    for node in node_iterator:
        parents = dag.parents_of(node)
        nparents = dag.indegree(node)
        parent_bases = random.choices(basis, k=nparents)
        parent_weights = rand_weight_fn(size=nparents)

        c_node = None
        if nparents > 0:
            # A missing dictionary entry raises KeyError (not ValueError); look it up before the
            # Monte Carlo pass so a bad `snr_dict` fails fast with a helpful message.
            try:
                desired_snr = snr_dict[nparents]
            except KeyError:
                raise KeyError(
                    f"`snr_dict` does not specify a desired SNR for nodes with {nparents} parents"
                ) from None

            values_from_parents = [
                sum([
                    weight * base(sample_dict[parent][i])
                    for weight, base, parent in zip(parent_weights,
                                                    parent_bases, parents)
                ]) for i in range(num_monte_carlo)
            ]
            variance_from_parents = np.var(values_from_parents)

            c_node = internal_variance / variance_from_parents * desired_snr / (
                1 - desired_snr)

        conditional = partial(_cam_conditional,
                              c_node=c_node,
                              parent_weights=parent_weights,
                              parent_bases=parent_bases,
                              noise=noise)

        # Draw samples for this node so that its children can estimate their parent-driven variance.
        for i in range(num_monte_carlo):
            val = conditional([sample_dict[parent][i] for parent in parents])
            sample_dict[node].append(val)
        sample_dag.set_conditional(node, conditional)

    return sample_dag
Esempio n. 14
0
from causaldag import DAG

# Directed arcs of the "cancer" example network, one arc per line.
# NOTE(review): 'Xmy' and 'Dysponoea' look like misspellings of 'Xray' and 'Dyspnoea' — confirm
# before renaming, since these strings are the node labels.
cancer_network = DAG(
    arcs={
        ('Pollution', 'Cancer'),
        ('Smoker', 'Cancer'),
        ('Cancer', 'Xmy'),
        ('Cancer', 'Dysponoea'),
    })

# Directed arcs of the "earthquake" example network, one arc per line.
earthquake_network = DAG(
    arcs={
        ('Burglary', 'Alarm'),
        ('Earthquake', 'Alarm'),
        ('Alarm', 'JohnCalls'),
        ('Alarm', 'MaryCalls'),
    })

sachs_network = DAG(
    arcs={
        ('PKC', 'PKA'),
        ('PKC', 'Jnk'),
        ('PKC', 'P38'),
        ('PKC', 'Raf'),
        ('PKC', 'Mek'),
        ('PKA', 'Jnk'),
        ('PKA', 'P38'),
        ('PKA', 'Raf'),
        ('PKA', 'Mek'),
        ('PKA', 'Erk'),
        ('PKA', 'Akt'),
        ('Raf', 'Mek'),
        ('Mek', 'Erk'),
        ('Erk', 'Akt'),
        ('Plcg', 'PIP3'),
        ('Plcg', 'PIP2'),
Esempio n. 15
0
def perm2dag(perm: list,
             ci_tester: CI_Tester,
             verbose=False,
             fixed_adjacencies: Set[UndirectedEdge] = set(),
             fixed_gaps: Set[UndirectedEdge] = set(),
             node2nbrs=None,
             older=False,
             progress=False):
    """
    Given a permutation, find the minimal IMAP consistent with that permutation and the results of conditional independence
    tests from ci_tester.

    Parameters
    ----------
    perm:
        list of nodes representing the permutation.
    ci_tester:
        object for testing conditional independence.
    verbose:
        if True, log each CI test.
    fixed_adjacencies:
        set of frozensets, each holding two nodes known to be adjacent; these pairs get an arc without
        being tested.
    fixed_gaps:
        set of frozensets, each holding two nodes known not to be adjacent; these pairs are skipped.
    node2nbrs:
        optional mapping from node to a set of nodes. When given, the conditioning set is the nodes
        preceding the later node (minus the earlier node), restricted to the union of the two nodes'
        entries; when None, the Markov blanket of the earlier node in the DAG built so far is used.
    older:
        if True, condition on all nodes preceding the later node, minus the earlier node, regardless
        of `node2nbrs`.
    progress:
        if True, wrap the iteration over node pairs in a `tqdm` progress bar.

    Raises
    ------
    ValueError
        If an element sampled from `fixed_adjacencies` or `fixed_gaps` is not a frozenset.

    Examples
    --------
    >>> from causaldag.utils.ci_tests import MemoizedCI_Tester, gauss_ci_test, gauss_ci_suffstat
    >>> perm = [0,1,2]
    >>> suffstat = gauss_ci_suffstat(samples)
    >>> ci_tester = MemoizedCI_Tester(gauss_ci_test, suffstat)
    >>> perm2dag(perm, ci_tester, fixed_gaps={frozenset({1, 2})})
    """

    def _require_frozensets(pairs, arg_name):
        # Spot-check a single element rather than scanning the whole collection.
        if pairs and not isinstance(next(iter(pairs)), frozenset):
            raise ValueError(f'{arg_name} should contain frozensets')

    _require_frozensets(fixed_adjacencies, 'fixed_adjacencies')
    _require_frozensets(fixed_gaps, 'fixed_gaps')

    graph = DAG(nodes=set(perm))

    # All index pairs (earlier, later), grouped by the later index.
    index_pairs = [(first, second)
                   for second in range(len(perm))
                   for first in range(second)]
    if progress:
        index_pairs = tqdm(index_pairs)

    for i, j in index_pairs:
        pi_i = perm[i]
        pi_j = perm[j]
        pair = frozenset({pi_i, pi_j})

        # === IF FIXED, DON'T TEST
        if pair in fixed_adjacencies:
            graph.add_arc(pi_i, pi_j)
            continue
        if pair in fixed_gaps:
            continue

        # === TEST MARKOV BLANKET
        if node2nbrs is None:
            mb = graph.markov_blanket(pi_i)
        else:
            allowed = node2nbrs[pi_i] | node2nbrs[pi_j]
            mb = (set(perm[:j]) - {pi_i}) & allowed
        if older:
            mb = set(perm[:j]) - {pi_i}

        is_ci = ci_tester.is_ci(pi_i, pi_j, mb)
        if not is_ci:
            graph.add_arc(pi_i, pi_j, unsafe=True)
        if verbose:
            print(f"{pi_i} is independent of {pi_j} given {mb}: {is_ci}")

    return graph
Esempio n. 16
0
def rand_additive_basis(dag: DAG,
                        basis: list,
                        r2_dict: Optional[Union[Dict[int, float],
                                                float]] = None,
                        rand_weight_fn: RandWeightFn = unif_away_zero,
                        noise=lambda size: np.random.normal(0, 1, size=size),
                        internal_variance: int = 1,
                        num_monte_carlo: int = 10000,
                        progress=False,
                        verbose=False):
    """
    Generate a random structural causal model (SCM), using `dag` as the structure, and with each variable
    being a general additive model (GAM) of its parents.

    Parameters
    ----------
    dag:
        A DAG to use as the structure for the model.
    basis:
        Basis functions for the GAM.
    r2_dict:
        A dictionary mapping each number of parents to the desired R^2 for nodes with that many parents,
        or a single number used for every number of parents. By default, 1/2 for any number of parents.
    rand_weight_fn:
        A function to generate random weights for each parent.
    noise:
        A function to generate random internal noise for each node. It is called with a `size` keyword.
    internal_variance:
        The variance of the above noise function.
    num_monte_carlo:
        The number of Monte Carlo samples used when computing coefficients to achieve the desired R^2.
    progress:
        If True, wrap the iteration over nodes in a `tqdm` progress bar.
    verbose:
        If True, print the node, its parents, the variance contributed by the parents, the parent
        weights and the computed scaling coefficient for every node that has parents.

    Raises
    ------
    KeyError
        If `r2_dict` has no entry for the number of parents of some node.
    ValueError
        If the scaling coefficient computed for a node is NaN.

    Examples
    --------
    >>> import causaldag as cd
    >>> import numpy as np
    >>> d = cd.DAG(arcs={(1, 2), (2, 3), (1, 3)})
    >>> basis = [np.sin, np.cos, np.exp]
    >>> r2_dict = {1: 1/2, 2: 2/3}
    >>> g = cd.rand.rand_additive_basis(d, basis, r2_dict)
    """
    if r2_dict is None:
        r2_dict = {nparents: 1 / 2 for nparents in range(dag.nnodes)}
    # Accept any plain number (e.g. 0 as well as 0.5) as a shared value.
    if isinstance(r2_dict, (int, float)):
        r2_dict = {nparents: r2_dict for nparents in range(dag.nnodes)}

    cam_dag = CamDAG(dag._nodes, arcs=dag._arcs)
    top_order = dag.topological_sort()
    # node -> Monte Carlo samples; parents are filled in before their children because nodes are
    # visited in topological order.
    sample_dict = dict()

    # for each node, create the conditional
    node_iterator = top_order if not progress else tqdm(top_order)
    for node in node_iterator:
        parents = dag.parents_of(node)
        nparents = dag.indegree(node)
        parent2base = dict(zip(parents, random.choices(basis, k=nparents)))
        parent_weights = rand_weight_fn(size=nparents)
        parent_vals = np.array([
            sample_dict[parent] for parent in parents
        ]).T if nparents > 0 else np.zeros([num_monte_carlo, 0])

        c_node = 1
        if nparents > 0:
            # A missing dictionary entry raises KeyError (not ValueError).
            try:
                desired_r2 = r2_dict[nparents]
            except KeyError:
                raise KeyError(
                    f"`r2_dict` does not specify a desired R^2 for nodes with {nparents} parents"
                ) from None

            mean_function_no_c = partial(_cam_mean_function,
                                         c_node=1,
                                         parent_weights=parent_weights,
                                         parent2base=parent2base)
            values_from_parents = mean_function_no_c(parent_vals, parents)
            variance_from_parents = np.var(values_from_parents)

            c_node = internal_variance / variance_from_parents * desired_r2 / (
                1 - desired_r2)
            if np.isnan(c_node):
                raise ValueError(
                    f"scaling coefficient for node {node} is NaN "
                    f"(variance from parents: {variance_from_parents}, desired R^2: {desired_r2})"
                )
            if verbose:
                print(node, parents, variance_from_parents, parent_weights,
                      c_node)

        mean_function = partial(_cam_mean_function,
                                c_node=c_node,
                                parent_weights=parent_weights,
                                parent2base=parent2base)

        # Sample this node so that its children can estimate their parent-driven variance.
        mean_vals = mean_function(parent_vals, parents)
        sample_dict[node] = mean_vals + noise(size=num_monte_carlo)

        cam_dag.set_mean_function(node, mean_function)
        cam_dag.set_noise(node, noise)

    return cam_dag