Ejemplo n.º 1
0
def rand_additive_basis(dag: DAG,
                        basis: list,
                        snr_dict: Optional[dict] = None,
                        rand_weight_fn: RandWeightFn = unif_away_zero,
                        noise=lambda: np.random.normal(0, 1),
                        internal_variance: int = 1,
                        num_monte_carlo: int = 10000,
                        progress=False):
    """
    Generate a random structural causal model (SCM), using `dag` as the structure, and with each variable
    being a generalized additive model (GAM) of its parents.

    Nodes are processed in topological order. For each node, one basis function and one weight are drawn
    per parent, `num_monte_carlo` samples of the weighted parent contribution are used to estimate its
    variance, and a coefficient `c_node` is derived from that variance and the desired SNR. The resulting
    conditional is then sampled `num_monte_carlo` times so that descendants can reuse the samples.

    Parameters
    ----------
    dag:
        A DAG to use as the structure for the model.
    basis:
        Basis functions for the GAM. One is drawn (with replacement) for each parent of each node.
    snr_dict:
        A dictionary mapping each number of parents to the desired signal-to-noise ratio (SNR) for nodes
        with that many parents. By default, 1/2 for any number of parents.
    rand_weight_fn:
        A function to generate random weights for each parent. Called as `rand_weight_fn(size=nparents)`.
    noise:
        A function to generate random internal noise for each node.
    internal_variance:
        The variance of the above noise function.
    num_monte_carlo:
        The number of Monte Carlo samples used when computing coefficients to achieve the desired SNR.
    progress:
        If True, wrap the iteration over nodes in a `tqdm` progress bar.

    Returns
    -------
    A `SampleDAG` built from the nodes and arcs of `dag`, with a conditional set for every node.

    Raises
    ------
    KeyError
        If a node has a number of parents for which `snr_dict` has no entry.

    Examples
    --------
    >>> import causaldag as cd
    >>> import numpy as np
    >>> d = cd.DAG(arcs={(1, 2), (2, 3), (1, 3)})
    >>> basis = [np.sin, np.cos, np.exp]
    >>> snr_dict = {1: 1/2, 2: 2/3}
    >>> g = cd.rand.rand_additive_basis(d, basis, snr_dict)
    """
    if snr_dict is None:
        # A node can have at most `nnodes - 1` parents, so this covers every possible in-degree.
        snr_dict = {nparents: 1 / 2 for nparents in range(dag.nnodes)}

    sample_dag = SampleDAG(dag._nodes, arcs=dag._arcs)
    top_order = dag.topological_sort()
    # node -> list of `num_monte_carlo` sampled values, filled in topological order
    # so that a node's parents are always sampled before the node itself.
    sample_dict = defaultdict(list)

    # for each node, create the conditional
    node_iterator = tqdm(top_order) if progress else top_order
    for node in node_iterator:
        parents = dag.parents_of(node)
        nparents = dag.indegree(node)
        parent_bases = random.choices(basis, k=nparents)
        parent_weights = rand_weight_fn(size=nparents)

        # Source nodes keep `c_node = None`; how that is interpreted is up to `_cam_conditional`.
        c_node = None
        if nparents > 0:
            # Look the SNR up first so that a badly specified `snr_dict` fails
            # before any Monte Carlo work is done. A dict lookup raises KeyError
            # (not ValueError) for a missing key.
            try:
                desired_snr = snr_dict[nparents]
            except KeyError:
                raise KeyError(
                    f"`snr_dict` does not specify a desired SNR for nodes with {nparents} parents"
                ) from None

            # Monte Carlo estimate of the variance contributed by the parents (before scaling).
            values_from_parents = [
                sum(weight * base(sample_dict[parent][i])
                    for weight, base, parent in zip(parent_weights, parent_bases, parents))
                for i in range(num_monte_carlo)
            ]
            variance_from_parents = np.var(values_from_parents)

            c_node = internal_variance / variance_from_parents * desired_snr / (
                1 - desired_snr)

        conditional = partial(_cam_conditional,
                              c_node=c_node,
                              parent_weights=parent_weights,
                              parent_bases=parent_bases,
                              noise=noise)

        # Sample this node so that its children can estimate their own parent variance.
        for i in range(num_monte_carlo):
            val = conditional([sample_dict[parent][i] for parent in parents])
            sample_dict[node].append(val)
        sample_dag.set_conditional(node, conditional)

    return sample_dag
Ejemplo n.º 2
0
def rand_additive_basis(dag: DAG,
                        basis: list,
                        r2_dict: Optional[Union[Dict[int, float],
                                                float]] = None,
                        rand_weight_fn: RandWeightFn = unif_away_zero,
                        noise=lambda size: np.random.normal(0, 1, size=size),
                        internal_variance: int = 1,
                        num_monte_carlo: int = 10000,
                        progress=False):
    """
    Generate a random structural causal model (SCM), using `dag` as the structure, and with each variable
    being a generalized additive model (GAM) of its parents.

    Nodes are processed in topological order. For each node, one basis function and one weight are drawn
    per parent, the unscaled mean function is evaluated on `num_monte_carlo` samples of the parents to
    estimate its variance, and a coefficient `c_node` is derived from that variance and the desired R^2.
    The node is then sampled (mean plus noise) so that descendants can reuse the samples.

    Parameters
    ----------
    dag:
        A DAG to use as the structure for the model.
    basis:
        Basis functions for the GAM. One is drawn (with replacement) for each parent of each node.
    r2_dict:
        A dictionary mapping each number of parents to the desired R^2 for nodes with that many parents,
        or a single number to use for every number of parents. By default, 1/2 for any number of parents.
    rand_weight_fn:
        A function to generate random weights for each parent. Called as `rand_weight_fn(size=nparents)`.
    noise:
        A function to generate random internal noise for each node. Called as `noise(size=...)`.
    internal_variance:
        The variance of the above noise function.
    num_monte_carlo:
        The number of Monte Carlo samples used when computing coefficients to achieve the desired R^2.
    progress:
        If True, wrap the iteration over nodes in a `tqdm` progress bar.

    Returns
    -------
    A `CamDAG` built from the nodes and arcs of `dag`, with a mean function and noise set for every node.

    Raises
    ------
    KeyError
        If a node has a number of parents for which `r2_dict` has no entry.
    ValueError
        If the coefficient computed for a node is NaN.

    Examples
    --------
    >>> import causaldag as cd
    >>> import numpy as np
    >>> d = cd.DAG(arcs={(1, 2), (2, 3), (1, 3)})
    >>> basis = [np.sin, np.cos, np.exp]
    >>> r2_dict = {1: 1/2, 2: 2/3}
    >>> g = cd.rand.rand_additive_basis(d, basis, r2_dict)
    """
    if r2_dict is None:
        # A node can have at most `nnodes - 1` parents, so this covers every possible in-degree.
        r2_dict = {nparents: 1 / 2 for nparents in range(dag.nnodes)}
    elif isinstance(r2_dict, (int, float)):
        # A single number applies to every in-degree; ints are accepted as well as floats.
        r2_dict = {nparents: r2_dict for nparents in range(dag.nnodes)}

    cam_dag = CamDAG(dag._nodes, arcs=dag._arcs)
    top_order = dag.topological_sort()
    # node -> array of `num_monte_carlo` sampled values, filled in topological order
    # so that a node's parents are always sampled before the node itself.
    sample_dict = dict()

    # for each node, create the conditional
    node_iterator = tqdm(top_order) if progress else top_order
    for node in node_iterator:
        parents = dag.parents_of(node)
        nparents = dag.indegree(node)
        parent2base = dict(zip(parents, random.choices(basis, k=nparents)))
        parent_weights = rand_weight_fn(size=nparents)
        # One row per Monte Carlo sample, one column per parent; source nodes get
        # an empty (num_monte_carlo, 0) array so the mean function still sees 2-D input.
        if nparents > 0:
            parent_vals = np.array([sample_dict[parent] for parent in parents]).T
        else:
            parent_vals = np.zeros([num_monte_carlo, 0])

        c_node = 1
        if nparents > 0:
            # Look the R^2 up first so that a badly specified `r2_dict` fails
            # before any Monte Carlo work is done. A dict lookup raises KeyError
            # (not ValueError) for a missing key.
            try:
                desired_r2 = r2_dict[nparents]
            except KeyError:
                raise KeyError(
                    f"`r2_dict` does not specify a desired R^2 for nodes with {nparents} parents"
                ) from None

            # Evaluate the mean function with a unit coefficient to estimate the
            # variance contributed by the parents before scaling.
            mean_function_no_c = partial(_cam_mean_function,
                                         c_node=1,
                                         parent_weights=parent_weights,
                                         parent2base=parent2base)
            values_from_parents = mean_function_no_c(parent_vals, parents)
            variance_from_parents = np.var(values_from_parents)

            c_node = internal_variance / variance_from_parents * desired_r2 / (
                1 - desired_r2)
            if np.isnan(c_node):
                raise ValueError(
                    f"Computed coefficient for node {node} is NaN "
                    f"(variance from parents: {variance_from_parents}, desired R^2: {desired_r2})"
                )

        mean_function = partial(_cam_mean_function,
                                c_node=c_node,
                                parent_weights=parent_weights,
                                parent2base=parent2base)

        # Sample this node so that its children can estimate their own parent variance.
        mean_vals = mean_function(parent_vals, parents)
        sample_dict[node] = mean_vals + noise(size=num_monte_carlo)

        cam_dag.set_mean_function(node, mean_function)
        cam_dag.set_noise(node, noise)

    return cam_dag