def rand_additive_basis(
        dag: DAG,
        basis: list,
        snr_dict: Optional[dict] = None,
        rand_weight_fn: RandWeightFn = unif_away_zero,
        noise=lambda: np.random.normal(0, 1),
        internal_variance: int = 1,
        num_monte_carlo: int = 10000,
        progress=False
):
    """
    Generate a random structural causal model (SCM), using `dag` as the structure, and with each variable
    being a general additive model (GAM) of its parents.

    Nodes are processed in topological order. For each node, one basis function per parent is drawn from
    `basis` and one weight per parent is drawn from `rand_weight_fn`. `num_monte_carlo` samples of the
    node are then generated from the samples of its parents, so that the variance contributed by the
    parents can be estimated empirically and turned into the scaling coefficient `c_node`.

    Parameters
    ----------
    dag:
        A DAG to use as the structure for the model.
    basis:
        Basis functions for the GAM. Each is called on a single parent sample at a time.
    snr_dict:
        A dictionary mapping each number of parents to the desired signal-to-noise ratio (SNR) for nodes
        with that many parents. By default, 1/2 for any number of parents.
    rand_weight_fn:
        A function to generate random weights for each parent. Called as `rand_weight_fn(size=nparents)`.
    noise:
        A function to generate random internal noise for each node.
    internal_variance:
        The variance of the above noise function.
    num_monte_carlo:
        The number of Monte Carlo samples used when computing coefficients to achieve the desired SNR.
    progress:
        If True, wrap the iteration over nodes in `tqdm`.

    Returns
    -------
    The `SampleDAG` built from the nodes and arcs of `dag`, with a conditional set for every node.

    Raises
    ------
    KeyError
        If `snr_dict` has no entry for the number of parents of some node that has at least one parent.

    Examples
    --------
    >>> import causaldag as cd
    >>> import numpy as np
    >>> d = cd.DAG(arcs={(1, 2), (2, 3), (1, 3)})
    >>> basis = [np.sin, np.cos, np.exp]
    >>> snr_dict = {1: 1/2, 2: 2/3}
    >>> g = cd.rand.rand_additive_basis(d, basis, snr_dict)
    """
    # NOTE(review): a second definition of `rand_additive_basis` (taking `r2_dict`) appears later in
    # this file and rebinds the name -- confirm whether this version is still meant to be reachable.
    if snr_dict is None:
        snr_dict = {nparents: 1 / 2 for nparents in range(dag.nnodes)}

    sample_dag = SampleDAG(dag._nodes, arcs=dag._arcs)
    top_order = dag.topological_sort()
    # node -> list of `num_monte_carlo` sampled values; parents are always filled in before their
    # children because nodes are visited in topological order.
    sample_dict = defaultdict(list)

    # for each node, create the conditional
    node_iterator = tqdm(top_order) if progress else top_order
    for node in node_iterator:
        parents = dag.parents_of(node)
        nparents = dag.indegree(node)
        parent_bases = random.choices(basis, k=nparents)
        parent_weights = rand_weight_fn(size=nparents)

        # Source nodes have no parental contribution to rescale, so `c_node` stays None for them.
        c_node = None
        if nparents > 0:
            # Unscaled contribution of the parents for each Monte Carlo sample.
            values_from_parents = [
                sum(
                    weight * base(sample_dict[parent][i])
                    for weight, base, parent in zip(parent_weights, parent_bases, parents)
                )
                for i in range(num_monte_carlo)
            ]
            variance_from_parents = np.var(values_from_parents)

            # A missing dictionary key raises KeyError (not ValueError), so that is what must be caught
            # for the explanatory message to ever be shown.
            try:
                desired_snr = snr_dict[nparents]
            except KeyError:
                raise KeyError(
                    f"`snr_dict` does not specify a desired SNR for nodes with {nparents} parents"
                ) from None
            c_node = internal_variance / variance_from_parents * desired_snr / (1 - desired_snr)

        conditional = partial(
            _cam_conditional,
            c_node=c_node,
            parent_weights=parent_weights,
            parent_bases=parent_bases,
            noise=noise
        )

        # Sample this node so that its children can estimate their own parental variance.
        sample_dict[node].extend(
            conditional([sample_dict[parent][i] for parent in parents])
            for i in range(num_monte_carlo)
        )
        sample_dag.set_conditional(node, conditional)

    return sample_dag
def rand_additive_basis(
        dag: DAG,
        basis: list,
        r2_dict: Optional[Union[Dict[int, float], float]] = None,
        rand_weight_fn: RandWeightFn = unif_away_zero,
        noise=lambda size: np.random.normal(0, 1, size=size),
        internal_variance: int = 1,
        num_monte_carlo: int = 10000,
        progress=False
):
    """
    Generate a random structural causal model (SCM), using `dag` as the structure, and with each variable
    being a general additive model (GAM) of its parents.

    Nodes are processed in topological order. For each node, one basis function per parent is drawn from
    `basis` and one weight per parent is drawn from `rand_weight_fn`. `num_monte_carlo` samples of the
    node are generated from the samples of its parents, so that the variance contributed by the parents
    can be estimated empirically and turned into the scaling coefficient `c_node`.

    Parameters
    ----------
    dag:
        A DAG to use as the structure for the model.
    basis:
        Basis functions for the GAM.
    r2_dict:
        A dictionary mapping each number of parents to the desired R^2 for nodes with that many parents,
        or a single number to use for every number of parents. By default, 1/2 for any number of parents.
    rand_weight_fn:
        A function to generate random weights for each parent. Called as `rand_weight_fn(size=nparents)`.
    noise:
        A function to generate random internal noise for each node. Called as `noise(size=k)` to draw
        `k` samples at once.
    internal_variance:
        The variance of the above noise function.
    num_monte_carlo:
        The number of Monte Carlo samples used when computing coefficients to achieve the desired R^2.
    progress:
        If True, wrap the iteration over nodes in `tqdm`.

    Returns
    -------
    The `CamDAG` built from the nodes and arcs of `dag`, with a mean function and a noise function set
    for every node.

    Raises
    ------
    KeyError
        If `r2_dict` has no entry for the number of parents of some node that has at least one parent.
    ValueError
        If the scaling coefficient computed for a node is NaN.

    Examples
    --------
    >>> import causaldag as cd
    >>> import numpy as np
    >>> d = cd.DAG(arcs={(1, 2), (2, 3), (1, 3)})
    >>> basis = [np.sin, np.cos, np.exp]
    >>> r2_dict = {1: 1/2, 2: 2/3}
    >>> g = cd.rand.rand_additive_basis(d, basis, r2_dict)
    """
    if r2_dict is None:
        r2_dict = {nparents: 1 / 2 for nparents in range(dag.nnodes)}
    # Accept any real scalar (not only `float`) as "use this R^2 everywhere"; an int or NumPy scalar
    # would otherwise fall through and fail later when subscripted.
    if isinstance(r2_dict, (int, float, np.integer, np.floating)):
        r2_dict = {nparents: r2_dict for nparents in range(dag.nnodes)}

    cam_dag = CamDAG(dag._nodes, arcs=dag._arcs)
    top_order = dag.topological_sort()
    # node -> array of `num_monte_carlo` sampled values; parents are always filled in before their
    # children because nodes are visited in topological order.
    sample_dict = dict()

    # for each node, create the conditional
    node_iterator = tqdm(top_order) if progress else top_order
    for node in node_iterator:
        parents = dag.parents_of(node)
        nparents = dag.indegree(node)
        parent2base = dict(zip(parents, random.choices(basis, k=nparents)))
        parent_weights = rand_weight_fn(size=nparents)

        # One row per Monte Carlo sample, one column per parent (no columns for source nodes).
        if nparents > 0:
            parent_vals = np.array([sample_dict[parent] for parent in parents]).T
        else:
            parent_vals = np.zeros([num_monte_carlo, 0])

        c_node = 1
        if nparents > 0:
            # Evaluate the mean function with a unit coefficient to measure the unscaled variance
            # contributed by the parents.
            mean_function_no_c = partial(
                _cam_mean_function,
                c_node=1,
                parent_weights=parent_weights,
                parent2base=parent2base
            )
            values_from_parents = mean_function_no_c(parent_vals, parents)
            variance_from_parents = np.var(values_from_parents)

            # A missing dictionary key raises KeyError (not ValueError), so that is what must be caught
            # for the explanatory message to ever be shown.
            try:
                desired_r2 = r2_dict[nparents]
            except KeyError:
                raise KeyError(
                    f"`r2_dict` does not specify a desired R^2 for nodes with {nparents} parents"
                ) from None

            c_node = internal_variance / variance_from_parents * desired_r2 / (1 - desired_r2)
            if np.isnan(c_node):
                raise ValueError(
                    f"Computed a NaN scaling coefficient for node {node} "
                    f"(parents: {parents}, variance from parents: {variance_from_parents})"
                )

        mean_function = partial(
            _cam_mean_function,
            c_node=c_node,
            parent_weights=parent_weights,
            parent2base=parent2base
        )

        # Sample this node so that its children can estimate their own parental variance.
        mean_vals = mean_function(parent_vals, parents)
        sample_dict[node] = mean_vals + noise(size=num_monte_carlo)

        cam_dag.set_mean_function(node, mean_function)
        cam_dag.set_noise(node, noise)

    return cam_dag