def test_mixed_interventions(self):
    """Print diagnostics for a combined Gaussian + scaling intervention.

    Node 0 is given ``GaussIntervention(1, .5)`` and node 2 is given a
    ``ScalingIntervention`` with ``factor=.1``.  The empirical covariance of
    100000 interventional samples is printed next to the covariance of a
    hand-built reference DAG, followed by the empirical column means.
    Nothing is asserted; the printed values are meant to be compared by eye.
    """
    factor = .1
    gdag = cd.GaussDAG([0, 1, 2], arcs={(0, 1): 2, (0, 2): 3, (1, 2): 5})

    # Reference model: both arcs into node 2 are multiplied by `factor`, and
    # node 0 gets mean 1 / variance .5 (presumably what GaussIntervention(1, .5)
    # encodes -- confirm the positional argument order).
    expected_dag = cd.GaussDAG(
        [0, 1, 2],
        arcs={(0, 1): 2, (0, 2): 3*factor, (1, 2): 5*factor},
        means=[1, 0, 0],
        variances=[.5, 1, 1],
    )

    interventions = {
        0: GaussIntervention(1, .5),
        2: ScalingIntervention(factor=factor),
    }
    samples = gdag.sample_interventional(interventions, 100000)

    empirical_cov = np.cov(samples, rowvar=False)
    print(empirical_cov)
    print(expected_dag.covariance)
    empirical_means = np.mean(samples, axis=0)
    print(empirical_means)
def test_scaling_intervention(self):
    """Print empirical vs. reference covariances for scaling interventions.

    Five scenarios are run on the same three-node DAG, each drawing 100000
    interventional samples and printing the sample covariance followed by a
    reference covariance.  The last scenario additionally prints the sample
    mean of node 0.  Nothing is asserted; output is inspected manually.
    """
    factor = .1
    std_factor = .2
    n_draws = 100000

    def base_dag():
        # A fresh, unintervened DAG for every scenario.
        return cd.GaussDAG([0, 1, 2], arcs={(0, 1): 2, (0, 2): 3, (1, 2): 5})

    def show(drawn, reference):
        # Empirical covariance first, then the reference covariance.
        print(np.cov(drawn, rowvar=False))
        print(reference.covariance)

    # === SCALING ONLY, INTERVENED NODE HAS PARENTS
    gdag = base_dag()
    gdag_iv = cd.GaussDAG(
        [0, 1, 2],
        arcs={(0, 1): 2, (0, 2): 3*factor, (1, 2): 5*factor},
    )
    iv = {2: ScalingIntervention(factor=factor)}
    samples = gdag.sample_interventional(iv, n_draws)
    show(samples, gdag_iv)

    # === TEST WHEN INTERVENED NODE HAS NO PARENTS
    # The reference here is the unintervened DAG itself.
    gdag = base_dag()
    iv = {0: ScalingIntervention(factor=factor)}
    samples = gdag.sample_interventional(iv, n_draws)
    show(samples, gdag)

    # === TEST WITH STD FACTOR, INTERVENED NODE HAS PARENTS
    gdag = base_dag()
    gdag_iv = cd.GaussDAG(
        [0, 1, 2],
        arcs={(0, 1): 2, (0, 2): 3 * factor, (1, 2): 5 * factor},
        variances=[1, 1, std_factor**2],
    )
    iv = {2: ScalingIntervention(factor, std_factor)}
    samples = gdag.sample_interventional(iv, n_draws)
    show(samples, gdag_iv)

    # === TEST WITH STD FACTOR, INTERVENED NODE HAS NO PARENTS
    gdag = base_dag()
    gdag_iv = cd.GaussDAG(
        [0, 1, 2],
        arcs={(0, 1): 2, (0, 2): 3, (1, 2): 5},
        variances=[std_factor ** 2, 1, 1],
    )
    iv = {0: ScalingIntervention(factor, std_factor)}
    samples = gdag.sample_interventional(iv, n_draws)
    show(samples, gdag_iv)

    # === TEST WITH STD FACTOR, CHANGE IN MEAN, INTERVENED NODE HAS NO PARENTS
    gdag = base_dag()
    gdag_iv = cd.GaussDAG(
        [0, 1, 2],
        arcs={(0, 1): 2, (0, 2): 3, (1, 2): 5},
        variances=[std_factor ** 2, 1, 1],
    )
    iv = {0: ScalingIntervention(factor, std_factor, mean=2.13)}
    samples = gdag.sample_interventional(iv, n_draws)
    show(samples, gdag_iv)
    node0_mean = np.mean(samples[:, 0])
    print(node0_mean)
def test_hsic_invariance_cond_set_false_negatives(self):
    """Count how often the HSIC invariance test fails to reject.

    Runs 100 trials.  Each trial builds a fully connected three-node
    ``GaussDAG``, draws 200 observational samples and 200 samples under a
    soft ``ScalingIntervention(0)`` on node 1, then calls
    ``hsic_invariance_test`` with ``cond_set=[1]`` at ``alpha=.05``.  A trial
    whose result does not have a truthy ``'reject'`` entry is counted as a
    false negative.  The total is printed; nothing is asserted.
    """
    alpha = .05
    num_tests = 100
    nsamples = 200

    false_negatives = 0
    for _ in range(num_tests):
        dag = cd.GaussDAG(nodes=[0, 1, 2], arcs={(0, 1), (0, 2), (1, 2)})
        obs_samples = dag.sample(nsamples)
        iv_samples = dag.sample_interventional_soft(
            {1: ScalingIntervention(0)},
            nsamples=nsamples,
        )
        outcome = cd.utils.ci_tests.hsic_invariance_test(
            obs_samples,
            iv_samples,
            0,
            cond_set=[1],
            alpha=alpha,
        )
        if outcome['reject']:
            continue
        # should be rejecting the hypothesis of invariance
        false_negatives += 1

    print("Number of false negatives:", false_negatives)
def save_dags(self, folder):
    """Generate random Gaussian DAGs, write them to disk and return them.

    The configuration (``asdict(self)``) is dumped to ``<folder>/config.yaml``.
    DAG structures are then generated according to ``self.graph_type``:

    * ``'erdos'``: ``cd.rand.directed_erdos`` with ``size=self.n_dags``.
    * ``'erdos-bounded'``: Erdos DAGs are drawn one at a time and kept only
      when their CPDAG yields fewer than ``MAX_MEC_SIZE`` DAGs.
    * ``'components'``: ``get_component_dag`` is called ``self.n_dags`` times.
    * ``'unoriented_by_one'``: same acceptance rule as ``'erdos-bounded'``,
      additionally printing ``cpdag.undirected_neighbors[0]`` for each
      accepted DAG.
    * anything else: ``graph_utils.generate_DAG`` with ``type_=self.graph_type``.

    Every arc receives a weight drawn from ``np.random.uniform()``.  For DAG
    number ``i`` the files ``adjacency.txt``, ``means.txt`` and
    ``variances.txt`` are written under ``DATA_FOLDER/<folder>/dags/dag<i>``.

    :param folder: output folder name/path for the config and the DAG files.
    :return: list of ``cd.GaussDAG`` objects, one per generated DAG.
    """
    os.makedirs(folder, exist_ok=True)
    # Use a context manager so the config file is flushed and closed
    # deterministically instead of leaking the handle.
    with open(os.path.join(folder, 'config.yaml'), 'w') as config_file:
        yaml.dump(asdict(self), config_file)

    if self.graph_type == 'erdos':
        dags = cd.rand.directed_erdos(self.n_nodes, self.edge_prob, size=self.n_dags)
        # With a single DAG requested the result is wrapped so that `dags`
        # is always iterable below.
        if self.n_dags == 1:
            dags = [dags]
    elif self.graph_type == 'erdos-bounded':
        dags = []
        while len(dags) < self.n_dags:
            dag = cd.rand.directed_erdos(self.n_nodes, self.edge_prob)
            cpdag = dag.cpdag()
            mec_size = len(cpdag.all_dags())
            if mec_size < MAX_MEC_SIZE:
                dags.append(dag)
    elif self.graph_type == 'components':
        dags = [get_component_dag(self.n_nodes, self.edge_prob) for _ in range(self.n_dags)]
    elif self.graph_type == 'unoriented_by_one':
        dags = []
        while len(dags) < self.n_dags:
            dag = cd.rand.directed_erdos(self.n_nodes, self.edge_prob)
            cpdag = dag.cpdag()
            # NOTE(review): the append is assumed to belong to this `if`
            # (same acceptance rule as 'erdos-bounded') -- confirm.
            if len(cpdag.all_dags()) < MAX_MEC_SIZE:
                print(cpdag.undirected_neighbors[0])
                dags.append(dag)
    else:
        dags = [graph_utils.generate_DAG(self.n_nodes, type_=self.graph_type) for _ in range(self.n_dags)]

    # A-ICP paper: Generate random weights
    dag_arcs = [{(i, j): np.random.uniform() for i, j in dag.arcs} for dag in dags]
    gdags = [cd.GaussDAG(nodes=list(range(self.n_nodes)), arcs=arcs) for arcs in dag_arcs]

    print('=== Saving DAGs ===')
    for i, gdag in enumerate(gdags):
        # NOTE(review): DAG files go under DATA_FOLDER/<folder> while the
        # config above is written under <folder> directly -- confirm intended.
        dag_folder = os.path.join(DATA_FOLDER, folder, 'dags', 'dag%d' % i)
        os.makedirs(dag_folder, exist_ok=True)
        np.savetxt(os.path.join(dag_folder, 'adjacency.txt'), gdag.to_amat())

        # A-ICP paper: Sample means/variances uniformly at random from (0,1)
        # NOTE(review): these values are only written to disk; they are not
        # applied to the returned GaussDAG objects -- confirm intended.
        means = np.random.uniform(size=len(gdag.nodes))
        variances = np.random.uniform(size=len(gdag.nodes))
        np.savetxt(os.path.join(dag_folder, 'means.txt'), means)
        np.savetxt(os.path.join(dag_folder, 'variances.txt'), variances)
    print('=== Saved ===')
    return gdags
import time

import numpy as np

import causaldag as cd

# Three-node DAG with arcs 0 -> 1 and 0 -> 2.
g = cd.GaussDAG([0, 1, 2], arcs={(0, 1), (0, 2)})
cov = g.covariance
nsamples = 2500
trials = 10
iv = {1: cd.GaussIntervention(0, 1)}

# === TIME INVARIANCE TEST NO CONDITIONING SET
# Wall-clock time for `trials` rounds of sampling plus one HSIC test on the
# first two columns of the samples.
t_begin = time.time()
for _ in range(trials):
    samples = g.sample(nsamples)
    first_col = samples[:, 0]
    second_col = samples[:, 1]
    cd.utils.ci_tests.hsic_test_vector(first_col, second_col)
elapsed = time.time() - t_begin
print(elapsed)

# === TIME INVARIANCE TEST WITH CONDITIONING SET
# start = time.time()
# for _ in range(trials):
#     samples = g.sample(nsamples)
#     cd.utils.ci_tests.hsic_invariance_test(samples[:, 0], samples[:, 1], 0)
# print(time.time() - start)
import os
import random

import matplotlib.pyplot as plt
import numpy as np
from numpy.ma import masked_array

import causaldag as cd

# Fix both RNGs so the generated example data is reproducible.
random.seed(181)
np.random.seed(181)

nsamples = 10
g = cd.GaussDAG(
    nodes=[0, 1, 2],
    arcs={(0, 1): 1, (0, 2): 1},
    variances=[1, .2, .2],
)

# Observational data, then data with node 1 intervened, then data with both
# node 1 and node 0 intervened (sampling order matters for the seeded RNG).
obs_samples = g.sample(nsamples)
iv1_samples = g.sample_interventional(
    {1: cd.GaussIntervention(0, 1)},
    nsamples=nsamples,
)
iv01_samples = g.sample_interventional(
    {1: cd.GaussIntervention(0, 1), 0: cd.GaussIntervention(1, .1)},
    nsamples=nsamples,
)

cmap = 'bwr'


def _draw_heatmap(data):
    # Render `data` as an image with the module-level colormap, without ticks.
    plt.imshow(data, cmap=cmap)
    plt.xticks([])
    plt.yticks([])


plt.clf()
os.makedirs('figures/example_data/', exist_ok=True)

_draw_heatmap(obs_samples)
plt.tight_layout()
plt.savefig('figures/example_data/obs.png', transparent=True, bbox_inches='tight')

plt.clf()
_draw_heatmap(iv1_samples)
# find probs probs = np.zeros(k) for fval, w in zip(fvals, weights): probs[fval] += w # = find entropy mask = probs != 0 plogps = np.zeros(len(probs)) plogps[mask] = np.log2(probs[mask]) * probs[mask] return -plogps.sum() return get_k_entropy np.random.seed(100) g = cd.rand.directed_erdos(10, .5) g = cd.GaussDAG(nodes=list(range(10)), arcs=g.arcs) mec = [ cd.DAG(arcs=arcs) for arcs in cd.DAG(arcs=g.arcs).cpdag().all_dags() ] strat = create_info_gain_strategy_dag_collection( mec, [get_mec_functional_k(mec)], [get_k_entropy_fxn(len(mec))], verbose=True) samples = g.sample(1000) precision_matrix = samples.T @ samples / 1000 sel_interventions = strat( IterationData(current_data={-1: g.sample(1000)}, max_interventions=1, n_samples=500, batch_num=0, n_batches=1,
import causaldag as cd from causaldag import GaussIntervention from causaldag.inference.structural import igsp, unknown_target_igsp from causaldag.utils.ci_tests import gauss_ci_test, hsic_invariance_test import numpy as np import random import os from config import PROJECT_FOLDER from R_algs.wrappers import run_gies np.random.seed(1729) random.seed(1729) ntrials = 10 nnodes = 5 d = cd.DAG(arcs={(i, i + 1) for i in range(nnodes - 1)}) g = cd.GaussDAG(nodes=list(range(nnodes)), arcs=d.arcs) cpdag = d.cpdag() print(d.interventional_cpdag({nnodes - 1}, cpdag=cpdag).arcs) print(d.interventional_cpdag({0, nnodes - 1}, cpdag=cpdag).arcs) shds_igsp = [] shds_utigsp = [] shds_gies = [] dags_igsp = [] dags_utigsp = [] dags_gies = [] for i in range(ntrials): nsamples = 500 intervention = GaussIntervention(1, .01) samples = g.sample(nsamples) iv_samples = g.sample_interventional_perfect(
def plot_roc(target, true_adj_mat, adj_mats):
    """Call ``roc_curve`` for one target column of the incidence matrices.

    The incidence matrices (``graph_utils.adj2inc``) of all ``adj_mats`` are
    summed element-wise and the ``target`` column of that sum is passed as the
    score to ``roc_curve``, with the ``target`` column of the true incidence
    matrix as the label.  The result of ``roc_curve`` is not returned.

    :param target: column index to evaluate.
    :param true_adj_mat: ground-truth adjacency matrix (its ``shape`` sizes
        the accumulator).
    :param adj_mats: iterable of estimated adjacency matrices.
    """
    # Despite the original "avg" naming, this is a running sum (no division).
    summed_inc_mat = np.zeros(true_adj_mat.shape)
    for estimate in adj_mats:
        summed_inc_mat += graph_utils.adj2inc(estimate)

    reference_inc_mat = graph_utils.adj2inc(true_adj_mat)
    labels = reference_inc_mat[:, target]
    scores = summed_inc_mat[:, target]
    roc_curve(labels, scores)


if __name__ == '__main__':
    adj_true = np.loadtxt('./data/dataset_5000/graph_2/adjacency.csv')
    g = cd.from_amat(adj_true)

    # Arc weights are read straight from the loaded adjacency matrix.
    dict_weights = {arc: adj_true[arc[0], arc[1]] for arc in g.arcs}
    gdag = cd.GaussDAG(nodes=list(range(50)), arcs=dict_weights)

    # 250 observational rows (labelled -1), then 25 rows for each of 10
    # randomly chosen single-node interventions.
    all_data = [gdag.sample(250)]
    interventions = [-1] * 250
    all_iv = np.random.randint(0, 50, 10)
    for iv in all_iv:
        interventions.extend([iv] * 25)
        g_iv = cd.GaussIntervention(mean=2, variance=1)
        all_data.append(gdag.sample_interventional({iv: g_iv}, 25))

    all_data = np.vstack(all_data)
    interventions = np.array(interventions)

    # Intervened labels are shifted by +1 for the files on disk and shifted
    # back afterwards (presumably the consumer expects 1-based node labels --
    # confirm).  Labels are 0..49 before the shift, so the mask of non-(-1)
    # entries is the same before and after it.
    intervened = interventions != -1
    interventions[intervened] += 1
    np.savetxt('./random_data', all_data)
    np.savetxt('./random_interventions', interventions)
    interventions[intervened] -= 1

    graph_utils.run_gies_boot(200, './random_data', './random_interventions')
if __name__ == '__main__':
    # Ad-hoc driver: seeds the RNGs, builds some random inputs and optionally
    # runs a timing loop and a partial-sum comparison (both off by default).
    import random

    from tqdm import tqdm

    import causaldag as cd

    np.random.seed(1818)
    random.seed(11)

    nsamples = 10000
    ntrials = 10

    # These draws consume RNG state even where the results are unused below.
    indices = list(range(nsamples))
    perms = [np.random.permutation(indices) for _ in range(ntrials)]
    samples_list = [np.random.normal(size=nsamples) for _ in range(ntrials)]

    # Two-node DAG with no arcs.
    d = cd.GaussDAG(nodes=[0, 1], arcs=set())
    samples = d.sample(1000)

    TEST_TIME = False
    if TEST_TIME:
        samples_list = [d.sample(nsamples) for _ in range(ntrials)]
        # The loop deliberately rebinds `samples`, as in the original.
        for samples in tqdm(samples_list):
            fadcor_test_vector(samples[:, 0], samples[:, 1])

    TEST_PARTIAL_SUM = False
    if TEST_PARTIAL_SUM:
        x, y = samples[:, 0], samples[:, 1]
        c = np.random.normal(size=len(x))
        res1 = partial_sum2d(x, y, c)
        res2 = _partial_sum_simple(x, y, c)