def generate_DAG(p, m=4, prob=0., type_='config_model'):
    """Generate a random DAG over p nodes using the requested random-graph model.

    Parameters
    ----------
    p: number of nodes.
    m: degree parameter for the 'barabasi' and 'small_world' models.
    prob: rewiring probability for the 'small_world' model.
    type_: one of 'config_model', 'barabasi', 'small_world', 'chain',
        'chain_one_direction'.

    Returns
    -------
    A cd.DAG with nodes 0..p-1. For the undirected-graph models, every edge
    (i, j) is oriented from the lower- to the higher-numbered node, which
    guarantees acyclicity.

    Raises
    ------
    ValueError: if type_ is not a recognized graph type.
    """
    if type_ == 'config_model':
        # The configuration model requires an even degree-sequence sum.
        z = [int(e) for e in powerlaw_sequence(p)]
        if np.sum(z) % 2 != 0:
            z[0] += 1
        G = nx.configuration_model(z)
    elif type_ == 'barabasi':
        G = nx.barabasi_albert_graph(p, m)
    elif type_ == 'small_world':
        G = nx.watts_strogatz_graph(p, m, prob)
    elif type_ == 'chain':
        # Chain with all arcs pointing away from a single source node in the middle.
        source_node = int(np.ceil(p / 2)) - 1
        arcs = {(i + 1, i) for i in range(source_node)} \
            | {(i, i + 1) for i in range(source_node, p - 1)}
        # (removed leftover debug print of source_node and arcs)
        return cd.DAG(nodes=set(range(p)), arcs=arcs)
    elif type_ == 'chain_one_direction':
        return cd.DAG(nodes=set(range(p)), arcs={(i, i + 1) for i in range(p - 1)})
    else:
        # ValueError is more specific than bare Exception and remains
        # backward-compatible for callers catching Exception.
        raise ValueError('Not a graph type')

    # Collapse any multi-edges (possible under the configuration model) and
    # orient each remaining edge from low to high node index.
    G = nx.Graph(G)
    dag = cd.DAG(nodes=set(range(p)))
    for i, j in G.edges:
        if i != j:
            dag.add_arc(*sorted((i, j)))
    return dag
def test_marginal_mag(self):
    """Marginalizing node 1 out of a DAG yields the expected MAG."""
    # Pure common cause: removing 1 leaves 2 and 3 bidirected.
    fork = cd.DAG(arcs={(1, 2), (1, 3)})
    self.assertEqual(fork.marginal_mag(1), cd.AncestralGraph(bidirected={(2, 3)}))
    # With a direct 2->3 arc, the directed edge survives marginalization.
    triangle = cd.DAG(arcs={(1, 2), (1, 3), (2, 3)})
    self.assertEqual(triangle.marginal_mag(1), cd.AncestralGraph(directed={(2, 3)}))
def test_pdag2alldags_5nodes(self):
    """Every DAG enumerated from a CPDAG must map back to that same CPDAG."""
    original = cd.DAG(arcs={(1, 2), (2, 3), (1, 3), (2, 4), (2, 5), (3, 5), (4, 5)})
    cpdag = original.cpdag()
    for arc_set in cpdag.all_dags():
        candidate_cpdag = cd.DAG(arcs=set(arc_set)).cpdag()
        if candidate_cpdag != cpdag:
            # Dump diagnostics before the assertion fires.
            print(candidate_cpdag.nodes, cpdag.nodes)
            print(candidate_cpdag.arcs, cpdag.arcs)
            print(candidate_cpdag.edges, cpdag.edges)
        self.assertEqual(cpdag, candidate_cpdag)
def get_strategy(strategy, dag):
    """Map a strategy name to an intervention-selection strategy function.

    NOTE(review): reads module-level `target`, `args`, and `n_nodes` — confirm
    those globals exist in every importing context. Returns None implicitly
    for unrecognized strategy names.
    """
    if strategy == 'random':
        return random_nodes.random_strategy
    if strategy == 'learn-parents':
        return learn_target_parents.create_learn_target_parents(target, args.boot)
    if strategy == 'edge-prob':
        return edge_prob.create_edge_prob_strategy(target, args.boot)
    if strategy == 'var-score':
        variances = np.diag(dag.covariance)
        widths = [2 * np.sqrt(v) for v in variances]
        return var_score.create_variance_strategy(target, variances, widths)
    if strategy == 'entropy':
        return information_gain.create_info_gain_strategy(
            args.boot, descendant_functionals(args.target, n_nodes))
    if strategy == 'entropy-enum':
        return information_gain.create_info_gain_strategy(
            args.boot, parent_functionals(target, dag.nodes), enum_combos=True)
    if strategy == 'entropy-dag-collection':
        true_dag = cd.DAG(nodes=set(dag.nodes), arcs=dag.arcs)
        mec = [cd.DAG(nodes=set(dag.nodes), arcs=a)
               for a in true_dag.cpdag().all_dags()]
        k_functional = get_mec_functional_k(mec)
        entropies = [get_k_entropy_fxn(len(mec))]
        is_gauss = args.intervention_type == 'gauss'
        return information_gain.create_info_gain_strategy_dag_collection(
            mec, [k_functional], entropies, is_gauss)
    if strategy == 'entropy-dag-collection-enum':
        true_dag = cd.DAG(nodes=set(dag.nodes), arcs=dag.arcs)
        mec = [cd.DAG(nodes=set(dag.nodes), arcs=a)
               for a in true_dag.cpdag().all_dags()]
        k_functional = get_mec_functional_k(mec)
        entropies = [get_k_entropy_fxn(len(mec))]
        return information_gain.create_info_gain_strategy_dag_collection_enum(
            mec, [k_functional], entropies)
def simulate_(tup):
    """Run one simulation for a (GaussDAG, output folder, index) triple."""
    gdag, folder, num = tup
    structural_dag = cd.DAG(nodes=set(gdag.nodes), arcs=gdag.arcs)
    print('SIMULATING FOR DAG: %d' % num)
    print('Folder:', folder)
    print('Size of MEC:', len(structural_dag.cpdag().all_dags()))
    strategy = get_strategy(args.strategy, gdag)
    simulate(strategy, SIM_CONFIG, gdag, folder, save_gies=False)
def test_cpdag_v(self):
    """A v-structure 1->2<-3 keeps both arcs directed in the CPDAG."""
    dag = cd.DAG(arcs={(1, 2), (3, 2)})
    cpdag = dag.cpdag()
    self.assertEqual(cpdag.arcs, {(1, 2), (3, 2)})
    self.assertEqual(cpdag.edges, set())
    expected_parents = {1: set(), 2: {1, 3}, 3: set()}
    expected_children = {1: {2}, 2: set(), 3: {2}}
    expected_neighbors = {1: {2}, 2: {1, 3}, 3: {2}}
    for node in (1, 2, 3):
        self.assertEqual(cpdag.parents[node], expected_parents[node])
        self.assertEqual(cpdag.children[node], expected_children[node])
        self.assertEqual(cpdag.neighbors[node], expected_neighbors[node])
        # Nothing is left undirected in a pure v-structure.
        self.assertEqual(cpdag.undirected_neighbors[node], set())
    # The source DAG must be untouched by cpdag().
    self.assertEqual(dag.arcs, {(1, 2), (3, 2)})
    for node in (1, 2, 3):
        self.assertEqual(dag.parents[node], expected_parents[node])
        self.assertEqual(dag.children[node], expected_children[node])
def test_interventional_cpdag_2node(self):
    """For 0->1, intervening on either endpoint fully orients the edge."""
    dag = cd.DAG(arcs={(0, 1)})
    for intervened in ({1}, {0}):
        icpdag = dag.interventional_cpdag([intervened], cpdag=dag.cpdag())
        self.assertEqual(icpdag.arcs, {(0, 1)})
        self.assertEqual(icpdag.edges, set())
def test_dsep(self):
    """d-separation behaves correctly for chains, confounders, and colliders."""
    chain = cd.DAG(arcs={(1, 2), (2, 3)})
    self.assertTrue(chain.dsep(1, 3, {2}))
    self.assertFalse(chain.dsep(1, 3))

    confounder = cd.DAG(arcs={(2, 1), (2, 3)})
    self.assertTrue(confounder.dsep(1, 3, {2}))
    self.assertFalse(confounder.dsep(1, 3))

    collider = cd.DAG(arcs={(1, 3), (2, 3)})
    self.assertTrue(collider.dsep(1, 2))
    self.assertFalse(collider.dsep(1, 2, {3}))

    # Conditioning on a descendant of the collider also opens the path.
    collider_with_chain = cd.DAG(arcs={(1, 3), (2, 3), (3, 4), (4, 5)})
    self.assertTrue(collider_with_chain.dsep(1, 2))
    self.assertFalse(collider_with_chain.dsep(1, 2, {5}))
def test_interventional_cpdag(self):
    """Intervening on node 1 of a complete 3-node DAG orients its outgoing arcs."""
    dag = cd.DAG(arcs={(1, 2), (1, 3), (2, 3)})
    icpdag = dag.interventional_cpdag([{1}], cpdag=dag.cpdag())
    self.assertEqual(icpdag.arcs, {(1, 2), (1, 3)})
    # The 2-3 edge cannot be oriented by an intervention on 1 alone.
    self.assertEqual(icpdag.edges, {(2, 3)})
    expected_undirected = {1: set(), 2: {3}, 3: {2}}
    for node, nbrs in expected_undirected.items():
        self.assertEqual(icpdag.undirected_neighbors[node], nbrs)
def get_dag_transitive_closure(self):
    """Return a DAG with an arc u->v for every ordered pair with u below v in the poset."""
    nodes = self.underlying_dag.nodes
    closure = cd.DAG(nodes=nodes)
    for u, v in itr.combinations(nodes, 2):
        if self.less_than(u, v):
            closure.add_arc(u, v)
        elif self.less_than(v, u):
            closure.add_arc(v, u)
    return closure
def __init__(self, nodes):
    """Create an empty poset over the given nodes.

    Invariant: the underlying DAG must remain a Hasse diagram — an arc
    i->j implies there is no longer path i->k->...->j.
    """
    self._num_relations = 0
    self._ancestors = defaultdict(set)      # node -> set of strict ancestors
    self._descendants = defaultdict(set)    # node -> set of strict descendants
    self.underlying_dag = cd.DAG(nodes=nodes)
def test_is_invariant(self):
    """Invariance checks on the chain 1->2->3."""
    chain = cd.DAG(arcs={(1, 2), (2, 3)})
    for i, j in [(1, 3), (2, 3)]:
        self.assertTrue(chain.is_invariant(i, j))
    self.assertFalse(chain.is_invariant(3, 3))
    for i, j in [(1, 3), (2, 3)]:
        self.assertFalse(chain.is_invariant(i, j, cond_set=3))
    self.assertTrue(chain.is_invariant(2, 3, cond_set=1))
    self.assertTrue(chain.is_invariant(1, 3, cond_set=2))
def test_pdag2alldags_3nodes_chain(self):
    """The MEC of a 3-node chain has exactly the three v-structure-free orientations."""
    cpdag = cd.DAG(arcs={(1, 2), (2, 3)}).cpdag()
    expected_arc_sets = {
        frozenset({(1, 2), (2, 3)}),
        frozenset({(2, 1), (2, 3)}),
        frozenset({(2, 1), (3, 2)}),
    }
    self.assertEqual(expected_arc_sets, cpdag.all_dags(verbose=False))
def get_component_dag(nnodes, p, nclusters=3):
    """Sample a DAG whose arcs only connect nodes within the same cluster.

    Nodes 0..nnodes-1 are split into nclusters contiguous clusters; each
    within-cluster pair (i, j) with i < j independently receives the arc
    i->j with probability p, so every component is acyclic by construction.

    Bug fix: the cutoffs were previously int(nnodes/nclusters)*i, which
    leaves the trailing nnodes % nclusters nodes outside every cluster
    (always isolated). The last cutoff is now pinned to nnodes so the final
    cluster absorbs the remainder.
    """
    base_size = nnodes // nclusters
    cluster_cutoffs = [base_size * i for i in range(nclusters)] + [nnodes]
    clusters = [
        list(range(cluster_cutoffs[i], cluster_cutoffs[i + 1]))
        for i in range(nclusters)
    ]
    pairs_in_clusters = [list(itr.combinations(cluster, 2)) for cluster in clusters]
    # One Bernoulli draw per candidate within-cluster pair.
    bools = np.random.binomial(1, p, sum(map(len, pairs_in_clusters)))
    dag = cd.DAG(nodes=set(range(nnodes)))
    for (i, j), b in zip(itr.chain(*pairs_in_clusters), bools):
        if b != 0:
            dag.add_arc(i, j)
    return dag
def test_to_dag(self):
    """to_dag() must return a member of the chain's Markov equivalence class."""
    cpdag = cd.DAG(arcs={(1, 2), (2, 3)}).cpdag()
    extracted = cpdag.to_dag()
    mec_arc_sets = {
        frozenset({(1, 2), (2, 3)}),
        frozenset({(2, 1), (2, 3)}),
        frozenset({(2, 1), (3, 2)}),
    }
    self.assertIn(frozenset(extracted.arcs), mec_arc_sets)
def test_to_dag_complete3(self):
    """to_dag() on a complete 3-node CPDAG must return one of its 6 members."""
    cpdag = cd.DAG(arcs={(1, 2), (2, 3), (1, 3)}).cpdag()
    extracted = cpdag.to_dag()
    # All 3! topological orderings of a complete graph are Markov equivalent.
    mec_arc_sets = {
        frozenset({(1, 2), (1, 3), (2, 3)}),
        frozenset({(1, 2), (1, 3), (3, 2)}),  # 2->3 reversed
        frozenset({(1, 2), (3, 1), (3, 2)}),  # 1->3 reversed
        frozenset({(2, 1), (3, 1), (3, 2)}),  # 1->2 reversed
        frozenset({(2, 1), (3, 1), (2, 3)}),  # 3->2 reversed
        frozenset({(2, 1), (1, 3), (2, 3)}),  # 3->1 reversed
    }
    self.assertIn(frozenset(extracted.arcs), mec_arc_sets)
def test_pdag2alldags_3nodes_complete(self):
    """all_dags() on a complete 3-node CPDAG yields all 6 orderings.

    Fix: the loop variable previously shadowed the outer `dag`; it is an
    arc set (each element of all_dags() is a frozenset of arcs), so it is
    renamed `arc_set`.
    """
    dag = cd.DAG(arcs={(1, 2), (1, 3), (2, 3)})
    dags = dag.cpdag().all_dags(verbose=False)
    self.assertEqual(len(dags), 6)
    for arc_set in dags:
        # Every complete DAG on 3 nodes has exactly 3 arcs.
        self.assertEqual(len(arc_set), 3)
    true_possible_arcs = {
        frozenset({(1, 2), (1, 3), (2, 3)}),
        frozenset({(1, 2), (1, 3), (3, 2)}),  # flip 2->3
        frozenset({(1, 2), (3, 1), (3, 2)}),  # flip 1->3
        frozenset({(2, 1), (3, 1), (3, 2)}),  # flip 1->2
        frozenset({(2, 1), (3, 1), (2, 3)}),  # flip 3->2
        frozenset({(2, 1), (1, 3), (2, 3)}),  # flip 3->1
    }
    self.assertEqual(true_possible_arcs, dags)
def simulate_(tup):
    """Run one simulation for a (GaussDAG, folder, index) triple.

    Returns (MEC size,) + whatever simulate() returns.
    """
    gdag, folder, num = tup
    structural_dag = cd.DAG(nodes=set(gdag.nodes), arcs=gdag.arcs)
    mec_size = len(structural_dag.cpdag().all_dags())
    print('SIMULATING FOR DAG: %d' % num)
    print('Folder:', folder)
    print('Size of MEC:', mec_size)
    config = SimulationConfig(
        starting_samples=starting_samples,
        n_samples=args.samples,
        n_batches=args.batches,
        max_interventions=args.max_interventions,
        strategy=args.strategy,
        intervention_strength=args.intervention_strength,
        # A-ICP paper: each DAG gets its own target.
        target=targets[num],
        intervention_type=args.intervention_type if args.intervention_type is not None else 'gauss',
        target_allowed=args.target_allowed != 0 if args.target_allowed is not None else True,
    )
    strategy = get_strategy(args.strategy, gdag, targets[num])
    results = simulate(strategy, config, gdag, folder, save_gies=False, dag_num=num)
    return (mec_size,) + results
def test_shd(self):
    """Structural Hamming distance is symmetric and counts per-edge edits."""
    cases = [
        # (arcs of d1, arcs of d2, expected SHD)
        ({(0, 1), (0, 2)}, {(1, 0), (1, 2)}, 3),
        (set(), {(0, 1), (1, 2)}, 2),
        ({(0, 1), (1, 2)}, {(0, 1), (2, 1)}, 1),
    ]
    for arcs1, arcs2, expected in cases:
        d1 = cd.DAG(arcs=arcs1)
        d2 = cd.DAG(arcs=arcs2)
        self.assertEqual(d1.shd(d2), expected)
        self.assertEqual(d2.shd(d1), expected)
def setUp(self):
    """Fixture DAG: 1->2->4, 1->3->4, and 3->5."""
    fixture_arcs = {(1, 2), (1, 3), (3, 4), (2, 4), (3, 5)}
    self.d = cd.DAG(arcs=fixture_arcs)
def get_strategy(strategy, dag, target):
    """Map a strategy name to an intervention-selection strategy.

    Parameters
    ----------
    strategy: strategy name (see the if-chain below).
    dag: ground-truth GaussDAG used to build DAG collections / scores.
    target: target node for the target-oriented strategies.

    Returns None implicitly for unrecognized names.

    NOTE(review): reads module-level `args` — confirm it exists in every
    importing context.
    """
    if strategy == 'budgeted_exp_design':
        base_dag = cd.DAG(nodes=set(dag.nodes), arcs=dag.arcs)
        dag_collection = [cd.DAG(nodes=set(dag.nodes), arcs=arcs)
                          for arcs in base_dag.cpdag().all_dags()]
        return budgeted_experiment_design.create_bed_strategy(dag_collection)
    if strategy == 'random':
        return random_nodes.random_strategy
    if strategy == 'random-smart':
        d = cd.DAG(nodes=set(dag.nodes), arcs=dag.arcs)
        return random_nodes.create_random_smart_strategy(d.cpdag())
    if strategy == 'learn-parents':
        return learn_target_parents.create_learn_target_parents(target, args.boot)
    if strategy == 'edge-prob':
        return edge_prob.create_edge_prob_strategy(target, args.boot)
    if strategy == 'var-score':
        node_vars = np.diag(dag.covariance)
        return var_score.create_variance_strategy(
            target, node_vars, [2 * np.sqrt(node_var) for node_var in node_vars])
    if strategy == 'entropy':
        return information_gain.create_info_gain_strategy(
            args.boot, parent_functionals(target, dag.nodes))
    if strategy == 'entropy-enum':
        return information_gain.create_info_gain_strategy(
            args.boot, parent_functionals(target, dag.nodes), enum_combos=True)
    if strategy == 'entropy-dag-collection':
        base_dag = cd.DAG(nodes=set(dag.nodes), arcs=dag.arcs)
        dag_collection = [cd.DAG(nodes=set(dag.nodes), arcs=arcs)
                          for arcs in base_dag.cpdag().all_dags()]
        mec_functional = get_mec_functional_k(dag_collection)
        functional_entropies = [get_k_entropy_fxn(len(dag_collection))]
        gauss_iv = args.intervention_type == 'gauss'
        return information_gain.create_info_gain_strategy_dag_collection(
            dag_collection, [mec_functional], functional_entropies, gauss_iv,
            args.mbsize, verbose=args.verbose)
    if strategy == 'entropy-dag-collection-multiple-mec':
        base_dag = cd.DAG(nodes=set(dag.nodes), arcs=dag.arcs)
        # Collect up to 3 extra DAGs from *different* MECs by reversing one
        # non-reversible arc of the true DAG at a time.
        other_dags = []
        non_reversible_arcs = list(base_dag.arcs - base_dag.reversible_arcs())
        random.shuffle(non_reversible_arcs)
        while len(other_dags) < 3:
            if len(non_reversible_arcs) == 0:
                break
            arc = non_reversible_arcs.pop()
            other_dag = base_dag.copy()
            try:
                other_dag.reverse_arc(*arc)
            except CycleError:
                # Bug fix: was `pass`, which let an *unreversed* copy of
                # base_dag (Markov equivalent to it) fall through to the
                # check below and be appended, duplicating the base MEC in
                # the collection. Skip this arc instead.
                continue
            if (not any(other_dag.markov_equivalent(d) for d in other_dags)
                    and len(other_dag.cpdag().all_dags()) < 25):
                other_dags.append(other_dag)
        print(other_dags)
        dag_collection = [cd.DAG(nodes=set(dag.nodes), arcs=arcs)
                          for arcs in base_dag.cpdag().all_dags()]
        for other_dag in other_dags:
            dag_collection.extend([cd.DAG(nodes=set(dag.nodes), arcs=arcs)
                                   for arcs in other_dag.cpdag().all_dags()])
        print('length of dag collection:', len(dag_collection))
        mec_functional = get_mec_functional_k(dag_collection)
        functional_entropies = [get_k_entropy_fxn(len(dag_collection))]
        gauss_iv = args.intervention_type == 'gauss'
        return information_gain.create_info_gain_strategy_dag_collection(
            dag_collection, [mec_functional], functional_entropies, gauss_iv,
            args.mbsize, verbose=args.verbose)
    if strategy == 'entropy-dag-collection-descendants':
        base_dag = cd.DAG(nodes=set(dag.nodes), arcs=dag.arcs)
        dag_collection = [cd.DAG(nodes=set(dag.nodes), arcs=arcs)
                          for arcs in base_dag.cpdag().all_dags()]
        binary_entropy_fxn = get_k_entropy_fxn(2)
        d_functionals = descendant_functionals(target, dag.nodes)
        d_functionals_entropies = [binary_entropy_fxn] * len(d_functionals)
        gauss_iv = args.intervention_type == 'gauss'
        return information_gain.create_info_gain_strategy_dag_collection(
            dag_collection, d_functionals, d_functionals_entropies, gauss_iv,
            args.mbsize, verbose=args.verbose)
    if strategy == 'entropy-dag-collection-enum':
        base_dag = cd.DAG(nodes=set(dag.nodes), arcs=dag.arcs)
        dag_collection = [cd.DAG(nodes=set(dag.nodes), arcs=arcs)
                          for arcs in base_dag.cpdag().all_dags()]
        mec_functional = get_mec_functional_k(dag_collection)
        functional_entropies = [get_k_entropy_fxn(len(dag_collection))]
        return information_gain.create_info_gain_strategy_dag_collection_enum(
            dag_collection, [mec_functional], functional_entropies)
def test_pdag2alldags_6nodes_complete(self):
    """The MEC of a complete 6-node DAG contains 6! member DAGs."""
    complete_arcs = {(i, j) for i, j in itr.combinations(range(6), 2)}
    member_arc_sets = cd.DAG(arcs=complete_arcs).cpdag().all_dags()
    self.assertEqual(len(member_arc_sets), np.prod(range(1, 7)))
probs[fval] += w # = find entropy mask = probs != 0 plogps = np.zeros(len(probs)) plogps[mask] = np.log2(probs[mask]) * probs[mask] return -plogps.sum() return get_k_entropy np.random.seed(100) g = cd.rand.directed_erdos(10, .5) g = cd.GaussDAG(nodes=list(range(10)), arcs=g.arcs) mec = [ cd.DAG(arcs=arcs) for arcs in cd.DAG(arcs=g.arcs).cpdag().all_dags() ] strat = create_info_gain_strategy_dag_collection( mec, [get_mec_functional_k(mec)], [get_k_entropy_fxn(len(mec))], verbose=True) samples = g.sample(1000) precision_matrix = samples.T @ samples / 1000 sel_interventions = strat( IterationData(current_data={-1: g.sample(1000)}, max_interventions=1, n_samples=500, batch_num=0, n_batches=1, intervention_set=[0, 1, 2], interventions=[cd.GaussIntervention() for _ in range(3)], batch_folder='test_sanity',
def test_icpdag2alldags(self):
    """After intervening on node 2, only the 4-5 edge stays unoriented (2 DAGs)."""
    dag = cd.DAG(arcs={(1, 2), (1, 3), (2, 3), (4, 5)})
    icpdag = dag.interventional_cpdag([{2}], cpdag=dag.cpdag())
    self.assertEqual(len(icpdag.all_dags()), 2)
def test_optimal_intervention_1intervention(self):
    """A single greedy intervention on node 2 fully orients the 3-node triangle."""
    triangle = cd.DAG(arcs={(1, 2), (1, 3), (2, 3)})
    chosen, icpdags = triangle.optimal_intervention_greedy()
    self.assertEqual(chosen, [2])
    self.assertEqual(icpdags[0].arcs, triangle.arcs)
for e in itr.combinations(self.underlying_dag.nodes, 2): u = e[0] v = e[1] if self.less_than(u, v) and not other.less_than(u, v): return False if other.less_than(u, v) and not self.less_than(u, v): return False if self.greater_than(u, v) and not other.greater_than(u, v): return False if other.greater_than(u, v) and not self.greater_than(u, v): return False return True if __name__ == '__main__': dag = cd.DAG(arcs={(0, 1), (1, 3), (3, 4), (2, 3), (0, 3), (0, 4)}) p = Poset.from_dag(dag) # VERBOSE = False # empty_poset = Poset(4) # # visited_posets = {frozenset(empty_poset.underlying_dag._arcs)} # queue = [empty_poset] # while queue: # current_poset = queue.pop(0) # covering_posets = current_poset.get_covering_posets() # for poset in covering_posets: # arcs = frozenset(poset.underlying_dag._arcs) # # if arcs == {(1, 0), (0, 2), (1, 2)}: # # print(current_poset.underlying_dag.arcs) # if arcs not in visited_posets: # queue.append(poset)
def test_optimal_intervention_2interventions2(self):
    """Greedy picks node 2 (orients the triangle) then node 4 (orients 4-5)."""
    dag = cd.DAG(arcs={(1, 2), (1, 3), (2, 3), (4, 5)})
    chosen, icpdags = dag.optimal_intervention_greedy(num_interventions=2)
    self.assertEqual(chosen, [2, 4])
    triangle_arcs = {(1, 2), (1, 3), (2, 3)}
    self.assertEqual(icpdags[0].arcs, triangle_arcs)
    self.assertEqual(icpdags[1].arcs, triangle_arcs | {(4, 5)})
import causaldag as cd from causaldag import GaussIntervention from causaldag.inference.structural import igsp, unknown_target_igsp from causaldag.utils.ci_tests import gauss_ci_test, hsic_invariance_test import numpy as np import random import os from config import PROJECT_FOLDER from R_algs.wrappers import run_gies np.random.seed(1729) random.seed(1729) ntrials = 10 nnodes = 5 d = cd.DAG(arcs={(i, i + 1) for i in range(nnodes - 1)}) g = cd.GaussDAG(nodes=list(range(nnodes)), arcs=d.arcs) cpdag = d.cpdag() print(d.interventional_cpdag({nnodes - 1}, cpdag=cpdag).arcs) print(d.interventional_cpdag({0, nnodes - 1}, cpdag=cpdag).arcs) shds_igsp = [] shds_utigsp = [] shds_gies = [] dags_igsp = [] dags_utigsp = [] dags_gies = [] for i in range(ntrials): nsamples = 500 intervention = GaussIntervention(1, .01) samples = g.sample(nsamples) iv_samples = g.sample_interventional_perfect(
a = stats.multivariate_normal(mean=mu, cov=sigma).logpdf(samples) ll = np.sum(a) lls[j] = ll return logsumexp(lls) - np.log(num_iterations) if __name__ == '__main__': import causaldag from causaldag.rand import rand_weights, directed_erdos from causaldag.utils.ci_tests import partial_monte_carlo_correlation_suffstat, partial_correlation_suffstat from causaldag.utils.scores.gaussian_bge_score import local_gaussian_bge_score import time # d = causaldag.DAG(arcs={(0, 1)}) # # d = causaldag.DAG(arcs={(0, 1), (1, 2), (0, 2)}) # d = causaldag.DAG(arcs={(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)}) d = causaldag.DAG(arcs={(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)}) g = rand_weights(d) samples = g.sample(100) # with open("tests/data/bge_data/samples.npy", 'wb') as f: # np.save(f, samples) # samples = np.load("tests/data/bge_data/samples.npy") # print(np.shape(samples)) # Topologically sort data print(d.to_amat()[0]) suffstat = partial_correlation_suffstat(samples) suffstat["samples"] = samples p = np.shape(samples)[1] alpha_mu = p alpha_w = p + alpha_mu + 1 inverse_scale_matrix = np.eye(p) * alpha_mu * (alpha_w - p - 1) / (alpha_mu + 1) parameter_mean = np.zeros(p)
def test_fully_orienting_interventions_6nodes_complete(self):
    # Greedily compute a set of interventions that fully orients a complete
    # 6-node DAG. NOTE(review): no assertions are visible here — presumably a
    # smoke test that the call completes without error; confirm whether
    # assertions on `ivs`/`icpdags` were intended.
    dag = cd.DAG(arcs={(i, j) for i, j in itr.combinations(range(6), 2)})
    ivs, icpdags = dag.fully_orienting_interventions_greedy()