def test_means(self):
    # The means handed to the LGANM constructor must be the ones exposed
    # afterwards through its `means` attribute
    n_vars = 10
    expected = np.arange(n_vars)
    adjacency = sempler.generators.dag_avg_deg(n_vars, n_vars / 4, 1, 1)
    model = sempler.LGANM(adjacency, expected, (0, 1))
    matches = model.means == expected
    self.assertTrue(matches.all())
def test_vs_cdt_1(self):
    # Compare the CPDAG estimated by ges.fit_bic against the one returned
    # by the GES wrapper of the cdt package (R package pcalg), on
    # NUM_GRAPHS randomly generated graphs
    np.random.seed(15)
    num_graphs = NUM_GRAPHS
    num_vars = 15
    sample_size = 1500
    for i in range(num_graphs):
        print(" Checking SCM %d" % (i))
        tic = time.time()
        adjacency = sempler.generators.dag_avg_deg(num_vars, 3, 1, 1)
        weights = adjacency * np.random.uniform(1, 2, adjacency.shape)
        model = sempler.LGANM(weights, (1, 10), (0.5, 1))
        obs_sample = model.sample(n=sample_size)
        # Reference estimate (cdt / pcalg)
        frame = pd.DataFrame(obs_sample)
        reference_graph = GES(verbose=True).predict(frame)
        estimate_cdt = nx.to_numpy_array(reference_graph)
        toc = time.time()
        print(" GES-CDT done (%0.2f seconds)" % (toc - tic))
        tic = time.time()
        # Estimate of this implementation; the first two SCMs run with
        # the more verbose debug level to exercise the debugging output
        debug_level = 4 if i < 2 else 2
        estimate, _ = ges.fit_bic(obs_sample, iterate=True, debug=debug_level)
        toc = time.time()
        print(" GES-own done (%0.2f seconds)" % (toc - tic))
        agreement = estimate == estimate_cdt
        self.assertTrue(agreement.all())
    print("\nCompared with PCALG implementation on %d DAGs" % (i + 1))
def gen_cases(n, P, k, w_min=1, w_max=1, var_min=1, var_max=1, int_min=0, int_max=0, random_state=None):
    """
    Generate random experimental cases (ie. linear SEMs).

    Graphs are drawn repeatedly and a target variable is picked at
    random in each; a draw is kept only if the target has at least one
    parent. This repeats until `n` cases have been collected.

    Parameters:
      - n: total number of cases
      - P: number of variables in the SEMs (either an integer or a
           tuple (low, high) indicating an inclusive range to sample
           the size from)
      - k: second argument passed to sempler.dag_avg_deg (presumably
           the average degree of the graph; confirm in sempler's docs)
      - w_min, w_max: Weights of the SEMs are sampled at uniform
        between w_min and w_max
      - var_min, var_max: range passed as the second argument of
        sempler.LGANM (presumably the noise variances are sampled
        uniformly from it; confirm against the installed sempler
        version)
      - int_min, int_max: range passed as the third argument of
        sempler.LGANM (presumably the intercepts are sampled
        uniformly from it; confirm against the installed sempler
        version)
      - random_state: to fix the random seed for reproducibility

    Returns:
      - a list of `n` TestCase objects, each built from the case
        index, the SEM, the target and the parents of the target
    """
    if random_state is not None:
        np.random.seed(random_state)
    cases = []
    while len(cases) < n:
        p = np.random.randint(P[0], P[1] + 1) if isinstance(P, tuple) else P
        W = sempler.dag_avg_deg(p, k, w_min, w_max)
        target = np.random.choice(range(p))
        # The parents of the target are the ground truth of the case;
        # the graph information is computed once and reused
        parents = utils.graph_info(target, W)[0]
        if len(parents) > 0:
            sem = sempler.LGANM(W, (var_min, var_max), (int_min, int_max))
            # The case index is the number of cases accepted so far
            cases.append(TestCase(len(cases), sem, target, parents))
    return cases
def test_distribution(self):
    # Check the mean and covariance returned by "population" sampling,
    # for the observational setting and under interventions
    W = np.array([[0, 0, 1, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1],
                  [0, 0, 0, 0]])
    # SEM with unit weights, built with the ranges (0, 0) and (1, 1)
    sem = sempler.LGANM(W, (0, 0), (1, 1))
    # Each scenario: extra keyword arguments for sem.sample, expected
    # mean and expected covariance
    scenarios = [
        # Observational distribution
        ({},
         np.zeros(4),
         np.array([[1, 0, 1, 1],
                   [0, 1, 1, 1],
                   [1, 1, 3, 3],
                   [1, 1, 3, 4]])),
        # Do-intervention fixing the variable with index 0 to the value 1
        ({'do_interventions': {0: 1}},
         np.array([1, 0, 1, 1]),
         np.array([[0, 0, 0, 0],
                   [0, 1, 1, 1],
                   [0, 1, 2, 2],
                   [0, 1, 2, 3]])),
        # Interventions with parameters (0, 2) and (1, 2) on the variables
        # with index 0 and 1
        ({'do_interventions': {0: (0, 2), 1: (1, 2)}},
         np.array([0, 1, 1, 1]),
         np.array([[2, 0, 2, 2],
                   [0, 2, 2, 2],
                   [2, 2, 5, 5],
                   [2, 2, 5, 6]])),
    ]
    for sample_kwargs, expected_mean, expected_cov in scenarios:
        distribution = sem.sample(population=True, **sample_kwargs)
        self.assertTrue((distribution.mean == expected_mean).all())
        self.assertTrue((distribution.covariance == expected_cov).all())
def test_basic(self):
    # Test the initialization of an LGANM object
    p = 10
    W = sempler.generators.dag_avg_deg(p, p / 4, 1, 1)
    sem = sempler.LGANM(W, (0, 0), (1, 1))
    self.assertTrue((sem.variances == np.ones(p)).all())
    self.assertTrue((sem.means == np.zeros(p)).all())
    # Every entry of the stored connectivity matrix must be either 0 or
    # 1. This has to be assertEqual: with assertTrue the second argument
    # is only the failure message, so the count was never compared to
    # p * p
    zero_or_one = (sem.W == 0) | (sem.W == 1)
    self.assertEqual(np.sum(zero_or_one), p * p)
def test_blanket_behaviour(self):
    # Check how the stable blanket of each variable reacts to an
    # intervention on one of its parents, on a parent of one of its
    # children, and on one of its children

    def blanket_after_intervention(sem, dist, target, candidates):
        # Pick one candidate at random, intervene on it with parameters
        # (1, 5) and return it together with the stable blanket computed
        # from the observational and the interventional distributions
        chosen = np.random.choice(list(candidates))
        dist_int = sem.sample(population=True, do_interventions={chosen: (1, 5)})
        outcome = population_icp([dist, dist_int], target, debug=False, selection='all')
        return chosen, stable_blanket(outcome.accepted, outcome.mses)

    np.random.seed(7)
    for p in range(2, 8):
        W = sempler.dag_avg_deg(p, 2.5, -1, 1)
        sem = sempler.LGANM(W, (0.1, 2))
        dist = sem.sample(population=True)
        for i in range(p):
            parents, children, poc, _ = utils.graph_info(i, W)
            baseline = population_icp([dist], i, debug=False, selection='all')
            sb_0 = stable_blanket(baseline.accepted, baseline.mses)
            # Intervening on a parent should leave the stable blanket
            # the same
            if len(parents) > 0:
                _, sb_pa = blanket_after_intervention(sem, dist, i, parents)
                self.assertEqual(sb_0, sb_pa)
            # Intervening on a parent of a child (that is not itself a
            # child) should leave the stable blanket the same
            only_poc = poc.difference(children)
            if len(only_poc) > 0:
                _, sb_pc = blanket_after_intervention(sem, dist, i, only_poc)
                self.assertEqual(sb_0, sb_pc)
            # Intervening on a child should affect the stable blanket:
            # neither the child nor the nodes returned for it in the
            # second position of graph_info may remain in it
            if len(children) > 0:
                ch, sb_ch = blanket_after_intervention(sem, dist, i, children)
                _, descendants, _, _ = utils.graph_info(ch, W)
                for d in descendants.union({ch}):
                    self.assertTrue(d not in sb_ch)
def test_memory(self):
    # The constructor must copy its arguments: modifying the arrays that
    # were passed in must not be visible through the LGANM object
    W = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
    means = np.array([3, 4, 5])
    variances = np.array([1, 2, 3])
    sem = sempler.LGANM(W, means, variances)
    # Modify the originals in place
    variances[0] = 0
    means[2] = 1
    W[0, 0] = 2
    # ... and make sure each stored attribute now differs from them
    pairs = ((W, sem.W), (variances, sem.variances), (means, sem.means))
    for modified, stored in pairs:
        self.assertFalse((modified == stored).all())
def test_gaussian_sampling(self):
    # Build the same model as an LGANM and as a general ANM, and check
    # that both produce samples from the same distribution under K = 50
    # sets of intervention targets

    def draw(kind, n, anm, lganm, interventions_anm, interventions_lganm):
        # Sample both models under the same kind of intervention (ANM
        # first, then LGANM)
        return (anm.sample(n, **{kind: interventions_anm}),
                lganm.sample(n, **{kind: interventions_lganm}))

    K = 50
    W = np.array([[0, 0, 0, 0.2, 0],
                  [0, 0, 0.4, 0, 0],
                  [0, 0, 0, 0.3, 0],
                  [0, 0, 0, 0, 0.5],
                  [0, 0, 0, 0, 0]])
    lganm = sempler.LGANM(W, (1, 2), (1, 2))
    noise_distributions = [
        sempler.noise.normal(mean, variance)
        for mean, variance in zip(lganm.means, lganm.variances)
    ]
    # Assignment functions using the same weights as W
    assignments = [
        None,
        None,
        lambda x: .4 * x,
        lambda x: .2 * x[:, 0] + .3 * x[:, 1],
        lambda x: .5 * x,
    ]
    anm = sempler.ANM(W, assignments, noise_distributions)
    all_targets = sempler.generators.intervention_targets(lganm.p, K, (0, 3))
    for targets in all_targets:
        print(targets)
        size = len(targets)
        means = np.random.uniform(0, 5, size)
        variances = np.random.uniform(2, 3, size)
        interventions_lganm = {
            t: (m, v) for t, m, v in zip(targets, means, variances)
        }
        interventions_anm = {
            t: sempler.noise.normal(m, v)
            for t, m, v in zip(targets, means, variances)
        }
        # Sample each SCM; the number of targets decides which kind of
        # intervention is exercised
        # TODO: Combine different interventions in one
        n = round(1e6)
        models = (n, anm, lganm, interventions_anm, interventions_lganm)
        if size <= 1:
            samples_anm, samples_lganm = draw('do_interventions', *models)
        elif size == 2:
            samples_anm, samples_lganm = draw('shift_interventions', *models)
        elif size == 3:
            samples_anm, samples_lganm = draw('noise_interventions', *models)
        # Check that the distribution is the same
        same = sempler.utils.same_normal(samples_anm, samples_lganm, debug=False)
        self.assertTrue(same)
def test_sampling_args(self):
    # Check the type returned by LGANM.sample for different combinations
    # of arguments
    W = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
    means = np.array([3, 4, 5])
    variances = np.array([1, 2, 3])
    sem = sempler.LGANM(W, means, variances)
    # Finite samples come back as arrays
    array_calls = (
        dict(n=1),
        dict(n=1, shift_interventions={}),
        dict(n=1, do_interventions={}),
        dict(n=1, shift_interventions=None),
        dict(n=1, do_interventions=None),
    )
    for kwargs in array_calls:
        self.assertEqual(np.ndarray, type(sem.sample(**kwargs)))
    # In the population setting a distribution object is returned, with
    # or without a sample size
    population_calls = (
        dict(n=1, population=True),
        dict(population=True),
    )
    for kwargs in population_calls:
        self.assertEqual(sempler.NormalDistribution, type(sem.sample(**kwargs)))
def gen_scms(G, p, k, w_min, w_max, m_min, m_max, v_min, v_max):
    """
    Generate random linear Gaussian SCMs (sempler.LGANM objects).

    Parameters:
      - G: total number of SCMs to generate
      - p: number of variables in the SCMs
      - k: average node degree
      - w_min, w_max: Weights of the SCMs are sampled at uniform
        between w_min and w_max; the sign of every entry is then
        flipped at random
      - m_min, m_max: range passed as the second argument of
        sempler.LGANM
      - v_min, v_max: range passed as the third argument of
        sempler.LGANM

    Returns:
      - a list with the G generated SCMs
    """
    scms = []
    while len(scms) < G:
        weights = sempler.generators.dag_avg_deg(p, k, w_min, w_max)
        # Random sign for every entry of the weight matrix
        signs = np.random.choice([-1, 1], size=weights.shape)
        weights *= signs
        scms.append(sempler.LGANM(weights, (m_min, m_max), (v_min, v_max)))
    return scms
def test_sampling_2(self):
    # For a 4-variable DAG whose adjacency matrix is upper triangular
    # with all ones, compare the samples with what the path method
    # gives when applied directly to the noise terms
    p = 4
    n = round(1e6)
    W = np.triu(np.ones((p, p)), k=1)
    sem = sempler.LGANM(W, (0, 0), (0.16, 0.16))
    np.random.seed(42)
    # Standard deviation 0.4, i.e. variance 0.16 as in the SEM above
    noise = np.random.normal([0, 0, 0, 0], [.4, .4, .4, .4], size=(n, 4))
    e0, e1, e2, e3 = noise.T
    # Each variable as a combination of the noise terms
    truth = np.column_stack([
        e0,
        e0 + e1,
        2 * e0 + e1 + e2,
        4 * e0 + 2 * e1 + e2 + e3,
    ])
    samples = sem.sample(n)
    self.assertTrue(utils.same_normal(truth, samples))
def test_valid_turn_operators_10(self):
    # Every valid turn operator must lead to an essential graph that
    # differs from the one it was applied to
    num_graphs = 10
    num_vars = 20
    for i in range(num_graphs):
        dag = sempler.generators.dag_avg_deg(num_vars, 3, 1, 1)
        cpdag = utils.dag_to_cpdag(dag)
        weights = dag * np.random.uniform(1, 2, dag.shape)
        obs_sample = sempler.LGANM(weights, (0, 0), (0.5, 1)).sample(n=1000)
        cache = GaussObsL0Pen(obs_sample)
        # Go through all non-zero entries (row, col) of the CPDAG and
        # score the turn operators for the pair (col, row)
        rows, cols = np.where(cpdag != 0)
        for x, y in zip(cols, rows):
            candidates = ges.score_valid_turn_operators(x, y, cpdag, cache)
            for (_, turned, _, _, _) in candidates:
                turned_cpdag = ges.utils.pdag_to_cpdag(turned)
                unchanged = (cpdag == turned_cpdag).all()
                self.assertFalse(unchanged)
    print("\nChecked that valid turn operators result in different MEC for %i CPDAGs" % (i + 1))
def test_sampling_1(self):
    # Sampling from a DAG with a single variable, with and without
    # interventions
    np.random.seed(42)
    n = round(1e6)
    sem = sempler.LGANM(sempler.generators.dag_full(1), (0, 0), (1, 1))
    # Observational data (an empty dictionary of shift interventions)
    reference = np.random.normal(0, 1, size=(n, 1))
    drawn = sem.sample(n, shift_interventions={})
    self.assertTrue(utils.same_normal(reference, drawn, atol=1e-1))
    # Do-intervention fixing the variable to 1: all samples equal 1
    reference = np.ones((n, 1))
    drawn = sem.sample(n, do_interventions={0: 1})
    self.assertTrue((reference == drawn).all())
    # Intervention with parameters (1, 4), compared against a normal
    # with mean 1 and standard deviation 2
    reference = np.random.normal(1, 2, size=(n, 1))
    drawn = sem.sample(n, do_interventions={0: (1, 4)})
    self.assertTrue(utils.same_normal(reference, drawn, atol=1e-1))
def test_valid_delete_operators_3(self):
    # For an undirected edge X - Y the delete operator must be
    # symmetric: scoring it for (X, Y) and for (Y, X) has to give the
    # same resulting graphs and the same scores
    num_graphs = 100
    num_vars = 20
    for i in range(num_graphs):
        dag = sempler.generators.dag_avg_deg(num_vars, 3, 1, 1)
        cpdag = utils.dag_to_cpdag(dag)
        weights = dag * np.random.uniform(1, 2, dag.shape)
        obs_sample = sempler.LGANM(weights, (0, 0), (0.5, 1)).sample(n=1000)
        cache = GaussObsL0Pen(obs_sample)
        # Apply the operator to all undirected edges
        rows, cols = np.where(utils.only_undirected(cpdag))
        for x, y in zip(rows, cols):
            forward = ges.score_valid_delete_operators(x, y, cpdag, cache)
            backward = ges.score_valid_delete_operators(y, x, cpdag, cache)
            for op_fwd, op_bwd in zip(forward, backward):
                # Same resulting state ...
                self.assertTrue((op_fwd[1] == op_bwd[1]).all())
                # ... and same score
                self.assertAlmostEqual(op_fwd[0], op_bwd[0])
    print("\nChecked equality of delete operator on undirected edges in %i CPDAGS" % (i + 1))
def test_blankets(self):
    # On random graphs of 2 to 7 variables, check the estimated Markov
    # blanket of every variable and that, with a single environment,
    # the stable blanket coincides with the Markov blanket
    np.random.seed(42)
    for num_vars in range(2, 8):
        W = sempler.dag_avg_deg(num_vars, 2.5, -1, 1)
        sem = sempler.LGANM(W, (0.1, 2))
        dist = sem.sample(population=True)
        for target in range(num_vars):
            true_mb = utils.graph_info(target, W)[3]
            # Markov blanket
            estimated_mb = set(markov_blanket(target, dist, tol=1e-10))
            self.assertEqual(true_mb, estimated_mb)
            # Stable blanket for one environment
            outcome = population_icp([dist], target, debug=False, selection='all')
            estimated_sb = stable_blanket(outcome.accepted, outcome.mses)
            self.assertEqual(true_mb, estimated_sb)
def test_interventions_1(self):
    # Sampling and interventions on a custom DAG, compared with the
    # result of the path method: the expected samples are obtained by
    # mixing independent noise terms with a matrix of path weights
    np.random.seed(42)
    p = 6
    n = round(1e6)
    W = np.array([[0, 1, 1, 0, 0, 0],
                  [0, 0, 0, 0, 1, 1],
                  [0, 0, 0, 1, 0, 0],
                  [0, 0, 0, 0, 1, 1],
                  [0, 0, 0, 0, 0, 1],
                  [0, 0, 0, 0, 0, 0]])
    sem = sempler.LGANM(W, (0, 0), (0.16, 0.16))

    def check(noise_means, noise_stds, mixing, **sample_kwargs):
        # Draw the reference first and the samples of the SEM second,
        # then compare their distributions
        noise = np.random.normal(noise_means, noise_stds, size=(n, p))
        reference = noise @ mixing.T
        drawn = sem.sample(n, **sample_kwargs)
        self.assertTrue(utils.same_normal(reference, drawn))

    # Path weights of the unmodified graph
    M = np.array([[1, 0, 0, 0, 0, 0],
                  [1, 1, 0, 0, 0, 0],
                  [1, 0, 1, 0, 0, 0],
                  [1, 0, 1, 1, 0, 0],
                  [2, 1, 1, 1, 1, 0],
                  [4, 2, 2, 2, 1, 1]])
    # Observational data
    check(np.zeros(p), np.ones(p) * 0.4, M)
    # Do-intervention on the variable with index 0
    check([2.1, 0, 0, 0, 0, 0], [0, .4, .4, .4, .4, .4], M,
          do_interventions={0: 2.1})
    # Do-intervention on index 0 together with interventions with
    # parameters (2, 0.25) and (1, 0.25) on the indices 1 and 4; the
    # rows 1 and 4 of the mixing matrix no longer depend on other noise
    # terms
    do_int = {0: 2, 1: (2, 0.25), 4: (1, 0.25)}
    M = np.array([[1, 0, 0, 0, 0, 0],
                  [0, 1, 0, 0, 0, 0],
                  [1, 0, 1, 0, 0, 0],
                  [1, 0, 1, 1, 0, 0],
                  [0, 0, 0, 0, 1, 0],
                  [1, 1, 1, 1, 1, 1]])
    check([2, 2, 0, 0, 1, 0], [0, .5, .4, .4, .5, .4], M,
          do_interventions=do_int)
def test_basic_1(self):
    # Initialization of an LGANM object, passing the parameters either
    # as ranges or as explicit vectors, and rejection of badly sized
    # arguments
    p = 5
    W = sempler.generators.dag_avg_deg(p, p / 4, 1, 1)
    valid_arguments = (
        ((0, 0), (1, 1)),
        (np.zeros(p), np.ones(p)),
    )
    for second, third in valid_arguments:
        sem = sempler.LGANM(W, second, third)
        self.assertTrue((sem.variances == np.ones(p)).all())
        self.assertTrue((sem.means == np.zeros(p)).all())
    # Argument combinations the constructor must refuse
    invalid_arguments = (
        ((0, 0), (0, 1, 2, 3, 4)),
        ((0, 0, 0, 0, 0), (0, 1)),
        ((0, 1, 2, 3), (0, 0, 0)),
        ((0, 1, 2, 3),),
    )
    for arguments in invalid_arguments:
        with self.assertRaises(Exception):
            sempler.LGANM(W, *arguments)
class OverallGESTests(unittest.TestCase):
    # Ground-truth graph and data shared by the tests of this class.
    # NOTE: no random seed is set in this class body before the weights
    # are sampled
    true_A = np.array([[0, 0, 1, 0, 0],
                       [0, 0, 1, 0, 0],
                       [0, 0, 0, 1, 1],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])
    # (variable, parents) pairs matching the edges of true_A
    factorization = [(4, (2, 3)), (3, (2, )), (2, (0, 1)), (0, ()), (1, ())]
    true_B = true_A * np.random.uniform(1, 2, size=true_A.shape)
    scm = sempler.LGANM(true_B, (0, 0), (0.3, 0.4))
    p = len(true_A)
    n = 100000
    interventions = [{
        0: (0, 1.0)
    }, {
        1: (0, 1.1)
    }, {
        2: (0, 1.2)
    }, {
        3: (0, 1.3)
    }, {
        4: (0, 1.4)
    }]
    obs_data = scm.sample(n=n)
    int_data = [obs_data]
    # Sample interventional distributions and construct true interventional
    # variances for later reference in tests
    interventional_variances = np.tile(scm.variances, (len(interventions) + 1, 1))
    for i, intervention in enumerate(interventions):
        int_data.append(scm.sample(n=n, shift_interventions=intervention))
        for (target, params) in intervention.items():
            interventional_variances[i + 1, target] += params[1]

    # ------------------------------------------------------
    # Helpers

    def _compare_with_pcalg(self, seed, fit_own):
        """Compare an estimate of this package with the one from cdt/pcalg.

        For NUM_GRAPHS randomly generated graphs, a sample is drawn from
        a linear Gaussian SCM, the equivalence class is estimated with
        the GES wrapper of the cdt package and with `fit_own`, and both
        estimates are required to be identical.

        Parameters:
          - seed: value used to seed numpy's random generator
          - fit_own: callable taking (obs_sample, i, debug), where i is
            the index of the current graph and debug the debug level to
            use, and returning the estimated adjacency matrix
        """
        np.random.seed(seed)
        G = NUM_GRAPHS  # number of graphs
        p = 15  # number of variables
        n = 1500  # size of the observational sample
        for i in range(G):
            print(" Checking SCM %d" % (i))
            start = time.time()
            A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            W = A * np.random.uniform(1, 2, A.shape)
            obs_sample = sempler.LGANM(W, (1, 10), (0.5, 1)).sample(n=n)
            # Estimate the equivalence class using the pcalg
            # implementation of GES (package cdt)
            data = pd.DataFrame(obs_sample)
            output = GES(verbose=True).predict(data)
            estimate_cdt = nx.to_numpy_array(output)
            end = time.time()
            print(" GES-CDT done (%0.2f seconds)" % (end - start))
            start = time.time()
            # Estimate using this implementation
            # Test debugging output for the first 2 SCMs
            estimate = fit_own(obs_sample, i, 4 if i < 2 else 2)
            end = time.time()
            print(" GES-own done (%0.2f seconds)" % (end - start))
            self.assertTrue((estimate == estimate_cdt).all())
        # Report G rather than the loop variable, which does not exist
        # when NUM_GRAPHS is zero
        print("\nCompared with PCALG implementation on %d DAGs" % G)

    # ------------------------------------------------------
    # Tests

    def test_vs_cdt_1(self):
        # Test that behaviour matches that of the implementation in
        # the R package pcalg, using NUM_GRAPHS randomly generated
        # graphs. The call is made through the ges.fit_bic function
        def fit_own(obs_sample, i, debug):
            estimate, _ = ges.fit_bic(obs_sample, iterate=True, debug=debug)
            return estimate

        self._compare_with_pcalg(15, fit_own)

    def test_vs_cdt_2(self):
        # Test that behaviour matches that of the implementation in
        # the R package pcalg, using NUM_GRAPHS randomly generated
        # graphs. The call is made through the ges.fit function; for
        # half of the cases (odd indices), manually specify the
        # completion algorithm.
        def fit_own(obs_sample, i, debug):
            score_class = ges.scores.gauss_obs_l0_pen.GaussObsL0Pen(obs_sample)
            completion_algorithm = None if i % 2 == 0 else ges.utils.pdag_to_cpdag
            estimate, _ = ges.fit(score_class,
                                  completion_algorithm=completion_algorithm,
                                  iterate=True,
                                  debug=debug)
            return estimate

        self._compare_with_pcalg(16, fit_own)

    def test_vs_cdt_2_raw(self):
        # Test that behaviour matches that of the implementation in
        # the R package pcalg, using NUM_GRAPHS randomly generated
        # graphs. The call is made through the ges.fit function, with
        # a score class constructed with method='raw'
        def fit_own(obs_sample, i, debug):
            score_class = ges.scores.gauss_obs_l0_pen.GaussObsL0Pen(
                obs_sample, method='raw')
            estimate, _ = ges.fit(score_class, iterate=True, debug=debug)
            return estimate

        self._compare_with_pcalg(17, fit_own)
class ScoreTests(unittest.TestCase):
    # Ground-truth graph, SCM and observational sample shared by the
    # tests, together with the two score objects under test (default
    # method and method='raw')
    np.random.seed(12)
    true_A = np.array([[0, 0, 1, 0, 0],
                       [0, 0, 1, 0, 0],
                       [0, 0, 0, 1, 1],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])
    # (variable, parents) pairs matching the edges of true_A
    factorization = [(4, (2, 3)), (3, (2, )), (2, (0, 1)), (0, ()), (1, ())]
    true_B = true_A * np.random.uniform(1, 2, size=true_A.shape)
    scm = sempler.LGANM(true_B, (0, 0), (0.3, 0.4))
    p = len(true_A)
    n = 10000
    obs_data = scm.sample(n=n)
    obs_score = GaussObsL0Pen(obs_data)
    obs_score_raw = GaussObsL0Pen(obs_data, method='raw')

    # ------------------------------------------------------
    # White-box tests:
    #   testing the inner workings of the ges.scores module, e.g. the
    #   intermediate functions used to compute the likelihoods

    def test_mle_obs(self):
        # Check that the parameters are correctly estimated when
        # passing a subgraph to GaussObsL0Pen._mle_full
        for score in [self.obs_score, self.obs_score_raw]:
            print("Testing %s" % score)
            # Estimate variable by variable, following the factorization
            local_B = np.zeros_like(self.true_B)
            local_omegas = np.zeros(self.p)
            for (x, pa) in self.factorization:
                local_B[:, x], local_omegas[x] = score._mle_local(x, pa)
            # Estimate for the whole graph at once
            full_B, full_omegas = score._mle_full(self.true_A)
            print("Locally estimated", local_B, local_omegas)
            print("Fully estimated", full_B, full_omegas)
            print("Truth", self.true_B, self.scm.variances)
            # Make sure zeros are respected
            self.assertTrue((local_B[self.true_A == 0] == 0).all())
            self.assertTrue((full_B[self.true_A == 0] == 0).all())
            # Make sure both estimates of the weights are equal
            self.assertTrue((local_B == full_B).all())
            # Make sure both estimates of the noise variances are equal
            self.assertTrue((local_omegas == full_omegas).all())
            # Compare with true model
            self.assertTrue(np.allclose(self.true_B, local_B, atol=5e-2))
            self.assertTrue(np.allclose(self.true_B, full_B, atol=5e-2))
            self.assertTrue(
                np.allclose(self.scm.variances, local_omegas, atol=1e-1))
            self.assertTrue(
                np.allclose(self.scm.variances, full_omegas, atol=1e-1))

    # ------------------------------------------------------
    # Black-box tests:
    #   Testing the behaviour of the "API" functions, i.e. the
    #   functions to compute the full/local
    #   observational/interventional BIC scores from a given DAG
    #   structure and the data

    def test_parameters_obs(self):
        # Constructing the score must fail with a TypeError if the data
        # is not an ndarray (here: a list wrapping the sample)
        with self.assertRaises(TypeError):
            GaussObsL0Pen([self.obs_data])

    def test_full_score_obs(self):
        # Verify that the true adjacency yields a higher score than
        # the empty graph
        for score_fun in [self.obs_score, self.obs_score_raw]:
            print("Testing %s" % score_fun)
            # Compute score of true adjacency
            true_score = score_fun.full_score(self.true_A)
            self.assertIsInstance(true_score, float)
            # Compute score of unconnected graph
            score = score_fun.full_score(np.zeros((self.p, self.p)))
            self.assertIsInstance(score, float)
            self.assertGreater(true_score, score)

    def test_score_decomposability_obs(self):
        # As a black-box test, make sure the score functions
        # preserve decomposability: the full score of the true graph
        # must match the sum of the local scores over its factorization
        for score_fun in [self.obs_score, self.obs_score_raw]:
            print("Decomposability of observational score")
            print("Testing %s" % score_fun)
            full_score = score_fun.full_score(self.true_A)
            acc = 0
            for (j, pa) in self.factorization:
                local_score = score_fun.local_score(j, pa)
                print(" ", j, pa, local_score)
                acc += local_score
            print("Full vs. acc:", full_score, acc)
            self.assertAlmostEqual(full_score, acc, places=2)
# -------------------------------------------------------------------- # Generate (or load) test cases # Load dataset if args.load_dataset is not None: print("\nLoading test cases from %s" % args.load_dataset) # Load a dataset stored in the format used by ABCD G = len(os.listdir(os.path.join(args.load_dataset, 'dags'))) Ws = [np.loadtxt(os.path.join(args.load_dataset, 'dags', 'dag%d' % i, 'adjacency.txt')) for i in range(G)] means = [np.loadtxt(os.path.join(args.load_dataset, 'dags', 'dag%d' % i, 'means.txt')) for i in range(G)] variances = [np.loadtxt(os.path.join(args.load_dataset, 'dags', 'dag%d' % i, 'variances.txt')) for i in range(G)] targets = [int(np.loadtxt(os.path.join(args.load_dataset, 'dags', 'dag%d' % i, 'target.txt'))) for i in range(G)] cases = [] for i, W in enumerate(Ws): sem = sempler.LGANM(W, variances[i], means[i]) truth = utils.graph_info(targets[i], W)[0] cases.append(evaluation.TestCase(i, sem, targets[i], truth)) excluded_keys += ['k', 'w_min', 'w_max', 'var_min', 'var_max', 'int_min', 'int_max', 'random_state', 'p_min', 'p_max'] # Or generate dataset else: P = args.p_min if args.p_min == args.p_max else (args.p_min, args.p_max) cases = evaluation.gen_cases(args.G, P, args.k, args.w_min, args.w_max, args.var_min, args.var_max, args.int_min, args.int_max)
class TurnOperatorTests(unittest.TestCase):
    # Ground-truth graph, SCM, observational sample and score cache
    # shared by the tests of this class
    true_A = np.array([[0, 0, 1, 0, 0],
                       [0, 0, 1, 0, 0],
                       [0, 0, 0, 1, 1],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])
    factorization = [(4, (2, 3)), (3, (2,)), (2, (0, 1)), (0, ()), (1, ())]
    true_B = true_A * np.random.uniform(1, 2, size=true_A.shape)
    scm = sempler.LGANM(true_B, (0, 0), (0.3, 0.4))
    p = len(true_A)
    n = 10000
    obs_data = scm.sample(n=n)
    cache = GaussObsL0Pen(obs_data)

    # ------------------------------------------------------
    # Helpers

    def _assert_rejected(self, operator, *args):
        # Call operator(*args) and require it to raise a ValueError,
        # which is printed; anything else fails the test
        try:
            operator(*args)
        except ValueError as error:
            print("OK:", error)
        else:
            self.fail("Exception should have been thrown")

    # ------------------------------------------------------
    # Tests

    def test_turn_operator_1(self):
        A = np.array([[0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        result = ges.turn(1, 2, {3}, A)
        # Expected: X1 -> X2 and X3 -> X2 are oriented
        A[2, 1], A[1, 2], A[2, 3] = 0, 1, 0
        self.assertTrue((A == result).all())

    def test_turn_operator_2(self):
        A = np.array([[0, 1, 1, 0, 0],
                      [1, 0, 1, 1, 1],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 0, 0],
                      [0, 1, 0, 0, 0]])
        # Each scenario holds the arguments (x, y, C) given to ges.turn
        # and the entries of A expected to change, as ((row, col), value)
        scenarios = [
            # Turn edge X3 - X1 to X3 -> X1 with C = {X4, X0}
            ((3, 1, {0, 4}), [((1, 3), 0), ((1, 0), 0), ((1, 4), 0)]),
            # Turn edge X1 - X3 to X1 -> X3 with C = Ø
            ((1, 3, set()), [((3, 1), 0)]),
            # Turn edge X4 -> X1 with C = {X3}
            ((4, 1, {3}), [((1, 4), 0), ((1, 3), 0)]),
            # Turn edge X2 -> X0 with C = {X1}
            ((2, 0, {1}), [((0, 2), 0), ((2, 0), 1), ((0, 1), 0)]),
        ]
        for (x, y, C), changes in scenarios:
            result = ges.turn(x, y, C, A)
            expected = A.copy()
            for entry, value in changes:
                expected[entry] = value
            self.assertTrue((expected == result).all())

    def test_turn_operator_preconditions(self):
        A = np.array([[0, 1, 1, 0, 0],
                      [1, 0, 1, 1, 1],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 0, 0],
                      [0, 1, 0, 0, 0]])
        rejected = [
            # Trying to turn X1 -> X2 fails as edge already exists
            (1, 2, set()),
            # Trying to turn X3 -> X4 fails as they are not adjacent
            (3, 4, set()),
            # Trying to turn X3 <- X4 fails as they are not adjacent
            (4, 3, set()),
            # Turning X0 -> X1 with C = {X3,X2} fails as X2 is not a
            # neighbor of X1
            (0, 1, {3, 2}),
            # Turning X3 -> X1 with C = {X4,X0,X3} should fail as X3 is
            # contained in C
            (3, 1, {0, 3, 4}),
        ]
        for x, y, C in rejected:
            self._assert_rejected(ges.turn, x, y, C, A)

    def test_valid_turn_operators_preconditions(self):
        # Test preconditions
        A = np.array([[0, 1, 1, 0, 0],
                      [1, 0, 1, 1, 1],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 0, 0],
                      [0, 1, 0, 0, 0]])
        rejected = [
            # Trying to turn X1 -> X2 fails as edge already exists
            (1, 2),
            # Trying to turn X3 -> X4 fails as they are not adjacent
            (3, 4),
            # Trying to turn X3 <- X4 fails as they are not adjacent
            (4, 3),
        ]
        for x, y in rejected:
            self._assert_rejected(ges.score_valid_turn_operators, x, y, A, self.cache)

    def test_valid_turn_operators_1(self):
        A = np.array([[0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Turning the edge X1 <- X2 should yield one valid operator, for
        #   1. T = Ø, as NA_yx U Ø = {X3} is a clique
        operators = ges.score_valid_turn_operators(1, 2, A, self.cache)
        self.assertEqual(1, len(operators))
        # Expected graph: X1 <- X2 is turned (and X3 -> X2 oriented)
        expected = A.copy()
        expected[2, 1], expected[1, 2], expected[2, 3] = 0, 1, 0
        self.assertTrue((expected == operators[0][1]).all())

    def test_valid_turn_operators_2(self):
        # NOTE: Same graph as previous test
        A = np.array([[0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Turning the edge X1 <- X3 should yield one valid operator, for
        #   1. T = Ø, as NA_yx U Ø = {X2} is a clique
        operators = ges.score_valid_turn_operators(1, 3, A, self.cache)
        self.assertEqual(1, len(operators))
        # Expected graph: X1 <- X3 is turned (and X2 -> X3 oriented)
        expected = A.copy()
        expected[3, 1], expected[1, 3], expected[3, 2] = 0, 1, 0
        self.assertTrue((expected == operators[0][1]).all())

    def test_valid_turn_operators_3(self):
        # NOTE: Same graph as the two previous tests (i.e. _1 and _2)
        A = np.array([[0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Turning the edge X0 -> X1 should yield one valid operator, for
        #   1. T = Ø, as NA_yx U Ø = Ø is a clique
        operators = ges.score_valid_turn_operators(1, 0, A, self.cache)
        self.assertEqual(1, len(operators))
        # Expected graph: X1 <- X0 is turned
        expected = A.copy()
        expected[0, 1], expected[1, 0] = 0, 1
        self.assertTrue((expected == operators[0][1]).all())

    def test_valid_turn_operators_4(self):
        A = np.array([[0, 1, 1, 1, 1],
                      [0, 0, 0, 0, 0],
                      [1, 1, 0, 0, 0],
                      [1, 1, 0, 0, 0],
                      [1, 0, 0, 0, 0]])
        # Turning the edge X0 -> X1 should yield no valid
        # operators, for (note T0 = {X4})
        #   1. T = Ø, as C = NA_yx U Ø = {X2,X3} is not a clique
        #   2. T = {X4}, as C = NA_yx U {X4} = {X2,X3,X4} is not a clique
        operators = ges.score_valid_turn_operators(1, 0, A, self.cache)
        self.assertEqual(0, len(operators))

    def test_valid_turn_operators_5(self):
        A = np.array([[0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0],
                      [1, 0, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Turning the edge X0 -> X1 should yield one valid
        # operator, for (note T0 = {X2})
        #   1. T = Ø, as C = NA_yx U Ø = Ø is a clique, but the path
        #      X0 - X2 - X3 -> X1 does not contain a node in C
        #   2. T = {X2}, as C = NA_yx U {X2} = {X2} is a clique and
        #      satisfies the path condition
        operators = ges.score_valid_turn_operators(1, 0, A, self.cache)
        self.assertEqual(1, len(operators))
        # Expected graph: X1 -> X0 and X2 -> X0 are oriented
        expected = A.copy()
        expected[0, 1], expected[1, 0], expected[0, 2] = 0, 1, 0
        self.assertTrue((expected == operators[0][1]).all())

    def test_valid_turn_operators_6(self):
        A = np.array([[0, 1, 0, 0, 0],
                      [1, 0, 1, 1, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Orienting the edge X1 -> X3 yields no valid operators, as
        # all neighbors of X1 are adjacent to X3
        operators = ges.score_valid_turn_operators(1, 3, A, self.cache)
        self.assertEqual(0, len(operators))

    def test_valid_turn_operators_7(self):
        A = np.array([[0, 1, 0, 0, 0],
                      [1, 0, 1, 1, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        operators = ges.score_valid_turn_operators(3, 1, A, self.cache)
        # Orienting the edge X3 -> X1 yields only one valid operator,
        # as for (note ne(X1) = {X2, X0})
        #   C = Ø and C = {X2} condition (i) is not satisfied
        #   C = {X0, X2} is not a clique
        #   C = {X0} satisfies all three conditions
        self.assertEqual(1, len(operators))
        expected = np.array([[0, 1, 0, 0, 0],
                             [0, 0, 1, 0, 0],
                             [0, 1, 0, 1, 0],
                             [0, 1, 1, 0, 0],
                             [0, 0, 0, 0, 0]])
        self.assertTrue((expected == operators[0][1]).all())

    def test_valid_turn_operators_8(self):
        A = np.array([[0, 1, 0, 0, 0],
                      [1, 0, 1, 1, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # For the edge X0 -> X1 three operators are valid
        #   C = Ø : does not satisfy condition 1
        #   C = {X2}, {X3}, {X2,X3} are valid
        operators = ges.score_valid_turn_operators(0, 1, A, self.cache)
        self.assertEqual(3, len(operators))
        truth_2 = np.array([[0, 1, 0, 0, 0],
                            [0, 0, 0, 1, 0],
                            [0, 1, 0, 1, 0],
                            [0, 1, 1, 0, 0],
                            [0, 0, 0, 0, 0]])
        truth_3 = np.array([[0, 1, 0, 0, 0],
                            [0, 0, 1, 0, 0],
                            [0, 1, 0, 1, 0],
                            [0, 1, 1, 0, 0],
                            [0, 0, 0, 0, 0]])
        truth_23 = np.array([[0, 1, 0, 0, 0],
                             [0, 0, 0, 0, 0],
                             [0, 1, 0, 1, 0],
                             [0, 1, 1, 0, 0],
                             [0, 0, 0, 0, 0]])
        # Expected resulting graph for each choice of C
        expectations = (({2}, truth_2), ({3}, truth_3), ({2, 3}, truth_23))
        for (_, new_A, _, _, C) in operators:
            for subset, expected in expectations:
                if C == subset:
                    self.assertTrue((new_A == expected).all())

    def test_valid_turn_operators_9(self):
        A = np.array([[0, 1, 1, 0, 0, 1],
                      [1, 0, 1, 1, 0, 1],
                      [0, 0, 0, 0, 0, 0],
                      [0, 1, 1, 0, 0, 0],
                      [0, 0, 1, 0, 0, 0],
                      [1, 1, 0, 0, 0, 0]])
        # Orienting the edge X0 -> X1 there should be only one valid
        # operator. NA_yx = {X5} and ne(y) / {x} = {X3, X5}:
        #   C = Ø does not satisfy condition i
        #   C = {X3} is valid
        #   C = {X5} does not satisfy condition i
        #   C = {X3,X5} do not form a clique
        operators = ges.score_valid_turn_operators(0, 1, A, self.cache)
        self.assertEqual(1, len(operators))
        expected = np.array([[0, 1, 1, 0, 0, 1],
                             [0, 0, 1, 0, 0, 1],
                             [0, 0, 0, 0, 0, 0],
                             [0, 1, 1, 0, 0, 0],
                             [0, 0, 1, 0, 0, 0],
                             [1, 1, 0, 0, 0, 0]])
        self.assertTrue((expected == operators[0][1]).all())

    def test_valid_turn_operators_10(self):
        # Every valid turn operator must lead to an essential graph
        # that differs from the one it was applied to
        num_graphs = 10
        num_vars = 20
        for i in range(num_graphs):
            dag = sempler.generators.dag_avg_deg(num_vars, 3, 1, 1)
            cpdag = utils.dag_to_cpdag(dag)
            weights = dag * np.random.uniform(1, 2, dag.shape)
            obs_sample = sempler.LGANM(weights, (0, 0), (0.5, 1)).sample(n=1000)
            cache = GaussObsL0Pen(obs_sample)
            # Go through all non-zero entries (row, col) of the CPDAG
            # and score the turn operators for the pair (col, row)
            rows, cols = np.where(cpdag != 0)
            for x, y in zip(cols, rows):
                candidates = ges.score_valid_turn_operators(x, y, cpdag, cache)
                for (_, turned, _, _, _) in candidates:
                    turned_cpdag = ges.utils.pdag_to_cpdag(turned)
                    unchanged = (cpdag == turned_cpdag).all()
                    self.assertFalse(unchanged)
        print("\nChecked that valid turn operators result in different MEC for %i CPDAGs" % (i + 1))
import sempler
import numpy as np

# Connectivity matrix of a 5-variable model
W = np.array([
    [0, 0, 0, 0.1, 0],
    [0, 0, 2.1, 0, 0],
    [0, 0, 0, 3.2, 0],
    [0, 0, 0, 0, 5.0],
    [0, 0, 0, 0, 0],
])

# Put everything together into a linear Gaussian model; the two
# tuples are the second (means) and third (variances) arguments
noise_means = (0, 1)
noise_variances = (0, 1)
lganm = sempler.LGANM(W, noise_means, noise_variances)

# Draw 100 observations from the observational setting
sample_size = 100
samples = lganm.sample(sample_size)

# Draw 100 observations under a shift intervention on variable 1
# with standard gaussian noise (this overwrites the previous sample)
shift_on_variable_1 = {1: (0, 1)}
samples = lganm.sample(sample_size, shift_interventions=shift_on_variable_1)

# Obtain the observational environment in the "population setting"
distribution = lganm.sample(population=True)
import ges
import ges.scores
import sempler
import numpy as np

# Adjacency matrix of the data-generating graph
A = np.array([
    [0, 0, 1, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 0, 0, 1, 1],
    [0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0],
])

# Sample the edge weights uniformly from [1, 2) and generate
# observational data from the resulting Gaussian SCM using sempler
W = A * np.random.uniform(1, 2, size=A.shape)
scm = sempler.LGANM(W, (1, 2), (1, 2))
data = scm.sample(n=5000)

# Define the score class (gaussian BIC score on the observed data)
score_class = ges.scores.GaussObsL0Pen(data)

# Run GES with that score
fit_result = ges.fit(score_class)
estimate, score = fit_result
print(estimate, score)

# Example output (no random seed is set above, so the printed values
# vary between runs)
# [[0 0 1 0 0]
#  [0 0 1 0 0]
#  [0 0 0 1 1]
#  [0 0 0 0 1]
#  [0 0 0 1 0]] 24002.112921580803
def test_shift_noise_interventions_1(self):
    # Test sampling and interventions on a custom DAG, comparing
    # with results obtained via the path method: the expected sample
    # is built as (independent noise) @ (path matrix).T and compared
    # with the output of the sampler through utils.same_normal.
    np.random.seed(42)
    p = 6
    n = round(1e6)
    W = np.array([[0, 1, 1, 0, 0, 0],
                  [0, 0, 0, 0, 1, 1],
                  [0, 0, 0, 1, 0, 0],
                  [0, 0, 0, 0, 1, 1],
                  [0, 0, 0, 0, 0, 1],
                  [0, 0, 0, 0, 0, 0]])
    # Noise variance 0.16, i.e. standard deviation 0.4, and zero mean
    sem = sempler.LGANM(W, (0, 0), (0.16, 0.16))

    # Path matrix of the unmodified graph: row i holds the
    # coefficient of each noise term in variable i
    paths_obs = np.array([[1, 0, 0, 0, 0, 0],
                          [1, 1, 0, 0, 0, 0],
                          [1, 0, 1, 0, 0, 0],
                          [1, 0, 1, 1, 0, 0],
                          [2, 1, 1, 1, 1, 0],
                          [4, 2, 2, 2, 1, 1]])
    # Path matrix once X1 and X4 are cut from their parents by
    # do-interventions
    paths_do_1_4 = np.array([[1, 0, 0, 0, 0, 0],
                             [0, 1, 0, 0, 0, 0],
                             [1, 0, 1, 0, 0, 0],
                             [1, 0, 1, 1, 0, 0],
                             [0, 0, 0, 0, 1, 0],
                             [1, 1, 1, 1, 1, 1]])

    def check(noise_mean, noise_std, paths, **interventions):
        # The expected noise is drawn first and the sampler is called
        # second, so the random stream is consumed in a fixed order
        noise = np.random.normal(noise_mean, noise_std, size=(n, p))
        expected = noise @ paths.T
        drawn = sem.sample(n, **interventions)
        self.assertTrue(utils.same_normal(expected, drawn))

    no_shift = [0, 0, 0, 0, 0, 0]
    scenarios = [
        # Observational data
        (np.zeros(p), np.ones(p) * 0.4, paths_obs, {}),
        # Shift intervention on X4
        (no_shift, [.4, .4, .4, .4, .5, .4], paths_obs,
         {"shift_interventions": {4: (0, 0.09)}}),
        # Noise intervention on X4
        (no_shift, [.4, .4, .4, .4, 1, .4], paths_obs,
         {"noise_interventions": {4: (0, 1)}}),
        # Noiseless shift intervention on X2
        ([0, 0, 2, 0, 0, 0], [.4, .4, .4, .4, .4, .4], paths_obs,
         {"shift_interventions": {2: 2}}),
        # Do-interventions on X0 override shift interventions on X0
        ([2.1, 0, 0, 0, 0, 0], [0, .4, .4, .4, .4, .4], paths_obs,
         {"do_interventions": {0: 2.1},
          "shift_interventions": {0: (1, 2)}}),
        # Shift intervention on X0 and do-interventions on X1 and X4
        ([0, 2, 0, 0, 1, 0], [0.6, .5, .4, .4, .5, .4], paths_do_1_4,
         {"do_interventions": {1: (2, 0.25), 4: (1, 0.25)},
          "shift_interventions": {0: (0, 0.2)}}),
    ]
    for noise_mean, noise_std, paths, interventions in scenarios:
        check(noise_mean, noise_std, paths, **interventions)
class InsertOperatorTests(unittest.TestCase):
    # Tests for ges.insert and ges.score_valid_insert_operators.
    #
    # The class attributes below are evaluated once, when the class is
    # defined: a fixed 5-node graph, random weights on its edges and a
    # sample of n observations from the resulting model.

    true_A = np.array([[0, 0, 1, 0, 0],
                       [0, 0, 1, 0, 0],
                       [0, 0, 0, 1, 1],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])
    factorization = [(4, (2, 3)), (3, (2,)), (2, (0, 1)), (0, ()), (1, ())]
    true_B = true_A * np.random.uniform(1, 2, size=true_A.shape)
    scm = sempler.LGANM(true_B, (0, 0), (0.3, 0.4))
    p = len(true_A)
    n = 10000
    obs_data = scm.sample(n=n)

    # ------------------------------------------------------
    # Helpers

    def _small_pdag(self):
        # Graph shared by most tests: x2 - x4 and x4 -> x3
        A = np.zeros_like(self.true_A)
        A[2, 4], A[4, 2] = 1, 1  # x2 - x4
        A[4, 3] = 1  # x4 -> x3
        return A

    def _small_pdag_and_cache(self):
        # The shared graph together with a score built on the class data
        A = self._small_pdag()
        return A, GaussObsL0Pen(self.obs_data)

    def _expect_value_error(self, call, failure_msg=None):
        # Run the zero-argument callable; a ValueError is reported on
        # stdout, a normal return fails the test with failure_msg
        try:
            call()
        except ValueError as err:
            print("OK:", err)
        else:
            self.fail(failure_msg)

    def _check_only_plain_insert(self, x, y, A, cache):
        # Exactly one valid operator is expected and it must only add
        # the entry [x, y] to A
        operators = ges.score_valid_insert_operators(x, y, A, cache, debug=False)
        self.assertEqual(1, len(operators))
        _, new_A, _, _, _ = operators[0]
        expected = A.copy()
        expected[x, y] = 1
        self.assertTrue((new_A == expected).all())

    def _check_insert_into_x2(self, x):
        # Two valid operators are expected when inserting x -> x2 in
        # the shared graph: the first only adds [x, 2], the second
        # additionally clears [2, 4]
        A, cache = self._small_pdag_and_cache()
        operators = ges.score_valid_insert_operators(x, 2, A, cache, debug=False)
        self.assertEqual(2, len(operators))
        # Outcome of insert(x, 2, set())
        _, new_A, _, _, _ = operators[0]
        expected = A.copy()
        expected[x, 2] = 1
        self.assertTrue((new_A == expected).all())
        # Outcome of insert(x, 2, {4})
        _, new_A, _, _, _ = operators[1]
        expected = A.copy()
        expected[x, 2], expected[2, 4] = 1, 0
        self.assertTrue((new_A == expected).all())

    # ------------------------------------------------------
    # Tests

    def test_insert_1(self):
        # Test behaviour of the ges.insert(x,y,T) function
        message = "Call to insert should have failed"
        # Insert should fail on adjacent edges
        self._expect_value_error(
            lambda: ges.insert(0, 2, set(), self.true_A), message)
        # Insert should fail when T contains non-neighbors of y
        self._expect_value_error(
            lambda: ges.insert(0, 1, {2}, self.true_A), message)
        # Insert should fail when T contains adjacents of x
        A = self._small_pdag()
        self._expect_value_error(lambda: ges.insert(3, 2, {4}, A), message)
        # These should work: (x, y, T, entries of A expected to be set)
        working_cases = [
            (3, 2, set(), {(3, 2): 1}),
            (1, 2, set(), {(1, 2): 1}),
            (1, 2, {4}, {(1, 2): 1, (4, 2): 1, (2, 4): 0}),
            (1, 0, set(), {(1, 0): 1}),
        ]
        for x, y, T, changes in working_cases:
            # The expected matrix is built before the call, from a
            # copy of A
            true_new_A = A.copy()
            for entry, value in changes.items():
                true_new_A[entry] = value
            new_A = ges.insert(x, y, T, A)
            self.assertTrue((true_new_A == new_A).all())

    def test_insert_2(self):
        # Exhaustively apply insert(x, y, T) on random CPDAGs and check
        # both the v-structures and the full adjacency of the result
        G = 100
        p = 20
        for i in range(G):
            dag = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            cpdag = utils.dag_to_cpdag(dag)
            for x in range(p):
                # Can only apply the operator to non-adjacent nodes
                adj_x = utils.adj(x, cpdag)
                candidates = set(range(p)) - adj_x
                for y in candidates:
                    for T in utils.subsets(utils.neighbors(y, cpdag) - adj_x):
                        output = ges.insert(x, y, T, cpdag)
                        # Verify the new vstructures: one per t in T,
                        # with the two parents in increasing order
                        vstructs = utils.vstructures(output)
                        for t in T:
                            self.assertIn((min(x, t), y, max(x, t)), vstructs)
                        # Verify whole connectivity
                        truth = cpdag.copy()
                        truth[x, y] = 1  # add edge x -> y
                        members = list(T)
                        truth[members, y] = 1  # orient t -> y
                        truth[y, members] = 0
                        self.assertTrue((output == truth).all())
        print("\nExhaustively checked insert operator on %i CPDAGS" % (i + 1))

    def test_valid_insert_operators_1a(self):
        A, cache = self._small_pdag_and_cache()
        # there should only be one valid operator, as
        #   1. X1 has no neighbors in A, so T0 = {set()}
        #   2. na_yx is also an empty set, thus na_yx U T is a clique
        #   3. there are no semi-directed paths from y to x
        self._check_only_plain_insert(0, 1, A, cache)

    def test_valid_insert_operators_1b(self):
        A, cache = self._small_pdag_and_cache()
        # there should only be one valid operator, as
        #   1. X0 has no neighbors in A, so T0 = {set()}
        #   2. na_yx is also an empty set, thus na_yx U T is a clique
        #   3. there are no semi-directed paths from y to x
        self._check_only_plain_insert(1, 0, A, cache)

    def test_valid_insert_operators_2a(self):
        # there should be two valid operators, as T0 = {X4}
        #   1. insert(X0,X2,set()) should be valid
        #   2. and also insert(X0,X2,{X4}), as na_yx U T = {X4} and is a clique
        #   3. there are no semi-directed paths from y to x
        self._check_insert_into_x2(0)

    def test_valid_insert_operators_2b(self):
        A, cache = self._small_pdag_and_cache()
        # there should only be one valid operator, as
        #   1. X0 has no neighbors in A, so T0 = {set()}
        #   2. na_yx is also an empty set, thus na_yx U T is a clique
        #   3. there are no semi-directed paths from y to x
        self._check_only_plain_insert(2, 0, A, cache)

    def test_valid_insert_operators_3a(self):
        # there should be two valid operators, as T0 = {X4}
        #   1. insert(X1,X2,set()) should be valid
        #   2. and also insert(X1,X2,{X4}), as na_yx U T = {X4} and is a clique
        #   3. there are no semi-directed paths from y to x
        self._check_insert_into_x2(1)

    def test_valid_insert_operators_3b(self):
        A, cache = self._small_pdag_and_cache()
        # there should only be one valid operator, insert(2,1,set()), as
        #   1. X1 has no neighbors in A, so T0 = {set()}
        #   2. na_yx is also an empty set, thus na_yx U T is a clique
        #   3. there are no semi-directed paths from y to x
        self._check_only_plain_insert(2, 1, A, cache)

    def test_valid_insert_operators_4a(self):
        A, cache = self._small_pdag_and_cache()
        # there should be one valid operator, as T0 = set(), na_yx = {4}
        #   1. insert(X3,X2,set()) should be valid
        #   2. na_yx U T = {X4} should be a clique
        #   3. the semi-directed path X2-X4->X3 contains one node in na_yx U T
        self._check_only_plain_insert(3, 2, A, cache)

    def test_valid_insert_operators_4b(self):
        A, cache = self._small_pdag_and_cache()
        # there should be one valid operator, as T0 = set(), na_yx = set()
        #   1. insert(X2,X3,set()) should be valid
        #   2. na_yx U T = set() should be a clique
        #   3. there are no semi-directed paths between X3 and X2
        self._check_only_plain_insert(2, 3, A, cache)

    def test_valid_insert_operators_5(self):
        A, cache = self._small_pdag_and_cache()
        # Should fail as 2,4 are adjacent (first two pairs) and as
        # 3,4 are adjacent (last two pairs)
        for x, y in [(2, 4), (4, 2), (3, 4), (4, 3)]:
            self._expect_value_error(
                lambda: ges.score_valid_insert_operators(x, y, A, cache, debug=False))

    def test_valid_insert_operators_6(self):
        A = np.array([[0, 0, 1, 0],
                      [0, 0, 1, 1],
                      [1, 1, 0, 0],
                      [0, 0, 0, 0]])
        # Only the first four variables of the class data are scored
        data = self.obs_data[:, 0:4]
        cache = GaussObsL0Pen(data)
        # There should be one valid operator for x3 -> x2
        #   1. na_yx = {1}, T0 = {0}
        #   2. for T=set(), na_yx U T = {1} which is a clique, and
        #   contains a node in the semi-directed path 2-1->3
        #   3. for T = {0}, na_yx U T = {0,1} which is not a clique
        self._check_only_plain_insert(3, 2, A, cache)

    def test_valid_insert_operators_7(self):
        A = np.array([[0, 0, 1, 0],
                      [0, 0, 0, 1],
                      [1, 1, 0, 0],
                      [0, 0, 0, 0]])
        data = self.obs_data[:, 0:4]
        cache = GaussObsL0Pen(data)
        # There should be no valid operator for x3 -> x2
        #   1. na_yx = set(), T0 = {0}
        #   2. for T=set(), na_yx U T = set() which is a clique, but does not
        #   contain a node in the semi-directed path 2->1->3
        #   3. for T = {0}, na_yx U T = {0} which is a clique, but does not
        #   contain a node in the semi-directed path 2->1->3
        valid_operators = ges.score_valid_insert_operators(3, 2, A, cache, debug=False)
        self.assertEqual(0, len(valid_operators))

    def test_valid_insert_operators_8(self):
        # NOTE(review): the graph, the call and the assertion are
        # identical to test_valid_insert_operators_7. The previous
        # header comment mentioned x2 -> x3, but the call below is for
        # x3 -> x2; confirm whether a distinct case was intended.
        A = np.array([[0, 0, 1, 0],
                      [0, 0, 0, 1],
                      [1, 1, 0, 0],
                      [0, 0, 0, 0]])
        data = self.obs_data[:, 0:4]
        cache = GaussObsL0Pen(data)
        # There should be no valid operator for x3 -> x2
        #   1. na_yx = set(), T0 = {0}
        #   2. for T=set(), na_yx U T = set() which is a clique, but does not
        #   contain a node in the semi-directed path 2->1->3
        #   3. for T = {0}, na_yx U T = {0} which is a clique, but does not
        #   contain a node in the semi-directed path 2->1->3
        valid_operators = ges.score_valid_insert_operators(3, 2, A, cache, debug=False)
        self.assertEqual(0, len(valid_operators))
class DeleteOperatorTests(unittest.TestCase):
    # Tests for ges.delete and ges.score_valid_delete_operators.
    #
    # The class attributes below are evaluated once, when the class is
    # defined: a fixed 5-node graph, random weights on its edges and a
    # sample of n observations from the resulting model.

    true_A = np.array([[0, 0, 1, 0, 0],
                       [0, 0, 1, 0, 0],
                       [0, 0, 0, 1, 1],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])
    factorization = [(4, (2, 3)), (3, (2,)), (2, (0, 1)), (0, ()), (1, ())]
    true_B = true_A * np.random.uniform(1, 2, size=true_A.shape)
    scm = sempler.LGANM(true_B, (0, 0), (0.3, 0.4))
    p = len(true_A)
    n = 10000
    obs_data = scm.sample(n=n)

    # ------------------------------------------------------
    # Helpers

    def _assert_delete_raises(self, x, y, H, A):
        # ges.delete(x, y, H, A) must raise a ValueError; the error is
        # echoed on stdout as before
        with self.assertRaises(ValueError, msg="Call to delete should have failed") as ctx:
            ges.delete(x, y, H, A)
        print("OK", ctx.exception)

    def _assert_contains(self, operators, expected_A):
        # expected_A must be the resulting graph (element 1) of one of
        # the scored operators
        resulting_graphs = [op[1] for op in operators]
        self.assertTrue(utils.member(resulting_graphs, expected_A) is not None)

    # ------------------------------------------------------
    # Tests

    def test_delete_operator_preconditions(self):
        # Test that if x and y are not adjacent an exception is thrown
        self._assert_delete_raises(0, 1, set(), self.true_A)
        self._assert_delete_raises(3, 0, set(), self.true_A)
        # Test that if there is no edge x -> y or x - y an exception
        # is thrown
        self._assert_delete_raises(3, 2, set(), self.true_A)
        self._assert_delete_raises(2, 1, set(), self.true_A)
        # Test that if H is not a subset of neighbors of Y and
        # adjacents of X, an exception is thrown
        cpdag = self.true_A.copy()
        cpdag[4, 3] = 1
        self._assert_delete_raises(2, 4, {1}, cpdag)
        self._assert_delete_raises(2, 4, {0}, cpdag)
        self._assert_delete_raises(4, 2, {3}, cpdag)

    def test_delete_operator_1(self):
        # Test the result from applying the delete operator to a
        # hand-picked matrix
        A = np.array([[0, 0, 1, 0, 0],
                      [0, 0, 1, 0, 0],
                      [0, 0, 0, 1, 1],
                      [0, 0, 0, 0, 1],
                      [0, 0, 0, 1, 0]])
        # remove the edge 2 -> 4, with H = set()
        new_A = ges.delete(2, 4, set(), A)
        truth = A.copy()
        truth[2, 4] = 0
        self.assertTrue((truth == new_A).all())
        # remove the edge 2 -> 4, with H = {3}
        new_A = ges.delete(2, 4, {3}, A)
        truth = A.copy()
        truth[2, 4] = 0
        truth[3, 4] = 0
        self.assertTrue((truth == new_A).all())

    def test_delete_operator_2(self):
        # Test the result from applying the delete operator to a
        # hand-picked matrix
        A = np.array([[0, 1, 1, 1, 0],
                      [1, 0, 1, 0, 0],
                      [1, 1, 0, 1, 0],
                      [1, 0, 1, 0, 1],
                      [0, 0, 0, 1, 0]])
        # remove the edge 0 - 1, with H = set()
        new_A = ges.delete(0, 1, set(), A)
        truth = A.copy()
        truth[1, 0], truth[0, 1] = 0, 0
        self.assertTrue((truth == new_A).all())
        # remove the edge 0 - 1, with H = {2}
        new_A = ges.delete(0, 1, {2}, A)
        truth = A.copy()
        truth[1, 0], truth[0, 1] = 0, 0
        truth[2, 0], truth[2, 1] = 0, 0
        self.assertTrue((truth == new_A).all())
        # remove the edge 0 - 2 with H = set()
        new_A = ges.delete(0, 2, set(), A)
        truth = A.copy()
        truth[0, 2], truth[2, 0] = 0, 0
        self.assertTrue((truth == new_A).all())
        # remove the edge 0 - 2 with H = {1}
        new_A = ges.delete(0, 2, {1}, A)
        truth = A.copy()
        truth[0, 2], truth[2, 0] = 0, 0
        truth[1, 0], truth[1, 2] = 0, 0
        self.assertTrue((truth == new_A).all())
        # remove the edge 0 - 2 with H = {1,3}
        new_A = ges.delete(0, 2, {1, 3}, A)
        truth = A.copy()
        truth[0, 2], truth[2, 0] = 0, 0
        truth[1, 0], truth[1, 2] = 0, 0
        truth[3, 0], truth[3, 2] = 0, 0
        self.assertTrue((truth == new_A).all())

    def test_delete_operator_3(self):
        # Exhaustively apply delete(x, y, H) on random CPDAGs and check
        # both the v-structures and the full adjacency of the result
        G = 100
        p = 20
        for i in range(G):
            A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            cpdag = utils.dag_to_cpdag(A)
            for x in range(p):
                # Can only apply the operator to X -> Y or X - Y
                for y in np.where(cpdag[x, :] != 0)[0]:
                    for H in utils.subsets(utils.na(y, x, cpdag)):
                        output = ges.delete(x, y, H, cpdag)
                        # Verify the new vstructures: one per h in H,
                        # with x and y in increasing order
                        vstructs = utils.vstructures(output)
                        for h in H:
                            vstruct = (x, h, y) if x < y else (y, h, x)
                            self.assertIn(vstruct, vstructs)
                        # Verify whole connectivity
                        truth = cpdag.copy()
                        # Remove edge
                        truth[x, y], truth[y, x] = 0, 0
                        # Orient y -> h
                        truth[list(H), y] = 0
                        # Orient x -> h for the members of H that are
                        # neighbors of x
                        truth[list(utils.neighbors(x, cpdag) & H), x] = 0
                        self.assertTrue((output == truth).all())
        print("\nExhaustively checked delete operator on %i CPDAGS" % (i + 1))

    def test_valid_delete_operators_preconditions(self):
        A = np.array([[0, 0, 1, 0, 0],
                      [0, 0, 1, 0, 0],
                      [0, 0, 0, 1, 1],
                      [0, 0, 1, 0, 1],
                      [0, 0, 1, 1, 0]])
        # Should fail as X0 and X1 are not adjacent
        self._assert_delete_raises(0, 1, set(), A)
        # Should fail as X0 and X1 are not adjacent
        self._assert_delete_raises(1, 0, set(), A)
        # Should fail as X0 and X3 are not adjacent
        self._assert_delete_raises(0, 3, set(), A)
        # Should fail as there is no edge X2 -> X0 or X2 - X0
        self._assert_delete_raises(2, 0, set(), A)
        # Should fail as there is no edge X2 -> X1 or X2 - X1
        self._assert_delete_raises(2, 1, set(), A)

    def test_valid_delete_operators_1(self):
        A = np.array([[0, 0, 1, 0, 0],
                      [0, 0, 1, 0, 0],
                      [0, 0, 0, 1, 1],
                      [0, 0, 1, 0, 1],
                      [0, 0, 1, 1, 0]])
        cache = GaussObsL0Pen(self.obs_data)
        # Removing the edge X2 - X4 should yield two valid
        # operators, for:
        #   1. H = Ø, as NA_yx \ Ø = {X3} is a clique
        #   2. H = {3}, as NA_yx \ {X3} = Ø is a clique
        output = ges.score_valid_delete_operators(2, 4, A, cache)
        self.assertEqual(2, len(output))
        A1, A2 = A.copy(), A.copy()
        # Remove X2 - X4
        A1[2, 4], A1[4, 2], A2[2, 4], A2[4, 2] = 0, 0, 0, 0
        # Orient X2 -> X3, X4 -> X3
        A2[3, 2], A2[3, 4] = 0, 0
        self._assert_contains(output, A1)
        self._assert_contains(output, A2)

    def test_valid_delete_operators_2(self):
        A = np.array([[0, 0, 1, 0, 0],
                      [0, 0, 1, 0, 0],
                      [0, 0, 0, 1, 1],
                      [0, 0, 1, 0, 1],
                      [0, 0, 1, 1, 0]])
        cache = GaussObsL0Pen(self.obs_data)
        # Removing the edge X1 -> X2 should yield exactly one valid
        # operator, the one with H = Ø
        output = ges.score_valid_delete_operators(1, 2, A, cache)
        self.assertEqual(1, len(output))
        expected = A.copy()
        # Remove X1 -> X2 (A[2, 1] is already 0)
        expected[1, 2] = 0
        self.assertTrue((expected == output[0][1]).all())

    def test_valid_delete_operators_3(self):
        # Check symmetry of the delete operator when X - Y
        G = 100
        p = 20
        for i in range(G):
            A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            cpdag = utils.dag_to_cpdag(A)
            W = A * np.random.uniform(1, 2, A.shape)
            obs_sample = sempler.LGANM(W, (0, 0), (0.5, 1)).sample(n=1000)
            cache = GaussObsL0Pen(obs_sample)
            fro, to = np.where(utils.only_undirected(cpdag))
            # Test the operator to all undirected edges
            for (x, y) in zip(fro, to):
                output_a = ges.score_valid_delete_operators(x, y, cpdag, cache)
                output_b = ges.score_valid_delete_operators(y, x, cpdag, cache)
                # zip() below stops at the shorter list, so check first
                # that both directions yield the same number of
                # operators; otherwise a mismatch would go unnoticed
                self.assertEqual(len(output_a), len(output_b))
                for (op_a, op_b) in zip(output_a, output_b):
                    # Check resulting state is the same
                    self.assertTrue((op_a[1] == op_b[1]).all())
                    self.assertAlmostEqual(op_a[0], op_b[0])
        print("\nChecked equality of delete operator on undirected edges in %i CPDAGS" % (i + 1))

    def test_valid_delete_operators_4(self):
        A = np.array([[0, 1, 1, 0],
                      [0, 0, 1, 0],
                      [0, 1, 0, 1],
                      [0, 0, 1, 0]])
        # The graph has four nodes, so only the first four variables
        # of the class data are scored
        cache = GaussObsL0Pen(self.obs_data[:, 0:4])
        # Removing the edge X0 -> X2 should yield two valid
        # operators, for:
        #   1. H = Ø, as NA_yx \ Ø = {X1} is a clique
        #   2. H = {1}, as NA_yx \ {X1} = Ø is a clique
        output = ges.score_valid_delete_operators(0, 2, A, cache)
        self.assertEqual(2, len(output))
        A1, A2 = A.copy(), A.copy()
        # Remove X0 -> X2
        A1[0, 2], A2[0, 2] = 0, 0
        # Orient X2 -> X1
        A2[1, 2] = 0
        self._assert_contains(output, A1)
        self._assert_contains(output, A2)

    def test_valid_delete_operators_5(self):
        A = np.array([[0, 1, 1, 1],
                      [0, 0, 1, 1],
                      [1, 1, 0, 0],
                      [1, 1, 0, 0]])
        # The graph has four nodes, so only the first four variables
        # of the class data are scored
        cache = GaussObsL0Pen(self.obs_data[:, 0:4])
        # Removing the edge X0 -> X1 should yield three valid
        # operators, for:
        #   0. Invalid H = Ø, as NA_yx \ Ø = {X2,X3} is not a clique
        #   1. H = {X2}, as NA_yx \ H = {X3} is a clique
        #   2. H = {X3}, as NA_yx \ H = {X2} is a clique
        #   3. H = {X2,X3}, as NA_yx \ H = Ø is a clique
        output = ges.score_valid_delete_operators(0, 1, A, cache)
        self.assertEqual(3, len(output))
        # v-structure on X2, i.e orient X0 -> X2, X1 -> X2
        A1 = np.array([[0, 0, 1, 1],
                       [0, 0, 1, 1],
                       [0, 0, 0, 0],
                       [1, 1, 0, 0]])
        # v-structure on X3, i.e. orient X0 -> X3, X1 -> X3
        A2 = np.array([[0, 0, 1, 1],
                       [0, 0, 1, 1],
                       [1, 1, 0, 0],
                       [0, 0, 0, 0]])
        # v-structures on X2 and X3
        A3 = np.array([[0, 0, 1, 1],
                       [0, 0, 1, 1],
                       [0, 0, 0, 0],
                       [0, 0, 0, 0]])
        self._assert_contains(output, A1)
        self._assert_contains(output, A2)
        self._assert_contains(output, A3)
def test_interventions_2(self):
    # Test that the means and variances of variables in the joint
    # distribution are what is expected via the path method, in the
    # observational setting and under two do-interventions.
    W = np.array([[0, 1, 1],
                  [0, 0, 1],
                  [0, 0, 0]])
    n = round(1e6)
    base_variances = np.array([1, 2, 3]) * 0.1
    base_means = np.array([1, 2, 3])
    sem = sempler.LGANM(W, base_means, base_variances)
    np.random.seed(42)
    w01, w02, w12 = W[0, 1], W[0, 2], W[1, 2]

    def check(means, variances, x1_keeps_parent, **sample_kwargs):
        # Build the truth by propagating independent noise through
        # the graph; X1 ignores X0 when it is intervened upon
        noise = np.random.normal(means, variances**0.5, size=(n, 3))
        truth = np.zeros_like(noise)
        truth[:, 0] = noise[:, 0]
        if x1_keeps_parent:
            truth[:, 1] = truth[:, 0] * w01 + noise[:, 1]
        else:
            truth[:, 1] = noise[:, 1]
        truth[:, 2] = truth[:, 0] * w02 + truth[:, 1] * w12 + noise[:, 2]
        samples = sem.sample(n, **sample_kwargs)
        self.assertTrue(utils.same_normal(truth, samples))
        # Test that variances/means are as expected. The weight of
        # X0 -> X1 is replaced by 0 when X1 is intervened upon, which
        # removes the X0 terms that travel through X1.
        into_x1 = w01 if x1_keeps_parent else 0
        x0_on_x2 = into_x1 * w12 + w02  # total effect of X0 on X2
        true_vars, true_means = np.zeros(3), np.zeros(3)
        true_vars[0] = variances[0]
        true_vars[1] = into_x1**2 * variances[0] + variances[1]
        true_vars[2] = (x0_on_x2**2 * variances[0]
                        + w12**2 * variances[1] + variances[2])
        true_means[0] = means[0]
        true_means[1] = into_x1 * means[0] + means[1]
        true_means[2] = x0_on_x2 * means[0] + w12 * means[1] + means[2]
        self.assertTrue(
            np.allclose(true_vars, np.var(samples, axis=0), atol=1e-2))
        self.assertTrue(
            np.allclose(true_means, np.mean(samples, axis=0), atol=1e-2))

    # Test observational data
    check(base_means, base_variances, True)
    # Test under intervention on X1 <- N(0,0.1)
    check(np.array([1., 0., 3.]), np.array([1., 1., 3.]) * 0.1, False,
          do_interventions={1: (0, 0.1)})
    # Test under intervention on do(X0 = 0)
    check(np.array([0., 2., 3.]), np.array([0., 2., 3.]) * 0.1, True,
          do_interventions={0: 0})