コード例 #1
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
 def test_means(self):
     """Check that an explicit vector of means is stored unchanged."""
     num_vars = 10
     expected_means = np.arange(num_vars)
     adjacency = sempler.generators.dag_avg_deg(num_vars, num_vars / 4, 1, 1)
     model = sempler.LGANM(adjacency, expected_means, (0, 1))
     matches = model.means == expected_means
     self.assertTrue(matches.all())
コード例 #2
0
 def test_vs_cdt_1(self):
     """Compare ges.fit_bic with the GES implementation wrapped by cdt.

     For each of NUM_GRAPHS randomly generated graphs, an
     observational sample is drawn from a linear Gaussian SCM and
     the estimate returned by ges.fit_bic must match, entry by
     entry, the one obtained through the R package pcalg (called
     via the cdt package).
     """
     np.random.seed(15)
     G = NUM_GRAPHS  # number of graphs
     p = 15  # number of variables
     n = 1500  # size of the observational sample
     for i in range(G):
         print("  Checking SCM %d" % (i))
         start = time.time()
         A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
         # Keep the sparsity pattern of A, drawing weights from U(1, 2)
         W = A * np.random.uniform(1, 2, A.shape)
         obs_sample = sempler.LGANM(W, (1, 10), (0.5, 1)).sample(n=n)
         # Estimate the equivalence class using the pcalg
         # implementation of GES (package cdt)
         data = pd.DataFrame(obs_sample)
         output = GES(verbose=True).predict(data)
         estimate_cdt = nx.to_numpy_array(output)
         end = time.time()
         print("    GES-CDT done (%0.2f seconds)" % (end - start))
         start = time.time()
         # Estimate using this implementation
         # Test debugging output for the first 2 SCMs
         estimate, _ = ges.fit_bic(obs_sample,
                                   iterate=True,
                                   debug=4 if i < 2 else 2)
         end = time.time()
         print("    GES-own done (%0.2f seconds)" % (end - start))
         self.assertTrue((estimate == estimate_cdt).all())
     # Report G rather than the loop index: `i` is unbound when
     # NUM_GRAPHS is 0, and equals G - 1 after a complete loop
     print("\nCompared with PCALG implementation on %d DAGs" % G)
コード例 #3
0
ファイル: evaluation.py プロジェクト: musikisomorphie/aicp
def gen_cases(n, P, k, w_min=1, w_max=1, var_min=1, var_max=1, int_min=0, int_max=0, random_state=None):
    """
    Generate random experimental cases (ie. linear SEMs). Parameters:
      - n: total number of cases
      - P: number of variables in the SEMs (either an integer or a tuple to indicate a range)
      - k: forwarded to sempler.dag_avg_deg (presumably the average node degree of the graphs)
      - w_min, w_max: Weights of the SEMs are sampled at uniform between w_min and w_max
      - var_min, var_max: range handed to sempler.LGANM as its second argument (presumably the noise variances)
      - int_min, int_max: range handed to sempler.LGANM as its third argument (presumably the intercepts)
      - random_state: to fix the random seed for reproducibility

    Returns a list of n TestCase objects. Graphs in which the randomly
    chosen target has no parents are discarded and a new graph is drawn,
    so the loop only ends once n targets with at least one parent have
    been found.
    """
    if random_state is not None:
        np.random.seed(random_state)
    cases = []
    while len(cases) < n:
        # When P is a tuple the number of variables is drawn from
        # the inclusive range [P[0], P[1]]
        p = np.random.randint(P[0], P[1] + 1) if isinstance(P, tuple) else P
        W = sempler.dag_avg_deg(p, k, w_min, w_max)
        target = np.random.choice(range(p))
        # The first output of graph_info is both the acceptance
        # criterion and the ground truth stored in the case
        parents = utils.graph_info(target, W)[0]
        if len(parents) > 0:
            sem = sempler.LGANM(W, (var_min, var_max), (int_min, int_max))
            # Cases are numbered consecutively in order of acceptance
            cases.append(TestCase(len(cases), sem, target, parents))
    return cases
コード例 #4
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
 def test_distribution(self):
     """Check the "population" distributions returned by LGANM.sample.

     Each scenario lists the extra keyword arguments passed to
     sample, followed by the expected mean and covariance of the
     returned distribution.
     """
     # Nonzero entries of W: (0, 2), (1, 2) and (2, 3)
     W = np.array([[0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 0, 1], [0, 0, 0, 0]])
     # SEM with unit weights and standard normal noise variables
     sem = sempler.LGANM(W, (0, 0), (1, 1))
     scenarios = [
         # Observational distribution
         ({},
          np.zeros(4),
          np.array([[1, 0, 1, 1], [0, 1, 1, 1], [1, 1, 3, 3],
                    [1, 1, 3, 4]])),
         # Intervention setting variable 0 to the constant 1
         ({'do_interventions': {0: 1}},
          np.array([1, 0, 1, 1]),
          np.array([[0, 0, 0, 0], [0, 1, 1, 1], [0, 1, 2, 2],
                    [0, 1, 2, 3]])),
         # Interventions on variables 0 and 1 given as the pairs
         # (0, 2) and (1, 2)
         ({'do_interventions': {0: (0, 2), 1: (1, 2)}},
          np.array([0, 1, 1, 1]),
          np.array([[2, 0, 2, 2], [0, 2, 2, 2], [2, 2, 5, 5],
                    [2, 2, 5, 6]])),
     ]
     for sample_kwargs, expected_mean, expected_cov in scenarios:
         distribution = sem.sample(population=True, **sample_kwargs)
         self.assertTrue((distribution.mean == expected_mean).all())
         self.assertTrue((distribution.covariance == expected_cov).all())
コード例 #5
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
 def test_basic(self):
     """Check the attributes of a freshly initialised LGANM object."""
     p = 10
     W = sempler.generators.dag_avg_deg(p, p / 4, 1, 1)
     sem = sempler.LGANM(W, (0, 0), (1, 1))
     self.assertTrue((sem.variances == np.ones(p)).all())
     self.assertTrue((sem.means == np.zeros(p)).all())
     # Every one of the p * p entries of W must be exactly 0 or 1.
     # This has to be assertEqual: assertTrue(x, y) takes y as the
     # failure message and only checks that x is truthy.
     binary_entries = np.sum((sem.W == 0) | (sem.W == 1))
     self.assertEqual(binary_entries, p * p)
コード例 #6
0
 def test_blanket_behaviour(self):
     """Check how the stable blanket reacts to single interventions.

     For random graphs with 2 to 7 variables and every target in
     them, the stable blanket computed from the observational
     distribution alone is compared with the one obtained after
     adding an environment with an intervention on a parent, on a
     parent of a child, or on a child of the target.
     """
     def blanket_of(target, environments):
         # Stable blanket derived from the population ICP output
         icp = population_icp(environments,
                              target,
                              debug=False,
                              selection='all')
         return stable_blanket(icp.accepted, icp.mses)

     def intervene_on_one(sem, candidates):
         # Pick one candidate at random; return it together with the
         # population distribution under an intervention on it
         node = np.random.choice(list(candidates))
         return node, sem.sample(population=True,
                                 do_interventions={node: (1, 5)})

     np.random.seed(7)
     for p in range(2, 8):
         W = sempler.dag_avg_deg(p, 2.5, -1, 1)
         sem = sempler.LGANM(W, (0.1, 2))
         dist = sem.sample(population=True)
         for i in range(p):
             parents, children, poc, _mb = utils.graph_info(i, W)
             sb_0 = blanket_of(i, [dist])
             # An intervention on a parent should leave the stable
             # blanket the same
             if len(parents) > 0:
                 _, dist_pa = intervene_on_one(sem, parents)
                 self.assertEqual(sb_0, blanket_of(i, [dist, dist_pa]))
             # So should an intervention on a parent of a child that
             # is not itself a child
             only_poc = poc.difference(children)
             if len(only_poc) > 0:
                 _, dist_pc = intervene_on_one(sem, only_poc)
                 self.assertEqual(sb_0, blanket_of(i, [dist, dist_pc]))
             # An intervention on a child should affect the stable
             # blanket
             if len(children) > 0:
                 ch, dist_ch = intervene_on_one(sem, children)
                 sb_ch = blanket_of(i, [dist, dist_ch])
                 # Second output of graph_info, i.e. the same position
                 # that is bound to `children` for the target above
                 _, ch_children, _, _ = utils.graph_info(ch, W)
                 for d in ch_children.union({ch}):
                     self.assertTrue(d not in sb_ch)
コード例 #7
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
 def test_memory(self):
     """Check that LGANM copies its arguments instead of aliasing them.

     Each input array is modified after the model has been built;
     if the model kept a reference, its attribute would still be
     equal to the modified input.
     """
     variances = np.array([1, 2, 3])
     means = np.array([3, 4, 5])
     W = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
     sem = sempler.LGANM(W, means, variances)
     # Change one entry of every input
     variances[0] = 0
     means[2] = 1
     W[0, 0] = 2
     pairs = ((W, sem.W), (variances, sem.variances), (means, sem.means))
     for modified, stored in pairs:
         self.assertFalse((modified == stored).all())
コード例 #8
0
ファイル: test_anm.py プロジェクト: juangamella/sempler
 def test_gaussian_sampling(self):
     """Check that an ANM mirroring an LGANM produces the same samples.

     The ANM is built with the linear assignments encoded in W and
     with normal noise using the means and variances of the LGANM.
     Both models are then sampled under K sets of intervention
     targets and the samples compared with same_normal.
     """
     K = 50
     W = np.array([[0, 0, 0, 0.2, 0], [0, 0, 0.4, 0, 0], [0, 0, 0, 0.3, 0],
                   [0, 0, 0, 0, 0.5], [0, 0, 0, 0, 0]])
     lganm = sempler.LGANM(W, (1, 2), (1, 2))
     noise_distributions = [
         sempler.noise.normal(mean, variance)
         for mean, variance in zip(lganm.means, lganm.variances)
     ]
     # One assignment per variable; None for the first two variables
     assignments = [
         None,
         None,
         lambda x: .4 * x,
         lambda x: .2 * x[:, 0] + .3 * x[:, 1],
         lambda x: .5 * x,
     ]
     anm = sempler.ANM(W, assignments, noise_distributions)
     n = round(1e6)
     # NOTE(review): (0, 3) presumably bounds the number of targets
     # per set; only sizes up to 3 are handled below - confirm
     interventions = sempler.generators.intervention_targets(
         lganm.p, K, (0, 3))
     for targets in interventions:
         print(targets)
         means = np.random.uniform(0, 5, len(targets))
         variances = np.random.uniform(2, 3, len(targets))
         interventions_lganm = {
             t: (m, v) for t, m, v in zip(targets, means, variances)
         }
         interventions_anm = {
             t: sempler.noise.normal(m, v)
             for t, m, v in zip(targets, means, variances)
         }
         # Sample each SCM; the type of intervention depends on the
         # number of targets
         # TODO: Combine different interventions in one
         if len(targets) <= 1:
             samples_anm = anm.sample(n, do_interventions=interventions_anm)
             samples_lganm = lganm.sample(
                 n, do_interventions=interventions_lganm)
         elif len(targets) == 2:
             samples_anm = anm.sample(
                 n, shift_interventions=interventions_anm)
             samples_lganm = lganm.sample(
                 n, shift_interventions=interventions_lganm)
         elif len(targets) == 3:
             samples_anm = anm.sample(
                 n, noise_interventions=interventions_anm)
             samples_lganm = lganm.sample(
                 n, noise_interventions=interventions_lganm)
         # Both samples must follow the same distribution
         same = sempler.utils.same_normal(samples_anm,
                                          samples_lganm,
                                          debug=False)
         self.assertTrue(same)
コード例 #9
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
 def test_sampling_args(self):
     """Check the return type of LGANM.sample for several argument sets.

     Finite samples must be returned as np.ndarray, also when empty
     or None interventions are given; with population=True the
     result must be a sempler.NormalDistribution.
     """
     variances = np.array([1, 2, 3])
     means = np.array([3, 4, 5])
     W = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
     sem = sempler.LGANM(W, means, variances)
     finite_sample_kwargs = [
         {},
         {'shift_interventions': {}},
         {'do_interventions': {}},
         {'shift_interventions': None},
         {'do_interventions': None},
     ]
     for kwargs in finite_sample_kwargs:
         self.assertEqual(np.ndarray, type(sem.sample(n=1, **kwargs)))
     # With and without an explicit sample size
     for kwargs in ({'n': 1}, {}):
         self.assertEqual(sempler.NormalDistribution,
                          type(sem.sample(population=True, **kwargs)))
コード例 #10
0
def gen_scms(G, p, k, w_min, w_max, m_min, m_max, v_min, v_max):
    """
    Generate random linear Gaussian SCMs (sempler.LGANM objects). Parameters:
      - G: total number of SCMs to generate
      - p: number of variables in the SCMs
      - k: average node degree
      - w_min, w_max: Weights of the SCMs are sampled at uniform between w_min and w_max;
        the sign of every entry is then flipped at random
      - m_min, m_max: Intercepts of the variables of the SCMs are sampled at uniform between m_min and m_max
      - v_min, v_max: Variances of the variables are sampled at uniform between v_min and v_max

    Returns a list with the generated SCMs.
    """
    scms = []
    while len(scms) < G:
        W = sempler.generators.dag_avg_deg(p, k, w_min, w_max)
        # Random sign for every entry, applied in place
        signs = np.random.choice([-1, 1], size=W.shape)
        W *= signs
        scms.append(sempler.LGANM(W, (m_min, m_max), (v_min, v_max)))
    return scms
コード例 #11
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
 def test_sampling_2(self):
     """Compare LGANM samples with a hand-computed reference sample.

     The graph has 4 variables and an upper triangular, all-ones
     adjacency matrix. The reference expresses every variable as a
     weighted sum of the noise terms (path method).
     """
     p = 4
     n = round(1e6)
     W = np.triu(np.ones((p, p)), k=1)
     sem = sempler.LGANM(W, (0, 0), (0.16, 0.16))
     np.random.seed(42)
     noise = np.random.normal([0, 0, 0, 0], [.4, .4, .4, .4], size=(n, 4))
     # One noise column per variable
     e0, e1, e2, e3 = noise.T
     truth = np.column_stack([
         e0,
         e0 + e1,
         2 * e0 + e1 + e2,
         4 * e0 + 2 * e1 + e2 + e3,
     ])
     samples = sem.sample(n)
     self.assertTrue(utils.same_normal(truth, samples))
コード例 #12
0
ファイル: test_operators.py プロジェクト: juangamella/ges
 def test_valid_turn_operators_10(self):
     """Check that valid turn operators change the essential graph.

     For every nonzero entry of the CPDAG of a random DAG, each
     operator returned by ges.score_valid_turn_operators must lead
     to a CPDAG different from the starting one.
     """
     G = 10
     p = 20
     for i in range(G):
         A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
         cpdag = utils.dag_to_cpdag(A)
         W = A * np.random.uniform(1, 2, A.shape)
         scm = sempler.LGANM(W, (0, 0), (0.5, 1))
         cache = GaussObsL0Pen(scm.sample(n=1000))
         fro, to = np.where(cpdag != 0)
         # Each nonzero entry (fro, to) is visited as the pair (to, fro)
         for x, y in zip(to, fro):
             operators = ges.score_valid_turn_operators(x, y, cpdag, cache)
             # Only the second element of each operator is needed here
             for (_a, new_A, _b, _c, _d) in operators:
                 new_cpdag = ges.utils.pdag_to_cpdag(new_A)
                 unchanged = (cpdag == new_cpdag).all()
                 self.assertFalse(unchanged)
     print("\nChecked that valid turn operators result in different MEC for %i CPDAGs" % (i + 1))
コード例 #13
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
 def test_sampling_1(self):
     """Check sampling from a model with a single variable.

     Covers the observational setting, an intervention fixing the
     variable to a constant and an intervention given as a pair.
     """
     np.random.seed(42)
     p = 1
     n = round(1e6)
     sem = sempler.LGANM(sempler.generators.dag_full(p), (0, 0), (1, 1))
     # Observational data: compare with draws from N(0, 1)
     reference = np.random.normal(0, 1, size=(n, 1))
     drawn = sem.sample(n, shift_interventions={})
     self.assertTrue(utils.same_normal(reference, drawn, atol=1e-1))
     # Intervention fixing the variable to 1: all samples equal 1
     drawn = sem.sample(n, do_interventions={0: 1})
     self.assertTrue((np.ones((n, 1)) == drawn).all())
     # Intervention given as the pair (1, 4): compare with normal
     # draws of mean 1 and standard deviation 2
     reference = np.random.normal(1, 2, size=(n, 1))
     drawn = sem.sample(n, do_interventions={0: (1, 4)})
     self.assertTrue(utils.same_normal(reference, drawn, atol=1e-1))
コード例 #14
0
ファイル: test_operators.py プロジェクト: juangamella/ges
 def test_valid_delete_operators_3(self):
     """Check the symmetry of the delete operator when X - Y.

     For every undirected edge of the CPDAG of a random DAG, the
     operators scored for (x, y) and for (y, x) are compared
     pairwise.
     """
     G = 100
     p = 20
     for i in range(G):
         A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
         cpdag = utils.dag_to_cpdag(A)
         W = A * np.random.uniform(1, 2, A.shape)
         scm = sempler.LGANM(W, (0, 0), (0.5, 1))
         cache = GaussObsL0Pen(scm.sample(n=1000))
         undirected = utils.only_undirected(cpdag)
         # Apply the operator to all undirected edges
         for x, y in zip(*np.where(undirected)):
             forward = ges.score_valid_delete_operators(x, y, cpdag, cache)
             backward = ges.score_valid_delete_operators(y, x, cpdag, cache)
             for op_fwd, op_bwd in zip(forward, backward):
                 # Resulting state (element 1) must be the same
                 self.assertTrue((op_fwd[1] == op_bwd[1]).all())
                 # Element 0 is compared up to rounding
                 self.assertAlmostEqual(op_fwd[0], op_bwd[0])
     print("\nChecked equality of delete operator on undirected edges in %i CPDAGS" % (i + 1))
コード例 #15
0
 def test_blankets(self):
     """Check Markov and stable blankets on random graphs.

     For graphs with 2 to 7 variables and every target in them, the
     Markov blanket estimated from the population distribution must
     equal the one read from the graph, and so must the stable
     blanket computed from a single environment.
     """
     np.random.seed(42)
     for p in range(2, 8):
         W = sempler.dag_avg_deg(p, 2.5, -1, 1)
         dist = sempler.LGANM(W, (0.1, 2)).sample(population=True)
         for i in range(p):
             # Only the last output of graph_info is needed here
             _pa, _ch, _poc, true_mb = utils.graph_info(i, W)
             # Markov blanket
             estimated_mb = set(markov_blanket(i, dist, tol=1e-10))
             self.assertEqual(true_mb, estimated_mb)
             # With one environment the stable blanket should be
             # the Markov blanket
             icp = population_icp([dist],
                                  i,
                                  debug=False,
                                  selection='all')
             estimated_sb = stable_blanket(icp.accepted, icp.mses)
             self.assertEqual(true_mb, estimated_sb)
コード例 #16
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
    def test_interventions_1(self):
        """Compare LGANM samples with references built via the path method.

        In each setting the reference sample is the noise multiplied
        by a matrix M whose row i holds the coefficients of the noise
        terms in variable i. An intervened variable is represented by
        replacing the parameters of its noise column.
        """
        np.random.seed(42)
        p = 6
        n = round(1e6)
        W = np.array([[0, 1, 1, 0, 0, 0],
                      [0, 0, 0, 0, 1, 1],
                      [0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 1, 1],
                      [0, 0, 0, 0, 0, 1],
                      [0, 0, 0, 0, 0, 0]])
        sem = sempler.LGANM(W, (0, 0), (0.16, 0.16))

        # Observational data
        M = np.array([[1, 0, 0, 0, 0, 0],
                      [1, 1, 0, 0, 0, 0],
                      [1, 0, 1, 0, 0, 0],
                      [1, 0, 1, 1, 0, 0],
                      [2, 1, 1, 1, 1, 0],
                      [4, 2, 2, 2, 1, 1]])
        noise = np.random.normal(np.zeros(p), np.ones(p) * 0.4, size=(n, p))
        expected = noise @ M.T
        drawn = sem.sample(n)
        self.assertTrue(utils.same_normal(expected, drawn))

        # do-intervention on the first variable: its noise column
        # becomes the constant 2.1
        noise = np.random.normal([2.1, 0, 0, 0, 0, 0],
                                 [0, .4, .4, .4, .4, .4],
                                 size=(n, p))
        expected = noise @ M.T
        drawn = sem.sample(n, do_interventions={0: 2.1})
        self.assertTrue(utils.same_normal(expected, drawn))

        # Constant intervention on the first variable plus
        # interventions given as pairs on the second and fifth ones
        do_int = {0: 2, 1: (2, 0.25), 4: (1, 0.25)}
        noise = np.random.normal([2, 2, 0, 0, 1, 0],
                                 [0, .5, .4, .4, .5, .4],
                                 size=(n, p))
        M = np.array([[1, 0, 0, 0, 0, 0],
                      [0, 1, 0, 0, 0, 0],
                      [1, 0, 1, 0, 0, 0],
                      [1, 0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 1, 0],
                      [1, 1, 1, 1, 1, 1]])
        expected = noise @ M.T
        drawn = sem.sample(n, do_interventions=do_int)
        self.assertTrue(utils.same_normal(expected, drawn))
コード例 #17
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
 def test_basic_1(self):
     """Check LGANM initialisation with valid and invalid arguments.

     Means and variances given either as a range or as a full
     vector must be stored as expected; arguments of any other
     length, or a missing third argument, must raise an exception.
     """
     p = 5
     W = sempler.generators.dag_avg_deg(p, p / 4, 1, 1)
     valid_args = (
         ((0, 0), (1, 1)),
         (np.zeros(p), np.ones(p)),
     )
     for means, variances in valid_args:
         sem = sempler.LGANM(W, means, variances)
         self.assertTrue((sem.variances == np.ones(p)).all())
         self.assertTrue((sem.means == np.zeros(p)).all())
     invalid_args = (
         ((0, 0), (0, 1, 2, 3, 4)),
         ((0, 0, 0, 0, 0), (0, 1)),
         ((0, 1, 2, 3), (0, 0, 0)),
         ((0, 1, 2, 3), ),
     )
     for args in invalid_args:
         with self.assertRaises(Exception):
             sempler.LGANM(W, *args)
コード例 #18
0
class OverallGESTests(unittest.TestCase):
    """Overall tests of the GES implementation.

    The class body runs when the class is defined: it builds a
    5-variable linear Gaussian SCM and draws one observational and
    five interventional samples from it. These statements use the
    global NumPy random state, so their order matters.
    """
    # Adjacency matrix of the reference graph
    true_A = np.array([[0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 1],
                       [0, 0, 0, 0, 1], [0, 0, 0, 0, 0]])
    # Each variable paired with the rows that hold a nonzero entry in
    # its column of true_A
    factorization = [(4, (2, 3)), (3, (2, )), (2, (0, 1)), (0, ()), (1, ())]
    # Same sparsity pattern as true_A, with weights drawn from U(1, 2)
    true_B = true_A * np.random.uniform(1, 2, size=true_A.shape)
    scm = sempler.LGANM(true_B, (0, 0), (0.3, 0.4))
    p = len(true_A)
    n = 100000
    # One intervention per variable, each given as a pair whose
    # second element grows from 1.0 to 1.4
    interventions = [{
        0: (0, 1.0)
    }, {
        1: (0, 1.1)
    }, {
        2: (0, 1.2)
    }, {
        3: (0, 1.3)
    }, {
        4: (0, 1.4)
    }]
    obs_data = scm.sample(n=n)
    # The observational sample comes first, followed by one sample
    # per intervention
    int_data = [obs_data]
    # Sample interventional distributions and construct true interventional
    # variances for later reference in tests: row 0 holds the variances
    # of the SCM, row i + 1 those under the i-th intervention
    interventional_variances = np.tile(scm.variances,
                                       (len(interventions) + 1, 1))
    for i, intervention in enumerate(interventions):
        int_data.append(scm.sample(n=n, shift_interventions=intervention))
        for (target, params) in intervention.items():
            # The second element of the pair is added to the variance
            # of the target
            interventional_variances[i + 1, target] += params[1]

    # ------------------------------------------------------
    # Tests

    def test_vs_cdt_1(self):
        """Compare ges.fit_bic with the GES implementation wrapped by cdt.

        Behaviour must match that of the implementation in the R
        package pcalg, using NUM_GRAPHS randomly generated
        Erdos-Renyi graphs.
        """
        np.random.seed(15)
        G = NUM_GRAPHS  # number of graphs
        p = 15  # number of variables
        n = 1500  # size of the observational sample
        for i in range(G):
            print("  Checking SCM %d" % (i))
            start = time.time()
            A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            W = A * np.random.uniform(1, 2, A.shape)
            obs_sample = sempler.LGANM(W, (1, 10), (0.5, 1)).sample(n=n)
            # Estimate the equivalence class using the pcalg
            # implementation of GES (package cdt)
            data = pd.DataFrame(obs_sample)
            output = GES(verbose=True).predict(data)
            estimate_cdt = nx.to_numpy_array(output)
            end = time.time()
            print("    GES-CDT done (%0.2f seconds)" % (end - start))
            start = time.time()
            # Estimate using this implementation
            # Test debugging output for the first 2 SCMs
            estimate, _ = ges.fit_bic(obs_sample,
                                      iterate=True,
                                      debug=4 if i < 2 else 2)
            end = time.time()
            print("    GES-own done (%0.2f seconds)" % (end - start))
            self.assertTrue((estimate == estimate_cdt).all())
        # Report G rather than the loop index: `i` is unbound when
        # NUM_GRAPHS is 0, and equals G - 1 after a complete loop
        print("\nCompared with PCALG implementation on %d DAGs" % G)

    def test_vs_cdt_2(self):
        """Compare ges.fit with the GES implementation wrapped by cdt.

        Same comparison as in test_vs_cdt_1, but the call is made
        through ges.fit with an explicit score object; for half of
        the cases the completion algorithm is specified manually.
        """
        np.random.seed(16)
        G = NUM_GRAPHS  # number of graphs
        p = 15  # number of variables
        n = 1500  # size of the observational sample
        for i in range(G):
            print("  Checking SCM %d" % (i))
            start = time.time()
            A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            W = A * np.random.uniform(1, 2, A.shape)
            obs_sample = sempler.LGANM(W, (1, 10), (0.5, 1)).sample(n=n)
            # Estimate the equivalence class using the pcalg
            # implementation of GES (package cdt)
            data = pd.DataFrame(obs_sample)
            score_class = ges.scores.gauss_obs_l0_pen.GaussObsL0Pen(obs_sample)
            # Odd-numbered cases pass the completion algorithm
            # explicitly, even-numbered ones pass None
            completion_algorithm = None if i % 2 == 0 else ges.utils.pdag_to_cpdag
            output = GES(verbose=True).predict(data)
            estimate_cdt = nx.to_numpy_array(output)
            end = time.time()
            print("    GES-CDT done (%0.2f seconds)" % (end - start))
            start = time.time()
            # Estimate using this implementation
            # Test debugging output for the first 2 SCMs
            estimate, _ = ges.fit(score_class,
                                  completion_algorithm=completion_algorithm,
                                  iterate=True,
                                  debug=4 if i < 2 else 2)
            end = time.time()
            print("    GES-own done (%0.2f seconds)" % (end - start))
            self.assertTrue((estimate == estimate_cdt).all())
        # See test_vs_cdt_1: G is safe to print even when it is 0
        print("\nCompared with PCALG implementation on %d DAGs" % G)

    def test_vs_cdt_2_raw(self):
        """Compare ges.fit, using the 'raw' score, with cdt's GES.

        Same comparison as in test_vs_cdt_1, but the call is made
        through ges.fit with a score object built with method='raw'.
        """
        np.random.seed(17)
        G = NUM_GRAPHS  # number of graphs
        p = 15  # number of variables
        n = 1500  # size of the observational sample
        for i in range(G):
            print("  Checking SCM %d" % (i))
            start = time.time()
            A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            W = A * np.random.uniform(1, 2, A.shape)
            obs_sample = sempler.LGANM(W, (1, 10), (0.5, 1)).sample(n=n)
            # Estimate the equivalence class using the pcalg
            # implementation of GES (package cdt)
            data = pd.DataFrame(obs_sample)
            score_class = ges.scores.gauss_obs_l0_pen.GaussObsL0Pen(
                obs_sample, method='raw')
            output = GES(verbose=True).predict(data)
            estimate_cdt = nx.to_numpy_array(output)
            end = time.time()
            print("    GES-CDT done (%0.2f seconds)" % (end - start))
            start = time.time()
            # Estimate using this implementation
            # Test debugging output for the first 2 SCMs
            estimate, _ = ges.fit(score_class,
                                  iterate=True,
                                  debug=4 if i < 2 else 2)
            end = time.time()
            print("    GES-own done (%0.2f seconds)" % (end - start))
            self.assertTrue((estimate == estimate_cdt).all())
        # See test_vs_cdt_1: G is safe to print even when it is 0
        print("\nCompared with PCALG implementation on %d DAGs" % G)
コード例 #19
0
ファイル: test_gauss_bic.py プロジェクト: juangamella/ges
class ScoreTests(unittest.TestCase):
    """Tests of the GaussObsL0Pen score on a fixed 5-variable SCM.

    The class body runs when the class is defined: it seeds the
    global NumPy random state, builds the SCM, draws an observational
    sample and creates one score object per estimation method.
    """
    np.random.seed(12)
    # Adjacency matrix of the reference graph
    true_A = np.array([[0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 1],
                       [0, 0, 0, 0, 1], [0, 0, 0, 0, 0]])
    # Each variable paired with the rows that hold a nonzero entry in
    # its column of true_A
    factorization = [(4, (2, 3)), (3, (2, )), (2, (0, 1)), (0, ()), (1, ())]
    # Same sparsity pattern as true_A, with weights drawn from U(1, 2)
    true_B = true_A * np.random.uniform(1, 2, size=true_A.shape)
    scm = sempler.LGANM(true_B, (0, 0), (0.3, 0.4))
    p = len(true_A)
    n = 10000
    obs_data = scm.sample(n=n)
    # Score objects with the default method and with method='raw'
    obs_score = GaussObsL0Pen(obs_data)
    obs_score_raw = GaussObsL0Pen(obs_data, method='raw')

    # ------------------------------------------------------
    # White-box tests:
    #   testing the inner workings of the ges.scores module, e.g. the
    #   intermediate functions used to compute the likelihoods

    def test_mle_obs(self):
        """Check the parameter estimates of _mle_local and _mle_full.

        Estimates obtained variable by variable must coincide with
        those obtained for the full graph, respect the zeros of
        true_A and be close to the parameters of the true model.
        """
        for score in [self.obs_score, self.obs_score_raw]:
            print("Testing %s" % score)
            local_B = np.zeros_like(self.true_B)
            local_omegas = np.zeros(self.p)
            # Column x of B and entry x of the noise variances come
            # from the local estimate of x given its parents
            for (x, pa) in self.factorization:
                local_B[:, x], local_omegas[x] = score._mle_local(x, pa)
            full_B, full_omegas = score._mle_full(self.true_A)
            print("Locally estimated", local_B, local_omegas)
            print("Fully estimated", full_B, full_omegas)
            print("Truth", self.true_B, self.scm.variances)
            # Make sure zeros are respected
            self.assertTrue((local_B[self.true_A == 0] == 0).all())
            self.assertTrue((full_B[self.true_A == 0] == 0).all())
            # Make sure estimation of weights is similar
            self.assertTrue((local_B == full_B).all())
            # Make sure estimation of noise variances is similar
            self.assertTrue((local_omegas == full_omegas).all())
            # Compare with true model
            self.assertTrue(np.allclose(self.true_B, local_B, atol=5e-2))
            self.assertTrue(np.allclose(self.true_B, full_B, atol=5e-2))
            self.assertTrue(
                np.allclose(self.scm.variances, local_omegas, atol=1e-1))
            self.assertTrue(
                np.allclose(self.scm.variances, full_omegas, atol=1e-1))

    # ------------------------------------------------------
    # Black-box tests:
    #   Testing the behaviour of the "API" functions, i.e. the
    #   functions to compute the full/local
    #   observational/interventional BIC scores from a given DAG
    #   structure and the data

    def test_parameters_obs(self):
        """Check that GaussObsL0Pen rejects data that is not an ndarray."""
        # Passing a list must raise TypeError. assertRaises fails the
        # test if nothing is raised and lets any other exception
        # propagate, so the actual error is shown in the report.
        with self.assertRaises(TypeError):
            GaussObsL0Pen([self.obs_data])

    def test_full_score_obs(self):
        """Check that the true graph scores higher than the empty graph."""
        for score_fun in [self.obs_score, self.obs_score_raw]:
            print("Testing %s" % score_fun)
            # Score of the true adjacency
            true_score = score_fun.full_score(self.true_A)
            self.assertIsInstance(true_score, float)
            # Compute score of unconnected graph
            score = score_fun.full_score(np.zeros((self.p, self.p)))
            self.assertIsInstance(score, float)
            self.assertGreater(true_score, score)

    def test_score_decomposability_obs(self):
        """Check that the full score is the sum of the local scores."""
        # As a black-box test, make sure the score functions
        # preserve decomposability
        for score_fun in [self.obs_score, self.obs_score_raw]:
            print("Decomposability of observational score")
            print("Testing %s" % score_fun)
            full_score = score_fun.full_score(self.true_A)
            # Accumulate the local score of every variable given its
            # parents
            acc = 0
            for (j, pa) in self.factorization:
                local_score = score_fun.local_score(j, pa)
                print("  ", j, pa, local_score)
                acc += local_score
            print("Full vs. acc:", full_score, acc)
            self.assertAlmostEqual(full_score, acc, places=2)
コード例 #20
0
# --------------------------------------------------------------------
# Generate (or load) test cases

if args.load_dataset is not None:
    # Load a dataset stored in the format used by ABCD: the directory
    # 'dags' holds one sub-directory 'dag<i>' per test case
    print("\nLoading test cases from %s" % args.load_dataset)
    dags_dir = os.path.join(args.load_dataset, 'dags')
    G = len(os.listdir(dags_dir))

    def _load_dag_file(index, filename):
        # Read one text file of the test case with the given index
        return np.loadtxt(os.path.join(dags_dir, 'dag%d' % index, filename))

    Ws = [_load_dag_file(i, 'adjacency.txt') for i in range(G)]
    means = [_load_dag_file(i, 'means.txt') for i in range(G)]
    variances = [_load_dag_file(i, 'variances.txt') for i in range(G)]
    targets = [int(_load_dag_file(i, 'target.txt')) for i in range(G)]
    cases = []
    for i, W in enumerate(Ws):
        sem = sempler.LGANM(W, variances[i], means[i])
        # The ground truth is the first output of graph_info
        truth = utils.graph_info(targets[i], W)[0]
        cases.append(evaluation.TestCase(i, sem, targets[i], truth))
    # The generation parameters play no role for a loaded dataset
    excluded_keys += [
        'k', 'w_min', 'w_max', 'var_min', 'var_max', 'int_min', 'int_max',
        'random_state', 'p_min', 'p_max'
    ]
else:
    # Generate the dataset; the number of variables is a single value
    # when both bounds coincide and a range otherwise
    if args.p_min == args.p_max:
        P = args.p_min
    else:
        P = (args.p_min, args.p_max)
    # NOTE(review): no random state is passed on here - confirm that
    # the seed is fixed elsewhere if reproducibility is expected
    cases = evaluation.gen_cases(args.G, P, args.k,
                                 args.w_min, args.w_max,
                                 args.var_min, args.var_max,
                                 args.int_min, args.int_max)
コード例 #21
0
ファイル: test_operators.py プロジェクト: juangamella/ges
class TurnOperatorTests(unittest.TestCase):
    """Tests for ges.turn and ges.score_valid_turn_operators.

    Graphs are given as adjacency matrices. As used throughout these
    tests, A[i, j] != 0 with A[j, i] == 0 stands for the directed edge
    i -> j, and A[i, j] != 0 != A[j, i] for the undirected edge i - j.
    """

    # Adjacency of the DAG from which the observational data is sampled
    true_A = np.array([[0, 0, 1, 0, 0],
                       [0, 0, 1, 0, 0],
                       [0, 0, 0, 1, 1],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])
    # (node, parents) pairs of true_A
    factorization = [(4, (2, 3)), (3, (2,)), (2, (0, 1)), (0, ()), (1, ())]
    # Weights of the edges in true_A, drawn (unseeded) between 1 and 2
    true_B = true_A * np.random.uniform(1, 2, size=true_A.shape)
    scm = sempler.LGANM(true_B, (0, 0), (0.3, 0.4))
    p = len(true_A)
    n = 10000
    obs_data = scm.sample(n=n)
    # Score object shared by all score_valid_turn_operators tests
    cache = GaussObsL0Pen(obs_data)

    # ------------------------------------------------------
    # Helpers

    def _assert_value_error(self, fun, *args):
        """Check that fun(*args) raises a ValueError and print its message."""
        with self.assertRaises(ValueError) as ctx:
            fun(*args)
        print("OK:", ctx.exception)

    # ------------------------------------------------------
    # Tests

    def test_turn_operator_1(self):
        """Turning X1 <- X2 with C = {X3} gives X1 -> X2 and X3 -> X2."""
        A = np.array([[0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Build the expected graph on a copy taken before the call, so
        # the comparison cannot pass just because the output aliases A
        truth = A.copy()
        # Orient X1 -> X2 and X3 -> X2
        truth[2, 1], truth[1, 2] = 0, 1
        truth[2, 3] = 0
        output = ges.turn(1, 2, {3}, A)
        self.assertTrue((truth == output).all())

    def test_turn_operator_2(self):
        """Check the output of ges.turn for several edges of one graph."""
        A = np.array([[0, 1, 1, 0, 0],
                      [1, 0, 1, 1, 1],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 0, 0],
                      [0, 1, 0, 0, 0]])
        # Turn edge X3 - X1 to X3 -> X1 with C = {X4, X0}
        output = ges.turn(3, 1, {0, 4}, A)
        truth = A.copy()
        truth[1, 3] = 0
        truth[1, 0], truth[1, 4] = 0, 0
        self.assertTrue((truth == output).all())
        # Turn edge X1 - X3 to X1 -> X3 with C = Ø
        output = ges.turn(1, 3, set(), A)
        truth = A.copy()
        truth[3, 1] = 0
        self.assertTrue((truth == output).all())
        # Turn edge X4 -> X1 with C = {X3}
        output = ges.turn(4, 1, {3}, A)
        truth = A.copy()
        truth[1, 4] = 0
        truth[1, 3] = 0
        self.assertTrue((truth == output).all())
        # Turn edge X2 -> X0 with C = {X1}
        output = ges.turn(2, 0, {1}, A)
        truth = A.copy()
        truth[0, 2], truth[2, 0] = 0, 1
        truth[0, 1] = 0
        self.assertTrue((truth == output).all())

    def test_turn_operator_preconditions(self):
        """ges.turn raises ValueError when its preconditions are violated."""
        A = np.array([[0, 1, 1, 0, 0],
                      [1, 0, 1, 1, 1],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 0, 0],
                      [0, 1, 0, 0, 0]])
        # Trying to turn X1 -> X2 fails as edge already exists
        self._assert_value_error(ges.turn, 1, 2, set(), A)
        # Trying to turn X3 -> X4 fails as they are not adjacent
        self._assert_value_error(ges.turn, 3, 4, set(), A)
        # Trying to turn X3 <- X4 fails as they are not adjacent
        self._assert_value_error(ges.turn, 4, 3, set(), A)
        # Turning X0 -> X1 with C = {X3,X2} fails as X2 is not a neighbor of X1
        self._assert_value_error(ges.turn, 0, 1, {3, 2}, A)
        # Turning X3 -> X1 with C = {X4,X0,X3} should fail as X3 is contained in C
        self._assert_value_error(ges.turn, 3, 1, {0, 3, 4}, A)

    def test_valid_turn_operators_preconditions(self):
        """score_valid_turn_operators raises ValueError on invalid edges."""
        A = np.array([[0, 1, 1, 0, 0],
                      [1, 0, 1, 1, 1],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 0, 0],
                      [0, 1, 0, 0, 0]])
        # Trying to turn X1 -> X2 fails as edge already exists
        self._assert_value_error(ges.score_valid_turn_operators, 1, 2, A, self.cache)
        # Trying to turn X3 -> X4 fails as they are not adjacent
        self._assert_value_error(ges.score_valid_turn_operators, 3, 4, A, self.cache)
        # Trying to turn X3 <- X4 fails as they are not adjacent
        self._assert_value_error(ges.score_valid_turn_operators, 4, 3, A, self.cache)

    def test_valid_turn_operators_1(self):
        """Turning X1 <- X2 yields exactly one valid operator."""
        A = np.array([[0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Turning the edge X1 <- X2 should yield one valid
        # operator, for:
        #   1. T = Ø, as NA_yx U Ø = {X3} is a clique
        output = ges.score_valid_turn_operators(1, 2, A, self.cache)
        self.assertEqual(1, len(output))
        truth = A.copy()
        # Turn X1 <- X2 (and orient X3 -> X2)
        truth[2, 1] = 0
        truth[1, 2] = 1
        truth[2, 3] = 0
        # The resulting graph is the second entry of the operator tuple
        self.assertTrue((truth == output[0][1]).all())

    def test_valid_turn_operators_2(self):
        """Turning X1 <- X3 yields exactly one valid operator."""
        # NOTE: Same graph as previous test
        A = np.array([[0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Turning the edge X1 <- X3 should yield one valid
        # operator, for:
        #   1. T = Ø, as NA_yx U Ø = {X2} is a clique
        output = ges.score_valid_turn_operators(1, 3, A, self.cache)
        self.assertEqual(1, len(output))
        truth = A.copy()
        # Turn X1 <- X3 (and orient X2 -> X3)
        truth[3, 1] = 0
        truth[1, 3] = 1
        truth[3, 2] = 0
        self.assertTrue((truth == output[0][1]).all())

    def test_valid_turn_operators_3(self):
        """Turning X0 -> X1 yields exactly one valid operator."""
        # NOTE: Same graph as two previous tests (i.e. _1 and _2)
        A = np.array([[0, 1, 0, 0, 0],
                      [0, 0, 0, 0, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Turning the edge X0 -> X1 should yield one valid
        # operator, for:
        #   1. T = Ø, as NA_yx U Ø = Ø is a clique
        output = ges.score_valid_turn_operators(1, 0, A, self.cache)
        self.assertEqual(1, len(output))
        truth = A.copy()
        # Turn X1 <- X0
        truth[0, 1] = 0
        truth[1, 0] = 1
        self.assertTrue((truth == output[0][1]).all())

    def test_valid_turn_operators_4(self):
        """Turning X0 -> X1 yields no valid operator (no clique)."""
        A = np.array([[0, 1, 1, 1, 1],
                      [0, 0, 0, 0, 0],
                      [1, 1, 0, 0, 0],
                      [1, 1, 0, 0, 0],
                      [1, 0, 0, 0, 0]])
        # Turning the edge X0 -> X1 should yield no valid
        # operators, for (note T0 = {X4})
        #   1. T = Ø, as C = NA_yx U Ø = {X2,X3} is not a clique
        #   2. T = {X4}, as C = NA_yx U {X4} = {X2,X3,X4} is not a clique
        output = ges.score_valid_turn_operators(1, 0, A, self.cache)
        self.assertEqual(0, len(output))

    def test_valid_turn_operators_5(self):
        """Turning X0 -> X1 yields one valid operator, with T = {X2}."""
        A = np.array([[0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0],
                      [1, 0, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Turning the edge X0 -> X1 should yield one valid
        # operator (note T0 = {X2}):
        #   1. T = Ø is not valid: C = NA_yx U Ø = Ø is a clique, but
        #   the path X0 - X2 - X3 -> X1 does not contain a node in C
        #   2. T = {X2} is valid: C = NA_yx U {X2} = {X2} is a clique
        #   and satisfies the path condition
        output = ges.score_valid_turn_operators(1, 0, A, self.cache)
        self.assertEqual(1, len(output))
        # Orient X1 -> X0 and X2 -> X0
        truth = A.copy()
        truth[0, 1], truth[1, 0] = 0, 1
        truth[0, 2] = 0
        self.assertTrue((truth == output[0][1]).all())

    def test_valid_turn_operators_6(self):
        """Orienting X1 -> X3 yields no valid operator."""
        A = np.array([[0, 1, 0, 0, 0],
                      [1, 0, 1, 1, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # Orienting the edge X1 -> X3 yields no valid operators, as
        # all neighbors of X1 are adjacent to X3
        output = ges.score_valid_turn_operators(1, 3, A, self.cache)
        self.assertEqual(0, len(output))

    def test_valid_turn_operators_7(self):
        """Orienting X3 -> X1 yields exactly one valid operator."""
        A = np.array([[0, 1, 0, 0, 0],
                      [1, 0, 1, 1, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        output = ges.score_valid_turn_operators(3, 1, A, self.cache)
        # Orienting the edge X3 -> X1 yields only one valid operator,
        # as (note ne(X1) = {X2, X0}):
        #   C = Ø and C = {X2} do not satisfy condition (i)
        #   C = {X0, X2} is not a clique
        #   C = {X0} satisfies all three conditions
        self.assertEqual(1, len(output))
        truth = np.array([[0, 1, 0, 0, 0],
                          [0, 0, 1, 0, 0],
                          [0, 1, 0, 1, 0],
                          [0, 1, 1, 0, 0],
                          [0, 0, 0, 0, 0]])
        self.assertTrue((truth == output[0][1]).all())

    def test_valid_turn_operators_8(self):
        """Orienting X0 -> X1 yields three valid operators."""
        A = np.array([[0, 1, 0, 0, 0],
                      [1, 0, 1, 1, 0],
                      [0, 1, 0, 1, 0],
                      [0, 1, 1, 0, 0],
                      [0, 0, 0, 0, 0]])
        # For the edge X0 -> X1 three operators are valid
        #   C = Ø : does not satisfy condition 1
        #   C = {X2}, {X3}, {X2,X3} are valid
        output = ges.score_valid_turn_operators(0, 1, A, self.cache)
        self.assertEqual(3, len(output))
        truth_2 = np.array([[0, 1, 0, 0, 0],
                            [0, 0, 0, 1, 0],
                            [0, 1, 0, 1, 0],
                            [0, 1, 1, 0, 0],
                            [0, 0, 0, 0, 0]])
        truth_3 = np.array([[0, 1, 0, 0, 0],
                            [0, 0, 1, 0, 0],
                            [0, 1, 0, 1, 0],
                            [0, 1, 1, 0, 0],
                            [0, 0, 0, 0, 0]])
        truth_23 = np.array([[0, 1, 0, 0, 0],
                             [0, 0, 0, 0, 0],
                             [0, 1, 0, 1, 0],
                             [0, 1, 1, 0, 0],
                             [0, 0, 0, 0, 0]])
        # The resulting graph is only checked for operators whose last
        # entry (C) equals one of the three sets above
        for (_, new_A, _, _, C) in output:
            if C == {2}:
                self.assertTrue((new_A == truth_2).all())
            if C == {3}:
                self.assertTrue((new_A == truth_3).all())
            if C == {2, 3}:
                self.assertTrue((new_A == truth_23).all())

    def test_valid_turn_operators_9(self):
        """Orienting X0 -> X1 in a 6-node graph yields one valid operator."""
        A = np.array([[0, 1, 1, 0, 0, 1],
                      [1, 0, 1, 1, 0, 1],
                      [0, 0, 0, 0, 0, 0],
                      [0, 1, 1, 0, 0, 0],
                      [0, 0, 1, 0, 0, 0],
                      [1, 1, 0, 0, 0, 0]])
        # Orienting the edge X0 -> X1 there should be only one valid
        # operator. NA_yx = {X5} and ne(y) / {x} = {X3, X5}:
        #   C = Ø does not satisfy condition i
        #   C = {X3} is valid
        #   C = {X5} does not satisfy condition i
        #   C = {X3,X5} do not form a clique
        output = ges.score_valid_turn_operators(0, 1, A, self.cache)
        self.assertEqual(1, len(output))
        truth = np.array([[0, 1, 1, 0, 0, 1],
                          [0, 0, 1, 0, 0, 1],
                          [0, 0, 0, 0, 0, 0],
                          [0, 1, 1, 0, 0, 0],
                          [0, 0, 1, 0, 0, 0],
                          [1, 1, 0, 0, 0, 0]])
        self.assertTrue((truth == output[0][1]).all())

    def test_valid_turn_operators_10(self):
        """Every valid turn operator results in a different essential graph."""
        G = 10  # number of random graphs
        p = 20  # number of variables
        for _ in range(G):
            A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            cpdag = utils.dag_to_cpdag(A)
            W = A * np.random.uniform(1, 2, A.shape)
            obs_sample = sempler.LGANM(W, (0, 0), (0.5, 1)).sample(n=1000)
            cache = GaussObsL0Pen(obs_sample)
            # Each nonzero entry cpdag[fro, to] is tried as turn(x=to, y=fro)
            fro, to = np.where(cpdag != 0)
            for (x, y) in zip(to, fro):
                valid_operators = ges.score_valid_turn_operators(x, y, cpdag, cache)
                for (_, new_A, _, _, _) in valid_operators:
                    new_cpdag = ges.utils.pdag_to_cpdag(new_A)
                    self.assertFalse((cpdag == new_cpdag).all())
        print("\nChecked that valid turn operators result in different MEC for %i CPDAGs" % G)
コード例 #22
0
ファイル: lganm_example.py プロジェクト: juangamella/sempler
import sempler
import numpy as np

# Connectivity (weight) matrix of the model, one row per line
W = np.array([
    [0, 0, 0, 0.1, 0],
    [0, 0, 2.1, 0, 0],
    [0, 0, 0, 3.2, 0],
    [0, 0, 0, 0, 5.0],
    [0, 0, 0, 0, 0],
])

# Build the model from W and the two (0, 1) parameter ranges
lganm = sempler.LGANM(W, (0, 1), (0, 1))

# Draw 100 samples in the observational setting
samples = lganm.sample(n=100)

# Draw 100 samples under a shift intervention on variable 1 with
# standard gaussian noise
samples = lganm.sample(n=100, shift_interventions={1: (0, 1)})

# Observational environment in the "population setting"
distribution = lganm.sample(population=True)
コード例 #23
0
import ges
import ges.scores
import sempler
import numpy as np

# Sample observational data from a Gaussian SCM built with sempler.
# A is the adjacency of the underlying graph, one row per line
A = np.array([
    [0, 0, 1, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 0, 0, 1, 1],
    [0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0],
])
# Edge weights are drawn between 1 and 2
W = A * np.random.uniform(1, 2, A.shape)
scm = sempler.LGANM(W, (1, 2), (1, 2))
data = scm.sample(n=5000)

# Score class: gaussian BIC score computed from the data
score_class = ges.scores.GaussObsL0Pen(data)

# Run GES with this score
estimate, score = ges.fit(score_class)

print(estimate, score)

# Output
# [[0 0 1 0 0]
#  [0 0 1 0 0]
#  [0 0 0 1 1]
#  [0 0 0 0 1]
#  [0 0 0 1 0]] 24002.112921580803
コード例 #24
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
    def test_shift_noise_interventions_1(self):
        """Sampling under interventions on a custom DAG matches samples
        built directly from the noise terms (the path method)."""
        np.random.seed(42)
        p = 6
        n = round(1e6)
        W = np.array([[0, 1, 1, 0, 0, 0],
                      [0, 0, 0, 0, 1, 1],
                      [0, 0, 0, 1, 0, 0],
                      [0, 0, 0, 0, 1, 1],
                      [0, 0, 0, 0, 0, 1],
                      [0, 0, 0, 0, 0, 0]])
        sem = sempler.LGANM(W, (0, 0), (0.16, 0.16))

        # Matrix mapping the noise terms to the variables when no
        # do-intervention is applied
        M_obs = np.array([[1, 0, 0, 0, 0, 0],
                          [1, 1, 0, 0, 0, 0],
                          [1, 0, 1, 0, 0, 0],
                          [1, 0, 1, 1, 0, 0],
                          [2, 1, 1, 1, 1, 0],
                          [4, 2, 2, 2, 1, 1]])
        # Same, under the do-interventions on X1 and X4 of the last check
        M_do = np.array([[1, 0, 0, 0, 0, 0],
                         [0, 1, 0, 0, 0, 0],
                         [1, 0, 1, 0, 0, 0],
                         [1, 0, 1, 1, 0, 0],
                         [0, 0, 0, 0, 1, 0],
                         [1, 1, 1, 1, 1, 1]])

        def check(loc, scale, mixing, **interventions):
            # Draw the reference noise first and sample from the SEM
            # second, so the random draws happen in a fixed order
            noise = np.random.normal(loc, scale, size=(n, p))
            expected = noise @ mixing.T
            drawn = sem.sample(n, **interventions)
            self.assertTrue(utils.same_normal(expected, drawn))

        # Observational data (noise std 0.4, i.e. variance 0.16)
        check(np.zeros(p), np.ones(p) * 0.4, M_obs)

        # Shift intervention on X4
        check([0, 0, 0, 0, 0, 0], [.4, .4, .4, .4, .5, .4], M_obs,
              shift_interventions={4: (0, 0.09)})

        # Noise intervention on X4
        check([0, 0, 0, 0, 0, 0], [.4, .4, .4, .4, 1, .4], M_obs,
              noise_interventions={4: (0, 1)})

        # Noiseless shift intervention on X2
        check([0, 0, 2, 0, 0, 0], [.4, .4, .4, .4, .4, .4], M_obs,
              shift_interventions={2: 2})

        # Do-interventions on X0 override shift interventions on X0
        check([2.1, 0, 0, 0, 0, 0], [0, .4, .4, .4, .4, .4], M_obs,
              do_interventions={0: 2.1},
              shift_interventions={0: (1, 2)})

        # Shift intervention on X0 and do-interventions on X1 and X4
        shift_int = {0: (0, 0.2)}
        do_int = {1: (2, 0.25), 4: (1, 0.25)}
        check([0, 2, 0, 0, 1, 0], [0.6, .5, .4, .4, .5, .4], M_do,
              do_interventions=do_int,
              shift_interventions=shift_int)
コード例 #25
0
ファイル: test_operators.py プロジェクト: juangamella/ges
class InsertOperatorTests(unittest.TestCase):
    """Tests for ges.insert and ges.score_valid_insert_operators.

    Graphs are given as adjacency matrices. As used throughout these
    tests, A[i, j] != 0 with A[j, i] == 0 stands for the directed edge
    i -> j, and A[i, j] != 0 != A[j, i] for the undirected edge i - j.
    """

    # Adjacency of the DAG from which the observational data is sampled
    true_A = np.array([[0, 0, 1, 0, 0],
                       [0, 0, 1, 0, 0],
                       [0, 0, 0, 1, 1],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])
    # (node, parents) pairs of true_A
    factorization = [(4, (2, 3)), (3, (2,)), (2, (0, 1)), (0, ()), (1, ())]
    # Weights of the edges in true_A, drawn (unseeded) between 1 and 2
    true_B = true_A * np.random.uniform(1, 2, size=true_A.shape)
    scm = sempler.LGANM(true_B, (0, 0), (0.3, 0.4))
    p = len(true_A)
    n = 10000
    obs_data = scm.sample(n=n)

    # ------------------------------------------------------
    # Helpers

    def _example_pdag(self):
        """Return a fresh 5-node PDAG with edges x2 - x4 and x4 -> x3."""
        A = np.zeros_like(self.true_A)
        A[2, 4], A[4, 2] = 1, 1  # x2 - x4
        A[4, 3] = 1  # x4 -> x3
        return A

    def _assert_value_error(self, fun, *args, **kwargs):
        """Check that fun(*args, **kwargs) raises a ValueError and print
        its message."""
        with self.assertRaises(ValueError) as ctx:
            fun(*args, **kwargs)
        print("OK:", ctx.exception)

    # ------------------------------------------------------
    # Tests
    def test_insert_1(self):
        """Check preconditions and output of ges.insert(x, y, T, A)."""
        # Insert should fail on adjacent edges
        self._assert_value_error(ges.insert, 0, 2, set(), self.true_A)

        # Insert should fail when T contains non-neighbors of y
        self._assert_value_error(ges.insert, 0, 1, {2}, self.true_A)

        # Insert should fail when T contains adjacents of x
        A = self._example_pdag()
        self._assert_value_error(ges.insert, 3, 2, {4}, A)

        # This should work: insert x3 -> x2
        true_new_A = A.copy()
        true_new_A[3, 2] = 1
        new_A = ges.insert(3, 2, set(), A)
        self.assertTrue((true_new_A == new_A).all())

        # This should work: insert x1 -> x2
        true_new_A = A.copy()
        true_new_A[1, 2] = 1
        new_A = ges.insert(1, 2, set(), A)
        self.assertTrue((true_new_A == new_A).all())

        # This should work: insert x1 -> x2 and orient x4 -> x2
        true_new_A = A.copy()
        true_new_A[1, 2] = 1
        true_new_A[4, 2], true_new_A[2, 4] = 1, 0
        new_A = ges.insert(1, 2, {4}, A)
        self.assertTrue((true_new_A == new_A).all())

        # This should work: insert x1 -> x0
        true_new_A = A.copy()
        true_new_A[1, 0] = 1
        new_A = ges.insert(1, 0, set(), A)
        self.assertTrue((true_new_A == new_A).all())

    def test_insert_2(self):
        """Exhaustively check ges.insert on the CPDAGs of random DAGs."""
        G = 100  # number of random graphs
        p = 20  # number of variables
        for _ in range(G):
            A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            cpdag = utils.dag_to_cpdag(A)
            for x in range(p):
                # Can only apply the operator to non-adjacent nodes
                adj_x = utils.adj(x, cpdag)
                Y = set(range(p)) - adj_x
                for y in Y:
                    for T in utils.subsets(utils.neighbors(y, cpdag) - adj_x):
                        output = ges.insert(x, y, T, cpdag)
                        # Verify the new vstructures; each is looked up
                        # with its two end nodes in increasing order
                        vstructs = utils.vstructures(output)
                        for t in T:
                            vstruct = (x, y, t) if x < t else (t, y, x)
                            self.assertIn(vstruct, vstructs)
                        # Verify whole connectivity
                        truth = cpdag.copy()
                        # Add edge x -> y
                        truth[x, y] = 1
                        # Orient t -> y
                        truth[list(T), y] = 1
                        truth[y, list(T)] = 0
                        self.assertTrue((output == truth).all())
        print("\nExhaustively checked insert operator on %i CPDAGS" % G)

    def test_valid_insert_operators_1a(self):
        """insert(X0, X1) has exactly one valid operator."""
        # Define A and cache
        A = self._example_pdag()
        cache = GaussObsL0Pen(self.obs_data)
        # there should only be one valid operator, as
        #   1. X1 has no neighbors in A, so T0 = {set()}
        #   2. na_yx is also an empty set, thus na_yx U T is a clique
        #   3. there are no semi-directed paths from y to x
        valid_operators = ges.score_valid_insert_operators(0, 1, A, cache, debug=False)
        self.assertEqual(1, len(valid_operators))
        _, new_A, _, _, _ = valid_operators[0]
        true_new_A = A.copy()
        true_new_A[0, 1] = 1
        self.assertTrue((new_A == true_new_A).all())

    def test_valid_insert_operators_1b(self):
        """insert(X1, X0) has exactly one valid operator."""
        # Define A and cache
        A = self._example_pdag()
        cache = GaussObsL0Pen(self.obs_data)
        # there should only be one valid operator, as
        #   1. X0 has no neighbors in A, so T0 = {set()}
        #   2. na_yx is also an empty set, thus na_yx U T is a clique
        #   3. there are no semi-directed paths from y to x
        valid_operators = ges.score_valid_insert_operators(1, 0, A, cache, debug=False)
        self.assertEqual(1, len(valid_operators))
        _, new_A, _, _, _ = valid_operators[0]
        true_new_A = A.copy()
        true_new_A[1, 0] = 1
        self.assertTrue((new_A == true_new_A).all())

    def test_valid_insert_operators_2a(self):
        """insert(X0, X2) has two valid operators (T = Ø and T = {X4})."""
        # Define A and cache
        A = self._example_pdag()
        cache = GaussObsL0Pen(self.obs_data)
        # there should be two valid operators, as T0 = {X4}
        #   1. insert(X0,X2,set()) should be valid
        #   2. and also insert(X0,X2,{X4}), as na_yx U T = {X4} and is a clique
        #   3. there are no semi-directed paths from y to x
        valid_operators = ges.score_valid_insert_operators(0, 2, A, cache, debug=False)
        self.assertEqual(2, len(valid_operators))
        # Test outcome of insert(0,2,set())
        _, new_A, _, _, _ = valid_operators[0]
        true_new_A = A.copy()
        true_new_A[0, 2] = 1
        self.assertTrue((new_A == true_new_A).all())
        # Test outcome of insert(0,2,{4})
        _, new_A, _, _, _ = valid_operators[1]
        true_new_A = A.copy()
        true_new_A[0, 2], true_new_A[2, 4] = 1, 0
        self.assertTrue((new_A == true_new_A).all())

    def test_valid_insert_operators_2b(self):
        """insert(X2, X0) has exactly one valid operator."""
        # Define A and cache
        A = self._example_pdag()
        cache = GaussObsL0Pen(self.obs_data)
        # there should only be one valid operator, as
        #   1. X0 has no neighbors in A, so T0 = {set()}
        #   2. na_yx is also an empty set, thus na_yx U T is a clique
        #   3. there are no semi-directed paths from y to x
        valid_operators = ges.score_valid_insert_operators(2, 0, A, cache, debug=False)
        self.assertEqual(1, len(valid_operators))
        # Test outcome of insert(2,0,set())
        _, new_A, _, _, _ = valid_operators[0]
        true_new_A = A.copy()
        true_new_A[2, 0] = 1
        self.assertTrue((new_A == true_new_A).all())

    def test_valid_insert_operators_3a(self):
        """insert(X1, X2) has two valid operators (T = Ø and T = {X4})."""
        # Define A and cache
        A = self._example_pdag()
        cache = GaussObsL0Pen(self.obs_data)
        # there should be two valid operators, as T0 = {X4}
        #   1. insert(X1,X2,set()) should be valid
        #   2. and also insert(X1,X2,{X4}), as na_yx U T = {X4} and is a clique
        #   3. there are no semi-directed paths from y to x
        valid_operators = ges.score_valid_insert_operators(1, 2, A, cache, debug=False)
        self.assertEqual(2, len(valid_operators))
        # Test outcome of insert(1,2,set())
        _, new_A, _, _, _ = valid_operators[0]
        true_new_A = A.copy()
        true_new_A[1, 2] = 1
        self.assertTrue((new_A == true_new_A).all())
        # Test outcome of insert(1,2,{4})
        _, new_A, _, _, _ = valid_operators[1]
        true_new_A = A.copy()
        true_new_A[1, 2], true_new_A[2, 4] = 1, 0
        self.assertTrue((new_A == true_new_A).all())

    def test_valid_insert_operators_3b(self):
        """insert(X2, X1) has exactly one valid operator."""
        # Define A and cache
        A = self._example_pdag()
        cache = GaussObsL0Pen(self.obs_data)
        # there should only be one valid operator, as
        #   1. X1 has no neighbors in A, so T0 = {set()}
        #   2. na_yx is also an empty set, thus na_yx U T is a clique
        #   3. there are no semi-directed paths from y to x
        valid_operators = ges.score_valid_insert_operators(2, 1, A, cache, debug=False)
        self.assertEqual(1, len(valid_operators))
        # Test outcome of insert(2,1,set())
        _, new_A, _, _, _ = valid_operators[0]
        true_new_A = A.copy()
        true_new_A[2, 1] = 1
        self.assertTrue((new_A == true_new_A).all())

    def test_valid_insert_operators_4a(self):
        """insert(X3, X2) has exactly one valid operator."""
        # Define A and cache
        A = self._example_pdag()
        cache = GaussObsL0Pen(self.obs_data)
        # there should be one valid operator, as T0 = set(), na_yx = {4}
        #   1. insert(X3,X2,set()) should be valid
        #   2. na_yx U T = {X4} should be a clique
        #   3. the semi-directed path X2-X4->X3 contains one node in na_yx U T
        valid_operators = ges.score_valid_insert_operators(3, 2, A, cache, debug=False)
        self.assertEqual(1, len(valid_operators))
        # Test outcome of insert(3,2,set())
        _, new_A, _, _, _ = valid_operators[0]
        true_new_A = A.copy()
        true_new_A[3, 2] = 1
        self.assertTrue((new_A == true_new_A).all())

    def test_valid_insert_operators_4b(self):
        """insert(X2, X3) has exactly one valid operator."""
        # Define A and cache
        A = self._example_pdag()
        cache = GaussObsL0Pen(self.obs_data)
        # there should be one valid operator, as T0 = set(), na_yx = set()
        #   1. insert(X2,X3,set()) should be valid
        #   2. na_yx U T = set() should be a clique
        #   3. there are no semi-directed paths between X3 and X2
        valid_operators = ges.score_valid_insert_operators(2, 3, A, cache, debug=False)
        self.assertEqual(1, len(valid_operators))
        # Test outcome of insert(2,3,set())
        _, new_A, _, _, _ = valid_operators[0]
        true_new_A = A.copy()
        true_new_A[2, 3] = 1
        self.assertTrue((new_A == true_new_A).all())

    def test_valid_insert_operators_5(self):
        """score_valid_insert_operators raises ValueError on adjacent nodes."""
        # Define A and cache
        A = self._example_pdag()
        cache = GaussObsL0Pen(self.obs_data)
        # Should fail as 2,4 are adjacent
        self._assert_value_error(ges.score_valid_insert_operators, 2, 4, A, cache, debug=False)
        self._assert_value_error(ges.score_valid_insert_operators, 4, 2, A, cache, debug=False)
        # Should fail as 3,4 are adjacent
        self._assert_value_error(ges.score_valid_insert_operators, 3, 4, A, cache, debug=False)
        self._assert_value_error(ges.score_valid_insert_operators, 4, 3, A, cache, debug=False)

    def test_valid_insert_operators_6(self):
        """insert(X3, X2) on a 4-node graph has exactly one valid operator."""
        A = np.array([[0, 0, 1, 0],
                      [0, 0, 1, 1],
                      [1, 1, 0, 0],
                      [0, 0, 0, 0]])
        # Only the first four variables of the data are used
        data = self.obs_data[:, 0:4]
        cache = GaussObsL0Pen(data)
        # There should be one valid operator for x3 -> x2
        #   1. na_yx = {1}, T0 = {0}
        #   2. for T=set(), na_yx U T = {1} which is a clique, and
        #   contains a node in the semi-directed path 2-1->3
        #   3. for T = {0}, na_yx U T = {0,1} which is not a clique
        valid_operators = ges.score_valid_insert_operators(3, 2, A, cache, debug=False)
        self.assertEqual(1, len(valid_operators))
        # Test outcome of insert(3,2,set())
        _, new_A, _, _, _ = valid_operators[0]
        true_new_A = A.copy()
        true_new_A[3, 2] = 1
        self.assertTrue((new_A == true_new_A).all())

    def test_valid_insert_operators_7(self):
        """insert(X3, X2) has no valid operator when 2 -> 1 -> 3 is present."""
        A = np.array([[0, 0, 1, 0],
                      [0, 0, 0, 1],
                      [1, 1, 0, 0],
                      [0, 0, 0, 0]])
        # Only the first four variables of the data are used
        data = self.obs_data[:, 0:4]
        cache = GaussObsL0Pen(data)
        # There should be no valid operator for x3 -> x2
        #   1. na_yx = set(), T0 = {0}
        #   2. for T=set(), na_yx U T = set() which is a clique, but does not
        #   contain a node in the semi-directed path 2->1->3
        #   3. for T = {0}, na_yx U T = {0} which is a clique, but does not
        #   contain a node in the semi-directed path 2->1->3
        valid_operators = ges.score_valid_insert_operators(3, 2, A, cache, debug=False)
        self.assertEqual(0, len(valid_operators))

    def test_valid_insert_operators_8(self):
        """insert(X3, X2) has no valid operator when 2 -> 1 -> 3 is present."""
        A = np.array([[0, 0, 1, 0],
                      [0, 0, 0, 1],
                      [1, 1, 0, 0],
                      [0, 0, 0, 0]])
        # Only the first four variables of the data are used
        data = self.obs_data[:, 0:4]
        cache = GaussObsL0Pen(data)
        # There should be no valid operator for x3 -> x2
        #   1. na_yx = set(), T0 = {0}
        #   2. for T=set(), na_yx U T = set() which is a clique, but does not
        #   contain a node in the semi-directed path 2->1->3
        #   3. for T = {0}, na_yx U T = {0} which is a clique, but does not
        #   contain a node in the semi-directed path 2->1->3
        # NOTE(review): this uses the same graph and the same call as
        # test_valid_insert_operators_7; the reverse operator,
        # insert(2, 3), is not exercised here
        valid_operators = ges.score_valid_insert_operators(3, 2, A, cache, debug=False)
        self.assertEqual(0, len(valid_operators))
コード例 #26
0
ファイル: test_operators.py プロジェクト: juangamella/ges
class DeleteOperatorTests(unittest.TestCase):
    """Tests for `ges.delete` and `ges.score_valid_delete_operators`.

    Graphs are given as adjacency matrices: a non-zero A[i, j] with a
    zero A[j, i] encodes the directed edge i -> j, and non-zero entries
    in both positions encode the undirected edge i - j.
    """

    true_A = np.array([[0, 0, 1, 0, 0],
                       [0, 0, 1, 0, 0],
                       [0, 0, 0, 1, 1],
                       [0, 0, 0, 0, 1],
                       [0, 0, 0, 0, 0]])
    factorization = [(4, (2, 3)), (3, (2,)), (2, (0, 1)), (0, ()), (1, ())]
    # Random edge weights in [1, 2) on the support of true_A; drawn at
    # class-definition time from the global numpy RNG (no seed set here)
    true_B = true_A * np.random.uniform(1, 2, size=true_A.shape)
    scm = sempler.LGANM(true_B, (0, 0), (0.3, 0.4))
    p = len(true_A)
    n = 10000
    # Observational sample shared by the tests that need a score cache
    obs_data = scm.sample(n=n)
    # ------------------------------------------------------
    # Helpers

    def _assert_delete_fails(self, x, y, H, A):
        # Check that ges.delete(x, y, H, A) raises a ValueError, and
        # print the exception message as the tests always did
        with self.assertRaises(ValueError, msg="Call to delete should have failed") as ctx:
            ges.delete(x, y, H, A)
        print("OK", ctx.exception)

    # ------------------------------------------------------
    # Tests

    def test_delete_operator_preconditions(self):
        # Test that if x and y are not adjacent an exception is thrown
        self._assert_delete_fails(0, 1, set(), self.true_A)
        self._assert_delete_fails(3, 0, set(), self.true_A)
        # Test that if there is no edge x -> y or x - y an exception
        # is thrown (the edges in true_A are 2 -> 3 and 1 -> 2)
        self._assert_delete_fails(3, 2, set(), self.true_A)
        self._assert_delete_fails(2, 1, set(), self.true_A)
        # Test that if H is not a subset of neighbors of Y and
        # adjacents of X, an exception is thrown
        cpdag = self.true_A.copy()
        cpdag[4, 3] = 1  # turns 3 -> 4 into the undirected edge 3 - 4
        self._assert_delete_fails(2, 4, {1}, cpdag)
        self._assert_delete_fails(2, 4, {0}, cpdag)
        # NOTE(review): there is also no edge 4 -> 2 or 4 - 2 in cpdag,
        # so this call may be rejected for that reason instead
        self._assert_delete_fails(4, 2, {3}, cpdag)

    def test_delete_operator_1(self):
        # Test the result from applying the delete operator to a
        # hand-picked matrix with edges
        #   0 -> 2, 1 -> 2, 2 -> 3, 2 -> 4, 3 - 4
        A = np.array([[0, 0, 1, 0, 0],
                      [0, 0, 1, 0, 0],
                      [0, 0, 0, 1, 1],
                      [0, 0, 0, 0, 1],
                      [0, 0, 0, 1, 0]])
        # remove the edge 2 -> 4, with H = set()
        new_A = ges.delete(2, 4, set(), A)
        truth = A.copy()
        truth[2, 4] = 0
        self.assertTrue((truth == new_A).all())
        # remove the edge 2 -> 4, with H = {3}
        new_A = ges.delete(2, 4, {3}, A)
        truth = A.copy()
        truth[2, 4] = 0
        # 3 - 4 becomes 4 -> 3
        truth[3, 4] = 0
        self.assertTrue((truth == new_A).all())

    def test_delete_operator_2(self):
        # Test the result from applying the delete operator to a
        # hand-picked matrix where all edges are undirected:
        #   0 - 1, 0 - 2, 0 - 3, 1 - 2, 2 - 3, 3 - 4
        A = np.array([[0, 1, 1, 1, 0],
                      [1, 0, 1, 0, 0],
                      [1, 1, 0, 1, 0],
                      [1, 0, 1, 0, 1],
                      [0, 0, 0, 1, 0]])
        # remove the edge 0 - 1, with H = set()
        new_A = ges.delete(0, 1, set(), A)
        truth = A.copy()
        truth[1, 0], truth[0, 1] = 0, 0
        self.assertTrue((truth == new_A).all())
        # remove the edge 0 - 1, with H = {2}
        new_A = ges.delete(0, 1, {2}, A)
        truth = A.copy()
        truth[1, 0], truth[0, 1] = 0, 0
        # orient 0 -> 2 and 1 -> 2
        truth[2, 0], truth[2, 1] = 0, 0
        self.assertTrue((truth == new_A).all())
        # remove the edge 0 - 2 with H = set()
        new_A = ges.delete(0, 2, set(), A)
        truth = A.copy()
        truth[0, 2], truth[2, 0] = 0, 0
        self.assertTrue((truth == new_A).all())
        # remove the edge 0 - 2 with H = {1}
        new_A = ges.delete(0, 2, {1}, A)
        truth = A.copy()
        truth[0, 2], truth[2, 0] = 0, 0
        # orient 0 -> 1 and 2 -> 1
        truth[1, 0], truth[1, 2] = 0, 0
        self.assertTrue((truth == new_A).all())
        # remove the edge 0 - 2 with H = {1,3}
        new_A = ges.delete(0, 2, {1, 3}, A)
        truth = A.copy()
        truth[0, 2], truth[2, 0] = 0, 0
        # orient 0 -> 1 and 2 -> 1
        truth[1, 0], truth[1, 2] = 0, 0
        # orient 0 -> 3 and 2 -> 3
        truth[3, 0], truth[3, 2] = 0, 0
        self.assertTrue((truth == new_A).all())

    def test_delete_operator_3(self):
        # Apply the delete operator to every edge x -> y / x - y and
        # every subset H of na(y, x) in the CPDAGs of G random DAGs,
        # and check the resulting v-structures and adjacency matrix
        G = 100
        p = 20
        for i in range(G):
            A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            cpdag = utils.dag_to_cpdag(A)
            for x in range(p):
                # Can only apply the operator to X -> Y or X - Y
                for y in np.where(cpdag[x, :] != 0)[0]:
                    for H in utils.subsets(utils.na(y, x, cpdag)):
                        output = ges.delete(x, y, H, cpdag)
                        # Verify the new vstructures: each h in H must
                        # be the collider of a v-structure with x and y
                        # (endpoints listed in increasing order)
                        vstructs = utils.vstructures(output)
                        for h in H:
                            vstruct = (x, h, y) if x < y else (y, h, x)
                            self.assertIn(vstruct, vstructs)
                        # Verify whole connectivity
                        truth = cpdag.copy()
                        # Remove edge
                        truth[x, y], truth[y, x] = 0, 0
                        # Orient y -> h
                        truth[list(H), y] = 0
                        # Orient x -> h for the h which are neighbors of x
                        truth[list(utils.neighbors(x, cpdag) & H), x] = 0
                        self.assertTrue((output == truth).all())
        print("\nExhaustively checked delete operator on %i CPDAGS" % (i + 1))

    def test_valid_delete_operators_preconditions(self):
        # NOTE(review): despite its name, this test exercises
        # ges.delete and not ges.score_valid_delete_operators
        #
        # Edges: 0 -> 2, 1 -> 2, 2 - 3, 2 - 4, 3 - 4
        A = np.array([[0, 0, 1, 0, 0],
                      [0, 0, 1, 0, 0],
                      [0, 0, 0, 1, 1],
                      [0, 0, 1, 0, 1],
                      [0, 0, 1, 1, 0]])
        # Should fail as X0 and X1 are not adjacent
        self._assert_delete_fails(0, 1, set(), A)
        # Should fail as X0 and X1 are not adjacent
        self._assert_delete_fails(1, 0, set(), A)
        # Should fail as X0 and X3 are not adjacent
        self._assert_delete_fails(0, 3, set(), A)
        # Should fail as there is no edge X2 -> X0 or X2 - X0
        self._assert_delete_fails(2, 0, set(), A)
        # Should fail as there is no edge X2 -> X1 or X2 - X1
        self._assert_delete_fails(2, 1, set(), A)

    def test_valid_delete_operators_1(self):
        # Edges: 0 -> 2, 1 -> 2, 2 - 3, 2 - 4, 3 - 4
        A = np.array([[0, 0, 1, 0, 0],
                      [0, 0, 1, 0, 0],
                      [0, 0, 0, 1, 1],
                      [0, 0, 1, 0, 1],
                      [0, 0, 1, 1, 0]])
        cache = GaussObsL0Pen(self.obs_data)
        # Removing the edge X2 - X4 should yield two valid
        # operators, for:
        #   1. H = Ø, as NA_yx \ Ø = {X3} is a clique
        #   2. H = {3}, as NA_yx \ {X3} = Ø is a clique
        output = ges.score_valid_delete_operators(2, 4, A, cache)
        self.assertEqual(2, len(output))
        A1, A2 = A.copy(), A.copy()
        # Remove X2 - X4
        A1[2, 4], A1[4, 2], A2[2, 4], A2[4, 2] = 0, 0, 0, 0
        # Orient X2 -> X3, X4 -> X3
        A2[3, 2], A2[3, 4] = 0, 0
        # The second element of each returned operator is the
        # resulting adjacency matrix
        results = [op[1] for op in output]
        self.assertIsNotNone(utils.member(results, A1))
        self.assertIsNotNone(utils.member(results, A2))

    def test_valid_delete_operators_2(self):
        # Edges: 0 -> 2, 1 -> 2, 2 - 3, 2 - 4, 3 - 4
        A = np.array([[0, 0, 1, 0, 0],
                      [0, 0, 1, 0, 0],
                      [0, 0, 0, 1, 1],
                      [0, 0, 1, 0, 1],
                      [0, 0, 1, 1, 0]])
        cache = GaussObsL0Pen(self.obs_data)
        # Removing the edge X1 -> X2 should yield one valid
        # operator, for H = Ø.
        # NOTE(review): the original comment gave NA_yx = {X3, X4}, but
        # X1 is only adjacent to X2 in A, so NA_yx looks empty here;
        # either way H = Ø is the single operator expected below
        output = ges.score_valid_delete_operators(1, 2, A, cache)
        self.assertEqual(1, len(output))
        true_A = A.copy()
        # Remove X1 -> X2
        true_A[1, 2] = 0
        self.assertTrue((true_A == output[0][1]).all())

    def test_valid_delete_operators_3(self):
        # Check symmetry of the delete operator when X - Y
        G = 100
        p = 20
        for i in range(G):
            A = sempler.generators.dag_avg_deg(p, 3, 1, 1)
            cpdag = utils.dag_to_cpdag(A)
            W = A * np.random.uniform(1, 2, A.shape)
            obs_sample = sempler.LGANM(W, (0, 0), (0.5, 1)).sample(n=1000)
            cache = GaussObsL0Pen(obs_sample)
            fro, to = np.where(utils.only_undirected(cpdag))
            # Test the operator to all undirected edges
            for (x, y) in zip(fro, to):
                output_a = ges.score_valid_delete_operators(x, y, cpdag, cache)
                output_b = ges.score_valid_delete_operators(y, x, cpdag, cache)
                # Operators are compared pairwise, in the order returned
                for (op_a, op_b) in zip(output_a, output_b):
                    # Check resulting state is the same
                    self.assertTrue((op_a[1] == op_b[1]).all())
                    # Check the score (first element) is the same
                    self.assertAlmostEqual(op_a[0], op_b[0])
        print("\nChecked equality of delete operator on undirected edges in %i CPDAGS" % (i + 1))

    def test_valid_delete_operators_4(self):
        # Edges: 0 -> 1, 0 -> 2, 1 - 2, 2 - 3
        A = np.array([[0, 1, 1, 0],
                      [0, 0, 1, 0],
                      [0, 1, 0, 1],
                      [0, 0, 1, 0]])
        cache = GaussObsL0Pen(self.obs_data)
        # Removing the edge X0 -> X2 should yield two valid
        # operators, for:
        #   1. H = Ø, as NA_yx \ Ø = {X1} is a clique
        #   2. H = {1}, as NA_yx \ {X1} = Ø is a clique
        output = ges.score_valid_delete_operators(0, 2, A, cache)
        self.assertEqual(2, len(output))
        A1, A2 = A.copy(), A.copy()
        # Remove X0 -> X2
        A1[0, 2], A2[0, 2] = 0, 0
        # Orient X2 -> X1
        A2[1, 2] = 0
        results = [op[1] for op in output]
        self.assertIsNotNone(utils.member(results, A1))
        self.assertIsNotNone(utils.member(results, A2))

    def test_valid_delete_operators_5(self):
        # Edges: 0 -> 1, 0 - 2, 0 - 3, 1 - 2, 1 - 3
        A = np.array([[0, 1, 1, 1],
                      [0, 0, 1, 1],
                      [1, 1, 0, 0],
                      [1, 1, 0, 0]])
        cache = GaussObsL0Pen(self.obs_data)
        # Removing the edge X0 -> X1 should yield three valid
        # operators, for:
        #   0. Invalid H = Ø, as NA_yx \ Ø = {X2,X3} is not a clique
        #   1. H = {X2}, as NA_yx \ H = {X3} is a clique
        #   2. H = {X3}, as NA_yx \ H = {X2} is a clique
        #   3. H = {X2,X3}, as NA_yx \ H = Ø is a clique
        output = ges.score_valid_delete_operators(0, 1, A, cache)
        self.assertEqual(3, len(output))
        # v-structure on X2, i.e orient X0 -> X2, X1 -> X2
        A1 = np.array([[0, 0, 1, 1],
                       [0, 0, 1, 1],
                       [0, 0, 0, 0],
                       [1, 1, 0, 0]])
        # v-structure on X3, i.e. orient X0 -> X3, X1 -> X3
        A2 = np.array([[0, 0, 1, 1],
                       [0, 0, 1, 1],
                       [1, 1, 0, 0],
                       [0, 0, 0, 0]])
        # v-structures on X2 and X3
        A3 = np.array([[0, 0, 1, 1],
                       [0, 0, 1, 1],
                       [0, 0, 0, 0],
                       [0, 0, 0, 0]])
        results = [op[1] for op in output]
        self.assertIsNotNone(utils.member(results, A1))
        self.assertIsNotNone(utils.member(results, A2))
        self.assertIsNotNone(utils.member(results, A3))
コード例 #27
0
ファイル: test_lganm.py プロジェクト: juangamella/sempler
    def test_interventions_2(self):
        # Test that the means and variances of variables in the joint
        # distribution are what is expected via the path method
        #
        # Three scenarios are checked in turn: observational sampling,
        # an intervention on X1 and an intervention on X0. In each one
        # a reference sample ("truth") is built by hand from the
        # structural equations and compared to the sample returned by
        # the SEM, and the empirical means/variances of the latter are
        # compared to closed-form values.
        #
        # W[i, j] is the weight of the edge i -> j: 0 -> 1, 0 -> 2, 1 -> 2
        W = np.array([[0, 1, 1], [0, 0, 1], [0, 0, 0]])
        n = round(1e6)
        variances = np.array([1, 2, 3]) * 0.1
        means = np.array([1, 2, 3])
        sem = sempler.LGANM(W, means, variances)
        # Seed the global numpy RNG; the draws below happen in a fixed order
        np.random.seed(42)
        # Test observational data
        # Build truth
        # np.random.normal takes standard deviations, hence the square root
        noise = np.random.normal(means, variances**0.5, size=(n, 3))
        truth = np.zeros_like(noise)
        truth[:, 0] = noise[:, 0]
        truth[:, 1] = truth[:, 0] * W[0, 1] + noise[:, 1]
        truth[:,
              2] = truth[:, 0] * W[0, 2] + truth[:, 1] * W[1, 2] + noise[:, 2]
        samples = sem.sample(n)
        self.assertTrue(utils.same_normal(truth, samples))
        # Test that variances/means are as expected
        # Each noise term contributes through the sum over directed
        # paths of the product of the weights along the path
        true_vars, true_means = np.zeros(3), np.zeros(3)
        true_vars[0] = variances[0]
        true_vars[1] = W[0, 1]**2 * variances[0] + variances[1]
        true_vars[2] = (W[0, 1] * W[1, 2] + W[0, 2])**2 * variances[0] + W[
            1, 2]**2 * variances[1] + variances[2]
        true_means[0] = means[0]
        true_means[1] = W[0, 1] * means[0] + means[1]
        true_means[2] = (W[0, 1] * W[1, 2] +
                         W[0, 2]) * means[0] + W[1, 2] * means[1] + means[2]
        self.assertTrue(
            np.allclose(true_vars, np.var(samples, axis=0), atol=1e-2))
        self.assertTrue(
            np.allclose(true_means, np.mean(samples, axis=0), atol=1e-2))

        # Test under intervention on X1 <- N(0,0.1)
        # The expected values below treat the intervention tuple as
        # (mean, variance): X1 gets mean 0 and variance 0.1
        variances = np.array([1., 1., 3.]) * 0.1
        means = np.array([1., 0., 3.])
        noise = np.random.normal(means, variances**0.5, size=(n, 3))
        truth[:, 0] = noise[:, 0]
        # X1 no longer depends on X0: it is just its new noise term
        truth[:, 1] = noise[:, 1]
        truth[:,
              2] = truth[:, 0] * W[0, 2] + truth[:, 1] * W[1, 2] + noise[:, 2]
        samples = sem.sample(n, do_interventions={1: (0, 0.1)})
        self.assertTrue(utils.same_normal(truth, samples))
        # Test that variances/means are as expected
        # The path 0 -> 1 -> 2 is no longer counted, as the edge 0 -> 1
        # plays no role under the intervention
        true_vars, true_means = np.zeros(3), np.zeros(3)
        true_vars[0] = variances[0]
        true_vars[1] = variances[1]
        true_vars[2] = W[0, 2]**2 * variances[0] + W[
            1, 2]**2 * variances[1] + variances[2]
        true_means[0] = means[0]
        true_means[1] = means[1]
        true_means[2] = W[0, 2] * means[0] + W[1, 2] * means[1] + means[2]
        self.assertTrue(
            np.allclose(true_vars, np.var(samples, axis=0), atol=1e-2))
        self.assertTrue(
            np.allclose(true_means, np.mean(samples, axis=0), atol=1e-2))

        # Test under intervention on do(X0 = 0)
        # Modelled in the reference sample as a noise term with mean 0
        # and variance 0 for X0, i.e. X0 is constant
        variances = np.array([0., 2., 3.]) * 0.1
        means = np.array([0., 2., 3.])
        noise = np.random.normal(means, variances**0.5, size=(n, 3))
        truth[:, 0] = noise[:, 0]
        truth[:, 1] = truth[:, 0] * W[0, 1] + noise[:, 1]
        truth[:,
              2] = truth[:, 0] * W[0, 2] + truth[:, 1] * W[1, 2] + noise[:, 2]
        samples = sem.sample(n, do_interventions={0: 0})
        self.assertTrue(utils.same_normal(truth, samples))
        # Test that variances/means are as expected
        # Same formulas as in the observational case, evaluated with
        # the post-intervention means and variances
        true_vars, true_means = np.zeros(3), np.zeros(3)
        true_vars[0] = variances[0]
        true_vars[1] = W[0, 1]**2 * variances[0] + variances[1]
        true_vars[2] = (W[0, 1] * W[1, 2] + W[0, 2])**2 * variances[0] + W[
            1, 2]**2 * variances[1] + variances[2]
        true_means[0] = means[0]
        true_means[1] = W[0, 1] * means[0] + means[1]
        true_means[2] = (W[0, 1] * W[1, 2] +
                         W[0, 2]) * means[0] + W[1, 2] * means[1] + means[2]
        self.assertTrue(
            np.allclose(true_vars, np.var(samples, axis=0), atol=1e-2))
        self.assertTrue(
            np.allclose(true_means, np.mean(samples, axis=0), atol=1e-2))