def test_2_multinom_RVs(df_2_multinomial_indep_RVs):
    """Skeleton search on the ``df_2_multinomial_indep_RVs`` fixture.

    Starting from a complete graph, the test expects that no edge is left
    after the search and that the empty conditioning set is the only one
    reported for the pair keyed ``'x _||_ y'``.
    """
    data = df_2_multinomial_indep_RVs(size=10000)
    graph = Graph(variables=list(data.columns), complete=True)

    sep_sets_by_pair = PCSkeletonFinder(data=data, graph=graph).find()

    assert graph.get_edges() == set()
    assert sep_sets_by_pair['x _||_ y'] == {frozenset()}
def test_2_deterministic_and_3rd_var_caused_by_one_of_them(
        df_2_deterministic_and_3rd_var_caused_by_one_of_them):
    """Skeleton search keeps x-y adjacent and leaves the node set intact.

    The graph starts complete over the fixture's columns; after the search
    the test checks the ``x``/``y`` adjacency and that the nodes are exactly
    ``x``, ``y`` and ``z``.
    """
    data = df_2_deterministic_and_3rd_var_caused_by_one_of_them(size=1000)
    graph = Graph(variables=list(data.columns), complete=True)

    PCSkeletonFinder(data=data, graph=graph).find()

    expected_nodes = {'x', 'y', 'z'}

    assert graph.has_adjacency(('x', 'y'))
    assert graph.get_nodes() == expected_nodes  # pylint: disable='no-member'
def test_skeleton_finder_X_causes_Y(df_X_causes_Y):
    """Skeleton search on the ``df_X_causes_Y`` fixture.

    Expects ``x`` and ``y`` to remain adjacent and the finder to report no
    separating conditioning sets at all.
    """
    data = df_X_causes_Y(size=1000)
    graph = Graph(variables=list(data.columns), complete=True)

    sep_sets_by_pair = PCSkeletonFinder(data=data, graph=graph).find()

    assert graph.has_adjacency(('x', 'y'))
    assert sep_sets_by_pair == {}
def test_skeleton_finder_Z_causes_X_and_Y(df_Z_causes_X_and_Y):
    """Skeleton search on the ``df_Z_causes_X_and_Y`` fixture.

    Expects ``x``-``z`` and ``y``-``z`` to stay adjacent, and the only
    reported separation to be ``x _||_ y`` given ``{z}``.
    """
    data = df_Z_causes_X_and_Y(size=1000)
    graph = Graph(variables=list(data.columns), complete=True)

    sep_sets_by_pair = PCSkeletonFinder(data=data, graph=graph).find()

    for pair in (('x', 'z'), ('y', 'z')):
        assert graph.has_adjacency(pair)

    expected_sep_sets = {'x _||_ y': {frozenset({'z'})}}
    assert sep_sets_by_pair == expected_sep_sets
def test_chain_and_collider_without_MI(df_chain_and_collider_without_MI):
    """Skeleton search on the ``df_chain_and_collider_without_MI`` fixture.

    Checks that the four adjacencies a-b, b-c, a-d and c-d are present in
    the graph once the search has run.
    """
    data = df_chain_and_collider_without_MI(size=10000)
    graph = Graph(variables=list(data.columns), complete=True)

    PCSkeletonFinder(data=data, graph=graph).find()

    expected_adjacencies = (
        ('a', 'b'),
        ('b', 'c'),
        ('a', 'd'),
        ('c', 'd'),
    )
    for pair in expected_adjacencies:
        assert graph.has_adjacency(pair)
def test_3_multinom_RVs_MAR(df_Z_causes_X_Y_and_X_Z_causes_MI_Y):
    """Skeleton search on the ``df_Z_causes_X_Y_and_X_Z_causes_MI_Y`` fixture.

    Checks that the graph's nodes overlap with ``{'x', 'y', 'MI_x'}`` and
    that x-z and y-z are adjacent after the search.
    """
    data = df_Z_causes_X_Y_and_X_Z_causes_MI_Y(size=70000)
    graph = Graph(variables=list(data.columns), complete=True)

    PCSkeletonFinder(data=data, graph=graph).find()

    # NOTE(review): this passes as soon as *one* of these names is a node;
    # confirm whether full containment (and 'MI_x' vs 'MI_y') was intended.
    assert set(graph.get_nodes()) & {'x', 'y', 'MI_x'}  # pylint: disable='no-member'

    for pair in (('x', 'z'), ('y', 'z')):
        assert graph.has_adjacency(pair)
def test_chain_and_collider_with_MI(df_chain_and_collider_with_MI):
    """Skeleton search on the ``df_chain_and_collider_with_MI`` fixture.

    Checks the adjacencies present after the skeleton stage, including the
    a-c adjacency that is expected at this intermediate stage.
    """
    data = df_chain_and_collider_with_MI(size=20000)
    graph = Graph(variables=list(data.columns), complete=True)

    PCSkeletonFinder(data=data, graph=graph).find()

    expected_adjacencies = (
        # a-c is expected in this intermediate stage; it is spurious, due
        # to collider bias.
        ('a', 'c'),
        ('a', 'b'),
        ('b', 'c'),
        ('a', 'd'),
        ('c', 'd'),
    )
    for pair in expected_adjacencies:
        assert graph.has_adjacency(pair)
Exemple #8
0
def test_init_complete_then_add_edge():
    """Adding an edge between new nodes to a complete graph.

    The graph is built complete over A, B and C, then ``E o-> D`` is added.
    The new edge must be visible in both spellings, the initial ``o-o``
    edges must still be there, and no edge between D and A may appear.
    """
    pag = PartialAncestralGraph(complete=True, variables=['A', 'B', 'C'])
    pag.add_edge('E o-> D')

    # The added edge, read from either endpoint.
    for edge in ('E o-> D', 'D <-o E'):
        assert pag.has_edge(edge)

    # Edges that come from the complete initialisation.
    for edge in ('A o-o B', 'C o-o B', 'A o-o C'):
        assert pag.has_edge(edge)

    assert pag.has_edge('D o-o A') is False
Exemple #9
0
def test_simple_chain():
    """No immorality is reported for X - Y - Z when Y separates X and Z."""
    pag = PAG(variables=['X', 'Y', 'Z'])
    for edge in ('X o-o Y', 'Z o-o Y'):
        pag.add_edge(edge)

    separations = SepSets()
    separations.add(node_1='X', node_2='Z', cond_set={'Y'})

    finder = ImmoralitiesFinder(graph=pag, sep_sets=separations)
    found = finder.find()

    assert len(found) == 0
def test_long_chains_collider_bias_with_MI(
        df_long_chains_and_collider_with_MI):
    """Skeleton search on the ``df_long_chains_and_collider_with_MI`` fixture.

    Checks the adjacencies present after the skeleton stage, including the
    b-d adjacency that is expected at this intermediate stage.
    """
    data = df_long_chains_and_collider_with_MI(size=10000, proba_noise=0.7)
    graph = Graph(variables=list(data.columns), complete=True)

    PCSkeletonFinder(data=data, graph=graph).find()

    expected_adjacencies = (
        ('a', 'b'),
        # b-d is expected in this intermediate stage; it is spurious, due
        # to collider bias.
        ('b', 'd'),
        ('d', 'e'),
        ('b', 'c'),
        ('c', 'd'),
    )
    for pair in expected_adjacencies:
        assert graph.has_adjacency(pair)
Exemple #11
0
def test_simple_collider():
    """An unshielded triple whose middle node is not in the sep set is an
    immorality.

    ``Parent 1`` and ``Parent 2`` are both linked to ``collider`` and are
    not adjacent to each other. Their separating set is registered as the
    empty set, so ``collider`` is not part of it and the triple
    ``(Parent 1, collider, Parent 2)`` must be reported.
    """
    graph = PAG(
        variables=['Parent 1', 'Parent 2', 'collider'],
    )

    graph.add_edge('Parent 1 o-o collider')
    graph.add_edge('Parent 2 o-o collider')

    sep_sets = SepSets()
    # The previous version registered the separating set for 'Parent_2'
    # (underscore), which is not a node of the graph, so the entry never
    # applied to the pair under test; its content ({'collider'}) would also
    # have contradicted the expected immorality. Use the real node name and
    # an empty separating set.
    sep_sets.add(
        node_1='Parent 1',
        node_2='Parent 2',
        cond_set=set()
    )

    immoralities = ImmoralitiesFinder(
        graph=graph,
        sep_sets=sep_sets
    ).find()

    assert ('Parent 1', 'collider', 'Parent 2') in immoralities
Exemple #12
0
def test_init_complete_graph():
    """A graph built with ``complete=True`` links every pair of variables.

    Checks the ``o-o`` edges between A, B and C, the neighbour sets of B and
    A, and that ``get_edges()`` contains the A-B edge but no self-loops.
    """
    pag = PartialAncestralGraph(complete=True, variables=['A', 'B', 'C'])

    for edge in ('A o-o B', 'C o-o B', 'A o-o C'):
        assert pag.has_edge(edge)

    assert {'A', 'C'} == pag.get_neighbors('B')
    assert {'B', 'C'} == pag.get_neighbors('A')

    assert Edge('A o-o B') in pag.get_edges()
    for self_loop in ('A o-o A', 'B o-o B'):
        assert Edge(self_loop) not in pag.get_edges()
Exemple #13
0
def test_get_edge():
    """``get_edge`` returns ``None`` for a missing edge, else its parts.

    After ``A o-o B`` is added, the returned object is indexed at positions
    0, 1 and 2 and must yield ``'A'``, ``'o-o'`` and ``'B'``.
    """
    pag = PartialAncestralGraph()
    assert pag.get_edge('A', 'B') is None

    pag.add_edge('A o-o B')
    edge = pag.get_edge('A', 'B')

    for position, expected in enumerate(('A', 'o-o', 'B')):
        assert edge[position] == expected
Exemple #14
0
def test_firing_squad_example():
    """Immorality detection on the firing-squad graph.

    Builds the five-node graph with ``o-o`` edges, registers four separating
    sets, and expects the two riflemen meeting at 'Prisoner hit by bullet'
    to be reported as an immorality.
    """
    captain = 'Captain ordered to shoot'
    rifleman_1 = 'Rifleman 1 shot'
    rifleman_2 = 'Rifleman 2 shot'
    hit = 'Prisoner hit by bullet'
    dead = 'Prisoner dead'

    pag = PAG(variables=[captain, rifleman_1, rifleman_2, hit, dead])

    edges = (
        'Rifleman 1 shot o-o Prisoner hit by bullet',
        'Rifleman 2 shot o-o Prisoner hit by bullet',
        'Captain ordered to shoot o-o Rifleman 1 shot',
        'Captain ordered to shoot o-o Rifleman 2 shot',
        'Prisoner hit by bullet o-o Prisoner dead',
    )
    for edge in edges:
        pag.add_edge(edge)

    # (node_1, node_2, conditioning set) for every registered separation.
    separations = (
        (captain, hit, {rifleman_1, rifleman_2}),
        (dead, rifleman_1, {hit, rifleman_2}),
        (dead, rifleman_2, {hit, rifleman_1}),
        (rifleman_1, rifleman_2, {captain}),
    )
    sep_sets = SepSets()
    for first, second, conditioning in separations:
        sep_sets.add(node_1=first, node_2=second, cond_set=conditioning)

    found = ImmoralitiesFinder(graph=pag, sep_sets=sep_sets).find()

    assert (rifleman_1, hit, rifleman_2) in found
def test_dog_pee():
    """Skeleton search on a simulated cloudy/rain/sprinkler data set.

    Five binary columns are sampled with ``np.random.binomial`` and the
    skeleton search is run from a complete graph. The test then checks the
    five adjacencies listed at the end.
    """
    n_samples = 100000

    def coin(p):
        # One 0/1 draw per sample with success probability ``p``.
        return np.random.binomial(n=1, p=p, size=n_samples)

    # Sometimes cloudy.
    cloudy = coin(0.5)
    not_cloudy = 1 - cloudy

    # Cloudiness causes rain, but sometimes it rains even when it's not
    # cloudy.
    rain = cloudy * coin(0.7) + not_cloudy * coin(0.1)

    # Sprinkler generally turns on when it isn't cloudy.
    sprinkler = (cloudy == 0) * coin(0.8) + cloudy * coin(0.1)

    # Grass is generally wet whenever it rained or the sprinkler is on.
    wet_grass = (rain | sprinkler) * coin(0.90)

    # Dog doesn't like to get rained on, so it goes out more frequently
    # when it's not raining.
    dog_goes_out_to_pee = rain * coin(0.2) + (1 - rain) * coin(0.9)

    data = pd.DataFrame({
        'cloudy': cloudy,
        'sprinkler': sprinkler,
        'rain': rain,
        'wet_grass': wet_grass,
        'dog_goes_out_to_pee': dog_goes_out_to_pee,
    })

    graph = Graph(variables=list(data.columns), complete=True)
    PCSkeletonFinder(data=data, graph=graph).find()

    expected_adjacencies = (
        ('cloudy', 'rain'),
        ('cloudy', 'sprinkler'),
        ('rain', 'dog_goes_out_to_pee'),
        ('rain', 'wet_grass'),
        ('sprinkler', 'wet_grass'),
    )
    for pair in expected_adjacencies:
        assert graph.has_adjacency(pair)
Exemple #16
0
def test_remove_edge():
    """Removing an edge clears the adjacency, whatever the edge type.

    For each edge spelling a fresh graph is created, the edge is added and
    the A-B adjacency is checked before and after ``remove_edge``.
    """
    for edge in ('A o-o B', 'A <-o B'):
        pag = PartialAncestralGraph()

        pag.add_edge(edge)
        assert pag.has_adjacency(('A', 'B')) is True

        pag.remove_edge(('A', 'B'))
        assert pag.has_adjacency(('A', 'B')) is False
Exemple #17
0
def test_ancestral_validation():
    """``add_edge`` raises ``NotAncestralError`` for invalid additions.

    Each scenario starts from an empty graph, adds the edges that must be
    accepted, and then expects the final edge to be rejected.
    """
    scenarios = (
        # No directed cycles.
        (('A --> B', 'B --> C'), 'C --> A'),
        (('C --> A', 'B --> C'), 'A --> B'),
        # No almost-directed cycles.
        (('A --> B', 'B --> C'), 'C <-> A'),
        (('A --> B', 'C <-> A'), 'B --> C'),
        # Nodes of undirected edges can't have siblings.
        (('A --- B',), 'B <-> C'),
        (('B <-> C',), 'A --- B'),
    )

    for accepted_edges, rejected_edge in scenarios:
        pag = PartialAncestralGraph()

        for edge in accepted_edges:
            pag.add_edge(edge)

        with pytest.raises(NotAncestralError):
            pag.add_edge(rejected_edge)
def test_dog_example():
    """Skeleton search on the ``dog_example`` data set.

    Runs the search from a complete graph and checks:

    * ``{'exercise_levels'}`` is among the conditioning sets reported for
      the pair ``('activity', 'dog_tired')``;
    * ``{'best_friends_visit', 'activity'}`` is *not* among the conditioning
      sets reported for ``('weekend', 'mentally_exhausted_before_bed')``;
    * the listed adjacencies are present in the graph afterwards.
    """
    df = dog_example(size=100000)

    graph = Graph(variables=list(df.columns), complete=True)

    skeleton_finder = PCSkeletonFinder(data=df, graph=graph)

    cond_sets_satisfying_cond_indep = skeleton_finder.find()

    assert frozenset({'exercise_levels'}) in \
        cond_sets_satisfying_cond_indep[
            key_for_pair(('activity', 'dog_tired'))
        ]

    # The members of these collections are frozensets of variable names.
    # The previous version tested ``set({frozenset(...)}) not in ...``, i.e.
    # a set *of* frozensets, which can never be a member, so the assertion
    # could not fail. Test the frozenset itself.
    assert frozenset({'best_friends_visit', 'activity'}) not in \
        cond_sets_satisfying_cond_indep[
            key_for_pair(('weekend', 'mentally_exhausted_before_bed'))
        ]

    expected_adjacencies = (
        ('rain', 'best_friends_visit'),
        ('weekend', 'best_friends_visit'),
        ('rain', 'activity'),
        ('exercise_levels', 'best_friends_visit'),
        ('exercise_levels', 'activity'),
        ('mentally_exhausted_before_bed', 'activity'),
        ('exercise_levels', 'dog_tired'),
        ('best_friends_visit', 'mentally_exhausted_before_bed'),
        ('mentally_exhausted_before_bed', 'dog_teeth_brushed'),
        ('dog_tired', 'dog_teeth_brushed'),
    )
    for pair in expected_adjacencies:
        assert graph.has_adjacency(pair)
Exemple #19
0
def test_add_edge():
    """``add_edge`` makes an edge visible in both of its spellings.

    For every edge type a fresh graph is used: neither the edge nor its
    mirrored spelling exists before ``add_edge``, and both exist after.
    """
    edge_and_mirror = (
        ('A o-o B', 'B o-o A'),
        ('A --o B', 'B o-- A'),
        ('A o-- B', 'B --o A'),
        ('A --- B', 'B --- A'),
        ('A o-> B', 'B <-o A'),
        ('A <-> B', 'B <-> A'),
    )

    for edge, mirrored in edge_and_mirror:
        pag = PartialAncestralGraph()

        assert pag.has_edge(edge) is False
        assert pag.has_edge(mirrored) is False

        pag.add_edge(edge)

        assert pag.has_edge(edge) is True
        assert pag.has_edge(mirrored) is True