def greedy_markov_blanket_discovery(estimator, data, target_variable=None,
                                    limit=None, control_variables=None):
    """
    Markov blanket discovery with the standard greedy search.

    Thin wrapper: every argument is forwarded unchanged to
    ``greedy_search`` and the pair it yields is returned as a
    ``(selected, best_score)`` tuple.
    """
    search_options = {
        "target_variable": target_variable,
        "limit": limit,
        "control_variables": control_variables,
    }
    blanket, score = greedy_search(estimator, data, **search_options)
    return blanket, score
def random_markov_blanket_discovery(estimator, data, target_variable=None,
                                    limit=None, control_variables=None):
    """
    Markov blanket discovery with the randomized greedy search.

    Same as the plain greedy wrapper, except that ``greedy_search`` is
    invoked with ``select_from_top_k=3`` (per the original author's note,
    this selects uniformly from the top-3 candidates). Returns a
    ``(selected, best_score)`` tuple.
    """
    search_options = {
        "target_variable": target_variable,
        "limit": limit,
        "control_variables": control_variables,
        "select_from_top_k": 3,
    }
    blanket, score = greedy_search(estimator, data, **search_options)
    return blanket, score
Beispiel #3
0
def test_greedy_search_with_permutation_fi():
    """
    Run greedy_search with fraction_of_information_permutation on the
    tic-tac-toe dataset and compare against the recorded result.
    """

    # arrange: the CSV lives in a "datasets" directory two levels above
    # the explora module file
    datasets_dir = Path(explora.__file__).parent.parent / "datasets"
    frame = pd.read_csv(datasets_dir / "tic_tac_toe.csv")
    expected_score = 0.4447970033469652
    expected_selection = {1, 5, 9, 3, 7}
    tolerance = 1e-10

    # act
    selection, score = greedy_search(fraction_of_information_permutation,
                                     frame)

    # assert: score within relative tolerance, selection matches exactly
    assert score == approx(expected_score, rel=tolerance)
    assert selection == expected_selection
Beispiel #4
0
def test_greedy_search_with_upper_bound_mi():
    """
    Run greedy_search with mutual_information_permutation_upper_bound on
    the tic-tac-toe dataset and compare against the recorded result.
    """

    # arrange: the CSV lives in a "datasets" directory two levels above
    # the explora module file
    datasets_dir = Path(explora.__file__).parent.parent / "datasets"
    frame = pd.read_csv(datasets_dir / "tic_tac_toe.csv")
    expected_score = 0.28707781833145635
    expected_selection = {1, 5, 9}
    tolerance = 1e-10

    # act
    selection, score = greedy_search(
        mutual_information_permutation_upper_bound, frame)

    # assert: score within relative tolerance, selection matches exactly
    assert score == approx(expected_score, rel=tolerance)
    assert selection == expected_selection
Beispiel #5
0
def test_greedy_search_with_conditional_permutation_fi():
    """
    Run greedy_search with conditional_fraction_of_information_permutation
    and control variable set {5} on the tic-tac-toe dataset, and compare
    against the recorded result.
    """

    # arrange: the CSV lives in a "datasets" directory two levels above
    # the explora module file
    datasets_dir = Path(explora.__file__).parent.parent / "datasets"
    frame = pd.read_csv(datasets_dir / "tic_tac_toe.csv")
    controls = {5}
    expected_score = 0.3561447636856704
    expected_selection = {1, 9, 3, 7}
    tolerance = 1e-10

    # act
    selection, score = greedy_search(
        conditional_fraction_of_information_permutation,
        frame,
        control_variables=controls)

    # assert: score within relative tolerance, selection matches exactly
    assert score == approx(expected_score, rel=tolerance)
    assert selection == expected_selection
Beispiel #6
0
def grow_shrink(grow_estimator, shrink_estimator, data, shrink_threshold=0, target=None, limit=None):
    """
    Grow a variable set greedily, then shrink it.

    For a dependency measure D(XY) and its conditional D(XY|Z): first a
    greedy search looks for a maximizer of D(XY) using ``grow_estimator``,
    then the result is shrunk using ``shrink_estimator`` (the conditional).
    Not to be confused with the Grow-Shrink algorithm for Markov blankets
    (although similar).

    Args:
        grow_estimator: estimator handed to ``greedy_search``.
        shrink_estimator: estimator handed to ``shrink``.
        data: a pandas DataFrame (converted with ``to_numpy()``) or an
            object already accepted by ``np.size(data, 1)``.
        shrink_threshold: forwarded to ``shrink`` unchanged.
        target: target passed to ``greedy_search``; when ``None`` it
            defaults to the size of axis 1 of ``data``.
        limit: forwarded to ``greedy_search`` unchanged.

    Returns:
        Whatever ``shrink`` returns.

    NOTE(review): ``shrink`` is called with ``target=None`` rather than the
    target resolved here -- confirm this is intended.
    """
    data_matrix = data.to_numpy() if isinstance(data, pd.DataFrame) else data
    target_index = np.size(data_matrix, 1) if target is None else target

    # grow phase; the score returned alongside the selection is not used
    grown, _ = greedy_search(grow_estimator, data_matrix, target_index, limit=limit)

    # shift every selected index down by one before shrinking
    # (presumably 1-based -> 0-based; verify against greedy_search/shrink)
    shifted = {index - 1 for index in grown}

    # shrink phase
    return shrink(shrink_estimator, shifted, data_matrix, shrink_threshold=shrink_threshold, target=None)