def greedy_markov_blanket_discovery(estimator, data, target_variable=None,
                                    limit=None, control_variables=None):
    """Discover a Markov blanket using the standard greedy search.

    Thin wrapper that forwards every argument unchanged to ``greedy_search``
    and hands back the pair it produces.

    Args:
        estimator: Scoring function passed through to ``greedy_search``.
        data: Dataset passed through to ``greedy_search``.
        target_variable: Forwarded as ``target_variable``.
        limit: Forwarded as ``limit``.
        control_variables: Forwarded as ``control_variables``.

    Returns:
        A ``(selected, best_score)`` tuple as produced by ``greedy_search``.
    """
    blanket, score = greedy_search(
        estimator,
        data,
        target_variable=target_variable,
        limit=limit,
        control_variables=control_variables,
    )
    return blanket, score
def random_markov_blanket_discovery(estimator, data, target_variable=None,
                                    limit=None, control_variables=None):
    """Discover a Markov blanket using the randomized greedy search.

    Forwards every argument to ``greedy_search`` and additionally sets
    ``select_from_top_k=3``; per the original description this makes the
    search pick uniformly among the top-3 candidates.

    Args:
        estimator: Scoring function passed through to ``greedy_search``.
        data: Dataset passed through to ``greedy_search``.
        target_variable: Forwarded as ``target_variable``.
        limit: Forwarded as ``limit``.
        control_variables: Forwarded as ``control_variables``.

    Returns:
        A ``(selected, best_score)`` tuple as produced by ``greedy_search``.
    """
    blanket, score = greedy_search(
        estimator,
        data,
        target_variable=target_variable,
        limit=limit,
        control_variables=control_variables,
        select_from_top_k=3,
    )
    return blanket, score
def test_greedy_search_with_permutation_fi():
    """Check ``greedy_search`` with ``fraction_of_information_permutation``.

    Runs the search on the bundled ``tic_tac_toe.csv`` dataset and compares
    both the best score and the selected set against recorded values.
    """
    # mock
    dataset_path = (
        Path(explora.__file__).parents[1] / "datasets" / "tic_tac_toe.csv"
    )
    frame = pd.read_csv(dataset_path)

    # do
    selected, best_score = greedy_search(
        fraction_of_information_permutation, frame)

    # assert
    tolerance = 1e-10
    expected_score = 0.4447970033469652
    assert best_score == approx(expected_score, rel=tolerance)
    assert selected == {1, 5, 9, 3, 7}
def test_greedy_search_with_upper_bound_mi():
    """Check ``greedy_search`` with ``mutual_information_permutation_upper_bound``.

    Runs the search on the bundled ``tic_tac_toe.csv`` dataset and compares
    both the best score and the selected set against recorded values.
    """
    # mock
    dataset_path = (
        Path(explora.__file__).parents[1] / "datasets" / "tic_tac_toe.csv"
    )
    frame = pd.read_csv(dataset_path)

    # do
    selected, best_score = greedy_search(
        mutual_information_permutation_upper_bound, frame)

    # assert
    tolerance = 1e-10
    expected_score = 0.28707781833145635
    assert best_score == approx(expected_score, rel=tolerance)
    assert selected == {1, 5, 9}
def test_greedy_search_with_conditional_permutation_fi():
    """Check ``greedy_search`` with a control-variable set.

    Uses ``conditional_fraction_of_information_permutation`` on the bundled
    ``tic_tac_toe.csv`` dataset with ``{5}`` as control variables, and
    compares the best score and the selected set against recorded values.
    """
    # mock
    dataset_path = (
        Path(explora.__file__).parents[1] / "datasets" / "tic_tac_toe.csv"
    )
    frame = pd.read_csv(dataset_path)
    controls = {5}

    # do
    selected, best_score = greedy_search(
        conditional_fraction_of_information_permutation,
        frame,
        control_variables=controls,
    )

    # assert
    tolerance = 1e-10
    expected_score = 0.3561447636856704
    assert best_score == approx(expected_score, rel=tolerance)
    assert selected == {1, 9, 3, 7}
def grow_shrink(grow_estimator, shrink_estimator, data, shrink_threshold=0,
                target=None, limit=None):
    """Grow a variable set greedily, then prune it with a conditional measure.

    For a dependency measure D(XY) and its conditional form D(XY|Z), the grow
    phase runs ``greedy_search`` with ``grow_estimator`` and the shrink phase
    passes the result to ``shrink`` with ``shrink_estimator``. This is similar
    to, but not the same as, the Grow-Shrink algorithm for Markov blankets.

    Args:
        grow_estimator: Estimator handed to ``greedy_search``.
        shrink_estimator: Estimator handed to ``shrink``.
        data: A pandas DataFrame (converted to a NumPy array) or array-like.
        shrink_threshold: Forwarded to ``shrink``.
        target: Target index for the grow phase; when ``None`` it defaults to
            the number of columns in ``data``.
        limit: Forwarded to ``greedy_search``.

    Returns:
        Whatever ``shrink`` returns for the grown candidate set.
    """
    data = data.to_numpy() if isinstance(data, pd.DataFrame) else data

    if target is None:
        # Column count; presumably the last column under a 1-based index
        # convention in greedy_search -- TODO confirm.
        target = np.size(data, axis=1)

    grown, _ = greedy_search(grow_estimator, data, target, limit=limit)

    # Shift every selected index down by one before shrinking (presumably
    # converting 1-based indices to 0-based -- TODO confirm against shrink).
    candidates = {index - 1 for index in grown}

    # NOTE(review): shrink receives target=None rather than the target
    # resolved above, so a caller-supplied target only affects the grow
    # phase. Confirm whether this is intended.
    return shrink(
        shrink_estimator,
        candidates,
        data,
        shrink_threshold=shrink_threshold,
        target=None,
    )