Python ExhaustiveSearch Examples, pgmpy.estimators.ExhaustiveSearch Python Examples

Example #1

0

Show file

File: tools.py Project: afcarl/collaborative_filter_website_example

def train_joke_type_selection():
    """Learn a network structure from the stored joke-type tallies.

    The ``nerd_joke``, ``weird_joke``, ``cat_meme``, ``dog_meme`` and
    ``dad_joke`` attributes are each summed over every ``Jokes`` row. Each
    total is expanded into that many observations of the matching label,
    giving a single-column data set (``joke_preference``). The data set is
    scored with ``BicScore`` and searched with ``ExhaustiveSearch``.

    Returns:
        The model returned by ``ExhaustiveSearch.estimate()``.
    """
    # one table
    jokes = Jokes.query.all()

    # (attribute summed over all rows, label written to the data set);
    # the order matches the order in which observations are generated.
    categories = (
        ("nerd_joke", "nerd joke"),
        ("weird_joke", "weird joke"),
        ("cat_meme", "cat meme"),
        ("dog_meme", "dog meme"),
        ("dad_joke", "dad joke"),
    )

    joke_preferences = []
    for attribute, label in categories:
        total = sum(getattr(joke, attribute) for joke in jokes)
        joke_preferences.extend(label for _ in range(total))

    # Build the frame in one step: appending row by row copies the whole
    # frame on every call, and DataFrame.append no longer exists in
    # pandas >= 2.0.
    data = pd.DataFrame({"joke_preference": joke_preferences})

    # The interactive debugging console that used to be opened here has been
    # removed; it blocked every call until the console was closed.
    bic = BicScore(data)
    es = ExhaustiveSearch(data, scoring_method=bic)
    best_model = es.estimate()
    return best_model

Example #2

0

Show file

File: structure_learning.py Project: ms440/bnlearn

def _exhaustivesearch(df, scoretype='bic', return_all_dags=False, verbose=3):
    """Run an exhaustive structure search on ``df`` and collect the results.

    Parameters
    ----------
    df
        Data handed to ``_SetScoringType`` and ``ExhaustiveSearch``.
    scoretype : str
        Forwarded to ``_SetScoringType`` to pick the scoring method.
    return_all_dags : bool
        When true, every ``(score, dag)`` pair from ``all_scores()`` is also
        stored (in reversed order) and the scores are plotted.
    verbose : int
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    dict
        ``'model'`` and ``'model_edges'`` always; ``'scores'`` and ``'dag'``
        additionally when ``return_all_dags`` is true.
    """
    scorer = _SetScoringType(df, scoretype)
    searcher = ExhaustiveSearch(df, scoring_method=scorer)
    winner = searcher.estimate()

    result = {'model': winner, 'model_edges': winner.edges()}

    if not return_all_dags:
        return result

    # Walk the scored DAGs back to front and split the pairs into two
    # parallel lists.
    ranked = list(reversed(searcher.all_scores()))
    result['scores'] = [score for score, _ in ranked]
    result['dag'] = [dag for _, dag in ranked]

    plt.plot(result['scores'])
    plt.show()

    return result

Example #3

0

Show file

File: test_ExhaustiveSearch.py Project: MariosRichards/BES_analysis_code

    def setUp(self):
        """Create the random and Titanic estimators used by the tests."""
        # Two random columns A and B, plus C as an exact copy of B.
        frame = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=['A', 'B'])
        frame['C'] = frame['B']
        self.rand_data = frame

        # One estimator with the default score, one each for BDeu and BIC.
        self.est_rand = ExhaustiveSearch(frame)
        bdeu = BdeuScore(frame)
        self.est_rand_bdeu = ExhaustiveSearch(frame, scoring_method=bdeu)
        bic = BicScore(frame)
        self.est_rand_bic = ExhaustiveSearch(frame, scoring_method=bic)

        # link to dataset: "https://www.kaggle.com/c/titanic/download/train.csv"
        titanic = pd.read_csv('pgmpy/tests/test_estimators/testdata/titanic_train.csv')
        self.titanic_data = titanic
        self.titanic_data2 = titanic[["Survived", "Sex", "Pclass"]]
        self.est_titanic = ExhaustiveSearch(self.titanic_data2)

Example #4

0

Show file

File: PgmpyScript.py Project: AdrienFelipe/Causal-discovery-Unit-testing

    def predict(self, dataset: DatasetInterface) -> List[Relation]:
        """Learn a graph from ``dataset`` and convert it into relations.

        The estimator is chosen by comparing ``self.algorithm`` with
        ``ESTIMATOR_PC`` and ``ESTIMATOR_MMHC``; any other value falls back
        to ``HillClimbSearch``. The learned graph and the data are passed to
        ``__build_relations`` and its result is returned.
        """
        samples = dataset.get_data()
        selected = self.algorithm

        if selected == self.ESTIMATOR_PC:
            learned = PC(samples).estimate(show_progress=False)
        elif selected == self.ESTIMATOR_MMHC:
            # NOTE(review): the MMHC option is served by ExhaustiveSearch
            # here, and show_progress goes to the constructor rather than
            # to estimate() - confirm both are intended.
            learned = ExhaustiveSearch(samples, show_progress=False).estimate()
        else:
            learned = HillClimbSearch(samples).estimate(show_progress=False)

        return PgmpyScript.__build_relations(learned, samples)

Example #5

0

Show file

def scoreStructureLearn(data,
                        search='HillClimbSearch',
                        scoring_method='BicScore'):
    """Learn a network structure with a score-and-search approach.

    Args:
        data: Data set handed to the scoring method and the search class.
        search: ``'HillClimbSearch'`` selects hill climbing; any other value
            selects ``ExhaustiveSearch``.
        scoring_method: One of ``'BicScore'``, ``'K2Score'`` or
            ``'BdeuScore'`` (the latter with ``equivalent_sample_size=5``).

    Returns:
        The model returned by the chosen search's ``estimate()``.

    Raises:
        ValueError: If ``scoring_method`` is not one of the supported names.
    """
    # Structure learning based on score + search.
    # search: HillClimbSearch, ExhaustiveSearch
    # scoring_method: BicScore, K2Score, BdeuScore
    if scoring_method == 'BicScore':
        scoring_method_tmp = BicScore(data)
    elif scoring_method == 'K2Score':
        scoring_method_tmp = K2Score(data)
    elif scoring_method == 'BdeuScore':
        scoring_method_tmp = BdeuScore(data, equivalent_sample_size=5)
    else:
        # Without this branch an unknown name left scoring_method_tmp unbound
        # and surfaced later as a confusing UnboundLocalError.
        raise ValueError(
            "Unknown scoring_method %r; expected 'BicScore', 'K2Score' "
            "or 'BdeuScore'" % (scoring_method,))

    if search == 'HillClimbSearch':
        es = HillClimbSearch(data, scoring_method=scoring_method_tmp)
    else:
        es = ExhaustiveSearch(data, scoring_method=scoring_method_tmp)
    best_model = es.estimate()
    return best_model

Example #6

0

Show file

File: evaluation.py Project: VaishnaviSPatil/UDA_CausalDAG

def structure_prediction(samples,
                         encoder,
                         bayesmodel_true,
                         method='hc',
                         score_type='bic'):
    """Estimate a network structure from encoded samples.

    The samples are decoded with ``encoder.inverse_transform`` and placed in
    a DataFrame whose positional columns are renamed to the node names of
    ``bayesmodel_true``. With ``method == 'ex'`` an exhaustive search is
    run; otherwise a hill-climb search is started from a copy of
    ``bayesmodel_true``. ``score_type`` selects the scoring class
    (``'bic'``, ``'k2'`` or ``'bdeu'``); an unknown key raises ``KeyError``.

    Returns the model produced by the selected search's ``estimate()``.
    """
    scorers = {'bic': BicScore, 'k2': K2Score, 'bdeu': BDeuScore}
    Score = scorers[score_type]

    decoded = encoder.inverse_transform(samples)
    # Map column position -> node name, in the order the true model lists
    # its nodes.
    column_names = dict(enumerate(list(bayesmodel_true.nodes())))
    frame = pd.DataFrame(decoded).rename(columns=column_names)

    scorer = Score(frame)
    if method == 'ex':
        return ExhaustiveSearch(frame, scoring_method=scorer).estimate()

    climber = HillClimbSearch(frame, scoring_method=scorer)
    return climber.estimate(start_dag=bayesmodel_true.copy(),
                            show_progress=False)

Example #7

0

Show file

File: test_ExhaustiveSearch.py Project: studosi-fer/STRUCE

    def setUp(self):
        """Create the random and Titanic estimators used by the tests."""
        # Two random columns A and B, plus C as an exact copy of B.
        draws = np.random.randint(0, 5, size=(5000, 2))
        frame = pd.DataFrame(draws, columns=['A', 'B'])
        frame['C'] = frame['B']
        self.rand_data = frame

        # One estimator with the default score, one each for BDeu and BIC.
        self.est_rand = ExhaustiveSearch(frame)
        bdeu = BdeuScore(frame)
        self.est_rand_bdeu = ExhaustiveSearch(frame, scoring_method=bdeu)
        bic = BicScore(frame)
        self.est_rand_bic = ExhaustiveSearch(frame, scoring_method=bic)

        # link to dataset: "https://www.kaggle.com/c/titanic/download/train.csv"
        csv_path = 'pgmpy/tests/test_estimators/testdata/titanic_train.csv'
        titanic = pd.read_csv(csv_path)
        self.titanic_data = titanic
        self.titanic_data2 = titanic[["Survived", "Sex", "Pclass"]]
        self.est_titanic = ExhaustiveSearch(self.titanic_data2)

Example #8

0

Show file

File: pgm_tutorial.py Project: mpierrau/DD2420_Probabilistical_Graphical_Models

# for this assignment.
# Columns of `data` that take part in the structure search below.
raw_data2 = {
    'age': data['age'],
    'avg_cs': data['avg_cs'],
    'avg_mat': data['avg_mat'],
    'delay': data['delay'],  # Don't comment out this one
    'gender': data['gender'],
}

# Reduced data set containing only the columns selected above.
data2 = pd.DataFrame(data=raw_data2)

import time

# Time the exhaustive search with a wall-clock stamp.
t0 = time.time()
# Exhaustive search (currently enabled): collect the K2 scores over data2.
searcher = ExhaustiveSearch(data2, scoring_method=K2Score(data2))
search = searcher.all_scores()
print('time:', time.time() - t0)

# Uncomment for printout:
#for score, model in search:
#    print("{0}        {1}".format(score, model.edges()))

separator()

# NOTE(review): this search runs over data2 but its K2Score is built from the
# full `data` frame, unlike hcs2 below - confirm the mismatch is intentional.
hcs = HillClimbSearch(data2, scoring_method=K2Score(data))
model = hcs.estimate()

# Same hill-climb search with the score built from data2 itself.
hcs2 = HillClimbSearch(data2, scoring_method=K2Score(data2))
model2 = hcs2.estimate()

Example #9

0

Show file

File: test_ExhaustiveSearch.py Project: studosi-fer/STRUCE

class TestBaseEstimator(unittest.TestCase):
    """Tests for ``ExhaustiveSearch`` on random data and a Titanic subset."""

    def setUp(self):
        """Build the estimators shared by all tests."""
        # Two random columns A and B, plus C as an exact copy of B.
        self.rand_data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)),
                                      columns=list('AB'))
        self.rand_data['C'] = self.rand_data['B']
        self.est_rand = ExhaustiveSearch(self.rand_data)
        self.est_rand_bdeu = ExhaustiveSearch(self.rand_data,
                                              scoring_method=BdeuScore(
                                                  self.rand_data))
        self.est_rand_bic = ExhaustiveSearch(self.rand_data,
                                             scoring_method=BicScore(
                                                 self.rand_data))

        # link to dataset: "https://www.kaggle.com/c/titanic/download/train.csv"
        self.titanic_data = pd.read_csv(
            'pgmpy/tests/test_estimators/testdata/titanic_train.csv')
        self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
        self.est_titanic = ExhaustiveSearch(self.titanic_data2)

    def test_all_dags(self):
        """all_dags() yields 543 DAGs on 4 nodes and the 25 listed on A, B, C."""
        self.assertEqual(
            len(list(self.est_rand.all_dags(['A', 'B', 'C', 'D']))), 543)
        # self.assertEqual(len(list(self.est_rand.all_dags(nodes=range(5)))), 29281)  # takes ~30s

        # Each DAG is reduced to a sorted tuple of its edges so that the
        # collections can be compared as sets.
        abc_dags = {
            tuple(sorted(dag.edges())) for dag in self.est_rand.all_dags()
        }
        abc_dags_ref = set([
            (('A', 'B'), ('C', 'A'), ('C', 'B')), (('A', 'C'), ('B', 'C')),
            (('B', 'A'), ('B', 'C')), (('C', 'B'), ), (('A', 'C'), ('B', 'A')),
            (('B', 'C'), ('C', 'A')), (('A', 'B'), ('B', 'C')),
            (('A', 'C'), ('B', 'A'), ('B', 'C')), (('A', 'B'), ),
            (('A', 'B'), ('C', 'A')), (('B', 'A'), ('C', 'A'), ('C', 'B')),
            (('A', 'C'), ('C', 'B')), (('A', 'B'), ('A', 'C'), ('C', 'B')),
            (('B', 'A'), ('C', 'B')), (('A', 'B'), ('A', 'C')),
            (('C', 'A'), ('C', 'B')), (('A', 'B'), ('A', 'C'), ('B', 'C')),
            (('C', 'A'), ), (('B', 'A'), ('B', 'C'), ('C', 'A')),
            (('B', 'A'), ), (('A', 'B'), ('C', 'B')), (),
            (('B', 'A'), ('C', 'A')), (('A', 'C'), ), (('B', 'C'), )
        ])
        self.assertSetEqual(abc_dags, abc_dags_ref)

    def test_estimate_rand(self):
        """Each scoring method recovers the single B-C edge (either direction)."""
        # C is a copy of B, so the only expected edge joins B and C.
        expected = ([('B', 'C')], [('C', 'B')])

        est = self.est_rand.estimate()
        self.assertSetEqual(set(est.nodes()), set(['A', 'B', 'C']))
        # list(...) so an edge view compares equal to the reference lists.
        self.assertIn(list(est.edges()), expected)

        # Use the BDeu / BIC estimators here; previously est_rand was
        # estimated three times and these two were never exercised.
        est_bdeu = self.est_rand_bdeu.estimate()
        self.assertIn(list(est_bdeu.edges()), expected)

        est_bic = self.est_rand_bic.estimate()
        self.assertIn(list(est_bic.edges()), expected)

    def test_estimate_titanic(self):
        """The Titanic subset yields the three expected edges."""
        e1 = self.est_titanic.estimate()
        self.assertSetEqual(
            set(e1.edges()),
            set([('Survived', 'Pclass'), ('Sex', 'Pclass'),
                 ('Sex', 'Survived')]))

    def test_all_scores(self):
        """all_scores() matches the reference ordering, edges and scores."""
        scores = self.est_titanic.all_scores()
        scores_ref = [(-2072.9132364404695, []),
                      (-2069.071694164769, [('Pclass', 'Sex')]),
                      (-2069.0144197068785, [('Sex', 'Pclass')]),
                      (-2025.869489762676, [('Survived', 'Pclass')]),
                      (-2025.8559302273054, [('Pclass', 'Survived')]),
                      (-2022.0279474869753, [('Pclass', 'Sex'),
                                             ('Survived', 'Pclass')]),
                      (-2022.0143879516047, [('Pclass', 'Sex'),
                                             ('Pclass', 'Survived')]),
                      (-2021.9571134937144, [('Pclass', 'Survived'),
                                             ('Sex', 'Pclass')]),
                      (-2017.5258065853768, [('Sex', 'Pclass'),
                                             ('Survived', 'Pclass')]),
                      (-1941.3075053892837, [('Survived', 'Sex')]),
                      (-1941.2720031713893, [('Sex', 'Survived')]),
                      (-1937.4304608956886, [('Pclass', 'Sex'),
                                             ('Sex', 'Survived')]),
                      (-1937.4086886556927, [('Sex', 'Pclass'),
                                             ('Survived', 'Sex')]),
                      (-1937.3731864377983, [('Sex', 'Pclass'),
                                             ('Sex', 'Survived')]),
                      (-1934.1344850608882, [('Pclass', 'Sex'),
                                             ('Survived', 'Sex')]),
                      (-1894.2637587114903, [('Survived', 'Pclass'),
                                             ('Survived', 'Sex')]),
                      (-1894.2501991761198, [('Pclass', 'Survived'),
                                             ('Survived', 'Sex')]),
                      (-1894.2282564935958, [('Sex', 'Survived'),
                                             ('Survived', 'Pclass')]),
                      (-1891.0630673606006, [('Pclass', 'Survived'),
                                             ('Sex', 'Survived')]),
                      (-1887.2215250849, [('Pclass', 'Sex'),
                                          ('Pclass', 'Survived'),
                                          ('Sex', 'Survived')]),
                      (-1887.1642506270096, [('Pclass', 'Survived'),
                                             ('Sex', 'Pclass'),
                                             ('Sex', 'Survived')]),
                      (-1887.0907383830947, [('Pclass', 'Sex'),
                                             ('Survived', 'Pclass'),
                                             ('Survived', 'Sex')]),
                      (-1887.0771788477243, [('Pclass', 'Sex'),
                                             ('Pclass', 'Survived'),
                                             ('Survived', 'Sex')]),
                      (-1885.9200755341915, [('Sex', 'Pclass'),
                                             ('Survived', 'Pclass'),
                                             ('Survived', 'Sex')]),
                      (-1885.884573316297, [('Sex', 'Pclass'),
                                            ('Sex', 'Survived'),
                                            ('Survived', 'Pclass')])]

        self.assertEqual([sorted(model.edges()) for score, model in scores],
                         [edges for score, edges in scores_ref])
        # use assertAlmostEqual pointwise to avoid rounding issues.
        # An explicit loop is required: a lazy map() is never consumed on
        # Python 3, so the assertions inside it would silently not run.
        for (score, _model), (score_ref, _edges) in zip(scores, scores_ref):
            self.assertAlmostEqual(score, score_ref)

    def tearDown(self):
        """Drop every fixture created in setUp."""
        del self.rand_data
        del self.est_rand
        del self.est_rand_bdeu
        del self.est_rand_bic
        del self.titanic_data
        del self.titanic_data2
        del self.est_titanic

Example #10

0

Show file

File: test_ExhaustiveSearch.py Project: MariosRichards/BES_analysis_code

class TestBaseEstimator(unittest.TestCase):
    """Tests for ``ExhaustiveSearch`` on random data and a Titanic subset."""

    def setUp(self):
        """Build the estimators shared by all tests."""
        # Two random columns A and B, plus C as an exact copy of B.
        self.rand_data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
        self.rand_data['C'] = self.rand_data['B']
        self.est_rand = ExhaustiveSearch(self.rand_data)
        self.est_rand_bdeu = ExhaustiveSearch(self.rand_data, scoring_method=BdeuScore(self.rand_data))
        self.est_rand_bic = ExhaustiveSearch(self.rand_data, scoring_method=BicScore(self.rand_data))

        # link to dataset: "https://www.kaggle.com/c/titanic/download/train.csv"
        self.titanic_data = pd.read_csv('pgmpy/tests/test_estimators/testdata/titanic_train.csv')
        self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
        self.est_titanic = ExhaustiveSearch(self.titanic_data2)

    def test_all_dags(self):
        """all_dags() yields 543 DAGs on 4 nodes and the 25 listed on A, B, C."""
        self.assertEqual(len(list(self.est_rand.all_dags(['A', 'B', 'C', 'D']))), 543)
        # self.assertEqual(len(list(self.est_rand.all_dags(nodes=range(5)))), 29281)  # takes ~30s

        # Each DAG is reduced to a sorted tuple of its edges so that the
        # collections can be compared as sets.
        abc_dags = {tuple(sorted(dag.edges())) for dag in self.est_rand.all_dags()}
        abc_dags_ref = set([(('A', 'B'), ('C', 'A'), ('C', 'B')), (('A', 'C'), ('B', 'C')),
                            (('B', 'A'), ('B', 'C')), (('C', 'B'),), (('A', 'C'), ('B', 'A')),
                            (('B', 'C'), ('C', 'A')), (('A', 'B'), ('B', 'C')), (('A', 'C'),
                            ('B', 'A'), ('B', 'C')), (('A', 'B'),), (('A', 'B'), ('C', 'A')),
                            (('B', 'A'), ('C', 'A'), ('C', 'B')), (('A', 'C'), ('C', 'B')),
                            (('A', 'B'), ('A', 'C'), ('C', 'B')), (('B', 'A'), ('C', 'B')),
                            (('A', 'B'), ('A', 'C')), (('C', 'A'), ('C', 'B')), (('A', 'B'),
                            ('A', 'C'), ('B', 'C')), (('C', 'A'),), (('B', 'A'), ('B', 'C'), ('C', 'A')),
                            (('B', 'A'),), (('A', 'B'), ('C', 'B')), (), (('B', 'A'), ('C', 'A')),
                            (('A', 'C'),), (('B', 'C'),)])
        self.assertSetEqual(abc_dags, abc_dags_ref)

    def test_estimate_rand(self):
        """Each scoring method recovers the single B-C edge (either direction)."""
        # C is a copy of B, so the only expected edge joins B and C.
        expected = ([('B', 'C')], [('C', 'B')])

        est = self.est_rand.estimate()
        self.assertSetEqual(set(est.nodes()), set(['A', 'B', 'C']))
        # list(...) so an edge view compares equal to the reference lists.
        self.assertIn(list(est.edges()), expected)

        # Use the BDeu / BIC estimators here; previously est_rand was
        # estimated three times and these two were never exercised.
        est_bdeu = self.est_rand_bdeu.estimate()
        self.assertIn(list(est_bdeu.edges()), expected)

        est_bic = self.est_rand_bic.estimate()
        self.assertIn(list(est_bic.edges()), expected)

    def test_estimate_titanic(self):
        """The Titanic subset yields the three expected edges."""
        e1 = self.est_titanic.estimate()
        self.assertSetEqual(set(e1.edges()), set([('Survived', 'Pclass'), ('Sex', 'Pclass'), ('Sex', 'Survived')]))

    def test_all_scores(self):
        """all_scores() matches the reference ordering, edges and scores."""
        scores = self.est_titanic.all_scores()
        scores_ref = [(-2072.9132364404695, []),
                      (-2069.071694164769, [('Pclass', 'Sex')]),
                      (-2069.0144197068785, [('Sex', 'Pclass')]),
                      (-2025.869489762676, [('Survived', 'Pclass')]),
                      (-2025.8559302273054, [('Pclass', 'Survived')]),
                      (-2022.0279474869753, [('Pclass', 'Sex'), ('Survived', 'Pclass')]),
                      (-2022.0143879516047, [('Pclass', 'Sex'), ('Pclass', 'Survived')]),
                      (-2021.9571134937144, [('Pclass', 'Survived'), ('Sex', 'Pclass')]),
                      (-2017.5258065853768, [('Sex', 'Pclass'), ('Survived', 'Pclass')]),
                      (-1941.3075053892837, [('Survived', 'Sex')]),
                      (-1941.2720031713893, [('Sex', 'Survived')]),
                      (-1937.4304608956886, [('Pclass', 'Sex'), ('Sex', 'Survived')]),
                      (-1937.4086886556927, [('Sex', 'Pclass'), ('Survived', 'Sex')]),
                      (-1937.3731864377983, [('Sex', 'Pclass'), ('Sex', 'Survived')]),
                      (-1934.1344850608882, [('Pclass', 'Sex'), ('Survived', 'Sex')]),
                      (-1894.2637587114903, [('Survived', 'Pclass'), ('Survived', 'Sex')]),
                      (-1894.2501991761198, [('Pclass', 'Survived'), ('Survived', 'Sex')]),
                      (-1894.2282564935958, [('Sex', 'Survived'), ('Survived', 'Pclass')]),
                      (-1891.0630673606006, [('Pclass', 'Survived'), ('Sex', 'Survived')]),
                      (-1887.2215250849, [('Pclass', 'Sex'), ('Pclass', 'Survived'), ('Sex', 'Survived')]),
                      (-1887.1642506270096, [('Pclass', 'Survived'), ('Sex', 'Pclass'), ('Sex', 'Survived')]),
                      (-1887.0907383830947, [('Pclass', 'Sex'), ('Survived', 'Pclass'), ('Survived', 'Sex')]),
                      (-1887.0771788477243, [('Pclass', 'Sex'), ('Pclass', 'Survived'), ('Survived', 'Sex')]),
                      (-1885.9200755341915, [('Sex', 'Pclass'), ('Survived', 'Pclass'), ('Survived', 'Sex')]),
                      (-1885.884573316297, [('Sex', 'Pclass'), ('Sex', 'Survived'), ('Survived', 'Pclass')])]

        self.assertEqual([sorted(model.edges()) for score, model in scores],
                         [edges for score, edges in scores_ref])
        # use assertAlmostEqual pointwise to avoid rounding issues.
        # An explicit loop is required: a lazy map() is never consumed on
        # Python 3, so the assertions inside it would silently not run.
        for (score, _model), (score_ref, _edges) in zip(scores, scores_ref):
            self.assertAlmostEqual(score, score_ref)

    def tearDown(self):
        """Drop every fixture created in setUp."""
        del self.rand_data
        del self.est_rand
        del self.est_rand_bdeu
        del self.est_rand_bic
        del self.titanic_data
        del self.titanic_data2
        del self.est_titanic

Example #11

0

Show file

from pgmpy.estimators import ExhaustiveSearch, K2Score

if __name__ == '__main__':
    # Demo script: list the K2 score and edge set of every DAG over three
    # columns, where column C is an exact copy of column B.
#     fp = os.path.join('data', 'MTurk_Harvey.csv')
#     df = pd.read_csv(fp)
#     data = np.genfromtxt(fp, delimiter=",", dtype=float, skip_header=1)
#     x = data[:,:-1]
#     y = data[:,-1]

#     data = pd.DataFrame(np.random.randint(0, 5, size=(2500, 3)), columns=list('XYZ'))
#     data['sum'] = data.sum(axis=1)
#     #print(data)

#     est = ConstraintBasedEstimator(data)
#     skel, sep_sets = est.estimate_skeleton()
#     print(skel.edges())

#     s = ExhaustiveSearch(pd.DataFrame(data={'Temperature': [23, 19],'Weather': ['sunny', 'cloudy'],'Humidity': [65, 75]}))
#     print(len(list(s.all_dags())))
#     for dag in s.all_dags():
#         print(dag.edges())

    data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
    data['C'] = data['B']
    searcher = ExhaustiveSearch(data, scoring_method=K2Score(data))
    for score, model in searcher.all_scores():
        # Single-argument print(...) behaves the same on Python 2 and 3;
        # the former print statements were a syntax error on Python 3.
        print(score)
        print(model.edges())

Example #12

0

Show file

File: 9_LearnBayesNetFromData.py Project: statisticallyfit/PythonProbabilisticGraphicalModels

# Local score of 'tasty' with parents 'size' and 'fruit', from each scorer.
print(k2Fruit.local_score(variable='tasty', parents=['size', 'fruit']))
print(bicFruit.local_score(variable='tasty', parents=['size', 'fruit']))

# %% markdown [markdown]
# ### Search Strategies
# The search space of DAGs is super-exponential in the number of variables, and the above scoring functions allow for local maxima. The first property makes exhaustive search intractable for all but very small networks; the second prevents efficient local optimization algorithms from always finding the optimal structure. Thus, identifying the ideal structure is often not tractable. Despite this bad news, heuristic search strategies often yield good results.
#
# If only a few nodes are involved (read: fewer than 5), ExhaustiveSearch can be used to compute the score for every DAG and return the best-scoring one:

# #### Exhaustive Search
# **Example 1:** $Z + X + Y$
# %% codecell
from pgmpy.estimators import ExhaustiveSearch
from pgmpy.base.DAG import DAG

# Exhaustive search over `data`, scored with the `bic` object defined earlier.
es: ExhaustiveSearch = ExhaustiveSearch(data=data, scoring_method=bic)
bestModel: DAG = es.estimate()

bestModel.edges()
# %% codecell
# The best model (structurally estimated):
drawGraph(bestModel, nodeColor=LIGHT_GREEN)

# %% codecell
# Computing scores for all structurally analyzed DAGs:

print("All DAGs sorted by score:\n")

# reversed(...) walks the all_scores() result back to front.
for score, dag in reversed(es.all_scores()):
    print(f"Score = {score},   Edges: {dag.edges()}")
# %% markdown [markdown]