def kFold_cross_validation_bayesian(X, y, splits=10):
    """
    Cross-validate the Bayesian network with k folds.

    :param X: X dataframe - known values
    :param y: y column(s) - values to predict
    :param splits: number of folds to use
    :return: mean accuracy over the folds
    """
    # Fixed network structure; 'nutri_value' is the node being predicted.
    edges = [('fat_value', 'saturated-fat_value'),
             ('carbohydrates_value', 'sugars_value'),
             ('proteins_value', 'salt_value'),
             ('fat_value', 'energy_value'),
             ('carbohydrates_value', 'energy_value'),
             ('salt_value', 'nutri_value'),
             ('energy_value', 'nutri_value'),
             ('saturated-fat_value', 'nutri_value'),
             ('sugars_value', 'nutri_value')]
    folds = KFold_splitting(X, y, splits)
    scores = []
    for fold in folds:
        # NOTE(review): each fold is assumed to be
        # (X_train, X_test, y_train, y_test) -- indices 1 and 3 were already
        # used as test inputs / test labels; confirm against KFold_splitting.
        train_data = fold[0].copy()
        train_data['nutri_value'] = fold[2]
        predict_data = fold[1].copy()
        real_data = fold[3].copy()

        model = BayesianModel(edges)
        # Fit on this fold's training rows only. Fitting on the full X (as the
        # previous version did) let the test rows leak into training and also
        # added a 'nutri_value' column to the caller's dataframe.
        model.fit(train_data, estimator=BayesianEstimator, prior_type="BDeu")
        y_pred = model.predict(predict_data)
        scores.append(accuracy_score(y_pred, real_data))
    avg_scores = statistics.mean(scores)
    # statistics.stdev needs at least two samples.
    std_scores = statistics.stdev(scores) if len(scores) > 1 else 0.0
    print('Accuracy: %.3f (Standard Dev: %.3f)' % (avg_scores, std_scores))
    return avg_scores
    def test_predict(self):
        """Fit a small Titanic network and check predictions for each left-out column."""
        network = BayesianModel()
        network.add_edges_from([("Sex", "Survived"), ("Pclass", "Survived")])
        # Rows from 500 onwards are used for fitting; the first 30 rows are predicted.
        network.fit(self.titanic_data2[500:])

        # Each entry lists the columns given as evidence; the remaining
        # variable of the network is the one being predicted.
        evidence_columns = (
            ["Sex", "Pclass"],
            ["Survived", "Pclass"],
            ["Survived", "Sex"],
        )
        predictions = [
            network.predict(self.titanic_data2[columns][:30])
            for columns in evidence_columns
        ]

        expected = (
            np.array([
                '0', '1', '0', '1', '0', '0', '0', '0', '0', '1', '0', '1', '0',
                '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
                '0', '0', '0', '0'
            ]),
            np.array([
                'male', 'female', 'female', 'female', 'male', 'male', 'male',
                'male', 'female', 'female', 'female', 'female', 'male', 'male',
                'male', 'female', 'male', 'female', 'male', 'female', 'male',
                'female', 'female', 'female', 'male', 'female', 'male', 'male',
                'female', 'male'
            ]),
            np.array([
                '3', '1', '1', '1', '3', '3', '3', '3', '1', '1', '1', '1', '3',
                '3', '3', '1', '3', '1', '3', '1', '3', '1', '1', '1', '3', '1',
                '3', '3', '1', '3'
            ]),
        )

        for predicted, wanted in zip(predictions, expected):
            np_test.assert_array_equal(predicted.values.ravel(), wanted)
Exemple #3
0
def create_BN_model(data):
    """Learn a structure with hill climbing, fit the model and write it as BIF.

    :param data: data set handed to HillClimbSearch and to model.fit
    :return: tuple (fitted model, time elapsed between the start of structure
        learning and the end of fitting)
    """
    print("Structure learning")
    started = datetime.now()
    print("Start time: ", started)

    # Structure search with the hill-climb algorithm.
    learned_structure = HillClimbSearch(data).estimate()
    print(learned_structure.edges())
    learned_edges = learned_structure.edges()

    model = BayesianModel(learned_edges)

    print('Fitting the model...')

    # CPDs are estimated with fit()'s default estimator.
    model.fit(data)

    finished = datetime.now()
    print("End time: ", finished)

    # The BIF file is written after the end timestamp, so it is not timed.
    BIFWriter(model).write_bif('model_pgmpy.bif')

    if not model.check_model():
        print("not good")
    else:
        print(
            "Your network structure and CPD's are correctly defined. The probabilities in the columns sum to 1. Hill Climb worked fine!"
        )
    return (model, finished - started)
def naiveModel2():
    """Train and evaluate a naive-Bayes-shaped network with f19 as common parent."""
    trainingInputs, trainingOutputs, testingInputs, testingOutputs = \
        gtd.formSameWriterDiffWriterInputOutputFeaturePairs(5, True)

    # Column names f1 .. f19; inputs and outputs are concatenated column-wise.
    feature_names = ['f%d' % index for index in range(1, 20)]

    trainingData = pd.DataFrame(
        data=np.concatenate((trainingInputs, trainingOutputs), axis=1),
        columns=list(feature_names))

    testingData = pd.DataFrame(
        data=np.concatenate((testingInputs, testingOutputs), axis=1),
        columns=list(feature_names))

    # create model: one edge from f19 to each of f1 .. f18
    model = BayesianModel([('f19', name) for name in feature_names[:-1]])

    # fit model and data, compute CPDs
    model.fit(trainingData, estimator=BayesianEstimator, prior_type='BDeu')

    # evaluate the fitted model on the test frame with f19 as the target
    evaluateModel(model, testingData, 'f19', featuresLabelList2)
Exemple #5
0
 def _train_bn(self):
     """Build a BayesianModel from self._dag and fit it with a BDeu prior.

     :return: the fitted model
     """
     network = BayesianModel(self._dag.edges)
     # Nodes are added explicitly as well -- presumably so that nodes
     # without any edge are still part of the model.
     network.add_nodes_from(self._dag.nodes)
     training_frame = self._get_training_data()
     network.fit(training_frame, BayesianEstimator, prior_type='BDeu')
     return network
Exemple #6
0
    def configure_network(self, X, root_node, estimator_type, class_node=None, draw_dag=True):
        """
        Learn a tree structure from the data with TreeSearch and fit a Bayesian Network on it.

        :param X: pandas DataFrame, shape (n_samples, n_features)
        :param root_node: str, int. Root node of the tree structure.
        :param estimator_type: str (chow-liu | tan). The algorithm to use for estimating the DAG.
        :param class_node: str, int. Required if estimator_type = 'tan'.
        :param draw_dag: bool. When true, the learned DAG is passed to self.draw_network.

        :return: self : object
        """
        tree_search = TreeSearch(X, root_node)
        learned_dag = tree_search.estimate(estimator_type=estimator_type,
                                           class_node=class_node)

        fitted = BayesianModel(learned_dag.edges())
        fitted.fit(X,
                   estimator=BayesianEstimator,
                   prior_type='dirichlet',
                   pseudo_counts=0.1)

        # Store both the learned structure and the fitted model on the instance.
        self.dag = learned_dag
        self.model = fitted

        if draw_dag:
            self.draw_network(self.dag)

        return self
Exemple #7
0
    def pgm_generate(self, target, data, stats, subnodes):
        """Build and fit an explanation network for `target` over `subnodes`.

        :param target: node that receives edges from the selected nodes
        :param data: dataframe indexed by node name (columns)
        :param stats: mapping node -> p-value
        :param subnodes: nodes to include in the network
        :return: the fitted BayesianModel
        """
        p_values = pd.Series(stats, name='p-values')
        significant = p_values[p_values < 0.05]
        # Nodes with p-value below 0.05 that also belong to subnodes.
        MK_blanket = [node for node in significant.index if node in subnodes]
        subnodes_no_target = [node for node in subnodes if node != target]

        # Structure among the non-target nodes, then link selected nodes to target.
        search = HillClimbSearch(data[subnodes_no_target],
                                 scoring_method=BicScore(data))
        structure = search.estimate()
        for node in (candidate for candidate in MK_blanket if candidate != target):
            structure.add_edge(node, target)

        # Copy nodes and edges into a fresh BayesianModel.
        pgm_explanation = BayesianModel()
        for node in structure.nodes():
            pgm_explanation.add_node(node)
        for source, sink in structure.edges():
            pgm_explanation.add_edge(source, sink)

        # Fit on the transformed columns (target and others use different mappers).
        data_ex = data[subnodes].copy()
        data_ex[target] = data[target].apply(self.generalize_target)
        for node in subnodes_no_target:
            data_ex[node] = data[node].apply(self.generalize_others)
        pgm_explanation.fit(data_ex)

        return pgm_explanation
Exemple #8
0
 def single_bayes_net(df, independent_vars, dependent_vars):
     """Fit a network with an edge from every independent to every dependent variable.

     :param df: data used to fit the model
     :param independent_vars: parent node names
     :param dependent_vars: child node names
     :return: the fitted BayesianModel
     """
     network = BayesianModel()
     network.add_nodes_from(independent_vars)
     for parent in independent_vars:
         for child in dependent_vars:
             network.add_edge(parent, child)
     network.fit(df)
     return network
def train_model(df, lst_relations: list = default_list) -> BayesianModel:
    """Build a BayesianModel from `lst_relations` and fit it on `df`.

    :param df: training data
    :param lst_relations: edge list handed to BayesianModel
    :return: the fitted model
    """
    # NOTE(review): pgmpy documents the prior types as 'dirichlet', 'BDeu'
    # and 'K2' -- confirm that lowercase "k2" is accepted by the installed
    # version.
    fit_options = dict(estimator=BayesianEstimator,
                       prior_type="k2",
                       equivalent_sample_size=10,
                       complete_samples_only=False)

    network = BayesianModel(lst_relations)
    network.fit(df, **fit_options)
    return network
    def pgm_generate(self, target, data, pgm_stats, subnodes, child=None):
        """Build and fit an explanation network for `target` over `subnodes`.

        Node ids are normalised to strings of ints, and `data.columns` is
        converted to strings in place (the caller's dataframe is modified).

        :param target: target node id (converted with str(int(...)))
        :param data: dataframe whose columns are node ids
        :param pgm_stats: not used by this method
        :param subnodes: node ids to include in the network
        :param child: when None, the structure is learned on the non-target
            nodes and the nodes returned by self.search_MK are linked to the
            target; otherwise the structure is learned on all transformed
            columns, target included
        :return: the fitted BayesianModel
        """
        subnodes = [str(int(node)) for node in subnodes]
        target = str(int(target))
        subnodes_no_target = [node for node in subnodes if node != target]
        data.columns = data.columns.astype(str)

        # Called in both modes, as before, although only the child-is-None
        # branch uses the result.
        MK_blanket = self.search_MK(data, target, subnodes_no_target.copy())

        def build_explanation_frame():
            # Transformed copy of the sub-node columns used for fitting.
            frame = data[subnodes].copy()
            frame[target] = data[target].apply(self.generalize_target)
            for node in subnodes_no_target:
                frame[node] = data[node].apply(self.generalize_others)
            return frame

        # `is None` instead of `== None`: identity is the intended check and
        # does not depend on how `child` implements equality.
        if child is None:
            est = HillClimbSearch(data[subnodes_no_target],
                                  scoring_method=BicScore(data))
            learned = est.estimate()
            for node in MK_blanket:
                if node != target:
                    learned.add_edge(node, target)
            data_ex = None
        else:
            data_ex = build_explanation_frame()
            est = HillClimbSearch(data_ex, scoring_method=BicScore(data_ex))
            learned = est.estimate()

        #   Create the pgm
        pgm_explanation = BayesianModel()
        for node in learned.nodes():
            pgm_explanation.add_node(node)
        for edge in learned.edges():
            pgm_explanation.add_edge(edge[0], edge[1])

        #   Fit the pgm. In the `child` branch the frame built for the
        #   structure search is reused instead of being rebuilt identically.
        if data_ex is None:
            data_ex = build_explanation_frame()
        pgm_explanation.fit(data_ex)

        return pgm_explanation
Exemple #11
0
class BayesNetwork:
    """Wrap a BayesianModel whose edges are given as pairs of column indices."""

    def __init__(self, dataset, graph_structure_index):
        """
        :param dataset: object exposing a pandas dataframe as `.dataframe`
        :param graph_structure_index: iterable of (parent_index, child_index)
            pairs indexing into the dataframe's columns
        """
        self.dataset = dataset
        self.columns = dataset.dataframe.columns
        self.graph_structure_index = graph_structure_index

    def build_graph(self):
        """Translate the index pairs to column names and create self.model."""
        graph_structure_name = [
            (self.columns[edge[0]], self.columns[edge[1]])
            for edge in self.graph_structure_index
        ]
        self.model = BayesianModel(graph_structure_name)

    def draw_graph(self):
        """Draw self.model through Drawer.draw_graph."""
        Drawer.draw_graph(self.model)

    def fit_model(self, prior=False, prior_data=[]):
        """Fit self.model with maximum likelihood on all but the last 3 rows.

        :param prior: when true, fitting with prior pseudo-counts is requested;
            this path is not implemented
        :param prior_data: not used
        :raises NotImplementedError: if `prior` is true
        """
        if prior:
            # Intended pseudo-counts, kept for reference. The earlier
            # `{{...}}` literal built a set containing a dict and raised
            # TypeError before NotImplementedError could be reached.
            pseudo_counts = {
                'D': [300, 700],
                'I': [500, 500],
                'G': [800, 200],
                'L': [500, 500],
                'S': [400, 600]
            }
            raise NotImplementedError
        else:
            self.model.fit(self.dataset.dataframe[0:-3],
                           estimator=MaximumLikelihoodEstimator)

    def inference(self, name):
        """Query the distribution of `name` by variable elimination and print it."""
        from pgmpy.inference import VariableElimination
        self.infer = VariableElimination(self.model)
        q = self.infer.query(variables=[name])
        print(q[name])

    def evaluate_result(self):
        """Print every CPD and hand 2-D CPD tables to Drawer.draw_matrix."""
        for cpd in self.model.get_cpds():
            print("CPD of {variable}:".format(variable=cpd.variable))
            print(cpd)
            accept_node = cpd.variables[0]

            # More than three axes: nothing is drawn.
            if len(cpd.values.shape) > 3:
                pass
                # Drawer.draw_3D(cpd.values, x_label=cpd.variables[1],
                #                y_label=cpd.variables[2], z_label=cpd.variables[3])
            # Two axes: one parent -> this node.
            elif len(cpd.values.shape) == 2:
                title = cpd.variables[1] + '----->' + accept_node
                Drawer(title=title,
                       is_show=False,
                       is_save=False,
                       save_path='img/' + title + '.jpg').draw_matrix(
                           cpd.values)
Exemple #12
0
class BN:
    """Bayesian-network helper: load the relevant CSV columns, fit and predict."""

    def __init__(self, DAG):
        """
        :param DAG: iterable of (parent, child) edges
        """
        self.data = []
        self.model = BayesianModel(DAG)

    def take_only_relevant_features(self, DAG, db_file):
        """Read `db_file` and keep only the columns named in the edges of `DAG`.

        :param DAG: iterable of (parent, child) edges
        :param db_file: path of the CSV file to read
        :return: dataframe with the matching columns, in file order
        """
        all_data = pd.read_csv(db_file)

        # Set of every node named in an edge; replaces repeated tuple
        # concatenation and gives constant-time membership tests.
        relevant_features = {node for edge in DAG for node in edge}

        data = pd.DataFrame()
        for column in all_data:
            if column in relevant_features:
                data[column] = all_data[column]
        return data

    def BNLearning(self, DAG, db_file):
        """Load the relevant columns into self.data and fit a new self.model."""
        self.data = self.take_only_relevant_features(DAG, db_file)
        self.model = BayesianModel(DAG)

        self.model.fit(self.data, BayesianEstimator)

    def BNTesting(self, results_file):
        """Predict 'song_popularity' for the last 20% of self.data and save as CSV.

        :param results_file: path of the CSV file to write
        """
        # separate data for test
        training_part = int(0.8 * len(self.data))
        testing_data = self.data[training_part:]

        # predict on a copy without the target column
        predict_data = testing_data.copy()
        predict_data.drop('song_popularity', axis=1, inplace=True)
        y_pred = self.model.predict(predict_data)

        with open(results_file, 'w', newline='') as file:
            y_pred.to_csv(file)

    def BNForOneSong(self, DAG, db_file, results_file, songFile):
        """Fit a fresh model on `db_file` and predict for the rows of `songFile`.

        :param DAG: iterable of (parent, child) edges
        :param db_file: CSV file with the training data
        :param results_file: path of the CSV file receiving the predictions
        :param songFile: CSV file with the row(s) to predict
        :return: predicted 'song_popularity' at index label 0
        """
        data = self.take_only_relevant_features(DAG, db_file)
        # take_only_relevant_features already returns a new frame; the extra
        # read of songFile into an unused variable and the extra copy were
        # dropped.
        dataToPredictRF = self.take_only_relevant_features(DAG, songFile)

        model = BayesianModel(DAG)

        model.fit(data, BayesianEstimator)

        y_pred = model.predict(dataToPredictRF)

        with open(results_file, 'w', newline='') as file:
            y_pred.to_csv(file)

        return y_pred['song_popularity'][0]
class BaseEliminationTest(TestCase):
    """Shared fixture: a five-node network fitted on random binary data."""

    def setUp(self):
        columns = ['diff', 'grade', 'intel', 'sat', 'reco']
        edges = [('diff', 'grade'), ('intel', 'grade'), ('intel', 'sat'),
                 ('grade', 'reco')]
        self.model = BayesianModel(edges)
        # 1000 rows of 0/1 values, one column per node.
        samples = np.random.randint(low=0, high=2, size=(1000, 5))
        self.model.fit(pd.DataFrame(samples, columns=columns))

    def tearDown(self):
        # elimination_order is not set in this class -- presumably subclasses
        # assign it in their own setUp.
        del self.model
        del self.elimination_order
class BaseEliminationTest(TestCase):
    """Shared fixture: a five-node network fitted on random binary data."""

    def setUp(self):
        node_names = ['diff', 'grade', 'intel', 'sat', 'reco']
        structure = [('diff', 'grade'), ('intel', 'grade'),
                     ('intel', 'sat'), ('grade', 'reco')]
        self.model = BayesianModel(structure)
        # 1000 rows of 0/1 values, one column per node.
        random_rows = np.random.randint(low=0, high=2, size=(1000, 5))
        frame = pd.DataFrame(random_rows, columns=node_names)
        self.model.fit(frame)

    def tearDown(self):
        # elimination_order is not set in this class -- presumably subclasses
        # assign it in their own setUp.
        del self.model
        del self.elimination_order
Exemple #15
0
def bayesnet_examples():
    """Walk through fitting, predicting and hand-written CPDs on the student network."""
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd

    student_model = BayesianModel([('D', 'G'),
                                   ('I', 'G'),
                                   ('G', 'L'),
                                   ('I', 'S')])
    # we can generate some random data.
    raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
    data = pd.DataFrame(raw_data, columns=['D', 'I', 'G', 'L', 'S'])

    # First 75% of the rows for training, the rest for prediction.
    split_at = int(data.shape[0] * 0.75)
    data_train = data[:split_at]
    student_model.fit(data_train)
    student_model.get_cpds()

    # drop() returns a new frame; dropping in place on a slice of `data`
    # triggers pandas' chained-assignment warning.
    data_test = data[split_at:].drop('D', axis=1)
    student_model.predict(data_test)

    # Hand-written CPDs, added to the model at the end.
    grade_cpd = TabularCPD(
        variable='G',
        variable_card=3,
        values=[[0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2]],
        evidence=['I', 'D'],
        evidence_card=[2, 2])
    difficulty_cpd = TabularCPD(
        variable='D',
        variable_card=2,
        values=[[0.6, 0.4]])
    intel_cpd = TabularCPD(
        variable='I',
        variable_card=2,
        values=[[0.7, 0.3]])
    letter_cpd = TabularCPD(
        variable='L',
        variable_card=2,
        values=[[0.1, 0.4, 0.99],
                [0.9, 0.6, 0.01]],
        evidence=['G'],
        evidence_card=[3])
    sat_cpd = TabularCPD(
        variable='S',
        variable_card=2,
        values=[[0.95, 0.2],
                [0.05, 0.8]],
        evidence=['I'],
        evidence_card=[2])
    student_model.add_cpds(grade_cpd, difficulty_cpd,
                           intel_cpd, letter_cpd,
                           sat_cpd)
Exemple #16
0
class BaseEliminationTest(TestCase):
    """Shared fixture: a five-node network fitted on random binary data."""

    def setUp(self):
        variables = ["diff", "grade", "intel", "sat", "reco"]
        dependencies = [("diff", "grade"), ("intel", "grade"),
                        ("intel", "sat"), ("grade", "reco")]
        self.model = BayesianModel(dependencies)
        # 1000 rows of 0/1 values, one column per node.
        binary_rows = np.random.randint(low=0, high=2, size=(1000, 5))
        self.model.fit(pd.DataFrame(binary_rows, columns=variables))

    def tearDown(self):
        # elimination_order is not set in this class -- presumably subclasses
        # assign it in their own setUp.
        del self.model
        del self.elimination_order
Exemple #17
0
def bayesnet_examples():
    """Walk through fitting, predicting and hand-written CPDs on the student network."""
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd

    student_model = BayesianModel([('D', 'G'),
                                   ('I', 'G'),
                                   ('G', 'L'),
                                   ('I', 'S')])
    # we can generate some random data.
    raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
    data = pd.DataFrame(raw_data, columns=['D', 'I', 'G', 'L', 'S'])

    # First 75% of the rows for training, the rest for prediction.
    split_at = int(data.shape[0] * 0.75)
    data_train = data[:split_at]
    student_model.fit(data_train)
    student_model.get_cpds()

    # drop() returns a new frame; dropping in place on a slice of `data`
    # triggers pandas' chained-assignment warning.
    data_test = data[split_at:].drop('D', axis=1)
    student_model.predict(data_test)

    # Hand-written CPDs, added to the model at the end.
    grade_cpd = TabularCPD(
        variable='G',
        variable_card=3,
        values=[[0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2]],
        evidence=['I', 'D'],
        evidence_card=[2, 2])
    difficulty_cpd = TabularCPD(
        variable='D',
        variable_card=2,
        values=[[0.6, 0.4]])
    intel_cpd = TabularCPD(
        variable='I',
        variable_card=2,
        values=[[0.7, 0.3]])
    letter_cpd = TabularCPD(
        variable='L',
        variable_card=2,
        values=[[0.1, 0.4, 0.99],
                [0.9, 0.6, 0.01]],
        evidence=['G'],
        evidence_card=[3])
    sat_cpd = TabularCPD(
        variable='S',
        variable_card=2,
        values=[[0.95, 0.2],
                [0.05, 0.8]],
        evidence=['I'],
        evidence_card=[2])
    student_model.add_cpds(grade_cpd, difficulty_cpd,
                           intel_cpd, letter_cpd,
                           sat_cpd)
def pgmpy_test():
    """Fit a single-node 'coin' model on 30 heads / 70 tails and print its CPD."""
    # Heads are encoded as 0 and tails as 1.
    heads = [0] * 30
    tails = [1] * 70
    data = pd.DataFrame(np.array(heads + tails), columns=['coin'])
    print(data)

    coin_model = BayesianModel()
    coin_model.add_node('coin')

    # Parameters are estimated with the Maximum Likelihood Estimator.
    coin_model.fit(data, estimator=MaximumLikelihoodEstimator)
    print(coin_model.get_cpds('coin'))
Exemple #19
0
def create_bayes_net(file, keep_atts, edges):
    """Fit a Bayesian network on selected columns of a CSV file.

    :param file: path of the CSV file to read
    :param keep_atts: columns to keep; each becomes a node
    :param edges: edges defining the network structure
    :return: the fitted BayesianModel
    """
    attributes = pd.read_csv(file)[keep_atts]

    network = BayesianModel()
    network.add_nodes_from(attributes.columns)
    # structure of the network
    network.add_edges_from(edges)

    # fit estimates the CPD tables for the given structure
    network.fit(attributes)
    return network
class TestBayesianModelFitPredict(unittest.TestCase):
    """Tests for fit() on a disconnected model and predict() on a connected one."""

    def setUp(self):
        self.model_disconnected = BayesianModel()
        self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])

        self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])

    def test_disconnected_fit(self):
        """Each node's CPD must equal the relative frequencies of its column."""
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        self.model_disconnected.fit(values)

        for node in ['A', 'B', 'C', 'D', 'E']:
            cpd = self.model_disconnected.get_cpds(node)
            self.assertEqual(cpd.variable, node)
            np_test.assert_array_equal(cpd.cardinality, np.array([2]))
            # .loc replaces DataFrame.ix, which was removed in pandas 1.0.
            counts = values.loc[:, node].value_counts()
            value = counts / counts.sum()
            value = value.reindex(sorted(value.index)).values
            np_test.assert_array_equal(cpd.values, value)

    def test_connected_predict(self):
        """predict() rejects complete rows and reproduces the pinned values for E."""
        np.random.seed(42)
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:800]
        predict_data = values[800:].copy()
        self.model_connected.fit(fit_data)
        # With every variable present the call is expected to raise ValueError.
        self.assertRaises(ValueError, self.model_connected.predict, predict_data)
        predict_data.drop('E', axis=1, inplace=True)
        e_predict = self.model_connected.predict(predict_data)
        np_test.assert_array_equal(e_predict.values.ravel(),
                                   np.array([1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
                                             1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
                                             0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                                             0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
                                             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
                                             1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
                                             1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                             0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
                                             1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
                                             1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
                                             0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
                                             1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 1, 0]))

    def tearDown(self):
        del self.model_connected
        del self.model_disconnected
Exemple #21
0
class TestBayesianModelFitPredict(unittest.TestCase):
    """Tests for fit() on a disconnected model and predict() on a connected one."""

    def setUp(self):
        self.model_disconnected = BayesianModel()
        self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])

        self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'),
                                              ('C', 'D'), ('B', 'E')])

    def test_disconnected_fit(self):
        """Each node's CPD must equal the relative frequencies of its column."""
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        self.model_disconnected.fit(values)

        for node in ['A', 'B', 'C', 'D', 'E']:
            cpd = self.model_disconnected.get_cpds(node)
            self.assertEqual(cpd.variable, node)
            np_test.assert_array_equal(cpd.cardinality, np.array([2]))
            # .loc replaces DataFrame.ix, which was removed in pandas 1.0.
            counts = values.loc[:, node].value_counts()
            value = counts / counts.sum()
            value = value.reindex(sorted(value.index)).values
            np_test.assert_array_equal(cpd.values, value)

    def test_connected_predict(self):
        """predict() rejects complete rows and reproduces the pinned values for E."""
        np.random.seed(42)
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:800]
        predict_data = values[800:].copy()
        self.model_connected.fit(fit_data)
        # With every variable present the call is expected to raise ValueError.
        self.assertRaises(ValueError, self.model_connected.predict,
                          predict_data)
        predict_data.drop('E', axis=1, inplace=True)
        e_predict = self.model_connected.predict(predict_data)
        np_test.assert_array_equal(
            e_predict.values.ravel(),
            np.array([
                1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
                0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
                1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1,
                1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
                1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
                1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
                1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0
            ]))

    def tearDown(self):
        del self.model_connected
        del self.model_disconnected
def pgmpy_test2():
    """Fit the D/I/G/L/S example network on random binary data and print its CPDs."""
    # example from https://github.com/pgmpy/pgmpy/blob/dev/examples/Learning%20from%20data.ipynb
    # Random data: 1000 rows, five variables, values drawn from {0, 1}.
    variable_names = ['D', 'I', 'G', 'L', 'S']
    samples = np.random.randint(low=0, high=2, size=(1000, 5))
    data = pd.DataFrame(samples, columns=variable_names)

    structure = [('D', 'G'), ('I', 'G'), ('I', 'S'), ('G', 'L')]
    model = BayesianModel(structure)

    # Learning CPDs using Maximum Likelihood Estimators
    model.fit(data, estimator=MaximumLikelihoodEstimator)
    for table in model.get_cpds():
        print("CPD of {variable}:".format(variable=table.variable))
        print(table)
Exemple #23
0
def create_model_and_inference():
    """Build a network from 'dependencies.csv', fit it on random data, and draw it.

    :return: tuple (fitted model, VariableElimination built on that model)
    """
    dep_df = pd.read_csv('dependencies.csv', sep=';')

    def connect(df, source, edgelist):
        """Recursively append (source, target) edges reachable from `source`."""
        source_df = df[df['Column2'] == source]
        for col in source_df.iloc[0, 3:len(source_df.columns)]:
            target_df = df[df['Column1'] == col]['Column2']
            if target_df.empty:
                continue
            target = target_df.item()
            # Skip a pair whose reverse is already recorded.
            if (target, source) not in edgelist:
                edgelist.append((source, target))
                connect(df, target, edgelist)

    edges = []
    connect(dep_df, 'myproximus-usage', edges)
    # Reverse every collected edge.
    edges = [(t[1], t[0]) for t in edges]

    nodes = set(itertools.chain.from_iterable(edges))
    nodes_df = dep_df.iloc[:, 1].to_frame()
    nodes_df = nodes_df[nodes_df['Column2'].isin(nodes)]

    # Eleven columns '0' .. '10' of random 0/1 values; after the transpose
    # below each of them becomes one sample row.
    for sample_index in range(11):
        nodes_df[str(sample_index)] = pd.DataFrame(
            data=np.random.randint(0, 2, size=64).T)
    nodes_df = nodes_df.set_index('Column2').transpose()

    model = BayesianModel()
    model.add_nodes_from(nodes)
    for edge in edges:
        try:
            model.add_edge(edge[0], edge[1])
        except ValueError:
            # pgmpy reports loop-forming edges with ValueError; a bare
            # `except:` would also swallow KeyboardInterrupt and real bugs.
            print('WARNING: tried to add edge which forms loop: ' + str(edge))

    model.fit(nodes_df, estimator=BayesianEstimator, prior_type="BDeu")

    draw_network(model.nodes(), model.edges(), {}, [])

    return model, VariableElimination(model)
Exemple #24
0
def fully_connected_model(nodes=None):
    """Connect every 'hypo' node to every 'obs'/'motor' node and fit the network.

    :param nodes: node names; a default set of five nodes is used when empty
        or None
    :return: the fitted BayesianModel
    """
    if not nodes:
        nodes = [BOREDOM, DESIRE, MOBILE, MOTOR_HYPO, LEFT_ARM]
    network = BayesianModel()
    network.add_nodes_from(nodes)

    # Roles are decided by substrings of the node names.
    parents = [name for name in nodes if 'hypo' in name]
    children = [name for name in nodes if 'obs' in name or 'motor' in name]
    for parent in parents:
        for child in children:
            network.add_edge(u=parent, v=child)

    network.fit(TRAINING_DATA, estimator=BayesianEstimator, prior_type="BDeu")

    return network
Exemple #25
0
def create_bayes_net():
    """Fit a fixed-structure network on the KEEP_ATTS columns of the CelebA attributes CSV.

    :return: the fitted BayesianModel
    """
    attributes = pd.read_csv('../../data/list_attr_celeba.csv')[KEEP_ATTS]

    structure = [
        ('Young', 'Eyeglasses'),
        ('Young', 'Bald'),
        ('Young', 'Mustache'),
        ('Male', 'Mustache'),
        ('Male', 'Smiling'),
        ('Male', 'Wearing_Lipstick'),
        ('Young', 'Mouth_Slightly_Open'),
        ('Young', 'Narrow_Eyes'),
        ('Male', 'Narrow_Eyes'),
        ('Smiling', 'Narrow_Eyes'),
        ('Smiling', 'Mouth_Slightly_Open'),
        ('Young', 'Smiling'),
    ]

    network = BayesianModel()
    # Every kept column is a node, whether or not it appears in an edge.
    network.add_nodes_from(attributes.columns)
    network.add_edges_from(structure)
    network.fit(attributes)
    return network
Exemple #26
0
def generate_approx_model_from_graph(ebunch, nodes, df):
	"""
	Learn a pgmpy Bayesian model from the data of a pandas dataframe.
	The rows are shuffled first.
	"""
	shuffled = df.sample(frac=1)
	approx_model = BayesianModel(ebunch)
	approx_model.add_nodes_from(nodes)
	# Every node, whether it comes from an edge or from `nodes`, gets the
	# two states 0 and 1.
	named = [pair[index] for pair in ebunch for index in (0, 1)]
	named.extend(nodes)
	state_names = {name: [0, 1] for name in named}
	approx_model.fit(shuffled, state_names=state_names, estimator=SmoothedMaximumLikelihoodEstimator)
	return approx_model
Exemple #27
0
def kNN(k):
    """For each test sample, fit a Bayesian model on its k nearest training rows and predict.

    NOTE(review): as written this does not parse -- the file name, several
    slices (``dataMat[]``) and column lists are blank, presumably redacted,
    and the final ``print`` is a Python 2 statement. The comments below only
    describe what the visible statements do.

    :param k: number of nearest training rows used per test sample
    """
    fileName = '';
    dataMat, dataLab = file2matrix(fileName, 9);
    # Train/test split; the slice bounds are missing from the source.
    trainMat = dataMat[];
    trainLab = np.array(dataLab[]);
    testMat = dataMat[];
    testLab = np.array(dataLab[]);
    # Weight of the positional distance; with coef = 1 the time distance
    # contributes nothing to distAll.
    coef = 1;
    # Pairwise test-by-train distance matrices.
    distPos = np.zeros((testMat.shape[0], trainMat.shape[0]));
    distTim = np.zeros((testMat.shape[0], trainMat.shape[0]));
    for i in range(testMat.shape[0]):
        for j in range(trainMat.shape[0]):
            distPos[i,j] = distSLC(testMat[i], trainMat[j]);
            distTim[i,j] = disTim(testMat[i], trainMat[j]);
    distPosNor = dataNorm(distPos);
    distTimNor = dataNorm(distTim);
    distAll = distPosNor*coef + distTimNor*(1-coef);
    # Per test row: training indices sorted by increasing combined distance.
    distIndex = distAll.argsort();
    # Columns 2..5 of the test matrix, converted to int.
    testI = np.zeros((testMat.shape[0], 4), dtype='int32');
    count = 0;
    for i in testMat[:, 2:6]:
        testI[count,:] = map(int, i);
        count += 1;
    testInput = pd.DataFrame(testI, columns=[]);
    # For every test row, the k closest training rows.
    trainMatK = trainMat[distIndex[:,0:k]];
    labelPre = [];
    for i in range(len(trainMatK)):
        num = 0;
        # Columns 2, 3, 4, 5 and 8 of the neighbours, converted to int.
        trainI = np.zeros((trainMatK[0].shape[0], 5), dtype='int32');
        for j in trainMatK[i][:, [2,3,4,5,8]]:
            trainI[num, :] = map(int, j);
            num += 1;
        trainFraK = pd.DataFrame(trainI,columns=[]);
        trainInput = trainFraK[[]];
        # One model per test row, fitted on that row's neighbours only.
        model = BayesianModel([(),(),(), ()]);
        model.fit(trainInput);
        a = pd.DataFrame([testInput.ix[i].values.tolist()], columns=[]);
        labelPre.append(model.predict(a).values[0][0]);
#     for i in range(len(testLakK)):
#         labels = testLakK[i];
#         labelPre.append(getLabel(labels));
    # Count predictions equal to the true label and print the ratio.
    count = 0;
    #print labelPre;
    for i in range(len(labelPre)):
        if labelPre[i]==testLab[i]:
            count += 1;
    print '准确度:', float(count)/len(testLab);
Exemple #28
0
    def BNForOneSong(self, DAG, db_file, results_file, songFile):
        """Fit a fresh model on `db_file` and predict for the rows of `songFile`.

        :param DAG: edges handed to BayesianModel
        :param db_file: file with the training data
        :param results_file: path of the CSV file receiving the predictions
        :param songFile: file with the row(s) to predict
        :return: predicted 'song_popularity' at index label 0
        """
        data = self.take_only_relevant_features(DAG, db_file)
        # The separate pd.read_csv(songFile) into an unused variable and the
        # self-assigning .copy() were removed; neither affected the result.
        dataToPredictRF = self.take_only_relevant_features(DAG, songFile)

        model = BayesianModel(DAG)

        model.fit(data, BayesianEstimator)

        y_pred = model.predict(dataToPredictRF)

        with open(results_file, 'w', newline='') as file:
            y_pred.to_csv(file)

        return y_pred['song_popularity'][0]
def BN(DAG):
    """Train a Bayesian network on the first rows and predict the held-out ones.

    Rows ``[:15068]`` are used for fitting and rows ``[15068:16952]`` for
    prediction of ``song_popularity``. The predictions are printed and
    written to ``predicted_results.csv``.

    :param DAG: edge list handed to ``BayesianModel`` and to
        ``take_only_relevant_features``
    """
    data = take_only_relevant_features(DAG)

    training_data = data[:15068]
    # drop() returns a new frame, so the original data is left untouched.
    predict_data = data[15068:16952].drop('song_popularity', axis=1)

    model = BayesianModel(DAG)

    # Declare the full state space of every node so that a value occurring
    # only in the held-out rows is still a known state at prediction time.
    state_names = {node: sorted(data[node].dropna().unique())
                   for node in model.nodes()}

    # Fit on the training slice only; fitting on all of ``data`` would let
    # the rows being predicted leak into the learned parameters.
    model.fit(training_data, estimator=BayesianEstimator,
              state_names=state_names)

    y_pred = model.predict(predict_data)
    print(y_pred)

    with open('predicted_results.csv', 'w', newline='') as file:
        y_pred.to_csv(file)
Exemple #30
0
def init(df, miss_node):
    """Fill the missing values of one column at random and fit the network.

    :param df: data frame holding the network variables
    :param miss_node: name of the column that may contain missing values
    :return: a ``BayesianModel`` (D->G, I->G, E->L, G->L) whose parameters
        were estimated by maximum likelihood on the completed data
    """
    # Boolean mask of the rows whose value for miss_node is missing.
    missing = df[miss_node].isnull()
    miss_size = int(missing.sum())

    if miss_size == 0:
        df_complete = df
    else:
        # Random initial guess: one of the values 0, 1, 2 per missing cell.
        init_vals = np.random.choice(3, size=miss_size)
        df_complete = df.copy()
        # A single .loc assignment writes into df_complete itself; the
        # chained form df[col][rows] = ... may write to a temporary copy.
        df_complete.loc[missing, miss_node] = init_vals

    # Treat the data as complete and estimate the parameters with MLE.
    bn_model = BayesianModel([('D', 'G'), ('I', 'G'), ('E', 'L'), ('G', 'L')])
    bn_model.fit(df_complete, estimator=MaximumLikelihoodEstimator)
    return bn_model
def naiveModel():
    """Fit a network in which f1..f9 all point to 'h', then evaluate it.

    Training and testing data come from ``differenceBetweenFeatures(True)``;
    the fitted model is handed to ``evaluateModel`` together with the testing
    data, the target node 'h' and ``featuresLabelList``.
    """
    train_set, test_set = differenceBetweenFeatures(True)

    # Each of the nine features is a direct parent of the hypothesis node.
    # (An earlier variant with 'f10' as the parent of f1..f9 is not used.)
    feature_edges = [('f%d' % idx, 'h') for idx in range(1, 10)]
    network = BayesianModel(feature_edges)

    # Estimate the CPDs with a BDeu prior.
    network.fit(train_set, estimator=BayesianEstimator, prior_type='BDeu')

    # Score the hypothesis node against the testing data.
    evaluateModel(network, test_set, 'h', featuresLabelList)
def createBayesGraph(graph_list,mapping,data):
    '''
    Build a Bayesian network from an edge list, fit it to the data and
    extract the fitted CPD tables.

    graph_list - iterable of "source,target" strings, one per edge
    mapping - node names; mapping[i] names column i of data
    data - 2-D array, read column-wise as data[:, i]

    this function returns a tuple:
        the fitted bayes model
        a list holding, per CPD, a dict {str(list(cpd.variables)): rows of numbers}
        a dict with the state names of every variable seen in the CPDs
    '''
    network = BayesianModel()
    network.add_nodes_from(list(mapping))
    for edge in graph_list:
        endpoints = edge.split(',')
        network.add_edge(endpoints[0], endpoints[1])

    # One named column per node, in mapping order.
    columns = {}
    for col in range(0, len(mapping)):
        columns[mapping[col]] = data[:, col]
    network.fit(pandas.DataFrame(data=columns))

    tables = []
    states_by_variable = {}
    for cpd in network.get_cpds():
        for variable in cpd.state_names:
            states_by_variable[variable] = cpd.state_names[variable]

        # Recover the numbers from the printed table: cells are separated by
        # '|', and a border fragment containing "-+" closes the current row
        # once that row has collected at least one number.
        rows = []
        current_row = []
        row_has_numbers = False
        for cell in str(cpd).split('|'):
            if is_number(cell):
                current_row.append(float(cell.strip()))
                row_has_numbers = True
            elif row_has_numbers and "-+" in cell:
                rows.append(current_row)
                current_row = []
                row_has_numbers = False

        tables.append({str(list(cpd.variables)): rows})

    return (network, tables, states_by_variable)
def create_bayes_net():
    """Load the CelebA attribute table and fit a hand-structured network.

    Only the columns listed in ``KEEP_ATTS`` are used. Every kept column
    becomes a node; the edges are fixed by hand below.

    :return: the fitted ``BayesianModel``
    """
    attributes = pd.read_csv('./data/list_attr_celeba.csv')[KEEP_ATTS]

    # The dependency structure is chosen manually, it is not learned.
    dependencies = [
        ('Young', 'Eyeglasses'),
        ('Young', 'Bald'),
        ('Young', 'Mustache'),
        ('Male', 'Mustache'),
        ('Male', 'Smiling'),
        ('Male', 'Wearing_Lipstick'),
        ('Young', 'Mouth_Slightly_Open'),
        ('Young', 'Narrow_Eyes'),
        ('Male', 'Narrow_Eyes'),
        ('Smiling', 'Narrow_Eyes'),
        ('Smiling', 'Mouth_Slightly_Open'),
        ('Young', 'Smiling'),
    ]

    net = BayesianModel()
    net.add_nodes_from(attributes.columns)
    net.add_edges_from(dependencies)

    # fit() estimates the CPD tables for the structure defined above.
    net.fit(attributes)
    return net
    def test_predict(self):
        """Predict each Titanic column from the other two on the first 30 rows.

        The model (Sex -> Survived <- Pclass) is fitted on rows 500 onwards.
        """
        titanic = BayesianModel()
        titanic.add_edges_from([("Sex", "Survived"), ("Pclass", "Survived")])
        titanic.fit(self.titanic_data2[500:])

        # (evidence columns, expected values of the remaining column)
        cases = [
            (["Sex", "Pclass"],
             "0 1 0 1 0 0 0 0 0 1 0 1 0 "
             "0 0 1 0 0 0 0 0 0 0 0 0 0 "
             "0 0 0 0"),
            (["Survived", "Pclass"],
             "male female female female male male male "
             "male female female female female male male "
             "male female male female male female male "
             "female female female male female male male "
             "female male"),
            (["Survived", "Sex"],
             "3 1 1 1 3 3 3 3 1 1 1 1 3 "
             "3 3 1 3 1 3 1 3 1 1 1 3 1 "
             "3 3 1 3"),
        ]

        # Run all predictions first, then compare them in the same order.
        predictions = [titanic.predict(self.titanic_data2[evidence][:30])
                       for evidence, _ in cases]

        for predicted, (_, expected) in zip(predictions, cases):
            np_test.assert_array_equal(predicted.values.ravel(),
                                       np.array(expected.split()))
Exemple #35
0
 def setup(self):
     """Fit a five-node network on random binary data and keep an inference engine.

     The resulting ``VariableElimination`` object is stored on
     ``self.inference``.
     """
     samples = np.random.randint(low=0, high=2, size=(1000, 5))
     frame = pd.DataFrame(samples, columns=['A', 'B', 'C', 'D', 'E'])

     network = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
     network.fit(frame)

     self.inference = VariableElimination(network)
class TestBayesianModelFitPredict(unittest.TestCase):
    """Tests for ``BayesianModel.fit``, ``predict`` and ``predict_probability``."""

    def setUp(self):
        """Create the models and data frames shared by the tests."""
        self.model_disconnected = BayesianModel()
        self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])
        self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])

        self.model2 = BayesianModel([('A', 'C'), ('B', 'C')])
        self.data1 = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
        # np.nan is the canonical spelling; the np.NaN alias was removed in
        # NumPy 2.0.
        self.data2 = pd.DataFrame(data={'A': [0, np.nan, 1],
                                        'B': [0, 1, 0],
                                        'C': [1, 1, np.nan],
                                        'D': [np.nan, 'Y', np.nan]})

        # data_link - "https://www.kaggle.com/c/titanic/download/train.csv"
        self.titanic_data = pd.read_csv('pgmpy/tests/test_estimators/testdata/titanic_train.csv', dtype=str)
        self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]

    def test_bayesian_fit(self):
        """Fitting with a Dirichlet prior yields the expected CPD for 'B'."""
        self.model2.fit(self.data1, estimator=BayesianEstimator, prior_type="dirichlet", pseudo_counts=[9, 3])
        self.assertEqual(self.model2.get_cpds('B'), TabularCPD('B', 2, [[11.0 / 15], [4.0 / 15]]))

    def test_fit_missing_data(self):
        """Fitting with ``complete_samples_only=False`` uses partially filled rows."""
        self.model2.fit(self.data2, state_names={'C': [0, 1]}, complete_samples_only=False)
        cpds = set([TabularCPD('A', 2, [[0.5], [0.5]]),
                    TabularCPD('B', 2, [[2. / 3], [1. / 3]]),
                    TabularCPD('C', 2, [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                               evidence=['A', 'B'], evidence_card=[2, 2])])
        self.assertSetEqual(cpds, set(self.model2.get_cpds()))

    def test_disconnected_fit(self):
        """Without edges every CPD equals the column's relative frequencies."""
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        self.model_disconnected.fit(values)

        for node in ['A', 'B', 'C', 'D', 'E']:
            cpd = self.model_disconnected.get_cpds(node)
            self.assertEqual(cpd.variable, node)
            np_test.assert_array_equal(cpd.cardinality, np.array([2]))
            # .loc replaces the .ix indexer, which pandas 1.0 removed.
            counts = values.loc[:, node].value_counts()
            value = counts / counts.sum()
            value = value.reindex(sorted(value.index)).values
            np_test.assert_array_equal(cpd.values, value)

    def test_predict(self):
        """Predict each Titanic column from the other two on the first 30 rows."""
        titanic = BayesianModel()
        titanic.add_edges_from([("Sex", "Survived"), ("Pclass", "Survived")])
        titanic.fit(self.titanic_data2[500:])

        p1 = titanic.predict(self.titanic_data2[["Sex", "Pclass"]][:30])
        p2 = titanic.predict(self.titanic_data2[["Survived", "Pclass"]][:30])
        p3 = titanic.predict(self.titanic_data2[["Survived", "Sex"]][:30])

        p1_res = np.array(['0', '1', '0', '1', '0', '0', '0', '0', '0', '1', '0', '1', '0',
                           '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
                           '0', '0', '0', '0'])
        p2_res = np.array(['male', 'female', 'female', 'female', 'male', 'male', 'male',
                           'male', 'female', 'female', 'female', 'female', 'male', 'male',
                           'male', 'female', 'male', 'female', 'male', 'female', 'male',
                           'female', 'female', 'female', 'male', 'female', 'male', 'male',
                           'female', 'male'])
        p3_res = np.array(['3', '1', '1', '1', '3', '3', '3', '3', '1', '1', '1', '1', '3',
                           '3', '3', '1', '3', '1', '3', '1', '3', '1', '1', '1', '3', '1',
                           '3', '3', '1', '3'])

        np_test.assert_array_equal(p1.values.ravel(), p1_res)
        np_test.assert_array_equal(p2.values.ravel(), p2_res)
        np_test.assert_array_equal(p3.values.ravel(), p3_res)

    def test_connected_predict(self):
        """Predict 'E' for 200 held-out rows of seeded random data."""
        np.random.seed(42)
        values = pd.DataFrame(np.array(np.random.randint(low=0, high=2, size=(1000, 5)),
                                       dtype=str),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:800]
        predict_data = values[800:].copy()
        self.model_connected.fit(fit_data)
        # With every column present there is nothing left to predict.
        self.assertRaises(ValueError, self.model_connected.predict, predict_data)
        predict_data.drop('E', axis=1, inplace=True)
        e_predict = self.model_connected.predict(predict_data)
        np_test.assert_array_equal(e_predict.values.ravel(),
                                   np.array([1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
                                             1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
                                             0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                                             0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
                                             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
                                             1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
                                             1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                             0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
                                             1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
                                             1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
                                             0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
                                             1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 1, 0], dtype=str))

    def test_connected_predict_probability(self):
        """Check the predicted distribution of 'E' for 20 held-out rows."""
        np.random.seed(42)
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(100, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:80]
        predict_data = values[80:].copy()
        self.model_connected.fit(fit_data)
        predict_data.drop('E', axis=1, inplace=True)
        e_prob = self.model_connected.predict_probability(predict_data)
        np_test.assert_allclose(e_prob.values.ravel(),
                                    np.array([0.57894737,  0.42105263,  0.57894737,  0.42105263,  0.57894737,
                                             0.42105263,  0.5       ,  0.5       ,  0.57894737,  0.42105263,
                                             0.5       ,  0.5       ,  0.57894737,  0.42105263,  0.57894737,
                                             0.42105263,  0.57894737,  0.42105263,  0.5       ,  0.5       ,
                                             0.57894737,  0.42105263,  0.57894737,  0.42105263,  0.5       ,
                                             0.5       ,  0.57894737,  0.42105263,  0.57894737,  0.42105263,
                                             0.5       ,  0.5       ,  0.57894737,  0.42105263,  0.5       ,
                                             0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ]), atol=0)

    def test_predict_probability_errors(self):
        """``predict_probability`` rejects complete rows and unknown columns."""
        np.random.seed(42)
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(2, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:1]
        predict_data = values[1:].copy()
        self.model_connected.fit(fit_data)
        self.assertRaises(ValueError, self.model_connected.predict_probability, predict_data)
        # 'F' is not a node of the model.
        predict_data = pd.DataFrame(np.random.randint(low=0, high=2, size=(1, 5)),
                              columns=['A', 'B', 'C', 'F', 'E'])[:]
        self.assertRaises(ValueError, self.model_connected.predict_probability, predict_data)

    def tearDown(self):
        """Drop the models created in ``setUp``."""
        del self.model_connected
        del self.model_disconnected
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
# Generating some random data: 1000 rows, six binary columns.
raw_data = np.random.randint(low=0, high=2, size=(1000, 6))
print(raw_data)
data = pd.DataFrame(raw_data, columns=['A', 'R', 'J', 'G', 'L', 'Q'])

# Creating the network structure
student_model = BayesianModel([('A', 'J'), ('R', 'J'),
                               ('J', 'Q'), ('J', 'L'),
                               ('G', 'L')])

# Estimate one CPD per node from the data.
student_model.fit(data, estimator=BayesianEstimator)

# Show the CPD of a node that actually exists in the network; 'D' is not
# one of its nodes (A, R, J, G, L, Q), so it cannot be looked up.
print(student_model.get_cpds('A'))
import numpy as np
import pandas as pd
from pgmpy.inference import VariableElimination
from pgmpy.models import BayesianModel

data = pd.read_csv('~/Documents/unifiedMLData.csv')

#print data
# Only the occupation -> rating edge is active; the remaining candidate
# edges are kept commented out for experimentation.
movie_model = BayesianModel([
('occupation','rating')
#,('gender','rating')
#,('age','rating')
#,('age','occupation')
#,('gender','occupation')
#,('genre','movie_title')
#,('movie_title','rating')
                             ])
movie_model.fit(data)


model_infer = VariableElimination(movie_model)
# query() expects a list of variable names, not a bare string.
results = model_infer.query(variables=['rating'])

print(results['rating'])

#print(movie_model.get_cpds('rating'))
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
# Generating random data for two coin tossing examples:
# 1000 rows, two columns, each value a random integer in {0, 1}.
raw_data = np.random.randint(low=0, high=2, size=(1000, 2))
data = pd.DataFrame(raw_data, columns=['X', 'Y'])
print(data)
# NOTE(review): the model is created without any nodes or edges before
# fit() is called on data with columns 'X' and 'Y' -- confirm that the
# estimator picks the variables up from the data; otherwise no CPDs are
# learned here.
coin_model = BayesianModel()
coin_model.fit(data, estimator=BayesianEstimator)
# The three calls below discard their return values; they only show
# something when run in an interactive session.
coin_model.get_cpds()
coin_model.nodes()
coin_model.edges()
        ax_temp.bar(x, z, zs=y, zdir='y', alpha=0.6, color='r' * 4)
        ax_temp.set_xlabel('X')
        ax_temp.set_ylabel('Y')
        ax_temp.set_zlabel('Z')
        ax_temp.title.set_text(('Feature ' + str(mean_indices[counter])))
        counter += 1
plt.show()

# Learning naive bayes model from various subsets of data
naive_bayes_with_some_features(all_city_data, all_city_label, feature_list=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
naive_bayes_with_some_features(all_city_data, all_city_label, feature_list=[0, 1, 2])
naive_bayes_with_some_features(all_city_data, all_city_label, feature_list=[0, 1, 2, 4])
naive_bayes_with_some_features(all_city_data, all_city_label, feature_list=[0, 1, 2, 3, 4, 5])

# Splitting train and test data for PGM model.
# The label is appended as column 13 next to the feature columns.
temp_data = pd.concat([all_city_data, pd.DataFrame(all_city_label, columns=[13])], axis=1)
# Positional, half-open split: the first 700 rows train, the rest test.
# (.loc slices include both end labels, so .loc[0:700] / .loc[700:] would
# put row 700 into both sets.)
pgm_train_set = temp_data.iloc[:700]
pgm_test_set = temp_data.iloc[700:]
print(pgm_train_set)


# Implementing PGM model on data
# Using these features: 0: (age) 1: (sex) 2: (cp)
pgm_model = BayesianModel()
pgm_model.add_nodes_from([0, 1, 2, 13])
# Only feature 1 is connected to the label node 13.
pgm_model.add_edges_from([(1, 13)])
pgm_model.fit(pgm_train_set.loc[:, [0, 1, 2, 13]])
# The test set keeps the features only; the label column is removed.
pgm_test_set = pgm_test_set.loc[:, [0, 1, 2, 13]].drop(13, axis=1)
print(pgm_test_set)
print(pgm_model.get_cpds(13))
# In most machine learning problems it does not matter which column of
# the array holds which variable, as long as the same order is used for
# training and prediction. In a graphical model every variable is
# different (it is connected to the other variables in its own way), so
# we have to state which column of the data belongs to which variable.
# We use pandas for that.
import pandas as pd
data = pd.DataFrame(data, columns=['cost', 'quality',
                                   'location', 'no_of_people'])
data
# The first 750 rows are used for training.
train = data[:750]
# We will try to predict no_of_people with our model, so for the test
# data we delete that column and predict its values later on.
test = data[750:].drop('no_of_people', axis=1)
test
# Now we need to create the base network structure for the model.
restaurant_model = BayesianModel([('location', 'cost'),
                                  ('quality', 'cost'),
                                  ('location', 'no_of_people'),
                                  ('cost', 'no_of_people')])
restaurant_model.fit(train)
# fit() computes the CPD of every variable from the training data that
# we provided.
restaurant_model.get_cpds()
# To predict the values of no_of_people with this model we simply call
# the predict method on our test data.
restaurant_model.predict(test).values.ravel()
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
# Generating some random data: 100 rows, two columns, each value a
# random integer in {0, 1}.
raw_data = np.random.randint(low=0, high=2, size=(100, 2))
print(raw_data)
data = pd.DataFrame(raw_data, columns=['X', 'Y'])
print(data)

# Two coin tossing model assuming that they are dependent:
# a single edge X -> Y.
coin_model = BayesianModel([('X', 'Y')])
# Estimate the CPDs by maximum likelihood.
coin_model.fit(data, estimator=MaximumLikelihoodEstimator)
# Print the CPD learned for 'X'.
cpd_x = coin_model.get_cpds('X')
print(cpd_x)