def estimate(self):
        """
        Estimates the `BayesianModel` structure that fits best to the given data set,
        according to the scoring method supplied in the constructor.
        Exhaustively searches through all models. Only estimates network structure, no parametrization.

        Returns
        -------
        model: `BayesianModel` instance
            A `BayesianModel` with maximal score.

        Examples
        --------
        >>> import pandas as pd
        >>> import numpy as np
        >>> from pgmpy.estimators import ExhaustiveSearch
        >>> # create random data sample with 3 variables, where B and C are identical:
        >>> data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
        >>> data['C'] = data['B']
        >>> est = ExhaustiveSearch(data)
        >>> best_model = est.estimate()
        >>> best_model
        <pgmpy.models.BayesianModel.BayesianModel object at 0x7f695c535470>
        >>> best_model.edges()
        [('B', 'C')]
        """

        best_dag = max(self.all_dags(), key=self.scoring_method.score)

        best_model = BayesianModel()
        best_model.add_nodes_from(sorted(best_dag.nodes()))
        best_model.add_edges_from(sorted(best_dag.edges()))
        return best_model
Exemple #2
0
 def test_score_titanic(self):
     scorer = BicScore(self.titanic_data2)
     titanic = BayesianModel([("Sex", "Survived"), ("Pclass", "Survived")])
     self.assertAlmostEqual(scorer.score(titanic), -1896.7250012840179)
     titanic2 = BayesianModel([("Pclass", "Sex")])
     titanic2.add_nodes_from(["Sex", "Survived", "Pclass"])
     self.assertLess(scorer.score(titanic2), scorer.score(titanic))
Exemple #3
0
def Hill_Climb_Search(data,state_names):
	epsilon = 1e-14
	nodes = state_names.keys()
	start = BayesianModel()
	start.add_nodes_from(nodes)
	current_model = start
	while True:
		best_score_delta = 0
		best_operation = None
		for operation, score_delta in operations(data,current_model, state_names):
			if score_delta > best_score_delta:
				best_operation = operation
				best_score_delta = score_delta
		if best_operation is None or best_score_delta < epsilon:
			break
		elif best_operation[0] == 'add':
			current_model.add_edge(*best_operation[1])
		elif best_operation[0] == 'delete':
			current_model.remove_edge(*best_operation[1])
		elif best_operation[0] == 'reverse':
			X, Y = best_operation[1]
			current_model.remove_edge(X, Y)
			current_model.add_edge(Y, X)
		print 'Iteration:'
		print current_model.edges()
		print 'Score: ',best_score_delta
		print 'Best operation: ',best_operation
	print current_model.edges()
	return current_model
    def get_model(self):
        """
        Returns an instance of Bayesian Model.
        """
        model = BayesianModel()
        model.add_nodes_from(self.variables)
        model.add_edges_from(self.edges)
        model.name = self.model_name

        tabular_cpds = []
        for var, values in self.variable_CPD.items():
            evidence = values['CONDSET'] if 'CONDSET' in values else []
            cpd = values['DPIS']
            evidence_card = values[
                'CARDINALITY'] if 'CARDINALITY' in values else []
            states = self.variables[var]['STATES']
            cpd = TabularCPD(var,
                             len(states),
                             cpd,
                             evidence=evidence,
                             evidence_card=evidence_card)
            tabular_cpds.append(cpd)

        model.add_cpds(*tabular_cpds)

        for var, properties in self.variables.items():
            model.node[var] = properties

        return model
Exemple #5
0
    def estimate(self):
        """
        Estimates the `BayesianModel` structure that fits best to the given data set,
        according to the scoring method supplied in the constructor.
        Exhaustively searches through all models. Only estimates network structure, no parametrization.

        Returns
        -------
        model: `BayesianModel` instance
            A `BayesianModel` with maximal score.

        Examples
        --------
        >>> import pandas as pd
        >>> import numpy as np
        >>> from pgmpy.estimators import ExhaustiveSearch
        >>> # create random data sample with 3 variables, where B and C are identical:
        >>> data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
        >>> data['C'] = data['B']
        >>> est = ExhaustiveSearch(data)
        >>> best_model = est.estimate()
        >>> best_model
        <pgmpy.models.BayesianModel.BayesianModel object at 0x7f695c535470>
        >>> best_model.edges()
        [('B', 'C')]
        """

        best_dag = max(self.all_dags(), key=self.scoring_method.score)

        best_model = BayesianModel()
        best_model.add_nodes_from(sorted(best_dag.nodes()))
        best_model.add_edges_from(sorted(best_dag.edges()))
        return best_model
Exemple #6
0
 def _train_bn(self):
     model = BayesianModel(self._dag.edges)
     model.add_nodes_from(self._dag.nodes)
     model.fit(self._get_training_data(),
               BayesianEstimator,
               prior_type='BDeu')
     return model
Exemple #7
0
    def get_model(self):
        """
        Returns an instance of Bayesian Model.
        """
        model = BayesianModel()
        model.add_nodes_from(self.variables)
        model.add_edges_from(self.edges)
        model.name = self.model_name

        tabular_cpds = []
        for var, values in self.variable_CPD.items():
            evidence = values["CONDSET"] if "CONDSET" in values else []
            cpd = values["DPIS"]
            evidence_card = values[
                "CARDINALITY"] if "CARDINALITY" in values else []
            states = self.variables[var]["STATES"]
            cpd = TabularCPD(var,
                             len(states),
                             cpd,
                             evidence=evidence,
                             evidence_card=evidence_card)
            tabular_cpds.append(cpd)

        model.add_cpds(*tabular_cpds)

        if nx.__version__.startswith("1"):
            for var, properties in self.variables.items():
                model.nodes[var] = properties
        else:
            for var, properties in self.variables.items():
                model._node[var] = properties

        return model
Exemple #8
0
    def get_model(self):
        """
        Returns the fitted bayesian model

        Example
        ----------
        >>> from pgmpy.readwrite import BIFReader
        >>> reader = BIFReader("bif_test.bif")
        >>> reader.get_model()
        <pgmpy.models.BayesianModel.BayesianModel object at 0x7f20af154320>
        """
        try:
            model = BayesianModel(self.variable_edges)
            model.name = self.network_name
            model.add_nodes_from(self.variable_names)

            tabular_cpds = []
            for var in sorted(self.variable_cpds.keys()):
                values = self.variable_cpds[var]
                cpd = TabularCPD(var, len(self.variable_states[var]), values,
                                 evidence=self.variable_parents[var],
                                 evidence_card=[len(self.variable_states[evidence_var])
                                                for evidence_var in self.variable_parents[var]])
                tabular_cpds.append(cpd)

            model.add_cpds(*tabular_cpds)
            for node, properties in self.variable_properties.items():
                for prop in properties:
                    prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
                    model.node[node][prop_name] = prop_value

            return model

        except AttributeError:
            raise AttributeError('First get states of variables, edges, parents and network name')
Exemple #9
0
 def test_score_titanic(self):
     scorer = K2Score(self.titanic_data2)
     titanic = BayesianModel([("Sex", "Survived"), ("Pclass", "Survived")])
     self.assertAlmostEqual(scorer.score(titanic), -1891.0630673606006)
     titanic2 = BayesianModel([("Pclass", "Sex"), ])
     titanic2.add_nodes_from(["Sex", "Survived", "Pclass"])
     self.assertLess(scorer.score(titanic2), scorer.score(titanic))
 def test_score_titanic(self):
     scorer = BdeuScore(self.titanic_data2, equivalent_sample_size=25)
     titanic = BayesianModel([("Sex", "Survived"), ("Pclass", "Survived")])
     self.assertAlmostEqual(scorer.score(titanic), -1892.7383393910427)
     titanic2 = BayesianModel([("Pclass", "Sex")])
     titanic2.add_nodes_from(["Sex", "Survived", "Pclass"])
     self.assertLess(scorer.score(titanic2), scorer.score(titanic))
Exemple #11
0
    def get_model(self):
        model = BayesianModel()
        model.add_nodes_from(self.variables)
        model.add_edges_from(self.edge_list)
        model.name = self.network_name

        tabular_cpds = []
        for var, values in self.variable_CPD.items():
            evidence_card = [
                len(self.variable_states[evidence_var])
                for evidence_var in self.variable_parents[var]
            ]
            cpd = TabularCPD(
                var,
                len(self.variable_states[var]),
                values,
                evidence=self.variable_parents[var],
                evidence_card=evidence_card,
                state_names=self.get_states(),
            )
            tabular_cpds.append(cpd)

        model.add_cpds(*tabular_cpds)

        for node, properties in self.variable_property.items():
            for prop in properties:
                if prop is not None:
                    prop_name, prop_value = map(lambda t: t.strip(), prop.split("="))
                    model.nodes[node][prop_name] = prop_value

        return model
Exemple #12
0
 def single_bayes_net(df, independent_vars, dependent_vars):
     model = BayesianModel()
     model.add_nodes_from(independent_vars)
     for independent_var in independent_vars:
         for dependent_var in dependent_vars:
             model.add_edge(independent_var, dependent_var)
     model.fit(df)
     return model
Exemple #13
0
    def get_model(self):
        """
        Returns the model instance of the ProbModel.

        Return
        ---------------
        model: an instance of BayesianModel.

        Examples
        -------
        >>> reader = ProbModelXMLReader()
        >>> reader.get_model()
        """
        if self.probnet.get("type") == "BayesianNetwork":
            model = BayesianModel()
            model.add_nodes_from(self.probnet["Variables"].keys())
            model.add_edges_from(self.probnet["edges"].keys())

            tabular_cpds = []
            cpds = self.probnet["Potentials"]
            for cpd in cpds:
                var = list(cpd["Variables"].keys())[0]
                states = self.probnet["Variables"][var]["States"]
                evidence = cpd["Variables"][var]
                evidence_card = [
                    len(self.probnet["Variables"][evidence_var]["States"])
                    for evidence_var in evidence
                ]
                arr = list(map(float, cpd["Values"].split()))
                values = np.array(arr)
                values = values.reshape((len(states), values.size // len(states)))
                tabular_cpds.append(
                    TabularCPD(var, len(states), values, evidence, evidence_card)
                )

            model.add_cpds(*tabular_cpds)

            variables = model.nodes()
            for var in variables:
                for prop_name, prop_value in self.probnet["Variables"][var].items():
                    model.nodes[var][prop_name] = prop_value
            edges = model.edges()

            if nx.__version__.startswith("1"):
                for edge in edges:
                    for prop_name, prop_value in self.probnet["edges"][edge].items():
                        model.edge[edge[0]][edge[1]][prop_name] = prop_value
            else:
                for edge in edges:
                    for prop_name, prop_value in self.probnet["edges"][edge].items():
                        model.adj[edge[0]][edge[1]][prop_name] = prop_value
            return model
        else:
            raise ValueError("Please specify only Bayesian Network.")
Exemple #14
0
def create_bayes_net(file, keep_atts, edges):
    atts = pd.read_csv(file)
    atts = atts[keep_atts]
    graph = BayesianModel()
    graph.add_nodes_from(atts.columns)

    # defining the structure of edges
    graph.add_edges_from(edges)

    # fit estimates the CPD tables for the given structure
    graph.fit(atts)

    return graph
Exemple #15
0
class TestBayesianModelFitPredict(unittest.TestCase):
    def setUp(self):
        self.model_disconnected = BayesianModel()
        self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])

        self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'),
                                              ('C', 'D'), ('B', 'E')])

    def test_disconnected_fit(self):
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        self.model_disconnected.fit(values)

        for node in ['A', 'B', 'C', 'D', 'E']:
            cpd = self.model_disconnected.get_cpds(node)
            self.assertEqual(cpd.variable, node)
            np_test.assert_array_equal(cpd.cardinality, np.array([2]))
            value = (values.ix[:, node].value_counts() /
                     values.ix[:, node].value_counts().sum())
            value = value.reindex(sorted(value.index)).values
            np_test.assert_array_equal(cpd.values, value)

    def test_connected_predict(self):
        np.random.seed(42)
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:800]
        predict_data = values[800:].copy()
        self.model_connected.fit(fit_data)
        self.assertRaises(ValueError, self.model_connected.predict,
                          predict_data)
        predict_data.drop('E', axis=1, inplace=True)
        e_predict = self.model_connected.predict(predict_data)
        np_test.assert_array_equal(
            e_predict.values.ravel(),
            np.array([
                1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
                0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
                1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1,
                1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
                1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
                1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
                1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0
            ]))

    def tearDown(self):
        del self.model_connected
        del self.model_disconnected
class TestBayesianModelFitPredict(unittest.TestCase):
    def setUp(self):
        self.model_disconnected = BayesianModel()
        self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])

        self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])

    def test_disconnected_fit(self):
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        self.model_disconnected.fit(values)

        for node in ['A', 'B', 'C', 'D', 'E']:
            cpd = self.model_disconnected.get_cpds(node)
            self.assertEqual(cpd.variable, node)
            np_test.assert_array_equal(cpd.cardinality, np.array([2]))
            value = (values.ix[:, node].value_counts() /
                     values.ix[:, node].value_counts().sum())
            value = value.reindex(sorted(value.index)).values
            np_test.assert_array_equal(cpd.values, value)

    def test_connected_predict(self):
        np.random.seed(42)
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:800]
        predict_data = values[800:].copy()
        self.model_connected.fit(fit_data)
        self.assertRaises(ValueError, self.model_connected.predict, predict_data)
        predict_data.drop('E', axis=1, inplace=True)
        e_predict = self.model_connected.predict(predict_data)
        np_test.assert_array_equal(e_predict.values.ravel(),
                                   np.array([1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
                                             1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
                                             0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                                             0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
                                             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
                                             1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
                                             1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                             0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
                                             1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
                                             1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
                                             0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
                                             1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 1, 0]))

    def tearDown(self):
        del self.model_connected
        del self.model_disconnected
Exemple #17
0
def create_model_and_inference():
    dep_df = pd.read_csv('dependencies.csv', sep=';')

    def connect(df, source, edgelist):
        source_df = df[df['Column2'] == source]
        for col in source_df.iloc[0, 3:len(source_df.columns)]:
            target_df = df[df['Column1'] == col]['Column2']
            if not target_df.empty:
                target = target_df.item()
                if not (target, source) in edgelist:
                    edgelist.append((source, target))
                    connect(df, target, edgelist)

    edges = []
    connect(dep_df, 'myproximus-usage', edges)
    edges = [(t[1], t[0]) for t in edges]

    nodes = set(itertools.chain.from_iterable(edges))
    nodes_df = dep_df.iloc[:, 1].to_frame()
    nodes_df = nodes_df[nodes_df['Column2'].isin(nodes)]

    nodes_df['0'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df['1'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df['2'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df['3'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df['4'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df['5'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df['6'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df['7'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df['8'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df['9'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df['10'] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df = nodes_df.set_index('Column2').transpose()

    model = BayesianModel()
    model.add_nodes_from(nodes)
    for edge in edges:
        try:
            model.add_edge(edge[0], edge[1])
        except:
            print('WARNING: tried to add edge which forms loop: ' + str(edge))

    model.fit(nodes_df, estimator=BayesianEstimator, prior_type="BDeu")
    # for cpd in model.get_cpds():
    #     print(cpd)

    draw_network(model.nodes(), model.edges(), {}, [])

    return model, VariableElimination(model)
Exemple #18
0
 def bayes_net_from_populational_data(data, independent_vars,
                                      dependent_vars):
     model = BayesianModel()
     model.add_nodes_from(independent_vars)
     for independent_var in independent_vars:
         for dependent_var in dependent_vars:
             model.add_edge(independent_var, dependent_var)
     cpd_list = []
     state_names = BayesNetHelper.get_state_names_from_df(
         data, independent_vars | dependent_vars)
     for node in independent_vars | dependent_vars:
         cpd = BayesNetHelper.compute_cpd(model, node, data, state_names)
         cpd_list.append(cpd)
     model.add_cpds(*cpd_list)
     return model
Exemple #19
0
def fully_connected_model(nodes=None):
    if not nodes:
        nodes = [BOREDOM, DESIRE, MOBILE, MOTOR_HYPO, LEFT_ARM]
    network = BayesianModel()
    network.add_nodes_from(nodes)

    for hypo in nodes:
        if 'hypo' in hypo:
            for obs in nodes:
                if 'obs' in obs or 'motor' in obs:
                    network.add_edge(u=hypo, v=obs)

    network.fit(TRAINING_DATA, estimator=BayesianEstimator, prior_type="BDeu")

    return network
Exemple #20
0
def create_bayes_net():
    atts = pd.read_csv('../../data/list_attr_celeba.csv')
    atts = atts[KEEP_ATTS]
    graph = BayesianModel()
    graph.add_nodes_from(atts.columns)

    graph.add_edges_from([('Young', 'Eyeglasses'), ('Young', 'Bald'),
                          ('Young', 'Mustache'), ('Male', 'Mustache'),
                          ('Male', 'Smiling'), ('Male', 'Wearing_Lipstick'),
                          ('Young', 'Mouth_Slightly_Open'),
                          ('Young', 'Narrow_Eyes'), ('Male', 'Narrow_Eyes'),
                          ('Smiling', 'Narrow_Eyes'),
                          ('Smiling', 'Mouth_Slightly_Open'),
                          ('Young', 'Smiling')])
    graph.fit(atts)
    return graph
Exemple #21
0
def generate_approx_model_from_graph(ebunch, nodes, df):
	"""
	Aprende un modelo Bayesiano de pgmpy usando un datos de un
	dataframe de pandas. Primero se hace un barajado de los datos.
	"""
	df = df.sample(frac=1)
	approx_model = BayesianModel(ebunch)
	approx_model.add_nodes_from(nodes)
	state_names = dict()
	for pair in ebunch:
		state_names[pair[0]] = [0, 1]
		state_names[pair[1]] = [0, 1]
	for node in nodes:
		state_names[node] = [0, 1]
	approx_model.fit(df, state_names=state_names, estimator=SmoothedMaximumLikelihoodEstimator)
	return approx_model
Exemple #22
0
def bif2bayesian(pathname, verbose=3):
    """
    Returns the fitted bayesian model
 
    Example
    ----------
    >>> from pgmpy.readwrite import BIFReader
    >>> reader = BIFReader("bif_test.bif")
    >>> reader.get_model()
    <pgmpy.models.BayesianModel.BayesianModel object at 0x7f20af154320>
    """
    if verbose >= 3: print('[BNLEARN] Loading bif file <%s>' % (pathname))

    bifmodel = readwrite.BIF.BIFReader(path=pathname)
    #bifmodel.get_edges()

    try:
        model = BayesianModel(bifmodel.variable_edges)
        model.name = bifmodel.network_name
        model.add_nodes_from(bifmodel.variable_names)

        tabular_cpds = []
        for var in sorted(bifmodel.variable_cpds.keys()):
            values = bifmodel.variable_cpds[var]
            cpd = TabularCPD(
                var,
                len(bifmodel.variable_states[var]),
                values,
                evidence=bifmodel.variable_parents[var],
                evidence_card=[
                    len(bifmodel.variable_states[evidence_var])
                    for evidence_var in bifmodel.variable_parents[var]
                ])
            tabular_cpds.append(cpd)

        model.add_cpds(*tabular_cpds)
        #        for node, properties in bifmodel.variable_properties.items():
        #            for prop in properties:
        #                prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
        #                model.node[node][prop_name] = prop_value

        return model

    except AttributeError:
        raise AttributeError(
            '[BNLEARN] First get states of variables, edges, parents and network names'
        )
Exemple #23
0
    def get_model(self):
        """
        Returns an instance of Bayesian Model or Markov Model.
        Varibles are in the pattern var_0, var_1, var_2 where var_0 is
        0th index variable, var_1 is 1st index variable.

        Return
        ------
        model: an instance of Bayesian or Markov Model.

        Examples
        --------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_model()
        """
        if self.network_type == 'BAYES':
            model = BayesianModel()
            model.add_nodes_from(self.variables)
            model.add_edges_from(self.edges)

            tabular_cpds = []
            for cpd in self.tables:
                child_var = cpd[0]
                states = int(self.domain[child_var])
                arr = list(map(float, cpd[1]))
                values = np.array(arr)
                values = values.reshape(states, values.size // states)
                tabular_cpds.append(TabularCPD(child_var, states, values))

            model.add_cpds(*tabular_cpds)
            return model

        elif self.network_type == 'MARKOV':
            model = MarkovModel(self.edges)

            factors = []
            for table in self.tables:
                variables = table[0]
                cardinality = [int(self.domain[var]) for var in variables]
                value = list(map(float, table[1]))
                factor = DiscreteFactor(variables=variables,
                                        cardinality=cardinality,
                                        values=value)
                factors.append(factor)

            model.add_factors(*factors)
            return model
def createBayesGraph(graph_list,mapping,data):
    '''
    Creating the bayesian network graph and table
    the graph_list, mapping and data are the parameters needed for creating the tables
    this function returns:
        bayes_model - the bayes model and its order
        cpds_array - array of the tables
        categories_each_element - categories of each element in the graph
    '''
    cpds_array = []
    categories_each_element = {}  # Returning an array with the values of each element
    bayes_model = BayesianModel()
    bayes_model.add_nodes_from(list(mapping))
    for value in graph_list:
        temp_list=value.split(',')
        bayes_model.add_edge(temp_list[0],temp_list[1])
    data_dict = {mapping[i]: data[:,i] for i in range(0, len(mapping))}
    data_dict_pd = pandas.DataFrame(data=data_dict)
    bayes_model.fit(data_dict_pd)

    cpds_tables = bayes_model.get_cpds()

    # Creating the array which returs to the client
    for cpd in cpds_tables:
        cpds_list = {}
        for cat in cpd.state_names:
            categories_each_element[cat] = cpd.state_names[cat]
        cpd_string = str(cpd).split('|')
        temp_array = []
        cpd_matrix_values = []
        digits_numbers = False

        for a in cpd_string:
            if (is_number(a)):
                temp_array.append(float(a.strip()))
                digits_numbers = True
            elif ("-+" in a and digits_numbers == True):
                cpd_matrix_values.append(temp_array)
                temp_array = []
                digits_numbers = False
        cpds_list[str(list(cpd.variables))] = cpd_matrix_values
        cpds_array.append(cpds_list)
    return(bayes_model,cpds_array,categories_each_element)
Exemple #25
0
    def join(reference_bayes, second_bayes, new_dependent_vars,
             new_independent_vars, ref_num_of_records, second_num_of_records):
        final_bayes = BayesianModel()
        #all independent variables should stay the same
        final_bayes.add_nodes_from(new_independent_vars)
        final_bayes.add_cpds(*[
            reference_bayes.get_cpds(node=node) if node in
            reference_bayes.nodes else second_bayes.get_cpds(node=node)
            for node in new_independent_vars
        ])
        for node in new_dependent_vars:
            final_bayes.add_node(node)
            ref_parents = set()
            second_parents = set()
            if node in reference_bayes:
                ref_parents = set(reference_bayes.get_parents(node))
            if node in second_bayes:
                second_parents = set(second_bayes.get_parents(node))

            if (len(ref_parents) == 0):
                final_bayes.add_edges_from([(parent, node)
                                            for parent in second_parents])
                final_bayes.add_cpds(second_bayes.get_cpds(node=node))
            else:
                final_bayes.add_edges_from([(parent, node)
                                            for parent in ref_parents])
                if len(second_parents - ref_parents) > 0:
                    raise ValueError('This join can not be performed since the\
                         second distribution contains new independent variable\
                         (s) for node {}. Please consider dropping these new \
                         dependencies or switching reference distribution. '.
                                     format(str(node)))
                elif ref_parents == second_parents:
                    new_cpd = BayesNetHelper.calculate_weighted_cpds(
                        reference_bayes.get_cpds(node=node),
                        second_bayes.get_cpds(node=node), ref_num_of_records,
                        second_num_of_records)
                    final_bayes.add_cpds(new_cpd)
                else:
                    final_bayes.add_cpds(reference_bayes.get_cpds(node=node))
        return final_bayes
def create_bayes_net():
    atts = pd.read_csv('./data/list_attr_celeba.csv')
    atts = atts[KEEP_ATTS]
    graph = BayesianModel()
    graph.add_nodes_from(atts.columns)

    # can't automate this part
    # defining the structure of edges
    graph.add_edges_from([('Young', 'Eyeglasses'), ('Young', 'Bald'),
                          ('Young', 'Mustache'), ('Male', 'Mustache'),
                          ('Male', 'Smiling'), ('Male', 'Wearing_Lipstick'),
                          ('Young', 'Mouth_Slightly_Open'),
                          ('Young', 'Narrow_Eyes'), ('Male', 'Narrow_Eyes'),
                          ('Smiling', 'Narrow_Eyes'),
                          ('Smiling', 'Mouth_Slightly_Open'),
                          ('Young', 'Smiling')])

    # fit estimates the CPD tables for the given structure
    graph.fit(atts)

    return graph
def create_network(models, processes, files):
    for p in range(files):
        temp_model = BayesianModel()
        for e in range(len(processes[p].get_errors())):
            temp_error = processes[p].get_error(e)
            for c in range(len(temp_error.get_causes())):
                temp_cause = temp_error.get_cause(c)
                q = temp_cause.get_occ_prob(
                ) / temp_error.get_total_cause_prob()
                temp_cause.set_occ_prob(q)
                temp_model.add_nodes_from([temp_cause, temp_error])
                temp_model.add_edge(temp_cause, temp_error)
                temp_cause_cpd = TabularCPD(variable=temp_cause,
                                            variable_card=2,
                                            values=[[q, 1 - q]])
                temp_model.add_cpds(temp_cause_cpd)
            temp_error_cpd = TabularCPD(
                variable=temp_error,
                variable_card=2,
                values=get_initial_error_cpd(len(temp_error.get_causes())),
                evidence=temp_error.get_causes(),
                evidence_card=[2] * (len(temp_error.get_causes())))
            temp_model.add_cpds(temp_error_cpd)
            for f in range(len(temp_error.get_effects())):
                temp_effect = temp_error.get_effect(f)
                temp_model.add_nodes_from([temp_error, temp_effect])
                temp_model.add_edge(temp_error, temp_effect)
        models.append(temp_model)
        #plotting Failure Tree
        dot = to_pydot(models[p])
        with open('failure_tree_graph_%s.png' % processes[p], 'wb') as f:
            f.write(dot.create_png())
        #Sample output of CPDs for causes and errors
        for e in range(len(processes[p].get_errors())):
            for c in range(len(processes[p].get_error(e).get_causes())):
                print(
                    temp_model.get_cpds(
                        processes[p].get_error(e).get_cause(c)))
            print(temp_model.get_cpds(processes[p].get_error(e)))
Exemple #28
0
def main():
    data, string = readData()
    genes = np.array(data.columns[1:])
    labels = np.array(data.columns)

    bayesianModel = BayesianModel()
    transitionModel = DBN()

    bayesianModel.add_nodes_from(genes)
    transitionModel.add_nodes_from(genes)

    bData, tData = getData(data, labels)
    
    print "\nDynamic Bayesian Network inference", 
    print "\nB_0 network relations:  "
    
    hcb = HillClimbSearch(bData, genes, scoring_method=BicScore(bData, labels, bk1=string, weight=4))
    best_model_b = hcb.estimate(start=bayesianModel, tabu_length=15, max_indegree=2)
    print(best_model_b.edges())

    printOutputB(best_model_b)

    print "\nLocal Probability Model: "
    best_model_b.fit(bData, BayesianEstimator)
    for cpd in best_model_b.get_cpds():
        print(cpd)

    print "\nB_transition network relations: "

    hct = HillClimbSearch(tData, genes, scoring_method=BicScore(tData, labels, bk1=string, weight=4))
    best_model_t = hct.estimate_dynamic(start=transitionModel, tabu_length=15, max_indegree=2)
    print(best_model_t.edges())

    printOutputT(best_model_t)

    print "\nLocal Probability Model: "
    best_model_t.fit(tData, BayesianEstimator)
    for cpd in best_model_t.get_cpds():
        print(cpd)
Exemple #29
0
def evaluate_single_graph(df_samples, graph, bn_truth, nb_repeat=3):
    testing_graph = BayesianModel()
    testing_graph.add_nodes_from(bn_truth.causal_graph.nodes())
    for edge in remove_bidirected_edges(graph.edges()):
        try:
            testing_graph.add_edge(edge[0], edge[1])
        except Exception as e:
            try:
                testing_graph.add_edge(edge[1], edge[0])
            except Exception as e:
                print(e)
                continue

    testing_graph.fit(df_samples, estimator=BayesianEstimator)
    testing_graph.check_model()
    bn_test = BayesianNetwork(testing_graph)

    set_observe(bn_test.bn)
    set_observe(bn_truth.bn)

    bn_truth.set_state_names()
    bn_test.set_state_names()

    return {
        'SID':
        SID(bn_truth.causal_graph, bn_test.causal_graph),
        'SHD':
        SHD(bn_truth.causal_graph, bn_test.causal_graph),
        'OD':
        np.mean([
            ODist(bn_truth, bn_test, 1000, discrete=True)
            for _ in range(nb_repeat)
        ]),
        'ID':
        np.mean([
            IDist(bn_truth, bn_test, 1000, discrete=True)
            for _ in range(nb_repeat)
        ])
    }
Exemple #30
0
    def reduce_model(self, evidence):
        model = copy.deepcopy(self.model)
        continuous_factors = [
            factor for factor in model.factors
            if isinstance(factor, ContinuousFactor)
        ]

        for var, val in evidence.items():
            for factor in continuous_factors:
                if var in factor.scope(
                ) and "F(" in var:  # make sure that we only reduce at this stage for continuous values, let the inference algorithm deal with reducing for binary variables
                    factor.reduce([(var, val)])

        new_model = BayesianModel()

        additional_evidence = {}

        for node in model.factors:
            if isinstance(node, ContinuousFactor):
                if len(node.scope()) == 1:
                    node = TabularCPD(
                        str(node.scope()[0]), 2,
                        [[node.assignment(0),
                          node.assignment(1)]])
            else:
                node = to_CPD(node)

            var = node.variable
            for v in node.scope():
                if var != v:
                    new_model.add_edge(str(v), str(var))

            if "same_reason" in var:
                additional_evidence[var] = 1

            new_model.add_nodes_from([str(n) for n in node.scope()])
            new_model.add_cpds(node)
        return new_model, additional_evidence
Exemple #31
0
    def cal(self, file1, file2):
        f1 = open(file1)
        lines = f1.readlines()
        nodes = self.getegdes(lines[0])
        edges = self.getegdes(lines[1])
        data = pd.read_csv(file2)

        G = BayesianModel()
        G.add_nodes_from(nodes)
        for i in range(int(len(edges) / 2)):
            G.add_edge(edges[2 * i], edges[2 * i + 1])

        output1 = []

        for i in range(int(len(edges) / 2)):
            mut = mr.mutual_info_score(data[edges[2 * i]],
                                       data[edges[2 * i + 1]])
            output1.append(mut)

        output2 = {}
        for node1 in G.nodes():
            d = {}
            for node2 in G.nodes():
                if node1 == node2:
                    continue
                mut = mr.mutual_info_score(data[node1], data[node2])

                d[node2] = mut
            output2[node1] = d

        print(output1)
        print(output2)

        with open('mutual_output.txt', 'w') as f:
            f.write(str(output1))
            f.write('\n')
            f.write(str(output2))
Exemple #32
0
class TestGibbsSampling(unittest.TestCase):
    def setUp(self):
        # A test Bayesian model
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'], evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = Factor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = Factor(['C', 'B'], [4, 3], [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = Factor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)

        self.gibbs = GibbsSampling(self.bayesian_model)

    def tearDown(self):
        del self.bayesian_model
        del self.markov_model

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_bayesian_model', autospec=True)
    @patch('pgmpy.models.MarkovChain.__init__', autospec=True)
    def test_init_bayesian_model(self, init, get_kernel):
        model = MagicMock(spec_set=BayesianModel)
        gibbs = GibbsSampling(model)
        init.assert_called_once_with(gibbs)
        get_kernel.assert_called_once_with(gibbs, model)

    @patch('pgmpy.inference.Sampling.GibbsSampling._get_kernel_from_markov_model', autospec=True)
    def test_init_markov_model(self, get_kernel):
        model = MagicMock(spec_set=MarkovModel)
        gibbs = GibbsSampling(model)
        get_kernel.assert_called_once_with(gibbs, model)

    def test_get_kernel_from_bayesian_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_bayesian_model(self.bayesian_model)
        self.assertListEqual(list(gibbs.variables), self.bayesian_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {'diff': 2, 'intel': 2, 'grade': 3})

    def test_get_kernel_from_markov_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_markov_model(self.markov_model)
        self.assertListEqual(list(gibbs.variables), self.markov_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {'A': 2, 'B': 3, 'C': 4, 'D': 2})

    def test_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(start_state, 2)
        self.assertEquals(len(sample), 2)
        self.assertEquals(len(sample.columns), 3)
        self.assertIn('diff', sample.columns)
        self.assertIn('intel', sample.columns)
        self.assertIn('grade', sample.columns)
        self.assertTrue(set(sample['diff']).issubset({0, 1}))
        self.assertTrue(set(sample['intel']).issubset({0, 1}))
        self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))


    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state", autospec=True)
    def test_sample_less_arg(self, random_state):
        self.gibbs.state = None
        random_state.return_value = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(size=2)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(sample), 2)


    def test_generate_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        gen = self.gibbs.generate_sample(start_state, 2)
        samples = [sample for sample in gen]
        self.assertEqual(len(samples), 2)
        self.assertEqual({samples[0][0].var, samples[0][1].var, samples[0][2].var}, {'diff', 'intel', 'grade'})
        self.assertEqual({samples[1][0].var, samples[1][1].var, samples[1][2].var}, {'diff', 'intel', 'grade'})


    @patch("pgmpy.inference.Sampling.GibbsSampling.random_state", autospec=True)
    def test_generate_sample_less_arg(self, random_state):
        self.gibbs.state = None
        gen = self.gibbs.generate_sample(size=2)
        samples = [sample for sample in gen]
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(samples), 2)
        ax_temp.bar(x, z, zs=y, zdir='y', alpha=0.6, color='r' * 4)
        ax_temp.set_xlabel('X')
        ax_temp.set_ylabel('Y')
        ax_temp.set_zlabel('Z')
        ax_temp.title.set_text(('Feature ' + str(mean_indices[counter])))
        counter += 1
plt.show()

# Learning naive bayes model from various subsets of data
naive_bayes_with_some_features(all_city_data, all_city_label, feature_list=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
naive_bayes_with_some_features(all_city_data, all_city_label, feature_list=[0, 1, 2])
naive_bayes_with_some_features(all_city_data, all_city_label, feature_list=[0, 1, 2, 4])
naive_bayes_with_some_features(all_city_data, all_city_label, feature_list=[0, 1, 2, 3, 4, 5])

# Splitting train and test data for PGM model
temp_data = pd.concat([all_city_data, pd.DataFrame(all_city_label, columns=[13])], axis=1)
pgm_train_set = temp_data.loc[0:700]
pgm_test_set = temp_data.loc[700:]
print(pgm_train_set)


# Implementing PGM model on data
# Using these features: 0: (age) 1: (sex) 2: (cp)
pgm_model = BayesianModel()
pgm_model.add_nodes_from([0, 1, 2, 13])
pgm_model.add_edges_from([(1, 13)])
pgm_model.fit(pgm_train_set.loc[:, [0, 1, 2, 13]])
pgm_test_set = pgm_test_set.loc[:, [0, 1, 2, 13]].drop(13, axis=1)
print(pgm_test_set)
print(pgm_model.get_cpds(13))
class TestBayesianModelFitPredict(unittest.TestCase):

    def setUp(self):
        self.model_disconnected = BayesianModel()
        self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])
        self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])

        self.model2 = BayesianModel([('A', 'C'), ('B', 'C')])
        self.data1 = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
        self.data2 = pd.DataFrame(data={'A': [0, np.NaN, 1],
                                        'B': [0, 1, 0],
                                        'C': [1, 1, np.NaN],
                                        'D': [np.NaN, 'Y', np.NaN]})

        # data_link - "https://www.kaggle.com/c/titanic/download/train.csv"
        self.titanic_data = pd.read_csv('pgmpy/tests/test_estimators/testdata/titanic_train.csv', dtype=str)
        self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]

    def test_bayesian_fit(self):
        print(isinstance(BayesianEstimator, BaseEstimator))
        print(isinstance(MaximumLikelihoodEstimator, BaseEstimator))
        self.model2.fit(self.data1, estimator=BayesianEstimator, prior_type="dirichlet", pseudo_counts=[9, 3])
        self.assertEqual(self.model2.get_cpds('B'), TabularCPD('B', 2, [[11.0 / 15], [4.0 / 15]]))

    def test_fit_missing_data(self):
        self.model2.fit(self.data2, state_names={'C': [0, 1]}, complete_samples_only=False)
        cpds = set([TabularCPD('A', 2, [[0.5], [0.5]]),
                    TabularCPD('B', 2, [[2. / 3], [1. / 3]]),
                    TabularCPD('C', 2, [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                               evidence=['A', 'B'], evidence_card=[2, 2])])
        self.assertSetEqual(cpds, set(self.model2.get_cpds()))

    def test_disconnected_fit(self):
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        self.model_disconnected.fit(values)

        for node in ['A', 'B', 'C', 'D', 'E']:
            cpd = self.model_disconnected.get_cpds(node)
            self.assertEqual(cpd.variable, node)
            np_test.assert_array_equal(cpd.cardinality, np.array([2]))
            value = (values.ix[:, node].value_counts() /
                     values.ix[:, node].value_counts().sum())
            value = value.reindex(sorted(value.index)).values
            np_test.assert_array_equal(cpd.values, value)

    def test_predict(self):
        titanic = BayesianModel()
        titanic.add_edges_from([("Sex", "Survived"), ("Pclass", "Survived")])
        titanic.fit(self.titanic_data2[500:])

        p1 = titanic.predict(self.titanic_data2[["Sex", "Pclass"]][:30])
        p2 = titanic.predict(self.titanic_data2[["Survived", "Pclass"]][:30])
        p3 = titanic.predict(self.titanic_data2[["Survived", "Sex"]][:30])

        p1_res =  np.array(['0', '1', '0', '1', '0', '0', '0', '0', '0', '1', '0', '1', '0',
                            '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
                            '0', '0', '0', '0'])
        p2_res = np.array(['male', 'female', 'female', 'female', 'male', 'male', 'male',
                           'male', 'female', 'female', 'female', 'female', 'male', 'male',
                           'male', 'female', 'male', 'female', 'male', 'female', 'male',
                           'female', 'female', 'female', 'male', 'female', 'male', 'male',
                           'female', 'male'])
        p3_res = np.array(['3', '1', '1', '1', '3', '3', '3', '3', '1', '1', '1', '1', '3',
                           '3', '3', '1', '3', '1', '3', '1', '3', '1', '1', '1', '3', '1',
                           '3', '3', '1', '3'])

        np_test.assert_array_equal(p1.values.ravel(), p1_res)
        np_test.assert_array_equal(p2.values.ravel(), p2_res)
        np_test.assert_array_equal(p3.values.ravel(), p3_res)

    def test_connected_predict(self):
        np.random.seed(42)
        values = pd.DataFrame(np.array(np.random.randint(low=0, high=2, size=(1000, 5)),
                                       dtype=str),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:800]
        predict_data = values[800:].copy()
        self.model_connected.fit(fit_data)
        self.assertRaises(ValueError, self.model_connected.predict, predict_data)
        predict_data.drop('E', axis=1, inplace=True)
        e_predict = self.model_connected.predict(predict_data)
        np_test.assert_array_equal(e_predict.values.ravel(),
                                   np.array([1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
                                             1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
                                             0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                                             0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
                                             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
                                             1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
                                             1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                             0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
                                             1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
                                             1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
                                             0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
                                             1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 1, 0], dtype=str))

    def test_connected_predict_probability(self):
        np.random.seed(42)
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(100, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:80]
        predict_data = values[80:].copy()
        self.model_connected.fit(fit_data)
        predict_data.drop('E', axis=1, inplace=True)
        e_prob = self.model_connected.predict_probability(predict_data)
        np_test.assert_allclose(e_prob.values.ravel(),
                                    np.array([0.57894737,  0.42105263,  0.57894737,  0.42105263,  0.57894737,
                                             0.42105263,  0.5       ,  0.5       ,  0.57894737,  0.42105263,
                                             0.5       ,  0.5       ,  0.57894737,  0.42105263,  0.57894737,
                                             0.42105263,  0.57894737,  0.42105263,  0.5       ,  0.5       ,
                                             0.57894737,  0.42105263,  0.57894737,  0.42105263,  0.5       ,
                                             0.5       ,  0.57894737,  0.42105263,  0.57894737,  0.42105263,
                                             0.5       ,  0.5       ,  0.57894737,  0.42105263,  0.5       ,
                                             0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ]), atol = 0)
        predict_data = pd.DataFrame(np.random.randint(low=0, high=2, size=(1, 5)),
                              columns=['A', 'B', 'C', 'F', 'E'])[:]

    def test_predict_probability_errors(self):
        np.random.seed(42)
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(2, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:1]
        predict_data = values[1:].copy()
        self.model_connected.fit(fit_data)
        self.assertRaises(ValueError, self.model_connected.predict_probability, predict_data)
        predict_data = pd.DataFrame(np.random.randint(low=0, high=2, size=(1, 5)),
                              columns=['A', 'B', 'C', 'F', 'E'])[:]
        self.assertRaises(ValueError, self.model_connected.predict_probability, predict_data)

    def tearDown(self):
        del self.model_connected
        del self.model_disconnected
Exemple #35
0
class TestBaseModelCreation(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(self.G.nodes(), ['a'])

    def test_add_node_nonstring(self):
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(self.G.edges(), [('d', 'e')])
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()),
            hf.recursive_sorted([('a', 'b'), ('b', 'c'), ('d', 'e'),
                                 ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'b'),
                                                              ('b', 'c'),
                                                              ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.g.predecessors('a'), [])
        self.assertListEqual(self.g.predecessors('b'), ['a'])
        self.assertListEqual(self.g.predecessors('c'), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.G.predecessors('a'), [])
        self.assertListEqual(self.G.predecessors('b'), ['a'])
        self.assertListEqual(self.G.predecessors('c'), ['b'])

    def tearDown(self):
        del self.G
class TestBaseEstimator(unittest.TestCase):
    def setUp(self):
        self.rand_data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
        self.rand_data['C'] = self.rand_data['B']
        self.est_rand = HillClimbSearch(self.rand_data, scoring_method=K2Score(self.rand_data))
        self.model1 = BayesianModel()
        self.model1.add_nodes_from(['A', 'B', 'C'])
        self.model2 = self.model1.copy()
        self.model2.add_edge('A', 'B')

        # link to dataset: "https://www.kaggle.com/c/titanic/download/train.csv"
        self.titanic_data = pd.read_csv('pgmpy/tests/test_estimators/testdata/titanic_train.csv')
        self.titanic_data1 = self.titanic_data[["Survived", "Sex", "Pclass", "Age", "Embarked"]]
        self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
        self.est_titanic1 = HillClimbSearch(self.titanic_data1)
        self.est_titanic2 = HillClimbSearch(self.titanic_data2)

    def test_legal_operations(self):
        model2_legal_ops = list(self.est_rand._legal_operations(self.model2))
        model2_legal_ops_ref = [(('+', ('C', 'A')), -28.15602208305154),
                                (('+', ('A', 'C')), -28.155467430966382),
                                (('+', ('C', 'B')), 7636.947544933631),
                                (('+', ('B', 'C')), 7937.805375579936),
                                (('-', ('A', 'B')), 28.155467430966382),
                                (('flip', ('A', 'B')), -0.0005546520851567038)]
        self.assertSetEqual(set([op for op, score in model2_legal_ops]),
                            set([op for op, score in model2_legal_ops_ref]))

    def test_legal_operations_titanic(self):
        est = self.est_titanic1
        start_model = BayesianModel([("Survived", "Sex"),
                                     ("Pclass", "Age"),
                                     ("Pclass", "Embarked")])

        legal_ops = est._legal_operations(start_model)
        self.assertEqual(len(list(legal_ops)), 20)

        tabu_list = [('-', ("Survived", "Sex")),
                     ('-', ("Survived", "Pclass")),
                     ('flip', ("Age", "Pclass"))]
        legal_ops_tabu = est._legal_operations(start_model, tabu_list=tabu_list)
        self.assertEqual(len(list(legal_ops_tabu)), 18)

        legal_ops_indegree = est._legal_operations(start_model, max_indegree=1)
        self.assertEqual(len(list(legal_ops_indegree)), 11)

        legal_ops_both = est._legal_operations(start_model, tabu_list=tabu_list, max_indegree=1)
        legal_ops_both_ref = [(('+', ('Embarked', 'Survived')), 10.050632580087608),
                              (('+', ('Survived', 'Pclass')), 41.88868046549101),
                              (('+', ('Age', 'Survived')), -23.635716036430836),
                              (('+', ('Pclass', 'Survived')), 41.81314459373226),
                              (('+', ('Sex', 'Pclass')), 4.772261678792802),
                              (('-', ('Pclass', 'Age')), 11.546515590731815),
                              (('-', ('Pclass', 'Embarked')), -32.171482832532774),
                              (('flip', ('Pclass', 'Embarked')), 3.3563814191281836),
                              (('flip', ('Survived', 'Sex')), 0.039737027979640516)]
        self.assertSetEqual(set(legal_ops_both), set(legal_ops_both_ref))

    def test_estimate_rand(self):
        est1 = self.est_rand.estimate()
        self.assertSetEqual(set(est1.nodes()), set(['A', 'B', 'C']))
        self.assertTrue(est1.edges() == [('B', 'C')] or est1.edges() == [('C', 'B')])

        est2 = self.est_rand.estimate(start=BayesianModel([('A', 'B'), ('A', 'C')]))
        self.assertTrue(est2.edges() == [('B', 'C')] or est2.edges() == [('C', 'B')])

    def test_estimate_titanic(self):
        self.assertSetEqual(set(self.est_titanic2.estimate().edges()),
                            set([('Survived', 'Pclass'), ('Sex', 'Pclass'), ('Sex', 'Survived')]))

    def tearDown(self):
        del self.rand_data
        del self.est_rand
        del self.model1
        del self.titanic_data
        del self.titanic_data1
        del self.titanic_data2
        del self.est_titanic1
        del self.est_titanic2
from pgmpy.models import BayesianModel
from pgmpy.factors import TabularCPD
# Creating the above bayesian network
model = BayesianModel()
model.add_nodes_from(['Rain', 'TrafficJam'])
model.add_edge('Rain', 'TrafficJam')
model.add_edge('Accident', 'TrafficJam')
cpd_rain = TabularCPD('Rain', 2, [[0.4], [0.6]])
cpd_accident = TabularCPD('Accident', 2, [[0.2], [0.8]])
cpd_traffic_jam = TabularCPD('TrafficJam', 2,
                             [[0.9, 0.6, 0.7, 0.1],
                              [0.1, 0.4, 0.3, 0.9]],
                             evidence=['Rain', 'Accident'],
                             evidence_card=[2, 2])
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam)
model.add_node('LongQueues')
model.add_edge('TrafficJam', 'LongQueues')
cpd_long_queues = TabularCPD('LongQueues', 2,
                             [[0.9, 0.2],
                              [0.1, 0.8]],
                             evidence=['TrafficJam'],
                             evidence_card=[2])
model.add_cpds(cpd_long_queues)
model.add_nodes_from(['GettingUpLate', 'LateForSchool'])
model.add_edges_from([('GettingUpLate', 'LateForSchool'),
                      ('TrafficJam', 'LateForSchool')])
cpd_getting_up_late = TabularCPD('GettingUpLate', 2,
                                 [[0.6], [0.4]])
cpd_late_for_school = TabularCPD('LateForSchool', 2,
                                 [[0.9, 0.45, 0.8, 0.1],
                                  [0.1, 0.55, 0.2, 0.9]],
class TestBaseModelCreation(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(self.G.nodes(), ['a'])

    def test_add_node_nonstring(self):
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(self.G.edges(), [('d', 'e')])
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                                  ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'b'), ('b', 'c'), ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.g.predecessors('a'), [])
        self.assertListEqual(self.g.predecessors('b'), ['a'])
        self.assertListEqual(self.g.predecessors('c'), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.G.predecessors('a'), [])
        self.assertListEqual(self.G.predecessors('b'), ['a'])
        self.assertListEqual(self.G.predecessors('c'), ['b'])

    def tearDown(self):
        del self.G
    def pdag_to_dag(pdag):
        """Completes a PDAG to a DAG, without adding v-structures, if such a
        completion exists. If no faithful extension is possible, some fully
        oriented DAG that corresponds to the PDAG is returned and a warning is
        generated. This is a static method.

        Parameters
        ----------
        pdag: DirectedGraph
            A directed acyclic graph pattern, consisting in (acyclic) directed edges
            as well as "undirected" edges, represented as both-way edges between
            nodes.

        Returns
        -------
        dag: BayesianModel
            A faithful orientation of pdag, if one exists. Otherwise any
            fully orientated DAG/BayesianModel with the structure of pdag.

        References
        ----------
        [1] Chickering, Learning Equivalence Classes of Bayesian-Network Structures,
            2002; See page 454 (last paragraph) for the algorithm pdag_to_dag
            http://www.jmlr.org/papers/volume2/chickering02a/chickering02a.pdf
        [2] Dor & Tarsi, A simple algorithm to construct a consistent extension
            of a partially oriented graph, 1992,
            http://ftp.cs.ucla.edu/pub/stat_ser/r185-dor-tarsi.pdf

        Examples
        --------
        >>> import pandas as pd
        >>> import numpy as np
        >>> from pgmpy.base import DirectedGraph
        >>> from pgmpy.estimators import ConstraintBasedEstimator
        >>> data = pd.DataFrame(np.random.randint(0, 4, size=(5000, 3)), columns=list('ABD'))
        >>> data['C'] = data['A'] - data['B']
        >>> data['D'] += data['A']
        >>> c = ConstraintBasedEstimator(data)
        >>> pdag = c.skeleton_to_pdag(*c.estimate_skeleton())
        >>> pdag.edges()
        [('B', 'C'), ('D', 'A'), ('A', 'D'), ('A', 'C')]
        >>> c.pdag_to_dag(pdag).edges()
        [('B', 'C'), ('A', 'D'), ('A', 'C')]

        >>> # pdag_to_dag is static:
        ... pdag1 = DirectedGraph([('A', 'B'), ('C', 'B'), ('C', 'D'), ('D', 'C'), ('D', 'A'), ('A', 'D')])
        >>> ConstraintBasedEstimator.pdag_to_dag(pdag1).edges()
        [('D', 'C'), ('C', 'B'), ('A', 'B'), ('A', 'D')]

        >>> # example of a pdag with no faithful extension:
        ... pdag2 = DirectedGraph([('A', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'B')])
        >>> ConstraintBasedEstimator.pdag_to_dag(pdag2).edges()
        UserWarning: PDAG has no faithful extension (= no oriented DAG with the same v-structures as PDAG).
        Remaining undirected PDAG edges oriented arbitrarily.
        [('B', 'C'), ('A', 'B'), ('A', 'C')]
        """

        pdag = pdag.copy()
        dag = BayesianModel()
        dag.add_nodes_from(pdag.nodes())

        # add already directed edges of pdag to dag
        for X, Y in pdag.edges():
            if not pdag.has_edge(Y, X):
                dag.add_edge(X, Y)

        while pdag.number_of_nodes() > 0:
            # find node with (1) no directed outgoing edges and
            #                (2) the set of undirected neighbors is either empty or
            #                    undirected neighbors + parents of X are a clique
            found = False
            for X in pdag.nodes():
                directed_outgoing_edges = set(pdag.successors(X)) - set(pdag.predecessors(X))
                undirected_neighbors = set(pdag.successors(X)) & set(pdag.predecessors(X))
                neighbors_are_clique = all((pdag.has_edge(Y, Z)
                                            for Z in pdag.predecessors(X)
                                            for Y in undirected_neighbors if not Y == Z))

                if not directed_outgoing_edges and \
                        (not undirected_neighbors or neighbors_are_clique):
                    found = True
                    # add all edges of X as outgoing edges to dag
                    for Y in pdag.predecessors(X):
                        dag.add_edge(Y, X)

                    pdag.remove_node(X)
                    break

            if not found:
                warn("PDAG has no faithful extension (= no oriented DAG with the " +
                     "same v-structures as PDAG). Remaining undirected PDAG edges " +
                     "oriented arbitrarily.")
                for X, Y in pdag.edges():
                    if not dag.has_edge(Y, X):
                        try:
                            dag.add_edge(X, Y)
                        except ValueError:
                            pass
                break

        return dag
Exemple #40
0
# Bayesian network for students
from pgmpy.models import BayesianModel
model = BayesianModel()
# Add nodes
model.add_nodes_from(['difficulty', 'intelligence', 'grade', 'sat', 'letter'])
print(model.nodes())
# Add edges
model.add_edges_from([('difficulty', 'grade'), ('intelligence', 'grade'), ('intelligence', 'sat'), ('grade', 'letter')])
print(model.edges())