import pandas as pd

def Hybrid(dataset: pd.DataFrame):
    from pgmpy.estimators import MmhcEstimator
    from pgmpy.estimators import HillClimbSearch
    from pgmpy.estimators import BDeuScore, K2Score, BicScore
    from pgmpy.models import BayesianModel
    
    mmhc = MmhcEstimator(dataset)
    # mmpc() takes a significance_level parameter (default=0.01): the desired Type I error
    # probability of falsely rejecting the null hypothesis that two variables are independent,
    # i.e. it confines the Type I error rate. (Therefore, the lower the value, the fewer
    # dependencies we accept, resulting in a sparser graph.)
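    # e.g. a stricter test (hypothetical): skeleton = mmhc.mmpc(significance_level=0.001)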
    skeleton = mmhc.mmpc()
    print("Part 1) Skeleton: ", skeleton.edges())

    # use hill climb search to orient the edges:
    hc = HillClimbSearch(dataset, scoring_method=BDeuScore(dataset, equivalent_sample_size=5))
    # Recording the evaluation of different iteration
    bdeu = BDeuScore(dataset, equivalent_sample_size=5)
    iter_list = [2**i for i in range(20)]
    eval_list = []
    for iteration in iter_list:
        DAG_connection = hc.estimate(tabu_length=10, white_list=skeleton.to_directed().edges(), max_iter=iteration)
        model = BayesianModel(DAG_connection.edges())
        print(bdeu.score(model))
        eval_list.append(bdeu.score(model))

    print("Part 2) Model:    ", model.edges())
    return model.edges(), [iter_list, eval_list]
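
A minimal usage sketch for Hybrid, assuming a fully discrete DataFrame; the random data here is purely illustrative:

import numpy as np
import pandas as pd

data = pd.DataFrame(np.random.randint(0, 2, size=(500, 4)), columns=['A', 'B', 'C', 'D'])
edges, (iters, scores) = Hybrid(data)
print(edges)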
Example #2
def drawGraph(model: BayesianModel,
              nodeColor: Color = LIGHT_CORNF,
              edgeColor: Color = CHERRY) -> gz.Digraph:

    # Getting the edges (the .edges() results in NetworkX OutEdgeView object)
    structures: List[Tuple[Name, Name]] = list(model.edges())

    return edgesToGraph(edges=structures,
                        nodeColor=nodeColor,
                        edgeColor=edgeColor)
Example #3
    def get_model(self):
        """
        Returns the model instance of the ProbModel.

        Returns
        -------
        model: an instance of BayesianModel.

        Examples
        --------
        >>> reader = ProbModelXMLReader()
        >>> reader.get_model()
        """
        if self.probnet.get("type") == "BayesianNetwork":
            model = BayesianModel()
            model.add_nodes_from(self.probnet["Variables"].keys())
            model.add_edges_from(self.probnet["edges"].keys())

            tabular_cpds = []
            cpds = self.probnet["Potentials"]
            for cpd in cpds:
                var = list(cpd["Variables"].keys())[0]
                states = self.probnet["Variables"][var]["States"]
                evidence = cpd["Variables"][var]
                evidence_card = [
                    len(self.probnet["Variables"][evidence_var]["States"])
                    for evidence_var in evidence
                ]
                arr = list(map(float, cpd["Values"].split()))
                values = np.array(arr)
                values = values.reshape((len(states), values.size // len(states)))
                tabular_cpds.append(
                    TabularCPD(var, len(states), values, evidence, evidence_card)
                )

            model.add_cpds(*tabular_cpds)

            variables = model.nodes()
            for var in variables:
                for prop_name, prop_value in self.probnet["Variables"][var].items():
                    model.nodes[var][prop_name] = prop_value
            edges = model.edges()

            if nx.__version__.startswith("1"):
                for edge in edges:
                    for prop_name, prop_value in self.probnet["edges"][edge].items():
                        model.edge[edge[0]][edge[1]][prop_name] = prop_value
            else:
                for edge in edges:
                    for prop_name, prop_value in self.probnet["edges"][edge].items():
                        model.adj[edge[0]][edge[1]][prop_name] = prop_value
            return model
        else:
            raise ValueError("Please specify only a Bayesian Network.")
Example #4
import itertools
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
from pgmpy.inference import VariableElimination

def create_model_and_inference():
    dep_df = pd.read_csv('dependencies.csv', sep=';')

    def connect(df, source, edgelist):
        source_df = df[df['Column2'] == source]
        for col in source_df.iloc[0, 3:len(source_df.columns)]:
            target_df = df[df['Column1'] == col]['Column2']
            if not target_df.empty:
                target = target_df.item()
                if (target, source) not in edgelist:
                    edgelist.append((source, target))
                    connect(df, target, edgelist)

    edges = []
    connect(dep_df, 'myproximus-usage', edges)
    edges = [(t[1], t[0]) for t in edges]

    nodes = set(itertools.chain.from_iterable(edges))
    nodes_df = dep_df.iloc[:, 1].to_frame()
    nodes_df = nodes_df[nodes_df['Column2'].isin(nodes)]

    # eleven illustrative binary feature columns (random, as in the original)
    for i in range(11):
        nodes_df[str(i)] = pd.DataFrame(data=np.random.randint(0, 2, size=64).T)
    nodes_df = nodes_df.set_index('Column2').transpose()

    model = BayesianModel()
    model.add_nodes_from(nodes)
    for edge in edges:
        try:
            model.add_edge(edge[0], edge[1])
        except ValueError:
            print('WARNING: tried to add edge which forms loop: ' + str(edge))

    model.fit(nodes_df, estimator=BayesianEstimator, prior_type="BDeu")
    # for cpd in model.get_cpds():
    #     print(cpd)

    draw_network(model.nodes(), model.edges(), {}, [])

    return model, VariableElimination(model)
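
An illustrative call (assumes the dependencies.csv layout read above and the external draw_network helper):

model, infer = create_model_and_inference()
print(model.edges())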
Example #5
    def test_estimate_from_independencies(self):
        ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
        ind = ind.closure()
        model = ConstraintBasedEstimator.estimate_from_independencies("ABCD", ind)

        self.assertSetEqual(set(model.edges()),
                            set([('B', 'D'), ('A', 'D'), ('C', 'D')]))

        model1 = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
        model2 = ConstraintBasedEstimator.estimate_from_independencies(
                            model1.nodes(),
                            model1.get_independencies())

        self.assertTrue(set(model2.edges()) == set(model1.edges()) or
                        set(model2.edges()) == set([('B', 'C'), ('A', 'C'), ('C', 'E'), ('D', 'B')]))
Example #8
import pandas as pd

def Hill_Climbing(dataset: pd.DataFrame):
    # from pgmpy.estimators import ExhaustiveSearch
    from pgmpy.estimators import HillClimbSearch
    from pgmpy.estimators import BDeuScore, K2Score, BicScore
    from pgmpy.models import BayesianModel

    bdeu = BDeuScore(dataset, equivalent_sample_size=5)
    
    hc = HillClimbSearch(dataset, scoring_method=BDeuScore(dataset, equivalent_sample_size=5))
    iter_list = [2**i for i in range(20)]
    eval_list = []
    for iteration in iter_list:
        DAG_connection = hc.estimate(tabu_length=10, max_iter=iteration)
        model = BayesianModel(DAG_connection.edges())
        print(bdeu.score(model))
        eval_list.append(bdeu.score(model))
    
    return model.edges(), [iter_list, eval_list]
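
Given a discrete DataFrame data, the returned score trajectory can be plotted to see when the search converges; a sketch, assuming matplotlib is available:

import matplotlib.pyplot as plt

edges, (iters, scores) = Hill_Climbing(data)
plt.semilogx(iters, scores)
plt.xlabel('max_iter')
plt.ylabel('BDeu score')
plt.show()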
Example #9
    def get_model(self):
        """
        Returns the model instance of the ProbModel.

        Returns
        -------
        model: an instance of BayesianModel.

        Examples
        --------
        >>> reader = ProbModelXMLReader()
        >>> reader.get_model()
        """
        if self.probnet.get('type') == "BayesianNetwork":
            model = BayesianModel(self.probnet['edges'].keys())

            tabular_cpds = []
            cpds = self.probnet['Potentials']
            for cpd in cpds:
                var = list(cpd['Variables'].keys())[0]
                states = self.probnet['Variables'][var]['States']
                evidence = cpd['Variables'][var]
                evidence_card = [len(self.probnet['Variables'][evidence_var]['States'])
                                 for evidence_var in evidence]
                arr = list(map(float, cpd['Values'].split()))
                values = np.array(arr)
                values = values.reshape((len(states), values.size//len(states)))
                tabular_cpds.append(TabularCPD(var, len(states), values, evidence, evidence_card))

            model.add_cpds(*tabular_cpds)

            variables = model.nodes()
            for var in variables:
                for prop_name, prop_value in self.probnet['Variables'][var].items():
                    model.node[var][prop_name] = prop_value

            edges = model.edges()
            for edge in edges:
                for prop_name, prop_value in self.probnet['edges'][edge].items():
                    model.edge[edge[0]][edge[1]][prop_name] = prop_value
            return model
        else:
            raise ValueError("Please specify only a Bayesian Network.")
Example #10
import numpy as np
import utool as ut

def bayesnet():
    """
    References:
        https://class.coursera.org/pgm-003/lecture/17
        http://www.cs.ubc.ca/~murphyk/Bayes/bnintro.html
        http://www3.cs.stonybrook.edu/~sael/teaching/cse537/Slides/chapter14d_BP.pdf
        http://www.cse.unsw.edu.au/~cs9417ml/Bayes/Pages/PearlPropagation.html
        https://github.com/pgmpy/pgmpy.git
        http://pgmpy.readthedocs.org/en/latest/
        http://nipy.bic.berkeley.edu:5000/download/11
    """
    # import operator as op
    # # Enumerate all possible events
    # varcard_list = list(map(op.attrgetter('variable_card'), cpd_list))
    # _esdat = list(ut.iprod(*map(range, varcard_list)))
    # _escol = list(map(op.attrgetter('variable'), cpd_list))
    # event_space = pd.DataFrame(_esdat, columns=_escol)

    # # Custom compression of event space to inspect a specific graph
    # def compress_space_flags(event_space, var1, var2, var3, cmp12_):
    #     """
    #     var1, var2, cmp_ = 'Lj', 'Lk', op.eq
    #     """
    #     import vtool as vt
    #     data = event_space
    #     other_cols = ut.setdiff_ordered(data.columns.tolist(), [var1, var2, var3])
    #     case_flags12 = cmp12_(data[var1], data[var2]).values
    #     # case_flags23 = cmp23_(data[var2], data[var3]).values
    #     # case_flags = np.logical_and(case_flags12, case_flags23)
    #     case_flags = case_flags12
    #     case_flags = case_flags.astype(np.int64)
    #     subspace = np.hstack((case_flags[:, None], data[other_cols].values))
    #     sel_ = vt.unique_row_indexes(subspace)
    #     flags = np.logical_and(mask, case_flags)
    #     return flags

    # # Build special cases
    # case_same   = event_space.loc[compress_space_flags(event_space, 'Li', 'Lj', 'Lk', op.eq)]
    # case_diff = event_space.loc[compress_space_flags(event_space, 'Li', 'Lj', 'Lk', op.ne)]
    # special_cases = [
    #     case_same,
    #     case_diff,
    # ]

    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd
    from pgmpy.inference import BeliefPropagation  # NOQA
    from pgmpy.inference import VariableElimination  # NOQA

    name_nice = ['n1', 'n2', 'n3']
    score_nice = ['low', 'high']
    match_nice = ['diff', 'same']
    num_names = len(name_nice)
    num_scores = len(score_nice)
    nid_basis = list(range(num_names))
    score_basis = list(range(num_scores))

    semtype2_nice = {
        'score': score_nice,
        'name': name_nice,
        'match': match_nice,
    }
    var2_cpd = {
    }
    globals()['semtype2_nice'] = semtype2_nice
    globals()['var2_cpd'] = var2_cpd

    name_combo = np.array(list(ut.iprod(nid_basis, nid_basis)))
    combo_is_same = name_combo.T[0] == name_combo.T[1]
    def get_expected_scores_prob(level1, level2):
        part1 = combo_is_same * level1
        part2 = (1 - combo_is_same) * (1 - (level2))
        expected_scores_level = part1 + part2
        return expected_scores_level

    # def make_cpd():

    def name_cpd(aid):
        from pgmpy.factors import TabularCPD
        cpd = TabularCPD(
            variable='N' + aid,
            variable_card=num_names,
            values=[[1.0 / num_names] * num_names])
        cpd.semtype = 'name'
        return cpd

    name_cpds = [name_cpd('i'), name_cpd('j'), name_cpd('k')]
    var2_cpd.update(dict(zip([cpd.variable for cpd in name_cpds], name_cpds)))
    if True:
        num_same_diff = 2
        samediff_measure = np.array([
            # get_expected_scores_prob(.12, .2),
            # get_expected_scores_prob(.88, .8),
            get_expected_scores_prob(0, 0),
            get_expected_scores_prob(1, 1),
        ])
        samediff_vals = (samediff_measure / samediff_measure.sum(axis=0)).tolist()
        def samediff_cpd(aid1, aid2):
            cpd = TabularCPD(
                variable='A' + aid1 + aid2,
                variable_card=num_same_diff,
                values=samediff_vals,
                evidence=['N' + aid1, 'N' + aid2],  # [::-1],
                evidence_card=[num_names, num_names])  # [::-1])
            cpd.semtype = 'match'
            return cpd
        samediff_cpds = [samediff_cpd('i', 'j'), samediff_cpd('j', 'k'), samediff_cpd('k', 'i')]
        var2_cpd.update(dict(zip([cpd.variable for cpd in samediff_cpds], samediff_cpds)))

        if True:
            def score_cpd(aid1, aid2):
                semtype = 'score'
                evidence = ['A' + aid1 + aid2, 'N' + aid1, 'N' + aid2]
                evidence_cpds = [var2_cpd[key] for key in evidence]
                evidence_nice = [semtype2_nice[cpd.semtype] for cpd in evidence_cpds]
                evidence_card = list(map(len, evidence_nice))
                evidence_states = list(ut.iprod(*evidence_nice))
                variable_basis = semtype2_nice[semtype]

                variable_values = []
                for mystate in variable_basis:
                    row = []
                    for state in evidence_states:
                        if state[0] == state[1]:
                            if state[2] == 'same':
                                val = .2 if mystate == 'low' else .8
                            else:
                                val = 1
                                # val = .5 if mystate == 'low' else .5
                        elif state[0] != state[1]:
                            if state[2] == 'same':
                                val = .5 if mystate == 'low' else .5
                            else:
                                val = 1
                                # val = .9 if mystate == 'low' else .1
                        row.append(val)
                    variable_values.append(row)

                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=len(variable_basis),
                    values=variable_values,
                    evidence=evidence,  # [::-1],
                    evidence_card=evidence_card)  # [::-1])
                cpd.semtype = semtype
                return cpd
        else:
            score_values = [
                [.8, .1],
                [.2, .9],
            ]
            def score_cpd(aid1, aid2):
                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=num_scores,
                    values=score_values,
                    evidence=['A' + aid1 + aid2],  # [::-1],
                    evidence_card=[num_same_diff])  # [::-1])
                cpd.semtype = 'score'
                return cpd

        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds + samediff_cpds
    else:
        score_measure = np.array([get_expected_scores_prob(level1, level2)
                                  for level1, level2 in
                                  zip(np.linspace(.1, .9, num_scores),
                                      np.linspace(.2, .8, num_scores))])

        score_values = (score_measure / score_measure.sum(axis=0)).tolist()

        def score_cpd(aid1, aid2):
            cpd = TabularCPD(
                variable='S' + aid1 + aid2,
                variable_card=num_scores,
                values=score_values,
                evidence=['N' + aid1, 'N' + aid2],
                evidence_card=[num_names, num_names])
            cpd.semtype = 'score'
            return cpd
        score_cpds = [score_cpd('i', 'j'), score_cpd('j', 'k')]
        cpd_list = name_cpds + score_cpds
        pass

    input_graph = []
    for cpd in cpd_list:
        if cpd.evidence is not None:
            for evar in cpd.evidence:
                input_graph.append((evar, cpd.variable))
    name_model = BayesianModel(input_graph)
    name_model.add_cpds(*cpd_list)

    var2_cpd.update(dict(zip([cpd.variable for cpd in cpd_list], cpd_list)))
    globals()['var2_cpd'] = var2_cpd

    varnames = [cpd.variable for cpd in cpd_list]

    # --- PRINT CPDS ---

    cpd = score_cpds[0]
    def print_cpd(cpd):
        print('CPT: %r' % (cpd,))
        index = semtype2_nice[cpd.semtype]
        if cpd.evidence is None:
            columns = ['None']
        else:
            basis_lists = [semtype2_nice[var2_cpd[ename].semtype] for ename in cpd.evidence]
            columns = [','.join(x) for x in ut.iprod(*basis_lists)]
        data = cpd.get_cpd()
        print(pd.DataFrame(data, index=index, columns=columns))

    for cpd in name_model.get_cpds():
        print('----')
        print(cpd._str('phi'))
        print_cpd(cpd)

    # --- INFERENCE ---

    Ni = name_cpds[0]

    event_space_combos = {}
    event_space_combos[Ni.variable] = 0  # Set ni to always be Fred
    for cpd in cpd_list:
        if cpd.semtype == 'score':
            event_space_combos[cpd.variable] = list(range(cpd.variable_card))
    evidence_dict = ut.all_dict_combinations(event_space_combos)

    # Query about name of annotation k given different event space params

    def pretty_evidence(evidence):
        return [key + '=' + str(semtype2_nice[var2_cpd[key].semtype][val])
                for key, val in evidence.items()]

    def print_factor(factor):
        row_cards = factor.cardinality
        row_vars = factor.variables
        values = factor.values.reshape(np.prod(row_cards), 1).flatten()
        # col_cards = 1
        # col_vars = ['']
        basis_lists = list(zip(*list(ut.iprod(*[range(c) for c in row_cards]))))
        nice_basis_lists = []
        for varname, basis in zip(row_vars, basis_lists):
            cpd = var2_cpd[varname]
            _nice_basis = ut.take(semtype2_nice[cpd.semtype], basis)
            nice_basis = ['%s=%s' % (varname, val) for val in _nice_basis]
            nice_basis_lists.append(nice_basis)
        row_lbls = [', '.join(sorted(x)) for x in zip(*nice_basis_lists)]
        print(ut.repr3(dict(zip(row_lbls, values)), precision=3, align=True, key_order_metric='-val'))

    # name_belief = BeliefPropagation(name_model)
    name_belief = VariableElimination(name_model)
    import pgmpy
    import six  # NOQA

    def try_query(evidence):
        print('--------')
        query_vars = ut.setdiff_ordered(varnames, list(evidence.keys()))
        evidence_str = ', '.join(pretty_evidence(evidence))
        probs = name_belief.query(query_vars, evidence)
        factor_list = probs.values()
        joint_factor = pgmpy.factors.factor_product(*factor_list)
        print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ')')
        # print(six.text_type(joint_factor))
        factor = joint_factor  # NOQA
        # print_factor(factor)
        # import utool as ut
        print(ut.hz_str([(f._str(phi_or_p='phi')) for f in factor_list]))

    for evidence in evidence_dict:
        try_query(evidence)

    evidence = {'Aij': 1, 'Ajk': 1, 'Aki': 1, 'Ni': 0}
    try_query(evidence)

    evidence = {'Aij': 0, 'Ajk': 0, 'Aki': 0, 'Ni': 0}
    try_query(evidence)

    globals()['score_nice'] = score_nice
    globals()['name_nice'] = name_nice
    globals()['score_basis'] = score_basis
    globals()['nid_basis'] = nid_basis

    print('Independencies')
    print(name_model.get_independencies())
    print(name_model.local_independencies([Ni.variable]))

    # name_belief = BeliefPropagation(name_model)
    # # name_belief = VariableElimination(name_model)
    # for case in special_cases:
    #     test_data = case.drop('Lk', axis=1)
    #     test_data = test_data.reset_index(drop=True)
    #     print('----')
    #     for i in range(test_data.shape[0]):
    #         evidence = test_data.loc[i].to_dict()
    #         probs = name_belief.query(['Lk'], evidence)
    #         factor = probs['Lk']
    #         probs = factor.values
    #         evidence_ = evidence.copy()
    #         evidence_['Li'] = name_nice[evidence['Li']]
    #         evidence_['Lj'] = name_nice[evidence['Lj']]
    #         evidence_['Sij'] = score_nice[evidence['Sij']]
    #         evidence_['Sjk'] = score_nice[evidence['Sjk']]
    #         nice2_prob = ut.odict(zip(name_nice, probs.tolist()))
    #         ut.print_python_code('P(Lk | {evidence}) = {cpt}'.format(
    #             evidence=(ut.repr2(evidence_, explicit=True, nobraces=True, strvals=True)),
    #             cpt=ut.repr3(nice2_prob, precision=3, align=True, key_order_metric='-val')
    #         ))

    # for case in special_cases:
    #     test_data = case.drop('Lk', axis=1)
    #     test_data = test_data.drop('Lj', axis=1)
    #     test_data = test_data.reset_index(drop=True)
    #     print('----')
    #     for i in range(test_data.shape[0]):
    #         evidence = test_data.loc[i].to_dict()
    #         query_vars = ['Lk', 'Lj']
    #         probs = name_belief.query(query_vars, evidence)
    #         for queryvar in query_vars:
    #             factor = probs[queryvar]
    #             print(factor._str('phi'))
    #             probs = factor.values
    #             evidence_ = evidence.copy()
    #             evidence_['Li'] = name_nice[evidence['Li']]
    #             evidence_['Sij'] = score_nice[evidence['Sij']]
    #             evidence_['Sjk'] = score_nice[evidence['Sjk']]
    #             nice2_prob = ut.odict(zip([queryvar + '=' + x for x in name_nice], probs.tolist()))
    #             ut.print_python_code('P({queryvar} | {evidence}) = {cpt}'.format(
    #                 query_var=query_var,
    #                 evidence=(ut.repr2(evidence_, explicit=True, nobraces=True, strvals=True)),
    #                 cpt=ut.repr3(nice2_prob, precision=3, align=True, key_order_metric='-val')
    #             ))

    # --- DRAW MODEL ---

    import plottool as pt
    import networkx as netx
    fig = pt.figure()  # NOQA
    fig.clf()
    ax = pt.gca()

    netx_nodes = [(node, {}) for node in name_model.nodes()]
    netx_edges = [(etup[0], etup[1], {}) for etup in name_model.edges()]
    netx_graph = netx.DiGraph()
    netx_graph.add_nodes_from(netx_nodes)
    netx_graph.add_edges_from(netx_edges)

    # pos = netx.graphviz_layout(netx_graph)
    pos = netx.pydot_layout(netx_graph, prog='dot')
    netx.draw(netx_graph, pos=pos, ax=ax, with_labels=True)

    pt.plt.savefig('foo.png')
    ut.startfile('foo.png')
Example #11
'''Model1'''
from pgmpy.models import BayesianModel
from pgmpy.sampling import BayesianModelSampling
from pgmpy.estimators import K2Score

model1 = BayesianModel([('x1', 'x2'), ('x2', 'x5'), ('x4', 'x1'), ('x4', 'x6'), ('x6', 'x3')])

# p_21, p_52, p_14, p_64, p_36 and p4 are TabularCPDs assumed to be defined earlier in the source
cpd1 = [p_21, p_52, p_14, p_64, p_36, p4]

model1.add_cpds(*cpd1)

print("------------------------------------------")
print("Edges of model1:", model1.edges())
print("Checking Model1:", model1.check_model())
print("------------------------------------------")
'''generate data for model1'''
inference = BayesianModelSampling(model1)
data = inference.forward_sample(size=3000, return_type='dataframe')
print("Data for model1:")
print(data)
k2 = K2Score(data)
print('Model1 K2 Score: ' + str(k2.score(model1)))

'''Inference'''
from pgmpy.inference import VariableElimination
infer = VariableElimination(model1)
print("Inference of x3:")
print(infer.query(['x3'])['x3'])
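
For comparison, other pgmpy structure scores can be evaluated on the same sampled data (a sketch):

from pgmpy.estimators import BDeuScore, BicScore
print('Model1 BDeu Score: ' + str(BDeuScore(data).score(model1)))
print('Model1 BIC Score: ' + str(BicScore(data).score(model1)))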
Example #13
#
###################################################################################

from pgmpy.models import BayesianModel
model = BayesianModel()
# Add nodes to the empty Bayesian model
# ( Heavy Rain       -> traffic_jam )
# ( Traffic Accident -> traffic_jam )
model.add_nodes_from(['rain', 'traffic_jam'])
model.add_edge('rain', 'traffic_jam')
# If an edge is added before its nodes, the missing nodes are added automatically
"Example: "
model.add_edge('accident', 'traffic_jam')
model.nodes()
" ['accident', 'rain', 'traffic_jam'] "
model.edges()
" [('rain', 'traffic_jam'), ('accident', 'traffic_jam')}" # two edges showed
# each node has an associated CPD
from pgmpy.factors import TabularCPD
cpd_rain = TabularCPD('rain', 2, [[0.4], [0.6]])
cpd_accident = TabularCPD('accident', 2, [[0.2], [0.8]])
cpd_traffic_jam = TabularCPD('traffic_jam', 2,
                             [[0.9, 0.6, 0.7, 0.1],
                              [0.1, 0.4, 0.3, 0.9]],
                             evidence=['rain', 'accident'],
                             evidence_card=[2, 2])
# associate each CPD to model
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam)
model.get_cpds()
" [<TabularCPD representing P(rain: 2) at fsjidfsjdfaskdf>, "
" [<TabularCPD representing P(accident: 2) at fsxfgsdfgfsjdfaskdf>, "
Example #14
from pgmpy.readwrite.BIF import BIFWriter
from pgmpy.models import BayesianModel
from pgmpy.estimators import BicScore, HillClimbSearch, BayesianEstimator
import pandas as pd
import numpy as np
from time import time
import graphviz as gv
import os

train = pd.read_csv('../msnbcWithHeader.csv', sep=',')
train = train[train.sum(axis=1) < 200]  # drop unusually long sessions
train[train > 1] = 1                    # binarize visit counts

train_start = time()
bic = BicScore(train)
hc = HillClimbSearch(train, scoring_method=bic)
best_model = hc.estimate(prog_bar=True)
edges = best_model.edges()
model = BayesianModel(edges)
model.fit(train, estimator=BayesianEstimator, prior_type="BDeu")
variables = model.nodes()

print(model.edges())
train_end = time() - train_start
print("train time " + str(train_end))

my_graph = gv.Digraph(format='png')
for node in variables:
    my_graph.node(node)
for edge in edges:
    my_graph.edge(edge[0], edge[1])
filename = my_graph.render('../graph', view=True)
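
With the fitted model, exact inference can be run as well (a sketch; the actual column names come from the CSV header):

from pgmpy.inference import VariableElimination
infer = VariableElimination(model)
# e.g. print(infer.query(variables=[list(variables)[0]], evidence={list(variables)[1]: 0}))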
Example #15
from pgmpy.models import BayesianModel
from pgmpy.estimators import HillClimbSearch, BDeuScore
from pgmpy.inference import VariableElimination

def main():

    andPGM = PGM_t()
    print('loading features..')
    train_set, test_set = andPGM.load_features()
    print('loading features.. Done')
    # Bayesian network of 19 nodes, 9*2 variables of network given
    # Initial incomplete Bayesian model connected manually based on intuition
    print('Generating model.. ')
    initialModel = BayesianModel({})
    initialModel.add_nodes_from(andPGM.img_features.columns[1:10].tolist())
    initialModel.add_edges_from([('f6_a', 'f2_a'),
                                 ('f3_a', 'f4_a'),
                                 ('f5_a', 'f9_a'),
                                 ('f4_a', 'f7_a')])

    # Use hill climb search algorithm to find network structure of initial 9 nodes
    hc = HillClimbSearch(data=andPGM.img_features.iloc[0:, 1:10],
                         scoring_method=BDeuScore(andPGM.img_features.iloc[0:, 1:10],
                                                  equivalent_sample_size=0.1 * len(andPGM.img_features)),
                         state_names=andPGM.states_9)
    # Get best estimated structure
    best_model = hc.estimate(start=initialModel)
    # Edges in the acquired graph
    print('model of 9 var: ', best_model.edges())

    # Create a Clone of generated Bayesian network structure
    clone_model = BayesianModel({})
    for edge in best_model.edges():
        new_edge = [edge[0][:-1] + 'b', edge[1][:-1] + 'b']
        clone_model.add_edges_from([new_edge])

    # Join together the Original and clone network through node 'same'
    multinetModel = BayesianModel({})
    multinetModel.add_edges_from(list(best_model.edges()) + list(clone_model.edges()))
    multinetModel.add_node('same')
    multinetModel.add_edge('f5_a', 'same')
    multinetModel.add_edge('f9_a', 'same')
    multinetModel.add_edge('f5_b', 'same')
    multinetModel.add_edge('f9_b', 'same')
    print('Generating model.. Done')
    # Edges in the final structure
    print('Final model: ', multinetModel.edges())

    print('Fit data into model..')
    # fit the data to the model to generate CPDs using maximum likelihood estimation
    multinetModel.fit(data=train_set, state_names=andPGM.states_all)
    print('Fit data into model.. Done')
    print('CPDs generated: ')
    cpds = multinetModel.get_cpds()
    for cpd in cpds:
        print(cpd)
    # Inference using Variable Elimination
    print('Start inference..')
    inference = VariableElimination(multinetModel)
    train_set_same = train_set[train_set['same'] == 0]
    train_set_not_same = train_set[train_set['same'] == 1]

    # Accuracy of positive inferences
    acc_same = andPGM.chk_accuracy(
        train_set_same,
        inference,
        variables=train_set_same.columns[0:9].tolist(),
        evidence=train_set_same.columns[9:19].tolist())
    print('accuracy of positives ', acc_same)

    # Accuracy of negative inferences
    acc_nt_same = andPGM.chk_accuracy(
        train_set_not_same,
        inference,
        variables=train_set_not_same.columns[0:9].tolist(),
        evidence=train_set_not_same.columns[9:19].tolist())
    print('accuracy of negatives', acc_nt_same)
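
A standard entry-point guard so the script can be run directly:

if __name__ == '__main__':
    main()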
Example #16
from pgmpy.estimators import HillClimbSearch, K2Score, BayesianEstimator
from pgmpy.models import BayesianModel
from pgmpy.sampling import BayesianModelSampling
import pandas as pd

task4_bms = []  # candidate models collected below; andRawData is assumed to be loaded elsewhere

def task4():
	global andRawData, task4_best_bm
	k2Scores = []
	andRawData_temp = pd.DataFrame(andRawData.values, columns=['f1','f2','f3','f4','f5','f6','f7','f8','f9'])
	#Model 1
	est = HillClimbSearch(andRawData_temp, scoring_method=K2Score(andRawData_temp))
	model_temp = est.estimate()
	estimator = BayesianEstimator(model_temp, andRawData_temp)
	for fx in ['f1','f2','f3','f4','f5','f6','f7','f8','f9']:
		cpd_fx = estimator.estimate_cpd(fx, prior_type="K2")
		model_temp.add_cpds(cpd_fx)
	task4_bms.append(model_temp)
	print("	Model 1: Model through HillClimbSearch is : "+str(model_temp.edges()))
	k2Score = K2Score((BayesianModelSampling(model_temp)).forward_sample(size=1000))
	k2Scores_temp = k2Score.score(model_temp)
	k2Scores.append(k2Scores_temp)
	print("	Model 1: K2 Accuracy Score is "+str(k2Scores_temp))
	#Model 2: Manual Model based on HillClimbSearch
	model_temp = BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f3', 'f8'), ('f1', 'f7'), ('f5', 'f3'), ('f9', 'f8'), ('f1', 'f6'), ('f9', 'f1'), ('f9', 'f6'), ('f9', 'f2')])
	estimator = BayesianEstimator(model_temp, andRawData_temp)
	for fx in ['f1','f2','f3','f4','f5','f6','f7','f8','f9']:
		cpd_fx = estimator.estimate_cpd(fx, prior_type="K2")
		model_temp.add_cpds(cpd_fx)
	task4_bms.append(model_temp)
	print("	Model 2: Manual Model based on HillClimbSearch is : "+str(model_temp.edges()))
	k2Score = K2Score((BayesianModelSampling(model_temp)).forward_sample(size=1000))
	k2Scores_temp = k2Score.score(model_temp)
	k2Scores.append(k2Scores_temp)
	print("	Model 2: K2 Accuracy Score is "+str(k2Scores_temp))
	#Model 3: Manual Model based on HillClimbSearch
	model_temp = BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f3', 'f8'), ('f5', 'f7'), ('f5', 'f3'), ('f9', 'f8'), ('f1', 'f2'), ('f9', 'f1'), ('f9', 'f6'), ('f9', 'f2')])
	estimator = BayesianEstimator(model_temp, andRawData_temp)
	for fx in ['f1','f2','f3','f4','f5','f6','f7','f8','f9']:
		cpd_fx = estimator.estimate_cpd(fx, prior_type="K2")
		model_temp.add_cpds(cpd_fx)
	task4_bms.append(model_temp)
	print("	Model 3: Manual Model based on HillClimbSearch is : "+str(model_temp.edges()))
	k2Score = K2Score((BayesianModelSampling(model_temp)).forward_sample(size=1000))
	k2Scores_temp = k2Score.score(model_temp)
	k2Scores.append(k2Scores_temp)
	print("	Model 3: K2 Accuracy Score is "+str(k2Scores_temp))
	#Model 4: Manual Model based on HillClimbSearch
	model_temp = BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f5', 'f7'), ('f5', 'f3'), ('f1', 'f2'), ('f9', 'f1'), ('f9', 'f6'), ('f9', 'f8'),])
	estimator = BayesianEstimator(model_temp, andRawData_temp)
	for fx in ['f1','f2','f3','f4','f5','f6','f7','f8','f9']:
		cpd_fx = estimator.estimate_cpd(fx, prior_type="K2")
		model_temp.add_cpds(cpd_fx)
	task4_bms.append(model_temp)
	print("	Model 4: Manual Model based on HillClimbSearch is : "+str(model_temp.edges()))
	k2Score = K2Score((BayesianModelSampling(model_temp)).forward_sample(size=1000))
	k2Scores_temp = k2Score.score(model_temp)
	k2Scores.append(k2Scores_temp)
	print("	Model 4: K2 Accuracy Score is "+str(k2Scores_temp))
	#Model 5: Manual Model based on Intuition
	model_temp = BayesianModel([('f3', 'f4'), ('f4', 'f9'), ('f4', 'f7'), ('f1', 'f2'), ('f8', 'f5'), ('f9', 'f6'), ('f9', 'f8')])
	estimator = BayesianEstimator(model_temp, andRawData_temp)
	for fx in ['f1','f2','f3','f4','f5','f6','f7','f8','f9']:
		cpd_fx = estimator.estimate_cpd(fx, prior_type="K2")
		model_temp.add_cpds(cpd_fx)
	task4_bms.append(model_temp)
	print("	Model 5: Manual Model based on HillClimbSearch is : "+str(model_temp.edges()))
	k2Score = K2Score((BayesianModelSampling(model_temp)).forward_sample(size=1000))
	k2Scores_temp = k2Score.score(model_temp)
	k2Scores.append(k2Scores_temp)
	print("	Model 5: K2 Accuracy Score is "+str(k2Scores_temp))
	task4_best_bm = task4_bms[k2Scores.index(max(k2Scores))]
	print("	Best Bayesian Model with the highest accuracy score is thus Model "+str(1+k2Scores.index(max(k2Scores))))
Example #17
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

cancer_model = BayesianModel([('Pollution', 'Cancer'), ('Smoker', 'Cancer'),
                              ('Cancer', 'Xray'), ('Cancer', 'Dyspnoea')])
print('Bayesian network nodes are:')
print('\t', cancer_model.nodes())
print('Bayesian edges are:')
print('\t', cancer_model.edges())

cpd_poll = TabularCPD(variable='Pollution',
                      variable_card=2,
                      values=[[0.9], [0.1]])
cpd_smoke = TabularCPD(variable='Smoker',
                       variable_card=2,
                       values=[[0.3], [0.7]])
cpd_cancer = TabularCPD(variable='Cancer',
                        variable_card=2,
                        values=[[0.03, 0.05, 0.001, 0.02],
                                [0.97, 0.95, 0.999, 0.98]],
                        evidence=['Smoker', 'Pollution'],
                        evidence_card=[2, 2])
cpd_xray = TabularCPD(variable='Xray',
                      variable_card=2,
                      values=[[0.9, 0.2], [0.1, 0.8]],
                      evidence=['Cancer'],
                      evidence_card=[2])
cpd_dysp = TabularCPD(variable='Dyspnoea',
                      variable_card=2,
                      values=[[0.65, 0.3], [0.35, 0.7]],
                      evidence=['Cancer'],
                      evidence_card=[2])

# Associating the parameters with the model structure.
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)

# Checking if the cpds are valid for the model.
print(cancer_model.check_model())

# Check d-separations. This is only meant for those interested. You do not need to understand this to do the project.
print(cancer_model.is_active_trail('Pollution', 'Smoker'))
print(cancer_model.is_active_trail('Pollution', 'Smoker', observed=['Cancer']))
print(cancer_model.local_independencies('Xray'))
print(cancer_model.get_independencies())

# Print model information
print(cancer_model.edges())
print(cancer_model.nodes())
print(cancer_model.get_cpds())

# Doing exact inference using Variable Elimination
from pgmpy.inference import VariableElimination

cancer_infer = VariableElimination(cancer_model)

# Query
print(cancer_infer.query(variables=['Dyspnoea'], evidence={'Cancer': 0}))
print(
    cancer_infer.query(variables=['Cancer'],
                       evidence={
                           'Smoker': 0,
                           'Pollution': 0
                       }))
Example #19
import csv
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

lines = list(csv.reader(open('data7_names.csv', 'r')))
attributes = lines[0]

# Read Cleveland heart disease data
heartDisease = pd.read_csv('data7_heart.csv', names=attributes)
heartDisease = heartDisease.replace('?', np.nan)

# Model the Bayesian network
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'),
                       ('exang', 'trestbps'), ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'),
                       ('heartdisease', 'restecg'), ('heartdisease', 'thalach'), ('heartdisease', 'chol')])

print('\nBayesian Network Nodes are: ')
print('\t',model.nodes())
print('\nBayesian Network Edges are:')
print('\t',model.edges())

# Learning CPDs using Maximum Likelihood Estimators
print('\nLearning CPDs using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Inferencing with Bayesian Network
print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)

# Computing the probability of heartdisease given evidence
print('\n1. Probability of HeartDisease given Age=28')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print(q['heartdisease'])
print('\n2. Probability of HeartDisease given chol (Cholesterol)=100')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q['heartdisease'])
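
A MAP query returns only the most likely state instead of the full distribution (a sketch):

print(HeartDisease_infer.map_query(variables=['heartdisease'], evidence={'age': 28}))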
Example #20
import unittest

from pgmpy.models import BayesianModel, MarkovModel
from pgmpy.factors.discrete import TabularCPD, DiscreteFactor, JointProbabilityDistribution
from pgmpy.independencies import Independencies

class TestBayesianModelMethods(unittest.TestCase):

    def setUp(self):
        self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                                ('d', 'e'), ('b', 'c')])
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3, values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                   [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                   [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'], evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'), ('i', 'g'), ('i', 'l')])

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_get_ancestors_of_success(self):
        ancestors1 = self.G2._get_ancestors_of('g')
        ancestors2 = self.G2._get_ancestors_of('d')
        ancestors3 = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(ancestors1, {'d', 'i', 'g'})
        self.assertEqual(ancestors2, {'d'})
        self.assertEqual(ancestors3, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'), Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'), Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'), Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'), Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'), Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'), Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(), Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
               0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'], [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))

        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1
Exemple #21
0
from pgmpy.models import BayesianModel 
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination


test_model = BayesianModel([('rain', 'sprinkler'), ('rain', 'wetGrass'), ('sprinkler', 'wetGrass')])


cpd_rain = TabularCPD(variable='rain', variable_card=2, values=[[0.2],[0.8]])
cpd_wetGrass = TabularCPD(variable='wetGrass', variable_card=2,
                          values=[[0.0, 0.8, 0.9, 0.99],
                                  [1.0, 0.2, 0.1, 0.01]],
                          evidence=['rain', 'sprinkler'], evidence_card=[2, 2])
cpd_sprinkler = TabularCPD(variable='sprinkler', variable_card=2,
                           values=[[0.4, 0.01], [0.6, 0.99]],
                           evidence=['rain'], evidence_card=[2])


test_model.add_cpds(cpd_rain, cpd_sprinkler, cpd_wetGrass)
test_model.edges()
test_model.nodes()
test_model.check_model()

# print('hello')

test_infer = VariableElimination(test_model)

q = test_infer.query(variables=['wetGrass'], evidence={'rain':1})
result = q['wetGrass']
string = str(q['wetGrass'])
print(result.values[0])
# print(type(string))
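
# A short follow-up sketch (an addition): the same model also answers
# diagnostic queries, e.g. the probability of rain given that the grass is wet:
q_rain = test_infer.query(variables=['rain'], evidence={'wetGrass': 1})
print(q_rain['rain'])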



"""
Exemple #22
0
 random_indices = sample(range(row_size), 2000)
 smallDF = data.iloc[random_indices, :]
 smallDF.shape
 PseudoCounts = {}
 #Pseudocounts are given (1,1) for uniform
 for productName in smallDF.columns:
     PseudoCounts[productName] = [1, 1]
 print('Existing network not found')
 est = ConstraintBasedEstimator(smallDF)
 print('Starting to estimate the model structure, might take a while...')
 start = time.time()
 model = est.estimate(significance_level=0.05)
 end = time.time()
 print('Time spent to estimate model structure {0}'.format(end - start))
 print('Edges of the model:')
 print(model.edges())
 print('Starting to estimate model parameters..')
 start = time.time()
 model.fit(smallDF,
           estimator=BayesianEstimator,
           prior_type='dirichlet',
           pseudo_counts=PseudoCounts)
 end = time.time()
 print('Time spent to estimate the model parameters {0}'.format(end -
                                                                start))
 #Save edge ,node, CPD information
 Edges = model.edges()
 Nodes = model.nodes()
 CPD = model.get_cpds()
 with open("Edges.txt", "wb") as fp:
     pickle.dump(Edges, fp)
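 # A companion sketch (hedged addition): the Nodes and CPD objects collected
 # above can be pickled the same way, and loaded back later with pickle.load:
 # with open("Edges.txt", "rb") as fp:
 #     Edges = pickle.load(fp)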
Exemple #24
0
# Imports needed by this snippet (a hedged reconstruction; the original file
# header is not shown in this excerpt):
import numpy as np
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
from pgmpy.inference import VariableElimination
# data_size = len(data)
model = BayesianModel()
list_edges = []
for i in range(3):
    list_edges += [('DI' + str(i), 'DFT' + str(i)),
                   ('TQ', 'DFT' + str(i)),
                   ('DI' + str(i), 'RD' + str(i)),
                   ('DFT' + str(i), 'RD' + str(i)),
                   ('RD' + str(i), 'DFO' + str(i)),
                   ('OU', 'DFO' + str(i))]

list_edges += [('RD0', 'DI1'), ('RD1', 'DI2'), ('DPQ', 'DI0'), ('C', 'DI0')]

model.add_edges_from(list_edges)
model.fit(data, estimator=BayesianEstimator, prior_type="BDeu", equivalent_sample_size=10)
for edge in model.edges():
    print(edge)
    print("\n")
infer = VariableElimination(model)

nodes = list(model.nodes())
Distribution = {}
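# NOTE (added): 'pr' is not defined in this snippet; from its use below it is
# presumably a dict of observed node values, e.g. (hypothetical) pr = {'TQ': 2, 'OU': 0}.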

for key in pr.keys():
    Distribution[key] = [1 - abs(np.sign(pr[key] - i)) for i in range(5)]
    nodes.remove(key)
    print('pr done')

for key in nodes:
    Distribution[key] = infer.query([key], evidence = pr)[key].values
    print('done' + key)
# (The start of this cell was truncated; the lines below are a reconstruction of
# the standard burglary/earthquake alarm network that the rest of this example uses.)
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD

alarm_model = BayesianModel([('Burglary', 'Alarm'),
                             ('Earthquake', 'Alarm'),
                             ('Alarm', 'JohnCalls'),
                             ('Alarm', 'MaryCalls')])
cpd_burglary = TabularCPD(variable='Burglary', variable_card=2,
                          values=[[0.999], [0.001]])
cpd_earthquake = TabularCPD(variable='Earthquake', variable_card=2,
                            values=[[0.998], [0.002]])
cpd_alarm = TabularCPD(variable='Alarm', variable_card=2,
                        values=[[0.999, 0.71, 0.06, 0.05],
                                [0.001, 0.29, 0.94, 0.95]],
                        evidence=['Burglary', 'Earthquake'],
                        evidence_card=[2, 2])
cpd_johncalls = TabularCPD(variable='JohnCalls', variable_card=2,
                      values=[[0.95, 0.1], [0.05, 0.9]],
                      evidence=['Alarm'], evidence_card=[2])
cpd_marycalls = TabularCPD(variable='MaryCalls', variable_card=2,
                      values=[[0.1, 0.7], [0.9, 0.3]],
                      evidence=['Alarm'], evidence_card=[2])

# Associating the parameters with the model structure
alarm_model.add_cpds(cpd_burglary, cpd_earthquake, cpd_alarm, cpd_johncalls, cpd_marycalls)


#new cell
alarm_model.check_model() 

#new cell
alarm_model.nodes()

#new cell
alarm_model.edges()

#new cell
alarm_model.local_independencies('Burglary')

#new cell
alarm_model.local_independencies('JohnCalls')
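
#new cell
# Added inference sketch (hedged: assumes the reconstructed alarm_model above
# and the older dict-style pgmpy query API used elsewhere in these examples):
# probability of a burglary given that both John and Mary call.
from pgmpy.inference import VariableElimination

alarm_infer = VariableElimination(alarm_model)
q = alarm_infer.query(variables=['Burglary'],
                      evidence={'JohnCalls': 1, 'MaryCalls': 1})
print(q['Burglary'])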
Exemple #26
0
# Imports assumed by this test module (a hedged reconstruction; the original
# file header is not shown in this excerpt):
import unittest

import networkx as nx

from pgmpy.models import BayesianModel
import pgmpy.tests.help_functions as hf
class TestBaseModelCreation(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(self.G.nodes(), ['a'])

    def test_add_node_nonstring(self):
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(self.G.edges(), [('d', 'e')])
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()),
            hf.recursive_sorted([('a', 'b'), ('b', 'c'), ('d', 'e'),
                                 ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [('a', 'b'),
                                                              ('b', 'c'),
                                                              ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.g.predecessors('a'), [])
        self.assertListEqual(self.g.predecessors('b'), ['a'])
        self.assertListEqual(self.g.predecessors('c'), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.G.predecessors('a'), [])
        self.assertListEqual(self.G.predecessors('b'), ['a'])
        self.assertListEqual(self.G.predecessors('c'), ['b'])

    def tearDown(self):
        del self.G
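
# Addition: to run this suite standalone (given the imports above), one would
# typically append:
# if __name__ == '__main__':
#     unittest.main()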
Exemple #27
0
# Imports assumed by this module (a hedged reconstruction; the original file
# header is not shown in this excerpt):
import copy
import datetime
import json
import os
import re

import numpy as np
import networkx as nx

from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
class Utilities(object):
    def __init__(self, file):
        '''no object creation -> opportune?'''
        self.keywords = ['BENS', 'MEMS', 'LANS', 'MOTOR', 'WORLD']
        self.standard_nodes = {
            'RONS': {
                'BENS': [],
                'MEMS': []
            },
            'LANS': {
                'LANS': []
            },
            'LENS': {
                'MOTOR': [],
                'WORLD': []
            }
        }
        self.file = file
        self.get_json_path(file)
        self.pgmpy_object = BayesianModel()
        self.networkx_object = nx.DiGraph()
        self.header = ''
        self.dictionary = []

    def get_nodes_in_family(self, family, attributes=False):
        nw_nodes = self.networkx_object.nodes()
        nw_dim = np.asarray(nw_nodes).ndim
        nodes = []
        for i, node in enumerate(nw_nodes):
            if nw_dim > 1:
                node = node[0]
            if family in node:
                nodes.append(node)
        return nodes

    @staticmethod
    def check_json_path(directory):
        """
        Checks whether the necessary project_repository directory exists.
        If not, creates it

        :param directory: the mother directory to search from downwards

        :type directory: string
        :rtype : none
        """
        if not os.path.exists(directory + '\project_repository\\'):
            os.makedirs(directory + '\project_repository\\')

    def get_json_path(self, file):
        """
        Creates a string containing the full path for the filename passed
        so it will be saved in the project_repository directory

        :param filename: filename without path or extension
        :return: a full path for the file

        :type filename :string
        :rtype : string
        """
        levels = 5
        common = os.path.dirname(os.path.realpath(__file__))
        for i in range(levels + 1):
            common = os.path.dirname(common)
            if 'peepo\peepo' not in common:
                break
        Utilities.check_json_path(common)
        self.file = str(common + '\project_repository\\' + file + '.json')
        print('in get_json_path :', self.file)

    def save_json(self, astring):
        """
        This helper function is only needed to format the json file in a user-friendly way,
        as the "dump" method does not provide many options to make it "pretty"

        :param file : the full path of the json file
        :param astring: the name of the string containing the whole information
        :return: void
        :type file: string
        :type astring : string
        :rtype : void
        """
        text_file = open(str(self.file), "w")
        '''remove all LF written by the dump method'''
        astring = re.sub('\n', '', astring)
        '''For keywords -> insert LF and tabs'''
        astring = re.sub('\"Identification', '\n\"Identification', astring)
        astring = re.sub('\"Date', '\n\"Date', astring)
        astring = re.sub('\"Description', '\n\"Description', astring)
        astring = re.sub('\"Train_from', '\n\"Train_from', astring)
        astring = re.sub('\"Frozen', '\n\"Frozen', astring)
        astring = re.sub('\"Nodes', '\n\n\"Nodes', astring)
        astring = re.sub('\"RONS', '\n\t\t\"RONS', astring)
        astring = re.sub('\"BENS', '\n\t\t\t\"BENS', astring)
        astring = re.sub('\"MEMS', '\n\t\t\t\"MEMS', astring)
        astring = re.sub('\"LANS', '\n\t\t\"LANS', astring)
        astring = re.sub('\"LENS', '\n\t\t\"LENS', astring)
        astring = re.sub('\"MOTOR', '\n\t\t\t\"MOTOR', astring)
        astring = re.sub('\"WORLD', '\n\t\t\t\"WORLD', astring)
        astring = re.sub('\"Edges', '\n\n\"Edges', astring)
        astring = re.sub('\"CPDs', '\n\n\"CPDs', astring)
        astring = re.sub('{', '\n\t\t{', astring)
        text_file.write(astring)
        text_file.write('\n')
        text_file.close()

    def translation(self, astring, from_man_to_machine):
        """
        Given an array of tuples (a, b) in self.dictionary, returns the other element of the tuple where astring was found.
        Used so as not to lose the user's node names, as peepo generates standardized names for the corresponding nodes.

        :param dictionary: an array of tuples -> created in the method get_network(file)
        :param astring: the name of the node passed by the user
        :param from_man_to_machine: an integer -> 0 to translate from the user-given name to the standardized name, 1 the other way around
        :return: the corresponding standardized node name
        :type dictionary: np.array
        :type astring : string
        :rtype : string
        """
        source = 0
        target = 1
        if from_man_to_machine == 1:
            source = 1
            target = 0

        for index, item in enumerate(self.dictionary):
            if item[source] == astring:
                break
        return item[target]

    def clean_edge_list(self, edge_array, parent):
        '''the get functions for the edges, both in networkx and pgmpy, contain the parent name;
            this function removes it from the list'''
        cleaned_list = []
        for a in edge_array:
            if a != parent:
                cleaned_list.append(a)
        return cleaned_list

    def clean_parent_list(self, parent_array, child):
        '''the get functions for the edges, both in networkx and pgmpy, contain the parent name;
            this function removes it from the list'''
        cleaned_list = []
        for i, a in enumerate(parent_array):
            if a[0] != child:
                cleaned_list.append(a[0])
        return cleaned_list

    def get_edges(self):
        """
        Creates a dictionary with a node as key and an array of its children as value
        (the get_child methods generally return a list of tuples (parent, child))

        :param  pgmpy_object: the pgmpy network
        :return: a dictionary with the edges of all the nodes

        :type pgmpy_object : address
        :rtype : dictionary
        """
        edg = self.pgmpy_object.edges()
        edges = dict()
        for t in edg:
            edges.setdefault(str(t[0]), []).append(str(t[1]))
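        # e.g. for edges [('a', 'b'), ('a', 'c')] this returns {'a': ['b', 'c']}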
        return edges

    def get_nodes_and_attributes(self):
        """
        Creates an array of tuples with a node as element 0 and a dictionary with cardinality and cpd as keys:
         the key 'cardinality' returns an int,
         the key 'cpd' a 2-dimensional matrix

        :param  pgmpy_object: the pgmpy network
        :return: array of tuples with a node as element 0 and a dictionary with cardinality and cpd as keys

        :type pgmpy_object : address
        :rtype : array of tuples
        """
        nodes = self.pgmpy_object.nodes()
        nod_and_attributes = []
        for node in nodes:
            nod_and_attributes.append((str(node), {
                'cardinality': int(self.pgmpy_object.get_cardinality(node)),
                'cpd': self.pgmpy_object.get_cpds(node).values.astype(float)
            }))
        #need to reshape the cpds when more than 1 parent
        for i, node in enumerate(nod_and_attributes):
            shape = nod_and_attributes[i][1]['cpd'].shape
            dimension = nod_and_attributes[i][1]['cpd'].ndim
            if dimension > 2:
                col = int(np.prod(shape) / shape[0])
                nod_and_attributes[i][1]['cpd'] = nod_and_attributes[i][1][
                    'cpd'].reshape(shape[0], col)
            nod_and_attributes[i][1]['cpd'] = nod_and_attributes[i][1][
                'cpd'].tolist()
        return nod_and_attributes

    def translate_pgmpy_to_digraph(self):
        """
        Converts a pgmpy network into a networkx network

        :param  pgmpy_object: the pgmpy network
        :return : the networkx network

        :type pgmpy_object : address
        :rtype : networkx address
        """
        self.networkx_object = nx.DiGraph()
        edges = self.pgmpy_object.edges()
        nodes_and_attributes = self.get_nodes_and_attributes()
        self.networkx_object.add_nodes_from(nodes_and_attributes)
        self.networkx_object.add_edges_from(edges)
        return

    def update_networkx(self, networkx, dic, header):
        self.header = header
        self.dictionary = dic
        self.networkx_object = networkx

    def update_pgmpy(self, pgmpy, dic, header):
        self.header = header
        self.dictionary = dic
        self.pgmpy_object = pgmpy

    def save_pgmpy_network(self):
        """
                Saves the passed pgmpy_object class object in a json file
        """
        self.translate_pgmpy_to_digraph()
        self.save_network()
        return

    def translate_digraph_to_pgmpy(self, digraf):
        """
        Converts a networkx DiGraph into a pgmpy network

        :param  digraf: the networkx DiGraph
        :return : the pgmpy network

        :type digraf : address
        :rtype : pgmpy address
        """
        self.pgmpy_object, x, y = self.get_pgmpy_network(from_object=True,
                                                         digraph=digraf)
        return self.pgmpy_object


    def save_network(self):
        """
        Saves the passed networkx class object in a json file

        """
        data = self.get_empty_canvas()
        data["header"] = self.header
        nw_nodes = self.networkx_object.nodes(data=True)
        nw_edges = self.networkx_object.edges()
        keywords = self.keywords
        nodes = copy.deepcopy(
            self.standard_nodes
        )  #{'RONS': {'BENS': [], 'MEMS': []}, 'LANS': {'LANS': []}, 'LENS': {'MOTOR': [], 'WORLD': []}}
        edges = []
        cpds = []
        '''adding edges'''
        for i, node in enumerate(nw_nodes):
            node_name = node[0]
            childs = []
            for k, edge in enumerate(nw_edges):
                if edge[0] == node_name:
                    childs.append(self.translation(edge[1], 1))
            if len(childs) != 0:
                edges.append({self.translation(node_name, 1): childs})

        for i, node in enumerate(nw_nodes):
            node_name = node[0]
            cardinality = node[1]['cardinality']
            cpd = node[1]['cpd']
            for pseudonym in keywords:
                if pseudonym in node_name:
                    node_name_ = self.translation(node_name, 1)
                    if pseudonym == 'BENS' or pseudonym == 'MEMS':
                        nodes['RONS'][pseudonym].append(
                            [node_name_, cardinality])
                    if pseudonym == 'LANS':
                        nodes['LANS'][pseudonym].append(
                            [node_name_, cardinality])
                    if pseudonym == 'MOTOR' or pseudonym == 'WORLD':
                        nodes['LENS'][pseudonym].append(
                            [node_name_, cardinality])
            cpds.append({self.translation(node_name, 1): cpd})
        data['Nodes'] = nodes
        data['Edges'] = edges
        data['CPDs'] = cpds
        data['header']['Date'] = datetime.datetime.now().strftime("%c")
        self.save_json(json.dumps(data))
        return

    def get_pgmpy_network(self, from_object=False, digraph=None):
        """
        Reads the passed json file and translates its content to the passed pgmpy class object
        - uses get_network(file) to read the json file in a networkx format and translates this to pgmpy
        - Creates a dictionary for the nodes in the form of an array of tuples : [(name defined by user, standard name)]

        :param file: : filename without path or extension
        :param pgmpy_object : the pgmpy object which will be completed
        :return: a dictionary as an array of tuples and the header of the json file

        :type file : string
        :type pgmpy_object : pgmpy class object
        :rtype : array of tuples, dictionary

        CAUTION : the method does not perform a check() on the constructed DAG ! -> has to be done in the calling module
        """
        self.pgmpy_object = BayesianModel()
        if not (from_object):
            network, dictionary, header = self.get_network()
        else:
            network = digraph
        nw_nodes = network.nodes(data=True)
        nw_edges = network.edges()
        '''adding nodes and edges'''
        for i, node in enumerate(nw_nodes):
            node_name = node[0]
            self.pgmpy_object.add_node(node_name)
            for k, edge in enumerate(nw_edges):
                if edge[0] == node_name:
                    self.pgmpy_object.add_edge(node_name, edge[1])
        '''add  cpd's'''
        for i, node in enumerate(nw_nodes):
            parent_nodes = network.in_edges(node[0])
            parent_nodes = self.clean_parent_list(parent_nodes, node[0])
            cpd = node[1]['cpd']
            ''' find the cardinality of the node '''
            cardinality_node = node[1]['cardinality']
            """  cardinality card of parents has to be determined"""
            cardinality_parents = []
            for i, nod in enumerate(parent_nodes):
                cardinality_parents.append(network.node[nod]['cardinality'])
            ''' Depending on the place in the BN and/or the number of parents  the PGMPY CPD methods have another call'''
            if len(cardinality_parents) == 0:
                self.pgmpy_object.add_cpds(
                    TabularCPD(variable=node[0],
                               variable_card=cardinality_node,
                               values=[cpd]))
                continue
            table = TabularCPD(variable=node[0], variable_card=cardinality_node,
                               values=cpd,
                               evidence=parent_nodes,
                               evidence_card=np.asarray(cardinality_parents))
            self.pgmpy_object.add_cpds(table)
        '''------TO DELETE-------------'''
        # pgmpy_object.check_model()
        # draw_network(pgmpy_object)
        '''-----------------------------'''
        return self.pgmpy_object, self.dictionary, self.header

    def get_network(self):
        """
        Reads the passed json file and translates its content into a networkx class object
        - The nodes in the object are renamed so they have a standardized signature
        - Creates a dictionary for the nodes in the form of an array of tuples : [(name defined by user, standard name)]

        :param file: : filename without path or extension
        :return: a networkx class object, dictionary as an array of tuples and the header of the json file

        :type file : string
        :rtype : networkx class object, array of tuples, dictionary
        """
        self.dictionary = []
        self.networkx_object = nx.DiGraph()
        with open(self.file) as f:
            data = f.read()
        '''Remove possible non informative characters'''
        data = re.sub('\n', '', data)
        data = re.sub('\t', '', data)
        data = json.loads(data)
        self.header = data['header']
        '''Feeding G with the nodes'''
        cardinality = {}
        for key in data['Nodes'].keys():
            for secondkey in data['Nodes'][key].keys():
                for c, n in enumerate(data['Nodes'][key][secondkey]):
                    node = secondkey + "_" + str(c)
                    self.networkx_object.add_node(node, {
                        'cardinality': n[1],
                        'cpd': []
                    })
                    self.dictionary.append((n[0], node))
                    cardinality.update(
                        {node: n[1]}
                    )  #this contains the cardinality of each node with the node name as dictionary entry
        '''Feeding G with the edges'''
        edges = []
        for j, pair in enumerate(data['Edges']):
            for parent in pair.keys():
                for child in data['Edges'][j][parent]:
                    parent_ = self.translation(parent, 0)
                    child_ = self.translation(child, 0)
                    edges.append((parent_, child_))
        np.ravel(edges)
        self.networkx_object.add_edges_from(edges)
        '''Feeding G with the  CPD's as nodes attributes'''
        for j, node in enumerate(data['CPDs']):
            for parent, cpd in node.items():
                node_ = self.translation(parent, 0)
                self.networkx_object.node[node_]['cpd'] = cpd
        '''TO REMOVE LATER'''
        # plt.figure(figsize=(10, 5))
        # pos = nx.circular_layout(G, scale=2)
        # node_labels = nx.get_node_attributes(G, 'cpd')
        # nx.draw(G, pos, node_size=1200, node_color='lightblue',
        #         linewidths=0.25,  font_size=10, font_weight='bold', with_labels=True)
        # plt.show()
        return self.networkx_object, self.dictionary, self.header

    def create_json_file(self, **kwargs):
        """
        EXAMPLE:

        A helper method if the user prefers to create the BN within the code

        :param case_name: the file name without path or extension where the json file will be saved
        :param : **kwargs takes the following variables:
                                                            description = kwargs.get('description', '')
                                                            train_from = kwargs.get('train_from', '')
                                                            cpds = kwargs.get('CPDs', [])
                                                            bens = kwargs.get('BENS',[])
                                                            mems = kwargs.get('MEMS', [])
                                                            lans = kwargs.get('LANS', [])
                                                            motors = kwargs.get('MOTORS', [])
                                                            world = kwargs.get('WORLD', [])
                                                            edges = kwargs.get('Edges', [])
                                                            frozen = kwargs.get('frozen',False)
        .
        .
        .
        :return: void

        :type case_name : string
        :type  :
        .
        .
        .
        :rtype : void
        """
        description = kwargs.get('description', '')
        train_from = kwargs.get('train_from', '')
        cpds = kwargs.get('CPDs', [])
        bens = kwargs.get('BENS', [])
        mems = kwargs.get('MEMS', [])
        lans = kwargs.get('LANS', [])
        motors = kwargs.get('MOTORS', [])
        world = kwargs.get('WORLD', [])
        edges = kwargs.get('Edges', [])
        frozen = kwargs.get('frozen', False)

        #json_tab_file_write = JSONTabIndentFileWriter( Case_name,5a)
        data = self.get_empty_canvas()
        '''       - the next 3 items are for tracking purposes only, not fundamentally necessary'''
        data["header"]['Identification'] = self.file
        data["header"]['Date'] = datetime.datetime.now().strftime("%c")
        data["header"]['Description'] = description
        '''       - the next item gives a file containing possible training data (OPTIONAL)'''
        data["header"]['Train_from'] = train_from
        '''      Frozen tells whether or not the model can be considered final, i.e. whether "training" is still needed'''
        data["header"]['Frozen'] = frozen
        '''       - the next 5 lines tell how many nodes (names + cardinality) the model will start with;
                    the names can be any valid python string'''
        bens = [['pooping', 2], ['peeing', 2], ['constipated', 2]]
        mems = [['havenotoiletpaper', 2]]
        lans = [['diarhea', 2], ['happypoop', 2]]
        motors = [['asshole1', 2], ['asshole2', 2]]
        world = [['toilet1', 2], ['toilet2', 2], ['garden1', 2],
                 ['garden2', 2], ['doctor', 2]]
        '''     - the next items describe the edges as a dictionary
                 -> the dictionary entry is always one of the rootnodes, the array following can only contain LANs or LENs'''
        edges = []
        '''       !! in case we start from scratch and we rely on peepo to find the best BN -> leave this array empty'''
        edges.append({'pooping': ['toilet1', 'diarhea', 'happypoop']})
        edges.append({'peeing': ['toilet2', 'garden1', 'garden2']})
        edges.append({'constipated': ['doctor']})
        edges.append({'havenotoiletpaper': ['garden1', 'garden2']})
        edges.append(
            {'diarhea': ['toilet1', 'doctor', 'asshole1', 'asshole2']})
        edges.append(
            {'happypoop': ['garden1', 'garden2', 'asshole1', 'asshole2']})
        '''       - the next items describe the CPD's  as a dictionary
                  -> the dictionary entry is the corresponding node'''
        cpds = []
        cpds.append({'pooping': [0.5, 0.5]})
        cpds.append({'peeing': [0.2, 0.8]})
        cpds.append({'constipated': [0.9, 0.1]})
        cpds.append({'havenotoiletpaper': [0.6, 0.4]})
        cpds.append({'happypoop': [[0.3, 0.8], [0.7, 0.2]]})
        cpds.append({'diarhea': [[0.8, 0.3], [0.2, 0.7]]})
        cpds.append({'toilet1': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({'asshole1': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({'asshole2': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({'toilet2': [[0.5, 0.5], [0.5, 0.5]]})
        cpds.append({'doctor': [[0.3, 0.8, 0.8, 0.7], [0.7, 0.2, 0.2, 0.3]]})
        cpds.append({
            'garden1': [[0.3, 0.8, 0.8, 0.7, 0.8, 0.2, 0.5, 0.5],
                        [0.7, 0.2, 0.2, 0.3, 0.2, 0.8, 0.5, 0.5]]
        })
        cpds.append({
            'garden2': [[0.3, 0.8, 0.8, 0.7, 0.8, 0.2, 0.5, 0.5],
                        [0.7, 0.2, 0.2, 0.3, 0.2, 0.8, 0.5, 0.5]]
        })
        '''       - feeding the data'''
        data["Nodes"]['RONS']['BENS'] = bens
        data["Nodes"]['RONS']['MEMS'] = mems
        data["Nodes"]['LANS']['LANS'] = lans
        data["Nodes"]['LENS']['MOTOR'] = motors
        data["Nodes"]['LENS']['WORLD'] = world
        data["Edges"] = edges
        data["CPDs"] = cpds
        ''' dumping to CASENAME file in json format'''
        self.save_json(json.dumps(data))

        print("Json file for  - ", self.file, "  - created")

    def create_json_template(self):
        """
        A helper method in case the json template in the project_repository directory has been deleted or corrupted

        :param : void
        :return: void

        :type : void
        :rtype : void
        """
        self.get_json_path("Template")  # creates the right path in which case_name will be saved
        data = self.get_empty_canvas()
        data['header']['Identification'] = self.file
        '''Filling some dummies to facilitate the user'''
        a_node = ['*', 0]
        an_edge = {'*': ['&', '&', '&']}
        a_cpd = {'*': [[0, 0, 0], [0, 0, 0]]}
        nodes = []
        edges = []
        cpds = []
        for i in range(0, 3):
            nodes.append(a_node)
            edges.append(an_edge)
            cpds.append(a_cpd)

        data['Nodes']['RONS']['BENS'] = nodes
        data['Nodes']['RONS']['MEMS'] = nodes
        data['Nodes']['LANS']['LANS'] = nodes
        data['Nodes']['LENS']['MOTOR'] = nodes
        data['Nodes']['LENS']['WORLD'] = nodes
        data['Edges'] = edges
        data['CPDs'] = cpds
        ''' dumping to CASENAME file in json format'''
        # with open(case_name, 'w') as f:
        #     json.dump(data, f, separators = (",",":"))
        self.save_json(json.dumps(data))
        print("Empty template created")

    def get_empty_canvas(self):
        """
         This method creates a json canvas which will be used by the several json-creating methods

         :param : void
         :return: a dictionary with the structure of the json file
         :type : non
         :rtype : dictionary
         """

        data = {
            'header': {
                'Identification': '',
                'Date': '',
                'Description': '',
                'Frozen': '',
                'Train_from': ''
            },
            'Nodes': {},
            'Edges': [],
            'CPDs': []
        }
        '''       - the next 5 lines tell how many nodes (and their names) the model will start with;
                    the names can be any valid python string'''
        bens = []
        mems = []
        lans = []
        motors = []
        world = []
        '''     - the next items describe the edges as a dictionary
                 -> the dictionary entry is always one of the rootnodes, the array following can only contain LANs or LENs

                 !! in case we start from scratch and we rely on peepo to find the best BN -> leave this array empty'''
        edges = []
        '''       - the next items describe the CPD's  as a dictionary
                  -> the dictionary entry is the corresponding node'''
        cpds = []
        '''       - feeding the data'''
        data['Nodes'] = {
            'RONS': {
                'BENS': bens,
                'MEMS': mems
            },
            'LANS': {
                'LANS': lans
            },
            'LENS': {
                'MOTOR': motors,
                'WORLD': world
            }
        }
        data['Edges'] = edges
        data['CPDs'] = cpds
        return data
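
# A hedged usage sketch of the Utilities class above (the file name and call
# order are illustrative additions, not taken from the original source):
# util = Utilities('demo_case')
# util.create_json_file(description='demo network')
# pgmpy_net, dictionary, header = util.get_pgmpy_network()
# pgmpy_net.check_model()  # per the docstring, check() is left to the caller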
Exemple #28
0
                              ('staff_of_d_g', 'is_continuous_g'),
                              ('staff_of_d_g', 'exit_stroke_d_g'),
                              ('staff_of_d_g', 'is_lowercase_g'),
                              ('staff_of_d_g', 'slantness_g'),
                              ('staff_of_d_g', 'entry_stroke_a_g'),
                              ('word_formation_g', 'dimension_g'),
                              ('word_formation_g', 'staff_of_a_g'),
                              ('word_formation_g', 'size_g'),
                              ('word_formation_g', 'staff_of_d_g'),
                              ('word_formation_g', 'constancy_g'),
                              ('constancy_g', 'staff_of_a_g'),
                              ('constancy_g', 'letter_spacing_g'),
                              ('constancy_g', 'dimension_g'),
                              ('dimension_f', 'output'),
                              ('dimension_g', 'output')])
print(verify_model.edges())
print(verify_model.nodes())

fields = ['left', 'right', 'label']
seen_dat = pd.read_csv('dataset_seen_training_siamese_seen.csv',
                       usecols=fields)

seen_dat['pen_pressure_f'] = ''
seen_dat['letter_spacing_f'] = ''
seen_dat['size_f'] = ''
seen_dat['dimension_f'] = ''
seen_dat['is_lowercase_f'] = ''
seen_dat['is_continuous_f'] = ''
seen_dat['slantness_f'] = ''
seen_dat['tilt_f'] = ''
seen_dat['entry_stroke_a_f'] = ''
Exemple #29
0
# Imports assumed by this class (a hedged reconstruction; the original file
# header is not shown in this excerpt):
import os

import graphviz as gv
import pydot

from pgmpy.models import BayesianModel, MarkovModel
from pgmpy.estimators import (BDeuScore, BicScore, K2Score,
                              BayesianEstimator, ConstraintBasedEstimator,
                              ExhaustiveSearch, HillClimbSearch)
from pgmpy.inference import VariableElimination
class Network_handler:
    '''
    Handles creation and usage of the probabilistic network over CERN's data.
    Can deal with only a SINGLE file-priority combination.
    Note that the methods of this class are numbered and must be called in order.
    '''
    def __init__(self, pnh, gh):
        '''
        Constructor
        '''
        extractor = pnh.get_data_extractor()
        self.best_model = BayesianModel()
        self.training_instances = ""
        self.device_considered = pnh.get_device()
        self.priority_considered = pnh.get_priority()
        self.markov = MarkovModel()
        self.general_handler = gh
        self.variables_names = extractor.get_variable_names()
        self.rankedDevices = extractor.get_ranked_devices()
        self.data = pnh.get_dataframe()
        self.file_writer = pnh.get_file_writer()
        self.file_suffix = pnh.get_file_suffix()

    def learn_structure(self, method, scoring_method, log=True):
        ''' (4)
        Method that learns the structure of the network from the data
        -----------------
        Parameters:
        method          : the technique used to search for the structure
            -> scoring_approx     - to use an approximate search with a scoring method
            -> scoring_exhaustive - to use an exhaustive search with a scoring method
            -> constraint         - to use the constraint-based technique
        scoring_method  : K2, bic, bdeu
        log             : "True" if you want to print debug information in the console
        '''

        #Select the scoring method for the local search of the structure
        if scoring_method == "K2":
            scores = K2Score(self.data)
        elif scoring_method == "bic":
            scores = BicScore(self.data)
        elif scoring_method == "bdeu":
            scores = BdeuScore(self.data)

        #Select the actual method
        if method == "scoring_approx":
            est = HillClimbSearch(self.data, scores)
        elif method == "scoring_exhaustive":
            est = ExhaustiveSearch(self.data, scores)
        elif method == "constraint":
            est = ConstraintBasedEstimator(self.data)

        self.best_model = est.estimate()
        self.eliminate_isolated_nodes()  # remove all nodes not connected to anything else

        for edge in self.best_model.edges_iter():
            self.file_writer.write_txt(str(edge))

        self.log("Method used for structural learning: " + method, log)
        #self.log("Training instances skipped: " + str(self.extractor.get_skipped_lines()), log)
        self.log("Search terminated", log)

    def estimate_parameters(self, log=True):
        ''' (5)
        Estimates the parameters of the found network
        '''
        estimator = BayesianEstimator(self.best_model, self.data)
        self.file_writer.write_txt("Number of nodes: " +
                                   str(len(self.variables_names)))
        self.file_writer.write_txt("Complete list: " +
                                   str(self.variables_names))

        for node in self.best_model.nodes():
            cpd = estimator.estimate_cpd(node, prior_type='K2')
            self.best_model.add_cpds(cpd)
            self.log(cpd, log)
            self.file_writer.write_txt(cpd.__str__())

    def inference(self, variables, evidence, mode="auto", log=True):
        ''' (6)
        Computes the inference over some variables of the network (given some evidence)
        '''

        inference = VariableElimination(self.best_model)
        #inference = BeliefPropagation(self.markov)
        #inference = Mplp(self.best_model)
        header = "------------------- INFERENCE ------------------------"
        self.log(header, log)
        self.file_writer.write_txt(header, newline=True)
        self.file_writer.write_txt("(With parents all set to value 1)")

        if mode == "auto":
            self.log("          (with parents all set to value 1)", log)
            for node in self.best_model.nodes():
                variables = [node]
                parents = self.best_model.get_parents(node)
                evidence = dict()
                for p in parents:
                    evidence[p] = 1
                phi_query = inference.query(variables, evidence)
                for key in phi_query:
                    self.file_writer.write_txt(str(phi_query[key]))
                    self.log(phi_query[key], log)

        elif mode == "manual":
            phi_query = inference.query(variables, evidence)
            for key in phi_query:
                self.log(phi_query[key], log)
            '''
            map_query = inference.map_query(variables, evidence)
            print(map_query)
            '''

    def draw_network(self, label_choice, location_choice, location, log):
        ''' (7) 
        Draws the bayesian network.
        ----
        location_choice = True iff we want to show the location of devices in the graph.
        label_choice = "single" if we want to show single label, "double" for double label of arcs
        location = 0,1,2 depending by the location (H0, H1, H2)
        '''
        bn_graph = gv.Digraph(format="png")

        # Extract color based on the building
        if location_choice:

            devices = self.variables_names
            device_location = dict()
            device_locationH1 = dict()

            #For H0
            for d in devices:
                allDevicesLocations = self.general_handler.get_device_locations()
                device_location[d] = allDevicesLocations[d][0]
                device_locationH1[d] = allDevicesLocations[d][1]  #temp for H1
            location_color = self.assign_color(device_location)
            location_colorH1 = self.assign_color(device_locationH1)
            '''
            # Logging and saving info
            self.log(device_location, log)
            self.log(location_color, log)
            self.file_writer.write_txt(device_location, newline = True)
            self.file_writer.write_txt(location_color, newline = True)
            '''

            # Creating the subgraphs, one for each location:
            loc_subgraphs = dict()
            for loc in location_color:
                name = "cluster_" + loc
                loc_subgraphs[loc] = gv.Digraph(name)
                loc_subgraphs[loc].graph_attr['label'] = loc  # label with the name to be visualized in the image

        # Create nodes
        for node in self.best_model.nodes():
            if location_choice:
                locationH0 = device_location[node]
                locationH1 = device_locationH1[node]
                loc_subgraphs[locationH0].node(
                    node,
                    style='filled',
                    fillcolor=location_colorH1[locationH1]
                )  #add the node to the right subgraph
                #loc_subgraphs[locationH0].node(node) #USE THIS TO ADD ONLY H0
            else:
                bn_graph.node(node)

        # Add all subgraphs in the final graph:
        if location_choice:
            for loc in loc_subgraphs:
                bn_graph.subgraph(loc_subgraphs[loc])

        # Create and color edges
        for edge in self.best_model.edges_iter():
            inference = VariableElimination(self.best_model)
            label = ""

            # Inference for first label and color of edges
            variables = [edge[1]]
            evidence = dict()
            evidence[edge[0]] = 1
            phi_query = inference.query(variables, evidence)
            value = phi_query[edge[1]].values[1]
            value = round(value, 2)

            if label_choice == "single":
                label = str(value)

            if label_choice == "double":
                # Inference for second label
                variables = [edge[0]]
                evidence = dict()
                evidence[edge[1]] = 1
                phi_query = inference.query(variables, evidence)
                value_inv = phi_query[edge[0]].values[1]
                value_inv = round(value_inv, 2)
                label = str(value) + "|" + str(value_inv)

            if value >= 0.75:
                bn_graph.edge(edge[0], edge[1], color="red", label=label)
            else:
                bn_graph.edge(edge[0], edge[1], color="black", label=label)

        # Save the .png graph
        if self.device_considered == "CUSTOM":
            imgPath = '../../output/CUSTOM' + self.file_suffix
        else:
            if location_choice:
                locat = "_H0H1"
            else:
                locat = ""
            imgPath = '../../output/' + self.device_considered + '_' + self.priority_considered + locat
        bn_graph.render(imgPath)
        os.remove(imgPath)  #remove the source code generated by graphviz

    def data_info(self, selection, log):
        ''' (9) Prints or logs some extra information about the data or the network
        '''
        # 1 - DEVICE FREQUENCY AND OCCURRENCES
        if 1 in selection:
            self.file_writer.write_txt(
                "Device ranking (max 20 devices are visualized)", newline=True)
            i = 1
            for dr in self.rankedDevices:
                self.file_writer.write_txt(dr[0] + "             \t" +
                                           str(dr[1]) + "\t" + str(dr[2]))
                i = i + 1
                if i == 20:
                    break

        # 2 - EDGES OF THE NETWORK
        if 2 in selection:
            self.file_writer.write_txt("Edges of the network:", newline=True)
            for edge in self.best_model.edges_iter():
                self.file_writer.write_txt(str(edge))

        # 3 - MARKOV NETWORK
        if 3 in selection:
            self.markov = self.best_model.to_markov_model()  # create the markov model from the BN
            nice_graph = pydot.Dot(graph_type='graph')
            for node in self.markov.nodes():
                node_pydot = pydot.Node(node)
                nice_graph.add_node(node_pydot)
            for edge in self.markov.edges():
                edge_pydot = pydot.Edge(edge[0], edge[1], color="black")
                nice_graph.add_edge(edge_pydot)
            nice_graph.write_png('../../output/' + self.device_considered +
                                 '_' + self.priority_considered +
                                 '-markov.png')

            self.file_writer.write_txt("MARKOV NETWORK FACTORS:", newline=True)
            for factor in self.markov.factors:
                self.log("MARKOV---------------------------------------", log)
                self.log(factor, log)
                self.file_writer.write_txt(factor.__str__())

        # 4 - INFERENCE NETWORK
        if 4 in selection:
            nice_graph = pydot.Dot(graph_type='digraph')
            nodes = self.best_model.nodes()
            inference = VariableElimination(self.best_model)
            for node1 in nodes:
                pos = nodes.index(node1) + 1
                for i in range(pos, len(nodes)):
                    node2 = nodes[i]
                    variables = [node2]
                    evidence = dict()
                    evidence[node1] = 1
                    phi_query = inference.query(variables, evidence)
                    # probability of direct activation (inference from node1=1 to node2)
                    prob1 = phi_query[node2].values[1]
                    variables = [node1]
                    evidence = dict()
                    evidence[node2] = 1
                    phi_query = inference.query(variables, evidence)
                    # probability of inverse activation (inference from node2=1 to node1)
                    prob2 = phi_query[node1].values[1]
                    prob1 = round(prob1, 2)
                    prob2 = round(prob2, 2)
                    if prob1 >= 0.75 and (prob1 - prob2) <= 0.40:  # add direct arc from node1 to node2
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        double_label = str(prob1) + "|" + str(prob2)
                        nice_graph.add_edge(
                            pydot.Edge(node1,
                                       node2,
                                       color="red",
                                       label=double_label))
                    elif prob2 >= 0.75 and (prob2 - prob1) <= 0.40:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        double_label = str(prob2) + "|" + str(prob1)
                        nice_graph.add_edge(
                            pydot.Edge(node2,
                                       node1,
                                       color="red",
                                       label=double_label))
                    elif prob1 >= 0.75 and prob2 >= 0.75:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        if prob1 >= prob2:
                            double_label = str(prob1) + "|" + str(prob2)
                            nice_graph.add_edge(
                                pydot.Edge(node1,
                                           node2,
                                           color="orange",
                                           label=double_label))
                        else:
                            double_label = str(prob2) + "|" + str(prob1)
                            nice_graph.add_edge(
                                pydot.Edge(node2,
                                           node1,
                                           color="orange",
                                           label=double_label))
                    elif prob1 >= 0.55 and prob2 >= 0.55:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        if prob1 >= prob2:
                            double_label = str(prob1) + "|" + str(prob2)
                            nice_graph.add_edge(
                                pydot.Edge(node1,
                                           node2,
                                           color="black",
                                           label=double_label))
                        else:
                            double_label = str(prob2) + "|" + str(prob1)
                            nice_graph.add_edge(
                                pydot.Edge(node2,
                                           node1,
                                           color="black",
                                           label=double_label))

            if self.device_considered == "CUSTOM":
                imgPath = '../../output/CUSTOM' + self.file_suffix
                nice_graph.write_png(imgPath + "-inference_network.png")
            else:
                nice_graph.write_png('../../output/' + self.device_considered +
                                     '_' + self.priority_considered +
                                     '-inference_network.png')

    def fix_node_presence(self, nodes, pydot_graph):
        ''' Adds the list of nodes to the graph, if they are not already present '''
        # get_nodes() returns pydot.Node objects, so compare by name
        existing = [n.get_name() for n in pydot_graph.get_nodes()]
        for node in nodes:
            if node not in existing:
                pydot_graph.add_node(pydot.Node(node))
                existing.append(node)

    def eliminate_isolated_nodes(self):
        '''
        If a node doesn't have any incoming or outgoing edge, it is eliminated from the graph
        '''
        for nodeX in list(self.best_model.nodes()):  # copy, since nodes may be removed while iterating
            tup = [item for item in self.best_model.edges() if nodeX in item]
            if not tup:
                self.file_writer.write_txt(
                    "Node " + str(nodeX) +
                    " has no edges: it has been eliminated.")
                self.best_model.remove_node(nodeX)
        if len(self.best_model.nodes()) == 0:
            raise DataError("No nodes left in this file-priority combination.")

    def assign_color(self, device_location):
        '''
        Returns a dictionary with the location as key and the assigned colour as value
        (works with at most 10 different locations).
        '''
        system_color = [
            'Blue', 'Green', 'Red', 'Purple', 'Yellow', 'Orange', 'Grey',
            'Light Red', 'Light Blue', 'Light Green'
        ]  # ten distinct colours, one per location
        location_color = dict()  # key = location; value = color
        for dev, loc in device_location.items():
            if loc not in location_color:
                color = system_color[0]
                system_color.remove(color)
                location_color[loc] = color
        return location_color

    def log(self, text, log):
        ''' Prints the text in the console, if the "log" condition is True. '''
        if log:
            print(text)
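
# A minimal standalone sketch (not part of the class above) of the
# arc-classification rule that data_info section 4 applies to each node
# pair; classify_arc and its return tuples are hypothetical names. Note
# that the "orange" branch can never fire as written: if both
# probabilities are >= 0.75 their gap is at most 0.25 <= 0.40, so the
# first branch already matches.
def classify_arc(prob1, prob2):
    ''' Maps a pair of activation probabilities to (direction, colour), or None for no arc. '''
    if prob1 >= 0.75 and (prob1 - prob2) <= 0.40:
        return ('node1->node2', 'red')    # strong direct activation
    if prob2 >= 0.75 and (prob2 - prob1) <= 0.40:
        return ('node2->node1', 'red')    # strong inverse activation
    if prob1 >= 0.75 and prob2 >= 0.75:   # unreachable, kept for fidelity
        return ('stronger->weaker', 'orange')
    if prob1 >= 0.55 and prob2 >= 0.55:
        return ('stronger->weaker', 'black')
    return None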
Exemple #30
exam_cpd = TabularCPD(variable='Exam',
                      variable_card=2,
                      values=[[0.95, 0.2],
                              [0.05, 0.8]],
                      evidence=['Musicianship'],
                      evidence_card=[2])

#print(rating_cpd)
#print(difficulty_cpd)
#print(musicianship_cpd)
#print(letter_cpd)
#print(exam_cpd)

print(music_model.edges())

#Add the CPDS to the model
music_model.add_cpds(difficulty_cpd, musicianship_cpd, letter_cpd, exam_cpd, rating_cpd)

#print(music_model.get_cpds())

print(music_model.check_model())

#Create object to perform inference on model
music_infer = VariableElimination(music_model)

#Probability Musicianship
m_1 = music_infer.query(variables=['Musicianship'])
print(m_1['Musicianship'])
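
# A hedged follow-up: the same VariableElimination object answers
# conditional queries via the evidence argument. The dict-style result
# indexing matches the older pgmpy API used throughout this example
# (newer pgmpy returns the factor directly).
m_2 = music_infer.query(variables=['Exam'], evidence={'Musicianship': 1})
print(m_2['Exam'])  # P(Exam | Musicianship = 1)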
#coding: utf-8
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
# Define the model structure
cancer_model = BayesianModel([
    ('PT', 'HO'),  # party -> hangover
    ('HO', 'SA'),  # hangover -> smell-alcohol
    ('HO', 'HA'),  # hangover -> headache
    ('BT', 'HA'),  # brain-tumor -> headache
    ('BT', 'PX')   # brain-tumor -> pos-xray
])
for e in cancer_model.edges():
    print(e)

# Add the probabilities (CPDs)
cpd_party = TabularCPD(variable='PT',
                       variable_card=2,
                       values=[[0.8], [0.2]])
cpd_braintumor = TabularCPD(variable='BT',
                            variable_card=2,
                            values=[[0.999], [0.001]])
cpd_hangover = TabularCPD(variable='HO',
                          variable_card=2,
                          values=[[1.000, 0.300], [0.000, 0.700]],
                          evidence=['PT'],
                          evidence_card=[2])
cpd_smellalcohol = TabularCPD(variable='SA',
                          variable_card=2,
                          values=[[0.900, 0.200], [0.100, 0.800]],
                          evidence=['HO'],
                          evidence_card=[2])
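
# The snippet above ends before the last two CPDs. A hedged completion is
# sketched below: the probability values are ILLUSTRATIVE assumptions, not
# from the original source; only the structure (HA depends on HO and BT,
# PX depends on BT) is given by the model definition above.
cpd_headache = TabularCPD(variable='HA',
                          variable_card=2,
                          values=[[0.98, 0.10, 0.30, 0.05],  # assumed values
                                  [0.02, 0.90, 0.70, 0.95]],
                          evidence=['HO', 'BT'],
                          evidence_card=[2, 2])
cpd_posxray = TabularCPD(variable='PX',
                         variable_card=2,
                         values=[[0.99, 0.02],  # assumed values
                                 [0.01, 0.98]],
                         evidence=['BT'],
                         evidence_card=[2])
cancer_model.add_cpds(cpd_party, cpd_braintumor, cpd_hangover,
                      cpd_smellalcohol, cpd_headache, cpd_posxray)
print(cancer_model.check_model())  # True if every CPD is consistent with the graph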
Exemple #32
# Bayesian network for students
from pgmpy.models import BayesianModel
model = BayesianModel()
# Add nodes
model.add_nodes_from(['difficulty', 'intelligence', 'grade', 'sat', 'letter'])
print(model.nodes())
# Add edges
model.add_edges_from([('difficulty', 'grade'), ('intelligence', 'grade'), ('intelligence', 'sat'), ('grade', 'letter')])
print(model.edges())
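
# A hedged aside: once the structure is in place, pgmpy can report each
# node's parents and children and the local independencies implied by the
# DAG (assuming the get_parents/get_children helpers of BayesianModel).
print(model.get_parents('grade'))          # e.g. ['difficulty', 'intelligence']
print(model.get_children('intelligence'))  # e.g. ['grade', 'sat']
print(model.local_independencies('sat'))   # (sat _|_ difficulty, grade, letter | intelligence)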
class TestBayesianModelMethods(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel([("a", "d"), ("b", "d"), ("d", "e"),
                                ("b", "c")])
        self.G1 = BayesianModel([("diff", "grade"), ("intel", "grade")])
        diff_cpd = TabularCPD("diff", 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD("intel", 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD(
            "grade",
            3,
            values=[
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([("d", "g"), ("g", "l"), ("i", "g"),
                                 ("i", "l")])

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ["a", "b", "c", "d", "e"])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [("a", "b"), ("a", "d"), ("b", "c"),
                                     ("d", "b"), ("e", "d")]
                            or (edge[1], edge[0]) in [("a", "b"), ("a", "d"),
                                                      ("b", "c"), ("d", "b"),
                                                      ("e", "d")])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([("a", "d"), ("d", "e"), ("b", "d"), ("b", "c"),
                           ("a", "b")])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ["a", "b", "c", "d", "e"])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [("a", "b"), ("c", "b"), ("d", "a"),
                                     ("d", "b"), ("d", "e")]
                            or (edge[1], edge[0]) in [("a", "b"), ("c", "b"),
                                                      ("d", "a"), ("d", "b"),
                                                      ("d", "e")])

    def test_get_ancestors_of_success(self):
        ancestors1 = self.G2._get_ancestors_of("g")
        ancestors2 = self.G2._get_ancestors_of("d")
        ancestors3 = self.G2._get_ancestors_of(["i", "l"])
        self.assertEqual(ancestors1, {"d", "i", "g"})
        self.assertEqual(ancestors2, {"d"})
        self.assertEqual(ancestors3, {"g", "i", "l", "d"})

    def test_get_ancestors_of_failure(self):
        self.assertRaises(ValueError, self.G2._get_ancestors_of, "h")

    def test_get_cardinality(self):
        self.assertDictEqual(self.G1.get_cardinality(), {
            "diff": 2,
            "intel": 3,
            "grade": 3
        })

    def test_get_cardinality_with_node(self):
        self.assertEqual(self.G1.get_cardinality("diff"), 2)
        self.assertEqual(self.G1.get_cardinality("intel"), 3)
        self.assertEqual(self.G1.get_cardinality("grade"), 3)

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies("a"),
                         Independencies(["a", ["b", "c"]]))
        self.assertEqual(
            self.G.local_independencies("c"),
            Independencies(["c", ["a", "d", "e"], "b"]),
        )
        self.assertEqual(self.G.local_independencies("d"),
                         Independencies(["d", "c", ["b", "a"]]))
        self.assertEqual(
            self.G.local_independencies("e"),
            Independencies(["e", ["c", "b", "a"], "d"]),
        )
        self.assertEqual(self.G.local_independencies("b"),
                         Independencies(["b", "a"]))
        self.assertEqual(self.G1.local_independencies("grade"),
                         Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([("X", "Y"), ("Y", "Z")])
        self.assertEqual(chain.get_independencies(),
                         Independencies(("X", "Z", "Y"), ("Z", "X", "Y")))
        fork = BayesianModel([("Y", "X"), ("Y", "Z")])
        self.assertEqual(fork.get_independencies(),
                         Independencies(("X", "Z", "Y"), ("Z", "X", "Y")))
        collider = BayesianModel([("X", "Y"), ("Z", "Y")])
        self.assertEqual(collider.get_independencies(),
                         Independencies(("X", "Z"), ("Z", "X")))

    def test_is_imap(self):
        val = [
            0.01,
            0.01,
            0.08,
            0.006,
            0.006,
            0.048,
            0.004,
            0.004,
            0.032,
            0.04,
            0.04,
            0.32,
            0.024,
            0.024,
            0.192,
            0.016,
            0.016,
            0.128,
        ]
        JPD = JointProbabilityDistribution(["diff", "intel", "grade"],
                                           [2, 3, 3], val)
        fac = DiscreteFactor(["diff", "intel", "grade"], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_markov_blanket(self):
        G = DAG([
            ("x", "y"),
            ("z", "y"),
            ("y", "w"),
            ("y", "v"),
            ("u", "w"),
            ("s", "v"),
            ("w", "t"),
            ("w", "m"),
            ("v", "n"),
            ("v", "q"),
        ])
        self.assertEqual(set(G.get_markov_blanket("y")),
                         set(["s", "w", "x", "u", "z", "v"]))

    def test_get_immoralities(self):
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertEqual(G.get_immoralities(), {("w", "x"), ("w", "z")})
        G1 = BayesianModel([("x", "y"), ("z", "y"), ("z", "x"), ("w", "y")])
        self.assertEqual(G1.get_immoralities(), {("w", "x"), ("w", "z")})
        G2 = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y"),
                            ("w", "x")])
        self.assertEqual(G2.get_immoralities(), {("w", "z")})

    def test_is_iequivalent(self):
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([("V", "W"), ("W", "X"), ("X", "Y"), ("Z", "Y")])
        G2 = BayesianModel([("W", "V"), ("X", "W"), ("X", "Y"), ("Z", "Y")])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([("W", "V"), ("W", "X"), ("Y", "X"), ("Z", "Y")])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds("diff")),
                            id(model_copy.get_cpds("diff")))

        self.G1.remove_cpds("diff")
        diff_cpd = TabularCPD("diff", 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds("diff"),
                            model_copy.get_cpds("diff"))

        self.G1.remove_node("intel")
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node("diff")
        self.assertEqual(sorted(self.G1.nodes()), sorted(["grade", "intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(["diff", "grade"])
        self.assertEqual(sorted(self.G1.nodes()), sorted(["intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")
        self.assertRaises(ValueError, self.G1.get_cpds, "grade")

    def tearDown(self):
        del self.G
        del self.G1
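
# A quick standalone illustration of the moralization behaviour exercised
# above (assuming BayesianModel is imported as in these tests): moralize()
# "marries" co-parents and drops edge directions.
demo = BayesianModel([("a", "d"), ("b", "d"), ("d", "e"), ("b", "c")])
print(sorted(demo.moralize().edges()))  # now includes an a-b edge: a and b share the child d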
Exemple #35
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
# Generating random data for two coin tossing examples
raw_data = np.random.randint(low=0, high=2, size=(1000, 2))
data = pd.DataFrame(raw_data, columns=['X', 'Y'])
print(data)
coin_model = BayesianModel()
coin_model.fit(data, estimator=BayesianEstimator)
print(coin_model.get_cpds())
print(coin_model.nodes())
print(coin_model.edges())
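
# A hedged follow-up: with no edges in the model, fit() estimates one
# unconditional CPD per column, so for uniform random coin data both state
# probabilities should land near 0.5.
for cpd in coin_model.get_cpds():
    print(cpd)  # full CPD table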
class TestBaseModelCreation(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.g.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [["a", "b"], ["b", "c"]])

    def test_class_init_with_data_nonstring(self):
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node("a")
        self.assertListEqual(list(self.G.nodes()), ["a"])

    def test_add_node_nonstring(self):
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(["a", "b", "c", "d"])
        self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c", "d"])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge("d", "e")
        self.assertListEqual(sorted(self.G.nodes()), ["d", "e"])
        self.assertListEqual(list(self.G.edges()), [("d", "e")])
        self.G.add_nodes_from(["a", "b", "c"])
        self.G.add_edge("a", "b")
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [["a", "b"], ["d", "e"]])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, "a", "a")

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([("a", "b"), ("a", "c")])
        self.assertRaises(ValueError, self.G.add_edge, "c", "a")

    def test_add_edges_from_string(self):
        self.G.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(sorted(self.G.nodes()), ["a", "b", "c"])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [["a", "b"], ["b", "c"]])
        self.G.add_nodes_from(["d", "e", "f"])
        self.G.add_edges_from([("d", "e"), ("e", "f")])
        self.assertListEqual(sorted(self.G.nodes()),
                             ["a", "b", "c", "d", "e", "f"])
        self.assertListEqual(
            hf.recursive_sorted(self.G.edges()),
            hf.recursive_sorted([("a", "b"), ("b", "c"), ("d", "e"),
                                 ("e", "f")]),
        )

    def test_add_edges_from_nonstring(self):
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [("a", "a")])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from, [("a", "b"),
                                                              ("b", "c"),
                                                              ("c", "a")])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([("a", "b"), ("b", "c")])
        self.assertListEqual(list(self.g.predecessors("a")), [])
        self.assertListEqual(list(self.g.predecessors("b")), ["a"])
        self.assertListEqual(list(self.g.predecessors("c")), ["b"])

    def test_update_node_parents(self):
        self.G.add_nodes_from(["a", "b", "c"])
        self.G.add_edges_from([("a", "b"), ("b", "c")])
        self.assertListEqual(list(self.G.predecessors("a")), [])
        self.assertListEqual(list(self.G.predecessors("b")), ["a"])
        self.assertListEqual(list(self.G.predecessors("c")), ["b"])

    def tearDown(self):
        del self.G