Ejemplo n.º 1
0
def sample_dag(dag, num):
    """Draw ``num`` forward samples from the Bayesian network encoded in ``dag``.

    Args:
        dag: Directed graph exposing ``nodes()``, ``edges()``,
            ``predecessors(node)`` and a per-node attribute mapping
            ``dag.node[n]`` whose ``'cpd'`` entry holds that node's
            probability table.
        num: Number of samples to generate.

    Returns:
        The result of ``BayesianModelSampling.forward_sample`` requested
        with ``return_type='recarray'``.
    """
    # Build the model from nodes *and* edges: constructing it from the edge
    # list alone would drop nodes that take part in no edge.
    bayesmod = BayesianModel()
    bayesmod.add_nodes_from(dag.nodes())
    bayesmod.add_edges_from(dag.edges())

    # NOTE(review): `dag.node` is the older networkx attribute spelling;
    # newer releases expose it as `dag.nodes` -- confirm the pinned version.
    # A node's cardinality is taken to be the number of rows of its CPD.
    cards = {node: len(dag.node[node]['cpd']) for node in dag.nodes()}

    tab_cpds = []
    for node in dag.nodes():
        # Materialise the predecessors: some networkx versions return an
        # iterator here, which is always truthy (so root nodes would take the
        # "has parents" branch) and would be exhausted after its first use.
        parents = list(dag.predecessors(node))
        cpd = dag.node[node]['cpd']
        if parents:
            parent_cards = [cards[par] for par in parents]
            logging.debug("TablularCPD({}, {}, {}, {}, {})".format(
                node, cards[node], cpd, parents, parent_cards))
            tab_cpds.append(
                TabularCPD(node, cards[node], cpd, parents, parent_cards))
        else:
            logging.debug("TablularCPD({}, {}, {})".format(
                node, cards[node], cpd))
            tab_cpds.append(TabularCPD(node, cards[node], cpd))

    logging.debug("cpds add: {}".format(tab_cpds))

    # Single-argument print(...) emits the same text under Python 2 and 3,
    # unlike the Python 2-only print statement.
    print("model variables: {}".format(bayesmod.nodes()))
    for tab_cpd in tab_cpds:
        print("cpd variables: {}".format(tab_cpd.variables))

    bayesmod.add_cpds(*tab_cpds)

    logging.debug("cpds get: {}".format(bayesmod.get_cpds()))
    inference = BayesianModelSampling(bayesmod)

    logging.debug("generating data")
    recs = inference.forward_sample(size=num, return_type='recarray')
    return recs
# Burglary/earthquake alarm network: both causes feed 'Alarm', which in turn
# drives the two call nodes.
burglary = BayesianModel([
    ('Burglary', 'Alarm'),
    ('Earthquake', 'Alarm'),
    ('Alarm', 'Johncalls'),
    ('Alarm', 'Marycalls'),
])

# Root nodes: binary variables with no evidence.
cpd_burgary = TabularCPD(variable='Burglary', variable_card=2,
                         values=[[0.001], [0.999]])
cpd_earthquake = TabularCPD(variable='Earthquake', variable_card=2,
                            values=[[0.002], [0.998]])

# 'Alarm' is conditioned on both causes (2 x 2 = 4 value columns).
cpd_alarm = TabularCPD(
    variable='Alarm',
    variable_card=2,
    values=[[0.99, 0.71, 0.06, 0.05],
            [0.01, 0.29, 0.94, 0.95]],
    evidence=['Burglary', 'Earthquake'],
    evidence_card=[2, 2])

# Each call node is conditioned on 'Alarm' only (2 value columns).
cpd_johncall = TabularCPD(
    variable='Johncalls',
    variable_card=2,
    values=[[0.95, 0.1],
            [0.05, 0.90]],
    evidence=['Alarm'],
    evidence_card=[2])
cpd_marrycall = TabularCPD(
    variable='Marycalls',
    variable_card=2,
    values=[[0.99, 0.30],
            [0.01, 0.70]],
    evidence=['Alarm'],
    evidence_card=[2])

# Attach every table to the model, then echo what the model now holds.
burglary.add_cpds(
    cpd_burgary,
    cpd_earthquake,
    cpd_alarm,
    cpd_johncall,
    cpd_marrycall,
)

for cpd in burglary.get_cpds():
    print(cpd)
#######################################################################
"""inference using variable elimination"""

# Query 'Alarm' given the observation Johncalls = 0.
inference = VariableElimination(burglary)
alarm = inference.query(variables=['Alarm'], evidence={'Johncalls': 0})
# NOTE(review): indexing the result by variable name assumes query() returns
# a mapping keyed by variable -- confirm against the installed pgmpy version.
print(alarm['Alarm'])

##############################################################
"""inference using likelihood weighting sampling"""

# Rebinds `inference` to a sampler over the same model and draws two
# likelihood-weighted samples under the same observation.
inference = BayesianModelSampling(burglary)
evidence = [State('Johncalls', 0)]
sample = inference.likelihood_weighted_sample(evidence=evidence, size=2)
print(sample)
from pgmpy.sampling import BayesianModelSampling

# Sample data from the Bayesian network.
# NOTE(review): `model` is not assigned anywhere above this point in the
# visible source -- it must come from earlier code; confirm before running.
inference = BayesianModelSampling(model)
# Draw 10000 forward samples, requested as a dataframe.
df_data = inference.forward_sample(size=10000, return_type='dataframe')
print(df_data)

from pgmpy.estimators import TreeSearch

# Learn the graph structure from the sampled data with a tree search
# rooted at node "A", using the "chow-liu" estimator type.
est = TreeSearch(df_data, root_node="A")
dag = est.estimate(estimator_type="chow-liu")
# Draw the learned graph in a circular layout.
# NOTE(review): `nx` and `plt` are presumably networkx and matplotlib.pyplot;
# their imports are not visible in this part of the file -- confirm.
nx.draw_circular(dag,
                 with_labels=True,
                 arrowsize=30,
                 node_size=800,
                 alpha=0.3,
                 font_weight='bold')
# Save the figure to disk before showing it.
plt.savefig('fig2.png', bbox_inches='tight')
plt.show()

from pgmpy.estimators import BayesianEstimator

# There are many choices of parametrization; here is one example.
# NOTE(review): the model is built from the edge list only, so any node of
# `dag` that has no edge would presumably be missing from it -- confirm this
# cannot happen for the learned structure.
model = BayesianModel(dag.edges())
# Fit the CPDs from the sampled data with a Bayesian estimator, using a
# 'dirichlet' prior with pseudo-counts of 0.1.
model.fit(df_data,
          estimator=BayesianEstimator,
          prior_type='dirichlet',
          pseudo_counts=0.1)
print(model.get_cpds())