def load_bn_from_BIF(dataset_name='child', folder=DATA_FOLDER, verbose=False):
    """Load a Bayesian network from a BIF file, attaching its image if present.

    The BIF file is read from
    ``<folder>/<BIF_FOLDER_MAP[dataset_name]>/<dataset_name>.bif/<dataset_name>.bif``
    and an optional picture is looked up at
    ``<folder>/<BIF_FOLDER_MAP[dataset_name]>/<dataset_name>.png``.

    Args:
        dataset_name: Key into ``BIF_FOLDER_MAP``; also the base file name.
        folder: Root directory that contains the dataset sub-folders.
        verbose: When true, print the paths being loaded and a summary of
            the graph's nodes, leaves, roots and intermediate nodes.

    Returns:
        The model produced by ``BIFReader.get_model()``. When the image file
        exists, it is opened with PIL and stored on the model as ``image``.
    """
    dataset_dir = os.path.join(folder, BIF_FOLDER_MAP[dataset_name])
    bif_name = dataset_name + '.bif'
    # The BIF file sits inside a directory that carries the same name.
    bif_file = os.path.join(dataset_dir, bif_name, bif_name)
    image_file = os.path.join(dataset_dir, dataset_name + '.png')

    if verbose:
        print(f'Loading graph from {bif_file}')

    model = BIFReader(bif_file).get_model()

    if os.path.exists(image_file):
        if verbose:
            print(f'Loading graph image from {image_file}')
        model.image = PIL.Image.open(image_file)

    # Leaves are treated as the features; everything that is neither a leaf
    # nor a root is reported as an intermediate node.
    leaves = model.get_leaves()
    roots = model.get_roots()
    all_nodes = model.nodes()
    intermediate = list({
        node
        for node in all_nodes
        if not (node in leaves or node in roots)
    })

    if verbose:
        print(f'Nodes: {all_nodes} ({len(all_nodes)})')
        print(f'Features/Leaves: {leaves} ({len(leaves)})')
        print(f'Roots: {roots} ({len(roots)})')
        print(
            f'Intermediate (non-roots/non-leaves): {intermediate} ({len(intermediate)})'
        )

    return model
Exemple #2
0
    def __init__(self, evidence, bif_path=None, model=None):
        """
            Build the variable numbering and probability arrays for a network.

            Args:
                evidence  A dictionary with observations, e.g., {node-name: node-value}.
                bif_path  Path of a .bif file; only read when `model` is None.
                model     PGMPY model specification; used as-is when given.

            At least one of `model` / `bif_path` must be provided.
        """

        assert not (bif_path is None and model is None), \
                "You must set either pgmpy model or bif_path!"
        if model is None:
            logger.debug("[BayesianNetworkVI] reading from %s" % bif_path)
            model = BIFReader(bif_path).get_model()
        self._model = model
        self._evidence = evidence

        # Variables are numbered with the latent ones first, then the observed
        # ones; each group is sorted.
        cpd_variables = {v for cpd in self._model.get_cpds()
                         for v in cpd.variables}
        latent_variables = sorted(cpd_variables.difference(evidence))
        observed_variables = sorted(evidence)
        self._no2var = dict(enumerate(latent_variables + observed_variables))
        self._var2no = {var: no for no, var in self._no2var.items()}
        logger.debug("[BayesianNetworkVI] mapping(id->variable)=%s" % self._no2var)

        # For every variable, map each of its values to that value's index.
        variable2values = cpds_to_variables(self._model.get_cpds())
        self._variable2values = {
            var: {v: i for i, v in enumerate(vals)}
            for var, vals in variable2values.items()
        }

        # EPS is added to every array; the arrays are not renormalized here.
        self._prob_arrays = [
            cpd_to_prob_array(cpd) + EPS
            for cpd in self._model.get_cpds()
        ]  #!TODO renormalization

        logger.info("[BayesianNetworkVI] %i vars with max cardinality=%i => enumeration size=%i" %
                    (len(variable2values), self.cardinality, self.enumeration_size))
from pgmpy.sampling import BayesianModelSampling
import networkx as nx
import sys
from tqdm import tqdm

if __name__ == '__main__':
    dataset = sys.argv[1]
    sample_size = int(sys.argv[2])
    n_runs = int(sys.argv[3])
    score = sys.argv[4]
    alpha = float(sys.argv[5])
    palim = int(sys.argv[6])

    # Read the BN model
    reader = BIFReader(dataset + '.bif')
    model = reader.get_model()
    arities = dict(model.get_cardinality())
    for key, value in arities.items():
        arities[key] = [value]
    nodes = list(nx.topological_sort(model))

    for run in tqdm(range(n_runs)):
        data = pd.read_csv(os.path.join(
            dataset,
            str(sample_size) + '_' + str(run) + '.csv'),
                           sep=' ')
        data = data.iloc[1:]  # Remove the arities from the table
        if score == 'BDeu':
            save_folder = os.path.join(
                dataset,
                str(alpha) + '_' + score + '_' + str(sample_size) + '_' +
Exemple #4
0
class TestBIFReader(unittest.TestCase):
    """Checks ``BIFReader`` against the JavaBayes "Dog-Problem" network.

    The network is supplied as an inline BIF string in which every
    probability block uses the ``table`` syntax.
    """

    def setUp(self):
        # A fresh reader is parsed from the inline BIF text for every test.
        self.reader = BIFReader(string="""
// Bayesian Network in the Interchange Format
// Produced by BayesianNetworks package in JavaBayes
// Output created Sun Nov 02 17:49:49 GMT+00:00 1997
// Bayesian network
network "Dog-Problem" { //5 variables and 5 probability distributions
        property "credal-set constant-density-bounded 1.1" ;
}
variable  "light-on" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (218, 195)" ;
}
variable  "bowel-problem" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (335, 99)" ;
}
variable  "dog-out" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (300, 195)" ;
}
variable  "hear-bark" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (296, 268)" ;
}
variable  "family-out" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (257, 99)" ;
}
probability (  "light-on"  "family-out" ) { //2 variable(s) and 4 values
        table 0.6 0.05 0.4 0.95 ;
}
probability (  "bowel-problem" ) { //1 variable(s) and 2 values
        table 0.01 0.99 ;
}
probability (  "dog-out"  "bowel-problem"  "family-out" ) { //3 variable(s) and 8 values
        table 0.99 0.97 0.9 0.3 0.01 0.03 0.1 0.7 ;
}
probability (  "hear-bark"  "dog-out" ) { //2 variable(s) and 4 values
        table 0.7 0.01 0.3 0.99 ;
}
probability (  "family-out" ) { //1 variable(s) and 2 values
        table 0.15 0.85 ;
}
""")

    def test_network_name(self):
        # The name comes from the `network "..."` declaration.
        self.assertEqual(self.reader.network_name, 'Dog-Problem')

    def test_get_variables(self):
        # Variables are expected in declaration order.
        var_expected = ['light-on', 'bowel-problem', 'dog-out',
                        'hear-bark', 'family-out']
        self.assertListEqual(self.reader.get_variables(), var_expected)

    def test_states(self):
        # Every variable in this network is binary.
        states_expected = {
            'bowel-problem': ['true', 'false'],
            'dog-out': ['true', 'false'],
            'family-out': ['true', 'false'],
            'hear-bark': ['true', 'false'],
            'light-on': ['true', 'false'],
        }
        states = self.reader.get_states()
        for variable, expected in states_expected.items():
            self.assertListEqual(expected, states[variable])

    def test_get_property(self):
        # Each variable carries exactly one `position` property string.
        property_expected = {
            'bowel-problem': ['position = (335, 99)'],
            'dog-out': ['position = (300, 195)'],
            'family-out': ['position = (257, 99)'],
            'hear-bark': ['position = (296, 268)'],
            'light-on': ['position = (218, 195)'],
        }
        prop = self.reader.get_property()
        for variable, expected in property_expected.items():
            self.assertListEqual(expected, prop[variable])

    def test_get_cpd(self):
        # Expected arrays have one row per state of the variable.
        cpd_expected = {
            'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.97, 0.9, 0.3],
                                 [0.01, 0.03, 0.1, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.01], [0.3, 0.99]]),
            'light-on': np.array([[0.6, 0.05], [0.4, 0.95]]),
        }
        cpd = self.reader.variable_cpds
        for variable, expected in cpd_expected.items():
            np_test.assert_array_equal(expected, cpd[variable])

    def test_get_parents(self):
        # Parents are the variables listed after the first one in each
        # `probability (...)` header.
        parents_expected = {
            'bowel-problem': [],
            'dog-out': ['bowel-problem', 'family-out'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out'],
        }
        parents = self.reader.get_parents()
        for variable, expected in parents_expected.items():
            self.assertListEqual(expected, parents[variable])

    def test_get_edges(self):
        # Edges are compared after sorting, so their order is irrelevant.
        edges_expected = [
            ['family-out', 'dog-out'],
            ['bowel-problem', 'dog-out'],
            ['family-out', 'light-on'],
            ['dog-out', 'hear-bark'],
        ]
        self.assertListEqual(sorted(self.reader.variable_edges),
                             sorted(edges_expected))

    def test_get_model(self):
        # The model built by the reader must carry the CPDs, node attributes,
        # adjacency, nodes and edges described by the BIF text.
        edges_expected = [
            ('family-out', 'dog-out'),
            ('bowel-problem', 'dog-out'),
            ('family-out', 'light-on'),
            ('dog-out', 'hear-bark'),
        ]
        nodes_expected = ['bowel-problem', 'hear-bark', 'light-on',
                          'dog-out', 'family-out']
        edge_expected = {
            'bowel-problem': {'dog-out': {}},
            'dog-out': {'hear-bark': {}},
            'family-out': {'dog-out': {}, 'light-on': {}},
            'hear-bark': {},
            'light-on': {},
        }
        positions = {
            'bowel-problem': '(335, 99)',
            'dog-out': '(300, 195)',
            'family-out': '(257, 99)',
            'hear-bark': '(296, 268)',
            'light-on': '(218, 195)',
        }
        node_expected = {
            name: {'position': position}
            for name, position in positions.items()
        }
        cpds_expected = [
            np.array([[0.01], [0.99]]),
            np.array([[0.99, 0.97, 0.9, 0.3], [0.01, 0.03, 0.1, 0.7]]),
            np.array([[0.15], [0.85]]),
            np.array([[0.7, 0.01], [0.3, 0.99]]),
            np.array([[0.6, 0.05], [0.4, 0.95]]),
        ]
        model = self.reader.get_model()
        for cpd_index, expected in enumerate(cpds_expected):
            np_test.assert_array_equal(model.get_cpds()[cpd_index].get_cpd(),
                                       expected)
        self.assertDictEqual(model.node, node_expected)
        self.assertDictEqual(model.edge, edge_expected)
        self.assertListEqual(sorted(model.nodes()), sorted(nodes_expected))
        self.assertListEqual(sorted(model.edges()), sorted(edges_expected))

    def tearDown(self):
        # Drop the reader so each test starts from a clean instance.
        del self.reader
Exemple #5
0
def get_dataset(dataset):
    """
    Fetches the `dataset` and returns a pgmpy.model instance.

    The gzipped BIF file is downloaded to a temporary file, decompressed in
    memory, and parsed with `BIFReader`. The temporary file is removed once
    its content has been read.

    Parameters
    ----------
    dataset: str
        Any dataset from bnlearn repository (http://www.bnlearn.com/bnrepository).

        Discrete Bayesian Network Options:
            Small Networks:
                1. asia
                2. cancer
                3. earthquake
                4. sachs
                5. survey
            Medium Networks:
                1. alarm
                2. barley
                3. child
                4. insurance
                5. mildew
                6. water
            Large Networks:
                1. hailfinder
                2. hepar2
                3. win95pts
            Very Large Networks:
                1. andes
                2. diabetes
                3. link
                4. munin1
                5. munin2
                6. munin3
                7. munin4
                8. pathfinder
                9. pigs
                10. munin
        Gaussian Bayesian Network Options:
                1. ecoli70
                2. magic-niab
                3. magic-irri
                4. arth150
        Conditional Linear Gaussian Bayesian Network Options:
                1. sangiovese
                2. mehra

    Example
    -------
    >>> from pgmpy.data import get_dataset
    >>> model = get_dataset(dataset='asia')
    >>> model

    Returns
    -------
    pgmpy.models instance: An instance of one of the model classes in pgmpy.models
                           depending on the type of dataset.

    Raises
    ------
    ValueError
        If `dataset` is not one of the names listed above.
    NotImplementedError
        If `dataset` is listed but has no download link (currently the
        Gaussian and conditional linear Gaussian networks).
    """
    # Local import: only needed to remove the downloaded temporary file.
    import os

    dataset_links = {
        "asia": "http://www.bnlearn.com/bnrepository/asia/asia.bif.gz",
        "cancer": "http://www.bnlearn.com/bnrepository/cancer/cancer.bif.gz",
        "earthquake":
        "http://www.bnlearn.com/bnrepository/earthquake/earthquake.bif.gz",
        "sachs": "http://www.bnlearn.com/bnrepository/sachs/sachs.bif.gz",
        "survey": "http://www.bnlearn.com/bnrepository/survey/survey.bif.gz",
        "alarm": "http://www.bnlearn.com/bnrepository/alarm/alarm.bif.gz",
        "barley": "http://www.bnlearn.com/bnrepository/barley/barley.bif.gz",
        "child": "http://www.bnlearn.com/bnrepository/child/child.bif.gz",
        "insurance":
        "http://www.bnlearn.com/bnrepository/insurance/insurance.bif.gz",
        "mildew": "http://www.bnlearn.com/bnrepository/mildew/mildew.bif.gz",
        "water": "http://www.bnlearn.com/bnrepository/water/water.bif.gz",
        "hailfinder":
        "http://www.bnlearn.com/bnrepository/hailfinder/hailfinder.bif.gz",
        "hepar2": "http://www.bnlearn.com/bnrepository/hepar2/hepar2.bif.gz",
        "win95pts":
        "http://www.bnlearn.com/bnrepository/win95pts/win95pts.bif.gz",
        "andes": "http://www.bnlearn.com/bnrepository/andes/andes.bif.gz",
        "diabetes":
        "http://www.bnlearn.com/bnrepository/diabetes/diabetes.bif.gz",
        "link": "http://www.bnlearn.com/bnrepository/link/link.bif.gz",
        "munin1": "http://www.bnlearn.com/bnrepository/munin4/munin1.bif.gz",
        "munin2": "http://www.bnlearn.com/bnrepository/munin4/munin2.bif.gz",
        "munin3": "http://www.bnlearn.com/bnrepository/munin4/munin3.bif.gz",
        "munin4": "http://www.bnlearn.com/bnrepository/munin4/munin4.bif.gz",
        "pathfinder":
        "http://www.bnlearn.com/bnrepository/pathfinder/pathfinder.bif.gz",
        "pigs": "http://www.bnlearn.com/bnrepository/pigs/pigs.bif.gz",
        "munin": "http://www.bnlearn.com/bnrepository/munin/munin.bif.gz",
        # Known networks without a download link yet.
        "ecoli70": "",
        "magic-niab": "",
        "magic-irri": "",
        "arth150": "",
        "sangiovese": "",
        "mehra": "",
    }

    if dataset not in dataset_links:
        raise ValueError("dataset should be one of the options")
    if dataset_links[dataset] == "":
        raise NotImplementedError("The specified dataset isn't supported")

    # With no target filename, urlretrieve stores the download in a temp file.
    filename, _ = urlretrieve(dataset_links[dataset])
    try:
        with gzip.open(filename, "rb") as f:
            content = f.read()
    finally:
        # Best-effort cleanup; a failure to delete must not mask the result
        # or an error raised while reading.
        try:
            os.remove(filename)
        except OSError:
            pass

    # BIFReader's first positional argument is a file path; the decompressed
    # BIF text has to be passed (decoded) through the `string` keyword.
    reader = BIFReader(string=content.decode("utf-8"))
    return reader.get_model()
class TestBIFReader(unittest.TestCase):
    """Checks ``BIFReader`` against the JavaBayes "Dog-Problem" network.

    The inline BIF text mixes both probability syntaxes: ``light-on`` uses
    per-parent-state rows (``(true) ...``), the other variables use ``table``.
    """

    def setUp(self):
        # A fresh reader is parsed from the inline BIF text for every test.
        self.reader = BIFReader(string="""
// Bayesian Network in the Interchange Format
// Produced by BayesianNetworks package in JavaBayes
// Output created Sun Nov 02 17:49:49 GMT+00:00 1997
// Bayesian network
network "Dog-Problem" { //5 variables and 5 probability distributions
        property "credal-set constant-density-bounded 1.1" ;
}
variable  "light-on" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (218, 195)" ;
}
variable  "bowel-problem" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (335, 99)" ;
}
variable  "dog-out" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (300, 195)" ;
}
variable  "hear-bark" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (296, 268)" ;
}
variable  "family-out" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (257, 99)" ;
}
probability (  "light-on"  "family-out" ) { //2 variable(s) and 4 values
        (true) 0.6 0.4 ;
        (false) 0.05 0.95 ;
}
probability (  "bowel-problem" ) { //1 variable(s) and 2 values
        table 0.01 0.99 ;
}
probability (  "dog-out"  "bowel-problem"  "family-out" ) { //3 variable(s) and 8 values
        table 0.99 0.97 0.9 0.3 0.01 0.03 0.1 0.7 ;
}
probability (  "hear-bark"  "dog-out" ) { //2 variable(s) and 4 values
        table 0.7 0.01 0.3 0.99 ;
}
probability (  "family-out" ) { //1 variable(s) and 2 values
        table 0.15 0.85 ;
}
""")

    def test_network_name(self):
        # The name comes from the `network "..."` declaration.
        self.assertEqual(self.reader.network_name, 'Dog-Problem')

    def test_get_variables(self):
        # Variables are expected in declaration order.
        var_expected = [
            'light-on', 'bowel-problem', 'dog-out', 'hear-bark', 'family-out'
        ]
        self.assertListEqual(self.reader.get_variables(), var_expected)

    def test_states(self):
        # Every variable in this network is binary.
        states_expected = {
            'bowel-problem': ['true', 'false'],
            'dog-out': ['true', 'false'],
            'family-out': ['true', 'false'],
            'hear-bark': ['true', 'false'],
            'light-on': ['true', 'false'],
        }
        states = self.reader.get_states()
        for variable, expected in states_expected.items():
            self.assertListEqual(expected, states[variable])

    def test_get_property(self):
        # Each variable carries exactly one `position` property string.
        property_expected = {
            'bowel-problem': ['position = (335, 99)'],
            'dog-out': ['position = (300, 195)'],
            'family-out': ['position = (257, 99)'],
            'hear-bark': ['position = (296, 268)'],
            'light-on': ['position = (218, 195)'],
        }
        prop = self.reader.get_property()
        for variable, expected in property_expected.items():
            self.assertListEqual(expected, prop[variable])

    def test_get_values(self):
        # Expected arrays have one row per state of the variable; `light-on`
        # is declared with per-parent-state rows in the BIF text.
        cpd_expected = {
            'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.97, 0.9, 0.3],
                                 [0.01, 0.03, 0.1, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.01], [0.3, 0.99]]),
            'light-on': np.array([[0.6, 0.05], [0.4, 0.95]]),
        }
        cpd = self.reader.variable_cpds
        for variable, expected in cpd_expected.items():
            np_test.assert_array_equal(expected, cpd[variable])

    def test_get_parents(self):
        # Parents are the variables listed after the first one in each
        # `probability (...)` header.
        parents_expected = {
            'bowel-problem': [],
            'dog-out': ['bowel-problem', 'family-out'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out'],
        }
        parents = self.reader.get_parents()
        for variable, expected in parents_expected.items():
            self.assertListEqual(expected, parents[variable])

    def test_get_edges(self):
        # Edges are compared after sorting, so their order is irrelevant.
        edges_expected = [
            ['family-out', 'dog-out'],
            ['bowel-problem', 'dog-out'],
            ['family-out', 'light-on'],
            ['dog-out', 'hear-bark'],
        ]
        self.assertListEqual(sorted(self.reader.variable_edges),
                             sorted(edges_expected))

    def test_get_model(self):
        # The model built by the reader must carry the CPDs, node attributes,
        # adjacency, nodes and edges described by the BIF text. Nodes and
        # edges are expected to hold a `weight` attribute set to None.
        edges_expected = [
            ('family-out', 'dog-out'),
            ('bowel-problem', 'dog-out'),
            ('family-out', 'light-on'),
            ('dog-out', 'hear-bark'),
        ]
        nodes_expected = [
            'bowel-problem', 'hear-bark', 'light-on', 'dog-out', 'family-out'
        ]
        edge_expected = {
            'bowel-problem': {'dog-out': {'weight': None}},
            'dog-out': {'hear-bark': {'weight': None}},
            'family-out': {
                'dog-out': {'weight': None},
                'light-on': {'weight': None},
            },
            'hear-bark': {},
            'light-on': {},
        }
        positions = {
            'bowel-problem': '(335, 99)',
            'dog-out': '(300, 195)',
            'family-out': '(257, 99)',
            'hear-bark': '(296, 268)',
            'light-on': '(218, 195)',
        }
        node_expected = {
            name: {'weight': None, 'position': position}
            for name, position in positions.items()
        }
        cpds_expected = [
            np.array([[0.01], [0.99]]),
            np.array([[0.99, 0.97, 0.9, 0.3], [0.01, 0.03, 0.1, 0.7]]),
            np.array([[0.15], [0.85]]),
            np.array([[0.7, 0.01], [0.3, 0.99]]),
            np.array([[0.6, 0.05], [0.4, 0.95]]),
        ]
        model = self.reader.get_model()
        for cpd_index, expected in enumerate(cpds_expected):
            np_test.assert_array_equal(
                model.get_cpds()[cpd_index].get_values(), expected)
        self.assertDictEqual(model.node, node_expected)
        self.assertDictEqual(model.edge, edge_expected)
        self.assertListEqual(sorted(model.nodes()), sorted(nodes_expected))
        self.assertListEqual(sorted(model.edges()), sorted(edges_expected))

    def tearDown(self):
        # Drop the reader so each test starts from a clean instance.
        del self.reader
Exemple #7
0
# Run the annealing search, then draw and print the resulting structure.
state, c, states, costs = annealing(maxsteps=3000, debug=True)
nodes = ['asia', 'tub', 'smoke', 'lung', 'bronc', 'either', 'xray', 'dysp']
G = vetor_Rede(state, nodes)
nx.draw(G, with_labels=True)
print(state)
print(c)

# Read at most 50001 CSV rows (the first row holds the column names) and
# transpose them into 8 per-column lists.
with open('Asia.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    aux = 0
    data = []
    data1 = [[] for _ in range(8)]
    for aux, row in enumerate(csv_reader, start=1):
        data.append(row)
        for i, value in enumerate(row):
            data1[i].append(value)
        if aux == 50001:
            break
# 22376.39851240954  (value noted by the original author; meaning not stated)

# Column name -> list of that column's values, then wrap in a DataFrame.
data = {column[0]: column[1:] for column in data1}
data = pd.DataFrame(data)
print("Data: ")
print(data)  # Data taken from the file

# Best network for asia, as published on bnlearn.com.
reader = BIFReader('asia.bif')
asia_model = reader.get_model()  # read that model
print("Score BIC")
print(abs(BicScore(data).score(asia_model)))
#see_annealing(states, costs)