def load_bn_from_BIF(dataset_name='child', folder=DATA_FOLDER, verbose=False):
    """Read a Bayesian network from a ``.bif`` file and return the model.

    Args:
        dataset_name: Key into ``BIF_FOLDER_MAP`` and base name of the
            ``.bif`` / ``.png`` files.
        folder: Root folder that contains the per-dataset sub-folders.
        verbose: When true, print the paths being loaded and a summary of
            the nodes, leaves, roots and intermediate nodes.

    Returns:
        The model produced by ``BIFReader.get_model()``. If a ``.png`` with
        the dataset name exists next to the BIF folder, it is opened with
        PIL and attached to the model as ``model.image``.
    """
    dataset_dir = os.path.join(folder, BIF_FOLDER_MAP[dataset_name])
    bif_name = dataset_name + '.bif'
    # NOTE(review): the file is looked up as <name>.bif/<name>.bif, i.e. a
    # directory named like the file itself. Presumably this mirrors how the
    # archives were unpacked -- confirm against the data layout.
    bif_file = os.path.join(dataset_dir, bif_name, bif_name)
    image_file = os.path.join(dataset_dir, dataset_name + '.png')

    if verbose:
        print(f'Loading graph from {bif_file}')
    model = BIFReader(bif_file).get_model()

    if os.path.exists(image_file):
        if verbose:
            print(f'Loading graph image from {image_file}')
        model.image = PIL.Image.open(image_file)

    # The leaves of the graph are treated as the features.
    leaves = model.get_leaves()
    roots = model.get_roots()
    nodes = model.nodes()
    intermediate = list({n for n in nodes if not (n in leaves or n in roots)})

    if not verbose:
        return model

    print(f'Nodes: {nodes} ({len(nodes)})')
    print(f'Features/Leaves: {leaves} ({len(leaves)})')
    print(f'Roots: {roots} ({len(roots)})')
    print(
        f'Intermediate (non-roots/non-leaves): {intermediate} ({len(intermediate)})'
    )
    return model
def __init__(self, evidence, bif_path=None, model=None):
    """
        Build the inference object from a pgmpy model or a .bif file.

        Args:
            evidence  A dictionary with observations, e.g., {node-name: node-value}.
            model / bif_path  PGMPY model specification or .bif file path.
                              At least one of the two must be given; when both
                              are given, `model` is used and `bif_path` is ignored.
    """
    assert bif_path is not None or model is not None, \
        "You must set either pgmpy model or bif_path!"

    if model is None:
        reading_msg = "[BayesianNetworkVI] reading from %s" % bif_path
        logger.debug(reading_msg)
        model = BIFReader(bif_path).get_model()

    self._model = model
    self._evidence = evidence

    # Numbering scheme: latent (unobserved) variables first, then the
    # observed ones, each group in sorted order.
    all_variables = {
        name for cpd in self._model.get_cpds() for name in cpd.variables
    }
    latent = sorted(all_variables.difference(evidence))
    observed = sorted(evidence)

    self._no2var = {no: var for no, var in enumerate(latent + observed)}
    self._var2no = {var: no for no, var in self._no2var.items()}
    mapping_msg = "[BayesianNetworkVI] mapping(id->variable)=%s" % self._no2var
    logger.debug(mapping_msg)

    # For every variable, map each of its values to its position.
    variable2values = cpds_to_variables(self._model.get_cpds())
    self._variable2values = {
        var: {value: index for index, value in enumerate(values)}
        for var, values in variable2values.items()
    }

    # EPS is added to every probability array.
    #!TODO renormalization
    self._prob_arrays = []
    for cpd in self._model.get_cpds():
        self._prob_arrays.append(cpd_to_prob_array(cpd) + EPS)

    summary = (len(variable2values), self.cardinality, self.enumeration_size)
    logger.info(
        "[BayesianNetworkVI] %i vars with max cardinality=%i => enumeration size=%i"
        % summary
    )
from pgmpy.sampling import BayesianModelSampling import networkx as nx import sys from tqdm import tqdm if __name__ == '__main__': dataset = sys.argv[1] sample_size = int(sys.argv[2]) n_runs = int(sys.argv[3]) score = sys.argv[4] alpha = float(sys.argv[5]) palim = int(sys.argv[6]) # Read the BN model reader = BIFReader(dataset + '.bif') model = reader.get_model() arities = dict(model.get_cardinality()) for key, value in arities.items(): arities[key] = [value] nodes = list(nx.topological_sort(model)) for run in tqdm(range(n_runs)): data = pd.read_csv(os.path.join( dataset, str(sample_size) + '_' + str(run) + '.csv'), sep=' ') data = data.iloc[1:] # Remove the arities from the table if score == 'BDeu': save_folder = os.path.join( dataset, str(alpha) + '_' + score + '_' + str(sample_size) + '_' +
class TestBIFReader(unittest.TestCase):
    """Exercise BIFReader on the JavaBayes "Dog-Problem" network given as a string."""

    def setUp(self):
        """Parse the five-variable Dog-Problem network from an inline BIF document."""
        bif_text = """
// Bayesian Network in the Interchange Format
// Produced by BayesianNetworks package in JavaBayes
// Output created Sun Nov 02 17:49:49 GMT+00:00 1997
// Bayesian network
network "Dog-Problem" { //5 variables and 5 probability distributions
    property "credal-set constant-density-bounded 1.1" ;
}
variable "light-on" { //2 values
    type discrete[2] { "true" "false" };
    property "position = (218, 195)" ;
}
variable "bowel-problem" { //2 values
    type discrete[2] { "true" "false" };
    property "position = (335, 99)" ;
}
variable "dog-out" { //2 values
    type discrete[2] { "true" "false" };
    property "position = (300, 195)" ;
}
variable "hear-bark" { //2 values
    type discrete[2] { "true" "false" };
    property "position = (296, 268)" ;
}
variable "family-out" { //2 values
    type discrete[2] { "true" "false" };
    property "position = (257, 99)" ;
}
probability ( "light-on" "family-out" ) { //2 variable(s) and 4 values
    table 0.6 0.05 0.4 0.95 ;
}
probability ( "bowel-problem" ) { //1 variable(s) and 2 values
    table 0.01 0.99 ;
}
probability ( "dog-out" "bowel-problem" "family-out" ) { //3 variable(s) and 8 values
    table 0.99 0.97 0.9 0.3 0.01 0.03 0.1 0.7 ;
}
probability ( "hear-bark" "dog-out" ) { //2 variable(s) and 4 values
    table 0.7 0.01 0.3 0.99 ;
}
probability ( "family-out" ) { //1 variable(s) and 2 values
    table 0.15 0.85 ;
}
"""
        self.reader = BIFReader(string=bif_text)

    def test_network_name(self):
        """The network name is taken from the ``network`` declaration."""
        self.assertEqual(self.reader.network_name, 'Dog-Problem')

    def test_get_variables(self):
        """Variables are reported in declaration order."""
        expected = [
            'light-on', 'bowel-problem', 'dog-out', 'hear-bark', 'family-out'
        ]
        self.assertListEqual(self.reader.get_variables(), expected)

    def test_states(self):
        """Every variable has the two states ``true`` and ``false``."""
        expected_by_variable = {
            'bowel-problem': ['true', 'false'],
            'dog-out': ['true', 'false'],
            'family-out': ['true', 'false'],
            'hear-bark': ['true', 'false'],
            'light-on': ['true', 'false'],
        }
        actual = self.reader.get_states()
        for variable, expected in expected_by_variable.items():
            self.assertListEqual(expected, actual[variable])

    def test_get_property(self):
        """Each variable carries its ``position`` property string."""
        expected_by_variable = {
            'bowel-problem': ['position = (335, 99)'],
            'dog-out': ['position = (300, 195)'],
            'family-out': ['position = (257, 99)'],
            'hear-bark': ['position = (296, 268)'],
            'light-on': ['position = (218, 195)'],
        }
        actual = self.reader.get_property()
        for variable, expected in expected_by_variable.items():
            self.assertListEqual(expected, actual[variable])

    def test_get_cpd(self):
        """``variable_cpds`` holds one probability table per variable."""
        expected_by_variable = {
            'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.97, 0.9, 0.3],
                                 [0.01, 0.03, 0.1, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.01], [0.3, 0.99]]),
            'light-on': np.array([[0.6, 0.05], [0.4, 0.95]]),
        }
        actual = self.reader.variable_cpds
        for variable, expected in expected_by_variable.items():
            np_test.assert_array_equal(expected, actual[variable])

    def test_get_parents(self):
        """Parents follow the order of the ``probability`` declarations."""
        expected_by_variable = {
            'bowel-problem': [],
            'dog-out': ['bowel-problem', 'family-out'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out'],
        }
        actual = self.reader.get_parents()
        for variable, expected in expected_by_variable.items():
            self.assertListEqual(expected, actual[variable])

    def test_get_edges(self):
        """``variable_edges`` lists every parent -> child pair."""
        expected = [['family-out', 'dog-out'],
                    ['bowel-problem', 'dog-out'],
                    ['family-out', 'light-on'],
                    ['dog-out', 'hear-bark']]
        actual = self.reader.variable_edges
        self.assertListEqual(sorted(actual), sorted(expected))

    def test_get_model(self):
        """The built model exposes the expected CPDs, nodes and edges."""
        edges_expected = [('family-out', 'dog-out'),
                          ('bowel-problem', 'dog-out'),
                          ('family-out', 'light-on'),
                          ('dog-out', 'hear-bark')]
        nodes_expected = [
            'bowel-problem', 'hear-bark', 'light-on', 'dog-out', 'family-out'
        ]
        edge_expected = {
            'bowel-problem': {'dog-out': {}},
            'dog-out': {'hear-bark': {}},
            'family-out': {'dog-out': {}, 'light-on': {}},
            'hear-bark': {},
            'light-on': {},
        }
        node_expected = {
            'bowel-problem': {'position': '(335, 99)'},
            'dog-out': {'position': '(300, 195)'},
            'family-out': {'position': '(257, 99)'},
            'hear-bark': {'position': '(296, 268)'},
            'light-on': {'position': '(218, 195)'},
        }
        cpds_expected = [
            np.array([[0.01], [0.99]]),
            np.array([[0.99, 0.97, 0.9, 0.3], [0.01, 0.03, 0.1, 0.7]]),
            np.array([[0.15], [0.85]]),
            np.array([[0.7, 0.01], [0.3, 0.99]]),
            np.array([[0.6, 0.05], [0.4, 0.95]]),
        ]

        model = self.reader.get_model()
        # CPDs are compared positionally against the model's CPD list.
        for position, expected in enumerate(cpds_expected):
            actual = model.get_cpds()[position].get_cpd()
            np_test.assert_array_equal(actual, expected)

        self.assertDictEqual(model.node, node_expected)
        self.assertDictEqual(model.edge, edge_expected)
        self.assertListEqual(sorted(model.nodes()), sorted(nodes_expected))
        self.assertListEqual(sorted(model.edges()), sorted(edges_expected))

    def tearDown(self):
        """Drop the reader so every test starts from a fresh parse."""
        del self.reader
def get_dataset(dataset):
    """
    Fetches the `dataset` and returns a pgmpy.model instance.

    The gzipped BIF file is downloaded from the bnlearn repository into a
    temporary file, decompressed in memory, and parsed with `BIFReader`.

    Parameters
    ----------
    dataset: str
        Any dataset from bnlearn repository (http://www.bnlearn.com/bnrepository).

        Discrete Bayesian Network Options:
            Small Networks:
                1. asia
                2. cancer
                3. earthquake
                4. sachs
                5. survey
            Medium Networks:
                1. alarm
                2. barley
                3. child
                4. insurance
                5. mildew
                6. water
            Large Networks:
                1. hailfinder
                2. hepar2
                3. win95pts
            Very Large Networks:
                1. andes
                2. diabetes
                3. link
                4. munin1
                5. munin2
                6. munin3
                7. munin4
                8. pathfinder
                9. pigs
                10. munin
        Gaussian Bayesian Network Options:
                1. ecoli70
                2. magic-niab
                3. magic-irri
                4. arth150
        Conditional Linear Gaussian Bayesian Network Options:
                1. sangiovese
                2. mehra

    Example
    -------
    >>> from pgmpy.data import get_dataset
    >>> model = get_dataset(dataset='asia')
    >>> model

    Returns
    -------
    pgmpy.models instance: An instance of one of the model classes in pgmpy.models
                           depending on the type of dataset.

    Raises
    ------
    ValueError
        If `dataset` is not one of the names listed above.
    NotImplementedError
        If `dataset` is a listed name for which no download link is available
        (the Gaussian and conditional linear Gaussian networks).
    """
    # Local import: only needed to clean up the downloaded temporary file.
    import os

    dataset_links = {
        "asia": "http://www.bnlearn.com/bnrepository/asia/asia.bif.gz",
        "cancer": "http://www.bnlearn.com/bnrepository/cancer/cancer.bif.gz",
        "earthquake": "http://www.bnlearn.com/bnrepository/earthquake/earthquake.bif.gz",
        "sachs": "http://www.bnlearn.com/bnrepository/sachs/sachs.bif.gz",
        "survey": "http://www.bnlearn.com/bnrepository/survey/survey.bif.gz",
        "alarm": "http://www.bnlearn.com/bnrepository/alarm/alarm.bif.gz",
        "barley": "http://www.bnlearn.com/bnrepository/barley/barley.bif.gz",
        "child": "http://www.bnlearn.com/bnrepository/child/child.bif.gz",
        "insurance": "http://www.bnlearn.com/bnrepository/insurance/insurance.bif.gz",
        "mildew": "http://www.bnlearn.com/bnrepository/mildew/mildew.bif.gz",
        "water": "http://www.bnlearn.com/bnrepository/water/water.bif.gz",
        "hailfinder": "http://www.bnlearn.com/bnrepository/hailfinder/hailfinder.bif.gz",
        "hepar2": "http://www.bnlearn.com/bnrepository/hepar2/hepar2.bif.gz",
        "win95pts": "http://www.bnlearn.com/bnrepository/win95pts/win95pts.bif.gz",
        "andes": "http://www.bnlearn.com/bnrepository/andes/andes.bif.gz",
        "diabetes": "http://www.bnlearn.com/bnrepository/diabetes/diabetes.bif.gz",
        "link": "http://www.bnlearn.com/bnrepository/link/link.bif.gz",
        "munin1": "http://www.bnlearn.com/bnrepository/munin4/munin1.bif.gz",
        "munin2": "http://www.bnlearn.com/bnrepository/munin4/munin2.bif.gz",
        "munin3": "http://www.bnlearn.com/bnrepository/munin4/munin3.bif.gz",
        "munin4": "http://www.bnlearn.com/bnrepository/munin4/munin4.bif.gz",
        "pathfinder": "http://www.bnlearn.com/bnrepository/pathfinder/pathfinder.bif.gz",
        "pigs": "http://www.bnlearn.com/bnrepository/pigs/pigs.bif.gz",
        "munin": "http://www.bnlearn.com/bnrepository/munin/munin.bif.gz",
        # Known networks without a BIF download; they raise
        # NotImplementedError below.
        "ecoli70": "",
        "magic-niab": "",
        "magic-irri": "",
        "arth150": "",
        "sangiovese": "",
        "mehra": "",
    }

    if dataset not in dataset_links:
        raise ValueError("dataset should be one of the options")
    if dataset_links[dataset] == "":
        raise NotImplementedError("The specified dataset isn't supported")

    # urlretrieve without a target name downloads into a temporary file.
    filename, _ = urlretrieve(dataset_links[dataset])
    try:
        with gzip.open(filename, "rb") as f:
            content = f.read()
    finally:
        # Best-effort cleanup so repeated calls do not pile up temp files.
        try:
            os.remove(filename)
        except OSError:
            pass

    # BIFReader's first positional argument is a file path; the decompressed
    # document must be decoded and passed through the `string` keyword.
    reader = BIFReader(string=content.decode("utf-8"))
    return reader.get_model()
class TestBIFReader(unittest.TestCase):
    """Exercise BIFReader on the "Dog-Problem" network, including a per-state CPD block."""

    def setUp(self):
        """Parse the Dog-Problem network; ``light-on`` uses ``(state)`` rows instead of ``table``."""
        bif_text = """
// Bayesian Network in the Interchange Format
// Produced by BayesianNetworks package in JavaBayes
// Output created Sun Nov 02 17:49:49 GMT+00:00 1997
// Bayesian network
network "Dog-Problem" { //5 variables and 5 probability distributions
    property "credal-set constant-density-bounded 1.1" ;
}
variable "light-on" { //2 values
    type discrete[2] { "true" "false" };
    property "position = (218, 195)" ;
}
variable "bowel-problem" { //2 values
    type discrete[2] { "true" "false" };
    property "position = (335, 99)" ;
}
variable "dog-out" { //2 values
    type discrete[2] { "true" "false" };
    property "position = (300, 195)" ;
}
variable "hear-bark" { //2 values
    type discrete[2] { "true" "false" };
    property "position = (296, 268)" ;
}
variable "family-out" { //2 values
    type discrete[2] { "true" "false" };
    property "position = (257, 99)" ;
}
probability ( "light-on" "family-out" ) { //2 variable(s) and 4 values
    (true) 0.6 0.4 ;
    (false) 0.05 0.95 ;
}
probability ( "bowel-problem" ) { //1 variable(s) and 2 values
    table 0.01 0.99 ;
}
probability ( "dog-out" "bowel-problem" "family-out" ) { //3 variable(s) and 8 values
    table 0.99 0.97 0.9 0.3 0.01 0.03 0.1 0.7 ;
}
probability ( "hear-bark" "dog-out" ) { //2 variable(s) and 4 values
    table 0.7 0.01 0.3 0.99 ;
}
probability ( "family-out" ) { //1 variable(s) and 2 values
    table 0.15 0.85 ;
}
"""
        self.reader = BIFReader(string=bif_text)

    def test_network_name(self):
        """The network name is taken from the ``network`` declaration."""
        self.assertEqual(self.reader.network_name, 'Dog-Problem')

    def test_get_variables(self):
        """Variables are reported in declaration order."""
        expected = ['light-on', 'bowel-problem', 'dog-out',
                    'hear-bark', 'family-out']
        self.assertListEqual(self.reader.get_variables(), expected)

    def test_states(self):
        """Every variable has the two states ``true`` and ``false``."""
        expected_by_variable = {
            'bowel-problem': ['true', 'false'],
            'dog-out': ['true', 'false'],
            'family-out': ['true', 'false'],
            'hear-bark': ['true', 'false'],
            'light-on': ['true', 'false'],
        }
        actual = self.reader.get_states()
        for variable, expected in expected_by_variable.items():
            self.assertListEqual(expected, actual[variable])

    def test_get_property(self):
        """Each variable carries its ``position`` property string."""
        expected_by_variable = {
            'bowel-problem': ['position = (335, 99)'],
            'dog-out': ['position = (300, 195)'],
            'family-out': ['position = (257, 99)'],
            'hear-bark': ['position = (296, 268)'],
            'light-on': ['position = (218, 195)'],
        }
        actual = self.reader.get_property()
        for variable, expected in expected_by_variable.items():
            self.assertListEqual(expected, actual[variable])

    def test_get_values(self):
        """``variable_cpds`` holds one probability table per variable."""
        expected_by_variable = {
            'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.97, 0.9, 0.3],
                                 [0.01, 0.03, 0.1, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.01], [0.3, 0.99]]),
            'light-on': np.array([[0.6, 0.05], [0.4, 0.95]]),
        }
        actual = self.reader.variable_cpds
        for variable, expected in expected_by_variable.items():
            np_test.assert_array_equal(expected, actual[variable])

    def test_get_parents(self):
        """Parents follow the order of the ``probability`` declarations."""
        expected_by_variable = {
            'bowel-problem': [],
            'dog-out': ['bowel-problem', 'family-out'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out'],
        }
        actual = self.reader.get_parents()
        for variable, expected in expected_by_variable.items():
            self.assertListEqual(expected, actual[variable])

    def test_get_edges(self):
        """``variable_edges`` lists every parent -> child pair."""
        expected = [['family-out', 'dog-out'],
                    ['bowel-problem', 'dog-out'],
                    ['family-out', 'light-on'],
                    ['dog-out', 'hear-bark']]
        actual = self.reader.variable_edges
        self.assertListEqual(sorted(actual), sorted(expected))

    def test_get_model(self):
        """The built model exposes the expected CPDs, node/edge attributes, nodes and edges."""
        edges_expected = [('family-out', 'dog-out'),
                          ('bowel-problem', 'dog-out'),
                          ('family-out', 'light-on'),
                          ('dog-out', 'hear-bark')]
        nodes_expected = ['bowel-problem', 'hear-bark', 'light-on',
                          'dog-out', 'family-out']
        edge_expected = {
            'bowel-problem': {'dog-out': {'weight': None}},
            'dog-out': {'hear-bark': {'weight': None}},
            'family-out': {'dog-out': {'weight': None},
                           'light-on': {'weight': None}},
            'hear-bark': {},
            'light-on': {},
        }
        node_expected = {
            'bowel-problem': {'weight': None, 'position': '(335, 99)'},
            'dog-out': {'weight': None, 'position': '(300, 195)'},
            'family-out': {'weight': None, 'position': '(257, 99)'},
            'hear-bark': {'weight': None, 'position': '(296, 268)'},
            'light-on': {'weight': None, 'position': '(218, 195)'},
        }
        cpds_expected = [
            np.array([[0.01], [0.99]]),
            np.array([[0.99, 0.97, 0.9, 0.3], [0.01, 0.03, 0.1, 0.7]]),
            np.array([[0.15], [0.85]]),
            np.array([[0.7, 0.01], [0.3, 0.99]]),
            np.array([[0.6, 0.05], [0.4, 0.95]]),
        ]

        model = self.reader.get_model()
        # CPDs are compared positionally against the model's CPD list.
        for position, expected in enumerate(cpds_expected):
            actual = model.get_cpds()[position].get_values()
            np_test.assert_array_equal(actual, expected)

        self.assertDictEqual(model.node, node_expected)
        self.assertDictEqual(model.edge, edge_expected)
        self.assertListEqual(sorted(model.nodes()), sorted(nodes_expected))
        self.assertListEqual(sorted(model.edges()), sorted(edges_expected))

    def tearDown(self):
        """Drop the reader so every test starts from a fresh parse."""
        del self.reader
# Script: run the annealing search, draw the resulting network, then load the
# Asia data set and print the absolute BIC score of the reference network
# read from 'asia.bif'.

# `annealing` is defined elsewhere; it returns four values that are unpacked
# here (presumably final state, its cost, and the visited states/costs --
# TODO confirm against its definition).
state, c, states, costs = annealing(maxsteps=3000, debug=True)
nodes = ['asia', 'tub', 'smoke', 'lung', 'bronc', 'either', 'xray', 'dysp']
# `vetor_Rede` turns the state plus the node names into the object that is
# drawn below (presumably a networkx graph -- TODO confirm).
G = vetor_Rede(state, nodes)
nx.draw(G, with_labels=True)
print(state)
print(c)

# Read the CSV column-wise: data1[i] collects every cell of column i,
# including the header cell in position 0.
with open('Asia.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    aux = 0  # number of rows read so far
    data = []
    data1 = [[] for i in range(8)]  # one list per column; 8 matches len(nodes)
    for row in csv_reader:
        data.append(row)
        for i in range(len(row)):
            data1[i].append(row[i])
        aux = aux + 1
        # Stop after 50001 rows (the first row supplies the column names below).
        if aux == 50001:
            break
# 22376.39851240954 (value left by the author; presumably a previously
# observed score -- TODO confirm)

# Rebuild `data` as {first cell of the column: remaining cells} and wrap it
# in a DataFrame.
data = {}
for i in range(len(data1)):
    data[data1[i][0]] = [data1[i][j] for j in range(1, len(data1[i]))]
data = pd.DataFrame(data)
print("Data: ")
print(data)
# Data extracted from the file.
reader = BIFReader('asia.bif')  # best network for asia, as published on bnlearn.com
asia_model = reader.get_model()  # read that model
print("Score BIC")
print(abs(BicScore(data).score(asia_model)))
# Disabled call kept from the original:
#see_annealing(states, costs)