def test_ipcmb_finding_Markov_blankets_in_graphs__as_in_pcmb_article():
    """Run IPC-MB on the example graph proposed in the PCMB article.

    The article uses this graph to illustrate the flaws of MMMB and HITON.
    """
    directed_graph = {0: [1, 2], 1: [3], 2: [3], 3: [], 4: [1]}
    bn = BayesianNetwork('testnet_a')
    bn.from_directed_graph(directed_graph)

    # First run: target 4 with the 'pc_only' parameter switched on.
    pc_parameters = make_parameters(4, bn)
    pc_parameters['pc_only'] = True
    assert AlgorithmIPCMB(None, pc_parameters).discover_mb() == [1]

    # Remaining runs use default parameters: (target, expected result).
    expected_blankets = [
        (4, [0, 1]),
        (0, [1, 2, 4]),
        (2, [0, 1, 3]),
        (1, [0, 2, 3, 4]),
    ]
    for target, expected in expected_blankets:
        parameters = make_parameters(target, bn)
        assert AlgorithmIPCMB(None, parameters).discover_mb() == expected
def test_reading_bif_file():
    """Read 'survey.bif' and compare it with the hand-built reference network."""
    bif_path = Path(testutil.bif_folder, 'survey.bif')
    reference_network = default_Bayesian_network()
    parsed_network = BayesianNetwork.from_bif_file(bif_path, use_cache=False)
    assertBayesianNetworkEqual(reference_network, parsed_network)
def test_sampling_bayesian_network_as_dataset_source__random():
    """Compare the 'random' dataset source against direct network sampling.

    The network is sampled directly after seeding ``random`` with the
    configured seed; the dataset matrix created by the source must then
    contain the same columns, split into X and Y.
    """
    configuration = default_configuration()
    configuration['method'] = 'random'
    sample_count = configuration['sample_count']

    # Reference samples, drawn straight from the network.
    random.seed(configuration['random_seed'])
    reference_bn = BayesianNetwork.from_bif_file(configuration['sourcepath'], use_cache=False)
    reference_bn.finalize()
    expected_samples = reference_bn.sample_matrix(sample_count)

    # Samples obtained through the dataset source.
    source = SampledBayesianNetworkDatasetSource(configuration)
    source.reset_random_seed = True
    datasetmatrix = source.create_dataset_matrix('test_sbnds')

    assert ['AGE', 'EDU', 'OCC', 'SEX'] == datasetmatrix.column_labels_X
    assert ['R', 'TRN'] == datasetmatrix.column_labels_Y

    expected_row_labels = ['row{}'.format(i) for i in range(0, sample_count)]
    assert expected_row_labels == datasetmatrix.row_labels

    assert (sample_count, 4) == datasetmatrix.X.get_shape()
    assert (sample_count, 2) == datasetmatrix.Y.get_shape()

    # (column in the reference sample matrix, column in X)
    for sample_column, x_column in ((0, 0), (1, 1), (2, 2), (4, 3)):
        actual = datasetmatrix.get_column_X(x_column)
        assert numpy.array_equal(expected_samples[:, sample_column], actual) is True

    # (column in the reference sample matrix, column in Y)
    for sample_column, y_column in ((3, 0), (5, 1)):
        actual = datasetmatrix.get_column_Y(y_column)
        assert numpy.array_equal(expected_samples[:, sample_column], actual) is True
def test_sampling_bayesian_network_as_dataset_source__exact():
    """Compare the 'exact' dataset source against instances of the joint PMF.

    The reference matrix is built from the instance list created by the
    network's joint PMF; the dataset matrix created by the source must
    contain the same columns, split into X and Y.
    """
    configuration = default_configuration()
    configuration['method'] = 'exact'
    sample_count = configuration['sample_count']

    # Reference instances, generated directly from the joint PMF.
    reference_bn = BayesianNetwork.from_bif_file(configuration['sourcepath'], use_cache=False)
    reference_bn.finalize()
    joint_pmf = reference_bn.create_joint_pmf()
    rows = [list(instance) for instance in joint_pmf.create_instances_list(sample_count)]
    expected_matrix = numpy.asarray(rows, dtype=numpy.int8)

    # Instances obtained through the dataset source.
    source = SampledBayesianNetworkDatasetSource(configuration)
    datasetmatrix = source.create_dataset_matrix('test_sbnds')

    assert ['AGE', 'EDU', 'OCC', 'SEX'] == datasetmatrix.column_labels_X
    assert ['R', 'TRN'] == datasetmatrix.column_labels_Y

    expected_row_labels = ['row{}'.format(i) for i in range(0, sample_count)]
    assert expected_row_labels == datasetmatrix.row_labels

    assert (sample_count, 4) == datasetmatrix.X.get_shape()
    assert (sample_count, 2) == datasetmatrix.Y.get_shape()

    # (column in the reference matrix, column in X)
    for reference_column, x_column in ((0, 0), (1, 1), (2, 2), (4, 3)):
        actual = datasetmatrix.get_column_X(x_column)
        assert numpy.array_equal(expected_matrix[:, reference_column], actual) is True

    # (column in the reference matrix, column in Y)
    for reference_column, y_column in ((3, 0), (5, 1)):
        actual = datasetmatrix.get_column_Y(y_column)
        assert numpy.array_equal(expected_matrix[:, reference_column], actual) is True
def __init__(self, configuration, finalize_bn=True):
    """Load the Bayesian network named by ``configuration['sourcepath']``.

    Args:
        configuration: mapping that must contain the key 'sourcepath',
            the BIF file to read (read with ``use_cache=True``).
        finalize_bn: when true, ``finalize()`` is called on the loaded
            network before the constructor returns.
    """
    self.configuration = configuration

    bif_path = self.configuration['sourcepath']
    network = BayesianNetwork.from_bif_file(bif_path, use_cache=True)
    self.bayesian_network = network
    if finalize_bn:
        network.finalize()

    self.reset_random_seed = True
def test_d_separation__custom_graph_3():
    """Check d-separation on the example graph from the PCMB article.

    The article's authors use this simple graph to illustrate the flaws
    found in MMMB and HITON.
    """
    directed_graph = {0: [1, 2], 1: [3], 2: [3], 3: [], 4: [1]}
    bn = BayesianNetwork('testnet_a')
    bn.from_directed_graph(directed_graph)
    bn.debug = True

    assert bn.d_separated(4, [1], 3) is False

    # Assert that [0, 1] is the Markov blanket of 4, by the Intersection
    # Property.
    intersection_cases = [
        (4, [0, 1, 3], 2),
        (4, [0, 1, 2], 3),
    ]
    for x, conditioning_set, y in intersection_cases:
        assert bn.d_separated(x, conditioning_set, y) is True

    # Assert that [0, 1] is the Markov blanket of 4, by the Contraction
    # Property.
    contraction_cases = [
        (4, [0, 1, 3], 2),
        (4, [0, 1], 3),
    ]
    for x, conditioning_set, y in contraction_cases:
        assert bn.d_separated(x, conditioning_set, y) is True
def test_d_separation__custom_graph_2():
    """Check d-separation on a graph imitating the 'survey' network.

    The graph imitates the 'survey' Bayesian network from
    http://www.bnlearn.com/bnrepository/discrete-small.html#survey
    """
    directed_graph = {0: [1], 4: [1], 1: [2, 3], 2: [5], 3: [5]}
    bn = BayesianNetwork('testnet')
    bn.from_directed_graph(directed_graph)

    # (x, conditioning set, y, expected d-separation verdict)
    cases = [
        (3, [0], 1, False),
        (3, [], 2, False),
        (3, [5], 2, False),
        (3, [1], 2, True),
    ]
    for x, conditioning_set, y, expected in cases:
        assert bn.d_separated(x, conditioning_set, y) is expected
def network_definition(self, items):
    """Assemble a BayesianNetwork from the given items.

    ``items`` may contain dicts (carrying 'network_name' and optionally
    'properties'), VariableNode objects and
    ProbabilityDistributionOfVariableNode objects. Two passes are needed:
    every variable must be registered before the distributions, which
    refer to variables by name, can be linked to them.

    Returns:
        The populated BayesianNetwork.
    """
    network = BayesianNetwork('')

    # Pass 1: pick up the network name/properties and register variables.
    for entry in items:
        if isinstance(entry, dict):
            network.name = entry['network_name']
            network.properties = entry.get('properties', {})
        if isinstance(entry, VariableNode):
            network.variable_nodes[entry.name] = entry

    # Pass 2: cross-link each distribution with its variable, and replace
    # the conditioning-variable placeholders with the actual nodes.
    distributions = [
        entry for entry in items
        if isinstance(entry, ProbabilityDistributionOfVariableNode)
    ]
    for probdist in distributions:
        owner = network.variable_nodes[probdist.variable_name]
        owner.probdist = probdist
        probdist.variable = owner

        conditioning = probdist.conditioning_variable_nodes
        for varname in conditioning.keys():
            conditioning[varname] = network.variable_nodes[varname]

    return network
def setup(self, arguments):
    """Store the arguments and load the Bayesian network they select.

    Reads ``algorithm``, ``source_type``, ``source_name`` and (for the
    'ds' source type) ``sample_count`` from ``arguments``. The network is
    read from ``<self.paths.BIFRepository>/<source_name>.bif`` with
    caching enabled, then finalized.

    Args:
        arguments: object exposing the attributes listed above.

    Raises:
        ValueError: if the source type is 'ds' and no sample count was
            provided.
    """
    # Bug fix: the arguments used to be stored only as ``self.Arguments``
    # (capital A), while every read below goes through ``self.arguments``.
    self.arguments = arguments
    # Alias kept in case code outside this method reads the old name.
    self.Arguments = arguments

    self.algorithm_name = self.arguments.algorithm
    self.algorithm_class = self.get_algorithm_class()
    self.source_type = self.arguments.source_type
    self.bayesian_network_name = self.arguments.source_name

    bn_sourcepath = self.paths.BIFRepository / self.bayesian_network_name
    bn_sourcepath = bn_sourcepath.with_suffix('.bif')
    self.bayesian_network = BayesianNetwork.from_bif_file(bn_sourcepath, use_cache=True)
    self.bayesian_network.finalize()

    if self.source_type == 'ds':
        if self.arguments.sample_count is None:
            raise ValueError('sample count required')
        # The count arrives as a string; going through float() accepts
        # forms such as '4e4' before truncating to int.
        self.sample_count_string = self.arguments.sample_count
        self.sample_count = int(float(self.sample_count_string))

    # NOTE(review): the source formatting lost its indentation, so it is
    # unclear whether this belonged to the 'ds' branch only; it is set for
    # every source type here — confirm.
    self.ci_test_significance = 0.95
def test_d_separation__custom_graph_1():
    """Check d-separation on a simple graph from Pearl's 1988 book.

    The graph is taken from "Probabilistic Reasoning in Intelligent
    Systems" by Judea Pearl, 1988.
    """
    directed_graph = {1: [2, 3], 2: [4], 3: [4], 4: [5], 5: []}
    bn = BayesianNetwork('testnet')
    bn.from_directed_graph(directed_graph)

    # (x, conditioning set, y, expected d-separation verdict)
    cases = [
        (2, [1], 3, True),
        (2, [1, 5], 3, False),
        (1, [], 2, False),
        (1, [], 3, False),
        (1, [], 4, False),
        (1, [], 5, False),
        (1, [4], 5, True),
        (2, [], 3, False),
        (2, [4], 3, False),
        (5, [2], 1, False),
        (5, [3], 1, False),
        (5, [2, 3], 1, True),
        (3, [1, 2], 5, False),
    ]
    for x, conditioning_set, y, expected in cases:
        assert bn.d_separated(x, conditioning_set, y) is expected
def bn_survey() -> BayesianNetwork:
    """Return the finalized network read from 'survey.bif' (use_cache=False)."""
    network = BayesianNetwork.from_bif_file(
        Path(testutil.bif_folder, 'survey.bif'),
        use_cache=False,
    )
    network.finalize()
    return network
def bn_andes():
    """Return the finalized network read from 'andes.bif' (use_cache=True)."""
    bif_path = Path(testutil.bif_folder, 'andes.bif')
    network = BayesianNetwork.from_bif_file(bif_path, use_cache=True)
    network.finalize()
    return network
def test_building_from_directed_graph():
    """Check the graphs stored by ``from_directed_graph``.

    For each case the network must keep the directed graph unchanged in
    ``graph_d`` and expose the expected undirected graph in ``graph_u``.
    """
    large_directed = {
        0: [32],
        1: [3, 9],
        2: [],
        3: [12],
        4: [2],
        5: [],
        6: [36],
        7: [14, 15],
        8: [13],
        9: [],
        10: [28],
        11: [],
        12: [4, 13, 14, 15],
        13: [],
        14: [],
        15: [],
        16: [20, 31],
        17: [3],
        18: [22, 26, 30, 33, 34],
        19: [26, 34],
        20: [5, 25],
        21: [11, 20, 31],
        22: [],
        23: [35],
        24: [],
        25: [],
        26: [],
        27: [24, 30],
        28: [29],
        29: [3],
        30: [29],
        31: [4],
        32: [2, 3],
        33: [1, 28],
        34: [9, 22, 33],
        35: [36],
        36: [26, 34],
    }
    large_undirected = {
        0: [32],
        1: [3, 9, 33],
        2: [4, 32],
        3: [1, 12, 17, 29, 32],
        4: [2, 12, 31],
        5: [20],
        6: [36],
        7: [14, 15],
        8: [13],
        9: [1, 34],
        10: [28],
        11: [21],
        12: [3, 4, 13, 14, 15],
        13: [8, 12],
        14: [7, 12],
        15: [7, 12],
        16: [20, 31],
        17: [3],
        18: [22, 26, 30, 33, 34],
        19: [26, 34],
        20: [5, 16, 21, 25],
        21: [11, 20, 31],
        22: [18, 34],
        23: [35],
        24: [27],
        25: [20],
        26: [18, 19, 36],
        27: [24, 30],
        28: [10, 29, 33],
        29: [3, 28, 30],
        30: [18, 27, 29],
        31: [4, 16, 21],
        32: [0, 2, 3],
        33: [1, 18, 28, 34],
        34: [9, 18, 19, 22, 33, 36],
        35: [23, 36],
        36: [6, 26, 34, 35],
    }

    small_directed = {1: [2, 3], 2: [4], 3: [4], 4: [5], 5: []}
    small_undirected = {
        1: [2, 3],
        2: [1, 4],
        3: [1, 4],
        4: [2, 3, 5],
        5: [4],
    }

    cases = [
        (large_directed, large_undirected),
        (small_directed, small_undirected),
    ]
    for directed, expected_undirected in cases:
        bn = BayesianNetwork('testnet_graph')
        bn.from_directed_graph(directed)
        assert bn.graph_d == directed
        assert bn.graph_u == expected_undirected
def test_ipcmb_finding_Markov_blankets_in_graphs__imitating_survey():
    """Run IPC-MB on variations of a graph imitating the 'survey' network.

    The base graph imitates the 'survey' Bayesian network from
    http://www.bnlearn.com/bnrepository/discrete-small.html#survey
    """

    def build_network(directed_graph):
        # A fresh network is built for every variation of the graph.
        network = BayesianNetwork('testnet')
        network.from_directed_graph(directed_graph)
        return network

    def discover(network, target):
        parameters = make_parameters(target, network)
        return AlgorithmIPCMB(None, parameters).discover_mb()

    graph = {0: [1], 4: [1], 1: [2, 3], 2: [5], 3: [5]}
    bn = build_network(graph)
    assert discover(bn, 3) == [1, 2, 5]
    assert discover(bn, 1) == [0, 2, 3, 4]

    # Remove the edge 1 → 2 from the Bayesian network.
    graph[1] = [3]
    bn = build_network(graph)
    assert discover(bn, 3) == [1, 2, 5]
    assert discover(bn, 1) == [0, 3, 4]

    # Replace the edge from 1 → 3 with 1 → 2.
    graph[1] = [2]
    bn = build_network(graph)
    assert discover(bn, 3) == [2, 5]
    assert discover(bn, 1) == [0, 2, 4]
def make_test_bayesian_network(configuration):
    """Load and finalize the network at ``configuration['sourcepath']``.

    The load happens while holding a 'w' Lock whose name is derived from
    the source file's name ('bn-' + file name).
    """
    source_path = configuration['sourcepath']
    lock_name = 'bn-' + source_path.name
    # NOTE(review): the original formatting was lost; finalize() is assumed
    # to run while the lock is still held — confirm.
    with Lock(lock_name, 'w'):
        network = BayesianNetwork.from_bif_file(source_path, use_cache=True)
        network.finalize()
    return network
def bn_lc_repaired() -> BayesianNetwork:
    """Return the finalized network read from 'lc_repaired.bif' (use_cache=False)."""
    network = BayesianNetwork.from_bif_file(
        Path(testutil.bif_folder, 'lc_repaired.bif'),
        use_cache=False,
    )
    network.finalize()
    return network
def bn_lungcancer() -> BayesianNetwork:
    """Return the finalized network read from 'lungcancer.bif' (use_cache=False)."""
    network = BayesianNetwork.from_bif_file(
        Path(testutil.bif_folder, 'lungcancer.bif'),
        use_cache=False,
    )
    network.finalize()
    return network
def test_conditional_pmf__from_bayesian_network():
    """Compare PMFs/CPMFs computed from sampled data with the network's own.

    A dataset is sampled from 'survey.bif'; the (conditional) probability
    mass functions computed from its columns must almost equal the
    probability distributions stored in the network's variable nodes.
    """
    # Using a random seed of 42 somehow requires 2e6 samples to pass, but
    # with the seed 1984, it is sufficient to generate only 4e4. Maybe the
    # random generator is biased somehow?
    configuration = {
        'sourcepath': testutil.bif_folder / 'survey.bif',
        'sample_count': int(4e4),
        'random_seed': 1984,
        'values_as_indices': False,
        'objectives': ['R', 'TRN'],
    }

    bayesian_network = BayesianNetwork.from_bif_file(configuration['sourcepath'], use_cache=False)
    bayesian_network.finalize()

    source = SampledBayesianNetworkDatasetSource(configuration)
    source.reset_random_seed = True
    datasetmatrix = source.create_dataset_matrix('test_sbnds')

    assert ['AGE', 'EDU', 'OCC', 'SEX'] == datasetmatrix.column_labels_X
    assert ['R', 'TRN'] == datasetmatrix.column_labels_Y

    def sampled(matrix_name, label):
        # Wrap one labelled column of the sampled dataset as a Variable.
        return Variable(datasetmatrix.get_column_by_label(matrix_name, label))

    def reference(label):
        # The distribution the network itself stores for this variable.
        return bayesian_network.variable_nodes[label].probdist

    # Unconditioned variables.
    AGE = sampled('X', 'AGE')
    PrAge = PMF(AGE)
    SEX = sampled('X', 'SEX')
    PrSex = PMF(SEX)
    assert_PMF_AlmostEquals_BNProbDist(reference('AGE'), PrAge)
    assert_PMF_AlmostEquals_BNProbDist(reference('SEX'), PrSex)

    # Conditioned variables, each given the variables it is conditioned on.
    EDU = sampled('X', 'EDU')
    PrEdu = CPMF(EDU, given=JointVariables(AGE, SEX))
    assert_CPMF_AlmostEquals_BNProbDist(reference('EDU'), PrEdu)

    OCC = sampled('X', 'OCC')
    PrOcc = CPMF(OCC, given=EDU)
    assert_CPMF_AlmostEquals_BNProbDist(reference('OCC'), PrOcc)

    R = sampled('Y', 'R')
    PrR = CPMF(R, given=EDU)
    assert_CPMF_AlmostEquals_BNProbDist(reference('R'), PrR)

    TRN = sampled('Y', 'TRN')
    PrTRN = CPMF(TRN, given=JointVariables(OCC, R))
    assert_CPMF_AlmostEquals_BNProbDist(reference('TRN'), PrTRN)
def bn_pathfinder():
    """Return the finalized network read from 'pathfinder.bif' (use_cache=True)."""
    network = BayesianNetwork.from_bif_file(
        Path(testutil.bif_folder, 'pathfinder.bif'),
        use_cache=True,
    )
    network.finalize()
    return network
def default_Bayesian_network():
    """Build by hand the 'survey' network used as a reference in tests.

    Six variable nodes are created (AGE, SEX, EDU, OCC, R, TRN), each with
    its values, a 'label' property and a probability distribution whose
    conditioning variables are listed in order. Unconditioned
    distributions use the single key '<unconditioned>'; conditioned ones
    are keyed by tuples of conditioning values.

    Returns:
        A BayesianNetwork named 'survey' with properties
        ``{'testing': 'yes'}`` and the six variable nodes.
    """
    AGE = VariableNode('AGE')
    AGE.values = ['young', 'adult', 'old']
    AGE.properties = {'label': 'age'}
    AGE.probdist = ProbabilityDistributionOfVariableNode(AGE)
    AGE.probdist.conditioning_variable_nodes = OrderedDict()
    AGE.probdist.probabilities = {'<unconditioned>': [0.3, 0.5, 0.2]}

    SEX = VariableNode('SEX')
    SEX.values = ['M', 'F']
    SEX.properties = {'label': 'sex'}
    SEX.probdist = ProbabilityDistributionOfVariableNode(SEX)
    SEX.probdist.conditioning_variable_nodes = OrderedDict()
    SEX.probdist.probabilities = {'<unconditioned>': [0.49, 0.51]}

    EDU = VariableNode('EDU')
    EDU.values = ['highschool', 'uni']
    EDU.properties = {'label': 'education'}
    # Bug fix: this distribution used to be constructed with AGE instead of
    # EDU, unlike every other node, which passes itself.
    EDU.probdist = ProbabilityDistributionOfVariableNode(EDU)
    EDU.probdist.conditioning_variable_nodes = OrderedDict([('AGE', AGE), ('SEX', SEX)])
    EDU.probdist.probabilities = {
        ('young', 'M'): [0.75, 0.25],
        ('young', 'F'): [0.64, 0.36],
        ('adult', 'M'): [0.72, 0.28],
        ('adult', 'F'): [0.70, 0.30],
        ('old', 'M'): [0.88, 0.12],
        ('old', 'F'): [0.90, 0.10],
    }

    OCC = VariableNode('OCC')
    OCC.values = ['emp', 'self']
    OCC.properties = {'label': 'occupation'}
    OCC.probdist = ProbabilityDistributionOfVariableNode(OCC)
    OCC.probdist.conditioning_variable_nodes = OrderedDict([('EDU', EDU)])
    OCC.probdist.probabilities = {
        ('highschool', ): [0.96, 0.04],
        ('uni', ): [0.92, 0.08],
    }

    R = VariableNode('R')
    R.values = ['small', 'big']
    R.properties = {'label': 'unknown'}
    R.probdist = ProbabilityDistributionOfVariableNode(R)
    R.probdist.conditioning_variable_nodes = OrderedDict([('EDU', EDU)])
    R.probdist.probabilities = {
        ('highschool', ): [0.25, 0.75],
        ('uni', ): [0.2, 0.8],
    }

    TRN = VariableNode('TRN')
    TRN.values = ['car', 'train', 'other']
    TRN.properties = {'label': 'transportation'}
    TRN.probdist = ProbabilityDistributionOfVariableNode(TRN)
    TRN.probdist.conditioning_variable_nodes = OrderedDict([('OCC', OCC), ('R', R)])
    TRN.probdist.probabilities = {
        ('emp', 'small'): [0.48, 0.42, 0.10],
        ('self', 'small'): [0.56, 0.36, 0.08],
        ('emp', 'big'): [0.58, 0.24, 0.18],
        ('self', 'big'): [0.70, 0.21, 0.09],
    }

    BN = BayesianNetwork('survey')
    BN.properties = {'testing': 'yes'}
    BN.variable_nodes = {
        'AGE': AGE,
        'SEX': SEX,
        'EDU': EDU,
        'OCC': OCC,
        'R': R,
        'TRN': TRN,
    }
    return BN