def test_partition_function(self): self.graph.add_nodes_from(['a', 'b', 'c']) phi1 = DiscreteFactor(['a', 'b'], [2, 2], range(4)) phi2 = DiscreteFactor(['b', 'c'], [2, 2], range(4)) self.graph.add_factors(phi1, phi2) self.graph.add_edges_from([('a', 'b'), ('b', 'c')]) self.assertEqual(self.graph.get_partition_function(), 22.0)
def setUp(self): self.junction_tree = JunctionTree([(('A', 'B'), ('B', 'C')), (('B', 'C'), ('C', 'D'))]) phi1 = Factor(['A', 'B'], [2, 3], range(6)) phi2 = Factor(['B', 'C'], [3, 2], range(6)) phi3 = Factor(['C', 'D'], [2, 2], range(4)) self.junction_tree.add_factors(phi1, phi2, phi3) self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')]) cpd_a = TabularCPD('A', 2, [[0.2], [0.8]]) cpd_r = TabularCPD('R', 2, [[0.4], [0.6]]) cpd_j = TabularCPD('J', 2, [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]], ['R', 'A'], [2, 2]) cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2]) cpd_l = TabularCPD('L', 2, [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]], ['G', 'J'], [2, 2]) cpd_g = TabularCPD('G', 2, [[0.6], [0.4]]) self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
def get_grammar(self): """ Returns the grammar of the UAI file. """ network_name = Word(alphas).setResultsName('network_name') no_variables = Word(nums).setResultsName('no_variables') grammar = network_name + no_variables self.no_variables = int(grammar.parseString(self.network)['no_variables']) domain_variables = (Word(nums)*self.no_variables).setResultsName('domain_variables') grammar += domain_variables no_functions = Word(nums).setResultsName('no_functions') grammar += no_functions self.no_functions = int(grammar.parseString(self.network)['no_functions']) integer = Word(nums).setParseAction(lambda t: int(t[0])) for function in range(0, self.no_functions): scope_grammar = Word(nums).setResultsName('fun_scope_' + str(function)) grammar += scope_grammar function_scope = grammar.parseString(self.network)['fun_scope_' + str(function)] function_grammar = ((integer)*int(function_scope)).setResultsName('fun_' + str(function)) grammar += function_grammar floatnumber = Combine(Word(nums) + Optional(Literal(".") + Optional(Word(nums)))) for function in range(0, self.no_functions): no_values_grammar = Word(nums).setResultsName('fun_no_values_' + str(function)) grammar += no_values_grammar no_values = grammar.parseString(self.network)['fun_no_values_' + str(function)] values_grammar = ((floatnumber)*int(no_values)).setResultsName('fun_values_' + str(function)) grammar += values_grammar return grammar
def test_partition_function_raises_error(self): self.graph.add_nodes_from(['a', 'b', 'c', 'd']) phi1 = Factor(['a', 'b'], [2, 2], range(4)) phi2 = Factor(['b', 'c'], [2, 2], range(4)) self.graph.add_factors(phi1, phi2) self.assertRaises(ValueError, self.graph.get_partition_function)
def test_remove_single_factor(self): self.graph.add_nodes_from(['a', 'b', 'c']) phi1 = DiscreteFactor(['a', 'b'], [2, 2], range(4)) phi2 = DiscreteFactor(['b', 'c'], [2, 2], range(4)) self.graph.add_factors(phi1, phi2) self.graph.remove_factors(phi1) six.assertCountEqual(self, self.graph.factors, [phi2])
def test_get_partition_function(self): phi1 = Factor(['a', 'b'], [2, 2], range(4)) phi2 = Factor(['b', 'c'], [2, 2], range(4)) self.graph.add_edges_from([('a', phi1), ('b', phi1), ('b', phi2), ('c', phi2)]) self.graph.add_factors(phi1, phi2) self.assertEqual(self.graph.get_partition_function(), 22.0)
def _str(self, phi_or_p="phi", tablefmt="fancy_grid", print_state_names=True): """ Generate the string from `__str__` method. Parameters ---------- phi_or_p: 'phi' | 'p' 'phi': When used for Factors. 'p': When used for CPDs. print_state_names: boolean If True, the user defined state names are displayed. """ string_header = list(map(lambda x: six.text_type(x), self.scope())) string_header.append('{phi_or_p}({variables})'.format(phi_or_p=phi_or_p, variables=','.join(string_header))) value_index = 0 factor_table = [] for prob in product(*[range(card) for card in self.cardinality]): if self.state_names and print_state_names: prob_list = ["{var}({state})".format( var=list(self.variables)[i], state=self.state_names[list( self.variables)[i]][prob[i]]) for i in range(len(self.variables))] else: prob_list = ["{s}_{d}".format(s=list(self.variables)[i], d=prob[i]) for i in range(len(self.variables))] prob_list.append(self.values.ravel()[value_index]) factor_table.append(prob_list) value_index += 1 return tabulate(factor_table, headers=string_header, tablefmt=tablefmt, floatfmt=".4f")
def test_remove_multiple_factors(self): self.graph.add_nodes_from(['a', 'b', 'c']) phi1 = Factor(['a', 'b'], [2, 2], range(4)) phi2 = Factor(['b', 'c'], [2, 2], range(4)) self.graph.add_factors(phi1, phi2) self.graph.remove_factors(phi1, phi2) six.assertCountEqual(self, self.graph.factors, [])
def test_get_factors(self): self.graph.add_nodes_from(['a', 'b', 'c']) phi1 = Factor(['a', 'b'], [2, 2], range(4)) phi2 = Factor(['b', 'c'], [2, 2], range(4)) six.assertCountEqual(self, self.graph.get_factors(), []) self.graph.add_factors(phi1, phi2) six.assertCountEqual(self, self.graph.get_factors(), [phi1, phi2])
def sample(self, start_state=None, size=1): """ Sample from the Markov Chain. Parameters: ----------- start_state: dict or array-like iterable Representing the starting states of the variables. If None is passed, a random start_state is chosen. size: int Number of samples to be generated. Return Type: ------------ pandas.DataFrame Examples: --------- >>> from pgmpy.models import MarkovChain as MC >>> from pgmpy.factors.discrete import State >>> model = MC(['intel', 'diff'], [2, 3]) >>> model.set_start_state([State('intel', 0), State('diff', 2)]) >>> intel_tm = {0: {0: 0.25, 1: 0.75}, 1: {0: 0.5, 1: 0.5}} >>> model.add_transition_model('intel', intel_tm) >>> diff_tm = {0: {0: 0.1, 1: 0.5, 2: 0.4}, 1: {0: 0.2, 1: 0.2, 2: 0.6 }, 2: {0: 0.7, 1: 0.15, 2: 0.15}} >>> model.add_transition_model('diff', diff_tm) >>> model.sample(size=5) intel diff 0 0 2 1 1 0 2 0 1 3 1 0 4 0 2 """ if start_state is None: if self.state is None: self.state = self.random_state() # else use previously-set state else: self.set_start_state(start_state) sampled = DataFrame(index=range(size), columns=self.variables) sampled.loc[0] = [st for var, st in self.state] var_states = defaultdict(dict) var_values = defaultdict(dict) samples = defaultdict(dict) for var in self.transition_models.keys(): for st in self.transition_models[var]: var_states[var][st] = list(self.transition_models[var][st].keys()) var_values[var][st] = list(self.transition_models[var][st].values()) samples[var][st] = sample_discrete(var_states[var][st], var_values[var][st], size=size) for i in range(size - 1): for j, (var, st) in enumerate(self.state): next_st = samples[var][st][i] self.state[j] = State(var, next_st) sampled.loc[i + 1] = [st for var, st in self.state] return sampled
def setUp(self): self.phi = Factor(['x1', 'x2', 'x3'], [2, 2, 2], np.random.uniform(5, 10, size=8)) self.phi1 = Factor(['x1', 'x2', 'x3'], [2, 3, 2], range(12)) self.phi2 = Factor([('x1', 0), ('x2', 0), ('x3', 0)], [2, 3, 2], range(12)) # This larger factor (phi3) caused a bug in reduce card3 = [3, 3, 3, 2, 2, 2, 2, 2, 2] self.phi3 = Factor(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'], card3, np.arange(np.prod(card3), dtype=np.float))
def test_factorset_divide(self): phi1 = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(1, 13)) phi2 = DiscreteFactor(['x3', 'x4', 'x1'], [2, 2, 2], range(1, 9)) factor_set1 = FactorSet(phi1, phi2) phi3 = DiscreteFactor(['x5', 'x6', 'x7'], [2, 2, 2], range(1, 9)) phi4 = DiscreteFactor(['x5', 'x7', 'x8'], [2, 2, 2], range(1, 9)) factor_set2 = FactorSet(phi3, phi4) factor_set3 = factor_set2.divide(factor_set1, inplace=False) self.assertEqual({phi3, phi4, phi1.identity_factor() / phi1, phi2.identity_factor() / phi2}, factor_set3.factors)
def sample(self, start_state=None, size=1, return_type="dataframe"): """ Sample from the Markov Chain. Parameters: ----------- start_state: dict or array-like iterable Representing the starting states of the variables. If None is passed, a random start_state is chosen. size: int Number of samples to be generated. return_type: string (dataframe | recarray) Return type for samples, either of 'dataframe' or 'recarray'. Defaults to 'dataframe' Returns ------- sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument the generated samples Examples: --------- >>> from pgmpy.factors import DiscreteFactor >>> from pgmpy.inference import GibbsSampling >>> from pgmpy.models import MarkovModel >>> model = MarkovModel([('A', 'B'), ('C', 'B')]) >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4]) >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8]) >>> model.add_factors(factor_ab, factor_cb) >>> gibbs = GibbsSampling(model) >>> gibbs.sample(size=4, return_tupe='dataframe') A B C 0 0 1 1 1 1 0 0 2 1 1 0 3 1 1 1 """ if start_state is None and self.state is None: self.state = self.random_state() elif start_state is not None: self.set_start_state(start_state) types = [(var_name, 'int') for var_name in self.variables] sampled = np.zeros(size, dtype=types).view(np.recarray) sampled[0] = np.array([st for var, st in self.state]) for i in range(size - 1): for j, (var, st) in enumerate(self.state): other_st = tuple(st for v, st in self.state if var != v) next_st = sample_discrete(list(range(self.cardinalities[var])), self.transition_models[var][other_st])[0] self.state[j] = State(var, next_st) sampled[i + 1] = np.array([st for var, st in self.state]) return _return_samples(return_type, sampled)
def get_partition_function(self): """ Returns the partition function for a given undirected graph. A partition function is defined as .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i) where m is the number of factors present in the graph and X are all the random variables present. Examples -------- >>> from pgmpy.models import MarkovModel >>> from pgmpy.factors.discrete import DiscreteFactor >>> G = MarkovModel() >>> G.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']) >>> G.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'), ... ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'), ... ('x4', 'x7'), ('x5', 'x7')]) >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in G.edges()] >>> G.add_factors(*phi) >>> G.get_partition_function() """ self.check_model() factor = self.factors[0] factor = factor_product(factor, *[self.factors[i] for i in range(1, len(self.factors))]) if set(factor.scope()) != set(self.nodes()): raise ValueError('DiscreteFactor for all the random variables not defined.') return np.sum(factor.values)
def get_partition_function(self): r""" Returns the partition function for a given undirected graph. A partition function is defined as .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i) where m is the number of factors present in the graph and X are all the random variables present. Examples -------- >>> from pgmpy.models import ClusterGraph >>> from pgmpy.factors import Factor >>> G = ClusterGraph() >>> G.add_nodes_from([('a', 'b', 'c'), ('a', 'b'), ('a', 'c')]) >>> G.add_edges_from([(('a', 'b', 'c'), ('a', 'b')), ... (('a', 'b', 'c'), ('a', 'c'))]) >>> phi1 = Factor(['a', 'b', 'c'], [2, 2, 2], np.random.rand(8)) >>> phi2 = Factor(['a', 'b'], [2, 2], np.random.rand(4)) >>> phi3 = Factor(['a', 'c'], [2, 2], np.random.rand(4)) >>> G.add_factors(phi1, phi2, phi3) >>> G.get_partition_function() """ if self.check_model(): factor = self.factors[0] factor = factor_product(factor, *[self.factors[i] for i in range(1, len(self.factors))]) return np.sum(factor.values)
def __eq__(self, other): if not (isinstance(self, DiscreteFactor) and isinstance(other, DiscreteFactor)): return False elif set(self.scope()) != set(other.scope()): return False else: phi = other.copy() for axis in range(self.values.ndim): exchange_index = phi.variables.index(self.variables[axis]) phi.variables[axis], phi.variables[exchange_index] = (phi.variables[exchange_index], phi.variables[axis]) phi.cardinality[axis], phi.cardinality[exchange_index] = (phi.cardinality[exchange_index], phi.cardinality[axis]) phi.values = phi.values.swapaxes(axis, exchange_index) if phi.values.shape != self.values.shape: return False elif not np.allclose(phi.values, self.values): return False elif not all(self.cardinality == phi.cardinality): return False else: return True
def _get_kernel_from_markov_model(self, model): """ Computes the Gibbs transition models from a Markov Network. 'Probabilistic Graphical Model Principles and Techniques', Koller and Friedman, Section 12.3.3 pp 512-513. Parameters: ----------- model: MarkovModel The model from which probabilities will be computed. """ self.variables = np.array(model.nodes()) factors_dict = {var: [] for var in self.variables} for factor in model.get_factors(): for var in factor.scope(): factors_dict[var].append(factor) # Take factor product factors_dict = {var: factor_product(*factors) if len(factors) > 1 else factors[0] for var, factors in factors_dict.items()} self.cardinalities = {var: factors_dict[var].get_cardinality([var])[var] for var in self.variables} for var in self.variables: other_vars = [v for v in self.variables if var != v] other_cards = [self.cardinalities[v] for v in other_vars] kernel = {} factor = factors_dict[var] scope = set(factor.scope()) for tup in itertools.product(*[range(card) for card in other_cards]): states = [State(var, s) for var, s in zip(other_vars, tup) if var in scope] reduced_factor = factor.reduce(states, inplace=False) kernel[tup] = reduced_factor.values / sum(reduced_factor.values) self.transition_models[var] = kernel
def get_tables(self): """ Returns list of tuple of child variable and CPD in case of Bayesian and list of tuple of scope of variables and values in case of Markov. Returns ------- list : list of tuples of child variable and values in Bayesian list of tuples of scope of variables and values in case of Markov. Example ------- >>> reader = UAIReader('TestUAI.uai') >>> reader.get_tables() [(['var_0', 'var_1'], ['4.000', '2.400', '1.000', '0.000']), (['var_0', 'var_1', 'var_2'], ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000', '10.0000', '1.8750', '4.0000', '3.3330', '2.0000', '2.0000', '3.4000'])] """ tables = [] for function in range(0, self.no_functions): function_variables = self.grammar.parseString(self.network)['fun_' + str(function)] if isinstance(function_variables, int): function_variables = [function_variables] if self.network_type == 'BAYES': child_var = "var_" + str(function_variables[-1]) values = self.grammar.parseString(self.network)['fun_values_' + str(function)] tables.append((child_var, list(values))) elif self.network_type == "MARKOV": function_variables = ["var_" + str(var) for var in function_variables] values = self.grammar.parseString(self.network)['fun_values_' + str(function)] tables.append((function_variables, list(values))) return tables
def _get_kernel_from_bayesian_model(self, model): """ Computes the Gibbs transition models from a Bayesian Network. 'Probabilistic Graphical Model Principles and Techniques', Koller and Friedman, Section 12.3.3 pp 512-513. Parameters: ----------- model: BayesianModel The model from which probabilities will be computed. """ self.variables = np.array(model.nodes()) self.cardinalities = {var: model.get_cpds(var).variable_card for var in self.variables} for var in self.variables: other_vars = [v for v in self.variables if var != v] other_cards = [self.cardinalities[v] for v in other_vars] cpds = [cpd for cpd in model.cpds if var in cpd.scope()] prod_cpd = factor_product(*cpds) kernel = {} scope = set(prod_cpd.scope()) for tup in itertools.product(*[range(card) for card in other_cards]): states = [State(v, s) for v, s in zip(other_vars, tup) if v in scope] prod_cpd_reduced = prod_cpd.reduce(states, inplace=False) kernel[tup] = prod_cpd_reduced.values / sum(prod_cpd_reduced.values) self.transition_models[var] = kernel
def get_edges(self): """ Returns the edges of the network. Returns ------- set: set containing the edges of the network Example ------- >>> reader = UAIReader('TestUAI.uai') >>> reader.get_edges() {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')} """ edges = [] for function in range(0, self.no_functions): function_variables = self.grammar.parseString( self.network)['fun_' + str(function)] if isinstance(function_variables, int): function_variables = [function_variables] if self.network_type == 'BAYES': child_var = "var_" + str(function_variables[-1]) function_variables = function_variables[:-1] for var in function_variables: edges.append((child_var, "var_" + str(var))) elif self.network_type == "MARKOV": function_variables = [ "var_" + str(var) for var in function_variables ] edges.extend(list(combinations(function_variables, 2))) return set(edges)
def get_independencies(self, latex=False): """ Computes independencies in the Bayesian Network, by checking d-seperation. Parameters ---------- latex: boolean If latex=True then latex string of the independence assertion would be created. Examples -------- >>> from pgmpy.models import BayesianModel >>> chain = BayesianModel([('X', 'Y'), ('Y', 'Z')]) >>> chain.get_independencies() (X _|_ Z | Y) (Z _|_ X | Y) """ independencies = Independencies() for start in (self.nodes()): rest = set(self.nodes()) - {start} for r in range(len(rest)): for observed in itertools.combinations(rest, r): d_seperated_variables = rest - set(observed) - set( self.active_trail_nodes(start, observed=observed)[start]) if d_seperated_variables: independencies.add_assertions([start, d_seperated_variables, observed]) independencies.reduce() if not latex: return independencies else: return independencies.latex_string()
def get_partition_function(self): """ Returns the partition function for a given undirected graph. A partition function is defined as .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i) where m is the number of factors present in the graph and X are all the random variables present. Examples -------- >>> from pgmpy.models import FactorGraph >>> from pgmpy.factors.discrete import DiscreteFactor >>> G = FactorGraph() >>> G.add_nodes_from(['a', 'b', 'c']) >>> phi1 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4)) >>> phi2 = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4)) >>> G.add_factors(phi1, phi2) >>> G.add_nodes_from([phi1, phi2]) >>> G.add_edges_from([('a', phi1), ('b', phi1), ... ('b', phi2), ('c', phi2)]) >>> G.get_factors() >>> G.get_partition_function() """ factor = self.factors[0] factor = factor_product(factor, *[self.factors[i] for i in range(1, len(self.factors))]) if set(factor.scope()) != set(self.get_variable_nodes()): raise ValueError('DiscreteFactor for all the random variables not defined.') return np.sum(factor.values)
def test_eq1(self): phi1 = Factor(['x1', 'x2', 'x3'], [2, 4, 3], range(24)) phi2 = Factor(['x2', 'x1', 'x3'], [4, 2, 3], [0, 1, 2, 12, 13, 14, 3, 4, 5, 15, 16, 17, 6, 7, 8, 18, 19, 20, 9, 10, 11, 21, 22, 23]) self.assertTrue(phi1, phi2)
def get_states(self): """ Add outcome to variables of XMLBIF Return ------ dict: dict of type {variable: outcome tags} Examples -------- >>> writer = XMLBIFWriter(model) >>> writer.get_states() {'dog-out': [<Element OUTCOME at 0x7ffbabfcdec8>, <Element OUTCOME at 0x7ffbabfcdf08>], 'family-out': [<Element OUTCOME at 0x7ffbabfd4108>, <Element OUTCOME at 0x7ffbabfd4148>], 'bowel-problem': [<Element OUTCOME at 0x7ffbabfd4088>, <Element OUTCOME at 0x7ffbabfd40c8>], 'hear-bark': [<Element OUTCOME at 0x7ffbabfcdf48>, <Element OUTCOME at 0x7ffbabfcdf88>], 'light-on': [<Element OUTCOME at 0x7ffbabfcdfc8>, <Element OUTCOME at 0x7ffbabfd4048>]} """ outcome_tag = {} cpds = self.model.get_cpds() for cpd in cpds: var = cpd.variable outcome_tag[var] = [] for state in [State(var, state) for state in range(cpd.get_cardinality([var])[var])]: # for state in [cpd.variables[var]: state_tag = etree.SubElement(self.variables[var], "OUTCOME") state_tag.text = str(state.state) outcome_tag[var].append(state_tag) return outcome_tag
def set_distributions(self): """ Set distributions in the network. Examples -------- >>> from pgmpy.readwrite.XMLBeliefNetwork import XBNWriter >>> writer =XBNWriter() >>> writer.set_distributions() """ distributions = etree.SubElement(self.bnmodel, 'DISTRIBUTIONS') cpds = self.model.get_cpds() cpds.sort(key=lambda x: x.variable) for cpd in cpds: cpd_values = cpd.values.ravel() var = cpd.variable dist = etree.SubElement(distributions, 'DIST', attrib={'TYPE': self.model.node[var]['TYPE']}) etree.SubElement(dist, 'PRIVATE', attrib={'NAME': var}) dpis = etree.SubElement(dist, 'DPIS') if len(cpd.evidence): condset = etree.SubElement(dist, 'CONDSET') for condelem in sorted(cpd.evidence): etree.SubElement(condset, 'CONDELEM', attrib={'NAME': condelem}) # TODO: Get Index value. for val in range(0, len(cpd_values), 2): etree.SubElement(dpis, "DPI", attrib={'INDEXES': ' '}).text = \ " " + str(cpd_values[val]) + " " + str(cpd_values[val+1]) + " " else: etree.SubElement(dpis, "DPI").text = ' ' + ' '.join(map(str, cpd_values))
def test_max_calibrate_clique_belief(self): belief_propagation = BeliefPropagation(self.junction_tree) belief_propagation.max_calibrate() clique_belief = belief_propagation.get_clique_beliefs() phi1 = Factor(['A', 'B'], [2, 3], range(6)) phi2 = Factor(['B', 'C'], [3, 2], range(6)) phi3 = Factor(['C', 'D'], [2, 2], range(4)) b_A_B = phi1 * (phi3.maximize(['D'], inplace=False) * phi2).maximize(['C'], inplace=False) b_B_C = phi2 * (phi1.maximize(['A'], inplace=False) * phi3.maximize(['D'], inplace=False)) b_C_D = phi3 * (phi1.maximize(['A'], inplace=False) * phi2).maximize(['B'], inplace=False) np_test.assert_array_almost_equal(clique_belief[('A', 'B')].values, b_A_B.values) np_test.assert_array_almost_equal(clique_belief[('B', 'C')].values, b_B_C.values) np_test.assert_array_almost_equal(clique_belief[('C', 'D')].values, b_C_D.values)
def get_edges(self): """ Returns the edges of the network. Returns ------- set: set containing the edges of the network Example ------- >>> reader = UAIReader('TestUAI.uai') >>> reader.get_edges() {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')} """ edges = [] for function in range(0, self.no_functions): function_variables = self.grammar.parseString(self.network)['fun_' + str(function)] if isinstance(function_variables, int): function_variables = [function_variables] if self.network_type == 'BAYES': child_var = "var_" + str(function_variables[-1]) function_variables = function_variables[:-1] for var in function_variables: edges.append((child_var, "var_" + str(var))) elif self.network_type == "MARKOV": function_variables = ["var_" + str(var) for var in function_variables] edges.extend(list(combinations(function_variables, 2))) return set(edges)
def get_states(self): """ Add states to variable of BIF Returns ------- dict: dict of type {variable: a list of states} Example ------- >>> from pgmpy.readwrite import BIFReader, BIFWriter >>> model = BIFReader('dog-problem.bif').get_model() >>> writer = BIFWriter(model) >>> writer.get_states() {'bowel-problem': ['bowel-problem_0', 'bowel-problem_1'], 'dog-out': ['dog-out_0', 'dog-out_1'], 'family-out': ['family-out_0', 'family-out_1'], 'hear-bark': ['hear-bark_0', 'hear-bark_1'], 'light-on': ['light-on_0', 'light-on_1']} """ variable_states = {} cpds = self.model.get_cpds() for cpd in cpds: variable = cpd.variable variable_states[variable] = [] for state in range(cpd.get_cardinality([variable])[variable]): variable_states[variable].append(str(variable)+'_'+str(state)) return variable_states
def get_partition_function(self): """ Returns the partition function for a given undirected graph. A partition function is defined as .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i) where m is the number of factors present in the graph and X are all the random variables present. Examples -------- >>> from pgmpy.models import FactorGraph >>> from pgmpy.factors import Factor >>> G = FactorGraph() >>> G.add_nodes_from(['a', 'b', 'c']) >>> phi1 = Factor(['a', 'b'], [2, 2], np.random.rand(4)) >>> phi2 = Factor(['b', 'c'], [2, 2], np.random.rand(4)) >>> G.add_factors(phi1, phi2) >>> G.add_nodes_from([phi1, phi2]) >>> G.add_edges_from([('a', phi1), ('b', phi1), ... ('b', phi2), ('c', phi2)]) >>> G.get_factors() >>> G.get_partition_function() """ factor = self.factors[0] factor = factor_product(factor, *[self.factors[i] for i in range(1, len(self.factors))]) if set(factor.scope()) != set(self.get_variable_nodes()): raise ValueError('Factor for all the random variables not defined.') return np.sum(factor.values)
def get_states(self): """ Add states to variable of BIF Returns ------- dict: dict of type {variable: a list of states} Example ------- >>> from pgmpy.readwrite import BIFReader, BIFWriter >>> model = BIFReader('dog-problem.bif').get_model() >>> writer = BIFWriter(model) >>> writer.get_states() {'bowel-problem': ['bowel-problem_0', 'bowel-problem_1'], 'dog-out': ['dog-out_0', 'dog-out_1'], 'family-out': ['family-out_0', 'family-out_1'], 'hear-bark': ['hear-bark_0', 'hear-bark_1'], 'light-on': ['light-on_0', 'light-on_1']} """ variable_states = {} cpds = self.model.get_cpds() for cpd in cpds: variable = cpd.variable variable_states[variable] = [] for state in range(cpd.get_cardinality([variable])[variable]): variable_states[variable].append(str(variable) + '_' + str(state)) return variable_states
def _get_kernel_from_bayesian_model(self, model): """ Computes the Gibbs transition models from a Bayesian Network. 'Probabilistic Graphical Model Principles and Techniques', Koller and Friedman, Section 12.3.3 pp 512-513. Parameters: ----------- model: BayesianModel The model from which probabilities will be computed. """ self.variables = np.array(model.nodes()) self.cardinalities = { var: model.get_cpds(var).variable_card for var in self.variables } for var in self.variables: other_vars = [v for v in self.variables if var != v] other_cards = [self.cardinalities[v] for v in other_vars] cpds = [cpd for cpd in model.cpds if var in cpd.scope()] prod_cpd = factor_product(*cpds) kernel = {} scope = set(prod_cpd.scope()) for tup in itertools.product( *[range(card) for card in other_cards]): states = [ State(v, s) for v, s in zip(other_vars, tup) if v in scope ] prod_cpd_reduced = prod_cpd.reduce(states, inplace=False) kernel[tup] = prod_cpd_reduced.values / sum( prod_cpd_reduced.values) self.transition_models[var] = kernel
def sample(self, start_state=None, size=1): """ Sample from the Markov Chain. Parameters: ----------- start_state: dict or array-like iterable Representing the starting states of the variables. If None is passed, a random start_state is chosen. size: int Number of samples to be generated. Return Type: ------------ pandas.DataFrame Examples: --------- >>> from pgmpy.factors import Factor >>> from pgmpy.inference import GibbsSampling >>> from pgmpy.models import MarkovModel >>> model = MarkovModel([('A', 'B'), ('C', 'B')]) >>> factor_ab = Factor(['A', 'B'], [2, 2], [1, 2, 3, 4]) >>> factor_cb = Factor(['C', 'B'], [2, 2], [5, 6, 7, 8]) >>> model.add_factors(factor_ab, factor_cb) >>> gibbs = GibbsSampling(model) >>> gibbs.sample(size=4) A B C 0 0 1 1 1 1 0 0 2 1 1 0 3 1 1 1 """ if start_state is None and self.state is None: self.state = self.random_state() else: self.set_start_state(start_state) sampled = DataFrame(index=range(size), columns=self.variables) sampled.loc[0] = [st for var, st in self.state] for i in range(size - 1): for j, (var, st) in enumerate(self.state): other_st = tuple(st for v, st in self.state if var != v) next_st = sample_discrete(list(range(self.cardinalities[var])), self.transition_models[var][other_st])[0] self.state[j] = State(var, next_st) sampled.loc[i + 1] = [st for var, st in self.state] return sampled
def reduce(self, values, inplace=True): """ Reduces the factor to the context of given variable values. Parameters ---------- values: list, array-like A list of tuples of the form (variable_name, variable_state). inplace: boolean If inplace=True it will modify the factor itself, else would return a new factor. Returns ------- DiscreteFactor or None: if inplace=True (default) returns None if inplace=False returns a new `DiscreteFactor` instance. Examples -------- >>> from pgmpy.factors.discrete import DiscreteFactor >>> phi = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(12)) >>> phi.reduce([('x1', 0), ('x2', 0)]) >>> phi.variables ['x3'] >>> phi.cardinality array([2]) >>> phi.values array([0., 1.]) """ if isinstance(values, six.string_types): raise TypeError( "values: Expected type list or array-like, got type str") if not all([isinstance(state_tuple, tuple) for state_tuple in values]): raise TypeError( "values: Expected type list of tuples, get type {type}", type(values[0])) phi = self if inplace else self.copy() values = [(var, self.get_state_no(var, state_name)) for var, state_name in values] var_index_to_del = [] slice_ = [slice(None)] * len(self.variables) for var, state in values: var_index = phi.variables.index(var) slice_[var_index] = state var_index_to_del.append(var_index) var_index_to_keep = sorted( set(range(len(phi.variables))) - set(var_index_to_del)) # set difference is not gaurenteed to maintain ordering phi.variables = [phi.variables[index] for index in var_index_to_keep] phi.cardinality = phi.cardinality[var_index_to_keep] phi.values = phi.values[tuple(slice_)] if not inplace: return phi
def forward_sample(self, size=1): """ Generates sample(s) from joint distribution of the bayesian network. Parameters ---------- size: int size of sample to be generated Returns ------- sampled: pandas.DataFrame the generated samples Examples -------- >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.CPD import TabularCPD >>> from pgmpy.inference.Sampling import BayesianModelSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> inference = BayesianModelSampling(student) >>> inference.forward_sample(2) diff intel grade 0 1 0 1 1 1 0 2 """ sampled = DataFrame(index=range(size), columns=self.topological_order) for node in self.topological_order: cpd = self.model.get_cpds(node) states = range(self.cardinality[node]) evidence = cpd.variables[:0:-1] if evidence: cached_values = self.pre_compute_reduce(variable=node) evidence = sampled.ix[:, evidence].values weights = list(map(lambda t: cached_values[tuple(t)], evidence)) else: weights = cpd.values sampled[node] = sample_discrete(states, weights, size) return sampled
def test_factor_product2(self): from pgmpy import factors phi = factors.Factor(['x1', 'x2'], [2, 2], range(4)) phi1 = factors.Factor(['x3', 'x4'], [2, 2], range(4)) prod = phi.product(phi1, inplace=False) expected_factor = Factor(['x1', 'x2', 'x3', 'x4'], [2, 2, 2, 2], [0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9]) self.assertEqual(prod, expected_factor) self.assertEqual(sorted(prod.variables), ['x1', 'x2', 'x3', 'x4']) phi = Factor(['x1', 'x2'], [3, 2], range(6)) phi1 = Factor(['x2', 'x3'], [2, 2], range(4)) prod = phi.product(phi1, inplace=False) expected_factor = Factor(['x1', 'x2', 'x3'], [3, 2, 2], [0, 0, 2, 3, 0, 2, 6, 9, 0, 4, 10, 15]) self.assertEqual(prod, expected_factor) self.assertEqual(sorted(prod.variables), ['x1', 'x2', 'x3'])
def test_factor_mul(self): phi = Factor(['x1', 'x2'], [2, 2], range(4)) phi1 = Factor(['x3', 'x4'], [2, 2], range(4)) prod = phi * phi1 sorted_vars = ['x1', 'x2', 'x3', 'x4'] for axis in range(prod.values.ndim): exchange_index = prod.variables.index(sorted_vars[axis]) prod.variables[axis], prod.variables[exchange_index] = prod.variables[exchange_index], prod.variables[axis] prod.values = prod.values.swapaxes(axis, exchange_index) np_test.assert_almost_equal(prod.values.ravel(), np.array([0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9])) self.assertEqual(prod.variables, ['x1', 'x2', 'x3', 'x4'])
def test_class_init_typeerror(self): self.assertRaises(TypeError, Factor, ['x1', 'x2', 'x3'], [2, 1, 1], ['val1', 'val2']) self.assertRaises(TypeError, Factor, ['x1', 'x2', 'x3'], [2, 1, 1], [1, 'val1']) self.assertRaises(TypeError, Factor, ['x1', 'x2', 'x3'], [2, 1, 1], ['val1', 1]) self.assertRaises(TypeError, Factor, ['x1', 'x2', 'x3'], [2, 1, 1], [0.1, 'val1']) self.assertRaises(TypeError, Factor, ['x1', 'x2', 'x3'], [2, 1, 1], ['val1', 0.1]) self.assertRaises(TypeError, Factor, 'x1', [3], [1, 2, 3]) self.assertRaises(ValueError, Factor, ['x1', 'x1', 'x3'], [2, 3, 2], range(12))
def get_distributions(self): """ Returns a dictionary of name and its distribution. Distribution is a ndarray. The ndarray is stored in the standard way such that the rightmost variable changes most often. Consider a CPD of variable 'd' which has parents 'b' and 'c' (distribution['CONDSET'] = ['b', 'c']) | d_0 d_1 --------------------------- b_0, c_0 | 0.8 0.2 b_0, c_1 | 0.9 0.1 b_1, c_0 | 0.7 0.3 b_1, c_1 | 0.05 0.95 The value of distribution['d']['DPIS'] for the above example will be: array([[ 0.8 , 0.2 ], [ 0.9 , 0.1 ], [ 0.7 , 0.3 ], [ 0.05, 0.95]]) Examples -------- >>> reader = XBNReader('xbn_test.xml') >>> reader.get_distributions() {'a': {'TYPE': 'discrete', 'DPIS': array([[ 0.2, 0.8]])}, 'e': {'TYPE': 'discrete', 'DPIS': array([[ 0.8, 0.2], [ 0.6, 0.4]]), 'CONDSET': ['c'], 'CARDINALITY': [2]}, 'b': {'TYPE': 'discrete', 'DPIS': array([[ 0.8, 0.2], [ 0.2, 0.8]]), 'CONDSET': ['a'], 'CARDINALITY': [2]}, 'c': {'TYPE': 'discrete', 'DPIS': array([[ 0.2 , 0.8 ], [ 0.05, 0.95]]), 'CONDSET': ['a'], 'CARDINALITY': [2]}, 'd': {'TYPE': 'discrete', 'DPIS': array([[ 0.8 , 0.2 ], [ 0.9 , 0.1 ], [ 0.7 , 0.3 ], [ 0.05, 0.95]]), 'CONDSET': ['b', 'c']}, 'CARDINALITY': [2, 2]} """ import numpy as np distribution = {} for dist in self.bnmodel.find('DISTRIBUTIONS'): variable_name = dist.find('PRIVATE').get('NAME') distribution[variable_name] = {'TYPE': dist.get('TYPE')} if dist.find('CONDSET') is not None: distribution[variable_name]['CONDSET'] = [ var.get('NAME') for var in dist.find('CONDSET').findall('CONDELEM') ] distribution[variable_name]['CARDINALITY'] = np.array([ len( set( np.array([ list(map(int, dpi.get('INDEXES').split())) for dpi in dist.find('DPIS') ])[:, i])) for i in range(len(distribution[variable_name]['CONDSET'])) ]) distribution[variable_name]['DPIS'] = np.array([ list(map(float, dpi.text.split())) for dpi in dist.find('DPIS') ]) return distribution
def _make_table_str(self, tablefmt="fancy_grid", print_state_names=True): headers_list = [] # build column headers evidence = self.variables[1:] evidence_card = self.cardinality[1:] if evidence: col_indexes = np.array( list(product(*[range(i) for i in evidence_card]))) if self.state_names and print_state_names: for i in range(len(evidence_card)): column_header = [str(evidence[i])] + [ '{var}({state})'.format( var=evidence[i], state=self.state_names[evidence[i]][d]) for d in col_indexes.T[i] ] headers_list.append(column_header) else: for i in range(len(evidence_card)): column_header = [str(evidence[i])] + [ '{s}_{d}'.format(s=evidence[i], d=d) for d in col_indexes.T[i] ] headers_list.append(column_header) # Build row headers if self.state_names and print_state_names: variable_array = [[ '{var}({state})'.format( var=self.variable, state=self.state_names[self.variable][i]) for i in range(self.variable_card) ]] else: variable_array = [[ '{s}_{d}'.format(s=self.variable, d=i) for i in range(self.variable_card) ]] # Stack with data labeled_rows = np.hstack( (np.array(variable_array).T, self.get_values())).tolist() # No support for multi-headers in tabulate cdf_str = tabulate(headers_list + labeled_rows, tablefmt=tablefmt) return cdf_str
def _find_common_cliques(cliques_list): """ Finds the common cliques among the given set of cliques for corresponding node. """ common = set([tuple(x) for x in cliques_list[0]]) for i in range(1, len(cliques_list)): common = common & set([tuple(x) for x in cliques_list[i]]) return list(common)
def _tighten_triplet(self, max_iterations, later_iter, max_triplets, prolong): """ This method finds all the triplets that are eligible and adds them iteratively in the bunch of max_triplets Parameters ---------- max_iterations: integer Maximum number of times we tighten the relaxation later_iter: integer Number of maximum iterations that we want MPLP to run. This is lesser than the initial number of iterations. max_triplets: integer Maximum number of triplets that can be added atmost in one iteration. prolong: bool It sets the continuation of tightening after all the triplets are exhausted """ # Find all the triplets that are possible in the present model triangles = self.find_triangles() # Evaluate scores for each of the triplets found above triplet_scores = self._get_triplet_scores(triangles) # Arrange the keys on the basis of increasing order of the values of the dict. triplet_scores sorted_scores = sorted(triplet_scores, key=triplet_scores.get) for niter in range(max_iterations): if self._is_converged( integrality_gap_threshold=self.integrality_gap_threshold): break # add triplets that are yet not added. add_triplets = [] for triplet_number in range(len(sorted_scores)): # At once, we can add atmost 5 triplets if triplet_number >= max_triplets: break add_triplets.append(sorted_scores.pop()) # Break from the tighten triplets loop if there are no triplets to add if the prolong is set to False if not add_triplets and prolong is False: break # Update the eligible triplets to tighten the relaxation self._update_triangles(add_triplets) # Run MPLP for a maximum of later_iter times. self._run_mplp(later_iter)
def forward_sample(self, size=1, return_type="dataframe"): """ Generates sample(s) from joint distribution of the bayesian network. Parameters ---------- size: int size of sample to be generated return_type: string (dataframe | recarray) Return type for samples, either of 'dataframe' or 'recarray'. Defaults to 'dataframe' Returns ------- sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument the generated samples Examples -------- >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.discrete import TabularCPD >>> from pgmpy.sampling import BayesianModelSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> inference = BayesianModelSampling(student) >>> inference.forward_sample(size=2, return_type='recarray') rec.array([(0, 0, 1), (1, 0, 2)], dtype= [('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8')]) """ types = [(var_name, "int") for var_name in self.topological_order] sampled = np.zeros(size, dtype=types).view(np.recarray) pbar = tqdm(self.topological_order) for node in pbar: pbar.set_description( "Generating for node: {node}".format(node=node)) cpd = self.model.get_cpds(node) states = range(self.cardinality[node]) evidence = cpd.variables[:0:-1] if evidence: cached_values = self.pre_compute_reduce(variable=node) evidence = np.vstack([sampled[i] for i in evidence]) weights = list( map(lambda t: cached_values[tuple(t)], evidence.T)) else: weights = cpd.values sampled[node] = sample_discrete(states, weights, size) return _return_samples(return_type, sampled)
def to_bayesian_model(self): """ Creates a Bayesian Model which is a minimum I-Map for this markov model. The ordering of parents may not remain constant. It would depend on the ordering of variable in the junction tree (which is not constant) all the time. Examples -------- >>> from pgmpy.models import MarkovModel >>> from pgmpy.factors.discrete import DiscreteFactor >>> mm = MarkovModel() >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']) >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'), ... ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'), ... ('x4', 'x7'), ('x5', 'x7')]) >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()] >>> mm.add_factors(*phi) >>> bm = mm.to_bayesian_model() """ from pgmpy.models import BayesianModel bm = BayesianModel() var_clique_dict = defaultdict(tuple) var_order = [] # Create a junction tree from the markov model. # Creation of clique tree involves triangulation, finding maximal cliques # and creating a tree from these cliques junction_tree = self.to_junction_tree() # create an ordering of the nodes based on the ordering of the clique # in which it appeared first root_node = next(iter(junction_tree.nodes())) bfs_edges = nx.bfs_edges(junction_tree, root_node) for node in root_node: var_clique_dict[node] = root_node var_order.append(node) for edge in bfs_edges: clique_node = edge[1] for node in clique_node: if not var_clique_dict[node]: var_clique_dict[node] = clique_node var_order.append(node) # create a bayesian model by adding edges from parent of node to node as # par(x_i) = (var(c_k) - x_i) \cap {x_1, ..., x_{i-1}} for node_index in range(len(var_order)): node = var_order[node_index] node_parents = (set(var_clique_dict[node]) - set([node])).intersection( set(var_order[:node_index])) bm.add_edges_from([(parent, node) for parent in node_parents]) # TODO : Convert factor into CPDs return bm
def maximize(self, variables, inplace=True): """ Maximizes the factor with respect to `variables`. Parameters ---------- variables: list, array-like List of variables with respect to which factor is to be maximized inplace: boolean If inplace=True it will modify the factor itself, else would return a new factor. Returns ------- DiscreteFactor or None: if inplace=True (default) returns None if inplace=False returns a new `DiscreteFactor` instance. Examples -------- >>> from pgmpy.factors.discrete import DiscreteFactor >>> phi = DiscreteFactor(['x1', 'x2', 'x3'], [3, 2, 2], [0.25, 0.35, 0.08, 0.16, 0.05, 0.07, ... 0.00, 0.00, 0.15, 0.21, 0.09, 0.18]) >>> phi.variables ['x1','x2','x3'] >>> phi.maximize(['x2']) >>> phi.variables ['x1', 'x3'] >>> phi.cardinality array([3, 2]) >>> phi.values array([[ 0.25, 0.35], [ 0.05, 0.07], [ 0.15, 0.21]]) """ if isinstance(variables, six.string_types): raise TypeError( "variables: Expected type list or array-like, got type str") phi = self if inplace else self.copy() for var in variables: if var not in phi.variables: raise ValueError("{var} not in scope.".format(var=var)) var_indexes = [phi.variables.index(var) for var in variables] index_to_keep = sorted( set(range(len(self.variables))) - set(var_indexes)) phi.variables = [phi.variables[index] for index in index_to_keep] phi.cardinality = phi.cardinality[index_to_keep] phi.values = np.max(phi.values, axis=tuple(var_indexes)) if not inplace: return phi
def get_next_region_layer(self, region_type=None): """ Give label R0, generate the children region layer R1. """ label = "R{}".format(int(region_type[-1])+1) layer_nodes = self.region_layers[region_type] intersections = [] for pair_nodes in combinations(layer_nodes,2): i_node, j_node = pair_nodes intersection_node = self._get_interset(i_node, j_node) # if empty set, go to next iteration if intersection_node is (): continue if intersection_node not in intersections: # add the new node and arcs to this nodes # self.add_node(intersection_node) # self.add_edges_from([(i_node, intersection_node), \ # (j_node, intersection_node)]) intersections.append(intersection_node) if intersections == []: # There is no next layer node generated return False else: # Do filtering, delete the node that is subset of the other nodes of the same layer filter_out_idx = [] # test node i against from others for i in range(len(intersections)): for j in range(len(intersections)): if i != j and self._is_subset(intersections[i], intersections[j]) and i not in filter_out_idx: # self.remove_node(intersections[i]) filter_out_idx.append(i) # the final selected nodes for next layer selected = [intersections[idx] for idx in range(len(intersections)) if idx not in filter_out_idx] # add them into graph for c_node in selected: self.add_node(c_node) for p_node in layer_nodes: if self._is_subset(c_node, p_node): self.add_edges_from([(p_node, c_node)]) self.region_layers[label] = selected return True
def test_factor_product(self): phi = Factor(['x1', 'x2'], [2, 2], range(4)) phi1 = Factor(['x3', 'x4'], [2, 2], range(4)) prod = factor_product(phi, phi1) expected_factor = Factor(['x1', 'x2', 'x3', 'x4'], [2, 2, 2, 2], [0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9]) self.assertEqual(prod, expected_factor) self.assertEqual(sorted(prod.variables), ['x1', 'x2', 'x3', 'x4']) phi = Factor(['x1', 'x2'], [3, 2], range(6)) phi1 = Factor(['x2', 'x3'], [2, 2], range(4)) prod = factor_product(phi, phi1) expected_factor = Factor(['x1', 'x2', 'x3'], [3, 2, 2], [0, 0, 2, 3, 0, 2, 6, 9, 0, 4, 10, 15]) np_test.assert_almost_equal(prod.values, np.array([0, 0, 2, 3, 0, 2, 6, 9, 0, 4, 10, 15]).reshape(3, 2, 2)) self.assertEqual(sorted(prod.variables), ['x1', 'x2', 'x3'])
def pre_compute_reduce(self, variable): variable_cpd = self.model.get_cpds(variable) variable_evid = variable_cpd.variables[:0:-1] cached_values = {} for state_combination in itertools.product(*[range(self.cardinality[var]) for var in variable_evid]): states = list(zip(variable_evid, state_combination)) cached_values[state_combination] = variable_cpd.reduce(states, inplace=False).values return cached_values
def _str(self, phi_or_p="phi", tablefmt="fancy_grid", print_state_names=True): """ Generate the string from `__str__` method. Parameters ---------- phi_or_p: 'phi' | 'p' 'phi': When used for Factors. 'p': When used for CPDs. print_state_names: boolean If True, the user defined state names are displayed. """ string_header = list(map(lambda x: six.text_type(x), self.scope())) string_header.append('{phi_or_p}({variables})'.format( phi_or_p=phi_or_p, variables=','.join(string_header))) value_index = 0 factor_table = [] for prob in product(*[range(card) for card in self.cardinality]): if self.state_names and print_state_names: prob_list = [ "{var}({state})".format(var=list(self.variables)[i], state=self.state_names[list( self.variables)[i]][prob[i]]) for i in range(len(self.variables)) ] else: prob_list = [ "{s}_{d}".format(s=list(self.variables)[i], d=prob[i]) for i in range(len(self.variables)) ] prob_list.append(self.values.ravel()[value_index]) factor_table.append(prob_list) value_index += 1 return tabulate(factor_table, headers=string_header, tablefmt=tablefmt, floatfmt=".4f")
def region_count(self): """Given the generated region graph, calculate each region's count number from top to down.""" for node in self.region_layers["R0"]: self.nodes[node]["weight"] = 1 for i_layer in range(len(self.region_layers)-1): for i_node in self.region_layers["R{}".format(i_layer + 1)]: ancestors_counts = [self.nodes[the_node]["weight"] for the_node in self.get_ancestors_of([i_node])] self.nodes[i_node]["weight"] = 1 - sum(ancestors_counts) return self
def setUp(self): self.variables = ['intel', 'diff', 'grade'] self.card = [3, 2, 3] self.cardinalities = {'intel': 3, 'diff': 2, 'grade': 3} self.intel_tm = { 0: { 0: 0.1, 1: 0.25, 2: 0.65 }, 1: { 0: 0.5, 1: 0.3, 2: 0.2 }, 2: { 0: 0.3, 1: 0.3, 2: 0.4 } } self.intel_tm_matrix = np.array([[0.1, 0.25, 0.65], [0.5, 0.3, 0.2], [0.3, 0.3, 0.4]]) self.diff_tm = {0: {0: 0.3, 1: 0.7}, 1: {0: 0.75, 1: 0.25}} self.diff_tm_matrix = np.array([[0.3, 0.7], [0.75, 0.25]]) self.grade_tm = { 0: { 0: 0.4, 1: 0.2, 2: 0.4 }, 1: { 0: 0.9, 1: 0.05, 2: 0.05 }, 2: { 0: 0.1, 1: 0.4, 2: 0.5 } } self.grade_tm_matrix = [[0.4, 0.2, 0.4], [0.9, 0.05, 0.05], [0.1, 0.4, 0.5]] self.start_state = [ State('intel', 0), State('diff', 1), State('grade', 2) ] self.model = MC() self.sample = DataFrame(index=range(200), columns=['a', 'b']) self.sample.a = [1] * 100 + [0] * 100 self.sample.b = [0] * 100 + [1] * 100
def get_grammar(self): """ Returns the grammar of the UAI file. """ network_name = Word(alphas).setResultsName('network_name') no_variables = Word(nums).setResultsName('no_variables') grammar = network_name + no_variables self.no_variables = int( grammar.parseString(self.network)['no_variables']) domain_variables = ( Word(nums) * self.no_variables).setResultsName('domain_variables') grammar += domain_variables no_functions = Word(nums).setResultsName('no_functions') grammar += no_functions self.no_functions = int( grammar.parseString(self.network)['no_functions']) integer = Word(nums).setParseAction(lambda t: int(t[0])) for function in range(0, self.no_functions): scope_grammar = Word(nums).setResultsName('fun_scope_' + str(function)) grammar += scope_grammar function_scope = grammar.parseString(self.network)['fun_scope_' + str(function)] function_grammar = ( (integer) * int(function_scope)).setResultsName('fun_' + str(function)) grammar += function_grammar floatnumber = Combine( Word(nums) + Optional(Literal(".") + Optional(Word(nums)))) for function in range(0, self.no_functions): no_values_grammar = Word(nums).setResultsName('fun_no_values_' + str(function)) grammar += no_values_grammar no_values = grammar.parseString(self.network)['fun_no_values_' + str(function)] values_grammar = ((floatnumber) * int(no_values)).setResultsName('fun_values_' + str(function)) grammar += values_grammar return grammar
def __hash__(self): variable_hashes = [hash(variable) for variable in self.variables] sorted_var_hashes = sorted(variable_hashes) phi = self.copy() for axis in range(phi.values.ndim): exchange_index = variable_hashes.index(sorted_var_hashes[axis]) variable_hashes[axis], variable_hashes[exchange_index] = (variable_hashes[exchange_index], variable_hashes[axis]) phi.cardinality[axis], phi.cardinality[exchange_index] = (phi.cardinality[exchange_index], phi.cardinality[axis]) phi.values = phi.values.swapaxes(axis, exchange_index) return hash(str(sorted_var_hashes) + str(phi.values) + str(phi.cardinality))
def test_get_model(self): edges_expected = [('family-out', 'dog-out'), ('bowel-problem', 'dog-out'), ('family-out', 'light-on'), ('dog-out', 'hear-bark')] nodes_expected = [ 'bowel-problem', 'hear-bark', 'light-on', 'dog-out', 'family-out' ] edge_expected = { 'bowel-problem': { 'dog-out': {} }, 'dog-out': { 'hear-bark': {} }, 'family-out': { 'dog-out': {}, 'light-on': {} }, 'hear-bark': {}, 'light-on': {} } node_expected = { 'bowel-problem': { 'position': '(335, 99)' }, 'dog-out': { 'position': '(300, 195)' }, 'family-out': { 'position': '(257, 99)' }, 'hear-bark': { 'position': '(296, 268)' }, 'light-on': { 'position': '(218, 195)' } } cpds_expected = [ np.array([[0.01], [0.99]]), np.array([[0.99, 0.97, 0.9, 0.3], [0.01, 0.03, 0.1, 0.7]]), np.array([[0.15], [0.85]]), np.array([[0.7, 0.01], [0.3, 0.99]]), np.array([[0.6, 0.05], [0.4, 0.95]]) ] model = self.reader.get_model() for cpd_index in range(0, len(cpds_expected)): np_test.assert_array_equal(model.get_cpds()[cpd_index].get_cpd(), cpds_expected[cpd_index]) self.assertDictEqual(model.node, node_expected) self.assertDictEqual(model.edge, edge_expected) self.assertListEqual(sorted(model.nodes()), sorted(nodes_expected)) self.assertListEqual(sorted(model.edges()), sorted(edges_expected))
def test_max_calibrate_sepset_belief(self): belief_propagation = BeliefPropagation(self.junction_tree) belief_propagation.max_calibrate() sepset_belief = belief_propagation.get_sepset_beliefs() phi1 = Factor(['A', 'B'], [2, 3], range(6)) phi2 = Factor(['B', 'C'], [3, 2], range(6)) phi3 = Factor(['C', 'D'], [2, 2], range(4)) b_B = (phi1 * (phi3.maximize(['D'], inplace=False) * phi2).maximize( ['C'], inplace=False)).maximize(['A'], inplace=False) b_C = (phi2 * (phi1.maximize(['A'], inplace=False) * phi3.maximize(['D'], inplace=False))).maximize( ['B'], inplace=False) np_test.assert_array_almost_equal( sepset_belief[frozenset((('A', 'B'), ('B', 'C')))].values, b_B.values) np_test.assert_array_almost_equal( sepset_belief[frozenset((('B', 'C'), ('C', 'D')))].values, b_C.values)
def test_max_calibrate_clique_belief(self): belief_propagation = BeliefPropagation(self.junction_tree) belief_propagation.max_calibrate() clique_belief = belief_propagation.get_clique_beliefs() phi1 = Factor(['A', 'B'], [2, 3], range(6)) phi2 = Factor(['B', 'C'], [3, 2], range(6)) phi3 = Factor(['C', 'D'], [2, 2], range(4)) b_A_B = phi1 * (phi3.maximize(['D'], inplace=False) * phi2).maximize( ['C'], inplace=False) b_B_C = phi2 * (phi1.maximize(['A'], inplace=False) * phi3.maximize(['D'], inplace=False)) b_C_D = phi3 * (phi1.maximize(['A'], inplace=False) * phi2).maximize( ['B'], inplace=False) np_test.assert_array_almost_equal(clique_belief[('A', 'B')].values, b_A_B.values) np_test.assert_array_almost_equal(clique_belief[('B', 'C')].values, b_B_C.values) np_test.assert_array_almost_equal(clique_belief[('C', 'D')].values, b_C_D.values)