def test_partition_function(self):
     """get_partition_function() yields 22.0 for the chain a - b - c."""
     # Two pairwise factors, both filled with the values 0..3.
     factor_ab = DiscreteFactor(['a', 'b'], [2, 2], range(4))
     factor_bc = DiscreteFactor(['b', 'c'], [2, 2], range(4))

     graph = self.graph
     graph.add_nodes_from(['a', 'b', 'c'])
     graph.add_factors(factor_ab, factor_bc)
     graph.add_edges_from([('a', 'b'), ('b', 'c')])

     expected = 22.0
     self.assertEqual(graph.get_partition_function(), expected)
    def setUp(self):
        """Create the junction tree and the Bayesian model used by the tests."""
        # Junction tree over the cliques (A, B) - (B, C) - (C, D) with one
        # factor per clique.
        clique_edges = [(('A', 'B'), ('B', 'C')),
                        (('B', 'C'), ('C', 'D'))]
        self.junction_tree = JunctionTree(clique_edges)
        clique_factors = [
            Factor(['A', 'B'], [2, 3], range(6)),
            Factor(['B', 'C'], [3, 2], range(6)),
            Factor(['C', 'D'], [2, 2], range(4)),
        ]
        self.junction_tree.add_factors(*clique_factors)

        # Bayesian model: A and R feed J; J feeds Q and L; G feeds L.
        network_edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'),
                         ('J', 'L'), ('G', 'L')]
        self.bayesian_model = BayesianModel(network_edges)

        # CPDs are listed in the order in which they are added to the model.
        network_cpds = [
            TabularCPD('A', 2, [[0.2], [0.8]]),
            TabularCPD('G', 2, [[0.6], [0.4]]),
            TabularCPD('J', 2,
                       [[0.9, 0.6, 0.7, 0.1],
                        [0.1, 0.4, 0.3, 0.9]],
                       ['R', 'A'], [2, 2]),
            TabularCPD('L', 2,
                       [[0.9, 0.45, 0.8, 0.1],
                        [0.1, 0.55, 0.2, 0.9]],
                       ['G', 'J'], [2, 2]),
            TabularCPD('Q', 2,
                       [[0.9, 0.2],
                        [0.1, 0.8]],
                       ['J'], [2]),
            TabularCPD('R', 2, [[0.4], [0.6]]),
        ]
        self.bayesian_model.add_cpds(*network_cpds)
    def get_grammar(self):
        """
        Returns the grammar of the UAI file.

        The grammar is assembled incrementally: every time a count token is
        appended, ``self.network`` is parsed again with the grammar built so
        far in order to read that count, which then fixes how many tokens the
        next element repeats.

        As a side effect ``self.no_variables`` and ``self.no_functions`` are
        set from the parsed network.

        Returns
        -------
        The complete grammar, with the named results 'network_name',
        'no_variables', 'domain_variables', 'no_functions' and, for every
        function index i, 'fun_scope_i', 'fun_i', 'fun_no_values_i' and
        'fun_values_i'.
        """
        # Header: an alphabetic network name followed by the variable count.
        network_name = Word(alphas).setResultsName('network_name')
        no_variables = Word(nums).setResultsName('no_variables')
        grammar = network_name + no_variables
        # Parse with the partial grammar to learn how many domain sizes follow.
        self.no_variables = int(grammar.parseString(self.network)['no_variables'])
        domain_variables = (Word(nums)*self.no_variables).setResultsName('domain_variables')
        grammar += domain_variables
        no_functions = Word(nums).setResultsName('no_functions')
        grammar += no_functions
        self.no_functions = int(grammar.parseString(self.network)['no_functions'])
        # Scope entries are converted to int by the parse action.
        integer = Word(nums).setParseAction(lambda t: int(t[0]))
        for function in range(0, self.no_functions):
            # Each function scope is "<size> <var> ... <var>"; the size has to
            # be parsed before the variable list can be described.
            scope_grammar = Word(nums).setResultsName('fun_scope_' + str(function))
            grammar += scope_grammar
            function_scope = grammar.parseString(self.network)['fun_scope_' + str(function)]
            function_grammar = ((integer)*int(function_scope)).setResultsName('fun_' + str(function))
            grammar += function_grammar

        # Digits with an optional fractional part; no sign or exponent is matched.
        floatnumber = Combine(Word(nums) + Optional(Literal(".") + Optional(Word(nums))))
        for function in range(0, self.no_functions):
            # Each value table is "<count> <value> ... <value>".
            no_values_grammar = Word(nums).setResultsName('fun_no_values_' + str(function))
            grammar += no_values_grammar
            no_values = grammar.parseString(self.network)['fun_no_values_' + str(function)]
            values_grammar = ((floatnumber)*int(no_values)).setResultsName('fun_values_' + str(function))
            grammar += values_grammar
        return grammar
Exemple #4
0
 def test_partition_function_raises_error(self):
     """get_partition_function() is expected to raise ValueError here.

     The graph holds the nodes a, b, c and d, while the two factors only
     cover a, b and c.
     """
     factor_ab = Factor(['a', 'b'], [2, 2], range(4))
     factor_bc = Factor(['b', 'c'], [2, 2], range(4))
     self.graph.add_nodes_from(['a', 'b', 'c', 'd'])
     self.graph.add_factors(factor_ab, factor_bc)
     with self.assertRaises(ValueError):
         self.graph.get_partition_function()
 def test_remove_single_factor(self):
     """Removing one of two factors leaves only the other one in the graph."""
     removed = DiscreteFactor(['a', 'b'], [2, 2], range(4))
     kept = DiscreteFactor(['b', 'c'], [2, 2], range(4))

     graph = self.graph
     graph.add_nodes_from(['a', 'b', 'c'])
     graph.add_factors(removed, kept)
     graph.remove_factors(removed)

     remaining = [kept]
     six.assertCountEqual(self, graph.factors, remaining)
Exemple #6
0
 def test_get_partition_function(self):
     """get_partition_function() yields 22.0 for two factors chained via 'b'."""
     factor_ab = Factor(['a', 'b'], [2, 2], range(4))
     factor_bc = Factor(['b', 'c'], [2, 2], range(4))

     # Connect every variable to the factor(s) it takes part in.
     variable_factor_edges = [('a', factor_ab), ('b', factor_ab),
                              ('b', factor_bc), ('c', factor_bc)]
     self.graph.add_edges_from(variable_factor_edges)
     self.graph.add_factors(factor_ab, factor_bc)

     result = self.graph.get_partition_function()
     self.assertEqual(result, 22.0)
    def _str(self, phi_or_p="phi", tablefmt="fancy_grid", print_state_names=True):
        """
        Build the table text that `__str__` returns.

        Parameters
        ----------
        phi_or_p: 'phi' | 'p'
                'phi': When used for Factors.
                  'p': When used for CPDs.
        tablefmt: str
                Table format passed on to `tabulate`.
        print_state_names: boolean
                If True, the user defined state names are displayed.

        Returns
        -------
        The output of `tabulate`: one row per assignment of the variables,
        followed by the matching entry of the flattened values.
        """
        scope_names = [six.text_type(name) for name in self.scope()]
        value_column = '{phi_or_p}({variables})'.format(phi_or_p=phi_or_p,
                                                        variables=','.join(scope_names))
        headers = scope_names + [value_column]

        rows = []
        assignments = product(*[range(card) for card in self.cardinality])
        for row_number, assignment in enumerate(assignments):
            names = list(self.variables)
            positions = range(len(self.variables))
            if self.state_names and print_state_names:
                # Show the user supplied state name of each variable.
                row = ["{var}({state})".format(
                    var=names[pos],
                    state=self.state_names[names[pos]][assignment[pos]])
                       for pos in positions]
            else:
                # Fall back to "<variable>_<state index>".
                row = ["{s}_{d}".format(s=names[pos], d=assignment[pos])
                       for pos in positions]

            # Rows are produced in the same order as the flattened values.
            row.append(self.values.ravel()[row_number])
            rows.append(row)

        return tabulate(rows, headers=headers, tablefmt=tablefmt, floatfmt=".4f")
Exemple #8
0
 def test_remove_multiple_factors(self):
     """Removing every added factor leaves the graph without factors."""
     factor_ab = Factor(['a', 'b'], [2, 2], range(4))
     factor_bc = Factor(['b', 'c'], [2, 2], range(4))

     graph = self.graph
     graph.add_nodes_from(['a', 'b', 'c'])
     graph.add_factors(factor_ab, factor_bc)
     graph.remove_factors(factor_ab, factor_bc)

     six.assertCountEqual(self, graph.factors, [])
Exemple #9
0
 def test_get_factors(self):
     """get_factors() is empty at first and lists the factors once added."""
     factor_ab = Factor(['a', 'b'], [2, 2], range(4))
     factor_bc = Factor(['b', 'c'], [2, 2], range(4))
     graph = self.graph
     graph.add_nodes_from(['a', 'b', 'c'])

     # Nothing has been added yet.
     six.assertCountEqual(self, graph.get_factors(), [])

     graph.add_factors(factor_ab, factor_bc)
     six.assertCountEqual(self, graph.get_factors(), [factor_ab, factor_bc])
    def sample(self, start_state=None, size=1):
        """
        Draw a sequence of states from the Markov Chain.

        Parameters:
        -----------
        start_state: dict or array-like iterable
            Starting states of the variables. When None, the state already
            stored on the chain is used; if there is none, a random one is drawn.
        size: int
            Number of samples to be generated.

        Return Type:
        ------------
        pandas.DataFrame

        Examples:
        ---------
        >>> from pgmpy.models import MarkovChain as MC
        >>> from pgmpy.factors.discrete import State
        >>> model = MC(['intel', 'diff'], [2, 3])
        >>> model.set_start_state([State('intel', 0), State('diff', 2)])
        >>> intel_tm = {0: {0: 0.25, 1: 0.75}, 1: {0: 0.5, 1: 0.5}}
        >>> model.add_transition_model('intel', intel_tm)
        >>> diff_tm = {0: {0: 0.1, 1: 0.5, 2: 0.4}, 1: {0: 0.2, 1: 0.2, 2: 0.6 }, 2: {0: 0.7, 1: 0.15, 2: 0.15}}
        >>> model.add_transition_model('diff', diff_tm)
        >>> model.sample(size=5)
           intel  diff
        0      0     2
        1      1     0
        2      0     1
        3      1     0
        4      0     2
        """
        if start_state is not None:
            self.set_start_state(start_state)
        elif self.state is None:
            self.state = self.random_state()
        # otherwise keep the previously-set state

        sampled = DataFrame(index=range(size), columns=self.variables)
        sampled.loc[0] = [st for var, st in self.state]

        # Draw `size` successor states up front for every (variable, state)
        # pair; the walk below only looks them up.
        draws = defaultdict(dict)
        for var, transitions in self.transition_models.items():
            for st in transitions:
                successors = list(transitions[st].keys())
                weights = list(transitions[st].values())
                draws[var][st] = sample_discrete(successors, weights, size=size)

        for step in range(1, size):
            for position, (var, st) in enumerate(self.state):
                self.state[position] = State(var, draws[var][st][step - 1])
            sampled.loc[step] = [st for var, st in self.state]

        return sampled
Exemple #11
0
 def setUp(self):
     """Create the factors shared by the tests of this case."""
     self.phi = Factor(['x1', 'x2', 'x3'], [2, 2, 2], np.random.uniform(5, 10, size=8))
     self.phi1 = Factor(['x1', 'x2', 'x3'], [2, 3, 2], range(12))
     # Same table as phi1, but with tuples as variable names.
     self.phi2 = Factor([('x1', 0), ('x2', 0), ('x3', 0)], [2, 3, 2], range(12))
     # This larger factor (phi3) caused a bug in reduce
     card3 = [3, 3, 3, 2, 2, 2, 2, 2, 2]
     # `np.float` was only an alias of the builtin `float` and has been
     # removed from NumPy; the builtin keeps the same dtype.
     self.phi3 = Factor(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'],
                        card3, np.arange(np.prod(card3), dtype=float))
 def test_factorset_divide(self):
     """Dividing by a factor set adds identity/factor for each of its factors."""
     divisor_a = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(1, 13))
     divisor_b = DiscreteFactor(['x3', 'x4', 'x1'], [2, 2, 2], range(1, 9))
     divisor_set = FactorSet(divisor_a, divisor_b)

     dividend_a = DiscreteFactor(['x5', 'x6', 'x7'], [2, 2, 2], range(1, 9))
     dividend_b = DiscreteFactor(['x5', 'x7', 'x8'], [2, 2, 2], range(1, 9))
     dividend_set = FactorSet(dividend_a, dividend_b)

     quotient_set = dividend_set.divide(divisor_set, inplace=False)

     # The dividend factors are kept; every divisor shows up inverted.
     expected_factors = {dividend_a,
                         dividend_b,
                         divisor_a.identity_factor() / divisor_a,
                         divisor_b.identity_factor() / divisor_b}
     self.assertEqual(expected_factors, quotient_set.factors)
    def sample(self, start_state=None, size=1, return_type="dataframe"):
        """
        Sample from the Markov Chain.

        Each step resamples every variable in turn from
        ``self.transition_models``, keyed by the current states of all the
        other variables.

        Parameters:
        -----------
        start_state: dict or array-like iterable
            Representing the starting states of the variables. If None is passed,
            the state already stored on the sampler is used; if there is none,
            a random start_state is chosen.
        size: int
            Number of samples to be generated.
        return_type: string (dataframe | recarray)
            Return type for samples, either of 'dataframe' or 'recarray'.
            Defaults to 'dataframe'

        Returns
        -------
        sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
            the generated samples

        Examples:
        ---------
        >>> from pgmpy.factors import DiscreteFactor
        >>> from pgmpy.inference import GibbsSampling
        >>> from pgmpy.models import MarkovModel
        >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
        >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4])
        >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8])
        >>> model.add_factors(factor_ab, factor_cb)
        >>> gibbs = GibbsSampling(model)
        >>> gibbs.sample(size=4, return_type='dataframe')
           A  B  C
        0  0  1  1
        1  1  0  0
        2  1  1  0
        3  1  1  1
        """
        if start_state is None and self.state is None:
            self.state = self.random_state()
        elif start_state is not None:
            self.set_start_state(start_state)

        types = [(var_name, 'int') for var_name in self.variables]
        sampled = np.zeros(size, dtype=types).view(np.recarray)
        # A record of a structured array has to be assigned from a tuple (one
        # entry per field); an ndarray is not accepted as a single record.
        sampled[0] = tuple(st for var, st in self.state)
        for i in range(size - 1):
            for j, (var, st) in enumerate(self.state):
                # The kernel of `var` is keyed by the states of all other variables.
                other_st = tuple(s for v, s in self.state if var != v)
                next_st = sample_discrete(list(range(self.cardinalities[var])),
                                          self.transition_models[var][other_st])[0]
                self.state[j] = State(var, next_st)
            sampled[i + 1] = tuple(st for var, st in self.state)

        return _return_samples(return_type, sampled)
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Returns
        -------
        The sum of all values of the product of the factors of the graph.

        Raises
        ------
        ValueError: If the scope of the factor product differs from the
            nodes of the graph.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = MarkovModel()
        >>> G.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> G.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                   ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                   ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in G.edges()]
        >>> G.add_factors(*phi)
        >>> G.get_partition_function()
        """
        # NOTE: the docstring is a raw string because the LaTeX formula holds
        # backslash sequences (\s, \p) that are invalid escapes otherwise.
        self.check_model()

        factor = factor_product(self.factors[0], *self.factors[1:])
        if set(factor.scope()) != set(self.nodes()):
            raise ValueError('DiscreteFactor for all the random variables not defined.')

        return np.sum(factor.values)
Exemple #15
0
    def get_partition_function(self):
        r"""
        Compute the partition function of the cluster graph.

        The partition function is

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        with m the number of factors in the graph and X all of its random
        variables.

        Returns
        -------
        The sum of all values of the product of the factors, or None when
        `check_model()` returns a falsy value.

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors import Factor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([('a', 'b', 'c'), ('a', 'b'), ('a', 'c')])
        >>> G.add_edges_from([(('a', 'b', 'c'), ('a', 'b')),
        ...                   (('a', 'b', 'c'), ('a', 'c'))])
        >>> phi1 = Factor(['a', 'b', 'c'], [2, 2, 2], np.random.rand(8))
        >>> phi2 = Factor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi3 = Factor(['a', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> G.get_partition_function()
        """
        if not self.check_model():
            return None

        first = self.factors[0]
        others = (self.factors[index] for index in range(1, len(self.factors)))
        joint = factor_product(first, *others)
        return np.sum(joint.values)
Exemple #16
0
    def __eq__(self, other):
        """
        Compare two factors irrespective of the order of their variables.

        Both operands must be DiscreteFactor instances with the same scope.
        A copy of `other` has its axes swapped into the variable order of
        `self`; the factors are equal when shape, values (`np.allclose`)
        and cardinality all match afterwards.
        """
        both_factors = isinstance(self, DiscreteFactor) and isinstance(other, DiscreteFactor)
        if not both_factors:
            return False
        if set(self.scope()) != set(other.scope()):
            return False

        aligned = other.copy()
        for axis in range(self.values.ndim):
            # Move the variable sitting on this axis of `self` to the same
            # axis of the copy, keeping names, cardinalities and values in sync.
            source = aligned.variables.index(self.variables[axis])
            aligned.variables[axis], aligned.variables[source] = (
                aligned.variables[source], aligned.variables[axis])
            aligned.cardinality[axis], aligned.cardinality[source] = (
                aligned.cardinality[source], aligned.cardinality[axis])
            aligned.values = aligned.values.swapaxes(axis, source)

        if aligned.values.shape != self.values.shape:
            return False
        if not np.allclose(aligned.values, self.values):
            return False
        return all(self.cardinality == aligned.cardinality)
    def _get_kernel_from_markov_model(self, model):
        """
        Computes the Gibbs transition models from a Markov Network.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Section 12.3.3 pp 512-513.

        Sets ``self.variables`` and ``self.cardinalities`` and stores, for
        every variable, a kernel in ``self.transition_models`` that maps each
        assignment of all other variables to the normalized values of the
        reduced factor product of that variable.

        Parameters:
        -----------
        model: MarkovModel
            The model from which probabilities will be computed.
        """
        self.variables = np.array(model.nodes())
        factors_dict = {var: [] for var in self.variables}
        for factor in model.get_factors():
            for var in factor.scope():
                factors_dict[var].append(factor)

        # Take factor product
        factors_dict = {var: factor_product(*factors) if len(factors) > 1 else factors[0]
                        for var, factors in factors_dict.items()}
        self.cardinalities = {var: factors_dict[var].get_cardinality([var])[var] for var in self.variables}

        for var in self.variables:
            other_vars = [v for v in self.variables if var != v]
            other_cards = [self.cardinalities[v] for v in other_vars]
            kernel = {}
            factor = factors_dict[var]
            scope = set(factor.scope())
            for tup in itertools.product(*[range(card) for card in other_cards]):
                # The comprehension variable must not be called `var`: on
                # Python 2 it would leak and overwrite the loop variable that
                # is used as the key of `self.transition_models` below.
                states = [State(v, s) for v, s in zip(other_vars, tup) if v in scope]
                reduced_factor = factor.reduce(states, inplace=False)
                kernel[tup] = reduced_factor.values / sum(reduced_factor.values)
            self.transition_models[var] = kernel
    def get_tables(self):
        """
        Returns list of tuple of child variable and CPD in case of Bayesian
        and list of tuple of scope of variables and values in case of Markov.

        For a 'BAYES' network the last variable of every function is taken as
        the child variable. Functions of any other network type than 'BAYES'
        or 'MARKOV' contribute nothing.

        Returns
        -------
        list : list of tuples of child variable and values in Bayesian
            list of tuples of scope of variables and values in case of Markov.

        Example
        -------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_tables()
        [(['var_0', 'var_1'], ['4.000', '2.400', '1.000', '0.000']),
         (['var_0', 'var_1', 'var_2'],
          ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000', '10.0000',
           '1.8750', '4.0000', '3.3330', '2.0000', '2.0000', '3.4000'])]
        """
        function_ids = range(0, self.no_functions)
        if not function_ids:
            return []

        # The parse result is the same for every function, so the network is
        # parsed once instead of twice per function.
        parsed = self.grammar.parseString(self.network)

        tables = []
        for function in function_ids:
            function_variables = parsed['fun_' + str(function)]
            # A scope holding a single variable is returned as a bare int.
            if isinstance(function_variables, int):
                function_variables = [function_variables]
            if self.network_type == 'BAYES':
                child_var = "var_" + str(function_variables[-1])
                values = parsed['fun_values_' + str(function)]
                tables.append((child_var, list(values)))
            elif self.network_type == "MARKOV":
                function_variables = ["var_" + str(var) for var in function_variables]
                values = parsed['fun_values_' + str(function)]
                tables.append((function_variables, list(values)))
        return tables
    def _get_kernel_from_bayesian_model(self, model):
        """
        Build the Gibbs transition models of a Bayesian Network.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Section 12.3.3 pp 512-513.

        Sets ``self.variables`` and ``self.cardinalities`` and stores one
        kernel per variable in ``self.transition_models``; a kernel maps every
        assignment of all other variables to the normalized values of the
        reduced product of the CPDs that mention the variable.

        Parameters:
        -----------
        model: BayesianModel
            The model from which probabilities will be computed.
        """
        self.variables = np.array(model.nodes())
        cardinalities = {name: model.get_cpds(name).variable_card
                         for name in self.variables}
        self.cardinalities = cardinalities

        for var in self.variables:
            others = [v for v in self.variables if var != v]
            state_ranges = [range(self.cardinalities[v]) for v in others]

            # Product of every CPD whose scope contains the variable.
            relevant_cpds = [cpd for cpd in model.cpds if var in cpd.scope()]
            joint = factor_product(*relevant_cpds)
            joint_scope = set(joint.scope())

            kernel = {}
            for assignment in itertools.product(*state_ranges):
                # Only variables inside the scope of the product can be reduced.
                evidence = [State(v, s) for v, s in zip(others, assignment)
                            if v in joint_scope]
                conditional = joint.reduce(evidence, inplace=False)
                kernel[assignment] = conditional.values / sum(conditional.values)
            self.transition_models[var] = kernel
Exemple #20
0
    def get_edges(self):
        """
        Returns the edges of the network.

        For a 'BAYES' network the last variable of every function is taken as
        the child, and one (child, other variable) tuple is emitted for each
        remaining variable of the function. For a 'MARKOV' network every pair
        of variables of a function is connected.

        Returns
        -------
        set: set containing the edges of the network

        Example
        -------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_edges()
        {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        """
        function_ids = range(0, self.no_functions)
        if not function_ids:
            return set()

        # The parse result is the same for every function, so the network is
        # parsed once instead of once per function.
        parsed = self.grammar.parseString(self.network)

        edges = set()
        for function in function_ids:
            function_variables = parsed['fun_' + str(function)]
            # A scope holding a single variable is returned as a bare int.
            if isinstance(function_variables, int):
                function_variables = [function_variables]
            if self.network_type == 'BAYES':
                child_var = "var_" + str(function_variables[-1])
                edges.update((child_var, "var_" + str(var))
                             for var in function_variables[:-1])
            elif self.network_type == "MARKOV":
                names = ["var_" + str(var) for var in function_variables]
                edges.update(combinations(names, 2))
        return edges
    def get_independencies(self, latex=False):
        """
        Computes independencies in the Bayesian Network, by checking d-separation.

        For every node and every set of observed nodes drawn from the other
        nodes (all sizes below the number of other nodes), the nodes that are
        neither observed nor on an active trail from the start node are
        recorded as independent of it.

        Parameters
        ----------
        latex: boolean
            If latex=True then latex string of the independence assertion
            would be created.

        Examples
        --------
        >>> from pgmpy.models import BayesianModel
        >>> chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        >>> chain.get_independencies()
        (X _|_ Z | Y)
        (Z _|_ X | Y)
        """
        independencies = Independencies()
        for start in self.nodes():
            rest = set(self.nodes()) - {start}
            for observed_size in range(len(rest)):
                for observed in itertools.combinations(rest, observed_size):
                    reachable = self.active_trail_nodes(start, observed=observed)[start]
                    separated = rest - set(observed) - set(reachable)
                    if separated:
                        independencies.add_assertions([start, separated, observed])

        independencies.reduce()

        if latex:
            return independencies.latex_string()
        return independencies
Exemple #22
0
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Returns
        -------
        The sum of all values of the product of the factors of the graph.

        Raises
        ------
        ValueError: If the scope of the factor product differs from the
            variable nodes of the graph.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import FactorGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = FactorGraph()
        >>> G.add_nodes_from(['a', 'b', 'c'])
        >>> phi1 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi2 = DiscreteFactor(['b', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2)
        >>> G.add_nodes_from([phi1, phi2])
        >>> G.add_edges_from([('a', phi1), ('b', phi1),
        ...                   ('b', phi2), ('c', phi2)])
        >>> G.get_factors()
        >>> G.get_partition_function()
        """
        # NOTE: the docstring is a raw string because the LaTeX formula holds
        # backslash sequences (\s, \p) that are invalid escapes otherwise.
        factor = factor_product(self.factors[0], *self.factors[1:])
        if set(factor.scope()) != set(self.get_variable_nodes()):
            raise ValueError('DiscreteFactor for all the random variables not defined.')

        return np.sum(factor.values)
Exemple #23
0
 def test_eq1(self):
     """Factors holding the same table in a different variable order are equal."""
     phi1 = Factor(['x1', 'x2', 'x3'], [2, 4, 3], range(24))
     # Same table as phi1 with the first two variables swapped.
     phi2 = Factor(['x2', 'x1', 'x3'], [4, 2, 3], [0, 1, 2, 12, 13, 14, 3,
                                                   4, 5, 15, 16, 17, 6, 7,
                                                   8, 18, 19, 20, 9, 10, 11,
                                                   21, 22, 23])
     # assertTrue(phi1, phi2) would take phi2 as the failure message and never
     # compare the factors; assertEqual actually exercises __eq__.
     self.assertEqual(phi1, phi2)
Exemple #24
0
    def get_states(self):
        """
        Add one OUTCOME tag per state to the variables of XMLBIF.

        Each tag is created below the matching entry of ``self.variables``
        and its text is the index of the state.

        Return
        ------
        dict: dict of type {variable: outcome tags}

        Examples
        --------
        >>> writer = XMLBIFWriter(model)
        >>> writer.get_states()
        {'dog-out': [<Element OUTCOME at 0x7ffbabfcdec8>, <Element OUTCOME at 0x7ffbabfcdf08>],
         'family-out': [<Element OUTCOME at 0x7ffbabfd4108>, <Element OUTCOME at 0x7ffbabfd4148>],
         'bowel-problem': [<Element OUTCOME at 0x7ffbabfd4088>, <Element OUTCOME at 0x7ffbabfd40c8>],
         'hear-bark': [<Element OUTCOME at 0x7ffbabfcdf48>, <Element OUTCOME at 0x7ffbabfcdf88>],
         'light-on': [<Element OUTCOME at 0x7ffbabfcdfc8>, <Element OUTCOME at 0x7ffbabfd4048>]}
        """
        outcome_tag = {}
        for cpd in self.model.get_cpds():
            var = cpd.variable
            tags = []
            outcome_tag[var] = tags
            cardinality = cpd.get_cardinality([var])[var]
            states = [State(var, index) for index in range(cardinality)]
            for state in states:
                state_tag = etree.SubElement(self.variables[var], "OUTCOME")
                state_tag.text = str(state.state)
                tags.append(state_tag)
        return outcome_tag
Exemple #25
0
    def set_distributions(self):
        """
        Write the DISTRIBUTIONS section of the network.

        One DIST element is created per CPD of the model, in order of the
        CPD variable. A CPD without evidence gets a single DPI element with
        all values; a CPD with evidence gets a CONDSET listing the evidence
        and one DPI element per pair of consecutive values.

        Examples
        --------
        >>> from pgmpy.readwrite.XMLBeliefNetwork import XBNWriter
        >>> writer =XBNWriter()
        >>> writer.set_distributions()
        """
        distributions = etree.SubElement(self.bnmodel, 'DISTRIBUTIONS')

        cpds = self.model.get_cpds()
        cpds.sort(key=lambda cpd: cpd.variable)
        for cpd in cpds:
            flat_values = cpd.values.ravel()
            var = cpd.variable
            dist_type = self.model.node[var]['TYPE']
            dist = etree.SubElement(distributions, 'DIST', attrib={'TYPE': dist_type})
            etree.SubElement(dist, 'PRIVATE', attrib={'NAME': var})
            dpis = etree.SubElement(dist, 'DPIS')

            if not len(cpd.evidence):
                etree.SubElement(dpis, "DPI").text = ' ' + ' '.join(map(str, flat_values))
                continue

            condset = etree.SubElement(dist, 'CONDSET')
            for condelem in sorted(cpd.evidence):
                etree.SubElement(condset, 'CONDELEM', attrib={'NAME': condelem})
            # TODO: Get Index value.
            for start in range(0, len(flat_values), 2):
                # Values are written two at a time.
                pair_text = " " + str(flat_values[start]) + " " + str(flat_values[start + 1]) + " "
                dpi = etree.SubElement(dpis, "DPI", attrib={'INDEXES': ' '})
                dpi.text = pair_text
    def test_max_calibrate_clique_belief(self):
        """Clique beliefs after max_calibrate() match hand-built max-product beliefs."""
        propagation = BeliefPropagation(self.junction_tree)
        propagation.max_calibrate()
        beliefs = propagation.get_clique_beliefs()

        factor_ab = Factor(['A', 'B'], [2, 3], range(6))
        factor_bc = Factor(['B', 'C'], [3, 2], range(6))
        factor_cd = Factor(['C', 'D'], [2, 2], range(4))

        # Messages obtained by maximizing out the variables of the outer cliques.
        max_over_a = factor_ab.maximize(['A'], inplace=False)
        max_over_d = factor_cd.maximize(['D'], inplace=False)

        expected = {
            ('A', 'B'): factor_ab * (max_over_d * factor_bc).maximize(['C'], inplace=False),
            ('B', 'C'): factor_bc * (max_over_a * max_over_d),
            ('C', 'D'): factor_cd * (max_over_a * factor_bc).maximize(['B'], inplace=False),
        }

        for clique in (('A', 'B'), ('B', 'C'), ('C', 'D')):
            np_test.assert_array_almost_equal(beliefs[clique].values,
                                              expected[clique].values)
    def get_edges(self):
        """
        Returns the edges of the network.

        For a 'BAYES' network the last variable of every function is taken as
        the child, and one (child, other variable) tuple is emitted for each
        remaining variable of the function. For a 'MARKOV' network every pair
        of variables of a function is connected.

        Returns
        -------
        set: set containing the edges of the network

        Example
        -------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_edges()
        {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        """
        function_ids = range(0, self.no_functions)
        if not function_ids:
            return set()

        # Parse once: the result does not depend on the function being read.
        parsed = self.grammar.parseString(self.network)

        edges = set()
        for function in function_ids:
            function_variables = parsed['fun_' + str(function)]
            # A scope holding a single variable is returned as a bare int.
            if isinstance(function_variables, int):
                function_variables = [function_variables]
            if self.network_type == 'BAYES':
                child_var = "var_" + str(function_variables[-1])
                for var in function_variables[:-1]:
                    edges.add((child_var, "var_" + str(var)))
            elif self.network_type == "MARKOV":
                function_variables = ["var_" + str(var) for var in function_variables]
                edges.update(combinations(function_variables, 2))
        return edges
Exemple #28
0
    def get_states(self):
        """
        Build the state names of every variable of BIF.

        The states of a variable are named "<variable>_<index>", with one
        name per state of the CPD of that variable.

        Returns
        -------
        dict: dict of type {variable: a list of states}

        Example
        -------
        >>> from pgmpy.readwrite import BIFReader, BIFWriter
        >>> model = BIFReader('dog-problem.bif').get_model()
        >>> writer = BIFWriter(model)
        >>> writer.get_states()
        {'bowel-problem': ['bowel-problem_0', 'bowel-problem_1'],
         'dog-out': ['dog-out_0', 'dog-out_1'],
         'family-out': ['family-out_0', 'family-out_1'],
         'hear-bark': ['hear-bark_0', 'hear-bark_1'],
         'light-on': ['light-on_0', 'light-on_1']}
        """
        def state_labels(cpd):
            # One label per state index of the variable owning this CPD.
            name = cpd.variable
            state_count = cpd.get_cardinality([name])[name]
            return [str(name)+'_'+str(index) for index in range(state_count)]

        return {cpd.variable: state_labels(cpd) for cpd in self.model.get_cpds()}
Exemple #29
0
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Returns
        -------
        The sum of all values of the product of the factors of the graph.

        Raises
        ------
        ValueError: If the scope of the factor product differs from the
            variable nodes of the graph.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import FactorGraph
        >>> from pgmpy.factors import Factor
        >>> G = FactorGraph()
        >>> G.add_nodes_from(['a', 'b', 'c'])
        >>> phi1 = Factor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi2 = Factor(['b', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2)
        >>> G.add_nodes_from([phi1, phi2])
        >>> G.add_edges_from([('a', phi1), ('b', phi1),
        ...                   ('b', phi2), ('c', phi2)])
        >>> G.get_factors()
        >>> G.get_partition_function()
        """
        # NOTE: the docstring is a raw string because the LaTeX formula holds
        # backslash sequences (\s, \p) that are invalid escapes otherwise.
        factor = factor_product(self.factors[0], *self.factors[1:])
        if set(factor.scope()) != set(self.get_variable_nodes()):
            raise ValueError('Factor for all the random variables not defined.')

        return np.sum(factor.values)
    def get_states(self):
        """
        Collect the state names of every variable of BIF.

        Every state is named "<variable>_<index>", with the index running
        over the cardinality reported by the CPD of the variable.

        Returns
        -------
        dict: dict of type {variable: a list of states}

        Example
        -------
        >>> from pgmpy.readwrite import BIFReader, BIFWriter
        >>> model = BIFReader('dog-problem.bif').get_model()
        >>> writer = BIFWriter(model)
        >>> writer.get_states()
        {'bowel-problem': ['bowel-problem_0', 'bowel-problem_1'],
         'dog-out': ['dog-out_0', 'dog-out_1'],
         'family-out': ['family-out_0', 'family-out_1'],
         'hear-bark': ['hear-bark_0', 'hear-bark_1'],
         'light-on': ['light-on_0', 'light-on_1']}
        """
        states_by_variable = {}
        for cpd in self.model.get_cpds():
            name = cpd.variable
            state_count = cpd.get_cardinality([name])[name]
            states_by_variable[name] = [str(name) + '_' + str(index)
                                        for index in range(state_count)]
        return states_by_variable
    def _get_kernel_from_bayesian_model(self, model):
        """
        Computes the Gibbs transition models from a Bayesian Network.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Section 12.3.3 pp 512-513.

        Parameters:
        -----------
        model: BayesianModel
            The model from which probabilities will be computed.
        """
        self.variables = np.array(model.nodes())
        self.cardinalities = {
            var: model.get_cpds(var).variable_card
            for var in self.variables
        }

        for var in self.variables:
            other_vars = [v for v in self.variables if var != v]
            other_cards = [self.cardinalities[v] for v in other_vars]
            cpds = [cpd for cpd in model.cpds if var in cpd.scope()]
            prod_cpd = factor_product(*cpds)
            kernel = {}
            scope = set(prod_cpd.scope())
            for tup in itertools.product(
                    *[range(card) for card in other_cards]):
                states = [
                    State(v, s) for v, s in zip(other_vars, tup) if v in scope
                ]
                prod_cpd_reduced = prod_cpd.reduce(states, inplace=False)
                kernel[tup] = prod_cpd_reduced.values / sum(
                    prod_cpd_reduced.values)
            self.transition_models[var] = kernel
Exemple #32
0
    def sample(self, start_state=None, size=1):
        """
        Sample from the Markov Chain.

        Parameters:
        -----------
        start_state: dict or array-like iterable
            Representing the starting states of the variables. If None is
            passed, the previously set state is reused; if there is none, a
            random start_state is chosen.
        size: int
            Number of samples to be generated.

        Return Type:
        ------------
        pandas.DataFrame

        Examples:
        ---------
        >>> from pgmpy.factors import Factor
        >>> from pgmpy.inference import GibbsSampling
        >>> from pgmpy.models import MarkovModel
        >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
        >>> factor_ab = Factor(['A', 'B'], [2, 2], [1, 2, 3, 4])
        >>> factor_cb = Factor(['C', 'B'], [2, 2], [5, 6, 7, 8])
        >>> model.add_factors(factor_ab, factor_cb)
        >>> gibbs = GibbsSampling(model)
        >>> gibbs.sample(size=4)
           A  B  C
        0  0  1  1
        1  1  0  0
        2  1  1  0
        3  1  1  1
        """
        if start_state is None:
            # Only draw a random state when nothing was set before; an
            # existing state must not be overwritten with None.
            if self.state is None:
                self.state = self.random_state()
        else:
            self.set_start_state(start_state)

        sampled = DataFrame(index=range(size), columns=self.variables)
        sampled.loc[0] = [st for var, st in self.state]
        for i in range(size - 1):
            # One sweep: resample every variable given the current state of
            # all the others.
            for j, (var, st) in enumerate(self.state):
                other_st = tuple(st for v, st in self.state if var != v)
                next_st = sample_discrete(list(range(self.cardinalities[var])),
                                          self.transition_models[var][other_st])[0]
                self.state[j] = State(var, next_st)
            sampled.loc[i + 1] = [st for var, st in self.state]
        return sampled
Exemple #33
0
    def reduce(self, values, inplace=True):
        """
        Reduces the factor to the context of given variable values.

        Parameters
        ----------
        values: list, array-like
            A list of tuples of the form (variable_name, variable_state).

        inplace: boolean
            If inplace=True it will modify the factor itself, else would return
            a new factor.

        Returns
        -------
        DiscreteFactor or None: if inplace=True (default) returns None
                        if inplace=False returns a new `DiscreteFactor` instance.

        Raises
        ------
        TypeError
            If `values` is a string or contains an element that is not a tuple.
        ValueError
            If a variable in `values` is not in the scope of the factor.

        Examples
        --------
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> phi = DiscreteFactor(['x1', 'x2', 'x3'], [2, 3, 2], range(12))
        >>> phi.reduce([('x1', 0), ('x2', 0)])
        >>> phi.variables
        ['x3']
        >>> phi.cardinality
        array([2])
        >>> phi.values
        array([0., 1.])
        """
        if isinstance(values, six.string_types):
            raise TypeError(
                "values: Expected type list or array-like, got type str")

        for state_tuple in values:
            if not isinstance(state_tuple, tuple):
                # Report the offending element's type in the message itself.
                raise TypeError(
                    "values: Expected type list of tuples, got type {type}"
                    .format(type=type(state_tuple)))

        phi = self if inplace else self.copy()
        values = [(var, self.get_state_no(var, state_name))
                  for var, state_name in values]

        var_index_to_del = []
        slice_ = [slice(None)] * len(self.variables)
        for var, state in values:
            if var not in phi.variables:
                raise ValueError("{var} not in scope.".format(var=var))
            var_index = phi.variables.index(var)
            slice_[var_index] = state
            var_index_to_del.append(var_index)

        # Set difference does not guarantee ordering, hence the sort.
        var_index_to_keep = sorted(
            set(range(len(phi.variables))) - set(var_index_to_del))
        phi.variables = [phi.variables[index] for index in var_index_to_keep]
        phi.cardinality = phi.cardinality[var_index_to_keep]
        phi.values = phi.values[tuple(slice_)]

        if not inplace:
            return phi
Exemple #34
0
    def forward_sample(self, size=1):
        """
        Generates sample(s) from joint distribution of the bayesian network.

        Nodes are visited in ``self.topological_order`` so that the evidence
        columns of a node are already filled in when the node is sampled.

        Parameters
        ----------
        size: int
            size of sample to be generated

        Returns
        -------
        sampled: pandas.DataFrame
            the generated samples

        Examples
        --------
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.CPD import TabularCPD
        >>> from pgmpy.inference.Sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...                0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...                ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> inference.forward_sample(2)
                diff       intel       grade
        0        1           0          1
        1        1           0          2
        """
        sampled = DataFrame(index=range(size), columns=self.topological_order)
        for node in self.topological_order:
            cpd = self.model.get_cpds(node)
            states = range(self.cardinality[node])
            # Every CPD variable except the first, in reverse order.
            evidence = cpd.variables[:0:-1]
            if evidence:
                cached_values = self.pre_compute_reduce(variable=node)
                # `.ix` was removed from pandas; the evidence entries are
                # column labels, so `.loc` is the label-based equivalent.
                evidence = sampled.loc[:, evidence].values
                weights = list(map(lambda t: cached_values[tuple(t)],
                                   evidence))
            else:
                weights = cpd.values
            sampled[node] = sample_discrete(states, weights, size)
        return sampled
Exemple #35
0
    def test_factor_product2(self):
        """Check ``Factor.product`` with disjoint and with overlapping scopes."""
        from pgmpy import factors

        # Scopes without a shared variable.
        left = factors.Factor(['x1', 'x2'], [2, 2], range(4))
        right = factors.Factor(['x3', 'x4'], [2, 2], range(4))
        result = left.product(right, inplace=False)
        expected = Factor(
            ['x1', 'x2', 'x3', 'x4'], [2, 2, 2, 2],
            [0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9])
        self.assertEqual(result, expected)
        self.assertEqual(sorted(result.variables), ['x1', 'x2', 'x3', 'x4'])

        # Scopes sharing the variable 'x2'.
        left = Factor(['x1', 'x2'], [3, 2], range(6))
        right = Factor(['x2', 'x3'], [2, 2], range(4))
        result = left.product(right, inplace=False)
        expected = Factor(
            ['x1', 'x2', 'x3'], [3, 2, 2],
            [0, 0, 2, 3, 0, 2, 6, 9, 0, 4, 10, 15])
        self.assertEqual(result, expected)
        self.assertEqual(sorted(result.variables), ['x1', 'x2', 'x3'])
Exemple #36
0
    def test_factor_mul(self):
        """Check the ``*`` operator on two factors with disjoint scopes."""
        left = Factor(['x1', 'x2'], [2, 2], range(4))
        right = Factor(['x3', 'x4'], [2, 2], range(4))
        prod = left * right

        # Bring the axes into a fixed variable order before comparing values.
        wanted_order = ['x1', 'x2', 'x3', 'x4']
        for axis in range(prod.values.ndim):
            swap_with = prod.variables.index(wanted_order[axis])
            moved = prod.variables[swap_with]
            prod.variables[swap_with] = prod.variables[axis]
            prod.variables[axis] = moved
            prod.values = prod.values.swapaxes(axis, swap_with)

        expected_flat = np.array([0, 0, 0, 0,
                                  0, 1, 2, 3,
                                  0, 2, 4, 6,
                                  0, 3, 6, 9])
        np_test.assert_almost_equal(prod.values.ravel(), expected_flat)

        self.assertEqual(prod.variables, ['x1', 'x2', 'x3', 'x4'])
Exemple #37
0
 def test_class_init_typeerror(self):
     """Check that ``Factor`` rejects invalid constructor arguments."""
     # Value lists containing strings must raise TypeError.
     bad_value_lists = (
         ['val1', 'val2'],
         [1, 'val1'],
         ['val1', 1],
         [0.1, 'val1'],
         ['val1', 0.1],
     )
     for bad_values in bad_value_lists:
         self.assertRaises(TypeError, Factor, ['x1', 'x2', 'x3'], [2, 1, 1], bad_values)
     # A plain string instead of a list of variables.
     self.assertRaises(TypeError, Factor, 'x1', [3], [1, 2, 3])
     # A repeated variable name.
     self.assertRaises(ValueError, Factor, ['x1', 'x1', 'x3'], [2, 3, 2], range(12))
Exemple #38
0
    def get_distributions(self):
        """
        Returns a dictionary of name and its distribution. Distribution is a ndarray.

        The ndarray is stored in the standard way such that the rightmost variable changes most often.
        Consider a CPD of variable 'd' which has parents 'b' and 'c' (distribution['CONDSET'] = ['b', 'c'])

                  |  d_0     d_1
        ---------------------------
        b_0, c_0  |  0.8     0.2
        b_0, c_1  |  0.9     0.1
        b_1, c_0  |  0.7     0.3
        b_1, c_1  |  0.05    0.95

        The value of distribution['d']['DPIS'] for the above example will be:
        array([[ 0.8 ,  0.2 ], [ 0.9 ,  0.1 ], [ 0.7 ,  0.3 ], [ 0.05,  0.95]])

        For a distribution with a CONDSET element, 'CARDINALITY' holds, for
        each parent, the number of distinct values found in the matching
        column of the DPI 'INDEXES' attributes.

        Examples
        --------
        >>> reader = XBNReader('xbn_test.xml')
        >>> reader.get_distributions()
        {'a': {'TYPE': 'discrete', 'DPIS': array([[ 0.2,  0.8]])},
         'e': {'TYPE': 'discrete', 'DPIS': array([[ 0.8,  0.2],
                 [ 0.6,  0.4]]), 'CONDSET': ['c'], 'CARDINALITY': array([2])},
         'b': {'TYPE': 'discrete', 'DPIS': array([[ 0.8,  0.2],
                 [ 0.2,  0.8]]), 'CONDSET': ['a'], 'CARDINALITY': array([2])},
         'c': {'TYPE': 'discrete', 'DPIS': array([[ 0.2 ,  0.8 ],
                 [ 0.05,  0.95]]), 'CONDSET': ['a'], 'CARDINALITY': array([2])},
         'd': {'TYPE': 'discrete', 'DPIS': array([[ 0.8 ,  0.2 ],
                 [ 0.9 ,  0.1 ],
                 [ 0.7 ,  0.3 ],
                 [ 0.05,  0.95]]), 'CONDSET': ['b', 'c'], 'CARDINALITY': array([2, 2])}}
        """
        import numpy as np

        distribution = {}
        for dist in self.bnmodel.find('DISTRIBUTIONS'):
            variable_name = dist.find('PRIVATE').get('NAME')
            entry = {'TYPE': dist.get('TYPE')}
            distribution[variable_name] = entry

            condset = dist.find('CONDSET')
            if condset is not None:
                parents = [
                    var.get('NAME') for var in condset.findall('CONDELEM')
                ]
                entry['CONDSET'] = parents
                if parents:
                    # Parse the INDEXES matrix once instead of once per parent.
                    index_rows = np.array([
                        list(map(int, dpi.get('INDEXES').split()))
                        for dpi in dist.find('DPIS')
                    ])
                    cardinality = [
                        len(set(index_rows[:, column]))
                        for column in range(len(parents))
                    ]
                else:
                    cardinality = []
                entry['CARDINALITY'] = np.array(cardinality)

            entry['DPIS'] = np.array([
                list(map(float, dpi.text.split())) for dpi in dist.find('DPIS')
            ])

        return distribution
Exemple #39
0
    def _make_table_str(self, tablefmt="fancy_grid", print_state_names=True):
        """
        Render the CPD as a table string through `tabulate`.

        The first rows are headers, one per evidence variable (every variable
        after the first), listing the evidence state of each column. They are
        followed by one row per state of ``self.variable`` holding the values
        returned by ``self.get_values()``.

        Parameters
        ----------
        tablefmt: str
            Table format forwarded to `tabulate`.
        print_state_names: boolean
            If True and ``self.state_names`` is set, states are labelled
            ``var(state_name)``; otherwise ``var_index``.
        """
        def label(var, state_index):
            # One cell label, in either the named or the indexed form.
            if self.state_names and print_state_names:
                return '{var}({state})'.format(
                    var=var, state=self.state_names[var][state_index])
            return '{s}_{d}'.format(s=var, d=state_index)

        parents = self.variables[1:]
        parent_cards = self.cardinality[1:]

        header_rows = []
        if parents:
            # Every joint assignment of the evidence variables, one per row.
            assignments = np.array(
                list(product(*[range(card) for card in parent_cards])))
            for position in range(len(parent_cards)):
                parent = parents[position]
                header_rows.append(
                    [str(parent)] +
                    [label(parent, state) for state in assignments.T[position]])

        own_labels = [
            label(self.variable, state) for state in range(self.variable_card)
        ]
        # Prefix each row of values with the label of the matching state.
        body_rows = np.hstack(
            (np.array([own_labels]).T, self.get_values())).tolist()
        # No support for multi-headers in tabulate
        return tabulate(header_rows + body_rows, tablefmt=tablefmt)
Exemple #40
0
 def _find_common_cliques(cliques_list):
     """
     Finds the common cliques among the given set of cliques for
     corresponding node.
     """
     common = set([tuple(x) for x in cliques_list[0]])
     for i in range(1, len(cliques_list)):
         common = common & set([tuple(x) for x in cliques_list[i]])
     return list(common)
Exemple #41
0
    def _tighten_triplet(self, max_iterations, later_iter, max_triplets,
                         prolong):
        """
        This method finds all the triplets that are eligible and adds them iteratively in the bunch of max_triplets

        Parameters
        ----------
        max_iterations: integer
                        Maximum number of times we tighten the relaxation

        later_iter: integer
                    Number of maximum iterations that we want MPLP to run. This is lesser than the initial number
                    of iterations.

        max_triplets: integer
                      Maximum number of triplets that can be added atmost in one iteration.

        prolong: bool
                It sets the continuation of tightening after all the triplets are exhausted
        """
        # Find all the triplets that are possible in the present model
        triangles = self.find_triangles()
        # Evaluate scores for each of the triplets found above
        triplet_scores = self._get_triplet_scores(triangles)
        # Arrange the keys on the basis of increasing order of the values of the dict. triplet_scores
        sorted_scores = sorted(triplet_scores, key=triplet_scores.get)
        for niter in range(max_iterations):
            if self._is_converged(
                    integrality_gap_threshold=self.integrality_gap_threshold):
                break
            # add triplets that are yet not added.
            add_triplets = []
            for triplet_number in range(len(sorted_scores)):
                # At once, we can add atmost 5 triplets
                if triplet_number >= max_triplets:
                    break
                add_triplets.append(sorted_scores.pop())
            # Break from the tighten triplets loop if there are no triplets to add if the prolong is set to False
            if not add_triplets and prolong is False:
                break
            # Update the eligible triplets to tighten the relaxation
            self._update_triangles(add_triplets)
            # Run MPLP for a maximum of later_iter times.
            self._run_mplp(later_iter)
    def forward_sample(self, size=1, return_type="dataframe"):
        """
        Generates sample(s) from joint distribution of the bayesian network.

        Nodes are processed in ``self.topological_order``; each node is drawn
        using the already sampled values of the other variables of its CPD.

        Parameters
        ----------
        size: int
            size of sample to be generated

        return_type: string (dataframe | recarray)
            Return type for samples, either of 'dataframe' or 'recarray'.
            Defaults to 'dataframe'

        Returns
        -------
        sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
            the generated samples


        Examples
        --------
        >>> from pgmpy.models.BayesianModel import BayesianModel
        >>> from pgmpy.factors.discrete import TabularCPD
        >>> from pgmpy.sampling import BayesianModelSampling
        >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
        >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
        >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
        ...                0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
        ...                ['intel', 'diff'], [2, 2])
        >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
        >>> inference = BayesianModelSampling(student)
        >>> inference.forward_sample(size=2, return_type='recarray')
        rec.array([(0, 0, 1), (1, 0, 2)], dtype=
                  [('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8')])
        """
        # One integer field per variable, in topological order.
        field_types = [(name, "int") for name in self.topological_order]
        sampled = np.zeros(size, dtype=field_types).view(np.recarray)

        progress = tqdm(self.topological_order)
        for node in progress:
            progress.set_description(
                "Generating for node: {node}".format(node=node))
            node_cpd = self.model.get_cpds(node)
            states = range(self.cardinality[node])
            # Every CPD variable except the first, in reverse order.
            parents = node_cpd.variables[:0:-1]
            if not parents:
                weights = node_cpd.values
            else:
                lookup = self.pre_compute_reduce(variable=node)
                parent_samples = np.vstack([sampled[name] for name in parents])
                # One column of `parent_samples` per generated sample.
                weights = [lookup[tuple(column)] for column in parent_samples.T]
            sampled[node] = sample_discrete(states, weights, size)

        return _return_samples(return_type, sampled)
    def to_bayesian_model(self):
        """
        Creates a Bayesian Model which is a minimum I-Map for this markov model.

        The ordering of parents may not remain constant. It would depend on the
        ordering of variable in the junction tree (which is not constant) all the
        time.

        Examples
        --------
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> bm = mm.to_bayesian_model()
        """
        from pgmpy.models import BayesianModel

        bm = BayesianModel()
        # Maps each variable to the first clique it was seen in; unseen
        # variables read as the empty tuple.
        first_clique = defaultdict(tuple)
        ordering = []

        # The junction tree is built from the markov model (triangulation,
        # maximal cliques, then a tree over these cliques).
        junction_tree = self.to_junction_tree()

        # Order the variables by the first clique they appear in, walking the
        # tree breadth first from an arbitrary root clique.
        root_clique = next(iter(junction_tree.nodes()))
        tree_edges = nx.bfs_edges(junction_tree, root_clique)
        for variable in root_clique:
            first_clique[variable] = root_clique
            ordering.append(variable)
        for _, clique in tree_edges:
            for variable in clique:
                if first_clique[variable]:
                    continue
                first_clique[variable] = clique
                ordering.append(variable)

        # The parents of x_i are the other members of its first clique that
        # come earlier in the ordering:
        # par(x_i) = (var(c_k) - x_i) \cap {x_1, ..., x_{i-1}}
        for position, variable in enumerate(ordering):
            clique_mates = set(first_clique[variable]) - set([variable])
            parents = clique_mates.intersection(set(ordering[:position]))
            bm.add_edges_from([(parent, variable) for parent in parents])
            # TODO : Convert factor into CPDs
        return bm
Exemple #44
0
    def maximize(self, variables, inplace=True):
        """
        Maximizes the factor with respect to `variables`.

        The given variables are removed from the scope and every remaining
        entry holds the maximum over the removed axes.

        Parameters
        ----------
        variables: list, array-like
            List of variables with respect to which factor is to be maximized

        inplace: boolean
            If inplace=True it will modify the factor itself, else would return
            a new factor.

        Returns
        -------
        DiscreteFactor or None: if inplace=True (default) returns None
                        if inplace=False returns a new `DiscreteFactor` instance.

        Examples
        --------
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> phi = DiscreteFactor(['x1', 'x2', 'x3'], [3, 2, 2], [0.25, 0.35, 0.08, 0.16, 0.05, 0.07,
        ...                                              0.00, 0.00, 0.15, 0.21, 0.09, 0.18])
        >>> phi.variables
        ['x1','x2','x3']
        >>> phi.maximize(['x2'])
        >>> phi.variables
        ['x1', 'x3']
        >>> phi.cardinality
        array([3, 2])
        >>> phi.values
        array([[ 0.25,  0.35],
               [ 0.05,  0.07],
               [ 0.15,  0.21]])
        """
        if isinstance(variables, six.string_types):
            raise TypeError(
                "variables: Expected type list or array-like, got type str")

        target = self.copy() if not inplace else self

        # Validate the whole request before touching the factor.
        for name in variables:
            if name in target.variables:
                continue
            raise ValueError("{var} not in scope.".format(var=name))

        drop_axes = [target.variables.index(name) for name in variables]
        dropped = set(drop_axes)
        keep_axes = [
            axis for axis in range(len(self.variables)) if axis not in dropped
        ]

        target.variables = [target.variables[axis] for axis in keep_axes]
        target.cardinality = target.cardinality[keep_axes]
        target.values = np.max(target.values, axis=tuple(drop_axes))

        if inplace:
            return None
        return target
Exemple #45
0
    def get_next_region_layer(self, region_type=None):
        """
        Given the label of a region layer (e.g. "R0"), generate its children
        region layer (e.g. "R1").

        The candidate children are the non-empty pairwise intersections of the
        regions of the given layer. Candidates that are a subset of another
        candidate are discarded. Every remaining region is added to the graph
        with an edge from each region of the given layer that contains it, and
        the new layer is stored in ``self.region_layers``.

        Parameters
        ----------
        region_type: str
            Label of the parent layer, of the form "R<number>".

        Returns
        -------
        bool: True if a new layer was generated, False if no pair of regions
            of the given layer intersects.
        """
        # Parse the whole number after the "R" prefix so that layers beyond
        # "R9" are labelled correctly.
        label = "R{}".format(int(region_type[1:]) + 1)
        layer_nodes = self.region_layers[region_type]

        intersections = []
        for i_node, j_node in combinations(layer_nodes, 2):
            intersection_node = self._get_interset(i_node, j_node)
            # Skip empty intersections; compare by value, not by identity.
            if intersection_node == ():
                continue
            if intersection_node not in intersections:
                intersections.append(intersection_node)

        if not intersections:
            # There is no next layer node generated
            return False

        # Drop every candidate that is a subset of another candidate.
        filter_out_idx = set()
        for i, candidate in enumerate(intersections):
            for j, other in enumerate(intersections):
                if i != j and self._is_subset(candidate, other):
                    filter_out_idx.add(i)
                    break

        # The final selected nodes for the next layer.
        selected = [
            node for idx, node in enumerate(intersections)
            if idx not in filter_out_idx
        ]
        # Add them to the graph, linked to each parent region containing them.
        for c_node in selected:
            self.add_node(c_node)
            for p_node in layer_nodes:
                if self._is_subset(c_node, p_node):
                    self.add_edges_from([(p_node, c_node)])

        self.region_layers[label] = selected
        return True
Exemple #46
0
    def test_factor_product(self):
        """Check ``factor_product`` with disjoint and with overlapping scopes."""
        # Scopes without a shared variable.
        first = Factor(['x1', 'x2'], [2, 2], range(4))
        second = Factor(['x3', 'x4'], [2, 2], range(4))
        result = factor_product(first, second)
        expected = Factor(
            ['x1', 'x2', 'x3', 'x4'], [2, 2, 2, 2],
            [0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9])
        self.assertEqual(result, expected)
        self.assertEqual(sorted(result.variables), ['x1', 'x2', 'x3', 'x4'])

        # Scopes sharing the variable 'x2'.
        first = Factor(['x1', 'x2'], [3, 2], range(6))
        second = Factor(['x2', 'x3'], [2, 2], range(4))
        result = factor_product(first, second)
        flat_values = [0, 0, 2, 3, 0, 2, 6, 9, 0, 4, 10, 15]
        # Built as in the first case, but only the raw values are compared.
        expected = Factor(['x1', 'x2', 'x3'], [3, 2, 2], list(flat_values))
        np_test.assert_almost_equal(
            result.values, np.array(flat_values).reshape(3, 2, 2))
        self.assertEqual(sorted(result.variables), ['x1', 'x2', 'x3'])
    def pre_compute_reduce(self, variable):
        """
        Reduce the CPD of `variable` for every combination of evidence states.

        The evidence variables are all the CPD variables except the first,
        taken in reverse order.

        Returns
        -------
        dict: maps each tuple of evidence states (in that reversed order) to
            the ``values`` of the CPD reduced on those states.
        """
        cpd = self.model.get_cpds(variable)
        parents = cpd.variables[:0:-1]
        state_ranges = [range(self.cardinality[parent]) for parent in parents]

        return {
            combination: cpd.reduce(
                list(zip(parents, combination)), inplace=False).values
            for combination in itertools.product(*state_ranges)
        }
Exemple #48
0
    def _str(self,
             phi_or_p="phi",
             tablefmt="fancy_grid",
             print_state_names=True):
        """
        Generate the string from `__str__` method.

        The table has one row per joint assignment of the variables, with one
        column per variable followed by a column holding the matching value.

        Parameters
        ----------
        phi_or_p: 'phi' | 'p'
                'phi': When used for Factors.
                  'p': When used for CPDs.
        tablefmt: str
                Table format forwarded to `tabulate`.
        print_state_names: boolean
                If True, the user defined state names are displayed.
        """
        header = [six.text_type(name) for name in self.scope()]
        header.append('{phi_or_p}({variables})'.format(
            phi_or_p=phi_or_p, variables=','.join(header)))

        rows = []
        assignments = product(*[range(card) for card in self.cardinality])
        # Assignments are numbered in the same order as the flattened values.
        for row_number, assignment in enumerate(assignments):
            if self.state_names and print_state_names:
                cells = [
                    "{var}({state})".format(
                        var=var,
                        state=self.state_names[var][assignment[position]])
                    for position, var in enumerate(self.variables)
                ]
            else:
                cells = [
                    "{s}_{d}".format(s=var, d=assignment[position])
                    for position, var in enumerate(self.variables)
                ]
            cells.append(self.values.ravel()[row_number])
            rows.append(cells)

        return tabulate(rows,
                        headers=header,
                        tablefmt=tablefmt,
                        floatfmt=".4f")
Exemple #49
0
    def region_count(self):
        """
        Assign a count number ("weight") to every region, layer by layer.

        Regions of layer "R0" get weight 1. Every region of a later layer
        gets 1 minus the sum of the weights of the nodes returned by
        ``get_ancestors_of`` for it.

        Returns
        -------
        The graph itself.
        """
        for top_region in self.region_layers["R0"]:
            self.nodes[top_region]["weight"] = 1

        for layer_number in range(1, len(self.region_layers)):
            layer_label = "R{}".format(layer_number)
            for region in self.region_layers[layer_label]:
                inherited = sum(
                    self.nodes[ancestor]["weight"]
                    for ancestor in self.get_ancestors_of([region]))
                self.nodes[region]["weight"] = 1 - inherited

        return self
    def setUp(self):
        """Create the variables, transition models, start state and samples used by the tests."""
        self.variables = ['intel', 'diff', 'grade']
        self.card = [3, 2, 3]
        self.cardinalities = {'intel': 3, 'diff': 2, 'grade': 3}

        # Each transition model is kept both as a nested dict
        # {state: {next_state: probability}} and in matrix form.
        intel_rows = [[0.1, 0.25, 0.65], [0.5, 0.3, 0.2], [0.3, 0.3, 0.4]]
        self.intel_tm = {
            state: dict(enumerate(row)) for state, row in enumerate(intel_rows)
        }
        self.intel_tm_matrix = np.array(intel_rows)

        diff_rows = [[0.3, 0.7], [0.75, 0.25]]
        self.diff_tm = {
            state: dict(enumerate(row)) for state, row in enumerate(diff_rows)
        }
        self.diff_tm_matrix = np.array(diff_rows)

        grade_rows = [[0.4, 0.2, 0.4], [0.9, 0.05, 0.05], [0.1, 0.4, 0.5]]
        self.grade_tm = {
            state: dict(enumerate(row)) for state, row in enumerate(grade_rows)
        }
        # Unlike the other two, this matrix is a plain list of lists.
        self.grade_tm_matrix = [list(row) for row in grade_rows]

        self.start_state = [
            State(name, value)
            for name, value in (('intel', 0), ('diff', 1), ('grade', 2))
        ]
        self.model = MC()

        # 200 rows: 'a' is 1 then 0, 'b' is 0 then 1, 100 rows each.
        self.sample = DataFrame(index=range(200), columns=['a', 'b'])
        self.sample.a = [1] * 100 + [0] * 100
        self.sample.b = [0] * 100 + [1] * 100
Exemple #51
0
    def get_grammar(self):
        """
        Returns the grammar of the UAI file.

        The grammar is grown incrementally: the size of each later section is
        only known after an earlier count has been read, so ``self.network``
        is re-parsed with the partial grammar whenever such a count is
        needed.

        Side effects: stores ``self.no_variables`` and ``self.no_functions``
        as ints read from ``self.network``.
        """
        name_token = Word(alphas).setResultsName('network_name')
        count_token = Word(nums).setResultsName('no_variables')
        grammar = name_token + count_token
        parsed = grammar.parseString(self.network)
        self.no_variables = int(parsed['no_variables'])

        # One numeric token per variable, then the number of functions.
        grammar += (Word(nums) *
                    self.no_variables).setResultsName('domain_variables')
        grammar += Word(nums).setResultsName('no_functions')
        parsed = grammar.parseString(self.network)
        self.no_functions = int(parsed['no_functions'])

        # Function scopes: a size token followed by that many integers.
        integer = Word(nums).setParseAction(lambda t: int(t[0]))
        for idx in range(self.no_functions):
            scope_key = 'fun_scope_{}'.format(idx)
            grammar += Word(nums).setResultsName(scope_key)
            scope_size = int(grammar.parseString(self.network)[scope_key])
            grammar += (integer * scope_size).setResultsName(
                'fun_{}'.format(idx))

        # Function tables: a count token followed by that many numbers
        # written as digits with an optional fractional part.
        floatnumber = Combine(
            Word(nums) + Optional(Literal(".") + Optional(Word(nums))))
        for idx in range(self.no_functions):
            count_key = 'fun_no_values_{}'.format(idx)
            grammar += Word(nums).setResultsName(count_key)
            value_count = int(grammar.parseString(self.network)[count_key])
            grammar += (floatnumber * value_count).setResultsName(
                'fun_values_{}'.format(idx))
        return grammar
Exemple #52
0
 def __hash__(self):
     """
     Hash this object from its variables, values and cardinality.

     A copy of the object is brought into a canonical layout: its axes are
     swapped, one position at a time, until the per-variable hashes appear
     in ascending order. The hash is then taken over the concatenated string
     forms of the sorted variable hashes, the rearranged values and the
     rearranged cardinality. ``self`` is not modified; all swaps are applied
     to the copy.
     """
     own_hashes = [hash(var) for var in self.variables]
     ordered_hashes = sorted(own_hashes)
     canonical = self.copy()
     for target in range(canonical.values.ndim):
         # Selection-sort step: find where the hash that belongs at
         # ``target`` currently sits, then swap it into place in the hash
         # list, the cardinality and the value axes alike.
         source = own_hashes.index(ordered_hashes[target])
         own_hashes[target], own_hashes[source] = (
             own_hashes[source], own_hashes[target])
         canonical.cardinality[target], canonical.cardinality[source] = (
             canonical.cardinality[source], canonical.cardinality[target])
         canonical.values = canonical.values.swapaxes(target, source)
     pieces = (ordered_hashes, canonical.values, canonical.cardinality)
     return hash(''.join(str(piece) for piece in pieces))
Exemple #53
0
 def test_get_model(self):
     """Check the model built by ``self.reader.get_model()``.

     Compares, in this order: every CPD table against the expected arrays,
     the ``node`` and ``edge`` dictionaries, and the sorted node and edge
     lists.
     """
     edges_expected = [
         ('family-out', 'dog-out'),
         ('bowel-problem', 'dog-out'),
         ('family-out', 'light-on'),
         ('dog-out', 'hear-bark'),
     ]
     nodes_expected = [
         'bowel-problem', 'hear-bark', 'light-on', 'dog-out', 'family-out'
     ]
     edge_expected = {
         'bowel-problem': {'dog-out': {}},
         'dog-out': {'hear-bark': {}},
         'family-out': {'dog-out': {}, 'light-on': {}},
         'hear-bark': {},
         'light-on': {},
     }
     # Expected 'position' attribute of every node, as stored (a string).
     positions = [
         ('bowel-problem', '(335, 99)'),
         ('dog-out', '(300, 195)'),
         ('family-out', '(257, 99)'),
         ('hear-bark', '(296, 268)'),
         ('light-on', '(218, 195)'),
     ]
     node_expected = {name: {'position': where} for name, where in positions}
     # Expected tables, in the order in which get_cpds() returns the CPDs.
     cpds_expected = [
         np.array([[0.01], [0.99]]),
         np.array([[0.99, 0.97, 0.9, 0.3], [0.01, 0.03, 0.1, 0.7]]),
         np.array([[0.15], [0.85]]),
         np.array([[0.7, 0.01], [0.3, 0.99]]),
         np.array([[0.6, 0.05], [0.4, 0.95]]),
     ]

     model = self.reader.get_model()
     for position, expected_table in enumerate(cpds_expected):
         actual_table = model.get_cpds()[position].get_cpd()
         np_test.assert_array_equal(actual_table, expected_table)
     self.assertDictEqual(model.node, node_expected)
     self.assertDictEqual(model.edge, edge_expected)
     self.assertListEqual(sorted(model.nodes()), sorted(nodes_expected))
     self.assertListEqual(sorted(model.edges()), sorted(edges_expected))
Exemple #54
0
    def test_max_calibrate_sepset_belief(self):
        """Sepset beliefs after max-calibration match hand-computed ones.

        The expected beliefs are rebuilt from three fresh factors by
        multiplying them and maximizing out every variable except the one
        shared by the two neighbouring cliques.
        """
        propagation = BeliefPropagation(self.junction_tree)
        propagation.max_calibrate()
        actual = propagation.get_sepset_beliefs()

        f_ab = Factor(['A', 'B'], [2, 3], range(6))
        f_bc = Factor(['B', 'C'], [3, 2], range(6))
        f_cd = Factor(['C', 'D'], [2, 2], range(4))

        def max_out(factor, variable):
            # Non-mutating maximization over a single variable.
            return factor.maximize([variable], inplace=False)

        # Belief over B: everything else maximized out.
        belief_b = max_out(
            f_ab * max_out(max_out(f_cd, 'D') * f_bc, 'C'), 'A')
        # Belief over C: everything else maximized out.
        belief_c = max_out(
            f_bc * (max_out(f_ab, 'A') * max_out(f_cd, 'D')), 'B')

        expectations = [
            (frozenset((('A', 'B'), ('B', 'C'))), belief_b),
            (frozenset((('B', 'C'), ('C', 'D'))), belief_c),
        ]
        for sepset, expected in expectations:
            np_test.assert_array_almost_equal(actual[sepset].values,
                                              expected.values)
Exemple #55
0
    def test_max_calibrate_clique_belief(self):
        """Clique beliefs after max-calibration match hand-computed ones.

        Each expected belief is the clique's own factor multiplied by the
        other factors with the variables outside the clique maximized out.
        """
        propagation = BeliefPropagation(self.junction_tree)
        propagation.max_calibrate()
        actual = propagation.get_clique_beliefs()

        f_ab = Factor(['A', 'B'], [2, 3], range(6))
        f_bc = Factor(['B', 'C'], [3, 2], range(6))
        f_cd = Factor(['C', 'D'], [2, 2], range(4))

        def max_out(factor, variable):
            # Non-mutating maximization over a single variable.
            return factor.maximize([variable], inplace=False)

        belief_ab = f_ab * max_out(max_out(f_cd, 'D') * f_bc, 'C')
        belief_bc = f_bc * (max_out(f_ab, 'A') * max_out(f_cd, 'D'))
        belief_cd = f_cd * max_out(max_out(f_ab, 'A') * f_bc, 'B')

        expectations = [
            (('A', 'B'), belief_ab),
            (('B', 'C'), belief_bc),
            (('C', 'D'), belief_cd),
        ]
        for clique, expected in expectations:
            np_test.assert_array_almost_equal(actual[clique].values,
                                              expected.values)
Exemple #56
0
    def setUp(self):
        """Create the junction-tree and Bayesian-model fixtures.

        ``self.junction_tree`` links the cliques (A, B) - (B, C) - (C, D) and
        receives one factor per clique. ``self.bayesian_model`` has the edges
        A->J, R->J, J->Q, J->L and G->L, with one tabular CPD per node.
        """
        # Chain of three cliques; neighbouring cliques share one variable.
        self.junction_tree = JunctionTree([(('A', 'B'), ('B', 'C')),
                                           (('B', 'C'), ('C', 'D'))])
        # One factor per clique, filled with consecutive integers from 0.
        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))
        self.junction_tree.add_factors(phi1, phi2, phi3)

        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        # NOTE(review): the trailing list arguments are presumably the
        # evidence variables and their cardinalities - confirm against the
        # TabularCPD signature.
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        # CPDs are registered in alphabetical order of their variable.
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)