Beispiel #1
0
    def test_factor_product(self):
        """Verify factor_product for disjoint scopes and for a shared variable."""
        from pgmpy import factors

        # Case 1: the two factors share no variable, so the result covers x1..x4.
        left = factors.Factor(['x1', 'x2'], [2, 2], range(4))
        right = factors.Factor(['x3', 'x4'], [2, 2], range(4))
        product = factors.factor_product(left, right)
        expected_values = np.array([0, 0, 0, 0,
                                    0, 1, 2, 3,
                                    0, 2, 4, 6,
                                    0, 3, 6, 9])
        np_test.assert_array_equal(product.values, expected_values)
        expected_variables = OrderedDict([
            ('x1', ['x1_0', 'x1_1']),
            ('x2', ['x2_0', 'x2_1']),
            ('x3', ['x3_0', 'x3_1']),
            ('x4', ['x4_0', 'x4_1']),
        ])
        self.assertEqual(product.variables, expected_variables)

        # Case 2: both factors contain x2, so the result covers x1, x2, x3.
        left = factors.Factor(['x1', 'x2'], [3, 2], range(6))
        right = factors.Factor(['x2', 'x3'], [2, 2], range(4))
        product = factors.factor_product(left, right)
        expected_values = np.array([0, 0, 2, 3,
                                    0, 2, 6, 9,
                                    0, 4, 10, 15])
        np_test.assert_array_equal(product.values, expected_values)
        expected_variables = OrderedDict([
            ('x1', ['x1_0', 'x1_1', 'x1_2']),
            ('x2', ['x2_0', 'x2_1']),
            ('x3', ['x3_0', 'x3_1']),
        ])
        self.assertEqual(product.variables, expected_variables)
Beispiel #2
0
    def test_factor_product(self):
        """Verify factor_product values and State-based variables for two scope layouts."""

        def states(name, card):
            # One State per possible value of `name`, in increasing order.
            return [State(name, index) for index in range(card)]

        # Case 1: disjoint scopes.
        left = Factor(['x1', 'x2'], [2, 2], range(4))
        right = Factor(['x3', 'x4'], [2, 2], range(4))
        product = factor_product(left, right)
        expected_values = np.array([0, 0, 0, 0,
                                    0, 1, 2, 3,
                                    0, 2, 4, 6,
                                    0, 3, 6, 9])
        np_test.assert_array_equal(product.values, expected_values)
        expected_variables = OrderedDict(
            (name, states(name, 2)) for name in ('x1', 'x2', 'x3', 'x4'))
        self.assertEqual(product.variables, expected_variables)

        # Case 2: the factors overlap on x2.
        left = Factor(['x1', 'x2'], [3, 2], range(6))
        right = Factor(['x2', 'x3'], [2, 2], range(4))
        product = factor_product(left, right)
        expected_values = np.array([0, 0, 2, 3,
                                    0, 2, 6, 9,
                                    0, 4, 10, 15])
        np_test.assert_array_equal(product.values, expected_values)
        expected_variables = OrderedDict(
            (name, states(name, card))
            for name, card in (('x1', 3), ('x2', 2), ('x3', 2)))
        self.assertEqual(product.variables, expected_variables)
Beispiel #3
0
    def get_partition_function(self):
        r"""
        Compute the partition function of this graph.

        The partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        The value is the sum of all entries of the product of every factor
        in ``self.factors``. If ``self.check_model()`` returns a falsy value
        nothing is computed and ``None`` is returned.

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors import Factor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([('a', 'b', 'c'), ('a', 'b'), ('a', 'c')])
        >>> G.add_edges_from([(('a', 'b', 'c'), ('a', 'b')),
        ...                   (('a', 'b', 'c'), ('a', 'c'))])
        >>> phi1 = Factor(['a', 'b', 'c'], [2, 2, 2], np.random.rand(8))
        >>> phi2 = Factor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi3 = Factor(['a', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> G.get_partition_function()
        """
        if not self.check_model():
            return None

        # Indexing the first factor explicitly keeps the IndexError on an
        # empty factor list.
        first_factor = self.factors[0]
        remaining_factors = self.factors[1:]
        joint = factor_product(first_factor, *remaining_factors)
        return np.sum(joint.values)
    def _get_kernel_from_markov_model(self, model):
        """
        Computes the Gibbs transition models from a Markov Network.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Section 12.3.3 pp 512-513.

        Sets ``self.variables`` and ``self.cardinalities`` and stores, for
        every variable, a dict in ``self.transition_models`` that maps each
        assignment tuple of the other variables to the normalized values of
        the reduced factor over that variable.

        Parameters:
        -----------
        model: MarkovModel
            The model from which probabilities will be computed.
        """
        self.variables = np.array(model.nodes())

        # Group the model's factors by every variable in their scope.
        factors_dict = {var: [] for var in self.variables}
        for factor in model.get_factors():
            for var in factor.scope():
                factors_dict[var].append(factor)

        # Take factor product
        factors_dict = {var: factor_product(*factors) if len(factors) > 1 else factors[0]
                        for var, factors in factors_dict.items()}
        self.cardinalities = {var: factors_dict[var].get_cardinality([var])[var] for var in self.variables}

        for var in self.variables:
            other_vars = [v for v in self.variables if var != v]
            other_cards = [self.cardinalities[v] for v in other_vars]
            kernel = {}
            factor = factors_dict[var]
            scope = set(factor.scope())
            for tup in itertools.product(*[range(card) for card in other_cards]):
                # The comprehension variable must not be named `var`: on
                # Python 2 list-comprehension variables leak into the
                # enclosing scope and would change the key used below.
                states = [State(v, s) for v, s in zip(other_vars, tup) if v in scope]
                reduced_factor = factor.reduce(states, inplace=False)
                kernel[tup] = reduced_factor.values / sum(reduced_factor.values)
            self.transition_models[var] = kernel
    def _get_kernel_from_bayesian_model(self, model):
        """
        Build the Gibbs transition models for a Bayesian Network.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Section 12.3.3 pp 512-513.

        For every variable, the CPDs mentioning it are multiplied together;
        the product is then reduced on each assignment of the remaining
        variables and normalized. The results are stored in
        ``self.transition_models[variable][assignment_tuple]``.

        Parameters:
        -----------
        model: BayesianModel
            The model from which probabilities will be computed.
        """
        self.variables = np.array(model.nodes())
        self.cardinalities = {node: model.get_cpds(node).variable_card for node in self.variables}

        for var in self.variables:
            others = [v for v in self.variables if var != v]
            state_ranges = [range(self.cardinalities[v]) for v in others]
            # Only the CPDs whose scope contains `var` are needed.
            joint = factor_product(*[cpd for cpd in model.cpds if var in cpd.scope()])
            joint_scope = set(joint.scope())

            kernel = {}
            for assignment in itertools.product(*state_ranges):
                # Variables outside the product's scope cannot be reduced on.
                observed = [State(v, s) for v, s in zip(others, assignment) if v in joint_scope]
                conditional = joint.reduce(observed, inplace=False)
                kernel[assignment] = conditional.values / sum(conditional.values)
            self.transition_models[var] = kernel
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Returns
        -------
        The sum of all values of the product of every factor in
        ``self.factors``.

        Raises
        ------
        ValueError
            If the scope of the product of all factors differs from the set
            of nodes of the graph.

        Examples
        --------
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = MarkovModel()
        >>> G.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> G.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                   ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                   ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in G.edges()]
        >>> G.add_factors(*phi)
        >>> G.get_partition_function()
        """
        # The docstring is a raw string because the LaTeX contains
        # backslashes that are not valid Python escape sequences.
        self.check_model()

        # Indexing the first factor keeps the IndexError on an empty list.
        factor = factor_product(self.factors[0], *self.factors[1:])
        if set(factor.scope()) != set(self.nodes()):
            raise ValueError('DiscreteFactor for all the random variables not defined.')

        return np.sum(factor.values)
Beispiel #7
0
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Returns
        -------
        The sum of all values of the product of every factor in
        ``self.factors``.

        Raises
        ------
        ValueError
            If the scope of the product of all factors differs from the set
            returned by ``self.get_variable_nodes()``.

        Examples
        --------
        >>> from pgmpy.models import FactorGraph
        >>> from pgmpy.factors import Factor
        >>> G = FactorGraph()
        >>> G.add_nodes_from(['a', 'b', 'c'])
        >>> phi1 = Factor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi2 = Factor(['b', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2)
        >>> G.add_nodes_from([phi1, phi2])
        >>> G.add_edges_from([('a', phi1), ('b', phi1),
        ...                   ('b', phi2), ('c', phi2)])
        >>> G.get_factors()
        >>> G.get_partition_function()
        """
        # The docstring is a raw string because the LaTeX contains
        # backslashes that are not valid Python escape sequences.

        # Indexing the first factor keeps the IndexError on an empty list.
        factor = factor_product(self.factors[0], *self.factors[1:])
        if set(factor.scope()) != set(self.get_variable_nodes()):
            raise ValueError('Factor for all the random variables not defined.')

        return np.sum(factor.values)
Beispiel #8
0
    def _get_kernel_from_bayesian_model(self, model):
        """
        Derive the per-variable Gibbs transition models of a Bayesian Network.
        'Probabilistic Graphical Model Principles and Techniques', Koller and
        Friedman, Section 12.3.3 pp 512-513.

        Populates ``self.variables``, ``self.cardinalities`` and one entry of
        ``self.transition_models`` per variable. Each entry maps an assignment
        tuple of all other variables to the normalized values of the product
        of the CPDs containing the variable, reduced on that assignment.

        Parameters:
        -----------
        model: BayesianModel
            The model from which probabilities will be computed.
        """
        self.variables = np.array(model.nodes())
        self.cardinalities = {
            name: model.get_cpds(name).variable_card for name in self.variables}

        for var in self.variables:
            other_vars = [v for v in self.variables if var != v]
            other_ranges = [range(self.cardinalities[v]) for v in other_vars]

            relevant_cpds = [cpd for cpd in model.cpds if var in cpd.scope()]
            product = factor_product(*relevant_cpds)
            product_scope = set(product.scope())

            transitions = {}
            for values in itertools.product(*other_ranges):
                # Reduce only on the variables the product actually contains.
                evidence = [
                    State(name, state)
                    for name, state in zip(other_vars, values)
                    if name in product_scope
                ]
                reduced = product.reduce(evidence, inplace=False)
                transitions[values] = reduced.values / sum(reduced.values)
            self.transition_models[var] = transitions
    def get_partition_function(self):
        r"""
        Compute the partition function of this undirected graph.

        The partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        The product of all factors in ``self.factors`` is formed and its
        values are summed. When ``self.check_model()`` returns a falsy value
        the method returns ``None`` without computing anything.

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([('a', 'b', 'c'), ('a', 'b'), ('a', 'c')])
        >>> G.add_edges_from([(('a', 'b', 'c'), ('a', 'b')),
        ...                   (('a', 'b', 'c'), ('a', 'c'))])
        >>> phi1 = DiscreteFactor(['a', 'b', 'c'], [2, 2, 2], np.random.rand(8))
        >>> phi2 = DiscreteFactor(['a', 'b'], [2, 2], np.random.rand(4))
        >>> phi3 = DiscreteFactor(['a', 'c'], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> G.get_partition_function()
        """
        if not self.check_model():
            return None

        # An empty factor list still fails with IndexError on the [0] lookup.
        head = self.factors[0]
        tail = self.factors[1:]
        return np.sum(factor_product(head, *tail).values)
Beispiel #10
0
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Returns
        -------
        The sum of all values of the product of every factor in
        ``self.factors``.

        Raises
        ------
        ValueError
            If the scope of the product of all factors differs from the set
            of nodes of the graph.

        Examples
        --------
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors import Factor
        >>> G = MarkovModel()
        >>> G.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> G.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                   ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                   ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [Factor(edge, [2, 2], np.random.rand(4)) for edge in G.edges()]
        >>> G.add_factors(*phi)
        >>> G.get_partition_function()
        """
        # The docstring is a raw string because the LaTeX contains
        # backslashes that are not valid Python escape sequences.
        self.check_model()

        # Indexing the first factor keeps the IndexError on an empty list.
        factor = factor_product(self.factors[0], *self.factors[1:])
        if set(factor.scope()) != set(self.nodes()):
            raise ValueError('Factor for all the random variables not defined.')

        return np.sum(factor.values)
Beispiel #11
0
 def optimize_fun(params):
     """
     Objective built from one pairwise factor per edge.

     Each edge gets a Factor over its two endpoints whose values are the
     slice of ``params`` delimited by consecutive entries of
     ``param_cumsum``. Returns the sum of the values of the product of all
     edge factors minus ``sum(constants * params)``.
     """
     factors = []
     for index, edge in enumerate(edges):
         u, v = edge[0], edge[1]
         start, stop = param_cumsum[index], param_cumsum[index + 1]
         cardinalities = [self.node_card[u], self.node_card[v]]
         factors.append(Factor([u, v], cardinalities, params[start:stop]))
     partition = sum(factor_product(*factors).values)
     return partition - sum(constants * params)
Beispiel #12
0
    def test_factor_product(self):
        """Check factor_product against expected factors for two scope layouts."""
        # Case 1: disjoint scopes.
        phi = Factor(['x1', 'x2'], [2, 2], range(4))
        phi1 = Factor(['x3', 'x4'], [2, 2], range(4))
        prod = factor_product(phi, phi1)
        expected_factor = Factor(['x1', 'x2', 'x3', 'x4'], [2, 2, 2, 2],
                                 [0, 0, 0, 0, 0, 1,
                                  2, 3, 0, 2, 4, 6,
                                  0, 3, 6, 9])
        self.assertEqual(prod, expected_factor)
        self.assertEqual(sorted(prod.variables), ['x1', 'x2', 'x3', 'x4'])

        # Case 2: the factors share x2.
        phi = Factor(['x1', 'x2'], [3, 2], range(6))
        phi1 = Factor(['x2', 'x3'], [2, 2], range(4))
        prod = factor_product(phi, phi1)
        expected_factor = Factor(['x1', 'x2', 'x3'], [3, 2, 2],
                                 [0, 0, 2, 3, 0, 2,
                                  6, 9, 0, 4, 10, 15])
        # The expected factor was previously built but never compared; assert
        # it the same way the first case does.
        self.assertEqual(prod, expected_factor)
        np_test.assert_almost_equal(prod.values,
                                    np.array([0, 0, 2, 3, 0, 2,
                                              6, 9, 0, 4, 10, 15]).reshape(3, 2, 2))
        self.assertEqual(sorted(prod.variables), ['x1', 'x2', 'x3'])
Beispiel #13
0
 def optimize_fun(params):
     """
     Evaluate the objective for the parameter vector ``params``.

     A Factor is created for every edge from that edge's slice of
     ``params`` (bounded by consecutive ``param_cumsum`` entries). The
     result is the sum of the values of the product of those factors,
     minus ``sum(constants * params)``.
     """

     def edge_factor(position, edge):
         # Factor over the edge's endpoints, valued by its params slice.
         u, v = edge[0], edge[1]
         return Factor(
             [u, v],
             [self.node_card[u], self.node_card[v]],
             params[param_cumsum[position]:param_cumsum[position + 1]])

     factors = [edge_factor(position, edge)
                for position, edge in enumerate(edges)]
     Z = sum(factor_product(*factors).values)
     return Z - sum(constants * params)
 def query(self, var, evidence):
     """
     Compute the marginal over ``var`` from the product of all factors.

     The product of every distinct factor is reduced on ``evidence`` and
     then marginalized over every model node that is neither in ``var``
     nor the first element of an item of ``evidence``. The result is not
     normalized.
     """
     # self.factors is a dict of the form of {node: [factors_involving_node]}
     distinct_factors = set(itertools.chain.from_iterable(self.factors.values()))
     joint = factor_product(*distinct_factors)
     conditioned = joint.reduce(evidence, inplace=False)

     observed = {state[0] for state in evidence}
     to_sum_out = set(self.model.nodes()) - set(var) - observed
     return conditioned.marginalize(to_sum_out, inplace=False)
    def map_query(self, variables=None, evidence=None, elimination_order=None):
        """
        Computes the MAP Query over the variables given the evidence.

        Parameters
        ----------
        variables: list
            list of variables over which we want to compute the max-marginal.
            When empty or None, the assignment of every variable in the
            final distribution is returned.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence
        elimination_order: list
            order of variable eliminations (if nothing is provided) order is
            computed automatically

        Returns
        -------
        dict
            Maps each requested variable to its value in the assignment at
            the argmax of the final distribution's values.

        Examples
        --------
        >>> from pgmpy.inference import VariableElimination
        >>> from pgmpy.models import BayesianModel
        >>> import numpy as np
        >>> import pandas as pd
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> inference = VariableElimination(model)
        >>> phi_query = inference.map_query(['A', 'B'])
        """
        # With evidence, every non-evidence variable is passed on; without
        # evidence an empty set is passed.
        if evidence:
            elimination_variables = set(self.variables) - set(evidence.keys())
        else:
            elimination_variables = set()

        remaining = self._variable_elimination(
            elimination_variables,
            'maximize',
            evidence=evidence,
            elimination_order=elimination_order)
        # To handle the case when no argument is passed then
        # _variable_elimination returns a dict.
        if isinstance(remaining, dict):
            remaining = remaining.values()

        joint = factor_product(*remaining)
        best_index = np.argmax(joint.values)
        best_assignment = joint.assignment([best_index])[0]
        full_result = {var: value for var, value in best_assignment}

        if not variables:
            return full_result
        return {var: full_result[var] for var in variables}
    def test_junction_tree_single_clique(self):
        """A triangle graph must yield one clique whose factor is the product of all factors."""
        triangle_edges = [('x1', 'x2'), ('x2', 'x3'), ('x1', 'x3')]
        self.graph.add_edges_from(triangle_edges)

        # One random pairwise factor per edge of the graph.
        edge_factors = []
        for edge in self.graph.edges():
            edge_factors.append(DiscreteFactor(edge, [2, 2], np.random.rand(4)))
        self.graph.add_factors(*edge_factors)

        tree = self.graph.to_junction_tree()
        clique_nodes = hf.recursive_sorted(tree.nodes())
        self.assertListEqual(clique_nodes, [['x1', 'x2', 'x3']])

        tree_factors = tree.get_factors()
        self.assertEqual(tree_factors[0], factor_product(*edge_factors))
    def test_junction_tree_single_clique(self):
        """The junction tree of a 3-cycle has a single clique carrying the full factor product."""
        self.graph.add_edges_from(
            [('x1', 'x2'), ('x2', 'x3'), ('x1', 'x3')])

        def random_pair_factor(edge):
            # Binary-binary factor with random values for the given edge.
            return DiscreteFactor(edge, [2, 2], np.random.rand(4))

        pair_factors = [random_pair_factor(edge) for edge in self.graph.edges()]
        self.graph.add_factors(*pair_factors)

        jt = self.graph.to_junction_tree()
        self.assertListEqual(
            hf.recursive_sorted(jt.nodes()),
            [['x1', 'x2', 'x3']])
        first_clique_factor = jt.get_factors()[0]
        self.assertEqual(first_clique_factor, factor_product(*pair_factors))
    def map_query(self, variables=None, evidence=None, elimination_order=None):
        """
        Computes the MAP Query over the variables given the evidence.

        Parameters
        ----------
        variables: list
            list of variables over which we want to compute the max-marginal.
            If falsy, the result covers every variable of the final
            distribution.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence
        elimination_order: list
            order of variable eliminations (if nothing is provided) order is
            computed automatically

        Returns
        -------
        dict
            ``{variable: value}`` taken from the assignment that maximizes
            the final distribution.

        Examples
        --------
        >>> from pgmpy.inference import VariableElimination
        >>> from pgmpy.models import BayesianModel
        >>> import numpy as np
        >>> import pandas as pd
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> inference = VariableElimination(model)
        >>> phi_query = inference.map_query(['A', 'B'])
        """
        # Empty set when there is no evidence, otherwise all unobserved
        # variables.
        kept = set()
        if evidence:
            kept = set(self.variables) - set(evidence.keys())

        factors_left = self._variable_elimination(kept, 'maximize',
                                                  evidence=evidence,
                                                  elimination_order=elimination_order)
        # To handle the case when no argument is passed then
        # _variable_elimination returns a dict.
        if isinstance(factors_left, dict):
            factors_left = factors_left.values()

        distribution = factor_product(*factors_left)
        winner = np.argmax(distribution.values)
        winning_assignment = distribution.assignment([winner])[0]

        results = {}
        for name, state in winning_assignment:
            results[name] = state

        if variables:
            return {name: results[name] for name in variables}
        return results
    def product_cpds(self, observation_set: ObservationSet) -> DiscreteFactor:
        """
        Multiply the CPDs of all graph nodes after reducing each one by the observations.

        :param observation_set: set of observed variables; its ``reduce`` method is applied
            to the CPD of every node.
        :return: product of the reduced CPDs of all nodes of the graph.
        """
        self.warn_on_not_compiled()

        reduced_cpds = (
            observation_set.reduce(self.graph.get_cpds(node))
            for node in self.nodes
        )
        return factor_product(*reduced_cpds)
Beispiel #20
0
    def test_factor_product(self):
        """Check values and State-based variables of factor_product results."""
        # Disjoint scopes: result is defined over x1, x2, x3, x4.
        first = Factor(['x1', 'x2'], [2, 2], range(4))
        second = Factor(['x3', 'x4'], [2, 2], range(4))
        result = factor_product(first, second)
        np_test.assert_array_equal(
            result.values,
            np.array([0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9]))
        expected = OrderedDict()
        for name in ('x1', 'x2', 'x3', 'x4'):
            expected[name] = [State(name, 0), State(name, 1)]
        self.assertEqual(result.variables, expected)

        # Overlapping scopes: x2 appears in both factors.
        first = Factor(['x1', 'x2'], [3, 2], range(6))
        second = Factor(['x2', 'x3'], [2, 2], range(4))
        result = factor_product(first, second)
        np_test.assert_array_equal(
            result.values,
            np.array([0, 0, 2, 3, 0, 2, 6, 9, 0, 4, 10, 15]))
        expected = OrderedDict()
        expected['x1'] = [State('x1', 0), State('x1', 1), State('x1', 2)]
        expected['x2'] = [State('x2', 0), State('x2', 1)]
        expected['x3'] = [State('x3', 0), State('x3', 1)]
        self.assertEqual(result.variables, expected)
Beispiel #21
0
    def query(self, var, evidence):
        """
        Answer a query by multiplying all factors, reducing and marginalizing.

        The product of every distinct factor is reduced on the
        ``{variable: state}`` pairs of ``evidence`` and marginalized over all
        model nodes that are neither in ``var`` nor in ``evidence``. The
        result is not normalized.

        Returns a dict with one entry per element of ``var``; every entry is
        produced by the same marginalization call, so each one is a separate
        factor over the same remaining variables.
        """
        # self.factors is a dict of the form of {node: [factors_involving_node]}
        distinct_factors = set(itertools.chain.from_iterable(self.factors.values()))
        joint = factor_product(*distinct_factors)

        observed_states = [(name, evidence[name]) for name in evidence]
        conditioned = joint.reduce(observed_states, inplace=False)

        to_sum_out = set(self.model.nodes()) - set(var) - set(evidence)
        return {
            query_var: conditioned.marginalize(to_sum_out, inplace=False)
            for query_var in var
        }
Beispiel #22
0
    def _get_factor(self, belief_prop, evidence):
        """
        Build a single factor from the junction tree and apply the evidence.

        Parameters:
        ----------
        belief_prop: Belief Propagation
            Object whose ``junction_tree.get_factors()`` supplies the factors
            that are multiplied together.

        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}

        Returns
        -------
        The product of the junction-tree factors, reduced on every evidence
        variable that occurs in its scope.
        """
        combined = factor_product(*belief_prop.junction_tree.get_factors())
        if not evidence:
            return combined

        for observed in evidence:
            # The return value of reduce() is discarded, so this relies on
            # reduce() modifying the factor in place; the scope is therefore
            # re-read on every iteration.
            if observed in combined.scope():
                combined.reduce([(observed, evidence[observed])])
        return combined
    def max_marginal(self,
                     variables=None,
                     evidence=None,
                     elimination_order=None):
        """
        Computes the max-marginal over the variables given the evidence.

        Parameters
        ----------
        variables: list
            list of variables over which we want to compute the max-marginal.
            A falsy value is replaced by an empty list.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence
        elimination_order: list
            order of variable eliminations (if nothing is provided) order is
            computed automatically

        Returns
        -------
        The largest value of the product of the factors left after
        elimination with the 'maximize' operation.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.inference import VariableElimination
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> inference = VariableElimination(model)
        >>> phi_query = inference.max_marginal(['A', 'B'])
        """
        kept_variables = variables if variables else []
        remaining = self._variable_elimination(
            kept_variables,
            'maximize',
            evidence=evidence,
            elimination_order=elimination_order)

        # To handle the case when no argument is passed then
        # _variable_elimination returns a dict.
        if isinstance(remaining, dict):
            remaining = remaining.values()

        joint = factor_product(*remaining)
        return np.max(joint.values)
    def max_marginal(self, variables=None, evidence=None, elimination_order=None):
        """
        Computes the max-marginal over the variables given the evidence.

        Parameters
        ----------
        variables: list
            list of variables over which we want to compute the max-marginal.
            None (or any falsy value) is treated as an empty list.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence
        elimination_order: list
            order of variable eliminations (if nothing is provided) order is
            computed automatically

        Returns
        -------
        The maximum over the values of the product of the factors returned
        by variable elimination with the 'maximize' operation.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.inference import VariableElimination
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> inference = VariableElimination(model)
        >>> phi_query = inference.max_marginal(['A', 'B'])
        """
        variables = variables or []
        elimination_kwargs = {
            'evidence': evidence,
            'elimination_order': elimination_order,
        }
        leftover = self._variable_elimination(variables, 'maximize',
                                              **elimination_kwargs)

        # To handle the case when no argument is passed then
        # _variable_elimination returns a dict.
        leftover_factors = leftover.values() if isinstance(leftover, dict) else leftover
        return np.max(factor_product(*leftover_factors).values)
Beispiel #25
0
def eliminacionVariablesOcultas(factor, consultas, evidencias):
    """
    Eliminate hidden variables from a mapping of factors and report the rest.

    A key of ``factor`` is treated as hidden when it appears in neither
    ``consultas`` (query variables) nor ``evidencias`` (evidence variables).
    For every hidden variable, each factor that mentions it and has more
    than one variable in scope is replaced by its product with the hidden
    variable's own factor, with the hidden variable marginalized out.

    The function appends to the module-level list ``variablesEliminadas``
    and writes into the module-level dict ``resultado``; neither is reset
    here. ``factor`` is modified in place. Every entry of ``resultado`` is
    printed (key, scope, values) before ``resultado`` is returned.
    """
    # Collect the hidden variables.
    variablesEliminadas.extend(
        name for name, _ in factor.items()
        if name not in consultas and name not in evidencias)

    # Multiply each hidden variable's factor into the factors mentioning it.
    for hidden in variablesEliminadas:
        for name, current in factor.items():
            if hidden not in current.scope():
                continue
            if len(current.scope()) <= 1:
                continue
            merged = pgmf.factor_product(factor[hidden], current)
            merged.marginalize([hidden])
            factor[name] = merged

    # Keep every factor whose scope contains at least one query variable.
    for name, current in factor.items():
        if any(member in consultas for member in current.scope()):
            resultado[name] = current

    for name, current in resultado.items():
        print(name)
        print(current.scope())
        print(current.values)
        print("\n")
    return resultado
Beispiel #26
0
def cpd_multiplication(cpds: List[pd.DataFrame],
                       normalize: bool = True) -> pd.DataFrame:
    """
    Multiply CPDs that are represented as pandas DataFrames.

    Every DataFrame is converted with `pd_to_tabular_cpd`, the converted
    objects are multiplied together with `factor_product`, and the product
    is converted back with `tabular_cpd_to_pd`.

    Important note: the result will be a CPD and the index will be the index
    of the first element of the list `cpds`.

    Args:
        cpds: CPDs to multiply
        normalize: whether to normalise the columns, so that each column
            sums to 1

    Returns:
        Pandas DataFrame containing the resulting product, looking like a CPD
    """
    product = factor_product(*map(pd_to_tabular_cpd, cpds))  # type: TabularCPD

    if normalize:
        # The return value is ignored, so this relies on normalize()
        # updating `product` in place.
        product.normalize()

    return tabular_cpd_to_pd(product)
    def _variable_elimination(self, variables, operation, evidence=None, elimination_order=None):
        """
        Implementation of a generalized variable elimination.

        Parameters
        ----------
        variables: list, array-like
            variables that are not to be eliminated.
        operation: str ('marginalize' | 'maximize')
            The operation to do for eliminating the variable. It is looked up
            by name as a method of the intermediate factor.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence
        elimination_order: list, array-like
            list of variables representing the order in which they
            are to be eliminated. If None, every variable that is neither a
            query nor an evidence variable is eliminated, in arbitrary
            (set iteration) order.

        Returns
        -------
        set or dict
            When `variables` is empty: the set of all factors stored in
            `self.factors`. Otherwise: a dict mapping each query variable to
            the normalized factor obtained by marginalizing the remaining
            query variables out of the final product.

        Raises
        ------
        TypeError
            if `variables` or `evidence` is passed as a bare string.
        ValueError
            if `elimination_order` contains a query or evidence variable.
        """
        if isinstance(variables, string_types):
            raise TypeError("variables must be a list of strings")
        if isinstance(evidence, string_types):
            # evidence is a mapping, not a list; say so in the message.
            raise TypeError("evidence must be a dict of the form "
                            "{variable: observed_state}")

        # Dealing with the case when variables is not provided.
        if not variables:
            all_factors = []
            for factor_li in self.factors.values():
                all_factors.extend(factor_li)
            return set(all_factors)

        eliminated_variables = set()
        # Work on per-node copies so that self.factors is left untouched.
        working_factors = {node: set(self.factors[node])
                           for node in self.factors}

        # Dealing with evidence. Reducing factors over it before VE is run.
        if evidence:
            for evidence_var in evidence:
                for factor in working_factors[evidence_var]:
                    factor_reduced = factor.reduce([(evidence_var, evidence[evidence_var])], inplace=False)
                    # Swap the original factor for its reduced version under
                    # every variable that is still in the reduced scope.
                    for var in factor_reduced.scope():
                        working_factors[var].remove(factor)
                        working_factors[var].add(factor_reduced)
                del working_factors[evidence_var]

        # TODO: Modify it to find the optimal elimination order
        protected_vars = set(variables).union(evidence.keys() if evidence else [])
        if not elimination_order:
            elimination_order = list(set(self.variables) - protected_vars)

        elif any(var in elimination_order for var in protected_vars):
            raise ValueError("Elimination order contains variables which are in"
                             " variables or evidence args")

        for var in elimination_order:
            # Removing all the factors containing the variables which are
            # eliminated (as all the factors should be considered only once)
            factors = [factor for factor in working_factors[var]
                       if not set(factor.variables).intersection(eliminated_variables)]
            phi = factor_product(*factors)
            phi = getattr(phi, operation)([var], inplace=False)
            del working_factors[var]
            for variable in phi.variables:
                working_factors[variable].add(phi)
            eliminated_variables.add(var)

        # Gather the surviving factors that mention no eliminated variable.
        final_distribution = set()
        for node in working_factors:
            for factor in working_factors[node]:
                if not set(factor.variables).intersection(eliminated_variables):
                    final_distribution.add(factor)

        # The product does not depend on the query variable, so build it once
        # instead of once per query variable.
        joint_factor = factor_product(*final_distribution)
        query_vars = set(variables)
        query_var_factor = {}
        for query_var in variables:
            query_var_factor[query_var] = joint_factor.marginalize(
                list(query_vars - {query_var}),
                inplace=False).normalize(inplace=False)
        return query_var_factor
    def _variable_elimination(
        self,
        variables,
        operation,
        evidence=None,
        elimination_order="MinFill",
        joint=True,
        stopwatch=True,
    ):
        """
        Implementation of a generalized variable elimination.

        Parameters
        ----------
        variables: list, array-like
            variables that are not to be eliminated.

        operation: str ('marginalize' | 'maximize')
            The operation to do for eliminating the variable. It is looked up
            by name as a method of the intermediate factor.

        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence

        elimination_order: str or list (array-like)
            If str: Heuristic to use to find the elimination order.
            If array-like: The elimination order to use.
            If None: A random elimination order is used.
            (The value is resolved by `self._get_elimination_order`.)

        joint: bool
            If True, return a single factor over all query variables; it is
            normalized only when `self.model` is a BayesianModel.
            If False, return a dict mapping each query variable to its own
            normalized factor.

        stopwatch: bool
            If True, the elimination loop is timed and the result is returned
            as a `(result, elapsed_seconds)` tuple.

        Returns
        -------
        The query result as described under `joint`, wrapped in a
        `(result, elapsed_seconds)` tuple when `stopwatch` is True.
        When `variables` is empty the product (joint=True) or the set
        (joint=False) of all factors is returned directly, without a time,
        regardless of `stopwatch`.

        Raises
        ------
        TypeError
            if `variables` or `evidence` is passed as a bare string.
        """
        # Step 1: Deal with the input arguments.
        if isinstance(variables, string_types):
            raise TypeError("variables must be a list of strings")
        if isinstance(evidence, string_types):
            # evidence is a mapping, not a list; say so in the message.
            raise TypeError("evidence must be a dict of the form "
                            "{variable: observed_state}")

        # Dealing with the case when variables is not provided.
        if not variables:
            all_factors = []
            for factor_li in self.factors.values():
                all_factors.extend(factor_li)
            unique_factors = set(all_factors)
            return factor_product(*unique_factors) if joint else unique_factors

        # Step 2: Prepare data structures to run the algorithm.
        eliminated_variables = set()
        # Get working factors and elimination order
        working_factors = self._get_working_factors(evidence)
        elimination_order = self._get_elimination_order(
            variables, evidence, elimination_order)

        # Step 3: Run variable elimination
        pbar = tqdm(elimination_order)
        start = time.time() if stopwatch else None
        for var in pbar:
            pbar.set_description("Eliminating: {var}".format(var=var))
            # Removing all the factors containing the variables which are
            # eliminated (as all the factors should be considered only once)
            factors = [
                factor for factor in working_factors[var]
                if not set(factor.variables).intersection(eliminated_variables)
            ]
            phi = factor_product(*factors)
            phi = getattr(phi, operation)([var], inplace=False)
            del working_factors[var]
            for variable in phi.variables:
                working_factors[variable].add(phi)
            eliminated_variables.add(var)
        total_time = time.time() - start if stopwatch else None

        # Step 4: Prepare variables to be returned.
        final_distribution = set()
        for node in working_factors:
            for factor in working_factors[node]:
                if not set(
                        factor.variables).intersection(eliminated_variables):
                    final_distribution.add(factor)

        # Every return path needs this product, so build it exactly once.
        product = factor_product(*final_distribution)
        if joint:
            if isinstance(self.model, BayesianModel):
                result = product.normalize(inplace=False)
            else:
                result = product
        else:
            query_vars = set(variables)
            result = {}
            for query_var in variables:
                result[query_var] = product.marginalize(
                    list(query_vars - {query_var}),
                    inplace=False).normalize(inplace=False)

        if stopwatch:
            return result, total_time
        return result
    def map_query(self, variables=None, evidence=None):
        """
        MAP Query method using belief propagation.

        Note: When multiple variables are passed, it returns the map_query for each
        of them individually.

        Parameters
        ----------
        variables: list
            list of variables for which you want to compute the probability.
            When empty or None, all of `self.variables` are used.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence

        Returns
        -------
        dict
            maps each requested variable to the state it takes in the
            highest-valued entry of the combined distribution.

        Examples
        --------
        >>> from pgmpy.factors.discrete import TabularCPD
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.inference import BeliefPropagation
        >>> bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
        ...                                 ('J', 'L'), ('G', 'L')])
        >>> cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        >>> cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        >>> cpd_j = TabularCPD('J', 2,
        ...                    [[0.9, 0.6, 0.7, 0.1],
        ...                     [0.1, 0.4, 0.3, 0.9]],
        ...                    ['R', 'A'], [2, 2])
        >>> cpd_q = TabularCPD('Q', 2,
        ...                    [[0.9, 0.2],
        ...                     [0.1, 0.8]],
        ...                    ['J'], [2])
        >>> cpd_l = TabularCPD('L', 2,
        ...                    [[0.9, 0.45, 0.8, 0.1],
        ...                     [0.1, 0.55, 0.2, 0.9]],
        ...                    ['G', 'J'], [2, 2])
        >>> cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        >>> bayesian_model.add_cpds(cpd_a, cpd_r, cpd_j, cpd_q, cpd_l, cpd_g)
        >>> belief_propagation = BeliefPropagation(bayesian_model)
        >>> belief_propagation.map_query(variables=['J', 'Q'],
        ...                              evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1})
        """
        # TODO:Check the note in docstring. Change that behavior to return the joint MAP
        if not variables:
            variables = set(self.variables)

        query_result = self._query(variables=variables, operation='marginalize', evidence=evidence)

        # _query may hand back a dict of factors; only the factors themselves
        # are needed to build the combined distribution.
        if isinstance(query_result, dict):
            query_result = query_result.values()
        combined = factor_product(*query_result)

        # Locate the highest-valued entry and decode it into
        # (variable, state) pairs.
        best_index = np.argmax(combined.values)
        best_assignment = combined.assignment([best_index])[0]
        map_query_results = {var: value for var, value in best_assignment}

        if variables:
            # Restrict the answer to the variables that were asked for.
            return {var: map_query_results[var] for var in variables}
        return map_query_results
    def to_junction_tree(self):
        """
        Creates a junction tree (or clique tree) for a given markov model.

        For a given markov model (H) a junction tree (G) is a graph
        1. where each node in G corresponds to a maximal clique in H
        2. each sepset in G separates the variables strictly on one side of the
        edge to other.

        Returns
        -------
        JunctionTree
            tree whose nodes are the maximal cliques of the triangulated
            graph, with one clique potential added per node.

        Raises
        ------
        ValueError
            if the factors do not cover exactly the model's nodes, or if some
            factor could not be assigned to any clique.

        Examples
        --------
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> junction_tree = mm.to_junction_tree()
        """
        from pgmpy.models import JunctionTree

        # Check whether the model is valid or not
        self.check_model()

        # Triangulate the graph to make it chordal
        triangulated_graph = self.triangulate()

        # Find maximal cliques in the chordal graph
        cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

        # If there is only 1 clique, then the junction tree formed is just a
        # clique tree with that single clique as the node
        if len(cliques) == 1:
            clique_trees = JunctionTree()
            clique_trees.add_node(cliques[0])

        # Else if the number of cliques is more than 1 then create a complete
        # graph with all the cliques as nodes and weight of the edges being
        # the length of sepset between two cliques
        elif len(cliques) >= 2:
            complete_graph = UndirectedGraph()
            for clique_a, clique_b in itertools.combinations(cliques, 2):
                sepset_size = len(set(clique_a).intersection(clique_b))
                # Negated so that the minimum spanning tree below is the
                # maximum-sepset spanning tree.
                complete_graph.add_edge(clique_a, clique_b, weight=-sepset_size)

            # Create clique trees by minimum (or maximum) spanning tree method
            clique_trees = JunctionTree(nx.minimum_spanning_tree(complete_graph).edges())

        # Check whether the factors are defined for all the random variables or not
        all_vars = itertools.chain(*[factor.scope() for factor in self.factors])
        if set(all_vars) != set(self.nodes()):
            # The exception used to be constructed but never raised.
            raise ValueError('DiscreteFactor for all the random variables not specified')

        # Dictionary stating whether the factor is used to create clique
        # potential or not
        # If false, then it is not used to create any clique potential
        is_used = {factor: False for factor in self.factors}

        # Looked up once; the value is the same for every clique.
        cardinalities = self.get_cardinality()

        for node in clique_trees.nodes():
            clique_factors = []
            for factor in self.factors:
                # A factor is assigned to the first clique that contains its
                # whole scope, and is never assigned twice.
                if not is_used[factor] and set(factor.scope()).issubset(node):
                    clique_factors.append(factor)
                    is_used[factor] = True

            # To compute clique potential, initially set it as unity factor
            var_card = [cardinalities[x] for x in node]
            clique_potential = DiscreteFactor(node, var_card, np.ones(np.prod(var_card)))
            # multiply it with the factors associated with the variables present
            # in the clique (or node); a clique that received no factor keeps
            # the unity potential.
            if clique_factors:
                clique_potential *= factor_product(*clique_factors)
            clique_trees.add_factors(clique_potential)

        if not all(is_used.values()):
            raise ValueError('All the factors were not used to create Junction Tree. '
                             'Extra factors are defined.')

        return clique_trees
Beispiel #31
0
    def to_junction_tree(self):
        """
        Creates a junction tree (or clique tree) for a given markov model.

        For a given markov model (H) a junction tree (G) is a graph
        1. where each node in G corresponds to a maximal clique in H
        2. each sepset in G separates the variables strictly on one side of the
        edge to other.

        Returns
        -------
        JunctionTree
            tree whose nodes are the maximal cliques of the triangulated
            graph, with one clique potential added per node.

        Raises
        ------
        ValueError
            if the factors do not cover exactly the model's nodes, or if some
            factor could not be assigned to any clique.

        Examples
        --------
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors import Factor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [Factor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> junction_tree = mm.to_junction_tree()
        """
        from pgmpy.models import JunctionTree

        # Check whether the model is valid or not
        self.check_model()

        # Triangulate the graph to make it chordal
        triangulated_graph = self.triangulate()

        # Find maximal cliques in the chordal graph
        cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

        # If there is only 1 clique, then the junction tree formed is just a
        # clique tree with that single clique as the node
        if len(cliques) == 1:
            clique_trees = JunctionTree()
            clique_trees.add_node(cliques[0])

        # Else if the number of cliques is more than 1 then create a complete
        # graph with all the cliques as nodes and weight of the edges being
        # the length of sepset between two cliques
        elif len(cliques) >= 2:
            complete_graph = UndirectedGraph()
            for clique_a, clique_b in itertools.combinations(cliques, 2):
                sepset_size = len(set(clique_a).intersection(clique_b))
                # Negated so that the minimum spanning tree below is the
                # maximum-sepset spanning tree.
                complete_graph.add_edge(clique_a, clique_b, weight=-sepset_size)

            # Create clique trees by minimum (or maximum) spanning tree method
            clique_trees = JunctionTree(nx.minimum_spanning_tree(complete_graph).edges())

        # Check whether the factors are defined for all the random variables or not
        all_vars = itertools.chain(*[factor.scope() for factor in self.factors])
        if set(all_vars) != set(self.nodes()):
            # The exception used to be constructed but never raised.
            raise ValueError('Factor for all the random variables not specified')

        # Dictionary stating whether the factor is used to create clique
        # potential or not
        # If false, then it is not used to create any clique potential
        is_used = {factor: False for factor in self.factors}

        # Looked up once; the value is the same for every clique.
        cardinalities = self.get_cardinality()

        for node in clique_trees.nodes():
            clique_factors = []
            for factor in self.factors:
                # A factor is assigned to the first clique that contains its
                # whole scope, and is never assigned twice.
                if not is_used[factor] and set(factor.scope()).issubset(node):
                    clique_factors.append(factor)
                    is_used[factor] = True

            # To compute clique potential, initially set it as unity factor
            var_card = [cardinalities[x] for x in node]
            clique_potential = Factor(node, var_card, np.ones(np.prod(var_card)))
            # multiply it with the factors associated with the variables present
            # in the clique (or node); a clique that received no factor keeps
            # the unity potential.
            if clique_factors:
                clique_potential *= factor_product(*clique_factors)
            clique_trees.add_factors(clique_potential)

        if not all(is_used.values()):
            raise ValueError('All the factors were not used to create Junction Tree. '
                             'Extra factors are defined.')

        return clique_trees
    def _variable_elimination(self,
                              variables,
                              operation,
                              evidence=None,
                              elimination_order=None):
        """
        Implementation of a generalized variable elimination, restricted to
        the variables that are relevant to the query.

        Note: the evidence is applied to `self.factors` itself, so the
        reduction persists after this call returns.

        Parameters
        ----------
        variables: list, array-like
            variables that are not to be eliminated.
        operation: str ('marginalize' | 'maximize')
            The operation to do for eliminating the variable. It is looked up
            by name as a method of the intermediate factor.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence
        elimination_order: list, array-like
            list of variables representing the order in which they
            are to be eliminated. If None, the relevant non-query,
            non-evidence variables are ordered by ascending degree in
            `self.model`.

        Returns
        -------
        set or dict
            When `variables` is empty: the set of all factors stored in
            `self.factors`. Otherwise: a dict mapping each query variable to
            the normalized factor obtained by marginalizing the remaining
            query variables out of the final product.

        Raises
        ------
        TypeError
            if `variables` or `evidence` is passed as a bare string.
        ValueError
            if `elimination_order` contains a query or evidence variable.
        """
        if isinstance(variables, string_types):
            raise TypeError("variables must be a list of strings")
        if isinstance(evidence, string_types):
            # evidence is a mapping, not a list; say so in the message.
            raise TypeError("evidence must be a dict of the form "
                            "{variable: observed_state}")

        # Dealing with the case when variables is not provided.
        if not variables:
            all_factors = []
            for factor_li in self.factors.values():
                all_factors.extend(factor_li)
            return set(all_factors)

        eliminated_variables = set()

        # --MODIFICATION 1, 2:
        #     Since the evidence is fixed for a whole game, it is applied
        #     permanently to the bayesian network, in addition to being
        #     applied to the copy of the factors (working_factors).
        #
        #     working_factors is also initialised only with the factors of
        #     the relevant variables: "Every variable that is not an ancestor
        #     (in the network) of some query or evidence variable is
        #     irrelevant to the query"

        if evidence:
            for evidence_var in evidence:
                for factor in self.factors[evidence_var]:
                    factor_reduced = factor.reduce(
                        [(evidence_var, evidence[evidence_var])],
                        inplace=False)
                    for var in factor_reduced.scope():
                        self.factors[var].remove(factor)
                        self.factors[var].append(factor_reduced)

                del self.factors[evidence_var]

        # In this particular problem:
        #    - The query variables (which are always X variables) never have
        #      parents ==> only the ancestors of the evidence variables need
        #      to be computed.
        #    - The evidence variables are "eliminated" in the block above
        #      ==> they are no longer relevant variables.
        #    - X variables have no ancestors, and Y variables only have their
        #      direct parents as ancestors (that is, no grandparents) ==> no
        #      recursive algorithm is needed to compute the ancestors of the
        #      evidence variables.

        relevant_variables = set(variables)
        # `evidence` defaults to None, so guard before iterating over it.
        for evidence_var in (evidence or {}):
            relevant_variables.update(self.model.get_parents(evidence_var))

        working_factors = {
            node: {
                factor
                for factor in self.factors[node]
                if set(factor.variables).issubset(relevant_variables)
            }
            for node in self.factors if node in relevant_variables
        }
        #
        # --

        # Variables that must never be eliminated.
        protected_vars = set(variables).union(
            evidence.keys() if evidence else [])

        if not elimination_order:
            # --MODIFICATION 3: Min-Degree Heuristic. The variables with the
            #     fewest neighbours are eliminated first.

            ordered_degree = sorted(self.model.degree,
                                    key=lambda node: node[1])
            ordered_nodes = [node[0] for node in ordered_degree]
            elimination_order = [
                var for var in ordered_nodes
                if var not in protected_vars and var in relevant_variables
            ]

        #
        # --
        elif any(var in elimination_order for var in protected_vars):
            raise ValueError(
                "Elimination order contains variables which are in"
                " variables or evidence args")

        for var in elimination_order:
            # Removing all the factors containing the variables which are
            # eliminated (as all the factors should be considered only once)
            factors = [
                factor for factor in working_factors[var]
                if not set(factor.variables).intersection(eliminated_variables)
            ]

            phi = factor_product(*factors)
            phi = getattr(phi, operation)([var], inplace=False)
            del working_factors[var]
            for variable in phi.variables:
                working_factors[variable].add(phi)
            eliminated_variables.add(var)

        # Gather the surviving factors that mention no eliminated variable.
        final_distribution = set()
        for node in working_factors:
            for factor in working_factors[node]:
                if not set(
                        factor.variables).intersection(eliminated_variables):
                    final_distribution.add(factor)

        # The product does not depend on the query variable, so build it once
        # instead of once per query variable.
        joint_factor = factor_product(*final_distribution)
        query_vars = set(variables)
        query_var_factor = {}
        for query_var in variables:
            query_var_factor[query_var] = joint_factor.marginalize(
                list(query_vars - {query_var}),
                inplace=False).normalize(inplace=False)
        return query_var_factor
    def _variable_elimination(self,
                              variables,
                              operation,
                              evidence=None,
                              elimination_order=None):
        """
        Implementation of a generalized variable elimination.

        Parameters
        ----------
        variables: list, array-like
            variables that are not to be eliminated.
        operation: str ('marginalize' | 'maximize')
            The operation to do for eliminating the variable. It is looked up
            by name as a method of the intermediate factor.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence
        elimination_order: list, array-like
            list of variables representing the order in which they
            are to be eliminated. If None, every variable that is neither a
            query nor an evidence variable is eliminated, in arbitrary
            (set iteration) order.

        Returns
        -------
        set or dict
            When `variables` is empty: the set of all factors stored in
            `self.factors`. Otherwise: a dict mapping each query variable to
            the normalized factor obtained by marginalizing the remaining
            query variables out of the final product.

        Raises
        ------
        TypeError
            if `variables` or `evidence` is passed as a bare string.
        ValueError
            if `elimination_order` contains a query or evidence variable.
        """
        if isinstance(variables, string_types):
            raise TypeError("variables must be a list of strings")
        if isinstance(evidence, string_types):
            # evidence is a mapping, not a list; say so in the message.
            raise TypeError("evidence must be a dict of the form "
                            "{variable: observed_state}")

        # Dealing with the case when variables is not provided.
        if not variables:
            all_factors = []
            for factor_li in self.factors.values():
                all_factors.extend(factor_li)
            return set(all_factors)

        eliminated_variables = set()
        # Work on per-node copies so that self.factors is left untouched.
        working_factors = {
            node: set(self.factors[node])
            for node in self.factors
        }

        # Dealing with evidence. Reducing factors over it before VE is run.
        if evidence:
            for evidence_var in evidence:
                for factor in working_factors[evidence_var]:
                    factor_reduced = factor.reduce(
                        [(evidence_var, evidence[evidence_var])],
                        inplace=False)
                    # Swap the original factor for its reduced version under
                    # every variable that is still in the reduced scope.
                    for var in factor_reduced.scope():
                        working_factors[var].remove(factor)
                        working_factors[var].add(factor_reduced)
                del working_factors[evidence_var]

        # Variables that must never be eliminated.
        protected_vars = set(variables).union(
            evidence.keys() if evidence else [])

        # TODO: Modify it to find the optimal elimination order
        if not elimination_order:
            elimination_order = list(set(self.variables) - protected_vars)

        elif any(var in elimination_order for var in protected_vars):
            raise ValueError(
                "Elimination order contains variables which are in"
                " variables or evidence args")

        for var in elimination_order:
            # Removing all the factors containing the variables which are
            # eliminated (as all the factors should be considered only once)
            factors = [
                factor for factor in working_factors[var]
                if not set(factor.variables).intersection(eliminated_variables)
            ]
            phi = factor_product(*factors)
            phi = getattr(phi, operation)([var], inplace=False)
            del working_factors[var]
            for variable in phi.variables:
                working_factors[variable].add(phi)
            eliminated_variables.add(var)

        # Gather the surviving factors that mention no eliminated variable.
        final_distribution = set()
        for node in working_factors:
            for factor in working_factors[node]:
                if not set(
                        factor.variables).intersection(eliminated_variables):
                    final_distribution.add(factor)

        # The product does not depend on the query variable, so build it once
        # instead of once per query variable.
        joint_factor = factor_product(*final_distribution)
        query_vars = set(variables)
        query_var_factor = {}
        for query_var in variables:
            query_var_factor[query_var] = joint_factor.marginalize(
                list(query_vars - {query_var}),
                inplace=False).normalize(inplace=False)
        return query_var_factor