Ejemplo n.º 1
0
    def _update_triangles(self, triangles_list):
        """
        Create a zero-valued cluster for every triangle and register it.

        For each triangle a DiscreteFactor over the triangle's variables is
        created with all of its values set to zero. That factor is then

        * wrapped in a ``self.Cluster`` (together with the pairwise intersection
          sets of the triangle) and stored in ``self.cluster_set``,
        * appended to ``self.model.factors``,
        * stored in ``self.objective``.

        Both ``self.cluster_set`` and ``self.objective`` are keyed by the
        frozenset of the triangle's variables.

        Parameters
        ----------
        triangles_list : list
            The list of variables forming the triangles to be updated. It is of the form
            [['var_5', 'var_8', 'var_7'], ['var_4', 'var_5', 'var_7']]
        """
        for triangle_vars in triangles_list:
            cardinalities = [
                self.cardinality[variable] for variable in triangle_vars
            ]
            # Every unordered pair of the triangle's variables is one
            # intersection set of the new cluster.
            pairwise_sets = [
                frozenset(pair) for pair in it.combinations(triangle_vars, 2)
            ]
            zero_factor = DiscreteFactor(triangle_vars, cardinalities,
                                         np.zeros(np.prod(cardinalities)))
            triangle_key = frozenset(triangle_vars)

            self.cluster_set[triangle_key] = self.Cluster(pairwise_sets,
                                                          zero_factor)
            # The same factor object is shared by the model and the objective.
            self.model.factors.append(zero_factor)
            self.objective[triangle_key] = zero_factor
    def _shift_factor(self, factor, shift):
        """
        Build a new factor whose scope has been moved to another time slice.

        The scope of ``factor`` is passed through ``self._shift_nodes`` and a
        new DiscreteFactor is created from the resulting scope together with
        the cardinality and values of the given factor.

        Parameters
        ----------
        factor: DiscreteFactor
           The factor which needs to be shifted.

        shift: int
           The time slice the returned factor should belong to.

        Returns
        -------
        DiscreteFactor
           The factor defined over the shifted scope.
        """
        shifted_scope = self._shift_nodes(factor.scope(), shift)
        shifted_factor = DiscreteFactor(shifted_scope, factor.cardinality,
                                        factor.values)
        return shifted_factor
    def to_factor(self):
        """
        Convert this JointProbabilityDistribution into a DiscreteFactor.

        Returns
        -------
        DiscreteFactor
            A factor built from the variables, cardinality and values of this
            distribution.

        Examples
        --------
        >>> import numpy as np
        >>> from ProbabilityModel.factors.discrete import JointProbabilityDistribution
        >>> prob = JointProbabilityDistribution(['x1', 'x2', 'x3'], [2, 3, 2], np.ones(12)/12)
        >>> phi = prob.to_factor()
        >>> type(phi)
        ProbabilityModel.factors.DiscreteFactor.DiscreteFactor
        """
        as_factor = DiscreteFactor(self.variables, self.cardinality,
                                   self.values)
        return as_factor
Ejemplo n.º 4
0
    def get_model(self):
        """
        Build the model described by the parsed file.

        Variables are in the pattern var_0, var_1, var_2 where var_0 is
        0th index variable, var_1 is 1st index variable.

        Return
        ------
        model: a BayesianModel when ``self.network_type`` is "BAYES", a
            MarkovModel when it is "MARKOV". For any other network type
            nothing is built and None is returned.

        Examples
        --------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_model()
        """
        if self.network_type == "BAYES":
            bayes_net = BayesianModel()
            bayes_net.add_nodes_from(self.variables)
            bayes_net.add_edges_from(self.edges)

            cpds = []
            for entry in self.tables:
                # entry[0] is the child variable, entry[1] its flat value list
                child = entry[0]
                n_states = int(self.domain[child])
                flat = np.array([float(item) for item in entry[1]])
                # One row per state of the child variable
                table = flat.reshape(n_states, flat.size // n_states)
                cpds.append(TabularCPD(child, n_states, table))

            bayes_net.add_cpds(*cpds)
            return bayes_net

        if self.network_type != "MARKOV":
            return None

        markov_net = MarkovModel(self.edges)
        # entry[0] is the factor scope, entry[1] its flat value list
        potentials = [
            DiscreteFactor(
                variables=entry[0],
                cardinality=[int(self.domain[var]) for var in entry[0]],
                values=[float(item) for item in entry[1]],
            )
            for entry in self.tables
        ]
        markov_net.add_factors(*potentials)
        return markov_net
    def is_imap(self, model):
        """
        Tell whether the given BayesianModel is an I-map of this
        JointProbabilityDistribution.

        The CPDs of the model are converted to factors and multiplied together;
        the product is then compared for equality with the factor built from
        this distribution.

        Parameters
        ----------
        model : An instance of BayesianModel Class, for which you want to
            check the Imap

        Returns
        -------
        boolean : True if given bayesian model is Imap for Joint Probability Distribution
                False otherwise

        Raises
        ------
        TypeError
            If ``model`` is not a BayesianModel instance.

        Examples
        --------
        >>> from ProbabilityModel.models import BayesianModel
        >>> from ProbabilityModel.factors.discrete import TabularCPD
        >>> from ProbabilityModel.factors.discrete import JointProbabilityDistribution
        >>> bm = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        >>> diff_cpd = TabularCPD('diff', 2, [[0.2], [0.8]])
        >>> intel_cpd = TabularCPD('intel', 3, [[0.5], [0.3], [0.2]])
        >>> grade_cpd = TabularCPD('grade', 3,
        ...                        [[0.1,0.1,0.1,0.1,0.1,0.1],
        ...                         [0.1,0.1,0.1,0.1,0.1,0.1],
        ...                         [0.8,0.8,0.8,0.8,0.8,0.8]],
        ...                        evidence=['diff', 'intel'],
        ...                        evidence_card=[2, 3])
        >>> bm.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        >>> val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
        ...        0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        >>> JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'], [2, 3, 3], val)
        >>> JPD.is_imap(bm)
        True
        """
        from ProbabilityModel.models import BayesianModel

        if not isinstance(model, BayesianModel):
            raise TypeError("model must be an instance of BayesianModel")

        cpd_factors = [cpd.to_factor() for cpd in model.get_cpds()]
        model_joint = reduce(mul, cpd_factors)
        own_joint = DiscreteFactor(self.variables, self.cardinality,
                                   self.values)
        return bool(own_joint == model_joint)
Ejemplo n.º 6
0
    def to_factor(self):
        """
        Convert this CPD into a DiscreteFactor.

        The factor is created from the variables, cardinality, values and
        state names of the cpd.

        Returns
        -------
        DiscreteFactor
            The factor equivalent of this cpd.

        Examples
        --------
        >>> from ProbabilityModel.factors.discrete import TabularCPD
        >>> cpd = TabularCPD('grade', 3, [[0.1, 0.1],
        ...                               [0.1, 0.1],
        ...                               [0.8, 0.8]],
        ...                  evidence='evi1', evidence_card=2)
        >>> factor = cpd.to_factor()
        >>> factor
        <DiscreteFactor representing phi(grade:3, evi1:2) at 0x7f847a4f2d68>
        """
        equivalent_factor = DiscreteFactor(variables=self.variables,
                                           cardinality=self.cardinality,
                                           values=self.values,
                                           state_names=self.state_names)
        return equivalent_factor
Ejemplo n.º 7
0
        def __init__(self, intersection_set_variables, cluster_potential):
            """
            Set up a cluster from its potential and the candidate intersection sets.

            Parameters
            ----------
            intersection_set_variables : iterable of set-like objects
                Candidate intersection sets; only their non-empty overlap with
                the scope of the cluster is kept.
            cluster_potential : DiscreteFactor
                The potential of the cluster. A deep copy of it is stored.
            """
            # Variables the cluster is made of.
            scope = frozenset(cluster_potential.scope())
            self.cluster_variables = scope

            # Keep a private copy of the potential.
            self.cluster_potential = copy.deepcopy(cluster_potential)

            # S(c): the non-empty overlaps between the candidates and the scope.
            overlaps = []
            for candidate in intersection_set_variables:
                common = candidate.intersection(scope)
                if common:
                    overlaps.append(common)
            self.intersection_sets_for_cluster_c = overlaps

            # Messages from the cluster to each of its intersection sets start
            # at zero: \lambda_{c \rightarrow \s} = 0
            messages = self.message_from_cluster = {}
            for overlap in overlaps:
                # An overlap may be a single node or an edge
                # (that is ['A'] or ['A', 'C']).
                members = list(overlap)
                card_by_variable = cluster_potential.get_cardinality(members)
                member_cards = [card_by_variable[name] for name in members]

                # Zero-valued factor over the members of the overlap
                messages[overlap] = DiscreteFactor(
                    members,
                    member_cards,
                    np.zeros(np.prod(member_cards)),
                )
    def backward_inference(self, variables, evidence=None):
        """
        Backward inference method using belief propagation.

        The potentials of every time slice are first obtained from
        ``forward_inference``. The method then walks from the last time slice
        back to slice 1, querying the requested variables of each slice, and
        finally queries the requested variables of slice 0.

        Parameters
        ----------
        variables: list
            list of variables for which you want to compute the probability.
            Every variable is indexed as ``var[1]`` to obtain its time slice.
        evidence: dict
            a dict key, value pair as {var: state_of_var_observed}
            None if no evidence

        Returns
        -------
        dict
            Maps the queried variables to their results. For time slices
            greater than 0 the keys are ``(name, time_slice)`` tuples and the
            values are DiscreteFactor objects; the entries for slice 0 are
            whatever ``BeliefPropagation.query(..., joint=False)`` returns.

        Examples
        --------
        >>> from ProbabilityModel.factors.discrete import TabularCPD
        >>> from ProbabilityModel.models import DynamicBayesianNetwork as DBN
        >>> from ProbabilityModel.inference import DBNInference
        >>> dbnet = DBN()
        >>> dbnet.add_edges_from([(('Z', 0), ('X', 0)), (('X', 0), ('Y', 0)),
        ...                       (('Z', 0), ('Z', 1))])
        >>> z_start_cpd = TabularCPD(('Z', 0), 2, [[0.5, 0.5]])
        >>> x_i_cpd = TabularCPD(('X', 0), 2, [[0.6, 0.9],
        ...                                    [0.4, 0.1]],
        ...                      evidence=[('Z', 0)],
        ...                      evidence_card=[2])
        >>> y_i_cpd = TabularCPD(('Y', 0), 2, [[0.2, 0.3],
        ...                                    [0.8, 0.7]],
        ...                      evidence=[('X', 0)],
        ...                      evidence_card=[2])
        >>> z_trans_cpd = TabularCPD(('Z', 1), 2, [[0.4, 0.7],
        ...                                        [0.6, 0.3]],
        ...                      evidence=[('Z', 0)],
        ...                      evidence_card=[2])
        >>> dbnet.add_cpds(z_start_cpd, z_trans_cpd, x_i_cpd, y_i_cpd)
        >>> dbnet.initialize_initial_state()
        >>> dbn_inf = DBNInference(dbnet)
        >>> dbn_inf.backward_inference([('X', 0)], {('Y', 0):0, ('Y', 1):1, ('Y', 2):1})[('X', 0)].values
        array([ 0.66594382,  0.33405618])
        """
        # Group the query variables by their time slice.
        variable_dict = defaultdict(list)
        for var in variables:
            variable_dict[var[1]].append(var)
        # Latest time slice among the query variables.
        time_range = max(variable_dict)
        interface_nodes_dict = {}
        if evidence:
            # Evidence may reach further in time than the query variables.
            evid_time_range = max(
                [time_slice for var, time_slice in evidence.keys()])
            time_range = max(time_range, evid_time_range)
        end_bp = BeliefPropagation(self.start_junction_tree)
        # The forward pass result; it is indexed by time slice below.
        potential_dict = self.forward_inference(variables, evidence,
                                                "potential")
        update_factor = self._shift_factor(potential_dict[time_range], 1)
        factor_values = {}

        # Walk backward from the last time slice down to slice 1.
        for time_slice in range(time_range, 0, -1):
            evidence_time = self._get_evidence(evidence, time_slice, 1)
            evidence_prev_time = self._get_evidence(evidence, time_slice - 1,
                                                    0)
            if evidence_prev_time:
                # Keep only the evidence of the previous slice that is on
                # nodes listed in self.interface_nodes_0.
                interface_nodes_dict = {
                    k: v
                    for k, v in evidence_prev_time.items()
                    if k in self.interface_nodes_0
                }
            # NOTE: interface_nodes_dict is not reset per iteration, so the
            # value of an earlier iteration is reused when the previous slice
            # has no evidence.
            if evidence_time:
                evidence_time.update(interface_nodes_dict)
            mid_bp = BeliefPropagation(self.one_and_half_junction_tree)
            self._update_belief(mid_bp, self.in_clique,
                                potential_dict[time_slice - 1])
            forward_factor = self._shift_factor(potential_dict[time_slice], 1)
            self._update_belief(mid_bp, self.out_clique, forward_factor,
                                update_factor)

            if variable_dict[time_slice]:
                # The query runs on the nodes shifted with shift 1 ...
                variable_time = self._shift_nodes(variable_dict[time_slice], 1)
                new_values = mid_bp.query(variable_time,
                                          evidence=evidence_time,
                                          joint=False)
                changed_values = {}
                for key in new_values.keys():
                    # ... so the results are re-keyed to the actual time slice.
                    new_key = (key[0], time_slice)
                    new_factor = DiscreteFactor([new_key],
                                                new_values[key].cardinality,
                                                new_values[key].values)
                    changed_values[new_key] = new_factor
                factor_values.update(changed_values)

            # Factor passed on to the next (earlier) iteration.
            clique_phi = self._get_factor(mid_bp, evidence_time)
            in_clique_phi = self._marginalize_factor(self.interface_nodes_0,
                                                     clique_phi)
            update_factor = self._shift_factor(in_clique_phi, 1)

        # Finish with time slice 0 on the start junction tree.
        out_clique_phi = self._shift_factor(update_factor, 0)
        self._update_belief(end_bp, self.start_interface_clique,
                            potential_dict[0], out_clique_phi)
        evidence_0 = self._get_evidence(evidence, 0, 0)
        if variable_dict[0]:
            factor_values.update(
                end_bp.query(variable_dict[0], evidence_0, joint=False))
        return factor_values
    def to_junction_tree(self):
        """
        Creates a junction tree (or clique tree) for a given markov model.

        For a given markov model (H) a junction tree (G) is a graph
        1. where each node in G corresponds to a maximal clique in H
        2. each sepset in G separates the variables strictly on one side of the
        edge to other.

        The model is validated and triangulated, the maximal cliques of the
        triangulated graph become the nodes of the tree, and every factor of
        the model is multiplied into the potential of the first clique that
        contains its whole scope.

        Returns
        -------
        JunctionTree
            The clique tree with one potential per clique. If no clique is
            found the tree has no nodes.

        Raises
        ------
        ValueError
            If the scopes of the factors do not cover exactly the nodes of the
            model, or if some factor could not be assigned to any clique.

        Examples
        --------
        >>> from ProbabilityModel.models import MarkovModel
        >>> from ProbabilityModel.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> junction_tree = mm.to_junction_tree()
        """
        from ProbabilityModel.models import JunctionTree

        # Check whether the model is valid or not
        self.check_model()

        # Triangulate the graph to make it chordal
        triangulated_graph = self.triangulate()

        # Find maximal cliques in the chordal graph
        cliques = list(map(tuple, nx.find_cliques(triangulated_graph)))

        if len(cliques) >= 2:
            # Build a complete graph over the cliques. The edge weight is the
            # negated sepset size, so that the minimum spanning tree is the
            # spanning tree with the largest sepsets.
            complete_graph = UndirectedGraph()
            for first, second in itertools.combinations(cliques, 2):
                sepset_size = len(set(first).intersection(second))
                complete_graph.add_edge(first, second, weight=-sepset_size)

            clique_trees = JunctionTree(
                nx.minimum_spanning_tree(complete_graph).edges())
        else:
            # With a single clique the tree is just that clique; with no
            # clique at all the tree stays empty.
            clique_trees = JunctionTree()
            if cliques:
                clique_trees.add_node(cliques[0])

        # Check whether the factors are defined for all the random variables or not
        all_vars = itertools.chain.from_iterable(
            factor.scope() for factor in self.factors)
        if set(all_vars) != set(self.nodes()):
            raise ValueError(
                "DiscreteFactor for all the random variables not specified")

        # Dictionary stating whether the factor is used to create clique
        # potential or not
        # If false, then it is not used to create any clique potential
        is_used = {factor: False for factor in self.factors}

        # The cardinalities do not change while the potentials are built.
        cardinalities = self.get_cardinality()

        for node in clique_trees.nodes():
            clique_factors = []
            for factor in self.factors:
                # A factor is assigned to the first clique that contains its
                # whole scope, and it is assigned only once.
                if not is_used[factor] and set(factor.scope()).issubset(node):
                    clique_factors.append(factor)
                    is_used[factor] = True

            # To compute clique potential, initially set it as unity factor
            var_card = [cardinalities[x] for x in node]
            clique_potential = DiscreteFactor(node, var_card,
                                              np.ones(np.prod(var_card)))
            # multiply it with the factors associated with the variables present
            # in the clique (or node)
            # Checking if there's clique_factors, to handle the case when clique_factors
            # is empty, otherwise factor_product will throw an error [ref #889]
            if clique_factors:
                clique_potential *= factor_product(*clique_factors)
            clique_trees.add_factors(clique_potential)

        if not all(is_used.values()):
            raise ValueError(
                "All the factors were not used to create Junction Tree. "
                "Extra factors are defined.")

        return clique_trees