예제 #1
0
    def get_parameters(self, prior='dirichlet', **kwargs):
        """
        Method for getting all the learned CPDs of the model.

        For every node the observed state counts (split by the states of its
        parents, if it has any) are added to the node's pseudo-counts and the
        sum is normalized into a TabularCPD.

        Parameters
        ----------
        prior: str (default: 'dirichlet')
            Name of the prior. Only 'dirichlet' is implemented; for any other
            value no CPD is built and an empty list is returned.

        alpha: dict (keyword argument, optional)
            Maps every node to its pseudo-counts, one per entry of the node's
            CPD table, laid out with the node's states varying slowest and the
            parents' states varying fastest. When it is omitted together with
            the 'dirichlet' prior, every pseudo-count is 1.

        Returns
        -------
        parameters: list
            List of TabularCPDs, one for each node of the model.

        Raises
        ------
        KeyError
            If `prior` is not 'dirichlet' and no `alpha` keyword is given.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        if prior == 'dirichlet' and 'alpha' not in kwargs:
            # Uniform default: one pseudo-count of 1 per CPD entry. The table
            # size is accumulated with plain integers so that a parentless
            # node never ends up multiplying a list by a float.
            alpha = {}
            for node in self.model.nodes():
                table_size = self.node_card[node]
                for parent in self.model.get_parents(node):
                    table_size *= self.node_card[parent]
                alpha[node] = [1] * int(table_size)
        else:
            alpha = kwargs['alpha']

        parameters = []
        if prior != 'dirichlet':
            # No other prior is implemented: nothing is estimated.
            return parameters

        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            node_alpha = np.array(alpha[node])

            if not parents:
                # value_counts() orders by frequency; sort by state so that
                # the counts line up with the pseudo-counts and the CPD rows.
                state_counts = self.data.loc[:, node].value_counts().sort_index().values

                values = (state_counts + node_alpha) / (state_counts.sum() + node_alpha.sum())
                cpd = TabularCPD(node, self.node_card[node], values[:, np.newaxis])
            else:
                parent_card = np.array([self.node_card[parent] for parent in parents])
                var_card = self.node_card[node]
                # Group by the same parent order that is handed to TabularCPD
                # as evidence. Unstacking gives a (node state x parent
                # configuration) table in which combinations that were never
                # observed count as 0 instead of being dropped.
                count_table = self.data.groupby([node] + parents).size().unstack(parents).fillna(0)
                state_counts = count_table.values.ravel()

                values = (state_counts + node_alpha) / (state_counts.sum() + node_alpha.sum())
                values = values.reshape(var_card, np.prod(parent_card))
                cpd = TabularCPD(node, var_card, values,
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))

            cpd.normalize()
            parameters.append(cpd)

        return parameters
예제 #2
0
    def get_parameters(self):
        """
        Method used to get parameters.

        For every node the states are counted in the data (split by the
        states of the node's parents, if it has any) and the counts are
        normalized into a TabularCPD. Parent configurations that never occur
        in the data get a uniform distribution over the node's states.

        Returns
        -------
        parameters: list
            List of TabularCPDs, one for each variable of the model

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        parameters = []

        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            if not parents:
                # value_counts() orders by frequency; the CPD rows must follow
                # the order of the states instead.
                state_counts = self.data.loc[:, node].value_counts().sort_index()
                cpd = TabularCPD(node, self.node_card[node],
                                 state_counts.values[:, np.newaxis])
            else:
                parent_card = np.array([self.node_card[parent] for parent in parents])
                var_card = self.node_card[node]

                # Rows: states of `node`; columns: observed parent configurations.
                values = self.data.groupby([node] + parents).size().unstack(parents).fillna(0)
                if len(values.columns) != np.prod(parent_card):
                    # some columns are missing if for some states of the parents no data was observed.
                    # reindex to add missing columns and fill in uniform (conditional) probabilities.
                    if not isinstance(values.columns, pd.MultiIndex):
                        # With a single parent the unstacked columns are a flat
                        # Index whose labels would not match the tuples of the
                        # MultiIndex below; lift them to one level first so the
                        # observed counts survive the reindex.
                        values.columns = pd.MultiIndex.from_product([values.columns], names=parents)
                    # NOTE(review): assumes parent states are labelled
                    # 0..card-1 in the data - confirm.
                    full_index = pd.MultiIndex.from_product([range(card) for card in parent_card], names=parents)
                    values = values.reindex(columns=full_index).fillna(1.0 / var_card)

                cpd = TabularCPD(node, var_card, np.array(values),
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))

            cpd.normalize()
            parameters.append(cpd)

        return parameters
예제 #3
0
    def get_parameters(self):
        """
        Method used to get parameters.

        For every node the states are counted in the data (split by the
        states of the node's parents, if it has any) and the counts are
        normalized into a TabularCPD.

        Returns
        -------
        parameters: list
            List of TabularCPDs, one for each node of the model.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        parameters = []

        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            if not parents:
                # value_counts() orders by frequency; the CPD rows must follow
                # the order of the states instead.
                state_counts = self.data.loc[:, node].value_counts().sort_index()
                cpd = TabularCPD(node, self.node_card[node],
                                 state_counts.values[:, np.newaxis])
            else:
                parent_card = np.array(
                    [self.node_card[parent] for parent in parents])
                var_card = self.node_card[node]
                # Rows: states of `node`; columns: observed parent
                # configurations. Combinations missing inside the table count
                # as 0.
                # NOTE(review): a parent configuration that never occurs in
                # the data produces no column at all, so the table is handed
                # to TabularCPD narrower than evidence_card implies.
                values = self.data.groupby([node] + parents).size().unstack(
                    parents).fillna(0)
                cpd = TabularCPD(node,
                                 var_card,
                                 np.array(values),
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))

            cpd.normalize()
            parameters.append(cpd)

        return parameters
예제 #4
0
파일: MLE.py 프로젝트: ankurankan/pgmpy
    def get_parameters(self):
        """
        Method used to get parameters.

        Each node's CPD is estimated from relative frequencies: the node's
        states are counted in the data, separately for every observed
        configuration of its parents when it has parents, and the resulting
        table is normalized.

        Returns
        -------
        parameters: list
            List of TabularCPDs, one for each node of the model.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        parameters = []

        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            if not parents:
                # Sort the frequency-ordered counts by state so that row i of
                # the CPD belongs to the i-th state.
                state_counts = self.data.loc[:, node].value_counts().sort_index()
                cpd = TabularCPD(node, self.node_card[node],
                                 state_counts.values[:, np.newaxis])
            else:
                parent_card = np.array([self.node_card[parent] for parent in parents])
                var_card = self.node_card[node]
                # One row per state of `node`, one column per observed parent
                # configuration; gaps inside the table are counted as 0.
                # NOTE(review): parent configurations absent from the data
                # yield no column, so the table can be narrower than
                # evidence_card implies.
                values = self.data.groupby([node] + parents).size().unstack(parents).fillna(0)
                cpd = TabularCPD(node, var_card, np.array(values),
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))

            cpd.normalize()
            parameters.append(cpd)

        return parameters
예제 #5
0
파일: MLE.py 프로젝트: EJHortala/books-2
    def get_parameters(self, **kwargs):
        """
        Method used to get parameters.

        For a BayesianModel every node's states are counted in the data
        (split by the states of its parents, if it has any) and normalized
        into a TabularCPD. For a MarkovModel one Factor per edge is fitted by
        numerically minimizing ``Z - sum(counts * params)``, where ``Z`` is
        the sum of all entries of the product of the edge factors and
        ``counts`` are the joint state counts of the edges' end points.

        Parameters
        ----------
        score: bool (keyword argument, optional)
            Only read for a MarkovModel. When truthy, the final value of the
            minimized objective is returned along with the factors.

        Returns
        -------
        parameters: list
            List containing all the parameters. For Bayesian Model it would be list of CPDs'
            for Markov Model it would be a list of factors (or the tuple
            ``(factors, score)`` when ``score`` is truthy). For any other
            model type nothing is computed and None is returned.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        if isinstance(self.model, BayesianModel):
            parameters = []

            for node in self.model.nodes():
                parents = self.model.get_parents(node)
                if not parents:
                    # value_counts() orders by frequency; the CPD rows must
                    # follow the order of the states instead.
                    state_counts = self.data.loc[:, node].value_counts().sort_index()
                    cpd = TabularCPD(node, self.node_card[node],
                                     state_counts.values[:, np.newaxis])
                else:
                    parent_card = np.array(
                        [self.node_card[parent] for parent in parents])
                    var_card = self.node_card[node]
                    # Group by the same parent order that is handed to
                    # TabularCPD as evidence. Unstacking gives a (node state x
                    # parent configuration) table in which combinations that
                    # were never observed count as 0 instead of being dropped.
                    count_table = self.data.groupby(
                        [node] + parents).size().unstack(parents).fillna(0)
                    # The reshape still fails loudly if a whole row or column
                    # of the table is absent from the data.
                    values = np.array(count_table).reshape(
                        var_card, np.prod(parent_card))
                    cpd = TabularCPD(node,
                                     var_card,
                                     values,
                                     evidence=parents,
                                     evidence_card=parent_card.astype('int'))

                cpd.normalize()
                parameters.append(cpd)

            return parameters

        elif isinstance(self.model, MarkovModel):
            # Materialize the edges once so they can be iterated repeatedly
            # in a fixed order.
            edges = list(self.model.edges())
            no_of_params = [
                self.node_card[u] * self.node_card[v] for u, v in edges
            ]
            # Joint state counts of every edge, concatenated edge by edge.
            # NOTE(review): state pairs that never occur are dropped by
            # groupby, which leaves `constants` shorter than the parameter
            # vector - confirm the data always covers every pair.
            constants = []
            for u, v in edges:
                value_counts = self.data.groupby([u, v]).size()
                constants.extend(value_counts.values)
            total_params = sum(no_of_params)
            constants = np.array(constants)

            # param_cumsum[i]:param_cumsum[i + 1] is the slice of the flat
            # parameter vector that belongs to edge i.
            param_cumsum = np.cumsum([0] + no_of_params)

            def build_factors(params):
                # One Factor per edge, filled from its slice of `params`.
                return [
                    Factor([u, v], [self.node_card[u], self.node_card[v]],
                           params[param_cumsum[index]:param_cumsum[index + 1]])
                    for index, (u, v) in enumerate(edges)
                ]

            def optimize_fun(params):
                # np.sum reduces to a scalar whatever the dimensionality of
                # the product's value array is.
                Z = np.sum(factor_product(*build_factors(params)).values)
                return Z - np.sum(constants * params)

            mini = minimize(optimize_fun, x0=[1] * total_params)
            factors = build_factors(mini.x)

            if kwargs.get('score'):
                return factors, mini.fun
            return factors
예제 #6
0
파일: MLE.py 프로젝트: Sayan-Paul/kod
    def get_parameters(self, **kwargs):
        """
        Method used to get parameters.

        BayesianModel: each node's CPD is the normalized table of state
        counts, taken separately for every configuration of the node's
        parents when it has parents.

        MarkovModel: one Factor per edge is fitted by numerically minimizing
        ``Z - sum(counts * params)``; ``Z`` is the sum of all entries of the
        product of the edge factors and ``counts`` are the joint state counts
        of the edges' end points.

        Parameters
        ----------
        score: bool (keyword argument, optional)
            Only read for a MarkovModel. When truthy, the final value of the
            minimized objective is returned along with the factors.

        Returns
        -------
        parameters: list
            List containing all the parameters. For Bayesian Model it would be list of CPDs'
            for Markov Model it would be a list of factors (or the tuple
            ``(factors, score)`` when ``score`` is truthy). For any other
            model type nothing is computed and None is returned.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        if isinstance(self.model, BayesianModel):
            parameters = []

            for node in self.model.nodes():
                parents = self.model.get_parents(node)
                if not parents:
                    # Sort the frequency-ordered counts by state so that row i
                    # of the CPD belongs to the i-th state.
                    state_counts = self.data.loc[:, node].value_counts().sort_index()
                    cpd = TabularCPD(node, self.node_card[node],
                                     state_counts.values[:, np.newaxis])
                else:
                    parent_card = np.array([self.node_card[parent] for parent in parents])
                    var_card = self.node_card[node]
                    # Count with the parents in the order given to TabularCPD
                    # as evidence; unstacking keeps unobserved combinations as
                    # explicit zeros instead of dropping them.
                    count_table = self.data.groupby([node] + parents).size().unstack(parents).fillna(0)
                    # Still raises if an entire row or column is missing.
                    values = np.array(count_table).reshape(var_card, np.prod(parent_card))
                    cpd = TabularCPD(node, var_card, values,
                                     evidence=parents,
                                     evidence_card=parent_card.astype('int'))

                cpd.normalize()
                parameters.append(cpd)

            return parameters

        elif isinstance(self.model, MarkovModel):
            # A concrete list gives the edges a fixed order and allows
            # repeated iteration.
            edges = list(self.model.edges())
            no_of_params = [self.node_card[u] * self.node_card[v] for u, v in edges]
            # Joint state counts of every edge, concatenated edge by edge.
            # NOTE(review): groupby drops state pairs that never occur, which
            # leaves `constants` shorter than the parameter vector - confirm
            # the data always covers every pair.
            constants = []
            for u, v in edges:
                value_counts = self.data.groupby([u, v]).size()
                constants.extend(value_counts.values)
            total_params = sum(no_of_params)
            constants = np.array(constants)

            # Edge i owns params[param_cumsum[i]:param_cumsum[i + 1]].
            param_cumsum = np.cumsum([0] + no_of_params)

            def build_factors(params):
                # One Factor per edge, filled from its slice of `params`.
                return [Factor([u, v], [self.node_card[u], self.node_card[v]],
                               params[param_cumsum[index]: param_cumsum[index + 1]])
                        for index, (u, v) in enumerate(edges)]

            def optimize_fun(params):
                # np.sum yields a scalar for a value array of any dimensionality.
                Z = np.sum(factor_product(*build_factors(params)).values)
                return Z - np.sum(constants * params)

            mini = minimize(optimize_fun, x0=[1]*total_params)
            factors = build_factors(mini.x)

            if kwargs.get('score'):
                return factors, mini.fun
            return factors