def get_parameters(self, prior='dirichlet', **kwargs):
    """
    Estimate one CPD per model node from ``self.data`` using pseudo-counts.

    For every node the observed state counts are added to the node's
    pseudo-counts (``alpha``), divided by the overall total, wrapped in a
    ``TabularCPD`` and normalized.

    Parameters
    ----------
    prior: str
        Only ``'dirichlet'`` produces CPDs. For any other value the
        returned list is empty.
    alpha: dict (keyword argument, optional)
        Maps each node to a flat sequence of pseudo-counts. For a node
        without parents it is added element-wise to the per-state counts
        (ordered by state label); for a node with parents it is added
        element-wise to the flattened counts of
        ``groupby([node] + parents)`` before they are reshaped to
        ``(node_card, product of parent cardinalities)``.
        When omitted and ``prior == 'dirichlet'``, every pseudo-count is 1.

    Returns
    -------
    parameters: list
        List of TabularCPDs, one for each node of the model.

    Raises
    ------
    KeyError
        If ``prior`` is not ``'dirichlet'`` and no ``alpha`` keyword is given.

    Examples
    --------
    NOTE(review): the example below was inherited from the original
    docstring and names MaximumLikelihoodEstimator; confirm the class.

    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> estimator = MaximumLikelihoodEstimator(model, values)
    >>> estimator.get_parameters()
    """
    if prior == 'dirichlet' and 'alpha' not in kwargs:
        # Default: one pseudo-count per (node state, parent configuration).
        alpha = {}
        for node in self.model.nodes():
            # dtype=int keeps the empty product at integer 1 (np.prod([]) is 1.0).
            n_parent_configs = int(np.prod(
                [self.node_card[parent] for parent in self.model.get_parents(node)],
                dtype=int))
            alpha[node] = [1] * (int(self.node_card[node]) * n_parent_configs)
    else:
        # Deliberately a plain lookup: a missing 'alpha' raises KeyError.
        alpha = kwargs['alpha']

    parameters = []
    if prior != 'dirichlet':
        # No other prior is implemented; nothing is estimated.
        return parameters

    for node in self.model.nodes():
        parents = self.model.get_parents(node)
        var_card = self.node_card[node]
        # Float dtype so the division below can never truncate.
        node_alpha = np.asarray(alpha[node], dtype=float)

        if not parents:
            # value_counts() orders by frequency; sort by state label so the
            # counts line up with the pseudo-counts and the CPD rows.
            state_counts = self.data.loc[:, node].value_counts().sort_index().values
            values = (state_counts + node_alpha) / (state_counts.sum() + node_alpha.sum())
            cpd = TabularCPD(node, var_card, values[:, np.newaxis])
        else:
            parent_card = np.array([self.node_card[parent] for parent in parents])
            # groupby sorts its keys, so sizes are ordered by node state first,
            # then by the parent states.
            # NOTE(review): the reshape below requires every
            # (node, parents) combination to occur in the data.
            state_counts = self.data.groupby([node] + list(parents)).size().values
            values = (state_counts + node_alpha) / (state_counts.sum() + node_alpha.sum())
            values = values.reshape(var_card, np.prod(parent_card))
            cpd = TabularCPD(node, var_card, values,
                             evidence=parents,
                             evidence_card=parent_card.astype('int'))

        cpd.normalize()
        parameters.append(cpd)

    return parameters
def get_parameters(self):
    """
    Estimate one CPD per model node from the state counts in ``self.data``.

    Nodes without parents get a single-column CPD built from their state
    counts. Nodes with parents get a table of counts with one row per node
    state and one column per parent configuration. Parent configurations
    that never occur in the data are filled with a uniform distribution.
    Every CPD is normalized before it is returned.

    Returns
    -------
    parameters: list
        List of TabularCPDs, one for each variable of the model

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> estimator = MaximumLikelihoodEstimator(model, values)
    >>> estimator.get_parameters()
    """
    parameters = []

    for node in self.model.nodes():
        parents = self.model.get_parents(node)
        var_card = self.node_card[node]

        if not parents:
            # value_counts() orders by frequency; sort by state label instead.
            state_counts = self.data.loc[:, node].value_counts().sort_index()
            cpd = TabularCPD(node, var_card, state_counts.values[:, np.newaxis])
        else:
            parent_card = np.array([self.node_card[parent] for parent in parents])
            # Rows: node states; columns: observed parent configurations.
            values = self.data.groupby([node] + parents).size().unstack(parents).fillna(0)

            if len(values.columns) != np.prod(parent_card):
                # Some columns are missing because no data was observed for
                # some parent states. Reindex to add them and fill in uniform
                # (conditional) probabilities.
                # NOTE(review): assumes parent states are labelled
                # 0..card-1 - confirm against how node_card is built.
                if len(parents) == 1:
                    # unstack() with a single level yields a flat column
                    # Index, so the target must be flat as well.
                    full_index = pd.Index(range(parent_card[0]), name=parents[0])
                else:
                    full_index = pd.MultiIndex.from_product(
                        [range(card) for card in parent_card], names=parents)
                values = values.reindex(columns=full_index).fillna(1.0 / var_card)

            cpd = TabularCPD(node, var_card, np.array(values),
                             evidence=parents,
                             evidence_card=parent_card.astype('int'))

        cpd.normalize()
        parameters.append(cpd)

    return parameters
def get_parameters(self):
    """
    Estimate one CPD per model node from the state counts in ``self.data``.

    Nodes without parents get a single-column CPD built from their state
    counts (ordered by state label). Nodes with parents get a table of
    counts with one row per node state and one column per observed parent
    configuration. Every CPD is normalized before it is returned.

    Returns
    -------
    parameters: list
        List of TabularCPDs, one for each node of the model.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> estimator = MaximumLikelihoodEstimator(model, values)
    >>> estimator.get_parameters()
    """
    parameters = []

    for node in self.model.nodes():
        parents = self.model.get_parents(node)
        var_card = self.node_card[node]

        if parents:
            parent_card = np.array(
                [self.node_card[parent] for parent in parents])
            # Rows: node states; columns: observed parent configurations.
            # NOTE(review): parent configurations that never occur in the
            # data produce no column here, so the table can be narrower
            # than prod(parent_card).
            counts = self.data.groupby([node] + parents).size()
            values = counts.unstack(parents).fillna(0)
            cpd = TabularCPD(node, var_card, np.array(values),
                             evidence=parents,
                             evidence_card=parent_card.astype('int'))
        else:
            # value_counts() orders by frequency; sort by state label instead.
            state_counts = self.data.loc[:, node].value_counts().sort_index()
            cpd = TabularCPD(node, var_card,
                             state_counts.values[:, np.newaxis])

        cpd.normalize()
        parameters.append(cpd)

    return parameters
def get_parameters(self):
    """
    Build a normalized TabularCPD for every node of ``self.model``.

    The CPD values are raw occurrence counts taken from ``self.data``:
    per-state counts for nodes without parents, and counts per
    (node state, parent configuration) for nodes with parents.
    ``TabularCPD.normalize()`` is called on each CPD before it is returned.

    Returns
    -------
    parameters: list
        List of TabularCPDs, one for each node of the model.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> estimator = MaximumLikelihoodEstimator(model, values)
    >>> estimator.get_parameters()
    """
    parameters = []

    for node in self.model.nodes():
        parents = self.model.get_parents(node)

        if not parents:
            # Sort by state label: value_counts() returns frequency order.
            state_counts = self.data.loc[:, node].value_counts().sort_index()
            cpd = TabularCPD(node, self.node_card[node],
                             state_counts.values[:, np.newaxis])
            cpd.normalize()
            parameters.append(cpd)
            continue

        parent_card = np.array([self.node_card[parent] for parent in parents])
        var_card = self.node_card[node]
        # One row per node state, one column per observed parent configuration.
        # NOTE(review): unobserved parent configurations yield no column, so
        # the table may have fewer than prod(parent_card) columns.
        values = self.data.groupby([node] + parents).size().unstack(parents).fillna(0)
        cpd = TabularCPD(node, var_card, np.array(values),
                         evidence=parents,
                         evidence_card=parent_card.astype('int'))
        cpd.normalize()
        parameters.append(cpd)

    return parameters
def get_parameters(self, **kwargs):
    """
    Estimate the parameters of ``self.model`` from ``self.data``.

    For a ``BayesianModel`` one normalized ``TabularCPD`` is built per node
    from raw state counts. For a ``MarkovModel`` one pairwise ``Factor`` is
    built per edge; the factor values are obtained by passing an objective
    (sum of the factor-product values minus the count-weighted parameters)
    to ``minimize``.

    Parameters
    ----------
    score: bool (keyword argument, optional)
        Only used for a Markov model. When truthy, the final objective
        value is returned alongside the factors.

    Returns
    -------
    parameters: list
        List containing all the parameters. For Bayesian Model it would be
        list of CPDs' for Markov Model it would be a list of factors.
        For a Markov model with a truthy ``score`` keyword the return value
        is the tuple ``(factors, score)``. ``None`` is returned when the
        model is neither a BayesianModel nor a MarkovModel.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> estimator = MaximumLikelihoodEstimator(model, values)
    >>> estimator.get_parameters()
    """
    if isinstance(self.model, BayesianModel):
        parameters = []
        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            var_card = self.node_card[node]

            if not parents:
                # value_counts() orders by frequency; sort by state label so
                # each count lands in its own state's row.
                state_counts = self.data.loc[:, node].value_counts().sort_index()
                cpd = TabularCPD(node, var_card,
                                 state_counts.values[:, np.newaxis])
            else:
                parent_card = np.array(
                    [self.node_card[parent] for parent in parents])
                # groupby sorts its keys: sizes are ordered by node state
                # first, then by parent states.
                # NOTE(review): the reshape requires every (node, parents)
                # combination to occur in the data.
                state_counts = self.data.groupby(
                    [node] + list(parents)).size()
                values = state_counts.values.reshape(
                    var_card, np.prod(parent_card))
                cpd = TabularCPD(node, var_card, values,
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))

            cpd.normalize()
            parameters.append(cpd)
        return parameters

    elif isinstance(self.model, MarkovModel):
        # Materialize the edges so they can be iterated repeatedly.
        edges = list(self.model.edges())

        # Observed co-occurrence counts of every edge, concatenated.
        # NOTE(review): assumes every (u, v) state pair occurs in the data so
        # that the counts line up with the parameter vector.
        constants = []
        for u, v in edges:
            constants.extend(self.data.groupby([u, v]).size().values)
        constants = np.array(constants)

        # param_cumsum[i]:param_cumsum[i + 1] is the slice of the flat
        # parameter vector that belongs to edge i.
        no_of_params = [
            self.node_card[u] * self.node_card[v] for u, v in edges
        ]
        total_params = sum(no_of_params)
        param_cumsum = np.cumsum([0] + no_of_params)

        def build_factors(params):
            # One pairwise factor per edge from its slice of ``params``.
            return [
                Factor([u, v],
                       [self.node_card[u], self.node_card[v]],
                       params[param_cumsum[index]:param_cumsum[index + 1]])
                for index, (u, v) in enumerate(edges)
            ]

        def optimize_fun(params):
            # np.sum reduces over all axes, whatever the shape of ``values``.
            Z = np.sum(factor_product(*build_factors(params)).values)
            return Z - np.sum(constants * params)

        mini = minimize(optimize_fun, x0=[1] * total_params)
        factors = build_factors(mini.x)
        score = mini.fun

        if kwargs.get('score'):
            return factors, score
        return factors
def get_parameters(self, **kwargs):
    """
    Learn the parameters of ``self.model`` from the samples in ``self.data``.

    * ``BayesianModel``: builds a ``TabularCPD`` of raw state counts for
      every node and normalizes it.
    * ``MarkovModel``: builds one pairwise ``Factor`` per edge whose values
      come from ``minimize`` applied to the objective "sum of the
      factor-product values minus the count-weighted parameters".

    Parameters
    ----------
    score: bool (keyword argument, optional)
        Markov models only. When truthy, the objective value reached by the
        optimizer is returned together with the factors.

    Returns
    -------
    parameters: list
        List containing all the parameters. For Bayesian Model it would be
        list of CPDs' for Markov Model it would be a list of factors.
        With a truthy ``score`` keyword a Markov model returns
        ``(factors, score)`` instead. Any other model type yields ``None``.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> estimator = MaximumLikelihoodEstimator(model, values)
    >>> estimator.get_parameters()
    """
    if isinstance(self.model, BayesianModel):
        parameters = []
        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            if parents:
                parent_card = np.array([self.node_card[parent] for parent in parents])
                var_card = self.node_card[node]
                # Sizes come back sorted by (node state, parent states).
                # NOTE(review): reshape fails unless every (node, parents)
                # combination is present in the data.
                state_counts = self.data.groupby([node] + list(parents)).size()
                values = state_counts.values.reshape(var_card, np.prod(parent_card))
                cpd = TabularCPD(node, var_card, values,
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))
            else:
                # Order counts by state label, not by frequency.
                state_counts = self.data.loc[:, node].value_counts().sort_index()
                cpd = TabularCPD(node, self.node_card[node],
                                 state_counts.values[:, np.newaxis])
            cpd.normalize()
            parameters.append(cpd)
        return parameters

    elif isinstance(self.model, MarkovModel):
        # A list, so edges can be iterated several times below.
        edges = list(self.model.edges())
        no_of_params = [self.node_card[u] * self.node_card[v] for u, v in edges]
        total_params = sum(no_of_params)

        # Concatenated co-occurrence counts, one run per edge.
        # NOTE(review): presumes each edge contributes exactly
        # card(u) * card(v) counts, i.e. all state pairs are observed.
        constants = []
        for u, v in edges:
            value_counts = self.data.groupby([u, v]).size()
            constants.extend(value_counts.values)
        constants = np.array(constants)

        # Offsets of each edge's slice inside the flat parameter vector.
        param_cumsum = np.cumsum([0] + no_of_params)

        def factors_from(params):
            # Slice the flat vector into one pairwise factor per edge.
            result = []
            for index, (u, v) in enumerate(edges):
                start, stop = param_cumsum[index], param_cumsum[index + 1]
                result.append(Factor([u, v],
                                     [self.node_card[u], self.node_card[v]],
                                     params[start:stop]))
            return result

        def optimize_fun(params):
            # np.sum collapses every axis of the product's value array.
            Z = np.sum(factor_product(*factors_from(params)).values)
            return Z - np.sum(constants * params)

        mini = minimize(optimize_fun, x0=[1] * total_params)
        final_params = mini.x
        score = mini.fun
        factors = factors_from(final_params)

        if kwargs.get('score'):
            return factors, score
        return factors