def generate_sample(self, start_state=None, size=1): """ Generator version of self.sample Return Type: ------------ List of State namedtuples, representing the assignment to all variables of the model. Examples: --------- >>> from pgmpy.factors import Factor >>> from pgmpy.inference import GibbsSampling >>> from pgmpy.models import MarkovModel >>> model = MarkovModel([('A', 'B'), ('C', 'B')]) >>> factor_ab = Factor(['A', 'B'], [2, 2], [1, 2, 3, 4]) >>> factor_cb = Factor(['C', 'B'], [2, 2], [5, 6, 7, 8]) >>> model.add_factors(factor_ab, factor_cb) >>> gibbs = GibbsSampling(model) >>> gen = gibbs.generate_sample(size=2) >>> [sample for sample in gen] [[State(var='C', state=1), State(var='B', state=1), State(var='A', state=0)], [State(var='C', state=0), State(var='B', state=1), State(var='A', state=1)]] """ if start_state is None and self.state is None: self.state = self.random_state() else: self.set_start_state(start_state) for i in range(size): for j, (var, st) in enumerate(self.state): other_st = tuple(st for v, st in self.state if var != v) next_st = sample_discrete(list(range(self.cardinalities[var])), self.transition_models[var][other_st])[0] self.state[j] = State(var, next_st) yield self.state[:]
def forward_sample(self, size=1): """ Generates sample(s) from joint distribution of the bayesian network. Parameters ---------- size: int size of sample to be generated Returns ------- sampled: pandas.DataFrame the generated samples Examples -------- >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.CPD import TabularCPD >>> from pgmpy.inference.Sampling import BayesianModelSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> inference = BayesianModelSampling(student) >>> inference.forward_sample(2) diff intel grade 0 (diff, 1) (intel, 0) (grade, 1) 1 (diff, 1) (intel, 0) (grade, 2) """ sampled = DataFrame(index=range(size), columns=self.topological_order) for node in self.topological_order: cpd = self.cpds[node] states = [st for var, st in cpd.variables[node]] if cpd.evidence: indices = [i for i, x in enumerate(self.topological_order) if x in cpd.evidence] evidence = sampled.values[:, [indices]].tolist() weights = list(map(lambda t: cpd.reduce(t[0], inplace=False).values, evidence)) sampled[node] = list(map(lambda t: State(node, t), sample_discrete(states, weights))) else: sampled[node] = list(map(lambda t: State(node, t), sample_discrete(states, cpd.values, size))) return sampled
def forward_sample(self, size=1, return_type="dataframe"): """ Generates sample(s) from joint distribution of the bayesian network. Parameters ---------- size: int size of sample to be generated return_type: string (dataframe | recarray) Return type for samples, either of 'dataframe' or 'recarray'. Defaults to 'dataframe' Returns ------- sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument the generated samples Examples -------- >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.discrete import TabularCPD >>> from pgmpy.sampling import BayesianModelSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> inference = BayesianModelSampling(student) >>> inference.forward_sample(size=2, return_type='recarray') rec.array([(0, 0, 1), (1, 0, 2)], dtype= [('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8')]) """ types = [(var_name, "int") for var_name in self.topological_order] sampled = np.zeros(size, dtype=types).view(np.recarray) pbar = tqdm(self.topological_order) for node in pbar: pbar.set_description( "Generating for node: {node}".format(node=node)) cpd = self.model.get_cpds(node) states = range(self.cardinality[node]) evidence = cpd.variables[:0:-1] if evidence: cached_values = self.pre_compute_reduce(variable=node) evidence = np.vstack([sampled[i] for i in evidence]) weights = list( map(lambda t: cached_values[tuple(t)], evidence.T)) else: weights = cpd.values sampled[node] = sample_discrete(states, weights, size) return _return_samples(return_type, sampled)
def sample(self, start_state=None, size=1, return_type="dataframe"): """ Sample from the Markov Chain. Parameters ---------- start_state: dict or array-like iterable Representing the starting states of the variables. If None is passed, a random start_state is chosen. size: int Number of samples to be generated. return_type: string (dataframe | recarray) Return type for samples, either of 'dataframe' or 'recarray'. Defaults to 'dataframe' Returns ------- sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument the generated samples Examples -------- >>> from pgmpy.factors import DiscreteFactor >>> from pgmpy.sampling import GibbsSampling >>> from pgmpy.models import MarkovModel >>> model = MarkovModel([('A', 'B'), ('C', 'B')]) >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4]) >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8]) >>> model.add_factors(factor_ab, factor_cb) >>> gibbs = GibbsSampling(model) >>> gibbs.sample(size=4, return_tupe='dataframe') A B C 0 0 1 1 1 1 0 0 2 1 1 0 3 1 1 1 """ if start_state is None and self.state is None: self.state = self.random_state() elif start_state is not None: self.set_start_state(start_state) types = [(var_name, "int") for var_name in self.variables] sampled = np.zeros(size, dtype=types).view(np.recarray) sampled[0] = tuple([st for var, st in self.state]) for i in tqdm(range(size - 1)): for j, (var, st) in enumerate(self.state): other_st = tuple(st for v, st in self.state if var != v) next_st = sample_discrete( list(range(self.cardinalities[var])), self.transition_models[var][other_st], )[0] self.state[j] = State(var, next_st) sampled[i + 1] = tuple([st for var, st in self.state]) return _return_samples(return_type, sampled)
def sample(self, start_state=None, size=1, return_type="dataframe"): """ Sample from the Markov Chain. Parameters: ----------- start_state: dict or array-like iterable Representing the starting states of the variables. If None is passed, a random start_state is chosen. size: int Number of samples to be generated. return_type: string (dataframe | recarray) Return type for samples, either of 'dataframe' or 'recarray'. Defaults to 'dataframe' Returns ------- sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument the generated samples Examples: --------- >>> from pgmpy.factors import DiscreteFactor >>> from pgmpy.inference import GibbsSampling >>> from pgmpy.models import MarkovModel >>> model = MarkovModel([('A', 'B'), ('C', 'B')]) >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4]) >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8]) >>> model.add_factors(factor_ab, factor_cb) >>> gibbs = GibbsSampling(model) >>> gibbs.sample(size=4, return_tupe='dataframe') A B C 0 0 1 1 1 1 0 0 2 1 1 0 3 1 1 1 """ if start_state is None and self.state is None: self.state = self.random_state() elif start_state is not None: self.set_start_state(start_state) types = [(var_name, 'int') for var_name in self.variables] sampled = np.zeros(size, dtype=types).view(np.recarray) sampled[0] = np.array([st for var, st in self.state]) for i in range(size - 1): for j, (var, st) in enumerate(self.state): other_st = tuple(st for v, st in self.state if var != v) next_st = sample_discrete(list(range(self.cardinalities[var])), self.transition_models[var][other_st])[0] self.state[j] = State(var, next_st) sampled[i + 1] = np.array([st for var, st in self.state]) return _return_samples(return_type, sampled)
def forward_sample(self, size=1, return_type='dataframe'): """ Generates sample(s) from joint distribution of the bayesian network. Parameters ---------- size: int size of sample to be generated return_type: string (dataframe | recarray) Return type for samples, either of 'dataframe' or 'recarray'. Defaults to 'dataframe' Returns ------- sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument the generated samples Examples -------- >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.discrete import TabularCPD >>> from pgmpy.sampling import BayesianModelSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> inference = BayesianModelSampling(student) >>> inference.forward_sample(size=2, return_type='recarray') rec.array([(0, 0, 1), (1, 0, 2)], dtype=[('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8')]) """ types = [(var_name, 'int') for var_name in self.topological_order] sampled = np.zeros(size, dtype=types).view(np.recarray) for node in self.topological_order: cpd = self.model.get_cpds(node) states = range(self.cardinality[node]) evidence = cpd.variables[:0:-1] if evidence: cached_values = self.pre_compute_reduce(variable=node) evidence = np.vstack([sampled[i] for i in evidence]) weights = list(map(lambda t: cached_values[tuple(t)], evidence.T)) else: weights = cpd.values sampled[node] = sample_discrete(states, weights, size) return _return_samples(return_type, sampled)
def sample(self, start_state=None, size=1): """ Sample from the Markov Chain. Parameters: ----------- start_state: dict or array-like iterable Representing the starting states of the variables. If None is passed, a random start_state is chosen. size: int Number of samples to be generated. Return Type: ------------ pandas.DataFrame Examples: --------- >>> from pgmpy.factors import Factor >>> from pgmpy.inference import GibbsSampling >>> from pgmpy.models import MarkovModel >>> model = MarkovModel([('A', 'B'), ('C', 'B')]) >>> factor_ab = Factor(['A', 'B'], [2, 2], [1, 2, 3, 4]) >>> factor_cb = Factor(['C', 'B'], [2, 2], [5, 6, 7, 8]) >>> model.add_factors(factor_ab, factor_cb) >>> gibbs = GibbsSampling(model) >>> gibbs.sample(size=4) A B C 0 0 1 1 1 1 0 0 2 1 1 0 3 1 1 1 """ if start_state is None and self.state is None: self.state = self.random_state() elif start_state is not None: self.set_start_state(start_state) sampled = DataFrame(index=range(size), columns=self.variables) sampled.loc[0] = [st for var, st in self.state] for i in range(size - 1): for j, (var, st) in enumerate(self.state): other_st = tuple(st for v, st in self.state if var != v) next_st = sample_discrete( list(range(self.cardinalities[var])), self.transition_models[var][other_st])[0] self.state[j] = State(var, next_st) sampled.loc[i + 1] = [st for var, st in self.state] return sampled
def forward_sample(self, size=1): """ Generates sample(s) from joint distribution of the bayesian network. Parameters ---------- size: int size of sample to be generated Returns ------- sampled: pandas.DataFrame the generated samples Examples -------- >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.CPD import TabularCPD >>> from pgmpy.inference.Sampling import BayesianModelSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> inference = BayesianModelSampling(student) >>> inference.forward_sample(2) diff intel grade 0 1 0 1 1 1 0 2 """ sampled = DataFrame(index=range(size), columns=self.topological_order) for node in self.topological_order: cpd = self.model.get_cpds(node) states = range(self.cardinality[node]) evidence = cpd.variables[:0:-1] if evidence: cached_values = self.pre_compute_reduce(variable=node) evidence = sampled.ix[:, evidence].values weights = list(map(lambda t: cached_values[tuple(t)], evidence)) else: weights = cpd.values sampled[node] = sample_discrete(states, weights, size) return sampled
def generate_sample(self, start_state=None, size=1): """ Generator version of self.sample Return Type: ------------ List of State namedtuples, representing the assignment to all variables of the model. Examples: --------- >>> from pgmpy.factors.discrete import DiscreteFactor >>> from pgmpy.sampling import GibbsSampling >>> from pgmpy.models import MarkovModel >>> model = MarkovModel([('A', 'B'), ('C', 'B')]) >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4]) >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8]) >>> model.add_factors(factor_ab, factor_cb) >>> gibbs = GibbsSampling(model) >>> gen = gibbs.generate_sample(size=2) >>> [sample for sample in gen] [[State(var='C', state=1), State(var='B', state=1), State(var='A', state=0)], [State(var='C', state=0), State(var='B', state=1), State(var='A', state=1)]] """ if start_state is None and self.state is None: self.state = self.random_state() elif start_state is not None: self.set_start_state(start_state) for i in range(size): for j, (var, st) in enumerate(self.state): other_st = tuple(st for v, st in self.state if var != v) next_st = sample_discrete(list(range(self.cardinalities[var])), self.transition_models[var][other_st])[0] self.state[j] = State(var, next_st) yield self.state[:]
def likelihood_weighted_sample(self, evidence=None, size=1, return_type="dataframe"): """ Generates weighted sample(s) from joint distribution of the bayesian network, that comply with the given evidence. 'Probabilistic Graphical Model Principles and Techniques', Koller and Friedman, Algorithm 12.2 pp 493. Parameters ---------- evidence: list of `pgmpy.factor.State` namedtuples None if no evidence size: int size of sample to be generated return_type: string (dataframe | recarray) Return type for samples, either of 'dataframe' or 'recarray'. Defaults to 'dataframe' Returns ------- sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument the generated samples with corresponding weights Examples -------- >>> from pgmpy.factors.discrete import State >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.discrete import TabularCPD >>> from pgmpy.sampling import BayesianModelSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> inference = BayesianModelSampling(student) >>> evidence = [State('diff', 0)] >>> inference.likelihood_weighted_sample(evidence=evidence, size=2, return_type='recarray') rec.array([(0, 0, 1, 0.6), (0, 0, 2, 0.6)], dtype=[('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8'), ('_weight', '<f8')]) """ types = [(var_name, 'int') for var_name in self.topological_order] types.append(('_weight', 'float')) sampled = np.zeros(size, dtype=types).view(np.recarray) sampled['_weight'] = np.ones(size) evidence_dict = {var: st for var, st in evidence} for node in self.topological_order: cpd = self.model.get_cpds(node) states = range(self.cardinality[node]) evidence = cpd.get_evidence() if evidence: evidence_values = np.vstack([sampled[i] for i in evidence]) cached_values = self.pre_compute_reduce(node) weights = list( map(lambda t: cached_values[tuple(t)], evidence_values.T)) if node in evidence_dict: sampled[node] = evidence_dict[node] for i in range(size): sampled['_weight'][i] *= weights[i][ evidence_dict[node]] else: sampled[node] = sample_discrete(states, weights) else: if node in evidence_dict: sampled[node] = evidence_dict[node] for i in range(size): sampled['_weight'][i] *= cpd.values[ evidence_dict[node]] else: sampled[node] = sample_discrete(states, cpd.values, size) return _return_samples(return_type, sampled)
def likelihood_weighted_sample(self, evidence=None, size=1, return_type="dataframe"): """ Generates weighted sample(s) from joint distribution of the bayesian network, that comply with the given evidence. 'Probabilistic Graphical Model Principles and Techniques', Koller and Friedman, Algorithm 12.2 pp 493. Parameters ---------- evidence: list of `pgmpy.factor.State` namedtuples None if no evidence size: int size of sample to be generated return_type: string (dataframe | recarray) Return type for samples, either of 'dataframe' or 'recarray'. Defaults to 'dataframe' Returns ------- sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument the generated samples with corresponding weights Examples -------- >>> from pgmpy.factors.discrete import State >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.discrete import TabularCPD >>> from pgmpy.sampling import BayesianModelSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> inference = BayesianModelSampling(student) >>> evidence = [State('diff', 0)] >>> inference.likelihood_weighted_sample(evidence=evidence, size=2, return_type='recarray') rec.array([(0, 0, 1, 0.6), (0, 0, 2, 0.6)], dtype=[('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8'), ('_weight', '<f8')]) """ types = [(var_name, 'int') for var_name in self.topological_order] types.append(('_weight', 'float')) sampled = np.zeros(size, dtype=types).view(np.recarray) sampled['_weight'] = np.ones(size) evidence_dict = {var: st for var, st in evidence} for node in self.topological_order: cpd = self.model.get_cpds(node) states = range(self.cardinality[node]) evidence = cpd.get_evidence() if evidence: evidence_values = np.vstack([sampled[i] for i in evidence]) cached_values = self.pre_compute_reduce(node) weights = list(map(lambda t: cached_values[tuple(t)], evidence_values.T)) if node in evidence_dict: sampled[node] = evidence_dict[node] for i in range(size): sampled['_weight'][i] *= weights[i][evidence_dict[node]] else: sampled[node] = sample_discrete(states, weights) else: if node in evidence_dict: sampled[node] = evidence_dict[node] for i in range(size): sampled['_weight'][i] *= cpd.values[evidence_dict[node]] else: sampled[node] = sample_discrete(states, cpd.values, size) return _return_samples(return_type, sampled)
def likelihood_weighted_sample(self, evidence=None, size=1): """ Generates weighted sample(s) from joint distribution of the bayesian network, that comply with the given evidence. 'Probabilistic Graphical Model Principles and Techniques', Koller and Friedman, Algorithm 12.2 pp 493. Parameters ---------- evidence: list of `pgmpy.factor.State` namedtuples None if no evidence size: int size of sample to be generated Returns ------- sampled: pandas.DataFrame the generated samples with corresponding weights Examples -------- >>> from pgmpy.factors.Factor import State >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.CPD import TabularCPD >>> from pgmpy.inference.Sampling import BayesianModelSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> inference = BayesianModelSampling(student) >>> evidence = {'diff': State(var='diff',state=0)} >>> inference.likelihood_weighted_sample(evidence, 2) intel diff grade _weight 0 (intel, 0) (diff, 0) (grade, 1) 0.6 1 (intel, 1) (diff, 0) (grade, 1) 0.6 """ sampled = DataFrame(index=range(size), columns=self.topological_order) sampled['_weight'] = np.ones(size) evidence_dict = {var: st for var, st in evidence} for node in self.topological_order: cpd = self.cpds[node] states = [st for var, st in cpd.variables[node]] if cpd.evidence: indices = [i for i, x in enumerate(self.topological_order) if x in cpd.evidence] evidence = sampled.values[:, [indices]].tolist() weights = list(map(lambda t: cpd.reduce(t[0], inplace=False).values, evidence)) if node in evidence_dict: sampled[node] = (State(node, evidence_dict[node]), ) * size for i in range(size): sampled.loc[i, '_weight'] *= weights[i][evidence_dict[node]] else: sampled[node] = list(map(lambda t: State(node, t), sample_discrete(states, weights))) else: if node in evidence_dict: sampled[node] = (State(node, evidence_dict[node]), ) * size for i in range(size): sampled.loc[i, '_weight'] *= cpd.values[evidence_dict[node]] else: sampled[node] = list(map(lambda t: State(node, t), sample_discrete(states, cpd.values, size))) return sampled
def likelihood_weighted_sample(self, evidence=None, size=1): """ Generates weighted sample(s) from joint distribution of the bayesian network, that comply with the given evidence. 'Probabilistic Graphical Model Principles and Techniques', Koller and Friedman, Algorithm 12.2 pp 493. Parameters ---------- evidence: list of `pgmpy.factor.State` namedtuples None if no evidence size: int size of sample to be generated Returns ------- sampled: pandas.DataFrame the generated samples with corresponding weights Examples -------- >>> from pgmpy.factors.Factor import State >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.CPD import TabularCPD >>> from pgmpy.inference.Sampling import BayesianModelSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> inference = BayesianModelSampling(student) >>> evidence = [State('diff', 0)] >>> inference.likelihood_weighted_sample(evidence, 2) intel diff grade _weight 0 0 0 1 0.6 1 1 0 1 0.6 """ sampled = DataFrame(index=range(size), columns=self.topological_order) sampled['_weight'] = np.ones(size) evidence_dict = {var: st for var, st in evidence} for node in self.topological_order: cpd = self.model.get_cpds(node) states = range(self.cardinality[node]) if cpd.evidence: evidence = sampled.ix[:, cpd.evidence].values cached_values = self.pre_compute_reduce(node) weights = list(map(lambda t: cached_values[tuple(t)], evidence)) if node in evidence_dict: sampled[node] = evidence_dict[node] for i in range(size): sampled.loc[i, '_weight'] *= weights[i][evidence_dict[node]] else: sampled[node] = sample_discrete(states, weights) else: if node in evidence_dict: sampled[node] = evidence_dict[node] for i in range(size): sampled.loc[i, '_weight'] *= cpd.values[evidence_dict[node]] else: sampled[node] = sample_discrete(states, cpd.values, size) return sampled
def sample(self, evidence=None, start_state=None, size=1, return_type="dataframe"): """ Sample from the Markov Chain. Parameters: ----------- start_state: dict or array-like iterable Representing the starting states of the variables. If None is passed, a random start_state is chosen. evidence: array-like iterable Representing states of the evidence variables size: int Number of samples to be generated. return_type: string (dataframe | recarray) Return type for samples, either of 'dataframe' or 'recarray'. Defaults to 'dataframe' Returns ------- sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument the generated samples Examples: --------- >>> from pgmpy.models.BayesianModel import BayesianModel >>> from pgmpy.factors.discrete import TabularCPD >>> from GibbsSamplingWithEvidence import GibbsSampling >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')]) >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]]) >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]]) >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, ... 0.08, 0.3], [0.3, 0.7, 0.02, 0.2]], ... ['intel', 'diff'], [2, 2]) >>> student.add_cpds(cpd_d, cpd_i, cpd_g) >>> gibbs_sampler = GibbsSampling(student) >>> samples = gibbs_sampler.sample(size=5, evidence=[('grade',1)]) >>> print(samples) diff grade intel 0 1 1 1 1 1 1 0 2 0 1 0 3 0 1 0 4 1 1 0 """ if start_state is None and self.state is None: self.state = self.random_state() elif start_state is not None: self.set_start_state(start_state) #overwriting with evidence if evidence is not None: for j, (var, st) in enumerate(self.state): for key, value in evidence.items(): #for k, (v_e, st_e) in enumerate(evidence): if var == key: #print(var, self.state[j]) self.state[j] = State(var, value) types = [(var_name, 'int') for var_name in self.variables] sampled = np.zeros(size, dtype=types).view(np.recarray) sampled[0] = tuple([st for var, st in self.state]) for i in range(size - 1): for j, (var, st) in enumerate(self.state): # check for evidence next_st = None if evidence is not None: for v_e, st_e in evidence.items(): if var == v_e: next_st = st_e if next_st is None: other_st = tuple(st for v, st in self.state if var != v) next_st = sample_discrete( list(range(self.cardinalities[var])), self.transition_models[var][other_st])[0] self.state[j] = State(var, next_st) sampled[i + 1] = tuple([st for var, st in self.state]) return _return_samples(return_type, sampled)