def rejection_sample(self, evidence=None, size=1, return_type="dataframe"):
    """
    Generates sample(s) from joint distribution of the bayesian network,
    given the evidence.

    Batches of forward samples are drawn and every row that disagrees with
    the evidence is discarded; this repeats until ``size`` rows have been
    accepted.

    Parameters
    ----------
    evidence: list of `pgmpy.factor.State` namedtuples
        None if no evidence
    size: int
        size of sample to be generated
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
        the generated samples

    Notes
    -----
    The loop only advances when at least one forward sample matches the
    evidence, so evidence that is never produced by ``forward_sample``
    keeps this method running indefinitely.

    Examples
    --------
    >>> from pgmpy.models.BayesianModel import BayesianModel
    >>> from pgmpy.factors.discrete import TabularCPD
    >>> from pgmpy.factors.discrete import State
    >>> from pgmpy.sampling import BayesianModelSampling
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
    >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
    >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
    ...                    0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
    ...                    ['intel', 'diff'], [2, 2])
    >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
    >>> inference = BayesianModelSampling(student)
    >>> evidence = [State(var='diff', state=0)]
    >>> inference.rejection_sample(evidence=evidence, size=2, return_type='dataframe')
       intel  diff  grade
    0      0     0      1
    1      0     0      1
    """
    if evidence is None:
        # Nothing to reject: plain forward sampling, but still honour the
        # return type the caller asked for.
        return self.forward_sample(size, return_type)

    types = [(var_name, 'int') for var_name in self.topological_order]
    sampled = np.zeros(0, dtype=types).view(np.recarray)
    prob = 1  # running estimate of the acceptance rate
    i = 0     # number of accepted rows so far
    while i < size:
        # Request 50% more rows than the acceptance estimate says we need.
        _size = int(((size - i) / prob) * 1.5)
        _sampled = self.forward_sample(_size, 'recarray')

        for var, state in evidence:
            _sampled = _sampled[_sampled[var] == state]

        # Floor the estimate at 1% so one unlucky batch cannot make the
        # next request arbitrarily large.
        prob = max(len(_sampled) / _size, 0.01)
        sampled = np.append(sampled, _sampled)[:size]

        i += len(_sampled)

    return _return_samples(return_type, sampled)
def forward_sample(self, size=1, return_type="dataframe"):
    """
    Generates sample(s) from joint distribution of the bayesian network.

    Nodes are visited in ``self.topological_order`` and a progress bar
    reports which node is currently being sampled.

    Parameters
    ----------
    size: int
        size of sample to be generated
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
        the generated samples

    Examples
    --------
    >>> from pgmpy.models.BayesianModel import BayesianModel
    >>> from pgmpy.factors.discrete import TabularCPD
    >>> from pgmpy.sampling import BayesianModelSampling
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
    >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
    >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
    ...                    0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
    ...                    ['intel', 'diff'], [2, 2])
    >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
    >>> inference = BayesianModelSampling(student)
    >>> inference.forward_sample(size=2, return_type='recarray')
    rec.array([(0, 0, 1), (1, 0, 2)], dtype=
              [('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8')])
    """
    dtype_spec = [(name, "int") for name in self.topological_order]
    sampled = np.zeros(size, dtype=dtype_spec).view(np.recarray)

    progress = tqdm(self.topological_order)
    for node in progress:
        progress.set_description("Generating for node: {node}".format(node=node))
        node_cpd = self.model.get_cpds(node)
        node_states = range(self.cardinality[node])
        # Every CPD variable except the first one, in reverse order
        # (presumably the node's parents -- confirm against TabularCPD).
        conditioning = node_cpd.variables[:0:-1]

        if not conditioning:
            weights = node_cpd.values
        else:
            reduced = self.pre_compute_reduce(variable=node)
            # One row per conditioning variable; each column is then the
            # already-sampled configuration of one sample.
            drawn = np.vstack([sampled[name] for name in conditioning])
            weights = [reduced[tuple(column)] for column in drawn.T]

        sampled[node] = sample_discrete(node_states, weights, size)

    return _return_samples(return_type, sampled)
def sample(self, start_state=None, size=1, return_type="dataframe"):
    """
    Sample from the Markov Chain.

    The first returned row is the starting state; each further row is
    produced by resampling every variable once, in order, from
    ``self.transition_models`` given the current values of the others.

    Parameters
    ----------
    start_state: dict or array-like iterable
        Representing the starting states of the variables. If None is passed,
        a random start_state is chosen.
    size: int
        Number of samples to be generated.
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
        the generated samples

    Examples
    --------
    >>> from pgmpy.factors import DiscreteFactor
    >>> from pgmpy.sampling import GibbsSampling
    >>> from pgmpy.models import MarkovModel
    >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
    >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4])
    >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8])
    >>> model.add_factors(factor_ab, factor_cb)
    >>> gibbs = GibbsSampling(model)
    >>> gibbs.sample(size=4, return_type='dataframe')
       A  B  C
    0  0  1  1
    1  1  0  0
    2  1  1  0
    3  1  1  1
    """
    if start_state is not None:
        self.set_start_state(start_state)
    elif self.state is None:
        self.state = self.random_state()

    dtype_spec = [(name, "int") for name in self.variables]
    sampled = np.zeros(size, dtype=dtype_spec).view(np.recarray)
    # Records of a structured array are filled field-by-field from a tuple.
    sampled[0] = tuple([value for _, value in self.state])

    for step in tqdm(range(size - 1)):
        for idx, (var, _) in enumerate(self.state):
            others = tuple(value for name, value in self.state if var != name)
            candidates = list(range(self.cardinalities[var]))
            distribution = self.transition_models[var][others]
            drawn = sample_discrete(candidates, distribution)
            # Update in place so later variables in this sweep already
            # condition on the new value.
            self.state[idx] = State(var, drawn[0])
        sampled[step + 1] = tuple([value for _, value in self.state])

    return _return_samples(return_type, sampled)
def sample(self, start_state=None, size=1, return_type="dataframe"):
    """
    Sample from the Markov Chain.

    Row 0 of the result is the starting state; every following row is the
    state after one full sweep in which each variable is resampled given
    the current values of all other variables.

    Parameters
    ----------
    start_state: dict or array-like iterable
        Representing the starting states of the variables. If None is passed,
        a random start_state is chosen.
    size: int
        Number of samples to be generated.
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
        the generated samples

    Examples
    --------
    >>> from pgmpy.factors import DiscreteFactor
    >>> from pgmpy.inference import GibbsSampling
    >>> from pgmpy.models import MarkovModel
    >>> model = MarkovModel([('A', 'B'), ('C', 'B')])
    >>> factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4])
    >>> factor_cb = DiscreteFactor(['C', 'B'], [2, 2], [5, 6, 7, 8])
    >>> model.add_factors(factor_ab, factor_cb)
    >>> gibbs = GibbsSampling(model)
    >>> gibbs.sample(size=4, return_type='dataframe')
       A  B  C
    0  0  1  1
    1  1  0  0
    2  1  1  0
    3  1  1  1
    """
    if start_state is None and self.state is None:
        self.state = self.random_state()
    elif start_state is not None:
        self.set_start_state(start_state)

    types = [(var_name, 'int') for var_name in self.variables]
    sampled = np.zeros(size, dtype=types).view(np.recarray)
    # A structured-array record must be assigned from a tuple to be filled
    # field by field; a plain ndarray is not unpacked across the fields.
    sampled[0] = tuple(st for var, st in self.state)
    for i in range(size - 1):
        for j, (var, st) in enumerate(self.state):
            other_st = tuple(st for v, st in self.state if var != v)
            next_st = sample_discrete(list(range(self.cardinalities[var])),
                                      self.transition_models[var][other_st])[0]
            # Written back immediately so the remaining variables of this
            # sweep condition on the fresh value.
            self.state[j] = State(var, next_st)
        sampled[i + 1] = tuple(st for var, st in self.state)
    return _return_samples(return_type, sampled)
def forward_sample(self, size=1, return_type='dataframe'):
    """
    Generates sample(s) from joint distribution of the bayesian network.

    Each node is sampled in ``self.topological_order``, so the values it is
    conditioned on have already been drawn when its turn comes.

    Parameters
    ----------
    size: int
        size of sample to be generated
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
        the generated samples

    Examples
    --------
    >>> from pgmpy.models.BayesianModel import BayesianModel
    >>> from pgmpy.factors.discrete import TabularCPD
    >>> from pgmpy.sampling import BayesianModelSampling
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
    >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
    >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
    ...                    0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
    ...                    ['intel', 'diff'], [2, 2])
    >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
    >>> inference = BayesianModelSampling(student)
    >>> inference.forward_sample(size=2, return_type='recarray')
    rec.array([(0, 0, 1), (1, 0, 2)],
              dtype=[('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8')])
    """
    field_types = [(name, 'int') for name in self.topological_order]
    sampled = np.zeros(size, dtype=field_types).view(np.recarray)

    for node in self.topological_order:
        node_cpd = self.model.get_cpds(node)
        node_states = range(self.cardinality[node])
        # All CPD variables but the first, reversed (presumably the
        # node's parents -- confirm against TabularCPD).
        conditioning = node_cpd.variables[:0:-1]

        if not conditioning:
            weights = node_cpd.values
        else:
            lookup = self.pre_compute_reduce(variable=node)
            # Rows are conditioning variables; a column is one sample's
            # configuration of them.
            drawn = np.vstack([sampled[name] for name in conditioning])
            weights = [lookup[tuple(config)] for config in drawn.T]

        sampled[node] = sample_discrete(node_states, weights, size)

    return _return_samples(return_type, sampled)
def likelihood_weighted_sample(self, evidence=None, size=1, return_type="dataframe"):
    """
    Generates weighted sample(s) from joint distribution of the bayesian
    network, that comply with the given evidence.
    'Probabilistic Graphical Model Principles and Techniques', Koller and
    Friedman, Algorithm 12.2 pp 493.

    Evidence variables are clamped to their observed state and the row's
    ``_weight`` is multiplied by the probability of that state; all other
    variables are sampled.

    Parameters
    ----------
    evidence: list of `pgmpy.factor.State` namedtuples
        None if no evidence
    size: int
        size of sample to be generated
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
        the generated samples with corresponding weights (column ``_weight``)

    Examples
    --------
    >>> from pgmpy.factors.discrete import State
    >>> from pgmpy.models.BayesianModel import BayesianModel
    >>> from pgmpy.factors.discrete import TabularCPD
    >>> from pgmpy.sampling import BayesianModelSampling
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
    >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
    >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
    ...         0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
    ...         ['intel', 'diff'], [2, 2])
    >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
    >>> inference = BayesianModelSampling(student)
    >>> evidence = [State('diff', 0)]
    >>> inference.likelihood_weighted_sample(evidence=evidence, size=2, return_type='recarray')
    rec.array([(0, 0, 1, 0.6), (0, 0, 2, 0.6)],
              dtype=[('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8'), ('_weight', '<f8')])
    """
    types = [(var_name, 'int') for var_name in self.topological_order]
    types.append(('_weight', 'float'))
    sampled = np.zeros(size, dtype=types).view(np.recarray)
    sampled['_weight'] = np.ones(size)
    # ``evidence`` defaults to None, which has to mean "no evidence"
    # rather than crash when it is iterated.
    if evidence is None:
        evidence_dict = {}
    else:
        evidence_dict = {var: st for var, st in evidence}

    for node in self.topological_order:
        cpd = self.model.get_cpds(node)
        states = range(self.cardinality[node])
        parents = cpd.get_evidence()
        if parents:
            # Rows are parents; each column is one sample's parent
            # configuration, used to look up that sample's distribution.
            parent_values = np.vstack([sampled[p] for p in parents])
            cached_values = self.pre_compute_reduce(node)
            weights = [cached_values[tuple(t)] for t in parent_values.T]
            if node in evidence_dict:
                observed = evidence_dict[node]
                sampled[node] = observed
                for i, distribution in enumerate(weights):
                    sampled['_weight'][i] *= distribution[observed]
            else:
                sampled[node] = sample_discrete(states, weights)
        else:
            if node in evidence_dict:
                observed = evidence_dict[node]
                sampled[node] = observed
                # Without parents the likelihood is identical for every row.
                sampled['_weight'] *= cpd.values[observed]
            else:
                sampled[node] = sample_discrete(states, cpd.values, size)
    return _return_samples(return_type, sampled)
def sample(self, initial_pos, num_adapt, num_samples, trajectory_length, stepsize=None, return_type='dataframe'):
    """
    Method to return samples using Hamiltonian Monte Carlo

    The stepsize is adapted during the first ``num_adapt`` iterations and
    then fixed to the averaged value. With ``num_adapt <= 1`` the call is
    delegated to ``HamiltonianMC.sample`` (no adaptation).

    Parameters
    ----------
    initial_pos: A 1d array like object
        Vector representing values of parameter position, the starting
        state in markov chain.
    num_adapt: int
        The number of iterations to run the adaptation of stepsize
    num_samples: int
        Number of samples to be generated
    trajectory_length: int or float
        Target trajectory length, stepsize * number of steps(L),
        where L is the number of steps taken per HMC iteration,
        and stepsize is step size for splitting time method.
    stepsize: float , defaults to None
        The stepsize for proposing new values of position and momentum in simulate_dynamics
        If None, then will be chosen suitably
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument

    Examples
    ---------
    >>> from pgmpy.sampling import HamiltonianMCDA as HMCda, GradLogPDFGaussian as GLPG, LeapFrog
    >>> from pgmpy.factors.continuous import JointGaussianDistribution as JGD
    >>> import numpy as np
    >>> mean = np.array([1, 1])
    >>> covariance = np.array([[1, 0.7], [0.7, 3]])
    >>> model = JGD(['x', 'y'], mean, covariance)
    >>> sampler = HMCda(model=model, grad_log_pdf=GLPG, simulate_dynamics=LeapFrog)
    >>> samples = sampler.sample(np.array([1, 1]), num_adapt=10000, num_samples = 10000,
    ...                          trajectory_length=2, stepsize=None, return_type='recarray')
    >>> samples_array = np.array([samples[var_name] for var_name in model.variables])
    >>> np.cov(samples_array)
    array([[ 0.98432155,  0.66517394],
           [ 0.66517394,  2.95449533]])
    """
    self.accepted_proposals = 1.0

    initial_pos = _check_1d_array_object(initial_pos, 'initial_pos')
    _check_length_equal(initial_pos, self.model.variables, 'initial_pos', 'model.variables')

    if stepsize is None:
        stepsize = self._find_reasonable_stepsize(initial_pos)

    if num_adapt <= 1:
        # Return samples generated using Simple HMC algorithm; forward
        # return_type so the caller's choice is not silently ignored.
        return HamiltonianMC.sample(self, initial_pos, num_samples, trajectory_length,
                                    stepsize, return_type)

    # stepsize is epsilon
    # freely chosen point, after each iteration xt(/position) is shrunk towards it
    mu = np.log(10.0 * stepsize)
    # log(10 * stepsize) large values to save computation
    # stepsize_bar is epsilon_bar
    stepsize_bar = 1.0
    h_bar = 0.0
    # See equation (6) section 3.2.1 for details

    types = [(var_name, 'float') for var_name in self.model.variables]
    samples = np.zeros(num_samples, dtype=types).view(np.recarray)
    samples[0] = tuple(initial_pos)
    position_m = initial_pos

    for i in range(1, num_samples):
        # Generating sample
        position_m, alpha = self._sample(position_m, trajectory_length, stepsize)
        # Structured-array records are filled field by field only from a
        # tuple, so convert exactly as is done for samples[0].
        samples[i] = tuple(position_m)

        # Adaptation of stepsize till num_adapt iterations
        if i <= num_adapt:
            stepsize, stepsize_bar, h_bar = self._adapt_params(
                stepsize, stepsize_bar, h_bar, mu, i, alpha)
        else:
            stepsize = stepsize_bar

    self.acceptance_rate = self.accepted_proposals / num_samples

    return _return_samples(return_type, samples)
def sample(self, initial_pos, num_samples, trajectory_length, stepsize=None, return_type='dataframe'):
    """
    Method to return samples using Hamiltonian Monte Carlo

    Row 0 of the result is ``initial_pos``; every further row is the
    position returned by one call to ``self._sample``.

    Parameters
    ----------
    initial_pos: A 1d array like object
        Vector representing values of parameter position, the starting
        state in markov chain.
    num_samples: int
        Number of samples to be generated
    trajectory_length: int or float
        Target trajectory length, stepsize * number of steps(L),
        where L is the number of steps taken per HMC iteration,
        and stepsize is step size for splitting time method.
    stepsize: float , defaults to None
        The stepsize for proposing new values of position and momentum in simulate_dynamics
        If None, then will be chosen suitably
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument

    Examples
    --------
    >>> from pgmpy.sampling import HamiltonianMC as HMC, GradLogPDFGaussian, ModifiedEuler
    >>> from pgmpy.factors.continuous import JointGaussianDistribution as JGD
    >>> import numpy as np
    >>> mean = np.array([1, -1])
    >>> covariance = np.array([[1, 0.2], [0.2, 1]])
    >>> model = JGD(['x', 'y'], mean, covariance)
    >>> sampler = HMC(model=model, grad_log_pdf=GradLogPDFGaussian, simulate_dynamics=ModifiedEuler)
    >>> samples = sampler.sample(np.array([1, 1]), num_samples = 5,
    ...                          trajectory_length=6, stepsize=0.25, return_type='dataframe')
    >>> samples
                   x              y
    0   1.000000e+00   1.000000e+00
    1   1.592133e+00   1.152911e+00
    2   1.608700e+00   1.315349e+00
    3   1.608700e+00   1.315349e+00
    4   6.843856e-01   6.237043e-01
    >>> mean = np.array([4, 1, -1])
    >>> covariance = np.array([[1, 0.7 , 0.8], [0.7, 1, 0.2], [0.8, 0.2, 1]])
    >>> model = JGD(['x', 'y', 'z'], mean, covariance)
    >>> sampler = HMC(model=model, grad_log_pdf=GradLogPDFGaussian)
    >>> samples = sampler.sample(np.array([1, 1, 1]), num_samples = 10000,
    ...                          trajectory_length=6, stepsize=0.25, return_type='dataframe')
    >>> np.cov(samples.values.T)
    array([[ 1.00795398,  0.71384233,  0.79802097],
           [ 0.71384233,  1.00633524,  0.21313767],
           [ 0.79802097,  0.21313767,  0.98519017]])
    """
    self.accepted_proposals = 1.0
    initial_pos = _check_1d_array_object(initial_pos, 'initial_pos')
    _check_length_equal(initial_pos, self.model.variables, 'initial_pos', 'model.variables')

    if stepsize is None:
        stepsize = self._find_reasonable_stepsize(initial_pos)

    types = [(var_name, 'float') for var_name in self.model.variables]
    samples = np.zeros(num_samples, dtype=types).view(np.recarray)

    # A record of a structured array is only filled field by field when the
    # right-hand side is a tuple, so positions are converted before
    # assignment (both here and inside the loop).
    samples[0] = tuple(initial_pos)
    position_m = initial_pos

    # Number of integration steps per iteration, at least one.
    lsteps = int(max(1, round(trajectory_length / stepsize, 0)))
    for i in range(1, num_samples):
        # Generating sample
        position_m, _ = self._sample(position_m, trajectory_length, stepsize, lsteps)
        samples[i] = tuple(position_m)

    self.acceptance_rate = self.accepted_proposals / num_samples

    return _return_samples(return_type, samples)
def sample(self, initial_pos, num_adapt, num_samples, stepsize=None, return_type='dataframe'):
    """
    Returns samples using No U Turn Sampler with dual averaging

    The stepsize is adapted for the first ``num_adapt`` iterations and then
    fixed to the averaged value. With ``num_adapt <= 1`` the call is
    delegated to a plain ``NoUTurnSampler`` built from this sampler's
    model, gradient class and dynamics class.

    Parameters
    ----------
    initial_pos: A 1d array like object
        Vector representing values of parameter position, the starting
        state in markov chain.
    num_adapt: int
        The number of iterations to run the adaptation of stepsize
    num_samples: int
        Number of samples to be generated
    stepsize: float , defaults to None
        The stepsize for proposing new values of position and momentum in simulate_dynamics
        If None, then will be chosen suitably
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument

    Examples
    ---------
    >>> from pgmpy.sampling import NoUTurnSamplerDA as NUTSda, GradLogPDFGaussian, LeapFrog
    >>> from pgmpy.factors.continuous import GaussianDistribution as JGD
    >>> import numpy as np
    >>> mean = np.array([10, -13])
    >>> covariance = np.array([[16, -3], [-3, 13]])
    >>> model = JGD(['x', 'y'], mean, covariance)
    >>> sampler = NUTSda(model=model, grad_log_pdf=GradLogPDFGaussian, simulate_dynamics=LeapFrog)
    >>> samples = sampler.sample(initial_pos=np.array([12, -4]), num_adapt=10, num_samples=10,
    ...                          stepsize=0.1, return_type='dataframe')
    >>> samples
               x          y
    0  12.000000  -4.000000
    1  11.864821  -3.696109
    2  10.546986  -4.892169
    3   8.526596 -21.555793
    4   8.526596 -21.555793
    5  11.343194  -6.353789
    6  -1.583269 -12.802931
    7  12.411957 -11.704859
    8  13.253336 -20.169492
    9  11.295901  -7.665058
    """
    initial_pos = _check_1d_array_object(initial_pos, 'initial_pos')
    _check_length_equal(initial_pos, self.model.variables, 'initial_pos', 'model.variables')

    if stepsize is None:
        stepsize = self._find_reasonable_stepsize(initial_pos)

    if num_adapt <= 1:
        # No adaptation requested: fall back to the plain sampler, passing
        # return_type along so the caller's choice is respected.
        return NoUTurnSampler(self.model, self.grad_log_pdf, self.simulate_dynamics).sample(
            initial_pos, num_samples, stepsize, return_type)

    # Dual-averaging state consumed and updated by self._adapt_params.
    mu = np.log(10.0 * stepsize)
    stepsize_bar = 1.0
    h_bar = 0.0

    types = [(var_name, 'float') for var_name in self.model.variables]
    samples = np.zeros(num_samples, dtype=types).view(np.recarray)
    samples[0] = tuple(initial_pos)
    position_m = initial_pos

    for i in range(1, num_samples):
        position_m, alpha, n_alpha = self._sample(position_m, stepsize)
        # Structured-array records are filled field by field only from a
        # tuple, so convert exactly as is done for samples[0].
        samples[i] = tuple(position_m)

        if i <= num_adapt:
            stepsize, stepsize_bar, h_bar = self._adapt_params(
                stepsize, stepsize_bar, h_bar, mu, i, alpha, n_alpha)
        else:
            stepsize = stepsize_bar

    return _return_samples(return_type, samples)
def sample(self, initial_pos, num_samples, stepsize=None, return_type='dataframe'):
    """
    Method to return samples using No U Turn Sampler

    Row 0 of the result is ``initial_pos``; every further row is the
    position returned by one call to ``self._sample``.

    Parameters
    ----------
    initial_pos: A 1d array like object
        Vector representing values of parameter position, the starting
        state in markov chain.
    num_samples: int
        Number of samples to be generated
    stepsize: float , defaults to None
        The stepsize for proposing new values of position and momentum in simulate_dynamics
        If None, then will be chosen suitably
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument

    Examples
    ---------
    >>> from pgmpy.sampling import NoUTurnSampler as NUTS, GradLogPDFGaussian, LeapFrog
    >>> from pgmpy.factors.continuous import GaussianDistribution as JGD
    >>> import numpy as np
    >>> mean = np.array([0, 0, 0])
    >>> covariance = np.array([[6, 0.7, 0.2], [0.7, 3, 0.9], [0.2, 0.9, 1]])
    >>> model = JGD(['x', 'y', 'z'], mean, covariance)
    >>> sampler = NUTS(model=model, grad_log_pdf=GradLogPDFGaussian, simulate_dynamics=LeapFrog)
    >>> samples = sampler.sample(initial_pos=np.array([1, 1, 1]), num_samples=10,
    ...                          stepsize=0.4, return_type='dataframe')
    >>> samples
              x         y         z
    0  1.000000  1.000000  1.000000
    1  1.760756  0.271543 -0.613309
    2  1.883387  0.990745 -0.611720
    3  0.980812  0.340336 -0.916283
    4  0.781338  0.647220 -0.948640
    5  0.040308 -1.391406  0.412201
    6  1.179549 -1.450552  1.105216
    7  1.100320 -1.313926  1.207815
    8  1.484520 -1.349247  0.768599
    9  0.934942 -1.894589  0.471772
    """
    initial_pos = _check_1d_array_object(initial_pos, 'initial_pos')
    _check_length_equal(initial_pos, self.model.variables, 'initial_pos', 'model.variables')

    if stepsize is None:
        stepsize = self._find_reasonable_stepsize(initial_pos)

    types = [(var_name, 'float') for var_name in self.model.variables]
    samples = np.zeros(num_samples, dtype=types).view(np.recarray)

    samples[0] = tuple(initial_pos)
    position_m = initial_pos

    for i in range(1, num_samples):
        # Generating sample
        position_m = self._sample(position_m, stepsize)
        # Structured-array records are filled field by field only from a
        # tuple, so convert exactly as is done for samples[0].
        samples[i] = tuple(position_m)

    return _return_samples(return_type, samples)
def likelihood_weighted_sample(self, evidence=None, size=1, return_type="dataframe"):
    """
    Generates weighted sample(s) from joint distribution of the bayesian
    network, that comply with the given evidence.
    'Probabilistic Graphical Model Principles and Techniques', Koller and
    Friedman, Algorithm 12.2 pp 493.

    Observed variables are fixed to their evidence state and contribute the
    probability of that state to the row's ``_weight``; unobserved
    variables are sampled.

    Parameters
    ----------
    evidence: list of `pgmpy.factor.State` namedtuples
        None if no evidence
    size: int
        size of sample to be generated
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
        the generated samples with corresponding weights (column ``_weight``)

    Examples
    --------
    >>> from pgmpy.factors.discrete import State
    >>> from pgmpy.models.BayesianModel import BayesianModel
    >>> from pgmpy.factors.discrete import TabularCPD
    >>> from pgmpy.sampling import BayesianModelSampling
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
    >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
    >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
    ...         0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
    ...         ['intel', 'diff'], [2, 2])
    >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
    >>> inference = BayesianModelSampling(student)
    >>> evidence = [State('diff', 0)]
    >>> inference.likelihood_weighted_sample(evidence=evidence, size=2, return_type='recarray')
    rec.array([(0, 0, 1, 0.6), (0, 0, 2, 0.6)],
              dtype=[('diff', '<i8'), ('intel', '<i8'), ('grade', '<i8'), ('_weight', '<f8')])
    """
    types = [(var_name, 'int') for var_name in self.topological_order]
    types.append(('_weight', 'float'))
    sampled = np.zeros(size, dtype=types).view(np.recarray)
    sampled['_weight'] = np.ones(size)
    # The default evidence=None must behave as "nothing observed" instead
    # of failing when iterated.
    if evidence is None:
        evidence_dict = {}
    else:
        evidence_dict = {var: st for var, st in evidence}

    for node in self.topological_order:
        cpd = self.model.get_cpds(node)
        states = range(self.cardinality[node])
        parents = cpd.get_evidence()
        if parents:
            # One row per parent; a column is the parent configuration of
            # one sample and selects that sample's distribution.
            parent_values = np.vstack([sampled[p] for p in parents])
            cached_values = self.pre_compute_reduce(node)
            weights = [cached_values[tuple(t)] for t in parent_values.T]
            if node in evidence_dict:
                observed = evidence_dict[node]
                sampled[node] = observed
                for i, distribution in enumerate(weights):
                    sampled['_weight'][i] *= distribution[observed]
            else:
                sampled[node] = sample_discrete(states, weights)
        else:
            if node in evidence_dict:
                observed = evidence_dict[node]
                sampled[node] = observed
                # A node without parents has the same likelihood in every row.
                sampled['_weight'] *= cpd.values[observed]
            else:
                sampled[node] = sample_discrete(states, cpd.values, size)
    return _return_samples(return_type, sampled)
def sample(self, initial_pos, num_adapt, num_samples, trajectory_length, stepsize=None, return_type='dataframe'):
    """
    Method to return samples using Hamiltonian Monte Carlo

    During the first ``num_adapt`` iterations the stepsize is adapted;
    afterwards the averaged stepsize is used. If ``num_adapt <= 1`` the
    work is handed to ``HamiltonianMC.sample`` without any adaptation.

    Parameters
    ----------
    initial_pos: A 1d array like object
        Vector representing values of parameter position, the starting
        state in markov chain.
    num_adapt: int
        The number of iterations to run the adaptation of stepsize
    num_samples: int
        Number of samples to be generated
    trajectory_length: int or float
        Target trajectory length, stepsize * number of steps(L),
        where L is the number of steps taken per HMC iteration,
        and stepsize is step size for splitting time method.
    stepsize: float , defaults to None
        The stepsize for proposing new values of position and momentum in simulate_dynamics
        If None, then will be chosen suitably
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument

    Examples
    ---------
    >>> from pgmpy.sampling import HamiltonianMCDA as HMCda, GradLogPDFGaussian as GLPG, LeapFrog
    >>> from pgmpy.factors.continuous import GaussianDistribution as JGD
    >>> import numpy as np
    >>> mean = np.array([1, 1])
    >>> covariance = np.array([[1, 0.7], [0.7, 3]])
    >>> model = JGD(['x', 'y'], mean, covariance)
    >>> sampler = HMCda(model=model, grad_log_pdf=GLPG, simulate_dynamics=LeapFrog)
    >>> samples = sampler.sample(np.array([1, 1]), num_adapt=10000, num_samples = 10000,
    ...                          trajectory_length=2, stepsize=None, return_type='recarray')
    >>> samples_array = np.array([samples[var_name] for var_name in model.variables])
    >>> np.cov(samples_array)
    array([[ 0.98432155,  0.66517394],
           [ 0.66517394,  2.95449533]])
    """
    self.accepted_proposals = 1.0

    initial_pos = _check_1d_array_object(initial_pos, 'initial_pos')
    _check_length_equal(initial_pos, self.model.variables, 'initial_pos', 'model.variables')

    if stepsize is None:
        stepsize = self._find_reasonable_stepsize(initial_pos)

    if num_adapt <= 1:
        # Return samples generated using Simple HMC algorithm; return_type
        # is forwarded so the requested container type is honoured.
        return HamiltonianMC.sample(self, initial_pos, num_samples, trajectory_length,
                                    stepsize, return_type)

    # stepsize is epsilon
    # freely chosen point, after each iteration xt(/position) is shrunk towards it
    mu = np.log(10.0 * stepsize)
    # log(10 * stepsize) large values to save computation
    # stepsize_bar is epsilon_bar
    stepsize_bar = 1.0
    h_bar = 0.0
    # See equation (6) section 3.2.1 for details

    types = [(var_name, 'float') for var_name in self.model.variables]
    samples = np.zeros(num_samples, dtype=types).view(np.recarray)
    samples[0] = tuple(initial_pos)
    position_m = initial_pos

    for i in range(1, num_samples):
        # Generating sample
        position_m, alpha = self._sample(position_m, trajectory_length, stepsize)
        # Only a tuple is unpacked across the fields of a structured
        # record, hence the same conversion as for samples[0].
        samples[i] = tuple(position_m)

        # Adaptation of stepsize till num_adapt iterations
        if i <= num_adapt:
            stepsize, stepsize_bar, h_bar = self._adapt_params(stepsize, stepsize_bar, h_bar, mu,
                                                               i, alpha)
        else:
            stepsize = stepsize_bar

    self.acceptance_rate = self.accepted_proposals / num_samples

    return _return_samples(return_type, samples)
def sample(self, initial_pos, num_samples, trajectory_length, stepsize=None, return_type='dataframe'):
    """
    Method to return samples using Hamiltonian Monte Carlo

    The starting position is stored as row 0; each remaining row holds the
    position returned by one call to ``self._sample``.

    Parameters
    ----------
    initial_pos: A 1d array like object
        Vector representing values of parameter position, the starting
        state in markov chain.
    num_samples: int
        Number of samples to be generated
    trajectory_length: int or float
        Target trajectory length, stepsize * number of steps(L),
        where L is the number of steps taken per HMC iteration,
        and stepsize is step size for splitting time method.
    stepsize: float , defaults to None
        The stepsize for proposing new values of position and momentum in simulate_dynamics
        If None, then will be chosen suitably
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument

    Examples
    --------
    >>> from pgmpy.sampling import HamiltonianMC as HMC, GradLogPDFGaussian, ModifiedEuler
    >>> from pgmpy.factors.continuous import GaussianDistribution as JGD
    >>> import numpy as np
    >>> mean = np.array([1, -1])
    >>> covariance = np.array([[1, 0.2], [0.2, 1]])
    >>> model = JGD(['x', 'y'], mean, covariance)
    >>> sampler = HMC(model=model, grad_log_pdf=GradLogPDFGaussian, simulate_dynamics=ModifiedEuler)
    >>> samples = sampler.sample(np.array([1, 1]), num_samples = 5,
    ...                          trajectory_length=6, stepsize=0.25, return_type='dataframe')
    >>> samples
                   x              y
    0   1.000000e+00   1.000000e+00
    1   1.592133e+00   1.152911e+00
    2   1.608700e+00   1.315349e+00
    3   1.608700e+00   1.315349e+00
    4   6.843856e-01   6.237043e-01
    >>> mean = np.array([4, 1, -1])
    >>> covariance = np.array([[1, 0.7 , 0.8], [0.7, 1, 0.2], [0.8, 0.2, 1]])
    >>> model = JGD(['x', 'y', 'z'], mean, covariance)
    >>> sampler = HMC(model=model, grad_log_pdf=GradLogPDFGaussian)
    >>> samples = sampler.sample(np.array([1, 1, 1]), num_samples = 10000,
    ...                          trajectory_length=6, stepsize=0.25, return_type='dataframe')
    >>> np.cov(samples.values.T)
    array([[ 1.00795398,  0.71384233,  0.79802097],
           [ 0.71384233,  1.00633524,  0.21313767],
           [ 0.79802097,  0.21313767,  0.98519017]])
    """
    self.accepted_proposals = 1.0
    initial_pos = _check_1d_array_object(initial_pos, 'initial_pos')
    _check_length_equal(initial_pos, self.model.variables, 'initial_pos', 'model.variables')

    if stepsize is None:
        stepsize = self._find_reasonable_stepsize(initial_pos)

    types = [(var_name, 'float') for var_name in self.model.variables]
    samples = np.zeros(num_samples, dtype=types).view(np.recarray)

    # Only a tuple is unpacked across the fields of a structured record,
    # so every position is converted before it is stored.
    samples[0] = tuple(initial_pos)
    position_m = initial_pos

    # Number of integration steps per iteration, never fewer than one.
    lsteps = int(max(1, round(trajectory_length / stepsize, 0)))
    for i in range(1, num_samples):
        # Generating sample
        position_m, _ = self._sample(position_m, trajectory_length, stepsize, lsteps)
        samples[i] = tuple(position_m)

    self.acceptance_rate = self.accepted_proposals / num_samples

    return _return_samples(return_type, samples)
def sample(self, evidence=None, start_state=None, size=1, return_type="dataframe"):
    """
    Sample from the Markov Chain.

    Variables named in ``evidence`` are fixed to their observed state in
    every returned row; all other variables are resampled once per sweep
    given the current values of the rest.

    Parameters
    ----------
    evidence: dict or iterable of (variable, state) pairs
        Representing states of the evidence variables. Both a mapping
        ``{'grade': 1}`` and a list such as ``[('grade', 1)]`` are accepted.
        None if no evidence.
    start_state: dict or array-like iterable
        Representing the starting states of the variables. If None is passed,
        a random start_state is chosen.
    size: int
        Number of samples to be generated.
    return_type: string (dataframe | recarray)
        Return type for samples, either of 'dataframe' or 'recarray'.
        Defaults to 'dataframe'

    Returns
    -------
    sampled: A pandas.DataFrame or a numpy.recarray object depending upon return_type argument
        the generated samples

    Examples
    --------
    >>> from pgmpy.models.BayesianModel import BayesianModel
    >>> from pgmpy.factors.discrete import TabularCPD
    >>> from GibbsSamplingWithEvidence import GibbsSampling
    >>> student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    >>> cpd_d = TabularCPD('diff', 2, [[0.6], [0.4]])
    >>> cpd_i = TabularCPD('intel', 2, [[0.7], [0.3]])
    >>> cpd_g = TabularCPD('grade', 3, [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25,
    ...         0.08, 0.3], [0.3, 0.7, 0.02, 0.2]],
    ...         ['intel', 'diff'], [2, 2])
    >>> student.add_cpds(cpd_d, cpd_i, cpd_g)
    >>> gibbs_sampler = GibbsSampling(student)
    >>> samples = gibbs_sampler.sample(size=5, evidence=[('grade',1)])
    >>> print(samples)
       diff  grade  intel
    0     1      1      1
    1     1      1      0
    2     0      1      0
    3     0      1      0
    4     1      1      0
    """
    if start_state is None and self.state is None:
        self.state = self.random_state()
    elif start_state is not None:
        self.set_start_state(start_state)

    # dict() accepts a mapping as well as an iterable of (variable, state)
    # pairs, so the list form shown in the example works too.
    evidence_map = {} if evidence is None else dict(evidence)

    # Overwrite the starting state with the evidence.
    for j, (var, st) in enumerate(self.state):
        if var in evidence_map:
            self.state[j] = State(var, evidence_map[var])

    types = [(var_name, 'int') for var_name in self.variables]
    sampled = np.zeros(size, dtype=types).view(np.recarray)
    sampled[0] = tuple([st for var, st in self.state])
    for i in range(size - 1):
        for j, (var, st) in enumerate(self.state):
            # Evidence variables keep their observed state; a missing (or
            # None) entry means the variable has to be sampled.
            next_st = evidence_map.get(var)
            if next_st is None:
                other_st = tuple(st for v, st in self.state if var != v)
                next_st = sample_discrete(
                    list(range(self.cardinalities[var])),
                    self.transition_models[var][other_st])[0]
            self.state[j] = State(var, next_st)
        sampled[i + 1] = tuple([st for var, st in self.state])
    return _return_samples(return_type, sampled)