def gen_plot_bernoulli():
    """Print, plot and sample a Bernoulli distribution with p = 0.3.

    The function prints a few pmf/cdf evaluations, shows a figure with the
    pmf (stem plot) overlaid with the cdf (green line), then prints ten
    random draws and shows a density histogram of ten further draws.

    Returns:
        None. All output goes to stdout and to matplotlib figures.
    """
    success_prob = 0.3

    # Point evaluations of the pmf and the cdf.
    pmf_at_zero = bernoulli.pmf(0, p=success_prob)
    print("\nBernoulli pmf w/ scipy.stats.bernoulli.pmf(0, p=0.3): {:.3f}".
          format(pmf_at_zero))
    pmf_on_range = bernoulli.pmf(range(3), p=success_prob)
    print("\nBernoulli pmf w/ scipy.stats.bernoulli.pmf(range(3), p=0.3): {}".
          format(pmf_on_range))
    cdf_values = bernoulli.cdf([0, 0.5, 1, 1.5], p=success_prob)
    print(
        "\nBernoulli cdf w/ scipy.stats.bernoulli.cdf([0, 0.5, 1, 1.5], p=0.3): {}"
        .format(cdf_values))

    # pmf as stems (including two off-support points) and cdf as a line.
    stem_points = [-0.2, 0, 1, 1.2]
    plt.stem(stem_points, bernoulli.pmf(stem_points, p=success_prob))
    cdf_grid = np.linspace(-0.1, 1.1, 1200)
    plt.plot(cdf_grid, bernoulli.cdf(cdf_grid, p=success_prob), 'g')
    plt.xlim([-0.1, 1.1])
    plt.ylim([-0.2, 1.1])
    plt.show()

    # The printed draws and the histogram draws are two independent samples.
    printed_draws = bernoulli.rvs(size=10, p=success_prob)
    print(
        "\nBernoulli samples w/ scipy.stats.bernoulli.rvs(size=10, p=.3): {}".
        format(printed_draws))
    histogram_draws = bernoulli.rvs(size=10, p=success_prob)
    plt.hist(histogram_draws, density=True)
    plt.show()
    return None
def compute_vote_likelihood_ratio(name,
                                  true_ideal_points,
                                  votes,
                                  senator_indices,
                                  bill_indices,
                                  voter_map,
                                  popularity_mean,
                                  polarity_mean,
                                  null_ideal_point=0.,
                                  query_size=10):
    """Compute top votes based on the log-likelihood ratio statistic.

    For every vote cast by the named Senator, the Bernoulli log-likelihood of
    the observed vote is evaluated twice: once with the Senator's fitted ideal
    point and once with `null_ideal_point`. Bills are ranked by the difference
    (fitted minus null), largest first.

    Args:
      name: Name of Senator to calculate ratio for.
      true_ideal_points: A vector of length [num_voters] containing the
        learned vote ideal points.
      votes: An array with shape [num_votes], containing the binary vote cast
        for all recorded votes.
      senator_indices: An array with shape [num_votes], where each entry is an
        integer in {0, 1, ..., num_voters - 1}, containing the index for the
        Senator corresponding to the vote in `votes`.
      bill_indices: An array with shape [num_votes], where each entry is an
        integer in {0, 1, ..., num_bills - 1}, containing the index for the
        bill corresponding to the vote in `votes`.
      voter_map: A string array with length [num_voters] containing the name
        for each voter in the data set.
      popularity_mean: The learned variational mean for the bill popularities
        (alpha). A vector with shape [num_bills].
      polarity_mean: The learned variational mean for the bill polarities
        (eta). A vector with shape [num_bills].
      null_ideal_point: The null ideal point for the likelihood ratio test.
        For example, a null ideal point of 0 captures which votes explain why
        the Senator is away from 0.
      query_size: Maximum number of bills to return.

    Returns:
      top_indices: A vector of ints with at most `query_size` entries,
        containing the bill indices with the largest log-likelihood ratio,
        in descending order of the statistic.

    Raises:
      IndexError: If `name` does not appear in `voter_map`.
    """
    voter_index = np.where(voter_map == name)[0][0]
    relevant_indices = np.where(senator_indices == voter_index)[0]
    relevant_bills = bill_indices[relevant_indices]
    relevant_votes = votes[relevant_indices]
    polarity = polarity_mean[relevant_bills]
    popularity = popularity_mean[relevant_bills]

    fitted_logit = true_ideal_points[voter_index] * polarity + popularity
    null_logit = null_ideal_point * polarity + popularity
    # Unlike the TBIP likelihood ratio statistic, the output distribution for
    # vote ideal points is Bernoulli, so map each logit to a success
    # probability first.
    fitted_mean = 1. / (1. + np.exp(-fitted_logit))
    null_mean = 1. / (1. + np.exp(-null_logit))
    # Use logpmf (not pmf): the statistic is a difference of LOG-likelihoods.
    # Subtracting raw probabilities ranks bills differently.
    fitted_log_likelihood = bernoulli.logpmf(relevant_votes, fitted_mean)
    null_log_likelihood = bernoulli.logpmf(relevant_votes, null_mean)
    log_likelihood_differences = fitted_log_likelihood - null_log_likelihood
    ranking = np.argsort(-log_likelihood_differences)[:query_size]
    top_indices = relevant_bills[ranking]
    return top_indices
def predict(self, X_test):
    """Classify rows of `X_test` into class 0 or 1 with Bernoulli naive Bayes.

    For each class the per-feature Bernoulli probabilities are evaluated with
    `self.means[label]`; probabilities that are exactly zero are replaced by
    0.01 before taking logs. The per-row log-probabilities are summed and the
    log prior `self.probas_y[label]` is added.

    Returns:
        Integer array with 1 where class 1 scores strictly higher than
        class 0, and 0 otherwise.
    """
    class_scores = []
    for label in (0, 1):
        feature_probs = bernoulli.pmf(X_test, self.means[label])
        # Avoid log(0) for impossible feature values.
        feature_probs[feature_probs == 0] = 0.01
        log_joint = np.sum(np.log(feature_probs), 1) + np.log(self.probas_y[label])
        class_scores.append(log_joint)
    score_y0, score_y1 = class_scores
    return (score_y1 > score_y0).astype(int)
def probability_of_signal_given_state(signal_strength, closest_z, base_position, max_range):
    """Return the probability (density) of a signal reading given a state position.

    The detection probability is ``max(0, 1 - distance / max_range)``, where
    distance is the Euclidean norm between `base_position` and the
    ``(x, y)`` pair in `closest_z`.

    * NaN reading (missing data): returns 1.
    * Reading equal to 0: returns the Bernoulli probability of "no detection".
    * Any other reading: returns the Bernoulli probability of "detection"
      times a Beta(2, 5 * distance / max_range) density at the reading.
    """
    # Idea: use the measurement position instead of the closest point in state.
    # Problem: missing data would cascade through the algorithm.
    # Solution: use the measurement position if available, otherwise the
    # current approach.
    closest_x, closest_y = closest_z
    state_position = np.array([closest_x, closest_y])
    distance_to_state = np.linalg.norm(base_position - state_position)

    if np.isnan(signal_strength):
        return 1

    detection_prob = max(0, 1 - distance_to_state/max_range)
    if signal_strength == 0:
        return bernoulli.pmf(0, detection_prob)

    strength_density = beta.pdf(signal_strength, 2, 5*distance_to_state/max_range)
    return bernoulli.pmf(1, detection_prob)*strength_density
def _negative_log_likelihood(self, w, y, X, mask=None):
    """
    Returns logistic regression negative log likelihood

    The Bernoulli likelihood of `y` is evaluated at ``_sigmoid(w.dot(X))``.
    When a mask is given the likelihood is multiplied by it, so masked-out
    observations contribute the constant ``log(1e-5)`` to the sum.

    :param w: the parameters at their current estimates of shape (n_features,)
    :param y: the response vector of shape (n_obs,)
    :param X: the design matrix of shape (n_features, n_obs)
    :param mask: the binary mask vector of shape (n_obs,). 1 if observed, 0 o/w
    :returns: negative log likelihood value
    :rtype: float
    """
    likelihood = bernoulli.pmf(y, _sigmoid(w.dot(X)))
    if mask is not None:
        likelihood = likelihood * mask
    # The 1e-5 offset keeps the log finite when a likelihood term is zero.
    return -np.sum(np.log(likelihood + 1e-5))
def sim(samples):
    """Evaluate shot and goal probabilities on a 201 x 91 grid.

    The grid covers x in 0..200 and y in -45..45, stored row by row with x
    varying fastest. Parameters are read as attributes of ``samples.mean()``.

    Returns:
        dict with keys "x", "y", "shot_prob" (min-max rescaled to [0, 1])
        and "goal_prob", each an array with one entry per grid point.
    """
    n = 201 * 91
    sims = {"x": empty(n), "y": empty(n)}
    grid_points = ((x, y) for y in range(-45, 46) for x in range(201))
    for i, (x, y) in enumerate(grid_points):
        sims["x"][i] = x
        sims["y"][i] = y

    mean = samples.mean()

    shot_score = (norm.logpdf(sims["x"], mean.shot_mu_x, mean.shot_sigma_x) +
                  norm.logpdf(sims["y"], mean.shot_mu_y, mean.shot_sigma_y))
    shot_prob = expit(shot_score)
    # Rescale so the smallest value maps to 0 and the largest to 1.
    lowest = shot_prob.min()
    sims["shot_prob"] = (shot_prob - lowest) / (shot_prob.max() - lowest)

    goal_score = (norm.logpdf(sims["x"], mean.goal_mu_x, mean.goal_sigma_x) +
                  norm.logpdf(sims["y"], mean.goal_mu_y, mean.goal_sigma_y) +
                  mean.goal_offset)
    sims["goal_prob"] = bernoulli.pmf(1, expit(goal_score))
    return sims
def pdf(self, x: float):
    """Evaluate the Bernoulli probability mass at `x`.

    Args:
        x (float): The value for which the probability is needed.

    Returns:
        The result of ``bernoulli.pmf`` at `x` with success probability
        ``self.p``.
    """
    success_prob = self.p
    return bernoulli.pmf(x, success_prob)
def plot(self, n, p, x):
    """Plot the Bernoulli pmf with success probability `p` at the points `x`.

    Args:
        n: Number shown in the figure title. It is not a Bernoulli parameter
            and does not enter the pmf.
        p: Success probability of the distribution.
        x: Points at which the pmf is evaluated and plotted.
    """
    # bernoulli takes a single shape parameter. Passing ``n, p`` positionally
    # would use n as the success probability and p as ``loc``.
    pmf = bernoulli.pmf(x, p)
    plt.plot(x, pmf, 'o-')
    # 'value' is not a valid matplotlib font size, so the default is used.
    plt.title('Bernaulli: n=%i , p=%.2f' % (n, p))
    plt.xlabel('Number of successes')
    plt.ylabel('Probability of Successes')
    plt.show()
def pmf(self):
    """
    Evaluate the probability mass function at the stored outcome

    Uses the private attributes ``__r`` (outcome) and ``__p`` (success
    probability).

    Returns:
    --------
    pmf : float
    """
    outcome, success_prob = self.__r, self.__p
    return bernoulli.pmf(outcome, success_prob)
def SampleMethod2(numSamples):
    """Draw `numSamples` (theta, y) pairs by sequential sampling.

    For each sample, five draws are made. Each draw first samples theta from
    Bernoulli(p) (p starts at 1/3), then samples an outcome from
    Bernoulli(1/2) if theta is 0 or Bernoulli(3/4) if theta is 1, and finally
    updates p to the posterior probability of the 3/4 source given the
    outcome. y is the sum of the five outcomes. The returned theta is a
    fresh draw from Bernoulli(p) using the final p.

    Returns:
        Tuple ``(thetas, ys)`` of two lists with `numSamples` entries each.
    """
    ys = []
    thetas = []
    for _ in range(numSamples):
        outcomes = []
        p = 1.0 / 3.0
        for _ in range(5):
            theta = bernoulli.rvs(p, size=1)
            # theta is always 0 or 1, so the choice of source is binary.
            source_prob = 1.0 / 2.0 if theta == 0 else 3.0 / 4.0
            sample = bernoulli.rvs(source_prob, size=1)
            outcomes.append(sample)
            # Bayes update of the probability that the 3/4 source is active.
            biased_term = p * bernoulli.pmf(sample, 3.0 / 4.0)
            fair_term = (1 - p) * bernoulli.pmf(sample, 1.0 / 2.0)
            p = biased_term / (biased_term + fair_term)
        ys.append(np.sum(outcomes))
        thetas.append(bernoulli.rvs(p, size=1))
    return (thetas, ys)
def pmfs(self):
    """
    Evaluate the probability mass function at every stored outcome

    Uses the private attributes ``__all_r`` (outcomes) and ``__p`` (success
    probability).

    Returns:
    --------
    pmf : numpy.ndarray
    """
    outcomes, success_prob = self.__all_r, self.__p
    return bernoulli.pmf(outcomes, success_prob)
def likelihood_ber(self, Z, i, k):
    """Return the Bernoulli likelihood of observation `i` given assignments `Z`.

    For every dimension d the success probability is

        theta_d = prod_j theta[j, d]**Z[i, j]
                  / (prod_j (1 - theta[j, d])**Z[i, j]
                     + prod_j theta[j, d]**Z[i, j])

    and the result is the product over d of the Bernoulli pmf of
    ``self.X[i, d]`` under theta_d.

    Args:
        Z: Assignment matrix indexed as ``Z[i, j]``.
        i: Index of the observation (row of ``self.X`` and of `Z`).
        k: Unused; kept for interface compatibility. All ``self.K``
            features are iterated.

    Returns:
        The likelihood of row `i` as a float.
    """
    result = 1
    for d in range(self.D):
        # The products must restart for every dimension. Carrying them over
        # would mix earlier dimensions into theta_d.
        num = 1
        den1 = 1
        for feature in range(self.K):
            # compute theta_d, equation (7)
            num = num * self.theta[feature, d] ** Z[i, feature]
            den1 = den1 * (1 - self.theta[feature, d]) ** Z[i, feature]
        theta_d = num / (den1 + num)
        # compute likelihood
        result = result * bernoulli.pmf(k=self.X[i, d], p=theta_d)
    return result
def test_bernoulli(self):
    """Draw the Bernoulli(0.3) pmf and check the string form of the axes."""
    fig, ax = plt.subplots(1, 1)
    p = 0.3
    mean, var, skew, kurt = bernoulli.stats(p, moments='mvsk')

    # Support points between the 1% and 99% quantiles.
    lower, upper = bernoulli.ppf(0.01, p), bernoulli.ppf(0.99, p)
    x = np.arange(lower, upper)

    # pmf from the distribution object: markers plus thick vertical bars.
    masses = bernoulli.pmf(x, p)
    ax.plot(x, masses, 'bo', ms=8, label='bernoulli pmf')
    ax.vlines(x, 0, masses, colors='b', lw=5, alpha=0.5)

    # Same pmf from the frozen distribution, drawn as thin black lines.
    rv = bernoulli(p)
    ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
              label='frozen pmf')
    ax.legend(loc='best', frameon=False)

    expected_repr = "AxesSubplot(0.125,0.11;0.775x0.77)"
    self.assertEqual(expected_repr, str(ax))
def predict(self, test_data):
    """Predict a digit (0-9) for every row of `test_data`.

    For each digit the Bernoulli log-probabilities of the row under
    ``self.digits_p_ink[digit]`` are summed and the log prior
    ``self.p[digit]`` is added. The scores are stored in
    ``self.p_digit_class`` (one row per digit) and the arg-max per sample is
    stored in ``self.predicted``.

    Returns:
        The predicted digit per row, or an empty list (after printing a
        hint) when the priors or ink probabilities compare equal to ``[]``.
    """
    self.p_digit_class = []
    self.predicted = []
    if self.p == [] or self.digits_p_ink == []:
        print("Fit your model to training data first")
        return []

    log_posteriors = [
        np.sum(np.log(bernoulli.pmf(test_data, self.digits_p_ink[digit])), axis=1)
        + np.log(self.p[digit])
        for digit in range(10)
    ]
    self.p_digit_class = np.array(log_posteriors)
    self.predicted = np.argmax(self.p_digit_class, axis=0)
    return self.predicted
def bernoulli_mixture_pmf(data, means, K):
    '''Probability of every data row under every Bernoulli component.

    data = N X D matrix
    means = K X D matrix
    prob (result) = N X K matrix, prob[i, k] being the product over the D
    features of the Bernoulli pmf of row i under component k
    '''
    N = len(data)
    D = len(data[0])  # not used below; only touches the first row

    prob = np.zeros((N, K))
    for k in range(K):
        component_mean = means[k]

        def row_probability(row, component_mean=component_mean):
            # Features are treated as independent, so the row probability is
            # the product of the per-feature probabilities.
            return np.prod(bern.pmf(row, component_mean))

        prob[:, k] = np.apply_along_axis(row_probability, 1, data)
    return prob
def predict(img, paras, classParas, bern=False):
    """Return the index of the class with the highest log-posterior for `img`.

    Args:
        img: Feature values of one sample.
        paras: Per-class parameters. With ``bern=True`` each entry is passed
            to ``bernoulli.pmf`` as the success probability; otherwise
            ``paras[c][0]`` and ``paras[c][1]`` are passed to ``norm.pdf`` as
            location and scale.
        classParas: Per-class prior probabilities.
        bern: Select the Bernoulli model instead of the Gaussian one.

    Returns:
        ``np.argmax`` over the per-class scores, where a score is the
        NaN-ignoring sum of log-likelihoods plus the log prior.
    """
    scores = []
    for class_index, prior in enumerate(classParas):
        if bern:
            likelihood = bernoulli.pmf(img, paras[class_index])
        else:
            location, scale = paras[class_index][0], paras[class_index][1]
            likelihood = norm.pdf(img, location, scale)
        score = np.nansum(np.log(likelihood))
        score += np.log(prior)
        scores.append(score)
    return np.argmax(scores)
def expected_log_likelihood(data, weights, means, K):
    '''Compute the expected log-likelihood of a mixture of Bernoulli distributions.

    Computing E(LL) requires the responsibilities, so this function does
    double duty and returns them as well.

    Returns a tuple ``(ll, responsibilities)``. ``responsibilities`` is
    whatever ``compute_responsibilities(data, weights, means, K)`` returns;
    it is indexed here as ``responsibilities[:, k]``.
    '''
    N = len(data)
    responsibilities = compute_responsibilities(data, weights, means, K)

    ll = 0
    # Per-row accumulator over the K components.
    sumK = np.zeros(N)
    for k in range(K):
        # Per-feature Bernoulli log-probabilities of one row under component
        # k. The absolute value of the mean is used as the success probability.
        b = lambda row: np.log(bern.pmf(row, np.absolute(means[k])))
        temp1 = np.apply_along_axis(b, 1, data)
        # Responsibility-weighted (log mixing weight + row log-likelihood).
        sumK += responsibilities[:, k] * (np.log(np.absolute(weights[k])) + np.sum(temp1, axis=1))
    # NOTE(review): the flattened source does not show where the loop ends.
    # This layout assumes the two statements below run once, after the loop.
    # Confirm against the original file.
    # nan_to_num replaces NaN/inf entries (e.g. from log(0)) with finite numbers.
    sumK = np.nan_to_num(sumK)
    ll += np.sum(sumK)
    return (ll, responsibilities)
def _compute_total_log_likelihood(self):
    """Accumulate and return the model's total log-likelihood.

    The returned value is the sum of:

    * per document, ``self.dirichlet_pdf_log(theta[document], ALPHA)``;
    * per token whose topic is not ``WEIFTM.NO_TOPIC``, the log of phi for
      (topic, word) and the log of theta for (document, topic);
    * the summed log Bernoulli pmf of ``self.b`` under ``sigmoid(self.pi)``;
    * per topic k, ``self.dirichlet_pdf_log`` of phi[k] restricted to the
      nonzero entries of ``self.b[k]``, the log normal density of
      ``self.c[k]`` and of every ``self.lamb[k, l]`` (mean 0, scale
      ``self.sig_0``).

    NOTE(review): block nesting was reconstructed from a flattened source.
    Confirm the loop boundaries against the original file.
    """
    log_likelihood = 0

    theta = self.get_theta()
    log_theta = np.log(theta)
    phi = self.get_phi()
    log_phi = np.log(phi)

    # Symmetric concentration vector with one entry per topic.
    ALPHA = self.alpha_0 * np.ones(self.n_topics)
    for document_index in range(self.n_documents):
        # theta
        # Earlier variant, kept for reference:
        # log_likelihood += np.log(dirichlet.pdf(theta[document_index], ALPHA))
        log_likelihood += self.dirichlet_pdf_log(theta[document_index], ALPHA)

        for token_index in range(len(self.Z[document_index])):
            # Each entry of Z[document] unpacks to a (word, topic) index pair.
            word_index, topic_index = self.Z[document_index][token_index]
            if topic_index != WEIFTM.NO_TOPIC:
                # w
                log_likelihood += log_phi[topic_index, word_index]
                # z
                log_likelihood += log_theta[document_index, topic_index]

    # b: Bernoulli term with success probability sigmoid(pi).
    log_likelihood += np.sum(
        np.log(bernoulli.pmf(self.b, sigmoid(self.pi))))

    for k in range(self.n_topics):
        # phi
        # Only the entries switched on in b[k] enter the Dirichlet term.
        b_k_nonzero = self.b[k].nonzero()[0]
        BETA = self.beta_0 * np.ones(b_k_nonzero.shape[0])
        # Earlier variant, kept for reference:
        # log_likelihood += np.log(dirichlet.pdf(phi[k][b_k_nonzero], BETA))
        log_likelihood += self.dirichlet_pdf_log(phi[k][b_k_nonzero], BETA)

        # c
        log_likelihood += np.log(norm.pdf(self.c[k], 0, self.sig_0))

        for l in range(self.embedding_size):
            # lamb
            log_likelihood += np.log(
                norm.pdf(self.lamb[k, l], 0, self.sig_0))

    return log_likelihood
from scipy.stats import bernoulli
import numpy as np
import matplotlib.pyplot as plt

# Bernoulli distribution with success probability p; k1 and k2 are its two
# possible outcomes.
p = 0.8
k1 = 0
k2 = 1

# cdf on a fine grid between the outcomes, pmf at each outcome.
k = np.linspace(k1, k2, 100)
temp1 = bernoulli.cdf(k, p)
temp2 = bernoulli.pmf(k1, p)
temp3 = bernoulli.pmf(k2, p)
print(temp1)
print(temp2)
print(temp3)

plt.plot(k, temp1, 'o-', color='orange')
# Each pmf value is drawn at its own outcome on the x-axis. Using p as the
# x-coordinate would stack both markers at x = 0.8.
plt.plot(k1, temp2, 'o-', color='crimson')
plt.plot(k2, temp3, 'o-', color='crimson')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.grid()
plt.show()
# In[19]: from scipy.stats import bernoulli brv = bernoulli(p=0.3) brv.rvs(size=20) # In[20]: event_space = [0, 1] plt.figure(figsize=(12, 8)) colors = sns.color_palette() for i, p in enumerate([0.1, 0.2, 0.5, 0.7]): ax = plt.subplot(1, 4, i + 1) plt.bar(event_space, bernoulli.pmf(event_space, p), label=p, color=colors[i], alpha=0.5) plt.plot(event_space, bernoulli.cdf(event_space, p), color=colors[i], alpha=0.5) ax.xaxis.set_ticks(event_space) plt.ylim((0, 1)) plt.legend(loc=0) if i == 0: plt.ylabel("PDF at $k$") plt.tight_layout()
import matplotlib.pyplot as plt
from scipy.stats import bernoulli
import numpy as np
import pandas as pd

# %matplotlib inline
plt.style.use("ggplot")

# Success probability under each of the two hypotheses, and their common
# prior probability.
p_a = 3.0 / 10.0
p_b = 5.0 / 9.0
p_prior = 0.5

# 0: blue, 1: red
data = [0,1,0,0,1,1,1]
N_data = 7

# Per-observation likelihoods of the first N_data points under each hypothesis.
likehood_a = bernoulli.pmf(data[:N_data], p_a)
likehood_b = bernoulli.pmf(data[:N_data], p_b)
likehood_a  # bare expression: displayed when run as a notebook cell

pa_posterior = p_prior  # prior
pb_posterior = p_prior
pa_posterior *= np.prod(likehood_a)  # multiply by the product of likelihoods
pb_posterior *= np.prod(likehood_b)
norm = pa_posterior + pb_posterior  # evidence (normalisation constant)

# Posterior probability of each hypothesis.
df = pd.DataFrame([pa_posterior/norm, pb_posterior/norm], columns=["post"])
x = np.arange(df.shape[0])
plt.bar(x,df["post"])
plt.xticks(x,["a","b"])
def score(self, X, y):
    """Return the summed Bernoulli log-likelihood of `y` under the predictions.

    ``self.predict(X)`` is used as the success probability for each entry
    of `y`.
    """
    predicted_prob = self.predict(X)
    log_likelihoods = np.log(bernoulli.pmf(y, predicted_prob))
    return log_likelihoods.sum()
plt.xlabel("Obama Electoral College Votes") plt.ylabel("Probability") sns.despine() plot_simulation(result) from scipy.stats import bernoulli brv = bernoulli(p=0.3) brv.rvs(size=20) event_space=[0,1] plt.figure(figsize=(12,8)) colors=sns.color_palette() for i, p in enumerate([0.1, 0.2, 0.5, 0.7]): ax = plt.subplot(1, 4, i+1) plt.bar(event_space, bernoulli.pmf(event_space, p), label=p, color = colors[i], alpha = 0.5) plt.plot(event_space, bernoulli.cdf(event_space, p), color = colors[i], alpha=0.5) ax.xaxis.set_ticks(event_space) plt.ylim((0,1)) plt.legend(loc=0) if i == 0: plt.ylabel("PDF at $k$") plt.tight_layout() CDF = lambda x: np.float(np.sum(result < x))/result.shape[0] for votes in [200, 300, 320, 340, 360, 400, 500]: print "Obama Win CDF at votes=", votes, " is ", CDF(votes) votelist = np.arange(0,540, 5)
def common_dists():
    """Show some commonly used distributions.

    Draws six panels on a 2 x 3 grid: Gaussian, uniform and exponential
    densities, then Bernoulli, binomial and Poisson probability masses.

    Returns:
        The flattened array of the six matplotlib axes.
    """
    # prep the subplots
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.flatten()

    # gaussian
    mu, sigma = 0, 1
    x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
    axes[0].plot(x, norm.pdf(x, mu, sigma))
    axes[0].set_title('Gaussian PDF')
    axes[0].set_ylabel('density')
    axes[0].set_xlabel('x')
    axes[0].annotate(r'$\mu$', xy=(mu, 0.4), xytext=(mu - 0.09, 0.3),
                     arrowprops=dict(arrowstyle='->'))
    # horizontal bar spanning one standard deviation on each side of the mean
    axes[0].annotate('', xy=(mu - sigma, 0.25), xytext=(mu + sigma, 0.25),
                     arrowprops=dict(arrowstyle='|-|, widthB=0.5, widthA=0.5'))
    axes[0].annotate(r'$2\sigma$', xy=(mu - 0.15, 0.22))

    # uniform distribution defined by min (a) and max (b)
    a, b = 0, 1
    peak = 1 / (b - a)
    axes[1].plot([a, a, b, b], [0, peak, peak, 0])
    axes[1].set_title('Uniform PDF')
    axes[1].set_ylabel('density')
    axes[1].set_xlabel('x')
    axes[1].annotate('min', xy=(a, peak), xytext=(a + 0.2, peak - 0.2),
                     arrowprops=dict(arrowstyle='->'))
    axes[1].annotate('max', xy=(b, peak), xytext=(b - 0.3, peak - 0.2),
                     arrowprops=dict(arrowstyle='->'))
    axes[1].set_ylim(0, 1.5)

    # exponential (scipy parameterises by scale = 1 / rate)
    x = np.linspace(0, 5, 100)
    axes[2].plot(x, expon.pdf(x, scale=1 / 3))
    axes[2].set_title('Exponential PDF')
    axes[2].set_ylabel('density')
    axes[2].set_xlabel('x')
    axes[2].annotate(r'$\lambda$ = 3', xy=(0, 3), xytext=(0.5, 2.8),
                     arrowprops=dict(arrowstyle='->'))

    # Bernoulli of coin toss
    axes[3].bar(['heads', 'tails'], bernoulli.pmf([0, 1], p=0.5))
    axes[3].set_title('Bernoulli with fair coin toss (p = 0.5)')
    axes[3].set_ylabel('probability')
    axes[3].set_xlabel('coin toss result')
    axes[3].set_ylim(0, 1)

    # Binomial of tossing a fair coin many times
    x = np.arange(0, 10)
    # The number of trials must be an integer. x.shape is a 1-tuple that
    # only works through broadcasting, so pass the element count instead.
    axes[4].plot(x, binom.pmf(x, n=x.size, p=0.5), linestyle='--', marker='o')
    axes[4].set_title('Binomial PMF - many Bernoulli trials')
    axes[4].set_ylabel('probability')
    axes[4].set_xlabel('number of heads')

    # Poisson PMF (probability mass function) because this is a discrete
    # random variable
    x = np.arange(0, 10)
    axes[5].plot(x, poisson.pmf(x, mu=3), linestyle='--', marker='o')
    axes[5].set_title('Poisson PMF')
    axes[5].set_ylabel('mass')
    axes[5].set_xlabel('x')
    axes[5].annotate(r'$\lambda$ = 3', xy=(3, 0.225), xytext=(1.9, 0.2),
                     arrowprops=dict(arrowstyle='->'))

    # add a title
    plt.suptitle('Some commonly used distributions', fontsize=15, y=0.95)

    return axes
plt.style.use('seaborn')
import seaborn as sns

# ### Bernoulli Distribution

# In[2]:

# Bernoulli Distribution
from scipy.stats import bernoulli

p = 0.7
# Percent Point Function (inverse of cdf — percentiles).
# With p = 0.7 the 1% and 99% quantiles are 0 and 1, so x holds the single
# value 0. The `.item()` calls below rely on x having exactly one element.
x = np.arange(bernoulli.ppf(0.01, p), bernoulli.ppf(
    0.99, p))

print("Mean              : ", bernoulli.stats(p, moments='m'))
print("Variance          : ", bernoulli.stats(p, moments='v'))
print("Prob. Mass Func.  : ", bernoulli.pmf(x, p).item())
print("Cum. Density Func.: ", bernoulli.cdf(x, p).item())

fig = plt.figure(figsize=(20, 10))

plt.subplot(221)
# Red: the mass at x (outcome 0). Green: the complementary mass, drawn at
# 1 - x (outcome 1).
plt.plot(x, bernoulli.pmf(x, p), 'ro', ms=8, label='PMF=(1-p)')
plt.plot(1 - x, 1 - bernoulli.pmf(x, p), 'go', ms=8, label='PMF=p')
plt.vlines(x, 0, bernoulli.pmf(x, p), colors='r', lw=5, alpha=0.5)
plt.vlines(1 - x, 0, 1 - bernoulli.pmf(x, p), colors='g', lw=5, alpha=0.5)
plt.xlabel("Sample Space of Bernoulli Distribution", fontsize=14)
plt.ylabel("PMF", fontsize=14)
plt.title("Probability Distribution of Bernoulli(p=0.7) Distribution", fontsize=16)
plt.xticks(np.arange(0, 2, 1))
plt.yticks(np.arange(0, 1.1, 0.1))
plt.legend(loc='best', shadow=True)
from scipy.stats import bernoulli, poisson, uniform, expon, erlang, norm, triang # Bernoulli p = 0.5 x = 1 # Generate a probability using the PMF print(bernoulli.pmf(x, p)) # 0.5 # Generate a probability using the CDF print(bernoulli.cdf(x, p)) # 1.0 # Generate three Bernoulli random numbers print(bernoulli.rvs(p, size=3)) # [0 1 0] # Poisson lmda = 2 x = 5 print(poisson.pmf(x, lmda)) print(poisson.cdf(x, lmda)) print(poisson.rvs(lmda, size=3)) # Uniform a = 3 b = 10 x = 10 print(uniform.pdf(x, loc=a, scale=(b - a))) # = 1 / 7 print(uniform.cdf(x, loc=a, scale=(b - a))) # = 1.0 print(uniform.rvs(loc=3, scale=(b - a), size=3)) # Exponential mu = 2 x = 2 print(expon.pdf(x, scale=(1 / mu)))
if __name__ == '__main__': filename = 'iris.data' data, classLabels = readData(filename) #print(data[1], classLabels) p = 0.3 mean, var, skew, kurt = bernoulli.stats(p, moments='mvsk') print(mean, var, skew, kurt) fig, ax = plt.subplots(1, 1) # x = np.arange(bernoulli.ppf(0.01, p), bernoulli.ppf(0.99, p)) #for line in range(len(data)-1): dataL1 = data[:, 1] ax.plot(dataL1, bernoulli.pmf(dataL1, p), 'bo', ms=8, label='bernoulli pmf') plt.show() fig2, ax2 = plt.subplots(1, 1) ax2.scatter(range(150), dataL1) plt.show() fig3, ax3 = plt.subplots(1, 1) y = multivariate_normal.pdf(dataL1, mean=None, cov=1) ax3.scatter(dataL1, y) plt.show() iris = load_iris() X = iris.data
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats
from scipy.stats import bernoulli, poisson, binom

# Default colour list. 'axes.color_cycle' was removed from rcParams in
# matplotlib 2.0, so read 'axes.prop_cycle' first and fall back to the old
# key on very old installations.
try:
    colors = matplotlib.rcParams['axes.prop_cycle'].by_key()['color']
except KeyError:
    colors = matplotlib.rcParams['axes.color_cycle']

# Bernoulli pmf: one panel per success probability.
a = np.arange(2)
plt.figure(figsize=(12,8))
for i, p in enumerate([.1, .2, .6, .7]):
    ax = plt.subplot(1, 4, i+1)
    plt.bar(a, bernoulli.pmf(a, p), label=p, color=colors[i], alpha=.5)
    ax.xaxis.set_ticks(a)
    plt.legend(loc=0)
    # Label the y-axis on the first panel only.
    if i == 0:
        plt.ylabel("PDF at $k$")
plt.suptitle("Bernoulli probabability")
plt.show()

# Poisson pmf for several rates, overlaid in one figure.
k = np.arange(20)
plt.figure(figsize=(12,8))
for i, lambda_ in enumerate([1, 4, 6, 12]):
    plt.bar(k, poisson.pmf(k, lambda_), label=lambda_, color=colors[i], alpha=0.4, edgecolor=colors[i], lw=3)
def p(t):
    """Return the unnormalised posterior density of success probability `t`.

    The value is the Uniform(0, 1) prior density at `t` multiplied by the
    product of the Bernoulli probabilities of the module-level data `x`
    under success probability `t`.
    """
    prior_density = uniform.pdf(t, loc=0, scale=1)
    data_likelihood = np.prod(bernoulli.pmf(x, p=t))
    return prior_density * data_likelihood
# E\left[x \right]=\mu # $ # # #### 分散 # $ # V\left[x\right]=\mu\left(1-\mu \right) # $ # #### 確率質量関数 # In[6]: from scipy.stats import bernoulli mu = 0.3 print(bernoulli.pmf(0, mu)) print(bernoulli.pmf(1, mu)) # #### サンプリング # 1000個サンプリングして、ヒストグラムで表示してみます。 # In[7]: from scipy.stats import bernoulli mu = 0.3 size = 1000 x = bernoulli.rvs(mu, size=size) plt.hist(x, bins=3)
def data_prob(pm, y):
    """Return the Bernoulli probability of `y` with success probability ``pm['p']``."""
    success_prob = pm['p']
    return bernoulli.pmf(y, success_prob)
def __call__(self, sample):
    """Flip `sample` left-to-right with probability ``self.p``.

    Args:
        sample: Array with at least two dimensions (required by
            ``np.fliplr``).

    Returns:
        The flipped array when the random draw succeeds, otherwise `sample`
        unchanged.
    """
    # A Bernoulli(p) trial: random.random() is uniform on [0, 1), so the
    # comparison succeeds with probability p. Comparing a pmf value with a
    # fixed 0.3 would make the flip rate independent of p.
    if random.random() < self.p:
        sample = np.fliplr(sample)
    return sample
from scipy.stats import bernoulli
import matplotlib.pyplot as plt
import numpy as np

fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
p = 0.3
mean, var, skew, kurt = bernoulli.stats(p, moments='mvsk')

# Display the probability mass function (pmf):
# x runs from the 1% quantile up to, but not including, the 99% quantile.
x = np.arange(bernoulli.ppf(0.01, p),
              bernoulli.ppf(0.99, p))
ax.plot(x, bernoulli.pmf(x, p), 'bo', ms=8, label='bernoulli pmf')
ax.vlines(x, 0, bernoulli.pmf(x, p), colors='b', lw=5, alpha=0.5)

# Freeze the distribution and display the frozen pmf:
rv = bernoulli(p)
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()

# Check accuracy of cdf and ppf:
# The boolean result is not stored; it is only visible in an interactive
# session.
prob = bernoulli.cdf(x, p)
np.allclose(x, bernoulli.ppf(prob, p))

# Generate random numbers:
r = bernoulli.rvs(p, size=1000)
fit = model.sampling(data=data) toc('Model fitting') means = get_posterior_means(fit) log_l = 0 for subject,question in zip(*np.where(holdout==0)): r_subject = means['r_subject'][subject] r_q = means['r_q'][question] if subject < len(ctrl): r_q = means['r_q'][question] if subject >= len(ctrl): r_q = means['r_q_pd'][question] is_correct = correct[subject][question] prob_correct = logit(r_subject + r_q) likelihood = bernoulli.pmf(is_correct,prob_correct) log_l += np.log(likelihood) num_not_held_out = len(np.where(holdout==0)[0]) print "Log-likelihood per sample in sample is %.2f" % (log_l/num_not_held_out) log_ls_in.append(log_l/num_not_held_out) log_l = 0 for subject,question in zip(*np.where(holdout!=0)): r_subject = means['r_subject'][subject] if subject < len(ctrl): r_q = means['r_q'][question] if subject >= len(ctrl): r_q = means['r_q_pd'][question] is_correct = correct[subject][question] prob_correct = logit(r_subject + r_q) likelihood = bernoulli.pmf(is_correct,prob_correct)