def scoreregex(self, r, depth=0):
    if depth < maxDepth:
        logp_regex = self.logp_regex
    else:
        logp_regex = self.logp_regex_no_recursion
    if r in self.character_classes:
        return logp_regex[r]
    else:
        R = type(r)
        p = logp_regex[R]
        if R == pre.String:
            return p + pre.Plus(pre.dot, p=0.3).match(r.arg)
        elif R == pre.Concat:
            n = len(r.values)
            return p + geom(0.8, loc=1).logpmf(n) + sum(
                [self.scoreregex(s, depth=depth + 1) for s in r.values])
        elif R == pre.Alt:
            n = len(r.values)
            if all(x == r.ps[0] for x in r.ps):
                param_score = math.log(1 / 2)
            else:
                param_score = math.log(1 / 2) - (len(r.ps) + 1)  # ~AIC
            return p + geom(0.8, loc=1).logpmf(n) + param_score + sum(
                [self.scoreregex(s, depth=depth + 1) for s in r.values])
        elif R in [pre.KleeneStar, pre.Plus, pre.Maybe]:
            return p + self.scoreregex(r.val, depth=depth + 1)
def scoreregex(self, r, trace, depth=0):
    if depth == 0:
        logp_regex = self.logp_regex_no_concepts
    elif depth == maxDepth:
        logp_regex = self.logp_regex_no_recursion if trace.baseConcepts else self.logp_regex_no_concepts_no_recursion
    else:
        logp_regex = self.logp_regex if trace.baseConcepts else self.logp_regex_no_concepts
    if type(r) is RegexWrapper and r.concept in trace.baseConcepts:
        return logp_regex[CONCEPT]  # + trace.logpConcept(r.concept); dealt with in trace._addConcept
    elif r in self.character_classes:
        return logp_regex[r]
    else:
        R = type(r)
        p = logp_regex[R]
        if R == pre.String:
            return p + pre.Plus(pre.dot, p=0.3).match(r.arg)
        elif R == pre.Concat:
            n = len(r.values)
            return p + geom(0.8, loc=1).logpmf(n) + sum(
                [self.scoreregex(s, trace, depth=depth + 1) for s in r.values])
        elif R == pre.Alt:
            n = len(r.values)
            if all(x == r.ps[0] for x in r.ps):
                param_score = math.log(1 / 2)
            else:
                param_score = math.log(1 / 2) - (len(r.ps) + 1)  # ~AIC
            return p + geom(0.8, loc=1).logpmf(n) + param_score + sum(
                [self.scoreregex(s, trace, depth=depth + 1) for s in r.values])
        elif R in [pre.KleeneStar, pre.Plus, pre.Maybe]:
            return p + self.scoreregex(r.val, trace, depth=depth + 1)
def generate(tau, theta):
    f = [stats.geom(theta[0]), stats.geom(theta[1])]
    while True:
        if np.random.uniform(0, 1) < tau[0]:
            dist = 0
        else:
            dist = 1
        yield f[dist].rvs(1)[0]
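# A minimal usage sketch for the two-component geometric generator above.
# The parameter values and import aliases (numpy as np, scipy.stats as stats)
# are illustrative assumptions, not taken from the original source.
import numpy as np
from scipy import stats

gen = generate(tau=[0.7], theta=[0.2, 0.5])
draws = [next(gen) for _ in range(5)]  # five samples from the geometric mixture
print(draws)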
def recalc_log_gt_posteriors(log_gt_priors, down, up, p_geom, read_counts_array, nalleles, allele_sizes, diploid=False, norm=False): stutter_dist = geom(p_geom) nsamples = read_counts_array.shape[0] log_down, log_eq, log_up = map(numpy.log, [down, 1-down-up, up]) if diploid: num_gts = nalleles**2 LLs = numpy.zeros((nsamples, num_gts)) + log_gt_priors gtind = 0 for a1 in xrange(nalleles): for a2 in xrange(nalleles): if a1 != a2 and DEBUG_HAPLOID: LLs[:,gtind] = numpy.log(0) gtind += 1 continue step_probs1 = numpy.hstack(([log_down + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[a1])) for x in range(0, a1)], [log_eq], [log_up + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[a1])) for x in range(a1+1, nalleles)])) step_probs2 = numpy.hstack(([log_down + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[a2])) for x in range(0, a2)], [log_eq], [log_up + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[a2])) for x in range(a2+1, nalleles)])) step_probs = numpy.logaddexp(step_probs1+log_one_half, step_probs2+log_one_half) LLs[:,gtind] += numpy.sum(read_counts_array*step_probs, axis=1) # if a1 == a2: LLs[:,gtind]+= numpy.log(2) # account for phase gtind += 1 else: LLs = numpy.zeros((nsamples, nalleles)) + log_gt_priors for j in xrange(nalleles): step_probs = numpy.hstack(([log_down + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[j])) for x in range(0, j)], [log_eq], [log_up + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[j])) for x in range(j+1, nalleles)])) LLs [:,j] += numpy.sum(read_counts_array*step_probs, axis=1) if norm: return numpy.sum(logsumexp(LLs, axis=1)) else: log_samp_totals = logsumexp(LLs, axis=1)[numpy.newaxis].T return LLs - log_samp_totals
def lag_distribution(durations, expo=0.4):
    binned = np.bincount(durations.duration)
    normalized_binned = binned.astype(float) / durations.duration.count()
    geom_dist = stats.geom(expo)
    expected_values = geom_dist.pmf(np.arange(len(normalized_binned)))
    df = pd.DataFrame({"Duration frequency": normalized_binned,
                       "Expected values": expected_values})
    return df
def construct_matrix(self, down, up, p_geom, min_allele, max_allele):
    self.log_down = numpy.log(down)
    self.log_eq = numpy.log(1.0 - down - up)
    self.log_up = numpy.log(up)
    self.p_geom = p_geom
    self.min_allele = min_allele
    self.max_allele = max_allele
    self.nalleles = self.max_allele - self.min_allele + 1
    self.stutter_dist = geom(self.p_geom)
    # Construct a matrix in which each row contains the stutter transition probabilities for a particular allele
    for j in range(self.nalleles):
        allele_probs = numpy.hstack(
            ([self.log_down + self.stutter_dist.logpmf(j - x) for x in range(0, j)],
             [self.log_eq],
             [self.log_up + self.stutter_dist.logpmf(x - j) for x in range(j + 1, self.nalleles)]))
        if j == 0:
            step_probs = allele_probs
        else:
            step_probs = numpy.vstack((step_probs, allele_probs))
    if self.nalleles == 1:
        step_probs = numpy.expand_dims(step_probs, axis=0)
    self.step_probs = step_probs
def plot_geometric_fit(data, fit_results, title=None, x_label=None, x_range=None, y_range=None,
                       fig_size=(6, 5), bin_width=1, filename=None):
    """
    :param data: (numpy.array) observations
    :param fit_results: dictionary with keys "p" and "loc"
    :param title: title of the figure
    :param x_label: label to show on the x-axis of the histogram
    :param x_range: (tuple) x range
    :param y_range: (tuple) y range (the histogram shows the probability density,
        so the upper value of y_range should be 1)
    :param fig_size: (tuple) figure size
    :param bin_width: bin width
    :param filename: filename to save the figure as
    """
    plot_fit_discrete(data=data,
                      dist=stat.geom(p=fit_results['p'], loc=fit_results['loc']),
                      label='Geometric',
                      bin_width=bin_width,
                      title=title, x_label=x_label,
                      x_range=x_range, y_range=y_range,
                      fig_size=fig_size, filename=filename)
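# A hypothetical usage sketch for plot_geometric_fit above. The synthetic data
# and the fit_results dictionary are illustrative assumptions (as if produced by
# a separate fitting step elsewhere in the package), not part of the original source.
import numpy as np
from scipy import stats

data = stats.geom(p=0.3).rvs(size=1000)    # synthetic observations
fit_results = {'p': 0.3, 'loc': 0}         # stand-in for a fitted result
plot_geometric_fit(data, fit_results, title='Geometric fit', x_label='Count')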
def plot_means(d, params, fig, ax, repeat, size=200):
    result = None
    means = []
    for i in range(repeat):
        if d == 'Poisson':
            result = stats.poisson(**params).rvs(size)
            means.append(result.mean())
        elif d == 'Binomial':
            result = stats.binom(**params).rvs(size)
            means.append(result.mean())
        elif d == 'Exponential':
            result = stats.expon(**params).rvs(size)
            means.append(result.mean())
        elif d == 'Geometric':
            result = stats.geom(**params).rvs(size)
            means.append(result.mean())
        elif d == 'Uniform':
            result = stats.uniform(**params).rvs(size)
            means.append(result.mean())
    ax = fig.add_subplot(ax[0], ax[1], ax[2])
    ax.hist(means, bins=100)
    ax.set_title(f"{d}-repeat:{repeat}-size:{size}")
def initGeometric(init_strat, bias):
    if isinstance(init_strat, AugurDefault):
        return 1
    elif isinstance(init_strat, AugurRandom):
        return sps.geom(bias).rvs()
    else:
        # raising a bare False is itself a TypeError; raise a proper exception instead
        raise ValueError("unsupported initialization strategy: %r" % (init_strat,))
def probability_of_exiting(states, n_tests):
    score = []
    if states.finite_horizon:
        game_horizon = states.time_horizon
    elif states.infinite_horizon_discounted:
        time_horizon_distribution = geom(p=1 / states.lifetime_mean)
    for _ in range(n_tests):
        if states.infinite_horizon_discounted:
            game_horizon = time_horizon_distribution.rvs()
        state = states.initial_state
        t = 0
        while True:
            if state.losing():
                score.append(False)
                break
            elif state.winning():
                score.append(True)
                break
            elif t == game_horizon - 1:
                score.append(False)
                break
            if states.finite_horizon:
                action = state.player.action[t]
            else:
                action = state.player.action
            state = choice(states.possible_next_states(state, action))
            t += 1
    return sum(score) / n_tests
def get_param_distributions(self, X, y):
    return super().get_param_distributions({
        'polynomialfeatures__degree': [1, 2],
        'pca__n_components': list(range(1, X.shape[1])),
        'randomforestclassifier__n_estimators': geom(1. / 100)
    })
def get_param_distributions(self, X, y):
    return super().get_param_distributions({
        'kernelridge__alpha': expon(0, 1),
        'kernelridge__degree': geom(.5, loc=1),
        'kernelridge__kernel': ['linear', 'poly', 'rbf', 'laplacian']
    })
def get_param_distributions(self, X, y):
    return super().get_param_distributions({
        'svr__C': expon(0, 1),
        'svr__degree': geom(.3),
        'svr__kernel': ['linear', 'poly', 'rbf'],
    })
def create_distribution(self):
    """Creates the CP distribution using the object properties.

    NOTE: At this point, it only operates with the geometric distribution.
    Once g and g_0 are allowed as input, this function handles the more
    general case, too."""
    return stats.geom(self.intensity)
def get_param_distributions(self, X, y):
    return super().get_param_distributions({
        'polynomialfeatures__degree': [1, 2],
        'pca__n_components': list(range(1, X.shape[1])),
        'adaboostregressor__n_estimators': geom(1. / 2**5)
    })
def __init__(self, p: float) -> None:
    """ Constructor for an object of type geometric. """
    super().__init__(a_dist=stats.geom(p))
    self._lower = 1
    self._p = p
def get_param_distributions(self, X, y):
    return super().get_param_distributions({
        'polynomialfeatures__degree': [1, 2],
        'pca__n_components': _get_n_pca_components_distribution(X),
        'adaboostregressor__n_estimators': geom(1. / 2**5)
    })
def recalc_stutter_params(log_gt_posteriors, read_counts, nalleles, allele_sizes, down, up, pgeom, max_stutter, diploid=False): # Pre-calculate stutter probabilities for old model stutter_dist = geom(pgeom) stutter_probs = [stutter_dist.logpmf(i) for i in range(1, max_stutter + 1)] # Set up counts nsamples = log_gt_posteriors.shape[0] log_counts = [[0], [0], [0]] # Pseudocounts log_diffs = [0, numpy.log(2)] # Step sizes of 1 and 2, so that p_geom < 1 if diploid: for i in xrange(nsamples): gtind = 0 for a1 in xrange(nalleles): for a2 in xrange(nalleles): log_post = log_gt_posteriors[i][gtind] # print i, down, up, pgeom, (allele_sizes[a1], allele_sizes[a2]), numpy.exp(log_post), dict([(allele_sizes[r], read_counts[i][r]) for r in read_counts[i]]) for read_index, count in read_counts[i].items(): log_count = numpy.log(count) diff1 = allele_sizes[read_index] - allele_sizes[a1] diff2 = allele_sizes[read_index] - allele_sizes[a2] phase_posts = GetReadPhasePosts(allele_sizes[a1], allele_sizes[a2], \ allele_sizes[read_index], down, up, stutter_probs) diffs = [diff1, diff2] # print allele_sizes[read_index], allele_sizes[a1], allele_sizes[a2], diffs, numpy.exp(phase_posts), numpy.exp(log_post) for j in range(len(diffs)): if diffs[j] != 0: log_diffs.append(log_count + log_post + phase_posts[j] + numpy.log(abs(diffs[j]))) log_counts[numpy.sign(diffs[j]) + 1].append(log_post + phase_posts[j] + log_count) gtind += 1 else: for i in xrange(nsamples): for j in xrange(nalleles): log_post = log_gt_posteriors[i][j] for read_index, count in read_counts[i].items(): log_count = numpy.log(count) diff = allele_sizes[read_index] - allele_sizes[j] if diff != 0: log_diffs.append(log_count + log_post + numpy.log(abs(diff))) log_counts[numpy.sign(diff) + 1].append(log_post + log_count) log_tot_counts = map(logsumexp, log_counts) p_hat = numpy.exp( logsumexp([log_tot_counts[0], log_tot_counts[2]]) - logsumexp(log_diffs)) log_freqs = log_tot_counts - logsumexp(log_tot_counts) return numpy.exp(log_freqs[0]), numpy.exp(log_freqs[2]), p_hat
def get_param_distributions(self, X, y):
    return super().get_param_distributions({
        'polynomialfeatures__degree': [1, 2],
        'pca__n_components': list(range(1, X.shape[1])),
        'kneighborsregressor__n_neighbors': geom(1 / (.05 * X.shape[0])),
        'kneighborsregressor__weights': ['uniform', 'distance']
    })
def get_param_distributions(self, X, y):
    return super().get_param_distributions({
        'polynomialfeatures__degree': [1, 2],
        'pca__n_components': _get_n_pca_components_distribution(X),
        'kneighborsclassifier__n_neighbors': geom(1 / (.05 * X.shape[0])),
        'kneighborsclassifier__weights': ['uniform', 'distance']
    })
def test_rvs(self):
    vals = stats.geom.rvs(0.75, size=(2, 50))
    assert numpy.all(vals >= 0)
    assert numpy.shape(vals) == (2, 50)
    assert vals.dtype.char in typecodes["AllInteger"]
    val = stats.geom.rvs(0.75)
    assert isinstance(val, int)
    val = stats.geom(0.75).rvs(3)
    assert isinstance(val, numpy.ndarray)
    assert val.dtype.char in typecodes["AllInteger"]
def test_entropy():
    """
    Test entropy.
    """
    geom_benchmark = stats.geom(0.7)
    expect_entropy = geom_benchmark.entropy().astype(np.float32)
    entropy = EntropyH()
    output = entropy()
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect_entropy) < tol).all()
def test_rvs(self):
    vals = stats.geom.rvs(0.75, size=(2, 50))
    assert_(numpy.all(vals >= 0))
    assert_(numpy.shape(vals) == (2, 50))
    assert_(vals.dtype.char in typecodes['AllInteger'])
    val = stats.geom.rvs(0.75)
    assert_(isinstance(val, int))
    val = stats.geom(0.75).rvs(3)
    assert_(isinstance(val, numpy.ndarray))
    assert_(val.dtype.char in typecodes['AllInteger'])
def get_iti_distribution(n_trials, p, rs):
    """Return a vector of ITIs (in TR units) for each trial."""
    x = np.arange(*p.eff_geom_support)
    iti_pmf = stats.geom(p.eff_geom_p, loc=p.eff_geom_loc).pmf(x)
    iti_counts = np.round((iti_pmf / iti_pmf.sum()) * n_trials)
    iti_counts[0] += (n_trials - iti_counts.sum())
    iti_trs = [np.repeat(x_i, c) for x_i, c in zip(x, iti_counts)]
    iti_trs = np.concatenate(iti_trs)
    return iti_trs
def gencsr(shape=(NUMNODES, NUMNODES), density=0.05, fname="random_graphs/"): print("generating density: ", density) MAX_IN_SHARD = int(10**8.7) #cannot do 1e9 numpoints2gen = MAX_IN_SHARD # DEBUG statements # numpoints2gen = 15 # shape=(6,6) # density=0.5 # numpoints2gen = int(shape[0]*(shape[0]-1)/2) # END DEBUG actualnumpoints = int(shape[0] * (shape[0] - 1) / 2) d = geom(density) points = d.rvs((numpoints2gen, )).astype( np.int64) # TODO I would do uint64 but it is not supported on GPU # note, this only generates values strictly above the diagonal mvalue = shape[0] - 1 incrs = np.ones(mvalue) * mvalue - np.arange(mvalue) row_dense_upper_starts = np.zeros(shape[0] + 1).astype(np.int64) row_dense_upper_starts[1:-1] = incrs.cumsum() row_dense_upper_starts[-1] = row_dense_upper_starts[ -2] #monkey patching because we need there to be a sentinal element points[0] -= 1 points_dense_upper_idx = points.cumsum() lastreal = np.searchsorted(points_dense_upper_idx, row_dense_upper_starts[-3] + 1) points_dense_upper_idx = points_dense_upper_idx[:lastreal] print(lastreal / actualnumpoints) row_csr_starts = np.searchsorted(points_dense_upper_idx, row_dense_upper_starts).astype(np.int64) point2row = ( np.searchsorted(row_dense_upper_starts, points_dense_upper_idx + 1) - 1).astype(np.int64) col_csr = points_dense_upper_idx - row_dense_upper_starts[point2row] col_csr += np.arange(shape[0]).astype(np.int64)[point2row] + 1 with open(fname + str(shape[0]) + '_' + str(density) + "csr_cols.bin", "wb") as f: f.write(col_csr.tobytes()) with open(fname + str(shape[0]) + '_' + str(density) + "csr_rows.bin", "wb") as f: f.write(row_csr_starts.tobytes()) checkcsr(col_csr, row_csr_starts, shape) # print("lengh of row_csr_starts",row_csr_starts.shape) # print(row_csr_starts) # pdb.set_trace() return (row_csr_starts, col_csr)
def get_param_distributions(self, X, y):
    return super().get_param_distributions({
        'polynomialfeatures__degree': [1, 2],
        'pca__n_components': _get_n_pca_components_distribution(X),
        'svc__C': expon(0, 1),
        'svc__degree': geom(.3),
        'svc__kernel': ['linear', 'poly', 'rbf'],
    })
def other_Q2(S0, p, mu0, sigma0, mu1, sigma1, rho, r, b, gamma, delta, n0, N0): burn_in = n0 num_sample = N0 z = sct.norm.ppf(1 - delta / 2) # 1 - delta/2 quantile of N(0, 1) r_star = 1 - pow(2, -1.5) # optimal success rate for the geometric of N confidence_interval = float('inf') running_mean = 0 running_2moment = 0 num_estimator = 0 #count of number of estimators generated CIs = np.zeros((1, num_sample)) estimation = np.zeros((1, num_sample)) while (num_estimator < num_sample or confidence_interval >= delta): N = np.random.geometric(p=r_star) samples = sampler(riskless, risky, N, S0, n0, p, mu0, sigma0, mu1, sigma1, rho, r, b) samples_odd = samples[0::2] samples_even = samples[1::2] samples_n_0 = samples[0:pow(2, n0)] theta_N = np.mean(samples) theta_N_odd = np.mean(samples_odd) theta_N_even = np.mean(samples_even) theta_n_0 = np.mean(samples_n_0) X_star = (theta_N - (theta_N_odd + theta_N_even) / 2) / sct.geom(r_star).pmf(N + 1) + theta_n_0 running_mean = (running_mean * num_estimator + X_star) / (num_estimator + 1) running_2moment = (running_2moment * num_estimator + pow(X_star, 2)) / (num_estimator + 1) sample_std = math.sqrt(running_2moment - pow(running_mean, 2)) num_estimator = num_estimator + 1 confidence_interval = z * sample_std / (math.sqrt(num_estimator)) estimation[:, num_estimator - 1] = running_mean CIs[:, num_estimator - 1] = confidence_interval lower = estimation - CIs upper = estimation + CIs print('Generate', num_estimator, 'samples \n') n_range = np.arange(burn_in - 1, num_sample) plt.plot(n_range, estimation[0, n_range], label='estimation') plt.plot(n_range, lower[0, n_range], label='lower CI') plt.plot(n_range, upper[0, n_range], label='upper CI') plt.legend(loc='upper right') plt.show() return running_mean, confidence_interval
def Geometric(p, tag=None):
    """
    A Geometric random variate

    Parameters
    ----------
    p : scalar
        The probability of success
    """
    assert 0 < p < 1, 'Geometric probability "p" must be between zero and one, non-inclusive'
    return uv(ss.geom(p), tag=tag)
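# A hypothetical usage note for the Geometric() wrapper above: uv() wraps the
# frozen scipy.stats distribution so the variate can participate in the
# package's uncertainty arithmetic. The parameter value and tag below are
# illustrative assumptions only.
n_retries = Geometric(0.25, tag='retries')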
def get_param_distributions(self, X, y):
    return super().get_param_distributions({
        'polynomialfeatures__degree': [1, 2],
        'pca__n_components': list(range(1, X.shape[1])),
        'svr__C': expon(0, 1),
        'svr__degree': geom(.3),
        'svr__kernel': ['linear', 'poly', 'rbf'],
    })
def get_param_distributions(self, X, y):
    return super().get_param_distributions({
        'polynomialfeatures__degree': [1, 2],
        'pca__n_components': list(range(1, X.shape[1])),
        'kernelridge__alpha': expon(0, 1),
        'kernelridge__degree': geom(.5, loc=1),
        'kernelridge__kernel': ['linear', 'poly', 'rbf', 'laplacian']
    })
def test_log_likelihood():
    """
    Test log_pmf.
    """
    geom_benchmark = stats.geom(0.7)
    expect_logpmf = geom_benchmark.logpmf([1, 2, 3, 4, 5]).astype(np.float32)
    logprob = LogProb()
    x_ = Tensor(np.array([0, 1, 2, 3, 4]).astype(np.int32), dtype=dtype.float32)
    output = logprob(x_)
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect_logpmf) < tol).all()
def test_NBinom_to_Geometric(self):
    exp_list, obs_list = [], []
    X = NegativeBinomial(r=1, p=0.8)
    sims = X.sim(Nsim)
    simulated = sims.tabulate()
    for k in range(10):
        expected = Nsim * stats.geom(p=0.8).pmf(k)
        if expected > 5:
            exp_list.append(expected)
            obs_list.append(simulated[k])
    pval = stats.chisquare(obs_list, exp_list).pvalue
    self.assertTrue(pval > 0.01)
def UpdateK(k_old, z_old, T, lambda_old, rou, u, phi, alpha, iterNum, eta):
    k_new = []
    z_new = []
    for t in range(T - 1):
        dK = stats.binom(1, 0.5)
        temp = dK.rvs(1)
        d_k = 0
        if temp[0] == 0:
            d_k = 1
        else:
            d_k = -1
        epsilon_K = stats.geom(1.0 / (1 + z_old[t]))
        epsilon = epsilon_K.rvs(1)
        k_new_temp = k_old[t] + d_k * epsilon[0]
        # step 3
        if k_new_temp < 0:
            k_new.append(k_old[t])
            z_new.append(z_old[t])
        else:
            p_k = (lambda_old / ((1 - rou) * u))**k_old[t] * (phi[t] * lambda_old * rou / ((1 - rou) * u))**k_old[t] * \
                phi[t + 1] / (math.factorial(k_old[t]) * spec.gamma(lambda_old + k_old[t]))
            p_k_new = (lambda_old / ((1 - rou) * u))**k_new_temp * (phi[t] * lambda_old * rou / ((1 - rou) * u))**k_new_temp * \
                phi[t + 1] / (math.factorial(k_new_temp) * spec.gamma(lambda_old + k_new_temp))
            ap = min(1, p_k_new / p_k)
            y_AP = stats.binom(1, ap)
            temp = y_AP.rvs(1)
            if temp[0] == 0:
                k_new.append(k_new_temp)
            else:
                k_new.append(k_old[t])
            temp_z = z_old[t] + iterNum**(-1.0 * eta) * (ap - alpha)
            z_new.append(temp_z)
            # step 4
    print("k z new:\n")
    print(k_new)
    print(z_new)
    print("\n")
    return k_new, z_new
def UpdateK_sigma(k_sigma_old, z_sigma_old, lambda_sigma, rou_sigma, u_sigma, sigma_2, iterNum, eta, alpha, T):
    k_sigma_new = []
    z_sigma_new = []
    for t in range(T - 1):
        dK = stats.binom(1, 0.5)
        temp = dK.rvs(1)
        d_k = 0
        if temp[0] == 0:
            d_k = 1
        else:
            d_k = -1
        epsilon_K = stats.geom(1.0 / (1 + z_sigma_old[t]))
        epsilon = epsilon_K.rvs(1)
        k_sigma_new_temp = k_sigma_old[t] + d_k * epsilon[0]
        # step 3
        if k_sigma_new_temp < 0:
            k_sigma_new.append(k_sigma_old[t])
            z_sigma_new.append(z_sigma_old[t])
        else:
            # step 2
            p_k_sigma = (lambda_sigma / ((1 - rou_sigma) * u_sigma))**k_sigma_old[t] * \
                (sigma_2[t] * lambda_sigma * rou_sigma / ((1 - rou_sigma) * u_sigma))**k_sigma_old[t] * \
                (sigma_2[t + 1])**k_sigma_old[t] / (math.factorial(k_sigma_old[t]) * spec.gamma(lambda_sigma + k_sigma_old[t]))
            p_k_sigma_new = (lambda_sigma / ((1 - rou_sigma) * u_sigma))**k_sigma_new_temp * \
                (sigma_2[t] * lambda_sigma * rou_sigma / ((1 - rou_sigma) * u_sigma))**k_sigma_new_temp * \
                (sigma_2[t + 1])**k_sigma_new_temp / (math.factorial(k_sigma_new_temp) * spec.gamma(lambda_sigma + k_sigma_new_temp))
            ap = min(1, p_k_sigma_new / p_k_sigma)
            y_AP = stats.binom(1, ap)
            temp = y_AP.rvs(1)
            if temp[0] == 0:
                k_sigma_new.append(k_sigma_new_temp)
            else:
                k_sigma_new.append(k_sigma_old[t])
            # step 4
            temp_z = z_sigma_old[t] + iterNum**(-1.0 * eta) * (ap - alpha)
            z_sigma_new.append(temp_z)
    return k_sigma_new, z_sigma_new
def test_pageout_maintains_size(self):
    self.uut = MMCPolicyOne(
        cache_size_limit=10, full_cache_size_limit=20, trace_size_limit=15)
    g = stats.geom(0.05)
    # Fill up the cache.
    for page in range(100):
        self.uut.request(page)
    # Request some cache hits and some cache misses.
    for page in list(range(5)) + list(range(100, 105)):
        self.uut.request(page)
    self.assertEqual(len(self.uut.cache_list), 10)
    self.assertEqual(len(self.uut.full_cache), 20)
    self.assertEqual(len(self.uut.trace), 15)
def test_normalizations(self):
    emissions = np.ones((3, 7))
    tmat = np.eye(3)
    durations = [geom(0.3)] * 3
    support_cutoff = 2
    hsmm = HSMMModel(
        MultinomialEmissions(emissions), durations, tmat,
        support_cutoff=support_cutoff
    )
    expected_durations = np.empty((3, 2))
    expected_durations[:, 0] = 0.58823529
    expected_durations[:, 1] = 0.41176471
    np.testing.assert_array_almost_equal(
        hsmm._durations, expected_durations
    )
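# A short standalone check of the expected values in the test above (an assumption
# about how the support cutoff is applied): a geometric duration with p = 0.3
# truncated to durations {1, 2} and renormalized gives 0.58823529 and 0.41176471.
from scipy.stats import geom

pmf = geom(0.3).pmf([1, 2])      # [0.3, 0.21]
print(pmf / pmf.sum())           # -> [0.58823529, 0.41176471]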
def __init__(self, obs_pts=None, cmplx=None, gamma=.9, lmbda=.2,
             use_gp=True, obs_sigma=OBS_SIGMA, propose_sigma=.0005, birth_sigma=.1,
             d=2, obs=None, N=None, P=None, n_clusters_init=5):
    """
    gamma: geometric variable for prior on number of simplices
    sigma_sq: variance of
    d: dimension of embedding space
    """
    assert not (obs_pts is None and cmplx is None)
    self.gamma = gamma
    self.N_prior = geom(gamma)
    self.d = d
    self.lmbda = lmbda
    self.len_prior = expon(self.lmbda)
    self.propose_mvn = mvn(np.zeros(self.d), propose_sigma * np.eye(self.d))
    self.obs_sigma = obs_sigma
    self.obs_dist = norm(loc=0, scale=obs_sigma)
    self.birth_proposal = norm(loc=0, scale=birth_sigma)
    self.use_gp = use_gp
    self.cmplx = cmplx
    if self.cmplx is None:
        # obs_pts is not None
        self.cmplx = SimplicialComplex()
        # this is a 1d complex
        self.cmplx.initialize(obs_pts, 1, n_clusters=n_clusters_init)
    self.N = self.cmplx.simplex_count()
    if obs_pts is None:
        # self.sample_obs(self.N * 10)
        self.sample_obs(self.N * 100)
    else:
        self.observations = []
        for pt in obs_pts:
            self.observations.append(Obs(pt, self.cmplx))
def recDistribution():
    data = loadFeatures()
    recs = array(data['recs'], float)
    mu = numpy.average(recs)
    print(mu)
    dist = poisson(mu)
    dist2 = geom(1.0 / mu)
    dist3 = pareto(recs)
    x = numpy.arange(1, numpy.amax(recs))
    h = plt.hist(recs, bins=range(40), density=True)
    plt.plot(dist3[0], dist3[1], color='yellow', label='Pareto', linewidth=3)
    plt.plot(x, dist.pmf(x), color='black', label='Poisson', linewidth=3)
    plt.plot(x, dist2.pmf(x), color='red', label='Geometric', linewidth=3)
    plt.legend()
    plt.xlabel('Recommendation Count')
    plt.ylabel('Actual Value (% of Data) / Probability')
    plt.suptitle('Fitting Rec. Count')
    plt.xlim(0, 40)
    plt.show()
def __call__(self, options, pars):
    """Simulate process model to get predicted choice and sample size distributions"""
    start = time()
    N = pars.get('N', 500000)
    max_T = pars.get('max_T', 500)
    minsamplesize = pars.get('minsamplesize', 1) - 1
    p_stop_geom = pars.get('p_stop_geom', 0)
    fixed_dist = geom(p_stop_geom, loc=(minsamplesize - 1))
    outcomes = pars['obs']['outcomes']
    samplesize = outcomes.shape[0]
    fixed_p_stop = fixed_dist.pmf(samplesize - 1)
    p_stop_choose_A = fixed_p_stop * 0.5
    p_stop_choose_B = fixed_p_stop * 0.5
    return {'p_stop_choose_A': p_stop_choose_A,
            'p_stop_choose_B': p_stop_choose_B}
# -*- coding: utf-8 -*-
# By Vamei
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import geom

rv = geom(0.45)

x = np.arange(-1, 15, 1)
y = rv.pmf(x)

plt.bar(x - 0.2, y, width=0.4)
plt.ylim([0, 0.5])
plt.title("geometric distribution")
plt.xlabel("RV")
plt.ylabel("P(X=x)")
plt.show()
from termcolor import colored, cprint
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import geom

# Here set up the parameters for the geometric distribution.
p = 0.5
dist = geom(p)

# Set up the sample range (the geometric pmf is only non-zero on the integers 1, 2, ...).
x = np.arange(1, 11)

# Retrieving geom's PMF and CDF
pmf = dist.pmf(x)
cdf = dist.cdf(x)

# Here we draw out 500 rand
print(colored(x, 'green'), colored(dist, 'red'),
      colored(pmf, 'blue'), colored(cdf, 'red'))
def func(self, x):
    p = self.p
    return geom(p).pmf(x)
def sample(self, N=None):
    p = self.p
    return geom(p).rvs(size=N, random_state=self.random)
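# A minimal, self-contained sketch of the kind of wrapper class the two methods
# above (func / sample) appear to belong to. The class name, constructor, and the
# 'random' attribute are assumptions for illustration only.
from scipy.stats import geom


class GeometricWrapper:
    def __init__(self, p, seed=None):
        self.p = p            # success probability
        self.random = seed    # passed to scipy.stats as random_state

    def func(self, x):
        # pmf of the geometric distribution at x
        return geom(self.p).pmf(x)

    def sample(self, N=None):
        # draw N samples (or a single value when N is None)
        return geom(self.p).rvs(size=N, random_state=self.random)


dist = GeometricWrapper(p=0.4, seed=42)
print(dist.func(3), dist.sample(5))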
def __call__(self, options, pars, trackobs=True): """Simulate process model to get predicted choice and sample size distributions""" start = time() N = pars.get('N', 500) max_T = pars.get('max_T', 100) minsamplesize = pars.get('minsamplesize', 1) - 1 p_sample_H = pars.get('p_sample_H', .5) p_sample_L = 1 - p_sample_H if self.stopdist == 'fixed-T': stop_T = pars.get('stop_T', 2) fixed_dist = randint(stop_T, stop_T+1) elif self.stopdist == 'geometric': p_stop = pars.get('p_stop', 0) fixed_dist = geom(p_stop, loc=(minsamplesize - 1)) if 'obs' in pars: # assume a single sequence of known observations sampled_option = pars['obs']['sampled_option'] outcomes = pars['obs']['outcomes'] max_T = outcomes.shape[0] fixed_p_stop = fixed_dist.pmf(max_T - 1) opt_exp = [] for i, opt in enumerate(options): opt_exp_i = [] for j, x in enumerate(opt): ind = np.where((sampled_option==i) & (outcomes==x[0]))[0] n = float(len(np.where(sampled_option==i)[0])) if n > 0: opt_exp_i.append([x[0], len(ind)/n]) else: opt_exp_i.append([x[0], 0]) opt_exp_i = np.array(opt_exp_i) # assume single observation of zero if opt_exp_i[:,1].sum()==0: zero = np.where(opt_exp_i[:,0]==0)[0][0] opt_exp_i[zero,1] = 1 opt_exp.append(opt_exp_i) opt_exp = np.array(opt_exp) # compute value and attentional weights for # each outcome weights = np.array([cpt.pweight_prelec(option, pars) for i, option in enumerate(opt_exp)]) values = np.array([cpt.value_fnc(option[:,0], pars) for option in opt_exp]) # choice function s = pars.get('s', 1.) # softmax temp vL, vH = [np.dot(w, v) for (w, v) in zip(weights, values)] cp = np.exp(vH * s) / (np.exp(vH * s) + np.exp(vL * s)) p_stop_choose_A = fixed_p_stop * (1 - cp) p_stop_choose_B = fixed_p_stop * cp return {'p_stop_choose_A': p_stop_choose_A, 'p_stop_choose_B': p_stop_choose_B, 'cp_B': cp} else: values = np.array([cpt.value_fnc(option[:,0], pars) for option in options]) # apply a fixed sample size samplesize = fixed_dist.rvs(size=N) max_T = samplesize.max() sampled_option = np.zeros((N, max_T), int) sampled_option = np.random.choice([0,1], p=[p_sample_L, p_sample_H], size=(N, max_T)) # assume 2nd sample is from other option sampled_option[:,1] = np.abs(1 - sampled_option[:,0]) sampled_A = sampled_option==0 sampled_B = sampled_option==1 # observation matrix observed = np.zeros((N, max_T)) observed_A = np.random.choice(range(options[0].shape[0]), size=sampled_A.sum(), p=options[0][:,1]) observed_B = np.random.choice(range(options[1].shape[0]), size=sampled_B.sum(), p=options[1][:,1]) observed[sampled_A] = observed_A observed[sampled_B] = observed_B # outcomes experienced obj_outcomes = options[:,:,0] outcomes = np.zeros((N, max_T)) outcomes[sampled_A] = obj_outcomes[0][observed_A] outcomes[sampled_B] = obj_outcomes[1][observed_B] # get relative frequencies wopt = deepcopy(options) wopt[:,:,0] = values choice = [] for it in range(N): sampled_option_i = sampled_option[it,:(samplesize[it]+1)] outcomes_i = outcomes[it,:(samplesize[it]+1)] opt_exp = [] for i, opt in enumerate(options): opt_exp_i = [] for j, x in enumerate(opt): ind = np.where((sampled_option_i==i) & (outcomes_i==x[0]))[0] n = float(len(np.where(sampled_option_i==i)[0])) opt_exp_i.append([x[0], len(ind)/n]) opt_exp.append(opt_exp_i) opt_exp = np.array(opt_exp) weights = np.array([cpt.pweight_prelec(option, pars) for i, option in enumerate(opt_exp)]) wopt[:,:,1] = weights pH = cpt.choice_prob(wopt, pars) if np.random.random() < pH: choice.append(1) else: choice.append(0) choice = np.array(choice) p_resp = choice.mean() ss_A = 
samplesize[choice==0] ss_B = samplesize[choice==1] p_stop_A = np.bincount(ss_A, minlength=max_T) p_stop_A = p_stop_A/float(p_stop_A.sum()) p_stop_B = np.bincount(ss_B, minlength=max_T) p_stop_B = p_stop_B/float(p_stop_B.sum()) p_stop_cond = np.transpose([p_stop_A, p_stop_B]) # only include data up to choice sampled_option = [sampled_option[i][:(samplesize[i]+1)] for i in range(samplesize.shape[0])] outcomes = [outcomes[i][:(samplesize[i]+1)] for i in range(samplesize.shape[0])] outcome_ind = [observed[i][:(samplesize[i]+1)] for i in range(samplesize.shape[0])] return {'choice': choice, 'samplesize': samplesize, 'p_resp': np.array([1-p_resp, p_resp]), 'p_stop_cond': p_stop_cond, 'sampled_option': sampled_option, 'outcomes': outcomes, 'outcome_ind': outcome_ind }
def create_random_data(filename, seconds, sample_rate, baseline=0.0, noise=None, event_rate=0, event_durations=None, event_depths=None, overwrite=False): """ Creates random sample data. Leaves the first 200 data points free of events. :param str filename: Filename for the data. If the file already exists and overwrite=False, an IOError is raised. :param float seconds: Number of seconds of data. :param float sample_rate: The sampling rate of the data, in Hz. :param float baseline: The baseline of the data, in uA. :param scipy.stats.distributions.rv_frozen noise: A frozen :mod:`scipy.stats` probability distribution\ for the noise. An example normal distribution with mean 2 uA and std dev 3 uA is :: from scipy.stats import norm noise = norm(loc=2, scale=3) Default is no noise. :param float event_rate: Rate of events in Hz. :param scipy.stats.distributions.rv_frozen event_durations: A frozen :mod:`scipy.stats` probability distribution\ for the event duration (in seconds). :param scipy.stats.distributions.rv_frozen event_depths: A frozen :mod:`scipy.stats` probability distribution\ for the event depth, in uA. :param bool overwrite: Whether overwriting an existing file at filename is allowed. If false, and filename exists.\ an IOError will be raised. :raises: :py:exc:`IOError` - If the filename already exists and overwrite=False. >>> from pypore.sampledata.creator import create_random_data >>> from scipy import stats >>> seconds = 1. # 1 second of data. >>> sample_rate = 1.e6 # 1 MHz sample rate. >>> event_rate = 100. # 100 events/sec, on average. >>> baseline = 10. # 10 uA baseline. >>> noise = stats.norm(scale=.5) # Normal distributed noise with mean of 0 std dev of 0.5 uA. >>> event_depths = stats.norm(loc=2., scale=1.) # Normal distributed events with mean of 2 and std dev of 1 uA. >>> event_durations = stats.norm(loc=100.e-6, scale=10.e-6) # Normal distributed event durations with mean of 100 us and std dev 10 us. >>> n_events = create_random_data('random_trace.h5', seconds, sample_rate, baseline, noise, event_rate, event_durations, event_depths) """ if not overwrite and os.path.exists(filename): raise IOError( "File already exists. Use a different filename, or call with overwrite=True to over-write existing file.") n_points = int(seconds * sample_rate) f = open_file(filename, mode='w', n_points=n_points, sample_rate=sample_rate) data = np.zeros(n_points) + baseline if noise is not None: data += noise.rvs(size=n_points) event_count = 0 if event_rate > 0: i = 200 mean_length = event_durations.mean() * sample_rate expected_events = seconds * event_rate # Available space that is not events or the beginning of the data. free_space = n_points - expected_events * mean_length - i event_probability = expected_events/free_space # Use geometric distribution to find the next starting spot of an event. rv = geom(event_probability) while i < n_points: # get next event start distance i += rv.rvs() event_count += 1 event_length = event_durations.rvs() * sample_rate event_depth_i = event_depths.rvs() if i + event_length > n_points: event_length = n_points - i data[i:i+event_length] += event_depth_i i += event_length f.root.data[:] = data[:] f.close() return event_count