def give_page(self):
    # get product
    product_i = random.randint(0, len(productids) - 1)
    product = productids[product_i]
    # get price
    prod_price_dist = beta.rvs(self.product_price_alpha[product_i],
                               self.product_price_beta[product_i]) * (prices / 50.0)
    price_i = np.argmax(prod_price_dist)
    price = prices[price_i]
    # get header
    header_dist = beta.rvs(self.header_alpha, self.header_beta)
    header_i = np.argmax(header_dist)
    header = headers[header_i]
    # get adtype
    adtype_dist = beta.rvs(self.adtype_alpha, self.adtype_beta)
    adtype_i = np.argmax(adtype_dist)
    adtype = adtypes[adtype_i]
    # get color
    color_dist = beta.rvs(self.color_alpha, self.color_beta)
    color_i = np.argmax(color_dist)
    color = colors[color_i]
    return {'header': header, 'adtype': adtype, 'color': color,
            'productid': product, 'price': price}, [header_i, adtype_i, color_i, product_i, price_i]
def give_page(self):
    # get header
    header_dist = [beta.rvs(self.header_alpha[i], self.header_beta[i]) for i in range(len(headers))]
    header_i = header_dist.index(max(header_dist))
    header = headers[header_i]
    # get adtype
    adtype_dist = [beta.rvs(self.adtype_alpha[i], self.adtype_beta[i]) for i in range(len(adtypes))]
    adtype_i = adtype_dist.index(max(adtype_dist))
    adtype = adtypes[adtype_i]
    # get color
    color_dist = [beta.rvs(self.color_alpha[i], self.color_beta[i]) for i in range(len(colors))]
    color_i = color_dist.index(max(color_dist))
    color = colors[color_i]
    # get productid
    product_dist = [beta.rvs(self.product_alpha[i], self.product_beta[i]) for i in range(len(productids))]
    product_i = product_dist.index(max(product_dist))
    product = productids[product_i]
    # get price
    price_dist = [beta.rvs(self.price_alpha[i], self.price_beta[i]) for i in range(len(prices))] * (prices / 50.0)
    price_i = np.argmax(price_dist)
    price = prices[price_i]
    return {'header': header, 'adtype': adtype, 'color': color,
            'productid': product, 'price': price}, [header_i, adtype_i, color_i, product_i, price_i]
def give_page(self, context):
    (product_alpha, product_beta, product_price_alpha, product_price_beta,
     header_alpha, header_beta, adtype_alpha, adtype_beta,
     color_alpha, color_beta) = self.get_context_ab(context)
    header_dist = [beta.rvs(header_alpha[i], header_beta[i]) for i in range(len(headers))]
    header_i = header_dist.index(max(header_dist))
    header = headers[header_i]
    # get adtype
    adtype_dist = [beta.rvs(adtype_alpha[i], adtype_beta[i]) for i in range(len(adtypes))]
    adtype_i = adtype_dist.index(max(adtype_dist))
    adtype = adtypes[adtype_i]
    # get color
    color_dist = [beta.rvs(color_alpha[i], color_beta[i]) for i in range(len(colors))]
    color_i = color_dist.index(max(color_dist))
    color = colors[color_i]
    # get productid
    product_dist = [beta.rvs(product_alpha[i], product_beta[i]) for i in range(len(productids))]
    product_i = product_dist.index(max(product_dist))
    product = productids[product_i]
    # get price
    price_alpha = product_price_alpha[product_i]
    price_beta = product_price_beta[product_i]
    price_dist = [beta.rvs(price_alpha[i], price_beta[i]) for i in range(len(prices))]
    price_i = price_dist.index(max(price_dist))
    price = prices[price_i]
    return {'header': header, 'adtype': adtype, 'color': color,
            'productid': product, 'price': price}, [product_i, price_i, header_i, adtype_i, color_i]
def get_issue_lift(all_visits, top_issues, half_life=90, min_lift=1):
    """
    get_issue_lift uses the bureau visit data to calculate the lift between issues,
    i.e. if a client asks for help with issue A, is the client more likely to ask for
    help with issue B in the future? Mathematically it is defined as P(B|A)/P(B).
    If the lift is > 1 then A is a good indicator of a future need for B.
    The half_life allows for aging, i.e. the importance of issue A as a predictor of B
    halves every x days.
    Returns a dict {(issue_A, issue_B): lift}
    """
    number_of_clients = len(all_visits)
    # Remove "Other" from top_issues
    for i in xrange(len(top_issues)):
        if top_issues[i][0] == "Other":
            del top_issues[i]
            break
    # Save time and only calculate for the top issues
    top_issue_set = frozenset(map(operator.itemgetter(0), top_issues))
    all_visits = filter_visits_by_issues(all_visits, top_issue_set)
    # Count the number of clients affected by each of the top issues
    issue_count = {issue: sum(any(issue in visit_issues for visit_issues in client_visits.itervalues())
                              for client_visits in all_visits.itervalues())
                   for issue in top_issue_set}
    # Iterate through the clients and find all A->B pairs and their ages;
    # for each A->B pair only take the one that is closest together
    decay = 0.5**(1.0 / half_life)
    lift_dict = defaultdict(float)
    for client_id, client_visits in all_visits.iteritems():
        number_of_visits = len(client_visits)
        visit_days = sorted(client_visits.keys())
        client_pairs = {}
        for i in xrange(number_of_visits):
            current_date = visit_days[i]
            current_issues = client_visits[current_date]
            for issue_A in current_issues:
                for future_date in visit_days[i + 1:]:
                    gap = (future_date - current_date).days
                    future_issues = client_visits[future_date]
                    for issue_B in future_issues:
                        if issue_A != issue_B:
                            client_pairs[(issue_A, issue_B)] = min(gap, client_pairs.get((issue_A, issue_B), 1e100))
        # Turn the gaps into weights and add to the lift dict
        for pair, gap in client_pairs.iteritems():
            lift_dict[pair] += decay**gap
    # Turn the appearance counts into lifts
    for (issue_A, issue_B), count in lift_dict.iteritems():
        p_issues_A_B = beta.rvs(1 + count, 1 + issue_count[issue_B] - count, size=10000)
        p_issue_A = beta.rvs(1 + issue_count[issue_A], 1 + number_of_clients - issue_count[issue_A], size=10000)
        lift_dict[(issue_A, issue_B)] = np.median((p_issues_A_B - p_issue_A) / p_issue_A) + 1
    # Filter out the pairs below the threshold
    lift_dict = {pair: lift for pair, lift in lift_dict.iteritems() if lift > min_lift}
    return lift_dict
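# Hedged, standalone sketch of the posterior step at the end of get_issue_lift above: the
# decay-weighted A->B count is turned into a Beta posterior and compared against the base
# rate of issue A, and the median relative difference (+1) is reported as the lift.
# All counts below are invented toy numbers, not data from the original project.
import numpy as np
from scipy.stats import beta

number_of_clients = 500
count_A = 120        # clients who ever raised issue A
count_B = 80         # clients who ever raised issue B
pair_count = 30.0    # decay-weighted count of A -> B transitions

p_pair = beta.rvs(1 + pair_count, 1 + count_B - pair_count, size=10000)
p_A = beta.rvs(1 + count_A, 1 + number_of_clients - count_A, size=10000)
lift = np.median((p_pair - p_A) / p_A) + 1
print("lift(A -> B) =", lift)  # > 1 suggests A indicates a future need for B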
def sample_posteriors(dataA, dataB):
    samplesA = beta.rvs(ALPHA_ + dataA['conversions'],
                        BETA_ + dataA['total'] - dataA['conversions'],
                        size=1000)
    samplesB = beta.rvs(ALPHA_ + dataB['conversions'],
                        BETA_ + dataB['total'] - dataB['conversions'],
                        size=1000)
    return samplesA, samplesB
def simulation(a, b, c, d, samples=100000, prob_diff=0.0, significance_level=0.95,
               ensure_convergence=False, ensure_samples=10000000, ensure_radius=0.005):
    """For what it's worth... Not as accurate or efficient as the integration,
    but still fun to play with"""
    difference = beta.rvs(a + 1, c + 1, size=samples) - beta.rvs(b + 1, d + 1, size=samples)
    successes = (difference > prob_diff).sum()
    result = successes / samples
    if ensure_convergence and abs(result - significance_level) < ensure_radius:
        # re-run with a larger sample when the estimate sits close to the decision boundary
        result = simulation(a, b, c, d, samples=ensure_samples, prob_diff=prob_diff,
                            ensure_convergence=False)
    return result
def parse_distribution(params, dist=False):
    distribName = params[0]
    params[1], params[2] = float(params[1]), float(params[2])
    if distribName == 'norm':
        if len(params) != 3:
            displaymessage("Normal distribution takes two parameters", end_execution=False)
            out = 1
        loc, scale = params[1], params[2]
        if dist:
            out = lambda x: norm.rvs(loc, scale)
        else:
            loop_counter = 0
            out = norm.rvs(loc, scale)
            while (out < 0):
                out = norm.rvs(loc, scale)
                loop_counter += 1
                if loop_counter > 100:
                    displaymessage("Distribution too far in the negative. Set to default 1.", end_execution=False)
                    out = 1
                    break
    elif distribName == 'uniform':
        # constant between loc and loc+scale
        if len(params) != 3:
            displaymessage("Uniform distribution takes two parameters", end_execution=False)
            out = 1
        loc, scale = params[1], params[2]
        if (loc + scale <= 0):
            displaymessage("Distribution lies entirely to the left of the y-axis. Changed to default value 1.", end_execution=False)
            out = 1
        else:
            if loc < 0:
                displaymessage("Uniform distribution takes negative values.", end_execution=False)
                loc = max(loc, 0)  # clamp to zero and still draw a value below
            if dist:
                out = lambda x: uniform.rvs(loc, scale)
            else:
                out = uniform.rvs(loc, scale)
    elif distribName == 'beta':
        if len(params) != 5:
            displaymessage("Beta distribution takes four parameters", end_execution=False)
            out = 1
        a, b, loc, scale = params[1], params[2], float(params[3]), float(params[4])
        if dist:
            out = lambda x: beta.rvs(a, b, loc, scale)
        else:
            out = beta.rvs(a, b, loc, scale)
    return out
def sample_dist_beta(nsample, median, ll_cl, up_cl, blur=50, assume='norm', func='', func_args=[3], source=0):
    '''
    Use beta distribution to add skew.
    '''
    window = up_cl - ll_cl
    sd_s = (window) / 2
    rate = (median - ll_cl) / window
    print(rate)
    t = np.pi / 2
    a = np.sin(rate * t) * blur
    b = np.cos(rate * t) * blur
    f = beta.rvs(a, b, size=nsample)
    if not source:
        f = f * window + ll_cl
    if func:
        f = [func(x, *func_args) for x in f]
    return f
def _parse_random_params(prior_a, prior_c, prior_b, prior_d):
    """Interprets parameters as random variables of parameters.

    Args:
        prior_a, prior_c, prior_b (tuple of floats): Parameters of Beta random variables.
        prior_d (tuple of floats): Parameters of Uniform random variable.
    Returns:
        (tuple (4) of floats): Parameters a, c, b, d.
    """
    # random draws
    a = beta.rvs(*prior_a, size=1)[0]
    c = beta.rvs(*prior_c, size=1)[0]
    b = beta.rvs(*prior_b, size=1)[0]
    d = uniform.rvs(*prior_d, size=1)[0]
    # return parameters
    return a, c, b, d
def generate_samples(dist, curr_params, n):
    """
    This method generates a sample from a given distribution with given parameters

    :param dist: str
        Type of distribution from which to draw a sample
    :param curr_params: tuple[float, float]
        Parameters for a distribution
    :param n: int
        Length of generated sample
    :return: tuple[float, float]
        A pair (quantile skewness, quantile kurtosis) which can be viewed as a point
        and act as a representation of the generated sample on a Pearson System
    """
    if dist == 'beta':
        a, b = curr_params
        curr_sample = beta.rvs(a, b, size=n)
    elif dist == 'betaprime':
        a, b = curr_params
        curr_sample = betaprime.rvs(a, b, size=n)
    else:
        return (None, None)
    curr_sample.sort()
    curr_e = [curr_sample[int(j * n)] for j in OCTILES]
    curr_s = (curr_e[5] - 2 * curr_e[3] + curr_e[1]) / (curr_e[5] - curr_e[1])
    curr_t = (curr_e[6] - curr_e[4] + curr_e[2] - curr_e[0]) / (curr_e[5] - curr_e[1])
    return (curr_s, curr_t)
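# Hedged, standalone illustration of the octile statistics computed by generate_samples above.
# OCTILES is not shown in that snippet; here it is assumed to be the seven octile fractions 1/8..7/8,
# and the Beta(2, 5) parameters are arbitrary toy values.
import numpy as np
from scipy.stats import beta

OCTILES = [i / 8 for i in range(1, 8)]
n = 10000
sample = np.sort(beta.rvs(2.0, 5.0, size=n))
e = [sample[int(j * n)] for j in OCTILES]
quantile_skewness = (e[5] - 2 * e[3] + e[1]) / (e[5] - e[1])
quantile_kurtosis = (e[6] - e[4] + e[2] - e[0]) / (e[5] - e[1])
print(quantile_skewness, quantile_kurtosis)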
def FisherDistribution2(k, n, m, ele=0, azi=0):
    # m is the number of dimensions
    b = (-2 * k + np.sqrt(4 * k**2 + (m - 1)**2)) / (m - 1)
    x0 = (1 - b) / (1 + b)
    c = k * x0 + (m - 1) * np.log(1 - x0**2)
    numvec = 0
    cartesian_sample = np.zeros((n, m))
    while numvec < n:
        Z = beta.rvs((m - 1) / 2, (m - 1) / 2, size=1)
        U = np.random.uniform(0, 1, 1)
        W = (1 - (1 + b) * Z) / (1 - (1 - b) * Z)
        if k * W + (m - 1) * np.log(1 - x0 * W) - c < np.log(U):
            continue
        else:
            theta = 2 * np.pi * np.random.uniform(0, 1, 1)[0]
            V = np.array([np.cos(theta), np.sin(theta)])  # 2d vector
            X = np.concatenate((np.sqrt(1 - W**2) * V, W))
            cartesian_sample[numvec, :] = X
            numvec += 1  # count the free rows in cartesian, when fully filled - quit
    if ele == 0 and azi == 0:
        pass
    else:
        cartesian_sample = cartesian_sample.T
        transformed = Rz(azi, Ry(ele, cartesian_sample))
        cartesian_sample = transformed.T
    return cartesian_sample
def update_current_sample(n_clicks):
    # first sample from current posterior beta distribution
    p = beta.rvs(a, b, size=1)[0]
    print("Sample: " + str(p))
    return json.dumps({'sample': p})
def generate_dataset(num_points, true_k, dim=2, verbose=False):
    """
    Generates num_points datapoints with true_k clusters
    :param num_points: Number of data points
    :param true_k: Number of clusters in the generated data
    :param dim: dimensionality of the data space
    :param verbose: print stuff
    :return: dataset in [num_points, dim]
    """
    # Construct a dataset
    true_z = [i % true_k for i in range(num_points)]
    # I like to sample from the peaked beta distro to get more interpretable results.
    # But feel free to sample from the random uniform
    if False:
        probs_per_cluster = np.random.rand(dim, true_k)
    else:
        probs_per_cluster = beta.rvs(0.5, 0.5, size=(true_k, dim))
    if verbose:
        print('Probabilities for the generated data')
        for i, trueprobs in enumerate(probs_per_cluster):
            print('Cluster %3i with %s' % (i, ' - '.join(['%5.1f' % prob for prob in trueprobs])))
        print('-' * 30)
    probs = probs_per_cluster[true_z, :]
    # Sample from the probabilities
    data = (probs > np.random.rand(num_points, dim)).astype(np.float32)
    return data
def plotter(being: str, name: str, axis, data: dict):
    # Color-blind friendly palette
    colors = {
        'forest': '#d7191c',
        'network': '#fdae61',
        'random': '#abd9e9',
        'humans': '#2c7bb6',
    }
    # Beta distribution generated from the measured predictive quality
    ypred = beta.rvs(
        1 + data[f'total {name}'][0] * data[name][0],
        1 + data[f'total {name}'][0] * (1 - data[name][0]),
        size=4000,
    )
    # plot with Seaborn, which bins automatically
    sns.distplot(100 * ypred, bins=None, hist=True, kde=False,
                 label=f'{being}', ax=axis, color=colors[being])
    return
def search(self, state, color, depth, subtree):
    """
    path: if you choose 19 from enable=[13,19,20,21] (choose enable[1])
    and the opponent then chooses 32 from enable=[14,24,32,53] (enable[2]),
    the path is [1,2]
    """
    enable = reversi.getPossiblePoints(state, color)
    if depth == self.depth + 1:
        # no thinking (simulate)
        if len(enable) == 0:
            return self.simulate(state, 65, color)
        row, line = random.choice(enable)
        return self.simulate(state, row * 8 + line, color)
    if len(enable) == 0:
        return self.search(state, color ^ 1, depth + 1, subtree)
    if len(subtree) == 0:
        # first visit
        subtree.extend([[0, 0, []] for _ in enable])
    wins = np.array([node[0] for node in subtree])
    loses = np.array([node[1] for node in subtree])
    values = beta.rvs(wins + 1, loses + 1)
    choice = values.argmax()
    row, line = enable[choice]
    reversi.putStone(state, row, line, color)
    r = self.search(state, color ^ 1, depth + 1, subtree[choice][2])
    if r == color:
        subtree[choice][0] += 1
    else:
        subtree[choice][1] += 1
    return r
def get_particle_from_state(self, state, obs):
    """ Returns a particle from this state, as well as the log_density of this particle """
    sample_means = beta.rvs(state['successes'], state['failures'])
    log_density = np.sum(beta.logpdf(sample_means, state['successes'], state['failures']))
    return sample_means, log_density
def lichess_computer(self, difficulty=10):
    url = 'https://www.chessdb.cn/cdb.php?action=queryall&board=' + self._fen() + '&json=1'
    r = requests.get(url)
    try:
        moves = print_json(r.text)['moves']  # The moves are already ordered based on their score
        board_move = []
        for move in moves:
            board_move.append(move['uci'])
        if len(list(self._board.legal_moves)) == len(board_move):
            # Quick way: the database has that position and every derived one
            alpha = len(board_move)
            beta_v = 1
            pesos = []
            for i in range(alpha):
                r = beta.rvs(alpha, beta_v, size=1)
                beta_v += difficulty
                pesos.append((r[0]**7) * 100)
            pesos = sorted(pesos, reverse=True)
            cum_pesos = np.cumsum(pesos)
            choice = random.choices(board_move, cum_weights=cum_pesos, k=1)[0]
        else:
            choice = self.temp_backup()
    except:
        choice = self.temp_backup()
    self.make_move(choice)
    print(f'{choice} was moved.\n')
def blend(x, context, weights, respect_side=True, ab=0.8):
    if len(x) < 2:
        return x, context, weights
    # the leftover data point, if it exists, is dismissed
    # (it will come up in future epochs, batches are shuffled)
    if x.shape[0] % 2 > 0:
        x = x[:-1]
        context = context[:-1]
        weights = weights[:-1]
    if respect_side:
        side_idx = x[:, 0].sort()[1]
        x = x[side_idx]
        context = context[side_idx]
        weights = weights[side_idx]
    b = torch.tensor(beta.rvs(ab, ab, size=x.shape[0] // 2),
                     device='cuda', dtype=torch.float32).reshape(-1, 1)
    # blending pairs
    blended_x = b * x[::2] + (1 - b) * x[1::2]
    blended_c = b * context[::2] + (1 - b) * context[1::2]
    blended_w = b * weights[::2] + (1 - b) * weights[1::2]
    # the side of the blended data points is collapsed to the closest value
    blended_c[:, 0] = torch.where(b > 0.5, context[::2, 0].reshape(-1, 1),
                                  context[1::2, 0].reshape(-1, 1)).squeeze()
    return blended_x, blended_c, blended_w
def _test_mu_y_and_cov_y(self):
    # NOTE: disable `skip_nullspace` for this test
    alpha = 40.0
    beta = 20.0
    ln_sigma_mu = np.log(0.1)
    ln_sigma_sigma = 0.0
    ln_amp_mu = np.log(0.2)
    ln_amp_sigma = 0.0
    self.set_hyperparams(alpha, beta, ln_sigma_mu, ln_sigma_sigma, ln_amp_mu, ln_amp_sigma)
    x = -np.exp(ln_amp_mu)
    amp = x / (x - 1)
    sigma = np.exp(ln_sigma_mu)
    nsamples = 100000
    np.random.seed(1)
    y = np.zeros((nsamples, (self.ydeg + 1)**2))
    for n in range(nsamples):
        lat = np.arccos(Beta.rvs(alpha, beta)) * 180.0 / np.pi
        lat *= 2.0 * (int(np.random.random() > 0.5) - 0.5)
        lon = 360.0 * np.random.random()
        y[n] = self.y([amp], [sigma], [lat], [lon])
    mu_y_num = np.mean(y, axis=0)
    cov_y_num = np.cov(y.T)
    # Compare
    assert np.allclose(mu_y_num, self.mu_y, atol=1e-3)
    assert np.allclose(cov_y_num, self.cov_y, atol=1e-3)
def sample_one(self):
    xs = []
    for a, b in zip(self.alphas, self.betas):
        xs.append(beta.rvs(a, b, size=1)[0])
    this_choice = argmax(xs)
    logger.debug("a, b, [x_0, x_1, ...], this_choice: %s %s %s %s", a, b, xs, this_choice)
    return this_choice
def sample(self, X, n_samples=10):
    n_items, _ = X.shape
    betas = self.get_betas(X)
    samples = beta.rvs(a=betas[:, 1], b=betas[:, 0], size=(n_samples, n_items))
    return samples
def evaluate(self, node_list):
    samples = [
        beta.rvs(a=node.win_count + 1, b=node.visit_count - node.win_count + 1, size=1)[0]
        for node in node_list
    ]
    return node_list[np.argmax(samples)]
def generate_data(self, N):
    """
    Generates labels associated with the features
    :param N: number of 1-D feature vectors needed
    :return: N x 1 numpy array with feature vectors, N x 1 numpy array with labels
    """
    # Generate features
    features = np.array(beta.rvs(a=self.alpha, b=self.beta, size=N))
    features.resize((N, 1))
    # Generate probabilities
    probability = self.label_function(features)
    # Generate noise to add to the probabilities
    random_noise = np.random.normal(loc=0, scale=self.noise, size=features.shape[0])
    random_noise.resize((features.shape[0], 1))
    probability += random_noise
    probability[probability < 0] = 0
    probability[probability > 1] = 1
    # Generate labels with a Bernoulli draw
    labels = np.array([np.random.binomial(n=1, p=probability[n:n + 1]) for n in range(N)])
    labels.resize((N, 1))
    return features, labels
def simulate_stupidDPM(iter_num, M):
    # Generate mixture sample
    N = 1000
    mu = [0.0, 10.0, 3.0]
    components = np.random.choice(range(3), size=N, replace=True, p=[0.3, 0.5, 0.2])
    samples = [norm.rvs(size=1, loc=mu[components[i]], scale=1)[0] for i in range(N)]
    ## Sample G from DP(M, G0)
    v = beta.rvs(a=1.0, b=M, size=N)
    prob_vector = np.append(np.array(v[0]), v[1:] * np.cumprod(1.0 - v[:-1]))
    thetas = norm.rvs(size=N, loc=1.0, scale=1.0)
    ### Initialize thetas
    thetas = np.random.choice(thetas, size=N, replace=True, p=prob_vector)
    ### Start MCMC chain
    for i in xrange(iter_num):
        for j in xrange(N):
            theta_temp = np.append(thetas[:j], thetas[j + 1:])
            p = np.append(norm.pdf(samples[j], loc=theta_temp, scale=1.0),
                          M * norm.pdf(samples[j], loc=1.0, scale=np.sqrt(2.0)))
            p = p / sum(p)
            temp = np.random.choice(np.append(theta_temp, N), size=1, replace=True, p=p)
            if (temp == N):
                thetas[j] = norm.rvs(size=1, loc=0.5 * (samples[j] + 1), scale=np.sqrt(0.5))
            else:
                thetas[j] = temp
        print(thetas)
    return {"thetas": thetas, "y": samples}
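# Standalone sketch of the stick-breaking construction used above to draw mixture weights
# from a DP(M, G0): v_k ~ Beta(1, M), w_k = v_k * prod_{j<k} (1 - v_j). The concentration M
# and truncation level K below are toy values, not taken from the original code.
import numpy as np
from scipy.stats import beta

M, K = 2.0, 20
v = beta.rvs(1.0, M, size=K)
weights = np.append(v[0], v[1:] * np.cumprod(1.0 - v[:-1]))
print(weights.sum())  # close to 1 for large K; the remainder is the un-broken stick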
def get_iops(iops, duration):
    iops_vals = [-1]
    if iops['randvar'] == 'normal':
        assert ('mean' in iops.keys() and 'stddev' in iops.keys()), \
            "Error: invalid parameters for normal distribution!"
        while min(iops_vals) < 0:
            iops_vals = norm.rvs(size=duration, loc=iops['mean'], scale=iops['stddev']).astype(int).tolist()
    elif iops['randvar'] == 'uniform':
        assert ('min' in iops.keys() and 'max' in iops.keys()), \
            "Error: invalid parameters for uniform distribution!"
        while min(iops_vals) < 0:
            iops_vals = uniform.rvs(size=duration, loc=iops['min'], scale=iops['max']).astype(int).tolist()
    elif iops['randvar'] == 'poisson':
        assert ('mu' in iops.keys()), \
            "Error: invalid parameters for poisson distribution!"
        while min(iops_vals) < 0:
            iops_vals = poisson.rvs(size=duration, mu=iops['mu']).astype(int).tolist()
    elif iops['randvar'] == 'beta':
        assert ('alpha' in iops.keys() and 'beta' in iops.keys() and
                'shift' in iops.keys() and 'scale' in iops.keys()), \
            "Error: invalid parameters for beta distribution!"
        while min(iops_vals) < 0:
            iops_vals = beta.rvs(size=duration, a=iops['alpha'], b=iops['beta'],
                                 loc=iops['shift'], scale=iops['scale']).astype(int).tolist()
    else:
        print("Error: invalid random distribution!")
        exit(0)
    print('Min IOPS: ' + str(min(iops_vals)))
    print('Max IOPS: ' + str(max(iops_vals)))
    return iops_vals
def thompson_sample(self, pred_ctr, k, max_pos, weight_type='propensity'):
    """
    :param pred_ctr: list of predicted ctr
    :param k: position to explore (counted from 1)
    :param max_pos: maximum position of an explore candidate (counted from 1)
    :param weight_type: 'propensity' or 'multinomial'; otherwise assign unit weight
    :return: index of the item to explore (counted from 0);
             index of the bucket/arm (counted from 0);
             the chosen arm's weight (propensity weight or multinomial weight)
    """
    assert 10 >= max_pos > k > 1
    active_arms = []
    for i, score in enumerate(pred_ctr[k - 1:max_pos]):
        assert 0 <= score <= 1
        temp = dict()
        temp['exp_idx'] = i + k - 1
        bucket_idx = int(score * self.bucket_size) % self.bucket_size
        temp['bucket_idx'] = bucket_idx
        temp['ts_score'] = beta.rvs(self.buckets[bucket_idx]['a'], self.buckets[bucket_idx]['b'])
        active_arms.append(temp)
    arm_chosen = max(active_arms, key=lambda x: x['ts_score'])
    self.exp_times += 1
    self.exp_times_each[arm_chosen['bucket_idx']] += 1
    if weight_type == 'propensity':
        weight = 1.0 * self.exp_times / self.exp_times_each[arm_chosen['bucket_idx']]
    elif weight_type == 'multinomial':
        weight = 1.0 * sum(pred_ctr[k - 1:max_pos]) / pred_ctr[arm_chosen['exp_idx']]
    else:
        weight = 1
    return arm_chosen['exp_idx'], arm_chosen['bucket_idx'], weight
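# Standalone sketch of the Thompson-sampling core used above: each arm keeps a Beta(a, b)
# posterior over its click rate, one draw per arm is taken, and the largest draw is played.
# This is a simplified loop with invented click rates, not the bucketed class above.
import numpy as np
from scipy.stats import beta

rng = np.random.default_rng(0)
true_ctr = [0.04, 0.06, 0.05]
a = np.ones(3)  # successes + 1
b = np.ones(3)  # failures + 1
for _ in range(1000):
    draws = beta.rvs(a, b, random_state=rng)
    arm = int(np.argmax(draws))
    click = rng.random() < true_ctr[arm]
    a[arm] += click
    b[arm] += 1 - click
print("posterior means:", a / (a + b))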
def bandit(self, stop_alpha=0.05, stop_value=0.95, iterat=1000):
    """
    Run the bandit.
    Stop criteria:
    1st: use Bayes' theorem to compute the probability that a variation beats the others;
    if we are 95% sure that a variation beats the others then "a winner has been found".
    2nd: potential value remaining in the experiment - the "value remaining" is the amount
    of increased conversion rate we could get by switching away from the current champion.
    """
    simul_m = np.zeros((10000, self.n_arm))
    stop_first = np.zeros((10000, 1))
    while ((iterat is not None) and (iterat >= self.k)):
        self.choose_arm()
        for i in range(10000):
            simul_m[i] = beta.rvs(1 + self.reward, 1 + 1 * self.count - self.reward)
            stop_first[i] = np.argmax(simul_m[i])
        unique, counts = np.unique(stop_first, return_counts=True)
        arm_prob = np.array((unique, counts / 10000.0), dtype='float64').T
        opt_arm = int(arm_prob[np.argmax(arm_prob[:, 1], axis=0), 0])
        stop_second = np.percentile((np.max(simul_m, axis=1) - simul_m[:, opt_arm]) / simul_m[:, opt_arm],
                                    stop_value * 100)
        if np.max(arm_prob[:, 1]) >= (1 - stop_alpha):
            opt_arm = arm_prob[np.argmax(arm_prob[:, 1], axis=0), 0]
            print('The winner has been found! Arm number {} has been found to be optimal at '
                  'significance level {} after {} page views.'.format(opt_arm, stop_alpha, self.k))
            break
        elif arm_prob[np.argmax(arm_prob[:, 1], axis=0), 1] * 0.01 >= stop_second:
            print('The winner has been found! Arm number {} has been found to be optimal: with {}% '
                  'probability, the value remaining in the experiment is less than a 1% possible '
                  'improvement, after {} page views.'.format(opt_arm, stop_value * 100, self.k))
            break
        elif iterat == self.k:
            print('After {} iterations, the winning arm is number {}.'.format(iterat, opt_arm))
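# Minimal sketch of the "value remaining" criterion referenced above: draw posterior samples
# for every arm, find the current champion, and take the 95th percentile of the potential
# relative improvement over it. Click/view counts below are invented toy data.
import numpy as np
from scipy.stats import beta

clicks = np.array([42, 55, 47])
views = np.array([1000, 1000, 1000])
samples = beta.rvs(1 + clicks, 1 + views - clicks, size=(10000, 3))
prob_best = (samples == samples.max(axis=1, keepdims=True)).mean(axis=0)
champion = int(np.argmax(prob_best))
value_remaining = (samples.max(axis=1) - samples[:, champion]) / samples[:, champion]
print("P(best) per arm:", prob_best)
print("95th percentile of value remaining:", np.percentile(value_remaining, 95))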
def simulation(n_seeds, n_targets, n_signals):
    mixing_matrix = np.random.rand(n_seeds, n_signals)
    mixing_matrix = mixing_matrix / np.sum(mixing_matrix**2, axis=0)
    signals = np.zeros((n_signals, n_targets))
    # beta parameters from fit to real aptx data
    a = np.absolute(normal(0.25, 0.1, n_signals))
    b = np.absolute(normal(640, 550, n_signals))
    scale = np.absolute(normal(0.32, 0.26, n_signals))
    # generate signals with beta distributions
    for i in range(n_signals):
        signals[i, :] = beta.rvs(a[i], b[i], loc=0, scale=scale[i], size=n_targets)
    # rescale
    signals = np.exp(signals) - 1
    # normalise
    signals = signals / (np.amax(signals, axis=1, keepdims=True) + 0.001)
    return signals, mixing_matrix
def sample_lam(alpha, reformulate=False):
    """
    Sample a lambda from a symmetric beta distribution with given alpha
    :param alpha: Alpha value for the beta distribution
    :param reformulate: If True, uses the reformulation of [1].
    """
    # rvs(a, b, loc=mean, scale=std, size=number of draws) generates the given number of
    # random samples from the distribution
    if reformulate:
        lam = beta.rvs(alpha + 1, alpha)
    else:
        lam = beta.rvs(alpha, alpha)
    return lam
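# Hedged usage sketch: lambda values drawn this way are typically used for mixup-style blending
# of two inputs, e.g. mixed = lam * x1 + (1 - lam) * x2. The helper name and toy arrays below
# are illustrative, not part of the original project.
import numpy as np
from scipy.stats import beta

def mixup_pair(x1, x2, alpha=0.2):
    lam = beta.rvs(alpha, alpha)  # symmetric Beta(alpha, alpha) draw in [0, 1]
    return lam * x1 + (1 - lam) * x2, lam

mixed, lam = mixup_pair(np.zeros(4), np.ones(4))
print(lam, mixed)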
def Prior_MCsim(self):
    print('Generate ' + str(self.size) + ' samples for ' + self.name + ' distribution')
    if self.name == 'Beta':
        return beta.rvs(a=self.par[0], b=self.par[1], size=self.size, random_state=self.random_state)
    else:
        print("Not Available")
        return []
def switching_binomial_motion(N_trials, N_blocks, tau, seed, Jeffreys=True, N_layer=3):
    """
    A 3-layered model for generating samples.

    About Jeffreys' prior, see https://en.wikipedia.org/wiki/Jeffreys_prior
    """
    if Jeffreys:
        from scipy.stats import beta
    np.random.seed(seed)
    trials = np.arange(N_trials)
    p = np.random.rand(N_trials, N_blocks, N_layer)
    for trial in trials:
        # drawing all switches
        p[trial, :, 2] = np.random.rand(1, N_blocks) < 1. / tau
        if Jeffreys:
            p_random = beta.rvs(a=.5, b=.5, size=N_blocks)
        else:
            p_random = np.random.rand(1, N_blocks)
        # drawing all probability biases
        p[trial, :, 1] = (1 - p[trial, :, 2]) * p[trial - 1, :, 1] + p[trial, :, 2] * p_random
        # drawing all samples
        p[trial, :, 0] = p[trial, :, 1] > np.random.rand(1, N_blocks)
    return (trials, p)
def iterate(number_of_ones, alpha):
    ones = []  # count of x in every run
    runs = args.runs
    learning = args.learning
    # expt = args.exponent
    for r in range(runs):
        if learning == "sample":
            language = beta.rvs(alpha + number_of_ones, alpha + (10 - number_of_ones))  # sampling
        elif learning == "max":
            language = (alpha + number_of_ones - 1) / (alpha * 2 + 10 - 2)  # maximising
        elif learning == "avg":
            language = (alpha + number_of_ones) / (alpha * 2 + 10)  # averaging
        data = [produce(language) for _ in range(10)]  # one list of 01s
        # print data
        count_of_ones = float(data.count(1))
        ones.append(count_of_ones)
        # if r < 10:
        #     print number_of_ones
    # print "list of ones: ", ones[1:10]
    # dictionary with x_possible_values: freqs(x), ordered by n_of_x
    d = {}
    for c in ones:
        count = ones.count(c)
        d[c] = count
    # print "dictionary: ", d.items()[1:10]
    # get probabilities of proportion_of_ones as a list of tuples (n, prob(n))
    prob = [(n, float(freq) / len(ones)) for n, freq in d.items()]
    return prob
def property_beta_distribution(n_props, beta_prop_1, beta_prop_2):
    # Samples from beta probability distribution
    # Output: matrix of probabilities (size n_props)
    property_probs = beta.rvs(beta_prop_1, beta_prop_2, size=n_props)
    probabilities = np.array(property_probs)
    probs_shape = probabilities.reshape(1, n_props)
    return probs_shape
def gene_beta_distribution(n_genes, beta_gene_1, beta_gene_2):
    # Samples from beta probability distribution
    # Output: matrix of probabilities (size n_genes)
    gene_probs = beta.rvs(beta_gene_1, beta_gene_2, size=n_genes)
    genes = np.array(gene_probs)
    properties_genes = genes.reshape(n_genes, 1)
    return properties_genes
def reference_sim(self, A, classified, labels):
    num_centers = len(set(labels))
    small = .0000000000001
    ideal_A = np.zeros([A.shape[0], A.shape[1]])
    for i in range(0, len(labels)):
        for j in range(0, i + 1):
            if labels[i] == labels[j]:
                ideal_A[i, j] = 1
                ideal_A[j, i] = 1
    pred_pos = A[ideal_A == 1]
    pred_neg = A[ideal_A == 0]
    pos_a, pos_b, pos_loc, pos_scale = beta.fit(pred_pos)
    neg_a, neg_b, neg_loc, neg_scale = beta.fit(pred_neg)
    fits = []
    # Fit comparison with more than 1 cluster
    for sim in range(0, 50):
        simulated_mat = np.ones([A.shape[0], A.shape[1]])
        for i in range(0, len(labels)):
            for j in range(0, i):
                if ideal_A[i, j] == 0:
                    simulated_mat[i, j] = simulated_mat[j, i] = beta.rvs(
                        max(neg_a, small), max(small, neg_b), loc=neg_loc, scale=neg_scale)
                else:
                    simulated_mat[i, j] = simulated_mat[j, i] = beta.rvs(
                        max(pos_a, small), max(small, pos_b), loc=pos_loc, scale=pos_scale)
        self.one_clust_test = False
        whereAreNaNs = np.isnan(simulated_mat)
        simulated_mat[whereAreNaNs] = 0
        self.fit(simulated_mat)
        # print simulated_mat
        fits.append(self.gap_stat_)
    multi_fit = np.mean(fits)
    fits_one = []
    pos_a, pos_b, pos_loc, pos_scale = beta.fit(A)
    for sim in range(0, 50):
        simulated_mat = np.ones([A.shape[0], A.shape[1]])
        for i in range(0, len(labels)):
            for j in range(0, i):
                simulated_mat[i, j] = simulated_mat[j, i] = beta.rvs(
                    max(small, pos_a), max(small, pos_b), loc=pos_loc, scale=pos_scale)
        whereAreNaNs = np.isnan(simulated_mat)
        simulated_mat[whereAreNaNs] = 0
        e_vals, e_vecs = np.linalg.eigh(simulated_mat)
        # 2. Get Reverse Sorted Order - largest to smallest
        e_order = np.argsort(e_vals)[::-1]
        self.one_clust_fit_alt(e_vecs, e_order)
        fits_one.append(self.gap_stat_)
    one_fit = np.mean(fits_one)
    return multi_fit, one_fit
def ep2_rvs(mu, sigma, alpha, size=1):
    u = uniform.rvs(loc=0, scale=1, size=size)
    b = beta.rvs(1. / alpha, 1 - 1. / alpha, size=size)
    r = np.sign(uniform.rvs(loc=0, scale=1, size=size) - .5)
    z = r * (-alpha * b * np.log(u))**(1. / alpha)
    return z
def beta_proposal(current, var):
    alpha_beta_fwd = jmutils.beta_shape(current, var)
    proposed = beta.rvs(*alpha_beta_fwd)
    fwd_prob = beta.pdf(proposed, *alpha_beta_fwd)
    alpha_beta_back = jmutils.beta_shape(proposed, var)
    back_prob = beta.pdf(current, *alpha_beta_back)
    log_back_fwd = math.log(back_prob / fwd_prob)
    return proposed, log_back_fwd
def __init__(self, n_arms, random_seed=None):
    if random_seed is not None:
        np.random.seed(random_seed)
    self.arm_probs = {}
    self.arm_features = []
    temp_val_list = []
    scaled_vals = []
    self.arm_probs = beta.rvs(1, 1, size=n_arms)
def sample_an_arm():
    max_sample = float('-inf')
    best_arm = None
    for arm in range(0, arm_num):
        r = beta.rvs(prior[arm][0], prior[arm][1])
        # print r
        if r > max_sample:
            max_sample = r
            best_arm = arm
    return best_arm
def get_pi(cl_ind, g_values, data):
    cl = [cl_ind * 2, cl_ind * 2 + 1]
    count_p, count_m = 0, 0
    for obj in cl:
        for g in g_values[obj][1]:
            if g == 1:
                count_p += 1
            else:
                count_m += 1
    pi_new = beta.rvs(count_p + gamma1, count_m + gamma2, size=1)[0]
    return pi_new
def _make_data(self):
    self.Z = []
    self.counts = []
    self.Z.append(1)
    self.counts.append(1)
    for n in np.arange(1, self.n_peaks):
        # List concatenation
        temp = np.array(self.counts + [self.hyper_pars.conc_par])
        temp = temp / temp.sum()
        prob = temp.cumsum()
        pos = np.nonzero(np.random.rand() < prob)[0][0]
        self.Z.append(pos)
        if pos >= len(self.counts):
            self.counts.append(1)
        else:
            self.counts[pos] += 1
    self.Z = np.sort(self.Z)
    self.K = np.max(self.Z)
    self.intensities = []
    for n in np.arange(self.n_peaks):
        self.intensities.append(np.random.rand())
    # self.corr_mat = np.zeros((self.n_peaks, self.n_peaks))
    self.corr_mat = lil_matrix((self.n_peaks, self.n_peaks), dtype=np.float64)
    for n in np.arange(self.n_peaks - 1):
        this_cluster = self.Z[n]
        for m in np.arange(n + 1, self.n_peaks):
            if self.Z[m] == this_cluster:
                if np.random.rand() < self.hyper_pars.in_prob:
                    this_val = beta.rvs(self.hyper_pars.in_alpha, self.hyper_pars.in_beta)
                else:
                    this_val = 0
            else:
                if np.random.rand() < self.hyper_pars.out_prob:
                    this_val = beta.rvs(self.hyper_pars.out_alpha, self.hyper_pars.out_beta)
                else:
                    this_val = 0
            if this_val > 0:
                self.corr_mat[n, m] = this_val
                self.corr_mat[m, n] = this_val
def draw_from_beta_distributions(alphas, betas, possible_pages):
    max_id = 0
    max_ = 0
    for i in range(alphas.size):
        beta_outcome = float(beta.rvs(alphas[i], betas[i]) * float(possible_pages[i]['price'] / 50))
        if beta_outcome > max_:
            max_ = beta_outcome
            max_id = i
    return max_id
def run_model(self, *args, **kwargs):
    groups = kwargs.get('groups', self.groups)
    samples = kwargs.get('samples', self.samples)
    df = kwargs.get('df', self.df)
    for group in groups:
        group_data = df[df[self.groupcol] == group]
        total = group_data[self.totalcol]
        successes = group_data[self.successcol]
        mc_data = beta.rvs(successes + self.alpha_,
                           total - successes + self.beta_,
                           size=samples)
        self.distributions[group] = mc_data
def beta_proposal_old(current, var):
    proposed = 0
    tries = 0
    while (proposed == 0) or (proposed == 1):
        proposed = beta.rvs(c * current, c * (1 - current))
        tries += 1
        if (tries > 1000):
            print("Sampler is jammed")
            1 / 0  # deliberately crash to stop a jammed sampler
    fwd_prob = beta.pdf(proposed, c * current, c * (1 - current))
    back_prob = beta.pdf(current, c * proposed, c * (1 - proposed))
    log_back_fwd = math.log(back_prob / fwd_prob)
    return proposed, log_back_fwd
def sample(self, m):
    """
    Samples m samples from the current Kumaraswamy distribution.

    :param m: Number of samples to draw.
    :type m: int.
    :rtype: natter.DataModule.Data
    :returns: A Data object containing the samples
    """
    return Data(self.param['B'] * beta.rvs(1, self.param['b'], size=m)**(1 / self.param['a']),
                '%i samples from %s' % (m, self.name))
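# Standalone sketch of the identity used above: if Y ~ Beta(1, b), then Y**(1/a) follows a
# Kumaraswamy(a, b) distribution (CDF 1 - (1 - x**a)**b). The parameters below are toy values
# chosen for illustration.
import numpy as np
from scipy.stats import beta, kstest

a_par, b_par = 2.0, 3.0
samples = beta.rvs(1, b_par, size=100000) ** (1.0 / a_par)

def kumaraswamy_cdf(x):
    return 1 - (1 - x**a_par)**b_par

print(kstest(samples, kumaraswamy_cdf))  # a large p-value is consistent with Kumaraswamy(a, b)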
def __call__(self, nvot, ncand, vType=DimVoter):
    """Tests? Making statistical tests that would pass reliably is a huge hassle.
    Sorry, maybe later.
    """
    vType.resetClusters()
    e = self.builtElectorate()
    e.dcs = []  # number of dimensions in each dc
    e.dimWeights = []  # raw importance of each dimension, regardless of dc
    clusterWeight = 1
    while clusterWeight > self.dccut:
        dimweight = clusterWeight
        dimnum = 0
        while dimweight > self.wccut:
            e.dimWeights.append(dimweight)
            dimnum += 1
            dimweight *= beta.rvs(*self.wcdecay)
        e.dcs.append(dimnum)
        clusterWeight *= beta.rvs(*self.dcdecay)
    e.numClusters = len(e.dcs)
    e.numSubclusters = [0] * e.numClusters
    e.chooseClusters(nvot + ncand, self.wcalpha, lambda: beta.rvs(*self.vccaring))
    return self.makeElectorate(e, nvot, ncand, vType)
def plot2(_list, chr_brps, centromere_brps, line_names=None):
    if not line_names:
        line_names = range(1, _list.shape[0] + 1)
    inflated_table = np.vstack([inflate_tags(x[0, :], 25) for x in np.split(_list, _list.shape[0])])
    gs = gridspec.GridSpec(4, 4)
    ax1 = plt.subplot(gs[:-1, :])
    plt.imshow(inflated_table, interpolation='nearest', cmap='coolwarm')
    show_breakpoints([0] + chr_brps + [_list.shape[1]], 'k')
    show_breakpoints(list(set(centromere_brps) - set(chr_brps)), 'g')
    ax2 = plt.subplot(gs[-1, :], sharex=ax1)
    red_run = np.nanmean((_list > 0).astype(np.float), 0)
    blue_run = np.nanmean((_list < 0).astype(np.float), 0)
    stack = np.hstack((blue_run, red_run))
    mean = np.mean(stack)
    std = np.std(stack)
    _alpha = ((1 - mean) / std**2 - 1 / mean) * mean**2
    _beta = _alpha * (1 / mean - 1)
    r = beta.rvs(_alpha, _beta, size=1000)
    _min, _max = beta.interval(0.95, _alpha, _beta)
    plt.plot(blue_run, 'b')
    plt.plot(red_run, 'r')
    plt.axhline(y=_min, color='g')
    plt.axhline(y=_max, color='g')
    show_breakpoints([0] + chr_brps + [_list.shape[1]], 'k')
    show_breakpoints(list(set(centromere_brps) - set(chr_brps)), 'g')
    chr_arm_locations, chr_arm_names = sf.align_chromosome_edges(chr_brps, centromere_brps)
    ax1.set_xticks(chr_arm_locations)
    ax1.set_xticklabels(chr_arm_names, rotation='vertical')
    ax1.set_yticks(range(0, _list.shape[0] * 25 + 1, 25))
    ax1.set_yticklabels(line_names)
    ax2.set_xticks(chr_arm_locations)
    ax2.set_xticklabels(chr_arm_names, rotation='vertical')
    plt.show()
    smooth_histogram(r, 'b')
    smooth_histogram(stack)
    plt.axvline(x=_max, color='g')
    plt.axvline(x=_min, color='g')
    plt.show()
def get_a(counts, s_ind):
    count_p, count_m = 0, 0
    for obj in counts.keys():
        sources = counts[obj][0]
        if s_ind not in sources:
            continue
        c_ind = sources.index(s_ind)
        c = counts[obj][1][c_ind]
        if c == 1:
            count_p += 1
        else:
            count_m += 1
    a_new = beta.rvs(count_p + alpha1, count_m + alpha2, size=1)[0]
    return a_new
def guess_preferences(self):
    """
    Input: no inputs
    Output: no outputs
    Notes: this function takes the updated score for each metric, computes a beta
    distribution defined by the win/loss scores, samples from each distribution and
    selects the metric with the greatest sampled probability. The winning metric is
    added to recommendation_history as the best guess of the user's preference.
    """
    user_preference = None
    max_prob = 0
    for metric in self.metrics:
        self.params[metric] = (self.scores[metric] + 1,
                               self.pairs_served - self.scores[metric] + 1)
        # sample from the distribution for each metric
        prob = beta.rvs(self.params[metric][0] + 1, self.params[metric][1] + 1)
        if prob > max_prob:
            max_prob = prob
            user_preference = metric
    self.recommendation_history[self.pairs_served]['estimated_user_preference'] = user_preference
def beta_dist(N, Nx, a, b):
    N = int(N)
    xis = beta.rvs(a, b, size=N)
    xmin = 0.0
    xmax = 1.0
    # Create a grid of points and do an initial evaluation of Qtrue
    xgrid = sp.linspace(xmin, xmax, Nx)
    Qtrue = beta.pdf(xgrid, a, b)
    # Remove indices where Qtrue blows up
    indices = (Qtrue > -sp.inf) * (Qtrue < sp.inf)
    Qtrue = Qtrue[indices]
    xgrid = xgrid[indices]
    # Return
    return [xis, xgrid, Qtrue]
def plot_results(B, pcF, a, b, loc, scale, numsamp, sampsize, curcolor, lstyle, ax):
    Bfit, pcFfit, RMSE, a, b, loc, scale = betadist_leastsquare_fitting(B, pcF, a, b, loc, scale)
    beta_mode = scale * (a - 1.) / (a + b - 2.)
    ax[0].plot(Bfit, pcFfit, ls=lstyle, color=curcolor, label=Lith_list[j])
    ax[0].plot(B, pcF, 'x', c=curcolor)
    ax[0].legend(fontsize='x-small', loc='best')
    if Lith_list[j] == 'UM':
        ax[0].grid(b=None, which='major', axis='x')
    # generate random variables from the beta distribution
    betarvs = beta.rvs(a, b, loc=loc, scale=scale, size=numsamp * 100)
    ax[1].hist(betarvs, bins=np.linspace(0, 5, 1 + int(5 / 0.05)), normed=True,
               histtype='step', lw=0.5, color=curcolor)
    ax[1].plot(Bfit, beta.pdf(Bfit, a, b, loc=loc, scale=scale), ls=lstyle, color=curcolor, lw=2)
    if Lith_list[j] == 'UM':
        ax[1].grid(b=None, which='major', axis='x')
    samp_modes = np.empty(numsamp)
    for i in range(numsamp):
        indx = np.arange(len(betarvs))
        np.random.shuffle(indx)
        indx = indx[:sampsize]
        # computing and appending sample mode
        kde, maxima = kde_minmode(betarvs[indx], np.linspace(0, 5, 1 + int(5 / 0.001)), 2, 0.1)
        samp_modes[i] = maxima[np.argmax(kde(maxima))]
        # ax[2].plot(np.linspace(0, 5, 1 + int(5 / 0.001)), kde(np.linspace(0, 5, 1 + int(5 / 0.001))), '-', c='0.6')
    kde, maxima = kde_minmode(samp_modes, np.linspace(0, 5, 1 + int(5 / 0.001)), 2, 0.1)
    # ax[2].plot(np.linspace(0, 5, 1 + int(5 / 0.001)), kde(np.linspace(0, 5, 1 + int(5 / 0.001))), '-', c=curcolor)
    ksnorm_pval = kstest(samp_modes, 'norm', args=(np.mean(samp_modes), np.std(samp_modes)))[1]
    if round(ksnorm_pval, 3) > 0.05:
        ax[2].hist(samp_modes, bins=np.linspace(0, 5, 1 + int(5 / 0.05)), normed=True,
                   histtype='step', lw=0.5, color=curcolor)
        ax[2].plot(np.linspace(0, 5, 1 + int(5 / 0.001)),
                   norm.pdf(np.linspace(0, 5, 1 + int(5 / 0.001)),
                            loc=np.mean(samp_modes), scale=np.std(samp_modes)),
                   '-', c=curcolor, lw=2)
    if Lith_list[j] == 'UM':
        ax[2].grid(b=None, which='major', axis='x')
    return a, b, round(beta_mode, 3), round(ksnorm_pval, 3), samp_modes
def get_a(data, s, counts):
    count_p = 0
    count_m = 0
    obj_index_list = data.keys()
    for obj_index in obj_index_list:
        obj_data = data[obj_index]
        sources = obj_data[0]
        if s not in sources:
            continue
        obj_counts = counts[obj_index][1]
        s_index = sources.index(s)
        c = obj_counts[s_index]
        if c == 1:
            count_p += 1
        else:
            count_m += 1
    a_new = beta.rvs(count_p + alpha1, count_m + alpha2, size=1)[0]
    return a_new
def sample_an_arm():
    max_sample = float('-inf')
    best_arm = None
    mature_ones = []
    for arm in range(0, arm_num):
        if win[arm] >= 10 and prior[arm][0] < prior[arm][1]:
            continue
        if win[arm] >= 10 and prior[arm][0] > prior[arm][1]:
            mature_ones.append(arm)
            continue
        r = beta.rvs(prior[arm][0], prior[arm][1])
        # print r
        if r > max_sample:
            max_sample = r
            best_arm = arm
    if len(mature_ones) > 0:
        return find_best_one(mature_ones)
    if best_arm is not None:
        return best_arm
    else:
        return find_best_one(range(bandit.arm_num()))