import numpy as np
from scipy.stats import poisson

# MAX_GOALS is assumed to be defined at module level.


def _correct_score_grid(exp_home, exp_away, max_goals=MAX_GOALS):
    """
    Calculate a (max_goals x max_goals) grid of correct-score probabilities
    from the joint Poisson pmf of independent home and away goal counts.

    Note: in prod this would need standardising to 1, since the grid
    truncates the tail above max_goals.
    """
    return np.fromfunction(
        lambda hgoals, agoals: poisson.pmf(hgoals, exp_home)
        * poisson.pmf(agoals, exp_away),
        (max_goals, max_goals),
    )
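# A minimal usage sketch (illustrative values, not from the original source).
# The truncated grid is renormalised before reading probabilities off it,
# as the docstring above suggests.
grid = _correct_score_grid(1.6, 1.2, max_goals=10)
grid /= grid.sum()                      # standardise to 1
p_home_win = np.tril(grid, k=-1).sum()  # cells where hgoals > agoals
p_draw = np.trace(grid)                 # cells where hgoals == agoals
print(p_home_win, p_draw)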
import numpy
from scipy.stats import poisson


def compute_probabilities_all(picklambdax=3, picklambday=4,
                              returnlambdax=3, returnlambday=2):
    """Joint pmf grids over (picks, returns) for two settings (x and y),
    assuming pick and return counts are independent Poisson variables
    truncated at 20."""
    probabilitiesx = numpy.zeros([21, 21])
    probabilitiesy = numpy.zeros([21, 21])
    for picks in range(0, 21):
        for returns in range(0, 21):
            probabilitiesx[picks, returns] = round(
                poisson.pmf(picks, picklambdax)
                * poisson.pmf(returns, returnlambdax), 30)
            probabilitiesy[picks, returns] = round(
                poisson.pmf(picks, picklambday)
                * poisson.pmf(returns, returnlambday), 30)
    return probabilitiesx, probabilitiesy
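# A quick sanity check (illustrative, not from the original source): both
# Poisson tails are truncated at 20, which loses only negligible mass for
# these small lambdas, so each grid should sum to very nearly 1.
px, py = compute_probabilities_all()
assert abs(px.sum() - 1.0) < 1e-6
assert abs(py.sum() - 1.0) < 1e-6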
import logging
from math import log

import numpy as np


def make_arrays(blastfile, qpadbed, spadbed, qpadnames, spadnames):
    """
    Build three matrices: observed, expected and logmp. logmp holds the
    statistical significance (-log Poisson p) for each comparison and is
    the matrix returned.
    """
    m, n = len(qpadnames), len(spadnames)
    qpadorder, spadorder = qpadbed.order, spadbed.order
    qpadid = dict((a, i) for i, a in enumerate(qpadnames))
    spadid = dict((a, i) for i, a in enumerate(spadnames))
    qpadlen = dict((a, len(b)) for a, b in qpadbed.sub_beds())
    spadlen = dict((a, len(b)) for a, b in spadbed.sub_beds())
    qsize, ssize = len(qpadbed), len(spadbed)

    assert sum(qpadlen.values()) == qsize
    assert sum(spadlen.values()) == ssize

    # Populate arrays of observed counts and expected counts
    logging.debug("Initialize array of size ({0} x {1})".format(m, n))
    observed = np.zeros((m, n))
    all_dots = 0
    with open(blastfile) as fp:
        for row in fp:
            b = BlastLine(row)  # BlastLine comes from the surrounding module
            _, q = qpadorder[b.query]
            _, s = spadorder[b.subject]
            qseqid, sseqid = q.seqid, s.seqid
            qsi, ssi = qpadid[qseqid], spadid[sseqid]
            observed[qsi, ssi] += 1
            all_dots += 1

    assert int(round(observed.sum())) == all_dots

    logging.debug("Total area: {0} x {1}".format(qsize, ssize))
    S = qsize * ssize
    expected = np.zeros((m, n))
    for i, a in enumerate(qpadnames):
        alen = qpadlen[a]
        for j, b in enumerate(spadnames):
            blen = spadlen[b]
            expected[i, j] = all_dots * alen * blen * 1.0 / S

    assert int(round(expected.sum())) == all_dots

    # Calculate the statistical significance for each cell
    from scipy.stats.distributions import poisson
    logmp = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            obs, exp = observed[i, j], expected[i, j]
            pois = max(poisson.pmf(obs, exp), 1e-250)  # guard against underflow
            logmp[i, j] = max(-log(pois), 0)

    return logmp
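# An illustration of the logmp score used above (the numbers are made up,
# not from the original source): a cell with 20 observed dots against an
# expectation of 5 gets a large -log Poisson p, flagging it as enriched.
from math import log

from scipy.stats import poisson

score = -log(max(poisson.pmf(20, 5), 1e-250))
print(score)  # about 15 nats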
import numpy
from scipy.stats import poisson


def count_likelihood_poisson(this_counts, tot_counts, this_port, this_cv,
                             this_fract_recov=1):
    """Summed log2-likelihood of the observed counts under a Poisson model
    with rate tot_count * this_port * this_fract_recov per bin.
    (this_cv is accepted for interface compatibility but unused here.)"""
    L = [poisson.pmf(this_c, tot_c * this_port * this_fract_recov)
         for this_c, tot_c in zip(this_counts, tot_counts)]
    logL = numpy.log2(numpy.array(L))
    return sum(logL)
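# A minimal usage sketch (illustrative values): the log2-likelihood of seeing
# these per-bin counts if this portion captures 10% of each bin's total.
ll = count_likelihood_poisson([3, 5, 2], [40, 55, 18], this_port=0.1,
                              this_cv=None)
print(ll)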
from scipy.stats import poisson


def prob_ret_req(n_morning, n_night, lambda_ret, lambda_req):
    """
    Probability for one agency of starting the day with n_morning cars and
    ending with n_night cars, summed over all (returns, requests) pairs
    consistent with that change. Also accumulates the expected reward R
    (10 per satisfied request). The sums are truncated at 10x each rate.
    """
    prob = 0
    difference = n_night - n_morning
    R = 0
    for ret in range(int(10 * lambda_ret)):
        for req in range(int(10 * lambda_req)):
            if ret - req != difference:
                continue
            p_ret = poisson.pmf(ret, lambda_ret)
            p_req = poisson.pmf(req, lambda_req)
            prob += p_ret * p_req
            R += p_ret * p_req * req * 10  # expected reward
    return prob, R
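# Illustrative call (not from the original source): probability that an
# agency with 8 cars in the morning has 7 at night when returns ~ Poisson(3)
# and requests ~ Poisson(4), plus the expected rental reward along the way.
p, r = prob_ret_req(8, 7, lambda_ret=3, lambda_req=4)
print(p, r)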
import numpy
from scipy.stats import poisson


def poiTotalVar(hist):
    """Estimate a Poisson mean from a count histogram by scanning integer
    means near the mode and minimising total variation distance."""
    hist = numpy.asarray(hist, dtype=float)
    n = hist.sum()
    mode = numpy.argmax(hist[2:]) + 2
    est_mean = mode + 1
    est_tv = 1.0
    emp_pmf = hist / n
    for m in range(mode - 2, mode + 3):
        poi_pmf = poisson.pmf(numpy.arange(len(hist)), m)
        residual_mass = 1.0 - poi_pmf.sum()  # Poisson mass beyond the histogram
        total_var = 0.5 * numpy.abs(emp_pmf - poi_pmf).sum() \
            + 0.5 * residual_mass
        if total_var < est_tv:
            est_tv = total_var
            est_mean = m
    return (est_tv, est_mean)
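# Quick check on synthetic data (illustrative, not from the original source):
# a histogram of Poisson(6) draws should recover an estimated mean near 6
# with a small total variation distance.
rng = numpy.random.default_rng(0)
draws = rng.poisson(6, size=10000)
hist = numpy.bincount(draws)
print(poiTotalVar(hist))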
# Fragment: the tail of an mcmc() sampler, followed by the driver that
# measures convergence of the chain's marginal to a Poisson(7.3) target.
        k = new_k
        res.append(k)
    return res


if __name__ == '__main__':
    from collections import Counter

    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.stats import poisson

    lmbd = 7.3
    n_steps = 20
    distr = []
    for j in range(1000):
        distr.append(mcmc())
        print(', '.join(map(str, distr[-1])))

    conv = []
    for idx in range(n_steps):
        distr_on_step = [line[idx] for line in distr]
        freq = Counter(distr_on_step)
        norm = sum(freq.values())
        # L1 distance between the empirical marginal at this step and the
        # target Poisson pmf, over the support 0..19
        diff = 0
        for num in range(0, 20):
            if num in freq:
                diff += abs(freq[num] / norm - poisson.pmf(num, lmbd))
        conv.append(diff)

    plt.plot(np.array(conv))
    plt.show()
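# The mcmc() body is missing above; this is a hypothetical sketch of what the
# driver seems to expect: a Metropolis chain of n_steps integer states whose
# stationary distribution is Poisson(lmbd). The defaults, proposal choice,
# and starting state are assumptions, not recovered from the original source.
import random

from scipy.stats import poisson


def mcmc(lmbd=7.3, n_steps=20, start=0):
    res = []
    k = start
    for _ in range(n_steps):
        new_k = k + random.choice([-1, 1])  # symmetric random-walk proposal
        # Metropolis ratio for the Poisson target; pmf(-1) is 0, so negative
        # proposals are always rejected
        ratio = poisson.pmf(new_k, lmbd) / poisson.pmf(k, lmbd)
        if random.random() < ratio:
            k = new_k
        res.append(k)
    return res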
from scipy.stats import poisson


def Estep(x, mu, psi):
    """E-step of a zero-inflated Poisson EM: posterior probability that each
    observation x came from the Poisson(mu) component rather than the
    structural-zero component, where psi is the Poisson mixing weight."""
    a = (1 - psi) * (x == 0)
    b = psi * poisson.pmf(x, mu)
    return b / (a + b)
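# Illustrative call (made-up values): responsibilities for a small batch of
# counts under a zero-inflated Poisson with mean 2 and Poisson weight 0.7.
# Zeros get a responsibility below 1; nonzero counts get exactly 1.
import numpy as np

x = np.array([0, 0, 1, 3, 5])
print(Estep(x, mu=2.0, psi=0.7))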
from scipy.stats import poisson


def component_likelihood(x, lambda_i, pi_k):
    """Weighted likelihood of x under one Poisson mixture component:
    mixing weight pi_k times the component pmf."""
    return pi_k * poisson.pmf(x, lambda_i)
def pois_likelihood(x, lambd):
    """Plain Poisson likelihood of x at rate lambd."""
    return poisson.pmf(x, lambd)
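# A short illustration (made-up numbers) of how these helpers combine in a
# two-component Poisson mixture: per-component weighted likelihoods of one
# observation, normalised into responsibilities.
weighted = [component_likelihood(4, lambda_i=2.0, pi_k=0.4),
            component_likelihood(4, lambda_i=6.0, pi_k=0.6)]
total = sum(weighted)
responsibilities = [w / total for w in weighted]
print(responsibilities)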
import random

from scipy.stats import gamma, norm, poisson


def particle_filter_detector(ser1, taps, models):
    # particle: (id, rate, censor, prev_particle, time, tap, ever_censored)
    # tracebackp and samplep are helpers defined in the surrounding module.

    # Model parameters
    normal_std_factor = 4
    censorship_std_factor = 7
    censorship_prior_model = 0.01
    change_tap_prior_model = 0.1

    # Sampling parameters
    change_tap_sample = 0.2
    censorship_prior_sample = 0.3
    particle_number = 1000
    mult_particles = 1

    # Check consistency once
    for t in models:
        assert len(ser1) == len(models[t])

    # Clean up the data: forward-fill gaps, then back-fill any leading gap
    # with the first observed value
    series2 = []
    last = None
    first = None
    for s in ser1:
        if s is None:
            series2 += [last]
        else:
            if first is None:
                first = s
            series2 += [s]
            last = s
    series = [s if s is not None else first for s in series2]

    # Data structures to keep logs
    particles = {}
    outputlog = [(series[0], series[0])]

    # Initial particles: rates drawn from a Gamma prior around the first point
    particles[0] = []
    G = gamma(max(1, series[0]), 1)
    for pi, r in enumerate(G.rvs(particle_number)):
        particles[0] += [(pi, r, False, None, 0, random.choice(taps), False)]

    # Now run the sampler for all times
    for pi in range(1, len(series)):
        assert models is not None
        assert taps is not None

        # Normal distributions from taps and the model standard deviation,
        # for the no-censorship and censorship regimes
        round_models = {}
        for ti in taps:
            NoCensor = norm(models[ti][pi][0],
                            (models[ti][pi][1] * normal_std_factor) ** 2)
            Censor = norm(models[ti][pi][0],
                          (models[ti][pi][1] * censorship_std_factor) ** 2)
            round_models[ti] = (NoCensor, Censor)

        # Store for the expanded pool of candidate particles
        temporary_particles = []

        # Expand the distribution
        for p in particles[pi - 1]:
            p_old, C_old, _ = tracebackp(particles, p, pi - 1, p[5] - 1)

            # Create a number of candidate particles from each previous one
            for _ in range(mult_particles):
                # Sample a new tap for the candidate particle
                new_tap = p[5]
                if random.random() < change_tap_sample:
                    new_tap = random.choice(taps)

                # Update the censorship flag
                C = False
                if random.random() < censorship_prior_sample:
                    C = True

                # Determine the new rate
                new_p = None
                if p_old is None:
                    new_p = p[1]  # continue as before
                elif C or C_old:
                    while new_p is None or new_p < 0:
                        new_p = p_old[1] * (
                            1 + round_models[new_tap][1].rvs(1)[0])  # censor model
                else:
                    while new_p is None or new_p < 0:
                        new_p = p_old[1] * (
                            1 + round_models[new_tap][0].rvs(1)[0])  # no-censor model

                # Build and register the new particle
                newpi = (None, new_p, C, p[0], pi, new_tap, C or C_old)
                temporary_particles += [newpi]

        # Assign a weight to each sampled candidate particle
        weights = []
        for px in temporary_particles:
            wx = 1.0

            # Adjust weight by the observation likelihood
            if series[pi] is not None:
                wx *= poisson.pmf(series[pi], px[1])

            # Adjust for the probability of censorship
            if px[2]:
                wx *= censorship_prior_model / censorship_prior_sample
            else:
                wx *= (1 - censorship_prior_model) / (1 - censorship_prior_sample)

            # Adjust for the probability of changing the tap
            stay_prob = (1 - change_tap_sample) \
                + change_tap_sample * (1.0 / len(taps))
            if px[5] == particles[pi - 1][px[3]][5]:
                wx *= (1 - change_tap_prior_model) / stay_prob
            else:
                wx *= change_tap_prior_model / (1 - stay_prob)

            weights += [wx]
        weights_sum = sum(weights)

        # Resample according to weight
        particles[pi] = []
        for pid in range(particle_number):
            px = samplep(weights, weights_sum, temporary_particles)
            px = (pid, px[1], px[2], px[3], px[4], px[5], px[6])
            particles[pi] += [px]

        # Collect some statistics: censored count and mean rate
        Ci = 0
        mean = 0
        for px in particles[pi]:
            if px[2]:
                Ci += 1
            mean += px[1]
        mean = mean / len(particles[pi])

        # Diversity of surviving ancestries
        Div = len(set([pv[3] for pv in particles[pi]]))

        # Range of rates among non-censored candidates (median, 1st and
        # 99th percentiles)
        range_normal = sorted([pn[1] for pn in temporary_particles
                               if not pn[2]])
        Base = range_normal[len(range_normal) // 2]
        Mn = range_normal[len(range_normal) * 1 // 100]
        Mx = range_normal[len(range_normal) * 99 // 100]
        outputlog += [(Mn, Mx)]

        # How many particles are using the censorship model at this time?
        censor_model_stat = (len([1 for pn in particles[pi] if pn[6]])
                             * 100 // len(particles[pi]))

        # Build a histogram of taps
        tap_hist = {}
        for px in particles[pi]:
            tap_hist[px[5]] = tap_hist.get(px[5], 0) + 1

        print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
            pi, Ci, mean, series[pi], tap_hist, Base, Mn, Mx, Div,
            censor_model_stat))

    return particles, outputlog
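# A hypothetical driver for the detector above. None of these inputs come
# from the original source, and the original tracebackp/samplep helpers are
# not shown, so minimal stand-ins are sketched here; the real module may
# implement them differently.

def samplep(weights, weights_sum, pool):
    # Stand-in (assumption): draw one particle with probability proportional
    # to its weight, via inverse-CDF sampling.
    r = random.random() * weights_sum
    acc = 0.0
    for w, particle in zip(weights, pool):
        acc += w
        if acc >= r:
            return particle
    return pool[-1]


def tracebackp(particles, p, t, steps):
    # Stand-in (assumption): walk `steps` ancestor links back from particle p
    # at time t; return (ancestor, censorship seen on the path, time reached).
    cur, time, censored = p, t, p[2]
    for _ in range(steps):
        if time == 0 or cur[3] is None:
            return None, censored, time
        cur = particles[time - 1][cur[3]]
        time -= 1
        censored = censored or cur[2]
    return cur, censored, time


random.seed(0)
series = [100, 103, 97, 30, 28, 99, 102]   # the dip mimics a censorship event
taps = [1]
models = {1: [(0.0, 0.05)] * len(series)}  # (mean, std) of relative change
particles, outputlog = particle_filter_detector(series, taps, models)
print(outputlog)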