def _correct_score_grid(exp_home, exp_away, max_goals=MAX_GOALS):
    """ Calculate a probability space of given dimension using joint poisson pmf.
    Note: In prod this would need standardising to 1.
    """
    return np.fromfunction(
        lambda hgoals, agoals: poisson.pmf(hgoals, exp_home) * poisson.pmf(
            agoals, exp_away), (max_goals, max_goals))
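A minimal usage sketch. The expected-goal values and the MAX_GOALS value below are illustrative assumptions, not from the original module, and MAX_GOALS must be defined before the function above is evaluated:

import numpy as np
from scipy.stats import poisson

MAX_GOALS = 6  # assumed value; the source module defines its own constant before the def

grid = _correct_score_grid(1.6, 1.1)    # home/away expected goals (made up)
grid /= grid.sum()                      # normalise the truncated grid so it sums to 1
p_home_win = np.tril(grid, -1).sum()    # cells where home goals > away goals
p_draw = np.trace(grid)                 # diagonal cells
print(p_home_win, p_draw)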
Example 2
def compute_probabiblities_all(picklambdax=3,
                               picklambday=4,
                               returnlambdax=3,
                               returnlambday=2):
    """Joint Poisson pmf grids (21 x 21) over pick/return counts, one grid for
    the x rates and one for the y rates."""
    probabilitiesx = numpy.zeros([21, 21])
    probabilitiesy = numpy.zeros([21, 21])
    for picks in range(0, 21):
        for returns in range(0, 21):
            probabilitiesx[picks, returns] += round(
                poisson.pmf(picks, picklambdax) *
                poisson.pmf(returns, returnlambdax), 30)
            probabilitiesy[picks, returns] += round(
                poisson.pmf(picks, picklambday) *
                poisson.pmf(returns, returnlambday), 30)
    return probabilitiesx, probabilitiesy
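A quick usage sketch with the default rates: since picks and returns are independent Poissons truncated at 20, each grid should sum to just under 1.

import numpy
from scipy.stats import poisson

px, py = compute_probabiblities_all()
print(px.shape, py.shape)   # (21, 21) each
print(px.sum(), py.sum())   # each close to 1, slightly less because of the cut-off at 20
print(px[3, 4])             # joint probability of 3 picks and 4 returns under the x rates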
Example 3
def make_arrays(blastfile, qpadbed, spadbed, qpadnames, spadnames):
    """
    This function makes three matrices: observed, expected and logmp. The logmp
    contains the statistical significance for each comparison.
    """
    m, n = len(qpadnames), len(spadnames)
    qpadorder, spadorder = qpadbed.order, spadbed.order
    qpadid = dict((a, i) for i, a in enumerate(qpadnames))
    spadid = dict((a, i) for i, a in enumerate(spadnames))
    qpadlen = dict((a, len(b)) for a, b in qpadbed.sub_beds())
    spadlen = dict((a, len(b)) for a, b in spadbed.sub_beds())

    qsize, ssize = len(qpadbed), len(spadbed)

    assert sum(qpadlen.values()) == qsize
    assert sum(spadlen.values()) == ssize

    # Populate arrays of observed counts and expected counts
    logging.debug("Initialize array of size ({0} x {1})".format(m, n))
    observed = np.zeros((m, n))
    fp = open(blastfile)
    all_dots = 0
    for row in fp:
        b = BlastLine(row)
        qi, q = qpadorder[b.query]
        si, s = spadorder[b.subject]
        qseqid, sseqid = q.seqid, s.seqid
        qsi, ssi = qpadid[qseqid], spadid[sseqid]
        observed[qsi, ssi] += 1
        all_dots += 1

    assert int(round(observed.sum())) == all_dots

    logging.debug("Total area: {0} x {1}".format(qsize, ssize))
    S = qsize * ssize
    expected = np.zeros((m, n))
    qsum = 0
    for i, a in enumerate(qpadnames):
        alen = qpadlen[a]
        qsum += alen
        for j, b in enumerate(spadnames):
            blen = spadlen[b]
            expected[i, j] = all_dots * alen * blen * 1.0 / S

    assert int(round(expected.sum())) == all_dots

    # Calculate the statistical significance for each cell
    from scipy.stats.distributions import poisson

    M = m * n  # multiple testing
    logmp = np.zeros((m, n))
    for i in range(m):
        for j in range(n):
            obs, exp = observed[i, j], expected[i, j]
            pois = max(poisson.pmf(obs, exp), 1e-250)  # Underflow
            logmp[i, j] = max(-log(pois), 0)

    return logmp
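The per-cell significance reduces to the negative log of the Poisson probability of the observed count given the expected count. A standalone illustration with made-up counts, not from any real BLAST comparison:

from math import log
from scipy.stats.distributions import poisson

obs, exp = 37, 12.5                        # hypothetical observed and expected hit counts
pois = max(poisson.pmf(obs, exp), 1e-250)  # underflow guard, as in make_arrays
print(max(-log(pois), 0))                  # larger values = stronger enrichment over expectation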
Example 5
def count_likelihood_poisson(this_counts,
                             tot_counts,
                             this_port,
                             this_cv,
                             this_fract_recov=1):
    """Total log2-likelihood of the observed counts, each modelled as
    Poisson(tot_count * this_port * this_fract_recov)."""
    L = [poisson.pmf(this_c, tot_c * this_port * this_fract_recov)
         for this_c, tot_c in zip(this_counts, tot_counts)]
    logL = numpy.log2(numpy.array(L))
    return sum(logL)
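A small usage sketch with made-up counts; the snippet never uses this_cv, so any placeholder works there.

import numpy
from scipy.stats import poisson

this_counts = [12, 7, 0, 3]      # hypothetical counts in this sample
tot_counts = [100, 80, 10, 40]   # hypothetical totals across all samples
print(count_likelihood_poisson(this_counts, tot_counts, this_port=0.1, this_cv=None))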
Example 6
def prob_ret_req(n_morning, n_night, lambda_ret, lambda_req):
    """ 
    Probability for one agency of having n_morning cars in the morning and
    n_night cars in the night. Depends on the probabilities of returns and
    requests, as well as the max car availability.
    """
    prob = 0
    difference = n_night - n_morning
    R = 0

    # Truncate each Poisson support at 10 * lambda; the tail mass beyond that is negligible
    for ret in range(int(10 * lambda_ret)):
        for req in range(int(10 * lambda_req)):
            if ret - req != difference:
                continue
            p_ret = poisson.pmf(ret, lambda_ret)
            p_req = poisson.pmf(req, lambda_req)

            prob += p_ret * p_req
            R += p_ret * p_req * req * 10  # expected reward (10 per satisfied request)

    return prob, R
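A usage sketch with illustrative numbers; the car counts and rates below are made up:

from scipy.stats import poisson

p, r = prob_ret_req(n_morning=10, n_night=9, lambda_ret=3, lambda_req=4)
print(p)   # probability of ending the day one car down
print(r)   # expected reward accumulated over the matching (return, request) pairs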
Example 7
def poiTotalVar(hist):
    """Search the Poisson means around the histogram mode for the one that
    minimises the total-variation distance to the empirical pmf; returns
    (distance, mean). `hist` is assumed to be a numpy array of counts."""
    n = sum(hist)
    mode = numpy.argmax(hist[2:]) + 2

    est_mean = mode + 1
    est_tv = 1.0
    for m in range(mode - 2, mode + 3):
        emp_pmf = hist / n
        poi_pmf = poisson.pmf(numpy.arange(len(hist)), m)
        residual_mass = 1.0 - sum(poi_pmf)  # Poisson mass beyond the histogram support

        total_var = 0.5 * sum(abs(p1 - p2) for p1, p2 in zip(emp_pmf, poi_pmf)) + 0.5 * residual_mass

        if total_var < est_tv:
            est_tv = total_var
            est_mean = m

    return (est_tv, est_mean)
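A usage sketch, assuming the numpy-array histogram the function expects; the sample below is synthetic:

import numpy
from scipy.stats import poisson

rng = numpy.random.default_rng(0)
sample = rng.poisson(6, size=2000)                 # synthetic counts from a Poisson(6) source
hist = numpy.bincount(sample, minlength=15)[:15]   # histogram over counts 0..14
tv, mean = poiTotalVar(hist)
print(tv, mean)                                    # small distance, estimated mean near 6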
Example 8

# Only the tail of this example's mcmc() sampler is shown; the earlier lines are missing from the excerpt.
            k = new_k
        res.append(k)
    return res


if __name__ == '__main__':
    lmbd = 7.3
    n_steps = 20

    distr = []
    for j in range(1000):
        distr.append(mcmc())

    print(', '.join(map(str, distr[-1])))

    conv = []
    freq = []
    for idx in range(n_steps):
        distr_on_step = [line[idx] for line in distr]
        # print(distr_on_step)
        freq = Counter(distr_on_step)
        norm = sum(freq.values())
        diff = 0
        for num in range(0, 20):
            if num in freq:
                diff += abs(freq[num] / norm - poisson.pmf(num, lmbd))
        conv.append(diff)

    plt.plot(np.array(conv))
    plt.show()
Example 9
def Estep(x, mu, psi):
    """E-step of a zero-inflated Poisson model: the posterior probability that
    an observation x came from the Poisson(mu) component (mixing weight psi)
    rather than the structural-zero component."""
    a = (1 - psi) * (x == 0)
    b = psi * poisson.pmf(x, mu)
    return b / (a + b)
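A brief usage sketch with made-up counts and parameters: zeros get a responsibility below 1, strictly positive counts get exactly 1.

import numpy as np
from scipy.stats import poisson

x = np.array([0, 0, 1, 3, 7])      # hypothetical observed counts
print(Estep(x, mu=2.5, psi=0.6))   # P(count came from the Poisson component | x)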
Example 12
def component_likelihood(x, lambda_i, pi_k):
    return pi_k * poisson.pmf(x, lambda_i)
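In a Poisson mixture, normalising these weighted component likelihoods over all components gives the responsibilities; a sketch with made-up rates and mixing weights:

import numpy as np
from scipy.stats import poisson

x = 4
lambdas = [1.0, 5.0, 12.0]   # hypothetical component rates
pis = [0.5, 0.3, 0.2]        # hypothetical mixing weights
weighted = np.array([component_likelihood(x, l, p) for l, p in zip(lambdas, pis)])
print(weighted / weighted.sum())   # posterior responsibility of each component for x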
Example 13
def pois_likelihood(x, lambd):
    return poisson.pmf(x, lambd)
Example 14
def particle_filter_detector(ser1, taps, models):
  # particle: (id, rate, censor, prev_particle_id, time_index, tap, ever_censored)

  # Model parameters
  normal_std_factor = 4
  censorship_std_factor = 7
  censorship_prior_model = 0.01
  change_tap_prior_model = 0.1

  # Sampling parameters
  change_tap_sample = 0.2
  censorship_prior_sample = 0.3
  particle_number = 1000
  mult_particles = 1
  
  # Check consistency once
  for t in models:    
    assert len(ser1) == len(models[t]) 

  # Clean up the data a bit
  series2 = []
  last = None
  first = None
  # Process series
  for s in ser1:
    if s == None:
      series2 += [last]
    else:
      if first == None:
        first = s
      series2 += [s]
      last = s

  series2 = [s if s != None else first for s in series2]
  series = series2

  # Data structures to keep logs
  particles = {}
  outputlog = [(series[0],series[0])]

  # Initial particles:
  particles[0] = []
  G = gamma(max(1,series[0]), 1)
  for pi, r in enumerate(G.rvs(particle_number)):
    particles[0] += [(pi, r, False, None, 0, random.choice(taps), False)]

  # Now run the sampler for all times
  for pi in range(1, len(series)):
    assert models != None
    assert taps != None

    # Per-tap normal distributions, with separate spreads for the no-censorship and censorship models
    round_models = {}
    for ti in taps:
      NoCensor = norm(models[ti][pi][0], (models[ti][pi][1] * normal_std_factor)**2)
      Censor = norm(models[ti][pi][0], (models[ti][pi][1] * censorship_std_factor)**2)
      round_models[ti] = (NoCensor, Censor)

    # Store for expanded pool of particles
    temporary_particles = []

    # Expand the distribution
    for p in particles[pi-1]:
      p_old, C_old, j = tracebackp(particles, p, pi-1, p[5] - 1) # taps[0] - 1)

      # Serial number of old particle
      p_old_num = None
      if p_old != None:
        p_old_num = p_old[0]

      # Create a number of candidate particles from each previous particle
      for _ in range(mult_particles):

        # Sample a new tap for the candidate particle
        new_tap = p[5]
        if random.random() < change_tap_sample:
          new_tap = random.choice(taps)
        
        # Update this censorship flag
        C = False  
        if random.random() < censorship_prior_sample:
          C = True

        # Determine new rate
        new_p = None
        if p_old == None:          
          new_p = p[1] # continue as before
        if C | C_old:
          while new_p == None or new_p < 0:
            new_p = p_old[1] * (1 + round_models[new_tap][1].rvs(1)[0]) ## censor models
        else:
          while new_p == None or new_p < 0:
            new_p = p_old[1] * (1 + round_models[new_tap][0].rvs(1)[0]) ## no censor models
        
        # Build and register new particle
        newpi = (None, new_p, C, p[0], pi, new_tap, C | C_old)
        temporary_particles += [newpi]


    # Assign a weight to each sampled candidate particle
    weights = []
    for px in temporary_particles:
      wx = 1.0

      # Adjust weight to observation
      if not series[pi] == None:
        poisson_prob = poisson.pmf(series[pi], px[1])
        #print poisson_prob, px
        wx *= poisson_prob

      # Adjust the probability of censorship
      if px[2]:
        wx *= censorship_prior_model / censorship_prior_sample
      else:
        wx *= (1 - censorship_prior_model) / (1 - censorship_prior_sample)

      # Adjust the probability of changing the tap
      if px[5] == particles[pi-1][px[3]][5]:
        wx *= (1 - change_tap_prior_model) / (((1-change_tap_sample) + change_tap_sample*(1.0 / len(taps))))
      else:
        wx *= (change_tap_prior_model) / (1 - (((1-change_tap_sample) + change_tap_sample*(1.0 / len(taps)))))
          
      weights += [wx]

    weights_sum = sum(weights)
    
    ## Resample according to weight
    particles[pi] = []
    for pid in range(particle_number):
      px = samplep(weights, weights_sum, temporary_particles)
      px = (pid, px[1], px[2], px[3], px[4], px[5], px[6])
      particles[pi] += [px]

    ## Collect some statistics

    ## stats
    Ci = 0
    mean = 0
    for px in particles[pi]:
      if px[2]:
        Ci += 1
      mean += px[1]
    mean = mean / len(particles[pi])

    # Diversity
    Div = len(set([pv[3] for pv in particles[pi]]))

    # Range of values
    range_normal = sorted([pn[1] for pn in temporary_particles if not pn[2]])    
    Base = range_normal[len(range_normal) // 2]
    Mn = range_normal[len(range_normal) * 1 // 100]
    Mx = range_normal[len(range_normal) * 99 // 100]
    outputlog += [(Mn, Mx)]

    # How many are using the censorship model at any time?
    censor_model_stat = len([1 for pn in particles[pi] if pn[6]])* 100 / len(particles[pi])

    # Build histogram of taps
    tap_hist = {}
    for px in particles[pi]:
      tap_hist[px[5]] = tap_hist.get(px[5], 0) + 1
          
    print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (pi, Ci, mean, series[pi], tap_hist, Base, Mn, Mx, Div, censor_model_stat)
    # print "      [%s - %s]" % (key_series_point*(1+NoCensor.ppf(0.00001)), key_series_point*(1+NoCensor.ppf(0.99999)))

  return particles, outputlog
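The heart of the detector is the weight-and-resample step driven by poisson.pmf. The full function relies on helpers (tracebackp, samplep) not shown here, so below is only an isolated, simplified sketch of that step, with made-up candidate rates and a made-up observation, not a drop-in replacement:

import random
from scipy.stats import poisson

rates = [random.uniform(5, 25) for _ in range(1000)]    # candidate rates carried by particles
observation = 14                                        # observed count at this time step

weights = [poisson.pmf(observation, r) for r in rates]  # Poisson likelihood of the observation
total = sum(weights)
probs = [w / total for w in weights]

resampled = random.choices(rates, weights=probs, k=len(rates))
print(sum(resampled) / len(resampled))                  # posterior mean rate, near the observation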