Example #1
def log_likelihood(self, tau, kappa):
    # mean and var are assumed to be numpy's; precompute_likelihood_estimates
    # returns an array of (noisy) likelihood estimates
    estimates = self.precompute_likelihood_estimates(tau, kappa)

    if var(estimates) > 0:
        logging.info("Performing exponential Russian Roulette on %d precomputed samples",
                     len(estimates))
        return self.rr_instance.exponential(estimates)
    else:
        logging.warning("Russian Roulette on one estimate not possible. Returning the estimate")
        return mean(estimates)
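For context, `rr_instance` is a collaborator not shown on this page. A minimal interface stub, purely illustrative (a plain average is not the real Russian Roulette algorithm; it only matches the call signature used above):

import logging
from numpy import mean, var

class RouletteStub:
    # Stand-in for the real Russian Roulette estimator: it only mimics the
    # exponential(estimates) interface used by log_likelihood above.
    def exponential(self, estimates):
        return mean(estimates)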
Example #2
from numpy import cov, mean, split, sqrt, var, where

# Assumes lam, mu and simulateLindleyEfficient are defined at module level.
def ciRegenerative(N):
    sim = simulateLindleyEfficient(lam, mu, N)
    # Split the simulated waiting times every time we encounter an empty
    # system (i.e. a waiting time of zero); each piece is one regenerative cycle.
    idx = where(sim == 0)[0]  # the positions of the zeros
    sa = split(sim, idx)  # split the array into sub-arrays (cycles)
    Yi = [sum(x) for x in sa]  # the sum of the waiting times in each cycle
    Ni = [len(x) for x in sa]  # the number of waiting times in each cycle
    M = len(Yi)  # the number of cycles
    Yavg = mean(Yi)  # the average of the per-cycle sums
    Navg = mean(Ni)  # the average cycle length
    Wavg = Yavg / Navg  # the overall mean waiting time

    # the sample covariance is element (0, 1) (or (1, 0)) of the covariance matrix
    cv = cov(Yi, Ni)[0, 1]
    sV2 = var(Yi) + Wavg**2 * var(Ni) - 2 * Wavg * cv
    print(sV2)  # diagnostic: variance estimator of the regenerative ratio
    ci = Wavg - 1.96 * sqrt(sV2 / M) / Navg, Wavg + 1.96 * sqrt(sV2 / M) / Navg
    return ci
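`simulateLindleyEfficient` is not shown on this page. A plausible minimal version, assuming it runs the standard Lindley recursion for an M/M/1 queue with arrival rate `lam` and service rate `mu` and returns the successive customer waiting times:

import numpy as np

def simulateLindleyEfficient(lam, mu, n):
    # Assumed implementation: Lindley recursion W[i+1] = max(0, W[i] + S[i] - A[i]),
    # starting from an empty system, with Exp(lam) interarrival times and
    # Exp(mu) service times.
    rng = np.random.default_rng()
    A = rng.exponential(1 / lam, n)  # interarrival times
    S = rng.exponential(1 / mu, n)   # service times
    W = np.zeros(n)
    for i in range(n - 1):
        W[i + 1] = max(0.0, W[i] + S[i] - A[i])
    return W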
Example #3
from numpy import mean, reshape, sqrt, var

# Assumes lam, mu and simulateLindleyEfficient are defined at module level.
def ciBatchMeans(M, N, k):
    sim = simulateLindleyEfficient(lam, mu, M * N + k)

    # Throw away the first k observations (warm-up), then divide the rest
    # into M batches of length N each.
    run = sim[k:(M * N + k)]
    p = reshape(run, (M, N))
    sample = mean(p, axis=1)  # the mean of each batch (one row per batch)
    meanW = mean(sample)
    varW = var(sample)
    ci = meanW - 1.96 * sqrt(varW / M), meanW + 1.96 * sqrt(varW / M)
    return ci
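Both confidence-interval routines read `lam`, `mu` and the simulator from module scope; a hedged usage sketch with illustrative parameter values:

lam, mu = 0.8, 1.0                         # illustrative rates (utilization 0.8)
print(ciBatchMeans(M=50, N=2000, k=1000))  # 95% CI via batch means
print(ciRegenerative(10**5))               # 95% CI via regenerative cycles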
Example #4
print(test_1)
#-------------------- ↑ test data for the first six (discrete) columns ----------------------
# (assumes numpy as np, with mean, var and sqrt imported from it)
p_0_pre = diff_0 * test_1
print("Per-attribute probabilities of the first six columns for class '否' (no):")
print(p_0_pre)
p_1_pre = diff_1 * test_1
print("Per-attribute probabilities of the first six columns for class '是' (yes):")
print(p_1_pre)
#-------------------- ↑ probability computation for the first six columns ----------------------
print("val0: (存放'否'类中后两列密度和含糖率的数据)")
print(val0)
print("val1: (存放'是'类中后两列密度和含糖率的数据)")
print(val1)
mean0 = np.array([mean(val0[0]), mean(val0[1])])  #两个数 分别为0(否)类 密度和含糖率的均值
mean1 = np.array([mean(val1[0]), mean(val1[1])])  #两个数 分别为1(是)类 密度和含糖率的均值
var0 = np.array([var(val0[0]), var(val0[1])])  #两个数 分别为0(否)类 密度和含糖率的方差
var1 = np.array([var(val1[0]), var(val1[1])])  #两个数 分别为1(是)类 密度和含糖率的方差


#-------------------- ↑ means and variances of the last two columns ----------------------
def gaussian(mu, sigma2, x):
    # normal density N(mu, sigma2) evaluated at x
    res = 1 / sqrt(2 * np.pi * sigma2) * np.exp(-(x - mu)**2 / (2 * sigma2))
    return res


# Multiply (naive Bayes) the class-conditional densities of density = 0.697
# and sugar content = 0.460 under each class.
p_0_pro = gaussian(mean0[0], var0[0], 0.697) * gaussian(mean0[1], var0[1], 0.460)
print(p_0_pro)
p_1_pro = gaussian(mean1[0], var1[0], 0.697) * gaussian(mean1[1], var1[1], 0.460)
print(p_1_pro)
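A quick sanity check of `gaussian`, plus a sketch of how the factors would combine into a prediction; `p_prior_0` and `p_prior_1` are assumed names for class priors computed elsewhere in the original script:

print(gaussian(0.0, 1.0, 0.0))  # standard normal at its mean: 1/sqrt(2*pi) ~ 0.3989

# Hypothetical final step: naive Bayes multiplies the class prior with every
# per-attribute factor (p_prior_0 / p_prior_1 are assumed, not from the source).
# score_0 = p_prior_0 * np.prod(p_0_pre) * p_0_pro
# score_1 = p_prior_1 * np.prod(p_1_pre) * p_1_pro
# predicted = "是 (yes)" if score_1 > score_0 else "否 (no)"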
Example #5
import copy
import json
from pprint import pformat
from numpy import mean, var

# Helpers (site, boundary, enough_coverage, potential_ALU_insert, window_stats,
# count_alus, logm, data_d) and the constants WIN_LENGTH and RLEN are defined
# elsewhere in the module.
def detect_insertions(h0_mean, h1_mean, h2_mean, heads_per_base, inputf,
                      reads_per_base, tails_per_base):
    fp_scores, tp_scores = [], []
    with open(data_d('subject_genome.fa.alu_positions.json')) as f:
        not_found_insertions = json.load(f)

    if not not_found_insertions:
        return

    pers_alu_info = copy.deepcopy(not_found_insertions)
    total_alus = count_alus(not_found_insertions)
    false_positives = []
    skip_until = -1
    window = []

    for col in inputf.pileup():
        if col.pos < skip_until:
            continue

        # keep a sliding window of the most recent WIN_LENGTH pileup sites
        if len(window) == WIN_LENGTH:
            window = window[1:]

        window.append(site(col))

        if boundary(window) and enough_coverage(window, reads_per_base):
            reason = potential_ALU_insert(window, heads_per_base, tails_per_base)

            if reason:
                spanning = window_stats(window)

                if spanning >= h0_mean:
                    continue

                hyp, _ = min(('heterozygous', h1_mean), ('homozygous', h2_mean),
                             key=lambda pair: abs(spanning - pair[1]))

                if hyp:
                    chrom = inputf.getrname(col.tid)
                    window = []
                    skip_until = col.pos + RLEN

                    fp = True
                    for hap_no, haplotype_positions in enumerate(pers_alu_info[chrom]):
                        for inserted in haplotype_positions:
                            if abs(inserted['ref_pos'] - col.pos) < 300:
                                if inserted in not_found_insertions[chrom][hap_no]:
                                    not_found_insertions[chrom][hap_no].remove(inserted)
                                fp = False

                    if fp:
                        false_positives.append([hyp, spanning, chrom, col.pos, reason])
                        fp_scores.append(spanning)
                    else:
                        tp_scores.append(spanning)

                    logm('\t'.join(map(str, [hyp, not fp, spanning, chrom, col.pos, reason])))

    print('False positives:\n', pformat(sorted(false_positives, key=lambda fp: (fp[-1], fp[-2]))))
    print('False negatives:\n', pformat(not_found_insertions))
    print()
    print('True positives: %d (%.2f%%)\t' % (len(tp_scores), 100.0 * len(tp_scores) / (len(tp_scores) + len(fp_scores))),
          mean(tp_scores), var(tp_scores))

    if fp_scores:
        print('False positives: %d (%.2f%%)\t' % (len(fp_scores), 100.0 * len(fp_scores) / (len(tp_scores) + len(fp_scores))),
              mean(fp_scores), var(fp_scores))

    print('False negatives: %d (%.2f%%)\n' % (count_alus(not_found_insertions), 100.0 * count_alus(not_found_insertions) / total_alus))
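`inputf` evidently behaves like a pysam alignment file (it exposes `pileup()` and `getrname()`); a minimal invocation sketch, with the file name and every threshold a made-up placeholder:

import pysam

# Hypothetical call: detect_insertions expects a coordinate-sorted, indexed BAM.
bam = pysam.AlignmentFile("subject_genome.sorted.bam", "rb")
detect_insertions(h0_mean=30.0, h1_mean=15.0, h2_mean=2.0,
                  heads_per_base=0.1, inputf=bam,
                  reads_per_base=0.5, tails_per_base=0.1)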