import logging

from numpy import mean, var


# Method of a likelihood-estimator class (rr_instance and
# precompute_likelihood_estimates live on the same class).
def log_likelihood(self, tau, kappa):
    estimates = self.precompute_likelihood_estimates(tau, kappa)
    if var(estimates) > 0:
        logging.info("Performing exponential Russian Roulette on %d precomputed samples"
                     % len(estimates))
        rr_ified = self.rr_instance.exponential(estimates)
        return rr_ified
    else:
        # logging.warn is deprecated in favour of logging.warning
        logging.warning("Russian Roulette on one estimate not possible. Returning the estimate")
        return mean(estimates)
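# self.rr_instance.exponential is not shown here. As a hedged sketch: one
# standard "exponential Russian Roulette" scheme truncates the Taylor series
# of exp(x) at a random order and reweights each surviving term by its
# survival probability, which keeps the estimate unbiased. The helper name
# rr_exp and the geometric stopping rule are assumptions, not the original
# implementation.
import numpy as np

def rr_exp(x, q=0.5, rng=np.random.default_rng()):
    # Term n survives with probability (1 - q)**n, so after reweighting
    # E[total] = sum_n x**n / n! = exp(x): unbiased, extra variance only.
    total, term, n = 1.0, 1.0, 0
    while rng.random() > q:               # continue with probability 1 - q
        n += 1
        term *= x / n                     # running Taylor term x**n / n!
        total += term / (1.0 - q) ** n    # reweight by survival probability
    return total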
from numpy import cov, mean, split, sqrt, var, where


def ciRegenerative(N):
    sim = simulateLindleyEfficient(lam, mu, N)
    # Split the simulated waiting-time vector every time we encounter an empty
    # system (i.e. a waiting time of zero); each piece is one regenerative cycle.
    idx = where(sim == 0)[0]    # the positions of the zeros
    sa = split(sim, idx)        # split the array into sub-lists
    Yi = [sum(x) for x in sa]   # the sum of the waiting times in each sub-list
    Ni = [len(x) for x in sa]   # the number of waiting times in each sub-list
    M = len(Yi)                 # the number of sub-lists
    Yavg = mean(Yi)             # the average of the sums of the waiting times
    Navg = mean(Ni)             # the mean number of waiting times per sub-list
    Wavg = Yavg / Navg          # the overall mean waiting time
    cv = cov(Yi, Ni)[0, 1]      # sample covariance: element (0, 1) (or (1, 0)) of the covariance matrix
    sV2 = var(Yi) + Wavg**2 * var(Ni) - 2 * Wavg * cv
    print(sV2)                  # debug output: asymptotic variance estimate
    ci = (Wavg - 1.96 * sqrt(sV2 / M) / Navg,
          Wavg + 1.96 * sqrt(sV2 / M) / Navg)
    return ci
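# simulateLindleyEfficient, lam and mu are defined elsewhere. A minimal
# sketch, assuming an M/M/1 queue simulated with Lindley's recursion
# W[i] = max(0, W[i-1] + S[i-1] - A[i]):
import numpy as np

def simulateLindleyEfficient(lam, mu, n, rng=np.random.default_rng()):
    A = rng.exponential(1 / lam, n)   # interarrival times, rate lam
    S = rng.exponential(1 / mu, n)    # service times, rate mu
    W = np.zeros(n)
    for i in range(1, n):
        W[i] = max(0.0, W[i - 1] + S[i - 1] - A[i])
    return W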
from numpy import mean, reshape, sqrt, var


def ciBatchMeans(M, N, k):
    sim = simulateLindleyEfficient(lam, mu, M * N + k)
    # Throw away the first k observations (warm-up), and divide the rest
    # into M batches of length N each.
    run = sim[k:(M * N + k)]
    p = reshape(run, (M, N))
    sample = mean(p, axis=1)   # row means: one mean per batch (axis=0 would average across batches)
    meanW = mean(sample)
    varW = var(sample)
    ci = (meanW - 1.96 * sqrt(varW / M),
          meanW + 1.96 * sqrt(varW / M))
    return ci
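# Usage sketch (placeholder parameters, assuming lam < mu so the queue is
# stable): both estimators should bracket the same steady-state mean wait.
lam, mu = 0.8, 1.0
print(ciBatchMeans(M=50, N=2000, k=5000))   # batch-means CI
print(ciRegenerative(50 * 2000 + 5000))     # regenerative CI, same sample budget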
import numpy as np
from numpy import mean, sqrt, var

print(test_1)
# -------------------- ↑ test data for the first six columns --------------------
p_0_pre = diff_0 * test_1
print("Probabilities of the first six columns given class 'no':")
print(p_0_pre)
print("Probabilities of the first six columns given class 'yes':")
p_1_pre = diff_1 * test_1
print(p_1_pre)
# -------------------- ↑ probabilities for the first six columns --------------------
print("val0: (density and sugar-content data of the last two columns, class 'no')")
print(val0)
print("val1: (density and sugar-content data of the last two columns, class 'yes')")
print(val1)
mean0 = np.array([mean(val0[0]), mean(val0[1])])  # means of density and sugar content for class 0 ('no')
mean1 = np.array([mean(val1[0]), mean(val1[1])])  # means of density and sugar content for class 1 ('yes')
var0 = np.array([var(val0[0]), var(val0[1])])     # variances of density and sugar content for class 0 ('no')
var1 = np.array([var(val1[0]), var(val1[1])])     # variances of density and sugar content for class 1 ('yes')
# -------------------- ↑ means and variances of the last two columns --------------------


def gaussian(mean, var, x):
    # Gaussian density; the exponent needs (2 * var) parenthesised, and
    # np.pi replaces the rounded constant 3.14
    res = 1 / sqrt(2 * np.pi * var) * np.exp(-(x - mean)**2 / (2 * var))
    return res


# Naive Bayes multiplies (not adds) the class-conditional densities of the
# two continuous attributes, evaluated at the test point (0.697, 0.460)
p_0_pro = gaussian(mean0[0], var0[0], 0.697) * gaussian(mean0[1], var0[1], 0.460)
print(p_0_pro)
p_1_pro = gaussian(mean1[0], var1[0], 0.697) * gaussian(mean1[1], var1[1], 0.460)
print(p_1_pro)
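# A plausible final step (sketch; prior_0 and prior_1 are hypothetical names
# for class priors computed elsewhere, e.g. with Laplace smoothing): combine
# priors, discrete-attribute probabilities and continuous densities, then
# predict the class with the larger product.
p_no = prior_0 * np.prod(p_0_pre) * p_0_pro
p_yes = prior_1 * np.prod(p_1_pre) * p_1_pro
print("prediction:", "yes (good melon)" if p_yes > p_no else "no (bad melon)")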
import copy
import json
from pprint import pformat

from numpy import mean, var

# WIN_LENGTH, RLEN, data_d, count_alus, site, boundary, enough_coverage,
# potential_ALU_insert, window_stats and logm are defined elsewhere in this module.


def detect_insertions(h0_mean, h1_mean, h2_mean, heads_per_base, inputf,
                      reads_per_base, tails_per_base):
    fp_scores, tp_scores = [], []
    not_found_insertions = json.load(open(data_d('subject_genome.fa.alu_positions.json')))
    if not not_found_insertions:
        return
    pers_alu_info = copy.deepcopy(not_found_insertions)
    total_alus = count_alus(not_found_insertions)
    false_positives = []
    skip_until = -1
    window = []
    for col in inputf.pileup():
        if col.pos < skip_until:
            continue
        if len(window) == WIN_LENGTH:
            window = window[1:WIN_LENGTH]   # slide the window by one site
        window.append(site(col))
        if boundary(window) and enough_coverage(window, reads_per_base):
            reason = potential_ALU_insert(window, heads_per_base, tails_per_base)
            if reason:
                spanning = window_stats(window)
                if spanning >= h0_mean:     # consistent with the no-insertion hypothesis
                    continue
                # pick the hypothesis whose expected spanning coverage is closest
                hyp, _ = min(('heterozygous', h1_mean), ('homozygous', h2_mean),
                             key=lambda pair: abs(spanning - pair[1]))
                if hyp:
                    chrom = inputf.getrname(col.tid)
                    window = []
                    skip_until = col.pos + RLEN
                    fp = True
                    for hap_no, haplotype_positions in enumerate(pers_alu_info[chrom]):
                        for inserted in haplotype_positions:
                            if abs(inserted['ref_pos'] - col.pos) < 300:
                                if inserted in not_found_insertions[chrom][hap_no]:
                                    not_found_insertions[chrom][hap_no].remove(inserted)
                                fp = False
                    if fp:
                        false_positives.append([hyp, spanning, chrom, col.pos, reason])
                        fp_scores.append(spanning)
                    else:
                        tp_scores.append(spanning)
                    logm('\t'.join(map(str, [hyp, not fp, spanning, chrom, col.pos, reason])))

    print('False positives:\n', pformat(sorted(false_positives, key=lambda fp: (fp[-1], fp[-2]))))
    print('False negatives:\n', pformat(not_found_insertions))
    print()
    print('True positives: %d (%.2f%%)\t' % (len(tp_scores),
          float(100 * len(tp_scores)) / (len(tp_scores) + len(fp_scores))),
          mean(tp_scores), var(tp_scores))
    if fp_scores:
        print('False positives: %d (%.2f%%)\t' % (len(fp_scores),
              float(100 * len(fp_scores)) / (len(tp_scores) + len(fp_scores))),
              mean(fp_scores), var(fp_scores))
    print('False negatives: %d (%.2f%%)\n' % (count_alus(not_found_insertions),
          float(100 * count_alus(not_found_insertions)) / total_alus))
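# Hypothetical driver (a sketch, not from the original code): the pileup()
# and getrname() calls above match pysam's alignment-file API, and the
# h0/h1/h2 spanning-coverage means would come from calibrating on regions
# with no, heterozygous and homozygous insertions. All values below are
# placeholders.
import pysam

inputf = pysam.AlignmentFile('subject.bam', 'rb')
detect_insertions(h0_mean=30.0, h1_mean=15.0, h2_mean=2.0,
                  heads_per_base=0.1, inputf=inputf,
                  reads_per_base=20.0, tails_per_base=0.1)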