from math import exp, log10

from scipy.stats import poisson


def get_group_likelihood(log_p, allsams, num_present, sam2total):
    # Per-sample natural-log probabilities that the clone is seen at least
    # once, given per-read log10 probability log_p and the sample's depth.
    llhs = []
    for sam in allsams:
        logmu = log_p + log10(sam2total[sam])
        # scipy returns natural logs and sf(k, mu) = P(X > k),
        # so k = 0 gives P(X >= 1).
        llh = poisson.logsf(0, 10 ** logmu)
        llhs.append(llh)
    # Expected number of samples carrying the clone: the sum of the
    # per-sample presence probabilities (exp(), not 10**, because scipy's
    # log values are base e).
    expected_sams = sum(exp(l) for l in llhs)
    return poisson.logsf(num_present, expected_sams), expected_sams
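# Usage sketch with made-up depths: at a per-read log10 probability of -6
# and the totals below, the expected number of positive samples is about
# 1.9, so observing the clone in 2 samples is unsurprising.
demo_totals = {'s1': 1e6, 's2': 2e6, 's3': 5e5}
demo_llh, demo_expected = get_group_likelihood(-6.0, list(demo_totals), 2,
                                               demo_totals)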
import warnings


def poisson(self, op_id, log_overflow=1000.):
    if bool(self):
        from scipy.stats import poisson
        const, measures, labeled = self._measure(op_id)
        out = list()
        with warnings.catch_warnings():
            # Promote numerical warnings to exceptions so over/underflow in
            # the log tail probabilities can be caught and capped.
            warnings.simplefilter('error')
            for row in self.data_.itertuples(index=False):
                # Calculate the expected count for this row.
                item = const * measures[labeled[0]][_map(row, labeled[0])]
                for col in labeled[1:]:
                    item *= measures[col][_map(row, col)] / self.total_sum
                # "Log fish" score: signed log p-value, negative for
                # depletion, positive for enrichment.
                p, pexp = self.weight(row), item
                if p < pexp:  # Xo < Xexp
                    try:
                        # P(pois(Xexp) <= Xo); Xo = p, Xexp = pexp
                        val = poisson.logcdf(p, pexp)
                    except RuntimeWarning:
                        val = -log_overflow  # cdf underflowed: cap at the floor
                else:
                    try:
                        # P(pois(Xexp) >= Xo); Xo = p, Xexp = pexp
                        val = -poisson.logsf(p, pexp)
                    except RuntimeWarning:
                        val = log_overflow  # sf underflowed: cap at the ceiling
                out.append(self.dump_row(row) + (val,))
        return self._weighted_output(out)
    else:
        return self.__class__()
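# Sign-convention sketch for the score above (standalone, illustrative
# values): counts below expectation score negative, counts above positive.
from scipy.stats import poisson as _pois

depleted = _pois.logcdf(2, 10.0)    # observed 2 << expected 10 -> negative
enriched = -_pois.logsf(25, 10.0)   # observed 25 >> expected 10 -> positive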
from math import log10

from scipy.stats import poisson

import rcommon  # project-local helpers: ntclone_likelihood, visited_event


def aaclones_likelihood(clone2sams, model, db_dir, sam2total, group2sams,
                        outfile, ingroup, outgroup):
    f = open(outfile, 'w')
    f.write("sample\tnum_ntclones\tprob_observed\n")
    for clone, (insams, outsams) in clone2sams.items():
        f.write("#%s\n" % clone)
        events = []
        event_llhs = []
        for i, sams in enumerate([insams, outsams]):
            if not sams:
                continue
            sam2ntclones = get_ntclones(clone, sams, db_dir)
            f.write("#Group_%d\n" % (i + 1))
            for sam, ntclones in sam2ntclones.items():
                total = sam2total[sam]
                llhoods = []
                for ntclone in ntclones:
                    clonellhood = rcommon.ntclone_likelihood(ntclone, model)
                    # Expected count of this nt clone at this sample's depth.
                    logmu = log10(total) + clonellhood
                    # Prob. of observing >= 1 ntclone; scipy sf(k) = P(X > k).
                    prob_observed = poisson.logsf(0, 10 ** logmu)
                    llhoods.append(prob_observed)
                    if not rcommon.visited_event(events, ntclone):
                        events.append(ntclone)
                        event_llhs.append(clonellhood)
                llhoods_str = ",".join(["%f" % llh for llh in llhoods])
                f.write("%s\t%d\t%s\n" % (sam, len(ntclones), llhoods_str))
        # Prob. of observing the aa clone: the sum over all nt events.
        total_prob = sum(10 ** llh for llh in event_llhs)
        if total_prob > 0:
            aa_llh = log10(total_prob)
            avr_total = (sam2total[ingroup] + sam2total[outgroup]) / 2
            avr_logmu = aa_llh + log10(avr_total)
            avr_aa_llh = poisson.logsf(0, 10 ** avr_logmu)
            f.write("#Clone_log_likelihood: %f, %f\n" % (aa_llh, avr_aa_llh))
            ingroup_llh, _ = get_group_likelihood(
                aa_llh, group2sams[ingroup], len(insams), sam2total)
            outgroup_llh, _ = get_group_likelihood(
                aa_llh, group2sams[outgroup], len(outsams), sam2total)
            f.write("#Ingrp vs Outgrp: %f vs %f\n#\n"
                    % (ingroup_llh, outgroup_llh))
    f.close()
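# The aggregation step above in isolation: the aa-clone likelihood is the
# log10 of the summed linear-space nt-event likelihoods (values made up).
demo_nt_llhs = [-7.2, -6.8, -8.1]              # log10 likelihoods of nt events
demo_aa_llh = log10(sum(10 ** x for x in demo_nt_llhs))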
import numpy as np
from scipy.stats import poisson

baseEtoTen = np.log(10)


def clean_p_values(counts, lambdas):
    # Suppress the divide-by-zero warning raised when the survival
    # probability underflows to zero (errstate lives in numpy, not scipy).
    with np.errstate(divide='ignore'):
        p_values = poisson.logsf(counts, lambdas)
    # Convert natural-log tail probabilities to -log10 p-values.
    p_values /= -baseEtoTen
    p_values[counts == 0] = 0
    # Replace infinite scores (underflowed tails) with a large cap.
    p_values[np.isinf(p_values)] = 1000
    return p_values
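# Usage sketch with hypothetical bins: zero-count bins are forced to a
# score of 0, and bins whose survival probability underflows are capped
# at 1000.
demo_counts = np.array([0, 5, 50, 500])
demo_lambdas = np.full(4, 1.0)
demo_scores = clean_p_values(demo_counts, demo_lambdas)  # -log10 p per bin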
import numpy as np
from scipy.stats import poisson


def logprob_not_dark_hot(cts, exp, ref, quantile=64):
    """Log-probabilities of pixels being not dark and not hot.

    The cumulative Poisson distribution function (for dark pixels) and
    its complement, the survival function (for hot pixels), are
    calculated and normalised to account for the number of dead pixels
    and the additional scatter due to mismatch of the reference shape.
    The pixel ``quantile`` is used for shape normalisation.

    Arguments
    ----------
    cts : ndarray of ints
        counts per pixel
    exp : ndarray of floats
        exposure per pixel
    ref : ndarray of floats
        reference intensity per pixel

    Keyword arguments
    -----------------
    quantile
        reference pixel for shape normalisation

    Returns
    -------
    (d, h) : log-probabilities of pixels being not (dark, hot)
    """
    if hasattr(cts, 'mask'):
        invalid = cts.mask
        valid = np.logical_not(invalid)
    else:
        invalid = []
        valid = Ellipsis
    # Scale the reference image to the observed mean count level.
    ref_cts = ref / ref[valid].mean() * cts[valid].mean()
    # Lower tail flags dark pixels, upper tail flags hot pixels.
    logcdf, logsf = \
        [f(cts, ref_cts) for f in (poisson.logcdf, poisson.logsf)]
    for f in logcdf, logsf:
        f[invalid] = 0
    logcdf_s, logsf_s = \
        [np.sort(f[valid]) for f in (logcdf, logsf)]
    # Normalise shapes so the tail at pixel ``quantile`` matches its
    # expected order statistic.
    norm_cdf, norm_sf = \
        [np.log((cts.count() - quantile) / quantile) / logf_s[quantile]
         for logf_s in (logcdf_s, logsf_s)]
    # Offset for the fraction of dead (masked) pixels on the 128x128 chip.
    offset = np.log(1 - ((128**2 - cts.count()) / 128**2))
    return (offset - logcdf * norm_cdf, offset - logsf * norm_sf)
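# Usage sketch: the function expects a masked 128x128 count image (it
# relies on cts.count() and the hard-coded 128**2 chip size) plus a
# matching reference image; exp is accepted but unused in this version.
_rng = np.random.default_rng(0)
_mask = np.zeros((128, 128), dtype=bool)
_mask[0, :4] = True                      # pretend four dead pixels
demo_cts = np.ma.masked_array(_rng.poisson(50.0, (128, 128)), mask=_mask)
demo_ref = np.full((128, 128), 50.0)
not_dark, not_hot = logprob_not_dark_hot(demo_cts, None, demo_ref)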
import numpy as np
from scipy.stats import poisson


def computeLambda(tileCoverage, args):
    """
    This function is called by the writeBedGraph workers for every tile
    in the genome that is considered.
    """
    treatmentWindowTags = tileCoverage[0]
    controlWindowTags = tileCoverage[1]
    # Excess treatment signal over the genome-wide treatment mean, scaled
    # into control units, inflates the expected control coverage.
    treatmentExtraSignalTags = treatmentWindowTags - args['treatmentMean']
    controlLambda = args['controlMean'] + (treatmentExtraSignalTags * args['controlSignalRatio'])
    # poisson.logsf returns natural logs; divide by ln(10) for -log10 p.
    log10pvalue = -1 * poisson.logsf(controlWindowTags, controlLambda) / np.log(10)
    return log10pvalue
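# Worked sketch with made-up numbers: a treatment tile 40 tags above its
# genome-wide mean inflates the control lambda before the observed
# control tags are scored.
demo_args = {'treatmentMean': 60.0, 'controlMean': 50.0,
             'controlSignalRatio': 0.5}
# lambda = 50 + (100 - 60) * 0.5 = 70; score is -log10 P(Pois(70) > 80)
demo_score = computeLambda((100, 80), demo_args)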
from math import log10

from scipy.stats import poisson


def aaclone_llh(clone, cloneinfo, model, lencount_dir, group2sams, outfile,
                ingroup, outgroup, len2llh, aa_llh):
    f = open(outfile, "w")
    f.write("sample\tnum_ntclones\tprob_observed\n")
    items = clone.split("_")
    v = items[0]
    l = len(items[1])
    len_llh = len2llh[l]
    # Clone likelihood conditioned on its CDR3 length.
    aa_llh_cond = aa_llh - len_llh
    f.write("#Log_len_llh: %f\n" % len_llh)
    f.write("#Log_aaclone_llh_cond: %f\n" % aa_llh_cond)
    sam2numlen = get_lencount_fixedlen(lencount_dir, l)
    # Prob. of observing the aa clone, averaged over sample sizes.
    avr_totallen = sum(sam2numlen.values()) / len(sam2numlen)
    avr_logmu = aa_llh_cond + log10(avr_totallen)
    # scipy sf(k, mu) = P(X > k); k = 0 gives P(>= 1 clone observed).
    avr_aa_llh = poisson.logsf(0, 10 ** avr_logmu)
    f.write("#Clone_log_likelihood: %f, %f\n" % (aa_llh_cond, avr_aa_llh))
    if isinstance(cloneinfo, int):
        obs_numsam = cloneinfo  # observed number of samples
        allsams = []
        for sams in group2sams.values():
            allsams.extend(sams)
        group_llh, expected_sams = get_group_likelihood(
            aa_llh_cond, allsams, obs_numsam, sam2numlen)
        f.write("#Llh,Obs vs Exp:\t%f\t%d\t%f\n#\n"
                % (group_llh, obs_numsam, expected_sams))
    else:
        insams, outsams = cloneinfo
        ingroup_llh, in_expected_sams = get_group_likelihood(
            aa_llh_cond, group2sams[ingroup], len(insams), sam2numlen)
        outgroup_llh, out_expected_sams = get_group_likelihood(
            aa_llh_cond, group2sams[outgroup], len(outsams), sam2numlen)
        f.write("#Ingrp vs Outgrp: %f vs %f\n" % (ingroup_llh, outgroup_llh))
        f.write("#Expected Ingrp, Outgrp:\t%f\t%f\n"
                % (in_expected_sams, out_expected_sams))
        f.write("#Observed Ingrp, Outgrp:\t%d\t%d\n#\n"
                % (len(insams), len(outsams)))
    f.close()
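# The length conditioning above in isolation: subtracting the log10
# length likelihood turns the joint clone likelihood into one conditional
# on CDR3 length (made-up values).
demo_aa_joint = -8.5          # log10 P(clone)
demo_len_llh = -1.2           # log10 P(length)
demo_aa_cond = demo_aa_joint - demo_len_llh   # log10 P(clone | length)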
from collections import defaultdict

import numpy as np
from scipy.stats import poisson


def get_p_dict(self):
    # Cache -log10 p-values keyed by (lambda, count) so each unique pair
    # is computed only once, and tally how many base pairs receive each
    # p-value.
    p_value_dict = defaultdict(dict)
    count_dict = defaultdict(int)
    baseEtoTen = np.log(10)
    for node_id, valued_indexes in self.data.items():
        for start, end, val in valued_indexes:
            lam, count = val[0], val[1]
            if count not in p_value_dict[lam]:
                log_p_val = poisson.logsf(count, lam)
                p_value_dict[lam][count] = -log_p_val / baseEtoTen
            p = p_value_dict[lam][count]
            count_dict[p] += end - start
    self.p_value_dict = p_value_dict
    self.count_dict = count_dict
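# The caching idea above in isolation: identical (lambda, count) pairs
# map to the same -log10 p-value, so repeated windows cost one scipy call
# each (made-up pairs).
demo_cache = {}
for lam, cnt in [(5.0, 9), (5.0, 9), (2.0, 9)]:
    if (lam, cnt) not in demo_cache:
        demo_cache[(lam, cnt)] = -poisson.logsf(cnt, lam) / np.log(10)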
import numpy as np
from scipy.stats import poisson


def computePvalue(tileCoverage, args):
    """
    This function is called by the writeBedGraph workers for every tile
    in the genome that is considered.

    It computes a p-value based on an expected lambda coming from the
    correction of the treatment when the input is considered.
    """
    treatmentWindowTags = tileCoverage[0]
    controlWindowTags = tileCoverage[1]
    treatmentLambda = controlWindowTags * args['treatmentControlRatio']
    # Cap the -log10 p-value at 300 to avoid infinities when the survival
    # probability underflows to zero.
    log10pvalue = min(300, -1 * poisson.logsf(treatmentWindowTags, treatmentLambda) / np.log(10))
    return log10pvalue
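# Worked sketch with made-up counts: 120 treatment tags scored against a
# lambda scaled up from 50 control tags.
demo_pval_args = {'treatmentControlRatio': 1.5}
demo_pvalue = computePvalue((120, 50), demo_pval_args)  # lambda = 50 * 1.5 = 75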