예제 #1
0
def get_group_likelihood(log_p, allsams, num_present, sam2total):
    """Score the observed number of samples carrying a clone.

    For every sample the Poisson rate is ``10 ** (log_p + log10(total))``,
    where ``total`` is ``sam2total[sam]``.  The per-sample probability is the
    Poisson survival function at 1, i.e. ``P(X > 1)``.  These probabilities
    are summed and the sum is used as the rate of a second Poisson
    distribution that is evaluated at ``num_present``.

    Args:
        log_p: base-10 log of the per-draw probability of the clone.
        allsams: iterable of sample names; each must be a key of
            ``sam2total``.
        num_present: observed number of samples carrying the clone.
        sam2total: mapping from sample name to that sample's total count.

    Returns:
        A tuple ``(llh, expected)``: ``expected`` is the summed per-sample
        probability and ``llh`` is ``poisson.logsf(num_present, expected)``,
        which is a natural logarithm.
    """
    # ``poisson.logsf`` yields a natural log, so raising 10 to that value
    # would mix logarithm bases; use the survival function directly instead.
    expected = sum(
        poisson.sf(1, 10 ** (log_p + log10(sam2total[sam])))
        for sam in allsams
    )
    return poisson.logsf(num_present, expected), expected
예제 #2
0
    def poisson(self, op_id, log_overflow=1000.):
        """Attach a signed Poisson log-probability to every row of ``self.data_``.

        For each row an expected weight is built from the values returned by
        ``self._measure(op_id)`` and compared with the observed weight
        ``self.weight(row)``:

        * observed < expected: the score is ``poisson.logcdf(observed,
          expected)``, which is never positive;
        * otherwise: the score is ``-poisson.logsf(observed, expected)``,
          which is never negative.

        Args:
            op_id: forwarded unchanged to ``self._measure``.
            log_overflow: magnitude substituted for the score when SciPy
                emits a ``RuntimeWarning`` during the evaluation.  The
                substitute keeps the sign of the branch it replaces.

        Returns:
            ``self._weighted_output(rows)`` where each row is
            ``self.dump_row(row) + (score,)``; a fresh ``self.__class__()``
            when ``self`` is falsy.
        """
        if not bool(self):
            return self.__class__()

        # Local import: inside this body the name refers to the SciPy
        # distribution, not to this method.
        from scipy.stats import poisson

        const, measures, labeled = self._measure(op_id)
        out = []

        with warnings.catch_warnings():
            # Promote warnings to exceptions so that numerical trouble inside
            # SciPy can be caught and replaced by a finite sentinel.
            warnings.simplefilter('error')
            for row in self.data_.itertuples(index=False):
                # Expected weight of the row.
                expected = const * measures[labeled[0]][_map(row, labeled[0])]
                for col in labeled[1:]:
                    expected *= measures[col][_map(row, col)] / self.total_sum

                observed = self.weight(row)
                if observed < expected:
                    # Lower tail: log P(Pois(expected) <= observed) <= 0, so
                    # the fallback must be negative as well.
                    try:
                        val = poisson.logcdf(observed, expected)
                    except RuntimeWarning:
                        val = -log_overflow
                else:
                    # Upper tail: -log P(Pois(expected) > observed) >= 0, so
                    # the fallback must be positive as well.
                    try:
                        val = -poisson.logsf(observed, expected)
                    except RuntimeWarning:
                        val = log_overflow

                out.append(self.dump_row(row) + (val,))

        return self._weighted_output(out)
예제 #3
0
def aaclones_likelihood(clone2sams, model, db_dir, sam2total, group2sams,
                        outfile, ingroup, outgroup):
    """Write per-sample and per-group likelihood reports for amino-acid clones.

    For every clone the nucleotide clones of each sample are fetched with
    ``get_ntclones`` and scored with ``rcommon.ntclone_likelihood``.  Each
    distinct nucleotide event (as decided by ``rcommon.visited_event``)
    contributes once to the clone-level probability, which is then passed to
    ``get_group_likelihood`` for the in-group and the out-group.

    Args:
        clone2sams: mapping ``clone -> (insams, outsams)``; each element is a
            collection of sample names and may be empty.
        model: forwarded to ``rcommon.ntclone_likelihood``.
        db_dir: forwarded to ``get_ntclones``.
        sam2total: mapping from name to total count; it is indexed by sample
            names and also by ``ingroup`` and ``outgroup``.
        group2sams: mapping from group name to the samples of that group.
        outfile: path of the tab-separated report to write.
        ingroup: name of the in-group.
        outgroup: name of the out-group.
    """
    # The context manager closes the report even if scoring raises.
    with open(outfile, 'w') as f:
        f.write("sample\tnum_ntclones\tprob_observed\n")
        # ``items()`` works on both Python 2 and Python 3 dictionaries.
        for clone, (insams, outsams) in clone2sams.items():
            f.write("#%s\n" % clone)
            events = []
            event_llhs = []
            for i, sams in enumerate([insams, outsams]):
                if not sams:
                    continue
                sam2ntclones = get_ntclones(clone, sams, db_dir)
                f.write("#Group_%d\n" % (i + 1))
                for sam, ntclones in sam2ntclones.items():
                    total = sam2total[sam]
                    llhoods = []
                    for ntclone in ntclones:
                        clonellhood = rcommon.ntclone_likelihood(ntclone, model)
                        logmu = log10(total) + clonellhood
                        # logsf(1, mu) is the natural log of P(X > 1).
                        # NOTE(review): if "at least one" was intended this
                        # should be logsf(0, mu) - confirm with the author.
                        llhoods.append(poisson.logsf(1, 10 ** logmu))

                        # Count every distinct nucleotide event only once.
                        if not rcommon.visited_event(events, ntclone):
                            events.append(ntclone)
                            event_llhs.append(clonellhood)

                    llhoods_str = ",".join(["%f" % llh for llh in llhoods])
                    f.write("%s\t%d\t%s\n" % (sam, len(ntclones), llhoods_str))

            # Probability of observing the aa clone: sum over all nt events.
            total_prob = sum([10 ** llh for llh in event_llhs])
            if total_prob > 0:
                aa_llh = log10(total_prob)
                avr_total = (sam2total[ingroup] + sam2total[outgroup]) / 2
                avr_logmu = aa_llh + log10(avr_total)
                avr_aa_llh = poisson.logsf(1, 10 ** avr_logmu)
                f.write("#Clone_log_likelihood: %f, %f\n" % (aa_llh, avr_aa_llh))

                # get_group_likelihood expects the *number* of samples that
                # carry the clone and returns ``(llh, expected)``; only the
                # likelihood is reported here.
                num_in = len(insams) if insams else 0
                num_out = len(outsams) if outsams else 0
                ingroup_llh, _ = get_group_likelihood(
                    aa_llh, group2sams[ingroup], num_in, sam2total)
                outgroup_llh, _ = get_group_likelihood(
                    aa_llh, group2sams[outgroup], num_out, sam2total)
                f.write("#Ingrp vs Outgrp: %f vs %f\n#\n" % (ingroup_llh, outgroup_llh))
예제 #4
0
        def clean_p_values(counts, lambdas):
            """Turn Poisson upper-tail log-probabilities into capped scores.

            ``poisson.logsf(counts, lambdas)`` is divided by ``-baseEtoTen``
            (taken from the enclosing scope; presumably ``ln(10)``, which
            would make the result ``-log10(p)`` - confirm where it is
            defined).  Entries whose count is zero are set to 0 and infinite
            entries are replaced by 1000.

            Args:
                counts: array of observed counts.
                lambdas: Poisson rates, broadcastable against ``counts``.

            Returns:
                The array of scores; it is the array produced by
                ``poisson.logsf``, modified in place.
            """
            # ``np.errstate`` is the canonical context manager; the alias in
            # the ``scipy`` namespace is deprecated and missing from newer
            # SciPy releases.
            with np.errstate(divide='ignore'):
                p_values = poisson.logsf(counts, lambdas)

                p_values /= -baseEtoTen
                p_values[counts == 0] = 0
                # A survival probability that underflowed to zero shows up as
                # an infinite score; cap it at a finite sentinel.
                p_values[np.isinf(p_values)] = 1000
            return p_values
예제 #5
0
def logprob_not_dark_hot(cts, exp, ref, quantile=64, n_pixels=128 ** 2):
    """Log-probabilities of pixels being not dark and not hot.

    The cumulative Poisson distribution function (for dark pixels) and its
    complement, the survival function (for hot pixels), are evaluated for
    every pixel against the reference shape scaled to the mean of the valid
    counts.  Both are normalised by their value at the sorted position
    ``quantile`` and shifted by an offset that depends on the fraction of
    valid pixels.

    Arguments
    ----------
    cts : ndarray or masked array of ints
        counts per pixel; masked entries are treated as invalid
    exp : ndarray of floats
        exposure per pixel (not used by the current implementation)
    ref : ndarray of floats
        reference intensity per pixel

    Keyword arguments
    -----------------
    quantile
        index into the sorted valid log-probabilities used for normalisation
    n_pixels
        total number of pixels used for the valid-fraction offset

    Returns
    -------
    (d, h) : log-probabilities of pixels being not (dark, hot)
    """
    # getmaskarray returns a full boolean array for masked and plain inputs
    # alike, so both kinds of input follow the same code path.
    invalid = np.ma.getmaskarray(cts)
    valid = np.logical_not(invalid)
    n_valid = int(valid.sum())

    # Scale the reference shape to the mean level of the valid counts.
    ref_cts = ref / ref[valid].mean() * cts[valid].mean()

    logcdf = poisson.logcdf(cts, ref_cts)
    logsf = poisson.logsf(cts, ref_cts)

    norms = []
    for logf in (logcdf, logsf):
        logf[invalid] = 0
        logf_sorted = np.sort(logf[valid])
        norms.append(
            np.log((n_valid - quantile) / quantile) / logf_sorted[quantile])
    norm_cdf, norm_sf = norms

    offset = np.log(1 - ((n_pixels - n_valid) / n_pixels))

    return (offset - logcdf * norm_cdf,
            offset - logsf * norm_sf)
예제 #6
0
def computeLambda(tileCoverage, args):
    """
    Worker callback invoked by the writeBedGraph workers for each genome
    tile under consideration.

    The expected control rate is ``args['controlMean']`` plus the treatment
    signal in excess of ``args['treatmentMean']`` scaled by
    ``args['controlSignalRatio']``.  The return value is the negated Poisson
    log survival function of the control tag count under that rate, divided
    by ``ln(10)``.
    """
    treatment_tags, control_tags = tileCoverage[0], tileCoverage[1]

    excess_signal = treatment_tags - args['treatmentMean']
    expected_control = args['controlMean'] + (excess_signal * args['controlSignalRatio'])

    ln_survival = poisson.logsf(control_tags, expected_control)
    return -1 * ln_survival / np.log(10)
예제 #7
0
def aaclone_llh(clone, cloneinfo, model, lencount_dir, group2sams, outfile, ingroup, outgroup, len2llh, aa_llh):
    """Write the length-conditioned likelihood report of one amino-acid clone.

    The clone name is split on ``"_"``; the length of its second field selects
    the length likelihood ``len2llh[length]``, which is subtracted from
    ``aa_llh`` to give the conditional clone likelihood.  Per-sample totals
    for that length come from ``get_lencount_fixedlen`` and the group scores
    from ``get_group_likelihood``.

    Args:
        clone: clone name; its second ``"_"``-separated field is measured.
        cloneinfo: either an ``int`` (observed number of samples over all
            groups) or a pair ``(insams, outsams)`` of sample collections.
        model: not used by this function.
        lencount_dir: forwarded to ``get_lencount_fixedlen``.
        group2sams: mapping from group name to the samples of that group.
        outfile: path of the report to write.
        ingroup: in-group name, used when ``cloneinfo`` is a pair.
        outgroup: out-group name, used when ``cloneinfo`` is a pair.
        len2llh: mapping from sequence length to its log-likelihood.
        aa_llh: log-likelihood of the clone before conditioning on length.
    """
    # The context manager closes the report even if a later step raises.
    with open(outfile, "w") as f:
        f.write("sample\tnum_ntclones\tprob_observed\n")

        items = clone.split("_")
        seq_len = len(items[1])

        len_llh = len2llh[seq_len]
        aa_llh_cond = aa_llh - len_llh
        f.write("#Log_len_llh: %f\n" % len_llh)
        f.write("#Log_aaclone_llh_cond: %f\n" % aa_llh_cond)
        sam2numlen = get_lencount_fixedlen(lencount_dir, seq_len)

        # Probability of observing the aa clone at the average sample size.
        avr_totallen = sum(sam2numlen.values()) / len(sam2numlen)
        avr_logmu = aa_llh_cond + log10(avr_totallen)
        avr_aa_llh = poisson.logsf(1, 10 ** avr_logmu)
        f.write("#Clone_log_likelihood: %f, %f\n" % (aa_llh_cond, avr_aa_llh))

        if isinstance(cloneinfo, int):
            # Only a sample count is known: score it against all samples.
            obs_numsam = cloneinfo
            allsams = []
            for sams in group2sams.values():
                allsams.extend(sams)
            group_llh, expected_sams = get_group_likelihood(aa_llh_cond, allsams, obs_numsam, sam2numlen)
            f.write("#Llh,Obs vs Exp:\t%f\t%d\t%f\n#\n" % (group_llh, obs_numsam, expected_sams))
        else:
            insams = cloneinfo[0]
            outsams = cloneinfo[1]
            ingroup_llh, in_expected_sams = get_group_likelihood(
                aa_llh_cond, group2sams[ingroup], len(insams), sam2numlen
            )
            outgroup_llh, out_expected_sams = get_group_likelihood(
                aa_llh_cond, group2sams[outgroup], len(outsams), sam2numlen
            )
            f.write("#Ingrp vs Outgrp: %f vs %f\n" % (ingroup_llh, outgroup_llh))
            f.write("#Expected Ingrp, Outgrp:\t%f\t%f\n" % (in_expected_sams, out_expected_sams))
            f.write("#Observed Ingrp, Outgrp:\t%d\t%d\n#\n" % (len(insams), len(outsams)))
예제 #8
0
    def get_p_dict(self):
        """Build the p-value lookup table and the per-p-value length histogram.

        Every entry of ``self.data`` is iterated as ``(start, end, val)``
        triples, where ``val[0]`` is used as the Poisson rate and ``val[1]``
        as the count.  For each distinct ``(rate, count)`` pair the value
        ``-poisson.logsf(count, rate) / ln(10)`` is computed once and cached.

        Sets:
            self.p_value_dict: ``rate -> {count -> score}``.
            self.count_dict: ``score -> summed (end - start)`` over all
                triples that map to that score.
        """
        p_value_dict = defaultdict(dict)
        count_dict = defaultdict(int)
        ln10 = np.log(10)
        for valued_indexes in self.data.values():
            for start, end, val in valued_indexes:
                rate, count = val[0], val[1]
                cache = p_value_dict[rate]
                if count not in cache:
                    # logsf is a natural log; dividing by ln(10) converts it
                    # to base 10 and the minus sign makes the score positive.
                    cache[count] = -poisson.logsf(count, rate) / ln10
                count_dict[cache[count]] += end - start

        self.p_value_dict = p_value_dict
        self.count_dict = count_dict
예제 #9
0
def computePvalue(tileCoverage, args):
    """
    Worker callback invoked by the writeBedGraph workers for each genome
    tile under consideration.

    The expected rate is the control tag count scaled by
    ``args['treatmentControlRatio']``.  The return value is the negated
    Poisson log survival function of the treatment tag count under that
    rate, divided by ``ln(10)`` and capped at 300.
    """
    observed_tags = tileCoverage[0]
    background_tags = tileCoverage[1]

    expected_rate = background_tags * args['treatmentControlRatio']

    neg_log10_p = -1 * poisson.logsf(observed_tags, expected_rate) / np.log(10)
    # Keep 300 as the first argument so the cap also wins for inf and nan.
    return min(300, neg_log10_p)