Beispiel #1
0
    def poisson(self, op_id, log_overflow=1000.):
        """Score every row with a signed Poisson log-probability of its weight.

        For each row of ``self.data_`` an expected value is built from the
        pieces returned by ``self._measure(op_id)``: the constant times the
        measure of the first labeled column, then multiplied by
        ``measure / self.total_sum`` for each further labeled column. The
        observed value ``self.weight(row)`` is compared against it:

        * observed < expected: the score is ``poisson.logcdf`` (<= 0);
        * otherwise: the score is ``-poisson.logsf`` (>= 0).

        :param op_id: forwarded unchanged to ``self._measure``.
        :param log_overflow: magnitude used for the score when scipy emits a
            ``RuntimeWarning`` while evaluating the tail (warnings are
            promoted to errors inside the loop). The sign of the fallback
            follows the branch: ``-log_overflow`` for the lower tail,
            ``+log_overflow`` for the upper tail.
        :return: ``self._weighted_output(rows)`` where each entry is
            ``self.dump_row(row) + (score,)``; an empty instance of the same
            class when ``self`` is falsy.
        """
        if not self:
            return self.__class__()

        # Imported lazily so scipy is only needed when there is data to score.
        from scipy.stats import poisson

        const, measures, labeled = self._measure(op_id)
        first_col, other_cols = labeled[0], labeled[1:]
        out = list()

        with warnings.catch_warnings():
            # Promote warnings to exceptions so that numerical underflow in
            # the log-tails can be caught and clamped below.
            warnings.simplefilter('error')
            for row in self.data_.itertuples(index=False):
                # Expected value for this row.
                expected = const * measures[first_col][_map(row, first_col)]
                for col in other_cols:
                    expected *= measures[col][_map(row, col)] / self.total_sum

                observed = self.weight(row)
                if observed < expected:
                    # Lower tail: scipy's logcdf(k, mu) is log P(X <= k).
                    try:
                        val = poisson.logcdf(observed, expected)
                    except RuntimeWarning:
                        # The log-tail diverges towards -inf here, so the
                        # clamp must be negative to keep the score's sign.
                        val = -log_overflow
                else:
                    # Upper tail: scipy's logsf(k, mu) is log P(X > k).
                    # NOTE(review): if P(X >= observed) is intended, the
                    # first argument would have to be observed - 1 -- confirm.
                    try:
                        val = -poisson.logsf(observed, expected)
                    except RuntimeWarning:
                        # -logsf diverges towards +inf here, so the clamp
                        # must be positive to keep the score's sign.
                        val = log_overflow

                out.append(self.dump_row(row) + (val,))

        return self._weighted_output(out)
Beispiel #2
0
def logprob_not_dark_hot(cts, exp, ref, quantile=64, npix=128**2):
    """Log-probabilities of pixels being not dark and not hot.

    The cumulative Poisson distribution function (for dark pixels) and the
    survival function (for hot pixels) are calculated and normalised to
    account for the number of dead pixels and the additional scatter due to
    mismatch of the reference shape. The pixel ``quantile`` is used for
    shape normalisation.

    Arguments
    ----------
    cts : ndarray of ints
        counts per pixel; may be a masked array, in which case masked
        pixels are treated as invalid (dead)
    exp : ndarray of floats
        exposure per pixel (accepted for interface compatibility; not used
        in the computation)
    ref : ndarray of floats
        reference intensity per pixel

    Keyword arguments
    -----------------
    quantile
        reference pixel for shape normalisation: index into the sorted
        log-probabilities of the valid pixels
    npix
        total number of detector pixels used for the dead-pixel offset;
        defaults to ``128**2``

    Returns
    -------
    (d, h) : log-probabilities of pixels being not (dark, hot)

    Raises
    ------
    IndexError
        if ``quantile`` is not smaller than the number of valid pixels
    """
    # getmaskarray always yields a full boolean array (all False for plain
    # ndarrays or for masked arrays without a mask), so every selection
    # below is a flat 1-D selection regardless of the input kind.
    invalid = np.ma.getmaskarray(cts)
    valid = np.logical_not(invalid)
    n_valid = int(valid.sum())

    # Rescale the reference so that its mean over the valid pixels equals
    # the mean observed counts over the valid pixels.
    ref_cts = ref / ref[valid].mean() * cts[valid].mean()

    logcdf, logsf = [f(cts, ref_cts)
                     for f in (poisson.logcdf, poisson.logsf)]
    for logf in (logcdf, logsf):
        # Invalid pixels contribute nothing beyond the common offset.
        logf[invalid] = 0

    # Normalise so that the ``quantile``-th smallest log-probability maps
    # onto the log-odds of that quantile.
    log_odds = np.log((n_valid - quantile) / float(quantile))
    norm_cdf, norm_sf = [log_odds / np.sort(logf[valid])[quantile]
                         for logf in (logcdf, logsf)]

    # Equals log(n_valid / npix): the log-fraction of valid pixels.
    offset = np.log(1 - (npix - n_valid) / float(npix))

    return (offset - logcdf * norm_cdf,
            offset - logsf * norm_sf)
Beispiel #3
0
def computePvalue(tileCoverage, args):
    """
    Return the -log10 lower-tail Poisson probability of the control tags
    in one tile.

    Called by the writeBedGraph workers for every tile in the genome that
    is considered.

    ``tileCoverage[0]`` is the treatment tag count and ``tileCoverage[1]``
    the control tag count. ``args`` is a mapping providing
    ``'treatmentMean'``, ``'controlMean'`` and ``'controlSignalRatio'``.
    Tiles without control tags yield ``np.nan``.
    """
    control_tags = tileCoverage[1]
    if control_tags == 0:
        return np.nan

    # Treatment signal above its mean, scaled into control units, raises
    # the expected control rate for this tile.
    treatment_excess = tileCoverage[0] - args['treatmentMean']
    expected_control = (args['controlMean']
                        + (treatment_excess * args['controlSignalRatio']))

    # Natural-log CDF converted to base 10 and negated. The upper tail
    # (poisson.logsf) is deliberately not the one used here.
    return -1 * poisson.logcdf(control_tags, expected_control) / np.log(10)
Beispiel #4
0
def log_polyg(lx_var, alpha_var, d_var):
    """
    Compute the Gumbel polylog in log space.

    For ``k = 1..d_var`` and every entry of ``lx_var`` the log of the
    absolute value of each term is assembled into an
    ``(int(d_var), len(lx_var))`` matrix, which is passed to ``lssum``
    together with the term signs from ``signff(alpha_var, k, d_var)``.

    :param lx_var: one-dimensional array of log-arguments
        (``x = exp(lx_var)`` is used internally)
    :param alpha_var: parameter entering the factors
        ``|alpha_var * j - (k - 1)|``
    :param d_var: number of terms; ``int(d_var)`` rows are built
    :return: the result of ``lssum`` for the assembled terms
    >>> lsum(np.transpose(ipsi_gumbel(np.array([
    ...    [0.42873569, 0.18285458, 0.9514195],
    ...    [0.25148149, 0.05617784, 0.3378213],
    ...    [0.79410993, 0.76175687, 0.0709562],
    ...    [0.02694249, 0.45788802, 0.6299574],
    ...    [0.39522060, 0.02189511, 0.6332237],
    ...    [0.66878367, 0.38075101, 0.5185625],
    ...    [0.90365653, 0.19654621, 0.6809525],
    ...    [0.28607729, 0.82713755, 0.7686878],
    ...    [0.22437343, 0.16907646, 0.5740400],
    ...    [0.66752741, 0.69487362, 0.3329266]
    ...    ]),
    ...    1.2,
    ...    is_log=True
    ...    )))
    array([1.00636964, 1.81365937, 1.27973155, 1.76000074, 1.84085744,
           0.64075371, 0.77684883, 0.49862315, 1.41268535, 0.56279559])
    >>> log_polyg(
    ...    lsum(np.transpose(ipsi_gumbel(np.array([
    ...    [0.42873569, 0.18285458, 0.9514195],
    ...    [0.25148149, 0.05617784, 0.3378213],
    ...    [0.79410993, 0.76175687, 0.0709562],
    ...    [0.02694249, 0.45788802, 0.6299574],
    ...    [0.39522060, 0.02189511, 0.6332237],
    ...    [0.66878367, 0.38075101, 0.5185625],
    ...    [0.90365653, 0.19654621, 0.6809525],
    ...    [0.28607729, 0.82713755, 0.7686878],
    ...    [0.22437343, 0.16907646, 0.5740400],
    ...    [0.66752741, 0.69487362, 0.3329266]
    ...    ]),
    ...    1.2,
    ...    is_log=True
    ...    ))) * 1/1.2, 1/1.2, 3
    ... )
    array([2.24028738, 4.12345214, 2.86724735, 3.99567951, 4.1883271 ,
           1.42510898, 1.72500906, 1.11696417, 3.17657604, 1.25542124])
    >>> log_polyg(
    ...    lsum(np.transpose(ipsi_gumbel(np.array([
    ...    [0.42873569, 0.18285458, 0.9514195],
    ...    [0.25148149, 0.05617784, 0.3378213],
    ...    [0.79410993, 0.76175687, 0.0709562],
    ...    [0.02694249, 0.45788802, 0.6299574],
    ...    [0.39522060, 0.02189511, 0.6332237],
    ...    [0.66878367, 0.38075101, 0.5185625],
    ...    [0.90365653, 0.19654621, 0.6809525],
    ...    [0.28607729, 0.82713755, 0.7686878],
    ...    [0.22437343, 0.16907646, 0.5740400],
    ...    [0.66752741, 0.69487362, 0.3329266]
    ...    ]),
    ...    3.2,
    ...    is_log=True
    ...    ))) * 1/3.2, 1/3.2, 3
    ... )
    array([ 0.35110025,  1.31419104,  1.07707314,  1.68854151,  1.80435943,
           -0.43406987,  0.23166651, -0.18316099,  0.62329368, -0.35013782])
    """
    n_terms = int(d_var)
    n_points = int(lx_var.shape[0])

    orders = np.linspace(start=1.0, stop=d_var, num=n_terms)
    x = np.exp(lx_var)

    # Row i holds log P(Pois(x) <= d_var - k_i) for every x.
    log_pois = np.zeros(shape=[n_terms, n_points])
    for row, order in enumerate(orders):
        log_pois[row] = poisson.logcdf(d_var - order, x)

    # Outer product k_i * lx_j, shape (n_terms, n_points).
    k_times_lx = np.dot(orders.reshape(-1, 1), lx_var.reshape(1, -1))

    # Entry j-1 is sum_k log|alpha_var * j - (k - 1)|; a vanishing factor
    # gives log(0) = -inf, which is intended, hence the silenced warning.
    shifted_orders = orders - 1.0
    log_abs_poch = np.zeros(shape=n_terms)
    with np.errstate(divide='ignore'):
        for row in range(n_terms):
            log_abs_poch[row] = np.sum(
                np.log(abs(alpha_var * float(row + 1) - shifted_orders)),
                axis=0)

    log_fact = np.log(factorial(orders))

    # Broadcasting: the per-order term varies along rows only, x varies
    # along columns only. Summation order is left to right.
    per_order = (log_abs_poch - log_fact)[:, np.newaxis]
    per_point = x[np.newaxis, :]
    log_abs_terms = k_times_lx + log_pois + per_order + per_point

    return lssum(
        x_values=log_abs_terms,
        x_sign=signff(alpha_var, orders, d_var),
    )