def poisson(self, op_id, log_overflow=1000.):
    """Score every row with a signed log Poisson tail probability.

    For each row of ``self.data_`` an expected value is built from the
    result of ``self._measure(op_id)``: ``const`` times the measure of the
    first labeled column, times, for every further labeled column, that
    column's measure divided by ``self.total_sum``.  The row's observed
    weight (``self.weight(row)``) is then compared with that expectation:

    * observed < expected: the score is ``logcdf(observed, expected)``,
      which is never positive;
    * otherwise: the score is ``-logsf(observed, expected)``, which is
      never negative.

    Parameters
    ----------
    op_id
        Passed through to ``self._measure``.
    log_overflow : float
        Magnitude substituted when the log tail probability is not
        representable (the tail probability underflows to zero).  The
        substitute keeps the sign of its branch: ``-log_overflow`` for the
        ``logcdf`` branch and ``+log_overflow`` for the ``-logsf`` branch.

    Returns
    -------
    ``self._weighted_output(rows)`` where every entry of ``rows`` is
    ``self.dump_row(row) + (score,)``; a fresh ``self.__class__()`` when
    ``self`` is falsy.
    """
    if not self:
        return self.__class__()

    # Local import under an alias: the distribution shares this method's name.
    from scipy.stats import poisson as poisson_dist

    const, measures, labeled = self._measure(op_id)
    neg_inf = float('-inf')

    def _log_tail(log_fn, observed, expected):
        """Return log_fn(observed, expected), clamped to -log_overflow.

        A log tail probability is <= 0, so an underflow (log(0)) must be
        replaced by the *negative* sentinel.  Depending on the SciPy
        version the underflow shows up either as a RuntimeWarning (raised
        as an error by the caller's warning filter) or silently as -inf.
        """
        try:
            log_p = log_fn(observed, expected)
        except RuntimeWarning:
            return -log_overflow
        if log_p == neg_inf:
            return -log_overflow
        return log_p

    out = []
    with warnings.catch_warnings():
        # Turn numerical warnings into exceptions so underflow is detected.
        warnings.simplefilter('error')
        for row in self.data_.itertuples(index=False):
            # Expected weight of the row.
            expected = const * measures[labeled[0]][_map(row, labeled[0])]
            for col in labeled[1:]:
                expected *= measures[col][_map(row, col)] / self.total_sum

            observed = self.weight(row)
            if observed < expected:
                # Lower tail: score <= 0.
                val = _log_tail(poisson_dist.logcdf, observed, expected)
            else:
                # Upper tail: score >= 0.
                val = -_log_tail(poisson_dist.logsf, observed, expected)
            out.append(self.dump_row(row) + (val,))
    return self._weighted_output(out)
def logprob_not_dark_hot(cts, exp, ref, quantile=64):
    """Log-probabilities of pixels being not dark and not hot.

    The cumulative Poisson distribution function (for dark pixels) and
    its complement, the survival function (for hot pixels), are calculated
    and normalised to account for the number of dead pixels and the
    additional scatter due to mismatch of the reference shape.  The pixel
    ``quantile`` is used for shape normalisation.

    Arguments
    ---------
    cts : ndarray of ints
        counts per pixel; may be a masked array, in which case masked
        pixels are treated as invalid.  Plain (unmasked) arrays are
        accepted and treated as fully valid.
    exp : ndarray of floats
        exposure per pixel (accepted for interface compatibility; not
        used by the computation)
    ref : ndarray of floats
        reference intensity per pixel

    Keyword arguments
    -----------------
    quantile
        index into the sorted valid log-probabilities that is used as the
        reference pixel for shape normalisation

    Returns
    -------
    (d, h) : log-probabilities of pixels being not (dark, hot)
    """
    # getmaskarray always yields a full-shape boolean array, also for plain
    # ndarrays and for masked arrays whose mask is the scalar ``nomask``.
    invalid = np.ma.getmaskarray(cts)
    valid = np.logical_not(invalid)
    n_valid = int(np.count_nonzero(valid))

    # Scale the reference shape so that its mean matches the observed mean.
    ref_cts = ref / ref[valid].mean() * cts[valid].mean()

    logcdf = poisson.logcdf(cts, ref_cts)
    logsf = poisson.logsf(cts, ref_cts)
    # Invalid pixels contribute log(1) = 0.
    logcdf[invalid] = 0
    logsf[invalid] = 0

    # Normalise so that the ``quantile``-th smallest valid log-probability
    # maps onto log((n_valid - quantile) / quantile).
    log_odds = np.log((n_valid - quantile) / quantile)
    norm_cdf = log_odds / np.sort(logcdf[valid])[quantile]
    norm_sf = log_odds / np.sort(logsf[valid])[quantile]

    # 128**2 is the hard-coded total pixel count; the offset is therefore
    # log(n_valid / 128**2).  NOTE(review): presumably the detector is
    # 128 x 128 pixels -- confirm before using other array sizes.
    offset = np.log(1 - ((128**2 - n_valid) / 128**2))
    return (offset - logcdf * norm_cdf, offset - logsf * norm_sf)
def computePvalue(tileCoverage, args):
    """
    Return the -log10 Poisson lower-tail probability of a tile's control
    tag count.

    This function is called by the writeBedGraph workers for every tile
    in the genome that is considered.

    :param tileCoverage: sequence whose first item is the treatment tag
        count of the tile and whose second item is the control tag count
    :param args: mapping providing 'treatmentMean', 'controlMean' and
        'controlSignalRatio'
    :return: ``np.nan`` when the control count is zero, otherwise
        ``-log10(cdf(control count, lambda))`` with
        ``lambda = controlMean
        + (treatment count - treatmentMean) * controlSignalRatio``
    """
    treatment_tags = tileCoverage[0]
    control_tags = tileCoverage[1]

    # Tiles without any control tags are not scored.
    if control_tags == 0:
        return np.nan

    # Treatment signal above its mean, scaled by the signal ratio, shifts
    # the Poisson rate expected for the control window.
    excess_treatment = treatment_tags - args['treatmentMean']
    expected_control = (
        args['controlMean'] + excess_treatment * args['controlSignalRatio']
    )

    log_cdf = poisson.logcdf(control_tags, expected_control)
    # Convert the natural log to log10 and flip the sign.
    return -log_cdf / np.log(10)
def log_polyg(lx_var, alpha_var, d_var):
    """
    Compute the Gumbel polylog on the log scale.

    Builds a ``(int(d_var), lx_var.shape[0])`` matrix of log absolute
    terms and reduces it with ``lssum`` using the signs from ``signff``.

    :param lx_var: 1-D array of log-scale arguments (``x = exp(lx_var)``)
    :param alpha_var: multiplier used in the ``|alpha * j - (k - 1)|`` terms
    :param d_var: number of terms; ``k`` runs from 1 to ``d_var``
    :return: result of ``lssum`` over the term matrix

    >>> lsum(np.transpose(ipsi_gumbel(np.array([
    ...     [0.42873569, 0.18285458, 0.9514195],
    ...     [0.25148149, 0.05617784, 0.3378213],
    ...     [0.79410993, 0.76175687, 0.0709562],
    ...     [0.02694249, 0.45788802, 0.6299574],
    ...     [0.39522060, 0.02189511, 0.6332237],
    ...     [0.66878367, 0.38075101, 0.5185625],
    ...     [0.90365653, 0.19654621, 0.6809525],
    ...     [0.28607729, 0.82713755, 0.7686878],
    ...     [0.22437343, 0.16907646, 0.5740400],
    ...     [0.66752741, 0.69487362, 0.3329266]
    ...     ]),
    ...     1.2,
    ...     is_log=True
    ... )))
    array([1.00636964, 1.81365937, 1.27973155, 1.76000074, 1.84085744,
           0.64075371, 0.77684883, 0.49862315, 1.41268535, 0.56279559])

    >>> log_polyg(
    ...     lsum(np.transpose(ipsi_gumbel(np.array([
    ...         [0.42873569, 0.18285458, 0.9514195],
    ...         [0.25148149, 0.05617784, 0.3378213],
    ...         [0.79410993, 0.76175687, 0.0709562],
    ...         [0.02694249, 0.45788802, 0.6299574],
    ...         [0.39522060, 0.02189511, 0.6332237],
    ...         [0.66878367, 0.38075101, 0.5185625],
    ...         [0.90365653, 0.19654621, 0.6809525],
    ...         [0.28607729, 0.82713755, 0.7686878],
    ...         [0.22437343, 0.16907646, 0.5740400],
    ...         [0.66752741, 0.69487362, 0.3329266]
    ...         ]),
    ...         1.2,
    ...         is_log=True
    ...     ))) * 1/1.2, 1/1.2, 3
    ... )
    array([2.24028738, 4.12345214, 2.86724735, 3.99567951, 4.1883271 ,
           1.42510898, 1.72500906, 1.11696417, 3.17657604, 1.25542124])

    >>> log_polyg(
    ...     lsum(np.transpose(ipsi_gumbel(np.array([
    ...         [0.42873569, 0.18285458, 0.9514195],
    ...         [0.25148149, 0.05617784, 0.3378213],
    ...         [0.79410993, 0.76175687, 0.0709562],
    ...         [0.02694249, 0.45788802, 0.6299574],
    ...         [0.39522060, 0.02189511, 0.6332237],
    ...         [0.66878367, 0.38075101, 0.5185625],
    ...         [0.90365653, 0.19654621, 0.6809525],
    ...         [0.28607729, 0.82713755, 0.7686878],
    ...         [0.22437343, 0.16907646, 0.5740400],
    ...         [0.66752741, 0.69487362, 0.3329266]
    ...         ]),
    ...         3.2,
    ...         is_log=True
    ...     ))) * 1/3.2, 1/3.2, 3
    ... )
    array([ 0.35110025,  1.31419104,  1.07707314,  1.68854151,  1.80435943,
           -0.43406987,  0.23166651, -0.18316099,  0.62329368, -0.35013782])
    """
    n_terms = int(d_var)
    n_points = int(lx_var.shape[0])

    k = np.linspace(start=1.0, stop=d_var, num=n_terms)
    x = np.exp(lx_var)

    # Row i holds log P(Pois(x) <= d - k_i) for every point.
    log_pois_cdf = np.zeros(shape=[n_terms, n_points])
    for row, k_i in zip(log_pois_cdf, k):
        row[...] = poisson.logcdf(d_var - k_i, x)

    # Outer product k_i * lx_j.
    k_times_lx = np.dot(k.reshape(-1, 1), lx_var.reshape(1, -1))

    # Entry i is sum_k log|alpha * (i + 1) - (k - 1)|; a zero factor gives
    # -inf, hence the suppressed divide-by-zero warning.
    k_minus_one = k - 1.0
    log_abs_poch = np.zeros(shape=n_terms)
    with np.errstate(divide='ignore'):
        for idx in range(n_terms):
            factors = np.abs(alpha_var * float(idx + 1) - k_minus_one)
            log_abs_poch[idx] = np.sum(np.log(factors), axis=0)

    log_factorial = np.log(factorial(k))

    # The per-term vector varies along rows and x along columns, so plain
    # broadcasting reproduces the tiled/repeated matrices.
    per_term = (log_abs_poch - log_factorial).reshape(-1, 1)
    per_point = x.reshape(1, -1)
    log_abs_terms = k_times_lx + log_pois_cdf + per_term + per_point

    return lssum(
        x_values=log_abs_terms,
        x_sign=signff(alpha_var, k, d_var),
    )