Example #1
    def logp_r(self, i: int) -> float:
        if self.verbose_prob:
            print(f"[{color_asgn('R')}] ", end='')
        intvls = self.pread.intvls
        I = intvls[i]
        if max(I.cb, I.ce) >= self.cp.depths['R']:
            if self.verbose_prob:
                print("> Global R-cov")
            return 0.
        # est_cnt = self._est_cov(i, I.b, s)
        l, r = self._find_nn(i, 'D', only_rel=True)   # FIXME: reconsider coverage estimation
        if l is None and r is None:
            dcov_l = dcov_r = self.cp.depths['D']
        elif l is None:
            dcov_l = dcov_r = intvls[r].cb
        elif r is None:
            dcov_l = dcov_r = intvls[l].ce
        else:
            dcov_l, dcov_r = intvls[l].ce, intvls[r].cb
        rcov_l, rcov_r = int(dcov_l * self.cp.DR_RATIO), int(dcov_r * self.cp.DR_RATIO)
        if I.cb >= rcov_l or I.ce >= rcov_r:
            if self.verbose_prob:
                print(f"> Est R-cov (B: {I.cb} >= {rcov_l} or E: {I.ce} >= {rcov_r})")   # FIXME: "slipping interval" in repeats
            return R_LOGP
        logp_l = binom.logpmf(I.cb, rcov_l, 1 - 0.01)   # TODO: use smaller n-sigma and use calc_logp
        logp_r = binom.logpmf(I.ce, rcov_r, 1 - 0.01)
        logp = logp_l + logp_r
        if self.verbose_prob:
            print(f"ER={logp_l:5.0f} + {logp_r:5.0f} -> logp={logp:5.0f}")
        return logp
Example #2
    def compute_likelihood(self, data, **kwargs):
        # The likelihood of the human data
        assert len(data) == 0

        alpha = self.value['alpha'].value[0]
        beta = self.value['beta'].value[0]
        llt = self.value['likelihood_temperature'].value
        pt = self.value['prior_temperature'].value

        # compute each hypothesis' prior, fixed over all data
        priors = np.ones(self.N_hyps) * self.prior_offset  # #h x 1 vector
        for nt in self.nts:  # sum over all nonterminals
            priors = priors + np.dot(np.log(self.value['rulep'][nt].value), self.Counts[nt].T)

        priors = priors - logsumexp(priors)  # normalize in log space (numerically stable)
        priors = priors / pt  # include prior temperature

        pos = 0  # what response are we on?
        likelihood = 0.0
        for g in range(self.N_groups):
            posteriors = self.L[g] / llt + priors  # posterior score
            posteriors = np.exp(posteriors - logsumexp(posteriors))  # posterior probability

            # Now compute the probability of the human data
            for _ in range(self.GroupLength[g]):
                ps = (1 - alpha) * beta + alpha * np.dot(posteriors, self.ModelResponse[pos])

                likelihood += binom.logpmf(self.Nyes[pos], self.Ntrials[pos], ps)
                pos = pos + 1

        return likelihood
Example #3
    def _compute_log_likelihood(self, X):
        matrix = []
        lookup = {}  # cache binom.logpmf results keyed by (x, n, p)
        for x in X:
            row = []
            for i in range(self.n_components):
                total = 0
                for j in range(2):
                    index = (x[j], self.n[j], self.p[j][i])
                    if index in lookup:
                        total += lookup[index]
                    else:
                        y = binom.logpmf(x[j], self.n[j], self.p[j][i])
                        lookup[index] = y
                        total += y
                row.append(total)

            matrix.append(row)
        return np.asarray(matrix)
Example #4
    def _compute_log_likelihood(self, X):
        matrix = []
        lookup = {}  # cache binom.logpmf results keyed by (x, n, p)
        for x in X:
            row = []
            for state in range(self.n_components):
                res = 0
                for dim in range(self.n_features):
                    for comp in range(self.distr_magnitude):
                        index = (x[dim], self.n[dim], self.p[dim][state][comp])
                        if index in lookup:
                            res += lookup[index] * self.c[dim][state][comp]
                        else:
                            y = binom.logpmf(x[dim], self.n[dim], self.p[dim][state][comp])
                            lookup[index] = y
                            res += y * self.c[dim][state][comp]
                row.append(res)

            matrix.append(row)
        return np.asarray(matrix)
Example #5
def dbinom(x, size=1, prob=0.5, log=False):
    """
    ============================================================================
                                                                        dbinom()
    ============================================================================
    Density Function for the binomial distribution.
    Returns the probability of getting "x" successes out of "size" number of
    trials, given a probability of "prob" for each success.

    USAGE:
    dbinom(x, size, prob=0.5, log=False)
    pbinom(q, size, prob=0.5, lowertail=True, log=False)
    qbinom(p, size, prob=0.5, lowertail=True, log=False)
    rbinom(n=1, size=1, prob=0.5)

    :param x:       int or array of ints. The number of successes
    :param size:    int. Number of trials
    :param prob:    float. Probability of a success
    :param log:     bool. Return the log-probability instead?
    :return:        float or array. The (log-)probability of x successes
    ============================================================================
    """
    if log:
        # note: scipy names R's "size" argument "n"
        return binom.logpmf(x, n=size, p=prob, loc=0)
    else:
        # note: scipy names R's "size" argument "n"
        return binom.pmf(x, n=size, p=prob)
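A quick usage sketch for the wrapper above (it assumes `from scipy.stats import binom` is in scope, as the function itself requires):

dbinom(3, size=10, prob=0.5)            # binom.pmf(3, n=10, p=0.5)    ~= 0.1172
dbinom(3, size=10, prob=0.5, log=True)  # binom.logpmf(3, n=10, p=0.5) ~= -2.144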
Example #6
    def _log_binom_expect(self, n, p, scaled_renyi_fn, ldistr, rdistr):
        """
            Computes logarithm of expectation over binomial distribution with parameters (n, p).
            
            Parameters
            ----------
            n : number, required
                Number of Bernoulli trials.
            p : number, required
                Probability of success.
            scaled_renyi_fn : function, required
                Function pointer to compute scaled Renyi divergence (inside Bernoulli expectation).
            ldistr : tuple or array, required
                Parameters of the left distribution (i.e., imposed by D).
            rdistr : tuple or array, required
                Parameters of the right distribution (i.e., imposed by D').

            Returns
            -------
            out : torch.Tensor
                Logarithm of the expectation of scaled_renyi_fn over the binomial distribution.
        """
        k = torch.arange(n + 1, dtype=torch.float)
        log_binom_coefs = torch.tensor(binom.logpmf(k, n=n, p=p))
        return torch.logsumexp(log_binom_coefs +
                               scaled_renyi_fn(k, ldistr, rdistr),
                               dim=1)
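The method relies on the identity log E_{k~Bin(n,p)}[exp f(k)] = logsumexp_k(log Bin(k; n, p) + f(k)). A toy sketch, assuming scaled_renyi_fn returns a 2-D tensor of shape (batch, n + 1) so that the dim=1 reduction is valid; the f below is a hypothetical stand-in:

import torch
from scipy.stats import binom

n, p = 4, 0.3
k = torch.arange(n + 1, dtype=torch.float)
f = lambda k, ldistr, rdistr: (0.1 * k).unsqueeze(0)  # hypothetical, shape (1, n + 1)
log_coefs = torch.tensor(binom.logpmf(k.numpy(), n=n, p=p))
log_expect = torch.logsumexp(log_coefs + f(k, None, None), dim=1)  # shape (1,)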
Example #7
    def _compute_log_likelihood(self, X):
        matrix = []
        lookup = {}  # cache binom.logpmf results keyed by (x, n, p)
        for x in X:
            row = []
            for state in range(self.n_components):
                res = 0
                for dim in range(self.n_features):
                    for comp in range(self.distr_magnitude):
                        index = (x[dim], self.n[dim], self.p[dim][state][comp])
                        if index in lookup:
                            res += lookup[index] * self.c[dim][state][comp]
                        else:
                            y = binom.logpmf(x[dim], self.n[dim], self.p[dim][state][comp])
                            lookup[index] = y
                            res += y * self.c[dim][state][comp]
                row.append(res)

            matrix.append(row)
        return np.asarray(matrix)
Example #8
def counts_log_likelihood(proportions, methylated, unmethylated, reference):

    b = np.matmul(proportions, reference)

    ll = np.sum(binom.logpmf(methylated, methylated + unmethylated, b, loc=0))

    return -ll / 1000
Example #9
    def logp_r(self, i: int, st_pred: CovsT) -> float:
        """Given an imaginary R-cov (and its position) larger than D-cov,
        compute the probability of transition from it to `i`-th interval.
        If R-cov is larger, larger counts are required to have higher prob.
        If R-cov is smaller, smaller counts are classified as R.
        """
        I = self.intvls[i]
        beg_pos, beg_cnt, _, _ = self._expand_intvl(I)
        st = st_pred['R']

        logp_sf = -inf
        # logp_sf = calc_logp_trans(self._pred(st.pos), beg_pos,
        #                           st.cnt, beg_cnt,
        #                           st.cnt, self.cp.read_len)
        logp_er = binom.logpmf(beg_cnt, st.cnt, 1 - 0.01) if beg_cnt < st.cnt else -inf
        logp = max(logp_sf, logp_er)
        if self.verbose_prob:
            print(f"SF={logp_sf:5.0f}{'*' if logp_sf >= logp_er else ' '} "
                  f"ER={logp_er:5.0f}{'*' if logp_er >= logp_sf else ' '}")

        # FIXME: revise the following codes
        if logp > R_LOGP:
            return logp
        if max(I.ccb, I.cce) >= self.cp.depths['R']:
            if self.verbose_prob:
                print(' ' * 6 + "Counts >= Global R-cov")
            return R_LOGP
        if max(I.ccb, I.cce) >= st.cnt:
            if self.verbose_prob:
                print(' ' * 6 + "Counts >= Est R-cov")
            return R_LOGP
        return logp
Example #10
    def _compute_log_likelihood(self, X):
        matrix = []
        lookup = {}  # cache binom.logpmf results keyed by (x, n, p)
        for x in X:
            row = []
            for i in range(self.n_components):
                total = 0
                for j in range(2):
                    index = (x[j], self.n[j], self.p[j][i])
                    if index in lookup:
                        total += lookup[index]
                    else:
                        y = binom.logpmf(x[j], self.n[j], self.p[j][i])
                        lookup[index] = y
                        total += y
                row.append(total)

            matrix.append(row)
        return np.asarray(matrix)
Example #11
def pdf_integral(p1, data):
    # integrand: prior density of p1 times the binomial likelihood of (xj, nj)
    xj, nj, c, p2, var = data

    dens = pdf(p1, data=(c, p2, var))
    return np.exp(np.log(dens) + binom.logpmf(xj, nj, p=p1))
Example #12
    def compute_likelihood(self, data, **kwargs):
        # The likelihood of the human data
        assert len(data) == 0

        # compute each hypothesis' prior, fixed over all data
        priors = np.ones(self.N_hyps) * self.prior_offset  # #h x 1 vector
        for nt in self.nts:  # sum over all nonterminals
            priors = priors + np.dot(np.log(self.value[nt].value), self.Counts[nt].T)

        priors = priors - logsumexp(priors)  # normalize in log space (numerically stable)

        pos = 0  # what response are we on?
        likelihood = 0.0
        for g in range(self.N_groups):
            posteriors = self.L[g] + priors  # posterior score
            posteriors = np.exp(posteriors - logsumexp(posteriors))  # posterior probability

            # Now compute the probability of the human data
            for _ in range(self.GroupLength[g]):
                ps = np.dot(posteriors, self.ModelResponse[pos])

                likelihood += binom.logpmf(self.Nyes[pos], self.Ntrials[pos], ps)
                pos = pos + 1

        return likelihood
Example #13
def dbinom(x, size=1, prob=0.5, log=False):
    """
    ============================================================================
                                                                        dbinom()
    ============================================================================
    Density Function for the binomial distribution.
    Returns the probability of getting "x" successes out of "size" number of
    trials, given a probability of "prob" for each success.

    USAGE:
    dbinom(x, size, prob=0.5, log=False)
    pbinom(q, size, prob=0.5, lowertail=True, log=False)
    qbinom(p, size, prob=0.5, lowertail=True, log=False)
    rbinom(n=1, size=1, prob=0.5)

    :param x:       int or array of ints. The number of successes
    :param size:    int. Number of trials
    :param prob:    float. Probability of a success
    :param log:     bool. Return the log-probability instead?
    :return:        float or array. The (log-)probability of x successes
    ============================================================================
    """
    if log:
        # note: scipy names R's "size" argument "n"
        return binom.logpmf(x, n=size, p=prob, loc=0)
    else:
        # note: scipy names R's "size" argument "n"
        return binom.pmf(x, n=size, p=prob)
Example #14
    def calc_logp(i: int, state: str, intervals: List[CountIntvl],
                  assignments: List[str]) -> float:
        """Compute the probability that the state of `intvl` is `state` by
        calculating smoothness of `intvl` given adjacent intervals of the
        same state.
        """
        intvl = intervals[i]
        if state == 'E':
            return (logp_poisson(intvl.start.count, mean_depths[state]) +
                    logp_poisson(intvl.end.count, mean_depths[state]))
        elif state in ('H', 'D'):
            p, n = find_nearest(i, state, intervals, assignments)
            if p < 0 and n >= len(intervals):
                return -np.inf
            prev_count = (intervals[p].end.count
                          if p >= 0 else intervals[n].start.count)
            next_count = (intervals[n].start.count
                          if n < len(intervals) else intervals[p].end.count)
            return (binom.logpmf(min(intvl.start.count, prev_count),
                                 max(intvl.start.count, prev_count), 0.92) +
                    binom.logpmf(min(intvl.end.count, next_count),
                                 max(intvl.end.count, next_count), 0.92))
        else:  # 'R'
            p_depth, n_depth = calc_neighbor_depth(i, n_boundary, 'D',
                                                   intervals, assignments)
            assert p_depth >= 0 or n_depth < len(intervals), \
                "No diploid states"
            if p_depth < 0:
                p_depth = n_depth
            elif n_depth >= len(intervals):
                n_depth = p_depth
            if (p_depth + mean_depths['H'] / 2 <= intvl.start.count
                    or n_depth + mean_depths['H'] / 2 <= intvl.end.count):
                return np.inf
            else:
                return -np.inf
Example #15
def ML_Bin(
    data,
    model_pred,
    threshold=5,
    approx=True,
    factor=1,
):
    """
    This function calculates the log-likelihood of the Bin approximation of the
    measurement of illness in Israel.
    It assumes the number of tests is n_{j,k,t}, the probability of getting a
    positive result is p_{j,k,t} (the model prediction), and the data point is
    q_{j,k,t}; in total, the likelihood per data point is P(X=q) ~ Bin(n, p).
    Cells (specific t,j,k triplets) with an insufficient number of tests,
    i.e. n_{j,k,t} < threshold, are ignored in the likelihood.
    :param data: np.array of 4 dimensions:
                    axis 0: n, q - representing different values:
                            first total tests, then positive rate.
                    axis 1: t - time of sample, starting from the first day in
                            question, calibrated to the model.
                    axis 2: k - area index
                    axis 3: j - age index
            data should be smoothed
            (filled with zeros where no test occurred).
    :param model_pred: np.ndarray of 3 dimensions representing the probability:
                        axis 0: t - time of sample, starting from the first day
                                in question, calibrated to the model.
                        axis 1: k - area index
                        axis 2: j - age index
    :return: the -log-likelihood of the data given the prediction of the model.
    """
    n = data[0, :, :]
    q = factor * data[1, :, :]
    p = model_pred
    if approx:
        # Poisson approximation
        ll = -poisson.logpmf(
            k=n * q,
            mu=n * p,
        )
    else:
        # Binomial distribution
        ll = -binom.logpmf(
            k=n * q,
            n=n,
            p=p,
        )

    # cut below-threshold values
    ll = np.nan_to_num(ll, nan=0, posinf=0, neginf=0)
    ll = ll * (n > threshold)
    return ll.sum()
Example #16
    def null_loglike(self):
        types, tokens = self.endog, self.exog
        projected_n_types = np.median(self.ttrs) * tokens.reshape((-1,))
        p = .5
        binom_ns = np.floor((1 / p) * projected_n_types)
        logprobs = [binom.logpmf(t, bn, p) for t, bn in zip(types, binom_ns)]
        logprobs_clipped = np.clip(logprobs, -10**6, 0)
        return sum(logprobs_clipped)
Example #17
    def _plumb_mle(self, parameters):
        days = self._fittingPeriod[1] - self._fittingPeriod[0]
        params = dict(zip(self._paramNames, parameters))

        if self._stochastic:
            # Stochastic: run several experiments, each with some randomness
            # inside, and take the mean.
            experiments = []  # dims: [experiment #][day][value]

            for i in range(self._nbExperiments):
                res = self.predict(end=days, parameters=params)
                experiments.append(res)

            experiments = np.stack(experiments)
        else:
            res = self.predict(end=days, parameters=params)

        lhs = dict()
        for state, obs, param in [(StateEnum.SYMPTOMATIQUE, ObsEnum.DHDT, params['Tau']),
                                  (StateEnum.DSPDT, ObsEnum.NUM_TESTED, params['Mu']),
                                  (StateEnum.DTESTEDDT, ObsEnum.NUM_POSITIVE, params['Eta']),
                                  (StateEnum.CRITICAL, ObsEnum.DFDT, params['Theta'])]:
            # So: 1) from the predicted number of Symptomatic people and the parameter tau, check whether the observation dhdt is probable;
            #     2) from the predicted number of Critical people and the parameter theta, check whether the observation dfdt is probable;
            #     3) on the transition from Asymptomatic to Symptomatic (sigma*A -> dSPdt), with the testing parameter mu, check whether the observation num_tested is probable.
            log_likelihood = 0
            for day in np.arange(0, days):
                # Take all the values of experiments on a given day
                # for a given measurement (state.value)
                observation = max(1, self._data[day + self._fittingPeriod[0]][obs.value])
                if self._stochastic:
                    values = experiments[:, day, state.value]  # binomial
                    prediction = np.mean(values)
                else:
                    prediction = res[day, state.value]

                try:
                    log_bin = binom.logpmf(observation, np.round(np.mean(prediction)), param)
                    if prediction == 0:
                        log_bin = 0
                except FloatingPointError:
                    log_bin = -999
                log_likelihood += log_bin

            lhs[obs] = log_likelihood
        return -sum(lhs.values())
Example #18
def get_log_value(x, distr):
    if distr['distr_name'] == 'binomial':
        if x in lookup_pmf:
            return lookup_pmf[x]
        else:
            v = binom.logpmf(x, distr['n'], distr['p'])
            lookup_pmf[x] = v
            return v
    if distr['distr_name'] == 'nb':
        return distr['distr'].logpdf(x)
Example #19
def get_sex(sample, Nx, Na, Lx, La):
    Rx = float(Nx) / (Nx + Na)

    # Beta CI with non-informative prior, aka the Jeffreys interval.
    # See Brown, Cai, and DasGupta (2001). doi:10.1214/ss/1009213286
    Rx_CI = beta.interval(0.99, Nx + 0.5, Na + 0.5)

    # expected ratios from the chromosome lengths
    Elx_X0 = float(Lx) / (Lx + 2 * La)
    Elx_XX = float(Lx) / (Lx + La)

    #ll_x0 = beta.logpdf(Elx_X0, Nx+0.5, Na+0.5)
    #ll_xx = beta.logpdf(Elx_XX, Nx+0.5, Na+0.5)
    ll_x0 = binom.logpmf(Nx, Nx + Na, Elx_X0)
    ll_xx = binom.logpmf(Nx, Nx + Na, Elx_XX)

    # likelihood ratio test
    alpha = 0.001
    if chi2.sf(2 * (ll_x0 - ll_xx), 1) < alpha:
        sex = 'M'
    elif chi2.sf(2 * (ll_xx - ll_x0), 1) < alpha:
        sex = 'F'
    else:
        # indeterminate
        sex = 'U'

    if ll_x0 > ll_xx:
        Elx = 2 * Elx_X0
    else:
        Elx = Elx_XX

    Mx = Rx / Elx
    Mx_CI = [Rx_CI[0] / Elx, Rx_CI[1] / Elx]

    if Mx < 0.4 or Mx > 1.2:
        #print("Warning: {} has unexpected Mx={:g}".format(sample, Mx), file=sys.stderr)
        pass

    if Mx > 0.6 and Mx < 0.8:
        # suspicious sample, may be contaminated
        sex = 'U'

    return Elx, Mx, Mx_CI, sex
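A hypothetical call sketch; the chromosome lengths are approximate GRCh38 values given only for illustration, and beta, binom, and chi2 from scipy.stats are assumed to be in scope:

Lx = 156_040_895        # approximate chrX length
La = 2_875_000_000      # approximate total autosome length
Elx, Mx, Mx_CI, sex = get_sex("sample1", Nx=48_000, Na=1_000_000, Lx=Lx, La=La)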
Example #20
    def nll(x):  # expects a tuple "x" from the minimizer
        """
        Negative log-likelihood function.
        """
        a, b, g, l = x
        res = p_func(stim, a, b, g, l)
        return -np.sum(binom.logpmf(n, m, res))
Example #21
def get_candidate_del_loci(hap_cov, transition_prob=1e-2, het_read_prob=0.9):
    # total coverage; note that only hap0 and hap1 are summed
    hap_cov['total_cov'] = hap_cov["cov_q30_hap0"] + hap_cov["cov_q30_hap1"]
    npos = len(hap_cov)

    ### Emission probabilities
    em_probs = np.ones((npos, 2)) * MIN_LOG_PROB
    # emission given no del
    em_probs[:, 0] = np.maximum(MIN_LOG_PROB, binom.logpmf(np.maximum(hap_cov.cov_q30_hap0, hap_cov.cov_q30_hap1),
                                                           np.array(hap_cov.total_cov), 0.65))
    em_probs[:, 1] = np.maximum(MIN_LOG_PROB, binom.logpmf(np.maximum(hap_cov.cov_q30_hap0, hap_cov.cov_q30_hap1),
                                                           np.array(hap_cov.total_cov), het_read_prob))
    ### Transition probabilities
    # [no-del -> no-del , del -> no-del,
    #  no-del -> del    , del -> del]
    trans_probs = np.array([[1 - transition_prob, transition_prob],
                            [transition_prob    , 1 - transition_prob]])
    trans_probs = np.log(trans_probs)

    ### Prior state probabilities (prob of starting on a del or no del)
    del_prior_prob = DEL_PRIOR_PROB
    priors = np.array([1 - del_prior_prob, del_prior_prob])
    priors = np.log(priors)

    max_probs = np.zeros((npos, 2))
    max_state = np.zeros((npos, 2), dtype=int)

    max_probs[0, :] = priors + em_probs[0, :]

    for i in range(1, npos):
        # max_probs[i-1, 0] is the probability of the most probable path
        # (i.e. hidden state sequence) that ends at position i-1 with a 0
        new_probs = max_probs[i - 1, :] + trans_probs
        max_probs[i, :] = em_probs[i, :] + np.max(new_probs, axis=1)
        max_state[i, :] = np.argmax(new_probs, axis=1)

    best_path = np.zeros(npos, dtype=int)
    best_path[-1] = np.argmax(max_probs[-1, :])
    for i in range(npos - 2, -1, -1):  # back-track all the way to position 0
        best_path[i] = max_state[i + 1, best_path[i + 1]]

    return best_path
Example #22
def get_log_value(x, distr):
    if distr['distr_name'] == 'binomial':
        if x in lookup_pmf:
            return lookup_pmf[x]
        else:
            v = binom.logpmf(x, distr['n'], distr['p'])
            lookup_pmf[x] = v
            return v
    if distr['distr_name'] == 'nb':
        return distr['distr'].logpdf(x)
Example #23
def metric_computational(counts: dict, shots: int) -> float:
    """The negative log-likelihood of the 01 and 10 counts assuming a
    binomial probability distribution with equal probability.

    Args:
        counts: a dict of counts keyed by measurement bitstrings
        shots: total number of shots

    Returns:
        the metric value
    """
    return -binom.logpmf([counts.get('01', 0), counts.get('10', 0)], n=shots, p=0.5).sum()
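A usage sketch with a hypothetical counts dict, calling the function above:

counts = {'00': 480, '01': 240, '10': 260, '11': 20}
metric_computational(counts, shots=1000)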
Example #24
def log_binomial_likelihood(k, n, mu):
    # Broadcast successes k and trials n against each probability in mu,
    # e.g. k = array([2, 3, 2]), n = array([2, 3, 2]), mu = array([0.3, 0.2]);
    # returns an array of shape (len(k), len(mu)).
    nn = n * np.ones((mu.shape[0], n.shape[0]))
    kk = k * np.ones((mu.shape[0], k.shape[0]))
    mumu = mu[np.newaxis, :].T * np.ones((mu.shape[0], n.shape[0]))
    ll = binom.logpmf(kk, nn, mumu)
    return ll.transpose()
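A shape sketch for the function above, mirroring the comment (k and n of length 3, mu of length 2):

import numpy as np

k = np.array([2, 3, 2])
n = np.array([2, 3, 2])
mu = np.array([0.3, 0.2])
ll = log_binomial_likelihood(k, n, mu)  # ll.shape == (3, 2)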
Example #25
    def _compute_log_likelihood(self, X):
        """Return the log of the binomial probability density.
        Needs to return a matrix of shape (n_obs in sequence, n_components).
        In order to accommodate having variable probabilities in each bin
        along the genome (and the (X, lengths) model of hmmlearn),
        X and p are synthetically combined so that they are divided
        up along the chromosomes together when hmmlearn calls
        iter_from_X_lengths().
        Thus, X combines binomial counts (col 0), size (col 1), and
        probabilities (cols 2 + 3).
        xs and ns have shape (n_obs in sequence, n_features==1).
        ps holds the emission probabilities and has shape
            (n_components [states] in HMM == 2, n_features==1)."""
        assert type(X).__module__ == "numpy"
        xs = X[:, 0]
        ns = X[:, 1]
        ps = X[:, 2:4]
        ref = binom.logpmf(xs, ns, ps[:, 0])
        nonref = binom.logpmf(xs, ns, ps[:, 1])
        return np.vstack([ref, nonref]).T
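A sketch of the packed input the docstring describes (hypothetical values): column 0 holds the counts, column 1 the trial sizes, and columns 2-3 the per-bin emission probabilities of the two states:

import numpy as np

X = np.array([[3.0, 10.0, 0.3, 0.9],
              [8.0, 10.0, 0.3, 0.9]])
# _compute_log_likelihood(X) would return a (2, 2) matrix: for each row of X,
# the log-pmf under the "ref" probability and under the "nonref" probability.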
Example #26
    def predict(self, Y, X, parameter_sample):

        probs = expit(np.matmul(X, np.transpose(parameter_sample)))

        # get predictive log lik
        predictive_log_likelihoods = binom.logpmf(Y[:, None], max(Y), probs)

        # calculate squared and absolute error
        SE = (Y[:, None] - max(Y) * probs)**2
        AE = np.abs(Y[:, None] - max(Y) * probs)

        return predictive_log_likelihoods, SE, AE
Example #27
def likelihood(x):
    def elo(delta):
        return 1.0 / (1 + 10.0**(delta / 400.0))

    p_win = elo(x[nz[1]] - x[nz[0]])
    p = binom.logpmf(wins[nz], count[nz], p_win)

    assert (p_win <
            1).all()  # check that we do not predict any perfect winners
    assert (p < 0).all()  # check that probability is between [0, 1)

    return -0.5 * np.sum(p)
Example #28
def get_snv_log_likelihood(a_vec, d_vec, F, num_clusts, num_samples):
    # Calculate the likelihood of it coming from any of the clusters
    cluster_likelihoods = []
    for i in range(num_clusts):
        clust_ll = 0
        for j in range(num_samples):
            freq = min(1, F.item((i, j)) + 0.00001)
            likelihood = binom.logpmf(a_vec[j], d_vec[j], freq)
            clust_ll += likelihood

        if not (np.isnan(clust_ll) or clust_ll == float("-inf")):
            cluster_likelihoods.append(clust_ll)

    return logsumexp(cluster_likelihoods)
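The final logsumexp marginalizes over clusters: it returns log sum_i exp(ll_i), an equal-weight mixture in log space. A tiny numeric check:

import numpy as np
from scipy.special import logsumexp

logsumexp(np.array([-10.0, -12.0, -11.0]))  # ~= -9.59, dominated by the best cluster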
Example #29
def logp_r_short(i, intvls, asgn, profile, DEPTHS, verbose, n_sigma=1):
    if verbose:
        print("### REPEAT ###")
    ib, ie = intvls[i]
    if max(profile[ib], profile[ie - 1]) >= DEPTHS['R']:
        return 0.
    pc, nc = estimate_true_counts(i, 'D', 'b', intvls, asgn, profile)
    if pc == -1 and nc == -1:
        pc, nc = estimate_true_counts(i, 'H', 'b', intvls, asgn, profile)
        if pc == -1 and nc == -1:
            pc, nc = DEPTHS['D'], DEPTHS['D']
        elif pc == -1:
            pc = nc
        elif nc == -1:
            nc = pc
    elif pc == -1:
        pc = nc
    elif nc == -1:
        nc = pc
    dr_ratio = 1 + n_sigma * (1 / np.sqrt(DEPTHS['D']))  # n_sigma-sigma interval
    pc, nc = pc * dr_ratio, nc * dr_ratio
    if verbose:
        print(f"[LEFT] R_est={pc}, {profile[ib]} ~ [RIGHT] R_est={nc}, {profile[ie - 1]}")
    if profile[ib] >= pc or profile[ie - 1] >= nc:
        return 0.
    else:
        return (binom.logpmf(profile[ib], pc, 1 - 0.01) +
                binom.logpmf(profile[ie - 1], nc, 1 - 0.01))
Example #30
def fitDeathValuesToData(country, theta, psi, model_output, args):
    mortal_idx = 1
    if args.use_infected:
        mortal_idx = 0
    start_t, end_t = country.getFullModelTimespan()
    N = country.pop_size
    # add psi zeros to the front of the zvals array
    historic_zvals = np.concatenate((np.zeros(psi), model_output[:-psi, mortal_idx]))
    recorded_deaths = country.getRecordedMortalityValues()
    log_likelihood = sum(
        binom.logpmf(recorded_deaths, N * historic_zvals, theta))
    return -log_likelihood
Example #31
def counts_log_likelihood(alpha_est, X, X_depth, gamma):
    """
    Calculates a binomial log likelihood.
    :param array alpha_est: estimate of the cell type proportions
    :param array X: methylation counts for cfDNA input
    :param array X_depth: total depths for cfDNA input
    :param array gamma: reference methylation proportions
    """
    # compute the projection of the estimates
    alpha_est = compute_projection(alpha_est).flatten()
    # the probability that a cfDNA CpG comes from a reference tissue is the
    # weighted average of the estimated contributions of the tissues
    b = np.matmul(alpha_est, gamma)
    ll = np.sum(binom.logpmf(X, X_depth, b, loc=0))  # log likelihood

    return -ll  # optimize negative ll
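A toy setup sketch (two cell types, three CpG sites, made-up numbers); compute_projection comes from the original module, so the call is left as a comment:

import numpy as np

alpha_est = np.array([0.6, 0.4])          # initial proportion estimates
gamma = np.array([[0.9, 0.8, 0.1],        # methylation proportions, tissue 1
                  [0.2, 0.1, 0.7]])       # methylation proportions, tissue 2
X = np.array([20, 15, 12])                # methylated counts
X_depth = np.array([30, 25, 40])          # total depths
# counts_log_likelihood(alpha_est, X, X_depth, gamma) -> negative log likelihood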
Example #32
def get_maxll_cluster(a_vec, d_vec, F, num_clusts, num_samples):
    # Find the cluster with the maximum likelihood of generating the data
    maxll = float("-inf")
    max_clust = None
    for i in range(num_clusts):
        clust_ll = 0
        for j in range(num_samples):
            freq = min(1, F.item((i, j)) + 0.00001)
            likelihood = binom.logpmf(a_vec[j], d_vec[j], freq / 2.)
            clust_ll += likelihood

        if clust_ll >= maxll:
            maxll = clust_ll
            max_clust = i

    return max_clust
Example #33
def loglik2(Sigmai, ps, xs, ns, trans=transf):
    ps2 = trans(ps)
    p_start = ps2[0, :]
    p_rest = ps2[1:, :]
    p_diffs = p_rest - p_start
    n, N = p_diffs.shape

    binomial_part = np.sum(binom.logpmf(xs, ns, ps2))
    normal_part = 0
    for j in range(N):
        p0 = p_start[j]
        normal_part += multivariate_normal.logpdf(p_diffs[:, j],
                                                  mean=np.array([0] * n),
                                                  cov=np.linalg.inv(Sigmai) *
                                                  p0 * (1 - p0))

    return binomial_part + normal_part
Example #34
def test_binom():

    # Test that we match the Binomial distribution from scipy

    p = 0.5
    n = 5
    dist = lk.Binomial()

    x = np.random.randint(low=0, high=n, size=(10,))

    p1 = binom.logpmf(x, p=p, n=n)
    p2 = dist.loglike(x, p, n)

    assert np.allclose(p1, p2)

    p1 = binom.cdf(x, p=p, n=n)
    p2 = dist.cdf(x, p, n)

    assert np.allclose(p1, p2)
Example #35
    def _compute_log_likelihood(self, X):
        res = []

        for x in X:  # over all observations
            row = []
            for i in range(self.n_components):  # over the HMM's states
                r_sum = 0
                for j in range(self.n_features):  # over dims
                    # grab the proper slice of the observation for this dim
                    it = range(self.dim[0]) if j == 0 else range(self.dim[0], self.dim[0] + self.dim[1])
                    for k in it:
                        index = (int(x[k]), self.p[j][i], self.n[j])
                        if index not in self.lookup_logpmf:
                            self.lookup_logpmf[index] = binom.logpmf(x[k], self.n[j], self.p[j][i])
                        r_sum += self.lookup_logpmf[index]
                row.append(r_sum)

            res.append(row)

        return np.asarray(res)
Example #36
def mle(infile):
    '''
    Estimate the overall contamination percentage using maximum likelihood estimation (MLE).
    '''
    candidate_PIs = [i / 1000.0 for i in range(0, 501)]
    snp_hom = []
    snp_het = []
    for l in open(infile, 'r'):
        l = l.strip()
        if l.startswith('Chrom'):
            continue
        f = l.split()
        if f[12] == 'Fail':
            continue

        allele_1_count = int(f[3])
        allele_2_count = int(f[5])

        if f[9] == 'Hom':
            snp_hom.append(
                [allele_1_count + allele_2_count, allele_2_count, "Hom"])  # n, k
        elif f[9] == 'Het':
            snp_het.append(
                [allele_1_count + allele_2_count, allele_2_count, "Het"])  # n, k
        else:
            continue

    print('@ ' + strftime("%Y-%m-%d %H:%M:%S") +
          ": Estimating contamination from homozygous SNPs ...", file=sys.stderr)
    prob = -float("inf")
    pi_of_max_prob_hom = 0.0
    for pi in candidate_PIs:
        p2 = pi / 2.0

        joint_prob = 0
        for n, k, t in snp_hom:
            pmf_2 = binom.logpmf(k, n, p2)
            joint_prob += pmf_2

        if joint_prob > prob:
            prob = joint_prob
            pi_of_max_prob_hom = pi
    return pi_of_max_prob_hom
Example #37
    def loglike(self, params):
        K, beta = params

        if beta > 1. or K < 1:
            return -np.inf

        types, tokens = self.endog, self.exog

        # V(n) = K * n**beta
        projected_n_types = K * tokens**beta
        p = .5

        # binom mode = floor((n+1)*p),
        # so binom_n = floor((1/p)*n)
        binom_ns = np.floor((1 / p) * projected_n_types)

        logprobs = [binom.logpmf(t, bn, p)[0] for t, bn in zip(types, binom_ns)]
        logprobs_clipped = np.clip(logprobs, -10**6, 0)
        return sum(logprobs_clipped)
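A quick sanity check of the mode relation in the comment above, for p = .5 and a projected type count of 7:

import numpy as np

bn = np.floor((1 / 0.5) * 7)       # binom_n = 14
mode = np.floor((bn + 1) * 0.5)    # mode of Bin(14, .5) = 7, the projected count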
Example #38
def _loss_fun(pars, x, k, n, S, fixed):
    """ A binomial loss function. Returns negative log likelihood
    for k successes in n binomial trials at stimulus level x, fit with
    psychometric fun S.

    :param pars: the vector of parameters to be fit. Order = (m, w, lam, gam).
    :param x:  the stimulus level; if S is a Weibull it should be in log units.
    :param k:  number of successes
    :param n:  number of trials
    :param S:  the unscaled sigmoid to fit; a function taking
               (x, m, w) as input.
    :param fixed:  dictionary of values for fixed params,
                   e.g. {'lam': 0, 'gam': 0.5} for a 2AFC with
                   no lapse rate.

    :returns:  the negative of the summed log likelihoods.

    """
    yhat = psy_pred(pars, x, S, fixed)
    ll = binom.logpmf(k, n, yhat)
    return -ll.sum()
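The core of the loss is the summed binomial log-pmf at the predicted success probabilities. A standalone sketch with a hypothetical logistic prediction standing in for psy_pred:

import numpy as np
from scipy.stats import binom

x = np.array([-2., -1., 0., 1., 2.])   # stimulus levels
yhat = 1.0 / (1 + np.exp(-x))          # hypothetical predicted P(success)
k = np.array([1, 3, 5, 8, 9])          # successes
n = np.full(5, 10)                     # trials
nll = -binom.logpmf(k, n, yhat).sum()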
Example #39
    def loglike(self, y, f, n):
        r"""
        Binomial log likelihood.

        Parameters
        ----------
        y: ndarray
            array of integer success counts (0 up to n) for each target
        f: ndarray
            latent function from the GLM prior (:math:`\mathbf{f} =
            \boldsymbol\Phi \mathbf{w}`)
        n: ndarray
            the total number of observations

        Returns
        -------
        logp: ndarray
            the log likelihood of each y given each f under this
            likelihood.
        """
        ll = binom.logpmf(y, n=n, p=expit(f))
        return ll
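A usage sketch for the Bernoulli special case (n = 1), where the success probability is the logistic link applied to the latent function:

import numpy as np
from scipy.stats import binom
from scipy.special import expit

y = np.array([0, 1, 1])
f = np.array([-1.0, 0.5, 2.0])
ll = binom.logpmf(y, n=1, p=expit(f))  # one log likelihood per observation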
Example #40
File: LTP.py Project: saramuel/PhD
def loglik_fermi(x0, ntrig, nall, r, E):
    return -1. * binom.logpmf(ntrig, nall, fermi(r, E, x0)).sum()
Example #41
def _choice_traj_likelihood(tau, p_0, p_1, q, n, t):
    if tau < 0:
        return np.inf
    p_traj = _exp_choice_traj(tau, p_0, p_1, t)
    log_lik = binom.logpmf(q, n, p_traj).sum()
    return -log_lik
Example #42
    def data_log_likelihood(self, successes, trials, beta):
        '''Calculates the log-likelihood of a Polya tree bin given the beta values.'''
        return binom.logpmf(successes, trials, 1.0 / (1 + np.exp(-beta))).sum()
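The transform 1.0 / (1 + np.exp(-beta)) is the logistic function (scipy.special.expit is an equivalent, overflow-safe form). A toy evaluation for a single bin at beta = 0, i.e. p = 0.5:

import numpy as np
from scipy.stats import binom

binom.logpmf(7, 10, 1.0 / (1 + np.exp(-0.0)))  # ~= -2.144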
Example #43
    def _logp(self, value, p, k):
        return np.sum(binom.logpmf(value, k, p, loc=0))
Example #44
File: LTP.py Project: saramuel/PhD
def loglik_binom(x0, ntrig, nall, r, E):
    return -1. * binom.logpmf(ntrig, nall, inverrf(r, E, x0)).sum()