Example #1
def get_reversible_differential_entropy(Q, stationary_distn, t):
    """
    Compute differential entropy of a time-reversible Markov jump process.

    This is the differential entropy of the distribution over trajectories.
    The rate matrix Q must be time-reversible
    with the given stationary distribution.

    Parameters
    ----------
    Q : 2d ndarray
        Matrix of transition rates.
    stationary_distn : 1d ndarray
        Stationary distribution of the process.
    t : float
        Amount of time over which the process is observed.

    Returns
    -------
    differential_entropy : float
        The differential entropy of the distribution over trajectories.
        This is not the Shannon entropy, and may be negative.

    """
    stationary_entropy = -special.xlogy(stationary_distn, stationary_distn).sum()
    tmp_trans = Q - special.xlogy(Q, Q)
    transition_entropy = tmp_trans.sum(axis=0).dot(stationary_distn)
    differential_entropy = stationary_entropy + t * transition_entropy
    return differential_entropy
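A minimal usage sketch (not part of the original listing), assuming the rate matrix stores only off-diagonal rates so that the xlogy(Q, Q) term stays finite, and that the function above is in scope:

import numpy as np
from scipy import special

# Hypothetical 2-state symmetric process with uniform stationary distribution.
Q = np.array([[0.0, 1.0],
              [1.0, 0.0]])
pi = np.array([0.5, 0.5])
print(get_reversible_differential_entropy(Q, pi, t=2.0))  # log(2) + 2.0*1.0 ~= 2.693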
Example #2
def _jensen_shannon_divergence(a, b):
    """Compute Jensen-Shannon Divergence

    Lifted from github/scipy:
    https://github.com/luispedro/scipy/blob/ae9ad67bfc2a89aeda8b28ebc2051fff19f1ba4a/scipy/stats/stats.py

    Parameters
    ----------
    a : array-like
        possibly unnormalized distribution
    b : array-like
        possibly unnormalized distribution. Must be of same size as ``a``.
    
    Returns
    -------
    j : float
    """
    a = np.asanyarray(a, dtype=float)
    b = np.asanyarray(b, dtype=float)
    a = a/a.sum()
    b = b/b.sum()
    m = (a + b)
    m /= 2.
    m = np.where(m, m, 1.)
    return 0.5*np.sum(special.xlogy(a, a/m)+special.xlogy(b, b/m))
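A quick sanity check (not from the original source) of the helper above, assuming it is in scope: the divergence is zero for identical inputs and symmetric in its arguments.

import numpy as np

a = [1, 2, 3, 4]
b = [4, 3, 2, 1]
print(_jensen_shannon_divergence(a, a))              # ~0.0
print(np.isclose(_jensen_shannon_divergence(a, b),
                 _jensen_shannon_divergence(b, a)))  # True (symmetry)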
Example #3
 def objective(AB):
     # From Platt (beginning of Section 2.2)
     P = expit(-(AB[0] * F + AB[1]))
     loss = -(xlogy(T, P) + xlogy(T1, 1. - P))
     if sample_weight is not None:
         return (sample_weight * loss).sum()
     else:
         return loss.sum()
Example #4
def differential_entropy_helper(
        Q, prior_root_distn,
        post_root_distn, post_dwell_times, post_transitions,
        ):
    """
    Use posterior expectations to help compute differential entropy.

    Parameters
    ----------
    Q : weighted directed networkx graph
        Rate matrix.
    prior_root_distn : dict
        Prior distribution at the root.
        If Q is a time-reversible rate matrix,
        then the prior root distribution
        could be the stationary distribution associated with Q.
    post_root_distn : dict
        Posterior state distribution at the root.
    post_dwell_times : dict
        Posterior expected dwell time for each state.
    post_transitions : weighted directed networkx graph
        Posterior expected count of each transition type.

    Returns
    -------
    diff_ent_init : float
        Initial state distribution contribution to differential entropy.
    diff_ent_dwell : float
        Dwell time contribution to differential entropy.
    diff_ent_trans : float
        Transition contribution to differential entropy.

    """
    # Get the total rates.
    total_rates = get_total_rates(Q)

    # Initial state distribution contribution to differential entropy.
    diff_ent_init = 0.0
    for state, prob in post_root_distn.items():
        diff_ent_init -= special.xlogy(prob, prior_root_distn[state])

    # Dwell time contribution to differential entropy.
    diff_ent_dwell = 0.0
    for s in set(total_rates) & set(post_dwell_times):
        diff_ent_dwell += post_dwell_times[s] * total_rates[s]

    # Transition contribution to differential entropy.
    diff_ent_trans = 0.0
    for sa in set(Q) & set(post_transitions):
        for sb in set(Q[sa]) & set(post_transitions[sa]):
            rate = Q[sa][sb]['weight']
            ntrans_expected = post_transitions[sa][sb]['weight']
            diff_ent_trans -= special.xlogy(ntrans_expected, rate)

    # Return the contributions to differential entropy.
    return diff_ent_init, diff_ent_dwell, diff_ent_trans
Example #5
def test_chi2_contingency_g():
    c = np.array([[15, 60], [15, 90]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=False)
    assert_allclose(g, 2*xlogy(c, c/e).sum())

    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', correction=True)
    c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])
    assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())

    c = np.array([[10, 12, 10], [12, 10, 10]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')
    assert_allclose(g, 2*xlogy(c, c/e).sum())
Example #6
def differential_entropy_helper(
        Q, prior_root_distn,
        post_root_distn, post_dwell_times, post_transitions,
        ):
    """
    Use posterior expectations to help compute differential entropy.

    Parameters
    ----------
    Q : 2d ndarray
        Rate matrix.
    prior_root_distn : 1d ndarray
        Prior distribution at the root.
        If Q is a time-reversible rate matrix,
        then the prior root distribution
        could be the stationary distribution associated with Q.
    post_root_distn : 1d ndarray
        Posterior state distribution at the root.
    post_dwell_times : 1d ndarray
        Posterior expected dwell time for each state.
    post_transitions : 2d ndarray
        Posterior expected count of each transition type.

    Returns
    -------
    diff_ent_init : float
        Initial state distribution contribution to differential entropy.
    diff_ent_dwell : float
        Dwell time contribution to differential entropy.
    diff_ent_trans : float
        Transition contribution to differential entropy.

    """
    _density.check_square_dense(Q)
    _density.check_square_dense(post_transitions)
    nstates = Q.shape[0]

    # Get the total rates.
    total_rates = get_total_rates(Q)

    # Initial state distribution contribution to differential entropy.
    diff_ent_init = -special.xlogy(post_root_distn, prior_root_distn).sum()

    # Dwell time contribution to differential entropy.
    diff_ent_dwell = post_dwell_times.dot(total_rates)

    # Transition contribution to differential entropy.
    diff_ent_trans = -special.xlogy(post_transitions, Q).sum()

    # Return the contributions to differential entropy.
    return diff_ent_init, diff_ent_dwell, diff_ent_trans
Example #7
def poisson(self, x, mu):
    """
    Poisson function taken from:
    https://github.com/scipy/scipy/blob/master/scipy/stats/_discrete_distns.py

    For license see documentation/BSDLicense_scipy.md

    Author:  Travis Oliphant  2002-2011 with contributions from
             SciPy Developers 2004-2011
    """
    if len(np.atleast_1d(x)) == 1:
        check_val = x
    else:
        check_val = x[0]

    if check_val > 1e18:
        self.log.warning('The current value in the poissonian distribution '
                         'exceeds 1e18! Due to numerical imprecision a valid '
                         'functional output cannot be guaranteed any more!')

    # According to the central limit theorem, a Poisson distribution approaches
    # a Gaussian distribution for large enough x. Because numerical precision
    # limits how accurately the logarithm of the Poisson pmf can be computed
    # and then exponentiated, a self-defined cutoff of 1e12 is introduced.
    # Beyond that value a Gaussian distribution is assumed, which is a
    # completely valid approximation.

    if check_val < 1e12:
        return np.exp(xlogy(x, mu) - gammaln(x + 1) - mu)
    else:
        return np.exp(-((x - mu) ** 2) / (2 * mu)) / (np.sqrt(2 * np.pi * mu))
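Below the cutoff, the closed form above is the ordinary Poisson pmf. A cross-check against scipy's reference implementation (my own sketch, not part of the snippet):

import numpy as np
from scipy.special import xlogy, gammaln
from scipy.stats import poisson as poisson_ref

x = np.arange(10)
mu = 3.5
pmf = np.exp(xlogy(x, mu) - gammaln(x + 1) - mu)
print(np.allclose(pmf, poisson_ref.pmf(x, mu)))  # True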
Example #8
 def set_cost_fn(self, reg_str = ' ', hyperp=0.1):
     """Set the cost function. Also assign a regularization to the same
     cost function. Regularizations are between l1 and l2, where the weights
     are summed over using a hyperparameter."""
     self.hyperp = hyperp
     self.reg_str = reg_str
     if self.cost_fn_str == "MSE":
         self.cost_fn = lambda u: \
             (1./2*self.features) * np.sum(self.y-u)**2 +\
                 self.regularization()
         self.cost_der = lambda u: \
             np.vectorize(-(self.y - u))
     elif self.cost_fn_str == "xentropy":
         self.cost_fn = lambda u: -np.sum(sps.xlogy(self.y, u) +\
             sps.xlogy(1-self.y, 1-u))/self.y.shape[0] #+ self.regularization()
         self.cost_der = lambda u: u - self.y.reshape(-1, 1)
     else:
         raise SyntaxError("Cost function must be 'MSE' or 'xentropy'")
Example #9
    def _boost_real(self, iboost, X, y, sample_weight, random_state):
        """Implement a single boost using the SAMME.R real algorithm."""
        estimator = self._make_estimator(random_state=random_state)

        estimator.fit(X, y, sample_weight=sample_weight)

        y_predict_proba = estimator.predict_proba(X)

        if iboost == 0:
            self.classes_ = getattr(estimator, 'classes_', None)
            self.n_classes_ = len(self.classes_)

        y_predict = self.classes_.take(np.argmax(y_predict_proba, axis=1),
                                       axis=0)

        # Instances incorrectly classified
        incorrect = y_predict != y

        # Error fraction
        estimator_error = np.mean(
            np.average(incorrect, weights=sample_weight, axis=0))

        # Stop if classification is perfect
        if estimator_error <= 0:
            return sample_weight, 1., 0.

        # Construct y coding as described in Zhu et al [2]:
        #
        #    y_k = 1 if c == k else -1 / (K - 1)
        #
        # where K == n_classes_ and c, k in [0, K) are indices along the second
        # axis of the y coding with c being the index corresponding to the true
        # class label.
        n_classes = self.n_classes_
        classes = self.classes_
        y_codes = np.array([-1. / (n_classes - 1), 1.])
        y_coding = y_codes.take(classes == y[:, np.newaxis])

        # Displace zero probabilities so the log is defined.
        # Also fix negative elements which may occur with
        # negative sample weights.
        proba = y_predict_proba  # alias for readability
        np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)

        # Boost weight using multi-class AdaBoost SAMME.R alg
        estimator_weight = (-1. * self.learning_rate *
                            ((n_classes - 1.) / n_classes) *
                            xlogy(y_coding, y_predict_proba).sum(axis=1))

        # Only boost the weights if it will fit again
        if not iboost == self.n_estimators - 1:
            # Only boost positive weights
            sample_weight *= np.exp(estimator_weight *
                                    ((sample_weight > 0) |
                                     (estimator_weight < 0)))

        return sample_weight, 1., estimator_error
Example #10
 def numeric(self, values):
     x = values[0]
     results = -xlogy(x, x)
     # Return -inf outside the domain
      if np.isscalar(results):
          # A scalar result is NaN only outside the domain.
          return -np.inf if np.isnan(results) else results
     else:
         results[np.isnan(results)] = -np.inf
         return results
Example #11
 def _revaluate(self, r, t=0.):
     """Potential as a function of r and time"""
     x = r / self.a
     return -self.a**2. * numpy.pi * (-numpy.pi / x + 2. *
                                      (1. / x + 1) * numpy.arctan(1 / x) +
                                      (1. / x + 1) * numpy.log(
                                          (1. + 1. / x)**2. /
                                          (1. + 1 / x**2.)) +
                                      special.xlogy(2. / x, 1. + x**2.))
Example #12
 def pointwise_loss(self, y_true, raw_predictions):
     # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to
     # return a view.
     raw_predictions = raw_predictions.reshape(-1)
     # TODO: For speed, we could remove the constant xlogy(y_true, y_true)
     # Advantage of this form: minimum of zero at raw_predictions = y_true.
     loss = (xlogy(y_true, y_true) - y_true * (raw_predictions + 1) +
             np.exp(raw_predictions))
     return loss
Example #13
def binary_log_loss(y_true, y_prob):
    """Compute binary logistic loss for classification.
    This is identical to log_loss in binary classification case,
    but is kept for its use in multilabel case.
    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.
    y_prob : array-like of float, shape = (n_samples, n_classes)
        Predicted probabilities, as returned by a classifier's
        predict_proba method.
    Returns
    -------
    loss : float
        The degree to which the samples are correctly predicted.
    """
    return -(xlogy(y_true, y_prob) +
             xlogy(1 - y_true, 1 - y_prob)).sum() / y_prob.shape[0]
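A small hand-checkable case (my addition, assuming the function above is in scope): each term xlogy(y, p) + xlogy(1 - y, 1 - p) reduces to log(p) or log(1 - p) depending on the label.

import numpy as np

y_true = np.array([[1.0], [0.0], [1.0]])
y_prob = np.array([[0.9], [0.2], [0.6]])
manual = -(np.log(0.9) + np.log(0.8) + np.log(0.6)) / 3
print(np.isclose(binary_log_loss(y_true, y_prob), manual))  # True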
Example #14
def beta_logpdf(x, a, b, loc=0, scale=1):
    x = (x - loc) / scale
    z = special.xlog1py(b - 1.0, -x) + special.xlogy(a - 1.0, x)
    z -= special.betaln(a, b)
    z -= np.log(scale)
    z = np.where((x < 0) | (x > 1), -np.inf, z)
    if z.ndim == 0:
        return z[()]
    return z
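Since xlog1py and xlogy handle the boundary values of x, the function above should agree with scipy's own beta distribution; a cross-check sketch, assuming the function is in scope:

import numpy as np
from scipy import stats

x = np.array([0.1, 0.5, 0.9])
print(np.allclose(beta_logpdf(x, a=2.0, b=3.0),
                  stats.beta.logpdf(x, 2.0, 3.0)))  # True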
Example #15
    def _boost_real(self, iboost, X, y, sample_weight, random_state):
        """Implement a single boost using the SAMME.R real algorithm."""
        estimator = self._make_estimator(random_state=random_state)

        estimator.fit(X, y, sample_weight=sample_weight)

        y_predict_proba = estimator.predict_proba(X)

        if iboost == 0:
            self.classes_ = getattr(estimator, 'classes_', None)
            self.n_classes_ = len(self.classes_)

        y_predict = self.classes_.take(np.argmax(y_predict_proba, axis=1),
                                       axis=0)

        # Instances incorrectly classified
        incorrect = y_predict != y

        # Error fraction
        estimator_error = np.mean(
            np.average(incorrect, weights=sample_weight, axis=0))

        # Stop if classification is perfect
        if estimator_error <= 0:
            return sample_weight, 1., 0.

        # Construct y coding as described in Zhu et al [2]:
        #
        #    y_k = 1 if c == k else -1 / (K - 1)
        #
        # where K == n_classes_ and c, k in [0, K) are indices along the second
        # axis of the y coding with c being the index corresponding to the true
        # class label.
        n_classes = self.n_classes_
        classes = self.classes_
        y_codes = np.array([-1. / (n_classes - 1), 1.])
        y_coding = y_codes.take(classes == y[:, np.newaxis])

        # Displace zero probabilities so the log is defined.
        # Also fix negative elements which may occur with
        # negative sample weights.
        proba = y_predict_proba  # alias for readability
        np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)

        # Boost weight using multi-class AdaBoost SAMME.R alg
        estimator_weight = (-1. * self.learning_rate
                            * ((n_classes - 1.) / n_classes)
                            * xlogy(y_coding, y_predict_proba).sum(axis=1))

        # Only boost the weights if it will fit again
        if not iboost == self.n_estimators - 1:
            # Only boost positive weights
            sample_weight *= np.exp(estimator_weight *
                                    ((sample_weight > 0) |
                                     (estimator_weight < 0)))

        return sample_weight, 1., estimator_error
Example #16
def multiclass_model_data(n_nonevent,
                          n_event,
                          max_pvalue,
                          max_pvalue_policy,
                          scale=None):

    n, n_classes = n_nonevent.shape

    DD = []
    PV = []
    VV = []

    for c in range(n_classes):
        t_n_event = n_event[:, c].sum()
        t_n_nonevent = n_nonevent[:, c].sum()

        D = []
        V = []

        E = []
        NE = []

        for i in range(1, n + 1):
            s_event = n_event[:i, c][::-1].cumsum()[::-1]
            s_nonevent = n_nonevent[:i, c][::-1].cumsum()[::-1]
            rate = s_event / (s_nonevent + s_event)

            p = s_event / t_n_event
            q = s_nonevent / t_n_nonevent
            iv = special.xlogy(p - q, p / q)

            if scale is not None:
                rate *= scale
                iv *= scale

                rate = rate.astype(np.int64)
                iv = iv.astype(np.int64)

            D.append(rate)
            V.append(iv)

            if max_pvalue is not None:
                E.append(s_event)
                NE.append(s_nonevent)

        if max_pvalue is not None:
            pvalue_violation_indices = find_pvalue_violation_indices(
                n, E, NE, max_pvalue, max_pvalue_policy)
        else:
            pvalue_violation_indices = []

        DD.append(D)
        VV.append(V)
        PV.append(pvalue_violation_indices)

    return DD, VV, PV
Example #17
def model_sim(model1,
              model2,
              H_model1_bg,
              H_model2_bg,
              H_model1,
              H_model2,
              min_overlap=2):

    models_switched = False

    # my design model2 cannot be longer than model1
    if len(model1) < len(model2):
        model1, model2 = model2, model1
        H_model1_bg, H_model2_bg = H_model2_bg, H_model1_bg
        H_model1, H_model2 = H_model2, H_model1
        models_switched = True

    scores = []
    contributions = []
    slices = []

    for sl1, sl2 in create_slices(len(model1), len(model2), min_overlap):
        background_score = 0
        # so we want the contributions of the background
        background_score += H_model1_bg[sl1].sum()
        background_score += H_model2_bg[sl2].sum()

        cross_score = 0
        # and the contributions of model1 vs. model2
        cross_score += H_model1[sl1].sum()  # entropy of model1
        cross_score += H_model2[sl2].sum()  # entropy of model2

        # cross entropy part
        p_bar = 0.5 * (model1[sl1, :] + model2[sl2, :])
        p_bar_entropy = xlogy(p_bar, p_bar) / loge2
        cross_score -= p_bar_entropy.sum()

        scores.append(background_score - cross_score)
        contributions.append((background_score, cross_score))
        slices.append((sl1, sl2))

    # very neat: https://stackoverflow.com/a/6193521/2272172
    max_index, max_score = max(enumerate(scores), key=operator.itemgetter(1))
    max_slice1, max_slice2 = slices[max_index]

    # gotta love python for that: https://stackoverflow.com/a/13335254/2272172
    start1, end1, _ = max_slice1.indices(len(model1))
    start2, end2, _ = max_slice2.indices(len(model2))

    contrib = contributions[max_index]

    if models_switched:
        return max_score, (start2 + 1, end2), (start1 + 1, end1), contrib
    else:
        return max_score, (start1 + 1, end1), (start2 + 1, end2), contrib
Example #18
def get_entropy(wf, cut_at=None):
    """
    Calculates entanglement entropy of the TT wavefunction
    at the specified cut.
    If cut is not specified, the entropy is calculated at
    the middle of the chain.
    Parameters:
    -----------
    wf: tt.Vector
         Tensor train vector
    cut_at: int, default None
         The position of the internal index at which the
         entropy is calculated. Should be in [0, d-2]
         If not provided, the cut is assumed at the central
         internal index for even dimension TT vectors, or
         at the index left to the central site for odd
         dimension TT vectors.
    """
    num_dimensions = wf.d
    if cut_at is None:
        cut_at = wf.d // 2 - 1  # this is the middle bond if d is even,
        # or the bond to the left of the central site if d is odd
    assert (cut_at <= num_dimensions - 2 and cut_at >= 0)

    # Get rid of redundant ranks (they cause technical difficulties).
    wf = wf.round(eps=0)

    coresX = tt.tensor.to_list(wf)

    # Left orthogonalize cores
    for current_dim in range(0, cut_at):
        coresX = tt.riemannian.riemannian.cores_orthogonalization_step(
            coresX, current_dim, left_to_right=True)
    # Right orthogonalize cores
    for current_dim in range(num_dimensions - 1, cut_at + 1, -1):
        coresX = tt.riemannian.riemannian.cores_orthogonalization_step(
            coresX, current_dim, left_to_right=False)

    # Now we have two adjacent non-orthogonal cores at cut_at and cut_at+1
    # locations. Merge them and perform SVD
    left_core = coresX[cut_at]
    right_core = coresX[cut_at + 1]
    r11, n1, r12 = left_core.shape
    r21, n2, r22 = right_core.shape
    merged = np.matmul(left_core.reshape([r11 * n1, r12]),
                       right_core.reshape([r21, n2 * r22]))
    u, s, vh = np.linalg.svd(merged)

    # Truncate singular values to the size of the rank
    s2 = (s[:r12])**2

    return -np.sum(xlogy(s2, s2))
Example #19
def poisson(self, x, mu):
    """
    Poisson function taken from:
    https://github.com/scipy/scipy/blob/master/scipy/stats/_discrete_distns.py

    For license see documentation/BSDLicense_scipy.md

    Author:  Travis Oliphant  2002-2011 with contributions from
             SciPy Developers 2004-2011
    """
    return np.exp(special.xlogy(x, mu) - gamln(x + 1) - mu)
Example #20
def get_trajectory_log_likelihood(
        T_aug, root, prior_root_distn, Q_default, nstates):
    """

    Parameters
    ----------
    T_aug : undirected weighted networkx graph
        Trajectory with weighted edges annotated with states.
    root : integer
        Root node.
    prior_root_distn : 1d ndarray
        Prior distribution over states at the root.
    Q_default : 2d ndarray
        Rate matrix which applies to all edges.
    nstates : integer
        Number of states.

    Returns
    -------
    log_likelihood : float
        Logarithm of the trajectory likelihood
        according to the given Markov jump process.

    Notes
    -----
    Regarding the order of the arguments of this function, T_aug is first
    to facilitate functools.partial wrapping for MCMC callback.

    """
    # Compute the total rates.
    nstates = prior_root_distn.shape[0]
    total_rates = get_total_rates(Q_default)

    # Compute primary process statistics.
    # These will be used for two purposes.
    # One of the purposes is as the denominator of the
    # importance sampling ratio.
    # The second purpose is to compute contributions
    # to the neg log likelihood estimate.
    info = get_history_statistics(T_aug, nstates, root=root)
    dwell_times, root_state, transitions = info

    # contribution of root state to log likelihood
    init_ll = np.log(prior_root_distn[root_state])

    # contribution of dwell times
    dwell_ll = -np.dot(dwell_times, total_rates)

    # contribution of transitions
    trans_ll = special.xlogy(transitions, Q_default).sum()

    # Return the sum of the log likelihood contributions.
    log_likelihood = init_ll + dwell_ll + trans_ll
    return log_likelihood
Example #21
    def _real_pdf(self, arm, theta):
        p = 0
        for pos in range(self.n_positions):
            pos_prob = self.position_probabilities[pos]
            a = self.S_kl[arm, pos]
            b = self.N_kl[arm, pos] - self.S_kl[arm, pos]
            p += sc.xlog1py(b, -theta * pos_prob) + sc.xlogy(a, theta)
            p -= sc.betaln(a, b)
            p += a * np.log(pos_prob)

        return np.exp(p)
Example #22
def phot_loglike(data, data_err, data_mask, models, dim_prior=True):
    """
    Computes the log-likelihood between noisy data and noiseless models.

    Parameters
    ----------
    data : `~numpy.ndarray` of shape `(Nfilt)`
        Observed data values.

    data_err : `~numpy.ndarray` of shape `(Nfilt)`
        Associated (Normal) errors on the observed values.

    data_mask : `~numpy.ndarray` of shape `(Nfilt)`
        Binary mask (0/1) indicating whether the data was observed.

    models : `~numpy.ndarray` of shape `(Nmodel, Nfilt)`
        Models predictions.

    dim_prior : bool, optional
        Whether to apply a dimensional-based correction (prior) to the
        log-likelihood. Transforms the likelihood to a chi2 distribution
        with `Nfilt - 3` degrees of freedom. Default is `True`.

    Returns
    -------
    lnlike : `~numpy.ndarray` of shape `(Nmodel)`
        Log-likelihood values.

    """

    # Subselect only clean observations.
    Ndim = sum(data_mask)  # number of dimensions
    flux, fluxerr = data[data_mask], data_err[data_mask]  # mean, error
    mfluxes = models[:, data_mask]  # model predictions
    tot_var = np.square(fluxerr) + np.zeros_like(mfluxes)  # total variance

    # Compute residuals.
    resid = flux - mfluxes

    # Compute chi2.
    chi2 = np.sum(np.square(resid) / tot_var, axis=1)

    # Compute multivariate normal logpdf.
    lnl = -0.5 * chi2
    lnl += -0.5 * (Ndim * np.log(2. * np.pi) +
                   np.sum(np.log(tot_var), axis=1))

    # Apply dimensionality prior.
    if dim_prior:
        # Compute logpdf of chi2 distribution.
        a = 0.5 * (Ndim - 3)  # effective dof
        lnl = xlogy(a - 1., chi2) - (chi2 / 2.) - gammaln(a) - (np.log(2.) * a)

    return lnl
Example #23
    def test_entropy(self):
        # Simple tests of entropy.
        hg = stats.hypergeom(4, 1, 1)
        h = hg.entropy()
        expected_p = np.array([0.75, 0.25])
        expected_h = -np.sum(xlogy(expected_p, expected_p))
        assert_allclose(h, expected_h)

        hg = stats.hypergeom(1, 1, 1)
        h = hg.entropy()
        assert_equal(h, 0.0)
Example #24
    def compute_score(self, predictions):
        """
        Compute the score according to the heuristic.

        Args:
            predictions (ndarray): Array of predictions

        Returns:
            Array of scores.
        """
        assert predictions.ndim >= 3
        # [n_sample, n_class, ..., n_iterations]

        expected_entropy = - np.mean(np.sum(xlogy(predictions, predictions), axis=1),
                                     axis=-1)  # [batch size, ...]
        expected_p = np.mean(predictions, axis=-1)  # [batch_size, n_classes, ...]
        entropy_expected_p = - np.sum(xlogy(expected_p, expected_p),
                                      axis=1)  # [batch size, ...]
        bald_acq = entropy_expected_p - expected_entropy
        return bald_acq
Example #25
    def test_entropy(self):
        # Basic tests of entropy.
        pvals = np.array([0.25, 0.45, 0.3])
        p = stats.rv_discrete(values=([0, 1, 2], pvals))
        expected_h = -sum(xlogy(pvals, pvals))
        h = p.entropy()
        assert_allclose(h, expected_h)

        p = stats.rv_discrete(values=([0, 1, 2], [1.0, 0, 0]))
        h = p.entropy()
        assert_equal(h, 0.0)
Example #26
    def _score_submodel(self, weights, intercepts, dm, binned):
        """
        Utility function for computing D^2 (pseudo R^2) on a given set of weights and
        intercepts. Used in both model subsetting and the main score() function of the GLM.

        Parameters
        ----------
        weights : pd.Series
            Series in which entries are numpy arrays containing the weights for a given cell.
            Indices should be cluster ids.
        intercepts : pd.Series
            Series in which elements are the intercept fit to each cell. Indices should match
            weights.
        dm : numpy.ndarray
            Design matrix. Should not contain the bias column. dm.shape[1] should be the same as
            the length of an element in weights.
        binned : numpy.ndarray
            nT x nCells array, in which each column is the binned spike train for a single unit.
            Should be the same number of rows as dm.

        Returns
        -------
        pd.Series
            Pandas series containing the scores of the given model for each cell.
        """
        scores = pd.Series(index=weights.index, name='scores')
        for cell in weights.index:
            cell_idx = np.argwhere(self.clu_ids == cell)[0, 0]
            wt = weights.loc[cell].reshape(-1, 1)
            bias = intercepts.loc[cell]
            y = binned[:, cell_idx]
            pred = np.exp(dm @ wt + bias)
            null_pred = np.ones_like(pred) * np.mean(y)
            null_deviance = 2 * np.sum(
                xlogy(y, y / null_pred.flat) - y + null_pred.flat)
            with np.errstate(divide='ignore', invalid='ignore'):
                full_deviance = 2 * np.sum(
                    xlogy(y, y / pred.flat) - y + pred.flat)
            d_sq = 1 - (full_deviance / null_deviance)
            scores.at[cell] = d_sq
        return scores
Example #27
def estimate_topic_counts(in_tsv,
                          vocab_index,
                          author_index,
                          thetas,
                          phis,
                          verbose=False):
    n_authors = len(author_index)
    n_docs, n_topics = thetas.shape
    n_topics, n_vocab = phis.shape
    topic_term_counts = np.zeros((n_topics, n_vocab))
    topic_author_term_counts = [
        lil_matrix((n_authors, n_vocab)) for t in range(n_topics)
    ]
    nz_phis = phis > 0
    log_phis = xlogy(nz_phis, phis)
    for doc_id, line in enumerate(open(in_tsv, mode='r', encoding='utf-8')):
        if verbose and doc_id and doc_id % 1000 == 0:
            print('{}/{}'.format(doc_id, n_docs), file=sys.stderr)
        fields = line.strip().split('\t')
        author = fields[1]
        author_id = author_index[author]
        tokens = np.array(fields[2].split())
        theta_d = thetas[doc_id]
        nz_theta_d = theta_d > 0
        log_theta_d = xlogy(nz_theta_d, theta_d)

        for term, count in Counter(tokens).items():
            term_id = vocab_index[term]
            topic_dist = np.where(nz_phis.T[term_id] * nz_theta_d != 0,
                                  np.exp(log_phis.T[term_id] + log_theta_d),
                                  0.0).ravel()
            if topic_dist.sum() == 0:
                continue
            topic_dist = topic_dist / topic_dist.sum()

            topics = np.random.choice(n_topics, size=count, p=topic_dist)
            for topic in topics:
                topic_term_counts[topic, term_id] += 1
                topic_author_term_counts[topic][author_id, term_id] += 1
    topic_author_term_counts = [x.tocsr() for x in topic_author_term_counts]
    return topic_term_counts, topic_author_term_counts
Example #28
    def get_loss_function(self, coeff: list) -> Tuple[float, list, list]:
        coeff = np.array(coeff)
        X = self.get_design_matrix(self.params.columns.features)
        y = self.get_target_column(self.params.columns.target,
                                   self.params.outcome)
        X, y = np.array(X), np.array(y)

        z = X @ coeff
        s = expit(z)
        d = s * (1 - s)
        D = np.diag(d)

        hess = X.T @ D @ X
        y_ratio = (y - s) / d
        y_ratio[(y == 0) & (s == 0)] = -1
        y_ratio[(y == 1) & (s == 1)] = 1

        grad = X.T @ D @ (z + y_ratio)

        loglike = float(np.sum(xlogy(y, s) + xlogy(1 - y, 1 - s)))
        return loglike, grad, hess
Example #29
    def smoothed(self):
        idfmatrix = (len(self.texts) / self.binarymatrix.sum(axis=0)) + 1
        idfmatrix = idfmatrix.astype(np.float16)

        idfmatrix = xlogy(np.sign(idfmatrix), idfmatrix) / np.log(2)
        # print(idfmatrix)

        data = np.asarray(idfmatrix).reshape(-1)

        self.idfmatrix = diags(data, 0)

        self.tfidfmatrix = self.tfmatrix * self.idfmatrix
Example #30
    def _joint_entropy(self, predictions, selected):
        K = predictions.shape[-1]
        M = selected.shape[0]

        exp_y = np.array(
            [np.matmul(selected, predictions[i].T) for i in range(predictions.shape[0])]) / K
        mean_entropy = selected.mean(-1, keepdims=True)[None]

        step = 256
        for idx in range(0, exp_y.shape[0], step):
            b_preds = exp_y[idx: (idx + step)]
            yield np.sum(-xlogy(b_preds, b_preds) / mean_entropy, axis=(1, -1)) / M
Example #31
def dbin_llf(r, p, n):
    r"""
    binomial distribution

    r ~ binomial(p, n); r = 0, ..., n

    .. math::

        P(r; p, n) = \frac{n!}{r!(n-r)!} p^r (1-p)^{n-r}

    """
    return binomln(n, r) + xlogy(r, p) + xlog1py(n - r, -p)
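Assuming binomln is the log binomial coefficient (a helper not shown in this listing), the expression above matches scipy's binomial log-pmf; a sketch of that check:

import numpy as np
from scipy.special import gammaln, xlogy, xlog1py
from scipy.stats import binom

def binomln(n, r):
    # assumed helper: log of the binomial coefficient "n choose r"
    return gammaln(n + 1) - gammaln(r + 1) - gammaln(n - r + 1)

r, p, n = 3, 0.4, 10
print(np.isclose(binomln(n, r) + xlogy(r, p) + xlog1py(n - r, -p),
                 binom.logpmf(r, n, p)))  # True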
Example #32
 def pmf_for_val(self,x):
     lambda_xi_x = self.my_lambda[...,None] + self.xi[...,None]*x
     log_pmf=( np.log(self.my_lambda[...,None]) 
                 + xlogy(x-1,lambda_xi_x) 
                 - lambda_xi_x
                 - gammaln(x+1) # Gamma(x+1)=x!
             )
     # Those values beyond x_max will have nans
     # that we replace with -np.inf
     log_pmf[np.isnan(log_pmf)]=-np.inf
     # Normalized
     return np.exp(log_pmf-logsumexp(log_pmf, axis=-1, keepdims=True))
Example #33
 def loglik(self,x):
     full_x=np.arange(*self.get_max_support())[(None,)*len(self.my_lambda.shape)]
     assert np.all(x>=full_x.min()) and np.all(x<=full_x.max())
     
     lambda_xi_x = self.my_lambda[...,None] + self.xi[...,None]*full_x
     log_pmf=( np.log(self.my_lambda[...,None]) 
                 + xlogy(full_x-1,lambda_xi_x) 
                 - lambda_xi_x 
                 - gammaln(full_x+1) # Gamma(x+1)=x!
             )
     # Normalized
     return log_pmf[x]-logsumexp(log_pmf, axis=-1, keepdims=True)
Example #34
def normalized_non_maximum_entropy_detector(probabilities):
    indices = numpy.argmax(probabilities, axis=1)
    normalized_probabilities = numpy.copy(probabilities)
    normalized_probabilities[numpy.arange(probabilities.shape[0]), indices] = 0
    normalized_probabilities = normalized_probabilities / numpy.sum(
        normalized_probabilities, axis=1).reshape(-1, 1)
    from scipy.special import xlogy

    confidences = -numpy.sum(
        xlogy(normalized_probabilities, normalized_probabilities), axis=1)
    #confidences /= math.log(probabilities.shape[1])
    return confidences
Example #35
    def mix_ground_truth(self):
        # fx and fy are the x and y marginal probability density functions (pdf)
        # of the mixture-of-Gaussians distribution, and fxy is its joint pdf.
        # The mutual-information ground truth is the difference between the sum
        # of the entropies of the individual variables and the joint entropy of
        # all variables; the entropies are computed by numerically integrating
        # -pdf * log(pdf) over the variables involved.
        mix, mix2, covMat1, covMat2, mu = self.mix, self.mix2, self.covMat1, self.covMat2, self.mu

        def fxy1(x, y):
            X = np.array([x, y])
            temp1 = np.matmul(np.matmul(X - mu, np.linalg.inv(covMat1)),
                              (X - mu).transpose())
            temp2 = np.matmul(np.matmul(X + mu, np.linalg.inv(covMat2)),
                              (X + mu).transpose())
            return mix*np.exp(-.5*temp1) / (2*np.pi * np.sqrt(np.linalg.det(covMat1))) \
                + (1-mix)*np.exp(-.5*temp2) / \
                (2*np.pi * np.sqrt(np.linalg.det(covMat2)))

        def fxy2(x, y):
            X = np.array([x, y])
            temp1 = np.matmul(np.matmul(X + mu, np.linalg.inv(covMat1)),
                              (X + mu).transpose())
            temp2 = np.matmul(np.matmul(X - mu, np.linalg.inv(covMat2)),
                              (X - mu).transpose())
            return mix*np.exp(-.5*temp1) / (2*np.pi * np.sqrt(np.linalg.det(covMat1))) \
                + (1-mix)*np.exp(-.5*temp2) / \
                (2*np.pi * np.sqrt(np.linalg.det(covMat2)))

        def fxy(x, y):
            return mix2 * fxy1(x, y) + (1 - mix2) * fxy2(x, y)

        lim = np.inf
        hxy = dblquad(lambda x, y: -xlogy(fxy(x, y), fxy(x, y)), -lim, lim,
                      lambda x: -lim, lambda x: lim)
        hxy1 = dblquad(lambda x, y: -xlogy(fxy1(x, y), fxy1(x, y)), -lim, lim,
                       lambda x: -lim, lambda x: lim)
        hxy2 = dblquad(lambda x, y: -xlogy(fxy2(x, y), fxy2(x, y)), -lim, lim,
                       lambda x: -lim, lambda x: lim)
        con_entropy = mix2 * hxy1[0] + (1 - mix2) * hxy2[0]
        #       print(hxy[0], hxy1[0], hxy2[0])
        return hxy[0] - con_entropy
Example #36
 def logpmf(self):
     x=np.arange(*self.get_max_support())[(None,)*len(self.my_lambda.shape)]
     lambda_xi_x = self.my_lambda[...,None] + self.xi[...,None]*x
     log_pmf=( np.log(self.my_lambda[...,None]) 
                 + xlogy(x-1,lambda_xi_x) 
                 - lambda_xi_x
                 - gammaln(x+1) # Gamma(x+1)=x!
             )
     # Those values beyond x_max will have nans
     # that we replace with -np.inf
     log_pmf[np.isnan(log_pmf)]=-np.inf
     # Normalized
     return log_pmf-logsumexp(log_pmf, axis=-1, keepdims=True)
Example #37
def binary_log_loss(y_true, y_prob):
    """Compute binary logistic loss for classification.

    This is identical to log_loss in binary classification case,
    but is kept for its use in multilabel case.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.

    y_prob : array-like of float, shape = (n_samples, n_classes)
        Predicted probabilities, as returned by a classifier's
        predict_proba method.

    Returns
    -------
    loss : float
        The degree to which the samples are correctly predicted.
    """
    return -(xlogy(y_true, y_prob) +
             xlogy(1 - y_true, 1 - y_prob)).sum() / y_prob.shape[0]
Example #38
def observed_objective(T, root, edges, data_count_pairs, log_params):
    pman = ParamManager(edge_labels=edges).set_packed(log_params)
    edge_rates, nt_probs, kappa, penalty = pman.get_implicit()
    nt_distn1d = np.array(nt_probs)
    Q = npctmctree.hkymodel.get_normalized_Q(kappa, nt_distn1d)
    edge_to_P = {}
    for edge, edge_rate in zip(edges, edge_rates):
        edge_to_P[edge] = expm(edge_rate * Q)
    xmaps, counts = zip(*data_count_pairs)
    lhoods = dynamic_xmap_lhood.get_iid_lhoods(
            T, edge_to_P, root, nt_distn1d, xmaps)
    log_likelihood = xlogy(counts, lhoods).sum()
    return -log_likelihood + penalty
Example #39
def cross_entropy(y_true, y_pred, delta=1e-9):
    """
    Binary Cross Entropy.
    
    While the definition varies a little bit across ML or information theory
    domains, Cross-entropy in general is a method of measuring the difference
    of information between two probability distributions (i.e. a way of
    measuring how similar they are). In our context, cross entropy is the
    difference between our observed values (which can be viewed as a
    probability distribution where every probability is either 0 or 1, since
    every value is known and thus has an extreme probability) and the
    predicted values (which are actual probabilities since the true values
    are unknown). Since we are interested in predicting values that follow a
    bernoulli distribution, the cross entropy takes the form of the negative
    log-likelihood of the bernoulli distribution.
    
    With this cost function defined, neural networks are just performing
    a fancier version of maximum likelihood estimation. The cost is negated
    because maximizing the likelihood is the same as minimizing the
    negative log-likelihood.
    
    Parameters
    ----------
    y_true : numpy array
        True, observed values. The outcome of an event (where 1 == success).
    y_pred : numpy array
        The predicted success probabilities.
    delta : float, optional
        A small, positive constant to offset predicted probabilities that are
        zero, which avoids log(0). Is ignored if delta = 0.
        The default is 1e-9.

    Returns
    -------
    cost : float
        The binary cross-entropy.

    """

    # Compute the cross-entropy cost
    # To avoid log(0) errors (not necessary in most cases)
    ypred = y_pred.copy()
    if delta != 0:
        ypred[ypred <= delta] = delta
        ypred[ypred >= 1 - delta] = 1 - delta


    cost = -np.sum(xlogy(y_true, ypred))
    return cost
Example #40
def entropy(y):
    """Return the empirical entropy of samples y of 
    a categorical distribution

    Arguments:
        y: np.array (N, C) , categorical labels
    Returns:
        H: float
    """
    if len(y) == 0:
        return 0
    py = y.mean(0)
    h = -np.sum(xlogy(py, py))
    return h
Example #41
def model_data(n_nonevent,
               n_event,
               max_pvalue,
               max_pvalue_policy,
               scale=None,
               return_nonevent_event=False):
    n = len(n_nonevent)

    t_n_event = n_event.sum()
    t_n_nonevent = n_nonevent.sum()

    D = []
    V = []

    E = []
    NE = []

    for i in range(1, n + 1):
        s_event = n_event[:i][::-1].cumsum()[::-1]
        s_nonevent = n_nonevent[:i][::-1].cumsum()[::-1]
        rate = s_event / (s_nonevent + s_event)

        p = s_event / t_n_event
        q = s_nonevent / t_n_nonevent
        iv = special.xlogy(p - q, p / q)

        if scale is not None:
            rate *= scale
            iv *= scale

            D.append(rate.astype(np.int64))
            V.append(iv.astype(np.int64))
        else:
            D.append(rate)
            V.append(iv)

        if max_pvalue is not None or return_nonevent_event:
            E.append(s_event)
            NE.append(s_nonevent)

    if max_pvalue is not None:
        pvalue_violation_indices = find_pvalue_violation_indices(
            n, E, NE, max_pvalue, max_pvalue_policy)
    else:
        pvalue_violation_indices = []

    if return_nonevent_event:
        return D, V, NE, E, pvalue_violation_indices

    return D, V, pvalue_violation_indices
Example #42
def entropy(x):
    """Calculate the entropy of a discrete probability distribution.

    Parameters
    ----------
    x : array-like
        Discrete probability distribution.

    Returns
    -------
    entropy : float
    """
    x = np.asarray(x)
    return -special.xlogy(x, x).sum()
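A quick check of the function above (my addition): the entropy of a uniform distribution over n outcomes is log(n) nats, and xlogy silently handles zero-probability entries.

import numpy as np

p = np.full(4, 0.25)
print(np.isclose(entropy(p), np.log(4)))                 # True
print(np.isclose(entropy([0.5, 0.5, 0.0]), np.log(2)))   # True; xlogy(0, 0) == 0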
Example #43
def get_entropy(series, nbins=15):
    """
    :param series: a 1-D array for which we need to find the entropy
    :param nbins: number of bins for histogram
    :return: entropy
    """
    # https://www.mathworks.com/matlabcentral/answers/27235-finding-entropy-from-a-probability-distribution

    counts, bin_edges = np.histogram(series, bins=nbins)
    p = counts / np.sum(counts, dtype=float)
    bin_width = np.diff(bin_edges)
    entropy = -np.sum(xlogy(p, p / bin_width))

    return entropy
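A rough, seeded demonstration (not from the original source): for samples drawn uniformly from [0, 1), p/bin_width is close to 1 in every bin, so the estimate should be near the true differential entropy of 0.

import numpy as np

rng = np.random.default_rng(0)
samples = rng.uniform(size=100_000)
print(get_entropy(samples))  # close to 0.0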
Example #44
    def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0)
Example #45
    def entropy_rate(self):
        """
        Returns the estimated entropy rate of the Markov chain
        """
        from rpy2.robjects import r, globalenv
        from itertools import product
        import pandas as pd
        import pandas.rpy.common as com
        from scipy.special import xlogy

        r("library('DTMCPack')")
        globalenv['transmat'] = com.convert_to_r_dataframe(pd.DataFrame(self.transmat))
        stationary_dist = r("statdistr(transmat)")
        # long_as = lambda x: range(len(x))
        rate = 0
        for s1, s2 in product(range(len(self.means)), range(len(self.means))):
            p = self.transmat[s1][s2]
            rate -= stationary_dist[s1] * xlogy(p, p)
        return rate
Example #46
def log_loss(y_true, y_prob):
    """Compute Logistic loss for classification.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.

    y_prob : array-like of float, shape = (n_samples, n_classes)
        Predicted probabilities, as returned by a classifier's
        predict_proba method.

    Returns
    -------
    loss : float
        The degree to which the samples are correctly predicted.
    """
    if y_prob.shape[1] == 1:
        y_prob = np.append(1 - y_prob, y_prob, axis=1)

    if y_true.shape[1] == 1:
        y_true = np.append(1 - y_true, y_true, axis=1)

    return - xlogy(y_true, y_prob).sum() / y_prob.shape[0]
Example #47
 def _logpmf(self, x, n, p):
     coeff = gamln(n + x) - gamln(x + 1) - gamln(n)
     return coeff + special.xlogy(n, p) + special.xlog1py(x, -p)
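The expression above is the standard negative binomial log-pmf; a comparison sketch against scipy.stats.nbinom over a few support points:

import numpy as np
from scipy.special import gammaln, xlogy, xlog1py
from scipy.stats import nbinom

x, n, p = np.arange(6), 4, 0.3
logpmf = gammaln(n + x) - gammaln(x + 1) - gammaln(n) + xlogy(n, p) + xlog1py(x, -p)
print(np.allclose(logpmf, nbinom.logpmf(x, n, p)))  # True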
Example #48
 def numeric(self, values):
     x = values[0]
     results = -xlogy(x, x)
     # Return -inf outside the domain
     results[np.isnan(results)] = -np.inf
     return results
Example #49
 def _logpmf(self, x, n, p):
     k = floor(x)
     combiln = (gamln(n+1) - (gamln(k+1) + gamln(n-k+1)))
     return combiln + special.xlogy(k, p) + special.xlog1py(n-k, -p)
Example #50
 def _logpmf(self, k, mu):
     Pk = special.xlogy(k, mu) - gamln(k + 1) - mu
     return Pk
Example #51
    def get_endpoint_neg_ll(self, J_other):
        """
        Compute the log likelihood for only the endpoint distribution.

        """
        return -xlogy(J_other, self.joint_distn).sum()
Example #52
 def numeric(self, values):
     x = values[0]
     y = values[1]
     #TODO return inf outside the domain
     return xlogy(x, x/y) - x + y
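The expression above is the elementwise relative-entropy term x*log(x/y) - x + y, which is nonnegative and vanishes where x == y; a small sketch:

import numpy as np
from scipy.special import xlogy

x = np.array([1.0, 2.0, 3.0])
y = np.array([1.0, 2.0, 1.0])
print(xlogy(x, x / y) - x + y)  # [0., 0., ~1.296]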
Example #53
 def _entropy(self, p):
     h = -special.xlogy(p, p) - special.xlogy(1 - p, 1 - p)
     return h
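Because xlogy(0, 0) evaluates to 0, the Bernoulli entropy above is exact at p = 0 and p = 1 without any special-casing; a standalone sketch of the same formula:

import numpy as np
from scipy import special

p = np.array([0.0, 0.5, 1.0])
h = -special.xlogy(p, p) - special.xlogy(1 - p, 1 - p)
print(h)  # [0.0, log(2), 0.0]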
Example #54
 def _entropy(self, M, n, N):
     k = np.r_[N - (M - n):min(n, N) + 1]
     vals = self.pmf(k, M, n, N)
     h = -np.sum(special.xlogy(vals, vals), axis=0)
     return h
Example #55
 def _entropy(self, n, p):
     k = np.r_[0:n + 1]
     vals = self._pmf(k, n, p)
     h = -np.sum(special.xlogy(vals, vals), axis=0)
     return h
Example #56
def run(model, compound_states, node_to_data_fset):
    """

    Parameters
    ----------
    model : Python module
        a module with hardcoded information about the model

    """
    # Get the analog of the genetic code.
    primary_to_tol = model.get_primary_to_tol()

    # Get the number of compound states.
    ncompound = len(compound_states)

    # Get the prior blink state distribution.
    blink_distn = model.get_blink_distn()

    #TODO check that the primary rate matrix is time-reversible
    # Get the primary rate matrix and convert it to a dense ndarray.
    nprimary = 6
    Q_primary_nx = model.get_Q_primary()
    Q_primary_dense = nx_to_np_rate_matrix(Q_primary_nx, range(nprimary))
    primary_distn = model.get_primary_distn()
    primary_distn_dense = np.array([primary_distn[i] for i in range(nprimary)])

    # Get the expected rate using only the nx rate matrix and the nx distn.
    expected_primary_rate = 0
    for sa, sb in Q_primary_nx.edges():
        p = primary_distn[sa]
        rate = Q_primary_nx[sa][sb]['weight']
        expected_primary_rate += p * rate

    # Normalize the primary rate matrix by dividing all rates
    # by the expected rate.
    for sa, sb in Q_primary_nx.edges():
        Q_primary_nx[sa][sb]['weight'] /= expected_primary_rate

    # Get the rooted directed tree shape.
    T, root = model.get_T_and_root()

    # Get the map from ordered tree edge to branch length.
    # The branch length has complicated units.
    # It is the expected number of primary process transitions
    # along the branch conditional on all tolerance classes being tolerated.
    edge_to_blen = model.get_edge_to_blen()

    # Define some indicators for the compound process
    indicators = define_compound_process(
            Q_primary_nx, compound_states, primary_to_tol)
    I_syn, I_non, I_on, I_off = indicators

    # Define the dense compound transition rate matrix
    on_rate = model.get_rate_on()
    off_rate = model.get_rate_off()
    Q_compound_nx = get_Q_compound(
            Q_primary_nx, on_rate, off_rate, primary_to_tol, compound_states)
    Q_compound = nx_to_np(Q_compound_nx, compound_states)
    row_sums = np.sum(Q_compound, axis=1)
    Q_compound = Q_compound - np.diag(row_sums)
    
    # Define a sparse stationary distribution over compound states.
    compound_distn = {}
    for state in compound_states:
        if compound_state_is_ok(primary_to_tol, state):
            p = 1.0
            p *= primary_distn[state.P]
            for tol_name in 'T0', 'T1', 'T2':
                if primary_to_tol[state.P] != tol_name:
                    tol_state = getattr(state, tol_name)
                    p *= blink_distn[tol_state]
            compound_distn[state] = p
    total = sum(compound_distn.values())
    assert_allclose(total, 1)

    # Convert the compound state distribution to a dense array.
    # Check that the distribution is at equilibrium.
    compound_distn_np = np.array([
            compound_distn.get(k, 0) for k in compound_states])
    equilibrium_rates = np.dot(compound_distn_np, Q_compound)
    assert_allclose(equilibrium_rates, 0, atol=1e-10)

    # Make the np and nx transition probability matrices.
    # Map each branch to the transition matrix.
    edge_to_P_np = {}
    edge_to_P_nx = {}
    for edge in T.edges():
        t = edge_to_blen[edge]
        P_np = scipy.linalg.expm(Q_compound * t)
        P_nx = np_to_nx_transition_matrix(P_np, compound_states)
        edge_to_P_np[edge] = P_np
        edge_to_P_nx[edge] = P_nx

    # Compute the likelihood
    lhood = dynamic_fset_lhood.get_lhood(
            T, edge_to_P_nx, root, compound_distn, node_to_data_fset)
    print('likelihood:')
    print(lhood)
    print()

    # Compute the map from node to posterior state distribution.
    # Convert the dict distribution back into a dense distribution.
    # This is used in the calculation of expected log likelihood.
    node_to_distn = dynamic_fset_lhood.get_node_to_distn(
            T, edge_to_P_nx, root, compound_distn, node_to_data_fset)
    root_distn = node_to_distn[root]
    print('prior distribution at the root:')
    for i, p in sorted(compound_distn.items()):
        print(i, p)
    print()
    print('posterior distribution at the root:')
    for i, p in sorted(root_distn.items()):
        print(i, p)
    print()
    root_distn_np = np.zeros(ncompound)
    for i, s in enumerate(compound_states):
        if s in root_distn:
            root_distn_np[i] = root_distn[s]

    # Compute the map from edge to posterior joint state distribution.
    # Convert the nx transition probability matrices back into dense ndarrays.
    edge_to_nxdistn = dynamic_fset_lhood.get_edge_to_nxdistn(
            T, edge_to_P_nx, root, compound_distn, node_to_data_fset)
    edge_to_J = {}
    for edge, J_nx in edge_to_nxdistn.items():
        J_np = nx_to_np(J_nx, compound_states)
        edge_to_J[edge] = J_np


    # Initialize contributions to the expected log likelihood.
    #
    # Compute the contribution of the initial state distribution.
    ell_init = xlogy(root_distn_np, compound_distn_np).sum()
    # Initialize the contribution of the expected transitions.
    I_all = I_on + I_off + I_syn + I_non
    I_log_all = xlogy(I_all, Q_compound)
    ell_trans = 0
    # Initialize the contribution of the dwell times.
    ell_dwell = 0

    # Compute labeled transition count expectations
    # using the rate matrix, the joint posterior state distribution matrices,
    # the indicator matrices, and the conditional transition probability
    # distribution matrix.
    primary_expectation = 0
    blink_expectation = 0
    for edge in T.edges():
        va, vb = edge
        Q = Q_compound
        J = edge_to_J[edge]
        P = edge_to_P_np[edge]
        t = edge_to_blen[edge]

        # primary transition event count expectations
        syn_total = compute_edge_expectation(Q, P, J, I_syn, t)
        non_total = compute_edge_expectation(Q, P, J, I_non, t)
        primary_expectation += syn_total
        primary_expectation += non_total
        print('edge %s -> %s syn expectation %s' % (va, vb, syn_total))
        print('edge %s -> %s non expectation %s' % (va, vb, non_total))

        # blink transition event count expectations
        on_total = compute_edge_expectation(Q, P, J, I_on, t)
        off_total = compute_edge_expectation(Q, P, J, I_off, t)
        blink_expectation += on_total
        blink_expectation += off_total
        print('edge %s -> %s on expectation %s' % (va, vb, on_total))
        print('edge %s -> %s off expectation %s' % (va, vb, off_total))

        # Compute expectation of logs of rates of observed transitions.
        # This is part of the expected log likelihood calculation.
        contrib = compute_edge_expectation(Q, P, J, I_log_all, t)
        ell_trans += contrib
        print('edge %s -> %s ell trans contrib %s' % (va, vb, contrib))

        # Compute sum of expectations of dwell times
        contrib = compute_dwell_times(Q, P, J, -row_sums, t)
        ell_dwell += contrib
        print('edge %s -> %s ell dwell contrib %s' % (va, vb, contrib))
        
        print()

    print('expected count of primary process transitions:')
    print(primary_expectation)
    print()

    print('expected count of blink process transitions:')
    print(blink_expectation)
    print()

    print('expected log likelihood:')
    print('contribution of initial state distribution :', ell_init)
    print('contribution of expected transition counts :', ell_trans)
    print('contribution of expected dwell times       :', ell_dwell)
    print('total                                      :', (
        ell_init + ell_trans + ell_dwell))
    print()
Example #57
def get_tolerance_ll_contribs(
        rate_on, rate_off, total_tree_length,
        expected_initial_on, expected_initial_off,
        expected_dwell_on, expected_dwell_off,
        expected_nabsorptions,
        expected_ngains, expected_nlosses,
        ):
    """
    Tolerance process log likelihood contributions.

    Note that the contributions associated with dwell times
    subsume the primary process dwell time log likelihood contributions.
    The first group of args defines parameters of the process.
    The second group defines the posterior tolerance distribution at the root.
    The third group defines the posterior tolerance dwell times.
    The fourth group is just a virtual posterior nabsorptions count which
    is related to dwell times of the primary process.
    The fifth group defines posterior transition expectations.

    Parameters
    ----------
    rate_on : float
        x
    rate_off : float
        x
    total_tree_length : float
        x
    expected_initial_on : float
        x
    expected_initial_off : float
        x
    expected_dwell_on : float
        x
    expected_dwell_off : float
        x
    expected_nabsorptions : float
        x
    expected_ngains : float
        x
    expected_nlosses : float
        x

    Returns
    -------
    init_ll_contrib : float
        x
    dwell_ll_contrib_prim : float
        x
    dwell_ll_contrib_tol : float
        x
    trans_ll_contrib : float
        x

    """
    tolerance_distn = get_tolerance_distn(rate_off, rate_on)
    init_ll_contrib = (
            special.xlogy(expected_initial_on - 1, tolerance_distn[1]) +
            special.xlogy(expected_initial_off, tolerance_distn[0]))
    dwell_ll_contrib_prim = -expected_nabsorptions
    dwell_ll_contrib_tol = -(
            expected_dwell_off * rate_on +
            (expected_dwell_on - total_tree_length) * rate_off)
    trans_ll_contrib = (
            special.xlogy(expected_ngains, rate_on) +
            special.xlogy(expected_nlosses, rate_off))
    ll_contribs = (
            init_ll_contrib, dwell_ll_contrib_prim,
            dwell_ll_contrib_tol, trans_ll_contrib)
    return ll_contribs
Example #58
    def test_fully_augmented_likelihood_sufficient_statistics(self):
        # If we fix all of the parameters of the model except for the two
        # parameters that correspond to the tolerance transition rates,
        # then this model has low-dimensional sufficient statistics.
        # I think that these two parameters are associated
        # with three sufficient statistics.

        # Define the tolerance process rates.
        rate_on = 0.5
        rate_off = 1.5

        # Define some other properties of the process,
        # in a way that is not object-oriented.
        info = get_example_tolerance_process_info(rate_on, rate_off)
        (primary_distn, Q, primary_to_part,
                compound_to_primary, compound_to_tolerances, compound_distn,
                Q_compound) = info

        # Summarize the other properties.
        nprimary = len(primary_distn)
        nparts = len(set(primary_to_part.values()))
        total_tolerance_rate = rate_on + rate_off
        tolerance_distn = get_tolerance_distn(rate_off, rate_on)

        # Define a tree with edge weights.
        T = nx.Graph()
        T.add_edge(0, 1, weight=0.1)
        T.add_edge(2, 1, weight=0.2)
        T.add_edge(1, 3, weight=5.3)
        T.add_edge(3, 4, weight=0.4)
        T.add_edge(3, 5, weight=0.5)

        # Summarize the total tree length.
        total_tree_length = sum(T[a][b]['weight'] for a, b in T.edges())

        # Randomly assign compound leaf states.
        choices = list(compound_distn)
        node_to_compound_state = dict(
                (n, random.choice(choices)) for n in (0, 2, 4, 5))

        # Test the likelihood calculations
        # for a few conditionally sampled histories on the tree.
        nhistories = 10
        for compound_process_history in gen_histories(
                T, Q_compound, node_to_compound_state, nhistories=nhistories):

            # Summarize the compound process history.
            dwell_times = get_history_dwell_times(
                    compound_process_history)
            root_state, transitions = get_history_root_state_and_transitions(
                    compound_process_history)

            # Get the total rate away from each compound state.
            total_rates = get_total_rates(Q_compound)

            # Directly compute the log likelihood of the history.
            ll_initial = np.log(compound_distn[root_state])
            ll_dwell = 0.0
            for compound_state, dwell_time in dwell_times.items():
                ll_dwell -= dwell_time * total_rates[compound_state]
            ll_transitions = 0.0
            for a, b in transitions.edges():
                ntrans = transitions[a][b]['weight']
                rate = Q_compound[a][b]['weight']
                ll_transitions += special.xlogy(ntrans, rate)

            direct_ll_initrans = ll_initial + ll_transitions
            direct_ll_dwell = ll_dwell

            # Compute the log likelihood through sufficient statistics.

            # Get the number of tolerance gains
            # plus the number of initial tolerances,
            # and the number of tolerance losses
            # plus the number of initial lack-of-tolerances.
            ngains_stat = 0
            nlosses_stat = 0
            for tol in compound_to_tolerances[root_state]:
                if tol == 1:
                    ngains_stat += 1
                elif tol == 0:
                    nlosses_stat += 1
                else:
                    raise Exception('invalid root tolerance state')
            for a, b in transitions.edges():
                if a == b:
                    continue
                ntransitions = transitions[a][b]['weight']
                prim_a = compound_to_primary[a]
                prim_b = compound_to_primary[b]
                tols_a = compound_to_tolerances[a]
                tols_b = compound_to_tolerances[b]
                tols_diff = [y-x for x, y in zip(tols_a, tols_b)]
                ndiffs = sum(1 for x in tols_diff if x)
                if prim_a == prim_b:
                    if ndiffs == 0:
                        raise Exception(
                                'expected each non-self transition '
                                'to have either a primary state change '
                                'or a tolerance state change')
                    elif ndiffs > 1:
                        raise Exception(
                                'expected at most one tolerance state '
                                'difference but observed %d' % ndiffs)
                    elif ndiffs != 1:
                        raise Exception('internal error')
                    signed_hdist = sum(tols_diff)
                    if signed_hdist == 1:
                        ngains_stat += ntransitions
                    elif signed_hdist == -1:
                        nlosses_stat += ntransitions
                    else:
                        raise Exception('invalid tolerance process transition')

            # Get the total amount of time spent in tolerated states,
            # summed over each tolerance class.
            tolerance_duration_stat = 0.0
            for compound_state, dwell_time in dwell_times.items():
                ntols = sum(compound_to_tolerances[compound_state])
                tolerance_duration_stat += dwell_time * ntols

            # Initialize the log likelihood for this more clever approach.
            ll_initrans = 0.0
            ll_dwell = 0.0

            # Add the log likelihood contributions that involve
            # the sufficient statistics and the on/off tolerance rates.
            ll_initrans -= special.xlogy(nparts-1, total_tolerance_rate)
            ll_initrans += special.xlogy(ngains_stat-1, rate_on)
            ll_initrans += special.xlogy(nlosses_stat, rate_off)
            ll_dwell -= rate_off * (
                    tolerance_duration_stat - total_tree_length)
            ll_dwell -= rate_on * (
                    total_tree_length * nparts - tolerance_duration_stat)

            # Add the log likelihood contributions that involve
            # general functions of the data and not the on/off tolerance rates.
            # On the other hand, they do involve the tolerance state.
            root_primary_state = compound_to_primary[root_state]
            ll_initrans += np.log(primary_distn[root_primary_state])
            for compound_state, dwell_time in dwell_times.items():
                primary_state = compound_to_primary[compound_state]
                primary_rate_out = 0.0
                for sink in Q_compound[compound_state]:
                    if compound_to_primary[sink] != primary_state:
                        rate = Q_compound[compound_state][sink]['weight']
                        primary_rate_out += rate
                ll_dwell -= dwell_time * primary_rate_out
            for a, b in transitions.edges():
                edge = transitions[a][b]
                ntransitions = edge['weight']
                prim_a = compound_to_primary[a]
                prim_b = compound_to_primary[b]
                if prim_a != prim_b:
                    rate = Q_compound[a][b]['weight']
                    ll_initrans += special.xlogy(ntransitions, rate)

            clever_ll_initrans = ll_initrans
            clever_ll_dwell = ll_dwell

            # Compare the two log likelihood calculations.
            assert_allclose(direct_ll_initrans, clever_ll_initrans)
            assert_allclose(direct_ll_dwell, clever_ll_dwell)
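
As a compact restatement of the rate-dependent block above (a sketch; the function name and packaging are hypothetical), the part of the fully augmented log likelihood that depends on the on/off tolerance rates can be written as a function of the three sufficient statistics:

from scipy import special

def tolerance_rate_loglik(rate_on, rate_off, nparts, total_tree_length,
                          ngains_stat, nlosses_stat, tolerance_duration_stat):
    # Rate-dependent part of the fully augmented log likelihood,
    # expressed through the three sufficient statistics.
    total_tolerance_rate = rate_on + rate_off
    ll = -special.xlogy(nparts - 1, total_tolerance_rate)
    ll += special.xlogy(ngains_stat - 1, rate_on)
    ll += special.xlogy(nlosses_stat, rate_off)
    ll -= rate_off * (tolerance_duration_stat - total_tree_length)
    ll -= rate_on * (total_tree_length * nparts - tolerance_duration_stat)
    return ll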
Exemple #59
0
    def _h_thin_plate(self, r):
        return xlogy(r**2, r)
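
The xlogy form matters only at r = 0: the thin-plate basis r**2 * log(r) would otherwise evaluate to 0 * -inf = nan there. A quick standalone check (independent of any particular class):

import numpy as np
from scipy.special import xlogy

r = np.array([0.0, 0.5, 1.0, 2.0])
print(xlogy(r**2, r))  # xlogy(0, 0) is defined as 0, so the r = 0 entry is finite
with np.errstate(divide='ignore', invalid='ignore'):
    print(r**2 * np.log(r))  # the r = 0 entry is nan because log(0) is -inf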
Exemple #60
0
def get_trajectory_log_likelihood(
        T_aug, root, prior_root_distn, Q_default=None):
    """

    Parameters
    ----------
    T_aug : undirected weighted networkx graph
        Trajectory with weighted edges annotated with states.
    root : integer
        Root node.
    prior_root_distn : dict
        Prior distribution over states at the root.
    Q_default : directed weighted networkx graph
        Rate matrix that applies to all edges.
        Despite the keyword default of None, the current implementation
        requires this argument.

    Returns
    -------
    log_likelihood : float
        Logarithm of the trajectory likelihood
        according to the given Markov jump process.

    Notes
    -----
    T_aug is the first argument so that the remaining arguments can be
    fixed with functools.partial, yielding a callback suitable for MCMC.

    """
    # Compute the total rates.
    total_rates = get_total_rates(Q_default)

    # Compute primary process statistics.
    # These will be used for two purposes.
    # One of the purposes is as the denominator of the
    # importance sampling ratio.
    # The second purpose is to compute contributions
    # to the neg log likelihood estimate.
    info = get_history_statistics(T_aug, root=root)
    dwell_times, root_state, transitions = info

    # contribution of root state to log likelihood
    if root_state in prior_root_distn:
        init_ll = np.log(prior_root_distn[root_state])
    else:
        init_ll = -np.inf

    # contribution of dwell times
    ll = 0.0
    for state, dwell in dwell_times.items():
        if state in total_rates:
            ll -= dwell * total_rates[state]
    dwell_ll = ll

    # contribution of transitions
    ll = 0.0
    for sa, sb in transitions.edges():
        ntransitions = transitions[sa][sb]['weight']
        if ntransitions:
            if Q_default.has_edge(sa, sb):
                rate = Q_default[sa][sb]['weight']
                ll += special.xlogy(ntransitions, rate)
            else:
                ll = -np.inf
    trans_ll = ll

    # Return the sum of the log likelihood contributions.
    log_likelihood = init_ll + dwell_ll + trans_ll
    return log_likelihood
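
For orientation, a hand-rolled sketch of the same decomposition on a tiny two-state trajectory, bypassing get_history_statistics by summarizing the trajectory by hand (all numbers are made up):

import numpy as np
from scipy import special

# Two-state Markov jump process with rates for 0 -> 1 and 1 -> 0.
rates = {(0, 1): 2.0, (1, 0): 1.0}
total_rates = {0: 2.0, 1: 1.0}
prior_root_distn = {0: 1.0 / 3.0, 1: 2.0 / 3.0}

# Hand-summarized trajectory: start in state 0, jump to 1, jump back to 0.
root_state = 0
dwell_times = {0: 0.7, 1: 0.4}
transitions = {(0, 1): 1, (1, 0): 1}

init_ll = np.log(prior_root_distn[root_state])
dwell_ll = -sum(dwell_times[s] * total_rates[s] for s in dwell_times)
trans_ll = sum(special.xlogy(n, rates[edge])
               for edge, n in transitions.items())
print(init_ll + dwell_ll + trans_ll)

In an MCMC setting, the remaining arguments could be fixed up front, for example with functools.partial(get_trajectory_log_likelihood, root=root, prior_root_distn=prior_root_distn, Q_default=Q), leaving a callback that takes only T_aug.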