Example #1
    def _estimate_log_prob_resp(self, X, group=None):
        """Estimate log probabilities and responsibilities for each sample.

        Compute the log probabilities, weighted log probabilities per
        component and responsibilities for each sample in X with respect to
        the current state of the model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Returns
        -------
        log_prob_norm : array, shape (n_samples,)
            log p(X)

        log_responsibilities : array, shape (n_samples, n_components)
            logarithm of the responsibilities
        """
        weighted_log_prob = self._estimate_weighted_log_prob(X, group=group)
        log_prob_norm = logsumexp(weighted_log_prob, axis=1)

        with np.errstate(under='ignore'):
            # ignore underflow
            log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]
        return log_prob_norm, log_resp

    def estimate_predict(self, X, y, X_test):
        """Fit on (X, y) and predict labels and class probabilities for X_test."""
        _, n_features = X.shape
        self.n_features_ = n_features

        labelbin = LabelBinarizer()
        Y = labelbin.fit_transform(y)
        self.classes_ = labelbin.classes_
        if Y.shape[1] == 1:
            Y = np.concatenate((1 - Y, Y), axis=1)

        n_effective_classes = Y.shape[1]

        self.starting_values(n_effective_classes, n_features)
        self.count(X, Y)
        alpha = 0.01  # smoothing parameter for the feature log-distribution update
        self.update_feature_log_distribution(alpha)
        self.update_class_log_distribution()
        # The maximum a posteriori (MAP) prediction
        jll = self.joint_log_likelihood(X_test)
        log_prob_x = logsumexp(jll, axis=1)
        predict_log_prob = jll - np.atleast_2d(log_prob_x).T
        predict_prob = np.exp(predict_log_prob)

        predict = self.classes_[np.argmax(jll, axis=1)]
        
        return predict, predict_prob
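
Both methods above rely on the same pattern: subtract a row-wise logsumexp to turn unnormalized log scores into log probabilities. A minimal, self-contained sketch of that pattern (the array values are illustrative, not model output):

# Minimal sketch of the logsumexp normalization used above: subtracting the
# per-row logsumexp converts unnormalized log scores into log probabilities.
import numpy as np
from scipy.special import logsumexp

weighted_log_prob = np.array([[-1.2, -0.3], [-0.7, -2.5]])   # (n_samples, n_components)
log_prob_norm = logsumexp(weighted_log_prob, axis=1)          # log p(X) per sample
log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]   # log responsibilities
assert np.allclose(np.exp(log_resp).sum(axis=1), 1.0)         # rows sum to one
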
Example #3
def test_multinomial_loss_ground_truth():
    # n_samples, n_features, n_classes = 4, 2, 3
    n_classes = 3
    X = np.array([[1.1, 2.2], [2.2, -4.4], [3.3, -2.2], [1.1, 1.1]])
    y = np.array([0, 1, 2, 0])
    lbin = LabelBinarizer()
    Y_bin = lbin.fit_transform(y)

    weights = np.array([[0.1, 0.2, 0.3], [1.1, 1.2, -1.3]])
    intercept = np.array([1., 0, -.2])
    sample_weights = np.array([0.8, 1, 1, 0.8])

    prediction = np.dot(X, weights) + intercept
    logsumexp_prediction = logsumexp(prediction, axis=1)
    p = prediction - logsumexp_prediction[:, np.newaxis]
    loss_1 = -(sample_weights[:, np.newaxis] * p * Y_bin).sum()
    diff = sample_weights[:, np.newaxis] * (np.exp(p) - Y_bin)
    grad_1 = np.dot(X.T, diff)

    weights_intercept = np.vstack((weights, intercept)).T.ravel()
    loss_2, grad_2, _ = _multinomial_loss_grad(weights_intercept, X, Y_bin,
                                               0.0, sample_weights)
    grad_2 = grad_2.reshape(n_classes, -1)
    grad_2 = grad_2[:, :-1].T

    assert_almost_equal(loss_1, loss_2)
    assert_array_almost_equal(grad_1, grad_2)

    # ground truth
    loss_gt = 11.680360354325961
    grad_gt = np.array([[-0.557487, -1.619151, +2.176638],
                        [-0.903942, +5.258745, -4.354803]])
    assert_almost_equal(loss_1, loss_gt)
    assert_array_almost_equal(grad_1, grad_gt)
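
The hand-computed loss_1 above is the sample-weighted multinomial (softmax cross-entropy) loss. A minimal restatement of the same formula with illustrative logits, labels and weights (assuming scipy.special.softmax is available):

# Restatement of the weighted multinomial loss checked above, written with
# explicit softmax probabilities instead of log-space arithmetic.
import numpy as np
from scipy.special import softmax

logits = np.array([[1.0, -0.5, 0.2], [0.3, 2.0, -1.0]])   # (n_samples, n_classes)
y = np.array([0, 1])                                       # true class indices
sample_weights = np.array([0.8, 1.0])

probs = softmax(logits, axis=1)
loss = -np.sum(sample_weights * np.log(probs[np.arange(len(y)), y]))
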
Example #4
    def _e_step(self, X):
        """

        Parameters
        ----------
        X: array-like, (n_samples, n_features)
            The data.

        Output
        ------
        out: dict

        out['log_resp']: array-like, (n_samples, n_components)
            The responsibilities.

        out['obs_nll']: float
            The observed negative log-likelihood of the data at the current
            parameters.
        """
        log_prob = self.log_probs(X)
        log_resp = self.log_resps(log_prob)

        obs_nll = -logsumexp(log_prob, axis=1).mean()

        return {'log_resp': log_resp, 'obs_nll': obs_nll}
    def _estimate_prob_resp(self, X):
        """Estimate log probabilities and responsibilities for each sample.

        Compute the log probabilities, weighted log probabilities per
        component and responsibilities for each sample in X with respect to
        the current state of the model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Returns
        -------
        log_prob_norm : array, shape (n_samples,)
            log p(X)

        resp : array, shape (n_samples, n_components)
            Hard (one-hot) responsibilities: each sample is assigned entirely
            to its most probable component.
        """
        weighted_log_prob = self._estimate_weighted_log_prob(X)
        log_prob_norm = logsumexp(weighted_log_prob, axis=1)
        with np.errstate(under='ignore'):
            # ignore underflow
            log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]

        # convert the soft responsibilities to hard (one-hot) assignments
        n_samples, _ = log_resp.shape
        resp = np.zeros_like(log_resp)
        resp[np.arange(n_samples), np.argmax(log_resp, axis=1)] = 1

        return log_prob_norm, resp
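
The method above departs from the usual soft E-step by converting the responsibilities into hard one-hot assignments. A minimal sketch contrasting the two, with illustrative weighted log probabilities:

# Soft vs hard responsibilities derived from the same weighted log probabilities,
# as in _estimate_prob_resp above (values are illustrative).
import numpy as np
from scipy.special import logsumexp

weighted_log_prob = np.log(np.array([[0.2, 0.6], [0.5, 0.1]]))
log_resp = weighted_log_prob - logsumexp(weighted_log_prob, axis=1)[:, np.newaxis]

soft_resp = np.exp(log_resp)                        # standard EM responsibilities
hard_resp = np.zeros_like(log_resp)                 # winner-take-all assignment
hard_resp[np.arange(log_resp.shape[0]), np.argmax(log_resp, axis=1)] = 1
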
Example #7
def p_i(X, i):
    diff_embedded = X[i] - X
    dist_embedded = np.einsum('ij,ij->i', diff_embedded, diff_embedded)
    dist_embedded[i] = np.inf

    # compute exponentiated distances (use the log-sum-exp trick to
    # avoid numerical instabilities)
    exp_dist_embedded = np.exp(-dist_embedded - logsumexp(-dist_embedded))
    return exp_dist_embedded
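
On well-behaved inputs the stabilized computation above matches the naive softmax over negative squared distances; the logsumexp form simply avoids underflow when the distances are large. A minimal check with made-up data:

# Check that the logsumexp-stabilized softmax in p_i matches the naive formula
# on small, well-behaved distances (X and i are illustrative).
import numpy as np
from scipy.special import logsumexp

X = np.random.RandomState(0).randn(5, 3)
i = 2
diff = X[i] - X
dist = np.einsum('ij,ij->i', diff, diff)
dist[i] = np.inf                                # exclude the point itself

stable = np.exp(-dist - logsumexp(-dist))       # log-sum-exp trick
naive = np.exp(-dist) / np.exp(-dist).sum()     # direct normalization
assert np.allclose(stable, naive)
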
Example #8
    def _e_step(self, X):
        """
        Parameters
        ----------
        X:
            The observed data.


        Output
        ------
        E_out: dict
            E_out['log_resp']: array-like

            E_out['obs_nll']: float

            E_out['evals']: array-like, (n_blocks, )

            E_out['eig_var']: array-like

        """

        # standard E-step
        log_prob = self.log_probs(X)
        log_resp = self.log_resps(log_prob)

        obs_nll = - logsumexp(log_prob, axis=1).mean()

        if self.n_blocks is not None:
            B = self.n_blocks
        else:
            B = len(self.eval_weights)

        assert self.__mode in ['lap_pen', 'fine_tune_bd']
        if self.__mode == 'lap_pen' and self.n_blocks != 1:

            if self.lap == 'sym':

                evals, eig_var = geigh_Lsym_bp_smallest(X=self.bd_weights_,
                                                        rank=B,
                                                        zero_tol=1e-10,
                                                        method='tsym')

            elif self.lap == 'un':
                Lun = get_unnorm_laplacian_bp(self.bd_weights_)
                all_evals, all_evecs = eigh_wrapper(Lun)
                eig_var = all_evecs[:, -B:]
                evals = all_evals[-B:]

        else:  # if self.__mode == 'fine_tune_bd':
            evals = None
            eig_var = None

        return {'log_resp': log_resp,
                'obs_nll': obs_nll,
                'evals': evals,
                'eig_var': eig_var}
 def predict(self, test_set):
     predictions = []
     predict_prob = []
     for example in test_set:
         cleaned_example = self.tokenize(example)
         post_prob = self.joint_log_likelihood(cleaned_example)
         predictions.append(self.classese[np.argmax(post_prob)])
         
         log_prob_x = logsumexp(post_prob)
         predict_log_prob = post_prob - np.atleast_2d(log_prob_x).T
         predict_prob.append(np.exp(predict_log_prob))
     
     return np.array(predictions), np.concatenate(predict_prob, axis=0)
    def preProba(self, testData):
        logSum = np.zeros(shape=(testData.shape[0], 2))
        for i in range(200):
            clf = self.clf[i]
            prob = clf.predict_proba(testData[['var_' + str(i)]])
            logSum += np.log(prob)

        logSum += np.array([
            np.log(self.zero) - np.log(self.total),
            np.log(self.one) - np.log(self.total)
        ])
        log_prob_x = logsumexp(logSum, axis=1)
        return np.exp(logSum - np.atleast_2d(log_prob_x).T)
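
The method above combines 200 single-feature classifiers naive-Bayes style: per-feature class probabilities are summed in log space, the log prior is added, and the result is renormalized with logsumexp. A minimal sketch of that combination with illustrative probabilities and priors:

# Sketch of the naive-Bayes-style combination used in preProba: sum per-feature
# log probabilities, add the log prior, then renormalize (values are illustrative).
import numpy as np
from scipy.special import logsumexp

per_feature_probs = [np.array([[0.7, 0.3], [0.4, 0.6]]),   # feature 0: (n_samples, 2)
                     np.array([[0.2, 0.8], [0.5, 0.5]])]   # feature 1: (n_samples, 2)
log_prior = np.log(np.array([0.9, 0.1]))                   # class priors

log_sum = sum(np.log(p) for p in per_feature_probs) + log_prior
posterior = np.exp(log_sum - logsumexp(log_sum, axis=1)[:, np.newaxis])
assert np.allclose(posterior.sum(axis=1), 1.0)
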
Example #12
    def _loss_grad_lbfgs(self, A, X, mask, sign=1.0):

        if self.n_iter_ == 0 and self.verbose:
            header_fields = ['Iteration', 'Objective Value', 'Time(s)']
            header_fmt = '{:>10} {:>20} {:>10}'
            header = header_fmt.format(*header_fields)
            cls_name = self.__class__.__name__
            print('[{cls}]'.format(cls=cls_name))
            print('[{cls}] {header}\n[{cls}] {sep}'.format(cls=cls_name,
                                                           header=header,
                                                           sep='-' *
                                                           len(header)))

        start_time = time.time()

        A = A.reshape(-1, X.shape[1])
        X_embedded = np.dot(X, A.T)  # (n_samples, n_components)
        # Compute softmax distances
        p_ij = pairwise_distances(X_embedded, squared=True)
        np.fill_diagonal(p_ij, np.inf)
        p_ij = np.exp(-p_ij - logsumexp(-p_ij, axis=1)[:, np.newaxis])
        # (n_samples, n_samples)

        # Compute loss
        masked_p_ij = p_ij * mask
        p = masked_p_ij.sum(axis=1, keepdims=True)  # (n_samples, 1)
        loss = p.sum()

        # Compute gradient of loss w.r.t. `transform`
        weighted_p_ij = masked_p_ij - p_ij * p
        weighted_p_ij_sym = weighted_p_ij + weighted_p_ij.T
        np.fill_diagonal(weighted_p_ij_sym, -weighted_p_ij.sum(axis=0))
        gradient = 2 * (X_embedded.T.dot(weighted_p_ij_sym)).dot(X)

        if self.verbose:
            start_time = time.time() - start_time
            values_fmt = '[{cls}] {n_iter:>10} {loss:>20.6e} {start_time:>10.2f}'
            print(
                values_fmt.format(cls=self.__class__.__name__,
                                  n_iter=self.n_iter_,
                                  loss=loss,
                                  start_time=start_time))
            sys.stdout.flush()

        self.n_iter_ += 1
        return sign * loss, sign * gradient.ravel()
Example #13
    def score_samples(self, X):
        """Compute the weighted log probabilities for each sample.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        log_prob : array, shape (n_samples,)
            Log probabilities of each data point in X.
        """
        self._check_is_fitted()
        X = _check_X(X, None, self.means_.shape[1])

        return logsumexp(self._estimate_weighted_log_prob(X), axis=1)
Example #14
    def _estimate_log_resp(self, X, tau, use_prior=False):
        logP_mtrx = self.predict_logP_mtrx(X)
        if use_prior:
            log_weights = np.log(self.pi)
        else:
            log_weights = np.log(
                np.ones(self.n_components) / self.n_components)

        weighted_logP_mtrx = logP_mtrx + log_weights

        weighted_logP_mtrx = weighted_logP_mtrx * (1 / tau)

        log_prob_norm = logsumexp(weighted_logP_mtrx, axis=1)

        with np.errstate(under="ignore"):
            log_resp = weighted_logP_mtrx - log_prob_norm[:, np.newaxis]

        return log_resp
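
The `* (1 / tau)` factor above acts as a temperature: values of tau below 1 sharpen the responsibilities toward one-hot, values above 1 flatten them toward uniform. A minimal sketch with illustrative log probabilities:

# Temperature-scaled responsibilities, as in _estimate_log_resp above
# (logP and the tau values are illustrative).
import numpy as np
from scipy.special import logsumexp

logP = np.log(np.array([[0.7, 0.3]]))
for tau in (0.5, 1.0, 2.0):
    scaled = logP / tau
    resp = np.exp(scaled - logsumexp(scaled, axis=1)[:, np.newaxis])
    print(tau, resp)   # smaller tau -> sharper, larger tau -> flatter
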
Example #15
    def score_samples(self, X):
        """
        Computes the observed data log-likelihood for each sample.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        log_prob : array, shape (n_samples,)
            Log probabilities of each data point in X.
        """
        check_is_fitted(self)
        # X = _check_X(X, None, self.metadata_['n_features'])

        return logsumexp(self.log_probs(X), axis=1)
Example #16
    def _loss(self, flatA, X, y):

        if self.n_iter_ == 0 and self.verbose:
            header_fields = ['Iteration', 'Objective Value', 'Time(s)']
            header_fmt = '{:>10} {:>20} {:>10}'
            header = header_fmt.format(*header_fields)
            cls_name = self.__class__.__name__
            print('[{cls}]'.format(cls=cls_name))
            print('[{cls}] {header}\n[{cls}] {sep}'.format(cls=cls_name,
                                                           header=header,
                                                           sep='-' *
                                                           len(header)))

        start_time = time.time()

        A = flatA.reshape((-1, X.shape[1]))
        X_embedded = np.dot(X, A.T)
        dist = pairwise_distances(X_embedded, squared=True)
        np.fill_diagonal(dist, np.inf)
        softmax = np.exp(-dist - logsumexp(-dist, axis=1)[:, np.newaxis])
        yhat = softmax.dot(y)
        ydiff = yhat - y
        cost = (ydiff**2).sum()

        # also compute the gradient
        W = softmax * ydiff[:, np.newaxis] * (y - yhat[:, np.newaxis])
        W_sym = W + W.T
        np.fill_diagonal(W_sym, -W.sum(axis=0))
        grad = 4 * (X_embedded.T.dot(W_sym)).dot(X)

        if self.verbose:
            start_time = time.time() - start_time
            values_fmt = '[{cls}] {n_iter:>10} {loss:>20.6e} {start_time:>10.2f}'
            print(
                values_fmt.format(cls=self.__class__.__name__,
                                  n_iter=self.n_iter_,
                                  loss=cost,
                                  start_time=start_time))
            sys.stdout.flush()

        self.n_iter_ += 1

        return cost, grad.ravel()
Example #17
  def _loss_grad_lbfgs(self, A, X, mask, sign=1.0):

    if self.n_iter_ == 0 and self.verbose:
      header_fields = ['Iteration', 'Objective Value', 'Time(s)']
      header_fmt = '{:>10} {:>20} {:>10}'
      header = header_fmt.format(*header_fields)
      cls_name = self.__class__.__name__
      print('[{cls}]'.format(cls=cls_name))
      print('[{cls}] {header}\n[{cls}] {sep}'.format(cls=cls_name,
                                                     header=header,
                                                     sep='-' * len(header)))

    start_time = time.time()

    A = A.reshape(-1, X.shape[1])
    X_embedded = np.dot(X, A.T)  # (n_samples, num_dims)
    # Compute softmax distances
    p_ij = pairwise_distances(X_embedded, squared=True)
    np.fill_diagonal(p_ij, np.inf)
    p_ij = np.exp(-p_ij - logsumexp(-p_ij, axis=1)[:, np.newaxis])
    # (n_samples, n_samples)

    # Compute loss
    masked_p_ij = p_ij * mask
    p = masked_p_ij.sum(axis=1, keepdims=True)  # (n_samples, 1)
    loss = p.sum()

    # Compute gradient of loss w.r.t. `transform`
    weighted_p_ij = masked_p_ij - p_ij * p
    weighted_p_ij_sym = weighted_p_ij + weighted_p_ij.T
    np.fill_diagonal(weighted_p_ij_sym, - weighted_p_ij.sum(axis=0))
    gradient = 2 * (X_embedded.T.dot(weighted_p_ij_sym)).dot(X)

    if self.verbose:
        start_time = time.time() - start_time
        values_fmt = '[{cls}] {n_iter:>10} {loss:>20.6e} {start_time:>10.2f}'
        print(values_fmt.format(cls=self.__class__.__name__,
                                n_iter=self.n_iter_, loss=loss,
                                start_time=start_time))
        sys.stdout.flush()

    self.n_iter_ += 1
    return sign * loss, sign * gradient.ravel()
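
The flattened gradient and the `sign` argument above match the interface expected by SciPy's L-BFGS solver. A hypothetical sketch of that usage pattern, with a stand-in quadratic objective rather than the class's actual loss:

# Hypothetical sketch of passing a (loss, gradient) pair to L-BFGS, the usage
# implied by _loss_grad_lbfgs above; the quadratic objective is a stand-in.
import numpy as np
from scipy.optimize import minimize

def loss_grad(flat_a, target, sign=1.0):
    a = flat_a.reshape(target.shape)
    diff = a - target
    return sign * 0.5 * np.sum(diff ** 2), sign * diff.ravel()

target = np.arange(6, dtype=float).reshape(2, 3)
res = minimize(loss_grad, np.zeros(6), args=(target,), jac=True, method='L-BFGS-B')
assert np.allclose(res.x.reshape(2, 3), target, atol=1e-4)
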
Example #18
    def predict_log_proba(self, X):
        """
        Return log-probability estimates for the test vector X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        C : array-like, shape = [n_samples, n_classes]
            Returns the log-probability of the samples for each class in
            the model. The columns correspond to the classes in sorted
            order, as they appear in the attribute `classes_`.
        """
        jll = self._joint_log_likelihood(X)
        # normalize by P(x) = P(f_1, ..., f_n)
        log_prob_x = logsumexp(jll, axis=1)
        return jll - np.atleast_2d(log_prob_x).T
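
Exponentiating the result of the subtraction above gives probabilities that sum to one per row; it is the log-space form of a row-wise softmax over the joint log-likelihood. A minimal cross-check with an illustrative jll matrix:

# Cross-check: subtracting the row-wise logsumexp is the log of a row-wise
# softmax over the joint log-likelihood (jll values are illustrative).
import numpy as np
from scipy.special import logsumexp, softmax

jll = np.array([[-2.3, -0.5, -4.0], [-1.0, -3.0, -0.7]])
log_proba = jll - logsumexp(jll, axis=1)[:, np.newaxis]
assert np.allclose(np.exp(log_proba), softmax(jll, axis=1))
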
Example #19
    def _m_step_clust_params(self, X, log_resp):
        """
        M step. Each view's cluster parameters can be updated independently.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        log_resp : array-like, shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities) of
            each sample in X.
        """
        # TODO: document this as it is a critical step

        # for each view-cluster pair, which columns of log_resp to logsumexp
        vc_axes2sum = [[[] for c in range(self.view_models_[v].n_components)]
                       for v in range(self.n_views)]

        for k in range(self.n_components):
            view_idxs = self._get_view_clust_idx(k)
            for v in range(self.n_views):
                vc_axes2sum[v][view_idxs[v]].append(k)

            # idx_0, idx_1 = self._get_view_clust_idx(k)
            # vc_axes2sum[0][idx_0].append(k)
            # vc_axes2sum[1][idx_1].append(k)

        view_params = [None for v in range(self.n_views)]

        for v in range(self.n_views):
            view_log_resp = []
            # for each view-component logsumexp the responsibilities
            for c in range(self.view_models_[v].n_components):
                axes2sum = vc_axes2sum[v][c]
                view_log_resp.append(logsumexp(log_resp[:, axes2sum], axis=1))
            view_log_resp = np.array(view_log_resp).T

            view_params[v] = self.view_models_[v].\
                _m_step_clust_params(X=X[v], log_resp=view_log_resp)

        return view_params
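
The per-view aggregation above works because a logsumexp over a subset of columns adds the corresponding probabilities in log space. A minimal illustration with made-up responsibilities and an arbitrary column group:

# Aggregating responsibilities over selected joint components in log space,
# as done per view-cluster pair above (log_resp and axes2sum are illustrative).
import numpy as np
from scipy.special import logsumexp

log_resp = np.log(np.array([[0.1, 0.2, 0.3, 0.4],
                            [0.25, 0.25, 0.25, 0.25]]))
axes2sum = [0, 2]   # joint components that map to one view-cluster
view_log_resp = logsumexp(log_resp[:, axes2sum], axis=1)
assert np.allclose(np.exp(view_log_resp),
                   np.exp(log_resp)[:, axes2sum].sum(axis=1))
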
Example #20
def _estimate_responsibilities(x, weights, means, precisions_cholesky,
                               covariance_type):
    """Estimate log-likelihood and responsibilities for the given data portion.

    Compute the sum of log-likelihoods, the count of samples, and the
    responsibilities for each sample in the data portion with respect to the
    current state of the model.

    Parameters
    ----------
    x : collection of depth 2
        Blocks of a horizontal portion of the data.
    weights : array-like, shape (n_components,)
        The weights of the current components.
    means : array-like, shape (n_components, n_features)
        The centers of the current components.
    precisions_cholesky : array-like
        The Cholesky decomposition of the sample precisions of the current
        components. The shape depends on the covariance_type.
    covariance_type : {'full', 'tied', 'diag', 'spherical'}
        The type of precision matrices.

    Returns
    -------
    log_prob_norm_x : tuple
        tuple(sum, count) for log p(x)

    responsibilities : array-like, shape (x.shape[0], n_components)
    """
    x = Array._merge_blocks(x)
    weighted_log_prob = _estimate_weighted_log_prob(x, weights, means,
                                                    precisions_cholesky,
                                                    covariance_type)
    log_prob_norm = logsumexp(weighted_log_prob, axis=1)
    log_prob_norm_sum = np.sum(log_prob_norm)
    count = len(log_prob_norm)
    with np.errstate(under='ignore'):
        # ignore underflow
        resp = np.exp(weighted_log_prob - log_prob_norm[:, np.newaxis])
    return (log_prob_norm_sum, count), resp
    def _estimate_log_prob_gamma(self, X):
        """Estimate log probabilities and responsibilities for each sample.

        Compute the log probability and the prior Gamma weights of the samples
        in X with respect to the current state of the model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Returns
        -------
        log_prob_norm : float
            Mean of the logarithms of the probabilities of each sample in X

        gamma_priors : array, shape (n_samples,)
            Gamma weights of each sample in X.
        """
        log_prob = self._estimate_log_prob(X)
        log_prob_norm = logsumexp(log_prob)
        gamma_priors = self._estimate_gamma_priors(X)
        return log_prob_norm, gamma_priors
    def _approx_bound(self, X, doc_topic_distr, sub_sampling):
        """Estimate the variational bound.

        Estimate the variational bound over "all documents" using only the
        documents passed in as X. Since log-likelihood of each word cannot
        be computed directly, we use this bound to estimate it.

        Parameters
        ----------
        X : array-like or sparse matrix, shape=(n_samples, n_features)
            Document word matrix.

        doc_topic_distr : array, shape=(n_samples, n_components)
            Document topic distribution. In the literature, this is called
            gamma.

        sub_sampling : boolean, optional, (default=False)
            Compensate for subsampling of documents.
            It is used to calculate the bound in online learning.

        Returns
        -------
        score : float

        """
        def _loglikelihood(prior, distr, dirichlet_distr, size):
            # calculate log-likelihood
            score = np.sum((prior - distr) * dirichlet_distr)
            score += np.sum(gammaln(distr) - gammaln(prior))
            score += np.sum(gammaln(prior * size) - gammaln(np.sum(distr, 1)))
            return score

        is_sparse_x = sp.issparse(X)
        n_samples, n_components = doc_topic_distr.shape
        n_features = self.components_.shape[1]
        score = 0

        dirichlet_doc_topic = _dirichlet_expectation_2d(doc_topic_distr)
        dirichlet_component_ = _dirichlet_expectation_2d(self.components_)
        doc_topic_prior = self.doc_topic_prior_
        topic_word_prior = self.topic_word_prior_

        if is_sparse_x:
            X_data = X.data
            X_indices = X.indices
            X_indptr = X.indptr

        # E[log p(docs | theta, beta)]
        for idx_d in range(0, n_samples):
            if is_sparse_x:
                ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]]
                cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]]
            else:
                ids = np.nonzero(X[idx_d, :])[0]
                cnts = X[idx_d, ids]
            temp = (dirichlet_doc_topic[idx_d, :, np.newaxis] +
                    dirichlet_component_[:, ids])
            norm_phi = logsumexp(temp, axis=0)
            score += np.dot(cnts, norm_phi)

        # compute E[log p(theta | alpha) - log q(theta | gamma)]
        score += _loglikelihood(doc_topic_prior, doc_topic_distr,
                                dirichlet_doc_topic, self._n_components)

        # Compensate for the subsampling of the population of documents
        if sub_sampling:
            doc_ratio = float(self.total_samples) / n_samples
            score *= doc_ratio

        # E[log p(beta | eta) - log q (beta | lambda)]
        score += _loglikelihood(topic_word_prior, self.components_,
                                dirichlet_component_, n_features)

        return score
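
In the document loop above, norm_phi is a logsumexp over topics of E[log theta_dk] + E[log beta_kw], and each word contributes its count times that value. A minimal sketch of that per-document term with illustrative Dirichlet expectations:

# Per-document term of the variational bound, as computed in the loop above:
# logsumexp over topics of E[log theta_d] + E[log beta_:,w], weighted by counts.
# All values below are illustrative.
import numpy as np
from scipy.special import logsumexp

dirichlet_doc_topic = np.log(np.array([0.5, 0.3, 0.2]))        # E[log theta_d], (n_topics,)
dirichlet_component = np.log(np.array([[0.1, 0.4],
                                       [0.3, 0.2],
                                       [0.6, 0.4]]))           # E[log beta], (n_topics, n_words)
ids = np.array([0, 1])       # word indices present in the document
cnts = np.array([2.0, 1.0])  # their counts

temp = dirichlet_doc_topic[:, np.newaxis] + dirichlet_component[:, ids]
norm_phi = logsumexp(temp, axis=0)
score = np.dot(cnts, norm_phi)
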
Example #23
 def predict_log_proba(self, X):
     jll = self._joint_log_likelihood(X)
     # normalize by P(x) = P(f_1, ..., f_n)
     log_prob_x = logsumexp(jll, axis=1)
     return jll - np.atleast_2d(log_prob_x).T