Example #1
def objective(K, y, alpha, lamda, beta, w):
    """Objective function for lasso kernel learning."""
    obj = .5 * sum(squared_norm(
        alpha[j].dot(K[j].T.dot(w)) - y[j]) for j in range(len(K)))
    obj += lamda * np.abs(w).sum()
    obj += beta * sum(squared_norm(a) for a in alpha)
    return obj
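A minimal usage sketch for the objective above. It assumes `squared_norm` comes from `sklearn.utils.extmath` and uses small synthetic shapes (each `K[j]` is a stack of `n_kernels` square kernel matrices, as in the kernel-learning examples further down); the dimensions are illustrative only.

import numpy as np
from sklearn.utils.extmath import squared_norm

rng = np.random.RandomState(0)
n_kernels, n_i, n_patients = 3, 4, 2
K = [rng.randn(n_kernels, n_i, n_i) for _ in range(n_patients)]  # kernel stacks
y = [rng.randn(n_i) for _ in range(n_patients)]                  # targets
alpha = [rng.randn(n_i) for _ in range(n_patients)]              # sample coefficients
w = np.ones(n_kernels)                                           # kernel weights

print(objective(K, y, alpha, lamda=0.01, beta=0.01, w=w))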
Example #2
def _f(x, _Z_0, Z_0, loss_res, nabla_con, nabla_pen, loss_func, S, C):
    _Z_0, A = _Z_0(x[0], x[1], Z_0, loss_res, nabla_con, nabla_pen)
    loss_res = loss_gen(loss_func, S, _Z_0) - C
    # loss_res_A = loss_gen(loss_func, S, A) - C
    # return squared_norm(loss_res) + squared_norm(loss_res - loss_res_A)
    return squared_norm(loss_res) + squared_norm(_Z_0 - A) / (S.shape[1] *
                                                              S.shape[2])
Example #3
def objective_admm(K, y, alpha, lamda, beta, w, w1, w2):
    """Objective function for lasso kernel learning."""
    obj = .5 * sum(squared_norm(
        np.dot(alpha[j], K[j].T.dot(w)) - y[j]) for j in range(len(K)))
    obj += lamda * np.abs(w1).sum()
    obj += beta * squared_norm(w2)
    return obj
Example #4
def objective(K, y, alpha, lamda, beta, w):
    """Objective function for lasso kernel learning."""
    obj = .5 * sum(
        squared_norm(alpha[j].dot(K[j].T.dot(w)) - y[j])
        for j in range(len(K)))
    obj += lamda * np.abs(w).sum()
    obj += beta * sum(squared_norm(a) for a in alpha)
    return obj
Example #5
def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100  # check stability
    X += 200

    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X)**2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
Example #6
def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100        # check stability
    X += 200

    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
Example #7
def objective_admm(K, y, alpha, lamda, beta, w, w1, w2):
    """Objective function for lasso kernel learning."""
    obj = .5 * sum(
        squared_norm(np.dot(alpha[j], K[j].T.dot(w)) - y[j])
        for j in range(len(K)))
    obj += lamda * np.abs(w1).sum()
    obj += beta * squared_norm(w2)
    return obj
Example #8
def _fit_projected_gradient(X, W, H, tol, max_iter, nls_max_iter, alpha,
                            l1_ratio):
    gradW = (np.dot(W, np.dot(H, H.T)) -
             safe_sparse_dot(X, H.T, dense_output=True))
    gradH = (np.dot(np.dot(W.T, W), H) -
             safe_sparse_dot(W.T, X, dense_output=True))

    init_grad = squared_norm(gradW) + squared_norm(gradH.T)
    # max(0.001, tol) to force alternating minimizations of W and H
    tolW = max(0.001, tol) * np.sqrt(init_grad)
    tolH = tolW

    for n_iter in range(1, max_iter + 1):
        # stopping condition as discussed in paper
        proj_grad_W = squared_norm(gradW * np.logical_or(gradW < 0, W > 0))
        proj_grad_H = squared_norm(gradH * np.logical_or(gradH < 0, H > 0))

        if (proj_grad_W + proj_grad_H) / init_grad < tol**2:
            break

        # update W
        Wt, gradWt, iterW = _nls_subproblem(X.T,
                                            H.T,
                                            W.T,
                                            tolW,
                                            nls_max_iter,
                                            alpha=alpha,
                                            l1_ratio=l1_ratio)
        W, gradW = Wt.T, gradWt.T

        if iterW == 1:
            tolW = 0.1 * tolW

        # update H
        H, gradH, iterH = _nls_subproblem(X,
                                          W,
                                          H,
                                          tolH,
                                          nls_max_iter,
                                          alpha=alpha,
                                          l1_ratio=l1_ratio)
        if iterH == 1:
            tolH = 0.1 * tolH

    H[H == 0] = 0  # fix up negative zeros

    if n_iter == max_iter:
        Wt, _, _ = _nls_subproblem(X.T,
                                   H.T,
                                   W.T,
                                   tolW,
                                   nls_max_iter,
                                   alpha=alpha,
                                   l1_ratio=l1_ratio)
        W = Wt.T

    return W, H, n_iter
Example #9
def _fit_projected_gradient(X, W, H, tol, max_iter, nls_max_iter, alpha,
                            l1_ratio, sparseness, beta, eta):
    """Compute Non-negative Matrix Factorization (NMF) with Projected Gradient

    References
    ----------
    C.-J. Lin. Projected gradient methods for non-negative matrix
    factorization. Neural Computation, 19(2007), 2756-2779.
    http://www.csie.ntu.edu.tw/~cjlin/nmf/

    P. Hoyer. Non-negative Matrix Factorization with Sparseness Constraints.
    Journal of Machine Learning Research 2004.
    """
    gradW = (np.dot(W, np.dot(H, H.T)) -
             safe_sparse_dot(X, H.T, dense_output=True))
    gradH = (np.dot(np.dot(W.T, W), H) -
             safe_sparse_dot(W.T, X, dense_output=True))

    init_grad = squared_norm(gradW) + squared_norm(gradH.T)
    # max(0.001, tol) to force alternating minimizations of W and H
    tolW = max(0.001, tol) * np.sqrt(init_grad)
    tolH = tolW

    for n_iter in range(1, max_iter + 1):
        # stopping condition
        # as discussed in paper
        proj_grad_W = squared_norm(gradW * np.logical_or(gradW < 0, W > 0))
        proj_grad_H = squared_norm(gradH * np.logical_or(gradH < 0, H > 0))

        if (proj_grad_W + proj_grad_H) / init_grad < tol**2:
            break

        # update W
        W, gradW, iterW = _update_projected_gradient_w(X, W, H, tolW,
                                                       nls_max_iter, alpha,
                                                       l1_ratio, sparseness,
                                                       "L2", beta, eta)
        if iterW == 1:
            tolW = 0.1 * tolW

        # update H
        H, gradH, iterH = _update_projected_gradient_h(X, W, H, tolH,
                                                       nls_max_iter, alpha,
                                                       l1_ratio, sparseness,
                                                       "L1", beta, eta)
        if iterH == 1:
            tolH = 0.1 * tolH

    H[H == 0] = 0  # fix up negative zeros

    if n_iter == max_iter:
        W, _, _ = _update_projected_gradient_w(X, W, H, tol, nls_max_iter,
                                               alpha, l1_ratio, sparseness,
                                               "L2", beta, eta)

    return W, H, n_iter
Example #10
def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100  # check stability
    X += 200

    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X)**2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
    # Check the warning with an int array and np.dot potential overflow
    assert_warns_message(
        UserWarning, 'Array type is integer, np.dot may '
        'overflow. Data should be float type to avoid this issue',
        squared_norm, X.astype(int))
Example #11
def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100        # check stability
    X += 200

    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
    # Check the warning with an int array and np.dot potential overflow
    assert_warns_message(
                    UserWarning, 'Array type is integer, np.dot may '
                    'overflow. Data should be float type to avoid this issue',
                    squared_norm, X.astype(int))
Example #12
    def _objective_func(self, w):
        bias, wf = self._split_coefficents(w)

        l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(wf)  # pylint: disable=unused-variable

        xw = self._xw
        val = 0.5 * squared_norm(wf)
        if self._has_time:
            val += 0.5 * self._regr_penalty * squared_norm(self.y_compressed - bias
                                                           - xw.compress(self.regr_mask, axis=0))

        val += 0.5 * self._rank_penalty * numexpr.evaluate(
            'sum(xw * ((l_plus + l_minus) * xw - xv_plus - xv_minus - 2 * (l_minus - l_plus)) + l_minus)')

        return val
Example #13
    def _objective_func(self, w):
        bias, wf = self._split_coefficents(w)

        l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(wf)

        xw = self._xw
        val = 0.5 * squared_norm(wf)
        if self._has_time:
            val += 0.5 * self._regr_penalty * squared_norm(self.y_compressed - bias
                                                           - xw.compress(self.regr_mask, axis=0))

        val += 0.5 * self._rank_penalty * numexpr.evaluate(
            'sum(xw * ((l_plus + l_minus) * xw - xv_plus - xv_minus - 2 * (l_minus - l_plus)) + l_minus)')

        return val
Example #14
def _beta_divergence(X, W, H, square_root=False):
    """Compute the beta-divergence of X and dot(W, H).

    Parameters
    ----------
    X : float or array-like, shape (n_samples, n_features)
    W : float or dense array-like, shape (n_samples, n_components)
    H : float or dense array-like, shape (n_components, n_features)

    square_root : boolean, default False
        If True, return np.sqrt(2 * res).

    Returns
    -------
    res : float
        Beta divergence of X and np.dot(W, H).
    """
    if not sp.issparse(X):
        X = np.atleast_2d(X)
    W = np.atleast_2d(W)
    H = np.atleast_2d(H)

    # Avoid the creation of the dense np.dot(W, H) if X is sparse.
    if sp.issparse(X):
        norm_X = np.dot(X.data, X.data)
        norm_WH = trace_dot(np.dot(np.dot(W.T, W), H), H)
        cross_prod = trace_dot((X * H.T), W)
        res = (norm_X + norm_WH - 2. * cross_prod) / 2
    else:
        res = squared_norm(X - np.dot(W, H)) / 2

    if square_root:
        return np.sqrt(res * 2)
    else:
        return res
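A small sanity check for the dense path above, written as a sketch: it assumes NumPy, `scipy.sparse as sp`, and `squared_norm` are imported as in the surrounding module. For dense inputs the value is simply half the squared Frobenius norm of the residual.

import numpy as np
from sklearn.utils.extmath import squared_norm

rng = np.random.RandomState(0)
X = np.abs(rng.randn(6, 4))
W = np.abs(rng.randn(6, 2))
H = np.abs(rng.randn(2, 4))

# Dense path: the (Frobenius) beta-divergence equals ||X - WH||_F^2 / 2.
assert np.isclose(_beta_divergence(X, W, H), squared_norm(X - W.dot(H)) / 2)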
Example #15
def _beta_divergence_dense(X, W, H, beta):
    """Compute the beta-divergence of X and W.H for dense array only.

    Used as a reference for testing nmf._beta_divergence.
    """
    WH = np.dot(W, H)

    if beta == 2:
        return squared_norm(X - WH) / 2

    WH_Xnonzero = WH[X != 0]
    X_nonzero = X[X != 0]
    np.maximum(WH_Xnonzero, 1e-9, out=WH_Xnonzero)

    if beta == 1:
        res = np.sum(X_nonzero * np.log(X_nonzero / WH_Xnonzero))
        res += WH.sum() - X.sum()

    elif beta == 0:
        div = X_nonzero / WH_Xnonzero
        res = np.sum(div) - X.size - np.sum(np.log(div))
    else:
        res = (X_nonzero**beta).sum()
        res += (beta - 1) * (WH**beta).sum()
        res -= beta * (X_nonzero * (WH_Xnonzero**(beta - 1))).sum()
        res /= beta * (beta - 1)

    return res
Example #16
    def loss(self, w):
        """Compute negative partial log-likelihood

        Parameters
        ----------
        w : array, shape = [n_features]
            Estimate of coefficients

        Returns
        -------
        loss : float
            Average negative partial log-likelihood
        """
        xw = numpy.dot(self.x, w)

        at_risk = numpy.empty(self.x.shape[0])
        for i in range(self.x.shape[0]):
            idx = self.time >= self.time[i]
            at_risk[i] = logsumexp(xw[idx])

        loss = numpy.mean(self.event * (xw - at_risk))
        if self.alpha > 0:
            loss -= 0.5 * self.alpha * squared_norm(w)

        return -loss
Example #17
    def _update_center(self, X):
        """
        Fix Label, Weight, Update Center
        """
        centers_old = self.cluster_centers_.copy()

        if self.cluster_method == 'k-means':
            cluster_center = kmeans_center
        elif self.cluster_method == 'k-median':
            cluster_center = kmedian_center
        else:
            raise ValueError('cluster_method must be kmeans or kmedian')

        # Choose the data belonging to cluster k and
        # update the cluster center with their mean
        for k in range(self.n_clusters):
            mask = self.labels_ == k
            self.cluster_centers_[k] = cluster_center(X[mask])

        # check cluster is empty
        if np.isnan(self.cluster_centers_).any():
            raise ValueError('Cluster must have at least one member')

        center_shift_total = squared_norm(self.cluster_centers_ - centers_old)
        return center_shift_total
Example #18
def choose_alpha(alpha,
                 x,
                 S,
                 n_samples,
                 beta,
                 lamda,
                 gamma,
                 theta=.99,
                 max_iter=1000):
    """Choose alpha for backtracking.

    References
    ----------
    Salzo S. (2017). https://doi.org/10.1137/16M1073741
    """
    eps = .5
    partial_J = partial(_J,
                        x,
                        beta=beta,
                        lamda=lamda,
                        gamma=gamma,
                        S=S,
                        n_samples=n_samples)
    partial_f = partial(_f, n_samples=n_samples, S=S)
    gradient_ = _gradient(x, S, n_samples)
    for i in range(max_iter):
        iter_diff = partial_J(alpha=alpha) - x
        obj_diff = partial_f(K=partial_J(alpha=alpha)) - partial_f(K=x)
        if obj_diff - _scalar_product_3d(iter_diff, gradient_) <= theta / (
                gamma * alpha) * squared_norm(iter_diff) + 1e-16:
            return alpha

        alpha *= eps
    return alpha
Example #19
def _beta_divergence_dense(X, W, H, beta):
    """Compute the beta-divergence of X and W.H for dense array only.

    Used as a reference for testing nmf._beta_divergence.
    """
    if isinstance(X, numbers.Number):
        W = np.array([[W]])
        H = np.array([[H]])
        X = np.array([[X]])

    WH = np.dot(W, H)

    if beta == 2:
        return squared_norm(X - WH) / 2

    WH_Xnonzero = WH[X != 0]
    X_nonzero = X[X != 0]
    np.maximum(WH_Xnonzero, 1e-9, out=WH_Xnonzero)

    if beta == 1:
        res = np.sum(X_nonzero * np.log(X_nonzero / WH_Xnonzero))
        res += WH.sum() - X.sum()

    elif beta == 0:
        div = X_nonzero / WH_Xnonzero
        res = np.sum(div) - X.size - np.sum(np.log(div))
    else:
        res = (X_nonzero ** beta).sum()
        res += (beta - 1) * (WH ** beta).sum()
        res -= beta * (X_nonzero * (WH_Xnonzero ** (beta - 1))).sum()
        res /= beta * (beta - 1)

    return res
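A quick check of the scalar branch above, as a sketch with hand-picked numbers so the arithmetic is easy to follow (it assumes `numbers`, NumPy, and `squared_norm` are imported as in the surrounding module).

import numpy as np

# Scalar inputs exercise the numbers.Number branch: WH = 2 * 1.5 = 3,
# so the beta=2 divergence is (4 - 3)**2 / 2 = 0.5.
assert _beta_divergence_dense(4.0, 2.0, 1.5, beta=2) == 0.5

# The 2-D array path gives the same value.
assert _beta_divergence_dense(np.array([[4.0]]), np.array([[2.0]]),
                              np.array([[1.5]]), beta=2) == 0.5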
Example #20
def _solve_weight_vector(similarities, grouping_matrix, delta):
    """Solve for the weight vector of the similarities, used for
    _solve_omega and _solve_pi

    Parameters
    ----------
    similarities : np.ndarray (n_similarities,
                               (n_features * (n_features - 1) /2)
        similarity matrices

    grouping_matrix : np.ndarray (n_features, n_communities)

    delta : float

    Returns
    -------
    weights : np.ndarray (1, n_similarities)
    """
    # do some type check
    if np.any(similarities < 0):
        raise ValueError('similarities contain invalid values (< 0)')
    if delta <= 0:
        raise ValueError('delta value of {0} not allowed, '
                         'needs to be > 0'.format(delta))

    sigma = np.dot(grouping_matrix, grouping_matrix.T)
    n_similarities = len(similarities)
    # preallocate vector
    a = np.zeros(n_similarities)
    for i in range(n_similarities):
        a[i] = squared_norm(squareform(similarities[i]) - sigma)

    # solve for weight
    weight = simplex_projection(a / (2 * delta))
    return np.atleast_2d(weight)
Example #21
    def _fit(self, X):
        """Fit the LatentTimeMatrixDecomposition model to X.

        Parameters
        ----------
        X : ndarray, shape (n_time, n_samples, n_features), or
                (n_samples, n_features, n_time)
            Matrix to decompose.

        """
        self.precision_, self.latent_, self.n_iter_ = latent_time_matrix_decomposition(
            X,
            alpha=self.alpha,
            tau=self.tau,
            rho=self.rho,
            beta=self.beta,
            eta=self.eta,
            mode=self.mode,
            tol=self.tol,
            rtol=self.rtol,
            psi=self.psi,
            phi=self.phi,
            max_iter=self.max_iter,
            verbose=self.verbose,
            return_n_iter=True,
            return_history=False,
            update_rho_options=self.update_rho_options,
            compute_objective=self.compute_objective,
        )
        self.reconstruction_err_ = squared_norm(X - self.get_observed_precision())
        return self
Example #22
def kmeans(X, k):
    samples = X.shape[0]
    best_inertia = None
    best_labels = None
    x_squared_norms = row_norms(X, squared=True)
    seeds = random_state.permutation(samples)[:k]
    centers = X[seeds]
    centers = centers.toarray()
    distances = numpy.zeros(shape=(X.shape[0], ), dtype=X.dtype)

    # Iterations
    for i in range(100):
        centers_old = centers.copy()
        labels, inertia = assign_labels(X, x_squared_norms, centers, distances,
                                        samples)
        centers = maximization(X, labels, distances, centers, samples)
        if best_inertia is None or inertia < best_inertia:
            best_labels = labels.copy()
            best_inertia = inertia

        center_shift = squared_norm(centers_old - centers)

    if center_shift > 0:
        best_labels, best_inertia = assign_labels(X, x_squared_norms, centers,
                                                  distances, samples)
    return best_labels
Example #23
    def nlog_likelihood(self, w):
        """Compute negative partial log-likelihood

        Parameters
        ----------
        w : array, shape = (n_features,)
            Estimate of coefficients

        Returns
        -------
        loss : float
            Average negative partial log-likelihood
        """
        time = self.time
        n_samples = self.x.shape[0]
        xw = numpy.dot(self.x, w)

        loss = 0
        risk_set = 0
        k = 0
        for i in range(n_samples):
            ti = time[i]
            while k < n_samples and ti == time[k]:
                risk_set += numpy.exp(xw[k])
                k += 1

            if self.event[i]:
                loss -= (xw[i] - numpy.log(risk_set)) / n_samples

        # add regularization term to log-likelihood
        return loss + self.alpha * squared_norm(w) / (2. * n_samples)
Example #24
def objectiveFLGL(emp_cov, K, R, T, H, U, mu, eta, rho):
    res = -fast_logdet(R) + np.sum(R * emp_cov)
    res += rho / 2. * squared_norm(R - T + U + np.linalg.multi_dot(
        (K.T, linalg.pinvh(H), K)))
    res += mu * l1_od_norm(H)
    res += eta * l1_od_norm(T)
    return res
Example #25
def objective(S, R, Z_0, Z_1, Z_2, W_0, W_1, W_2, alpha, tau, beta, eta, psi, phi):
    """Objective for latent variable time-varying matrix decomposition."""
    obj = squared_norm(S - R)
    obj += alpha * sum(map(l1_od_norm, Z_0))
    obj += tau * sum(map(partial(np.linalg.norm, ord="nuc"), W_0))
    obj += beta * sum(map(psi, Z_2 - Z_1))
    obj += eta * sum(map(phi, W_2 - W_1))
    return obj
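A toy evaluation of the decomposition objective above. This is a sketch only: `l1_od_norm` is a stand-in for the off-diagonal l1 norm helper the snippet expects, `psi = phi = squared_norm` is an arbitrary choice of penalty functions, and the shapes are illustrative; it also assumes the snippet's own imports (NumPy, `functools.partial`, `squared_norm`) are in scope.

import numpy as np
from sklearn.utils.extmath import squared_norm

def l1_od_norm(A):
    # stand-in: l1 norm of the off-diagonal entries (assumption)
    return np.abs(A).sum() - np.abs(np.diag(A)).sum()

rng = np.random.RandomState(0)
n_time, d = 4, 5
S = rng.randn(n_time, d, d)
R = rng.randn(n_time, d, d)
Z = rng.randn(n_time, d, d)  # sparse component estimates over time
W = rng.randn(n_time, d, d)  # low-rank (latent) component estimates over time

val = objective(S, R, Z, Z[:-1], Z[1:], W, W[:-1], W[1:],
                alpha=1., tau=1., beta=1., eta=1.,
                psi=squared_norm, phi=squared_norm)
print(val)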
Example #26
    def _objective_func(self, w):
        self._update_constraints(w)

        l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(w)
        x = self._counter.x

        xs = numpy.dot(x, w)
        val = 0.5 * squared_norm(w)
        if self._has_time:
            val += 0.5 * self._regr_penalty * squared_norm(
                self.y_compressed - xs.compress(self.regr_mask, axis=0))

        val += 0.5 * self._rank_penalty * numexpr.evaluate(
            'sum(xs * ((l_plus + l_minus) * xs - xv_plus - xv_minus - 2 * (l_minus - l_plus)) + l_minus)'
        )

        return val
Example #27
def test_loss_gradients_hessp_intercept(base_loss, sample_weight,
                                        l2_reg_strength, X_sparse):
    """Test that loss and gradient handle intercept correctly."""
    loss = LinearModelLoss(base_loss=base_loss(), fit_intercept=False)
    loss_inter = LinearModelLoss(base_loss=base_loss(), fit_intercept=True)
    n_samples, n_features = 10, 5
    X, y, coef = random_X_y_coef(linear_model_loss=loss,
                                 n_samples=n_samples,
                                 n_features=n_features,
                                 seed=42)

    X[:, -1] = 1  # make last column of 1 to mimic intercept term
    # exclude the intercept column as it is added automatically by loss_inter
    X_inter = X[:, :-1]

    if X_sparse:
        X = sparse.csr_matrix(X)

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])

    l, g = loss.loss_gradient(coef,
                              X,
                              y,
                              sample_weight=sample_weight,
                              l2_reg_strength=l2_reg_strength)
    _, hessp = loss.gradient_hessian_product(coef,
                                             X,
                                             y,
                                             sample_weight=sample_weight,
                                             l2_reg_strength=l2_reg_strength)
    l_inter, g_inter = loss_inter.loss_gradient(
        coef,
        X_inter,
        y,
        sample_weight=sample_weight,
        l2_reg_strength=l2_reg_strength)
    _, hessp_inter = loss_inter.gradient_hessian_product(
        coef,
        X_inter,
        y,
        sample_weight=sample_weight,
        l2_reg_strength=l2_reg_strength)

    # Note, that intercept gets no L2 penalty.
    assert l == pytest.approx(l_inter +
                              0.5 * l2_reg_strength * squared_norm(coef.T[-1]))

    g_inter_corrected = g_inter
    g_inter_corrected.T[-1] += l2_reg_strength * coef.T[-1]
    assert_allclose(g, g_inter_corrected)

    s = np.random.RandomState(42).randn(*coef.shape)
    h = hessp(s)
    h_inter = hessp_inter(s)
    h_inter_corrected = h_inter
    h_inter_corrected.T[-1] += l2_reg_strength * s.T[-1]
    assert_allclose(h, h_inter_corrected)
Example #28
def norm(x):
    """Dot product-based Euclidean norm implementation
    See: http://fseoane.net/blog/2011/computing-the-vector-norm/
    Parameters
    ----------
    x : array-like
        Vector for which to compute the norm
    """
    return sqrt(squared_norm(x))
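A one-line sanity check for the helper above (a sketch, assuming `sqrt` from `math` and `squared_norm` from `sklearn.utils.extmath` as in the snippet): for any array it agrees with NumPy's Euclidean norm of the flattened data.

import numpy as np

x = np.arange(6, dtype=float).reshape(2, 3)
assert np.isclose(norm(x), np.linalg.norm(x.ravel()))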
Example #29
def _kmeans_spark(X, n_clusters, max_iter=300, worker_nums=10, init='k-means++', random_state=None, tol=1e-4):
    from pyspark import SparkContext, SparkConf

    conf = SparkConf().setAppName('K-Means_Spark').setMaster('local[%d]'%worker_nums)
    sc = SparkContext(conf=conf)
    data = sc.parallelize(X)
    data.cache()

    random_state = check_random_state(random_state)

    best_labels, best_inertia, best_centers = None, None, None

    x_squared_norms = row_norms(X, squared=True)
    #  x_squared_norms = data.map(lambda x: (x*x).sum(axis=0)).collect()
    #  x_squared_norms = np.array(x_squared_norms, dtype='float64')

    centers = _init_centroids(X, n_clusters, init, random_state, x_squared_norms=x_squared_norms)

    bs = X.shape[0] // worker_nums
    data_temp = []
    for i in range(worker_nums - 1):
        data_temp.append(X[i * bs:(i + 1) * bs])
    data_temp.append(X[(worker_nums - 1) * bs:])
    data_temp = np.array(data_temp, dtype='float64')
    data_temp = sc.parallelize(data_temp)
    data_temp.cache()


    for i in range(max_iter):
        centers_old = centers.copy()

        all_distances = data_temp.map(lambda x: euclidean_distances(centers, x, squared=True)).collect()
        temp_all_distances = all_distances[0]
        for i in range(1, worker_nums):
            temp_all_distances = np.hstack((temp_all_distances, all_distances[i]))
        all_distances = temp_all_distances

        #  all_distances = data.map(lambda x: euclidean_distances(centers, x, squared=True)).collect()
        #  # reshape, from (1, n_samples, k) to (k, n_samples)
        #  all_distances = np.asarray(all_distances, dtype="float64").T[0]

        # Assignment, also called E-step of EM
        labels, inertia = _labels_inertia(X, x_squared_norms, centers, all_distances=all_distances)
        # re-computation of the centroids, also called M-step of EM
        centers = _centers(X, labels, n_clusters)

        if best_inertia is None or inertia < best_inertia:
            best_labels  = labels.copy()
            best_centers = centers.copy()
            best_inertia = inertia

        shift = squared_norm(centers_old - centers)
        if shift <= tol:
            break

    return best_centers, best_labels, best_inertia
Example #30
    def fit(self, X):

        self.n_samples = X.shape[0]
        self.n_features = X.shape[1]
        if self.balanced:
            self.cluster_size = int(self.n_samples / self.n_clusters)

        # Place k centroids randomly.
        centers = self.init_centers(X)

        best_labels, best_inertia, best_centers = None, None, None

        for i in range(self.max_iterations):
            centers_old = centers.copy()

            # Get labels and inertia.
            if not self.balanced:
                labels, inertia = self.get_labels_and_inertia(X, centers)
            else:
                labels, inertia = self.get_labels_and_inertia_extended(
                    X, centers)

            # Move the centers to the mean of the points assigned to it.
            centers = self.move_to_mean(X, labels)

            print("Iteration {:2d}, inertia {:.3f}".format(i, inertia))

            # Update the labels and centers if the inertia is the minimum.
            if best_inertia is None or inertia < best_inertia:
                best_labels = labels.copy()
                best_centers = centers.copy()
                best_inertia = inertia

            # Check if the centers move.
            center_shift_total = squared_norm(centers_old - centers)
            print("center shift {:f}".format(center_shift_total))
            if center_shift_total == 0:
                print("Converged at iteration {:d}: center shift {:f}".format(
                    i, center_shift_total))
                break

        # For the case it stops due to the max iterations
        if center_shift_total > 0:
            if not self.balanced:
                best_labels, best_inertia = self.get_labels_and_inertia(
                    X, best_centers)
            else:
                best_labels, best_inertia = self.get_labels_and_inertia_extended(
                    X, best_centers)

        # Convert array to list for grading purpose.
        list_best_centers = []
        for centroid in best_centers:
            list_best_centers.append(list(centroid))

        return list(best_labels), list_best_centers
Example #31
def _fit_projected_gradient(X, W, H, tol, max_iter, nls_max_iter, alpha,
                            l1_ratio):
    gradW = (np.dot(W, np.dot(H, H.T)) -
             safe_sparse_dot(X, H.T, dense_output=True))
    gradH = (np.dot(np.dot(W.T, W), H) -
             safe_sparse_dot(W.T, X, dense_output=True))

    init_grad = squared_norm(gradW) + squared_norm(gradH.T)
    # max(0.001, tol) to force alternating minimizations of W and H
    tolW = max(0.001, tol) * np.sqrt(init_grad)
    tolH = tolW

    for n_iter in range(1, max_iter + 1):
        # stopping condition as discussed in paper
        proj_grad_W = squared_norm(gradW * np.logical_or(gradW < 0, W > 0))
        proj_grad_H = squared_norm(gradH * np.logical_or(gradH < 0, H > 0))

        if (proj_grad_W + proj_grad_H) / init_grad < tol ** 2:
            break

        # update W
        Wt, gradWt, iterW = _nls_subproblem(X.T, H.T, W.T, tolW, nls_max_iter,
                                            alpha=alpha, l1_ratio=l1_ratio)
        W, gradW = Wt.T, gradWt.T

        if iterW == 1:
            tolW = 0.1 * tolW

        # update H
        H, gradH, iterH = _nls_subproblem(X, W, H, tolH, nls_max_iter,
                                          alpha=alpha, l1_ratio=l1_ratio)
        if iterH == 1:
            tolH = 0.1 * tolH

    H[H == 0] = 0   # fix up negative zeros

    if n_iter == max_iter:
        Wt, _, _ = _nls_subproblem(X.T, H.T, W.T, tolW, nls_max_iter,
                                   alpha=alpha, l1_ratio=l1_ratio)
        W = Wt.T

    return W, H, n_iter
Example #32
def _compute_RMSETOTAL(D, A, Ft, P, O, S, R):
    fD = 0
    numElementsD = 0
    fA = 0
    numElementsA = 0
    baselineSum = sum([norm(mat) for mat in D]) + sum([norm(mat) for mat in A])

    for i, Di in enumerate(D):
        PiSiFt = P[i] @ S[i] @ Ft
        fD += squared_norm(Di - PiSiFt) / (squared_norm(Di))
        # numElementsD += Di.size

    for i, Ai in enumerate(A):
        OiRiFt = O[i] @ R[i] @ Ft
        fA += squared_norm(Ai - OiRiFt) / (squared_norm(Ai))
        # numElementsA += Ai.size

    answer = (fD + fA)  #/(numElementsD+numElementsA)

    return math.sqrt(answer), math.sqrt(fA)  #/numElementsA)
Example #33
def _compute_RMSETOTAL(D, A, F, P, O):
    fD = 0
    numElementsD = 0
    fA = 0
    numElementsA = 0
    for i, Di in enumerate(D):
        PiF = dot(P[i], F)
        fD += squared_norm(Di - PiF)
        # numElementsD += Di.size
        numElementsD += squared_norm(Di)

    for i, Ai in enumerate(A):
        OiF = dot(O[i], F)
        fA += squared_norm(Ai - OiF)
        # numElementsA += Ai.size
        numElementsA += squared_norm(Ai)

    answer = (fD + fA) / (numElementsD + numElementsA)

    return answer, fA / numElementsA
Example #34
def _kmeans_single_lloyd(X, n_clusters, max_iter=300, init='k-means||',
                         verbose=False, x_squared_norms=None,
                         random_state=None, tol=1e-4,
                         precompute_distances=True,
                         oversampling_factor=2,
                         init_max_iter=None):
    centers = k_init(X, n_clusters, init=init,
                     oversampling_factor=oversampling_factor,
                     random_state=random_state, max_iter=init_max_iter)
    dt = X.dtype
    X = X.astype(np.float32)
    P = X.shape[1]
    for i in range(max_iter):
        t0 = tic()
        centers = centers.astype('f4')
        labels, distances = pairwise_distances_argmin_min(
            X, centers, metric='euclidean', metric_kwargs={"squared": True}
        )

        labels = labels.astype(np.int32)
        distances = distances.astype(np.float32)

        r = da.atop(_centers_dense, 'ij',
                    X, 'ij',
                    labels, 'i',
                    n_clusters, None,
                    distances, 'i',
                    adjust_chunks={"i": n_clusters, "j": P},
                    dtype='f8')
        new_centers = da.from_delayed(
            sum(r.to_delayed().flatten()),
            (n_clusters, P),
            X.dtype
        )
        counts = da.bincount(labels, minlength=n_clusters)
        new_centers = new_centers / counts[:, None]
        new_centers, = compute(new_centers)

        # Convergence check
        shift = squared_norm(centers - new_centers)
        t1 = tic()
        logger.info("Lloyd loop %2d. Shift: %0.4f [%.2f s]", i, shift, t1 - t0)
        if shift < tol:
            break
        centers = new_centers

    if shift > 1e-7:
        labels, distances = pairwise_distances_argmin_min(X, centers)
    inertia = distances.astype(dt).sum()
    centers = centers.astype(dt)
    labels = labels.astype(np.int64)

    return labels, inertia, centers, i + 1
Example #35
def _multinomial_loss_and_gradient(w, X, Y, alpha, sample_weight, xStd, standardization):

	# print("coefficients = " + str(w))
	_, n_features = X.shape
	_, n_classes = Y.shape
	n_samples = np.sum(sample_weight)
	sample_weight = sample_weight[:, np.newaxis]
	fit_intercept = (w.size == n_classes * (n_features + 1))
	grad = np.zeros((n_classes, n_features + bool(fit_intercept)))

	# Calculate loss value
	w = w.reshape(n_classes, -1)
	
	if fit_intercept:
		intercept = w[:, -1]
		w = w[:, :-1]
	else:
		intercept = 0
	p = safe_sparse_dot(X, w.T) + intercept
	p -= logsumexp(p, axis=1)[:, np.newaxis]
	
	if standardization:
		l2reg = 0.5 * alpha * squared_norm(w)
		l2reg_grad = alpha * w
	else:
		_w = w / xStd
		l2reg = 0.5 * alpha * squared_norm(_w)
		l2reg_grad = alpha * _w / xStd

	loss = -(sample_weight * Y * p).sum() / n_samples + l2reg
	# print("loss = " + str(loss))

	diff = sample_weight * (np.exp(p) - Y)

	grad[:, :n_features] = safe_sparse_dot(diff.T, X) / n_samples
	grad[:, :n_features] += l2reg_grad
	# print("grad = " + str(grad))
	if fit_intercept:
		grad[:, -1] = diff.sum(axis=0) / n_samples
	return loss, grad.ravel()
Example #36
def logistic_loss(w, X, Y, alpha):
    """
    Implementation of the logistic loss function when Y is a probability
    distribution.

    loss = -SUM_i SUM_k y_ik * log(P[yi == k]) + 0.5 * alpha * ||w||^2
    """
    n_classes = Y.shape[1]
    n_features = X.shape[1]
    intercept = 0

    if n_classes > 2:
        fit_intercept = w.size == (n_classes * (n_features + 1))
        w = w.reshape(n_classes, -1)
        if fit_intercept:
            intercept = w[:, -1]
            w = w[:, :-1]
    else:
        fit_intercept = w.size == (n_features + 1)
        if fit_intercept:
            intercept = w[-1]
            w = w[:-1]

    z = safe_sparse_dot(X, w.T) + intercept

    if n_classes == 2:
        # in the binary case, simply compute the logistic function
        p = np.vstack([log_logistic(-z), log_logistic(z)]).T
    else:
        # compute the logistic function for each class and normalize
        denom = expit(z)
        denom = denom.sum(axis=1).reshape((denom.shape[0], -1))
        p = log_logistic(z)
        loss = -(Y * p).sum()
        loss += np.log(denom).sum()  # Y.sum() = 1
        loss += 0.5 * alpha * squared_norm(w)
        return loss

    loss = -(Y * p).sum() + 0.5 * alpha * squared_norm(w)
    return loss
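A small check of the binary branch above, as a sketch. It assumes the snippet's own imports (`expit`, `log_logistic`, `safe_sparse_dot`, `squared_norm`) are in scope; the loss should match a direct cross-entropy computation plus the ridge term.

import numpy as np
from scipy.special import expit

rng = np.random.RandomState(0)
n_samples, n_features = 8, 3
X = rng.randn(n_samples, n_features)
Y = np.zeros((n_samples, 2))
Y[np.arange(n_samples), rng.randint(0, 2, n_samples)] = 1.0  # one-hot labels
w = rng.randn(n_features)                                    # no intercept column
alpha = 0.1

z = X.dot(w)
expected = -(Y[:, 0] * np.log(expit(-z)) + Y[:, 1] * np.log(expit(z))).sum()
expected += 0.5 * alpha * w.dot(w)
assert np.isclose(logistic_loss(w, X, Y, alpha), expected)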
Example #37
def _multinomial_loss(w, X, Y, alpha, sample_weight):
    """Computes multinomial loss and class probabilities.

    Parameters
    ----------
    w : ndarray, shape (n_classes * n_features,) or (n_classes * (n_features +
        1),)
            Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    Y : ndarray, shape (n_samples, n_classes)
        Transformed labels according to the output of LabelBinarizer.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : ndarray, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    loss : float
        Multinomial loss.

    p : ndarray, shape (n_samples, n_classes)
        Estimated class probabilities.

    w : ndarray, shape (n_classes, n_features)
        Reshaped param vector excluding intercept terms.
    """
    n_classes = Y.shape[1]
    n_features = X.shape[1]
    fit_intercept = w.size == (n_classes * (n_features + 1))
    w = w.reshape(n_classes, -1)
    sample_weight = sample_weight[:, np.newaxis]
    if fit_intercept:
        intercept = w[:, -1]
        w = w[:, :-1]
    else:
        intercept = 0
    p = safe_sparse_dot(X, w.T)
    p += intercept
    p -= logsumexp(p, axis=1)[:, np.newaxis]
    loss = -(sample_weight * Y * p).sum()
    loss += 0.5 * alpha * squared_norm(w)
    p = np.exp(p, p)
    return loss, p, w
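A quick usage sketch for the function above, assuming `safe_sparse_dot`, `squared_norm`, and `logsumexp` are imported as in the surrounding module and using arbitrary toy shapes: the returned probabilities should form valid softmax rows and the reshaped coefficients should have the intercepts stripped off.

import numpy as np

rng = np.random.RandomState(0)
n_samples, n_features, n_classes = 6, 4, 3
X = rng.randn(n_samples, n_features)
Y = np.zeros((n_samples, n_classes))
Y[np.arange(n_samples), rng.randint(0, n_classes, n_samples)] = 1.0
w = rng.randn(n_classes * (n_features + 1))    # coefficients plus intercepts
sample_weight = np.ones(n_samples)

loss, p, w_mat = _multinomial_loss(w, X, Y, alpha=1.0, sample_weight=sample_weight)
assert p.shape == (n_samples, n_classes)
assert np.allclose(p.sum(axis=1), 1.0)         # rows are softmax probabilities
assert w_mat.shape == (n_classes, n_features)  # intercept column removed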
Example #38
    def _update_center(self, X):
        """
        Fix Weights and Labels, Update Centers
        """
        centers_old = self.cluster_centers_.copy()

        # Choose the data belonging to cluster k and
        # update the cluster center with their mean
        for k in range(self.n_clusters):
            mask = self.labels_ == k
            self.cluster_centers_[k] = np.mean(X[mask], axis=0)

        center_shift_total = squared_norm(self.cluster_centers_ - centers_old)
        return center_shift_total
Example #39
def _multinomial_loss(w, X, Y, alpha, sample_weight):
    """Computes multinomial loss and class probabilities.

    Parameters
    ----------
    w : ndarray, shape (n_classes * n_features,) or (n_classes * (n_features +
        1),)
            Coefficient vector.

    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data.

    Y : ndarray, shape (n_samples, n_classes)
        Transformed labels according to the output of LabelBinarizer.

    alpha : float
        Regularization parameter. alpha is equal to 1 / C.

    sample_weight : ndarray, shape (n_samples,) optional
        Array of weights that are assigned to individual samples.
        If not provided, then each sample is given unit weight.

    Returns
    -------
    loss : float
        Multinomial loss.

    p : ndarray, shape (n_samples, n_classes)
        Estimated class probabilities.

    w : ndarray, shape (n_classes, n_features)
        Reshaped param vector excluding intercept terms.
    """
    n_classes = Y.shape[1]
    n_features = X.shape[1]
    fit_intercept = w.size == (n_classes * (n_features + 1))
    w = w.reshape(n_classes, -1)
    sample_weight = sample_weight[:, np.newaxis]
    if fit_intercept:
        intercept = w[:, -1]
        w = w[:, :-1]
    else:
        intercept = 0
    p = safe_sparse_dot(X, w.T)
    p += intercept
    p -= logsumexp(p, axis=1)[:, np.newaxis]
    loss = -(sample_weight * Y * p).sum()
    loss += 0.5 * alpha * squared_norm(w)
    p = np.exp(p, p)
    return loss, p, w
Example #40
    def _objective_func(self, beta_bias):
        bias, beta = self._split_coefficents(beta_bias)

        Kw = self._Kw

        val = 0.5 * numpy.dot(beta, Kw)
        if self._has_time:
            val += 0.5 * self._regr_penalty * squared_norm(self.y_compressed - bias
                                                           - Kw.compress(self.regr_mask, axis=0))

        l_plus, xv_plus, l_minus, xv_minus = self._counter.calculate(beta)
        val += 0.5 * self._rank_penalty * numexpr.evaluate(
            'sum(Kw * ((l_plus + l_minus) * Kw - xv_plus - xv_minus - 2 * (l_minus - l_plus)) + l_minus)')

        return val
Example #41
def kmeansopt(data, k, rng, T=50, method='kmeans', tol=1e-4):
    centroids = []
    lable = []
    
    if method == 'kmeans++':
        centroids = optimize_centroids(data, centroids, k, rng)
    else:
        centroids = ramdon_centroids(data, centroids, k, rng)
#    print("inital centroids")
#    print(centroids)
    old_centroids = []
    #    result_dict = {}
    Iteration = 0
    clusters = [[] for i in range(k)]
    #    while(Iteration < T and not compare(old_centroids , centroids)):
    while Iteration < T:
        clusters = [[] for i in range(k)]
        clusters, lable = euclidean(data, centroids, clusters)
    #        print(" The %d times cluster" % Iteration)
    #    print(clusters)
        # recalculate centroids from the existing clusters
        index = 0
        old_centroids = list(centroids)
#        print(Iteration)
        for cluster in clusters:
            centroids[index] = np.mean(cluster, axis = 0).tolist()
            index += 1
            
#    for num in range(0,len(clusters)):
#        for ld in clusters[num]:
#            result_dict[str(ld)] = num
#        print(centroids)
        centroids_matrix = np.matrix(centroids)
#        print(centroids_matrix)
#        print(old_centroids)
        old_centroids_matrix = np.matrix(old_centroids)
#        print(old_centroids_matrix)
        shift = squared_norm(old_centroids_matrix - centroids_matrix)
        
        if shift <= tol:
#            print("Already Coverage , break")
            break
        
        Iteration += 1    # End of innerLoop
    return clusters, centroids, lable
Example #42
def temp_log_loss(w, X, Y, alpha):
    n_classes = Y.shape[1]
    w = w.reshape(n_classes, -1)
    intercept = w[:, -1]
    w = w[:, :-1]
    z = safe_sparse_dot(X, w.T) + intercept

    denom = expit(z)
    #print denom
    #print denom.sum()
    denom = denom.sum(axis=1).reshape((denom.shape[0], -1))
    #print denom
    p = log_logistic(z)

    loss = - (Y * p).sum()
    loss += np.log(denom).sum()
    loss += 0.5 * alpha * squared_norm(w)

    return loss
Example #43
def _multinomial_loss(w, X, Y, alpha):
    sample_weight = np.ones(len(Y))
    n_classes = Y.shape[1]
    n_features = X.shape[1]
    fit_intercept = w.size == (n_classes * (n_features + 1))
    w = w.reshape(n_classes, -1)
    sample_weight = sample_weight[:, np.newaxis]
    if fit_intercept:
        intercept = w[:, -1]
        w = w[:, :-1]
    else:
        intercept = 0
    p = safe_sparse_dot(X, w.T)
    p += intercept
    p -= logsumexp(p, axis=1)[:, np.newaxis]
    loss = -(sample_weight * Y * p).sum()
    loss += 0.5 * alpha * squared_norm(w)
    p = np.exp(p, p)
    return loss, p, w
Example #44
def _kmeans_single(X, n_clusters, max_iter=300, init='k-means++', random_state=None, tol=1e-4):
    random_state = check_random_state(random_state)

    best_labels, best_inertia, best_centers = None, None, None

    # init
    x_squared_norms = row_norms(X, squared=True)
    centers = _init_centroids(X, n_clusters, init, random_state, x_squared_norms=x_squared_norms)

    #  distances = np.zeros(shape=(X.shape[0],), dtype=np.float64)

    # iterations
    for i in range(max_iter):
        centers_old = centers.copy()
        # Assignment, also called E-step of EM
        labels, inertia = _labels_inertia(X, x_squared_norms, centers)

        # re-computation of the centroids, also called M-step of EM
        centers = _centers(X, labels, n_clusters)

        if best_inertia is None or inertia < best_inertia:
            best_labels  = labels.copy()
            best_centers = centers.copy()
            best_inertia = inertia

        shift = squared_norm(centers_old - centers)
        if shift <= tol:
            break

    if shift > 0:
        # rerun E-step in case of non-convergence so that predicted labels
        # match cluster centers
        best_labels, best_inertia = \
            _labels_inertia(X, x_squared_norms, best_centers)


    return best_centers, best_labels, best_inertia
Example #45
def enet_kernel_learning(
        K, y, lamda=0.01, beta=0.01, gamma='auto', max_iter=100, verbose=0,
        tol=1e-4, return_n_iter=True):
    """Elastic Net kernel learning.

    Solve the following problem via alternating minimisation:
        min sum_{i=1}^p 1/2 ||alpha_i * w * K_i - y_i||^2 + lamda ||w||_1 +
        + beta||w||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)

    alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]
    # KKT = [K[j].T.dot(K[j]) for j in range(len(K))]
    # print(KKT[0].shape)
    if gamma == 'auto':
        lipschitz_constant = np.array([
            sum(np.linalg.norm(K_j[i].dot(K_j[i].T))
                for i in range(K_j.shape[0]))
            for K_j in K])
        gamma = 1. / (lipschitz_constant)

    objective_new = 0
    for iteration_ in range(max_iter):
        w_old = coef.copy()
        alpha_old = [a.copy() for a in alpha]
        objective_old = objective_new

        # update w
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        alpha_coef_K = [alpha[j].dot(K[j].T.dot(coef))
                        for j in range(n_patients)]
        gradient = sum((alpha_coef_K[j] - y[j]).dot(A[j].T)
                       for j in range(n_patients))

        # gradient_2 = coef.dot(sum(
        #     np.dot(K[j].dot(alpha[j]), K[j].dot(alpha[j]).T)
        #     for j in range(len(K)))) - sum(
        #         y[j].dot(K[j].dot(alpha[j]).T) for j in range(len(K)))

        # gradient = coef.dot(sum(
        #     alpha[j].dot(KKT[j].dot(alpha[j])) for j in range(len(K)))) - sum(
        #         y[j].dot(K[j].dot(alpha[j]).T) for j in range(len(K)))

        # gradient += 2 * beta * coef
        coef = soft_thresholding(coef - gamma * gradient, lamda=lamda * gamma)

        # update alpha
        # for j in range(len(K)):
        #     alpha[j] = _solve_cholesky_kernel(
        #         K[j].T.dot(coef), y[j][..., None], lamda).ravel()
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        alpha_coef_K = [alpha[j].dot(K[j].T.dot(coef))
                        for j in range(n_patients)]
        gradient = [(alpha_coef_K[j] - y[j]).dot(A[j].T) + 2 * beta * alpha[j]
                    for j in range(n_patients)]
        alpha = [alpha[j] - gamma * gradient[j] for j in range(n_patients)]

        objective_new = objective(K, y, alpha, lamda, beta, coef)
        objective_difference = abs(objective_new - objective_old)
        snorm = np.sqrt(squared_norm(coef - w_old) + sum(
            squared_norm(a - a_old) for a, a_old in zip(alpha, alpha_old)))

        obj = objective(K, y, alpha, lamda, beta, coef)

        if verbose and iteration_ % 10 == 0:
            print("obj: %.4f, snorm: %.4f" % (obj, snorm))

        if snorm < tol and objective_difference < tol:
            break
        if np.isnan(snorm) or np.isnan(objective_difference):
            raise ValueError('Non-convergence: NaN encountered in the '
                             'iterate norm or objective difference.')
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
Example #46
def objective_admm2(x, y, alpha, lamda, beta, w1):
    """Objective function for lasso kernel learning."""
    obj = .5 * sum(squared_norm(x[j] - y[j]) for j in range(len(x)))
    obj += lamda * np.abs(w1).sum()
    obj += beta * sum(squared_norm(a) for a in alpha)
    return obj
Example #47
def _spherical_kmeans_single_lloyd(X, n_clusters, max_iter=300,
                                   init='k-means++', verbose=False,
                                   x_squared_norms=None,
                                   random_state=None, tol=1e-4,
                                   precompute_distances=True):
    '''
    Modified from sklearn.cluster.k_means_.k_means_single_lloyd.
    '''
    random_state = check_random_state(random_state)

    best_labels, best_inertia, best_centers = None, None, None

    # init
    centers = _init_centroids(X, n_clusters, init, random_state=random_state,
                              x_squared_norms=x_squared_norms)
    if verbose:
        print("Initialization complete")

    # Allocate memory to store the distances for each sample to its
    # closer center for reallocation in case of ties
    distances = np.zeros(shape=(X.shape[0],), dtype=X.dtype)

    # iterations
    for i in range(max_iter):
        centers_old = centers.copy()

        # labels assignment
        # TODO: _labels_inertia should be done with cosine distance
        #       since ||a - b|| = 2(1 - cos(a,b)) when a,b are unit normalized
        #       this doesn't really matter.
        labels, inertia = \
            _labels_inertia(X, x_squared_norms, centers,
                            precompute_distances=precompute_distances,
                            distances=distances)

        # computation of the means
        if sp.issparse(X):
            centers = _k_means._centers_sparse(X, labels, n_clusters,
                                               distances)
        else:
            centers = _k_means._centers_dense(X, labels, n_clusters, distances)

        # l2-normalize centers (this is the main contribution here)
        centers = normalize(centers)

        if verbose:
            print("Iteration %2d, inertia %.3f" % (i, inertia))

        if best_inertia is None or inertia < best_inertia:
            best_labels = labels.copy()
            best_centers = centers.copy()
            best_inertia = inertia

        center_shift_total = squared_norm(centers_old - centers)
        if center_shift_total <= tol:
            if verbose:
                print("Converged at iteration %d: "
                      "center shift %e within tolerance %e"
                      % (i, center_shift_total, tol))
            break

    if center_shift_total > 0:
        # rerun E-step in case of non-convergence so that predicted labels
        # match cluster centers
        best_labels, best_inertia = \
            _labels_inertia(X, x_squared_norms, best_centers,
                            precompute_distances=precompute_distances,
                            distances=distances)

    return best_labels, best_inertia, best_centers, i + 1
Example #48
def enet_kernel_learning_admm(
        K, y, lamda=0.01, beta=0.01, rho=1., max_iter=100, verbose=0, rtol=1e-4,
        tol=1e-4, return_n_iter=True, update_rho_options=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||alpha_i * w * K_i - y_i||^2 + lamda ||w||_1 +
        + beta||w||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    u_1 = np.zeros(n_kernels)
    u_2 = np.zeros(n_kernels)
    w_1 = np.zeros(n_kernels)
    w_2 = np.zeros(n_kernels)

    w_1_old = w_1.copy()
    w_2_old = w_2.copy()

    checks = []
    for iteration_ in range(max_iter):
        # update alpha
        # solve (AtA + 2I)^-1 (Aty) with A = wK
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        KK = [A[j].dot(A[j].T) for j in range(n_patients)]
        yy = [y[j].dot(A[j]) for j in range(n_patients)]

        alpha = [_solve_cholesky_kernel(
            KK[j], yy[j][..., None], 2).ravel() for j in range(n_patients)]
        # alpha = [_solve_cholesky_kernel(
        #     K_dot_coef[j], y[j][..., None], 0).ravel() for j in range(n_patients)]

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        w_2 = prox_laplacian(coef + u_2, beta / rho)

        # equivalent to alpha_dot_K
        # solve (sum(AtA) + 2*rho I)^-1 (sum(Aty) + rho(w1+w2-u1-u2))
        # with A = K * alpha
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        KK = sum(A[j].dot(A[j].T) for j in range(n_patients))
        yy = sum(y[j].dot(A[j].T) for j in range(n_patients))
        yy += rho * (w_1 + w_2 - u_1 - u_2)

        coef = _solve_cholesky_kernel(KK, yy[..., None], 2 * rho).ravel()

        # update residuals
        u_1 += coef - w_1
        u_2 += coef - w_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(squared_norm(coef - w_1) + squared_norm(coef - w_2))
        snorm = rho * np.sqrt(
            squared_norm(w_1 - w_1_old) + squared_norm(w_2 - w_2_old))

        obj = objective_admm(K, y, alpha, lamda, beta, coef, w_1, w_2)

        check = convergence(
            obj=obj, rnorm=rnorm, snorm=snorm,
            e_pri=np.sqrt(2 * coef.size) * tol + rtol * max(
                np.sqrt(squared_norm(coef) + squared_norm(coef)),
                np.sqrt(squared_norm(w_1) + squared_norm(w_2))),
            e_dual=np.sqrt(2 * coef.size) * tol + rtol * rho * (
                np.sqrt(squared_norm(u_1) + squared_norm(u_2))))

        w_1_old = w_1.copy()
        w_2_old = w_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u_1 *= rho / rho_new
        u_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
Example #49
def _norm(x):
    """Dot product-based Euclidean norm implementation
    See: http://fseoane.net/blog/2011/computing-the-vector-norm/
    """
    return np.sqrt(squared_norm(x))
Example #50
def matrix_factorization(X, H=None, n_components=None,
                         init=None, update_H=True,
                         tol=1e-4, max_iter=200, alpha=0.01,
                         beta=0.02):

    n_samples, n_features = X.shape
    if n_components is None:
        n_components = n_features

    # check W and H, or initialize them
    if not update_H:
        W = np.zeros((n_samples, n_components))
    else:
        W, H = _initialize_nmf(X, n_components, init=init, eps=1e-6)

    print(W)
    print(H)

    n_iter = 0
    e_before = 0
    for step in range(max_iter):
        n_iter = step + 1
        print(n_iter)

        xs, ys = X.nonzero()    # row and column indices of the nonzero entries
        W_temp = W
        ER = X - np.dot(W, H)   # the error matrix

        for i in range(n_samples):
            for k in range(n_components):
                sum = 0
                for j in ys[xs==i]:
                    sum += ER[i][j] * H[k][j]

                t = W[i][k] + alpha * (sum - beta * W[i][k])
                if t < 0:
                    a = alpha
                    for l in range(10):
                        a /= 2
                        t = W[i][k] + a * (sum - beta * W[i][k])
                        if t >= 0:
                            break
                    if t < 0:
                        t = W[i][k]
                W[i][k] = t

        if update_H:
            for j in range(n_features):
                for k in range(n_components):
                    sum = 0
                    for i in xs[ys == j]:
                        sum += ER[i][j] * W_temp[i][k]

                    t = H[k][j] + alpha * (sum - beta * H[k][j])
                    if t < 0:
                        a = alpha
                        for l in range(10):
                            a /= 2
                            t = H[k][j] + a * (sum - beta * H[k][j])
                            if t >= 0:
                                break
                        if t < 0:
                            t = H[k][j]
                    H[k][j] = t

        E = (X - np.dot(W, H)) * (X > 0)
        e = squared_norm(E) + beta * (squared_norm(W) + squared_norm(H))
        # if step > 0:
        #     if abs(e/e_before - 1) < tol:
        #         break
        # e_before = e
        print(e)
        if e < tol:
            break

    if n_iter == max_iter:
        print ("Maximum number of iteration %d reached. Increase it to"
                      " improve convergence." % max_iter)

    return W, H, n_iter
Example #51
def norm(x):
    return sqrt(squared_norm(x))
Example #52
b = pd.DataFrame(temp)
print(b)
b0 = b.fillna(0)



X = np.array(b0)
# print X.mean()
# print (X[X>0]).mean()
# X[X==0] = (X[X>0]).mean()
# print X

# U, S, V = randomized_svd(X, 20)
U, S, V = svds(sparse.csr_matrix(X),  k=50, maxiter=2000)

S = vector_to_diagonal(S)
print(X)
print(U)
print(S)
print(V)

recon = pd.DataFrame(np.dot(U, np.dot(S, V)), b0.index, b0.columns)
recon[recon < 1] = 1
recon[recon > 5] = 5
# recon.to_csv('svdrecon.csv')
print(recon)
d = (t0 - recon) * (t0 > 0)
# d.to_csv('svdd.csv')
d.fillna(0, inplace=True)
print(squared_norm(d))
Example #53
    def _objective_func(self, w):
        z = self.Aw.shape[0] + squared_norm(self.AXw) - 2. * self.AXw.sum()
        val = 0.5 * squared_norm(w) + 0.5 * self.alpha * z
        return val
Example #54
def enet_kernel_learning_admm2(
        K, y, lamda=0.01, beta=0.01, rho=1., max_iter=100, verbose=0, rtol=1e-4,
        tol=1e-4, return_n_iter=True, update_rho_options=None):
    """Elastic Net kernel learning.

    Solve the following problem via ADMM:
        min sum_{i=1}^p 1/2 ||y_i - alpha_i * sum_{k=1}^{n_k} w_k * K_{ik}||^2
        + lamda ||w||_1 + beta sum_{j=1}^{c_i}||alpha_j||_2^2
    """
    n_patients = len(K)
    n_kernels = len(K[0])
    coef = np.ones(n_kernels)
    alpha = [np.zeros(K[j].shape[2]) for j in range(n_patients)]

    u = [np.zeros(K[j].shape[1]) for j in range(n_patients)]
    u_1 = np.zeros(n_kernels)
    w_1 = np.zeros(n_kernels)

    x_old = [np.zeros(K[0].shape[1]) for j in range(n_patients)]
    w_1_old = w_1.copy()
    # w_2_old = w_2.copy()

    checks = []
    for iteration_ in range(max_iter):
        # update x
        A = [K[j].T.dot(coef) for j in range(n_patients)]
        x = [prox_laplacian(y[j] + rho * (A[j].T.dot(alpha[j]) - u[j]), rho / 2.)
             for j in range(n_patients)]

        # update alpha
        # solve (AtA + 2I)^-1 (Aty) with A = wK
        KK = [rho * A[j].dot(A[j].T) for j in range(n_patients)]
        yy = [rho * A[j].dot(x[j] + u[j]) for j in range(n_patients)]
        alpha = [_solve_cholesky_kernel(
            KK[j], yy[j][..., None], 2 * beta).ravel() for j in range(n_patients)]
        # equivalent to alpha_dot_K
        # solve (sum(AtA) + 2*rho I)^-1 (sum(Aty) + rho(w1+w2-u1-u2))
        # with A = K * alpha
        A = [K[j].dot(alpha[j]) for j in range(n_patients)]
        KK = sum(A[j].dot(A[j].T) for j in range(n_patients))
        yy = sum(A[j].dot(x[j] + u[j]) for j in range(n_patients))
        yy += w_1 - u_1
        coef = _solve_cholesky_kernel(KK, yy[..., None], 1).ravel()

        w_1 = soft_thresholding(coef + u_1, lamda / rho)
        # w_2 = prox_laplacian(coef + u_2, beta / rho)

        # update residuals
        alpha_coef_K = [
            alpha[j].dot(K[j].T.dot(coef)) for j in range(n_patients)]
        residuals = [x[j] - alpha_coef_K[j] for j in range(n_patients)]
        u = [u[j] + residuals[j] for j in range(n_patients)]
        u_1 += coef - w_1

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(coef - w_1) +
            sum(squared_norm(residuals[j]) for j in range(n_patients)))
        snorm = rho * np.sqrt(
            squared_norm(w_1 - w_1_old) +
            sum(squared_norm(x[j] - x_old[j]) for j in range(n_patients)))

        obj = objective_admm2(x, y, alpha, lamda, beta, w_1)
        check = convergence(
            obj=obj, rnorm=rnorm, snorm=snorm,
            e_pri=np.sqrt(coef.size + sum(
                x[j].size for j in range(n_patients))) * tol + rtol * max(
                    np.sqrt(squared_norm(coef) + sum(squared_norm(
                        alpha_coef_K[j]) for j in range(n_patients))),
                    np.sqrt(squared_norm(w_1) + sum(squared_norm(
                        x[j]) for j in range(n_patients)))),
            e_dual=np.sqrt(coef.size + sum(
                x[j].size for j in range(n_patients))) * tol + rtol * rho * (
                    np.sqrt(squared_norm(u_1) + sum(squared_norm(
                        u[j]) for j in range(n_patients)))))

        w_1_old = w_1.copy()
        x_old = [x[j].copy() for j in range(n_patients)]

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check)

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual and iteration_ > 1:
            break

        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u = [u[j] * (rho / rho_new) for j in range(n_patients)]
        u_1 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    return_list = [alpha, coef]
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
Example #55
    def _objective_func(self, w):
        val = 0.5 * squared_norm(w) + 0.5 * self.alpha * squared_norm(self.L)
        return val