コード例 #1
0
def test_min_pos():
    # Check that min_pos returns a positive value and that it's consistent
    # between float and double
    X = np.random.RandomState(0).randn(100)

    min_double = min_pos(X)
    min_float = min_pos(X.astype(np.float32))

    assert_allclose(min_double, min_float)
    assert min_double >= 0
コード例 #2
0
ファイル: lars_lasso.py プロジェクト: zhuya1996/Lasso
    def fit(self, X: np.ndarray, y: np.ndarray):
        n, p = X.shape

        self.intercept_ = y.mean()
        y = y - self.intercept_
        self.coef_ = np.zeros(p)

        active_set = []
        inactive_set = list(range(p))
        beta = np.zeros(p)
        mu = np.zeros(n)

        change_sign_flag = False

        k = 0
        while k != min(p, n - 1):
            c = np.dot(X.T, y - mu)
            # print(np.sign(c[active_set]) * np.sign(beta[active_set]))
            if change_sign_flag:
                # remove j from the calculation of the next equiangular direction.
                pass
            else:
                j = inactive_set[np.argmax(np.abs(c[inactive_set]))]
                # print(f"add {j=}")
                active_set.append(j)
                inactive_set.remove(j)
            C = np.amax(np.abs(c))
            s = np.sign(c[active_set]).reshape((1, len(active_set)))
            XA = np.copy(X[:, active_set] * s)

            GA = XA.T @ XA
            GA_inv = np.linalg.inv(GA)

            one = np.ones((len(active_set), 1))
            AA = (1. / np.sqrt(one.T @ GA_inv @ one)).flatten()[0]

            w = AA * GA_inv @ one
            u = XA @ w

            a = X.T @ u
            d = s.T * w

            if k == p - 1:
                gamma = C / AA
            else:
                gamma_candidates = np.zeros((len(inactive_set), 2))
                for _j, jj in enumerate(inactive_set):
                    gamma_candidates[_j] = [(C - c[jj]) / (AA - a[jj]),
                                            (C + c[jj]) / (AA + a[jj])]
                gamma = arrayfuncs.min_pos(gamma_candidates)

            gamma_candidates_tilde = -beta[active_set] / d.flatten()
            gamma_tilde = arrayfuncs.min_pos(gamma_candidates_tilde)

            change_sign_flag = False
            if gamma_tilde < gamma:
                gamma = gamma_tilde
                j = active_set[list(gamma_candidates_tilde).index(gamma)]
                # print(f"remove {j=}")
                change_sign_flag = True

            new_beta = beta[active_set] + gamma * d.flatten()
            idx = 0 if j != 0 else 1
            tmp_beta = np.zeros(p)
            tmp_beta[active_set] = new_beta.copy()
            lambda_ = np.abs(
                X[:, active_set[idx]] @ (y - X @ tmp_beta)) * 2 / n

            if lambda_ < self.alpha:
                prev_lambda_ = np.abs(
                    X[:, active_set[idx]] @ (y - X @ self.coef_)) * 2 / n
                if len(active_set) < 2 and prev_lambda_ < self.alpha:
                    break
                # print(prev_lambda_, lambda_, self.alpha)
                modified_gamma = 0 + (gamma - 0) * (
                    self.alpha - prev_lambda_) / (lambda_ - prev_lambda_)
                beta[active_set] += modified_gamma * d.flatten()
                mu = mu + modified_gamma * u.flatten()
                self.coef_ = beta.copy()
                # print(np.abs(X[:, active_set[idx]] @ (y - X @ self.coef_)) * 2 / n)
                break

            mu = mu + gamma * u.flatten()
            beta[active_set] = new_beta.copy()
            self.coef_ = beta.copy()
            # print(self.coef_)

            if change_sign_flag:
                active_set.remove(j)
                inactive_set.append(j)
            k = len(active_set)
        return self
コード例 #3
0
ファイル: lasso_lars.py プロジェクト: WISDelft/SODA
def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
              alpha_min=0, method='lar', copy_X=True,
              eps=np.finfo(np.float).eps,
              copy_Gram=True, verbose=False, return_path=True,
              group_ids=None, positive=False):
    """Compute Least Angle Regression and Lasso path

    The optimization objective for Lasso is::

    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    Parameters
    -----------
    X: array, shape: (n_samples, n_features)
        Input data

    y: array, shape: (n_samples)
        Input targets

    max_iter: integer, optional
        Maximum number of iterations to perform, set to infinity for no limit.

    Gram: None, 'auto', array, shape: (n_features, n_features), optional
        Precomputed Gram matrix (X' * X), if 'auto', the Gram
        matrix is precomputed from the given X, if there are more samples
        than features

    alpha_min: float, optional
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha parameter in the Lasso.

    method: {'lar', 'lasso'}
        Specifies the returned model. Select 'lar' for Least Angle
        Regression, 'lasso' for the Lasso.

    eps: float, optional
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    copy_X: bool
        If False, X is overwritten.

    copy_Gram: bool
        If False, Gram is overwritten.

    Returns
    --------
    alphas: array, shape: (max_features + 1,)
        Maximum of covariances (in absolute value) at each iteration.

    active: array, shape (max_features,)
        Indices of active variables at the end of the path.

    coefs: array, shape (n_features, max_features + 1)
        Coefficients along the path

    See also
    --------
    lasso_path
    LassoLars
    Lars
    LassoLarsCV
    LarsCV
    sklearn.decomposition.sparse_encode

    Notes
    ------
    * http://en.wikipedia.org/wiki/Least-angle_regression

    * http://en.wikipedia.org/wiki/Lasso_(statistics)#LASSO_method
    """
    if group_ids is not None:
        group_ids = np.array(group_ids).copy()
        max_iter = len(np.unique(group_ids))

    n_features = X.shape[1]
    n_samples = y.size
    max_features = min(max_iter, n_features)

    if return_path:
        coefs = np.zeros((max_features + 1, n_features))
        alphas = np.zeros(max_features + 1)
    else:
        coef, prev_coef = np.zeros(n_features), np.zeros(n_features)
        alpha, prev_alpha = np.array([0.]), np.array([0.])  # better ideas?

    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    L = np.empty((max_features, max_features), dtype=X.dtype)
    swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X,))
    solve_cholesky, = get_lapack_funcs(('potrs',), (X,))

    if Gram is None:
        if copy_X:
            # force copy. setting the array to be fortran-ordered
            # speeds up the calculation of the (partial) Gram matrix
            # and allows to easily swap columns
            X = X.copy('F')
    elif Gram == 'auto':
        Gram = None
        if X.shape[0] > X.shape[1]:
            Gram = np.dot(X.T, X)
    elif copy_Gram:
            Gram = Gram.copy()

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if verbose:
        if verbose > 1:
            print "Step\t\tAdded\t\tDropped\t\tActive set size\t\tC"
        else:
            sys.stdout.write('.')
            sys.stdout.flush()

    tiny = np.finfo(np.float).tiny  # to avoid division by 0 warning
    tiny32 = np.finfo(np.float32).tiny  # to avoid division by 0 warning

    if group_ids is not None:
        selected_group = list()

    while True:
        if Cov.size:
            if group_ids is None:
                if positive:
                    C_idx = np.argmax(Cov)
                else:
                    C_idx = np.argmax(np.abs(Cov))
                C_ = Cov[C_idx]
                if C_ <= 0 and positive:
                    break
                C = np.fabs(C_)
            else:
                if positive:
                    tmp = Cov
                else:
                    tmp = np.abs(Cov)
                already_selected = np.zeros(len(tmp), dtype=np.bool)
                for gid in selected_group:
                    already_selected[group_ids == gid] = True
                tmp[already_selected] = 0.
                C_idx = np.argmax(tmp)
                C_ = Cov[C_idx]
                if C_ <= 0 and positive:
                    break
                C = np.fabs(C_)
                selected_group.append(group_ids[C_idx])
        else:
            C = 0.

        if return_path:
            alpha = alphas[n_iter, np.newaxis]
            coef = coefs[n_iter]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
            prev_coef = coefs[n_iter - 1]

        alpha[0] = C / n_samples
        if alpha[0] < alpha_min:  # early stopping
            # interpolation factor 0 <= ss < 1
            if n_iter > 0:
                # In the first iteration, all alphas are zero, the formula
                # below would make ss a NaN
                ss = (prev_alpha[0] - alpha_min) / (prev_alpha[0] - alpha[0])
                coef[:] = prev_coef + ss * (coef - prev_coef)
            alpha[0] = alpha_min
            if return_path:
                coefs[n_iter] = coef
            break

        if n_iter >= max_iter or n_active >= n_features:
            break

        if not drop:

            ##########################################################
            # Append x_j to the Cholesky factorization of (Xa * Xa') #
            #                                                        #
            #            ( L   0 )                                   #
            #     L  ->  (       )  , where L * w = Xa' x_j          #
            #            ( w   z )    and z = ||x_j||                #
            #                                                        #
            ##########################################################

            sign_active[n_active] = np.sign(C_)
            m, n = n_active, C_idx + n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov = Cov[1:]  # remove Cov[0]

            if group_ids is not None:
                group_ids[C_idx], group_ids[0] = group_ids[0], group_ids[C_idx]
                group_ids = group_ids[1:]  # remove group_ids[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active]) ** 2
                L[n_active, :n_active] = \
                    np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap does only work inplace if matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            arrayfuncs.solve_triangular(L[:n_active, :n_active],
                                        L[n_active, :n_active])
            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active, n_active] = diag

            active.append(indices[n_active])
            n_active += 1

            if verbose > 1:
                print "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '',
                                                            n_active, C)
        # least squares solution
        least_squares, info = solve_cholesky(L[:n_active, :n_active],
                               sign_active[:n_active], lower=True)

        # is this really needed ?
        AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))

        if not np.isfinite(AA):
            # L is too ill-conditionned
            i = 0
            L_ = L[:n_active, :n_active].copy()
            while not np.isfinite(AA):
                L_.flat[::n_active + 1] += (2 ** i) * eps
                least_squares, info = solve_cholesky(L_,
                                    sign_active[:n_active], lower=True)
                tmp = max(np.sum(least_squares * sign_active[:n_active]), eps)
                AA = 1. / np.sqrt(tmp)
                i += 1
        least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each unactive variables and
            # eqiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,
                                 least_squares)


        if group_ids is not None:
            mask = np.ones(group_ids.shape,dtype=bool)
            for g in selected_group:
                mask = mask & (group_ids!=g)

            arg = ((C - Cov) / (AA - corr_eq_dir + tiny))[mask]
            g1 = arrayfuncs.min_pos(arg)
            arg = ((C + Cov) / (AA + corr_eq_dir + tiny))[mask]
            g2 = arrayfuncs.min_pos(arg)
        else:
            g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
            g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
        
        if positive: 
            gamma_ = min(g1, C / AA)
        else:
            gamma_ = min(g1, g2, C / AA)

        # TODO: better names for these variables: z
        drop = False
        z = -coef[active] / (least_squares + tiny32)
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:
            # some coefficients have changed sign
            idx = np.where(z == z_pos)[0]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == 'lasso':
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if return_path:
            if n_iter >= coefs.shape[0]:
                del coef, alpha, prev_alpha, prev_coef
                # resize the coefs and alphas array
                add_features = 2 * max(1, (max_features - n_active))
                coefs.resize((n_iter + add_features, n_features))
                alphas.resize(n_iter + add_features)
            coef = coefs[n_iter]
            prev_coef = coefs[n_iter - 1]
            alpha = alphas[n_iter, np.newaxis]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
        else:
            # mimic the effect of incrementing n_iter on the array references
            prev_coef = coef
            prev_alpha[0] = alpha[0]
            coef = np.zeros_like(coef)

        coef[active] = prev_coef[active] + gamma_ * least_squares
        
        
        # update correlations
        Cov -= gamma_ * corr_eq_dir

        # See if any coefficient has changed sign
        if drop and method == 'lasso':

            arrayfuncs.cholesky_delete(L[:n_active, :n_active], idx)

            n_active -= 1
            m, n = idx, n_active
            drop_idx = active.pop(idx)

            if group_ids is not None:
                selected_group.remove(group_ids[idx])
                group_ids = np.r_[idx,group_ids]  # remove group_ids[0]
            
            if Gram is None:
                # propagate dropped variable
                for i in range(idx, n_active):
                    X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                    indices[i], indices[i + 1] = \
                            indices[i + 1], indices[i]  # yeah this is stupid

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active], coef[active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for i in range(idx, n_active):
                    indices[i], indices[i + 1] = \
                                indices[i + 1], indices[i]
                    Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                    Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i],
                                                      Gram[:, i + 1])

                # Cov_n = Cov_j + x_j * X + increment(betas) TODO:
                # will this still work with multiple drops ?

                # recompute covariance. Probably could be done better
                # wrong as Xy is not swapped with the rest of variables

                # TODO: this could be updated
                residual = y - np.dot(X, coef)
                temp = np.dot(X.T[drop_idx], residual)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.)  # just to maintain size
            if verbose > 1:
                print "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx,
                                                      n_active, abs(temp))

    if return_path:
        # resize coefs in case of early stop
        alphas = alphas[:n_iter + 1]
        coefs = coefs[:n_iter + 1]

        return alphas, active, coefs.T
    else:
        return alpha, active, coef
コード例 #4
0
        active.append(indices[n_active])
        n_active += 1
        #
        least_squares, _ = solve_cholesky(L[:n_active, :n_active],
                                          sign_active[:n_active],
                                          lower=True)
        if least_squares.size == 1 and least_squares == 0:
            #sign_active[:n_active] = 0时的情况
            least_squares[...] = 1
            AA = 1.
        else:
            AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))
            least_squares *= AA
        corr_eq_dir = np.dot(Gram[:n_active, n_active:].T, least_squares)
        #在相关系数正负两种情况下有两个γ,选正的最小值
        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny32))
        g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny32))
        gamma_ = min(g1, g2, C / AA)
        #change names for these variables: z
        z = -coef[active] / (least_squares + tiny32)
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:
            # some coefficients have changed sign
            idx = np.where(z == z_pos)[0][::-1]

            # 更新sign
            sign_active[idx] = -sign_active[idx]

            gamma_ = z_pos
            #drop = True
コード例 #5
0
def test_min_pos_no_positive(dtype):
    # Check that the return value of min_pos is the maximum representable
    # value of the input dtype when all input elements are <= 0 (#19328)
    X = np.full(100, -1.).astype(dtype, copy=False)

    assert min_pos(X) == np.finfo(dtype).max
コード例 #6
0
    product_power = np.diagonal(chords_i_gram_matrix)
    mean_power = np.sqrt(np.outer(product_power, product_power))
else:
    chords_i_gram_matrix = squareform(pdist(np.arange(2 ** 12).reshape(2 ** 12, 1), v_chord_product_from_chord_i_raw))
    # but wait! pdist optimised by assuming self-distance is zero
    # but this isn't a distance function! Quick!
    chords_i_gram_matrix[np.diag_indices_from(chords_i_gram_matrix)] = [
        v_chord_product_from_chord_i_raw(i, i) for i in xrange(2 ** 12)
    ]

    chords_i_dist_matrix = squareform(np.sqrt(pdist(chord_idx, v_chord_dist2_from_chord_i)))

    # We can also construct everything in terms of correlations...
    product_power = np.diagonal(chords_i_gram_matrix)
    mean_power = np.sqrt(np.outer(product_power, product_power))
    assert min_pos(mean_power) >= 1.0  # otherwise ... uh... double renormalization?
    chords_i_corr_matrix = chords_i_gram_matrix / np.maximum(mean_power, 1)

    # let's do the same thing as with the product matrix but a different way because of laziness
    chords_i_corr_dist_matrix = np.zeros_like(chords_i_corr_matrix)
    for ci1 in xrange(chords_i_corr_matrix.shape[0]):
        for ci2 in xrange(ci1, chords_i_corr_matrix.shape[1]):
            if ci2 % 11 == 0:
                print ci1, ci2
            dist = sqrt(
                chords_i_corr_matrix[ci1, ci1] - 2 * chords_i_corr_matrix[ci1, ci2] + chords_i_corr_matrix[ci2, ci2]
            )
            chords_i_corr_dist_matrix[ci1, ci2] = dist
            chords_i_corr_dist_matrix[ci2, ci1] = dist

if not os.path.exists("dists.h5"):