Example #1
    def center_normTrace_decomp(K):
        print('centering kernel')
        #### Get transformed features for K_train that don't snoop when centering, tracing, or eigendecomposing ####
        Kcent=KernelCenterer()
        Ktrain=Kcent.fit_transform(K[:in_samples,:in_samples])
        #Ktrain=Ktrain/float(np.trace(Ktrain))
        #[EigVals,EigVectors]=scipy.sparse.linalg.eigsh(Ktrain,k=reduced_dimen,which='LM')
        [EigVals,EigVectors]=scipy.linalg.eigh(Ktrain,eigvals=(in_samples-reduced_dimen,in_samples-1))
        for i in range(len(EigVals)): 
            if EigVals[i]<=0: EigVals[i]=0
        EigVals=np.flipud(np.fliplr(np.diag(EigVals)))
        EigVectors=np.fliplr(EigVectors)
        Ktrain_decomp=np.dot(EigVectors,scipy.linalg.sqrtm(EigVals))
       
        #### Get transformed features for K_test using K_train implied mapping ####
        Kcent=KernelCenterer()
        Kfull=Kcent.fit_transform(K)
        #Kfull=Kfull/float(np.trace(Kfull))
        K_train_test=Kfull[in_samples:,:in_samples]
        Ktest_decomp=np.dot(K_train_test,np.linalg.pinv(Ktrain_decomp.T))

        ####combine mapped train and test vectors and normalize each vector####
        Kdecomp=np.vstack((Ktrain_decomp,Ktest_decomp))
        print('doing normalization')
        Kdecomp=normalize(Kdecomp,copy=False)
        return Kdecomp
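The helper above relies on module-level in_samples and reduced_dimen plus numpy/scipy/scikit-learn imports that are not shown. A minimal, self-contained sketch of the same train/test mapping idea (the names and sizes here are illustrative assumptions, not part of the original):

import numpy as np
import scipy.linalg
from sklearn.preprocessing import KernelCenterer, normalize
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = rng.randn(60, 5)
in_samples, reduced_dimen = 40, 3          # train size and target dimension (assumed)

K = rbf_kernel(X)                          # full (train + test) kernel matrix
Ktrain = KernelCenterer().fit_transform(K[:in_samples, :in_samples])
vals, vecs = scipy.linalg.eigh(Ktrain)
vals, vecs = vals[-reduced_dimen:], vecs[:, -reduced_dimen:]
vals = np.clip(vals, 0, None)              # guard against tiny negative eigenvalues
Ktrain_decomp = vecs * np.sqrt(vals)       # train points in kernel-PCA coordinates

Kfull = KernelCenterer().fit_transform(K)
K_train_test = Kfull[in_samples:, :in_samples]
Ktest_decomp = K_train_test @ np.linalg.pinv(Ktrain_decomp.T)   # map test points

Kdecomp = normalize(np.vstack((Ktrain_decomp, Ktest_decomp)))
print(Kdecomp.shape)                       # (60, 3)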
Example #2
def KernelPCA(X):
    # pdist to calculate the squared Euclidean distances for every pair of points
    # in the 100x2 dimensional dataset.

    sq_dists = pdist(X, 'sqeuclidean')

    # Variance of the squared Euclidean distances between all pairs of data points.
    variance = np.var(sq_dists)

    # squareform converts the pairwise distances into a symmetric 100x100 matrix
    mat_sq_dists = squareform(sq_dists)

    # set the gamma parameter equal to the one I used in scikit-learn KernelPCA
    gamma = 15

    # Compute the 100x100 kernel matrix
    K = exp(-gamma * mat_sq_dists)

    # Center the kernel matrix
    kern_cent = KernelCenterer()
    K = kern_cent.fit_transform(K)

    # Get eigenvalues in ascending order with corresponding
    # eigenvectors from the symmetric matrix
    eigvals, eigvecs = eigh(K)

    # Get the eigenvector that corresponds to the highest eigenvalue
    X_pc1 = eigvecs[:, -1]
    return X_pc1
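The function above assumes pdist, squareform, exp, eigh, and KernelCenterer are already imported; a hedged driver showing those imports and a call on toy data (the 100x2 dataset the comments refer to) might look like:

import numpy as np
from numpy import exp
from scipy.spatial.distance import pdist, squareform
from scipy.linalg import eigh
from sklearn.preprocessing import KernelCenterer
from sklearn.datasets import make_moons

X, _ = make_moons(n_samples=100, random_state=0)
first_pc = KernelPCA(X)   # projection onto the top kernel principal component
print(first_pc.shape)     # (100,)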
def KPCA(gamma, data, feature_size):
    sq_dists = squared_euclidean_distance(data)
    # squareform converts the pairwise distances into a symmetric 400x400 matrix
    mat_sq_dists = squareform(sq_dists)
    # Compute the 400x400 kernel matrix
    K = rbfkernel(gamma, mat_sq_dists)
    # Center the kernel matrix
    kern_cent = KernelCenterer()
    K = kern_cent.fit_transform(K)
    # Get the eigenvector with largest eigenvalue
    eigen_values, eigen_vectors = eigh(K)
    indexes = eigen_values.argsort()[::-1]
    direction_vectors = eigen_vectors[:, indexes[0:feature_size]]
    projected_data = np.dot(K, direction_vectors)

    return projected_data
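KPCA above calls two helpers, squared_euclidean_distance and rbfkernel, that are not shown (it also assumes numpy, squareform, eigh, and KernelCenterer are imported). Minimal versions of those helpers, under those assumptions, could be:

import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.linalg import eigh
from sklearn.preprocessing import KernelCenterer

def squared_euclidean_distance(data):
    # condensed vector of pairwise squared Euclidean distances
    return pdist(data, 'sqeuclidean')

def rbfkernel(gamma, sq_dists):
    # Gaussian (RBF) kernel computed element-wise from squared distances
    return np.exp(-gamma * sq_dists)

# hypothetical call on random data
projected = KPCA(gamma=15, data=np.random.randn(40, 2), feature_size=2)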
Example #4
class KernelPca:
    # beta: Gaussian kernel parameter
    def __init__(self, beta):
        self.beta = beta
        self.centerer = KernelCenterer()

    # gauss kernel
    def __kernel(self, x1, x2):
        return np.exp(-self.beta * np.linalg.norm(x1 - x2)**2)

    # Compute the principal component vectors from the input data
    # shape(X) = (N, M)
    # n: number of principal components to extract
    def fit_transform(self, X, n):
        self.X = X
        # Gram matrix
        N = X.shape[0]
        K = np.array([[self.__kernel(X[i], X[j]) for j in range(N)]
                      for i in range(N)])
        # center the kernel
        K = self.centerer.fit_transform(K)
        # eigh returns eigenvalues in ascending order
        vals, vecs = np.linalg.eigh(K)
        vals = vals[::-1]
        vecs = vecs[:, ::-1]
        # top-n singular values and left singular vectors
        self.sigma = np.sqrt(vals[:n])  # (n)
        self.a = np.array(vecs[:, :n])  # (N,n)
        return self.sigma * self.a  # (N,n)

    # Return the principal component representation of x
    # shape(x)=(Nx, M)
    def transform(self, x):
        # Gram matrix
        N = self.X.shape[0]
        Nx = x.shape[0]
        K = np.array([[self.__kernel(x[i], self.X[j]) for j in range(N)]
                      for i in range(Nx)])  # (Nx,N)
        # center the kernel
        K = self.centerer.transform(K)
        # compute the principal components
        return K.dot(self.a) / self.sigma  # (Nx,n)
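Assuming numpy and KernelCenterer are imported as the class requires, a brief usage sketch might be:

import numpy as np
from sklearn.preprocessing import KernelCenterer

rng = np.random.RandomState(0)
X_train, X_new = rng.randn(30, 4), rng.randn(5, 4)

pca = KernelPca(beta=0.5)
Z_train = pca.fit_transform(X_train, n=2)   # (30, 2) scores for the training data
Z_new = pca.transform(X_new)                # (5, 2) scores for unseen points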
Example #5
def test_center_kernel():
    """Test that KernelCenterer is equivalent to Scaler in feature space"""
    X_fit = np.random.random((5, 4))
    scaler = Scaler(with_std=False)
    scaler.fit(X_fit)
    X_fit_centered = scaler.transform(X_fit)
    K_fit = np.dot(X_fit, X_fit.T)

    # center fit time matrix
    centerer = KernelCenterer()
    K_fit_centered = np.dot(X_fit_centered, X_fit_centered.T)
    K_fit_centered2 = centerer.fit_transform(K_fit)
    assert_array_almost_equal(K_fit_centered, K_fit_centered2)

    # center predict time matrix
    X_pred = np.random.random((2, 4))
    K_pred = np.dot(X_pred, X_fit.T)
    X_pred_centered = scaler.transform(X_pred)
    K_pred_centered = np.dot(X_pred_centered, X_fit_centered.T)
    K_pred_centered2 = centerer.transform(K_pred)
    assert_array_almost_equal(K_pred_centered, K_pred_centered2)
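The equivalence this test asserts follows from the explicit double-centering formula K_centered = (I - (1/n) 11^T) K (I - (1/n) 11^T); a small sketch verifying that KernelCenterer computes exactly this at fit time:

import numpy as np
from sklearn.preprocessing import KernelCenterer

X = np.random.random((5, 4))
K = X @ X.T
n = K.shape[0]
H = np.eye(n) - np.ones((n, n)) / n   # centering matrix I - (1/n) 11^T
assert np.allclose(H @ K @ H, KernelCenterer().fit_transform(K))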
Example #6
class kc():
    def __init__(self, cols, metric):
        self.columns = cols
        self.metric = metric
        self.model = KernelCenterer()

    def fit(self, data):
        k = pairwise_kernels(data[self.columns], metric=self.metric)
        self.model.fit(k)

    def fit_transform(self, data):
        k = pairwise_kernels(data[self.columns], metric=self.metric)
        transformed = self.model.fit_transform(k)
        for idx in range(len(self.columns)):
            data[self.columns[idx]] = transformed[:, idx]
        return data

    def transform(self, data):
        k = pairwise_kernels(data[self.columns], metric=self.metric)
        transformed = self.model.transform(k)
        for idx in range(len(self.columns)):
            data[self.columns[idx]] = transformed[:, idx]
        return data
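A brief usage sketch, assuming pairwise_kernels and KernelCenterer are imported as the class requires and that data is a pandas DataFrame:

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.preprocessing import KernelCenterer

df = pd.DataFrame(np.random.randn(6, 2), columns=["a", "b"])
centered = kc(cols=["a", "b"], metric="rbf").fit_transform(df)

Note that fit_transform overwrites the listed columns with the first len(cols) columns of the centered kernel matrix, so it only makes sense when the number of rows is at least the number of selected columns.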
def kpca(X, gamma=15, k=10):
    
    # Calculating the distances for every pair of points in the NxD dimensional dataset.
    _dists = pdist(X, 'minkowski')

    # Converting the pairwise distances into a symmetric NxN matrix.
    sym_dists = squareform(_dists)

    # Computing the NxN kernel matrix.
    K = exp(-gamma * sym_dists)

    # Centering the kernel since the data has to be standardized
    kern_cent = KernelCenterer()
    K = kern_cent.fit_transform(K)
    
    eig_vals, eig_vecs = np.linalg.eig(K)
    eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:,i]) for i in range(len(eig_vals))]
    # Sort the (eigenvalue, eigenvector) tuples from high to low
    eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True)
    vec = np.array([ eig_pairs[i][1] for i in range(k)])
    vec = vec.T # to make eigen vector matrix nxk

    return vec
class Kernel(object):
    """
    This is a base Kernel class (acting as MixIn).
    It is not supposed to be directly initialized, but should be inherited from.

    """

    def __init__(self, kernel_type="linear", degree=2, gamma=None, coef0=1):

        self.kernel_type = kernel_type
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0

        self.centerer = KernelCenterer()

    def c_(self, X):
        """
        Center the gram matrix
        """
        return self.centerer.fit_transform(X)

    def apply_kernel(self, X):
        kernel_handler = {"rbf": self._apply_rbf,
                          "linear": self._apply_linear,
                          "poly": self. _apply_poly}
        return self.c_(kernel_handler[self.kernel_type](X))

    def _apply_linear(self, X):
        return linear_kernel(X)

    def _apply_poly(self, X):
        return polynomial_kernel(X, degree=self.degree, coef0=self.coef0, gamma=self.gamma)

    def _apply_rbf(self, X):
        return rbf_kernel(X, gamma=self.gamma)
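This mixin assumes linear_kernel, polynomial_kernel, and rbf_kernel from sklearn.metrics.pairwise are imported; a hedged usage sketch via a direct instance (normally it would be inherited from):

import numpy as np
from sklearn.metrics.pairwise import linear_kernel, polynomial_kernel, rbf_kernel
from sklearn.preprocessing import KernelCenterer

X = np.random.randn(20, 5)
k = Kernel(kernel_type="rbf", gamma=0.5)
K_centered = k.apply_kernel(X)   # centered 20x20 RBF Gram matrix
print(K_centered.shape)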
Example #9
class KernelECA(BaseEstimator, TransformerMixin):
    """Kernel Entropy component analysis (KECA)

    Non-linear dimensionality reduction through the use of kernels (see
    :ref:`metrics`).

    Parameters
    ----------
    n_components: int or None
        Number of components. If None, all non-zero components are kept.

    kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel.
        Default: "linear"

    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.

    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for kernel passed as
        callable object. Ignored by other kernels.


    eigen_solver: string ['auto'|'dense'|'arpack']
        Select eigensolver to use.  If n_components is much less than
        the number of training samples, arpack may be more efficient
        than the dense eigensolver.

    tol: float
        convergence tolerance for arpack.
        Default: 0 (optimal value will be chosen by arpack)

    max_iter : int
        maximum number of iterations for arpack
        Default: None (optimal value will be chosen by arpack)
		    
    random_state : int seed, RandomState instance, or None, default : None
        A pseudo random number generator used for the initialization of the
        residuals when eigen_solver == 'arpack'.

    Attributes
    ----------

    lambdas_ :
        Eigenvalues of the centered kernel matrix

    alphas_ :
        Eigenvectors of the centered kernel matrix

    dual_coef_ :
        Inverse transform matrix

    X_transformed_fit_ :
        Projection of the fitted data on the kernel entropy components

    References
    ----------
    Kernel ECA based on:
    (c) Robert Jenssen, University of Tromso, Norway, 2010 
        R. Jenssen, "Kernel Entropy Component Analysis,"
        IEEE Trans. Patt. Anal. Mach. Intel., 32(5), 847-860, 2010.

    """

    def __init__(self, n_components=None, kernel="linear",
                 gamma=None, degree=3, coef0=1, kernel_params=None, eigen_solver='auto',
                 tol=0, max_iter=None, random_state=None, center=False):
        self.n_components = n_components
        self._kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.eigen_solver = eigen_solver
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self._centerer = KernelCenterer()
        self.center = center
        
    @property
    def _pairwise(self):
        return self._kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self._kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self._kernel,
                                filter_params=True, **params)

    def _fit_transform(self, K):
        """ Fit's using kernel K"""
        # center kernel
        if self.center:
            K = self._centerer.fit_transform(K)

        X_transformed = self.kernelECA(K=K)    
        self.X_transformed = X_transformed
        return K        

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        K = self._get_kernel(X)
        self._fit_transform(K)

        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        self.fit(X, **params)

        X_transformed= self.X_transformed
        
        return X_transformed

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')

        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def inverse_transform(self, X):
        raise NotImplementedError("Function inverse_transform is not implemented.")

    # here are the helper functions => to integrate in the code!
    def kernelECA(self,K):   
        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)
             
        # compute eigenvectors
        self.lambdas_, self.alphas_ = linalg.eigh(K)
        
        d = self.lambdas_
        E = self.alphas_
        # sort eigenvectors in descending order
        D,E = self.sort_eigenvalues(d,E)
    
        d = np.diag(D)
        sorted_entropy_index,entropy = self.ECA(D,E)
        Es = E[:,sorted_entropy_index]
        ds = d[sorted_entropy_index]

        Phi = np.zeros((K.shape[0],n_components))
        for i in range(n_components):
            Phi[:,i] = np.sqrt(ds[i]) * Es[:,i]
    
        X_transformed = Phi
        
        return X_transformed

    def sort_eigenvalues(self,D,E):
        d = D
        indices = np.argsort(d)[::-1]
    
        d = d[indices]
        D = np.zeros((len(d),len(d)))
        for i in range(len(d)):
            D[i,i] = d[i]
        E = E[:,indices]

        return D,E
    
    def ECA(self,D,E):
        N = E.shape[0]
        entropy = np.multiply(np.diag(D).T , (np.dot(np.ones((1,N)),E))**2)[0]
        indices = np.argsort(entropy)[::-1]
        entropy = entropy[indices]
        return indices,entropy
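A minimal usage sketch, assuming the imports this class relies on (numpy as np, scipy.linalg as linalg, and KernelCenterer, pairwise_kernels, check_is_fitted, BaseEstimator, TransformerMixin from scikit-learn) are in scope:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 3)

keca = KernelECA(n_components=2, kernel="rbf", gamma=0.1, center=True)
X_keca = keca.fit_transform(X)   # (50, 2) entropy-ranked components
print(X_keca.shape)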
Example #10
class KernelECA(BaseEstimator, TransformerMixin):
    """Kernel Entropy component analysis (KECA)

    Non-linear dimensionality reduction through the use of kernels (see
    :ref:`metrics`).

    Parameters
    ----------
    n_components: int or None
        Number of components. If None, all non-zero components are kept.

    kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel.
        Default: "linear"

    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.

    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for kernel passed as
        callable object. Ignored by other kernels.


    eigen_solver: string ['auto'|'dense'|'arpack']
        Select eigensolver to use.  If n_components is much less than
        the number of training samples, arpack may be more efficient
        than the dense eigensolver.

    tol: float
        convergence tolerance for arpack.
        Default: 0 (optimal value will be chosen by arpack)

    max_iter : int
        maximum number of iterations for arpack
        Default: None (optimal value will be chosen by arpack)
		    
    random_state : int seed, RandomState instance, or None, default : None
        A pseudo random number generator used for the initialization of the
        residuals when eigen_solver == 'arpack'.

    Attributes
    ----------

    lambdas_ :
        Eigenvalues of the centered kernel matrix

    alphas_ :
        Eigenvectors of the centered kernel matrix

    dual_coef_ :
        Inverse transform matrix

    X_transformed_fit_ :
        Projection of the fitted data on the kernel entropy components

    References
    ----------
    Kernel ECA based on:
    (c) Robert Jenssen, University of Tromso, Norway, 2010 
        R. Jenssen, "Kernel Entropy Component Analysis,"
        IEEE Trans. Patt. Anal. Mach. Intel., 32(5), 847-860, 2010.

    """
    def __init__(self,
                 n_components=None,
                 kernel="linear",
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None,
                 eigen_solver='auto',
                 tol=0,
                 max_iter=None,
                 random_state=None,
                 center=False):
        self.n_components = n_components
        self._kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.eigen_solver = eigen_solver
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self._centerer = KernelCenterer()
        self.center = center

    @property
    def _pairwise(self):
        return self._kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self._kernel):
            params = self.kernel_params or {}
        else:
            params = {
                "gamma": self.gamma,
                "degree": self.degree,
                "coef0": self.coef0
            }
        return pairwise_kernels(X,
                                Y,
                                metric=self._kernel,
                                filter_params=True,
                                **params)

    def _fit_transform(self, K):
        """ Fit's using kernel K"""
        # center kernel
        if self.center:
            K = self._centerer.fit_transform(K)

        X_transformed = self.kernelECA(K=K)
        self.X_transformed = X_transformed
        return K

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        K = self._get_kernel(X)
        self._fit_transform(K)

        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        self.fit(X, **params)

        X_transformed = self.X_transformed

        return X_transformed

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')

        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def inverse_transform(self, X):
        raise NotImplementedError(
            "Function inverse_transform is not implemented.")

    # here are the helper functions => to integrate in the code!
    def kernelECA(self, K):
        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors
        self.lambdas_, self.alphas_ = linalg.eigh(K)

        d = self.lambdas_
        E = self.alphas_
        # sort eigenvectors in descending order
        D, E = self.sort_eigenvalues(d, E)

        d = np.diag(D)
        sorted_entropy_index, entropy = self.ECA(D, E)
        Es = E[:, sorted_entropy_index]
        ds = d[sorted_entropy_index]

        Phi = np.zeros((K.shape[0], n_components))
        for i in range(n_components):
            Phi[:, i] = np.sqrt(ds[i]) * Es[:, i]

        X_transformed = Phi

        return X_transformed

    def sort_eigenvalues(self, D, E):
        d = D
        indices = np.argsort(d)[::-1]

        d = d[indices]
        D = np.zeros((len(d), len(d)))
        for i in range(len(d)):
            D[i, i] = d[i]
        E = E[:, indices]

        return D, E

    def ECA(self, D, E):
        N = E.shape[0]
        entropy = np.multiply(np.diag(D).T, (np.dot(np.ones((1, N)), E))**2)[0]
        indices = np.argsort(entropy)[::-1]
        entropy = entropy[indices]
        return indices, entropy
Example #11
class PASVM(object):
    def __init__(self,
                 C=1,
                 relaxation="classic",
                 coef0=1,
                 degree=2,
                 gamma=1.5,
                 kernel_type=None):

        self.C = C
        self.relaxation = relaxation

        self.coef0 = coef0
        self.degree = degree
        self.gamma = gamma
        self.kernel_type = kernel_type
        self.centerer = KernelCenterer()

    def c_(self, X):
        """
        Center the gram matrix
        """
        return self.centerer.fit_transform(X)

    def fit(self, X, y):
        if not hasattr(self, "W"):
            self.W = self._init_weights(X)

        if self.W.shape[0] != X.shape[1]:
            raise ValueError(
                "Expecter to get X with {} features, got {} instead".format(
                    X.shape[1], self.W.shape[0]))

        for i in range(X.shape[0]):

            x = X[i, :].reshape(1, -1)
            if self.kernel_type is not None:
                x = self.apply_kernel(x)
            loss = self._get_loss(x, y[i])
            tau = self._get_update_rule()(X, loss)

            self.W = self.W + tau * y[i] * x.reshape(-1, 1)

    def predict(self, X):
        if not hasattr(self, "W"):
            self.W = self._init_weights(X)
        return np.sign(np.dot(X, self.W))

    def _get_loss(self, X, y):

        loss = max(0, 1 - y * (np.dot(X, self.W)))
        return loss

    def _get_update_rule(self):
        def classic(X, loss):
            tau = loss / l2(X)
            return tau

        def first_relaxation(X, loss):
            tau = min(self.C, loss / l2(X))
            return tau

        def second_relaxation(X, loss):
            tau = loss / (l2(X) + (1 / (2 * self.C)))
            return tau

        mapping = {
            'classic': classic,
            'first': first_relaxation,
            'second': second_relaxation
        }

        return mapping[self.relaxation]

    def _init_weights(self, X):

        return np.random.randn(X.shape[1], 1)

    def apply_kernel(self, X):
        kernel_handler = {
            "rbf": self._apply_rbf,
            "linear": self._apply_linear,
            "poly": self._apply_poly
        }
        return self.c_(kernel_handler[self.kernel_type](X))

    def _apply_linear(self, X):
        return linear_kernel(X)

    def _apply_poly(self, X):
        return polynomial_kernel(X,
                                 degree=self.degree,
                                 coef0=self.coef0,
                                 gamma=self.gamma)

    def _apply_rbf(self, X):
        return rbf_kernel(X, gamma=self.gamma)
class KernelPCA(TransformerMixin, BaseEstimator):
    def __init__(self,
                 kernel="linear",
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None,
                 alpha=1.0,
                 fit_inverse_transform=False,
                 eigen_solver='auto',
                 tol=0,
                 max_iter=None,
                 remove_zero_eig=False,
                 n_components=2,
                 random_state=None,
                 copy_X=True,
                 n_jobs=None,
                 coeficient=None,
                 nkernel=10):
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.nkernel = nkernel
        self.n_components = n_components
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.copy_X = copy_X
        self._centerer = KernelCenterer()
        self.coeficient = coeficient

    def kernels(self, X):
        kern = [
            'linear', 'poly', 'polynomial', 'rbf', 'laplacian', 'sigmoid',
            'cosine'
        ]
        tkernel = len(kern) + 2
        K = []
        self.gamma = 0.00001
        gamma_jump = 1.02
        K.append(normalize(self._get_kernel(X, 'rbf')))
        #K.append(procrustes(K[0],normalize(LLE.K(X)))[1])
        #K.append(procrustes(K[0],normalize(LE.K(X)))[1])
        #K.append(procrustes(K[0],normalize(Iso.K(X)))[1])
        K.append((LLE.K(X)))
        K.append((LE.K(X)))
        K.append((Iso.K(X)))
        for i in kern:
            if i == 'rbf':
                for j in range(1, self.nkernel - tkernel):
                    #self.gamma=gamma_jump*self.gamma
                    self.gamma = 0.3 * j
                    K.append(self._get_kernel(X, i))
                    #K.append(procrustes(K[0],normalize(self._get_kernel(X,i)))[1])
            else:
                K.append(self._get_kernel(X, i))
                #K.append(procrustes(K[0],normalize(self._get_kernel(X,i)))[1])
        if not (self.coeficient):
            self.coeficient = np.zeros(len(K))
            self.coeficient[0] = 1
        self.SuperK = self.createSuperK(K)
        return K

    def _get_kernel(self, X, kernel):
        params = {
            "gamma": self.gamma,
            "degree": self.degree,
            "coef0": self.coef0
        }
        return pairwise_kernels(X,
                                None,
                                metric=kernel,
                                filter_params=True,
                                n_jobs=self.n_jobs,
                                **params)

    def normalize(self, v):
        return v / max(v)

    def Solve(self, K):

        # SELECT THE BEST METHOD TO CALCULATE THE EIGENVALUES
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and self.n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        #GET EIGENVALUES AND EIGENVECTORS OF THE CENTERED KERNEL
        if eigen_solver == 'dense':
            self.lambdas_, self.vectors_ = linalg.eigh(
                K, eigvals=(K.shape[0] - self.n_components, K.shape[0] - 1))
        elif eigen_solver == 'arpack':
            random_state = check_random_state(self.random_state)
            # initialize with [-1,1] as in ARPACK
            v0 = random_state.uniform(-1, 1, K.shape[0])
            self.lambdas_, self.vectors_ = eigsh(K,
                                                 self.n_components,
                                                 which="LA",
                                                 tol=self.tol,
                                                 maxiter=self.max_iter,
                                                 v0=v0)

        # make sure that the eigenvalues are ok and fix numerical issues
        self.lambdas_ = _check_psd_eigenvalues(self.lambdas_,
                                               enable_warnings=False)

        # flip eigenvectors' sign to enforce deterministic output
        self.vectors_, _ = svd_flip(self.vectors_,
                                    np.empty_like(self.vectors_).T)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.vectors_ = self.vectors_[:, indices]

        # remove eigenvectors with a zero eigenvalue (null space) if required
        if self.remove_zero_eig:
            self.vectors_ = self.vectors_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K

    def fit(self, X):

        return self

    def fit_transform(self, X, y=None):
        #X=normalize(X)
        X = check_array(X, accept_sparse='csr', copy=self.copy_X)
        self.K = self.kernels(X)
        return self.KPCA(self.coeficient)

    def KPCA(self, alpha):
        ##GET THE KERNEL WITH ALPHAS
        self.coeficient = alpha
        self.Kernel = self.Kf(self.K)
        #CENTER THE KERNEL
        self.Center_Kernel = self._centerer.fit_transform(self.Kernel)
        #GET THE EIGENVALUES AND EIGENVECTORS
        self.Solve(self.Center_Kernel)
        #GET THE DIMENSIONAL REDUCTION
        X_Transform = self.vectors_ * np.sqrt(self.lambdas_)
        #X_Transform=np.matmul(self.Kernel,self.vectors_)
        return X_Transform, self.SuperK

    def Kf(self, K):
        Kf = np.zeros(K[0].shape)
        for i in range(0, len(self.coeficient)):
            Kf += self.coeficient[i] * K[i]
        return Kf

    def createSuperK(self, K):
        data = []
        for i in range(len(K)):
            data.append(np.ravel(K[i])[np.newaxis])
        return np.concatenate(tuple(data), axis=0).T
Example #13
class KernelFisher(BaseEstimator, ClassifierMixin, TransformerMixin):
    """
    Kernelized Fisher Discriminant Analysis (KDA)

    A classifier with a non-linear decision boundary, generated
    by fitting class conditional densities to the data under the
    Fisher criterion of maximizing between-class variance
    while minimizing within-class variance.

    The Fisher criterion is applied in a non-linear space by transforming
    the data, X, of dimension D onto a D-dimensional manifold of
    a D'-dimensional space (where D' is possibly infinite) using a function f(X).
    The key to solving the problem in the non-linear space is to write
    the solution to fisher only in terms of inner products of
    the vectors X*Y.  Then the kernel trick can be employed, such that
    the standard inner product is promoted to a general inner product.
    That is, K(X,Y) = X*Y --> K(X,Y) = f(X)*f(Y), which is allowed for
    valid Kernels.  In this case, the function f() does not need to be
    known, but only the kernel K(X,Y).

    The fitted model can also be used to reduce the dimensionality
    of the input, by projecting it to the most discriminative
    directions.

    Parameters
    ----------

    use_total_scatter : boolean
        If True then use total scatter matrix St = Sum_i (x_i - m)(x_i - m).T instead of Sw
        If False, use Sw = Sum_{c=1... n_classes} Sum_{i; x in class c} norm_c (x_i - m_c)(x_i - m_c).T
                   where norm_c = 1/N_samples_class_c if norm_covariance=True, else norm_c = 1

    sigma_sqrd:  float
        smoothing regularization parameter: the singular-value scale at which smoothing becomes important.
        NOTE: interpreted as a fraction when norm_covariance=False, since the scale of the singular values is not known a priori in that case

    tol:  float
         used for truncated SVD of St.  Essentially a form of regularization.  Tol for SVD(R) is 1e-6, fixed right now

    kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel used for generalized inner product.
        Default: "linear"

    degree : int, optional
        Degree for poly
        Default: 3.

    gamma : float, optional
        Kernel coefficient for rbf, sigmoid and poly kernels.
        Default: 1/n_features.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.

    norm_covariance :  boolean
        if true, the covariance of each class will be divided by (n_points_in_class - 1)
        NOTE: not currently used

    priors : array, optional, shape = [n_classes]
        Priors on classes

    print_timing: boolean
        print time for several matrix operations in the algorithm

    Attributes
    ----------
    `means_` : array-like, shape = [n_components_found_, [n_classes, n_features] ]
        Class means, for each component found
    `priors_` : array-like, shape = [n_classes]
        Class priors (sum to 1)
    
    `n_components_found_` : int
        number of fisher components found, which is <= n_components
        
    Examples (put fisher.py in working directory)
    --------
    >>> import numpy as np
    >>> from fisher import KernelFisher
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> y = np.array([0, 0, 0, 1, 1, 1])
    >>> fd = KernelFisher()
    >>> fd.fit(X, y)
    KernelFisher(coef0=1, degree=3, gamma=None, kernel='linear',
       norm_covariance=False, print_timing=False, priors=None,
       sigma_sqrd=1e-08, tol=0.001, use_total_scatter=True)
    >>> print(fd.transform([[-0.8, -1]]))
    [[-7.62102356]]

    """

    def __init__(self, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3,
                 kernel="linear", gamma=None, degree=3, coef0=1,
                 norm_covariance = False, priors=None, print_timing=False):

        self.use_total_scatter = use_total_scatter
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        self.kernel = kernel.lower()
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self._centerer = KernelCenterer()

        self.norm_covariance = norm_covariance
        self.print_timing = print_timing
        
        
        self.priors = np.asarray(priors) if priors is not None else None
        
        if self.priors is not None:
            if (self.priors < 0).any():
                raise ValueError('priors must be non-negative')
            if self.priors.sum() != 1:
                print('warning: the priors do not sum to 1. Renormalizing')
                self.priors = self.priors / self.priors.sum()
                
                
    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        params = {"gamma": self.gamma,
                  "degree": self.degree,
                  "coef0": self.coef0}
        try:
            return pairwise_kernels(X, Y, metric=self.kernel,
                                    filter_params=True, **params)
        except AttributeError:
            raise ValueError("%s is not a valid kernel. Valid kernels are: "
                             "rbf, poly, sigmoid, linear and precomputed."
                             % self.kernel)


    def fit(self, X, y):
        """
        Fit the Kernelized Fisher Discriminant model according to the given training data and parameters.
        Based on "Algorithm 5" in
        Zhang, et. al. 'Regularized Discriminant Analysis, Ridge Regression and Beyond' Journal of Machine Learning Research 11 (2010) 2199-2228
        NOTE: setting norm_covariance=False and use_total_scatter=True, and solution_norm = 'A' or 'B' will give the algorithm from paper

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array, shape = [n_samples]
            Target values (integers)
        
        """
        X, y = check_X_y(X, y)
        self.classes_, y = np.unique(y, return_inverse=True)
        n_samples, n_features = X.shape
        n_classes = len(self.classes_)
        n_samples_perclass = np.bincount(y)
        if n_classes < 2:
            raise ValueError('y has less than 2 classes')
        if self.priors is None:
            self.priors_ = np.bincount(y) / float(n_samples)
        else:
            self.priors_ = self.priors

        ts = time.time()
                    
        self.means_ = []
        for ind in range(n_classes):
            Xg = X[y == ind, :]
            meang = Xg.mean(0)
            self.means_.append(np.asarray(meang))
        if self.print_timing: print('KernelFisher.fit: means took', time.time() - ts)


        ts = time.time()
        PI_diag = np.diag( 1.0*n_samples_perclass )                                        # shape(PI_diag) = n_classes x n_classes
        PI_inv = np.diag( 1.0 / (1.0*n_samples_perclass) )                                 # shape(PI_inv) = n_classes x n_classes
        PI_sqrt_inv = np.sqrt( PI_inv )                                                    # shape(PI_sqrt_inv) = n_classes x n_classes
        #H = np.identity(n_samples) - (1.0/(1.0*n_samples))*np.ones((n_samples,n_samples))
        E=np.zeros( (n_samples,n_classes) )                                                # shape(E) = n_samples x n_classes
        E[np.arange(n_samples), y] = 1
        E_PIsi = np.dot(E, PI_sqrt_inv)
        One_minus_E_Pi_Et = np.identity(n_samples) - np.inner( E, np.inner(PI_diag, E).T ) # shape(One_minus_E_Pi_Et) = n_samples x n_samples
        if self.print_timing: print('KernelFisher.fit: matrices took', time.time() - ts)


        #####################################################################################################################
        #C = HKH = (I - 1/n 1x1.T) K (I - 1/n 1x1.T) = (K -  1xK_mean.T) * (I - 1/n 1x1.T)
        #        = K - K_meanx1.T - 1xK_mean.T + K_allmean 1x1
        #  --> which is the same as what self._centerer.fit_transform(C) performs
        #
        # if use_total_scatter=False,
        #      then using Sw which is (1-E*Pi*E.T)K(1-E*Pi*E.T)
        #####################################################################################################################
        ts = time.time()
        C = self._get_kernel(X) 
        K_mean = np.sum(C, axis=1) / (1.0*C.shape[1])

        if self.use_total_scatter:
            C = self._centerer.fit_transform(C)
        else:
            C = np.inner( One_minus_E_Pi_Et, np.inner(C, One_minus_E_Pi_Et).T)
        if self.print_timing: print('KernelFisher.fit: Kernel Calculation took', time.time() - ts)


        ts = time.time()
        Uc, Sc, Utc, Sc_norm = self.condensed_svd( C, self.tol, store_singular_vals=True )
        if self.print_timing: print('KernelFisher.fit: Uc, Sc, Utc took', time.time() - ts)


        ts = time.time()
        #scale up sigma to appropriate range of singular values
        reg_factor = self.sigma_sqrd * Sc_norm 
        St_reg_inv = np.inner( Uc, np.inner(np.diag(1.0/(Sc + reg_factor)), Utc.T).T )   
        if self.print_timing: print('KernelFisher.fit: St_reg_inv took', time.time() - ts)

        ts = time.time()
        R = np.inner(E_PIsi.T, np.inner(C, np.inner( St_reg_inv, E_PIsi.T ).T ).T )
        if self.print_timing: print('KernelFisher.fit: R took', time.time() - ts)


        ts = time.time()
        Vr, Lr, Vtr, Lr_norm =  self.condensed_svd( R, tol=1e-6 )                
        if self.print_timing: print('KernelFisher.fit: Vr, Lr, Vtr took', time.time() - ts)


        ts = time.time()
        #####################################################################################################################
        #This capital Z is Upsilon.T * H from equation (22)
        #####################################################################################################################
        #Z = np.inner( np.diag(1.0 / np.sqrt(Lr)), np.inner(Vtr, np.inner(E_PIsi.T, np.inner(C, St_reg_inv.T ).T ).T ).T )
        Z = np.inner( np.inner( np.inner( np.inner( np.diag(1.0 / np.sqrt(Lr)), Vtr.T), E_PIsi), C.T), St_reg_inv)

        Z = (Z.T - (Z.sum(axis=1) / (1.0*Z.shape[1])) ).T
        if self.print_timing: print('KernelFisher.fit: Z took', time.time() - ts)

        self.Z = Z
        self.n_components_found_ = Z.shape[0]

        #####################################################################################################################
        #This K_mean is (1/n) K*1_n from equation (22)
        #####################################################################################################################
        self.K_mean = K_mean

        #print Z.shape, K_mean.shape, self.n_components_found_

        self.X_fit_ = X
        return self

    def condensed_svd(self, M, tol=1e-3, store_singular_vals=False):
        U, S, Vt = linalg.svd(M, full_matrices=False)
        if store_singular_vals:
            self.singular_vals = S

        #want tolerance on fraction of variance in singular value
        #when not norm_covariance, need to normalize singular values
        S_norm = np.sum(S)

        rank = np.sum( (S/S_norm) > tol )

        return U[:,:rank], S[:rank], Vt[:rank,:], S_norm


    @property
    def classes(self):
        warnings.warn("KernelFisher.classes is deprecated and will be removed in 0.14. "
                      "Use .classes_ instead.", DeprecationWarning,
                      stacklevel=2)
        return self.classes_

    def _decision_function(self, X):
        #X = np.asarray(X)
        return self.transform(X)

    def decision_function(self, X):
        """
        This function return the decision function values related to each
        class on an array of test vectors X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
            Decision function values related to each class, per sample
            n_components_found_ is the number of components requested and found
            NOTE: currently identical to self.transform(X)
        """
        return self._decision_function(X)

    def transform(self, X):
        """
        Project the data so as to maximize class separation (large separation
        between projected class means and small variance within each class).

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
        """

        #X = np.asarray(X)
        #ts = time.time()
        k = self._get_kernel(X, self.X_fit_)
        #if self.print_timing: print 'KernelFisher.transform: k took', time.time() - ts

        #ts = time.time()
        z = np.inner(self.Z, (k-self.K_mean) ).T
        #if self.print_timing: print 'KernelFisher.transform: z took', time.time() - ts

        return z
        
    

    def fit_transform(self, X, y, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3):
        """
        Fit the Fisher Discriminant model according to the given training data and parameters.
        The project the data onto up to n_components_found_ so as to maximize class separation (large separation
        between projected class means and small variance within each class).
        NOTE this function is not clever, it simply runs fit(X,y [, ...]).transform(X)

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
        y : array, shape = [n_samples]
            Target values (integers)
        use_total_scatter, sigma_sqrd, tol : optional
            Override the corresponding constructor parameters (see the class docstring) before fitting.

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
        """
        self.use_total_scatter = use_total_scatter
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        return self.fit(X, y).transform(X)
class KernelFisher(BaseEstimator, ClassifierMixin, TransformerMixin):
    """
    Kernel Fisher Discriminant Analysis (KFDA)

    Parameters
    ----------
    
    sigma_sqrd:  float
    tol:  float
    kernel: "linear","poly","rbf","sigmoid" 
    degree : Degree for poly
    gamma : gamma as in LDA 
    coef0 : coefficient in poly and sigmoid
    """
    def __init__(self,
                 sigma_sqrd=1e-8,
                 tol=1.0e-3,
                 kernel="linear",
                 gamma=None,
                 degree=3,
                 coef0=1):

        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        self.kernel = kernel.lower()
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self._centerer = KernelCenterer()

    @property
    def _pairwise(self):
        return self.kernel == "kerenl"

    def _get_kernel(self, X, Y=None):
        params = {
            "gamma": self.gamma,
            "degree": self.degree,
            "coef0": self.coef0
        }

        return pairwise_kernels(X,
                                Y,
                                metric=self.kernel,
                                filter_params=True,
                                **params)

    def fit(self, X, y):
        X, y = check_X_y(X, y)  #does not accept sparse arrays
        self.classes_, y = np.unique(y, return_inverse=True)
        n_samples, n_features = X.shape
        n_classes = len(self.classes_)
        n_samples_perclass = np.bincount(y)

        self.means_ = []
        for ind in range(n_classes):
            Xg = X[y == ind, :]
            meang = Xg.mean(0)
            self.means_.append(np.asarray(meang))

        PI_diag = np.diag(
            1.0 * n_samples_perclass)  # shape(PI_diag) = n_classes x n_classes
        PI_inv = np.diag(
            1.0 /
            (1.0 *
             n_samples_perclass))  # shape(PI_inv) = n_classes x n_classes
        PI_sqrt_inv = np.sqrt(
            PI_inv)  # shape(PI_sqrt_inv) = n_classes x n_classes
        E = np.zeros(
            (n_samples, n_classes))  # shape(E) = n_samples x n_classes
        E[np.arange(n_samples), y] = 1
        EPI = np.dot(E, PI_sqrt_inv)
        #One_minus_E_Pi_Et = np.identity(n_samples) - np.inner( E, np.inner(PI_diag, E).T ) # shape(One_minus_E_Pi_Et) = n_samples x n_samples
        C = self._get_kernel(X)
        K_mean = np.sum(C, axis=1) / (1.0 * C.shape[1])
        C = self._centerer.fit_transform(C)
        Uc, Sc, Utc, Sc_norm = self.svd_comp(C, self.tol, flag=True)
        reg_factor = self.sigma_sqrd * Sc_norm
        St_reg_inv = np.inner(
            Uc,
            np.inner(np.diag(1.0 / (Sc + reg_factor)), Utc.T).T)
        R = np.inner(EPI.T, np.inner(C, np.inner(St_reg_inv, EPI.T).T).T)
        Vr, Lr, Vtr, Lr_norm = self.svd_comp(R, tol=1e-6)
        Z = np.inner(
            np.inner(
                np.inner(np.inner(np.diag(1.0 / np.sqrt(Lr)), Vtr.T), EPI),
                C.T), St_reg_inv)
        Z = (Z.T - (Z.sum(axis=1) / (1.0 * Z.shape[1]))).T
        self.Z = Z
        self.n_components_found_ = Z.shape[0]
        self.K_mean = K_mean
        self.X_fit_ = X

        return self

    def svd_comp(self, M, tol=1e-3, flag=False):
        U, S, Vt = linalg.svd(M, full_matrices=False)
        if flag:
            self.singular_vals = S

        S_norm = np.sum(S)
        rank = np.sum((S / S_norm) > tol)

        return U[:, :rank], S[:rank], Vt[:rank, :], S_norm

    @property
    def classes(self):
        return self.classes_

    def _decision_function(self, X):
        return self.transform(X)

    def decision_function(self, X):
        return self._decision_function(X)

    def transform(self, X):

        k = self._get_kernel(X, self.X_fit_)
        z = np.inner(self.Z, (k - self.K_mean)).T

        return z

    def fit_transform(self, X, y, sigma_sqrd=1e-8, tol=1.0e-3):
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        return self.fit(X, y).transform(X)
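A brief usage sketch on toy two-class data, assuming the imports this class relies on (numpy as np, scipy linalg, and KernelCenterer, pairwise_kernels, check_X_y from scikit-learn) are in scope:

import numpy as np

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(20, 4) - 1, rng.randn(20, 4) + 1])
y = np.array([0] * 20 + [1] * 20)

kfd = KernelFisher(kernel="rbf", gamma=0.5)
Z = kfd.fit(X, y).transform(X)   # discriminative projection
print(Z.shape)                   # (40, n_components_found_)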
#S = np.sqrt(eVals)[::-1] #reverse since eigenvalues are in increasing order

#Y=S.T.dot(X_std)
# Make a list of (eigenvalue, eigenvector) tuples
# d×k-dimensional eigenvector matrix W.
#W = [(abs(eVals[i]), eVecs[:,i]) for i in range(len(eVals))]
#Y=X×W
#Y=X_std.dot(eVecs.real)
#Y=np.dot(eVecs.T, X_std.T).T

#Kernel_PCA since d>>n
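# X is assumed to be a sparse (n x d) data matrix with d >> n (hence K.toarray() below), and k the number of components to keep; both are defined earlier in the original script.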
K = X.dot(X.T)

# Centering the kernel since the data has to be standardized
kern_cent = KernelCenterer()
S = kern_cent.fit_transform(K.toarray())

#val,vec=linalg.eigs(S,k,which='LM')

eig_vals, eig_vecs = np.linalg.eig(S)
eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i])
             for i in range(len(eig_vals))]
# Sort the (eigenvalue, eigenvector) tuples from high to low
eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True)
vec = np.array([eig_pairs[i][1] for i in range(k)])
vec = vec.T  # to make eigen vector matrix nxk

# d×k-dimensional eigenvector matrix W.
W = X.T.dot(vec)
Y = X.dot(W)
Example #16
class KernelPCA(TransformerMixin, BaseEstimator):
    def __init__(self,
                 degree=3,
                 coef0=1,
                 kernel_params=None,
                 alpha=1.0,
                 eigen_solver='auto',
                 neigh=8,
                 tol=0,
                 max_iter=None,
                 remove_zero_eig=True,
                 n_components=2,
                 random_state=None,
                 n_jobs=None,
                 coeficient=None,
                 nkernel=10):
        self.kernel_params = kernel_params
        self.gamma = 0.0001
        self.neigh = neigh
        self.nkernel = nkernel
        self.n_components = n_components
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self.n_jobs = n_jobs
        self._centerer = KernelCenterer()
        self.coeficient = coeficient

    def KLE(self, X, neigh):
        LE = SpectralEmbedding(n_neighbors=neigh)
        return LE.K(X)

    def KIsomap(self, X, neigh):
        Iso = Isomap(n_neighbors=neigh)
        return Iso.K(X)

    def KLLE(self, X, neigh):
        LLE = LocallyLinearEmbedding(n_neighbors=neigh)
        return LLE.K(X)

    def kernels(self, X):
        kern = [
            'linear', 'poly', 'polynomial', 'rbf', 'laplacian', 'sigmoid',
            'cosine'
        ]
        if self.neigh > len(X):
            self.neigh = len(X) - 3
        tkernel = len(kern) + 3 * self.neigh - 1
        K = []
        K.append((KCMDS(X)))
        for i in kern:
            if i == 'rbf':
                for j in range(0, self.nkernel - tkernel):
                    self.gamma = (j) * 0.01
                    K.append((self._get_kernel(X, i)))
            else:
                K.append((self._get_kernel(X, i)))

        for i in range(2, 2 + self.neigh):
            K.append((self.KLE(X, i)))
            K.append((self.KLLE(X, i)))
            K.append((self.KIsomap(X, i)))

        # In case the coefficients weren't set
        if not (self.coeficient):
            self.coeficient = np.zeros(len(K))
            self.coeficient[0] = 1  #Linear Kernel

        self.SuperK = self.createSuperK(K)
        return K

    def _get_kernel(self, X, kernel):
        params = {
            "gamma": self.gamma,
            "degree": self.degree,
            "coef0": self.coef0
        }
        return pairwise_kernels(X,
                                None,
                                metric=kernel,
                                filter_params=True,
                                n_jobs=self.n_jobs,
                                **params)

    def Solve(self, K):

        #GET EIGENVALUES AND EIGENVECTORS OF THE CENTERED KERNEL
        self.lambdas_, self.vectors_ = linalg.eigh(
            K, eigvals=(K.shape[0] - self.n_components, K.shape[0] - 1))

        # make sure that the eigenvalues are ok and fix numerical issues
        self.lambdas_ = _check_psd_eigenvalues(self.lambdas_,
                                               enable_warnings=False)

        # flip eigenvectors' sign to enforce deterministic output
        self.vectors_, _ = svd_flip(self.vectors_,
                                    np.empty_like(self.vectors_).T)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.vectors_ = self.vectors_[:, indices]

        # remove eigenvectors with a zero eigenvalue (null space) if required
        if self.remove_zero_eig:
            self.vectors_ = self.vectors_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K

    def fit_transform(self, X, y=None):
        #X=normalize(X)
        X = check_array(X, accept_sparse='csr', copy=True)
        self.K = self.kernels(X)
        return self.KPCA(self.coeficient)

    def KPCA(self, alpha):
        ##GET THE KERNEL WITH ALPHAS
        self.coeficient = alpha
        self.Kernel = self.Kf(self.K)
        #CENTER THE KERNEL
        self.Center_Kernel = self._centerer.fit_transform(self.Kernel)
        #GET THE EIGENVALUES AND EIGENVECTORS
        self.Solve(self.Center_Kernel)
        #GET THE DIMENSIONAL REDUCTION
        X_Transform = self.vectors_ * np.sqrt(self.lambdas_)
        #X_Transform=np.matmul(self.Kernel,self.vectors_)
        return X_Transform, self.SuperK

    def Kf(self, K):
        Kf = np.zeros(K[0].shape)
        for i in range(0, len(self.coeficient)):
            Kf += self.coeficient[i] * K[i]
        return Kf

    def createSuperK(self, K):
        data = []
        for i in range(len(K)):
            data.append(np.ravel(K[i])[np.newaxis])
        return np.concatenate(tuple(data), axis=0).T
Example #17
def ALIGNFSOFT(kernel_list, ky, y, test_fold, tags):
    # Find best upper bound in CV and train on whole data
    # Return the weights
    y = y.ravel()
    n_km = len(kernel_list)

    tag = np.array(tags)
    tag = tag[tag!=test_fold]
    remain_fold = np.unique(tag).tolist()
    all_best_c = []
    for validate_fold in remain_fold:
        train = tag != validate_fold
        validate = tag == validate_fold
        # train on train fold ,validate on validate_fold.
        # Do not use the test fold; it is used in the outer CV
        ky_train = ky[np.ix_(train, train)]
        y_train = y[train]
        y_validate = y[validate]
        train_km_list = []
        validate_km_list = []
        n_train = len(y_train)
        n_validate = len(y_validate)

        for km in kernel_list:
            kc = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            validate_km = km[np.ix_(validate, train)]
            # center train and validate kernels                      
            train_km_c = kc.fit_transform(train_km)
            train_km_list.append(train_km_c)
            validate_km_c = kc.transform(validate_km)
            validate_km_list.append(validate_km_c)

        # if the labels are too imbalanced, SVM CV will fail; just return the ALIGNF solution
        if np.sum(y_train==1) > n_train-3 or np.sum(y_train==-1) > n_train-3:
            return 1e8, ALIGNFSLACK(train_km_list, ky_train, 1e8) 

        Cs = np.exp2(np.array(range(-9,7))).tolist() + [1e8]
        W = np.zeros((n_km, len(Cs)))
        for i in range(len(Cs)):
            W[:,i] = ALIGNFSLACK(train_km_list, ky_train, Cs[i])

        W = W / np.linalg.norm(W, 2, 0)
        f1 = np.zeros(len(Cs))
        for i in range(len(Cs)):
            train_ckm = np.zeros((n_train,n_train))
            validate_ckm = np.zeros((n_validate,n_train))
            w = W[:,i]
            for j in range(n_km):
                train_ckm += w[j]*train_km_list[j]
                validate_ckm += w[j]*validate_km_list[j]
            f1[i] = svm(train_ckm, validate_ckm, y_train, y_validate)
        # return the first maximum
        maxind = np.argmax(f1)
        bestC = Cs[maxind]
        all_best_c.append(bestC)
        print(f1)
        print("..Best C is", bestC)

    bestC = np.mean(all_best_c)
    print "..Take the average best upper bound", bestC
    # use the best upper bound to solve ALIGNFSOFT
    return bestC, ALIGNFSLACK(kernel_list, ky, bestC)    
Example #18
def AirbnbKNN_score(dc_listings):
    # Data selection
    if (filter_flag == True):
        house_features = dc_listings[dc_listings.price < 1000]
    else:
        house_features = dc_listings
    house_features = house_features.dropna(
        subset=['host_acceptance_rate'])  # drop listings that were never booked
    del house_features['city']  # remove redundant information
    del house_features['zipcode']
    del house_features['state']
    del house_features['minimum_nights']
    del house_features['maximum_nights']
    del house_features['host_listings_count']
    if (cleaning_fee_flag == False):
        del house_features['cleaning_fee']
    if (security_deposit_flag == False):
        del house_features['security_deposit']
    if (independent_flag == True):
        del house_features['bedrooms']
        del house_features['beds']
    if (response_flag == False):
        del house_features['host_response_rate']
        del house_features['host_acceptance_rate']
    if (review_flag == False):
        del house_features['number_of_reviews']
    if (room_type_flag == False):
        del house_features['room_type']

    # replace latitude/longitude with the distance to the city center
    if (distance_flag == True):
        DC_capital_lat = 38.889931
        DC_capital_long = -77.009003
        distance = ((house_features['latitude'] - DC_capital_lat)**2 +
                    (house_features['longitude'] - DC_capital_long)**2)**0.5
        house_features['latitude'] = distance
        del house_features['longitude']

    # build the KNN inputs
    house_features = house_features.fillna(0)  # fill NaN in cleaning_fee and similar columns
    AirbnbKNN_X = house_features
    AirbnbKNN_y = np.array(house_features['price'])
    del AirbnbKNN_X['price']
    AirbnbKNN_X = np.array(AirbnbKNN_X)

    print(house_features.iloc[0])

    # feature scaling
    if (normalizer_flag == "min_max_scaler"):
        from sklearn import preprocessing
        min_max_scaler = preprocessing.MinMaxScaler()
        AirbnbKNN_X = min_max_scaler.fit_transform(AirbnbKNN_X)
    if (normalizer_flag == "kernel_centerer"):
        from sklearn.preprocessing import KernelCenterer
        kernel_centerer = KernelCenterer().fit(AirbnbKNN_X)
        AirbnbKNN_X = kernel_centerer.fit_transform(AirbnbKNN_X)
    if (normalizer_flag == "standard_scaler"):
        from sklearn.preprocessing import StandardScaler
        standard_scaler = StandardScaler()
        AirbnbKNN_X = standard_scaler.fit_transform(AirbnbKNN_X)

    # split the data, train the model, predict with the trained model, and evaluate the predictions
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsRegressor
    knn = KNeighborsRegressor(n_neighbors=16)  # KNN regressor from sklearn
    total_score = 0.0
    trial_number = 1000
    score_list = []
    for i in range(trial_number):
        X_train, X_test, y_train, y_test = train_test_split(AirbnbKNN_X,
                                                            AirbnbKNN_y,
                                                            test_size=0.3)
        knn.fit(X_train, y_train)
        #print(knn.predict(X_test))   # the knn here has already been trained
        #print(y_test)    # compare against the true values
        score = knn.score(X_test, y_test)
        total_score += score
        score_list.append(score)
        if (i == 0):
            viz(score, knn.predict(X_test), y_test, 1)

    avg_score = total_score / trial_number
    viz(avg_score, score_list, y_test, 2)
    print("Trial: " + str(trial_number) + " times")
class KernelPCA(BaseEstimator, TransformerMixin):
    """Kernel Principal component analysis (KPCA)

    Non-linear dimensionality reduction through the use of kernels (see
    :ref:`metrics`).

    Parameters
    ----------
    n_components: int or None
        Number of components. If None, all non-zero components are kept.

    kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel.
        Default: "linear"

    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.

    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for kernel passed as
        callable object. Ignored by other kernels.

    alpha: int
        Hyperparameter of the ridge regression that learns the
        inverse transform (when fit_inverse_transform=True).
        Default: 1.0

    fit_inverse_transform: bool
        Learn the inverse transform for non-precomputed kernels.
        (i.e. learn to find the pre-image of a point)
        Default: False

    eigen_solver: string ['auto'|'dense'|'arpack']
        Select eigensolver to use.  If n_components is much less than
        the number of training samples, arpack may be more efficient
        than the dense eigensolver.

    tol: float
        convergence tolerance for arpack.
        Default: 0 (optimal value will be chosen by arpack)

    max_iter : int
        maximum number of iterations for arpack
        Default: None (optimal value will be chosen by arpack)

    remove_zero_eig : boolean, default=False
        If True, then all components with zero eigenvalues are removed, so
        that the number of components in the output may be < n_components
        (and sometimes even zero due to numerical instability).
        When n_components is None, this parameter is ignored and components
        with zero eigenvalues are removed regardless.

    Attributes
    ----------

    lambdas_ :
        Eigenvalues of the centered kernel matrix

    alphas_ :
        Eigenvectors of the centered kernel matrix

    evals_ : array of float, shape (n_samples,)
        All eigenvalues of the centered kernel matrix

    evecs_ : array of float, shape (n_samples, n_samples)
        All eigenvectors of the centered kernel matrix

    dual_coef_ :
        Inverse transform matrix

    X_transformed_fit_ :
        Projection of the fitted data on the kernel principal components

    References
    ----------
    Kernel PCA was introduced in:
        Bernhard Schoelkopf, Alexander J. Smola,
        and Klaus-Robert Mueller. 1999. Kernel principal
        component analysis. In Advances in kernel methods,
        MIT Press, Cambridge, MA, USA 327-352.
    """

    def __init__(self, n_components=None, kernel="linear",
                 gamma=None, degree=3, coef0=1, kernel_params=None,
                 alpha=1.0, fit_inverse_transform=False, eigen_solver='auto',
                 tol=0, max_iter=None, remove_zero_eig=False):
        if fit_inverse_transform and kernel == 'precomputed':
            raise ValueError(
                "Cannot fit_inverse_transform with a precomputed kernel.")
        self.n_components = n_components
        self.kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self._centerer = KernelCenterer()

    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)

    def _fit_transform(self, K):
        """ Fit's using kernel K"""
        # center kernel
        K = self._centerer.fit_transform(K)

        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            self.lambdas_, self.alphas_ = linalg.eigh(
                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
            self.evals_, self.evecs_ = linalg.eigh(K)

        elif eigen_solver == 'arpack':
            self.lambdas_, self.alphas_ = eigsh(K, n_components,
                                                which="LA",
                                                tol=self.tol,
                                                maxiter=self.max_iter)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove eigenvectors with a zero eigenvalue
        if self.remove_zero_eig or self.n_components is None:
            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K

    def _fit_inverse_transform(self, X_transformed, X):
        if hasattr(X, "tocsr"):
            raise NotImplementedError("Inverse transform not implemented for "
                                      "sparse matrices!")

        n_samples = X_transformed.shape[0]
        K = self._get_kernel(X_transformed)
        K.flat[::n_samples + 1] += self.alpha
        self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)
        self.X_transformed_fit_ = X_transformed

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        K = self._get_kernel(X)
        self._fit_transform(K)

        if self.fit_inverse_transform:
            sqrt_lambdas = np.diag(np.sqrt(self.lambdas_))
            X_transformed = np.dot(self.alphas_, sqrt_lambdas)
            self._fit_inverse_transform(X_transformed, X)

        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        self.fit(X, **params)

        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)

        if self.fit_inverse_transform:
            self._fit_inverse_transform(X_transformed, X)

        return X_transformed

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')

        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def inverse_transform(self, X):
        """Transform X back to original space.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_components)

        Returns
        -------
        X_new: array-like, shape (n_samples, n_features)

        References
        ----------
        "Learning to Find Pre-Images", G BakIr et al, 2004.
        """
        if not self.fit_inverse_transform:
            raise NotFittedError("The fit_inverse_transform parameter was not"
                                 " set to True when instantiating and hence "
                                 "the inverse transform is not available.")

        K = self._get_kernel(X, self.X_transformed_fit_)

        return np.dot(K, self.dual_coef_)
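
The class above mirrors scikit-learn's KernelPCA, so a usage sketch with the library class looks like this (toy data; parameters chosen arbitrarily):

import numpy as np
from sklearn.decomposition import KernelPCA as SkKernelPCA

X = np.random.RandomState(0).randn(30, 5)
kpca = SkKernelPCA(n_components=2, kernel="rbf", gamma=0.5)
X_2d = kpca.fit_transform(X)                           # project onto the top 2 kernel PCs
print(X_2d.shape)                                      # (30, 2)
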
Example #20
0
class KernelPCA(BaseEstimator, TransformerMixin):
    """Kernel Principal component analysis (KPCA)
    Non-linear dimensionality reduction through the use of kernels (see
    :ref:`metrics`).
    Read more in the :ref:`User Guide <kernel_PCA>`.
    Parameters
    ----------
    n_components : int, default=None
        Number of components. If None, all non-zero components are kept.
    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel. Default="linear".
    gamma : float, default=1/n_features
        Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other
        kernels.
    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.
    coef0 : float, default=1
        Independent term in poly and sigmoid kernels.
        Ignored by other kernels.
    kernel_params : mapping of string to any, default=None
        Parameters (keyword arguments) and values for kernel passed as
        callable object. Ignored by other kernels.
    alpha : int, default=1.0
        Hyperparameter of the ridge regression that learns the
        inverse transform (when fit_inverse_transform=True).
    fit_inverse_transform : bool, default=False
        Learn the inverse transform for non-precomputed kernels.
        (i.e. learn to find the pre-image of a point)
    eigen_solver : string ['auto'|'dense'|'arpack'], default='auto'
        Select eigensolver to use. If n_components is much less than
        the number of training samples, arpack may be more efficient
        than the dense eigensolver.
    tol : float, default=0
        Convergence tolerance for arpack.
        If 0, optimal value will be chosen by arpack.
    max_iter : int, default=None
        Maximum number of iterations for arpack.
        If None, optimal value will be chosen by arpack.
    remove_zero_eig : boolean, default=False
        If True, then all components with zero eigenvalues are removed, so
        that the number of components in the output may be < n_components
        (and sometimes even zero due to numerical instability).
        When n_components is None, this parameter is ignored and components
        with zero eigenvalues are removed regardless.
    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Used when ``eigen_solver`` == 'arpack'.
        .. versionadded:: 0.18
    copy_X : boolean, default=True
        If True, input X is copied and stored by the model in the `X_fit_`
        attribute. If no further changes will be done to X, setting
        `copy_X=False` saves memory by storing a reference.
        .. versionadded:: 0.18
    n_jobs : int, default=1
        The number of parallel jobs to run.
        If `-1`, then the number of jobs is set to the number of CPU cores.
        .. versionadded:: 0.18
    Attributes
    ----------
    lambdas_ : array, (n_components,)
        Eigenvalues of the centered kernel matrix in decreasing order.
        If `n_components` and `remove_zero_eig` are not set,
        then all values are stored.
    alphas_ : array, (n_samples, n_components)
        Eigenvectors of the centered kernel matrix. If `n_components` and
        `remove_zero_eig` are not set, then all components are stored.
    dual_coef_ : array, (n_samples, n_features)
        Inverse transform matrix. Set if `fit_inverse_transform` is True.
    X_transformed_fit_ : array, (n_samples, n_components)
        Projection of the fitted data on the kernel principal components.
    X_fit_ : (n_samples, n_features)
        The data used to fit the model. If `copy_X=False`, then `X_fit_` is
        a reference. This attribute is used for the calls to transform.
    References
    ----------
    Kernel PCA was introduced in:
        Bernhard Schoelkopf, Alexander J. Smola,
        and Klaus-Robert Mueller. 1999. Kernel principal
        component analysis. In Advances in kernel methods,
        MIT Press, Cambridge, MA, USA 327-352.
    """
    def __init__(self,
                 n_components=None,
                 kernel="linear",
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None,
                 alpha=1.0,
                 fit_inverse_transform=False,
                 eigen_solver='auto',
                 tol=0,
                 max_iter=None,
                 remove_zero_eig=False,
                 random_state=None,
                 copy_X=True,
                 n_jobs=1):
        if fit_inverse_transform and kernel == 'precomputed':
            raise ValueError(
                "Cannot fit_inverse_transform with a precomputed kernel.")
        self.n_components = n_components
        self.kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self._centerer = KernelCenterer()
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.copy_X = copy_X

    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {
                "gamma": self.gamma,
                "degree": self.degree,
                "coef0": self.coef0
            }
        return pairwise_kernels(X,
                                Y,
                                metric=self.kernel,
                                filter_params=True,
                                n_jobs=self.n_jobs,
                                **params)

    def _fit_transform(self, K):
        """ Fit's using kernel K"""
        # center kernel
        K = self._centerer.fit_transform(K)

        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            self.lambdas_, self.alphas_ = linalg.eigh(
                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
        elif eigen_solver == 'arpack':
            random_state = check_random_state(self.random_state)
            # initialize with [-1,1] as in ARPACK
            v0 = random_state.uniform(-1, 1, K.shape[0])
            self.lambdas_, self.alphas_ = eigsh(K,
                                                n_components,
                                                which="LA",
                                                tol=self.tol,
                                                maxiter=self.max_iter,
                                                v0=v0)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove eigenvectors with a zero eigenvalue
        if self.remove_zero_eig or self.n_components is None:
            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K

    def _fit_inverse_transform(self, X_transformed, X):
        if hasattr(X, "tocsr"):
            raise NotImplementedError("Inverse transform not implemented for "
                                      "sparse matrices!")

        n_samples = X_transformed.shape[0]
        K = self._get_kernel(X_transformed)
        K.flat[::n_samples + 1] += self.alpha
        self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)
        self.X_transformed_fit_ = X_transformed

    def fit(self, X, y=None):
        """Fit the model from data in X.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.
        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = check_array(X, accept_sparse='csr', copy=self.copy_X)
        K = self._get_kernel(X)
        self._fit_transform(K)

        if self.fit_inverse_transform:
            sqrt_lambdas = np.diag(np.sqrt(self.lambdas_))
            X_transformed = np.dot(self.alphas_, sqrt_lambdas)
            self._fit_inverse_transform(X_transformed, X)

        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.
        Returns
        -------
        alphas_ : array-like, shape (n_samples, n_components)
            Eigenvectors of the centered kernel matrix.
        lambdas_ : array-like, shape (n_components,)
            Eigenvalues of the centered kernel matrix.
        """
        self.fit(X, **params)

        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)

        if self.fit_inverse_transform:
            self._fit_inverse_transform(X_transformed, X)

        return self.alphas_, self.lambdas_

    def transform(self, X):
        """Transform X.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')

        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def get_eigen(self):
        return self.alphas_, self.lambdas_

    def inverse_transform(self, X):
        """Transform X back to original space.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_components)
        Returns
        -------
        X_new : array-like, shape (n_samples, n_features)
        References
        ----------
        "Learning to Find Pre-Images", G BakIr et al, 2004.
        """
        if not self.fit_inverse_transform:
            raise NotFittedError("The fit_inverse_transform parameter was not"
                                 " set to True when instantiating and hence "
                                 "the inverse transform is not available.")

        K = self._get_kernel(X, self.X_transformed_fit_)

        return np.dot(K, self.dual_coef_)
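
A small numpy sketch of the projection identities this class relies on: training projections are alphas_ * sqrt(lambdas_), and projecting the (centered) training kernel with alphas_ / sqrt(lambdas_) reproduces the same coordinates. Toy linear kernel; zero eigenvalues are dropped as remove_zero_eig would.

import numpy as np
from scipy import linalg
from sklearn.preprocessing import KernelCenterer

rng = np.random.RandomState(0)
X = rng.randn(10, 3)
K = KernelCenterer().fit_transform(X @ X.T)            # centered linear kernel

lambdas, alphas = linalg.eigh(K)
keep = lambdas > 1e-10                                 # drop numerically zero eigenvalues
lambdas, alphas = lambdas[keep], alphas[:, keep]

train_proj = alphas * np.sqrt(lambdas)                 # what fit_transform computes
reproj = K @ (alphas / np.sqrt(lambdas))               # what transform does on the training kernel
print(np.allclose(train_proj, reproj))                 # True
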
Example #21
0
class KernelFisher(BaseEstimator, ClassifierMixin, TransformerMixin):
    """
    Kernelized Fisher Discriminant Analysis (KDA)

    A classifier with a non-linear decision boundary, obtained by applying
    the Fisher criterion of maximizing between-class variance
    while minimizing within-class variance in a kernel-induced feature space.

    The Fisher criterion is applied in a non-linear space by mapping
    the data, X, of dimension D onto a D-dimensional manifold of
    a D'-dimensional space (where D' is possibly infinite) using a function f(X).
    The key to solving the problem in the non-linear space is to write
    the Fisher solution only in terms of inner products of
    the vectors, X*Y.  Then the kernel trick can be employed, such that
    the standard inner product is promoted to a general inner product.
    That is, K(X,Y) = X*Y --> K(X,Y) = f(X)*f(Y), which is allowed for
    valid kernels.  In this case, the function f() does not need to be
    known, only the kernel K(X,Y).

    The fitted model can also be used to reduce the dimensionality
    of the input, by projecting it to the most discriminative
    directions.

    Parameters
    ----------

    use_total_scatter : boolean
        If True then use total scatter matrix St = Sum_i (x_i - m)(x_i - m).T instead of Sw
        If False, use Sw = Sum_{c=1... n_classes} Sum_{i; x in class c} norm_c (x_i - m_c)(x_i - m_c).T
                   where norm_c = 1/N_samples_class_c if norm_covariance=True, else norm_c = 1

    sigma_sqrd:  float
        smoothing regularization parameter: the singular-value scale at which smoothing becomes important.
        NOTE: treated as a fraction when norm_covariance=False, since the scale of the singular values is not known a priori in that case

    tol:  float
         tolerance used for the truncated SVD of St; essentially a form of regularization.  The tolerance for SVD(R) is fixed at 1e-6 for now.

    kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel used for generalized inner product.
        Default: "linear"

    degree : int, optional
        Degree for poly
        Default: 3.

    gamma : float, optional
        Kernel coefficient for rbf, sigmoid and poly kernels.
        Default: 1/n_features.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.

    norm_covariance :  boolean
        if true, the covariance of each class will be divided by (n_points_in_class - 1)
        NOTE: not currently used

    priors : array, optional, shape = [n_classes]
        Priors on classes

    print_timing: boolean
        print time for several matrix operations in the algorithm

    Attributes
    ----------
    `means_` : array-like, shape = [n_classes, n_features]
        Per-class means of the training data
    `priors_` : array-like, shape = [n_classes]
        Class priors (sum to 1)
    
    `n_components_found_` : int
        number of fisher components found, which is <= n_components
        
    Examples (put fisher.py in working directory)
    --------
    >>> import numpy as np
    >>> from fisher import KernelFisher
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> y = np.array([0, 0, 0, 1, 1, 1])
    >>> fd = KernelFisher()
    >>> fd.fit(X, y)
    KernelFisher(coef0=1, degree=3, gamma=None, kernel='linear',
       norm_covariance=False, print_timing=False, priors=None,
       sigma_sqrd=1e-08, tol=0.001, use_total_scatter=True)
    >>> print(fd.transform([[-0.8, -1]]))
    [[-7.62102356]]

    """
    def __init__(self,
                 use_total_scatter=True,
                 sigma_sqrd=1e-8,
                 tol=1.0e-3,
                 kernel="linear",
                 gamma=None,
                 degree=3,
                 coef0=1,
                 norm_covariance=False,
                 priors=None,
                 print_timing=False):

        self.use_total_scatter = use_total_scatter
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        self.kernel = kernel.lower()
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self._centerer = KernelCenterer()

        self.norm_covariance = norm_covariance
        self.print_timing = print_timing

        self.priors = np.asarray(priors) if priors is not None else None

        if self.priors is not None:
            if (self.priors < 0).any():
                raise ValueError('priors must be non-negative')
            if self.priors.sum() != 1:
                print 'warning: the priors do not sum to 1. Renormalizing'
                self.priors = self.priors / self.priors.sum()

    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        params = {
            "gamma": self.gamma,
            "degree": self.degree,
            "coef0": self.coef0
        }
        try:
            return pairwise_kernels(X,
                                    Y,
                                    metric=self.kernel,
                                    filter_params=True,
                                    **params)
        except AttributeError:
            raise ValueError("%s is not a valid kernel. Valid kernels are: "
                             "rbf, poly, sigmoid, linear and precomputed." %
                             self.kernel)

    def fit(self, X, y):
        """
        Fit the Kernelized Fisher Discriminant model according to the given training data and parameters.
        Based on "Algorithm 5" in
        Zhang et al., 'Regularized Discriminant Analysis, Ridge Regression and Beyond', Journal of Machine Learning Research 11 (2010) 2199-2228
        NOTE: setting norm_covariance=False and use_total_scatter=True, and solution_norm = 'A' or 'B' will give the algorithm from paper

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array, shape = [n_samples]
            Target values (integers)
        
        """
        X, y = check_X_y(X, y)  #does not accept sparse arrays
        self.classes_, y = unique(y, return_inverse=True)
        n_samples, n_features = X.shape
        n_classes = len(self.classes_)
        n_samples_perclass = np.bincount(y)
        if n_classes < 2:
            raise ValueError('y has less than 2 classes')
        if self.priors is None:
            self.priors_ = np.bincount(y) / float(n_samples)
        else:
            self.priors_ = self.priors

        ts = time.time()

        self.means_ = []
        for ind in xrange(n_classes):
            Xg = X[y == ind, :]
            meang = Xg.mean(0)
            self.means_.append(np.asarray(meang))
        if self.print_timing:
            print 'KernelFisher.fit: means took', time.time() - ts

        ts = time.time()
        PI_diag = np.diag(
            1.0 * n_samples_perclass)  # shape(PI_diag) = n_classes x n_classes
        PI_inv = np.diag(
            1.0 /
            (1.0 *
             n_samples_perclass))  # shape(PI_inv) = n_classes x n_classes
        PI_sqrt_inv = np.sqrt(
            PI_inv)  # shape(PI_sqrt_inv) = n_classes x n_classes
        #H = np.identity(n_samples) - (1.0/(1.0*n_samples))*np.ones((n_samples,n_samples))
        E = np.zeros(
            (n_samples, n_classes))  # shape(E) = n_samples x n_classes
        E[[range(n_samples), y]] = 1
        E_PIsi = np.dot(E, PI_sqrt_inv)
        One_minus_E_Pi_Et = np.identity(n_samples) - np.inner(
            E,
            np.inner(PI_diag,
                     E).T)  # shape(One_minus_E_Pi_Et) = n_samples x n_samples
        if self.print_timing:
            print 'KernelFisher.fit: matrices took', time.time() - ts

        #####################################################################################################################
        #C = HKH = (I - 1/n 1x1.T) K (I - 1/n 1x1.T) = (K -  1xK_mean.T) * (I - 1/n 1x1.T)
        #        = K - K_meanx1.T - 1xK_mean.T + K_allmean 1x1
        #  --> which is the same as what self._centerer.fit_transform(C) performs
        #
        # if use_total_scatter=False,
        #      then using Sw which is (1-E*Pi*E.T)K(1-E*Pi*E.T)
        #####################################################################################################################
        ts = time.time()
        C = self._get_kernel(X)
        K_mean = np.sum(C, axis=1) / (1.0 * C.shape[1])

        if self.use_total_scatter:
            C = self._centerer.fit_transform(C)
        else:
            C = np.inner(One_minus_E_Pi_Et, np.inner(C, One_minus_E_Pi_Et).T)
        if self.print_timing:
            print 'KernelFisher.fit: Kernel Calculation took', time.time() - ts

        ts = time.time()
        Uc, Sc, Utc, Sc_norm = self.condensed_svd(C,
                                                  self.tol,
                                                  store_singular_vals=True)
        if self.print_timing:
            print 'KernelFisher.fit: Uc, Sc, Utc took', time.time() - ts

        ts = time.time()
        #scale up sigma to appropriate range of singular values
        reg_factor = self.sigma_sqrd * Sc_norm
        St_reg_inv = np.inner(
            Uc,
            np.inner(np.diag(1.0 / (Sc + reg_factor)), Utc.T).T)
        if self.print_timing:
            print 'KernelFisher.fit: St_reg_inv took', time.time() - ts

        ts = time.time()
        R = np.inner(E_PIsi.T, np.inner(C, np.inner(St_reg_inv, E_PIsi.T).T).T)
        if self.print_timing:
            print 'KernelFisher.fit: R took', time.time() - ts

        ts = time.time()
        Vr, Lr, Vtr, Lr_norm = self.condensed_svd(R, tol=1e-6)
        if self.print_timing:
            print 'KernelFisher.fit: Vr, Lr, Vtr took', time.time() - ts

        ts = time.time()
        #####################################################################################################################
        #This capital Z is Upsilon.T * H from equation (22)
        #####################################################################################################################
        #Z = np.inner( np.diag(1.0 / np.sqrt(Lr)), np.inner(Vtr, np.inner(E_PIsi.T, np.inner(C, St_reg_inv.T ).T ).T ).T )
        Z = np.inner(
            np.inner(
                np.inner(np.inner(np.diag(1.0 / np.sqrt(Lr)), Vtr.T), E_PIsi),
                C.T), St_reg_inv)

        Z = (Z.T - (Z.sum(axis=1) / (1.0 * Z.shape[1]))).T
        if self.print_timing:
            print 'KernelFisher.fit: Z took', time.time() - ts

        self.Z = Z
        self.n_components_found_ = Z.shape[0]

        #####################################################################################################################
        #This K_mean is (1/n) K*1_n from equation (22)
        #####################################################################################################################
        self.K_mean = K_mean

        #print Z.shape, K_mean.shape, self.n_components_found_

        self.X_fit_ = X
        return self

    def condensed_svd(self, M, tol=1e-3, store_singular_vals=False):
        U, S, Vt = linalg.svd(M, full_matrices=False)
        if store_singular_vals:
            self.singular_vals = S

        #want tolerance on fraction of variance in singular value
        #when not norm_covariance, need to normalize singular values
        S_norm = np.sum(S)

        rank = np.sum((S / S_norm) > tol)

        return U[:, :rank], S[:rank], Vt[:rank, :], S_norm

    @property
    def classes(self):
        warnings.warn(
            "KernelFisher.classes is deprecated and will be removed in 0.14. "
            "Use .classes_ instead.",
            DeprecationWarning,
            stacklevel=2)
        return self.classes_

    def _decision_function(self, X):
        #X = np.asarray(X)
        return self.transform(X)

    def decision_function(self, X):
        """
        This function returns the decision function values related to each
        class on an array of test vectors X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
            Decision function values related to each class, per sample
            n_components_found_ is the number of components requested and found
            NOTE: currently identical to self.transform(X)
        """
        return self._decision_function(X)

    def transform(self, X):
        """
        Project the data so as to maximize class separation (large separation
        between projected class means and small variance within each class).

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
        """

        #X = np.asarray(X)
        #ts = time.time()
        k = self._get_kernel(X, self.X_fit_)
        #if self.print_timing: print 'KernelFisher.transform: k took', time.time() - ts

        #ts = time.time()
        z = np.inner(self.Z, (k - self.K_mean)).T
        #if self.print_timing: print 'KernelFisher.transform: z took', time.time() - ts

        return z

    def fit_transform(self,
                      X,
                      y,
                      use_total_scatter=True,
                      sigma_sqrd=1e-8,
                      tol=1.0e-3):
        """
        Fit the Fisher Discriminant model according to the given training data and parameters.
        Then project the data onto up to n_components_found_ components so as to maximize class separation (large separation
        between projected class means and small variance within each class).
        NOTE this function is not clever, it simply runs fit(X,y [, ...]).transform(X)

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
        y : array, shape = [n_samples]
            Target values (integers)
        use_total_scatter : boolean
            Overrides the value set in the constructor (see class docstring)
        sigma_sqrd : float
            Overrides the value set in the constructor (see class docstring)
        tol : float
            Overrides the value set in the constructor (see class docstring)

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
        """
        # fit() does not accept these keyword arguments, so set them on the
        # instance before fitting, then transform the training data
        self.use_total_scatter = use_total_scatter
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        return self.fit(X, y).transform(X)
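
A quick check of the centering identity stated in the comment inside KernelFisher.fit: double-centering with H = I - (1/n) 11^T reproduces KernelCenterer().fit_transform(K). Toy linear kernel below.

import numpy as np
from sklearn.preprocessing import KernelCenterer

rng = np.random.RandomState(0)
X = rng.randn(7, 4)
K = X @ X.T
n = K.shape[0]

H = np.eye(n) - np.ones((n, n)) / n                    # centering matrix
print(np.allclose(H @ K @ H, KernelCenterer().fit_transform(K)))   # True
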
Example #22
0
class MIDA(BaseEstimator, TransformerMixin):
    """Maximum independence domain adaptation
    Args:
        n_components (int): Number of components to keep.
        kernel (str): "linear", "rbf", or "poly". Kernel to use for MIDA. Defaults to "linear".
        mu (float): Hyperparameter of the l2 penalty. Defaults to 1.0.
        eta (float): Hyperparameter of the label dependence. Defaults to 1.0.
        augmentation (bool): Whether to use the covariates as augmented features. Defaults to False.
        kernel_params (dict or None): Parameters for the kernel. Defaults to None.

    References:
        [1] Yan, K., Kou, L. and Zhang, D., 2018. Learning domain-invariant subspace using domain features and
            independence maximization. IEEE transactions on cybernetics, 48(1), pp.288-299.
    """
    def __init__(
        self,
        n_components,
        kernel="linear",
        lambda_=1.0,
        mu=1.0,
        eta=1.0,
        augmentation=False,
        kernel_params=None,
    ):
        self.n_components = n_components
        self.kernel = kernel
        self.mu = mu
        self.eta = eta
        self.augmentation = augmentation
        if kernel_params is None:
            self.kernel_params = {}
        else:
            self.kernel_params = kernel_params
        self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
        self._centerer = KernelCenterer()
        self.x_fit = None

    def _get_kernel(self, x, y=None):
        if self.kernel in ["linear", "rbf", "poly"]:
            params = self.kernel_params or {}
        else:
            raise ValueError("Pre-computed kernel not supported")
        return pairwise_kernels(x,
                                y,
                                metric=self.kernel,
                                filter_params=True,
                                **params)

    def fit(self, x, y=None, covariates=None):
        """
        Args:
            x : array-like. Input data, shape (n_samples, n_features)
            y : array-like. Labels, shape (nl_samples,)
            covariates : array-like. Domain co-variates, shape (n_samples, n_co-variates)

        Note:
            Unsupervised MIDA is performed if y is None.
            Semi-supervised MIDA is performed if y is not None.
        """
        if self.augmentation and type(covariates) == np.ndarray:
            x = np.concatenate((x, covariates), axis=1)

        # Kernel matrix
        kernel_x = self._get_kernel(x)
        kernel_x[np.isnan(kernel_x)] = 0

        # Solve the optimization problem
        self._fit(kernel_x, y, covariates)
        self.x_fit = x

        return self

    def _fit(self, kernel_x, y, covariates=None):
        """solve MIDA

        Args:
            kernel_x: array-like, kernel matrix of input data x, shape (n_samples, n_samples)
            y: array-like. Labels, shape (nl_samples,)
            covariates: array-like. Domain co-variates, shape (n_samples, n_covariates)

        Returns:
            self
        """
        n_samples = kernel_x.shape[0]
        # Identity (unit) matrix
        unit_mat = np.eye(n_samples)
        # Centering matrix
        ctr_mat = unit_mat - 1.0 / n_samples * np.ones((n_samples, n_samples))

        kernel_x = self._centerer.fit_transform(kernel_x)
        if type(covariates) == np.ndarray:
            kernel_c = np.dot(covariates, covariates.T)
        else:
            kernel_c = np.zeros((n_samples, n_samples))

        if y is not None:
            n_labeled = y.shape[0]
            if n_labeled > n_samples:
                raise ValueError("Number of labels exceeds number of samples")
            y_mat_ = self._label_binarizer.fit_transform(y)
            y_mat = np.zeros((n_samples, y_mat_.shape[1]))
            y_mat[:n_labeled, :] = y_mat_
            ker_y = np.dot(y_mat, y_mat.T)
            obj = multi_dot([
                kernel_x,
                self.mu * ctr_mat +
                self.eta * multi_dot([ctr_mat, ker_y, ctr_mat]) -
                multi_dot([ctr_mat, kernel_c, ctr_mat]),
                kernel_x.T,
            ])
        else:
            obj = multi_dot([
                kernel_x,
                self.mu * ctr_mat - multi_dot([ctr_mat, kernel_c, ctr_mat]),
                kernel_x.T
            ])

        eig_values, eig_vectors = linalg.eigh(
            obj,
            subset_by_index=[n_samples - self.n_components, n_samples - 1])
        idx_sorted = eig_values.argsort()[::-1]

        self.eig_values_ = eig_values[idx_sorted]
        self.U = eig_vectors[:, idx_sorted]
        self.U = np.asarray(self.U, dtype=float)

        return self

    def fit_transform(self, x, y=None, covariates=None):
        """
        Args:
            x : array-like, shape (n_samples, n_features)
            y : array-like, shape (n_samples,)
            covariates : array-like, shape (n_samples, n_covariates)

        Returns:
            x_transformed : array-like, shape (n_samples, n_components)
        """
        self.fit(x, y, covariates)

        return self.transform(x, covariates)

    def transform(self, x, covariates=None):
        """
        Args:
            x : array-like, shape (n_samples, n_features)
            covariates : array-like, augmentation features, shape (n_samples, n_covariates)
        Returns:
            x_transformed : array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, "x_fit")
        if type(covariates) == np.ndarray and self.augmentation:
            x = np.concatenate((x, covariates), axis=1)
        kernel_x = self._centerer.transform(
            pairwise_kernels(x,
                             self.x_fit,
                             metric=self.kernel,
                             filter_params=True,
                             **self.kernel_params))

        return np.dot(kernel_x, self.U)
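
A hedged usage sketch for the MIDA class above (assuming the class and the module imports it relies on are in scope): two toy domains, with a one-hot domain indicator passed as the covariates; the sizes and kernel choice are made up.

import numpy as np

rng = np.random.RandomState(0)
x_src = rng.randn(20, 4)                               # source-domain samples
x_tgt = rng.randn(15, 4) + 1.0                         # shifted target-domain samples
x_all = np.vstack([x_src, x_tgt])
covariates = np.vstack([np.tile([1, 0], (20, 1)),      # one-hot domain indicator
                        np.tile([0, 1], (15, 1))])

mida = MIDA(n_components=2, kernel="rbf")
x_invariant = mida.fit_transform(x_all, covariates=covariates)   # unsupervised MIDA
print(x_invariant.shape)                               # (35, 2)
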
Example #23
0
class SupervisedPCA(BaseEstimator, TransformerMixin):
    """
    Supervised Principal component analysis (SPCA)

    Finally for Python 3

    Non-linear dimensionality reduction through the use of kernels.
    Parameters
    ----------
    n_components: int or None
        Number of components. If None, all non-zero components are kept.
    kernel: 'linear' | 'poly' | 'rbf' | 'sigmoid' | 'precomputed'
        Kernel.
        Default: 'linear'
    degree : int, optional
        Degree for poly, rbf and sigmoid kernels.
        Default: 3.
    gamma : float, optional
        Kernel coefficient for rbf and poly kernels.
        Default: 1/n_features.
    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
    eigen_solver: string ['auto'|'dense'|'arpack']
        Select eigensolver to use.  If n_components is much less than
        the number of training samples, arpack may be more efficient
        than the dense eigensolver.
    tol: float
        convergence tolerance for arpack.
        Default: 0 (optimal value will be chosen by arpack)
    max_iter : int
        maximum number of iterations for arpack
        Default: None (optimal value will be chosen by arpack)
    Attributes
    ----------
    `lambdas_`, `alphas_`:
        Eigenvalues and eigenvectors of the centered kernel matrix
    """
    def __init__(self,
                 n_components=None,
                 kernel='linear',
                 gamma=0,
                 degree=3,
                 coef0=1,
                 alpha=1.0,
                 fit_inverse_transform=False,
                 eigen_solver='auto',
                 tol=0,
                 max_iter=None):

        self.n_components = n_components
        self.kernel = kernel.lower()
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.tol = tol
        self.max_iter = max_iter
        self.centerer = KernelCenterer()

    def transform(self, X):
        """
        Returns a new X, X_trans, based on previous self.fit() estimates
        """
        return X @ self.alphas_

    def fit(self, X, y):
        self._fit(X, y)
        return self

    def fit_transform(self, X, y=None, **fit_params):
        if y is None:
            raise ValueError('SPCA requires a target variable')
        self.fit(X, y)
        return X @ self.alphas_

    def _fit(self, X, y):
        # find kernel matrix of Y
        # pairwise_kernels expects 2D input, so reshape the target into a column
        K = self.centerer.fit_transform(self._get_kernel(y.reshape(-1, 1)))
        # scale X
        X_scale = scale(X)

        n_components = K.shape[0] if self.n_components is None else min(
            K.shape[0], self.n_components)

        # compute eigenvalues of X^TKX
        M = X.T @ K @ X
        if self.eigen_solver == 'auto':
            if M.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            warnings.warn(
                '`dense` is experimental! Please verify results or use < 10 components.'
            )
            self.lambdas_, self.alphas_ = la.eigh(M)
        elif eigen_solver == 'arpack':
            self.lambdas_, self.alphas_ = eigsh(M,
                                                n_components,
                                                which='LA',
                                                tol=self.tol)

        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove the zero/negative eigenvalues
        self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
        self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        self.X_fit = X

    def _get_kernel(self, X, Y=None):
        params = {
            'gamma': self.gamma,
            'degree': self.degree,
            'coef0': self.coef0
        }
        try:
            return pairwise_kernels(X,
                                    Y,
                                    metric=self.kernel,
                                    filter_params=True,
                                    n_jobs=-1,
                                    **params)
        except AttributeError:
            raise ValueError(
                f'{self.kernel} is not a valid kernel. Valid kernels are: '
                'rbf, poly, sigmoid, linear and precomputed.')
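
A hedged usage sketch for the SupervisedPCA class above (assuming the class and the module imports it relies on are in scope); toy regression data, with the kernel, gamma and solver chosen arbitrarily.

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(40, 6)
y = X[:, 0] * 2.0 + rng.randn(40) * 0.1                # target driven mostly by one feature

spca = SupervisedPCA(n_components=2, kernel='rbf', gamma=1.0,
                     eigen_solver='arpack')
X_new = spca.fit_transform(X, y)                       # project onto 2 supervised components
print(X_new.shape)                                     # (40, 2)
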
class SupervisedPCA(BaseEstimator, TransformerMixin):
    def __init__(self,
                 n_components,
                 kernel='linear',
                 eigen_solver='auto',
                 max_iterations=None,
                 gamma=0,
                 degree=3,
                 coef0=1,
                 alpha=1.0,
                 tolerance=0,
                 fit_inverse_transform=False):
        self._n_components = n_components
        self._gamma = gamma
        self._tolerance = tolerance
        self._fit_inverse_transform = fit_inverse_transform
        self._max_iterations = max_iterations
        self._degree = degree
        self._kernel = kernel
        self._eigen_solver = eigen_solver
        self._coef0 = coef0
        self._centerer = KernelCenterer()
        self._alpha = alpha

    def _get_kernel(self, X, Y=None):
        """
        Returns a kernel matrix K such that K_{i, j} is the kernel between the
        ith and jth vectors of the given matrix X, if Y is None.

        If Y is not None, then K_{i, j} is the kernel between the ith array
        from X and the jth array from Y.

        Valid kernels are 'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'.
        """
        kwargs = {
            'gamma': self._gamma,
            'degree': self._degree,
            'coef0': self._coef0
        }
        return pairwise_kernels(X,
                                Y,
                                metric=self._kernel,
                                n_jobs=-1,
                                filter_params=True,
                                **kwargs)

    def _fit(self, X, y):
        # Calculate kernel matrix of the labels Y and centre it and call it K (=H.L.H)
        K = self._centerer.fit_transform(self._get_kernel(y.reshape(-1, 1)))

        # deciding on the number of components to use
        if self._n_components is not None:
            n_components = min(K.shape[0], self._n_components)
        else:
            n_components = K.shape[0]

        # Scale X
        # scaled_X = scale(X)

        # calculate the eigen values and eigen vectors for X^T.K.X
        Q = (X.T).dot(K).dot(X)

        # If n_components is much less than the number of training samples,
        # arpack may be more efficient than the dense eigensolver.
        if (self._eigen_solver == 'auto'):
            if (Q.shape[0] / n_components) > 20:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self._eigen_solver

        if eigen_solver == 'dense':
            # Return the eigenvalues (in ascending order) and eigenvectors of a Hermitian or symmetric matrix.
            self._lambdas, self._alphas = linalg.eigh(
                Q, eigvals=(Q.shape[0] - n_components, Q.shape[0] - 1))
            # argument eigvals = Indexes of the smallest and largest (in ascending order) eigenvalues

        elif eigen_solver == 'arpack':
            # deprecated :: self._lambdas, self._alphas = utils.arpack.eigsh(A=Q, n_components, which="LA", tol=self._tolerance)
            self._lambdas, self._alphas = ssl_eigsh(A=Q,
                                                    k=n_components,
                                                    which="LA",
                                                    tol=self._tolerance)

        indices = self._lambdas.argsort()[::-1]

        self._lambdas = self._lambdas[indices]
        self._alphas = self._alphas[:, indices]

        # keep only the components with positive eigenvalues; compute the mask
        # once so the eigenvalue and eigenvector selections stay consistent
        positive = self._lambdas > 0
        self._lambdas = self._lambdas[positive]
        self._alphas = self._alphas[:, positive]

        self.X_fit = X

    def _transform(self):
        return self.X_fit.dot(self._alphas)

    def transform(self, X):
        return X.dot(self._alphas)

    def fit(self, X, Y):
        self._fit(X, Y)
        return self

    def fit_transform(self, X, Y):
        self.fit(X, Y)
        return self._transform()
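
A minimal numpy sketch of the computation inside _fit above: center the label kernel L = y y^T with H = I - (1/n) 11^T, form Q = X^T (H L H) X, take the top eigenvectors, and project X onto them. Toy data; the component count is arbitrary.

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
X = rng.randn(30, 5)
y = rng.randn(30, 1)

n = X.shape[0]
H = np.eye(n) - np.ones((n, n)) / n                    # centering matrix
L = y @ y.T                                            # linear kernel on the targets
Q = X.T @ H @ L @ H @ X                                # (n_features, n_features)

evals, evecs = linalg.eigh(Q)                          # ascending eigenvalues
alphas = evecs[:, ::-1][:, :2]                         # top 2 eigenvectors
X_proj = X @ alphas
print(X_proj.shape)                                    # (30, 2)
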
Example #25
0
class KernelPCA(BaseEstimator, TransformerMixin):
    """Kernel Principal component analysis (KPCA)

    Non-linear dimensionality reduction through the use of kernels (see
    :ref:`metrics`).

    Parameters
    ----------
    n_components: int or None
        Number of components. If None, all non-zero components are kept.

    kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel.
        Default: "linear"

    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.

    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for kernel passed as
        callable object. Ignored by other kernels.

    alpha: int
        Hyperparameter of the ridge regression that learns the
        inverse transform (when fit_inverse_transform=True).
        Default: 1.0

    fit_inverse_transform: bool
        Learn the inverse transform for non-precomputed kernels.
        (i.e. learn to find the pre-image of a point)
        Default: False

    eigen_solver: string ['auto'|'dense'|'arpack']
        Select eigensolver to use.  If n_components is much less than
        the number of training samples, arpack may be more efficient
        than the dense eigensolver.

    tol: float
        convergence tolerance for arpack.
        Default: 0 (optimal value will be chosen by arpack)

    max_iter : int
        maximum number of iterations for arpack
        Default: None (optimal value will be chosen by arpack)

    remove_zero_eig : boolean, default=False
        If True, then all components with zero eigenvalues are removed, so
        that the number of components in the output may be < n_components
        (and sometimes even zero due to numerical instability).
        When n_components is None, this parameter is ignored and components
        with zero eigenvalues are removed regardless.

    Attributes
    ----------

    lambdas_ :
        Eigenvalues of the centered kernel matrix

    alphas_ :
        Eigenvectors of the centered kernel matrix

    evals_ : array of float, shape (n_samples,)
        All eigenvalues of the centered kernel matrix

    evecs_ : array of float, shape (n_samples, n_samples)
        All eigenvectors of the centered kernel matrix

    dual_coef_ :
        Inverse transform matrix

    X_transformed_fit_ :
        Projection of the fitted data on the kernel principal components

    References
    ----------
    Kernel PCA was introduced in:
        Bernhard Schoelkopf, Alexander J. Smola,
        and Klaus-Robert Mueller. 1999. Kernel principal
        component analysis. In Advances in kernel methods,
        MIT Press, Cambridge, MA, USA 327-352.
    """
    def __init__(self,
                 n_components=None,
                 kernel="linear",
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None,
                 alpha=1.0,
                 fit_inverse_transform=False,
                 eigen_solver='auto',
                 tol=0,
                 max_iter=None,
                 remove_zero_eig=False):
        if fit_inverse_transform and kernel == 'precomputed':
            raise ValueError(
                "Cannot fit_inverse_transform with a precomputed kernel.")
        self.n_components = n_components
        self.kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self._centerer = KernelCenterer()

    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {
                "gamma": self.gamma,
                "degree": self.degree,
                "coef0": self.coef0
            }
        return pairwise_kernels(X,
                                Y,
                                metric=self.kernel,
                                filter_params=True,
                                **params)

    def _fit_transform(self, K):
        """Fit the model using the kernel matrix K."""
        # center kernel
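        # (equivalently: K <- K - 1_n K - K 1_n + 1_n K 1_n, where 1_n is the
        # n x n matrix whose entries are all 1/n)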
        K = self._centerer.fit_transform(K)

        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
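            # scipy returns eigenvalues in ascending order; the eigvals range
            # below selects only the n_components largest eigenpairs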
            self.lambdas_, self.alphas_ = linalg.eigh(
                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
            self.evals_, self.evecs_ = linalg.eigh(K)

        elif eigen_solver == 'arpack':
            self.lambdas_, self.alphas_ = eigsh(K,
                                                n_components,
                                                which="LA",
                                                tol=self.tol,
                                                maxiter=self.max_iter)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove eigenvectors with a zero eigenvalue
        if self.remove_zero_eig or self.n_components is None:
            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K

    def _fit_inverse_transform(self, X_transformed, X):
        if hasattr(X, "tocsr"):
            raise NotImplementedError("Inverse transform not implemented for "
                                      "sparse matrices!")

        n_samples = X_transformed.shape[0]
        K = self._get_kernel(X_transformed)
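        # Kernel ridge regression from the transformed points back to the
        # original inputs: add the ridge term `alpha` to the kernel diagonal
        # and solve for the dual coefficients used by inverse_transform.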
        K.flat[::n_samples + 1] += self.alpha
        self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)
        self.X_transformed_fit_ = X_transformed

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        K = self._get_kernel(X)
        self._fit_transform(K)

        if self.fit_inverse_transform:
            sqrt_lambdas = np.diag(np.sqrt(self.lambdas_))
            X_transformed = np.dot(self.alphas_, sqrt_lambdas)
            self._fit_inverse_transform(X_transformed, X)

        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        self.fit(X, **params)

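        # projections of the training points: eigenvectors scaled by the
        # square roots of the corresponding eigenvalues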
        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)

        if self.fit_inverse_transform:
            self._fit_inverse_transform(X_transformed, X)

        return X_transformed

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')

        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
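        # project the centered cross-kernel onto the training eigenvectors,
        # rescaled by 1/sqrt(lambda) to match the scaling of fit_transform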
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def inverse_transform(self, X):
        """Transform X back to original space.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_components)

        Returns
        -------
        X_new: array-like, shape (n_samples, n_features)

        References
        ----------
        "Learning to Find Pre-Images", G BakIr et al, 2004.
        """
        if not self.fit_inverse_transform:
            raise NotFittedError("The fit_inverse_transform parameter was not"
                                 " set to True when instantiating and hence "
                                 "the inverse transform is not available.")

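        # evaluate the learned pre-image map: a kernel regression prediction
        # using the dual coefficients fitted in _fit_inverse_transform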
        K = self._get_kernel(X, self.X_transformed_fit_)

        return np.dot(K, self.dual_coef_)
Example #26
0
    def fit(self, X, Y):
        """Fit the KCCA model with two views represented by kernels X and Y.

        Parameters
        ----------
        X : array_like, shape = (n_samples, n_features) for data matrix
            or shape = (n_samples, n_samples) for kernel matrix.
            When both X and Y are kernel matrices, the kernel parameter
            should be set to 'precomputed'.
            It is considered to be one view of the data.

        Y : array_like, shape = (n_samples, n_features) for data matrix
            or shape = (n_samples, n_samples) for kernel matrix.
            When both X and Y are kernel matrices, the kernel parameter
            should be set to 'precomputed'.
            It is considered to be another view of the data.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        check_consistent_length(X, Y)
        X = check_array(X, dtype=np.float64, copy=self.copy)
        Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)
        if Y.ndim == 1:
            Y = Y.reshape(-1, 1)

        n = X.shape[0]
        p = X.shape[1]
        q = Y.shape[1]

        if self.n_components < 1 or self.n_components > n:
            raise ValueError('Invalid number of components: %d' %
                             self.n_components)
        if self.eigen_solver not in ("auto", "dense", "arpack"):
            raise ValueError("Got eigen_solver %s when only 'auto', "
                             "'dense' and 'arpack' are valid" %
                             self.eigen_solver)
        if self.kernel == 'precomputed' and (p != n or q != n):
            raise ValueError('Invalid kernel matrices dimension')
        if not self.pgso and (self.kapa <= 0 or self.kapa >= 1):
            raise ValueError('kapa should be in (0, 1) when pgso=False')
        if self.pgso and (self.kapa < 0 or self.kapa > 1):
            raise ValueError('kapa should be in [0, 1] when pgso=True')

        KX = self._get_kernel(X)
        KY = self._get_kernel(Y)

        if self.center:
            kc = KernelCenterer()
            self.KXc_ = kc.fit_transform(KX)
            self.KYc_ = kc.fit_transform(KY)
        else:
            self.KXc_ = KX
            self.KYc_ = KY

        if self.pgso:  # use PGSO to decompose kernel matrix
            self._fit_pgso(self.KXc_, self.KYc_)
        else:
            self._fit(self.KXc_, self.KYc_)
        return self
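
    # A minimal usage sketch for the `fit` method above (illustrative only;
    # the enclosing class name `KCCA` and its constructor signature are
    # assumptions inferred from the attributes this method reads):
    #
    #     import numpy as np
    #     rng = np.random.RandomState(0)
    #     X = rng.randn(100, 5)   # first view
    #     Y = rng.randn(100, 3)   # second view
    #     kcca = KCCA(n_components=2, kernel='rbf', kapa=0.5, pgso=True).fit(X, Y)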