def center_normTrace_decomp(K):
    """Decompose a precomputed kernel into feature vectors without test-set snooping.

    The centering transform is fitted on the train-only sub-kernel, the top
    eigenpairs give the train features, and test rows are mapped through the
    train-implied decomposition. The stacked result is row-normalized.

    NOTE(review): relies on module-level names ``in_samples``,
    ``reduced_dimen``, ``KernelCenterer``, ``normalize``, ``np`` and
    ``scipy`` — confirm they are defined at import time.

    Parameters
    ----------
    K : ndarray of shape (n_total, n_total)
        Precomputed kernel over train rows (first ``in_samples``) followed
        by test rows.

    Returns
    -------
    ndarray of shape (n_total, reduced_dimen)
        Unit-normalized feature vectors for train and test samples.
    """
    # Fixed: Python 2 `print` statements replaced with the print() function.
    print('centering kernel')
    # Center using ONLY the training sub-kernel so nothing leaks from test rows.
    Kcent = KernelCenterer()
    Ktrain = Kcent.fit_transform(K[:in_samples, :in_samples])
    # Ktrain = Ktrain / float(np.trace(Ktrain))
    # [EigVals, EigVectors] = scipy.sparse.linalg.eigsh(Ktrain, k=reduced_dimen, which='LM')
    # eigh returns the requested eigenpairs in ascending order.
    [EigVals, EigVectors] = scipy.linalg.eigh(
        Ktrain, eigvals=(in_samples - reduced_dimen, in_samples - 1))
    # Clamp small negative eigenvalues (numerical noise) to zero in one
    # vectorized call instead of the original element-by-element loop.
    EigVals = np.maximum(EigVals, 0)
    # Flip to descending order to match the usual PCA convention.
    EigVals = np.flipud(np.fliplr(np.diag(EigVals)))
    EigVectors = np.fliplr(EigVectors)
    Ktrain_decomp = np.dot(EigVectors, scipy.linalg.sqrtm(EigVals))
    # Map test rows through the mapping implied by the train decomposition.
    Kcent = KernelCenterer()
    Kfull = Kcent.fit_transform(K)
    # Kfull = Kfull / float(np.trace(Kfull))
    K_train_test = Kfull[in_samples:, :in_samples]
    Ktest_decomp = np.dot(K_train_test, np.linalg.pinv(Ktrain_decomp.T))
    # Stack mapped train and test vectors and unit-normalize each row.
    Kdecomp = np.vstack((Ktrain_decomp, Ktest_decomp))
    print('doing normalization')
    Kdecomp = normalize(Kdecomp, copy=False)
    return Kdecomp
def KernelPCA(X):
    """Return the leading kernel principal component of X under an RBF kernel.

    Builds the Gaussian Gram matrix of X, double-centers it, and returns the
    eigenvector associated with the largest eigenvalue.
    """
    # Condensed vector of squared Euclidean distances for every row pair.
    dist_condensed = pdist(X, 'sqeuclidean')
    # Spread of the pairwise distances (kept from the original; unused below).
    variance = np.var(dist_condensed)
    # Expand into the full symmetric distance matrix.
    dist_matrix = squareform(dist_condensed)
    # Fixed RBF width matching the scikit-learn KernelPCA configuration.
    gamma = 15
    gram = exp(-gamma * dist_matrix)
    # Double-center the Gram matrix.
    gram = KernelCenterer().fit_transform(gram)
    # eigh yields eigenvalues in ascending order, so the last column is the
    # eigenvector of the largest eigenvalue.
    eigvals, eigvecs = eigh(gram)
    return eigvecs[:, -1]
def KPCA(gamma, data, feature_size):
    """Kernel PCA: project data onto its top `feature_size` RBF components.

    Parameters
    ----------
    gamma : float — RBF kernel width.
    data : array-like — samples, one per row.
    feature_size : int — number of components to keep.
    """
    sq_dists = squared_euclidean_distance(data)
    # Expand the condensed distances into a full symmetric matrix.
    dist_mat = squareform(sq_dists)
    # RBF Gram matrix, then double-centering.
    K = rbfkernel(gamma, dist_mat)
    K = KernelCenterer().fit_transform(K)
    # Eigenvectors ordered by eigenvalue, largest first.
    eigen_values, eigen_vectors = eigh(K)
    order = eigen_values.argsort()[::-1]
    leading_directions = eigen_vectors[:, order[:feature_size]]
    # Project the centered kernel onto the leading directions.
    return np.dot(K, leading_directions)
class KernelPca:
    """Kernel PCA with a Gaussian (RBF) kernel.

    `beta` is the Gaussian kernel width parameter.
    """

    def __init__(self, beta):
        self.beta = beta
        self.centerer = KernelCenterer()

    def __kernel(self, x1, x2):
        # Gaussian kernel: exp(-beta * ||x1 - x2||^2).
        return np.exp(-self.beta * np.linalg.norm(x1 - x2) ** 2)

    def fit_transform(self, X, n):
        """Fit on X of shape (N, M) and return the top-n principal components.

        Returns an (N, n) array: each row is a sample's component scores.
        """
        self.X = X
        num = X.shape[0]
        # Gram matrix over all training pairs.
        K = np.array([[self.__kernel(X[a], X[b]) for b in range(num)]
                      for a in range(num)])
        # Double-center the Gram matrix.
        K = self.centerer.fit_transform(K)
        # eigh returns eigenvalues in ascending order; flip to descending.
        vals, vecs = np.linalg.eigh(K)
        vals = vals[::-1]
        vecs = vecs[:, ::-1]
        # Top-n singular values and left singular vectors.
        self.sigma = np.sqrt(vals[:n])    # shape (n,)
        self.a = np.array(vecs[:, :n])    # shape (N, n)
        return self.sigma * self.a        # shape (N, n)

    def transform(self, x):
        """Return the principal-component representation of x, shape (Nx, M)."""
        n_train = self.X.shape[0]
        n_query = x.shape[0]
        # Cross Gram matrix between query rows and training rows.
        K = np.array([[self.__kernel(x[a], self.X[b]) for b in range(n_train)]
                      for a in range(n_query)])    # (Nx, N)
        # Center with the statistics learned during fit.
        K = self.centerer.transform(K)
        # Project and rescale by the singular values.
        return K.dot(self.a) / self.sigma          # (Nx, n)
def test_center_kernel():
    """KernelCenterer on a linear kernel must equal centering in feature space."""
    X_fit = np.random.random((5, 4))
    scaler = Scaler(with_std=False)
    scaler.fit(X_fit)
    X_fit_centered = scaler.transform(X_fit)
    K_fit = np.dot(X_fit, X_fit.T)

    # Fit-time: centering the kernel equals the kernel of centered features.
    centerer = KernelCenterer()
    expected_fit = np.dot(X_fit_centered, X_fit_centered.T)
    actual_fit = centerer.fit_transform(K_fit)
    assert_array_almost_equal(expected_fit, actual_fit)

    # Predict-time: the same equivalence holds for a cross kernel on new data.
    X_pred = np.random.random((2, 4))
    K_pred = np.dot(X_pred, X_fit.T)
    X_pred_centered = scaler.transform(X_pred)
    expected_pred = np.dot(X_pred_centered, X_fit_centered.T)
    actual_pred = centerer.transform(K_pred)
    assert_array_almost_equal(expected_pred, actual_pred)
class kc():
    """Center a pairwise kernel over selected DataFrame columns.

    The first len(columns) components of the centered kernel are written
    back into the selected columns in place; the mutated frame is returned.
    """

    def __init__(self, cols, metric):
        self.columns = cols
        self.metric = metric
        self.model = KernelCenterer()

    def fit(self, data):
        gram = pairwise_kernels(data[self.columns], metric=self.metric)
        self.model.fit(gram)

    def fit_transform(self, data):
        gram = pairwise_kernels(data[self.columns], metric=self.metric)
        centered = self.model.fit_transform(gram)
        # Overwrite each selected column with its kernel component.
        for pos, col in enumerate(self.columns):
            data[col] = centered[:, pos]
        return data

    def transform(self, data):
        gram = pairwise_kernels(data[self.columns], metric=self.metric)
        centered = self.model.transform(gram)
        for pos, col in enumerate(self.columns):
            data[col] = centered[:, pos]
        return data
def kpca(X, gamma=15, k=10):
    """Kernel PCA eigenvector directions for X.

    Builds a kernel from pairwise Minkowski distances (note: the distances
    are NOT squared, so this is not the standard Gaussian RBF — kept as-is),
    centers it, and returns the eigenvectors with the k largest absolute
    eigenvalues.

    Parameters
    ----------
    X : array-like of shape (N, D)
    gamma : float, kernel width multiplier.
    k : int, number of eigenvector directions to keep.

    Returns
    -------
    ndarray of shape (N, k) — one eigenvector per column.
    """
    # Pairwise distances expanded into the symmetric N x N matrix.
    _dists = pdist(X, 'minkowski')
    sym_dists = squareform(_dists)
    # Kernel matrix.
    K = exp(-gamma * sym_dists)
    # Center the kernel since the data must be standardized in feature space.
    kern_cent = KernelCenterer()
    K = kern_cent.fit_transform(K)
    # Fixed: K is symmetric, so use eigh instead of eig — eig can return
    # complex-typed output and is less numerically stable here.
    eig_vals, eig_vecs = np.linalg.eigh(K)
    # Keep the k eigenvectors with the largest |eigenvalue|, in descending
    # order (same ordering the original pair-sort produced).
    order = np.argsort(np.abs(eig_vals))[::-1][:k]
    return eig_vecs[:, order]  # N x k eigenvector matrix
class Kernel(object):
    """Base kernel mix-in: computes a Gram matrix and double-centers it.

    Not intended to be instantiated directly; subclasses inherit the kernel
    dispatch and centering behavior.
    """

    def __init__(self, kernel_type="linear", degree=2, gamma=None, coef0=1):
        self.kernel_type = kernel_type
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.centerer = KernelCenterer()

    def c_(self, X):
        """Double-center the Gram matrix."""
        return self.centerer.fit_transform(X)

    def apply_kernel(self, X):
        # Dispatch on the configured kernel name, then center the result.
        handlers = {
            "rbf": self._apply_rbf,
            "linear": self._apply_linear,
            "poly": self._apply_poly,
        }
        gram = handlers[self.kernel_type](X)
        return self.c_(gram)

    def _apply_linear(self, X):
        return linear_kernel(X)

    def _apply_poly(self, X):
        return polynomial_kernel(X, degree=self.degree, coef0=self.coef0,
                                 gamma=self.gamma)

    def _apply_rbf(self, X):
        return rbf_kernel(X, gamma=self.gamma)
class KernelECA(BaseEstimator, TransformerMixin):
    """Kernel Entropy Component Analysis (KECA).

    Non-linear dimensionality reduction through the use of kernels
    (see :ref:`metrics`).

    Parameters
    ----------
    n_components : int or None
        Number of components. If None, all non-zero components are kept.
    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel. Default: "linear".
    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.
    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.
    coef0 : float, optional
        Independent term in poly and sigmoid kernels. Ignored by other kernels.
    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for a kernel passed as a
        callable object. Ignored by other kernels.
    eigen_solver : string ['auto'|'dense'|'arpack']
        Eigensolver selection (declared for API compatibility; the dense
        solver is always used here).
    tol : float
        Convergence tolerance for arpack. Default: 0.
    max_iter : int
        Maximum number of iterations for arpack. Default: None.
    random_state : int seed, RandomState instance, or None, default None
        Pseudo random number generator used to initialize residuals when
        eigen_solver == 'arpack'.
    center : bool, default False
        If True, center the kernel matrix before decomposition.

    Attributes
    ----------
    lambdas_ : eigenvalues of the (optionally centered) kernel matrix
    alphas_ : eigenvectors of the (optionally centered) kernel matrix
    X_transformed : projection of the fitted data on the entropy components

    References
    ----------
    Kernel ECA based on:
    (c) Robert Jenssen, University of Tromso, Norway, 2010.
    R. Jenssen, "Kernel Entropy Component Analysis," IEEE Trans. Patt. Anal.
    Mach. Intel., 32(5), 847-860, 2010.
    """

    def __init__(self, n_components=None, kernel="linear", gamma=None,
                 degree=3, coef0=1, kernel_params=None, eigen_solver='auto',
                 tol=0, max_iter=None, random_state=None, center=False):
        self.n_components = n_components
        self._kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.eigen_solver = eigen_solver
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self._centerer = KernelCenterer()
        self.center = center

    @property
    def _pairwise(self):
        # BUGFIX: the kernel name is stored in self._kernel; the original
        # read self.kernel, which does not exist and raised AttributeError.
        return self._kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self._kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self._kernel,
                                filter_params=True, **params)

    def _fit_transform(self, K):
        """Fit using the kernel matrix K; stores the ECA projection."""
        if self.center:  # idiomatic truth test instead of `== True`
            K = self._centerer.fit_transform(K)
        self.X_transformed = self.kernelECA(K=K)
        return K

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        K = self._get_kernel(X)
        self._fit_transform(K)
        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        self.fit(X, **params)
        return self.X_transformed

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')
        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def inverse_transform(self, X):
        raise NotImplementedError("Function inverse_transform is not implemented.")

    # Helper functions below.

    def kernelECA(self, K):
        """Return the entropy-ranked components of kernel matrix K."""
        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)
        # Eigendecomposition of the symmetric kernel.
        self.lambdas_, self.alphas_ = linalg.eigh(K)
        d = self.lambdas_
        E = self.alphas_
        # Sort eigenvectors in descending eigenvalue order.
        D, E = self.sort_eigenvalues(d, E)
        d = np.diag(D)
        # Re-rank the eigenpairs by their entropy contribution.
        sorted_entropy_index, entropy = self.ECA(D, E)
        Es = E[:, sorted_entropy_index]
        ds = d[sorted_entropy_index]
        Phi = np.zeros((K.shape[0], n_components))
        for i in range(n_components):
            Phi[:, i] = np.sqrt(ds[i]) * Es[:, i]
        return Phi

    def sort_eigenvalues(self, D, E):
        """Return (diag matrix, eigenvectors) sorted by descending eigenvalue."""
        d = D
        indices = np.argsort(d)[::-1]
        d = d[indices]
        D = np.zeros((len(d), len(d)))
        for i in range(len(d)):
            D[i, i] = d[i]
        E = E[:, indices]
        return D, E

    def ECA(self, D, E):
        """Entropy of each eigenpair: lambda_i * (1.T e_i)^2, ranked descending."""
        N = E.shape[0]
        entropy = np.multiply(np.diag(D).T, (np.dot(np.ones((1, N)), E))**2)[0]
        indices = np.argsort(entropy)[::-1]
        entropy = entropy[indices]
        return indices, entropy
class KernelECA(BaseEstimator, TransformerMixin):
    """Kernel Entropy Component Analysis (KECA).

    Non-linear dimensionality reduction through the use of kernels
    (see :ref:`metrics`).

    Parameters
    ----------
    n_components : int or None
        Number of components. If None, all non-zero components are kept.
    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel. Default: "linear".
    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.
    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.
    coef0 : float, optional
        Independent term in poly and sigmoid kernels. Ignored by other kernels.
    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for a kernel passed as a
        callable object. Ignored by other kernels.
    eigen_solver : string ['auto'|'dense'|'arpack']
        Eigensolver selection (declared for API compatibility; the dense
        solver is always used here).
    tol : float
        Convergence tolerance for arpack. Default: 0.
    max_iter : int
        Maximum number of iterations for arpack. Default: None.
    random_state : int seed, RandomState instance, or None, default None
        Pseudo random number generator used to initialize residuals when
        eigen_solver == 'arpack'.
    center : bool, default False
        If True, center the kernel matrix before decomposition.

    Attributes
    ----------
    lambdas_ : eigenvalues of the (optionally centered) kernel matrix
    alphas_ : eigenvectors of the (optionally centered) kernel matrix
    X_transformed : projection of the fitted data on the entropy components

    References
    ----------
    Kernel ECA based on:
    (c) Robert Jenssen, University of Tromso, Norway, 2010.
    R. Jenssen, "Kernel Entropy Component Analysis," IEEE Trans. Patt. Anal.
    Mach. Intel., 32(5), 847-860, 2010.
    """

    def __init__(self,
                 n_components=None,
                 kernel="linear",
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None,
                 eigen_solver='auto',
                 tol=0,
                 max_iter=None,
                 random_state=None,
                 center=False):
        self.n_components = n_components
        self._kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.eigen_solver = eigen_solver
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self._centerer = KernelCenterer()
        self.center = center

    @property
    def _pairwise(self):
        # BUGFIX: the kernel name lives in self._kernel; the original read
        # self.kernel, which does not exist and raised AttributeError.
        return self._kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        if callable(self._kernel):
            params = self.kernel_params or {}
        else:
            params = {
                "gamma": self.gamma,
                "degree": self.degree,
                "coef0": self.coef0
            }
        return pairwise_kernels(X,
                                Y,
                                metric=self._kernel,
                                filter_params=True,
                                **params)

    def _fit_transform(self, K):
        """Fit using the kernel matrix K; stores the ECA projection."""
        if self.center:  # idiomatic truth test instead of `== True`
            K = self._centerer.fit_transform(K)
        self.X_transformed = self.kernelECA(K=K)
        return K

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        K = self._get_kernel(X)
        self._fit_transform(K)
        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        self.fit(X, **params)
        return self.X_transformed

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')
        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def inverse_transform(self, X):
        raise NotImplementedError(
            "Function inverse_transform is not implemented.")

    # Helper functions below.

    def kernelECA(self, K):
        """Return the entropy-ranked components of kernel matrix K."""
        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)
        # Eigendecomposition of the symmetric kernel.
        self.lambdas_, self.alphas_ = linalg.eigh(K)
        d = self.lambdas_
        E = self.alphas_
        # Sort eigenvectors in descending eigenvalue order.
        D, E = self.sort_eigenvalues(d, E)
        d = np.diag(D)
        # Re-rank the eigenpairs by their entropy contribution.
        sorted_entropy_index, entropy = self.ECA(D, E)
        Es = E[:, sorted_entropy_index]
        ds = d[sorted_entropy_index]
        Phi = np.zeros((K.shape[0], n_components))
        for i in range(n_components):
            Phi[:, i] = np.sqrt(ds[i]) * Es[:, i]
        return Phi

    def sort_eigenvalues(self, D, E):
        """Return (diag matrix, eigenvectors) sorted by descending eigenvalue."""
        d = D
        indices = np.argsort(d)[::-1]
        d = d[indices]
        D = np.zeros((len(d), len(d)))
        for i in range(len(d)):
            D[i, i] = d[i]
        E = E[:, indices]
        return D, E

    def ECA(self, D, E):
        """Entropy of each eigenpair: lambda_i * (1.T e_i)^2, ranked descending."""
        N = E.shape[0]
        entropy = np.multiply(np.diag(D).T, (np.dot(np.ones((1, N)), E))**2)[0]
        indices = np.argsort(entropy)[::-1]
        entropy = entropy[indices]
        return indices, entropy
class PASVM(object):
    """Passive-Aggressive SVM with optional kernelized inputs.

    Parameters
    ----------
    C : float
        Aggressiveness parameter used by the 'first' and 'second' relaxations.
    relaxation : {'classic', 'first', 'second'}
        Which PA update rule to use.
    coef0 : float
        Independent term for the polynomial kernel.
    degree : int
        Degree of the polynomial kernel.
    gamma : float
        Kernel coefficient for rbf/poly kernels.
    kernel_type : {'rbf', 'linear', 'poly'} or None
        If None, samples are used as-is; otherwise each sample is passed
        through the named kernel before the update.
    """

    def __init__(self, C=1, relaxation="classic", coef0=1, degree=2,
                 gamma=1.5, kernel_type=None):
        self.C = C
        self.relaxation = relaxation
        # BUGFIX: the constructor previously hard-coded self.coef0 = 1,
        # silently discarding the caller's coef0 argument.
        self.coef0 = coef0
        self.degree = degree
        self.gamma = gamma
        self.kernel_type = kernel_type
        self.centerer = KernelCenterer()

    def c_(self, X):
        """Center the gram matrix."""
        return self.centerer.fit_transform(X)

    def fit(self, X, y):
        """Run one online PA pass over (X, y); updates self.W in place."""
        if not hasattr(self, "W"):
            self.W = self._init_weights(X)
        if self.W.shape[0] != X.shape[1]:
            raise ValueError(
                "Expected to get X with {} features, got {} instead".format(
                    X.shape[1], self.W.shape[0]))
        for i in range(X.shape[0]):
            x = X[i, :].reshape(1, -1)
            if self.kernel_type is not None:
                x = self.apply_kernel(x)
            loss = self._get_loss(x, y[i])
            # NOTE(review): the update rule receives the full X, matching the
            # original behavior — confirm whether per-sample x was intended.
            tau = self._get_update_rule()(X, loss)
            self.W = self.W + tau * y[i] * x.reshape(-1, 1)

    def predict(self, X):
        """Return the sign of the decision function for each row of X."""
        if not hasattr(self, "W"):
            self.W = self._init_weights(X)
        return np.sign(np.dot(X, self.W))

    def _get_loss(self, X, y):
        # Hinge loss of the current weights on (X, y).
        loss = max(0, 1 - y * (np.dot(X, self.W)))
        return loss

    def _get_update_rule(self):
        """Return the tau-update callable for the configured relaxation."""
        def classic(X, loss):
            return loss / l2(X)

        def first_relaxation(X, loss):
            return min(self.C, loss / l2(X))

        def second_relaxation(X, loss):
            return loss / (l2(X) + (1 / (2 * self.C)))

        mapping = {
            'classic': classic,
            'first': first_relaxation,
            'second': second_relaxation
        }
        return mapping[self.relaxation]

    def _init_weights(self, X):
        # Random Gaussian initialization, one weight per feature.
        return np.random.randn(X.shape[1], 1)

    def apply_kernel(self, X):
        """Apply the configured kernel to X and center the result."""
        kernel_handler = {
            "rbf": self._apply_rbf,
            "linear": self._apply_linear,
            "poly": self._apply_poly
        }
        return self.c_(kernel_handler[self.kernel_type](X))

    def _apply_linear(self, X):
        return linear_kernel(X)

    def _apply_poly(self, X):
        return polynomial_kernel(X, degree=self.degree, coef0=self.coef0,
                                 gamma=self.gamma)

    def _apply_rbf(self, X):
        return rbf_kernel(X, gamma=self.gamma)
class KernelPCA(TransformerMixin, BaseEstimator):
    """Multi-kernel PCA: builds a bank of kernels, mixes them with
    per-kernel coefficients, then eigendecomposes the centered mixture.

    NOTE(review): depends on external modules LLE, LE, Iso for three of the
    kernels, and on a module-level `normalize` — confirm those are in scope.
    """

    def __init__(self, kernel="linear", gamma=None, degree=3, coef0=1,
                 kernel_params=None, alpha=1.0, fit_inverse_transform=False,
                 eigen_solver='auto', tol=0, max_iter=None,
                 remove_zero_eig=False, n_components=2, random_state=None,
                 copy_X=True, n_jobs=None, coeficient=None, nkernel=10):
        # Note: the `kernel`, `alpha` and `fit_inverse_transform` arguments
        # mirror sklearn's KernelPCA API; only some are used below.
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.nkernel = nkernel          # total number of kernels to build
        self.n_components = n_components
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.copy_X = copy_X
        self._centerer = KernelCenterer()
        self.coeficient = coeficient    # mixing weights, one per kernel

    def kernels(self, X):
        """Build the kernel bank: one normalized rbf, LLE/LE/Iso kernels,
        then one kernel per named metric (several rbf widths)."""
        kern = [
            'linear', 'poly', 'polynomial', 'rbf', 'laplacian', 'sigmoid',
            'cosine'
        ]
        # Two extra slots beyond the named metrics (LLE/LE/Iso adjustment).
        tkernel = len(kern) + 2
        K = []
        # Start from a very small rbf width; mutated again in the loop below.
        self.gamma = 0.00001
        gamma_jump = 1.02  # kept for the commented-out geometric schedule
        K.append(normalize(self._get_kernel(X, 'rbf')))
        #K.append(procrustes(K[0],normalize(LLE.K(X)))[1])
        #K.append(procrustes(K[0],normalize(LE.K(X)))[1])
        #K.append(procrustes(K[0],normalize(Iso.K(X)))[1])
        K.append((LLE.K(X)))
        K.append((LE.K(X)))
        K.append((Iso.K(X)))
        for i in kern:
            if i == 'rbf':
                # Several rbf kernels at increasing widths fill the
                # remaining slots up to nkernel.
                for j in range(1, self.nkernel - tkernel):
                    #self.gamma=gamma_jump*self.gamma
                    self.gamma = 0.3 * j
                    K.append(self._get_kernel(X, i))
                    #K.append(procrustes(K[0],normalize(self._get_kernel(X,i)))[1])
            else:
                K.append(self._get_kernel(X, i))
                #K.append(procrustes(K[0],normalize(self._get_kernel(X,i)))[1])
        # Default mixing weights: all mass on the first (normalized rbf) kernel.
        if not (self.coeficient):
            self.coeficient = np.zeros(len(K))
            self.coeficient[0] = 1
        self.SuperK = self.createSuperK(K)
        return K

    def _get_kernel(self, X, kernel):
        """Pairwise kernel for metric name `kernel` with current parameters."""
        params = {
            "gamma": self.gamma,
            "degree": self.degree,
            "coef0": self.coef0
        }
        return pairwise_kernels(X,
                                None,
                                metric=kernel,
                                filter_params=True,
                                n_jobs=self.n_jobs,
                                **params)

    def normalize(self, v):
        # Scale a vector by its maximum element.
        return v / max(v)

    def Solve(self, K):
        """Eigendecompose the centered kernel with the best available solver."""
        # Select the method used to calculate the eigenvalues.
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and self.n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver
        # Get eigenvalues and eigenvectors of the centered kernel.
        if eigen_solver == 'dense':
            self.lambdas_, self.vectors_ = linalg.eigh(
                K, eigvals=(K.shape[0] - self.n_components, K.shape[0] - 1))
        elif eigen_solver == 'arpack':
            random_state = check_random_state(self.random_state)
            # initialize with [-1,1] as in ARPACK
            v0 = random_state.uniform(-1, 1, K.shape[0])
            self.lambdas_, self.vectors_ = eigsh(K,
                                                 self.n_components,
                                                 which="LA",
                                                 tol=self.tol,
                                                 maxiter=self.max_iter,
                                                 v0=v0)
        # make sure that the eigenvalues are ok and fix numerical issues
        self.lambdas_ = _check_psd_eigenvalues(self.lambdas_,
                                               enable_warnings=False)
        # flip eigenvectors' sign to enforce deterministic output
        self.vectors_, _ = svd_flip(self.vectors_,
                                    np.empty_like(self.vectors_).T)
        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.vectors_ = self.vectors_[:, indices]
        # remove eigenvectors with a zero eigenvalue (null space) if required
        if self.remove_zero_eig:
            self.vectors_ = self.vectors_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]
        return K

    def fit(self, X):
        # No-op: fitting happens in fit_transform.
        return self

    def fit_transform(self, X, y=None):
        """Build the kernel bank from X and return the mixed-kernel projection."""
        #X=normalize(X)
        X = check_array(X, accept_sparse='csr', copy=self.copy_X)
        self.K = self.kernels(X)
        return self.KPCA(self.coeficient)

    def KPCA(self, alpha):
        """Mix the kernel bank with weights `alpha`, center, eigendecompose,
        and return (projection, SuperK)."""
        # Combine the kernels with the given coefficients.
        self.coeficient = alpha
        self.Kernel = self.Kf(self.K)
        # Center the mixed kernel.
        self.Center_Kernel = self._centerer.fit_transform(self.Kernel)
        # Get the eigenvalues and eigenvectors.
        self.Solve(self.Center_Kernel)
        # Dimensionality reduction: scale eigenvectors by sqrt(eigenvalue).
        X_Transform = self.vectors_ * np.sqrt(self.lambdas_)
        #X_Transform=np.matmul(self.Kernel,self.vectors_)
        return X_Transform, self.SuperK

    def Kf(self, K):
        """Weighted sum of the kernel bank using self.coeficient."""
        Kf = np.zeros(K[0].shape)
        for i in range(0, len(self.coeficient)):
            Kf += self.coeficient[i] * K[i]
        return Kf

    def createSuperK(self, K):
        """Stack every kernel (flattened) as columns of one big matrix."""
        data = []
        for i in range(len(K)):
            data.append(np.ravel(K[i])[np.newaxis])
        return np.concatenate(tuple(data), axis=0).T
class KernelFisher(BaseEstimator, ClassifierMixin, TransformerMixin):
    """
    Kernalized Fisher Discriminant Analysis (KDA)

    A classifier with a non-linear decision boundary, generated by fitting
    class conditional densities to the data fisher criteria of maximizing
    between class variance while minimizing within class variance.

    The fisher criteria is used in a non-linear space, by transforming the
    data, X, of dimension D onto a D-dimensional manifold of a D' dimensional
    space (where D' is possibly infinite) using a function f(X). The key to
    solving the problem in the non-linear space is to write the solution to
    fisher only in terms of inner products of the vectors X*Y. Then the
    kernel trick can be employed, such that the standard inner product is
    promoted to a general inner product. That is, K(X,Y) = X*Y -->
    K(X,Y) = f(X)*f(Y), which is allowed for valid Kernels. In this case,
    the function f() does not need to be known, but only the kernel K(X,Y).

    The fitted model can also be used to reduce the dimensionality of the
    input, by projecting it to the most discriminative directions.

    NOTE(review): this class is Python 2 code (`print` statements, `xrange`,
    `check_arrays`, `unique`) — it will not run under Python 3 as written.

    Parameters
    ----------
    use_total_scatter : boolean
        If True then use total scatter matrix St = Sum_i (x_i - m)(x_i - m).T
        instead of Sw.
        If False, use Sw = Sum_{c=1..n_classes} Sum_{i; x in class c}
        norm_c (x_i - m_c)(x_i - m_c).T, where norm_c = 1/N_samples_class_c
        if norm_covariance=True, else norm_c = 1.

    sigma_sqrd : float
        Smooth regularization parameter, which is the size of the singular
        value where smoothing becomes important. NOTE: is a fraction in case
        norm_covariance=False, as a priori the scale of the singular values
        is not known in this case.

    tol : float
        Used for truncated SVD of St. Essentially a form of regularization.
        Tol for SVD(R) is 1e-6, fixed right now.

    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel used for generalized inner product. Default: "linear".

    degree : int, optional
        Degree for poly. Default: 3.

    gamma : float, optional
        Kernel coefficient for rbf, sigmoid and poly kernels.
        Default: 1/n_features.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.

    norm_covariance : boolean
        If true, the covariance of each class will be divided by
        (n_points_in_class - 1). NOTE: not currently used.

    priors : array, optional, shape = [n_classes]
        Priors on classes.

    print_timing : boolean
        Print time for several matrix operations in the algorithm.

    Attributes
    ----------
    `means_` : array-like, shape = [n_components_found_, [n_classes, n_features]]
        Class means, for each component found.
    `priors_` : array-like, shape = [n_classes]
        Class priors (sum to 1).
    `n_components_found_` : int
        Number of fisher components found, which is <= n_components.

    Examples (put fisher.py in working directory)
    --------
    >>> import numpy as np
    >>> from fisher import KernelFisher
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> y = np.array([0, 0, 0, 1, 1, 1])
    >>> fd = KernelFisher()
    >>> fd.fit(X, y)
    KernelFisher(coef0=1, degree=3, gamma=None, kernel='linear',
           norm_covariance=False, print_timing=False, priors=None,
           sigma_sqrd=1e-08, tol=0.001, use_total_scatter=True)
    >>> print(fd.transform([[-0.8, -1]]))
    [[-7.62102356]]]
    """

    def __init__(self, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3,
                 kernel="linear", gamma=None, degree=3, coef0=1,
                 norm_covariance = False, priors=None, print_timing=False):
        self.use_total_scatter = use_total_scatter
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        self.kernel = kernel.lower()
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self._centerer = KernelCenterer()
        self.norm_covariance = norm_covariance
        self.print_timing = print_timing
        self.priors = np.asarray(priors) if priors is not None else None

        # Validate user-supplied priors: non-negative and renormalize to 1.
        if self.priors is not None:
            if (self.priors < 0).any():
                raise ValueError('priors must be non-negative')
            if self.priors.sum() != 1:
                print 'warning: the priors do not sum to 1. Renormalizing'
                self.priors = self.priors / self.priors.sum()

    @property
    def _pairwise(self):
        # Precomputed kernels expect pairwise input.
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        # Generalized inner product via the configured kernel.
        params = {"gamma": self.gamma,
                  "degree": self.degree,
                  "coef0": self.coef0}
        try:
            return pairwise_kernels(X, Y, metric=self.kernel,
                                    filter_params=True, **params)
        except AttributeError:
            raise ValueError("%s is not a valid kernel. Valid kernels are: "
                             "rbf, poly, sigmoid, linear and precomputed."
                             % self.kernel)

    def fit(self, X, y):
        """
        Fit the Kernelized Fisher Discriminant model according to the given
        training data and parameters.

        Based on "Algorithm 5" in
        Zhang, et. al. 'Regularized Discriminant Analysis, Ridge Regression
        and Beyond' Journal of Machine Learning Research 11 (2010) 2199-2228

        NOTE: setting norm_covariance=False and use_total_scatter=True, and
        solution_norm = 'A' or 'B' will give the algorithm from the paper.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array, shape = [n_samples]
            Target values (integers)
        """
        X, y = check_arrays(X, y, sparse_format='dense')
        self.classes_, y = unique( y, return_inverse=True)
        n_samples, n_features = X.shape
        n_classes = len(self.classes_)
        n_samples_perclass = np.bincount(y)
        if n_classes < 2:
            raise ValueError('y has less than 2 classes')
        if self.priors is None:
            self.priors_ = np.bincount(y) / float(n_samples)
        else:
            self.priors_ = self.priors

        # Per-class mean vectors.
        ts = time.time()
        self.means_ = []
        for ind in xrange(n_classes):
            Xg = X[y == ind, :]
            meang = Xg.mean(0)
            self.means_.append(np.asarray(meang))
        if self.print_timing: print 'KernelFisher.fit: means took', time.time() - ts

        ts = time.time()
        # Diagonal matrices of class counts and their (sqrt) inverses.
        PI_diag = np.diag( 1.0*n_samples_perclass )              # shape(PI_diag) = n_classes x n_classes
        PI_inv = np.diag( 1.0 / (1.0*n_samples_perclass) )       # shape(PI_inv) = n_classes x n_classes
        PI_sqrt_inv = np.sqrt( PI_inv )                          # shape(PI_sqrt_inv) = n_classes x n_classes
        #H = np.identity(n_samples) - (1.0/(1.0*n_samples))*np.ones((n_samples,n_samples))
        # Class-indicator matrix: E[i, c] = 1 iff sample i is in class c.
        E=np.zeros( (n_samples,n_classes) )                      # shape(E) = n_samples x n_classes
        E[[range(n_samples),y]]=1
        E_PIsi = np.dot(E, PI_sqrt_inv)
        One_minus_E_Pi_Et = np.identity(n_samples) - np.inner( E, np.inner(PI_diag, E).T )  # shape = n_samples x n_samples
        if self.print_timing: print 'KernelFisher.fit: matrices took', time.time() - ts

        #####################################################################################################################
        # C = HKH = (I - 1/n 1x1.T) K (I - 1/n 1x1.T) = (K - 1xK_mean.T) * (I - 1/n 1x1.T)
        #   = K - K_meanx1.T - 1xK_mean.T + K_allmean 1x1
        # --> which is the same as what self._centerer.fit_transform(C) performs
        #
        # if use_total_scatter=False,
        # then using Sw which is (1-E*Pi*E.T)K(1-E*Pi*E.T)
        #####################################################################################################################
        ts = time.time()
        C = self._get_kernel(X)
        K_mean = np.sum(C, axis=1) / (1.0*C.shape[1])
        if self.use_total_scatter:
            C = self._centerer.fit_transform(C)
        else:
            C = np.inner( One_minus_E_Pi_Et, np.inner(C, One_minus_E_Pi_Et).T)
        if self.print_timing: print 'KernelFisher.fit: Kernel Calculation took', time.time() - ts

        # Truncated SVD of the (regularized) scatter.
        ts = time.time()
        Uc, Sc, Utc, Sc_norm = self.condensed_svd( C, self.tol, store_singular_vals=True )
        if self.print_timing: print 'KernelFisher.fit: Uc, Sc, Utc took', time.time() - ts

        ts = time.time()
        # scale up sigma to appropriate range of singular values
        reg_factor = self.sigma_sqrd * Sc_norm
        St_reg_inv = np.inner( Uc, np.inner(np.diag(1.0/(Sc + reg_factor)), Utc.T).T )
        if self.print_timing: print 'KernelFisher.fit: St_reg_inv took', time.time() - ts

        ts = time.time()
        R = np.inner(E_PIsi.T, np.inner(C, np.inner( St_reg_inv, E_PIsi.T ).T ).T )
        if self.print_timing: print 'KernelFisher.fit: R took', time.time() - ts

        ts = time.time()
        Vr, Lr, Vtr, Lr_norm = self.condensed_svd( R, tol=1e-6 )
        if self.print_timing: print 'KernelFisher.fit: Vr, Lr, Vtr took', time.time() - ts

        ts = time.time()
        #####################################################################################################################
        # This capital Z is Upsilon.T * H from equation (22)
        #####################################################################################################################
        #Z = np.inner( np.diag(1.0 / np.sqrt(Lr)), np.inner(Vtr, np.inner(E_PIsi.T, np.inner(C, St_reg_inv.T ).T ).T ).T )
        Z = np.inner( np.inner( np.inner( np.inner( np.diag(1.0 / np.sqrt(Lr)), Vtr.T), E_PIsi), C.T), St_reg_inv)
        # Row-center Z.
        Z = (Z.T - (Z.sum(axis=1) / (1.0*Z.shape[1])) ).T
        if self.print_timing: print 'KernelFisher.fit: Z took', time.time() - ts

        self.Z = Z
        self.n_components_found_ = Z.shape[0]

        #####################################################################################################################
        # This K_mean is (1/n) K*1_n from equation (22)
        #####################################################################################################################
        self.K_mean = K_mean
        #print Z.shape, K_mean.shape, self.n_components_found_
        self.X_fit_ = X
        return self

    def condensed_svd(self, M, tol=1e-3, store_singular_vals=False):
        """Truncated SVD of M: keep singular values above `tol` fraction of
        the total. Returns (U, S, Vt, sum-of-singular-values)."""
        U, S, Vt = linalg.svd(M, full_matrices=False)
        if store_singular_vals:
            self.singular_vals = S
        # want tolerance on fraction of variance in singular value
        # when not norm_covariance, need to normalize singular values
        S_norm = np.sum(S)
        rank = np.sum( (S/S_norm) > tol )
        return U[:,:rank], S[:rank], Vt[:rank,:], S_norm

    @property
    def classes(self):
        # Deprecated alias for classes_.
        warnings.warn("KernelFisher.classes is deprecated and will be removed in 0.14. "
                      "Use .classes_ instead.", DeprecationWarning, stacklevel=2)
        return self.classes_

    def _decision_function(self, X):
        #X = np.asarray(X)
        return self.transform(X)

    def decision_function(self, X):
        """
        This function returns the decision function values related to each
        class on an array of test vectors X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
            Decision function values related to each class, per sample.
            n_components_found_ is the number of components requested and
            found. NOTE: currently identical to self.transform(X).
        """
        return self._decision_function(X)

    def transform(self, X):
        """
        Project the data so as to maximize class separation (large separation
        between projected class means and small variance within each class).

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
        """
        #X = np.asarray(X)
        #ts = time.time()
        k = self._get_kernel(X, self.X_fit_)
        #if self.print_timing: print 'KernelFisher.transform: k took', time.time() - ts
        #ts = time.time()
        z = np.inner(self.Z, (k-self.K_mean) ).T
        #if self.print_timing: print 'KernelFisher.transform: z took', time.time() - ts
        return z

    def fit_transform(self, X, y, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3):
        """
        Fit the Fisher Discriminant model according to the given training
        data and parameters, then project the data onto up to
        n_components_found_ directions so as to maximize class separation.

        NOTE this function is not clever, it simply runs
        fit(X, y [, ...]).transform(X).
        NOTE(review): fit() as defined above takes only (X, y); the extra
        keyword arguments passed here would raise TypeError — confirm intent.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
        y : array, shape = [n_samples]
            Target values (integers)

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
        """
        return self.fit(X, y, use_total_scatter=use_total_scatter, sigma_sqrd=sigma_sqrd, tol=tol).transform(X)
class KernelFisher(BaseEstimator, ClassifierMixin, TransformerMixin):
    """Kernel Fisher Discriminant Analysis (KFDA).

    Fisher discriminant solved in a kernel-induced feature space: the
    scatter problem is expressed purely through the (centered) kernel
    matrix, so only K(X, Y) is ever evaluated.

    Parameters
    ----------
    sigma_sqrd : float
        Regularization strength; scaled by the sum of singular values of
        the centered kernel matrix before inversion.
    tol : float
        Relative singular-value cutoff for the truncated SVD of the
        centered kernel matrix.
    kernel : str
        Kernel name forwarded to ``pairwise_kernels``
        ("linear", "poly", "rbf", "sigmoid", "precomputed", ...).
    gamma : float, optional
        Kernel coefficient for rbf/poly/sigmoid kernels.
    degree : int
        Degree for poly kernels.
    coef0 : float
        Independent term in poly and sigmoid kernels.
    """

    def __init__(self, sigma_sqrd=1e-8, tol=1.0e-3, kernel="linear",
                 gamma=None, degree=3, coef0=1):
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        self.kernel = kernel.lower()
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self._centerer = KernelCenterer()

    @property
    def _pairwise(self):
        # BUGFIX: the comparison string was misspelled as "kerenl", so a
        # precomputed kernel was never recognized as pairwise input.
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        # Build K(X, Y) with the configured kernel; filter_params drops
        # parameters the chosen metric does not accept.
        params = {
            "gamma": self.gamma,
            "degree": self.degree,
            "coef0": self.coef0
        }
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)

    def fit(self, X, y):
        """Fit the kernel Fisher discriminant from training data.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
        y : array, shape = [n_samples]
            Target values (integers).

        Returns
        -------
        self : object
        """
        X, y = check_X_y(X, y)  # does not accept sparse arrays
        self.classes_, y = np.unique(y, return_inverse=True)
        n_samples, n_features = X.shape
        n_classes = len(self.classes_)
        n_samples_perclass = np.bincount(y)

        # Per-class means, kept for inspection (not used further below).
        self.means_ = []
        for ind in range(n_classes):
            Xg = X[y == ind, :]
            self.means_.append(np.asarray(Xg.mean(0)))

        # Diagonal matrices built from per-class counts
        # (n_classes x n_classes each).
        PI_inv = np.diag(1.0 / (1.0 * n_samples_perclass))
        PI_sqrt_inv = np.sqrt(PI_inv)

        # One-hot class-membership indicator (n_samples x n_classes).
        E = np.zeros((n_samples, n_classes))
        # BUGFIX-adjacent: was `E[[range(n_samples), y]] = 1`, a deprecated
        # list-of-sequences index; integer-array indexing is equivalent.
        E[np.arange(n_samples), y] = 1
        EPI = np.dot(E, PI_sqrt_inv)

        # Kernel matrix, its column mean (needed later by transform),
        # and double-centering.
        C = self._get_kernel(X)
        K_mean = np.sum(C, axis=1) / (1.0 * C.shape[1])
        C = self._centerer.fit_transform(C)

        # Truncated SVD of the centered kernel, then a smoothly
        # regularized inverse of the total scatter in feature space.
        Uc, Sc, Utc, Sc_norm = self.svd_comp(C, self.tol, flag=True)
        reg_factor = self.sigma_sqrd * Sc_norm
        St_reg_inv = np.inner(
            Uc, np.inner(np.diag(1.0 / (Sc + reg_factor)), Utc.T).T)

        # Between-class problem projected through the regularized inverse.
        R = np.inner(EPI.T, np.inner(C, np.inner(St_reg_inv, EPI.T).T).T)
        Vr, Lr, Vtr, Lr_norm = self.svd_comp(R, tol=1e-6)

        # Discriminant directions expressed in kernel coordinates;
        # rows are components.
        Z = np.inner(
            np.inner(
                np.inner(np.inner(np.diag(1.0 / np.sqrt(Lr)), Vtr.T), EPI),
                C.T),
            St_reg_inv)
        # Center each component over the training samples.
        Z = (Z.T - (Z.sum(axis=1) / (1.0 * Z.shape[1]))).T

        self.Z = Z
        self.n_components_found_ = Z.shape[0]
        self.K_mean = K_mean
        self.X_fit_ = X
        return self

    def svd_comp(self, M, tol=1e-3, flag=False):
        """Truncated SVD of M, keeping singular values above tol * sum(S).

        Returns (U_trunc, S_trunc, Vt_trunc, sum_of_all_singular_values).
        When ``flag`` is True the full singular spectrum is stored in
        ``self.singular_vals``.
        """
        U, S, Vt = linalg.svd(M, full_matrices=False)
        if flag:
            self.singular_vals = S
        S_norm = np.sum(S)
        rank = np.sum((S / S_norm) > tol)
        return U[:, :rank], S[:rank], Vt[:rank, :], S_norm

    @property
    def classes(self):
        # Legacy alias for classes_.
        return self.classes_

    def _decision_function(self, X):
        return self.transform(X)

    def decision_function(self, X):
        """Decision-function values; currently identical to transform(X)."""
        return self._decision_function(X)

    def transform(self, X):
        """Project X onto the discriminant directions found by fit.

        Returns
        -------
        z : array, shape = [n_samples, n_components_found_]
        """
        k = self._get_kernel(X, self.X_fit_)
        # Subtract the training-kernel column mean so the projection is
        # consistent with the centered kernel used in fit.
        z = np.inner(self.Z, (k - self.K_mean)).T
        return z

    def fit_transform(self, X, y, sigma_sqrd=1e-8, tol=1.0e-3):
        """Fit with the given regularization settings, then project X.

        BUGFIX: this previously forwarded ``sigma_sqrd``/``tol`` as
        keyword arguments to ``fit``, which does not accept them and
        raised TypeError on every call. They now override the instance
        attributes (their defaults match ``__init__``) before fitting.
        """
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        return self.fit(X, y).transform(X)
#S = np.sqrt(eVals)[::-1] #reverse since eigenvalues are in increasing order #Y=S.T.dot(X_std) # Make a list of (eigenvalue, eigenvector) tuples # d×k-dimensional eigenvector matrix W. #W = [(abs(eVals[i]), eVecs[:,i]) for i in range(len(eVals))] #Y=X×W #Y=X_std.dot(eVecs.real) #Y=np.dot(eVecs.T, X_std.T).T #Kernel_PCA since d>>n K = X.dot(X.T) #Centering Kernel since data has to be standardizied kern_cent = KernelCenterer() S = kern_cent.fit_transform(K.toarray()) #val,vec=linalg.eigs(S,k,which='LM') eig_vals, eig_vecs = np.linalg.eig(S) eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))] # Sort the (eigenvalue, eigenvector) tuples from high to low eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True) vec = np.array([eig_pairs[i][1] for i in range(k)]) vec = vec.T # to make eigen vector matrix nxk # d×k-dimensional eigenvector matrix W. W = X.T.dot(vec) Y = X.dot(W)
class KernelPCA(TransformerMixin, BaseEstimator):
    """Multiple-kernel PCA.

    Builds a bank of base kernels (MDS-style, standard pairwise kernels,
    several rbf bandwidths, and graph-based kernels from spectral/LLE/
    Isomap embeddings), combines them with per-kernel coefficients, and
    performs kernel PCA on the combined, centered kernel.

    NOTE(review): KLE/KIsomap/KLLE call a `.K(X)` method and `KCMDS(X)`
    is an external helper — neither is standard scikit-learn API;
    presumably patched/project-local versions are used. Verify.
    """

    def __init__(self, degree=3, coef0=1, kernel_params=None, alpha=1.0,
                 eigen_solver='auto', neigh=8, tol=0, max_iter=None,
                 remove_zero_eig=True, n_components=2, random_state=None,
                 n_jobs=None, coeficient=None, nkernel=10):
        self.kernel_params = kernel_params
        # gamma is fixed here and mutated later in kernels(); the `gamma`
        # constructor-style parameter is intentionally absent.
        self.gamma = 0.0001
        self.neigh = neigh
        self.nkernel = nkernel
        self.n_components = n_components
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = random_state
        self.n_jobs = n_jobs
        self._centerer = KernelCenterer()
        self.coeficient = coeficient

    def KLE(self, X, neigh):
        # Kernel derived from a Laplacian-eigenmaps embedding.
        LE = SpectralEmbedding(n_neighbors=neigh)
        return LE.K(X)

    def KIsomap(self, X, neigh):
        # Kernel derived from an Isomap embedding.
        Iso = Isomap(n_neighbors=neigh)
        return Iso.K(X)

    def KLLE(self, X, neigh):
        # Kernel derived from a locally-linear embedding.
        LLE = LocallyLinearEmbedding(n_neighbors=neigh)
        return LLE.K(X)

    def kernels(self, X):
        """Build the full bank of candidate kernel matrices for X."""
        kern = [
            'linear', 'poly', 'polynomial', 'rbf', 'laplacian', 'sigmoid',
            'cosine'
        ]
        # Graph kernels need neighbors < n_samples.
        if self.neigh > len(X):
            self.neigh = len(X) - 3
        tkernel = len(kern) + 3 * self.neigh - 1
        K = []
        K.append((KCMDS(X)))
        for i in kern:
            if i == 'rbf':
                # One rbf kernel per bandwidth gamma = 0.00, 0.01, ...
                # NOTE(review): with the defaults (nkernel=10, neigh=8)
                # this range is empty, so no rbf kernels are added, and
                # j=0 would mean gamma=0 — confirm intent.
                for j in range(0, self.nkernel - tkernel):
                    self.gamma = (j) * 0.01
                    K.append((self._get_kernel(X, i)))
            else:
                K.append((self._get_kernel(X, i)))
        # Graph-based kernels over a range of neighborhood sizes.
        for i in range(2, 2 + self.neigh):
            K.append((self.KLE(X, i)))
            K.append((self.KLLE(X, i)))
            K.append((self.KIsomap(X, i)))
        #In case that coefficients weren't set
        # NOTE(review): `not self.coeficient` raises ValueError once
        # coeficient is a multi-element ndarray (ambiguous truth value);
        # presumably `self.coeficient is None` was intended.
        if not (self.coeficient):
            self.coeficient = np.zeros(len(K))
            self.coeficient[0] = 1  #Linear Kernel
        self.SuperK = self.createSuperK(K)
        return K

    def _get_kernel(self, X, kernel):
        # Pairwise kernel for the current gamma/degree/coef0 settings.
        params = {
            "gamma": self.gamma,
            "degree": self.degree,
            "coef0": self.coef0
        }
        return pairwise_kernels(X, None, metric=kernel, filter_params=True,
                                n_jobs=self.n_jobs, **params)

    def Solve(self, K):
        """Eigendecompose the centered kernel, keeping the top components."""
        #GET EIGENVALUES AND EIGENVECTOR THE CENTER KERNEL
        self.lambdas_, self.vectors_ = linalg.eigh(
            K, eigvals=(K.shape[0] - self.n_components, K.shape[0] - 1))
        # make sure that the eigenvalues are ok and fix numerical issues
        self.lambdas_ = _check_psd_eigenvalues(self.lambdas_,
                                               enable_warnings=False)
        # flip eigenvectors' sign to enforce deterministic output
        self.vectors_, _ = svd_flip(self.vectors_,
                                    np.empty_like(self.vectors_).T)
        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.vectors_ = self.vectors_[:, indices]
        # remove eigenvectors with a zero eigenvalue (null space) if required
        if self.remove_zero_eig:
            self.vectors_ = self.vectors_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]
        return K

    def fit_transform(self, X, y=None):
        """Build the kernel bank from X and run weighted kernel PCA."""
        #X=normalize(X)
        X = check_array(X, accept_sparse='csr', copy=True)
        self.K = self.kernels(X)
        return self.KPCA(self.coeficient)

    def KPCA(self, alpha):
        """Combine kernels with weights `alpha`, center, and project.

        Returns (X_Transform, SuperK): the embedded data and the matrix of
        all flattened base kernels.
        """
        ##GET THE KERNEL WITH ALPHAS
        self.coeficient = alpha
        self.Kernel = self.Kf(self.K)
        #CENTER THE KERNEL
        self.Center_Kernel = self._centerer.fit_transform(self.Kernel)
        #GET THE EIGENVALUES AND EIGENVECTORS
        self.Solve(self.Center_Kernel)
        #GET THE DIMENSIONAL REDUCTION
        X_Transform = self.vectors_ * np.sqrt(self.lambdas_)
        #X_Transform=np.matmul(self.Kernel,self.vectors_)
        return X_Transform, self.SuperK

    def Kf(self, K):
        """Weighted sum of the base kernels using self.coeficient."""
        Kf = np.zeros(K[0].shape)
        for i in range(0, len(self.coeficient)):
            Kf += self.coeficient[i] * K[i]
        return Kf

    def createSuperK(self, K):
        """Stack every base kernel, flattened, as columns of one matrix."""
        data = []
        for i in range(len(K)):
            data.append(np.ravel(K[i])[np.newaxis])
        return np.concatenate(tuple(data), axis=0).T
def ALIGNFSOFT(kernel_list, ky, y, test_fold, tags):
    # Find best upper bound in CV and train on whole data
    # Return the weights
    #
    # Soft-margin kernel alignment: for each inner validation fold, solve
    # ALIGNFSLACK over a grid of upper bounds C, score each resulting
    # kernel combination with an SVM, and keep the best C; finally solve
    # on the full data with the averaged best C.
    # Returns (bestC, kernel weight vector).
    # NOTE(review): ALIGNFSLACK and svm are external helpers not visible
    # here; Python 2 code (print statements, xrange).
    y = y.ravel()
    n_km = len(kernel_list)
    # Drop the outer test fold entirely; inner CV uses only the rest.
    tag = np.array(tags)
    tag = tag[tag!=test_fold]
    remain_fold = np.unique(tag).tolist()
    all_best_c = []
    for validate_fold in remain_fold:
        train = tag != validate_fold
        validate = tag == validate_fold
        # train on train fold, validate on validate_fold.
        # Do not use test fold. test fold used in outer cv
        ky_train = ky[np.ix_(train, train)]
        y_train = y[train]
        y_validate = y[validate]
        train_km_list = []
        validate_km_list = []
        n_train = len(y_train)
        n_validate = len(y_validate)
        for km in kernel_list:
            # One centerer per kernel: validate rows are centered with the
            # statistics fitted on the train block.
            kc = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            validate_km = km[np.ix_(validate, train)]
            # center train and validate kernels
            train_km_c = kc.fit_transform(train_km)
            train_km_list.append(train_km_c)
            validate_km_c = kc.transform(validate_km)
            validate_km_list.append(validate_km_c)
        # if the label is too biased, SVM CV will fail, just return ALIGNF solution
        if np.sum(y_train==1) > n_train-3 or np.sum(y_train==-1) > n_train-3:
            return 1e8, ALIGNFSLACK(train_km_list, ky_train, 1e8)
        # Candidate upper bounds: 2^-9 .. 2^6, plus effectively unbounded.
        Cs = np.exp2(np.array(range(-9,7))).tolist() + [1e8]
        W = np.zeros((n_km, len(Cs)))
        for i in xrange(len(Cs)):
            W[:,i] = ALIGNFSLACK(train_km_list, ky_train, Cs[i])
        # Normalize each weight vector (column) to unit L2 norm.
        W = W / np.linalg.norm(W, 2, 0)
        f1 = np.zeros(len(Cs))
        for i in xrange(len(Cs)):
            # Build the weighted kernel combination for this C and score it.
            train_ckm = np.zeros((n_train,n_train))
            validate_ckm = np.zeros((n_validate,n_train))
            w = W[:,i]
            for j in xrange(n_km):
                train_ckm += w[j]*train_km_list[j]
                validate_ckm += w[j]*validate_km_list[j]
            f1[i] = svm(train_ckm, validate_ckm, y_train, y_validate)
        # return the first maximum
        maxind = np.argmax(f1)
        bestC = Cs[maxind]
        all_best_c.append(bestC)
        print f1
        print "..Best C is", bestC
    # Average the per-fold winners and solve once on the whole data.
    bestC = np.mean(all_best_c)
    print "..Take the average best upper bound", bestC
    # use the best upper bound to solve ALIGNFSOFT
    return bestC, ALIGNFSLACK(kernel_list, ky, bestC)
def AirbnbKNN_score(dc_listings):
    """KNN price-regression experiment on Airbnb listings.

    Filters/engineers features according to module-level flags, optionally
    normalizes, then repeatedly trains a KNN regressor on random splits
    and reports/visualizes the average R^2 score.

    NOTE(review): relies on module-level globals (filter_flag,
    cleaning_fee_flag, security_deposit_flag, independent_flag,
    response_flag, review_flag, room_type_flag, distance_flag,
    normalizer_flag) and an external viz() helper — confirm they are
    defined where this is called.
    """
    # Data selection: optionally drop extreme-price listings.
    if (filter_flag == True):
        house_features = dc_listings[dc_listings.price < 1000]
    else:
        house_features = dc_listings
    # Drop listings that never transacted.
    house_features = house_features.dropna(subset=['host_acceptance_rate'
                                                   ])
    # Remove redundant columns.
    del house_features['city']
    del house_features['zipcode']
    del house_features['state']
    del house_features['minimum_nights']
    del house_features['maximum_nights']
    del house_features['host_listings_count']
    if (cleaning_fee_flag == False):
        del house_features['cleaning_fee']
    if (security_deposit_flag == False):
        del house_features['security_deposit']
    if (independent_flag == True):
        del house_features['bedrooms']
        del house_features['beds']
    if (response_flag == False):
        del house_features['host_response_rate']
        del house_features['host_acceptance_rate']
    if (review_flag == False):
        del house_features['number_of_reviews']
    if (room_type_flag == False):
        del house_features['room_type']
    # Replace latitude/longitude with a single distance-to-Capitol feature.
    if (distance_flag == True):
        DC_capital_lat = 38.889931
        DC_capital_long = -77.009003
        distance = ((house_features['latitude'] - DC_capital_lat)**2 +
                    (house_features['longitude'] - DC_capital_long)**2)**0.5
        house_features['latitude'] = distance
        del house_features['longitude']
    # Build the KNN inputs.
    house_features = house_features.fillna(0)  # fill NaNs in cleaning fee etc.
    AirbnbKNN_X = house_features
    AirbnbKNN_y = np.array(house_features['price'])
    del AirbnbKNN_X['price']
    AirbnbKNN_X = np.array(AirbnbKNN_X)
    print(house_features.iloc[0])
    # Normalization.
    if (normalizer_flag == "min_max_scaler"):
        from sklearn import preprocessing
        min_max_scaler = preprocessing.MinMaxScaler()
        AirbnbKNN_X = min_max_scaler.fit_transform(AirbnbKNN_X)
    if (normalizer_flag == "kernel_centerer"):
        from sklearn.preprocessing import KernelCenterer
        # NOTE(review): KernelCenterer expects a square kernel (Gram)
        # matrix, not a raw feature matrix, and fit followed by
        # fit_transform refits redundantly — confirm this branch is
        # intentional.
        kernel_centerer = KernelCenterer().fit(AirbnbKNN_X)
        AirbnbKNN_X = kernel_centerer.fit_transform(AirbnbKNN_X)
    if (normalizer_flag == "standard_scaler"):
        from sklearn.preprocessing import StandardScaler
        standard_scaler = StandardScaler()
        AirbnbKNN_X = standard_scaler.fit_transform(AirbnbKNN_X)
    # Split the data, train the model, predict, and evaluate.
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsRegressor
    knn = KNeighborsRegressor(n_neighbors=16)  # KNN regressor from sklearn
    total_score = 0.0
    trial_number = 1000
    score_list = []
    for i in range(trial_number):
        X_train, X_test, y_train, y_test = train_test_split(AirbnbKNN_X,
                                                            AirbnbKNN_y,
                                                            test_size=0.3)
        knn.fit(X_train, y_train)
        #print(knn.predict(X_test))  # knn here is already trained
        #print(y_test)  # compare against the ground truth
        score = knn.score(X_test, y_test)
        total_score += score
        score_list.append(score)
        if (i == 0):
            viz(score, knn.predict(X_test), y_test, 1)
    avg_score = total_score / trial_number
    viz(avg_score, score_list, y_test, 2)
    print("Trial: " + str(trial_number) + " times")
class KernelPCA(BaseEstimator, TransformerMixin):
    """Kernel Principal component analysis (KPCA)

    Non-linear dimensionality reduction through the use of kernels (see
    :ref:`metrics`).

    Parameters
    ----------
    n_components: int or None
        Number of components. If None, all non-zero components are kept.

    kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel.
        Default: "linear"

    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.

    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for kernel passed as
        callable object. Ignored by other kernels.

    alpha: int
        Hyperparameter of the ridge regression that learns the
        inverse transform (when fit_inverse_transform=True).
        Default: 1.0

    fit_inverse_transform: bool
        Learn the inverse transform for non-precomputed kernels.
        (i.e. learn to find the pre-image of a point)
        Default: False

    eigen_solver: string ['auto'|'dense'|'arpack']
        Select eigensolver to use. If n_components is much less than
        the number of training samples, arpack may be more efficient
        than the dense eigensolver.

    tol: float
        convergence tolerance for arpack.
        Default: 0 (optimal value will be chosen by arpack)

    max_iter : int
        maximum number of iterations for arpack
        Default: None (optimal value will be chosen by arpack)

    remove_zero_eig : boolean, default=True
        If True, then all components with zero eigenvalues are removed, so
        that the number of components in the output may be < n_components
        (and sometimes even zero due to numerical instability).
        When n_components is None, this parameter is ignored and components
        with zero eigenvalues are removed regardless.

    Attributes
    ----------
    lambdas_ :
        Eigenvalues of the centered kernel matrix

    alphas_ :
        Eigenvectors of the centered kernel matrix

    evals_ : array[float], shape=(n_features)
        All eigenvalues of centered kernel matrix
        NOTE(review): only populated when the dense eigensolver runs.

    evecs_ : array[float, float], shape=(n_features, n_samples)
        All eigenvectors of centered kernel matrix
        NOTE(review): only populated when the dense eigensolver runs.

    dual_coef_ :
        Inverse transform matrix

    X_transformed_fit_ :
        Projection of the fitted data on the kernel principal components

    References
    ----------
    Kernel PCA was introduced in:
        Bernhard Schoelkopf, Alexander J. Smola,
        and Klaus-Robert Mueller. 1999. Kernel principal
        component analysis. In Advances in kernel methods,
        MIT Press, Cambridge, MA, USA 327-352.
    """

    def __init__(self, n_components=None, kernel="linear", gamma=None,
                 degree=3, coef0=1, kernel_params=None, alpha=1.0,
                 fit_inverse_transform=False, eigen_solver='auto', tol=0,
                 max_iter=None, remove_zero_eig=False):
        if fit_inverse_transform and kernel == 'precomputed':
            raise ValueError(
                "Cannot fit_inverse_transform with a precomputed kernel.")
        self.n_components = n_components
        self.kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self._centerer = KernelCenterer()

    @property
    def _pairwise(self):
        # Tells sklearn utilities that X is itself a kernel matrix when
        # kernel="precomputed".
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        # Callable kernels get kernel_params; named kernels get the
        # gamma/degree/coef0 trio (filter_params drops the unused ones).
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {"gamma": self.gamma,
                      "degree": self.degree,
                      "coef0": self.coef0}
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)

    def _fit_transform(self, K):
        """ Fit's using kernel K"""
        # center kernel
        K = self._centerer.fit_transform(K)

        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            # eigh returns eigenvalues in ascending order; the eigvals
            # window selects only the top n_components.
            self.lambdas_, self.alphas_ = linalg.eigh(
                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
            # NOTE(review): this second, full decomposition roughly doubles
            # the dense-path cost; it exists only to expose evals_/evecs_.
            self.evals_, self.evecs_ = linalg.eigh(K)
        elif eigen_solver == 'arpack':
            self.lambdas_, self.alphas_ = eigsh(K, n_components,
                                                which="LA",
                                                tol=self.tol,
                                                maxiter=self.max_iter)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove eigenvectors with a zero eigenvalue
        if self.remove_zero_eig or self.n_components is None:
            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K

    def _fit_inverse_transform(self, X_transformed, X):
        if hasattr(X, "tocsr"):
            raise NotImplementedError("Inverse transform not implemented for "
                                      "sparse matrices!")

        n_samples = X_transformed.shape[0]
        K = self._get_kernel(X_transformed)
        # Ridge regularization: add alpha to the diagonal in place.
        K.flat[::n_samples + 1] += self.alpha
        self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)
        self.X_transformed_fit_ = X_transformed

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples in the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        K = self._get_kernel(X)
        self._fit_transform(K)

        if self.fit_inverse_transform:
            # Projected training data: alphas scaled by sqrt(eigenvalues).
            sqrt_lambdas = np.diag(np.sqrt(self.lambdas_))
            X_transformed = np.dot(self.alphas_, sqrt_lambdas)
            self._fit_inverse_transform(X_transformed, X)

        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples in the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        self.fit(X, **params)

        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)

        if self.fit_inverse_transform:
            self._fit_inverse_transform(X_transformed, X)

        return X_transformed

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')

        # Center the test kernel with the statistics fitted on training data.
        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        # NOTE(review): divides by sqrt(lambdas_); if remove_zero_eig is
        # False a zero eigenvalue would produce inf/nan here — confirm.
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def inverse_transform(self, X):
        """Transform X back to original space.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_components)

        Returns
        -------
        X_new: array-like, shape (n_samples, n_features)

        References
        ----------
        "Learning to Find Pre-Images", G BakIr et al, 2004.
        """
        if not self.fit_inverse_transform:
            raise NotFittedError("The fit_inverse_transform parameter was not"
                                 " set to True when instantiating and hence "
                                 "the inverse transform is not available.")

        K = self._get_kernel(X, self.X_transformed_fit_)
        return np.dot(K, self.dual_coef_)
class KernelPCA(BaseEstimator, TransformerMixin):
    """Kernel Principal component analysis (KPCA)

    Non-linear dimensionality reduction through the use of kernels (see
    :ref:`metrics`).

    Read more in the :ref:`User Guide <kernel_PCA>`.

    Parameters
    ----------
    n_components : int, default=None
        Number of components. If None, all non-zero components are kept.

    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel. Default="linear".

    gamma : float, default=1/n_features
        Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by
        other kernels.

    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.

    coef0 : float, default=1
        Independent term in poly and sigmoid kernels. Ignored by other
        kernels.

    kernel_params : mapping of string to any, default=None
        Parameters (keyword arguments) and values for kernel passed as
        callable object. Ignored by other kernels.

    alpha : int, default=1.0
        Hyperparameter of the ridge regression that learns the
        inverse transform (when fit_inverse_transform=True).

    fit_inverse_transform : bool, default=False
        Learn the inverse transform for non-precomputed kernels.
        (i.e. learn to find the pre-image of a point)

    eigen_solver : string ['auto'|'dense'|'arpack'], default='auto'
        Select eigensolver to use. If n_components is much less than
        the number of training samples, arpack may be more efficient
        than the dense eigensolver.

    tol : float, default=0
        Convergence tolerance for arpack.
        If 0, optimal value will be chosen by arpack.

    max_iter : int, default=None
        Maximum number of iterations for arpack.
        If None, optimal value will be chosen by arpack.

    remove_zero_eig : boolean, default=False
        If True, then all components with zero eigenvalues are removed, so
        that the number of components in the output may be < n_components
        (and sometimes even zero due to numerical instability).
        When n_components is None, this parameter is ignored and components
        with zero eigenvalues are removed regardless.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Used when ``eigen_solver`` == 'arpack'.

        .. versionadded:: 0.18

    copy_X : boolean, default=True
        If True, input X is copied and stored by the model in the `X_fit_`
        attribute. If no further changes will be done to X, setting
        `copy_X=False` saves memory by storing a reference.

        .. versionadded:: 0.18

    n_jobs : int, default=1
        The number of parallel jobs to run.
        If `-1`, then the number of jobs is set to the number of CPU cores.

        .. versionadded:: 0.18

    Attributes
    ----------
    lambdas_ : array, (n_components,)
        Eigenvalues of the centered kernel matrix in decreasing order.
        If `n_components` and `remove_zero_eig` are not set,
        then all values are stored.

    alphas_ : array, (n_samples, n_components)
        Eigenvectors of the centered kernel matrix. If `n_components` and
        `remove_zero_eig` are not set, then all components are stored.

    dual_coef_ : array, (n_samples, n_features)
        Inverse transform matrix. Set if `fit_inverse_transform` is True.

    X_transformed_fit_ : array, (n_samples, n_components)
        Projection of the fitted data on the kernel principal components.

    X_fit_ : (n_samples, n_features)
        The data used to fit the model. If `copy_X=False`, then `X_fit_` is
        a reference. This attribute is used for the calls to transform.

    References
    ----------
    Kernel PCA was introduced in:
        Bernhard Schoelkopf, Alexander J. Smola,
        and Klaus-Robert Mueller. 1999. Kernel principal
        component analysis. In Advances in kernel methods,
        MIT Press, Cambridge, MA, USA 327-352.
    """

    def __init__(self, n_components=None, kernel="linear",
                 gamma=None, degree=3, coef0=1, kernel_params=None,
                 alpha=1.0, fit_inverse_transform=False, eigen_solver='auto',
                 tol=0, max_iter=None, remove_zero_eig=False,
                 random_state=None, copy_X=True, n_jobs=1):
        if fit_inverse_transform and kernel == 'precomputed':
            raise ValueError(
                "Cannot fit_inverse_transform with a precomputed kernel.")
        self.n_components = n_components
        self.kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self._centerer = KernelCenterer()
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.copy_X = copy_X

    @property
    def _pairwise(self):
        # Tells sklearn utilities that X is itself a kernel matrix when
        # kernel="precomputed".
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        # Callable kernels get kernel_params; named kernels get the
        # gamma/degree/coef0 trio (filter_params drops the unused ones).
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {
                "gamma": self.gamma,
                "degree": self.degree,
                "coef0": self.coef0
            }
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, n_jobs=self.n_jobs,
                                **params)

    def _fit_transform(self, K):
        """ Fit's using kernel K"""
        # center kernel
        K = self._centerer.fit_transform(K)

        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            # eigh returns eigenvalues in ascending order; the eigvals
            # window selects only the top n_components.
            self.lambdas_, self.alphas_ = linalg.eigh(
                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
        elif eigen_solver == 'arpack':
            random_state = check_random_state(self.random_state)
            # initialize with [-1,1] as in ARPACK
            v0 = random_state.uniform(-1, 1, K.shape[0])
            self.lambdas_, self.alphas_ = eigsh(K, n_components,
                                                which="LA",
                                                tol=self.tol,
                                                maxiter=self.max_iter,
                                                v0=v0)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove eigenvectors with a zero eigenvalue
        if self.remove_zero_eig or self.n_components is None:
            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K

    def _fit_inverse_transform(self, X_transformed, X):
        if hasattr(X, "tocsr"):
            raise NotImplementedError("Inverse transform not implemented for "
                                      "sparse matrices!")

        n_samples = X_transformed.shape[0]
        K = self._get_kernel(X_transformed)
        # Ridge regularization: add alpha to the diagonal in place.
        K.flat[::n_samples + 1] += self.alpha
        self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)
        self.X_transformed_fit_ = X_transformed

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples in the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = check_array(X, accept_sparse='csr', copy=self.copy_X)
        K = self._get_kernel(X)
        self._fit_transform(K)

        if self.fit_inverse_transform:
            # Projected training data: alphas scaled by sqrt(eigenvalues).
            sqrt_lambdas = np.diag(np.sqrt(self.lambdas_))
            X_transformed = np.dot(self.alphas_, sqrt_lambdas)
            self._fit_inverse_transform(X_transformed, X)

        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and return the eigendecomposition.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples in the number of samples
            and n_features is the number of features.

        Returns
        -------
        (alphas_, lambdas_) : tuple of arrays
            Eigenvectors and eigenvalues of the centered kernel matrix.
            NOTE(review): unlike standard scikit-learn KernelPCA, this
            variant does NOT return the transformed data; the projection
            is alphas_ * sqrt(lambdas_) (computed below but discarded).
        """
        self.fit(X, **params)

        # Computed for the inverse-transform fit below, but not returned.
        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)

        if self.fit_inverse_transform:
            self._fit_inverse_transform(X_transformed, X)

        return self.alphas_, self.lambdas_

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')

        # Center the test kernel with the statistics fitted on training data.
        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def get_eigen(self):
        # Accessor mirroring fit_transform's return value.
        return self.alphas_, self.lambdas_

    def inverse_transform(self, X):
        """Transform X back to original space.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_components)

        Returns
        -------
        X_new : array-like, shape (n_samples, n_features)

        References
        ----------
        "Learning to Find Pre-Images", G BakIr et al, 2004.
        """
        if not self.fit_inverse_transform:
            raise NotFittedError("The fit_inverse_transform parameter was not"
                                 " set to True when instantiating and hence "
                                 "the inverse transform is not available.")

        K = self._get_kernel(X, self.X_transformed_fit_)
        return np.dot(K, self.dual_coef_)
class KernelFisher(BaseEstimator, ClassifierMixin, TransformerMixin):
    """Kernelized Fisher Discriminant Analysis (KDA).

    Fisher's criterion (maximize between-class variance while minimizing
    within-class variance) solved in a kernel-induced feature space: the
    solution is written purely in terms of inner products, so the kernel
    trick K(X, Y) = f(X).f(Y) applies without knowing the mapping f.
    The fitted model can also reduce dimensionality by projecting onto
    the most discriminative directions.

    Parameters
    ----------
    use_total_scatter : boolean
        If True, use the total scatter matrix St; if False, use the
        within-class scatter Sw (see fit()).
    sigma_sqrd : float
        Smooth regularization: singular-value scale at which smoothing
        becomes important.  NOTE: interpreted as a fraction when
        norm_covariance=False, since the singular-value scale is not
        known a priori in that case.
    tol : float
        Tolerance for the truncated SVD of St (a form of
        regularization).  The SVD of R uses a fixed tolerance of 1e-6.
    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel used for the generalized inner product.  Default: "linear".
    degree : int, optional
        Degree for the poly kernel.  Default: 3.
    gamma : float, optional
        Kernel coefficient for rbf, sigmoid and poly kernels.
        Default: 1/n_features.
    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
    norm_covariance : boolean
        If True, each class covariance is divided by
        (n_points_in_class - 1).  NOTE: not currently used.
    priors : array, optional, shape = [n_classes]
        Priors on classes.
    print_timing : boolean
        Print the time taken by the main matrix operations.

    Attributes
    ----------
    means_ : list of per-class mean vectors.
    priors_ : array-like, shape = [n_classes]
        Class priors (sum to 1).
    n_components_found_ : int
        Number of fisher components found.
    """

    def __init__(self, use_total_scatter=True, sigma_sqrd=1e-8, tol=1.0e-3,
                 kernel="linear", gamma=None, degree=3, coef0=1,
                 norm_covariance=False, priors=None, print_timing=False):
        self.use_total_scatter = use_total_scatter
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        self.kernel = kernel.lower()
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self._centerer = KernelCenterer()
        self.norm_covariance = norm_covariance
        self.print_timing = print_timing
        self.priors = np.asarray(priors) if priors is not None else None
        if self.priors is not None:
            if (self.priors < 0).any():
                raise ValueError('priors must be non-negative')
            if self.priors.sum() != 1:
                # FIX: converted Python 2 `print` statements to Python 3
                # calls throughout this class -- the module already uses
                # f-strings elsewhere, so py2 syntax made it unimportable.
                print('warning: the priors do not sum to 1. Renormalizing')
                self.priors = self.priors / self.priors.sum()

    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        """Pairwise kernel matrix between rows of X (and Y if given)."""
        params = {
            "gamma": self.gamma,
            "degree": self.degree,
            "coef0": self.coef0
        }
        try:
            return pairwise_kernels(X, Y, metric=self.kernel,
                                    filter_params=True, **params)
        except AttributeError:
            raise ValueError("%s is not a valid kernel. Valid kernels are: "
                             "rbf, poly, sigmoid, linear and precomputed."
                             % self.kernel)

    def fit(self, X, y):
        """Fit the Kernelized Fisher Discriminant model.

        Based on "Algorithm 5" in Zhang et al., 'Regularized Discriminant
        Analysis, Ridge Regression and Beyond', JMLR 11 (2010) 2199-2228.
        NOTE: norm_covariance=False with use_total_scatter=True gives the
        algorithm from the paper.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors.
        y : array, shape = [n_samples]
            Target values (integers).
        """
        X, y = check_X_y(X, y)  # does not accept sparse arrays
        self.classes_, y = unique(y, return_inverse=True)
        n_samples, n_features = X.shape
        n_classes = len(self.classes_)
        n_samples_perclass = np.bincount(y)
        if n_classes < 2:
            raise ValueError('y has less than 2 classes')
        if self.priors is None:
            self.priors_ = np.bincount(y) / float(n_samples)
        else:
            self.priors_ = self.priors

        ts = time.time()
        self.means_ = []
        for ind in range(n_classes):
            Xg = X[y == ind, :]
            meang = Xg.mean(0)
            self.means_.append(np.asarray(meang))
        if self.print_timing:
            print('KernelFisher.fit: means took', time.time() - ts)

        ts = time.time()
        # class-count diagonal and its (sqrt-)inverse; all n_classes x n_classes
        PI_diag = np.diag(1.0 * n_samples_perclass)
        PI_inv = np.diag(1.0 / (1.0 * n_samples_perclass))
        PI_sqrt_inv = np.sqrt(PI_inv)
        # E is the one-hot class-indicator matrix, n_samples x n_classes.
        E = np.zeros((n_samples, n_classes))
        # FIX: `E[[range(n_samples), y]] = 1` relied on list-of-arrays fancy
        # indexing, which NumPy removed (>= 1.23); index with a tuple of
        # arrays instead.
        E[np.arange(n_samples), y] = 1
        E_PIsi = np.dot(E, PI_sqrt_inv)
        One_minus_E_Pi_Et = np.identity(n_samples) - np.inner(
            E, np.inner(PI_diag, E).T)  # n_samples x n_samples
        if self.print_timing:
            print('KernelFisher.fit: matrices took', time.time() - ts)

        # C = HKH = (I - 1/n 1x1.T) K (I - 1/n 1x1.T), which is exactly what
        # self._centerer.fit_transform(C) computes.  If use_total_scatter is
        # False, use Sw = (1 - E*Pi*E.T) K (1 - E*Pi*E.T) instead.
        ts = time.time()
        C = self._get_kernel(X)
        K_mean = np.sum(C, axis=1) / (1.0 * C.shape[1])
        if self.use_total_scatter:
            C = self._centerer.fit_transform(C)
        else:
            C = np.inner(One_minus_E_Pi_Et, np.inner(C, One_minus_E_Pi_Et).T)
        if self.print_timing:
            print('KernelFisher.fit: Kernel Calculation took', time.time() - ts)

        ts = time.time()
        Uc, Sc, Utc, Sc_norm = self.condensed_svd(C, self.tol,
                                                  store_singular_vals=True)
        if self.print_timing:
            print('KernelFisher.fit: Uc, Sc, Utc took', time.time() - ts)

        ts = time.time()
        # scale sigma up to the range of the singular values
        reg_factor = self.sigma_sqrd * Sc_norm
        St_reg_inv = np.inner(
            Uc, np.inner(np.diag(1.0 / (Sc + reg_factor)), Utc.T).T)
        if self.print_timing:
            print('KernelFisher.fit: St_reg_inv took', time.time() - ts)

        ts = time.time()
        R = np.inner(E_PIsi.T,
                     np.inner(C, np.inner(St_reg_inv, E_PIsi.T).T).T)
        if self.print_timing:
            print('KernelFisher.fit: R took', time.time() - ts)

        ts = time.time()
        Vr, Lr, Vtr, Lr_norm = self.condensed_svd(R, tol=1e-6)
        if self.print_timing:
            print('KernelFisher.fit: Vr, Lr, Vtr took', time.time() - ts)

        ts = time.time()
        # Z is Upsilon.T * H from equation (22) of the paper.
        Z = np.inner(
            np.inner(
                np.inner(np.inner(np.diag(1.0 / np.sqrt(Lr)), Vtr.T), E_PIsi),
                C.T), St_reg_inv)
        Z = (Z.T - (Z.sum(axis=1) / (1.0 * Z.shape[1]))).T
        if self.print_timing:
            print('KernelFisher.fit: Z took', time.time() - ts)

        self.Z = Z
        self.n_components_found_ = Z.shape[0]

        # K_mean is (1/n) K*1_n from equation (22); subtracted in transform().
        self.K_mean = K_mean

        self.X_fit_ = X
        return self

    def condensed_svd(self, M, tol=1e-3, store_singular_vals=False):
        """Truncated SVD of M: keep singular values whose fraction of the
        total exceeds tol.  Returns (U, S, Vt, sum-of-singular-values)."""
        U, S, Vt = linalg.svd(M, full_matrices=False)
        if store_singular_vals:
            self.singular_vals = S
        # tolerance is on the fraction of variance in each singular value;
        # normalizing also handles the norm_covariance=False scale ambiguity
        S_norm = np.sum(S)
        rank = np.sum((S / S_norm) > tol)
        return U[:, :rank], S[:rank], Vt[:rank, :], S_norm

    @property
    def classes(self):
        warnings.warn(
            "KernelFisher.classes is deprecated and will be removed in 0.14. "
            "Use .classes_ instead.", DeprecationWarning, stacklevel=2)
        return self.classes_

    def _decision_function(self, X):
        return self.transform(X)

    def decision_function(self, X):
        """Decision function values for each sample in X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]

        NOTE: currently identical to self.transform(X).
        """
        return self._decision_function(X)

    def transform(self, X):
        """Project X so as to maximize class separation.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
        """
        k = self._get_kernel(X, self.X_fit_)
        # subtract the training kernel row-mean before projecting (eq. 22)
        z = np.inner(self.Z, (k - self.K_mean)).T
        return z

    def fit_transform(self, X, y, use_total_scatter=True, sigma_sqrd=1e-8,
                      tol=1.0e-3):
        """Fit the model on (X, y) and project X.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
        y : array, shape = [n_samples]
            Target values (integers).
        use_total_scatter, sigma_sqrd, tol : see the constructor; these
            override the values set at construction time.

        Returns
        -------
        X_new : array, shape = [n_samples, n_components_found_]
        """
        # FIX: fit() does not accept these keyword arguments, so forwarding
        # them (as before) always raised TypeError.  Store them on the
        # estimator, then fit.
        self.use_total_scatter = use_total_scatter
        self.sigma_sqrd = sigma_sqrd
        self.tol = tol
        return self.fit(X, y).transform(X)
class MIDA(BaseEstimator, TransformerMixin):
    """Maximum independence domain adaptation.

    Args:
        n_components (int): Number of components to keep.
        kernel (str): "linear", "rbf", or "poly". Kernel to use for MIDA.
            Defaults to "linear".
        lambda_ (float): Hyperparameter, stored for API/get_params
            compatibility. Defaults to 1.0.
        mu (float): Hyperparameter of the l2 penalty. Defaults to 1.0.
        eta (float): Hyperparameter of the label dependence. Defaults to 1.0.
        augmentation (bool): Whether to use covariates as augmented
            features. Defaults to False.
        kernel_params (dict or None): Parameters for the kernel.
            Defaults to None.

    References:
        [1] Yan, K., Kou, L. and Zhang, D., 2018. Learning domain-invariant
            subspace using domain features and independence maximization.
            IEEE Transactions on Cybernetics, 48(1), pp.288-299.
    """

    def __init__(
        self,
        n_components,
        kernel="linear",
        lambda_=1.0,
        mu=1.0,
        eta=1.0,
        augmentation=False,
        kernel_params=None,
    ):
        self.n_components = n_components
        self.kernel = kernel
        # FIX: lambda_ was accepted but never stored, breaking sklearn's
        # get_params/clone contract for this estimator.
        self.lambda_ = lambda_
        self.mu = mu
        self.eta = eta
        self.augmentation = augmentation
        if kernel_params is None:
            self.kernel_params = {}
        else:
            self.kernel_params = kernel_params
        self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
        self._centerer = KernelCenterer()
        self.x_fit = None

    def _get_kernel(self, x, y=None):
        # only linear/rbf/poly are supported; anything else (including
        # 'precomputed') is rejected
        if self.kernel in ["linear", "rbf", "poly"]:
            params = self.kernel_params or {}
        else:
            raise ValueError("Pre-computed kernel not supported")
        return pairwise_kernels(x, y, metric=self.kernel, filter_params=True,
                                **params)

    def fit(self, x, y=None, covariates=None):
        """
        Args:
            x : array-like. Input data, shape (n_samples, n_features)
            y : array-like. Labels, shape (nl_samples,)
            covariates : array-like. Domain co-variates,
                shape (n_samples, n_covariates)

        Note:
            Unsupervised MIDA is performed if y is None.
            Semi-supervised MIDA is performed if y is not None.
        """
        if self.augmentation and isinstance(covariates, np.ndarray):
            x = np.concatenate((x, covariates), axis=1)

        # Kernel matrix; zero out NaNs from degenerate kernel entries
        kernel_x = self._get_kernel(x)
        kernel_x[np.isnan(kernel_x)] = 0

        # Solve the optimization problem
        self._fit(kernel_x, y, covariates)
        self.x_fit = x
        return self

    def _fit(self, kernel_x, y, covariates=None):
        """Solve the MIDA eigenproblem.

        Args:
            kernel_x: array-like, kernel matrix of the input data x,
                shape (n_samples, n_samples)
            y: array-like. Labels, shape (nl_samples,)
            covariates: array-like. Domain co-variates,
                shape (n_samples, n_covariates)

        Returns:
            self
        """
        n_samples = kernel_x.shape[0]
        # Identity (unit) matrix and centering matrix
        unit_mat = np.eye(n_samples)
        ctr_mat = unit_mat - 1.0 / n_samples * np.ones((n_samples, n_samples))
        kernel_x = self._centerer.fit_transform(kernel_x)
        if isinstance(covariates, np.ndarray):
            kernel_c = np.dot(covariates, covariates.T)
        else:
            kernel_c = np.zeros((n_samples, n_samples))
        if y is not None:
            # semi-supervised: labels may cover only the first nl samples
            n_labeled = y.shape[0]
            if n_labeled > n_samples:
                raise ValueError("Number of labels exceeds number of samples")
            y_mat_ = self._label_binarizer.fit_transform(y)
            y_mat = np.zeros((n_samples, y_mat_.shape[1]))
            y_mat[:n_labeled, :] = y_mat_
            ker_y = np.dot(y_mat, y_mat.T)
            obj = multi_dot([
                kernel_x,
                self.mu * ctr_mat
                + self.eta * multi_dot([ctr_mat, ker_y, ctr_mat])
                - multi_dot([ctr_mat, kernel_c, ctr_mat]),
                kernel_x.T,
            ])
        else:
            obj = multi_dot([
                kernel_x,
                self.mu * ctr_mat - multi_dot([ctr_mat, kernel_c, ctr_mat]),
                kernel_x.T,
            ])
        # top n_components eigenpairs, sorted descending
        eig_values, eig_vectors = linalg.eigh(
            obj,
            subset_by_index=[n_samples - self.n_components, n_samples - 1])
        idx_sorted = eig_values.argsort()[::-1]

        self.eig_values_ = eig_values[idx_sorted]
        self.U = eig_vectors[:, idx_sorted]
        # FIX: np.float was removed in NumPy >= 1.24; it aliased the
        # builtin float.
        self.U = np.asarray(self.U, dtype=float)
        return self

    def fit_transform(self, x, y=None, covariates=None):
        """
        Args:
            x : array-like, shape (n_samples, n_features)
            y : array-like, shape (n_samples,)
            covariates : array-like, shape (n_samples, n_covariates)

        Returns:
            x_transformed : array-like, shape (n_samples, n_components)
        """
        self.fit(x, y, covariates)
        return self.transform(x, covariates)

    def transform(self, x, covariates=None):
        """
        Args:
            x : array-like, shape (n_samples, n_features)
            covariates : array-like, augmentation features,
                shape (n_samples, n_covariates)

        Returns:
            x_transformed : array-like, shape (n_samples, n_components)
        """
        # NOTE(review): x_fit is initialised to None in __init__, so this
        # check passes even before fit() -- confirm intended behaviour.
        check_is_fitted(self, "x_fit")
        if isinstance(covariates, np.ndarray) and self.augmentation:
            x = np.concatenate((x, covariates), axis=1)
        kernel_x = self._centerer.transform(
            pairwise_kernels(x, self.x_fit, metric=self.kernel,
                             filter_params=True, **self.kernel_params))
        return np.dot(kernel_x, self.U)
class SupervisedPCA(BaseEstimator, TransformerMixin):
    """Supervised Principal Component Analysis (SPCA).

    Finds directions that maximize dependence between X and the targets:
    the eigenvectors of M = X.T @ K @ X, where K is the centered kernel
    matrix computed over y.

    Parameters
    ----------
    n_components : int or None
        Number of components. If None, all positive-eigenvalue components
        are kept.
    kernel : 'linear' | 'poly' | 'rbf' | 'sigmoid' | 'precomputed'
        Kernel over the targets. Default: 'linear'.
    degree : int, optional
        Degree for poly, rbf and sigmoid kernels. Default: 3.
    gamma : float, optional
        Kernel coefficient for rbf and poly kernels.
    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
    alpha, fit_inverse_transform :
        Accepted for API symmetry with KernelPCA; not used by this class.
    eigen_solver : string ['auto'|'dense'|'arpack']
        Eigensolver to use. arpack may be more efficient when n_components
        is much smaller than the number of features.
    tol : float
        Convergence tolerance for arpack (0 lets arpack choose).
    max_iter : int
        Maximum number of iterations for arpack (None lets arpack choose).

    Attributes
    ----------
    lambdas_, alphas_ :
        Eigenvalues and eigenvectors of X.T @ K @ X, sorted in descending
        order and restricted to strictly positive eigenvalues.
    """

    def __init__(self, n_components=None, kernel='linear', gamma=0, degree=3,
                 coef0=1, alpha=1.0, fit_inverse_transform=False,
                 eigen_solver='auto', tol=0, max_iter=None):
        self.n_components = n_components
        self.kernel = kernel.lower()
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.tol = tol
        self.max_iter = max_iter
        self.centerer = KernelCenterer()

    def transform(self, X):
        """Project X onto the fitted directions: X @ alphas_."""
        return X @ self.alphas_

    def fit(self, X, y):
        """Fit SPCA from data X and targets y. Returns self."""
        self._fit(X, y)
        return self

    def fit_transform(self, X, y=None, **fit_params):
        """Fit from (X, y), then return the projected X."""
        if y is None:
            raise ValueError('SPCA requires a target variable')
        self.fit(X, y)
        return X @ self.alphas_

    def _fit(self, X, y):
        # centered kernel matrix over the targets
        K = self.centerer.fit_transform(self._get_kernel(y))
        # FIX: removed dead statement `X_scale = scale(X)` -- its result was
        # never used (M below is built from the raw X).
        # NOTE(review): if scaling was intended, M should read
        # X_scale.T @ K @ X_scale; confirm before changing behaviour.
        n_components = K.shape[0] if self.n_components is None else min(
            K.shape[0], self.n_components)

        # eigen-decompose M = X^T K X
        M = X.T @ K @ X
        if self.eigen_solver == 'auto':
            # arpack pays off for large matrices with few requested components
            if M.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            warnings.warn(
                '`dense` is experimental! Please verify results or use < 10 components.'
            )
            # NOTE(review): the dense path keeps every positive-eigenvalue
            # component regardless of n_components -- confirm intended.
            self.lambdas_, self.alphas_ = la.eigh(M)
        elif eigen_solver == 'arpack':
            self.lambdas_, self.alphas_ = eigsh(M, n_components, which='LA',
                                                tol=self.tol)

        # sort descending, then drop non-positive eigenvalues
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]
        self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
        self.lambdas_ = self.lambdas_[self.lambdas_ > 0]
        self.X_fit = X

    def _get_kernel(self, X, Y=None):
        params = {
            'gamma': self.gamma,
            'degree': self.degree,
            'coef0': self.coef0
        }
        try:
            return pairwise_kernels(X, Y, metric=self.kernel,
                                    filter_params=True, n_jobs=-1, **params)
        except AttributeError:
            raise ValueError(
                f'{self.kernel} is not a valid kernel. Valid kernels are: '
                'rbf, poly, sigmoid, linear and precomputed.')
class SupervisedPCA(BaseEstimator, TransformerMixin):
    """Supervised PCA via eigen-decomposition of Q = X.T @ K @ X, where K is
    the centered kernel matrix of the (column-reshaped) targets.

    NOTE(review): this class shadows the SupervisedPCA defined earlier in
    this module; the later definition wins at import time.
    """

    def __init__(self,
                 n_components,
                 kernel='linear',
                 eigen_solver='auto',
                 max_iterations=None,
                 gamma=0,
                 degree=3,
                 coef0=1,
                 alpha=1.0,
                 tolerance=0,
                 fit_inverse_transform=False):
        self._n_components = n_components
        self._gamma = gamma
        self._tolerance = tolerance
        self._fit_inverse_transform = fit_inverse_transform
        self._max_iterations = max_iterations
        self._degree = degree
        self._kernel = kernel
        self._eigen_solver = eigen_solver
        self._coef0 = coef0
        self._centerer = KernelCenterer()
        self._alpha = alpha

    def _get_kernel(self, X, Y=None):
        """Kernel matrix K with K[i, j] = kernel(X[i], X[j]) (or between
        X[i] and Y[j] when Y is given).

        Valid kernels: 'linear', 'rbf', 'poly', 'sigmoid', 'precomputed'.
        """
        kwargs = {
            'gamma': self._gamma,
            'degree': self._degree,
            'coef0': self._coef0
        }
        return pairwise_kernels(X, Y, metric=self._kernel, n_jobs=-1,
                                filter_params=True, **kwargs)

    def _fit(self, X, y):
        # Centered label kernel K (= H.L.H); y reshaped to a column vector.
        K = self._centerer.fit_transform(self._get_kernel(y.reshape(-1, 1)))

        # decide how many components to use
        if self._n_components is not None:
            n_components = min(K.shape[0], self._n_components)
        else:
            n_components = K.shape[0]

        # eigen-decompose Q = X^T K X
        Q = (X.T).dot(K).dot(X)

        # If n_components is much smaller than the problem size, arpack is
        # likely more efficient than the dense eigensolver.
        if (self._eigen_solver == 'auto'):
            if (Q.shape[0] / n_components) > 20:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self._eigen_solver

        if eigen_solver == 'dense':
            # FIX: the `eigvals=` keyword was removed from scipy.linalg.eigh
            # (SciPy 1.12); subset_by_index is the documented replacement and
            # is already used by MIDA in this module.  Indices select the
            # n_components largest eigenvalues (eigh returns ascending order).
            self._lambdas, self._alphas = linalg.eigh(
                Q,
                subset_by_index=[Q.shape[0] - n_components, Q.shape[0] - 1])
        elif eigen_solver == 'arpack':
            self._lambdas, self._alphas = ssl_eigsh(A=Q,
                                                    k=n_components,
                                                    which="LA",
                                                    tol=self._tolerance)

        # sort eigenpairs in descending order of eigenvalue
        indices = self._lambdas.argsort()[::-1]
        self._lambdas = self._lambdas[indices]
        self._alphas = self._alphas[:, indices]
        # FIX: the positive-eigenvalue mask was previously computed from the
        # already-filtered eigenvalue vector and then applied to the
        # eigenvector columns, producing a boolean-index length mismatch
        # whenever any eigenvalue <= 0.  Compute the mask once on the sorted
        # eigenvalues and apply it to both arrays.
        positive = self._lambdas > 0
        self._lambdas = self._lambdas[positive]
        self._alphas = self._alphas[:, positive]

        self.X_fit = X

    def _transform(self):
        """Project the training data onto the fitted directions."""
        return self.X_fit.dot(self._alphas)

    def transform(self, X):
        """Project new data X onto the fitted directions."""
        return X.dot(self._alphas)

    def fit(self, X, Y):
        """Fit the model from data X and targets Y."""
        self._fit(X, Y)
        # FIX: return self (was a bare `return`), matching the sklearn
        # convention followed by the other estimators in this module.
        return self

    def fit_transform(self, X, Y):
        """Fit from (X, Y), then return the projected training data."""
        self.fit(X, Y)
        return self._transform()
class KernelPCA(BaseEstimator, TransformerMixin):
    """Kernel Principal component analysis (KPCA).

    Non-linear dimensionality reduction through the use of kernels (see
    :ref:`metrics`).

    Parameters
    ----------
    n_components : int or None
        Number of components. If None, all non-zero components are kept.

    kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
        Kernel. Default: "linear".

    degree : int, default=3
        Degree for poly kernels. Ignored by other kernels.

    gamma : float, optional
        Kernel coefficient for rbf and poly kernels. Default: 1/n_features.
        Ignored by other kernels.

    coef0 : float, optional
        Independent term in poly and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : mapping of string to any, optional
        Parameters (keyword arguments) and values for a kernel passed as a
        callable object. Ignored by built-in kernels.

    alpha : int
        Hyperparameter of the ridge regression that learns the
        inverse transform (when fit_inverse_transform=True).
        Default: 1.0.

    fit_inverse_transform : bool
        Learn the inverse transform for non-precomputed kernels
        (i.e. learn to find the pre-image of a point). Default: False.

    eigen_solver : string ['auto'|'dense'|'arpack']
        Select eigensolver to use. If n_components is much less than the
        number of training samples, arpack may be more efficient than the
        dense eigensolver.

    tol : float
        Convergence tolerance for arpack.
        Default: 0 (optimal value will be chosen by arpack).

    max_iter : int
        Maximum number of iterations for arpack.
        Default: None (optimal value will be chosen by arpack).

    remove_zero_eig : boolean, default=False
        If True, all components with zero eigenvalues are removed, so that
        the number of components in the output may be < n_components.
        When n_components is None, this parameter is ignored and components
        with zero eigenvalues are removed regardless.

    Attributes
    ----------
    lambdas_ :
        Eigenvalues of the centered kernel matrix (kept components).

    alphas_ :
        Eigenvectors of the centered kernel matrix (kept components).

    evals_, evecs_ :
        Full eigendecomposition of the centered kernel matrix.
        NOTE(review): only populated when the dense solver runs; the arpack
        path leaves them unset -- confirm callers handle this.

    dual_coef_ :
        Inverse transform matrix (only with fit_inverse_transform=True).

    X_transformed_fit_ :
        Projection of the fitted data on the kernel principal components.

    References
    ----------
    Kernel PCA was introduced in:
        Bernhard Schoelkopf, Alexander J. Smola, and Klaus-Robert Mueller.
        1999. Kernel principal component analysis. In Advances in kernel
        methods, MIT Press, Cambridge, MA, USA 327-352.
    """

    def __init__(self, n_components=None, kernel="linear", gamma=None,
                 degree=3, coef0=1, kernel_params=None, alpha=1.0,
                 fit_inverse_transform=False, eigen_solver='auto', tol=0,
                 max_iter=None, remove_zero_eig=False):
        # pre-image learning needs the original feature space, which a
        # precomputed kernel does not expose
        if fit_inverse_transform and kernel == 'precomputed':
            raise ValueError(
                "Cannot fit_inverse_transform with a precomputed kernel.")
        self.n_components = n_components
        self.kernel = kernel
        self.kernel_params = kernel_params
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.alpha = alpha
        self.fit_inverse_transform = fit_inverse_transform
        self.eigen_solver = eigen_solver
        self.remove_zero_eig = remove_zero_eig
        self.tol = tol
        self.max_iter = max_iter
        self._centerer = KernelCenterer()

    @property
    def _pairwise(self):
        return self.kernel == "precomputed"

    def _get_kernel(self, X, Y=None):
        # a callable kernel receives kernel_params; built-in kernels get the
        # gamma/degree/coef0 trio (filter_params drops the irrelevant ones)
        if callable(self.kernel):
            params = self.kernel_params or {}
        else:
            params = {
                "gamma": self.gamma,
                "degree": self.degree,
                "coef0": self.coef0
            }
        return pairwise_kernels(X, Y, metric=self.kernel,
                                filter_params=True, **params)

    def _fit_transform(self, K):
        """ Fit's using kernel K"""
        # center kernel
        K = self._centerer.fit_transform(K)

        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors; arpack pays off for large K with few
        # requested components
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            # NOTE(review): the eigvals= keyword is removed in SciPy >= 1.12
            # (subset_by_index is the replacement) -- confirm pinned version.
            self.lambdas_, self.alphas_ = linalg.eigh(
                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
            # additionally keep the full spectrum (dense path only)
            self.evals_, self.evecs_ = linalg.eigh(K)
        elif eigen_solver == 'arpack':
            self.lambdas_, self.alphas_ = eigsh(K, n_components,
                                                which="LA",
                                                tol=self.tol,
                                                maxiter=self.max_iter)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove eigenvectors with a zero eigenvalue
        if self.remove_zero_eig or self.n_components is None:
            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K

    def _fit_inverse_transform(self, X_transformed, X):
        """Learn the pre-image map by kernel ridge regression
        ((K + alpha*I) dual_coef_ = X)."""
        if hasattr(X, "tocsr"):
            raise NotImplementedError("Inverse transform not implemented for "
                                      "sparse matrices!")

        n_samples = X_transformed.shape[0]
        K = self._get_kernel(X_transformed)
        # add the ridge term alpha to the diagonal in place
        K.flat[::n_samples + 1] += self.alpha
        # NOTE(review): sym_pos= is deprecated/removed in recent SciPy;
        # assume_a='pos' is the modern spelling -- confirm pinned version.
        self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)
        self.X_transformed_fit_ = X_transformed

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        K = self._get_kernel(X)
        self._fit_transform(K)

        if self.fit_inverse_transform:
            # training projection alphas * sqrt(lambdas) is the input of
            # the pre-image regression
            sqrt_lambdas = np.diag(np.sqrt(self.lambdas_))
            X_transformed = np.dot(self.alphas_, sqrt_lambdas)
            self._fit_inverse_transform(X_transformed, X)

        self.X_fit_ = X
        return self

    def fit_transform(self, X, y=None, **params):
        """Fit the model from data in X and transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        self.fit(X, **params)

        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)

        if self.fit_inverse_transform:
            self._fit_inverse_transform(X_transformed, X)

        return X_transformed

    def transform(self, X):
        """Transform X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new: array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'X_fit_')

        # center the cross-kernel with the statistics learned during fit
        K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
        return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_))

    def inverse_transform(self, X):
        """Transform X back to original space.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_components)

        Returns
        -------
        X_new: array-like, shape (n_samples, n_features)

        References
        ----------
        "Learning to Find Pre-Images", G BakIr et al, 2004.
        """
        if not self.fit_inverse_transform:
            raise NotFittedError("The fit_inverse_transform parameter was not"
                                 " set to True when instantiating and hence "
                                 "the inverse transform is not available.")

        K = self._get_kernel(X, self.X_transformed_fit_)

        return np.dot(K, self.dual_coef_)
def fit(self, X, Y):
    """Fit the KCCA model with two views represented by kernels X and Y.

    Parameters
    ----------
    X : array_like, shape = (n_samples, n_features) for a data matrix or
        shape = (n_samples, n_samples) for a kernel matrix.  When both X
        and Y are kernel matrices, the kernel parameter should be set to
        'precomputed'.  One view of the data.

    Y : array_like, shape = (n_samples, n_features) for a data matrix or
        shape = (n_samples, n_samples) for a kernel matrix.  When both X
        and Y are kernel matrices, the kernel parameter should be set to
        'precomputed'.  Another view of the data.

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    check_consistent_length(X, Y)
    # FIX: np.float was removed in NumPy >= 1.24; the builtin float is the
    # documented replacement (it is what np.float aliased).
    X = check_array(X, dtype=float, copy=self.copy)
    Y = check_array(Y, dtype=float, copy=self.copy, ensure_2d=False)
    if Y.ndim == 1:
        # a 1-D target view becomes a single-column matrix
        Y = Y.reshape(-1, 1)

    n = X.shape[0]
    p = X.shape[1]
    q = Y.shape[1]

    # ----- parameter validation -----
    if self.n_components < 1 or self.n_components > n:
        raise ValueError('Invalid number of components: %d' %
                         self.n_components)
    if self.eigen_solver not in ("auto", "dense", "arpack"):
        # FIX: the message previously interpolated self.algorithm, an
        # attribute that does not exist on this estimator (so raising it
        # produced AttributeError instead of the intended ValueError), and
        # misspelled 'arpack'.
        raise ValueError("Got eigen_solver %s when only 'auto', "
                         "'dense' and 'arpack' are valid" %
                         self.eigen_solver)
    if self.kernel == 'precomputed' and (p != n or q != n):
        raise ValueError('Invalid kernel matrices dimension')
    if not self.pgso and (self.kapa <= 0 or self.kapa >= 1):
        raise ValueError('kapa should be in (0, 1) when pgso=False')
    if self.pgso and (self.kapa < 0 or self.kapa > 1):
        raise ValueError('kapa should be in [0, 1] when pgso=True')

    # ----- kernel matrices for both views, optionally centered -----
    KX = self._get_kernel(X)
    KY = self._get_kernel(Y)

    if self.center:
        kc = KernelCenterer()
        self.KXc_ = kc.fit_transform(KX)
        self.KYc_ = kc.fit_transform(KY)
    else:
        self.KXc_ = KX
        self.KYc_ = KY

    if self.pgso:
        # use partial Gram-Schmidt orthogonalization to decompose the
        # kernel matrices before solving
        self._fit_pgso(self.KXc_, self.KYc_)
    else:
        self._fit(self.KXc_, self.KYc_)
    return self