def fit(self, train_data):
    """Fit the KPCA model on the training data.

    Two ``KernelPCA`` models are fitted on the (optionally standardized)
    data: a first one with ``n_components`` left unset, whose eigenvalues
    are used to choose the number of components when
    ``self.n_components`` is None, and a second one restricted to the
    chosen number of components.

    Parameters
    ----------
    train_data : array_like
        Training data; its ``shape`` is unpacked as (samples, features).
        It is standardized with ``StandardScaler`` when ``self.normalize``
        is exactly ``True``.

    Returns
    -------
    self
        The fitted instance. The following attributes are set:
        ``Scaler`` and ``TrainDataScaler`` (only when normalizing),
        ``val``, ``vec``, ``n_components`` (only when it was None),
        ``Kpca_Total``, ``Kpca_N``, ``samplesNum``, ``featuresNum``,
        ``lamda``, ``traindata`` and ``K``.
    """

    def _eigenpairs(model):
        # scikit-learn >= 1.0 exposes eigenvalues_/eigenvectors_; the older
        # lambdas_/alphas_ names were deprecated in 1.0 and removed in 1.2.
        if hasattr(model, "eigenvalues_"):
            return model.eigenvalues_, model.eigenvectors_
        return model.lambdas_, model.alphas_

    n_components = self.n_components
    threshold = self.threshold
    kpca_kwargs = dict(
        kernel=self.kernel,
        fit_inverse_transform=self.fit_inverse_transform,
        gamma=self.gamma,
    )

    samplesNum, featuresNum = train_data.shape

    # Identity check kept on purpose: only the literal True enables scaling.
    if self.normalize is True:
        scaler = preprocessing.StandardScaler().fit(train_data)
        fit_data = scaler.transform(train_data)
        self.Scaler = scaler
        self.TrainDataScaler = fit_data
    else:
        fit_data = train_data

    # Fit on the training data without limiting the number of components.
    kpca_model = KernelPCA(**kpca_kwargs)
    kpca_model.fit(fit_data)

    # Eigenvalues and eigenvectors of the kernel matrix.
    val, vec = _eigenpairs(kpca_model)
    kernel_matrix = kpca_model._get_kernel(fit_data)
    self.val = val
    self.vec = vec

    if n_components is None:
        # Choose the number of components from the eigenvalues and threshold.
        n_components = MY_KPCA.calComponentSelectNums(val, threshold=threshold)
        self.n_components = n_components

    # Refit the model with the selected number of components.
    kpca_N = KernelPCA(n_components=n_components, **kpca_kwargs)
    kpca_N.fit(fit_data)
    val_n_components, _ = _eigenpairs(kpca_N)

    # Diagonal matrix of the retained eigenvalues.
    lamda = np.diag(val_n_components)

    self.Kpca_Total = kpca_model
    self.Kpca_N = kpca_N
    self.samplesNum = samplesNum
    self.featuresNum = featuresNum
    self.lamda = lamda
    self.traindata = train_data
    self.K = kernel_matrix
    return self
def kernel_pca_filter(field, nmodes, return_filter=False, **kwargs_pca):
    """
    Apply a Kernel Principal Component Analysis (KPCA) filter to a field.

    This subtracts off functions in the frequency direction that correspond
    to the highest SNR modes of the empirical frequency-frequency covariance,
    with some non-linear weighting by a specified kernel.

    (WARNING: Can use a lot of memory)

    Uses `sklearn.decomposition.KernelPCA`. For more details, see:
    https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.KernelPCA.html

    Parameters:
        field (array_like):
            3D array containing the field that the filter will be applied to.
            NOTE: This assumes that the 3rd axis of the array is frequency.
        nmodes (int):
            Number of eigenmodes to filter out (modes are ordered by SNR).
        return_filter (bool, optional):
            Whether to also return the fitted KPCA filter object.
        **kwargs_pca (dict, optional):
            Keyword arguments for `sklearn.decomposition.KernelPCA`.
            `n_components` and `fit_inverse_transform` are already set by
            this function and must not be passed again.

    Returns:
        cleaned_field (array_like), transformer (sklearn.decomposition.KernelPCA instance, optional):
            Foreground-filtered field and KPCA filter object.

            - ``cleaned_field (array_like)``:
              Foreground-cleaned field, with the same shape as `field`.

            - ``transformer (sklearn.decomposition.KernelPCA instance, optional)``:
              Contains the KPCA filter. Only returned if
              `return_filter = True`. To get the foreground model, you can do
              the following:
              ```
              x = (field - mean_field).reshape((-1, Nfreq))  # (Npix, Nfreq)
              x_trans = transformer.fit_transform(x)  # mode amplitudes per pixel
              x_fg = transformer.inverse_transform(x_trans)  # foreground model
              ```
    """
    # Subtract mean vs. frequency and flatten to one row per pixel,
    # i.e. shape (Npix, Nfreq) with frequency along the last axis
    x = mean_spectrum_filter(field).reshape((-1, field.shape[-1]))

    # Build PCA model and fit it (this also fits the inverse transform)
    transformer = KernelPCA(n_components=nmodes, fit_inverse_transform=True,
                            **kwargs_pca)
    transformer.fit_transform(x)

    # scikit-learn >= 1.0 exposes eigenvalues_/eigenvectors_; the older
    # lambdas_/alphas_ names were deprecated in 1.0 and removed in 1.2
    if hasattr(transformer, "eigenvalues_"):
        eigvals = transformer.eigenvalues_
        eigvecs = transformer.eigenvectors_
    else:
        eigvals = transformer.lambdas_
        eigvecs = transformer.alphas_

    # Manually perform inverse transform, using the remaining eigenmode with
    # the smallest eigenvalue (last column of the transformed data)
    X = eigvecs[:, -1:] * np.sqrt(eigvals[-1:])
    K = transformer._get_kernel(X, transformer.X_transformed_fit_[:, -1:])

    # Add the ridge term `alpha` to the diagonal of the kernel matrix
    # NOTE(review): this looks like it mirrors the pre-1.0 scikit-learn
    # `inverse_transform` implementation -- confirm it is still intended
    n_samples = transformer.X_transformed_fit_.shape[0]
    K.flat[::n_samples + 1] += transformer.alpha
    x_clean = np.dot(K, transformer.dual_coef_).reshape(field.shape)

    # Return FG-subtracted data (and, optionally, the PCA filter instance)
    if return_filter:
        return x_clean, transformer
    else:
        return x_clean