def reduceDataset(self, nr=3, method='PCA'):
    '''Reduces the dimensionality of a given dataset using different techniques
    provided by the sklearn library.
    Methods available:
    'PCA'
    'FactorAnalysis'
    'KPCArbf', 'KPCApoly'
    'KPCAcosine', 'KPCAsigmoid'
    'IPCA'
    'FastICADeflation'
    'FastICAParallel'
    'Isomap'
    'LLE'
    'LLEmodified'
    'LLEltsa'
    '''
    dataset = self.ModelInputs['Dataset']
    #dataset=self.dataset[Model.in_columns]
    #dataset=self.dataset[['Humidity','TemperatureF','Sea Level PressureIn','PrecipitationIn','Dew PointF','Value']]
    #PCA
    if method == 'PCA':
        sklearn_pca = sklearnPCA(n_components=nr)
        reduced = sklearn_pca.fit_transform(dataset)
    #Factor Analysis
    elif method == 'FactorAnalysis':
        fa = FactorAnalysis(n_components=nr)
        reduced = fa.fit_transform(dataset)
    #kernel pca with rbf kernel
    elif method == 'KPCArbf':
        kpca = KernelPCA(nr, kernel='rbf')
        reduced = kpca.fit_transform(dataset)
    #kernel pca with poly kernel
    elif method == 'KPCApoly':
        kpca = KernelPCA(nr, kernel='poly')
        reduced = kpca.fit_transform(dataset)
    #kernel pca with cosine kernel
    elif method == 'KPCAcosine':
        kpca = KernelPCA(nr, kernel='cosine')
        reduced = kpca.fit_transform(dataset)
    #kernel pca with sigmoid kernel
    elif method == 'KPCAsigmoid':
        kpca = KernelPCA(nr, kernel='sigmoid')
        reduced = kpca.fit_transform(dataset)
    #Incremental PCA
    elif method == 'IPCA':
        ipca = IncrementalPCA(nr)
        reduced = ipca.fit_transform(dataset)
    #Fast ICA
    elif method == 'FastICAParallel':
        fip = FastICA(nr, algorithm='parallel')
        reduced = fip.fit_transform(dataset)
    elif method == 'FastICADeflation':
        fid = FastICA(nr, algorithm='deflation')
        reduced = fid.fit_transform(dataset)
    elif method == 'All':
        self.dimensionalityReduction(nr=nr)
        return self

    self.ModelInputs.update({method: reduced})
    self.datasetsAvailable.append(method)
    return self
def dimensionalityReduction(self, nr=5):
    '''Applies all the dimensionality reduction techniques available in this class.
    Techniques available:
    'PCA'
    'FactorAnalysis'
    'KPCArbf', 'KPCApoly'
    'KPCAcosine', 'KPCAsigmoid'
    'IPCA'
    'FastICADeflation'
    'FastICAParallel'
    'Isomap'
    'LLE'
    'LLEmodified'
    'LLEltsa'
    '''
    dataset = self.ModelInputs['Dataset']
    sklearn_pca = sklearnPCA(n_components=nr)
    p_components = sklearn_pca.fit_transform(dataset)
    fa = FactorAnalysis(n_components=nr)
    factors = fa.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='rbf')
    rbf = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='poly')
    poly = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='cosine')
    cosine = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='sigmoid')
    sigmoid = kpca.fit_transform(dataset)
    ipca = IncrementalPCA(nr)
    i_components = ipca.fit_transform(dataset)
    fip = FastICA(nr, algorithm='parallel')
    fid = FastICA(nr, algorithm='deflation')
    ficaP = fip.fit_transform(dataset)
    ficaD = fid.fit_transform(dataset)
    '''isomap=Isomap(n_components=nr).fit_transform(dataset)
    try:
        lle1=LocallyLinearEmbedding(n_components=nr).fit_transform(dataset)
    except ValueError:
        lle1=LocallyLinearEmbedding(n_components=nr,eigen_solver='dense').fit_transform(dataset)
    try:
        lle2=LocallyLinearEmbedding(n_components=nr,method='modified').fit_transform(dataset)
    except ValueError:
        lle2=LocallyLinearEmbedding(n_components=nr,method='modified',eigen_solver='dense').fit_transform(dataset)
    try:
        lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa').fit_transform(dataset)
    except ValueError:
        lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa',eigen_solver='dense').fit_transform(dataset)'''
    values = [p_components, factors, rbf, poly, cosine, sigmoid, i_components, ficaD, ficaP]  # ,isomap,lle1,lle2,lle3]
    keys = ['PCA', 'FactorAnalysis', 'KPCArbf', 'KPCApoly', 'KPCAcosine', 'KPCAsigmoid',
            'IPCA', 'FastICADeflation', 'FastICAParallel']  # ,'Isomap','LLE','LLEmodified','LLEltsa']
    self.ModelInputs.update(dict(zip(keys, values)))
    for key in keys:
        self.datasetsAvailable.append(key)
    #debug
    #dataset=pd.DataFrame(self.ModelInputs['Dataset'])
    #dataset['Output']=self.ModelOutput
    #self.debug['Dimensionalityreduction']=dataset
    ###
    return self
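# A minimal, self-contained sketch of the method-name -> reducer dispatch that the two
# methods above implement; it assumes plain sklearn estimators and a NumPy array in place
# of the class attributes (ModelInputs, datasetsAvailable), which are not shown here.
import numpy as np
from sklearn.decomposition import (PCA, FactorAnalysis, KernelPCA,
                                   IncrementalPCA, FastICA)

def reduce_dataset(dataset, nr=3, method='PCA'):
    # Each supported name maps to an unfitted sklearn reducer.
    reducers = {
        'PCA': PCA(n_components=nr),
        'FactorAnalysis': FactorAnalysis(n_components=nr),
        'KPCArbf': KernelPCA(n_components=nr, kernel='rbf'),
        'KPCApoly': KernelPCA(n_components=nr, kernel='poly'),
        'KPCAcosine': KernelPCA(n_components=nr, kernel='cosine'),
        'KPCAsigmoid': KernelPCA(n_components=nr, kernel='sigmoid'),
        'IPCA': IncrementalPCA(n_components=nr),
        'FastICAParallel': FastICA(n_components=nr, algorithm='parallel'),
        'FastICADeflation': FastICA(n_components=nr, algorithm='deflation'),
    }
    return reducers[method].fit_transform(dataset)

# Example: 100 samples with 6 features reduced to 3 kernel-PCA components.
X_demo = np.random.RandomState(0).rand(100, 6)
print(reduce_dataset(X_demo, nr=3, method='KPCArbf').shape)  # (100, 3)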
def ica(self, n_components=None, sources='left'): """Return result from independent component analysis. X = SA + m Sklearn's FastICA implementation is used. When sources=left the sources are returned in the first (left) matrix and the mixing matrix is returned in the second (right) matrix, corresponding to X = SA. When sources=right the sources are returned in the second matrix while the mixing matrix is returned in the first, corresponding to X = AS. Parameters ---------- n_components : int, optional Number of ICA components. sources : left or right, optional Indicates whether the sources should be the left or right matrix. Returns ------- first : Matrix Estimated source matrix (S) if sources=left. second : Matrix Estimated mixing matrix (A) if sources=right. mean_vector : brede.core.vector.Vector Estimated mean vector References ---------- http://scikit-learn.org/stable/modules/decomposition.html#ica """ if n_components is None: min_shape = min(self.shape[0], len(self._eeg_columns)) n_components = int(np.ceil(sqrt(float(min_shape) / 2))) ica = FastICA(n_components=n_components) if sources == 'left': sources = Matrix(ica.fit_transform( self.ix[:, self._eeg_columns].values), index=self.index) mixing_matrix = Matrix(ica.mixing_.T, columns=self._eeg_columns) mean_vector = Vector(ica.mean_, index=self._eeg_columns) return sources, mixing_matrix, mean_vector elif sources == 'right': sources = Matrix(ica.fit_transform( self.ix[:, self._eeg_columns].values.T).T, columns=self._eeg_columns) mixing_matrix = Matrix(ica.mixing_, index=self.index) mean_vector = Vector(ica.mean_, index=self.index) return mixing_matrix, sources, mean_vector else: raise ValueError('Wrong argument to "sources"')
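# A standalone illustration of the X = SA + m convention described in the docstring above,
# using plain NumPy arrays instead of the Matrix/Vector wrappers (which are not shown here):
# sklearn's fit_transform returns the sources S, mixing_ is the mixing matrix A, and mean_
# is the mean vector m, so the data are recovered as S @ A.T + m.
import numpy as np
from sklearn.decomposition import FastICA

rng = np.random.RandomState(0)
t = np.linspace(0, 8, 1000)
S_true = np.c_[np.sin(2 * t), np.sign(np.sin(3 * t))] + 0.05 * rng.randn(1000, 2)
A_true = np.array([[1.0, 0.5], [0.4, 1.0]])
X = S_true @ A_true.T  # observed mixtures, shape (1000, 2)

ica = FastICA(n_components=2, random_state=0)
S = ica.fit_transform(X)  # estimated sources ("left" matrix)
A = ica.mixing_           # estimated mixing matrix ("right" matrix)
m = ica.mean_             # estimated mean vector

print(np.allclose(X, S @ A.T + m))  # True: the decomposition reproduces X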
def mixing_matrix(data, n_components, display=True):
    features, weights, labels = data
    ica = FastICA(n_components=n_components)
    ica.fit_transform(features)
    mixing = ica.mixing_
    if display:
        f, ax = plt.subplots(figsize=(10, 4))
        sns.heatmap(mixing)
        plt.title('Signal Mixing Estimated Matrix')
    return mixing
def run_ica(data, comp):
    ica = FastICA(n_components=comp, whiten=True, max_iter=5000)
    data_out = np.zeros((comp, np.shape(data[0, :, 0])[0], np.shape(data[0, 0, :])[0]))
    for i in range(np.shape(data[0, :, 0])[0]):
        print(i)
        data_out[:, i, :] = np.transpose(ica.fit_transform(np.transpose(data[:, i, :])))
    return data_out
def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name): scl = RobustScaler() X_train_scl = scl.fit_transform(X_train) X_test_scl = scl.transform(X_test) ## ## ICA ## ica = FastICA(n_components=X_train_scl.shape[1]) X_ica = ica.fit_transform(X_train_scl) ## ## Plots ## ph = plot_helper() kurt = kurtosis(X_ica) print(kurt) title = 'Kurtosis (FastICA) for ' + data_set_name name = data_set_name.lower() + '_ica_kurt' filename = './' + self.out_dir + '/' + name + '.png' ph.plot_simple_bar(np.arange(1, len(kurt)+1, 1), kurt, np.arange(1, len(kurt)+1, 1).astype('str'), 'Feature Index', 'Kurtosis', title, filename)
def best_ica_nba(self): dh = data_helper() X_train, X_test, y_train, y_test = dh.get_nba_data() scl = RobustScaler() X_train_scl = scl.fit_transform(X_train) X_test_scl = scl.transform(X_test) ica = FastICA(n_components=X_train_scl.shape[1]) X_train_transformed = ica.fit_transform(X_train_scl, y_train) X_test_transformed = ica.transform(X_test_scl) ## top 2 kurt = kurtosis(X_train_transformed) i = kurt.argsort()[::-1] X_train_transformed_sorted = X_train_transformed[:, i] X_train_transformed = X_train_transformed_sorted[:,0:2] kurt = kurtosis(X_test_transformed) i = kurt.argsort()[::-1] X_test_transformed_sorted = X_test_transformed[:, i] X_test_transformed = X_test_transformed_sorted[:,0:2] # save filename = './' + self.save_dir + '/nba_ica_x_train.txt' pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False) filename = './' + self.save_dir + '/nba_ica_x_test.txt' pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False) filename = './' + self.save_dir + '/nba_ica_y_train.txt' pd.DataFrame(y_train).to_csv(filename, header=False, index=False) filename = './' + self.save_dir + '/nba_ica_y_test.txt' pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
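# A condensed sketch of the kurtosis-based component selection used above. One caveat in
# the snippet above is that the train and test projections are ranked by kurtosis
# independently, which can pick different components for each split; the sketch below ranks
# once on the training projection and reuses that ordering for the test projection. The
# helper name (top_k_ica_components) is illustrative, not part of the original class.
import numpy as np
from scipy.stats import kurtosis
from sklearn.decomposition import FastICA

def top_k_ica_components(X_train, X_test, k=2, random_state=0):
    ica = FastICA(n_components=X_train.shape[1], random_state=random_state)
    train_t = ica.fit_transform(X_train)
    test_t = ica.transform(X_test)
    order = kurtosis(train_t).argsort()[::-1]  # most non-Gaussian components first
    return train_t[:, order[:k]], test_t[:, order[:k]]

rng = np.random.RandomState(0)
tr2, te2 = top_k_ica_components(rng.rand(200, 6), rng.rand(50, 6), k=2)
print(tr2.shape, te2.shape)  # (200, 2) (50, 2)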
def align(movie_data, options, args, lrh):
    print('pICA(scikit-learn)')
    nvoxel = movie_data.shape[0]
    nTR = movie_data.shape[1]
    nsubjs = movie_data.shape[2]

    align_algo = args.align_algo
    nfeature = args.nfeature
    randseed = args.randseed
    if not os.path.exists(options['working_path']):
        os.makedirs(options['working_path'])

    # zscore the data
    bX = np.zeros((nsubjs*nvoxel, nTR))
    for m in range(nsubjs):
        bX[m*nvoxel:(m+1)*nvoxel, :] = stats.zscore(movie_data[:, :, m].T, axis=0, ddof=1).T
    del movie_data

    np.random.seed(randseed)
    A = np.mat(np.random.random((nfeature, nfeature)))

    ica = FastICA(n_components=nfeature, max_iter=500, w_init=A, random_state=randseed)
    St = ica.fit_transform(bX.T)
    ES = St.T
    bW = ica.mixing_

    R = np.zeros((nvoxel, nfeature, nsubjs))
    for m in range(nsubjs):
        R[:, :, m] = bW[m*nvoxel:(m+1)*nvoxel, :]

    niter = 10
    # initialization when first time run the algorithm
    np.savez_compressed(options['working_path']+align_algo+'_'+lrh+'_'+str(niter)+'.npz',
                        R=R, G=ES.T, niter=niter)
    return niter
def ica(self, n_components=None): """Return result from independent component analysis. X = SA + m Sklearn's FastICA implementation is used. Parameters ---------- n_components : int, optional Number of ICA components. Returns ------- source : Matrix Estimated source matrix (S) mixing_matrix : Matrix Estimated mixing matrix (A) mean_vector : brede.core.vector.Vector Estimated mean vector References ---------- http://scikit-learn.org/stable/modules/decomposition.html#ica """ if n_components is None: n_components = int(np.ceil(np.sqrt(float(min(self.shape)) / 2))) ica = FastICA(n_components=n_components) sources = Matrix(ica.fit_transform(self.values), index=self.index) mixing_matrix = Matrix(ica.mixing_.T, columns=self.columns) mean_vector = Vector(ica.mean_, index=self.columns) return sources, mixing_matrix, mean_vector
def test_inverse_transform(): # Test FastICA.inverse_transform n_features = 10 n_samples = 100 n1, n2 = 5, 10 rng = np.random.RandomState(0) X = rng.random_sample((n_samples, n_features)) expected = {(True, n1): (n_features, n1), (True, n2): (n_features, n2), (False, n1): (n_features, n2), (False, n2): (n_features, n2)} for whiten in [True, False]: for n_components in [n1, n2]: n_components_ = (n_components if n_components is not None else X.shape[1]) ica = FastICA(n_components=n_components, random_state=rng, whiten=whiten) with warnings.catch_warnings(record=True): # catch "n_components ignored" warning Xt = ica.fit_transform(X) expected_shape = expected[(whiten, n_components_)] assert_equal(ica.mixing_.shape, expected_shape) X2 = ica.inverse_transform(Xt) assert_equal(X.shape, X2.shape) # reversibility test in non-reduction case if n_components == X.shape[1]: assert_array_almost_equal(X, X2)
def test_inverse_transform(): """Test FastICA.inverse_transform""" rng = np.random.RandomState(0) X = rng.random_sample((100, 10)) rng = np.random.RandomState(0) X = rng.random_sample((100, 10)) n_features = X.shape[1] expected = {(True, 5): (n_features, 5), (True, 10): (n_features, 10), (False, 5): (n_features, 10), (False, 10): (n_features, 10)} for whiten in [True, False]: for n_components in [5, 10]: ica = FastICA(n_components=n_components, random_state=rng, whiten=whiten) Xt = ica.fit_transform(X) expected_shape = expected[(whiten, n_components)] assert_equal(ica.mixing_.shape, expected_shape) X2 = ica.inverse_transform(Xt) assert_equal(X.shape, X2.shape) # reversibility test in non-reduction case if n_components == X.shape[1]: assert_array_almost_equal(X, X2)
def filter_frames(self, data):
    logging.debug("I am starting the old componenty vous")
    data = data[0]
    print('The length of the data is ' + str(data.shape))
    sh = data.shape
    newshape = (np.prod(sh[:-1]), sh[-1])
    print("The shape of the data is: " + str(data.shape) + str(newshape))
    data = np.reshape(data, (newshape))  # data will already be shaped correctly
    logging.debug("Making the matrix")
    ica = FastICA(n_components=self.parameters['number_of_components'],
                  algorithm='parallel',
                  whiten=self.parameters['whiten'],
                  w_init=self.parameters['w_init'],
                  random_state=self.parameters['random_state'])
    logging.debug("Performing the fit")
    data = self.remove_nan_inf(data)  # otherwise the fit flags up an error for obvious reasons
    # print "I'm here"
    S_ = ica.fit_transform(data)
    # print "S_Shape is:"+str(S_.shape)
    # print "self.images_shape:"+str(self.images_shape)
    scores = np.reshape(S_, (self.images_shape))
    eigenspectra = ica.components_
    logging.debug("mange-tout")
    return [scores, eigenspectra]
def getHeartRate(window, lastHR):
    # Normalize across the window to have zero-mean and unit variance
    mean = np.mean(window, axis=0)
    std = np.std(window, axis=0)
    normalized = (window - mean) / std

    # Separate into three source signals using ICA
    ica = FastICA()
    srcSig = ica.fit_transform(normalized)

    # Find power spectrum
    powerSpec = np.abs(np.fft.fft(srcSig, axis=0))**2
    freqs = np.fft.fftfreq(WINDOW_SIZE, 1.0 / FPS)

    # Find heart rate
    maxPwrSrc = np.max(powerSpec, axis=1)
    validIdx = np.where((freqs >= MIN_HR_BPM / SEC_PER_MIN) & (freqs <= MAX_HR_BMP / SEC_PER_MIN))
    validPwr = maxPwrSrc[validIdx]
    validFreqs = freqs[validIdx]
    maxPwrIdx = np.argmax(validPwr)
    hr = validFreqs[maxPwrIdx]
    print(hr)

    #plotSignals(normalized, "Normalized color intensity")
    #plotSignals(srcSig, "Source signal strength")
    #plotSpectrum(freqs, powerSpec)
    return hr
def test_ica(eng): t = linspace(0, 10, 100) s1 = sin(t) s2 = square(sin(2*t)) x = c_[s1, s2, s1+s2] random.seed(0) x += 0.001*random.randn(*x.shape) x = fromarray(x, engine=eng) def normalize_ICA(s, aT): a = aT.T c = a.sum(axis=0) return s*c, (a/c).T from sklearn.decomposition import FastICA ica = FastICA(n_components=2, fun='cube', random_state=0) s1 = ica.fit_transform(x.toarray()) aT1 = ica.mixing_.T s1, aT1 = normalize_ICA(s1, aT1) s2, aT2 = ICA(k=2, svd_method='direct', max_iter=200, seed=0).fit(x) s2, aT2 = normalize_ICA(s2, aT2) tol=1e-1 assert allclose_sign_permute(s1, s2, atol=tol) assert allclose_sign_permute(aT1, aT2, atol=tol)
def ica(tx, ty, rx, ry):
    compressor = ICA(whiten=True)  # for some people, whiten needs to be off
    newtx = compressor.fit_transform(tx)
    newrx = compressor.fit_transform(rx)
    em(newtx, ty, newrx, ry, add="wICAtr", times=10)
    km(newtx, ty, newrx, ry, add="wICAtr", times=10)
    nn(newtx, ty, newrx, ry, add="wICAtr")
def __create_image_obser(self, image_observations) : """ Creation of a space in which the images will be compared (learning stage). Firstly PCA is applied in order to reduce the number of features in the images. Reduction is done so that 99% of measured variance is covered. After that, ICA is performed on the coefficients calculated by transforming (reducing) the face images with PCA. From the learned ICA components basis_images (vectors), original images coefficients and transformation for new comming images are extracted. """ pca = PCA() pca.fit(image_observations) sum = 0 components_to_take = 0 for ratio in pca.explained_variance_ratio_: components_to_take += 1 sum += ratio if (sum > 0.99): break print("PCA reduces the number of dimensions to: " + str(components_to_take)) pca = PCA(whiten=True, n_components=components_to_take) self.__transformed_images = pca.fit_transform(image_observations) self.__transformed_images_mean = np.mean(self.__transformed_images, axis=0) self.__transformed_images -= self.__transformed_images_mean self.__pca = pca ica = FastICA(whiten=True, max_iter=100000) self.__original_images_repres = ica.fit_transform(self.__transformed_images) self.__basis_images = ica.mixing_.T self.__transformation = ica.components_
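# A condensed sketch of the PCA -> ICA pipeline described in the docstring above, assuming
# a (n_images, n_pixels) observation matrix. Passing a float to PCA's n_components keeps
# just enough components to cover the requested variance fraction, which replaces the manual
# explained_variance_ratio_ loop; the private-attribute bookkeeping of the original class is
# dropped, and the helper name build_ica_space is illustrative.
import numpy as np
from sklearn.decomposition import PCA, FastICA

def build_ica_space(image_observations, variance_to_keep=0.99):
    pca = PCA(n_components=variance_to_keep, whiten=True)
    coeffs = pca.fit_transform(image_observations)
    coeffs -= coeffs.mean(axis=0)
    ica = FastICA(max_iter=100000)
    representations = ica.fit_transform(coeffs)  # coefficients of each image
    basis_images = ica.mixing_.T                 # learned ICA basis vectors
    return pca, ica, representations, basis_images

# Example: 50 random "images" of 400 pixels each.
pca, ica, reps, basis = build_ica_space(np.random.RandomState(0).rand(50, 400))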
def independent_component(x, y):
    clf = FastICA(random_state=1)
    transformed = clf.fit_transform(x.reshape(-1, 1))
    comp = clf.components_[0, 0]
    mm = clf.mixing_[0, 0]
    src_max = transformed.max()
    src_min = transformed.min()
    return [comp, mm, src_max, src_min]
def transform(data, n_components=3):
    features, weights, labels = data
    start = time()
    ica = FastICA(n_components=n_components)
    transformed = ica.fit_transform(features)
    elapsed = time() - start
    df = pd.DataFrame(transformed)
    return df, elapsed
def generate_peoples_results_files(self): self.np_result = np.c_[self.results[0]['blue'], self.results[0]['green'], self.results[0]['red']] list_number = len(self.results[0]['blue']) # ICA ica = FastICA(n_components=3, fun='logcosh', max_iter=2000) ica_transformed = ica.fit_transform(self.np_result) component_all = ica_transformed.ravel([1]) component_1 = component_all[:list_number] component_2 = component_all[list_number:(2 * list_number)] component_3 = component_all[(2 * list_number):(3 * list_number)] # butter_smooth N = 8 Wn = [1.6 / 30, 4.0 / 30] t = np.linspace(1 / 30, list_number / 30, list_number) b, a = signal.butter(N, Wn, 'bandpass', analog=False) filter_1 = signal.filtfilt(b, a, component_1) filter_2 = signal.filtfilt(b, a, component_2) filter_3 = signal.filtfilt(b, a, component_3) lowess_1 = sm.nonparametric.lowess(filter_1, t, frac=10.0 / list_number) lowess_2 = sm.nonparametric.lowess(filter_2, t, frac=10.0 / list_number) lowess_3 = sm.nonparametric.lowess(filter_3, t, frac=10.0 / list_number) smooths = [] smooth_1 = lowess_1[:, 1] smooth_2 = lowess_2[:, 1] smooth_3 = lowess_3[:, 1] smooths.append(smooth_1) smooths.append(smooth_2) smooths.append(smooth_3) # FFT and spectrum fft_1 = np.fft.fft(smooth_1, 256) fft_2 = np.fft.fft(smooth_2, 256) fft_3 = np.fft.fft(smooth_3, 256) spectrum_1 = list(np.abs(fft_1) ** 2) spectrum_2 = list(np.abs(fft_2) ** 2) spectrum_3 = list(np.abs(fft_3) ** 2) max1 = max(spectrum_1) max2 = max(spectrum_2) max3 = max(spectrum_3) num_spec1 = spectrum_1.index(max(spectrum_1)) if num_spec1 > (list_number / 2): num_spec1 = 256 - num_spec1 num_spec2 = spectrum_2.index(max(spectrum_2)) if num_spec2 > (list_number / 2): num_spec2 = 256 - num_spec2 num_spec3 = spectrum_3.index(max(spectrum_3)) if num_spec3 > (list_number / 2): num_spec3 = 256 - num_spec3 num_spec = [num_spec1, num_spec2, num_spec3] max_all = [max1, max2, max3] max_num = max_all.index(max(max_all)) self.heartRate = int(num_spec[max_num] * 1800 / 256) + 1 return smooths[max_num]
def _fit_local(self, data):
    from sklearn.decomposition import FastICA
    from numpy import random

    random.seed(self.seed)
    model = FastICA(n_components=self.k, fun="cube", max_iter=self.max_iter,
                    tol=self.tol, random_state=self.seed)
    signals = model.fit_transform(data)
    return signals, model.mixing_.T
def fit_transform_ica(X):
    ica = FastICA(n_components=50, max_iter=2000, tol=0.05, algorithm='parallel',
                  fun='cube', fun_args={'alpha': 1.0}, random_state=42)  # 26 36 76
    start = time.time()
    X = ica.fit_transform(X)
    end = time.time()
    print("Done!\nFit ICA transform time (secs): {:.3f}".format(end - start))
    return X, ica
def print_kurtosis(scaled_data):
    # print the kurtosis of the scaled data
    print("Kurtosis of original DF:", kurtosis(scaled_data))

    # print the kurtosis of the ICA-transformed columns
    for i in range(1, len(scaled_data[0]) + 1):
        ica = FastICA(n_components=i)
        ica_fit = ica.fit_transform(scaled_data)
        print("Kurtosis of ICA transformed data when i=" + str(i) + ":", kurtosis(ica_fit))
def ICA(model_data, components=None, transform_data=None):
    t0 = time()
    ica = FastICA(n_components=components)
    if transform_data is None:
        projection = ica.fit_transform(model_data)
    else:
        ica.fit(model_data)
        projection = ica.transform(transform_data)
    print("ICA Time: %0.3f" % (time() - t0))
    return projection
class ICA(Transform):
    def __init__(self, dependency, n_components=6):
        self.ica = FastICA(n_components)
        self.dependency = dependency

    def requires(self):
        return [self.dependency]

    def apply(self, data):
        return self.ica.fit_transform(data.T).T
def fun_doICA(X, nc):
    '''Perform ICA and sort signals by bimodality'''
    ica = FastICA(n_components=nc)
    Sest = ica.fit_transform(X)
    A = ica.mixing_
    S = fun_sort_bimod(Sest)
    out = {'S': S, 'A': A}
    return out
def pca_ica(mov, components=50, batch=1000, mu=0.5, ica_func='logcosh', show_status=True): """Perform iterative PCA/ICA ROI extraction Parameters ---------- mov : pyfluo.Movie input movie components : int number of independent components to return batch : int number of pixels to load into memory simultaneously. More leads to a better fit, but requires more memory mu : float from 0-1. In spatiotemporal ICA, closer to 1 means more weight on spatial ica_func : str cdf for entropy maximization in ICA show_status : bool show time elapsed while running Returns ------- Array of shape (n,y,x) where n is number of components, and y,x correspond to shape of mov """ if show_status: p = mup.Process(target=display_time_elapsed) p.start() eigenseries, eigenframes,_proj = ipca(mov, components, batch) # normalize the series frame_scale = mu / np.max(eigenframes) frame_mean = np.mean(eigenframes, axis = 0) n_eigenframes = frame_scale * (eigenframes - frame_mean) series_scale = (1-mu) / np.max(eigenframes) series_mean = np.mean(eigenseries, axis = 0) n_eigenseries = series_scale * (eigenseries - series_mean) # build new features from the space/time data # and compute ICA on them eigenstuff = np.concatenate([n_eigenframes, n_eigenseries]) ica = FastICA(n_components=components, fun=ica_func) joint_ics = ica.fit_transform(eigenstuff) # extract the independent frames num_frames, h, w = mov.shape frame_size = h * w ind_frames = joint_ics[:frame_size, :] ind_frames = np.reshape(ind_frames.T, (components, h, w)) if show_status: p.terminate() return ind_frames
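# Hypothetical usage of pca_ica above. `movie` stands in for a pyfluo.Movie-like array of
# shape (n_frames, y, x); the ipca and display_time_elapsed helpers that pca_ica relies on
# are assumed to be importable from the surrounding module. mu close to 1 weights the
# spatial eigenframes more heavily, mu close to 0 the temporal eigenseries.
# rois = pca_ica(movie, components=50, batch=1000, mu=0.5, show_status=False)
# rois.shape  # (50, y, x): one spatial component map per independent component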
def Get_Result(filename1,filename2,withbeam=True): '''filename1: 21cm filename2 :foreground or 21cm+fg+beam ''' call('rm *.eps', shell=True) Plot = False pol = 0 N = 4 # withbeam=True ############read data##################### map1 = tt.ICA.ReadMap(filename1) map2 = tt.ICA.ReadMap(filename2) if withbeam==True: map = map2 Freq_num = map.shape[0] S = map.T del map else: map=map1[:,pol]+map2[:,pol] Freq_num = map.shape[0] S=map.T ############FastICA####################### ica = FastICA( n_components=N, algorithm='parallel', whiten=True, fun='logcosh', fun_args=None, max_iter=200, tol=0.0001, w_init=None, random_state=None) S_ = ica.fit_transform(S) A_ = ica.mixing_ ########################################## # tt.ICA.GetComponent(N, S_) # re=tt.ICA.rebuild(N, A_, S_, 0, Plot=False) # residuals=tt.ICA.RESULT(0,N,pol,A_,S_,map1,map2,map3,Plot) res = [] # return (map1[100,pol],map1[100,pol]+map2[100,pol]-re) ########################################## for i in range(Freq_num): res.append(tt.ICA.RESULT(i, N, pol, A_, S_, map1, map2, Plot, withbeam)) print res[-1] plt.close('all') res = np.array(res) resx = np.linspace(700, 800, Freq_num, endpoint=True) # plt.plot(resx,res,label='freq_%d pixel_%d'%(F,P)) # plt.show() return np.c_[resx,res]
def decompose(data, data_cols=None, kind='ICA', n_components=None, iterations=300): decompositor = None if kind == 'ICA': decompositor = FastICA(n_components=n_components, max_iter=iterations) elif kind == 'PCA': decompositor = PCA(n_components=n_components) elif kind == 'Kernel': decompositor = KernelPCA(n_components=n_components, max_iter=iterations) transformed_data = decompositor.fit_transform(data.as_matrix(data_cols)) # columns = ['pca{0:0>3}'.format(idx) for idx, value in enumerate(transformed_data, start=0)] dataframe = pd.DataFrame(transformed_data, index=data.index) dataframe.insert(len(dataframe.columns), 'class', data['class']) return dataframe
def calcICA(delta_data, components):
    data = preprocess(delta_data)
    ica = FastICA(n_components=components)
    x_ica = ica.fit_transform(data['cleanMatrix'])
    ica_fill = np.ones((delta_data.shape[0], components)) * np.nan
    ica_fill[data['cleanind']] = x_ica
    ica_weights = ica.components_.T
    delta_ica = {'transform': ica_fill,
                 'weights': ica_weights,
                 }
    return delta_ica
def fast_ica(brain, components):
    ica = FastICA(n_components=components)
    S_ = ica.fit_transform(brain)  # Reconstruct signals
    A_ = ica.mixing_  # Get estimated mixing matrix
    return S_
    # outfile = infile.split('.')[0] + 'fast_ica.csv'
    # with open(outfile, 'wb') as s:
    #     writer = csv.writer(s)
    #     writer.writerows(S_)
    # return outfile
# S2 = np.sin(2 * time) # Signal 1 : sinusoidal signal S2 = np.sign(np.sin(3 * time)) # Signal 2 : square signal # S_2 = signal.sawtooth(2 * np.pi * time) # Signal 3: saw tooth signal # S2 += 0.2 * np.random.normal(size=S2.shape) # Add noise S = np.c_[S1, S2] S /= S.std(axis=0) # Standardize data # Mix data A = np.c_[np.ones([2, 1]), np.random.rand(2, 2)] # Mixing matrix X = np.dot(S, A) # Generate observations # Compute ICA ica = FastICA(n_components=2) S_ica = ica.fit_transform(X) # ############################################################################# # Plot results plt.figure() # plot source signal ax = plt.subplot(4, 1, 1) ax.set_title('Source 1') ax.plot(S1) ax = plt.subplot(4, 1, 2) ax.set_title('Source 1') ax.plot(S2) # plot mixing signal ax = plt.subplot(4, 1, 3) ax.set_title('Observations')
S = np.c_[s1, s2, s3] S += 0.2 * np.random.normal(size=S.shape) # Add noise S /= S.std(axis=0) # Standardize data # Mix data A = np.array([[1, 1, 1], [0.5, 2, 1.0], [1.5, 1.0, 2.0]]) # Mixing matrix X = np.dot(S, A.T) # Generate observations ############################################################################### # Now try to recover the sources # ------------------------------ # compute ICA ica = FastICA(n_components=3) S_ = ica.fit_transform(X) # Get the estimated sources A_ = ica.mixing_ # Get estimated mixing matrix # compute PCA pca = PCA(n_components=3) H = pca.fit_transform(X) # estimate PCA sources plt.figure(figsize=(9, 6)) models = [X, S, S_, H] names = [ 'Observations (mixed signal)', 'True Sources', 'ICA estimated sources', 'PCA estimated sources' ] colors = ['red', 'steelblue', 'orange']
def apply_ICA(proj_data, proj_weights=None):
    ica = FastICA(n_components=2, random_state=RANDOM_SEED)
    # Copy needed because ICA whitens the input matrix
    result = ica.fit_transform(proj_data.copy().T)
    return result
def vis_embeddings(dim_red_method, epochs, sample): n_comp = 2 x_train = epochs.get_data() x_train = x_train.transpose(0, 2, 1).reshape(-1, x_train.shape[1]) x_train = StandardScaler().fit_transform(x_train) y_train = get_y_train(sample) inds = np.arange(15, 8000, 50) x_train = x_train[inds] y_train = y_train[inds] print('fitting {}'.format(dim_red_method)) if dim_red_method == 'pca': pca = PCA(n_components=n_comp) reduced_data = pca.fit_transform(x_train) elif dim_red_method == 'ica': ica = FastICA(n_components=n_comp) reduced_data = ica.fit_transform(x_train) elif dim_red_method == 'se': se = SpectralEmbedding(n_components=n_comp) reduced_data = se.fit_transform(x_train) elif dim_red_method == 'tsne': pca = PCA(n_components=50) pca_data = pca.fit_transform(x_train) tsne = TSNE(n_components=n_comp, verbose=1, perplexity=10, learning_rate=200) reduced_data = tsne.fit_transform(pca_data) else: raise ValueError("{} method not implemented".format(dim_red_method)) print('fitting done') if n_comp == 2: reduced_data_df = pd.DataFrame(data=reduced_data, columns=['PC1', 'PC2']) elif n_comp == 3: reduced_data_df = pd.DataFrame(data=reduced_data, columns=['PC1', 'PC2', 'PC3']) y_train_df = pd.DataFrame(data=y_train, columns=["labels"]) final_df = pd.concat([reduced_data_df, y_train_df[['labels']]], axis=1) if n_comp == 2: sns.set() palette = sns.color_palette("bright", 8) ax = sns.scatterplot(x='PC1', y='PC2', hue='labels', data=final_df, palette=palette, legend='full') ax.set(xlabel='PC1', ylabel='PC2', title='2 component {}'.format(dim_red_method)) plt.show() elif n_comp == 3: ax = plt.figure(figsize=(16, 10)).gca(projection='3d') ax.scatter(xs=final_df["PC1"], ys=final_df["PC2"], zs=final_df["PC2"], c=final_df["labels"], cmap='tab10') ax.set_xlabel('PC1') ax.set_ylabel('PC2') ax.set_zlabel('PC3') plt.show()
s3 = signal.sawtooth(2 * np.pi * time) # Signal 3: saw tooth signal S = np.c_[s1, s2, s3] S += 0.2 * np.random.normal(size=S.shape) # Add noise S /= S.std(axis=0) # Standardize data # Mix data A = np.array([[1, 1, 1], [0.5, 2, 1.0], [1.5, 1.0, 2.0]]) B = np.array([[1, 3, 2], [2, 0.1, 1.0], [2.0, 1.5, 0.5]]) C = np.array([[1, 2, 0.5], [2, 0.5, 0.5], [0.5, 0.5, 2.0]]) # Mixing matrix ica = FastICA(n_components=3) XA = np.dot(S, A.T) # Generate observations SA_ = ica.fit_transform(XA) # Reconstruct signals A_ = ica.mixing_ # Get estimated mixing matrix XB = np.dot(S, B.T) # Generate observations SB_ = ica.fit_transform(XB) # Reconstruct signals B_ = ica.mixing_ # Get estimated mixing matrix XC = np.dot(S, C.T) # Generate observations SC_ = ica.fit_transform(XC) # Reconstruct signals C_ = ica.mixing_ # Get estimated mixing matrix plt.figure() models = [XA, XB, XC, S, SA_, SB_, SC_] names = [ 'Observations(mixed signal by A)', 'Observations(mixed signal by B)', 'Observations(mixed signal by C)', 'True Sources',
def run_and_fit(self, model_string, nr_components, nr_timepoints, nr_neurons, lambd=0): np.random.seed(7) #X=self.simulate_data(nr_components,nr_timepoints,nr_neurons) X = self.simulate_data_w_noise(nr_components, nr_timepoints, nr_neurons, noise_ampl_mult=4) if model_string == 'EnsemblePursuit': options_dict = {'seed_neuron_av_nr': 10, 'min_assembly_size': 1} ep_pt = EnsemblePursuitPyTorch(n_ensembles=nr_components, lambd=lambd, options_dict=options_dict) U, V = ep_pt.fit_transform(X) self.U = U.numpy() self.V = V.numpy().T if model_string == 'EnsemblePursuitNumpy': options_dict = {'seed_neuron_av_nr': 10, 'min_assembly_size': 1} ep_np = EnsemblePursuitNumpy(n_ensembles=nr_components, lambd=lambd, options_dict=options_dict) U, V, self.corrs = ep_np.fit_transform(X) self.U = U self.V = V.T if model_string == 'ICA': ica = FastICA(n_components=nr_components, random_state=7) self.V = ica.fit_transform(X.T).T self.U = ica.mixing_ if model_string == 'PCA': pca = PCA(n_components=nr_components, random_state=7) self.V = pca.fit_transform(X.T).T self.U = pca.components_.T if model_string == 'sparsePCA': spca = SparsePCA(n_components=nr_components, random_state=7) self.V = spca.fit_transform(X.T).T self.U = spca.components_.T if model_string == 'NMF': X -= X.min(axis=0) nmf = NMF(n_components=nr_components, init='nndsvd', random_state=7, alpha=lambd, l1_ratio=0.5) self.V = nmf.fit_transform(X.T).T self.U = nmf.components_.T if model_string == 'LDA': X -= X.min(axis=0) nmf = LatentDirichletAllocation(n_components=nr_components, random_state=7) self.V = nmf.fit_transform(X.T).T self.U = nmf.components_.T print('SHPS', self.U.shape, self.V.shape) self.orig = X self.approx = self.U @ self.V print('orig', self.orig.shape) print('approx', self.approx.shape)
def add_pld_params(model_params, fluxes, pld_intensities, n_pld=9, order=3, add_unity=True, do_pca=True, do_ica=False, do_std=True, pca_cut=False, n_ppm=1.0, start_unity=False, verbose=False): # Make a local copy pld_intensities = pld_intensities.copy() if len(pld_intensities) != n_pld * order: pld_intensities = np.vstack( [list(pld_intensities**k) for k in range(1, order + 1)]) # check that the second set is the square of the first set, and so onself. for k in range(order): assert (np.allclose(pld_intensities[:n_pld]**(k + 1), pld_intensities[k * n_pld:(k + 1) * n_pld])) if do_pca or do_ica: do_std = True stdscaler = StandardScaler() pld_intensities = stdscaler.fit_transform( pld_intensities.T) if do_std else pld_intensities.T if do_pca: pca = PCA() pld_intensities = pca.fit_transform(pld_intensities) evrc = pca.explained_variance_ratio_.cumsum() n_pca = np.where(evrc > 1.0 - n_ppm / ppm)[0].min() if pca_cut: pld_intensities = pld_intensities[:, :n_pca] if verbose: print(evrc, n_pca) if do_ica: ica = FastICA() pld_intensities = ica.fit_transform(pld_intensities) # evrc = ica.explained_variance_ratio_.cumsum() # n_ica = np.where(evrc > 1.0-n_ppm/ppm)[0].min() # if ica_cut: pld_intensities = pld_intensities[:,:n_ica] # # if verbose: print(evrc, n_ica) if add_unity: pld_intensities = np.vstack( [pld_intensities.T, np.ones(pld_intensities.shape[0])]).T pld_coeffs = np.linalg.lstsq( pld_intensities, fluxes)[0] if not start_unity else np.ones( pld_intensities.shape[1]) / pld_intensities.shape[1.0] n_pld_out = n_pca if do_pca and pca_cut else n_pld * order for k in range(n_pld_out): model_params.add_many(('pld{}'.format(k), pld_coeffs[k], True)) # if add_unity: model_params.add_many(('pld{}'.format(n_pld_out), pld_coeffs[n_pld_out], True)) # FINDME: Maybe make min,max = 0,2 or = 0.9,1.1 if add_unity: model_params.add_many( ('pldBase', pld_coeffs[n_pld_out], True)) # FINDME: Maybe make min,max = 0,2 or = 0.9,1.1 if verbose: [ print('{:5}: {}'.format(val.name, val.value)) for val in model_params.values() if 'pld' in val.name.lower() ] return model_params, pld_intensities.T
s1 = np.sin(2 * time) # Signal 1 : sinusoidal signal s2 = np.sign(np.sin(3 * time)) # Signal 2 : square signal s3 = signal.sawtooth(2 * np.pi * time) # Signal 3: saw tooth signal S = np.c_[s1, s2, s3] S += 0.2 * np.random.normal(size=S.shape) # Add noise S /= S.std(axis=0) # Standardize data # Mix data A = np.array([[1, 1, 1], [0.5, 2, 1.0], [1.5, 1.0, 2.0]]) # Mixing matrix X = np.dot(S, A.T) # Generate observations # Compute ICA ica = FastICA(n_components=3) S_ = ica.fit_transform(X) # Reconstruct signals A_ = ica.mixing_ # Get estimated mixing matrix # We can `prove` that the ICA model applies by reverting the unmixing. assert np.allclose(X, np.dot(S_, A_.T) + ica.mean_) # For comparison, compute PCA pca = PCA(n_components=3) H = pca.fit_transform(X) # Reconstruct signals based on orthogonal components ############################################################################### # Plot results plt.figure() models = [X, S, S_, H]
gs.fit(labels_EM_PCA.reshape(-1, 1), dataY) tmp = pd.DataFrame(gs.cv_results_) tmp.to_csv(out + 'QSAR NN EM PCA.csv') best_indices = tmp.index[tmp['rank_test_score'] == 1].tolist() best_em = best_em.append( { 'Layers': str(tmp.iloc[best_indices[0], 4]), 'Iterations': tmp.iloc[best_indices[0], 5], 'Score': tmp.iloc[best_indices[0], 12] }, ignore_index=True) # Fit/transform with FastICA print("Running FastICA...") ica = FastICA(n_components=10, random_state=5) dataX_ICA = ica.fit_transform(dataX) # Run KM print("Running k-means...") model = KMeans(n_clusters=km) labels_KM_PCA = model.fit_predict(dataX_ICA) grid = { 'NN__hidden_layer_sizes': nn_arch, 'NN__max_iter': nn_iter, 'NN__learning_rate_init': [0.016], 'NN__alpha': [0.316227766] } mlp = MLPClassifier(activation='relu', early_stopping=True, random_state=5)
def ica(self, whiten=True):
    ica = FastICA(n_components=5, whiten=whiten)
    self.train = ica.fit_transform(self.train)
print("Transforming...") n_comp = 50 # tSVD tsvd = TruncatedSVD(n_components=n_comp, random_state=420) tsvd_results_train = tsvd.fit_transform(train_df) tsvd_results_test = tsvd.transform(test_df) # PCA pca = PCA(n_components=n_comp, random_state=420) pca2_results_train = pca.fit_transform(train_df) pca2_results_test = pca.transform(test_df) # ICA ica = FastICA(n_components=n_comp, random_state=420) ica2_results_train = ica.fit_transform(train_df) ica2_results_test = ica.transform(test_df) # GRP grp = GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420) grp_results_train = grp.fit_transform(train_df) grp_results_test = grp.transform(test_df) # SRP srp = SparseRandomProjection(n_components=n_comp, dense_output=True, random_state=420) srp_results_train = srp.fit_transform(train_df) srp_results_test = srp.transform(test_df) ############
return n_components_ica runs = (("data/creditcards_train.arff", "Credit Default", "d1"), ("data/htru_train.arff", "Pulsar Detection", "d2")) for (fname, label, abbrev) in runs: X, y, feature_names = load_data(fname) # model selection (optimal number of components) n_components = optimize_components(X, feature_names, label, abbrev) # save as new set of features ica = FastICA(n_components=n_components, random_state=SEED) start_time = time.perf_counter() df = pd.DataFrame(ica.fit_transform(X)) run_time = time.perf_counter() - start_time print(label + ": run time = " + str(run_time)) print(label + ": iterations until convergence = " + str(ica.n_iter_)) df.to_pickle(path.join(PKL_DIR, abbrev + "_ica.pickle")) # parallel coordinates plot visualizer = ParallelCoordinates(sample=0.2, shuffle=True, fast=True) visualizer.fit_transform(df, y) visualizer.ax.set_xticklabels(visualizer.ax.get_xticklabels(), rotation=45, horizontalalignment='right') visualizer.finalize() plt.savefig(path.join(PLOT_DIR, abbrev + "_ica_parallel.png"), bbox_inches='tight') visualizer.show()
# shape print('Shape train: {}\nShape test: {}'.format(train.shape, test.shape)) y_train = train["y"] y_mean = np.mean(y_train) #PCA/ICA for dimensionality reduction n_comp = 10 # PCA pca = PCA(n_components=n_comp, random_state=42) pca2_results_train = pca.fit_transform(train.drop(["y"], axis=1)) pca2_results_test = pca.transform(test) # ICA ica = FastICA(n_components=n_comp, random_state=42) ica2_results_train = ica.fit_transform(train.drop(["y"], axis=1)) ica2_results_test = ica.transform(test) train_cols = [col for col in list(train)] test_cols = [col for col in list(test)] print(train_cols) print(test_cols) train.drop(train_cols, axis=1, inplace=True) test.drop(test_cols, axis=1, inplace=True) # Append decomposition components to datasets for i in range(1, n_comp + 1): train['pca_' + str(i)] = pca2_results_train[:, i - 1] test['pca_' + str(i)] = pca2_results_test[:, i - 1]
plt.vlines(0, min_y, max_y, linewidth=2) plt.xlim(min_x, max_x) plt.ylim(min_y, max_y) plt.title(title) pml.savefig(f'{file_name}.pdf') plt.show() np.random.seed(2) N = 100 A = np.array([[2, 3], [2, 1]]) * 0.3 # Mixing matrix S_uni = (np.random.rand(N, 2) * 2 - 1) * np.sqrt(3) X_uni = S_uni @ A.T pca = PCA(whiten=True) S_pca = pca.fit(X_uni).transform(X_uni) ica = FastICA() S_ica = ica.fit_transform(X_uni) S_ica /= S_ica.std(axis=0) plot_samples(S_uni, 'Uniform Data', 'ica-uniform-source') plot_samples(X_uni, 'Uniform Data after Linear Mixing', 'ica-uniform-mixed') plot_samples(S_pca, 'PCA Applied to Mixed Data from Uniform Source', 'ica-uniform-PCA') plot_samples(S_ica, 'ICA Applied to Mixed Data from Uniform Source', 'ica-uniform-ICA')
label='mean_kurtosis') line2, = plt.plot(k_arr, kurt_var, color='b', marker='o', label='variance of kurtosis') plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)}) plt.ylabel(' kurtosis') plt.xlabel('Number of components') plt.show() return None kurt(X, y, 20) ica = FastICA(n_components=11, random_state=0) ica_2d = ica.fit_transform(X) X_ica = ica.transform(X) plt.scatter(ica_2d[:, 0], ica_2d[:, 1], c=y, cmap="RdGy", edgecolor="None", alpha=1, vmin=75, vmax=150) plt.colorbar() plt.title('ICA Scatter Plot') def plot_samples(S, axis_list=None): plt.scatter(S[:, 0],
x = dataset.iloc[:, 1:-5] y = dataset.iloc[:, -5:] #collect the wavelengths in the spectra wavelengths = dataset.columns[1:-5] #list of constituents constituents = list(dataset.columns[-5:]) from sklearn.decomposition import FastICA from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score from math import sqrt from numpy import savetxt #find out the ICs on the whole dataset transformer = FastICA(n_components=n_IC, random_state=rdnSeed, max_iter=4000) IC = transformer.fit_transform(x.T) M = transformer.mixing_ #set the target variable target = constituents[ constituentIndex] #change the index to fit the model to different constituents y = np.array(y[target]) #create the dataframe so that we can drop the missing values both in x and y df = np.append(M, y.reshape(len(y), 1), axis=1) columns_ = np.append(wavelengths, [target]).transpose() df = pd.DataFrame(data=df) #drop the rows that has missing values in any columns and reset the index df.dropna(inplace=True)
c = 0 X_inner_trainingset = X_all[X_arr[subtrain[train], ].reshape( (numDay - 2) * duration), ] Y_inner_trainingset = Y_arr[subtrain[train], ].reshape( (numDay - 2) * duration) X_validate = X_all[X_arr[subtrain[validate], ], ].reshape( duration, 600) Y_validate = Y_all[X_arr[subtrain[validate]]].reshape(duration) for C in components: print(test, subtrain[validate], subtrain[train], C) ica = FastICA(n_components=C, max_iter=5000, tol=0.0001) #tol = 0.001 X_inner_train = ica.fit_transform( X_inner_trainingset ) #pull components from ica fit transformation X_inner_test = ica.transform(X_validate) clf = svm.SVC(kernel='linear', class_weight='balanced', probability=True) y_inner_score = clf.fit( X_inner_train, Y_inner_trainingset).decision_function(X_inner_test) fpr, tpr, _ = roc_curve(Y_validate, y_inner_score) roc_auc[t, v, c] = auc(fpr, tpr) c += 1 v += 1
def transform(self, graph_file, first_node=None): logging.info('loading graph') """ input: csv file of graph; formate: start_node, end_node, weight output: graph, a list, the elements are tuples, like [(1, 2, 1) (3, 1, 1) (2, 3, 1)] count amount of nodes from G """ self.graph = self.load_graph(graph_file) # obtain a array of graph self.node_count = self.find_node_count( self.graph) # find the number of nodes in graph self.edge_count = len(self.graph) print("nodes:", self.node_count) print("edges:", self.edge_count) self.node_range = range(1, self.node_count + 1) logging.info('computing distance matrix') self.distance_matrix = self.compute_distance_matrix( self.graph, self.node_count) # self.distance_matrix = self.nomalization_distance_mtrix(distance_matrix=self.distance_matrix) # nomalized distance matrix ############################## adjacency matrix ########################################## self.adjacency_matrix = self.get_adjacency_matrix( self.graph, self.node_count) ########################################################################### if first_node is None: """self.first_node = randint(0, self.node_count) + 1 # Choose the first pivot from V randomly.""" self.first_node = randint(1, self.node_count) else: self.first_node = first_node # Specify the first pivot. logging.info('finding pivots') """ dimensions=m choose m pivots according to k-center. """ ##################################################### if self.pivot_select == "randomly": self.pivot_nodes = self.choose_pivots_randomly( dimension=self.dimension, number_nodes=self.node_count) ##################################################### else: self.pivot_nodes = self.choose_pivot_points( self.graph, self.dimension) # self.pivot_nodes: a list logging.info('drawing graph in high dimensional space') """ note that the number of pivot nodes is the same as dimension. formate of points: G=(V, E) |V|=n, dimensions = m = pivots d(vi, pj) denotes a distance computered by Dijkstra's algorithm in a G. p1 p2 p3 ... pm v1 d(v1, p1) d(v1, p2) d(v1, p3) d(v1, pm) v2 . v3 . v4 . . . . . . . . . . vn d(vn, p1) ... d(vn, pm) """ self.points = list( map( lambda i: tuple(self.distance_matrix[i - 1, p - 1] for p in self.pivot_nodes), self.node_range)) if self.normalization is True: ############################################################################################################## self.points = self.nomalization_distance_mtrix( distance_matrix=self.points) # nomalized self.points ############################################################################################################## logging.info('project into a low dimension use PCA') if self.version == "HDE-SV": if self.dimension == 2: self.transformed_points = np.array(self.points) """ PCA: input array-like: shape of self.points = (n_sample, n_feature) output array-like: shape of self.transformed_points = (n_sample, n_component) """ if self.version == "HDE": # PCA denotes that algorithm uses PCA to decomposite original space. pca = PCA(n_components=2, copy=True) self.transformed_points = pca.fit_transform(self.points) if self.version == "HDE-Level": # PCA denotes that algorithm uses PCA to decomposite original space. pca = PCA(n_components=3, copy=True) self.transformed_points = pca.fit_transform(self.points) pca = PCA(n_components=2, copy=True) self.transformed_points = pca.fit_transform( self.transformed_points) ''' replace initial version as paper. 
by mty 2017-8-9 ''' if self.version == "HDE-PIT": # PIT denotes that algorithm uses poweriteration to computer eigenvectors for decomposition space. X, S = self.covariance(self.points) # X = np.array(self.points).T # X = X.astype(float) U = self.poweriteration(S, epsilon=self.epsilon) self.transformed_points = self.decomposition_space(X, U) if self.node_count == (self.edge_count + 1): # determine wether it is a tree. FR = FR_Algorithm(number_of_nodes=self.node_count, initial_temperature=self.initial_temperature, cooling_factor=self.cooling_factor, factor_attract=self.factor_attract, factor_repulsion=self.factor_repulsion) # use FR to fine-tune self.transformed_points = FR.apply_force_directed_algorithm( iteration=self.fr_iteration, graph=self.graph, coord_decomposition=self.transformed_points) if self.version == "HDE-MDS": # HDE-MDS denotes that algorithm combines with MDS. hde_mds = MDS() # MDS object self.transformed_points = hde_mds.fit_transform(self.points) if self.version == "Pivot-MDS": # Pivot-MDS denotes that original version of Pivot MDS. pivot_mds = PivotMDS(d=self.distance_matrix, pivots=self.dimension) # PivotMDS object self.transformed_points = pivot_mds.optimize() if self.version == "HDE-FICA": # FICA denotes that algorithm uses Fast ICA to decomposite original space. # fun, Could be either 'logcosh', 'exp', or 'cube'. fica = FastICA(n_components=2) # print(np.array(self.points).shape) self.transformed_points = fica.fit_transform(self.points) # print(np.array(self.transformed_points).shape) # FR = FR_Algorithm(number_of_nodes=self.node_count, initial_temperature=self.initial_temperature, # cooling_factor=self.cooling_factor, factor_attract=self.factor_attract, factor_repulsion=self.factor_repulsion) # # use FR to fine-tune # self.transformed_points = FR.apply_force_directed_algorithm(iteration=self.fr_iteration, graph=self.graph, coord_decomposition=self.transformed_points) if self.version == "HDE-KPCA": # FPCA denotes that algorithm uses kernel PCA to decomposite original space. kpca = KernelPCA(n_components=2, kernel=self.kpca_fun, gamma=self.gamma) self.transformed_points = kpca.fit_transform(self.points) if self.version == "HDE-NMF": nmf = NMF(n_components=2) self.transformed_points = nmf.fit_transform(self.points) if self.version == "HDE-TruncatedSVD": tsvd = TruncatedSVD(n_components=2) self.transformed_points = tsvd.fit_transform(self.points) if self.version == "HDE-LDA": lda = LinearDiscriminantAnalysis(n_components=2) y = [] for i in range(self.node_count): y.append(1) y = np.array(y) lda = lda.fit(self.points, y=y) self.transformed_points = lda.transform(self.points) if self.version == "HDE-FR": pca = PCA(n_components=2, copy=True) self.transformed_points = pca.fit_transform(self.points) if self.node_count == (self.edge_count + 1): # determine wether it is a tree. FR = FR_Algorithm(number_of_nodes=self.node_count, initial_temperature=self.initial_temperature, cooling_factor=self.cooling_factor, factor_attract=self.factor_attract, factor_repulsion=self.factor_repulsion) # use FR to fine-tune self.transformed_points = FR.apply_force_directed_algorithm( iteration=self.fr_iteration, graph=self.graph, coord_decomposition=self.transformed_points) if self.version == "HDE-FICA-FR": fica = FastICA(n_components=2) self.transformed_points = fica.fit_transform(self.points) if self.node_count == (self.edge_count + 1): # determine wether it is a tree. 
FR = FR_Algorithm(number_of_nodes=self.node_count, initial_temperature=self.initial_temperature, cooling_factor=self.cooling_factor, factor_attract=self.factor_attract, factor_repulsion=self.factor_repulsion) # use FR to fine-tune self.transformed_points = FR.apply_force_directed_algorithm( iteration=self.fr_iteration, graph=self.graph, coord_decomposition=self.transformed_points) if self.version == "HDE-TSNE-FR": # pca = PCA(n_components=10, copy=True) # self.transformed_points = pca.fit_transform(self.points) tsne = TSNE(learning_rate=self.learning_rate, init=self.init ) # 'init' must be 'pca', 'random', or a numpy array self.transformed_points = tsne.fit_transform(self.points) if self.node_count == (self.edge_count + 1): # determine wether it is a tree. FR = FR_Algorithm(number_of_nodes=self.node_count, initial_temperature=self.initial_temperature, cooling_factor=self.cooling_factor, factor_attract=self.factor_attract, factor_repulsion=self.factor_repulsion) # use FR to fine-tune self.transformed_points = FR.apply_force_directed_algorithm( iteration=self.fr_iteration, graph=self.graph, coord_decomposition=self.transformed_points) if self.version == "HDE-SPE": IP = SpectralEmbedding(n_components=2) self.transformed_points = IP.fit_transform(self.distance_matrix) # pca = PCA(n_components=2, copy=True) # self.transformed_points = pca.fit_transform( self.transformed_points) return self.node_count, self.edge_count
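# A compact, self-contained sketch of the high-dimensional-embedding idea described in the
# transform() docstring above: each node is represented by its graph distances to m pivot
# nodes, and the resulting n-by-m matrix is projected to 2-D with PCA (the HDE-FICA variant
# swaps PCA for FastICA). Pivot selection here is plain random choice rather than the
# k-center heuristic, and the function name hde_layout is illustrative.
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import shortest_path
from sklearn.decomposition import PCA

def hde_layout(edges, n_nodes, n_pivots=10, seed=0):
    # Build a symmetric sparse adjacency matrix from (u, v, weight) edges (0-based ids).
    rows, cols, w = zip(*edges)
    adj = csr_matrix((w + w, (rows + cols, cols + rows)), shape=(n_nodes, n_nodes))
    dist = shortest_path(adj, directed=False)           # all-pairs graph distances
    rng = np.random.RandomState(seed)
    pivots = rng.choice(n_nodes, size=min(n_pivots, n_nodes), replace=False)
    points = dist[:, pivots]                            # n_nodes x n_pivots HDE matrix
    return PCA(n_components=2).fit_transform(points)    # 2-D coordinates for drawing

# Tiny example: a 4-cycle.
coords = hde_layout([(0, 1, 1.0), (1, 2, 1.0), (2, 3, 1.0), (3, 0, 1.0)], n_nodes=4, n_pivots=3)
print(coords.shape)  # (4, 2)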
class SourceMethod(): def __init__(self, name, itrN, rg=[1, 8], err=0.1, method='pca'): import xlrd #print(name) wb = xlrd.open_workbook(name) st = wb.sheet_by_index(0) self.xlsdata = [] for itr in range(st.nrows): self.xlsdata.append((st.row_values(itr))) self.rdata = [] self.sample = [] for itr in self.xlsdata: self.rdata.append(itr[rg[0]:]) self.sample.append(itr[0]) self.title = self.xlsdata[0] self.data_orig = np.abs(self.rdata[1:]) self.orig_data = np.array(self.rdata[1:]) self.dest_err = err self.data_max = np.max(np.abs(self.data_orig), axis=0) self.data = np.divide(self.data_orig, self.data_max) self.minus = np.divide(self.orig_data[1, :], np.abs(self.data_orig[1, :])) self.itrN = itrN self.train() def func(): None def train(self): #data_avg=np.average(self.data) data_min = np.min(np.abs(self.data), axis=0) base = 0.5 data_sub = np.subtract(self.data, base * data_min) #print(np.shape(data_min)) for itr in range(1, len(self.data)): itr = 3 self.components_, self.array = mynmf(data_sub, itr, self.itrN, self.dest_err) self.method = FastICA(n_components=itr) self.array = np.transpose( self.method.fit_transform(np.transpose(data_sub))) self.components_ = self.method.mixing_ self.array = np.add(self.array, data_min * base / itr) #err=np.mean(np.abs(self.data-np.dot(self.components_,self.array))) self.for_sta = np.multiply( np.abs(np.dot(self.components_, self.array)), self.data_max) sum_cof = 0 for itra in range(len(self.data[0])): sum_cof += self.pearson( np.transpose(self.data)[itra], np.transpose(self.for_sta)[itra]) sum_cof = sum_cof / len(self.data[0]) print(sum_cof) if (self.dest_err > 1 - sum_cof): break if (itr > 10): break #self.for_sta=np.multiply(np.abs(np.dot(self.components_,self.array)),self.data_max) self.array = np.multiply(self.array, self.data_max) self.data_orig = np.multiply(self.data_orig, self.minus) self.for_sta = np.multiply(self.for_sta, self.minus) self.array = np.multiply(self.array, self.minus) self.data_orig = np.transpose(self.data_orig) self.for_sta = np.transpose(self.for_sta) self.array = np.transpose(self.array) self.data = np.transpose(self.data) self.orig_data = np.transpose(self.orig_data) def get_par(self): return self.array, self.method.n_components_, self.method.components_ def print_sta(self): print("Source Number:") print(self.components_) ratio_sum = np.transpose([np.sum(self.method.components_, axis=1)]) print("Mixing ratio matrix:") print(np.divide(self.method.components_, ratio_sum)) def pearson(self, x, y): x_avg = np.average(x) y_avg = np.average(y) xv = x - x_avg yv = y - y_avg cof1 = np.sum(xv * yv) x2 = np.sum(np.square(xv)) y2 = np.sum(np.square(yv)) if (y2 == 0): cof = 0 else: cof = cof1 / np.sqrt(x2 * y2) return cof def plot_sta(self): import matplotlib.pyplot as plt #plt.style.use('bmh') plt.figure(1) tick = np.arange(len(self.array)) width = 0.6 / len(self.array[0]) cont = 0 for dt in np.transpose(self.array): #plt.plot(tick+width*cont,dt,alpha=0.4,color=list(plt.rcParams['axes.prop_cycle'])[cont]['color']) #plt.bar(tick+width*cont,dt,width,alpha=0.2,color=list(plt.rcParams['axes.prop_cycle'])[cont]['color']) cont = cont + 1 plt.figure(2) cont = 0 for dt in np.transpose(self.array): #plt.plot(dt,alpha=0.4,color=list(plt.rcParams['axes.prop_cycle'])[cont]['color']) cont = cont + 1 plt.figure(3) plt.plot(np.average(self.for_sta, axis=0)) for itr in range(len(self.data)): plt.figure(4 + itr) z1 = np.polyfit(self.data[itr], self.for_sta[itr], 1) plt.scatter(self.data[itr], self.for_sta[itr]) mi = min(self.data[itr]) ma = 
max(self.data[itr]) idtv = ma - mi x = np.arange(0, ma, 0.01) plt.title("$" + str(self.title[itr]) + "$") cof = self.pearson(self.data[itr], self.for_sta[itr]) #plt.text(mi/6+ma/6,idtv*0.001,"$f(x)=%fx+%f;cof=%f$"%(z1[0],z1[1],cof)) #plt.text(mi/1.9+ma/1.9,idtv*0.05,) plt.plot(x, x * z1[0] + z1[1]) plt.boxplot(np.transpose(self.data_orig - self.for_sta)) plt.show()
emis = f["emis"][...] X = f["X"][...] f.close() TOL = 1e-4 emis[emis < TOL] = TOL emis[emis > 1 - TOL] = 1 - TOL ix = np.argsort(X) X = X[ix] emis = emis[ix, :] OD = -np.log(1 - emis) pcaOD = PCA(whiten=True, n_components=48) ica = FastICA(n_components=36, max_iter=5000) ODIR = ica.fit_transform(OD) # Reconstruct signals OD2 = ica.inverse_transform(ODIR) emis2 = 1 - np.exp(-OD2) # Reconstruct signals A_ = ica.mixing_ # Get estimated mixing matrix nmf = NMF(n_components=48) ODNR = nmf.fit_transform(OD) OD2 = nmf.inverse_transform(ODNR) emis2 = 1 - np.exp(-OD2) N = 48 knots = np.linspace(X.min(), X.max(), N)[1:-1] tck = splrep(X, -np.log(emis[:, 350]), t=knots) t = tck[0] c = np.zeros((emis.shape[-1], tck[1].size))
import numpy as np import matplotlib.pyplot as plt from scipy import signal from sklearn.decomposition import PCA, FastICA # Load data input_file = 'mixture_of_signals.txt' X = np.loadtxt(input_file) # Compute ICA ica = FastICA(n_components=4) # Reconstruct the signals signals_ica = ica.fit_transform(X) # Get estimated mixing matrix mixing_mat = ica.mixing_ # Perform PCA pca = PCA(n_components=4) signals_pca = pca.fit_transform( X) # Reconstruct signals based on orthogonal components # Specify parameters for output plots models = [X, signals_ica, signals_pca] colors = ['blue', 'red', 'black', 'green'] # Plotting input signal plt.figure() plt.title('Input signal (mixture)')
func = func_new conv = signal.fftconvolve(func, emo, mode='same', axes=0) conv /= np.linalg.norm(conv, axis=0) conv = conv * 8 # PCA X = np.concatenate((func, emo), axis=1) pca = PCA(n_components=3, svd_solver='randomized') pca.fit(X.transpose()) pca = pca.components_.transpose() pca = pca * 10 # ICA transformer = FastICA(n_components=3, random_state=0) ica = transformer.fit_transform(X) ica = ica * 8 # Fa transformer = FactorAnalysis(n_components=3, random_state=0) fa = transformer.fit_transform(X) # GMM from sklearn import mixture gmmodel = mixture.GaussianMixture(n_components=3, covariance_type='tied', max_iter=100, random_state=10).fit(X.transpose()) gmm = gmmodel.means_.transpose() gmm_samp, gmm_y = gmmodel.sample(118)
def perform_feature_engineering(train, test, config): for c in train.columns: if (len(train[c].value_counts()) == 2): if (train[c].mean() < config['SparseThreshold']): del train[c] del test[c] col = list(test.columns) if config['ID'] != True: col.remove('ID') # tSVD if (config['tSVD'] == True): tsvd = TruncatedSVD(n_components=config['n_comp']) tsvd_results_train = tsvd.fit_transform(train[col]) tsvd_results_test = tsvd.transform(test[col]) for i in range(1, config['n_comp'] + 1): train['tsvd_' + str(i)] = tsvd_results_train[:, i - 1] test['tsvd_' + str(i)] = tsvd_results_test[:, i - 1] # PCA if (config['PCA'] == True): pca = PCA(n_components=config['n_comp']) pca2_results_train = pca.fit_transform(train[col]) pca2_results_test = pca.transform(test[col]) for i in range(1, config['n_comp'] + 1): train['pca_' + str(i)] = pca2_results_train[:, i - 1] test['pca_' + str(i)] = pca2_results_test[:, i - 1] # ICA if (config['ICA'] == True): ica = FastICA(n_components=config['n_comp']) ica2_results_train = ica.fit_transform(train[col]) ica2_results_test = ica.transform(test[col]) for i in range(1, config['n_comp'] + 1): train['ica_' + str(i)] = ica2_results_train[:, i - 1] test['ica_' + str(i)] = ica2_results_test[:, i - 1] # GRP if (config['GRP'] == True): grp = GaussianRandomProjection(n_components=config['n_comp'], eps=0.1) grp_results_train = grp.fit_transform(train[col]) grp_results_test = grp.transform(test[col]) for i in range(1, config['n_comp'] + 1): train['grp_' + str(i)] = grp_results_train[:, i - 1] test['grp_' + str(i)] = grp_results_test[:, i - 1] # SRP if (config['SRP'] == True): srp = SparseRandomProjection(n_components=config['n_comp'], dense_output=True, random_state=420) srp_results_train = srp.fit_transform(train[col]) srp_results_test = srp.transform(test[col]) for i in range(1, config['n_comp'] + 1): train['srp_' + str(i)] = srp_results_train[:, i - 1] test['srp_' + str(i)] = srp_results_test[:, i - 1] if config['magic'] == True: magic_mat = train[['ID', 'X0', 'y']] magic_mat = magic_mat.groupby(['X0'])['y'].mean() magic_mat = pd.DataFrame({ 'X0': magic_mat.index, 'magic': list(magic_mat) }) mean_magic = magic_mat['magic'].mean() train = train.merge(magic_mat, on='X0', how='left') test = test.merge(magic_mat, on='X0', how='left') test['magic'] = test['magic'].fillna(mean_magic) return train, test
sc = StandardScaler()
ica2 = FastICA(n_components=2)
ica80 = FastICA(n_components=80)

# Dataset 1
data1 = pd.read_csv('dist1.txt', sep=' ')
data1.head()
data1 = data1.dropna(axis='index')
dataset1 = data1.values
data1_std = sc.fit_transform(dataset1)
data1_std_ica2 = ica2.fit_transform(data1_std)
data1_std_ica80 = ica80.fit_transform(data1_std)
data1_std_ica2
data1_std_ica80
plt.scatter(data1_std_ica2[:, 0], data1_std_ica2[:, 1])

dataset1frame80 = pd.DataFrame(data1_std_ica80)
dataset1frame80.head()
dataset1frame80['mean'] = dataset1frame80.mean(axis=1)
dataset1frame80.head()
ica_power1 = dataset1frame80['mean'].values
X = np.array(trainFile.data)
Y = np.array(trainFile.labels)

# Just like the face recognition case, compute the average digit image
avg_digit = compute_avg_digits(X, configs.IMAGE_WIDTH)
print("Avg digit computed ...")

# Subtract the average from each input, then normalize
X_normalized_avg = normalize_with_avg(X, avg_digit)
X_normalized = preprocessing.normalize(X_normalized_avg)
print("Normalize X ...")

# ICA features
ica = FastICA()
features = ica.fit_transform(X_normalized)
print("Transform done ...")

# Split into training and testing
cutoff = int(len(Y) * 0.75)
features_train = np.array(features[:cutoff])
Y_train = np.array(Y[:cutoff])
features_test = np.array(features[cutoff:])
Y_test = np.array(Y[cutoff:])

# Submission
#features_train = np.array(features)
#Y_train = np.array(Y)
#X_test = np.array(testFile.data)
#X_test_normalized_avg = normalize_with_avg(X_test, avg_digit)
#X_test_normalized = preprocessing.normalize(X_test_normalized_avg)
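The commented-out submission block suggests the ICA features are fed to a classifier. As an illustration only (the original classifier is not shown), a k-nearest-neighbours fit on the split above:

from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(features_train, Y_train)
print("Test accuracy: {:.3f}".format(clf.score(features_test, Y_test)))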
def get_dc_feature(df_train, df_test, n_comp=12, id_column=None, label_column=None):
    """
    Construct decomposition (dimensionality-reduction) features.
    """
    train = df_train.copy()
    test = df_test.copy()

    if id_column:
        train_id = train[id_column]
        test_id = test[id_column]
        train = drop_columns(train, [id_column])
        test = drop_columns(test, [id_column])

    if label_column:
        train_y = train[label_column]
        train = drop_columns(train, [label_column])

    # tSVD
    tsvd = TruncatedSVD(n_components=n_comp, random_state=420)
    tsvd_results_train = tsvd.fit_transform(train)
    tsvd_results_test = tsvd.transform(test)

    # PCA
    pca = PCA(n_components=n_comp, random_state=420)
    pca2_results_train = pca.fit_transform(train)
    pca2_results_test = pca.transform(test)

    # ICA
    ica = FastICA(n_components=n_comp, random_state=420)
    ica2_results_train = ica.fit_transform(train)
    ica2_results_test = ica.transform(test)

    # GRP
    grp = GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420)
    grp_results_train = grp.fit_transform(train)
    grp_results_test = grp.transform(test)

    # SRP
    srp = SparseRandomProjection(n_components=n_comp, dense_output=True, random_state=420)
    srp_results_train = srp.fit_transform(train)
    srp_results_test = srp.transform(test)

    # Append decomposition components to datasets
    for i in range(1, n_comp + 1):
        train['pca_' + str(i)] = pca2_results_train[:, i - 1]
        test['pca_' + str(i)] = pca2_results_test[:, i - 1]

        train['ica_' + str(i)] = ica2_results_train[:, i - 1]
        test['ica_' + str(i)] = ica2_results_test[:, i - 1]

        train['tsvd_' + str(i)] = tsvd_results_train[:, i - 1]
        test['tsvd_' + str(i)] = tsvd_results_test[:, i - 1]

        train['grp_' + str(i)] = grp_results_train[:, i - 1]
        test['grp_' + str(i)] = grp_results_test[:, i - 1]

        train['srp_' + str(i)] = srp_results_train[:, i - 1]
        test['srp_' + str(i)] = srp_results_test[:, i - 1]

    if id_column:
        train[id_column] = train_id
        test[id_column] = test_id
    if label_column:
        train[label_column] = train_y

    return train, test
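A minimal call sketch for get_dc_feature. The imports below are the ones the function body assumes; drop_columns is a helper whose original definition is not shown here, so a plausible one-line version is supplied, and the 'ID'/'y' column names are hypothetical:

from sklearn.decomposition import PCA, FastICA, TruncatedSVD
from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection

def drop_columns(df, columns):
    # Assumed helper: drop the named columns and return the result
    return df.drop(columns=columns)

train_dc, test_dc = get_dc_feature(df_train, df_test, n_comp=12,
                                   id_column='ID', label_column='y')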
def update_EM(X, K, gamma, A, pi, mu, sigma_sqr,
              threshold=5e-5, A_mode='GA', grad_mode='GA',
              max_em_steps=30, n_gd_steps=20):

    if type(X) is not torch.Tensor:
        X = torch.tensor(X)
    X = X.type(DTYPE).to(device)

    N, D = X.shape

    END = lambda dA, dsigma_sqr: (dA + dsigma_sqr) < threshold

    Y = None
    niters = 0
    dA, dsigma_sqr = 10, 10
    ret_time = {'E': [], 'obj': []}
    grad_norms, objs = [], []

    if A_mode == 'random':
        A = ortho_group.rvs(D)
        A = to_tensor(A)
    elif A_mode == 'ICA':
        cov = X.T.matmul(X) / len(X)
        cnt = 0
        n_tries = 20
        while cnt < n_tries:
            try:
                ica = FastICA()
                _ = ica.fit_transform(X.cpu())
                Aorig = ica.mixing_

                # avoid numerical instability
                U, ss, V = np.linalg.svd(Aorig)
                ss /= ss[0]
                ss[ss < SINGULAR_SMALL] = SINGULAR_SMALL
                Aorig = (U * ss).dot(V)

                A = np.linalg.inv(Aorig)
                _, ss, _ = np.linalg.svd(A)
                A = to_tensor(A / ss[0])
                cnt = 2 * n_tries
            except:
                cnt += 1
        if cnt != 2 * n_tries:
            print('ICA failed. Use random.')
            A = to_tensor(ortho_group.rvs(D))

    while (not END(dA, dsigma_sqr)) and niters < max_em_steps:
        niters += 1
        A_prev, sigma_sqr_prev = A.clone(), sigma_sqr.clone()
        objs += [],

        # E-step
        if TIME:
            e_start = time()
        Y, w, w_sumN, w_sumNK = E(X, A, pi, mu, sigma_sqr, Y=Y)
        if TIME:
            ret_time['E'] += time() - e_start,

        # M-step
        if A_mode == 'ICA' or A_mode == 'None':
            pi, mu, sigma_sqr = update_pi_mu_sigma(X, A, w, w_sumN, w_sumNK)
            obj = get_objetive(X, A, pi, mu, sigma_sqr, w)
            objs[-1] += obj,

        if A_mode == 'CF':
            # closed-form updates
            if CHECK_OBJ:
                objs[-1] += get_objetive(X, A, pi, mu, sigma_sqr, w),

            for i in range(n_gd_steps):
                cf_start = time()
                if VERBOSE:
                    print(A.view(-1))

                pi, mu, sigma_sqr = update_pi_mu_sigma(X, A, w, w_sumN, w_sumNK)

                if TIME:
                    a_start = time()
                if grad_mode == 'CF1':
                    A = set_grad_zero(X, A, w, mu, sigma_sqr)
                    A = A.T
                elif grad_mode == 'CF2':
                    cofs = get_cofactors(A)
                    det = torch.det(A)
                    if det < 0:  # TODO: ignore neg det for now
                        cofs = cofs * -1

                    newA = A.clone()
                    for i in range(D):
                        for j in range(D):
                            t1 = (w[:, i] * X[:, j, None]**2 / sigma_sqr[i]).sum() / N
                            diff = (Y[i] - A[i, j] * X[:, j])[:, None] - mu[i]
                            t2 = (w[:, i] * X[:, j, None] * diff / sigma_sqr[i]).sum() / N
                            c1 = t1 * cofs[i, j]
                            c2 = t1 * (det - A[i, j] * cofs[i, j]) + t2 * cofs[i, j]
                            c3 = t2 * (det - A[i, j] * cofs[i, j]) - cofs[i, j]
                            inner = c2**2 - 4 * c1 * c3
                            if inner < 0:
                                print('Problem at solving for A[{},{}]: no real sol.'.format(i, j))
                                pdb.set_trace()
                            if c1 == 0:
                                sol = -c3 / c2
                            else:
                                sol = (inner**0.5 - c2) / (2 * c1)

                            if False:
                                # check whether obj gets improved with each updated entry of A
                                curr_A = newA.clone()
                                curr_A[i, j] = sol
                                curr_obj = get_objetive(X, curr_A, pi, mu, sigma_sqr, w)
                            newA[i, j] = sol
                    A = newA.double()

                    # avoid numerical instability
                    U, ss, V = torch.svd(A)
                    ss = ss / ss[0]
                    ss[ss < SINGULAR_SMALL] = SINGULAR_SMALL
                    A = (U * ss).matmul(V)

                if TIME:
                    if 'A' not in ret_time:
                        ret_time['A'] = []
                    ret_time['A'] += time() - a_start,
                    if 'CF' not in ret_time:
                        ret_time['CF'] = []
                    ret_time['CF'] += time() - cf_start,

                if CHECK_OBJ:
                    if TIME:
                        obj_start = time()
                    obj = get_objetive(X, A, pi, mu, sigma_sqr, w)
                    if TIME:
                        ret_time['obj'] += time() - obj_start,
                    objs[-1] += obj,
                    if VERBOSE:
                        print('iter {}: obj= {:.5f}'.format(i, obj))

        if A_mode == 'GA':
            # gradient ascent
            if CHECK_OBJ:
                objs[-1] += get_objetive(X, A, pi, mu, sigma_sqr, w),

            for i in range(n_gd_steps):
                ga_start = time()
                if VERBOSE:
                    print(A.view(-1))

                pi, mu, sigma_sqr = update_pi_mu_sigma(X, A, w, w_sumN, w_sumNK)

                if TIME:
                    a_start = time()

                # gradient steps
                grad, y_time = get_grad(X, A, w, mu, sigma_sqr)
                if TIME:
                    if 'Y' not in ret_time:
                        ret_time['Y'] = []
                    ret_time['Y'] += y_time,

                if grad_mode == 'BTLS':
                    # backtracking line search
                    if TIME:
                        obj_start = time()
                    obj = get_objetive(X, A, pi, mu, sigma_sqr, w)
                    if TIME:
                        ret_time['obj'] += time() - obj_start,

                    beta, t, flag = 0.6, 1, True
                    gnorm = torch.norm(grad)
                    n_iter, ITER_LIM = 0, 10
                    while flag and n_iter < ITER_LIM:
                        n_iter += 1
                        Ap = A + t * grad
                        _, ss, _ = torch.svd(Ap)
                        Ap /= ss[0]
                        if TIME:
                            obj_start = time()
                        obj_p = get_objetive(X, Ap, pi, mu, sigma_sqr, w)
                        if TIME:
                            ret_time['obj'] += time() - obj_start,
                        t *= beta
                        base = obj - 0.5 * t * gnorm
                        flag = obj_p < base
                    gamma = t
                    if 'btls_nIters' not in ret_time:
                        ret_time['btls_nIters'] = []
                    ret_time['btls_nIters'] += n_iter,
                elif grad_mode == 'perturb':
                    # estimate a step size from a random perturbation of A
                    perturb = A.std() * 0.1 * torch.randn(A.shape).type(DTYPE).to(device)
                    perturbed = A + perturb
                    perturbed_grad, _ = get_grad(X, perturbed, w, mu, sigma_sqr)
                    grad_diff = torch.norm(grad - perturbed_grad)
                    gamma = 1 / (EPS_GRAD + grad_diff) * 0.03

                grad_norms += torch.norm(grad).item(),

                A += gamma * grad
                _, ss, _ = torch.svd(A)
                A /= ss[0]

                if TIME:
                    if 'A' not in ret_time:
                        ret_time['A'] = []
                    ret_time['A'] += time() - a_start,
                    if 'GA' not in ret_time:
                        ret_time['GA'] = []
                    ret_time['GA'] += time() - ga_start,

                if CHECK_OBJ:
                    if TIME:
                        obj_start = time()
                    obj = get_objetive(X, A, pi, mu, sigma_sqr, w)
                    if TIME:
                        ret_time['obj'] += time() - obj_start,
                    objs[-1] += obj,
                    if VERBOSE:
                        print('iter {}: obj= {:.5f}'.format(i, obj))

        if VERBOSE:
            print('#{}: dA={:.3e} / dsigma_sqr={:.3e}'.format(niters, dA, dsigma_sqr))
            print('A:', A.view(-1))

    if TIME:
        for key in ret_time:
            ret_time[key] = np.array(ret_time[key]) if ret_time[key] else 0

    return A, pi, mu, sigma_sqr, grad_norms, objs, ret_time
# thus we need to use mask_strategy='epi' to compute the mask from the
# EPI images
masker = NiftiMasker(smoothing_fwhm=8, memory='nilearn_cache', memory_level=1,
                     mask_strategy='epi', standardize=True)
data_masked = masker.fit_transform(func_filename)

#####################################################################
# Apply ICA
from sklearn.decomposition import FastICA
n_components = 10
ica = FastICA(n_components=n_components, random_state=42)
components_masked = ica.fit_transform(data_masked.T).T

# Normalize estimated components, for thresholding to make sense
components_masked -= components_masked.mean(axis=0)
components_masked /= components_masked.std(axis=0)

# Threshold
import numpy as np
components_masked[np.abs(components_masked) < .8] = 0

# Now invert the masking operation, going back to a full 3D
# representation
component_img = masker.inverse_transform(components_masked)

#####################################################################
# Visualize the results
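The snippet stops at the visualization header. A minimal sketch of how a component image is typically displayed with nilearn's plotting utilities, assuming func_filename is still in scope; plot_stat_map, index_img and mean_img are standard nilearn functions, but this exact call is an illustration rather than the original script:

from nilearn import image
from nilearn.plotting import plot_stat_map, show

# Overlay the first estimated component on the mean EPI image
mean_epi = image.mean_img(func_filename)
plot_stat_map(image.index_img(component_img, 0), mean_epi,
              title='First ICA component')
show()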
def test_fastica_simple(add_noise, seed):
    # Test the FastICA algorithm on very simple data.
    rng = np.random.RandomState(seed)
    # scipy.stats uses the global RNG:
    n_samples = 1000
    # Generate two sources:
    s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
    s2 = stats.t.rvs(1, size=n_samples)
    s = np.c_[s1, s2].T
    center_and_norm(s)
    s1, s2 = s

    # Mixing angle
    phi = 0.6
    mixing = np.array([[np.cos(phi), np.sin(phi)],
                       [np.sin(phi), -np.cos(phi)]])
    m = np.dot(mixing, s)

    if add_noise:
        m += 0.1 * rng.randn(2, 1000)

    center_and_norm(m)

    # function as fun arg
    def g_test(x):
        return x**3, (3 * x**2).mean(axis=-1)

    algos = ["parallel", "deflation"]
    nls = ["logcosh", "exp", "cube", g_test]
    whitening = [True, False]
    for algo, nl, whiten in itertools.product(algos, nls, whitening):
        if whiten:
            k_, mixing_, s_ = fastica(m.T, fun=nl, algorithm=algo,
                                      random_state=rng)
            with pytest.raises(ValueError):
                fastica(m.T, fun=np.tanh, algorithm=algo)
        else:
            pca = PCA(n_components=2, whiten=True, random_state=rng)
            X = pca.fit_transform(m.T)
            k_, mixing_, s_ = fastica(X, fun=nl, algorithm=algo,
                                      whiten=False, random_state=rng)
            with pytest.raises(ValueError):
                fastica(X, fun=np.tanh, algorithm=algo)
        s_ = s_.T
        # Check that the mixing model described in the docstring holds:
        if whiten:
            assert_almost_equal(s_, np.dot(np.dot(mixing_, k_), m))

        center_and_norm(s_)
        s1_, s2_ = s_
        # Check to see if the sources have been estimated
        # in the wrong order
        if abs(np.dot(s1_, s2)) > abs(np.dot(s1_, s1)):
            s2_, s1_ = s_
        s1_ *= np.sign(np.dot(s1_, s1))
        s2_ *= np.sign(np.dot(s2_, s2))

        # Check that we have estimated the original sources
        if not add_noise:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=2)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=2)
        else:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=1)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=1)

    # Test FastICA class
    _, _, sources_fun = fastica(m.T, fun=nl, algorithm=algo, random_state=seed)
    ica = FastICA(fun=nl, algorithm=algo, random_state=seed)
    sources = ica.fit_transform(m.T)
    assert ica.components_.shape == (2, 2)
    assert sources.shape == (1000, 2)

    assert_array_almost_equal(sources_fun, sources)
    assert_array_almost_equal(sources, ica.transform(m.T))

    assert ica.mixing_.shape == (2, 2)

    for fn in [np.tanh, "exp(-.5(x^2))"]:
        ica = FastICA(fun=fn, algorithm=algo)
        with pytest.raises(ValueError):
            ica.fit(m.T)

    with pytest.raises(TypeError):
        FastICA(fun=range(10)).fit(m.T)
print('//===========================pca==========================')
pca = PCA(n)
traindata_pca = pca.fit_transform(traindata)
testdata_pca = pca.transform(testdata)
Faceidentifier(traindata_pca, trainlabel, testdata_pca, testlabel)

print('//===========================sfa==========================')
sfa = sfa.SFA()
traindata_sfa = sfa.fit_transform(traindata.T, conponents=n).T
testdata_sfa = sfa.transform(testdata.T).T
Faceidentifier(traindata_sfa, trainlabel, testdata_sfa, testlabel)

print('//===========================fastica==========================')
fastica = FastICA(n)
traindata_fastica = fastica.fit_transform(traindata)
testdata_fastica = fastica.transform(testdata)
Faceidentifier(traindata_fastica, trainlabel, testdata_fastica, testlabel)

for i in range(0, 9):
    if i == 0:
        b = 0.1
    elif i == 1:
        b = 0.2
    elif i == 2:
        b = 0.5
    elif i == 3:
        b = 0.8
    elif i == 4:
        b = 1
    elif i == 5: