def test_inverse_transform():
    # Test FastICA.inverse_transform
    n_features = 10
    n_samples = 100
    n1, n2 = 5, 10
    rng = np.random.RandomState(0)
    X = rng.random_sample((n_samples, n_features))
    expected = {(True, n1): (n_features, n1),
                (True, n2): (n_features, n2),
                (False, n1): (n_features, n2),
                (False, n2): (n_features, n2)}
    for whiten in [True, False]:
        for n_components in [n1, n2]:
            n_components_ = (n_components if n_components is not None else
                             X.shape[1])
            ica = FastICA(n_components=n_components, random_state=rng,
                          whiten=whiten)
            with warnings.catch_warnings(record=True):
                # catch "n_components ignored" warning
                Xt = ica.fit_transform(X)
            expected_shape = expected[(whiten, n_components_)]
            assert_equal(ica.mixing_.shape, expected_shape)
            X2 = ica.inverse_transform(Xt)
            assert_equal(X.shape, X2.shape)
            # reversibility test in non-reduction case
            if n_components == X.shape[1]:
                assert_array_almost_equal(X, X2)
def getHeartRate(window, lastHR):
    # Normalize across the window to have zero-mean and unit variance
    mean = np.mean(window, axis=0)
    std = np.std(window, axis=0)
    normalized = (window - mean) / std

    # Separate into three source signals using ICA
    ica = FastICA()
    srcSig = ica.fit_transform(normalized)

    # Find power spectrum
    powerSpec = np.abs(np.fft.fft(srcSig, axis=0))**2
    freqs = np.fft.fftfreq(WINDOW_SIZE, 1.0 / FPS)

    # Find heart rate
    maxPwrSrc = np.max(powerSpec, axis=1)
    validIdx = np.where((freqs >= MIN_HR_BPM / SEC_PER_MIN) & (freqs <= MAX_HR_BMP / SEC_PER_MIN))
    validPwr = maxPwrSrc[validIdx]
    validFreqs = freqs[validIdx]
    maxPwrIdx = np.argmax(validPwr)
    hr = validFreqs[maxPwrIdx]
    print(hr)

    #plotSignals(normalized, "Normalized color intensity")
    #plotSignals(srcSig, "Source signal strength")
    #plotSpectrum(freqs, powerSpec)
    return hr
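# A minimal sketch of the module-level constants getHeartRate() depends on,
# plus a synthetic call; the values here are assumptions for illustration (a
# 30 fps camera and a 30-second window), not taken from the original source.
# Note the function returns a frequency in Hz, so multiply by 60 for BPM.
import numpy as np
from sklearn.decomposition import FastICA

FPS = 30
WINDOW_SIZE = 30 * FPS
SEC_PER_MIN = 60
MIN_HR_BPM = 45.0
MAX_HR_BMP = 240.0   # name kept as spelled in getHeartRate()

window = np.random.rand(WINDOW_SIZE, 3)   # fake per-frame mean RGB values
hr = getHeartRate(window, lastHR=None)
print(hr * SEC_PER_MIN)                   # beats per minute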
def ica(self, n_components=None): """Return result from independent component analysis. X = SA + m Sklearn's FastICA implementation is used. Parameters ---------- n_components : int, optional Number of ICA components. Returns ------- source : Matrix Estimated source matrix (S) mixing_matrix : Matrix Estimated mixing matrix (A) mean_vector : brede.core.vector.Vector Estimated mean vector References ---------- http://scikit-learn.org/stable/modules/decomposition.html#ica """ if n_components is None: n_components = int(np.ceil(np.sqrt(float(min(self.shape)) / 2))) ica = FastICA(n_components=n_components) sources = Matrix(ica.fit_transform(self.values), index=self.index) mixing_matrix = Matrix(ica.mixing_.T, columns=self.columns) mean_vector = Vector(ica.mean_, index=self.columns) return sources, mixing_matrix, mean_vector
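# A hedged usage sketch for the ica() method above: the docstring's model
# X = SA + m means the input should be (approximately) recoverable from the
# returned pieces. `matrix` is a hypothetical instance of the surrounding
# class; the reconstruction is exact only when n_components does not reduce
# the dimensionality.
sources, mixing_matrix, mean_vector = matrix.ica(n_components=3)
approx = sources.values @ mixing_matrix.values + mean_vector.values
print(((matrix.values - approx) ** 2).mean())  # small reconstruction error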
def test_inverse_transform(): """Test FastICA.inverse_transform""" rng = np.random.RandomState(0) X = rng.random_sample((100, 10)) rng = np.random.RandomState(0) X = rng.random_sample((100, 10)) n_features = X.shape[1] expected = {(True, 5): (n_features, 5), (True, 10): (n_features, 10), (False, 5): (n_features, 10), (False, 10): (n_features, 10)} for whiten in [True, False]: for n_components in [5, 10]: ica = FastICA(n_components=n_components, random_state=rng, whiten=whiten) Xt = ica.fit_transform(X) expected_shape = expected[(whiten, n_components)] assert_equal(ica.mixing_.shape, expected_shape) X2 = ica.inverse_transform(Xt) assert_equal(X.shape, X2.shape) # reversibility test in non-reduction case if n_components == X.shape[1]: assert_array_almost_equal(X, X2)
class ICA(method.Method):

    def __init__(self, params):
        self.params = params
        self.ica = FastICA(**params)

    def __str__(self):
        return "FastICA"

    def train(self, data):
        """
        Train the FastICA on the whitened data

        :param data: whitened data, ready to use
        """
        self.ica.fit(data)

    def encode(self, data):
        """
        Encodes the ready to use data

        :returns: encoded data with dimension n_components
        """
        return self.ica.transform(data)

    def decode(self, components):
        """
        Decode the data to return whitened reconstructed data

        :returns: reconstructed data
        """
        return self.ica.inverse_transform(components)
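# Illustrative round trip through the wrapper class above. The params dict
# holds ordinary FastICA keyword arguments; all values here are assumptions.
import numpy as np

ica_method = ICA({'n_components': 4, 'random_state': 0})
data = np.random.rand(200, 16)    # stand-in for already-whitened input
ica_method.train(data)
codes = ica_method.encode(data)   # shape (200, 4)
recon = ica_method.decode(codes)  # shape (200, 16)
print(np.mean((data - recon) ** 2))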
def RunICAScikit():
    totalTimer = Timer()

    # Load input dataset.
    data = np.genfromtxt(self.dataset, delimiter=',')

    opts = {}
    if "num_components" in options:
        opts["n_components"] = int(options.pop("num_components"))
    if "algorithm" in options:
        opts["algorithm"] = str(options.pop("algorithm"))
        if opts["algorithm"] not in ['parallel', 'deflation']:
            Log.Fatal("Invalid value for algorithm: " + opts["algorithm"] +
                      ". Must be either parallel or deflation.")
            return -1
    if "function" in options:
        opts["fun"] = str(options.pop("function"))
        if opts["fun"] not in ['logcosh', 'exp', 'cube']:
            Log.Fatal("Invalid value for fun: " + opts["fun"] +
                      ". Must be either logcosh, exp or cube.")
            return -1
    if "tolerance" in options:
        opts["tol"] = float(options.pop("tolerance"))

    try:
        # Perform ICA.
        with totalTimer:
            model = FastICA(**opts)
            ic = model.fit(data).transform(data)
    except Exception as e:
        return -1

    return totalTimer.ElapsedTime()
def wrapper_fastica(data, random_state=None): """Call FastICA implementation from scikit-learn.""" ica = FastICA(random_state=random_state) ica.fit(cat_trials(data).T) u = ica.components_.T m = ica.mixing_.T return m, u
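# How the two returned matrices relate to the data, assuming cat_trials()
# yields a (channels, samples) array as in the call above; `data` here is
# hypothetical. With x = cat_trials(data).T, scikit-learn's model is
# x - mean = sources @ mixing_.T, so u maps data to sources and m maps back.
m, u = wrapper_fastica(data, random_state=0)
x = cat_trials(data).T
sources = (x - x.mean(axis=0)) @ u           # equivalent to ica.transform(x)
reconstructed = sources @ m + x.mean(axis=0)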
def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name): scl = RobustScaler() X_train_scl = scl.fit_transform(X_train) X_test_scl = scl.transform(X_test) ## ## ICA ## ica = FastICA(n_components=X_train_scl.shape[1]) X_ica = ica.fit_transform(X_train_scl) ## ## Plots ## ph = plot_helper() kurt = kurtosis(X_ica) print(kurt) title = 'Kurtosis (FastICA) for ' + data_set_name name = data_set_name.lower() + '_ica_kurt' filename = './' + self.out_dir + '/' + name + '.png' ph.plot_simple_bar(np.arange(1, len(kurt)+1, 1), kurt, np.arange(1, len(kurt)+1, 1).astype('str'), 'Feature Index', 'Kurtosis', title, filename)
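# Why kurtosis here: FastICA looks for maximally non-Gaussian directions,
# and excess kurtosis is a simple (if outlier-sensitive) proxy for
# non-Gaussianity; a Gaussian signal scores near 0. A quick sanity check:
import numpy as np
from scipy.stats import kurtosis

rng = np.random.RandomState(0)
print(kurtosis(rng.normal(size=100000)))   # ~0 for a Gaussian
print(kurtosis(rng.laplace(size=100000)))  # ~3 for a heavy-tailed source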
def best_ica_nba(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_nba_data()

    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ica = FastICA(n_components=X_train_scl.shape[1])
    X_train_transformed = ica.fit_transform(X_train_scl, y_train)
    X_test_transformed = ica.transform(X_test_scl)

    ## top 2: order components by kurtosis of the training data, and apply
    ## the same ordering to the test data so the selected features stay aligned
    kurt = kurtosis(X_train_transformed)
    i = kurt.argsort()[::-1]
    X_train_transformed = X_train_transformed[:, i][:, 0:2]
    X_test_transformed = X_test_transformed[:, i][:, 0:2]

    # save
    filename = './' + self.save_dir + '/nba_ica_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_ica_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_ica_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_ica_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
def filter_frames(self, data):
    logging.debug("I am starting the old componenty vous")
    data = data[0]
    print('The length of the data is ' + str(data.shape))
    sh = data.shape
    newshape = (np.prod(sh[:-1]), sh[-1])
    print('The shape of the data is: ' + str(data.shape) + str(newshape))
    data = np.reshape(data, (newshape))  # data will already be shaped correctly
    logging.debug("Making the matrix")
    ica = FastICA(n_components=self.parameters['number_of_components'],
                  algorithm='parallel',
                  whiten=self.parameters['whiten'],
                  w_init=self.parameters['w_init'],
                  random_state=self.parameters['random_state'])
    logging.debug("Performing the fit")
    data = self.remove_nan_inf(data)  # otherwise the fit flags up an error for obvious reasons
    S_ = ica.fit_transform(data)
    # print("S_Shape is:" + str(S_.shape))
    # print("self.images_shape:" + str(self.images_shape))
    scores = np.reshape(S_, (self.images_shape))
    eigenspectra = ica.components_
    logging.debug("mange-tout")
    return [scores, eigenspectra]
def __create_image_obser(self, image_observations) : """ Creation of a space in which the images will be compared (learning stage). Firstly PCA is applied in order to reduce the number of features in the images. Reduction is done so that 99% of measured variance is covered. After that, ICA is performed on the coefficients calculated by transforming (reducing) the face images with PCA. From the learned ICA components basis_images (vectors), original images coefficients and transformation for new comming images are extracted. """ pca = PCA() pca.fit(image_observations) sum = 0 components_to_take = 0 for ratio in pca.explained_variance_ratio_: components_to_take += 1 sum += ratio if (sum > 0.99): break print("PCA reduces the number of dimensions to: " + str(components_to_take)) pca = PCA(whiten=True, n_components=components_to_take) self.__transformed_images = pca.fit_transform(image_observations) self.__transformed_images_mean = np.mean(self.__transformed_images, axis=0) self.__transformed_images -= self.__transformed_images_mean self.__pca = pca ica = FastICA(whiten=True, max_iter=100000) self.__original_images_repres = ica.fit_transform(self.__transformed_images) self.__basis_images = ica.mixing_.T self.__transformation = ica.components_
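# A hedged sketch of projecting new images into the learned space and back,
# written as it could appear in another method of the same class (the
# double-underscore attributes are name-mangled, so this only works inside
# the class body). `new_images` is a hypothetical (n_images, n_pixels) array
# laid out like the training observations; the reconstruction is approximate.
def project_and_reconstruct(self, new_images):
    coeffs = self.__pca.transform(new_images) - self.__transformed_images_mean
    ica_repr = coeffs.dot(self.__transformation.T)  # into the ICA space
    back = ica_repr.dot(self.__basis_images)        # back to centered PCA space
    return self.__pca.inverse_transform(back + self.__transformed_images_mean)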
def reduceDataset(self, nr=3, method='PCA'):
    '''It reduces the dimensionality of a given dataset using different
    techniques provided by the sklearn library.
    Methods available:
        'PCA'
        'FactorAnalysis'
        'KPCArbf', 'KPCApoly'
        'KPCAcosine', 'KPCAsigmoid'
        'IPCA'
        'FastICADeflation'
        'FastICAParallel'
        'Isomap'
        'LLE'
        'LLEmodified'
        'LLEltsa'
    '''
    dataset = self.ModelInputs['Dataset']
    #dataset=self.dataset[Model.in_columns]
    #dataset=self.dataset[['Humidity','TemperatureF','Sea Level PressureIn','PrecipitationIn','Dew PointF','Value']]
    # PCA
    if method == 'PCA':
        sklearn_pca = sklearnPCA(n_components=nr)
        reduced = sklearn_pca.fit_transform(dataset)
    # Factor Analysis
    elif method == 'FactorAnalysis':
        fa = FactorAnalysis(n_components=nr)
        reduced = fa.fit_transform(dataset)
    # Kernel PCA with rbf kernel
    elif method == 'KPCArbf':
        kpca = KernelPCA(nr, kernel='rbf')
        reduced = kpca.fit_transform(dataset)
    # Kernel PCA with poly kernel
    elif method == 'KPCApoly':
        kpca = KernelPCA(nr, kernel='poly')
        reduced = kpca.fit_transform(dataset)
    # Kernel PCA with cosine kernel
    elif method == 'KPCAcosine':
        kpca = KernelPCA(nr, kernel='cosine')
        reduced = kpca.fit_transform(dataset)
    # Kernel PCA with sigmoid kernel
    elif method == 'KPCAsigmoid':
        kpca = KernelPCA(nr, kernel='sigmoid')
        reduced = kpca.fit_transform(dataset)
    # Incremental PCA
    elif method == 'IPCA':
        ipca = IncrementalPCA(nr)
        reduced = ipca.fit_transform(dataset)
    # Fast ICA
    elif method == 'FastICAParallel':
        fip = FastICA(nr, algorithm='parallel')
        reduced = fip.fit_transform(dataset)
    elif method == 'FastICADeflation':
        fid = FastICA(nr, algorithm='deflation')
        reduced = fid.fit_transform(dataset)
    elif method == 'All':
        self.dimensionalityReduction(nr=nr)
        return self

    self.ModelInputs.update({method: reduced})
    self.datasetsAvailable.append(method)
    return self
def ica(tx, ty, rx, ry):
    compressor = ICA(whiten=True)  # for some people, whiten needs to be off
    newtx = compressor.fit_transform(tx)
    newrx = compressor.transform(rx)  # apply the fitted unmixing to the held-out set
    em(newtx, ty, newrx, ry, add="wICAtr", times=10)
    km(newtx, ty, newrx, ry, add="wICAtr", times=10)
    nn(newtx, ty, newrx, ry, add="wICAtr")
def align(movie_data, options, args, lrh):
    print('pICA(scikit-learn)')
    nvoxel = movie_data.shape[0]
    nTR = movie_data.shape[1]
    nsubjs = movie_data.shape[2]

    align_algo = args.align_algo
    nfeature = args.nfeature
    randseed = args.randseed
    if not os.path.exists(options['working_path']):
        os.makedirs(options['working_path'])

    # zscore the data
    bX = np.zeros((nsubjs*nvoxel, nTR))
    for m in range(nsubjs):
        bX[m*nvoxel:(m+1)*nvoxel, :] = stats.zscore(movie_data[:, :, m].T, axis=0, ddof=1).T
    del movie_data

    np.random.seed(randseed)
    A = np.mat(np.random.random((nfeature, nfeature)))

    ica = FastICA(n_components=nfeature, max_iter=500, w_init=A, random_state=randseed)
    St = ica.fit_transform(bX.T)
    ES = St.T
    bW = ica.mixing_

    R = np.zeros((nvoxel, nfeature, nsubjs))
    for m in range(nsubjs):
        R[:, :, m] = bW[m*nvoxel:(m+1)*nvoxel, :]

    niter = 10
    # initialization when first time run the algorithm
    np.savez_compressed(options['working_path']+align_algo+'_'+lrh+'_'+str(niter)+'.npz',
                        R=R, G=ES.T, niter=niter)
    return niter
def main(mode):
    path = "/local/attale00/extracted_pascal__4__Multi-PIE"
    path_ea = path + "/color128/"

    allLabelFiles = utils.getAllFiles("/local/attale00/a_labels")
    labeledImages = [i[0:16] + ".png" for i in allLabelFiles]

    # labs=utils.parseLabelFiles(path+'/Multi-PIE/labels','mouth',labeledImages,cutoffSeq='.png',suffix='_face0.labels')
    labs = utils.parseLabelFiles(
        "/local/attale00/a_labels", "mouth", labeledImages, cutoffSeq=".png", suffix="_face0.labels"
    )

    testSet = fg.dataContainer(labs)
    roi = (50, 74, 96, 160)
    X = fg.getAllImagesFlat(path_ea, testSet.fileNames, (128, 256), roi=roi)

    # perform ICA
    if mode not in ["s", "v"]:
        ica = FastICA(n_components=100, whiten=True)
        ica.fit(X)
        meanI = np.mean(X, axis=0)
        X1 = X - meanI
        data = ica.transform(X1)
        filters = ica.components_
    elif mode in ["s", "v"]:
        W = np.load("/home/attale00/Desktop/classifiers/ica/filter1.npy")
        m = np.load("/home/attale00/Desktop/classifiers/ica/meanI1.npy")
        X1 = X - m
        data = np.dot(X1, W.T)

    for i in range(len(testSet.data)):
        testSet.data[i].extend(data[i, :])

    strel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    # fg.getHogFeature(testSet,roi,path=path_ea,ending='.png',extraMask = None,orientations = 3, cells_per_block=(6,2),maskFromAlpha=False)
    # fg.getColorHistogram(testSet,roi,path=path_ea,ending='.png',colorspace='lab',bins=10)

    testSet.targetNum = list(map(utils.mapMouthLabels2Two, testSet.target))
    rf = classifierUtils.standardRF(max_features=np.sqrt(len(testSet.data[0])), min_split=5, max_depth=40)
    if mode in ["s", "v"]:
        print("Classifying with loaded classifier")
        classifierUtils.classifyWithOld(
            path, testSet, mode, clfPath="/home/attale00/Desktop/classifiers/ica/rf128ICA_1"
        )
    elif mode in ["c"]:
        print("cross validation of data")
        print("Scores")
        # print classifierUtils.standardCrossvalidation(rf,testSet,n_jobs=5)
        # _cvDissect(testSet,rf)
        classifierUtils.dissectedCV(rf, testSet)
        print("----")
    elif mode in ["save"]:
        print("saving new classifier")
        _saveRF(testSet)
    else:
        print("not doing anything")
def run_ica(data, comp):
    ica = FastICA(n_components=comp, whiten=True, max_iter=5000)
    data_out = np.zeros((comp, np.shape(data[0, :, 0])[0], np.shape(data[0, 0, :])[0]))
    for i in range(np.shape(data[0, :, 0])[0]):
        print(i)
        data_out[:, i, :] = np.transpose(ica.fit_transform(np.transpose(data[:, i, :])))
    return data_out
def dim_survey(X, entry_id): # convert to numpy X = np.array(X) # run the reduction. X_pca = PCA(n_components=3).fit_transform(X) X_tsne = TSNE(n_components=3).fit_transform(X) X_ica = FastICA(n_components=3).fit_transform(X) # connect to db. with mongoctx() as db: # update the stuff. db['entry'].update( { '_id': ObjectId(entry_id) }, { '$set': { 'pca': X_pca.tolist(), 'tsne': X_tsne.tolist(), 'ica': X_ica.tolist(), } } )
def test_ica(eng): t = linspace(0, 10, 100) s1 = sin(t) s2 = square(sin(2*t)) x = c_[s1, s2, s1+s2] random.seed(0) x += 0.001*random.randn(*x.shape) x = fromarray(x, engine=eng) def normalize_ICA(s, aT): a = aT.T c = a.sum(axis=0) return s*c, (a/c).T from sklearn.decomposition import FastICA ica = FastICA(n_components=2, fun='cube', random_state=0) s1 = ica.fit_transform(x.toarray()) aT1 = ica.mixing_.T s1, aT1 = normalize_ICA(s1, aT1) s2, aT2 = ICA(k=2, svd_method='direct', max_iter=200, seed=0).fit(x) s2, aT2 = normalize_ICA(s2, aT2) tol=1e-1 assert allclose_sign_permute(s1, s2, atol=tol) assert allclose_sign_permute(aT1, aT2, atol=tol)
def fit(self, x, y, i=0): # if gaussian processes are being used, data dimensionality needs to be reduced before fitting if self.method[i] == 'GP': if self.reduce_dim == 'FastICA': print('Reducing dimensionality with ICA') do_ica = FastICA(n_components=self.n_components) self.do_reduce_dim = do_ica.fit(x) if self.reduce_dim == 'PCA': print('Reducing dimensionality with PCA') do_pca = PCA(n_components=self.n_components) self.do_reduce_dim = do_pca.fit(x) x = self.do_reduce_dim.transform(x) #try: print('Training model...') try: self.model.fit(x, y) self.goodfit = True print(self.model) except: self.goodfit = False if self.method[i] == 'GP': print('Model failed to train! (For GP this does not always indicate a problem, especially for low numbers of components.)') pass else: print('Model failed to train!') traceback.print_stack() if self.ransac: self.outliers = np.logical_not(self.model.inlier_mask_) print(str(np.sum(self.outliers)) + ' outliers removed with RANSAC')
def ica(self, n_components=None, sources='left'): """Return result from independent component analysis. X = SA + m Sklearn's FastICA implementation is used. When sources=left the sources are returned in the first (left) matrix and the mixing matrix is returned in the second (right) matrix, corresponding to X = SA. When sources=right the sources are returned in the second matrix while the mixing matrix is returned in the first, corresponding to X = AS. Parameters ---------- n_components : int, optional Number of ICA components. sources : left or right, optional Indicates whether the sources should be the left or right matrix. Returns ------- first : Matrix Estimated source matrix (S) if sources=left. second : Matrix Estimated mixing matrix (A) if sources=right. mean_vector : brede.core.vector.Vector Estimated mean vector References ---------- http://scikit-learn.org/stable/modules/decomposition.html#ica """ if n_components is None: min_shape = min(self.shape[0], len(self._eeg_columns)) n_components = int(np.ceil(sqrt(float(min_shape) / 2))) ica = FastICA(n_components=n_components) if sources == 'left': sources = Matrix(ica.fit_transform( self.ix[:, self._eeg_columns].values), index=self.index) mixing_matrix = Matrix(ica.mixing_.T, columns=self._eeg_columns) mean_vector = Vector(ica.mean_, index=self._eeg_columns) return sources, mixing_matrix, mean_vector elif sources == 'right': sources = Matrix(ica.fit_transform( self.ix[:, self._eeg_columns].values.T).T, columns=self._eeg_columns) mixing_matrix = Matrix(ica.mixing_, index=self.index) mean_vector = Vector(ica.mean_, index=self.index) return mixing_matrix, sources, mean_vector else: raise ValueError('Wrong argument to "sources"')
def dimensionalityReduction(self, nr=5):
    '''It applies all the dimensionality reduction techniques available
    in this class:
        'PCA'
        'FactorAnalysis'
        'KPCArbf', 'KPCApoly'
        'KPCAcosine', 'KPCAsigmoid'
        'IPCA'
        'FastICADeflation'
        'FastICAParallel'
        'Isomap'
        'LLE'
        'LLEmodified'
        'LLEltsa'
    '''
    dataset = self.ModelInputs['Dataset']
    sklearn_pca = sklearnPCA(n_components=nr)
    p_components = sklearn_pca.fit_transform(dataset)
    fa = FactorAnalysis(n_components=nr)
    factors = fa.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='rbf')
    rbf = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='poly')
    poly = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='cosine')
    cosine = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='sigmoid')
    sigmoid = kpca.fit_transform(dataset)
    ipca = IncrementalPCA(nr)
    i_components = ipca.fit_transform(dataset)
    # note: the parallel model feeds the 'FastICAParallel' output and the
    # deflation model the 'FastICADeflation' output (they were swapped before)
    fip = FastICA(nr, algorithm='parallel')
    fid = FastICA(nr, algorithm='deflation')
    ficaP = fip.fit_transform(dataset)
    ficaD = fid.fit_transform(dataset)
    '''isomap=Isomap(n_components=nr).fit_transform(dataset)
    try:
        lle1=LocallyLinearEmbedding(n_components=nr).fit_transform(dataset)
    except ValueError:
        lle1=LocallyLinearEmbedding(n_components=nr,eigen_solver='dense').fit_transform(dataset)
    try:
        lle2=LocallyLinearEmbedding(n_components=nr,method='modified').fit_transform(dataset)
    except ValueError:
        lle2=LocallyLinearEmbedding(n_components=nr,method='modified',eigen_solver='dense').fit_transform(dataset)
    try:
        lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa').fit_transform(dataset)
    except ValueError:
        lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa',eigen_solver='dense').fit_transform(dataset)'''
    values = [p_components, factors, rbf, poly, cosine, sigmoid, i_components, ficaD, ficaP]  # ,isomap,lle1,lle2,lle3]
    keys = ['PCA', 'FactorAnalysis', 'KPCArbf', 'KPCApoly', 'KPCAcosine', 'KPCAsigmoid', 'IPCA', 'FastICADeflation', 'FastICAParallel']  # ,'Isomap','LLE','LLEmodified','LLEltsa']
    self.ModelInputs.update(dict(zip(keys, values)))
    for key in keys:
        self.datasetsAvailable.append(key)
    # debug
    # dataset=pd.DataFrame(self.ModelInputs['Dataset'])
    # dataset['Output']=self.ModelOutput
    # self.debug['Dimensionalityreduction']=dataset
    return self
def transform(data, n_components=3): features, weights, labels = data start = time() ica = FastICA(n_components=n_components) transformed = ica.fit_transform(features) elapsed = time() - start df = pd.DataFrame(transformed) return df, elapsed
def wrapper_fastica(data): """ Call FastICA implementation from scikit-learn. """ ica = FastICA() ica.fit(datatools.cat_trials(data)) u = ica.components_.T m = ica.mixing_.T return m, u
def independent_component(x, y): clf = FastICA(random_state=1) transformed = clf.fit_transform(x.reshape(-1, 1)) comp = clf.components_[0, 0] mm = clf.mixing_[0, 0] src_max = transformed.max() src_min = transformed.min() return [comp, mm, src_max, src_min]
def fastica(eeg_data): """ Sample function to apply `FastICA`_ to the EEG data. Parameters ---------- eeg_data : array EEG data in a CxTxE array. With C the number of channels, T the number of time samples and E the number of events. Returns ------- ica : ICA object Trained `FastICA`_ object. ica_data : array EEG projected data in a CxTxE array. With C the number of components, T the number of time samples and E the number of events. """ # Dimension shapes ch_len = eeg_data.shape[ch_dim] t_len = eeg_data.shape[t_dim] ev_len = eeg_data.shape[ev_dim] # ------------------------------------------------------------------------- # 1. Fit the FastICA model # We need to collapse time and events dimensions coll_data = eeg_data.transpose([t_dim, ev_dim, ch_dim])\ .reshape([t_len*ev_len, ch_len]) # Fit model ica = FastICA() ica.fit(coll_data) # Normalize ICs to unit norm k = np.linalg.norm(ica.mixing_, axis=0) # Frobenius norm ica.mixing_ /= k ica.components_[:] = (ica.components_.T * k).T # ------------------------------------------------------------------------- # 2. Transform data # Project data bss_data = ica.transform(coll_data) # Adjust shape and dimensions back to "eeg_data" shape ic_len = bss_data.shape[1] bss_data = np.reshape(bss_data, [ev_len, t_len, ic_len]) new_order = [0, 0, 0] # TODO: Check the following order new_order[ev_dim] = 0 new_order[ch_dim] = 2 new_order[t_dim] = 1 bss_data = bss_data.transpose(new_order) # End return ica, bss_data
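# Hedged usage sketch for fastica() above. The dimension indices are
# module-level constants in the original; the values below are assumptions
# chosen so channels x time x events matches the docstring.
import numpy as np

ch_dim, t_dim, ev_dim = 0, 1, 2            # assumed dimension layout
eeg_data = np.random.randn(32, 512, 20)    # 32 channels, 512 samples, 20 events
ica, ica_data = fastica(eeg_data)
print(ica_data.shape)                      # components x time x events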
def _fit_local(self, data): from sklearn.decomposition import FastICA from numpy import random random.seed(self.seed) model = FastICA(n_components=self.k, fun="cube", max_iter=self.max_iter, tol=self.tol, random_state=self.seed) signals = model.fit_transform(data) return signals, model.mixing_.T
def ica(self): ''' Perform ICA on the data source_matrix -- rows are sources, columns are time points, values are ? mixing_matrix -- rows are electrodes, columns are source, values are contributions of the electrode to the source ''' ica = FastICA(self.number_of_sources) ica.fit(self.data) self.mixing_matrix = ica.mixing_ # estimated mixing matrix
def generate_peoples_results_files(self):
    self.np_result = np.c_[self.results[0]['blue'],
                           self.results[0]['green'],
                           self.results[0]['red']]
    list_number = len(self.results[0]['blue'])

    # ICA
    ica = FastICA(n_components=3, fun='logcosh', max_iter=2000)
    ica_transformed = ica.fit_transform(self.np_result)
    # flatten column-major so each estimated component is a contiguous block
    component_all = ica_transformed.ravel('F')
    component_1 = component_all[:list_number]
    component_2 = component_all[list_number:(2 * list_number)]
    component_3 = component_all[(2 * list_number):(3 * list_number)]

    # butter_smooth
    N = 8
    Wn = [1.6 / 30, 4.0 / 30]
    t = np.linspace(1 / 30, list_number / 30, list_number)
    b, a = signal.butter(N, Wn, 'bandpass', analog=False)
    filter_1 = signal.filtfilt(b, a, component_1)
    filter_2 = signal.filtfilt(b, a, component_2)
    filter_3 = signal.filtfilt(b, a, component_3)
    lowess_1 = sm.nonparametric.lowess(filter_1, t, frac=10.0 / list_number)
    lowess_2 = sm.nonparametric.lowess(filter_2, t, frac=10.0 / list_number)
    lowess_3 = sm.nonparametric.lowess(filter_3, t, frac=10.0 / list_number)
    smooths = []
    smooth_1 = lowess_1[:, 1]
    smooth_2 = lowess_2[:, 1]
    smooth_3 = lowess_3[:, 1]
    smooths.append(smooth_1)
    smooths.append(smooth_2)
    smooths.append(smooth_3)

    # FFT and spectrum
    fft_1 = np.fft.fft(smooth_1, 256)
    fft_2 = np.fft.fft(smooth_2, 256)
    fft_3 = np.fft.fft(smooth_3, 256)
    spectrum_1 = list(np.abs(fft_1) ** 2)
    spectrum_2 = list(np.abs(fft_2) ** 2)
    spectrum_3 = list(np.abs(fft_3) ** 2)
    max1 = max(spectrum_1)
    max2 = max(spectrum_2)
    max3 = max(spectrum_3)
    num_spec1 = spectrum_1.index(max(spectrum_1))
    if num_spec1 > (list_number / 2):
        num_spec1 = 256 - num_spec1
    num_spec2 = spectrum_2.index(max(spectrum_2))
    if num_spec2 > (list_number / 2):
        num_spec2 = 256 - num_spec2
    num_spec3 = spectrum_3.index(max(spectrum_3))
    if num_spec3 > (list_number / 2):
        num_spec3 = 256 - num_spec3
    num_spec = [num_spec1, num_spec2, num_spec3]
    max_all = [max1, max2, max3]
    max_num = max_all.index(max(max_all))
    self.heartRate = int(num_spec[max_num] * 1800 / 256) + 1
    return smooths[max_num]
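# The final conversion above maps an FFT bin index to beats per minute:
# bin k of a length-N FFT of a signal sampled at fs Hz sits at k*fs/N Hz,
# i.e. k*fs*60/N BPM. With fs = 30 (the 1/30 s spacing used for t above)
# and N = 256, that is k * 1800 / 256, matching the constant in the code.
def bin_to_bpm(k, fs=30.0, n_fft=256):
    """Convert an FFT bin index to beats per minute."""
    return k * fs * 60.0 / n_fft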
def fit_transform_ica(X):
    ica = FastICA(n_components=50, max_iter=2000, tol=0.05,
                  algorithm='parallel', fun='cube', fun_args={'alpha': 1.0},
                  random_state=42)  # 26 36 76
    start = time.time()
    X = ica.fit_transform(X)
    end = time.time()
    print("Done!\nFit ICA transform time (secs): {:.3f}".format(end - start))
    return X, ica
def independent_component(x, y):
    clf = FastICA(random_state=1)
    # fit_transform returns the sources; the sources_ attribute and
    # get_mixing_matrix() were removed from scikit-learn long ago
    sources = clf.fit_transform(x.reshape(-1, 1))
    comp = clf.components_[0][0]
    mm = clf.mixing_[0][0]
    src_max = sources.max()
    src_min = sources.min()
    return [comp, mm, src_max, src_min]
train, test = train[train_features], test[train_features] print("\nTrain shape: {}\nTest shape: {}".format(train.shape, test.shape)) print("\nStart decomposition process...") print("PCA") pca = PCA(n_components=N_COMP, random_state=17) pca_results_train = pca.fit_transform(train) pca_results_test = pca.transform(test) print("tSVD") tsvd = TruncatedSVD(n_components=N_COMP, random_state=17) tsvd_results_train = tsvd.fit_transform(train) tsvd_results_test = tsvd.transform(test) print("ICA") ica = FastICA(n_components=N_COMP, random_state=17) ica_results_train = ica.fit_transform(train) ica_results_test = ica.transform(test) print("GRP") grp = GaussianRandomProjection(n_components=N_COMP, eps=0.1, random_state=17) grp_results_train = grp.fit_transform(train) grp_results_test = grp.transform(test) print("SRP") srp = SparseRandomProjection(n_components=N_COMP, dense_output=True, random_state=17) srp_results_train = srp.fit_transform(train) srp_results_test = srp.transform(test)
from sklearn.decomposition import FastICA
import numpy
import scipy.stats

data = numpy.genfromtxt(open("modifiedLetter2.data"), delimiter=",")
print(data.shape)
X = numpy.delete(data, -1, 1)
print(X.shape)
ica = FastICA()
ica_sources = ica.fit_transform(X)
print(scipy.stats.kurtosis(ica_sources))
def perform_fastICA(self): self.dm = FastICA(n_components=self.n_components) self.components = self.dm.fit_transform(self.dataset.transpose())
def calculateCovariance(X):
    meanX = np.mean(X, axis=0)
    lenX = X.shape[0]
    X = X - meanX
    covariance = X.T.dot(X) / lenX
    return covariance

P = calculateCovariance(data)
print(P)
print(P.shape)

# eigenvalue decomposition
W, V = np.linalg.eig(P)
D = np.diag(W)
print(V @ D @ V.T - P)  # should be (numerically) zero

# apply whitening: project onto one eigenvector and rescale by the square
# root of its eigenvalue (eigenvectors are the columns of V, hence V[:, 1])
A = data.dot(V[:, 1])
X = A / np.sqrt(W[1] + 1e-5)
print(X)

X = X.reshape(-1, 1)
transformer = FastICA()
X_transformed = transformer.fit_transform(X)
print(X_transformed.shape)
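# Sanity check for the full whitening transform data @ V @ diag(1/sqrt(W)),
# a sketch reusing the same P, W, V as above: whitened data should have an
# (approximately) identity covariance matrix.
X_white = (data - data.mean(axis=0)) @ V / np.sqrt(W + 1e-5)
print(np.round(calculateCovariance(X_white), 3))  # ~ identity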
plot=True, targetcluster=3, stats=True) #kmeans.run() #kmeans.run() em = ExpectationMaximizationTestCluster(X_train, y_train, clusters=range(1, 31), plot=True, targetcluster=3, stats=True) #em.run() pca = PCA(n_components=3) S_pca = pca.fit_transform(features) ica = FastICA(n_components=3) S_ica = ica.fit_transform(features) rpg = random_projection.GaussianRandomProjection(n_components=3) g_rpg = rpg.fit_transform(features) spg = random_projection.SparseRandomProjection(n_components=3) s_rp = spg.fit_transform(features) threshold = [ .01, .02, .03, .04, .05, .1, .20, .25, .30, .4, .5, .6, .7, .8, .9, 1 ] lvf = VarianceThreshold() t_lvf = lvf.fit_transform(X_train)
plt.title('The Elbow Method showing the optimal k')
plt.show()

X = load_wine().data
y = load_wine().target
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

ica = ICA(n_components=2)
ica.fit(X)
dr_X = ica.transform(X)

# obtain elbow plot
plot_elbow(dr_X)

# pick three clusters, and view a few groupings
km = KMeans(n_clusters=3, random_state=0).fit(dr_X)
labels = km.predict(dr_X)
print(silhouette_score(dr_X, labels))
#print(ImagesTable[1])
# my code
new_data = list()
for _, x in ImagesTable.iterrows():  # (3000,)
    # iterrows() yields (index, row) pairs, so unpack the index
    data = x['image'].reshape(1440, 1)
    new_data.append(data)

# ICA Processing
print("next Pictures are processed with ICA ")
stack_new_data = np.hstack(new_data)
ica = FastICA(n_components=10)
S_ica_ = ica.fit_transform(stack_new_data)  # Reconstruct signals
A_ica_ = ica.mixing_  # Get estimated mixing matrix
abs_value_data = np.absolute(S_ica_)
final_data = [abs_value_data[:, y].reshape(12, 120) for y in range(10)]
for y in range(10):
    plt.matshow(final_data[y])
    plt.show()

print("next Pictures are processed with PCA ")
# PCA Data Processing
pca = PCA(n_components=10)
PCA_data = pca.fit_transform(stack_new_data)
# PCA pca = PCA(n_components=n_comp, random_state=42) pca2_results_train = pca.fit_transform(train.drop(["y"], axis=1)) pca2_results_test = pca.transform(test) # ICA ica = FastICA(n_components=n_comp, random_state=42) ica2_results_train = ica.fit_transform(train.drop(["y"], axis=1)) ica2_results_test = ica.transform(test) # Append decomposition components to datasets for i in range(1, n_comp+1): train['pca_' + str(i)] = pca2_results_train[:,i-1] test['pca_' + str(i)] = pca2_results_test[:, i-1]
def cluster_activations(separated_activations, nb_clusters=2, nb_dims=10, reduce='FastICA', clustering_method='KMeans'): """ Clusters activations and returns two arrays. 1) separated_clusters: where separated_clusters[i] is a 1D array indicating which cluster each datapoint in the class has been assigned 2) separated_reduced_activations: activations with dimensionality reduced using the specified reduce method :param separated_activations: list where separated_activations[i] is a np matrix for the ith class where each row corresponds to activations for a given data point :type separated_activations: `list` :param nb_clusters: number of clusters (defaults to 2 for poison/clean) :type nb_clusters: `int` :param nb_dims: number of dimensions to reduce activation to via PCA :type nb_dims: `int` :param reduce: Method to perform dimensionality reduction, default is FastICA :type reduce: `str` :param clustering_method: Clustering method to use, default is KMeans :type clustering_method: `str` :return: separated_clusters, separated_reduced_activations :rtype: `tuple` """ from sklearn.cluster import KMeans from sklearn.decomposition import FastICA, PCA separated_clusters = [] separated_reduced_activations = [] if reduce == 'FastICA': projector = FastICA(n_components=nb_dims, max_iter=1000, tol=0.005) elif reduce == 'PCA': projector = PCA(n_components=nb_dims) else: raise ValueError(reduce + " dimensionality reduction method not supported.") if clustering_method == 'KMeans': clusterer = KMeans(n_clusters=nb_clusters) else: raise ValueError(clustering_method + " clustering method not supported.") for i, ac in enumerate(separated_activations): # Apply dimensionality reduction nb_activations = np.shape(ac)[1] if nb_activations > nb_dims: reduced_activations = projector.fit_transform(ac) else: logger.info( "Dimensionality of activations = %i less than nb_dims = %i. Not applying dimensionality " "reduction.", nb_activations, nb_dims) reduced_activations = ac separated_reduced_activations.append(reduced_activations) # Get cluster assignments clusters = clusterer.fit_predict(reduced_activations) separated_clusters.append(clusters) return separated_clusters, separated_reduced_activations
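# Minimal usage sketch for cluster_activations() with fake activations for
# two classes; all shapes are illustrative.
import numpy as np

acts = [np.random.rand(100, 64), np.random.rand(80, 64)]
clusters, reduced = cluster_activations(acts, nb_clusters=2, nb_dims=2,
                                        reduce='FastICA',
                                        clustering_method='KMeans')
print(clusters[0].shape, reduced[0].shape)  # (100,) and (100, 2)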
def decompose(values, other_value_sets={}, centroids={}, method=None, number_of_components=None, random=False): if method is None: method = defaults["decomposition_method"] method = proper_string(normalise_string(method), DECOMPOSITION_METHOD_NAMES) if number_of_components is None: number_of_components = defaults["decomposition_dimensionality"] other_values_provided_as_dictionary = True if other_value_sets is not None and not isinstance(other_value_sets, dict): other_value_sets = {"unknown": other_value_sets} other_values_provided_as_dictionary = False if random: random_state = None else: random_state = 42 if method == "PCA": if (values.shape[1] <= MAXIMUM_FEATURE_SIZE_FOR_NORMAL_PCA and not scipy.sparse.issparse(values)): model = PCA(n_components=number_of_components) else: model = IncrementalPCA(n_components=number_of_components, batch_size=100) elif method == "SVD": model = TruncatedSVD(n_components=number_of_components) elif method == "ICA": model = FastICA(n_components=number_of_components) elif method == "t-SNE": if number_of_components < 4: tsne_method = "barnes_hut" else: tsne_method = "exact" model = TSNE(n_components=number_of_components, method=tsne_method, random_state=random_state) else: raise ValueError("Method `{}` not found.".format(method)) values_decomposed = model.fit_transform(values) if other_value_sets and method != "t-SNE": other_value_sets_decomposed = {} for other_set_name, other_values in other_value_sets.items(): if other_values is not None: other_value_decomposed = model.transform(other_values) else: other_value_decomposed = None other_value_sets_decomposed[other_set_name] = ( other_value_decomposed) else: other_value_sets_decomposed = None if other_value_sets_decomposed and not other_values_provided_as_dictionary: other_value_sets_decomposed = other_value_sets_decomposed["unknown"] # Only supports centroids without data sets as top levels if centroids is not None and method == "PCA": if "means" in centroids: centroids = {"unknown": centroids} components = model.components_ centroids_decomposed = {} for distribution, distribution_centroids in centroids.items(): if distribution_centroids: centroids_distribution_decomposed = {} for parameter, parameter_values in ( distribution_centroids.items()): if parameter == "means": shape = numpy.array(parameter_values.shape) original_dimension = shape[-1] reshaped_parameter_values = parameter_values.reshape( -1, original_dimension) decomposed_parameter_values = model.transform( reshaped_parameter_values) shape[-1] = number_of_components new_parameter_values = ( decomposed_parameter_values.reshape(shape)) elif parameter == "covariance_matrices": shape = numpy.array(parameter_values.shape) original_dimension = shape[-1] reshaped_parameter_values = parameter_values.reshape( -1, original_dimension, original_dimension) n_centroids = reshaped_parameter_values.shape[0] decomposed_parameter_values = numpy.empty( shape=(n_centroids, 2, 2)) for i in range(n_centroids): decomposed_parameter_values[i] = ( components @ reshaped_parameter_values[i] @ components.T) shape[-2:] = number_of_components new_parameter_values = ( decomposed_parameter_values.reshape(shape)) else: new_parameter_values = parameter_values centroids_distribution_decomposed[parameter] = ( new_parameter_values) centroids_decomposed[distribution] = ( centroids_distribution_decomposed) else: centroids_decomposed[distribution] = None if "unknown" in centroids_decomposed: centroids_decomposed = centroids_decomposed["unknown"] else: centroids_decomposed = None output = 
[values_decomposed] if other_value_sets != {}: output.append(other_value_sets_decomposed) if centroids != {}: output.append(centroids_decomposed) return output
yj = np.abs(np.corrcoef(train[j], train['y'])[0, 1]) yij = np.abs(np.corrcoef(train[j] + train[i], train['y'])[0, 1]) if yij > yi + yj: print(i + '_' + j + ': ' + str(yi) + ' ' + str(yj) + ' ' + str(yij)) interact_feat['Itr_' + i + '_' + j] = all_dt[j] + all_dt[i] interact_feat = feat_standardize(interact_feat) # 2. PCA, ICA & SVD n_comp = 12 random_seed = 624 # 2.1 PCA pca = PCA(n_components=n_comp, random_state=random_seed) pca_feat = pca.fit_transform(all_dt.drop(['y'], axis=1)) # 2.2 ICA ica = FastICA(n_components=n_comp, random_state=random_seed) ica_feat = ica.fit_transform(all_dt.drop(['y'], axis=1)) # 2.3 SVD svd = TruncatedSVD(n_components=n_comp, random_state=random_seed) svd_feat = svd.fit_transform(all_dt.drop(['y'], axis=1)) # 2.4 TSNE tsne = TSNE(n_components=3, random_state=random_seed, verbose=1) tsne_feat = tsne.fit_transform(all_dt.drop(['y'], axis=1)) # 2.5 KMeans kmeans = KMeans(n_clusters=4, random_state=random_seed) kmeans_feat = kmeans.fit_transform(all_dt.drop(['y'], axis=1)) # 2.6 Logistic PCA # 3. Random Projection # 3.1 SRP srp = SparseRandomProjection(n_components=n_comp,
cached = True elif args.nmf and os.path.exists(f"cache/nmf-{args.n_components}.pkl"): with open(f"cache/nmf-{args.n_components}.pkl", "rb") as f: sklearn_transformer = pickle.load(f) cached = True elif args.truncatedsvd and os.path.exists( f"cache/truncatedsvd-{args.n_components}.pkl"): with open(f"cache/truncatedsvd-{args.n_components}.pkl", "rb") as f: sklearn_transformer = pickle.load(f) cached = True elif args.pca: sklearn_transformer = PCA(n_components=args.n_components, svd_solver="full", random_state=1234) elif args.fastica: sklearn_transformer = FastICA(n_components=args.n_components, random_state=1234) elif args.incrementalpca: sklearn_transformer = IncrementalPCA(n_components=args.n_components) elif args.kernelpca: sklearn_transformer = KernelPCA(n_components=args.n_components, random_state=1234, n_jobs=-1) elif args.nmf: sklearn_transformer = NMF(n_components=args.n_components, random_state=1234) elif args.truncatedsvd: sklearn_transformer = TruncatedSVD(n_components=args.n_components, random_state=1234) if not cached: print(f"Fitting {sklearn_transformer.__class__.__name__}...", end="")
def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10): """ Perform ICA on `data` and returns mixing matrix Parameters ---------- data : (S x T) :obj:`numpy.ndarray` Dimensionally reduced optimally combined functional data, where `S` is samples and `T` is time n_components : :obj:`int` Number of components retained from PCA decomposition fixed_seed : :obj:`int` Seed for ensuring reproducibility of ICA results maxit : :obj:`int`, optional Maximum number of iterations for ICA. Default is 500. maxrestart : :obj:`int`, optional Maximum number of attempted decompositions to perform with different random seeds. ICA will stop running if there is convergence prior to reaching this limit. Default is 10. Returns ------- mmix : (T x C) :obj:`numpy.ndarray` Z-scored mixing matrix for converting input data to component space, where `C` is components and `T` is the same as in `data` Notes ----- Uses `sklearn` implementation of FastICA for decomposition """ warnings.filterwarnings(action='ignore', module='scipy', message='^internal gelsd') if fixed_seed == -1: fixed_seed = np.random.randint(low=1, high=1000) for i_attempt in range(maxrestart): ica = FastICA(n_components=n_components, algorithm='parallel', fun='logcosh', max_iter=maxit, random_state=fixed_seed) with warnings.catch_warnings(record=True) as w: # Cause all warnings to always be triggered in order to capture # convergence failures. warnings.simplefilter('always') ica.fit(data) w = list(filter(lambda i: issubclass(i.category, UserWarning), w)) if len(w): LGR.warning('ICA attempt {0} failed to converge after {1} ' 'iterations'.format(i_attempt + 1, ica.n_iter_)) if i_attempt < maxrestart - 1: fixed_seed += 1 LGR.warning( 'Random seed updated to {0}'.format(fixed_seed)) else: LGR.info('ICA attempt {0} converged in {1} ' 'iterations'.format(i_attempt + 1, ica.n_iter_)) break mmix = ica.mixing_ mmix = stats.zscore(mmix, axis=0) return mmix
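# Hedged example call for tedica(): `data` is (S x T) as the docstring
# states; the array below is random placeholder data, and the module's LGR
# logger is assumed to be configured.
import numpy as np

data = np.random.randn(1000, 120)   # S=1000 samples, T=120 time points
mmix = tedica(data, n_components=20, fixed_seed=42)
print(mmix.shape)                   # (120, 20): time x components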
# Start timer start_time = time.time() # Load the data from income_data import X, y, X_train, X_test, y_train, y_test # Scale the data scaler = StandardScaler() scaler.fit(X) X_train_std = scaler.transform(X) X_test_std = scaler.transform(X) X_toCluster = X_train_std y_inputs = y # Reduce Dimensionality (ICA) projection = ProjectionAlgorithm(n_components=29) X_toCluster = projection.fit_transform(X_toCluster) ###### # Run em clustering with 2 clusters and plot ###### cluster = GaussianMixture(random_state=0, n_components=2).fit(X_toCluster) cluster_labels = cluster.predict(X_toCluster) X_transformed = np.dot(X_toCluster, np.transpose(cluster.means_)) # print diagnostics print('X_toCluster.shape \n', X_toCluster.shape) print('X_transformed.shape \n', X_transformed.shape) print('Labels \n', cluster_labels) print('Weights \n', cluster.weights_)
pca.fit(r) X = pca.transform(r) np.savetxt( r'C:\Users\justjo\PycharmProjects\SaS_clustering\tensorboard\SaS_2020-03-04-22-10-42\PCA_embeds.csv', X, delimiter=',') from sklearn.neighbors import NearestNeighbors import numpy as np import matplotlib.pyplot as plt import tensorflow as tf import os os.environ['CUDA_VISIBLE_DEVICES'] = '-1' from sklearn.decomposition import FastICA transformer = FastICA(n_components=32, random_state=0) X_transformed = transformer.fit_transform(embed_simclr) embeds = np.array(np.load(r'J:\SaS\embeds.npy', allow_pickle=True)) # embeds_std = np.array([x.reshape(-1,32).std(axis=0) for x in embeds]) embeds = np.array([x.reshape(-1, 32).mean(axis=0) for x in embeds]) embeds = embeds[:, embeds.std(axis=0) > 0] # embeds_std = embeds_std[:, embeds_std.std(axis=0)>0] imgs_raw = np.load(r'J:\SaS\imgs_raw_coded_png_bytes.npy') # embeds_stacked = np.hstack((embeds, embeds_std)) nbrs = NearestNeighbors(n_neighbors=100, algorithm='ball_tree').fit(embeds) # nbrs_std = NearestNeighbors(n_neighbors=100, algorithm='ball_tree').fit(embeds_std) # nbrs_stacked = NearestNeighbors(n_neighbors=100, algorithm='ball_tree').fit(embeds_stacked) _, indices = nbrs.kneighbors(embeds[8856].reshape(1, -1), 1000) # _, indices_std = nbrs_std.kneighbors(embeds_std[194485].reshape(1,-1), 1000) # _, indices_stacked = nbrs_std.kneighbors(embeds_std[200000].reshape(1,-1), 1000)
def ica_model(train_x,components): model = FastICA(n_components=components).fit(train_x) return model
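# Usage sketch: fit once on training data, then apply the same unmixing to
# held-out data. `train_x` and `test_x` are hypothetical arrays.
model = ica_model(train_x, components=10)
train_ic = model.transform(train_x)
test_ic = model.transform(test_x)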
fica_input = pca_out.copy() #(Nv,Nt) #####fica_input /= fica_input.std(axis=0) #(Nv,Nt) #NOT SURE IF NEEDED OR NOT if options.reuse and os.path.exists(ica_mix_path) and os.path.exists(ica_mix_zsc_path) and os.path.exists(ica_mix_fft_path) and os.path.exists(ica_mix_freqs_path) and os.path.exists(ica_maps_path) and os.path.exists(ica_octs_path): print(" + Loading pre-existing ICA Mixing matrix [%s]." % (ica_mix_path)) print(" + Loading pre-existing Normalized ICA Mixing matrix [%s]." % (ica_mix_zsc_path)) print(" + Loading pre-existing ICA Maps [%s]." % (ica_maps_path)) fica_mmix = np.loadtxt(ica_mix_path).T fica_mmix_zsc = np.loadtxt(ica_mix_zsc_path).T octs_afterICA,_,_ = meu.niiLoad(ica_octs_path) octs_afterICA = octs_afterICA[mask,:] fica_out,_,_ = meu.niiLoad(ica_maps_path) fica_out = fica_out[mask,:] else: print(" + Perform ICA....") fica = FastICA(n_components=Nc, max_iter=500) fica_out = fica.fit_transform(fica_input).T #Original did not have the .T fica_out -= fica_out.mean(axis=0) fica_out /= fica_out.std(axis=0) fica_out = fica_out.T fica_mmix = fica.mixing_.T # Correct the sign of components # ------------------------------ print(" + Correct the sign of the ICA components....") fica_signs = skew(np.reshape(fica_out,(Nv,Nc)),axis=0) #(Nc,) fica_signs /= np.abs(fica_signs) #(Nc,) fica_out = (fica_out.T*fica_signs[:,np.newaxis]).T #(Nv,Nc) fica_mmix = (fica_mmix*fica_signs[:,np.newaxis]) #(Nc,Nt) fica_mmix_zsc = zscore(fica_mmix,axis=-1) #(Nc,Nt) # Save ICA Mixing matrix, its normalized version and the ICA maps
from scipy import signal from scipy.fftpack import fft, fftfreq, fftshift from sklearn.decomposition import PCA, FastICA from sklearn.preprocessing import StandardScaler import matplotlib.pyplot as plt # Change these variables based on the location of your cascade classifier PATH_TO_HAAR_CASCADES = "image_processing/" face_cascade = cv2.CascadeClassifier( PATH_TO_HAAR_CASCADES + 'haarcascade_frontalface_default.xml') # Full pathway must be used firstFrame = None time = [] R = [] G = [] B = [] pca = FastICA(n_components=3) cap = cv2.VideoCapture(0) if cap.isOpened() == False: print("Failed to open webcam") frame_num = 0 plt.ion() while cap.isOpened(): ret, frame = cap.read() if ret == True: frame_num += 1 if firstFrame is None: start = datetime.datetime.now() time.append(0) # Take first frame and find face in it firstFrame = frame cv2.imshow("frame", firstFrame)
fs2, psa2 = csvReader("D:\\noela\\Documents\\3TI\\TFE\\github\\csv\\csvCleanData\\winkLeft\\winkLeft2Data\\AF3.csv") fs3, psa3 = csvReader("D:\\noela\\Documents\\3TI\\TFE\\github\\csv\\csvCleanData\\winkLeft\\winkLeft3Data\\AF3.csv") fs4, psa4 = csvReader("D:\\noela\\Documents\\3TI\\TFE\\github\\csv\\csvCleanData\\winkLeft\\winkLeft4Data\\AF3.csv") fs6, psa6 = csvReader("D:\\noela\\Documents\\3TI\\TFE\\github\\csv\\csvCleanData\\winkLeft\\winkLeft6Data\\AF3.csv") fs7, psa7 = csvReader("D:\\noela\\Documents\\3TI\\TFE\\github\\csv\\csvCleanData\\winkLeft\\winkLeft7Data\\AF3.csv") """ X1 = np.c_[np.array(fs1), np.array(fs2), np.array(fs3), np.array(fs4), np.array(fs6), np.array(fs7)] #X2 = np.c_[np.array(psa1), np.array(psa2), np.array(psa3), np.array(psa4), np.array(psa6), np.array(psa7)] # ICA ica = FastICA(n_components=3) S_ = np.array(ica.fit_transform(X1)) # Reconstruct signals fs_1, t1, psa_1 = signal.spectrogram(S_[:, 0], 128, nfft=nfft) fs_2, t2, psa_2 = signal.spectrogram(S_[:, 1], 128, nfft=nfft) fs_3, t3, psa_3 = signal.spectrogram(S_[:, 2], 128, nfft=nfft) X2 = np.c_[np.array(psa_1), np.array(psa_2)] # PCA pca = PCA(n_components=3) H = pca.fit_transform(X1) """ fs_1, psa_1 = signal.periodogram(H[:,0], 128, nfft=nfft) fs_2, psa_2 = signal.periodogram(H[:,1], 128, nfft=nfft) fs_3, psa_3 = signal.periodogram(H[:,2], 128, nfft=nfft)
def main(tree_k=0,bank_k=0,tree_cluster=0,bank_cluster=0, \ tree_pca=0, bank_pca=0, tree_ica=0, bank_ica=0,tree_rp=0,bank_rp=0,\ tree_feature=0, bank_feature=0, tree_NN=0, NN_KM=0, NN_EM=0, NN_without_org=0 ): bank_NN = 0 # PREPROCESS WILT DATA data = pd.read_csv('wilt_full.csv') data['class'].replace(['n'], 0, inplace=True) data['class'].replace(['w'], 1, inplace=True) x_data = data.loc[:, data.columns != 'class'] y_data = data.loc[:, 'class'] scaler = StandardScaler() x_data = scaler.fit_transform(x_data) columns = list(data.columns.values) random_state = 100 # Hold out test set for final performance measure x_train, x_test, y_train, y_test = train_test_split( x_data, y_data, test_size=0.3, random_state=random_state, shuffle=True, stratify=y_data) if tree_k: plot_silhouette_test(x_train, 'Silhouette score for Diseased Tree dataset') plot_sse_test( x_data, 'Sum Squared Errors (K-means) for Diseased Tree dataset') plot_bic_test( x_data, 'BIC score (Expectation Maximization) for Diseased Tree dataset') if tree_cluster: # PLOT CLUSTER FOR K-MEANS generate_clusters('KM', x_data, y_data, columns, 'K-means cluster scatter plots for each attribute', [1, 5], 'class') # PLOT CLUSTER FOR EM generate_clusters('EM', x_data, y_data, columns, 'EM cluster scatter plots for each attribute', [1, 5], 'class') # PLOT CLUSTER FOR GROUND TRUTH plot_clusters(data, [1, 5], 'class', 'Ground truth cluster scatter plots for each attribute') if tree_pca: print(x_data.shape) transformer = PCA(n_components=2) x_pca = transformer.fit_transform(x_data) eigen_vals = transformer.explained_variance_ print(x_pca.shape) proj = transformer.inverse_transform(x_pca) loss = ((x_data - proj)**2).mean() print('PCA loss is: ', loss) # Sebastian Raschka, Vahid Mirjalili - Python Machine Learning_ Machine Learning and Deep Learning with Python, scikit-learn, and TensorFlow 2 total_eigen = sum(eigen_vals) var_exp = [(i / total_eigen) for i in sorted(eigen_vals, reverse=True)] cum_var_exp = np.cumsum(var_exp) plt.bar(range(1, 3), var_exp, align='center', label='individual explained variance') plt.step(range(1, 3), cum_var_exp, where='mid', label='Cumulative explained variance', color='green') plt.xlabel('Principal component index') plt.ylabel('Explained variance ratio') plt.tight_layout() plt.show() columns = ['class', 'principal component 1', 'principal component 2'] generate_clusters( 'KM', x_pca, y_data, columns, 'Cluster dist. plots for each PCA component (K-means)', [1, 2], 'class', type='num') generate_clusters('EM', x_pca, y_data, columns, 'Cluster dist. plots for each PCA component (EM)', [1, 2], 'class', type='num') if tree_ica: kurts = [] comps = [i for i in range(1, 6)] for i in comps: transformer = FastICA(n_components=i) x_ICA = transformer.fit_transform(x_data) kurt = kurtosis(x_ICA).mean() print(kurt) kurts.append(kurt) plt.plot(comps, kurts) plt.xlabel('Components') plt.ylabel('Kurtosis') plt.title('Kurtosis plot for ICA (Tree)') plt.xticks(comps) plt.show() transformer = FastICA(n_components=2) x_ICA = transformer.fit_transform(x_data) # mu = np.mean(x_data, axis=0) # print(x_RP.shape) # print(transformer.mixing_) # proj2 = np.linalg.lstsq(x_RP.T, transformer.components_)[0] # proj2 = x_RP.dot(transformer.components_) + mu proj = transformer.inverse_transform(x_ICA) loss = ((x_data - proj)**2).mean() print('ICA loss is: ', loss) columns = [ 'class', 'Independent component 1', 'Independent component 2' ] generate_clusters( 'KM', x_ICA, y_data, columns, 'Cluster dist. 
plots for each ICA component (K-means)', [1, 2], 'class', type='num') generate_clusters('EM', x_ICA, y_data, columns, 'Cluster dist. plots for each ICA component (EM)', [1, 2], 'class', type='num') if tree_rp: losses, kurts = [], [] comps = [i for i in range(2, 6)] for i in comps: transformer = random_projection.GaussianRandomProjection( n_components=i) mu = np.mean(x_data, axis=0) x_RP = transformer.fit_transform(x_data) t_matrix = transformer.components_ proj = np.linalg.lstsq(x_RP.T, t_matrix)[0] + mu loss = ((x_data - proj)**2).mean() kurt = kurtosis(x_RP).mean() kurts.append(kurt) losses.append(loss) fig = plt.figure(1) ax = fig.add_subplot(121) ax.plot(comps, kurts) ax.set(xlabel='Components', ylabel='Kurtosis', title='Kurtosis plot for RP (Tree)', xticks=comps) ax = fig.add_subplot(122) ax.plot(comps, losses) ax.set(xlabel='Components', ylabel='Loss', title='Loss plot for RP (Tree)', xticks=comps) losses, kurts = [], [] comps = range(1, 11) for i in comps: transformer = random_projection.GaussianRandomProjection( n_components=2) mu = np.mean(x_data, axis=0) x_RP = transformer.fit_transform(x_data) t_matrix = transformer.components_ proj = np.linalg.lstsq(x_RP.T, t_matrix)[0] + mu loss = ((x_data - proj)**2).mean() kurt = kurtosis(x_RP).mean() kurts.append(kurt) losses.append(loss) fig = plt.figure(2) ax = fig.add_subplot(121) ax.plot(comps, kurts) ax.set(xlabel='Run index', ylabel='Kurtosis', title='Kurtosis plot for RP (Tree)', xticks=comps) ax = fig.add_subplot(122) ax.plot(comps, losses) ax.set(xlabel='Run index', ylabel='Loss', title='Loss plot for RP (Tree)', xticks=comps) plt.show() transformer = random_projection.GaussianRandomProjection( n_components=2) mu = np.mean(x_data, axis=0) x_RP = transformer.fit_transform(x_data) t_matrix = transformer.components_ proj = np.linalg.lstsq(x_RP.T, t_matrix)[0] + mu loss = ((x_data - proj)**2).mean() print('RP loss is: ', loss) columns = ['class', 'Random component 1', 'Random component 2'] generate_clusters( 'KM', x_RP, y_data, columns, 'Cluster dist. plots for each Random Projection component (K-means)', [1, 2], 'class', type='num') generate_clusters( 'EM', x_RP, y_data, columns, 'Cluster dist. plots for each Random Projection component (EM)', [1, 2], 'class', type='num') if tree_feature: clf = ExtraTreesClassifier(n_estimators=50) clf = clf.fit(x_data, y_data) print(clf.feature_importances_) model = SelectFromModel(clf, prefit=True, threshold=0.02, max_features=2) # default is mean threshold x_FS = model.transform(x_data) feature_counts = x_FS.shape[1] print(x_FS.shape) columns = [ 'class', 'Feature sel. component 1', 'Feature sel. component 2' ] generate_clusters( 'KM', x_FS, y_data, columns, 'Cluster dist. plots for each Feature Selection component (K-means)', [1, 2], 'class', type='num') generate_clusters( 'EM', x_FS, y_data, columns, 'Cluster dist. plots for each Feature Selection component (EM)', [1, 2], 'class', type='num') if tree_NN: num_comps = 2 num_clusters = 3 f1_scores, accuracys, train_times = [], [], [] clfs = [] data_sets = [x_data] data_sets_km = [x_data] data_sets_em = [x_data] names = ['Original', 'PCA', 'ICA', 'Rand. 
Proj.', 'Feature Sel '] transformer_PCA = PCA(n_components=num_comps) x_PCA = transformer_PCA.fit_transform(x_data) data_sets.append(x_PCA) clusterer = KMeans(n_clusters=num_clusters, n_init=30, max_iter=300, random_state=100) y_prime = clusterer.fit_predict(x_PCA) y_prime = np.array([y_prime]).T x_new = np.concatenate((x_PCA, y_prime), axis=1) if NN_without_org: x_new = y_prime data_sets_km.append(x_new) clusterer = GaussianMixture(n_components=num_clusters) y_prime = clusterer.fit_predict(x_PCA) y_prime = np.array([y_prime]).T x_new = np.concatenate((x_PCA, y_prime), axis=1) if NN_without_org: x_new = y_prime data_sets_em.append(x_new) transformer_ICA = FastICA(n_components=num_comps) x_ICA = transformer_ICA.fit_transform(x_data) data_sets.append(x_ICA) clusterer = KMeans(n_clusters=num_clusters, n_init=30, max_iter=300, random_state=100) y_prime = clusterer.fit_predict(x_ICA) y_prime = np.array([y_prime]).T x_new = np.concatenate((x_ICA, y_prime), axis=1) if NN_without_org: x_new = y_prime data_sets_km.append(x_new) clusterer = GaussianMixture(n_components=num_clusters) y_prime = clusterer.fit_predict(x_ICA) y_prime = np.array([y_prime]).T x_new = np.concatenate((x_ICA, y_prime), axis=1) if NN_without_org: x_new = y_prime data_sets_em.append(x_new) transformer_RP = random_projection.GaussianRandomProjection( n_components=num_comps) x_RP = transformer_RP.fit_transform(x_data) data_sets.append(x_RP) clusterer = KMeans(n_clusters=num_clusters, n_init=30, max_iter=300, random_state=100) y_prime = clusterer.fit_predict(x_RP) y_prime = np.array([y_prime]).T x_new = np.concatenate((x_RP, y_prime), axis=1) if NN_without_org: x_new = y_prime data_sets_km.append(x_new) clusterer = GaussianMixture(n_components=num_clusters) y_prime = clusterer.fit_predict(x_RP) y_prime = np.array([y_prime]).T x_new = np.concatenate((x_RP, y_prime), axis=1) if NN_without_org: x_new = y_prime data_sets_em.append(x_new) clf_FS = ExtraTreesClassifier(n_estimators=50) clf_FS = clf_FS.fit(x_data, y_data) model = SelectFromModel(clf_FS, prefit=True, threshold=0.02, max_features=2) # default is mean threshold x_FS = model.transform(x_data) data_sets.append(x_FS) clusterer = KMeans(n_clusters=num_clusters, n_init=30, max_iter=300, random_state=100) y_prime = clusterer.fit_predict(x_FS) y_prime = np.array([y_prime]).T x_new = np.concatenate((x_FS, y_prime), axis=1) if NN_without_org: x_new = y_prime data_sets_km.append(x_new) clusterer = GaussianMixture(n_components=num_clusters) y_prime = clusterer.fit_predict(x_FS) y_prime = np.array([y_prime]).T x_new = np.concatenate((x_FS, y_prime), axis=1) if NN_without_org: x_new = y_prime data_sets_em.append(x_new) # Experiment with NN on projected data if NN_KM: print('K-means cluster as a feature ...') data_sets = data_sets_km print(len(data_sets)) suffix = ' (KM)' elif NN_EM: print('EM cluster as a feature ...') data_sets = data_sets_em print(len(data_sets)) suffix = ' (EM)' else: data_sets = data_sets suffix = '' for x_data in data_sets: print(x_data) clf = mlrose.NeuralNetwork( hidden_nodes = [6,6], activation = 'relu', \ algorithm = 'gradient_descent', max_iters = 1000, \ bias = True, is_classifier = True, learning_rate = 0.0001, \ early_stopping = True, clip_max = 5, max_attempts = 100, \ random_state = 30) curves, train_score, test_score, train_acc, test_acc, train_time, test_time = \ return_stratified_kcv_results(clf, x_data, y_data) f1_scores.append(test_score) accuracys.append(test_acc) print(accuracys) print(f1_scores) 
train_times.append(train_time) df_plot = pd.DataFrame({ 'names': names, 'CV_F1_Score': f1_scores, 'CV_accuracy': accuracys }) # df_plot = pd.wide_to_long(df_plot, i=['CV_F1_Score', 'CV_accuracy'], j='Measures') df_plot = pd.melt(df_plot, id_vars=['names'], value_vars=['CV_F1_Score','CV_accuracy'],\ var_name='Measures', value_name='Score') fig = plt.figure(1) ax = fig.add_subplot(121) sb.barplot(x="names", y="Score", hue="Measures", data=df_plot, axes=ax) ax.set(xlabel='dataset', ylabel='score', title='NN on org. + proj. data' + suffix) plt.xticks(rotation=30) ax = fig.add_subplot(122) ax.bar(names, train_times, align='center') ax.set(xlabel='dataset', ylabel='Train time (s)', title='Train time of NN on org. + proj. data' + suffix) fig.tight_layout() plt.xticks(rotation=30) plt.show() # PREPROCESS BANK DATA data = pd.read_csv('bank_full.csv', sep=';') data.drop(['day', 'month'], axis=1, inplace=True) data['y'].replace(['no'], 0, inplace=True) data['y'].replace(['yes'], 1, inplace=True) # convert data to numeric where possible data = data.apply(pd.to_numeric, errors='ignore', downcast='float') # print(data.hist) x_data = data.loc[:, data.columns != "y"] x_data_org = x_data y_data = data.loc[:, "y"] numerical_features = x_data.dtypes == 'float32' categorical_features = ~numerical_features columns = list(data.columns.values) random_state = 100 preprocess = make_column_transformer( (OneHotEncoder(), categorical_features), (Normalizer(), numerical_features), remainder="passthrough") x_data = preprocess.fit_transform(x_data) # Hold out test set for final performance measure x_train, x_test, y_train, y_test = train_test_split( x_data, y_data, test_size=0.3, random_state=random_state, shuffle=True, stratify=y_data) if bank_k: plot_silhouette_test(x_data, 'Silhouette score for Bank Marketing dataset') plot_sse_test( x_data, 'Sum Squared Errors (K-means) for Bank Marketing dataset') plot_bic_test( x_data, 'BIC score (Expectation Maximization) for Bank Marketing dataset') if bank_cluster: # PLOT CLUSTER FOR K-MEANS generate_clusters('KM', x_data, y_data, columns, 'K-means cluster scatter plots for each attribute', [2, 7], 'y', x_data_org=x_data_org) # PLOT CLUSTER FOR EM generate_clusters('EM', x_data, y_data, columns, 'EM cluster scatter plots for each attribute', [2, 7], 'y', x_data_org=x_data_org) # PLOT CLUSTER FOR GROUND TRUTH plot_clusters(data, [2, 7], 'y', 'Ground truth cluster scatter plots for each attribute') if bank_pca: print(x_data.shape) transformer = PCA(n_components=8) x_pca = transformer.fit_transform(x_data) eigen_vals = transformer.explained_variance_ proj = transformer.inverse_transform(x_pca) loss = ((x_data - proj)**2).mean() print('PCA loss is: ', loss) total_eigen = sum(eigen_vals) var_exp = [(i / total_eigen) for i in sorted(eigen_vals, reverse=True)] cum_var_exp = np.cumsum(var_exp) plt.bar(range(1, 9), var_exp, align='center', label='individual explained variance') plt.step(range(1, 9), cum_var_exp, where='mid', label='Cumulative explained variance', color='green') plt.xlabel('Principal component index') plt.ylabel('Explained variance ratio') plt.tight_layout() plt.show() # Sebastian Raschka, Vahid Mirjalili - Python Machine Learning_ Machine Learning and Deep Learning with Python, scikit-learn, and TensorFlow 2 columns = ['class' ] + ['principal component ' + str(i) for i in range(1, 9)] generate_clusters( 'KM', x_pca, y_data, columns, 'Bank Cluster dist. 
    generate_clusters(
        'EM', x_pca, y_data, columns,
        'Bank Cluster dist. plots for each PCA component (EM)',
        [2, 4], 'class', type='num')

if bank_ica:
    kurts = []
    comps = [i for i in range(2, 12)]
    for i in comps:
        transformer = FastICA(n_components=i)
        x_ICA = transformer.fit_transform(x_data)
        kurt = kurtosis(x_ICA).mean()
        print(kurt)
        kurts.append(kurt)
    plt.plot(comps, kurts)
    plt.xlabel('Components')
    plt.ylabel('Kurtosis')
    plt.title('Kurtosis plot for ICA (bank)')
    plt.xticks(comps)
    plt.show()

    transformer = FastICA(n_components=8)
    x_ICA = transformer.fit_transform(x_data)
    proj = transformer.inverse_transform(x_ICA)
    loss = ((x_data - proj) ** 2).mean()
    print('ICA loss is: ', loss)
    columns = ['class'] + ['principal component ' + str(i)
                           for i in range(1, 9)]
    generate_clusters(
        'KM', x_ICA, y_data, columns,
        'Bank Cluster dist. plots for each ICA component (K-means-bank)',
        [2, 4], 'class', type='num')
    generate_clusters(
        'EM', x_ICA, y_data, columns,
        'Bank Cluster dist. plots for each ICA component (EM-bank)',
        [2, 4], 'class', type='num')

if bank_rp:
    losses, kurts = [], []
    comps = [i for i in range(1, 12)]
    for i in comps:
        transformer = random_projection.GaussianRandomProjection(
            n_components=i)
        mu = np.mean(x_data, axis=0)
        x_RP = transformer.fit_transform(x_data)
        t_matrix = transformer.components_
        # least-squares reconstruction of X from the projection
        proj = np.linalg.lstsq(x_RP.T, t_matrix, rcond=None)[0] + mu
        loss = ((x_data - proj) ** 2).mean()
        kurt = kurtosis(x_RP).mean()
        kurts.append(kurt)
        losses.append(loss)
    fig = plt.figure(1)
    ax = fig.add_subplot(121)
    ax.plot(comps, kurts)
    ax.set(xlabel='Components', ylabel='Kurtosis',
           title='Kurtosis plot for RP (Bank)', xticks=comps)
    ax = fig.add_subplot(122)
    ax.plot(comps, losses)
    ax.set(xlabel='Components', ylabel='Loss',
           title='Loss plot for RP (Bank)', xticks=comps)

    # Repeat with a fixed component count to see run-to-run variance
    losses, kurts = [], []
    comps = range(1, 12)
    for i in comps:
        transformer = random_projection.GaussianRandomProjection(
            n_components=8)
        mu = np.mean(x_data, axis=0)
        x_RP = transformer.fit_transform(x_data)
        t_matrix = transformer.components_
        proj = np.linalg.lstsq(x_RP.T, t_matrix, rcond=None)[0] + mu
        loss = ((x_data - proj) ** 2).mean()
        kurt = kurtosis(x_RP).mean()
        kurts.append(kurt)
        losses.append(loss)
    fig = plt.figure(2)
    ax = fig.add_subplot(121)
    ax.plot(comps, kurts)
    ax.set(xlabel='Run index', ylabel='Kurtosis',
           title='Kurtosis plot for RP (Bank)', xticks=comps)
    ax = fig.add_subplot(122)
    ax.plot(comps, losses)
    ax.set(xlabel='Run index', ylabel='Loss',
           title='Loss plot for RP (Bank)', xticks=comps)
    plt.show()

    transformer = random_projection.GaussianRandomProjection(n_components=8)
    mu = np.mean(x_data, axis=0)
    x_RP = transformer.fit_transform(x_data)
    t_matrix = transformer.components_
    proj = np.linalg.lstsq(x_RP.T, t_matrix, rcond=None)[0] + mu
    loss = ((x_data - proj) ** 2).mean()
    print('RP loss is: ', loss)
    columns = ['class'] + ['Random component ' + str(i)
                           for i in range(1, 9)]
    generate_clusters(
        'KM', x_RP, y_data, columns,
        'Dist. plots for each Random Projection component (K-means)',
        [2, 4], 'class', type='num')
    generate_clusters(
        'EM', x_RP, y_data, columns,
        'Dist. plots for each Random Projection component (EM)',
        [2, 4], 'class', type='num')
if bank_feature:
    clf = ExtraTreesClassifier(n_estimators=50)
    clf = clf.fit(x_data, y_data)
    print(clf.feature_importances_)
    # default threshold is the mean feature importance
    model = SelectFromModel(clf, prefit=True, threshold=0.00525,
                            max_features=8)
    x_FS = model.transform(x_data)
    print(x_FS.shape)
    columns = ['class'] + ['Feature selection component ' + str(i)
                           for i in range(1, 9)]
    generate_clusters(
        'KM', x_FS, y_data, columns,
        'Cluster dist. plots for each Feature Selection component (K-means)',
        [2, 4], 'class', type='num')
    generate_clusters(
        'EM', x_FS, y_data, columns,
        'Cluster dist. plots for each Feature Selection component (EM)',
        [2, 4], 'class', type='num')

if bank_NN:
    f1_scores, accuracys, train_times = [], [], []
    clfs = []
    data_sets = [x_data]
    data_sets_km = [x_data]
    data_sets_em = [x_data]
    names = ['Original', 'PCA', 'ICA', 'Rand. Proj.', 'Feature Sel.']

    transformer = PCA(n_components=8)
    x_PCA = transformer.fit_transform(x_data)
    data_sets.append(x_PCA)
    clusterer = KMeans(n_clusters=2, n_init=30, max_iter=300,
                       random_state=100)
    y_prime = clusterer.fit_predict(x_PCA).reshape(-1, 1)
    x_new = np.concatenate((x_PCA, y_prime), axis=1)
    data_sets_km.append(x_new)
    clusterer = GaussianMixture(n_components=2)
    y_prime = clusterer.fit_predict(x_PCA).reshape(-1, 1)
    x_new = np.concatenate((x_PCA, y_prime), axis=1)
    data_sets_em.append(x_new)

    transformer = FastICA(n_components=8)
    x_ICA = transformer.fit_transform(x_data)
    data_sets.append(x_ICA)
    clusterer = KMeans(n_clusters=2, n_init=30, max_iter=300,
                       random_state=100)
    y_prime = clusterer.fit_predict(x_ICA).reshape(-1, 1)
    x_new = np.concatenate((x_ICA, y_prime), axis=1)
    data_sets_km.append(x_new)
    clusterer = GaussianMixture(n_components=2)
    y_prime = clusterer.fit_predict(x_ICA).reshape(-1, 1)
    x_new = np.concatenate((x_ICA, y_prime), axis=1)
    data_sets_em.append(x_new)

    transformer = random_projection.GaussianRandomProjection(n_components=8)
    x_RP = transformer.fit_transform(x_data)
    data_sets.append(x_RP)
    clusterer = KMeans(n_clusters=2, n_init=30, max_iter=300,
                       random_state=100)
    y_prime = clusterer.fit_predict(x_RP).reshape(-1, 1)
    x_new = np.concatenate((x_RP, y_prime), axis=1)
    data_sets_km.append(x_new)
    clusterer = GaussianMixture(n_components=2)
    y_prime = clusterer.fit_predict(x_RP).reshape(-1, 1)
    x_new = np.concatenate((x_RP, y_prime), axis=1)
    data_sets_em.append(x_new)

    clf_FS = ExtraTreesClassifier(n_estimators=50)
    clf_FS = clf_FS.fit(x_data, y_data)
    # default threshold is the mean feature importance
    model = SelectFromModel(clf_FS, prefit=True, threshold=0.0002,
                            max_features=8)
    x_FS = model.transform(x_data)
    data_sets.append(x_FS)
    clusterer = KMeans(n_clusters=2, n_init=30, max_iter=300,
                       random_state=100)
    y_prime = clusterer.fit_predict(x_FS).reshape(-1, 1)
    x_new = np.concatenate((x_FS, y_prime), axis=1)
    data_sets_km.append(x_new)
    clusterer = GaussianMixture(n_components=2)
    y_prime = clusterer.fit_predict(x_FS).reshape(-1, 1)
    x_new = np.concatenate((x_FS, y_prime), axis=1)
    data_sets_em.append(x_new)

    # Experiment with NN on projected data
    if NN_KM:
        print('K-means cluster as a feature ...')
        data_sets = data_sets_km
        print(len(data_sets))
        suffix = '-with K-means cluster'
    elif NN_EM:
        print('EM cluster as a feature ...')
        data_sets = data_sets_em  # was incorrectly reset to data_sets_km
        print(len(data_sets))
        suffix = '-with EM cluster'
    else:
        suffix = ''

    for x_data in data_sets:
        print(x_data.shape)
        clf = mlrose.NeuralNetwork(
            hidden_nodes=[6, 6], activation='relu',
            algorithm='gradient_descent', max_iters=1000,
            bias=True, is_classifier=True, learning_rate=0.0001,
            early_stopping=True, clip_max=5, max_attempts=100,
            random_state=30)
        curves, train_score, test_score, train_acc, test_acc, train_time, test_time = \
            return_stratified_kcv_results(clf, x_data, y_data)
        f1_scores.append(test_score)
        accuracys.append(test_acc)
        print(accuracys)
        print(f1_scores)
        train_times.append(train_time)

    df_plot = pd.DataFrame({
        'names': names,
        'CV_F1_Score': f1_scores,
        'CV_accuracy': accuracys
    })
    # df_plot = pd.wide_to_long(df_plot, i=['CV_F1_Score', 'CV_accuracy'], j='Measures')
    df_plot = pd.melt(df_plot, id_vars=['names'],
                      value_vars=['CV_F1_Score', 'CV_accuracy'],
                      var_name='Measures', value_name='Score')
    fig = plt.figure(1)
    ax = fig.add_subplot(121)
    sb.barplot(x="names", y="Score", hue="Measures", data=df_plot, ax=ax)
    ax.set(xlabel='dataset', ylabel='score',
           title='NN on original + proj. data' + suffix)
    plt.xticks(rotation=30)
    ax = fig.add_subplot(122)
    ax.bar(names, train_times, align='center')
    ax.set(xlabel='dataset', ylabel='Train time (s)',
           title='Train time of NN on original + proj. data' + suffix)
    fig.tight_layout()
    plt.xticks(rotation=30)
    plt.show()
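The PCA/ICA/RP/FS blocks above repeat the same cluster-as-feature augmentation four times. A minimal sketch of a helper that factors the pattern out; the name append_cluster_features and its signature are hypothetical, not part of the original code:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

def append_cluster_features(x_proj, n_clusters=2, random_state=100):
    """Return copies of x_proj with K-means / EM labels appended as a column."""
    km = KMeans(n_clusters=n_clusters, n_init=30, max_iter=300,
                random_state=random_state).fit_predict(x_proj).reshape(-1, 1)
    em = GaussianMixture(n_components=n_clusters,
                         random_state=random_state).fit_predict(x_proj).reshape(-1, 1)
    return (np.concatenate((x_proj, km), axis=1),
            np.concatenate((x_proj, em), axis=1))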
def apply_ICA(proj_data, proj_weights=None):
    ica = FastICA(n_components=2, random_state=RANDOM_SEED)
    norm_data = normalize_columns(proj_data)
    # Copy needed because ICA whitens the input matrix
    result = ica.fit_transform(norm_data.T)
    return result
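A usage sketch for apply_ICA on toy data. RANDOM_SEED and normalize_columns live elsewhere in the original project, so the stand-ins below are assumptions made only so the example runs:

import numpy as np
from sklearn.decomposition import FastICA

RANDOM_SEED = 0  # assumed; defined elsewhere in the original project

def normalize_columns(a):
    # stand-in for the project's helper: zero mean, unit variance per column
    return (a - a.mean(axis=0)) / a.std(axis=0)

rng = np.random.RandomState(RANDOM_SEED)
proj_data = rng.randn(5, 200)     # 5 variables x 200 observations
sources = apply_ICA(proj_data)    # shape (200, 2): one row per observation
print(sources.shape)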
# name="PCA-based",
# data=data)
# print(79 * '_')

# (2) apply dimension reduction algorithms
from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA

PCA_data = PCA(n_components=5, whiten=False)
temp = PCA_data.fit(data)
# temp1 = temp.components_
PCA_data_trans = PCA_data.transform(data)
PCA_data_trans_test = PCA_data.transform(data_test)
# PCA_comp = PCA_data.components_

ICA_data = FastICA(n_components=5)
ICA_data.fit(data)
ICA_data_trans = ICA_data.transform(data)
ICA_data_trans_test = ICA_data.transform(data_test)

from sklearn.random_projection import GaussianRandomProjection
transformer = GaussianRandomProjection(n_components=5, eps=0.1)
RP_data_trans = transformer.fit_transform(data)
# reuse the projection fitted on the training data; refitting on the
# test set would draw a different random matrix
RP_data_trans_test = transformer.transform(data_test)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# note: LDA caps n_components at min(n_classes - 1, n_features)
transformer = LinearDiscriminantAnalysis(solver="svd", n_components=5)
LDA_data_trans = transformer.fit_transform(data, labels)  # LDA is supervised
# LDA_data_temp = LDA_data.fit(X=data, y=labels)
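The eps=0.1 argument only carries a Johnson-Lindenstrauss distortion guarantee when n_components is large enough, which 5 almost certainly is not. A quick check, as a sketch with an illustrative sample count:

from sklearn.random_projection import johnson_lindenstrauss_min_dim

# minimum components needed to preserve pairwise distances within 10%
# for, e.g., 10000 samples; 5 components is far below this bound
print(johnson_lindenstrauss_min_dim(n_samples=10000, eps=0.1))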
def doICA(data, w1, w3, tau2, n_comp=10):
    # Unfold the (w1, w3, t) data cube into (time, pixels)
    data_r = np.zeros((data.shape[2], data.shape[0] * data.shape[1]))
    for i in range(data.shape[2]):
        data_r[i] = np.nan_to_num(data[:, :, i]).ravel()

    # Standardize
    # data_r = (data_r - np.mean(data_r, axis=0)) / np.std(data_r, ddof=1, axis=0)
    # data_r = normalize(data_r, norm='l2', axis=0)

    ica = FastICA(n_components=n_comp, whiten=True)
    ica.fit(data_r)

    # Re-fold each component into a 2D map
    comp = np.zeros((data.shape[0], data.shape[1], ica.components_.shape[0]))
    for i in range(ica.components_.shape[0]):
        comp[:, :, i] = ica.components_[i].reshape(data.shape[0],
                                                   data.shape[1])

    # Plot a series of components
    w1grid, w3grid = np.meshgrid(w3, w1)
    fig, axarr = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
    fig.subplots_adjust(hspace=0.3, wspace=0.4)
    for i in range(9):
        ax = axarr.flatten()[i]
        img = comp[:, :, i]
        ax.pcolormesh(w1grid, w3grid, img)
        ax.set_title('Component ' + str(i))
        ax.set_xlim(w1grid.min(), w1grid.max())
        ax.set_ylim(w3grid.min(), w3grid.max())
        plt.setp(ax.get_yticklabels(), visible=True)
        plt.setp(ax.get_xticklabels(), visible=True)
    plt.show()

    # Create surface plots
    surf3d(w1, w3, comp[:, :, 0])
    surf3d(w1, w3, comp[:, :, 1])
    surf3d(w1, w3, comp[:, :, 2])

    data_c = ica.transform(data_r)

    # Plot contours filtered down to the first five components
    w1grid, w3grid = np.meshgrid(w3, w1)
    fig, axarr = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
    fig.subplots_adjust(hspace=0.3, wspace=0.4)
    for j in range(9):
        ax = axarr.flatten()[j]
        i = 2 * j
        img = comp[:, :, 0] * data_c[i, 0] + comp[:, :, 1] * data_c[i, 1] + \
            comp[:, :, 2] * data_c[i, 2] + comp[:, :, 3] * data_c[i, 3] + \
            comp[:, :, 4] * data_c[i, 4]
        ax.pcolormesh(w1grid, w3grid, img)
        ax.set_title('Time ' + str(tau2[i]))
        ax.set_xlim(w1grid.min(), w1grid.max())
        ax.set_ylim(w3grid.min(), w3grid.max())
        plt.setp(ax.get_yticklabels(), visible=True)
        plt.setp(ax.get_xticklabels(), visible=True)
    plt.show()

    # Plot component amplitudes against time
    plt.figure(figsize=(5, 5))
    plt.scatter(tau2, data_c[:, 0], color='red', label='C0')
    plt.scatter(tau2, data_c[:, 1], color='orange', label='C1')
    plt.scatter(tau2, data_c[:, 2], color='green', label='C2')
    plt.scatter(tau2, data_c[:, 3], color='blue', label='C3')
    plt.xlabel('Time (fs)')
    plt.legend(loc='lower right')

    # Plot component contribution vs. component index for selected times
    plt.figure(figsize=(5, 5))
    comp_num = list(range(10))
    plt.plot(comp_num, data_c[0], color='red', label='t=0')
    plt.plot(comp_num, data_c[3], color='orange', label='t=270')
    plt.plot(comp_num, data_c[6], color='green', label='t=540')
    plt.plot(comp_num, data_c[9], color='blue', label='t=5000')
    plt.plot(comp_num, data_c[12], color='purple', label='t=20000')
    plt.xlim(-0.5, 10.5)
    plt.ylim(-5, 6.6)
    plt.xlabel('Component')
    plt.ylabel('Component contribution')
    plt.legend(loc='upper right')
    plt.show()

    # Fit an exponential to the first component's time trace
    p0 = .1, .1, .1
    popt, pcov = curve_fit(my_exponential, tau2.ravel() * 0.001,
                           data_c[:, 0], p0, maxfev=1000)
    print(popt)
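The five-term sum in doICA's second plotting loop can be written as a single contraction over the component axis. A sketch of a drop-in replacement, using comp, data_c, and i exactly as they appear inside that loop:

import numpy as np

# equivalent to comp[:,:,0]*data_c[i,0] + ... + comp[:,:,4]*data_c[i,4]
k = 5
img = np.tensordot(comp[:, :, :k], data_c[i, :k], axes=([2], [0]))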
def lca(data):
    lca = FastICA(n_components=150)
    data = lca.fit_transform(data)
    return pd.DataFrame(data)
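At 150 components FastICA can fail to converge within its default iteration budget. If convergence warnings appear, raising max_iter and fixing the seed is the usual first step; a hedged sketch (lca_stable is a hypothetical variant, not from the original):

import pandas as pd
from sklearn.decomposition import FastICA

def lca_stable(data, n_components=150, random_state=0):
    # same idea as lca() above, with an explicit iteration budget and a
    # fixed seed so repeated runs agree up to component sign
    ica = FastICA(n_components=n_components, max_iter=1000, tol=1e-4,
                  random_state=random_state)
    return pd.DataFrame(ica.fit_transform(data),
                        index=getattr(data, 'index', None))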
def test_fastica_simple(add_noise=False):
    """Test the FastICA algorithm on very simple data."""
    rng = np.random.RandomState(0)
    # scipy.stats uses the global RNG:
    np.random.seed(0)
    n_samples = 1000
    # Generate two sources:
    s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
    s2 = stats.t.rvs(1, size=n_samples)
    s = np.c_[s1, s2].T
    center_and_norm(s)
    s1, s2 = s

    # Mixing angle
    phi = 0.6
    mixing = np.array([[np.cos(phi), np.sin(phi)],
                       [np.sin(phi), -np.cos(phi)]])
    m = np.dot(mixing, s)

    if add_noise:
        m += 0.1 * rng.randn(2, 1000)
    center_and_norm(m)

    # function as fun arg
    def g_test(x):
        return x ** 3, 3 * x ** 2

    algos = ['parallel', 'deflation']
    nls = ['logcosh', 'exp', 'cube', g_test]
    whitening = [True, False]
    for algo, nl, whiten in itertools.product(algos, nls, whitening):
        if whiten:
            k_, mixing_, s_ = fastica(m.T, fun=nl, algorithm=algo)
            assert_raises(ValueError, fastica, m.T, fun=np.tanh,
                          algorithm=algo)
        else:
            X = PCA(n_components=2, whiten=True).fit_transform(m.T)
            k_, mixing_, s_ = fastica(X, fun=nl, algorithm=algo,
                                      whiten=False)
            assert_raises(ValueError, fastica, X, fun=np.tanh,
                          algorithm=algo)
        s_ = s_.T
        # Check that the mixing model described in the docstring holds:
        if whiten:
            assert_almost_equal(s_, np.dot(np.dot(mixing_, k_), m))

        center_and_norm(s_)
        s1_, s2_ = s_
        # Check to see if the sources have been estimated
        # in the wrong order
        if abs(np.dot(s1_, s2)) > abs(np.dot(s1_, s1)):
            s2_, s1_ = s_
        s1_ *= np.sign(np.dot(s1_, s1))
        s2_ *= np.sign(np.dot(s2_, s2))

        # Check that we have estimated the original sources
        if not add_noise:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=2)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=2)
        else:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=1)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=1)

    # Test FastICA class
    ica = FastICA(fun=nl, algorithm=algo, random_state=0)
    ica.fit(m.T)
    ica.get_mixing_matrix()
    assert_true(ica.components_.shape == (2, 2))
    assert_true(ica.sources_.shape == (1000, 2))

    for fn in [np.tanh, "exp(-.5(x^2))"]:
        ica = FastICA(fun=fn, algorithm=algo, random_state=0)
        assert_raises(ValueError, ica.fit, m.T)

    assert_raises(TypeError, FastICA(fun=moves.xrange(10)).fit, m.T)
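This test targets an older scikit-learn interface (get_mixing_matrix, sources_, six.moves); in current releases the equivalents are the mixing_ attribute and the output of fit_transform. A minimal modern sketch of the same mixing-model check:

import numpy as np
from sklearn.decomposition import FastICA

rng = np.random.RandomState(0)
S = rng.laplace(size=(1000, 2))          # non-Gaussian sources
A = np.array([[1.0, 0.5], [0.5, 1.0]])   # mixing matrix
X = S @ A.T

ica = FastICA(n_components=2, random_state=0)
S_est = ica.fit_transform(X)             # replaces the old sources_ attribute
A_est = ica.mixing_                      # replaces get_mixing_matrix()
# X should be reconstructed up to numerical tolerance
assert np.allclose(X, S_est @ A_est.T + ica.mean_, atol=1e-6)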
def perform_pca(self):
    self.dm = PCA(n_components=self.n_components)
    self.dm.fit(self.dataset)
    self.components = self.dm.components_
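After perform_pca runs, the fitted estimator also exposes the variance bookkeeping, which is often what callers want next. A sketch, where obj is a hypothetical instance of the enclosing class:

obj.perform_pca()
ratios = obj.dm.explained_variance_ratio_   # variance captured per component
print(ratios, ratios.sum())                 # fraction of total variance kept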
def main():
    # First, extract the physiological measures eta_data and beta_data -----
    # Change the working directory to where the data lives
    os.chdir(DATAPATH)
    # Prepare a DataFrame to collect the data
    data_df = pd.DataFrame([])
    for i_sub in range(len(FILENAME_LIST)):
        # Load each statistic into a DataFrame (already standardized)
        mean_df = pd.read_excel(FILENAME_LIST[i_sub],
                                sheet_name="mean").drop("Statistics", axis=1)
        max_df = pd.read_excel(FILENAME_LIST[i_sub],
                               sheet_name="max").drop("Statistics", axis=1)
        min_df = pd.read_excel(FILENAME_LIST[i_sub],
                               sheet_name="min").drop("Statistics", axis=1)
        std_df = pd.read_excel(FILENAME_LIST[i_sub],
                               sheet_name="std").drop("Statistics", axis=1)
        # Arrange horizontally in the order [mean, max, min, std]
        df = pd.concat([mean_df,
                        max_df.drop("Task", axis=1),
                        min_df.drop("Task", axis=1),
                        std_df.drop("Task", axis=1)],
                       axis=1, sort=False)
        # Stack each subject's results vertically
        # (DataFrame.append was removed in pandas 2.0; use pd.concat)
        data_df = pd.concat([data_df, df])

    # Drop the task number
    data2_df = data_df.drop(["Task"], axis=1)
    data2_df.columns = COLUMNS48
    # Standardize each column (a second standardization)
    stan_data = scipy.stats.zscore(data2_df, axis=0)
    # Convert back to a DataFrame
    stan_data_df = pd.DataFrame(stan_data, columns=COLUMNS48)
    # Pull out the viscosity and stiffness columns
    eta_data_df = stan_data_df.iloc[:, [0, 6, 12, 18, 24, 30, 36, 42]]
    beta_data_df = stan_data_df.iloc[:, [1, 7, 13, 19, 25, 31, 37, 43]]
    eta_data = eta_data_df.values
    beta_data = beta_data_df.values
    # -----------------------------------------------------------------------

    # Next, extract the subjective-rating data q_stan_data ------------------
    os.chdir(DATAPATH2)  # Change to the directory holding the rating data
    # Prepare a DataFrame to collect the data
    q_data_df = pd.DataFrame([])
    for i_sub in range(len(FILENAME_LIST2)):
        # Load the ratings
        q_df = pd.read_excel(FILENAME_LIST2[i_sub])
        # Stack each subject's results vertically (standardizing on the way)
        q_data_df = pd.concat([q_data_df, arrange_data(q_df, i_sub)])

    # Drop the task number and stimulation type
    q_data2_df = q_data_df.drop(["No", "Stimulation"], axis=1)
    # Standardize each column
    q_stan_data = scipy.stats.zscore(q_data2_df, axis=0)
    # -----------------------------------------------------------------------

    # Build an ndarray of the stimulation types
    odor = q_data_df["Stimulation"].values.tolist()
    odor = np.reshape(odor, (len(odor), 1))

    # PCA on the subjective ratings -----------------------------------------
    pca1 = PCA()
    # Fit and project the data onto the principal components
    transformed1 = pca1.fit_transform(q_stan_data)
    transformed1 = pd.DataFrame(transformed1,
                                columns=["PC1", "PC2", "PC3", "PC4",
                                         "PC5", "PC6", "PC7", "PC8"])
    # -----------------------------------------------------------------------

    # Correlations between PC1 and the physiological measures
    cor_df = pd.concat([transformed1["PC1"], stan_data_df],
                       axis=1, sort=False)
    cor_df = cor_df.corr()["PC1"]

    # PCA restricted to measures with correlation >= 0.45 -------------------
    new_stan_data_df = stan_data_df.drop(cor_df[cor_df < 0.45].index, axis=1)
    new_stan_data = new_stan_data_df.values
    # Run the PCA (returns ndarrays)
    un_score, non_score = mypca(q_stan_data, new_stan_data, odor)
    my2Dplot(un_score, non_score)

    # ICA --------------------------------------------------------------------
    ica_data = np.vstack((un_score, non_score))
    # Independent component analysis with FastICA
    ica = FastICA()
    ica.fit(ica_data)
    Uica = ica.components_.T
    Aica = ica.transform(ica_data).T
    Uica = Uica / np.sqrt((Uica ** 2).sum(axis=0))
    un = Aica[:, 0:len(un_score)]
    non = Aica[:, len(un_score):]

    # Scatter plot
    plt.figure(figsize=(5, 5))
    plt.scatter(un[0], un[1], s=80, c=[0.4, 0.6, 0.9], alpha=0.8,
                linewidths=1, edgecolors=[0, 0, 0])
    plt.scatter(non[0], non[1], s=80, c=[0.5, 0.5, 0.5], alpha=0.8,
                linewidths=1, edgecolors=[0, 0, 0])
    plt.title("ICA scatter", fontsize=18)
    plt.tight_layout()  # Keep titles from overlapping
    plt.show()
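In the block above, ica.components_ is the unmixing matrix, so Uica holds (normalized) unmixing vectors and Aica holds the source estimates; the mixing matrix itself is a separate attribute. A small sketch of the distinction on synthetic data:

import numpy as np
from sklearn.decomposition import FastICA

rng = np.random.RandomState(0)
X = rng.laplace(size=(300, 2)) @ rng.randn(2, 2)

ica = FastICA(n_components=2, random_state=0).fit(X)
W = ica.components_   # unmixing: sources = (X - mean_) @ W.T
A = ica.mixing_       # mixing:   X ~ sources @ A.T + mean_
S = ica.transform(X)
print(np.allclose(S, (X - ica.mean_) @ W.T))  # expected True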
class ICA(object):
    """
    Wrapper for sklearn package. Performs fast ICA (Independent Component
    Analysis).

    ICA has 4 methods:
       - fit(waveforms)
         update class instance with ICA fit

       - fit_transform()
         do what fit() does, but additionally return the projection onto
         ICA space

       - inverse_transform(A)
         inverses the decomposition, returns waveforms for an input A,
         using Z

       - get_params()
         returns metadata used for fits.
    """

    def __init__(self, num_components=10,
                 catalog_name='unknown',
                 whiten=True,
                 fun='logcosh',
                 fun_args=None,
                 max_iter=600,
                 tol=.00001,
                 w_init=None,
                 random_state=None,
                 algorithm='parallel'):
        self._decomposition = 'Fast ICA'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._whiten = whiten
        self._fun = fun
        self._fun_args = fun_args
        self._max_iter = max_iter
        self._tol = tol
        self._w_init = w_init
        self._random_state = random_state
        self._algorithm = algorithm

        self._ICA = FastICA(n_components=self._num_components,
                            whiten=self._whiten,
                            fun=self._fun,
                            fun_args=self._fun_args,
                            max_iter=self._max_iter,
                            tol=self._tol,
                            w_init=self._w_init,
                            random_state=self._random_state,
                            algorithm=self._algorithm)

    def fit(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._ICA.fit(self._waveforms)

    def fit_transform(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._ICA.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self, A):
        # convert basis back to waveforms using fit
        new_waveforms = self._ICA.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._ICA.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decomposition'] = self._decomposition
        return params

    def get_basis(self):
        """Return the ICA basis vectors (Z^\\dagger)."""
        # get_mixing_matrix() was removed from scikit-learn; on recent
        # versions use the mixing_ attribute instead
        return self._ICA.get_mixing_matrix()
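A quick round-trip usage sketch for this wrapper on synthetic waveforms; the shapes are illustrative only:

import numpy as np

rng = np.random.RandomState(0)
waveforms = rng.randn(200, 50)        # 200 waveforms, 50 samples each

ica = ICA(num_components=10, random_state=0)
A = ica.fit_transform(waveforms)      # (200, 10) projection onto ICA space
recon = ica.inverse_transform(A)      # (200, 50) approximate waveforms
print(A.shape, recon.shape)
print(ica.get_params()['num_components'])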
def train_NN_ICA(filename, X_train, X_test, y_train, y_test, debug=False,
                 numFolds=10, njobs=-1, scalar=1, make_graphs=False, pNN={},
                 nolegend=False, random_seed=1, num_dim=4):
    np.random.seed(random_seed)
    algo = 'ICA-' + str(num_dim)

    start = time.time()
    ica = FastICA(n_components=num_dim, random_state=random_seed)
    ica.fit(X_train)
    X_train = ica.transform(X_train)
    X_test = ica.transform(X_test)

    param_grid = [{
        'hidden_layer_sizes': [(512, 512, 512, 512)],
        'activation': ['relu'],  # 'identity',
        'solver': ['adam'],
        'alpha': [0.0001, 0.001, 0.01, 0.1],
        'batch_size': ['auto'],
        'learning_rate_init': [0.001, 0.01],
        'max_iter': [10000],
        'warm_start': [True],
        'early_stopping': [True],
        'random_state': [1]
    }]

    nn_classifier = MLPClassifier()
    grid_search = GridSearchCV(nn_classifier, param_grid, cv=numFolds,
                               scoring='roc_auc_ovr_weighted',
                               return_train_score=True, n_jobs=njobs,
                               verbose=debug)
    grid_search.fit(X_train, y_train)
    cvres = grid_search.cv_results_
    util.save_gridsearch_to_csv(cvres, algo,
                                filename[:-4] + '-' + str(num_dim), scalar,
                                '-kmeans')

    # Score the best configuration found by the grid search
    # (previously this refit a default MLPClassifier instead)
    nn_classifier = grid_search.best_estimator_
    start = time.time()
    nn_classifier.fit(X_train, y_train)
    print('NN Fit Time: ', time.time() - start)

    start = time.time()
    y_prob = nn_classifier.predict_proba(X_train)
    train_score = roc_auc_score(y_train, y_prob, multi_class="ovr",
                                average="weighted")
    print('NN Train Score Time: ', train_score, time.time() - start)

    start = time.time()
    y_prob = nn_classifier.predict_proba(X_test)
    test_score = roc_auc_score(y_test, y_prob, multi_class="ovr",
                               average="weighted")
    print('NN Test Score Time: ', test_score, time.time() - start)

    test_class = MLPClassifier()
    test_class.set_params(**pNN)

    if make_graphs:
        # compute Model Complexity/Validation curves
        util.plot_learning_curve(nn_classifier, algo, filename[:-4],
                                 X_train, y_train, ylim=(0.0, 1.05), cv=10,
                                 n_jobs=njobs, debug=debug)
        # util.compute_vc(algo, 'alpha',
        #                 [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01,
        #                  0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500, 1000,
        #                  5000, 10000, 100000, 1000000],
        #                 X_train, y_train, X_test, y_test, nn_classifier,
        #                 filename[:-4], test_class, pNN, log=True,
        #                 njobs=njobs, debug=debug)

    return time.time() - start, round(train_score, 4), round(test_score, 4)
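Fitting ICA on X_train and reusing the transform on X_test, as above, can also be expressed as a Pipeline so the grid search cross-validates the whole chain and the ICA step is refit inside each fold. A hedged sketch, not the original project's code:

from sklearn.pipeline import Pipeline
from sklearn.decomposition import FastICA
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

pipe = Pipeline([('ica', FastICA(n_components=4, random_state=1)),
                 ('nn', MLPClassifier(max_iter=10000, random_state=1))])
# step parameters are addressed as <step>__<param>
grid = GridSearchCV(pipe, {'nn__alpha': [1e-4, 1e-3, 1e-2, 1e-1]},
                    cv=5, scoring='roc_auc_ovr_weighted')
# grid.fit(X_train, y_train)  # grid.best_estimator_ then handles both steps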
def ICEBEEM_wrapper(X, Y, ebm_hidden_size, n_layers_ebm, n_layers_flow,
                    lr_flow, lr_ebm, seed, ckpt_file='icebeem.pt',
                    test=False):
    np.random.seed(seed)
    torch.manual_seed(seed)
    data_dim = X.shape[1]

    model_ebm = MLP_general(input_size=data_dim,
                            hidden_size=[ebm_hidden_size] * n_layers_ebm,
                            n_layers=n_layers_ebm, output_size=data_dim,
                            use_bn=True, activation_function=F.leaky_relu)

    prior = TransformedDistribution(
        Uniform(torch.zeros(data_dim), torch.ones(data_dim)),
        SigmoidTransform().inv)
    nfs_flow = NSF_AR
    flows = [nfs_flow(dim=data_dim, K=8, B=3, hidden_dim=16)
             for _ in range(n_layers_flow)]
    convs = [Invertible1x1Conv(dim=data_dim) for _ in flows]
    norms = [ActNorm(dim=data_dim) for _ in flows]
    flows = list(itertools.chain(*zip(norms, convs, flows)))
    # construct the model
    model_flow = NormalizingFlowModel(prior, flows)

    pretrain_flow = True
    augment_ebm = True

    # instantiate ebmFCE object
    fce_ = ConditionalFCE(data=X.astype(np.float32),
                          segments=Y.astype(np.float32),
                          energy_MLP=model_ebm, flow_model=model_flow,
                          verbose=False)

    init_ckpt_file = os.path.splitext(ckpt_file)[0] + '_0' + \
        os.path.splitext(ckpt_file)[1]
    if not test:
        if pretrain_flow:
            # print('pretraining flow model..')
            fce_.pretrain_flow_model(epochs=1, lr=1e-4)
            # print('pretraining done.')
        # first we pretrain the final layer of the EBM
        # (this is g(y), as it depends on the segments)
        fce_.train_ebm_fce(epochs=15, augment=augment_ebm,
                           finalLayerOnly=True, cutoff=.5)
        # then train the full EBM via NCE with flow contrastive noise:
        fce_.train_ebm_fce(epochs=50, augment=augment_ebm, cutoff=.5,
                           useVAT=False)
        torch.save({'ebm_mlp': fce_.energy_MLP.state_dict(),
                    'ebm_finalLayer': fce_.ebm_finalLayer,
                    'flow': fce_.flow_model.state_dict()}, init_ckpt_file)
    else:
        state = torch.load(init_ckpt_file, map_location=fce_.device)
        fce_.energy_MLP.load_state_dict(state['ebm_mlp'])
        fce_.ebm_finalLayer = state['ebm_finalLayer']
        # fixed typo: was load_stat_dict
        fce_.flow_model.load_state_dict(state['flow'])

    # evaluate recovery of latents
    recov = fce_.unmixSamples(X, modelChoice='ebm')
    source_est_ica = FastICA().fit_transform(recov)
    recov_sources = [source_est_ica]

    # iterate between updating the noise and tuning the EBM
    eps = .025
    for iter_ in range(3):
        mid_ckpt_file = os.path.splitext(ckpt_file)[0] + '_' + \
            str(iter_ + 1) + os.path.splitext(ckpt_file)[1]
        if not test:
            # update flow model:
            fce_.train_flow_fce(epochs=5, objConstant=-1., cutoff=.5 - eps,
                                lr=lr_flow)
            # update energy based model:
            fce_.train_ebm_fce(epochs=50, augment=augment_ebm,
                               cutoff=.5 + eps, lr=lr_ebm, useVAT=False)
            torch.save({'ebm_mlp': fce_.energy_MLP.state_dict(),
                        'ebm_finalLayer': fce_.ebm_finalLayer,
                        'flow': fce_.flow_model.state_dict()},
                       mid_ckpt_file)
        else:
            state = torch.load(mid_ckpt_file, map_location=fce_.device)
            fce_.energy_MLP.load_state_dict(state['ebm_mlp'])
            fce_.ebm_finalLayer = state['ebm_finalLayer']
            fce_.flow_model.load_state_dict(state['flow'])

        # evaluate recovery of latents
        recov = fce_.unmixSamples(X, modelChoice='ebm')
        source_est_ica = FastICA().fit_transform(recov)
        recov_sources.append(source_est_ica)

    return recov_sources
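recov_sources holds ICA-rotated latent estimates, and ICA only identifies sources up to permutation and sign, so comparing them against ground truth usually goes through an absolute-correlation matching step. A hedged sketch of such a metric; mean_corr_coef and S_true are illustrative names, not part of the original code:

import numpy as np
from scipy.optimize import linear_sum_assignment

def mean_corr_coef(S_est, S_true):
    # |correlation| between every estimated/true source pair, then the
    # best one-to-one matching via the Hungarian algorithm
    d = S_true.shape[1]
    corr = np.abs(np.corrcoef(S_est.T, S_true.T))[:d, d:]
    row, col = linear_sum_assignment(-corr)
    return corr[row, col].mean()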