def __init__(self, imgArr, nComponents=None):
    '''
    Prepare an ICA decomposition of a stack of images.

    The stack is flattened to one feature row per frame and standardised
    with a ``StandardScaler``. A ``FastICA`` estimator is created here but
    it is not fitted by this constructor.

    Args:
        imgArr (np.array): Image stack. The first axis indexes frames; all
            remaining axes are flattened into features.
        nComponents (int): Number of components to use. If unspecified,
            ``FastICA`` is built with its default ``n_components``.
    '''
    self.imgArr = imgArr
    self.nFrames = self.imgArr.shape[0]
    self.imgShape = self.imgArr.shape[1:]
    self.nComponents = nComponents

    # Reshape to (nFrames, nFeatures); -1 lets reshape infer the feature
    # count from the remaining dimensions.
    self.featureArr = self.imgArr.reshape(self.imgArr.shape[0], -1)

    # Scale the resulting feature array.
    self.scaler = preprocessing.StandardScaler()
    self.scaler.fit(self.featureArr)
    self.featureArr = self.scaler.transform(self.featureArr)

    # Identity check: `== None` is not a reliable test for "unspecified".
    if self.nComponents is None:
        self.ica = decomposition.FastICA()
    else:
        self.ica = decomposition.FastICA(n_components=self.nComponents)
def _rank_features(self, X=None, dendrogram=False, rotation='promax'):
    """
    Score features by clustering their (optionally rotated) ICA loadings.

    Fits FastICA on ``self.originData``, optionally applies a promax
    rotation to the components, clusters the per-feature loadings with
    KMeans and stores, for every feature, its cluster label and its
    distance to the cluster centroid in ``self.featureScore['scores']``.

    Args:
        X: Optional data; when given, ``self.fit(X)`` is called first.
        dendrogram: Accepted for interface compatibility; not used here.
        rotation: ``'promax'`` applies an oblique promax rotation to the
            components; any other value leaves them unrotated.

    Raises:
        ValueError: If ``X`` is None and the model has not been fitted.
    """
    if X is not None:
        self.fit(X)
    elif not self.hasFitted:
        raise ValueError('The model has not fitted and the X is None')

    try:
        # TODO: the columns with all zero value have to be eliminated.
        # TODO: it is the problem of whiten=True.
        icaModel = decomposition.FastICA(
            whiten=True, random_state=1).fit(self.originData)
    except Exception:
        # Fall back to a single un-whitened fit. If this fails too, the
        # error propagates instead of being masked by later code.
        warnings.warn("ICA is forced to run without whitening.",
                      UserWarning)
        icaModel = decomposition.FastICA(
            whiten=False, random_state=1).fit(self.originData)

    # ICA components have shape (n_component, n_features).
    independentComponents = icaModel.components_

    # The rotation amplifies the load of the important component in each
    # feature. Rows are components, columns are features.
    if rotation == 'promax':
        promaxRotation = ObliqueRotation('promax')
        promaxRotation.fit(independentComponents)
        independentComponents = promaxRotation.oblique_rotate()

    # Transpose to (n_features, n_component) so each row is a feature.
    independentComponents = independentComponents.T

    predefinedCentroids = self.__centroid_predefining(independentComponents)

    # Cluster the rows, i.e. the features.
    featureClustering = KMeans(
        n_clusters=self.k_features,
        max_iter=300,
        algorithm='auto',
        precompute_distances='auto',
        init=predefinedCentroids).fit(independentComponents)
    featureSubsets = featureClustering.predict(independentComponents)
    featureSubsetsCentroid = featureClustering.cluster_centers_

    self.featureScore['scores']['subset'] = featureSubsets
    for index, label in enumerate(featureSubsets):
        self.featureScore['scores']['internal_score'][index] = \
            euclideanDistance(independentComponents[index, :],
                              featureSubsetsCentroid[label, :])
def plot_data(method, X, y, title, filename):
    """
    Project ``X`` to two dimensions and save a per-label scatter plot.

    Args:
        method: ``'pca'``, ``'ica'`` or ``'rp'`` (Gaussian random
            projection). Any other value plots the first two columns of
            ``X`` unchanged.
        X: Data indexable as ``X[mask, column]``.
        y: Labels aligned with the rows of ``X``.
        title: Axes title.
        filename: File name written under the ``output`` directory.
    """
    fig, ax1 = plt.subplots(1, 1)

    if method == 'pca':
        X = decomposition.PCA(n_components=2).fit_transform(X)
    elif method == 'ica':
        X = decomposition.FastICA(n_components=2,
                                  whiten=True).fit_transform(X)
    elif method == 'rp':
        X = GaussianRandomProjection(n_components=2).fit_transform(X)

    # Fixed seed so every call draws the same sequence of label colours.
    np.random.seed(20)
    for label in np.unique(y):
        ax1.scatter(X[y == label, 0], X[y == label, 1],
                    color=np.random.rand(3), linewidths=1)

    ax1.set_title(title)
    ax1.grid()
    plt.tight_layout()
    plt.savefig('/'.join(['output', filename]))
    plt.close("all")
def PreprocessingICA(self, PCA_coefficients, MNE_coefficients, N_neighbors, whiten=True):
    """
    Reduce ``self.Waves_Coefficients`` with FastICA, then embed the result
    with a spectral embedding.

    :type PCA_coefficients: int
    :type MNE_coefficients: int
    :param PCA_coefficients: number of components kept by the FastICA step
        (the estimator is stored in ``self.pca`` despite being an ICA)
    :param MNE_coefficients: number of components of the spectral embedding
    :param N_neighbors: number of neighbors for the embedding
    :param whiten: forwarded to ``FastICA``
    :return: the embedded data, also stored in ``self.X_red``
    """
    self.MNE_coefficients = MNE_coefficients
    self.PCA_coefficients = PCA_coefficients
    self.N_neighbors = N_neighbors

    ica_options = {
        'n_components': self.PCA_coefficients,
        'algorithm': 'parallel',
        'whiten': whiten,
        'fun': 'logcosh',
        'fun_args': None,
        'max_iter': 200,
        'tol': 0.0001,
        'w_init': None,
        'random_state': 0,
    }
    embedding_options = {
        'n_components': self.MNE_coefficients,
        'affinity': 'nearest_neighbors',
        'gamma': None,
        'random_state': 11,
        'n_neighbors': self.N_neighbors,
    }
    self.pca = decomposition.FastICA(**ica_options)
    self.Embedding = manifold.SpectralEmbedding(**embedding_options)

    # Two-stage reduction: ICA first, then the manifold embedding.
    self.X_pca = self.pca.fit_transform(self.Waves_Coefficients)
    self.X_red = self.Embedding.fit_transform(self.X_pca)
    return self.X_red
def feature_analysis(data=None, feature=None, pca_components=None, graph=False, start=None, end=None):
    """
    Add KernelPCA and FastICA component columns to ``data``.

    Both estimators are fitted on the rows ``start:end`` of the selected
    feature columns and then applied to every row.

    Args:
        data: DataFrame holding the feature columns; modified in place.
        feature: List of column names used as input features.
        pca_components: Number of components for both decompositions.
        graph: When exactly ``True``, plot each PCA component column.
        start, end: Label-based slice bounds selecting the training rows.

    Returns:
        ``data`` with added ``'<k>-pcomponent'`` and ``'<k>-icomponent'``
        columns, ``k`` counting from 1.
    """
    n_features = len(feature)
    X = data[feature].values.reshape(-1, n_features)
    # `.ix` was removed from pandas; `.loc` is its label-based replacement.
    # NOTE(review): callers that relied on `.ix` falling back to positional
    # slicing on a non-integer index must now pass labels - confirm.
    X_train = data[feature].loc[start:end].values.reshape(-1, n_features)

    pca = decomposition.KernelPCA(n_components=pca_components)
    pca.fit(X_train)
    pcaresult = pca.transform(X)

    ica = decomposition.FastICA(n_components=pca_components)
    ica.fit(X_train)
    icaresult = ica.transform(X)

    # One row per component, so each can be assigned as a column.
    pcaresult = pcaresult.T.reshape(pca_components, -1)
    icaresult = icaresult.T.reshape(pca_components, -1)
    for n in range(pca_components):
        data['%s-pcomponent' % str(n + 1)] = pcaresult[n]
        data['%s-icomponent' % str(n + 1)] = icaresult[n]

    if graph is True:
        for j in range(1, pca_components + 1):
            plt.clf()
            data['%i-pcomponent' % j].plot()
            plt.legend()
            plt.plot()
            plt.show()
    return data
def ICA(self, N_component):
    """
    Fit FastICA on ``self.vector_centered``, save the components and plot
    them.

    The fitted estimator is kept in ``self.ICAed_data``; its components are
    saved through ``pp.save_variable`` and passed to
    ``self.cell_graph_plot``.
    """
    fitted = skdecomp.FastICA(
        N_component, max_iter=1500, tol=0.03, whiten=True
    ).fit(self.vector_centered)
    self.ICAed_data = fitted

    components = fitted.components_
    target_path = save_folder + r'\\ICAed_Data.pkl'
    pp.save_variable(components, target_path)
    print('ICA calculation done, generating graphs')
    self.cell_graph_plot('ICA', components)
def _train(self):
    """
    Fit the classification pipeline and keep its ``predict`` as the model.

    Steps: drop a fixed set of columns, mean-centre (no variance scaling),
    FastICA with 40 components, keep the top 57 percent of features by
    mutual information, Gaussian naive Bayes.
    """
    dropped_columns = (0, 3, 5, 14, 26, 35, 40, 65, 72, 95, 99, 104, 124)

    dropper = transformers.ColumnDropper(columns=dropped_columns)
    centerer = preprocessing.StandardScaler(with_mean=True, with_std=False)
    reducer = decomposition.FastICA(
        n_components=40, fun='exp', random_state=1742)
    selector = feature_selection.SelectPercentile(
        percentile=57,
        score_func=feature_selection.mutual_info_classif)
    classifier = naive_bayes.GaussianNB()

    model = pipeline.Pipeline([
        ('drop', dropper),
        ('scale', centerer),
        ('reduce', reducer),
        ('select', selector),
        ('estim', classifier),
    ])
    model.fit(self._train_features, self._train_outputs)
    self._model = model.predict
def __get_implementation(self, method: Method, n_components):
    """
    Build the ``Implementation`` (short name plus unfitted estimator) for
    ``method``.

    Raises:
        Exception: If ``method`` is not one of the supported members.
    """
    # decomposition
    if method == self.Method.PCA:
        estimator = decomposition.PCA(
            n_components=n_components, svd_solver='randomized', whiten=True)
        return self.Implementation('pca', estimator)
    if method == self.Method.ICA:
        estimator = decomposition.FastICA(
            n_components=n_components, whiten=True, max_iter=1000)
        return self.Implementation('ica', estimator)
    if method == self.Method.FA:
        estimator = decomposition.FactorAnalysis(n_components=n_components)
        return self.Implementation('fa', estimator)
    if method == self.Method.TSVD:
        estimator = decomposition.TruncatedSVD(n_components=n_components)
        return self.Implementation('tsvd', estimator)
    if method == self.Method.NMF:
        estimator = decomposition.NMF(n_components=n_components)
        return self.Implementation('nmf', estimator)

    # clustering
    if method == self.Method.KMEANS:
        estimator = cluster.MiniBatchKMeans(
            n_clusters=n_components, tol=1e-3)
        return self.Implementation('kmeans', estimator)

    raise Exception('Error creating estimator. Invalid Type specified.')
def ica(dfData, n_components=2, boolPlot=False):
    """
    Run FastICA on the numerical, NaN-free columns of a DataFrame.

    Parameters
    #---------
    dfData          Pandas.DataFrame containing data
    n_components    Integer: number of components for which information
                    is outputted.
    boolPlot        Boolean: scatter-plot the two sources (only
                    implemented for n_components == 2).

    Description
    #----------
    Uses sklearn builtin function for computing. Columns containing NaN
    are dropped and the rest are min-max scaled before the decomposition.

    Returns
    #------
    (S_, A_): the estimated sources and the estimated mixing matrix.
    """
    dfNum = get_numericals(dfData)
    dfNum = dfNum.dropna(axis=1)
    dfNum = MinMaxScaler().fit_transform(dfNum)

    # A single fit_transform is enough; a separate fit beforehand only
    # repeated the whole optimisation and was thrown away.
    model = skd.FastICA(n_components)
    S_ = model.fit_transform(dfNum)
    A_ = model.mixing_

    if boolPlot:
        if n_components == 2:
            plt.scatter(S_[:, 0], S_[:, 1], alpha=0.8)
            plt.xlabel('independent component 1')
            plt.ylabel('independent component 2')
        else:
            print("Plotting is only implemented for 2 components")

    print(S_.shape)
    print(A_)
    return S_, A_
def run_ica():
    """
    Load voxel data, run a 64-component FastICA and correlate voxels with
    the recovered component time courses.

    For each of the first 32 voxel columns, the largest positive Pearson
    correlation with any component is collected (0 when none is positive).
    """
    n_components = 64

    log('loading data')
    start = util.now()
    voxels, xdim, ydim, zdim = load_data()
    log('  elapsed: {}'.format(util.elapsed(start)))

    log('running independent component analysis')
    start = util.now()
    ica = decomposition.FastICA(n_components=n_components, max_iter=200)
    sources = ica.fit_transform(voxels)
    sources = to_dataframe(sources, load_subject_ids(),
                           ['X{}'.format(i) for i in range(n_components)])
    log('  elapsed: {}'.format(util.elapsed(start)))

    log('calculating correlations between voxel and component time courses')
    start = util.now()
    correlations = []
    for voxel_name in voxels.columns[:32]:
        voxel = voxels[voxel_name]
        max_correlation = 0
        for source_name in sources.columns:
            # np.corrcoef returns the 2x2 correlation matrix; the
            # off-diagonal entry is the coefficient between the two series.
            # Comparing the whole matrix to a scalar raises ValueError.
            correlation = np.corrcoef(voxel, sources[source_name])[0, 1]
            if correlation > max_correlation:
                max_correlation = correlation
        correlations.append(max_correlation)
    # NOTE(review): `correlations` is built but neither returned nor stored
    # here - confirm whether callers expect it.
    log('  elapsed: {}'.format(util.elapsed(start)))
def _train(self):
    """
    Fit the classification pipeline and keep its ``predict`` as the model.

    Steps: drop a fixed set of columns, standardise, degree-1 polynomial
    expansion without bias, FastICA with the ``'cube'`` contrast, keep the
    7 best features by mutual information, Gaussian naive Bayes.
    """
    dropper = transformers.ColumnDropper(columns=(7, 8, 11, 12, 13, 14))
    standardiser = preprocessing.StandardScaler(
        with_mean=True, with_std=True)
    expander = preprocessing.PolynomialFeatures(
        degree=1, interaction_only=False, include_bias=False)
    reducer = decomposition.FastICA(fun='cube', random_state=1742)
    selector = feature_selection.SelectKBest(
        k=7, score_func=feature_selection.mutual_info_classif)
    classifier = naive_bayes.GaussianNB()

    model = pipeline.Pipeline([
        ('drop', dropper),
        ('scale', standardiser),
        ('expand', expander),
        ('reduce', reducer),
        ('select', selector),
        ('estim', classifier),
    ])
    model.fit(self._train_features, self._train_outputs)
    self._model = model.predict
def getEstimators(self):
    '''
    Build ('name', estimator) pairs for PCA, FastICA and FactorAnalysis
    and fit each estimator to ``self.data``.

    The pairs are stored in ``self._estimators``; the time taken by each
    fit is printed, and ``self._estimators_estimated`` is set once all
    fits are done.

    Raises:
        ValueError: If the data has not been preprocessed yet.
    '''
    if not self._preprocessed:
        raise ValueError(
            "Data must be preprocessed and estimators constructed")

    n = self.n_components
    pca = decomposition.PCA(
        n_components=n, svd_solver='randomized', whiten=True)
    fast_ica = decomposition.FastICA(n_components=n, whiten=True)
    factor_analysis = decomposition.FactorAnalysis(
        n_components=n, max_iter=20)
    self._estimators = [
        ('PCA', pca),
        ('FastICA', fast_ica),
        ('FactorAnalysis', factor_analysis),
    ]

    for label, model in self._estimators:
        print("Calculating %d features using %s..."
              % (self.n_components, label))
        started = time()
        model.fit(self.data)
        elapsed = time() - started
        print("\tTime taken = %0.3fs" % elapsed)

    self._estimators_estimated = True
def transform(self, df, y=None):
    """
    Append PCA, FastICA, TruncatedSVD and NMF component columns to ``df``.

    Each decomposition is fitted on ``df`` as passed in, before any column
    is added. Columns are named ``'<prefix>_<k>'`` with ``k`` counting
    from 1. ``df`` is modified in place and returned; ``y`` is ignored.
    """
    projections = []
    for prefix, class_name in (('pca', 'PCA'), ('ica', 'FastICA'),
                               ('tsvd', 'TruncatedSVD'), ('nmf', 'NMF')):
        reducer = getattr(decomposition, class_name)(
            n_components=self.n_components,
            random_state=self.random_state)
        projections.append((prefix, reducer.fit_transform(df)))

    # Interleave the new columns per component index: pca_1, ica_1, ...
    for column in range(self.n_components):
        for prefix, values in projections:
            df[prefix + '_' + str(column + 1)] = values[:, column]
    return df
def ICA(data, n_components, whiten=False, max_iter=10):
    """
    Fit FastICA on ``data`` and return the sources and the mixing matrix.

    Args:
        data: Samples-by-features input passed to ``FastICA.fit``.
        n_components: Number of components to extract.
        whiten: Forwarded to ``FastICA``.
        max_iter: Maximum number of FastICA iterations.

    Returns:
        (factors, scores): the transformed data and the estimated mixing
        matrix.
    """
    estimator = decomposition.FastICA(n_components=n_components,
                                      whiten=whiten,
                                      max_iter=max_iter)
    estimator.fit(data)
    factors = estimator.transform(data)
    # The fitted `mixing_` attribute replaces the removed
    # `get_mixing_matrix()` accessor.
    scores = estimator.mixing_
    return factors, scores
def perform_ica(wf2, n_components):
    """
    Fit FastICA on ``wf2`` and return the projected features together with
    one label per component (``'ica 0'``, ``'ica 1'``, ...).
    """
    model = decomposition.FastICA(n_components=n_components)
    model.fit(wf2)
    projected = model.transform(wf2)

    labels = []
    for component in range(n_components):
        labels.append('ica {}'.format(component))
    return projected, np.array(labels, dtype='U')
def ICA(self, n_comps=None):
    """
    Fit FastICA on the training data and project all three data splits.

    Args:
        n_comps: Number of components. ``None`` (or the legacy empty list)
            means one component per column of ``self.datatrain``.

    Side effects:
        Sets ``self.indcomp`` to the fitted estimator and
        ``self.indcompscores`` to the transformed
        ``[datatrain, dataval, datatest]``.
    """
    # `None` replaces the mutable `[]` default; an explicit empty list is
    # still accepted so existing callers keep working.
    if n_comps is None or (isinstance(n_comps, list) and not n_comps):
        n_comps = self.datatrain.shape[1]

    # The requested count used to be computed and then ignored.
    self.indcomp = decomp.FastICA(n_components=n_comps)
    self.indcomp.fit(self.datatrain)
    self.indcompscores = [
        self.indcomp.transform(self.datatrain),
        self.indcomp.transform(self.dataval),
        self.indcomp.transform(self.datatest),
    ]
def FastICA(self, source):
    """
    Min-max scale ``source`` and project it onto two FastICA components.

    Returns a dict with the projected data under ``'data'`` and
    ``'params'`` fixed at 0.
    """
    scaled = preprocessing.MinMaxScaler().fit_transform(source)
    reducer = decomposition.FastICA(n_components=2)
    return {'data': reducer.fit_transform(scaled), 'params': 0}
def reduce_data(self, data):
    """
    Normalise ``data`` with ``preprocessing.Normalizer`` and reduce it to
    three FastICA components.
    """
    normalized = preprocessing.Normalizer().fit_transform(data)
    reducer = decomposition.FastICA(n_components=3)
    return reducer.fit_transform(normalized)
def ica(self, n_components, transform_df=False):
    """
    Fit FastICA on ``self.df`` and optionally project a second dataset.

    Args:
        n_components: Number of independent components.
        transform_df: Data to project with the fitted model. ``False`` or
            ``None`` skips this step.

    Returns:
        The projection of ``transform_df`` when one is supplied,
        otherwise ``None``.

    Side effects:
        Sets ``self.ica_fit`` (the fitted estimator) and ``self.ica_df``
        (a DataFrame with columns ``ica_0 ... ica_{n-1}`` indexed like
        ``self.df``).
    """
    self.ica_fit = decomposition.FastICA(n_components=n_components)
    components = self.ica_fit.fit_transform(self.df)
    colnames = ['ica_{}'.format(x) for x in range(n_components)]
    self.ica_df = pd.DataFrame(components, index=self.df.index,
                               columns=colnames)

    # Identity checks rather than truthiness: `if transform_df:` raises
    # ValueError when a DataFrame is passed.
    if transform_df is not False and transform_df is not None:
        return self.ica_fit.transform(transform_df)
    return None
def __init__(self, data, control_points, parent):
    """
    Compute a two-component FastICA embedding of ``data``.

    On success the embedding is stored in ``self.embedding``. If the
    computation raises, a message box is shown on ``parent`` and
    ``self.embedding`` is not set by this constructor.
    """
    super(ICA, self).__init__(data, control_points, parent)
    self.name = "ICA"
    try:
        ica = decomposition.FastICA(n_components=2)
        ica.fit(data)
        self.embedding = np.array(ica.transform(data))
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed by the dialog.
        msg = "It seems like the embedding algorithm did not converge with the given parameter setting"
        QMessageBox.about(parent, "Embedding error", msg)
def main():
    """
    Train and evaluate a small dense network on ICA-transformed Pima
    diabetes data, then plot the accuracy and loss curves.

    The ICA is fitted on the training split only and then applied to the
    test split, so both live in the same component space.
    """
    start_time = time.time()
    seed = 7
    numpy.random.seed(seed)

    column_names = ["preg", "plas", "pres", "skin", "insu", "mass", "pedi",
                    "age", "class"]
    with open('Diabetes Dataset/pima-indians-diabetes.csv') as f:
        data = pandas.read_csv(f, sep=',', names=column_names)

    X, y = data.iloc[:, :6], data.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.30, random_state=0)
    # `.values` replaces `as_matrix()`, which was removed from pandas.
    X_train = X_train.values
    X_test = X_test.values
    y_test = y_test.values
    y_train = y_train.values

    # ICA: fit on the training data only. Refitting on the test split would
    # learn a different, unrelated set of components.
    ica = decomposition.FastICA(n_components=6, whiten=True)
    X_train = ica.fit_transform(X_train)
    X_test = ica.transform(X_test)

    model = Sequential()
    model.add(Dense(15, input_dim=6, init='uniform', activation='relu'))
    model.add(Dense(6, init='uniform', activation='linear'))
    model.add(Dense(4, init='uniform', activation='relu'))
    model.add(Dense(1, init='uniform', activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])

    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        nb_epoch=50, batch_size=20, verbose=0)
    print("--- %s seconds ---" % (time.time() - start_time))
    print(history.history.keys())

    # One figure for accuracy, one for loss; train and validation curves.
    for key, label in (('acc', 'accuracy'), ('loss', 'loss')):
        plt.plot(history.history[key])
        plt.plot(history.history['val_' + key])
        plt.title('model ' + label)
        plt.ylabel(label)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

    scores = model.evaluate(X_test, y_test, verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
    print("Train acc", history.history["acc"])
def dim_down(self, method='tsne', ndim=2, rand_seed=6):
    """
    Reduce ``self.data`` to ``ndim`` dimensions.

    :param method: selected method of dimension reduction; one of 'tsne',
        'lle', 'mds', 'se', 'ica', 'pca' (all lower or all upper case).
    :param ndim: number of retained dimensions.
    :param rand_seed: seed used by the random number generator.
    :return: embedding space with N cells * d feature.
    :raises ValueError: if ``method`` is not a supported name.
    """
    X = self.data
    # http://scikit-learn.org/stable/modules/manifold.html
    if method in ('tsne', 'TSNE'):
        print(
            "Dimension reduction with t-stochastic neighbor embedding(tSNE).\n"
        )
        V = manifold.TSNE(n_components=ndim, random_state=rand_seed,
                          init='pca').fit_transform(X)
    elif method in ('lle', 'LLE'):
        print("Dimension reduction with locally_linear_embedding(LLE).\n")
        V, _ = manifold.locally_linear_embedding(X, n_neighbors=20,
                                                 n_components=ndim,
                                                 random_state=rand_seed,
                                                 method='modified')
    elif method in ('mds', 'MDS'):
        print("Dimension reduction with Multidimensional scaling(MDS).\n")
        V = manifold.MDS(n_components=ndim, random_state=rand_seed,
                         max_iter=100, n_init=1).fit_transform(X)
    elif method in ('se', 'SE'):
        print("Dimension reduction with Spectral Embedding(SE).\n")
        V = manifold.SpectralEmbedding(
            n_components=ndim, random_state=rand_seed).fit_transform(X)
    # http://scikit-learn.org/stable/modules/decomposition.html
    elif method in ('ica', 'ICA'):
        print(
            "Matrix decomposition with Independent component analysis(FastICA).\n"
        )
        V = decomposition.FastICA(n_components=ndim,
                                  random_state=rand_seed).fit_transform(X)
    elif method in ('pca', 'PCA'):
        print(
            "Matrix decomposition with Principal component analysis(PCA).\n"
        )
        V = decomposition.PCA(n_components=ndim,
                              random_state=rand_seed).fit_transform(X)
    else:
        # An unknown name used to fall through to `return V` and fail with
        # an UnboundLocalError.
        raise ValueError(
            'Unknown dimension reduction method: {!r}'.format(method))
    return V
def _apply_component_analysis(training: EmbeddingCollection,
                              test: EmbeddingCollection,
                              ncomponents: int,
                              analysis='pca'):
    """
    Fit PCA or FastICA on the training matrices and project both sets.

    All matrices of a collection are stacked row-wise, transformed in one
    call and split back into one ``EmbeddingMatrix`` per input matrix.

    Returns:
        ``(training_components, test_components, engine)``;
        ``test_components`` is ``None`` when ``test`` is ``None`` or empty.

    Raises:
        ValueError: On an empty training collection, mutually incompatible
            training matrices, or a test matrix whose ``dim`` differs from
            the training ``dim``.
    """
    # Validate the arguments.
    if len(training) == 0:
        raise ValueError(f'No embedding matrix found for training')
    reference = training[0]
    if not all(reference.is_compatible(em) for em in training[1:]):
        raise ValueError(
            f'Training embedding matrices are not compatible with each other'
        )
    has_test = test is not None and len(test) != 0
    if has_test and any(reference.dim != em.dim for em in test):
        raise ValueError(
            f'Test embedding matrices are not compatible with training matrices'
        )
    ncomponents = min(ncomponents, training[0].dim, training[0].items)

    def _apply(collection, transform_fn):
        # Stack every matrix of the collection into one array.
        rows = collection[0].items
        stacked = np.zeros((len(collection) * rows, collection[0].dim))
        for position, em in enumerate(collection):
            stacked[position * rows:(position + 1) * rows] = em.matrix

        # Apply analysis.
        components = transform_fn(stacked)

        # Split the result back into one EmbeddingMatrix per input.
        result = []
        for position in range(len(collection)):
            piece = EmbeddingMatrix(rows, components.shape[-1])
            piece.matrix = components[position * rows:(position + 1) * rows]
            result.append(piece)
        return result

    # Training data.
    engine = (decomposition.FastICA(n_components=ncomponents)
              if analysis == 'ica'
              else decomposition.PCA(n_components=ncomponents))
    training_comp_collection = _apply(training, engine.fit_transform)
    if not has_test:
        return training_comp_collection, None, engine

    # Test data.
    return training_comp_collection, _apply(test, engine.transform), engine
def NonGaussianICA(array, percent_samples):
    """
    Time FastICA on a leading fraction of ``array`` for several feature
    counts and print a small table of the results.

    Args:
        array: Samples-by-features data; only the first
            ``percent_samples * len(array)`` rows are used.
        percent_samples: Fraction of the rows to keep.

    For every fraction in the module-level ``pct_features_list``, the
    number of components is that fraction of the feature count.
    """
    # Single-argument print calls behave the same on Python 2 and 3.
    print("NonGaussian Independent Component Analysis %s %% of training data."
          % (percent_samples * 100,))
    print("Features\tTime")

    array = array[:int(percent_samples * len(array))]
    for pct in pct_features_list:
        num_features = int(pct * len(array[0]))
        start = time()
        decomposition.FastICA(n_components=num_features).fit_transform(array)
        end = time()
        print("%s \t%s" % (num_features, end - start))
def create_estimator_no_params(self):
    """
    Create a default-configured estimator for ``self.estimator_name`` and
    store it in ``self.estimator``.

    Recognised names: 'K-means', 'EM', 'PCA', 'ICA', 'Random_Projection'
    and 'Dictionary_Learning'. Any other name leaves ``self.estimator``
    untouched.
    """
    if self.estimator_name == 'K-means':
        self.estimator = cluster.KMeans(n_jobs=self.n_jobs)
    elif self.estimator_name == 'EM':
        self.estimator = mixture.GaussianMixture()
    elif self.estimator_name == 'PCA':
        self.estimator = decomposition.PCA()
    elif self.estimator_name == 'ICA':
        self.estimator = decomposition.FastICA()
    elif self.estimator_name == 'Random_Projection':
        # Use the estimator class. `gaussian_random_matrix` is a helper
        # function with required arguments, so calling it bare raises.
        self.estimator = random_projection.GaussianRandomProjection()
    elif self.estimator_name == 'Dictionary_Learning':
        self.estimator = decomposition.DictionaryLearning()
def plot_ica(self):
    """
    Demonstrate blind source separation on three synthetic signals.

    Mixes a sine, a square and a sawtooth signal, recovers them with
    FastICA, computes PCA for comparison and saves a four-panel figure to
    ``ica.jpg``.
    """
    # Generate sample data
    np.random.seed(0)
    n_samples = 2000
    time = np.linspace(0, 8, n_samples)

    sine = np.sin(2 * time)                       # Signal 1: sinusoidal
    square = np.sign(np.sin(3 * time))            # Signal 2: square
    sawtooth = signal.sawtooth(2 * np.pi * time)  # Signal 3: saw tooth

    S = np.c_[sine, square, sawtooth]  # one column per source signal
    S += 0.2 * np.random.normal(size=S.shape)  # Add noise
    S /= S.std(axis=0)  # Standardize data

    # Mix data
    A = np.array([[1, 1, 1], [0.5, 2, 1.0], [1.5, 1.0, 2.0]])  # Mixing matrix
    X = np.dot(S, A.T)  # Generate observations (treated as the observed data)

    # Compute ICA
    ica = decomposition.FastICA(n_components=3)
    S_ = ica.fit_transform(X)  # Reconstruct signals
    A_ = ica.mixing_  # Get estimated mixing matrix

    # We can `prove` that the ICA model applies by reverting the unmixing.
    assert np.allclose(X, np.dot(S_, A_.T) + ica.mean_)

    # For comparison, compute PCA
    pca = decomposition.PCA(n_components=3)
    H = pca.fit_transform(X)  # Reconstruct signals based on orthogonal components

    # Plot results
    plt.figure()
    panels = [
        ('Observations (mixed signal)', X),
        ('True Sources', S),
        ('ICA recovered signals', S_),
        ('PCA recovered signals', H),
    ]
    colors = ['red', 'steelblue', 'orange']
    for row, (name, model) in enumerate(panels, 1):
        plt.subplot(4, 1, row)
        plt.title(name)
        for sig, color in zip(model.T, colors):
            plt.plot(sig, color=color)

    plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.46)
    plt.savefig("ica.jpg")
def __call__(self, timeseries):
    # Decompose `timeseries` with PCA followed by FastICA and return a copy
    # of it holding the resulting components.
    #
    # Returns the string 'Error' when PCA raises LinAlgError in the
    # non-latent branch; otherwise returns the modified copy `out`.
    # NOTE(review): this is Python 2 code (print statement below).
    self.pca = sld.PCA(n_components=self.variance)
    if self.latent_series:
        # Fit on the base of the latent series, then project the
        # timecourses onto the PCA components.
        base = self.pca.fit_transform(timeseries.base.timecourses.T)
        time = np.dot(self.pca.components_, timeseries.timecourses.T)
    else:
        try:
            base = self.pca.fit_transform(timeseries.timecourses.T)
        except np.linalg.LinAlgError:
            return 'Error'
        time = self.pca.components_
    # Total explained variance ratio of the retained PCA components.
    self.obj = float(np.sum(self.pca.explained_variance_ratio_))
    # Move the per-component scale from `base` into `time`.
    normed_base = base / np.sqrt(self.pca.explained_variance_)
    normed_time = time * np.sqrt(
        self.pca.explained_variance_.reshape((-1, 1)))
    print normed_base.shape, normed_time.shape

    # ICA on the rescaled PCA output; whitening is disabled here.
    self.ica = sld.FastICA(whiten=False)
    self.ica.fit(normed_base)

    out = timeseries.copy()
    base = self.ica.components_.T
    # For each row of `base`, pick its entry of largest magnitude, then
    # divide the row by it so that entry becomes 1.
    new_norm = np.diag(base[:, np.argmax(np.abs(base), 1)])
    base /= new_norm.reshape((-1, 1))

    time = self.ica.transform(normed_time.T)
    # Apply the same factors to the columns of `time`.
    time *= new_norm
    # Flip signs so every column of `time` has a non-negative sum; `base`
    # rows are flipped with the same signs.
    timesign = np.sign(np.sum(time, 0))
    time *= timesign
    base *= timesign.reshape((-1, 1))

    out.timecourses = time
    out.label_objects = ['mode' + str(i) for i in range(base.shape[0])]
    out.shape = (len(out.label_objects), )
    out.typ = 'latent_series'
    out.name += '_sica'
    out.base = TimeSeries(base, shape=timeseries.shape, name=out.name,
                          label_sample=out.label_objects)
    if self.latent_series:
        out.base.shape = timeseries.base.shape
    out.reconstruction_error = self.obj
    return out
def configure(self, feats):
    """
    Initialise ``self.mean`` and ``self.projection`` from a FastICA fit of
    ``feats``.

    ``feats`` is flattened over all but its last axis before fitting. The
    mean over axes 0, 1 and 2 is stored in ``self.mean`` and is asserted
    to equal the mean estimated by FastICA.
    """
    # Precision errors with float32
    feats = tf.cast(feats, tf.float64)

    mean64 = tf.reduce_mean(feats, axis=[0, 1, 2], keepdims=True)
    self.mean.assign(tf.cast(mean64, tf.float32))

    ica = decomposition.FastICA(n_components=self.out_dim)
    shape = tf.shape(feats)
    n_samples = tf.reduce_prod(shape[:-1])
    feat_dim = shape[-1]
    flat_feats = tf.reshape(feats, [n_samples, feat_dim])
    ica.fit(flat_feats)

    expected_mean = tf.constant(ica.mean_, dtype=tf.float32)
    tf.debugging.assert_equal(tf.squeeze(self.mean), expected_mean)

    weights = tf.constant(ica.components_.T, dtype=self.projection.dtype)
    self.projection.assign(weights)
def choose_decomposition_method(method, n_components):
    """Return the decomposition corresponding to `method`.

    Supported names: 'PCA', 'Randomized PCA', 'Kernel PCA', 'Sparse PCA',
    'SVD', 'Factor Analysis' and 'ICA'.

    Raises:
        ValueError: If `method` is not one of the supported names.
    """
    if method == 'PCA':
        return decomposition.PCA(n_components)
    elif method == 'Randomized PCA':
        # `RandomizedPCA` is missing from recent scikit-learn releases;
        # fall back to PCA with the randomized SVD solver there.
        randomized_pca = getattr(decomposition, 'RandomizedPCA', None)
        if randomized_pca is not None:
            return randomized_pca(n_components)
        return decomposition.PCA(n_components, svd_solver='randomized')
    elif method == 'Kernel PCA':
        return decomposition.KernelPCA(n_components, kernel='rbf')
    elif method == 'Sparse PCA':
        return decomposition.SparsePCA(n_components, n_jobs=1)
    elif method == 'SVD':
        return decomposition.TruncatedSVD(n_components)
    elif method == 'Factor Analysis':
        return decomposition.FactorAnalysis(n_components)
    elif method == 'ICA':
        return decomposition.FastICA(n_components)
    raise ValueError('{} is not a known method'.format(method))
def reduce(self, dataSet):
    """
    Load ``dataSet`` and reduce its features with FastICA.

    The estimator is configured from this object's settings and stored in
    ``self.model``. The transformed features are stored in ``self.X`` and
    returned; the feature count is recorded in ``self.nFeatures``.
    """
    self.dataSet = dataSet
    features, _targets = dataSet.load()
    self.nFeatures = len(features.columns)

    settings = dict(
        n_components=self.nComponents,
        algorithm=self.algorithm,
        whiten=self.whiten,
        fun=self.fun,
        fun_args=self.funArgs,
        max_iter=self.maxIter,
        tol=self.tol,
        w_init=self.wInit,
        random_state=self.randomState,
    )
    self.model = decomposition.FastICA(**settings)
    self.X = self.model.fit_transform(features)
    return self.X