def init_rois(self, n_components=100, show=False):
    # greedy initialization of spatial (Ain) and temporal (Cin) components
    Ain, Cin, center = greedyROI2d(self.Y, nr=n_components, gSig=[2, 2], gSiz=[7, 7], use_median=False)
    Cn = np.mean(self.Y, axis=-1)
    if show:
        pl1 = pl.imshow(Cn, interpolation='none')
        pl.colorbar()
        pl.scatter(x=center[:, 1], y=center[:, 0], c='m', s=40)
        pl.axis((-0.5, self.Y.shape[1] - 0.5, -0.5, self.Y.shape[0] - 0.5))
        pl.gca().invert_yaxis()
    active_pixels = np.squeeze(np.nonzero(np.sum(Ain, axis=1)))
    Yr = np.reshape(self.Y, (self.Y.shape[0] * self.Y.shape[1], self.Y.shape[2]), order='F')
    P = arpfit(Yr, p=2, pixels=active_pixels)
    # rank-1 NMF of the non-negative residual gives the background temporal component
    Y_res = Yr - np.dot(Ain, Cin)
    model = ProjectedGradientNMF(n_components=1, init='random', random_state=0)
    model.fit(np.maximum(Y_res, 0))
    fin = model.components_.squeeze()
    self.Yr, self.Cin, self.fin, self.Ain, self.P, self.Cn = Yr, Cin, fin, Ain, P, Cn
def matdecomp(imregion, method):
    """Compute a matrix decomposition of an image region.

    Parameters
    ----------
    imregion : 2D array
        The image region data.
    method : str
        Decomposition method, one of 'eigen' or 'NMF'.
    """
    if method == 'eigen':
        # columns are eigenvectors
        e_vals, e_vecs = LA.eig(imregion)
        return e_vecs
    if method == 'NMF':
        model = ProjectedGradientNMF(n_components=2, init='random', random_state=0)
        model.fit(imregion)
        comp = model.components_
        err = model.reconstruction_err_
        return comp
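# A minimal usage sketch for matdecomp, assuming the enclosing module imports
# numpy.linalg as LA and ProjectedGradientNMF (scikit-learn <= 0.18); the toy
# patch below is illustrative only:
import numpy as np
patch = np.abs(np.random.rand(4, 4))      # non-negative toy "image region"
eigvecs = matdecomp(patch, 'eigen')       # columns are eigenvectors of the patch
components = matdecomp(patch, 'NMF')      # two non-negative NMF components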
class NMF(method.Method):

    def __init__(self, params):
        self.params = params
        self.dec = ProjectedGradientNMF(**params)

    def __str__(self):
        return "Non-negative matrix factorization by Projected Gradient (NMF)"

    def train(self, data):
        """
        Train the NMF on the whitened data

        :param data: whitened data, ready to use
        """
        self.dec.fit(data)

    def encode(self, data):
        """
        Encode the ready-to-use data

        :returns: encoded data with dimension n_components
        """
        return self.dec.transform(data)

    def decode(self, components):
        """
        Decode the components to return whitened reconstructed data

        :returns: reconstructed data
        """
        return self.dec.inverse_transform(components)
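# Hedged usage sketch for the wrapper above: the params dict is passed straight
# through to ProjectedGradientNMF, and data_white is a placeholder for the
# (non-negative) whitened training data expected by train().
nmf = NMF({'n_components': 8, 'init': 'nndsvd', 'random_state': 0})
nmf.train(data_white)              # fit the factorization
codes = nmf.encode(data_white)     # (n_samples, 8) encoding
recon = nmf.decode(codes)          # back to the original feature space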
def _nmf(X, K):
    # Factorize X ~ A.dot(B) with K components; B holds the NMF basis,
    # A is recovered by projecting X onto the pseudo-inverse of B.
    nmf = ProjectedGradientNMF(n_components=K, max_iter=1000)
    nmf.fit(X)
    B = nmf.components_
    A = np.dot(X, np.linalg.pinv(B))
    return (A, B)
class SparseApproxSpectrumNonNegative(SparseApproxSpectrum):
    """Non-negative sparse dictionary learning from 2D spectrogram patches

    initialization:
        patch_size=(12,12) - size of time-frequency 2D patches in spectrogram units (freq,time)
        max_samples=1000000 - if num audio patches exceeds this threshold, randomly sample spectrum
    """
    def __init__(self, patch_size=(12, 12), max_samples=1000000):
        self.patch_size = patch_size
        self.max_samples = max_samples
        self.D = None
        self.data = None
        self.components = None
        self.zscore = False
        self.log_amplitude = False

    def extract_codes(self, X, n_components=16, log_amplitude=True, **nmf_args):
        """Given a spectrogram, learn a dictionary of 2D patch atoms from spectrogram data

        inputs:
            X - spectrogram data (frequency x time)
            n_components - how many components to extract [16]
            log_amplitude - whether to apply log amplitude scaling log(1+X)
            **nmf_args - keyword arguments for ProjectedGradientNMF(...) [None]
        outputs:
            self.data - 2D patches of input spectrogram
            self.D.components_ - dictionary of 2D NMF components
        """
        zscore = False
        self._extract_data_patches(X, zscore, log_amplitude)
        self.n_components = n_components
        nmf_args.setdefault('sparseness', 'components')
        nmf_args.setdefault('init', 'nndsvd')
        nmf_args.setdefault('beta', 0.5)
        print("NMF...")
        self.model = ProjectedGradientNMF(n_components=self.n_components, **nmf_args)
        self.model.fit(self.data)
        self.D = self.model

    def reconstruct_spectrum(self, w=None, randomize=False):
        "reconstruct by fitting current NMF 2D dictionary to self.data"
        if w is None:
            self.w = self.model.transform(self.data)
            w = self.w
        return SparseApproxSpectrum.reconstruct_spectrum(self, w=w, randomize=randomize)
def fit(self, trainSamples, trainTargets):
    self.dataModel = MemeryDataModel(trainSamples, trainTargets)
    # print('train user:' + str(self.dataModel.getUsersNum()))
    V = self.dataModel.getData()
    model = ProjectedGradientNMF(n_components=self.factors, max_iter=1000, nls_max_iter=1000)
    # a single fit_transform both fits the model and returns the user factors;
    # the item factors come from the same fitted model
    self.pu = model.fit_transform(V)
    self.qi = model.components_.transpose()
def recommend(matrix_3filled, matrix_raw, user, numOfNeighbors=5):
    # The next lines use scikit-learn; for more information, refer to the documentation link in README.
    model = ProjectedGradientNMF(n_components=2, init='random', random_state=0)
    # transformed is the reconstruction from the non-negative matrix factorization,
    # and we will use it for the recommendations
    transformed = np.dot(model.fit_transform(matrix_3filled), model.components_)
    neighbors = []
    # Calculate distances from the current user to every other user.
    distances = np.sum((transformed - transformed[user])**2, axis=1)
    # Find nearest neighbors.
    for x in xrange(numOfNeighbors):
        distances[np.argmin(distances)] = sys.float_info.max
        neighbors.append(np.argmin(distances))
    # Get an average for nearest neighbors. average is a vector containing the average rating for each humor.
    average = np.zeros(transformed.shape[1])
    for x in xrange(numOfNeighbors):
        average += transformed[neighbors[x]]
    average = average / numOfNeighbors
    # Find the unrated items for the current user.
    unratedItems = []
    for x in xrange(np.shape(matrix_raw)[1]):
        if matrix_raw[user][x] == 0:
            unratedItems.append(x)
    # If there are no unrated items, just return an item with max average rating.
    if len(unratedItems) == 0:
        item = np.argmax(average)
        return item
    # Else, return an unrated item with max average rating.
    else:
        maxAverage = 0
        item = np.argmax(average)
        for x in xrange(len(unratedItems)):
            if average[unratedItems[x]] > maxAverage:
                maxAverage = average[unratedItems[x]]
                item = unratedItems[x]
        return item
def matrixFactorization(inmatrix, p_components=False):
    from sklearn.decomposition import PCA
    from sklearn.decomposition import ProjectedGradientNMF

    if p_components:
        p_comp = p_components
    else:
        # choose the number of components that explains at most 90% of the variance
        pca = PCA(n_components=inmatrix.shape[1])
        pca.fit(inmatrix)
        explained_variance = pca.explained_variance_ratio_.cumsum()
        explained_variance = explained_variance[explained_variance <= .9]
        p_comp = len(explained_variance)

    model = ProjectedGradientNMF(n_components=p_comp, init='nndsvd', beta=1, sparseness=None)
    model.fit(inmatrix)
    return model
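# Hedged usage sketch for matrixFactorization on a toy non-negative matrix
# (the array is illustrative; ProjectedGradientNMF requires scikit-learn <= 0.18):
import numpy as np
ratings = np.abs(np.random.rand(20, 10))
nmf_model = matrixFactorization(ratings)   # rank picked from the 90% PCA variance cutoff
W = nmf_model.transform(ratings)           # sample loadings
H = nmf_model.components_                  # non-negative components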
def decomposition(V, W, H, n_components, solver='mu', update_H=True):
    if solver != 'project':
        W, H, _ = non_negative_factorization(V, W=W, H=H, n_components=n_components,
                                             update_H=update_H, max_iter=1000, solver=solver)
        # regularization='transformation', l1_ratio=0.1)
    else:
        model = ProjectedGradientNMF(n_components=n_components, init='random', random_state=0,
                                     sparseness='data', beta=0, max_iter=100000)
        # a single fit_transform both fits the model and returns W; H is the fitted components
        W = model.fit_transform(V)
        H = model.components_
    return W, H
class NMFSpectrum(SparseApproxSpectrum):
    def __init__(self, **kwargs):
        SparseApproxSpectrum.__init__(self, **kwargs)

    def extract_codes(self, X, **kwargs):
        self.standardize = False
        self._extract_data_patches(X)
        kwargs.setdefault('sparseness', 'components')
        kwargs.setdefault('init', 'nndsvd')
        kwargs.setdefault('beta', 0.5)
        print("NMF...")
        self.model = ProjectedGradientNMF(n_components=self.n_components, **kwargs)
        self.model.fit(self.data)
        self.D = self.model
        return self

    def reconstruct_spectrum(self, w=None, randomize=False):
        if w is None:
            self.w = self.model.transform(self.data)
            w = self.w
        return SparseApproxSpectrum.reconstruct_spectrum(self, w=w, randomize=randomize)
def _nmf_fixed_component(self, i, X):
    """
    Uses sklearn to make the non-negative factorization
    input: i, number of clusters for this NMF instance
    author: Arthur Desjardins
    """
    model = ProjectedGradientNMF(n_components=i, init='nndsvd')
    model.fit(X)
    # H-matrix (clusters x words)
    H = model.components_
    # W-matrix (documents x clusters)
    W = model.transform(X)
    # word matrix
    words = open(attributFile).read().split()
    # build a very basic cluster summary: most relevant word per cluster and
    # the number of documents assigned to each cluster
    most_relevant_words = np.argmax(H, axis=1)
    docs_per_cluster = [0] * i
    for tweet in W:
        most_relevant_cluster = np.argmax(tweet)
        docs_per_cluster[most_relevant_cluster] += 1
    clusters = dict((words[most_relevant_words[c]], docs_per_cluster[c]) for c in range(i))
    return clusters
def _nonNegativeFactorization(self):
    """
    Uses sklearn to make the non-negative factorization
    """
    print 'Loading data..'
    X = np.asmatrix(np.loadtxt(dataFile))
    print 'Data loaded. Making model..'
    model = ProjectedGradientNMF(init='nndsvd')
    print 'Fitting model..'
    model.fit(X)
    print 'Model fit'
    print 'Error rate is', model.reconstruction_err_
    # H-matrix
    outFile1 = open(factoredHMatrix, 'w')
    np.savetxt(outFile1, model.components_, fmt='%i')
    outFile1.close()
    # W-matrix
    outFile2 = open(factoredWMatrix, 'w')
    np.savetxt(outFile2, model.transform(X), fmt='%i')
    outFile2.close()
def perform_nmf(X, w_dir):
    # factorize composition into components
    print "Performing NMF..."
    n_com = 48
    model = ProjectedGradientNMF(n_components=n_com, sparseness='data', beta=1, eta=0.9,
                                 tol=0.000001, max_iter=2000, nls_max_iter=5000,
                                 random_state=None)
    model.fit(X)
    print model.reconstruction_err_
    nmf_components = model.components_
    print "done."
    # visualize base rules
    # nmf_components = project_data(nmf_components)
    f_name = w_dir + "base_rules_48.png"
    visualize_base_rules(nmf_components, n_com, f_name)
    return model
def train_model(self):
    print 'begin'
    RATE_MATRIX = np.zeros((9238, 7973))
    for line in self.train.values:
        print line
        uid = int(float(line[1]))
        iid = int(float(line[2]))
        RATE_MATRIX[uid][iid] = int(float(line[3]))
    V = spr.csr_matrix(RATE_MATRIX)
    model = ProjectedGradientNMF(n_components=self.n_features, max_iter=1000, nls_max_iter=10000)
    # a single fit_transform both fits the model and returns the user factors;
    # the item factors come from the same fitted model
    self.pu = model.fit_transform(V)
    self.qi = model.components_.transpose()
    print model.reconstruction_err_
    self.ValidateF1()
    t = pd.DataFrame(np.array(self.pu))
    t.to_csv('50pu')
    t = pd.DataFrame(np.array(self.qi))
    t.to_csv('50qi')
    print("model generation over")
Ain, Cin, center = greedyROI2d(Y, nr=nr, gSig=[4, 4], gSiz=[9, 9])
t_elGREEDY = time() - t1
#%% arpfit
active_pixels = np.squeeze(np.nonzero(np.sum(Ain, axis=1)))
Yr = np.reshape(Y, (d1 * d2, T), order='F')
p = 2
P = arpfit(Yr, p=2, pixels=active_pixels)
#%% nmf
Y_res = Yr - np.dot(Ain, Cin)
model = ProjectedGradientNMF(n_components=1, init='random', random_state=0)
model.fit(np.maximum(Y_res, 0))
fin = model.components_.squeeze()
#%% update spatial components
t1 = time()
A, b = update_spatial_components(Yr, Cin, fin, Ain, d1=d1, d2=d2, sn=P['sn'])
t_elSPATIAL = time() - t1
#%%
t1 = time()
C, f, Y_res, Pnew = update_temporal_components(Yr, A, b, Cin, fin, ITER=2)
t_elTEMPORAL1 = time() - t1
#%% solving using spgl1 for deconvolution
import numpy as np
X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import ProjectedGradientNMF

model = ProjectedGradientNMF(n_components=10, init='random', random_state=0)
model.fit(X)
print model.components_
U = X.dot(model.components_.T)
print U
print U.dot(model.components_)
model.reconstruction_err_

model = ProjectedGradientNMF(n_components=2, sparseness='components', init='random',
                             random_state=0)
model.fit(X)
# ProjectedGradientNMF(beta=1, eta=0.1, init='random', max_iter=200,
#                      n_components=2, nls_max_iter=2000, random_state=0,
#                      sparseness='components', tol=0.0001)
model.components_
model.reconstruction_err_
import numpy as np
X = np.array([[1, 1, 2, 3], [2, 1, 4, 5], [3, 2, 4, 5], [4, 1, 2, 1], [5, 4, 3, 1], [6, 1, 4, 3]])
from sklearn.decomposition import ProjectedGradientNMF

model = ProjectedGradientNMF(n_components=2, init='random', random_state=0)
print model.fit(X)
# ProjectedGradientNMF(beta=1, eta=0.1, init='random', max_iter=200,
#                      n_components=2, nls_max_iter=2000, random_state=0, sparseness=None,
#                      tol=0.0001)
print model.components_
# array([[ 0.77032744,  0.11118662],
#        [ 0.38526873,  0.38228063]])
print model.reconstruction_err_
# 0.00746...
W = model.fit_transform(X)
H = model.components_
print 'w: ' + str(W)
print 'h: ' + str(H)

model = ProjectedGradientNMF(n_components=2, sparseness='components', init='random', random_state=0)
print model.fit(X)
# ProjectedGradientNMF(beta=1, eta=0.1, init='random', max_iter=200,
#                      n_components=2, nls_max_iter=2000, random_state=0,
#                      sparseness='components', tol=0.0001)
def select_features_nmf(train_X, train_y, test_X, k):
    # reduce both splits to k non-negative components, fit on the training data only
    selector = ProjectedGradientNMF(n_components=k, init='nndsvd', random_state=42)
    selector.fit(train_X)
    train_X = selector.transform(train_X)
    test_X = selector.transform(test_X)
    return train_X, test_X
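# Hedged usage sketch for select_features_nmf on toy non-negative data
# (shapes are illustrative; train_y is unused by the reducer but kept for the signature):
import numpy as np
train_X = np.abs(np.random.rand(100, 30))
test_X = np.abs(np.random.rand(25, 30))
train_y = np.random.randint(0, 2, size=100)
train_Xr, test_Xr = select_features_nmf(train_X, train_y, test_X, k=5)
# train_Xr has shape (100, 5), test_Xr has shape (25, 5)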
#### THEIRS - not needed
# Example data matrix X

### MINE
X = pa.DataFrame(matrix)  # DataFrame(toy_vals, index=range(nrows), columns=range(ncols))
X_imputed = X.copy()
### use some way to mask only a few vals... that too, either 0 or 1
msk = (X.values + np.random.randn(*X.shape) - X.values) < 0.8
X_imputed.values[~msk] = 0

## THEIRS
# Hiding values to test imputation
# Initializing model
nmf_model = ProjectedGradientNMF(n_components=600, init='nndsvda', random_state=0,
                                 max_iter=300, eta=0.01, alpha=0.01)
nmf_model.fit(X_imputed.values)

# iterate model
# while nmf_model.reconstruction_err_**2 > 10:
#     nmf_model = NMF(n_components=600, init='nndsvda', random_state=0, max_iter=300, eta=0.01, alpha=0.01)
W = nmf_model.fit_transform(X_imputed.values)
X_imputed.values[~msk] = W.dot(nmf_model.components_)[~msk]
print nmf_model.reconstruction_err_
H = nmf_model.components_
rHat = np.dot(W, H)
np.savetxt("rHat.txt", rHat)
def driver_movie_data_test_sklearn(train_filename, test_filename, k):
    (A, movie_ids, user_ids, m_count, u_count) = read_data(train_filename)
    # Do nnmf
    # (U1, V1) = hack_nmf_iter(A, k, .07, 16*A.nnz)
    model = ProjectedGradientNMF(n_components=k)
    model.fit(A)
    V1 = model.components_
    U1 = model.transform(A)
    print A.shape
    print U1.shape
    print V1.shape
    # Read test data
    (A, movie_ids, user_ids, m_count, u_count) = read_data(test_filename, movie_ids, user_ids,
                                                           m_count, u_count, discard=True)
    (error, del_U, del_V, random_pairs) = evaluate_gradients(A, U1, V1, .07, 16 * A.nnz, hard=True)
    reverse_user = inverse_map(user_ids)
    reverse_movie = inverse_map(movie_ids)
    # Test on ratings!
    outfile = open("test.sklearn.predictions", "w")
    print("Doing %d test ratings" % A.nnz)
    (n, m) = A.shape
    for row in xrange(n):
        for row_col_index in xrange(A.indptr[row], A.indptr[row + 1]):
            col = A.indices[row_col_index]
            elt = A.data[row_col_index]
            print >> outfile, "%s,%s,%0.5f" % (reverse_movie[row], reverse_user[col],
                                               nd.dot(U1[row, :], V1[:, col]))
    # Test on completely random pairs
    outfile = open("test.sklearn.rndpairs.predictions", "w")
    for n_pairs in xrange(1000):
        row = r.randint(0, n - 1)
        col = r.randint(0, m)
        print >> outfile, "%s,%s,%0.5f" % (reverse_movie[row], reverse_user[col],
                                           nd.dot(U1[row, :], V1[:, col]))
    # Test on a difficult distribution that emphasizes non-rated pairs where movies and users
    # are chosen based on rating count.
    outfile = open("test.sklearn.hard.rndpairs.predictions", "w")
    for n_pairs in xrange(1000):
        i = r.randint(0, A.nnz - 1)
        row = find_index(A.indptr, i)
        j = r.randint(0, A.nnz - 1)
        col = A.indices[j]
        if row > A.shape[0] - 1:
            print row, A.shape, "what is going on"
            continue
        if col > A.shape[1] - 1:
            print col, A.shape, "what is going on"
            continue
        # print "shape,row,col", A.shape, row, col
        # if A[row][col] > 0:
        #     continue
        print >> outfile, "%s,%s,%0.5f" % (reverse_movie[row], reverse_user[col],
                                           nd.dot(U1[row, :], V1[:, col]))
    print("test rmse", math.sqrt(error))
    for i in xrange(k):
        print("Factor:", i)
        print_movie_factor(U1, reverse_movie, i)
    return (U1, V1, reverse_movie, reverse_user)
if ans != "y": exit() from sklearn.cluster import MiniBatchKMeans, KMeans km = MiniBatchKMeans(n_clusters=k, init='k-means++', n_init=1, init_size=1000, batch_size=1000, verbose=1) km2 = KMeans(n_clusters=k, init='k-means++', verbose=1) y2 = km2.fit_transform(X) topics5 = [[(km.cluster_centers_[l][i], feature_names[i]) for i in np.argsort(-np.abs(km.cluster_centers_[l]))[:10]] for l in range(k)] print topics5 ### NMF ####################### ans = raw_input("Start NMF with Scikit ? ") if ans != "y": exit() from sklearn.decomposition import ProjectedGradientNMF # BEWARE : THIS IS COMPUTATIONNALY INTENSIVE nmf = ProjectedGradientNMF(n_components=k, max_iter=10, nls_max_iter=100) nmf.fit(X) topics6 = [[(nmf.components_[l][i], feature_names[i]) for i in np.argsort(-np.abs(nmf.components_[l]))[:10]] for l in range(k)]
plt1 = plt.imshow(Cn, interpolation='none')
plt.colorbar()
plt.scatter(x=center[:, 1], y=center[:, 0], c='m', s=40)
plt.axis((-0.5, d2 - 0.5, -0.5, d1 - 0.5))
plt.gca().invert_yaxis()
#%%
crd = plot_contours(coo_matrix(Ain[:, ::-1]), Cn, thr=0.9)
#%%
active_pixels = np.squeeze(np.nonzero(np.sum(Ain, axis=1)))
Yr = np.reshape(Y, (d1 * d2, T), order='F')
p = 2
P = arpfit(Yr, p=1, pixels=active_pixels)
Y_res = Yr - np.dot(Ain, Cin)
model = ProjectedGradientNMF(n_components=1, init='random', random_state=0)
model.fit(np.maximum(Y_res, 0))
fin = model.components_.squeeze()
#%%
t1 = time()
A, b, Cin = update_spatial_components(Yr, Cin, fin, Ain, d1=d1, d2=d2, sn=P['sn'],
                                      dist=2, max_size=8, min_size=3)
t_elSPATIAL = time() - t1
#%%
crd = plot_contours(A, Cn2, thr=0.9, cmap=pl.cm.gray)
#%%
t1 = time()
C, f, Y_res, Pnew = update_temporal_components(Yr, A, b, Cin, fin, ITER=2, deconv_method='spgl1')
t_elTEMPORAL2 = time() - t1
#%%
t1 = time()
A_sp = A.tocsc()
genreMat4 = np.vstack(genreMat4)
print genreMat4
index = filmsbygenre['Action']
E = y[index, :]

### K-Means ###################
ans = raw_input("Start K-Means with Scikit ? ")
if ans != "y":
    exit()

from sklearn.cluster import MiniBatchKMeans, KMeans
km = MiniBatchKMeans(n_clusters=k, init='k-means++', n_init=1, init_size=1000,
                     batch_size=1000, verbose=1)
km2 = KMeans(n_clusters=k, init='k-means++', verbose=1)
y2 = km2.fit_transform(X)
topics5 = [[(km.cluster_centers_[l][i], feature_names[i])
            for i in np.argsort(-np.abs(km.cluster_centers_[l]))[:10]] for l in range(k)]
print topics5

### NMF #######################
ans = raw_input("Start NMF with Scikit ? ")
if ans != "y":
    exit()

from sklearn.decomposition import ProjectedGradientNMF
# BEWARE: THIS IS COMPUTATIONALLY INTENSIVE
nmf = ProjectedGradientNMF(n_components=k, max_iter=10, nls_max_iter=100)
nmf.fit(X)
topics6 = [[(nmf.components_[l][i], feature_names[i])
            for i in np.argsort(-np.abs(nmf.components_[l]))[:10]] for l in range(k)]