Ejemplo n.º 1
0
    def init_rois(self, n_components=100, show=False):
        """Compute initial ROI estimates from ``self.Y`` and cache them on ``self``.

        ``greedyROI2d`` supplies ``Ain``, ``Cin`` and the ROI centres,
        ``arpfit`` is run on the rows of ``Ain`` whose sum is non-zero, and a
        one-component NMF of the rectified residual ``Yr - Ain.Cin`` supplies
        ``fin``.

        Parameters
        ----------
        n_components : int
            Number of ROIs requested from ``greedyROI2d`` (its ``nr``).
        show : bool
            If true, draw the temporal mean image with the centres on top.

        Sets ``self.Yr``, ``self.Cin``, ``self.fin``, ``self.Ain``,
        ``self.P`` and ``self.Cn``; returns nothing.
        """
        spatial, temporal, centers = greedyROI2d(
            self.Y, nr=n_components, gSig=[2, 2], gSiz=[7, 7],
            use_median=False)
        mean_image = np.mean(self.Y, axis=-1)

        if show:
            pl.imshow(mean_image, interpolation='none')
            pl.colorbar()
            pl.scatter(x=centers[:, 1], y=centers[:, 0], c='m', s=40)
            x_max = self.Y.shape[1] - 0.5
            y_max = self.Y.shape[0] - 0.5
            pl.axis((-0.5, x_max, -0.5, y_max))
            pl.gca().invert_yaxis()

        # Row indices of Ain that have a non-zero sum.
        row_sums = np.sum(spatial, axis=1)
        active_pixels = np.squeeze(np.nonzero(row_sums))

        # Flatten the first two axes (column-major) so each row is one pixel.
        flat_shape = (self.Y.shape[0] * self.Y.shape[1], self.Y.shape[2])
        flattened = np.reshape(self.Y, flat_shape, order='F')

        ar_params = arpfit(flattened, p=2, pixels=active_pixels)

        # Single-component NMF on the non-negative part of the residual.
        residual = flattened - np.dot(spatial, temporal)
        nmf = ProjectedGradientNMF(n_components=1, init='random',
                                   random_state=0)
        nmf.fit(np.maximum(residual, 0))
        background = nmf.components_.squeeze()

        self.Yr = flattened
        self.Cin = temporal
        self.fin = background
        self.Ain = spatial
        self.P = ar_params
        self.Cn = mean_image
Ejemplo n.º 2
0
def matdecomp(imregion, method):
    """Compute a matrix decomposition of an image region.

    Parameters
    ----------
    imregion : 2D array
        The image region data.
    method : str
        Which decomposition to run: ``'eigen'`` or ``'NMF'``.

    Returns
    -------
    2D array or None
        * ``'eigen'`` -- the second value returned by ``LA.eig`` (the
          eigenvectors, one per column).
        * ``'NMF'`` -- ``components_`` of a 2-component
          ``ProjectedGradientNMF`` fitted with ``init='random'`` and
          ``random_state=0``.
        * any other value -- ``None``.
    """
    if method == 'eigen':
        # Only the eigenvectors are needed; the eigenvalues are discarded.
        return LA.eig(imregion)[1]

    if method == 'NMF':
        model = ProjectedGradientNMF(n_components=2, init='random',
                                     random_state=0)
        model.fit(imregion)
        return model.components_

    # Unrecognised methods have always yielded None; made explicit here so
    # existing callers keep working.
    return None
Ejemplo n.º 3
0
Archivo: nmf.py Proyecto: kuntzer/sclas
class NMF(method.Method):
    """Adapter exposing ``ProjectedGradientNMF`` through train/encode/decode."""

    def __init__(self, params):
        """
        Keep ``params`` and build the estimator from them.

        :param params: keyword arguments forwarded to ``ProjectedGradientNMF``
        """
        self.params = params
        self.dec = ProjectedGradientNMF(**params)

    def __str__(self):
        return "Non-Negative matrix factorization by Projected Gradient (NMF)"

    def train(self, data):
        """
        Fit the estimator on whitened data.

        :param data: whitened data, ready to use
        """
        estimator = self.dec
        estimator.fit(data)

    def encode(self, data):
        """
        Project ready-to-use data onto the fitted components.

        :returns: encoded data with dimension n_components
        """
        encoded = self.dec.transform(data)
        return encoded

    def decode(self, components):
        """
        Map encoded data back to the whitened data space.

        :returns: reconstructed data
        """
        reconstructed = self.dec.inverse_transform(components)
        return reconstructed
Ejemplo n.º 4
0
def _nmf(X, K):
    """Factor ``X`` with a ``K``-component NMF and return ``(A, B)``.

    ``B`` is the fitted ``components_``.  ``A`` is ``X`` multiplied by the
    pseudo-inverse of ``B``; it is not the estimator's own transform.
    """
    factorizer = ProjectedGradientNMF(n_components=K, max_iter=1000)
    factorizer.fit(X)

    basis = factorizer.components_
    basis_pinv = np.linalg.pinv(basis)
    coefficients = np.dot(X, basis_pinv)

    return (coefficients, basis)
Ejemplo n.º 5
0
class SparseApproxSpectrumNonNegative(SparseApproxSpectrum):
    """NMF-based dictionary learning on 2D spectrogram patches.

    initialization:
        patch_size=(12,12) - size of time-frequency 2D patches in spectrogram units (freq,time)
        max_samples=1000000 - if num audio patches exceeds this threshold, randomly sample spectrum
    """

    def __init__(self, patch_size=(12, 12), max_samples=1000000):
        self.patch_size = patch_size
        self.max_samples = max_samples
        # Placeholders; extract_codes() replaces self.D.
        self.D = None
        self.data = None
        self.components = None
        # Both preprocessing flags start switched off.
        self.zscore = False
        self.log_amplitude = False

    def extract_codes(self, X, n_components=16, log_amplitude=True,
                      **nmf_args):
        """Fit an NMF dictionary to patches extracted from a spectrogram.

        inputs:
            X - spectrogram data (frequency x time)
            n_components - how many components to extract [16]
            log_amplitude - whether to apply log amplitude scaling log(1+X)
            **nmf_args - keyword arguments for ProjectedGradientNMF(...) [None]
        outputs:
            self.data - 2D patches of input spectrogram
            self.D.components_ - dictionary of 2D NMF components
        """
        # The second positional argument (zscore) is always False here.
        self._extract_data_patches(X, False, log_amplitude)
        self.n_components = n_components

        # Defaults apply only to options the caller left out.
        fallbacks = (('sparseness', 'components'),
                     ('init', 'nndsvd'),
                     ('beta', 0.5))
        for option, value in fallbacks:
            nmf_args.setdefault(option, value)

        print("NMF...")
        self.model = ProjectedGradientNMF(n_components=self.n_components,
                                          **nmf_args)
        self.model.fit(self.data)
        self.D = self.model

    def reconstruct_spectrum(self, w=None, randomize=False):
        "reconstruct by fitting current NMF 2D dictionary to self.data"
        if w is None:
            # No codes supplied: compute them from self.data and keep them.
            w = self.w = self.model.transform(self.data)
        return SparseApproxSpectrum.reconstruct_spectrum(
            self, w=w, randomize=randomize)
Ejemplo n.º 6
0
 def fit(self, trainSamples, trainTargets):
     """Factorize the training data into ``self.pu`` and ``self.qi``.

     Builds ``self.dataModel`` from the samples and targets, runs a
     ``self.factors``-component NMF on ``self.dataModel.getData()``, and
     stores the transformed data in ``self.pu`` and the transposed
     ``components_`` in ``self.qi``.
     """
     self.dataModel = MemeryDataModel(trainSamples, trainTargets)
     V = self.dataModel.getData()
     model = ProjectedGradientNMF(n_components=self.factors, max_iter=1000, nls_max_iter=1000)
     # fit_transform() already fits the model.  Fitting a second time (as
     # the previous code did) doubles the cost and, with no fixed
     # random_state, may leave qi belonging to a different factorization
     # than pu.
     self.pu = model.fit_transform(V)
     self.qi = model.components_.transpose()
Ejemplo n.º 7
0
 def fit(self, trainSamples, trainTargets):
     """Learn the two NMF factors of the training data.

     ``self.dataModel`` is rebuilt from the arguments; the matrix returned
     by its ``getData()`` is factorized with ``self.factors`` components.
     ``self.pu`` receives the transformed data, ``self.qi`` the transpose
     of the fitted ``components_``.
     """
     self.dataModel = MemeryDataModel(trainSamples, trainTargets)
     V = self.dataModel.getData()
     model = ProjectedGradientNMF(n_components=self.factors,
                                  max_iter=1000,
                                  nls_max_iter=1000)
     # Take both factors from one fit.  The earlier fit_transform() followed
     # by fit() ran the solver twice, and without a fixed random_state the
     # two runs are not guaranteed to agree.
     self.pu = model.fit_transform(V)
     self.qi = model.components_.transpose()
class SparseApproxSpectrumNonNegative(SparseApproxSpectrum):
    """Non-negative sparse dictionary learning from 2D spectrogram patches
    initialization:
        patch_size=(12,12) - size of time-frequency 2D patches in spectrogram units (freq,time)
        max_samples=1000000 - if num audio patches exceeds this threshold, randomly sample spectrum
    """
    def __init__(self, patch_size=(12, 12), max_samples=1000000):
        self.patch_size = patch_size
        self.max_samples = max_samples
        self.D = None
        self.data = None
        self.components = None
        self.zscore = False
        self.log_amplitude = False

    def extract_codes(self, X, n_components=16, log_amplitude=True, **nmf_args):
        """Given a spectrogram, learn a dictionary of 2D patch atoms from spectrogram data
        inputs:
            X - spectrogram data (frequency x time)
            n_components - how many components to extract [16]
            log_amplitude - whether to apply log amplitude scaling log(1+X)
            **nmf_args - keyword arguments for ProjectedGradientNMF(...) [None]
        outputs:
            self.data - 2D patches of input spectrogram
            self.D.components_ - dictionary of 2D NMF components
        """
        zscore = False
        self._extract_data_patches(X, zscore, log_amplitude)
        self.n_components = n_components
        # Caller-supplied options win; these are only fallbacks.
        nmf_args.setdefault('sparseness', 'components')
        nmf_args.setdefault('init', 'nndsvd')
        nmf_args.setdefault('beta', 0.5)
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("NMF...")
        self.model = ProjectedGradientNMF(n_components=self.n_components, **nmf_args)
        self.model.fit(self.data)
        self.D = self.model

    def reconstruct_spectrum(self, w=None, randomize=False):
        "reconstruct by fitting current NMF 2D dictionary to self.data"
        if w is None:
            # No codes given: transform self.data and remember the result.
            self.w = self.model.transform(self.data)
            w = self.w
        return SparseApproxSpectrum.reconstruct_spectrum(self, w=w, randomize=randomize)
Ejemplo n.º 9
0
def recommend(matrix_3filled, matrix_raw, user, numOfNeighbors=5):
    """Pick one item index to recommend to ``user``.

    The rating matrix is smoothed with a 2-component NMF.  The user's
    nearest rows in the smoothed matrix are averaged, and the best-scoring
    item the user has not rated yet is returned.

    Parameters
    ----------
    matrix_3filled : 2D array-like
        Matrix given to the NMF; one row per user, one column per item.
    matrix_raw : 2D array-like
        Raw ratings; an entry equal to 0 marks an unrated item.
    user : int
        Row index of the user to recommend for.
    numOfNeighbors : int
        How many nearest rows to average.

    Returns
    -------
    int
        Index of the unrated item with the highest (strictly positive)
        neighbour average; if the user has no unrated item, or none has a
        positive average, the index of the overall highest average.
    """
    # The following lines use Scikit-learn. For more information, refer to
    # the documentation link in README.
    model = ProjectedGradientNMF(n_components=2, init='random', random_state=0)

    # fit_transform() fits the model itself, so the extra fit() call the
    # code used to make beforehand was redundant.
    transformed = np.dot(model.fit_transform(matrix_3filled), model.components_)

    # Squared Euclidean distance from the current user to every row.
    distances = np.sum((transformed - transformed[user]) ** 2, axis=1)

    # A stable sort keeps the lowest index first on ties, matching repeated
    # argmin.  The closest row (normally the user itself, at distance 0) is
    # skipped, as before.
    order = np.argsort(distances, kind='mergesort')
    neighbors = order[1:numOfNeighbors + 1]

    # Per-item mean over the neighbours.  The old `list += ndarray` extended
    # the list instead of adding element-wise, so the division that followed
    # raised TypeError.
    if neighbors.size:
        average = transformed[neighbors].mean(axis=0)
    else:
        average = np.zeros(transformed.shape[1])

    # Items the current user has not rated.
    unrated = np.flatnonzero(np.asarray(matrix_raw)[user] == 0)
    if unrated.size:
        best = unrated[np.argmax(average[unrated])]
        if average[best] > 0:
            return best

    # No unrated item (or none with a positive average): fall back to the
    # item with the highest average overall.
    return np.argmax(average)
Ejemplo n.º 10
0
def matrixFactorization(inmatrix, p_components=False):
    """Fit a ``ProjectedGradientNMF`` to ``inmatrix`` and return the model.

    Parameters
    ----------
    inmatrix : 2D array
        Data to factorize.
    p_components : int or False
        Number of NMF components.  When falsy, the number is derived from a
        full PCA: the count of leading principal components whose
        cumulative explained-variance ratio is at most 0.9, with a minimum
        of one.

    Returns
    -------
    ProjectedGradientNMF
        The fitted estimator (``init='nndsvd'``, ``beta=1``,
        ``sparseness=None``).
    """
    from sklearn.decomposition import PCA
    from sklearn.decomposition import ProjectedGradientNMF

    if p_components:
        p_comp = p_components
    else:
        pca = PCA(n_components=inmatrix.shape[1])
        pca.fit(inmatrix)
        cumulative = pca.explained_variance_ratio_.cumsum()
        # If the first component alone exceeds 90% the count is zero, which
        # is not a usable component count, so keep at least one.
        p_comp = max(1, int((cumulative <= .9).sum()))

    model = ProjectedGradientNMF(n_components=p_comp,
                                 init='nndsvd',
                                 beta=1,
                                 sparseness=None)
    model.fit(inmatrix)
    return model
    def init_rois(self, n_components=100, show=False):
        """Run the initialisation steps on ``self.Y`` and store their outputs.

        Steps, in order: ``greedyROI2d`` (gives ``Ain``, ``Cin``, ``center``),
        mean over the last axis (``Cn``), optional plot, ``arpfit`` on the
        column-major flattened data (``P``), and a one-component NMF of the
        residual clipped at zero (``fin``).

        Parameters
        ----------
        n_components : int
            Passed to ``greedyROI2d`` as ``nr``.
        show : bool
            Plot ``Cn`` with the ROI centres when true.
        """
        init_A, init_C, roi_centers = greedyROI2d(
            self.Y, nr=n_components, gSig=[2, 2], gSiz=[7, 7], use_median=False
        )
        avg_frame = np.mean(self.Y, axis=-1)

        if show:
            pl.imshow(avg_frame, interpolation='none')
            pl.colorbar()
            pl.scatter(x=roi_centers[:, 1], y=roi_centers[:, 0], c='m', s=40)
            axis_limits = (-0.5, self.Y.shape[1] - 0.5,
                           -0.5, self.Y.shape[0] - 0.5)
            pl.axis(axis_limits)
            pl.gca().invert_yaxis()

        # Rows of Ain with a non-zero sum are the ones handed to arpfit.
        nonzero_rows = np.nonzero(np.sum(init_A, axis=1))
        active_pixels = np.squeeze(nonzero_rows)

        n_pixels = self.Y.shape[0] * self.Y.shape[1]
        pixel_by_time = np.reshape(self.Y, (n_pixels, self.Y.shape[2]),
                                   order='F')
        fit_params = arpfit(pixel_by_time, p=2, pixels=active_pixels)

        leftover = pixel_by_time - np.dot(init_A, init_C)
        rank_one = ProjectedGradientNMF(n_components=1, init='random',
                                        random_state=0)
        rank_one.fit(np.maximum(leftover, 0))
        init_f = rank_one.components_.squeeze()

        self.Yr, self.Cin, self.fin = pixel_by_time, init_C, init_f
        self.Ain, self.P, self.Cn = init_A, fit_params, avg_frame
Ejemplo n.º 12
0
class NMFSpectrum(SparseApproxSpectrum):
    """``SparseApproxSpectrum`` variant that uses ``ProjectedGradientNMF``."""

    def __init__(self, **kwargs):
        SparseApproxSpectrum.__init__(self, **kwargs)

    def extract_codes(self, X, **kwargs):
        """Extract patches from ``X`` and fit an NMF model to them.

        ``kwargs`` are forwarded to ``ProjectedGradientNMF``; ``sparseness``,
        ``init`` and ``beta`` get defaults when absent.  The fitted model is
        stored as both ``self.model`` and ``self.D``.  Returns ``self``.
        """
        self.standardize = False
        self._extract_data_patches(X)
        kwargs.setdefault('sparseness', 'components')
        kwargs.setdefault('init', 'nndsvd')
        kwargs.setdefault('beta', 0.5)
        # Function-call form: a print statement is a syntax error on
        # Python 3, and this prints the same text on Python 2.
        print("NMF...")
        self.model = ProjectedGradientNMF(n_components=self.n_components, **kwargs)
        self.model.fit(self.data)
        self.D = self.model
        return self

    def reconstruct_spectrum(self, w=None, randomize=False):
        """Reconstruct via the base class, computing ``w`` if it is omitted.

        When ``w`` is ``None`` it is obtained by transforming ``self.data``
        with the fitted model and cached in ``self.w``.
        """
        if w is None:
            self.w = self.model.transform(self.data)
            w = self.w
        return SparseApproxSpectrum.reconstruct_spectrum(self, w=w, randomize=randomize)
def decomposition(V, W, H, n_components, solver='mu', update_H=True):
    """Factorize ``V`` and return ``(W, H)``.

    Parameters
    ----------
    V : 2D array
        Matrix to factorize.
    W, H : arrays or None
        Initial factors; forwarded to ``non_negative_factorization``.  They
        are ignored (and overwritten) when ``solver == 'project'``.
    n_components : int
        Number of components.
    solver : str
        ``'project'`` selects ``ProjectedGradientNMF``; any other value is
        passed through to ``non_negative_factorization``.
    update_H : bool
        Forwarded to ``non_negative_factorization``; unused by the
        ``'project'`` branch.

    Returns
    -------
    tuple
        ``(W, H)``
    """
    if solver == 'project':
        model = ProjectedGradientNMF(n_components=n_components,
                                     init='random',
                                     random_state=0,
                                     sparseness='data',
                                     beta=0,
                                     max_iter=100000)
        # One fit provides both factors.  The old fit() followed by
        # fit_transform() ran the (up to 100000-iteration) solver twice for
        # the same seeded result.
        W = model.fit_transform(V)
        H = model.components_
    else:
        W, H, _ = non_negative_factorization(V,
                                             W=W,
                                             H=H,
                                             n_components=n_components,
                                             update_H=update_H,
                                             max_iter=1000,
                                             solver=solver)
    return W, H
Ejemplo n.º 14
0
class NMFSpectrum(SparseApproxSpectrum):
    """Spectrum approximation whose dictionary is a ``ProjectedGradientNMF``."""

    def __init__(self, **kwargs):
        SparseApproxSpectrum.__init__(self, **kwargs)

    def extract_codes(self, X, **kwargs):
        """Fit the NMF dictionary to patches of ``X``; returns ``self``.

        Extra keyword arguments go to ``ProjectedGradientNMF``.  Missing
        ``sparseness``, ``init`` and ``beta`` entries are filled with
        ``'components'``, ``'nndsvd'`` and ``0.5``.
        """
        self.standardize = False
        self._extract_data_patches(X)

        fallbacks = (('sparseness', 'components'),
                     ('init', 'nndsvd'),
                     ('beta', 0.5))
        for option, value in fallbacks:
            kwargs.setdefault(option, value)

        print("NMF...")
        self.model = ProjectedGradientNMF(n_components=self.n_components,
                                          **kwargs)
        self.model.fit(self.data)
        # self.D is the same object as self.model.
        self.D = self.model
        return self

    def reconstruct_spectrum(self, w=None, randomize=False):
        """Delegate to the base class, deriving ``w`` from ``self.data`` if absent."""
        if w is None:
            w = self.w = self.model.transform(self.data)
        return SparseApproxSpectrum.reconstruct_spectrum(
            self, w=w, randomize=randomize)
Ejemplo n.º 15
0
 def _nmf_fixed_component(self, i, X):
     """
     Uses sklearn to make the non negative factorization
     input: i, number of clusters for this NMF instance
            X, matrix to factorize (documents x words)
     output: dict mapping each cluster's most relevant word to the number
             of documents assigned to that cluster; clusters that share a
             word collapse into one entry (the later cluster wins)
     author: Arthur Desjardins
     """
     model = ProjectedGradientNMF(n_components=i, init='nndsvd')
     model.fit(X)
     #  H-matrix (clusters x words)
     H = model.components_
     # W-matrix (documents x clusters)
     W = model.transform(X)
     # word list; the context manager closes the file, which the previous
     # bare open(...).read() never did explicitly
     with open(attributFile) as word_file:
         words = word_file.read().split()
     # processing extremely basic cluster bush
     most_relevant_words = np.argmax(H, axis=1)
     docs_per_cluster = [0] * i
     for tweet in W:
         most_relevant_cluster = np.argmax(tweet)
         docs_per_cluster[most_relevant_cluster] += 1
     # A separate loop name avoids shadowing the parameter `i`.
     clusters = dict((words[most_relevant_words[c]], docs_per_cluster[c])
                     for c in range(i))
     return clusters
Ejemplo n.º 16
0
    def _nonNegativeFactorization(self):
        """
        Uses sklearn to make the non negative factorization

        Loads the matrix from ``dataFile``, fits a ``ProjectedGradientNMF``
        (``init='nndsvd'``), prints the reconstruction error, and writes the
        H matrix (``components_``) to ``factoredHMatrix`` and the W matrix
        (``transform(X)``) to ``factoredWMatrix``.
        """

        print('Loading data..')
        X = np.asmatrix(np.loadtxt(dataFile))
        print('Data loaded. Making model..')
        model = ProjectedGradientNMF(init='nndsvd')
        print('Fitting model..')
        model.fit(X)
        print('Model fit')

        # Single pre-formatted string: prints the same text on Python 2 and 3.
        print('Error rate is %s' % model.reconstruction_err_)

        # NOTE(review): fmt='%i' writes every value as an integer; confirm
        # that discarding the fractional part of the factors is intended.

        #  H-matrix
        # `with` closes the file; the old `outFile1.close` lacked the call
        # parentheses and therefore never closed anything.
        with open(factoredHMatrix, 'w') as outFile1:
            np.savetxt(outFile1, model.components_, fmt='%i')

        # W-matrix
        with open(factoredWMatrix, 'w') as outFile2:
            np.savetxt(outFile2, model.transform(X), fmt='%i')
Ejemplo n.º 17
0
def perform_nmf(X, w_dir):
    """Fit a 48-component NMF to ``X``, plot its components, return the model.

    Parameters
    ----------
    X : 2D array
        Data to factorize.
    w_dir : str
        Prefix for the output image path.  It is concatenated as-is, so it
        must already end with a path separator.

    Returns
    -------
    ProjectedGradientNMF
        The fitted estimator.
    """
    # factorize composition into components
    # (function-call prints: same output on Python 2, valid on Python 3)
    print("Performing NMF...")
    n_com = 48
    model = ProjectedGradientNMF(n_components=n_com,
                                 sparseness='data',
                                 beta=1,
                                 eta=0.9,
                                 tol=0.000001,
                                 max_iter=2000,
                                 nls_max_iter=5000,
                                 random_state=None)
    model.fit(X)
    print(model.reconstruction_err_)
    nmf_components = model.components_
    print("done.")

    # visualize Base Rules
    # The file name is derived from n_com so the two cannot drift apart;
    # for n_com = 48 it is still "base_rules_48.png".
    f_name = w_dir + "base_rules_%d.png" % n_com
    visualize_base_rules(nmf_components, n_com, f_name)

    return model
Ejemplo n.º 18
0
 def train_model(self):
     """Build the rating matrix from ``self.train``, factorize it, save factors.

     Each row of ``self.train.values`` supplies a user index (column 1), an
     item index (column 2) and a rating (column 3).  The dense 9238 x 7973
     matrix is converted to CSR and factorized with ``self.n_features``
     components.  ``self.pu`` / ``self.qi`` receive the two factors, which
     are also written to the CSV files ``50pu`` and ``50qi`` after
     ``self.ValidateF1()`` has run.
     """
     print('begin')
     RATE_MATRIX = np.zeros((9238, 7973))
     for line in self.train.values:
         print(line)
         uid = int(float(line[1]))
         iid = int(float(line[2]))
         RATE_MATRIX[uid, iid] = int(float(line[3]))
     V = spr.csr_matrix(RATE_MATRIX)
     model = ProjectedGradientNMF(n_components=self.n_features, max_iter=1000, nls_max_iter=10000)
     # Both factors come from the same fit.  The old fit_transform() followed
     # by fit() solved the problem twice, and with no fixed random_state the
     # two solutions need not match.
     self.pu = model.fit_transform(V)
     self.qi = model.components_.transpose()
     print(model.reconstruction_err_)
     self.ValidateF1()
     t = pd.DataFrame(np.array(self.pu))
     t.to_csv('50pu')
     t = pd.DataFrame(np.array(self.qi))
     t.to_csv('50qi')
     print("model generation over")
Ejemplo n.º 19
0
# Greedy initialisation; the result is unpacked as (Ain, Cin, center).
Ain,Cin,center = greedyROI2d(Y, nr = nr, gSig = [4,4], gSiz = [9,9])
# Elapsed time since t1 (t1 is presumably set just before this excerpt).
t_elGREEDY = time()-t1

#%% arpfit


# Indices of the rows of Ain whose sum is non-zero.
active_pixels = np.squeeze(np.nonzero(np.sum(Ain,axis=1)))
# Flatten Y to shape (d1*d2, T) in column-major order.
Yr = np.reshape(Y,(d1*d2,T),order='F')
# NOTE(review): p is not read in this excerpt; arpfit gets a literal p=2.
p = 2;
P = arpfit(Yr,p=2,pixels = active_pixels)

#%% nmf

# One-component NMF of the residual Yr - Ain.Cin, with negative entries
# clipped to zero before fitting.
Y_res = Yr - np.dot(Ain,Cin)
model = ProjectedGradientNMF(n_components=1, init='random', random_state=0)
model.fit(np.maximum(Y_res,0)) 

# The single fitted component, squeezed to drop the length-1 axis.
fin = model.components_.squeeze()

#%% update spatial components

# Timed call; P['sn'] from arpfit is passed as `sn`.
t1 = time()
A,b = update_spatial_components(Yr, Cin, fin, Ain, d1=d1, d2=d2, sn = P['sn'])
t_elSPATIAL = time() - t1

#%% update temporal components
# Timed call using the A and b returned by the spatial update.
t1 = time()
C,f,Y_res,Pnew = update_temporal_components(Yr,A,b,Cin,fin,ITER=2)
t_elTEMPORAL1 = time() - t1

#%%  solving using spgl1 for deconvolution
Ejemplo n.º 20
0
# Small ProjectedGradientNMF walkthrough on a 6 x 2 toy matrix.
import numpy as np
from sklearn.decomposition import ProjectedGradientNMF

X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])

# First model: 10 components requested for a matrix with only 2 columns.
model = ProjectedGradientNMF(n_components=10, init='random', random_state=0)
model.fit(X)

# Function-call prints give the same output on Python 2 and run on Python 3.
print(model.components_)
# Project X onto the components (a plain matrix product, not transform()).
U = X.dot(model.components_.T)
print(U)
print(U.dot(model.components_))
# Bare expression: only displays a value in an interactive session.
model.reconstruction_err_

# Second model: 2 components with sparseness applied to the components.
model = ProjectedGradientNMF(
    n_components=2, sparseness='components', init='random', random_state=0)
model.fit(X)
# Pasted interpreter echo of the fitted estimator.  It used to be live code
# that built and immediately discarded a second estimator:
# ProjectedGradientNMF(beta=1, eta=0.1, init='random', max_iter=200,
#                      n_components=2, nls_max_iter=2000, random_state=0,
#                      sparseness='components', tol=0.0001)
# Bare expressions again; no effect outside an interactive session.
model.components_
model.reconstruction_err_
Ejemplo n.º 21
0

# ProjectedGradientNMF walkthrough on a 6 x 4 toy matrix.
import numpy as np
from sklearn.decomposition import ProjectedGradientNMF

X = np.array([[1,1,2,3], [2, 1,4,5], [3, 2,4,5], [4, 1,2,1], [5, 4,3,1], [6, 1,4,3]])
model = ProjectedGradientNMF(n_components=2, init='random', random_state=0)

# fit() returns the estimator, so this prints its repr.  Function-call
# prints give the same output on Python 2 and run on Python 3.
print(model.fit(X))
#ProjectedGradientNMF(beta=1, eta=0.1, init='random', max_iter=200,
#        n_components=2, nls_max_iter=2000, random_state=0, sparseness=None,
#        tol=0.0001)
print(model.components_)
# The sample output below is 2 x 2, so it cannot come from this 4-column X;
# treat it as an illustration only.
#array([[ 0.77032744,  0.11118662],
#       [ 0.38526873,  0.38228063]])
print(model.reconstruction_err_)
#0.00746...

# Refit and keep both factors: W from fit_transform, H from components_.
W = model.fit_transform(X)
H = model.components_

print('w: ' + str(W))
print('h: ' + str(H))

# Same data, now with sparseness applied to the components.
model = ProjectedGradientNMF(n_components=2, sparseness='components', init='random', random_state=0)


print(model.fit(X))
#ProjectedGradientNMF(beta=1, eta=0.1, init='random', max_iter=200,
#            n_components=2, nls_max_iter=2000, random_state=0,
#            sparseness='components', tol=0.0001)
def select_features_nmf(train_X, train_y, test_X, k):
    """Reduce both feature matrices to ``k`` NMF components.

    The NMF (``init='nndsvd'``, ``random_state=42``) is fitted on
    ``train_X`` only and then applied to ``train_X`` and ``test_X``.
    ``train_y`` is accepted but not used.

    Returns the pair ``(transformed_train, transformed_test)``.
    """
    reducer = ProjectedGradientNMF(n_components=k, init='nndsvd',
                                   random_state=42)
    reducer.fit(train_X)
    reduced_train = reducer.transform(train_X)
    reduced_test = reducer.transform(test_X)
    return reduced_train, reduced_test
Ejemplo n.º 23
0
def driver_movie_data_test_sklearn(train_filename,test_filename,k):
    """Fit a k-component NMF on the training data and write test predictions.

    Three prediction files are written to the current directory:
    ``test.sklearn.predictions`` (every stored entry of the test matrix),
    ``test.sklearn.rndpairs.predictions`` (1000 random row/column pairs)
    and ``test.sklearn.hard.rndpairs.predictions`` (1000 pairs whose row
    and column are each drawn through a random stored entry).

    Parameters
    ----------
    train_filename, test_filename : str
        Paths handed to ``read_data``.
    k : int
        Number of NMF components.

    Returns
    -------
    tuple
        ``(U1, V1, reverse_movie, reverse_user)``
    """
    (A,movie_ids,user_ids,m_count,u_count) = read_data(train_filename)

    # Do nnmf
    model = ProjectedGradientNMF(n_components=k)

    model.fit(A)
    V1 = model.components_
    U1 = model.transform(A)
    # Single-argument print calls: same output on Python 2, valid on Python 3.
    print(A.shape)
    print(U1.shape)
    print(V1.shape)
    # Read test data
    (A,movie_ids,user_ids,m_count,u_count) = read_data(test_filename,movie_ids,user_ids,m_count,u_count,discard=True)
    (error,del_U,del_V,random_pairs) =  evaluate_gradients(A,U1,V1,.07,16*A.nnz,hard=True)

    reverse_user = inverse_map(user_ids)
    reverse_movie = inverse_map(movie_ids)

    def prediction_line(row, col):
        # One output record, newline included (replaces `print >> outfile`).
        return "%s,%s,%0.5f\n" % (reverse_movie[row], reverse_user[col],
                                  nd.dot(U1[row,:], V1[:,col]))

    # Test on Ratings!
    # `with` closes each file; the handles used to be left open.
    print ("Doing %d test ratings" % A.nnz)
    (n,m) = A.shape
    with open("test.sklearn.predictions","w") as outfile:
        for row in range(n):
            for row_col_index in range(A.indptr[row],A.indptr[row+1]):
                col = A.indices[row_col_index]
                outfile.write(prediction_line(row, col))

    # Test on completely random pairs
    # NOTE(review): row is drawn from [0, n-1] but col from [0, m]; if `r`
    # is the stdlib `random` module (inclusive bounds), col == m would be
    # out of range -- confirm which module `r` is.
    with open("test.sklearn.rndpairs.predictions","w") as outfile:
        for _ in range(1000):
            row = r.randint(0,n-1)
            col = r.randint(0,m)
            outfile.write(prediction_line(row, col))

    # Test on difficult distribution that emphasizes non-rated pairs where
    # movies and users are chosen based on rating count.
    with open("test.sklearn.hard.rndpairs.predictions","w") as outfile:
        for _ in range(1000):
            i = r.randint(0,A.nnz -1)
            row = find_index(A.indptr,i)
            j = r.randint(0,A.nnz -1)
            col = A.indices[j]
            if (row > A.shape[0]-1):
                print("%s %s what is going on" % (row, A.shape))
                continue
            if (col > A.shape[1]-1):
                print("%s %s what is going on" % (col, A.shape))
                continue
            outfile.write(prediction_line(row, col))

    print ("test rsme", math.sqrt(error))
    for i in range(k):
        print ("Factor:", i)
        print_movie_factor(U1,reverse_movie, i)
    return(U1,V1,reverse_movie,reverse_user)
Ejemplo n.º 24
0
####THEIRS- not needed
# Example data matrix X

###MINE
# Work on a copy so that X keeps the original values.
X = DataFrame(matrix)
X_imputed = X.copy()
# NOTE(review): X is rebuilt here through the `pa` alias, replacing the
# frame created two lines above; X_imputed still refers to the first copy.
X = pa.DataFrame(matrix)# DataFrame(toy_vals, index = range(nrows), columns = range(ncols))
### use some way to mask only a few values... those too either 0 or 1
# X.values is added and subtracted again, so the mask is effectively
# "random normal draw < 0.8" per cell (up to floating-point rounding).
msk = (X.values + np.random.randn(*X.shape) - X.values) < 0.8
# Cells outside the mask are zeroed before fitting.
X_imputed.values[~msk] = 0


##THEIRS

# Hiding values to test imputation
# Initializing model
nmf_model = ProjectedGradientNMF(n_components = 600, init='nndsvda', random_state=0,max_iter=300, eta=0.01, alpha = 0.01)
nmf_model.fit(X_imputed.values)

# iterate model
#while nmf_model.reconstruction_err_**2 > 10:
    #nmf_model = NMF( n_components = 600, init='nndsvda', random_state=0,max_iter=300, eta=0.01, alpha = 0.01)
# NOTE(review): fit_transform() fits the model again, so the fit() call
# above looks redundant -- confirm before removing.
W = nmf_model.fit_transform(X_imputed.values)
# Fill the zeroed cells with the reconstruction W.H.
X_imputed.values[~msk] = W.dot(nmf_model.components_)[~msk]
print nmf_model.reconstruction_err_

# Full reconstruction, saved as text.
H = nmf_model.components_
rHat = np.dot(W,H)
np.savetxt("rHat.txt" ,rHat) 
Ejemplo n.º 25
0
def driver_movie_data_test_sklearn(train_filename, test_filename, k):
    """Train an NMF on the training ratings and dump predictions for the test set.

    Output files (created in the current directory):

    * ``test.sklearn.predictions`` -- one line per stored test entry
    * ``test.sklearn.rndpairs.predictions`` -- 1000 random pairs
    * ``test.sklearn.hard.rndpairs.predictions`` -- 1000 pairs whose row
      and column are each picked through a random stored entry

    Parameters
    ----------
    train_filename, test_filename : str
        Paths passed to ``read_data``.
    k : int
        Number of NMF components.

    Returns
    -------
    tuple
        ``(U1, V1, reverse_movie, reverse_user)``
    """
    (A, movie_ids, user_ids, m_count, u_count) = read_data(train_filename)

    # Do nnmf
    model = ProjectedGradientNMF(n_components=k)

    model.fit(A)
    V1 = model.components_
    U1 = model.transform(A)
    # Parenthesised single-argument prints behave the same on Python 2 and 3.
    print(A.shape)
    print(U1.shape)
    print(V1.shape)
    # Read test data
    (A, movie_ids, user_ids, m_count, u_count) = read_data(test_filename,
                                                           movie_ids,
                                                           user_ids,
                                                           m_count,
                                                           u_count,
                                                           discard=True)
    (error, del_U, del_V, random_pairs) = evaluate_gradients(A,
                                                             U1,
                                                             V1,
                                                             .07,
                                                             16 * A.nnz,
                                                             hard=True)

    reverse_user = inverse_map(user_ids)
    reverse_movie = inverse_map(movie_ids)

    def format_prediction(row, col):
        # Same text `print >> outfile` produced, trailing newline included.
        return "%s,%s,%0.5f\n" % (reverse_movie[row],
                                  reverse_user[col],
                                  nd.dot(U1[row, :], V1[:, col]))

    # Test on Ratings!
    # Context managers close the files, which were previously left open.
    print("Doing %d test ratings" % A.nnz)
    (n, m) = A.shape
    with open("test.sklearn.predictions", "w") as outfile:
        for row in range(n):
            for row_col_index in range(A.indptr[row], A.indptr[row + 1]):
                col = A.indices[row_col_index]
                outfile.write(format_prediction(row, col))

    # Test on completely random pairs
    # NOTE(review): the row bound is n - 1 but the column bound is m; with
    # an inclusive randint (stdlib `random`) col can equal m, one past the
    # last column -- confirm what `r` is.
    with open("test.sklearn.rndpairs.predictions", "w") as outfile:
        for _ in range(1000):
            row = r.randint(0, n - 1)
            col = r.randint(0, m)
            outfile.write(format_prediction(row, col))

    # Test on difficult distribution that emphasizes non-rated pairs where
    # movies and users are chosen based on rating count.
    with open("test.sklearn.hard.rndpairs.predictions", "w") as outfile:
        for _ in range(1000):
            i = r.randint(0, A.nnz - 1)
            row = find_index(A.indptr, i)
            j = r.randint(0, A.nnz - 1)
            col = A.indices[j]
            if (row > A.shape[0] - 1):
                print("%s %s what is going on" % (row, A.shape))
                continue
            if (col > A.shape[1] - 1):
                print("%s %s what is going on" % (col, A.shape))
                continue
            outfile.write(format_prediction(row, col))

    print("test rsme", math.sqrt(error))
    for i in range(k):
        print("Factor:", i)
        print_movie_factor(U1, reverse_movie, i)
    return (U1, V1, reverse_movie, reverse_user)
Ejemplo n.º 26
0
# Stop unless the user answered "y" to the preceding prompt.
if ans != "y":
    exit()

from sklearn.cluster import MiniBatchKMeans, KMeans
# Two clusterers are configured; only km2 is fitted in this excerpt.
km = MiniBatchKMeans(n_clusters=k,
                     init='k-means++',
                     n_init=1,
                     init_size=1000,
                     batch_size=1000,
                     verbose=1)
km2 = KMeans(n_clusters=k, init='k-means++', verbose=1)
y2 = km2.fit_transform(X)

# For each cluster: the 10 features with the largest absolute centre
# weight, as (weight, feature name) pairs.
# NOTE(review): this reads km.cluster_centers_, but km is never fitted
# above (km2 is) -- confirm whether km2 was intended or a km.fit(X) call
# is missing.
topics5 = [[(km.cluster_centers_[l][i], feature_names[i])
            for i in np.argsort(-np.abs(km.cluster_centers_[l]))[:10]]
           for l in range(k)]
print topics5

### NMF #######################
ans = raw_input("Start NMF with Scikit ? ")
if ans != "y":
    exit()

from sklearn.decomposition import ProjectedGradientNMF
# BEWARE : THIS IS COMPUTATIONALLY INTENSIVE
nmf = ProjectedGradientNMF(n_components=k, max_iter=10, nls_max_iter=100)
nmf.fit(X)

# For each NMF component: the 10 features with the largest absolute
# weight, as (weight, feature name) pairs.
topics6 = [[(nmf.components_[l][i], feature_names[i])
            for i in np.argsort(-np.abs(nmf.components_[l]))[:10]]
           for l in range(k)]
Ejemplo n.º 27
0
# Small ProjectedGradientNMF walkthrough on a 6 x 2 toy matrix.
import numpy as np
X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import ProjectedGradientNMF
# First model: 10 components requested for a matrix with only 2 columns.
model = ProjectedGradientNMF(n_components=10, init='random', random_state=0)
model.fit(X)

print model.components_
# Project X onto the components (a plain matrix product, not transform()).
U = X.dot(model.components_.T)
print U
print U.dot(model.components_)
# Bare expression: only displays a value in an interactive session.
model.reconstruction_err_

# Second model: 2 components with sparseness applied to the components.
model = ProjectedGradientNMF(n_components=2,
                             sparseness='components',
                             init='random',
                             random_state=0)
model.fit(X)
# NOTE(review): this looks like a pasted interpreter echo of the fitted
# estimator; as code it builds a new estimator and discards it.
ProjectedGradientNMF(beta=1,
                     eta=0.1,
                     init='random',
                     max_iter=200,
                     n_components=2,
                     nls_max_iter=2000,
                     random_state=0,
                     sparseness='components',
                     tol=0.0001)
# Bare expressions again; no effect outside an interactive session.
model.components_
model.reconstruction_err_
# Show Cn with a colour bar and overlay the ROI centres.
plt1 = plt.imshow(Cn,interpolation='none')
plt.colorbar()

plt.scatter(x=center[:,1], y=center[:,0], c='m', s=40)
# Axis limits span the d1 x d2 image; the y axis is then inverted.
plt.axis((-0.5,d2-0.5,-0.5,d1-0.5))
plt.gca().invert_yaxis()
#%%
# Contours of Ain (columns reversed, wrapped in a sparse COO matrix) over Cn.
crd = plot_contours(coo_matrix(Ain[:,::-1]),Cn,thr=0.9)
#%%
# Indices of the rows of Ain whose sum is non-zero.
active_pixels = np.squeeze(np.nonzero(np.sum(Ain,axis=1)))
# Flatten Y to shape (d1*d2, T) in column-major order.
Yr = np.reshape(Y,(d1*d2,T),order='F')
# NOTE(review): p is set to 2 but arpfit below is called with a literal
# p=1, and p is not read in this excerpt -- confirm which order is meant.
p = 2;
P = arpfit(Yr,p=1,pixels = active_pixels)
# One-component NMF of the residual Yr - Ain.Cin, clipped at zero.
Y_res = Yr - np.dot(Ain,Cin)
model = ProjectedGradientNMF(n_components=1, init='random', random_state=0)
model.fit(np.maximum(Y_res,0))

fin = model.components_.squeeze()
#%%
# Timed spatial update; note that it also returns a new Cin.
t1 = time()
A,b,Cin = update_spatial_components(Yr, Cin, fin, Ain, d1=d1, d2=d2, sn = P['sn'],dist=2,max_size=8,min_size=3)
t_elSPATIAL = time() - t1
#%%
# NOTE(review): Cn2 is not defined in this excerpt, and the colormap comes
# from `pl` while the plotting above uses `plt` -- verify both names exist.
crd = plot_contours(A,Cn2,thr=0.9,cmap=pl.cm.gray)
#%%
# Timed temporal update with deconv_method='spgl1'.
t1 = time()
C,f,Y_res,Pnew = update_temporal_components(Yr,A,b,Cin,fin,ITER=2,deconv_method = 'spgl1')
t_elTEMPORAL2 = time() - t1
#%%
# Restart the timer and convert A to CSC sparse format.
t1 = time()
A_sp=A.tocsc();
Ejemplo n.º 29
0
# Stack the collected rows into a single 2D array and show it.
genreMat4 = np.vstack(genreMat4)
print genreMat4

# Rows of y that belong to the 'Action' genre.
index = filmsbygenre['Action']
E = y[index, :]

### K-Means ###################
ans = raw_input("Start K-Means with Scikit ? ")
if ans != "y":
    exit()

from sklearn.cluster import MiniBatchKMeans, KMeans
# Two clusterers are configured; only km2 is fitted in this excerpt.
km = MiniBatchKMeans(n_clusters=k, init='k-means++', n_init=1, init_size=1000, batch_size=1000, verbose=1)
km2 = KMeans(n_clusters = k, init='k-means++', verbose=1)
y2 = km2.fit_transform(X)

# For each cluster: the 10 features with the largest absolute centre
# weight, as (weight, feature name) pairs.
# NOTE(review): this reads km.cluster_centers_, but km is never fitted
# above (km2 is) -- confirm whether km2 was intended or a km.fit(X) call
# is missing.
topics5 = [[(km.cluster_centers_[l][i], feature_names[i]) for i in np.argsort(-np.abs(km.cluster_centers_[l]))[:10]] for l in range(k)]
print topics5


### NMF #######################
ans = raw_input("Start NMF with Scikit ? ")
if ans != "y":
    exit()

from sklearn.decomposition import ProjectedGradientNMF
# BEWARE : THIS IS COMPUTATIONALLY INTENSIVE
nmf = ProjectedGradientNMF(n_components=k, max_iter = 10, nls_max_iter=100)
nmf.fit(X)

# For each NMF component: the 10 features with the largest absolute
# weight, as (weight, feature name) pairs.
topics6 = [[(nmf.components_[l][i], feature_names[i]) for i in np.argsort(-np.abs(nmf.components_[l]))[:10]] for l in range(k)]