def lsaTransform(self, dimensions=1):
    """Replace ``self.matrix`` by a reduced-rank reconstruction of itself.

    Calculate the SVD of the objects matrix:
        U . SIGMA . VT = MATRIX
    Zero the ``dimensions`` smallest singular values, producing SIGMA',
    then rebuild the matrix:
        U . SIGMA' . VT = MATRIX'

    dimensions -- how many singular values to drop (default 1).  It must
    not exceed the number of singular values, min(rows, cols); otherwise
    a message is printed and ``self.matrix`` is left untouched.
    """
    rows, cols = self.matrix.shape
    # svd() yields min(rows, cols) singular values, so that count -- not
    # the row count -- bounds the reduction (matters for tall matrices).
    available = min(rows, cols)
    if dimensions > available:
        print("dimension reduction cannot be greater than %s" % available)
        return

    # Sigma comes out as a 1-D array rather than a matrix.
    u, sigma, vt = linalg.svd(self.matrix)

    # Dimension reduction, build SIGMA': the values are sorted in
    # descending order, so the smallest ones sit at the end.
    sigma[available - dimensions:] = 0
    sigma_matrix = linalg.diagsvd(sigma, rows, cols)
    print(sigma_matrix)

    # Reconstruct MATRIX' and save the transform.
    self.matrix = dot(dot(u, sigma_matrix), vt)
def lsaTransform(self, dimensions=1):
    """Replace ``self.matrix`` by a reduced-rank reconstruction of itself.

    Calculate the SVD of the objects matrix:
        U . SIGMA . VT = MATRIX
    Zero the ``dimensions`` smallest singular values, producing SIGMA',
    then rebuild the matrix:
        U . SIGMA' . VT = MATRIX'

    dimensions -- how many singular values to drop (default 1).  It must
    not exceed the number of singular values, min(rows, cols); otherwise
    a message is printed and ``self.matrix`` is left untouched.
    """
    rows, cols = self.matrix.shape
    # The SVD returns min(rows, cols) singular values; indexing sigma up
    # to ``rows`` overran the array whenever rows > cols.
    available = min(rows, cols)
    if dimensions > available:
        print("dimension reduction cannot be greater than %s" % available)
        return

    # Sigma comes out as a 1-D array rather than a matrix.
    u, sigma, vt = linalg.svd(self.matrix)

    # Dimension reduction, build SIGMA' (smallest values are last).
    sigma[available - dimensions:] = 0
    sigma_matrix = linalg.diagsvd(sigma, rows, cols)
    print(sigma_matrix)

    # Reconstruct MATRIX' and save the transform.
    self.matrix = dot(dot(u, sigma_matrix), vt)
def __init__(self, data=None, sym=None):
    """Initialise the array and cache its singular value decomposition.

    ``data`` and ``sym`` are forwarded unchanged to the parent
    initialiser.  Afterwards the full SVD of ``self.x`` (presumably set
    by the parent initialiser -- TODO confirm) is stored as ``self.u``,
    ``self.s`` and ``self.v``, together with two matrices shaped like
    ``self.x``: ``self.sdiag`` holding the singular values on its
    diagonal and ``self.sinvdiag`` holding their reciprocals.
    """
    super(SvdArray, self).__init__(data=data, sym=sym)

    u, s, v = np.linalg.svd(self.x, full_matrices=1)
    self.u, self.s, self.v = u, s, v
    # The shape must come from self.x: a bare ``x`` is not defined in
    # this scope and raised NameError.
    shape = self.x.shape
    self.sdiag = linalg.diagsvd(s, *shape)
    # NOTE(review): a pseudo-inverse of sdiag would have the transposed
    # shape; the original (non-transposed) layout is kept -- confirm
    # against the code that consumes sinvdiag.
    self.sinvdiag = linalg.diagsvd(1. / s, *shape)
def sparse_stable_svd(R, nboot=50):
    """Compute an SVD of the stacked data and bootstrap its stability.

    R     -- indexable collection of 2-D arrays; it is fancy-indexed with
             an integer array, so presumably a NumPy object array or a
             3-D array (TODO confirm against callers).
    nboot -- number of bootstrap resamples (default 50).

    Returns ``(U, s, Vh, qv, boot_ratio)``: the reduced SVD of
    ``np.concatenate(R)``, the q-values reported by ``fdrtool`` reshaped
    like ``boot_ratio``, and the bootstrap ratios (``diag(s) . Vh``
    divided by the standard deviation of the Procrustes-aligned
    bootstrap estimates).
    """
    n_items = len(R)
    # generate the boots; randint's upper bound is exclusive, so this
    # draws from 0 .. n_items - 1 exactly as the removed
    # random_integers(0, n_items - 1) did
    boots = [np.random.randint(0, n_items, n_items) for _ in range(nboot)]

    # calc the original SVD
    U, s, Vh = np.linalg.svd(np.concatenate(R), full_matrices=False)

    # do the boots
    rVs = []
    for boot in boots:
        Ub, sb, Vhb = np.linalg.svd(np.concatenate(R[boot]),
                                    full_matrices=False)
        # rotate the bootstrap solution onto the original one
        rmat = procrustes(U, Ub)
        rVs.append(np.dot(rmat, np.dot(diagsvd(sb, len(sb), len(sb)), Vhb)))

    # get the bootstrap ratios
    rVs = np.array(rVs)
    Vs = np.dot(diagsvd(s, len(s), len(s)), Vh)
    boot_ratio = Vs / rVs.std(0)

    # pass the boot ratios through fdrtool to pick stable features
    counts, edges = np.histogram(boot_ratio.flatten(), bins=500)
    # left edge of the first fullest bin; a boolean mask built from the
    # 500 counts cannot index the 501 bin edges
    peak = edges[np.argmax(counts)]
    results = fdrtool.fdrtool(FloatVector(boot_ratio.flatten() - peak),
                              statistic='normal',
                              plot=False, verbose=False)
    qv = np.array(results.rx('qval')).reshape(boot_ratio.shape)

    return U, s, Vh, qv, boot_ratio
def __init__(self, data=None, sym=None): super(SvdArray, self).__init__(data=data, sym=sym) u, s, v = np.linalg.svd(self.x, full_matrices=1) self.u, self.s, self.v = u, s, v self.sdiag = linalg.diagsvd(s, *x.shape) self.sinvdiag = linalg.diagsvd(1. / s, *x.shape)
def svd_spectro_perf(fl,iv,re,log=None):
    """Deconvolve a flux vector through the SVD of its weighted resolution.

    fl  -- flux values
    iv  -- inverse variances, used as weights through their square root
    re  -- resolution matrix
    log -- optional object with a ``write`` method that also receives
           the timing message

    Returns ``(flux, ivar, Q)``: the row-normalised flux, the squared
    row sums of ``Q`` and the row-normalised matrix ``Q`` itself.
    """
    start = time.time()

    ## weighted resolution (transposed) and weighted flux
    weight = sp.sqrt(iv)
    R = (weight*re).T
    F = weight*fl

    ## svd decomposition
    u, sing, vt = linalg.svd(R)
    n_rows, n_cols = R.shape
    one = linalg.diagsvd(sing*0+1, n_rows, n_cols)
    smat = linalg.diagsvd(sing, n_rows, n_cols)

    flux = vt.T.dot(one.T.dot(u.T.dot(F)))
    Q = vt.T.dot(sp.sqrt(smat.T.dot(smat)).dot(vt))

    ## normalise every row of Q (and the matching flux) with a positive sum
    norm = Q.sum(axis=1)
    w = norm > 0
    Q[w,:] = Q[w,:]/norm[w,None]
    flux[w] /= norm[w]
    ivar = norm**2

    message = "spectro perfected in: {} \n".format(time.time()-start)
    sys.stdout.write(message)
    if log is not None:
        log.write(message)

    return flux,ivar,Q
def train(self):
    """Build the word-document matrix and decompose it with an SVD.

    Parses every passage, builds the matrix, applies the tf-idf
    transform and stores the SVD factors in ``self.u``, ``self.sigma``
    and ``self.vt``.  ``self.sigma_1`` receives the inverse of the
    diagonal singular-value matrix.  Intermediate shapes and matrices
    are printed for inspection.
    """
    # make word-doc vector
    for index, passage in enumerate(self.passages):
        self.__parse(passage, index)
    self.__build(len(self.passages))
    print(self.matrix.shape)
    print(self)

    self.tfidfTransform()

    # SVD
    self.u, self.sigma, self.vt = linalg.svd(self.matrix)
    print(self.u.shape)
    print(len(self.sigma))
    print(self.vt.shape)

    size = len(self.sigma)
    # Invert the diagonal matrix by inverting its diagonal.  Raising the
    # whole matrix to the power -1 works element-wise and turns every
    # off-diagonal zero into inf.
    self.sigma_1 = linalg.diagsvd(1.0 / self.sigma, size, size)
    print(self.sigma_1)
    print(self.sigma_1 * self.sigma)
    print(linalg.diagsvd(self.sigma, size, size))

    # calculate doc concepts -- not implemented yet
def train(self):
    """Build the word-document matrix and decompose it with an SVD.

    Parses every passage, builds the matrix, applies the tf-idf
    transform and stores the SVD factors in ``self.u``, ``self.sigma``
    and ``self.vt``.  ``self.sigma_1`` receives the inverse of the
    diagonal singular-value matrix.  Intermediate shapes and matrices
    are printed for inspection.
    """
    # make word-doc vector
    for index, passage in enumerate(self.passages):
        self.__parse(passage, index)
    self.__build(len(self.passages))
    print(self.matrix.shape)
    print(self)

    self.tfidfTransform()

    # SVD
    self.u, self.sigma, self.vt = linalg.svd(self.matrix)
    print(self.u.shape)
    print(len(self.sigma))
    print(self.vt.shape)

    size = len(self.sigma)
    # ``diagsvd(...) ** -1`` is an element-wise power: it filled every
    # off-diagonal entry with inf.  The inverse of a diagonal matrix is
    # the diagonal matrix of the reciprocals.
    self.sigma_1 = linalg.diagsvd(1.0 / self.sigma, size, size)
    print(self.sigma_1)
    print(self.sigma_1 * self.sigma)
    print(linalg.diagsvd(self.sigma, size, size))

    # calculate doc concepts -- not implemented yet
def image_svd(n):
    """Show a rank-``n`` SVD approximation of 'image.jpg' and the original.

    Every colour channel is decomposed with an SVD, all but its first
    ``n`` singular values are zeroed and the channel is rebuilt.  Figure
    2 shows the approximated image with its channels, figure 1 the
    untouched image.

    n -- number of leading singular values kept per channel.
    """
    img = mpimg.imread('image.jpg')
    channels = [img[:, :, i] for i in range(3)]
    r, g, b = channels

    factors = [sp.svd(channel) for channel in channels]
    r2_nonzero, g2_nonzero, b2_nonzero = [(s != 0).sum()
                                          for _, s, _ in factors]
    print("The number of non zero elements in decompose sigma of red, green, blue matrices are", r2_nonzero, ",", g2_nonzero, "and", b2_nonzero, "respectively.")

    # Take the matrix size from the image itself rather than assuming
    # 800 x 1000 pixels, so any picture size works.
    rows, cols = r.shape
    for i, (u, s, vh) in enumerate(factors):
        s[n:] = 0
        # NOTE(review): the float reconstruction is cast to img's dtype on
        # assignment without clipping -- confirm this is acceptable.
        img[:, :, i] = np.dot(u, np.dot(sp.diagsvd(s, rows, cols), vh))

    # plot the approximated image; r, g and b are views of img and
    # therefore show the approximated channels
    fig = plt.figure(2)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r, cmap='Reds')
    ax3.imshow(g, cmap='Greens')
    ax4.imshow(b, cmap='Blues')
    plt.show()

    # original image
    img = mpimg.imread('image.jpg')
    r, g, b = [img[:, :, i] for i in range(3)]
    fig = plt.figure(1)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r, cmap='Reds')
    ax3.imshow(g, cmap='Greens')
    ax4.imshow(b, cmap='Blues')
    plt.show()
def test_less_accurate_than_full_svd(self):
    """The full SVD must reconstruct a low-rank matrix far more accurately
    than the rank-10 randomized SVD (by more than a factor of 100)."""
    A = lowrank(100, 100)

    def spectral_error(factors):
        # 2-norm of the difference between the reconstruction and A
        left, singular, right = factors
        size = left.shape[1]
        core = la.diagsvd(singular, size, size)
        return la.norm(left.dot(core).dot(right) - A, 2)

    randomized_err = spectral_error(randomized_svd.randomized_svd(A, 10))
    full_err = spectral_error(self.full_svd(A))

    self.assertGreater(1e-2 * randomized_err, full_err)
def svd(n):
    """Show a rank-``n`` SVD approximation of 'tree.jpg' and the original.

    Every colour channel is decomposed with an SVD, all but its first
    ``n`` singular values are zeroed and the channel is rebuilt.  Figure
    2 shows the approximated image with its channels, figure 1 the
    untouched image.

    n -- number of leading singular values kept per channel.
    """
    img = mpimg.imread('tree.jpg')
    channels = [img[:, :, i] for i in range(3)]
    r, g, b = channels

    factors = [sp.svd(channel) for channel in channels]
    # count how many singular values are non zero
    r_nonzero, g_nonzero, b_nonzero = [(s != 0).sum() for _, s, _ in factors]
    print("The number of non zero elements in decompose sigma of red, green, blue matrices are", r_nonzero, ",", g_nonzero, "and", b_nonzero, "respectively.")

    # Take the matrix size from the image itself rather than assuming
    # 800 x 1000 pixels, so any picture size works.
    rows, cols = r.shape
    for i, (u, s, vh) in enumerate(factors):
        s[n:] = 0
        # NOTE(review): the float reconstruction is cast to img's dtype on
        # assignment without clipping -- confirm this is acceptable.
        img[:, :, i] = np.dot(u, np.dot(sp.diagsvd(s, rows, cols), vh))

    # display the modified picture; r, g and b are views of img and
    # therefore show the approximated channels
    fig = plt.figure(2)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r, cmap='Reds')
    ax3.imshow(g, cmap='Greens')
    ax4.imshow(b, cmap='Blues')
    plt.show()

    # display original picture
    img = mpimg.imread('tree.jpg')
    r, g, b = [img[:, :, i] for i in range(3)]
    fig = plt.figure(1)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r, cmap='Reds')
    ax3.imshow(g, cmap='Greens')
    ax4.imshow(b, cmap='Blues')
    plt.show()
def image_svd(n):
    """Show a rank-``n`` SVD approximation of 'SnakeDance.jpg'.

    Every colour channel is decomposed into U, sigma and V, all but the
    first ``n`` singular values are zeroed and the channel is rebuilt.
    Figure 2 then shows the approximated image and its three rebuilt
    channels.

    n -- number of leading singular values kept per channel.
    """
    # read image and split it into its red, green and blue matrices
    img = mpimg.imread('SnakeDance.jpg')
    channels = [img[:, :, i] for i in range(3)]

    # U, sigma and V for every channel
    factors = [linalg.svd(channel) for channel in channels]

    # number of non zero elements in each channel's sigma
    r2_nonzero, g2_nonzero, b2_nonzero = [(s != 0).sum()
                                          for _, s, _ in factors]
    print("The number of non zero elements in decompose sigma of red, green, blue matrices are", r2_nonzero, ",", g2_nonzero, "and", b2_nonzero, "respectively.")

    # Take the matrix size from the image itself rather than assuming
    # 800 x 1000 pixels, so any picture size works.
    rows, cols = channels[0].shape
    approximations = []
    for u, s, vh in factors:
        # keep the first n singular values only
        s[n:] = 0
        # sigma is 1-D; diagsvd expands it to a (rows, cols) diagonal
        # matrix so the three factors can be multiplied
        approximations.append(
            np.dot(u, np.dot(linalg.diagsvd(s, rows, cols), vh)))
    r_new, g_new, b_new = approximations

    # NOTE(review): the float reconstructions are cast to img's dtype on
    # assignment without clipping -- confirm this is acceptable.
    img[:, :, 0] = r_new
    img[:, :, 1] = g_new
    img[:, :, 2] = b_new

    fig2 = plt.figure(2)
    ax1 = fig2.add_subplot(2, 2, 1)
    ax2 = fig2.add_subplot(2, 2, 2)
    ax3 = fig2.add_subplot(2, 2, 3)
    ax4 = fig2.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r_new, cmap='Reds')
    ax3.imshow(g_new, cmap='Greens')
    ax4.imshow(b_new, cmap='Blues')
    plt.show()
def svd_a_inv(a, b, full_matrices=True):
    """Solve ``a . x = b`` through the SVD-based pseudo-inverse of ``a``.

    a             -- 2-D array, tall, square or wide
    b             -- right-hand side
    full_matrices -- forwarded to ``linalg.svd``; both settings give the
                     same pseudo-inverse

    Returns ``(ah, s)`` where ``ah`` is the pseudo-inverse of ``a`` and
    ``s = ah . b``.

    Raises ``np.linalg.LinAlgError`` when a singular value is exactly
    zero (the same error the explicit matrix inversion used to raise).
    """
    U, s, Vh = linalg.svd(a, full_matrices)
    if np.any(s == 0):
        raise np.linalg.LinAlgError("Singular matrix")

    # Inverting a diagonal matrix only needs the reciprocals of its
    # diagonal.  Sizing the result from Vh and U covers full and reduced
    # factors alike and also works when ``a`` has more columns than rows.
    S_inv = linalg.diagsvd(1.0 / s, Vh.shape[0], U.shape[1])

    ah = np.dot(Vh.transpose(), np.dot(S_inv, U.transpose()))
    return ah, np.dot(ah, b)
def MStep(e_tau2, e_delta2, X, y):
    """Re-estimate the weight vector ``w`` from the current expectations.

    e_tau2   -- array whose flattened values form the diagonal of the
                matrix that is inverted to obtain ``phi``
    e_delta2 -- scalar; its reciprocal weights the inverse squared
                singular values
    X        -- design matrix, one column per feature
    y        -- targets

    Returns the new weight vector.
    """
    n_features = X.shape[1]
    phi = sl.inv(np.diag(e_tau2.ravel()))

    # reduced SVD of the design matrix
    left, singular, right_t = sl.svd(X, full_matrices=False)
    inv_sing = sl.diagsvd(singular**-1, n_features, n_features)
    inv_sing_sq = sl.diagsvd(singular**-2, n_features, n_features)

    phi_right = np.dot(phi, right_t.T)
    system = np.dot(right_t, phi_right) + (1 / e_delta2) * inv_sing_sq
    rhs = np.dot(inv_sing, np.dot(left.T, y))

    return np.dot(phi_right, np.dot(sl.inv(system), rhs))
def multivariateGaussian(X, mu, sigma2):
    """Evaluate the multivariate Gaussian density at every row of X.

    X      -- examples, one per row
    mu     -- mean of the distribution
    sigma2 -- a covariance matrix, or a vector (1-D, single row or
              single column) holding the per-dimension variances of a
              diagonal covariance matrix

    Returns the density of each example.
    """
    k = len(mu)

    # promote a 1-D variance vector to a single-row 2-D array
    if sigma2.ndim == 1:
        sigma2 = sigma2[np.newaxis, :]

    # a single row or column is a vector of variances: expand it to the
    # diagonal covariance matrix
    if sigma2.shape[0] == 1 or sigma2.shape[1] == 1:
        variances = sigma2.flatten()
        sigma2 = linalg.diagsvd(variances, len(variances), len(variances))

    # mu is unrolled (and transposed) here
    centered = X - mu.reshape(mu.size, order='F').T

    scale = np.dot(np.power(2 * np.pi, - k / 2.0),
                   np.power(np.linalg.det(sigma2), -0.5))
    quadratic = np.sum(np.dot(centered, np.linalg.pinv(sigma2)) * centered,
                       axis=1)
    return scale * np.exp(-0.5 * quadratic)
def svd(self, major_allele_matrix):
    """Eigen-analysis of a genotype matrix through an SVD.

    Follows the procedure of Population Structure and Eigenanalysis,
    Patterson et al 2006.  The genotype matrix holds bi-allelic loci,
    each entry being the number of copies of the major allele; its
    dimensions are (number_of_individuals)*(number_of_markers).

    major_allele_matrix -- pandas DataFrame; an optional 'popdata'
        column is ignored.  The caller's frame is not modified.

    Returns ``(eigenvectors, eigenvalues)``: the left singular vectors
    of the corrected matrix and a two-row array whose first row holds
    the squared singular values and whose second row holds their
    fraction of the total.
    """
    if 'popdata' in major_allele_matrix.columns:
        # drop() returns a new frame -- the result has to be kept,
        # otherwise the label column stays in the data
        major_allele_matrix = major_allele_matrix.drop('popdata', axis=1)

    genotypes = np.asarray(major_allele_matrix, dtype=float)

    # NOTE(review): mean and scale are taken along axis 1, i.e. per row
    # of the matrix, exactly as before -- confirm this is the intended
    # orientation.
    shifting_factor = genotypes.mean(axis=1)
    p_vector = shifting_factor / 2
    scaling_factor = np.sqrt(p_vector * (1 - p_vector))

    count = self.population_size
    corrected_matrix = ((genotypes[:count]
                         - shifting_factor[:count, np.newaxis])
                        / scaling_factor[:count, np.newaxis])

    # singular value decomposition using scipy linalg module
    eigenvectors, s, v = linalg.svd(corrected_matrix)

    # the eigenvalues are the squared singular values
    eigenvalues = np.square(s)
    fraction_of_variance = eigenvalues / np.sum(eigenvalues)
    return eigenvectors, np.vstack((eigenvalues, fraction_of_variance))
def pca(X):
    """Run principal component analysis on the dataset X.

    The covariance matrix of X is formed by dividing ``X.T . X`` by the
    number of examples (rows) and decomposed with an SVD.

    Returns ``(U, S)``: the eigenvectors U and a square matrix S holding
    the eigenvalues on its diagonal.
    """
    n_examples, _ = X.shape

    # covariance matrix
    covariance = (1.0 / n_examples) * (X.T).dot(X)

    # eigenvectors and eigenvalues; svd returns the values as a 1-D
    # array, diagsvd places them on the diagonal of a square matrix
    U, values, _ = linalg.svd(covariance)
    size = len(values)
    return U, linalg.diagsvd(values, size, size)
def OLS(X, data):
    """Ordinary least squares using singular value decomposition (SVD).

    X    -- design matrix (a copy is decomposed, X itself is untouched)
    data -- observations

    Returns the coefficient vector ``beta``.
    """
    design = np.copy(X)
    U, s, VT = scl.svd(design)
    # rectangular matrix with the singular values on its diagonal
    D = scl.diagsvd(s, U.shape[0], VT.shape[0])
    D_pinv = scl.pinv(D)
    return VT.T @ D_pinv @ U.T @ data
def truncatedSVD(x, rank=1):
    """Return the leading ``rank`` components of the SVD of ``x``.

    x    -- matrix or tensor; vectors are rejected by an assertion
    rank -- number of singular triplets to keep (default 1)

    Returns ``(u, s, v)``: the first ``rank`` columns of the left
    factor, a ``rank`` x ``rank`` diagonal matrix of the leading
    singular values and the first ``rank`` rows of the right factor.
    """
    assert isATensor(x) and not isAVector(x), "`x` must be a matrix or tensor!"

    left, singular, right = svd(x)
    core = diagsvd(singular[:rank], rank, rank)
    return left[:, :rank], core, right[:rank, :]
def question_a():
    """Print the reduced SVD factors of a fixed 4 x 2 matrix and their product.

    Output, in order: the second row of the matrix, the matrix, the
    first two columns of U, the 2 x 2 singular-value matrix, Vh and the
    product ``U . Sig . Vh`` as a check of the computation.
    """
    matrix_m = np.array([[1, 2], [2, 1], [3, 4], [4, 3]])
    print(matrix_m[1])
    print(matrix_m)

    n_rows, n_cols = matrix_m.shape
    U, s, Vh = linalg.svd(matrix_m)

    # keep the leading 2 x 2 block of Sigma and the matching columns of U
    Sig = linalg.diagsvd(s, n_rows, n_cols)[:2, :2]
    U = U[:4, :2]

    print(U)
    print(Sig)
    print(Vh)

    # check computation
    print(U.dot(Sig.dot(Vh)))
def GetSubmatrWithWatermark(img, word):
    """Embed a watermark into the singular values of a wavelet sub-band.

    img  -- image array; uint8 input is rescaled by 1/255 to float32
    word -- passed to ``GenerateWatermark`` together with the shape of
            the singular-value matrix

    Returns ``(Uwi, Si, Vhwi)``: the left and right factors of the SVD
    of the watermarked singular-value matrix, and the original
    (un-watermarked) singular-value matrix ``Si``.
    """
    if type(img[0][0]) == np.uint8:
        img = np.float32(img) * 1.0 / 255

    # wavelet transform; the sub-band at index 1 of the second-to-last
    # coefficient tuple carries the watermark
    coeffs = pywt.wavedec2(img, 'db1')
    dwtDom2Wtmk = coeffs[len(coeffs) - 2][1]

    # first SVD
    Ui, si, Vhi = linalg.svd(dwtDom2Wtmk, full_matrices=True)
    n_rows = min(Ui.shape[0], Vhi.shape[0])
    n_cols = max(Ui.shape[1], Vhi.shape[1])
    Si = linalg.diagsvd(si, n_rows, n_cols)

    # apply watermark
    Siw = Si + GenerateWatermark(Si.shape, word)

    # second SVD; its singular values are not needed
    Uwi, _, Vhwi = linalg.svd(Siw, full_matrices=True)
    return (Uwi, Si, Vhwi)
def low_rank_approx(X,r):
    """Return the first ``r`` columns of the rank-``r`` SVD approximation of X.

    All singular values after the first ``r`` are zeroed, the matrix is
    rebuilt from the three factors and its leading ``r`` columns are
    returned.
    """
    left, singular, right = linalg.svd(X)
    singular[r:] = 0
    core = linalg.diagsvd(singular, left.shape[1], right.shape[0])
    approx = np.dot(left, np.dot(core, right))
    # NOTE(review): only the first r columns of the approximation are
    # returned -- confirm that the column truncation is intended.
    return approx[:, :r]
def check_svd_function(svd_function):
    """check whether svd_function behaves as np.linalg.svd

    For real and complex dtypes and several matrix shapes a random
    matrix is decomposed three ways (singular values only, full
    matrices, reduced matrices).  The singular values must agree, both
    factorisations must reproduce the matrix within ``tol_NULP`` units
    in the last place, and U must keep the dtype of the input.

    An EnvironmentError raised on the way is printed and fails the check.
    """
    try:
        for dtype in [np.float32, np.float64, np.complex64, np.complex128]:
            print("dtype = ", dtype)
            for m, n in [(1, 1), (1, 10), (10, 1), (10, 10), (10, 20)]:
                print("m, n = ", m, n)
                tol_NULP = 200 * max(max(m, n)**3, 100)  # quite large tolerance, but seems to be required...
                if np.dtype(dtype).kind == 'c':  # complex?
                    A = standard_normal_complex((m, n))
                else:
                    A = np.random.standard_normal(size=(m, n))
                A = np.asarray(A, dtype)
                # singular values alone must match those of the full SVD
                Sonly = svd_function(A, compute_uv=False)

                Ufull, Sfull, VTfull = svd_function(A, full_matrices=True, compute_uv=True)
                npt.assert_array_almost_equal_nulp(Sonly, Sfull, tol_NULP)
                # full factors need a rectangular (m, n) sigma matrix
                recalc = Ufull.dot(diagsvd(Sfull, m, n)).dot(VTfull)
                npt.assert_array_almost_equal_nulp(recalc, A, tol_NULP)

                U, S, VT = svd_function(A, full_matrices=False, compute_uv=True)
                npt.assert_array_almost_equal_nulp(Sonly, S, tol_NULP)
                # reduced factors multiply with a square diagonal matrix
                recalc = U.dot(np.diag(S)).dot(VT)
                npt.assert_array_almost_equal_nulp(recalc, A, tol_NULP)
                print("types of U, S, VT = ", U.dtype, S.dtype, VT.dtype)
                nst.eq_(U.dtype, A.dtype)
    except EnvironmentError as e:
        print(str(e))
        if str(e).startswith("Couldn't find LAPACK"):
            print("(Not an issue if you have scipy >= 0.18.0)")
        # NOTE(review): placed so that every EnvironmentError fails the
        # check, not only the LAPACK one -- confirm the intended nesting.
        assert(False)
def fs_c(self, percent=0.9, N=None):
    """Return the column factor scores (dimensionality-reduced representation).

    The number of retained factors is either given directly or derived
    from the explained variance.

    'percent': The minimum variance that the retained factors are
               required to explain (default: 90% = 0.9)
    'N':       The number of factors to retain. Overrides 'percent'.
               It is capped at the rank.

    Stores the scores in ``self.G`` (and the variance-based factor count
    in ``self.k``) and returns ``self.G``.
    Raises ValueError for a 'percent' outside [0, 1] or an 'N' that is
    not a positive integer.
    """
    if not 0 <= percent <= 1:
        raise ValueError("Percent should be a real number between 0 and 1.")

    if N:
        if not isinstance(N, (int, np.int64)) or N <= 0:
            raise ValueError("N should be a positive integer.")
        # maybe we should notify the user?
        N = min(N, self.rank)

    # smallest number of factors whose cumulative share reaches 'percent';
    # computed even when N is given
    reached = np.cumsum(self.L) >= sum(self.L)*percent
    self.k = 1 + np.flatnonzero(reached)[0]

    num2ret = N if N else self.k

    # the sign of the square root can be either way; singular value vs. eigenvalue
    if self.cor:
        s = -np.sqrt(self.L)
    else:
        s = self.s

    S = diagsvd(s[:num2ret], len(self.Q), num2ret)
    self.G = _mul(self.D_c, self.Q.T, S)  # important! note the transpose on Q
    return self.G
def __init__(self, corpus, vocab):
    """
    Create CountVectorizer object,
    Create a tfidf array
    Use SVD (Singular Value Decomposition) to approximate tfidf array
    Pickle-able

    corpus -- iterable of documents handed to the vectorizer
    vocab  -- vocabulary used by the CountVectorizer

    Stores the vectorizer in ``self.v`` and the transposed
    approximation (at most 300 singular values kept) in ``self.dt``.
    """
    self.v = CountVectorizer(vocabulary=vocab)
    X = self.v.fit_transform(corpus).toarray()

    transformer = TfidfTransformer()
    tfidf = transformer.fit_transform(X)
    # The weighting may come back as a sparse matrix; linalg.svd needs a
    # dense array.
    weights = tfidf.toarray() if hasattr(tfidf, "toarray") else tfidf

    # SVD of the tf-idf weights.  NOTE(review): the raw counts X were
    # decomposed before although the tf-idf array was computed and the
    # docstring asks for its approximation -- confirm.
    M, N = weights.shape
    U, s, Vt = linalg.svd(weights)

    # Reduce Matrix to only 300 dimensions
    s[300:] = 0
    Sig = linalg.diagsvd(s, M, N)

    print(U.shape)
    print(Sig.shape)
    print(Vt.shape)

    # Store approximated document-term Matrix
    self.dt = (U.dot(Sig.dot(Vt))).transpose()
def pca(X):
    """Run principal component analysis on the dataset X.

    The covariance matrix of X is formed by dividing ``X.T . X`` by the
    number of examples (rows) and decomposed with an SVD.

    Returns ``(U, S)``: the eigenvectors U and a square matrix S holding
    the eigenvalues on its diagonal.
    """
    examples, _ = X.shape

    # compute the covariance matrix
    cov = (1.0 / examples) * (X.T).dot(X)

    # svd yields the eigenvalues as a 1-D array; expand them to a matrix
    U, eigenvalues, _ = linalg.svd(cov)
    count = len(eigenvalues)
    S = linalg.diagsvd(eigenvalues, count, count)
    return U, S
def svd_thresh(data, threshold=None, n_pc=None, thresh_type='hard'):
    """Threshold the singular values

    This method thresholds the input data using singular value
    decomposition

    Parameters
    ----------
    data : np.ndarray
        Input data array
    threshold : float, optional
        Threshold value; when omitted it is read from the singular
        values at index ``n_pc``
    n_pc : int or str, optional
        Number of principal components, specify an integer value or 'all'
    thresh_type : str {'hard', 'soft'}
        Type of thresholding passed on to ``thresh`` (default is 'hard')

    Returns
    -------
    np.ndarray thresholded data

    Raises
    ------
    ValueError
        For invalid string entry for n_pc

    """
    if isinstance(n_pc, str) and n_pc != 'all':
        raise ValueError('Invalid value for "n_pc", specify an integer value '
                         'or "all"')

    # Get SVD of input data.
    u, s, v = svd(data, check_finite=False, lapack_driver='gesvd')

    # Find the threshold if not provided.
    if threshold is None:

        # Find the required number of principal components if not specified.
        if n_pc is None:
            n_pc = find_n_pc(u, factor=0.1)

        # If the number of PCs is too large use all of the singular values.
        # 'all' is tested first: ordering a str against an int raises
        # TypeError on Python 3.
        if n_pc == 'all' or n_pc >= s.size:
            n_pc = s.size - 1
            warn('Using all singular values.')

        threshold = s[n_pc]

    # Remove noise from singular values.
    s_new = thresh(s, threshold, thresh_type)

    # Reshape the singular values to the shape of the input image.
    s_new = diagsvd(s_new, *data.shape)

    # Return the thresholded image.
    return np.dot(u, np.dot(s_new, v))
def matrix_reduce_sigma(matrix, dimensions=1):
    """This calculates the SVD of the matrix, reduces it and creates a
    reduced matrix.

    @params matrix the matrix to reduce
    @params dimensions number of smallest singular values to remove;
            values larger than the number of singular values remove all
    @return matrix The reduced matrix
    """
    uu, sigma, vt = linalg.svd(matrix)

    # sigma is a unidimensional array of min(rows, cols) values sorted
    # in descending order, so it has no second axis to read and the
    # smallest values are found at its end.
    # delete (ie set to zero) the ``dimensions`` smallest singular values
    first_removed = max(len(sigma) - dimensions, 0)
    sigma[first_removed:] = 0

    # convert the unidimensional sigma to a (rows, cols) matrix
    sigma_matrix = linalg.diagsvd(sigma, len(uu), len(vt))

    return numpy.dot(numpy.dot(uu, sigma_matrix), vt)
def Tikhonov(Uh, s, V, b, alpha):
    '''
    Tikhonov: Tikhonov procedure for solving Ax = b via SVD.

    Inputs:
        Uh    : Hermitian transpose of U, the left singular vectors
        s     : singular values
        V     : right singular vectors
                such that U, s, Vh = svd(A), i.e. A = U @ S @ Vh
                with s = diag(S)
        b     : right-hand side of the linear system
        alpha : Tikhonov regularization parameter

    Output:
        x_alpha : the regularized solution
    '''
    # filtered reciprocals s / (alpha + s^2) replace the plain 1 / s
    filtered = s / (alpha + s**2)

    # regularized pseudoinverse of the diagonal matrix S
    Sp = diagsvd(filtered, V.shape[1], Uh.shape[0])

    projected = Uh @ b
    return V @ (Sp @ projected)
def multivariateGaussian(X, mu, sigma2):
    """Evaluate the multivariate Gaussian density at every row of X.

    X      -- examples, one per row
    mu     -- mean of the distribution
    sigma2 -- a covariance matrix, or a vector (1-D, single row or
              single column) holding the per-dimension variances of a
              diagonal covariance matrix

    Returns the density of each example.
    """
    k = len(mu)

    # turn a 1-D variance vector into a single-row 2-D array
    if sigma2.ndim == 1:
        sigma2 = sigma2[np.newaxis, :]

    # a single row or column holds variances: build the diagonal
    # covariance matrix from them
    if sigma2.shape[0] == 1 or sigma2.shape[1] == 1:
        diagonal = sigma2.flatten()
        sigma2 = linalg.diagsvd(diagonal, len(diagonal), len(diagonal))

    # mu is unrolled (and transposed) here
    deviations = X - mu.reshape(mu.size, order='F').T

    normaliser = np.dot(np.power(2 * np.pi, - k / 2.0),
                        np.power(np.linalg.det(sigma2), -0.5))
    exponent = -0.5 * np.sum(
        np.dot(deviations, np.linalg.pinv(sigma2)) * deviations, axis=1)
    return normaliser * np.exp(exponent)
def special_svd(M, K=9):
    """Rank-``K`` SVD factors of the normalised matrix ``M``.

    M is normalised with its user and item averages before the reduced
    SVD is taken.

    Returns ``(U_K, V_K, sigma)``: the first K columns of U, the first K
    rows of V and the K x K diagonal matrix of the leading singular
    values.
    """
    useravg, itemavg = find_user_and_item_avg(M)
    normalised = norm_matrix(M, useravg, itemavg)

    U, s, V = linalg.svd(normalised, full_matrices = False)
    sigma = linalg.diagsvd(s[:K], K, K)

    return U[:,:K], V[:K,:], sigma
def plotFirst3PCA(X, labels=None, colors=None):
    '''
    Projects the samples of data matrix X onto its principal components
    and shows the first 3 of them using scatter3d()
    @param X: Input data, samples are in rows. It is advised to at least
    mean-center the data, but also to scale each input feature by
    dividing by standard deviation. Use svo_util.normalize() to do this.
    @param labels: A vector with length = rows(X), which has an integer
    label that indicates which class each sample belongs to. None means
    that the data is not classified, so all points will have the same
    color.
    @param colors: A list of color strings or numbers, one per label so
    that all points with the same label are colored the same.
    len(colors) == len( unique(labels) )
    @return: (T, W) where T is the data in pca-space and W are the
    loading weights. T and W can be used to reconstruct points from PCA
    space back to the 'normal' space, as with the function
    reconstructPCA().
    '''
    U, s, Vt = LA.svd(X, full_matrices=True)
    n_samples, n_features = X.shape

    # samples in PCA space (also, T = X.dot(V) where V=Vt.T)
    T = U.dot(LA.diagsvd(s, n_samples, n_features))

    # the first 3 columns belong to the 3 largest components
    scatter3d(T[:, 0:3], labels=labels, colors=colors)

    # the transformed data, and the loading weights
    return T, Vt.T
def main(num=5):
    """Write a rank-``num`` SVD approximation of 'me.png' to 'me_<num>.jpg'.

    Each of the first three colour bands is decomposed with an SVD,
    rebuilt from its leading ``num`` singular triplets, truncated to
    integers and clamped to 0..255.

    num -- number of singular values kept per band (default 5); values
           above the number of available singular values keep them all.
    """
    im = Image.open('me.png')
    pix = im.load()
    width, height = im.size

    # one width x height table per band, indexed ma[band][x][y]
    ma = [[[pix[x, y][band] for y in range(height)] for x in range(width)]
          for band in range(3)]

    for band in range(3):
        u, s, v = linalg.svd(ma[band])
        # never ask for more components than the decomposition provides
        keep = min(num, len(s))
        ma[band] = dot(dot(u[:, :keep],
                           linalg.diagsvd(s[:keep], keep, keep)),
                       v[:keep, :])

    for x in range(width):
        for y in range(height):
            pix[x, y] = tuple(min(max(int(ma[band][x][y]), 0), 255)
                              for band in range(3))

    im.save('me_%d.jpg' % num)
def svd(self, matrix):
    """Perform the SVD of ``matrix`` and keep its factors on the instance.

    Stores U, the singular values and Vh in ``self._U_``,
    ``self._SIGMA_`` and ``self._Vh_``, the matrix dimensions in
    ``self.M`` and ``self.N``, and prints the (M, N) singular-value
    matrix.

    matrix -- anything ``numpy.asmatrix`` accepts
    """
    # asmatrix is the function the ``numpy.mat`` alias pointed to; the
    # alias itself is gone from NumPy 2.0
    matrix = numpy.asmatrix(matrix)
    self._U_, self._SIGMA_, self._Vh_ = linalg.svd(matrix)
    self.M, self.N = matrix.shape
    Sig = numpy.asmatrix(linalg.diagsvd(self._SIGMA_, self.M, self.N))
    print(Sig)
def fs_c(self, percent=0.9, N=None):
    """Return the column factor scores (dimensionality-reduced representation).

    The number of retained factors is either given directly or derived
    from the explained variance.

    'percent': The minimum variance that the retained factors are
               required to explain (default: 90% = 0.9)
    'N':       The number of factors to retain. Overrides 'percent'.
               It is capped at the rank.

    Stores the scores in ``self.G`` (and the variance-based factor count
    in ``self.k``) and returns ``self.G``.
    Raises ValueError for a 'percent' outside [0, 1] or an 'N' that is
    not a positive integer.
    """
    if not 0 <= percent <= 1:
        raise ValueError(
            "Percent should be a real number between 0 and 1.")

    if N:
        if not isinstance(N, (int, int64)) or N <= 0:
            raise ValueError("N should be a positive integer.")
        # maybe we should notify the user?
        N = min(N, self.rank)

    # smallest number of factors whose cumulative share reaches 'percent';
    # computed even when N is given
    enough = cumsum(self.L) >= sum(self.L) * percent
    self.k = 1 + flatnonzero(enough)[0]

    num2ret = N if N else self.k

    # the sign of the square root can be either way; singular value vs. eigenvalue
    if self.cor:
        s = -sqrt(self.L)
    else:
        s = self.s

    S = diagsvd(s[:num2ret], len(self.Q), num2ret)
    self.G = _mul(self.D_c, self.Q.T, S)  # important! note the transpose on Q
    return self.G
def load_caltech101_30(folder=CALTECH101_30_DIR, tiny_problem=False):
    """Load the caltech101-30 kernel data and split it three ways.

    The training kernel matrix is turned into explicit feature vectors
    (``U . sqrt(S)`` of its SVD, examples in rows) which are dealt out
    round-robin: rows 0, 3, 6, ... go to training, rows 1, 4, 7, ... to
    validation and rows 2, 5, 8, ... to test.

    folder       -- directory containing 'caltech101-30.matlab'
    tiny_problem -- when true, only every 5th pattern of the first 20%
                    of the training data is used

    Returns a ``Datasets`` with ``train``, ``validation`` and ``test``.
    """
    caltech = scio.loadmat(folder + '/caltech101-30.matlab')
    # the test-side entries are read but not used below
    k_train, k_test = caltech['Ktrain'], caltech['Ktest']
    label_tr, label_te = caltech['tr_label'], caltech['te_label']
    file_tr, file_te = caltech['tr_files'], caltech['te_files']

    if tiny_problem:
        pattern_step = 5
        fraction_limit = 0.2
        subset = slice(None, int(len(label_tr) * fraction_limit),
                       pattern_step)
        k_train = k_train[subset, subset]
        label_tr = label_tr[subset]
        # NOTE(review): file_tr is not subsampled here, so in the tiny
        # problem the 'files' info may not line up with the patterns --
        # confirm.

    U, s, Vh = linalg.svd(k_train)
    S_sqrt = linalg.diagsvd(s ** 0.5, len(s), len(s))
    X = np.dot(U, S_sqrt)  # examples in rows

    label_tr_enc = to_one_hot_enc(np.array(label_tr) - 1)

    # every third row, starting at offsets 0 (train), 1 (validation)
    # and 2 (test)
    train_rows, val_rows, test_rows = [slice(offset, len(X), 3)
                                       for offset in range(3)]

    test_dataset = Dataset(data=X[test_rows, :],
                           target=label_tr_enc[test_rows, :],
                           info={'files': file_tr[test_rows]})
    validation_dataset = Dataset(data=X[val_rows, :],
                                 target=label_tr_enc[val_rows, :],
                                 info={'files': file_tr[val_rows]})
    training_dataset = Dataset(data=X[train_rows, :],
                               target=label_tr_enc[train_rows, :],
                               info={'files': file_tr[train_rows]})

    return Datasets(train=training_dataset, validation=validation_dataset,
                    test=test_dataset)
def check_svd_function(svd_function):
    """Check whether svd_function behaves as np.linalg.svd.

    For real and complex dtypes and several matrix shapes a random
    matrix is decomposed three ways (singular values only, full
    matrices, reduced matrices).  The singular values must agree, both
    factorisations must reproduce the matrix within ``tol_NULP`` units
    in the last place, and U must keep the dtype of the input.
    """
    dtypes = (np.float32, np.float64, np.complex64, np.complex128)
    shapes = ((1, 1), (1, 10), (10, 1), (10, 10), (10, 20))

    for dtype in dtypes:
        print("dtype = ", dtype)
        for m, n in shapes:
            print("m, n = ", m, n)
            # quite large tolerance, but seems to be required...
            tol_NULP = 200 * max(max(m, n)**3, 100)

            if np.dtype(dtype).kind == 'c':  # complex?
                A = standard_normal_complex((m, n))
            else:
                A = np.random.standard_normal(size=(m, n))
            A = np.asarray(A, dtype)

            Sonly = svd_function(A, compute_uv=False)

            # full matrices: sigma has to be expanded to shape (m, n)
            Ufull, Sfull, VTfull = svd_function(A, full_matrices=True,
                                                compute_uv=True)
            npt.assert_array_almost_equal_nulp(Sonly, Sfull, tol_NULP)
            rebuilt = Ufull.dot(diagsvd(Sfull, m, n)).dot(VTfull)
            npt.assert_array_almost_equal_nulp(rebuilt, A, tol_NULP)

            # reduced matrices: a square diagonal sigma is enough
            U, S, VT = svd_function(A, full_matrices=False, compute_uv=True)
            npt.assert_array_almost_equal_nulp(Sonly, S, tol_NULP)
            rebuilt = U.dot(np.diag(S)).dot(VT)
            npt.assert_array_almost_equal_nulp(rebuilt, A, tol_NULP)

            print("types of U, S, VT = ", U.dtype, S.dtype, VT.dtype)
            assert U.dtype == A.dtype
def multivariateGaussian(X, mu, Sigma2):
    """
    Computes the probability density function of the examples X
    under the multivariate gaussian distribution with parameters
    mu and Sigma2. If Sigma2 is a matrix, it is treated as the
    covariance matrix. If Sigma2 is a vector (1-D, a single row or a
    single column), it is treated as the sigma^2 values of the
    variances in each dimension (a diagonal covariance matrix).
    Args:
        X     : array(# of training examples m, # of features n)
        mu    : array(# of features n, 1)
        Sigma2: array(# of features n, # of features n), or a vector
                of n variances
    Returns:
        p     : array(# of training examples m,)
    """
    k = len(mu)

    # A vector of variances -- including a plain 1-D array, which has no
    # second axis to inspect -- is expanded to a diagonal covariance
    # matrix.
    if Sigma2.ndim == 1 or 1 in Sigma2.shape:
        variances = Sigma2.flatten()
        Sigma2 = linalg.diagsvd(variances, len(variances), len(variances))

    X = X - mu.T
    p = np.dot(np.power(2 * np.pi, - k / 2.0),
               np.power(np.linalg.det(Sigma2), -0.5)) * \
        np.exp(-0.5 * np.sum(np.dot(X, np.linalg.pinv(Sigma2)) * X, axis=1))

    return p
def fillmat(M):
    """Fill the NaN entries of ``M`` by iterative singular value shrinkage.

    Starting from a zero matrix, each step moves the estimate towards M
    on the known (non-NaN) entries and soft-thresholds the singular
    values of the result by ``tau * mu``.  The inner loop runs until the
    value reported by ``get_error`` drops to ``xtol`` or below; the
    outer loop then shrinks ``mu`` by the factor ``eta_mu`` until it
    reaches ``mu_min`` (or ``max_iter`` rounds have run).

    M -- 2-D array, tall, square or wide; NaN marks a missing entry.

    Returns the completed matrix, shaped like M.
    """
    m, n = M.shape
    X = np.zeros(shape=(m, n))
    tau = 1.0
    mu_min = 1.0e-8
    eta_mu = 0.25
    mu = eta_mu * norm(np.nan_to_num(M))
    niter = 0
    max_iter = 10000
    xtol = 1.0e-3

    while (mu > mu_min) and (niter < max_iter):
        delta = 1.0
        while delta > xtol:
            X_prev = X
            # X - M is NaN wherever M is unknown; nan_to_num zeroes
            # those entries so only the known ones pull on the estimate
            Y = X - tau * np.nan_to_num(X - M)
            U, S, V = svd(Y, full_matrices=False)
            S1 = np.maximum(S - tau * mu, 0)
            # Scaling the columns of U equals multiplying by diag(S1)
            # and, unlike an n x n sigma matrix, also fits the reduced
            # factors of a matrix with more columns than rows.
            X = np.dot(U * S1, V)
            delta = get_error(X, X_prev)
        mu = max(mu * eta_mu, mu_min)
        niter += 1
        print('mu = {:0.4e}'.format(mu))

    return X
def fs_r(self, percent=0.9, N=None):
    """Return the row factor scores (dimensionality-reduced representation).

    The number of retained factors is either given directly or derived
    from the explained variance.

    'percent': The minimum variance that the retained factors are
               required to explain (default: 90% = 0.9)
    'N':       The number of factors to retain. Overrides 'percent'.
               It is capped at the rank.

    Stores the scores in ``self.F`` (and the variance-based factor count
    in ``self.k``) and returns ``self.F``.
    Raises ValueError for a 'percent' outside [0, 1] or an 'N' that is
    not a positive integer.
    """
    if not 0 <= percent <= 1:
        raise ValueError("Percent should be a real number between 0 and 1.")

    if N:
        if not isinstance(N, (int, np.int64)) or N <= 0:
            raise ValueError("N should be a positive integer.")
        N = min(N, self.rank)

    # smallest number of factors whose cumulative share reaches 'percent';
    # computed even when N is given
    reached = np.cumsum(self.L) >= sum(self.L)*percent
    self.k = 1 + np.flatnonzero(reached)[0]

    num2ret = N if N else self.k

    # the sign of the square root can be either way; singular value vs. eigenvalue
    if self.cor:
        s = -np.sqrt(self.L)
    else:
        s = self.s

    S = diagsvd(s[:num2ret], self._numitems, num2ret)

    if isinstance(self.D_r, np.ndarray):
        self.F = _mul(self.D_r, self.P, S)
    else:
        # D_r is not a plain array: use its own dot() and trim S to the
        # number of columns of P
        self.F = self.D_r.dot(self.P).dot(S[:self.P.shape[1]])

    return self.F
def image_svd(n):
    """Show a rank-n SVD approximation of 'mypicture.jpg', then the original.

    Each of the first three colour planes is decomposed with SVD, all but
    the first n singular values are zeroed, and the plane is rebuilt and
    written back into the image.  Figure 2 shows the result; the file is
    then read again and figure 1 shows the untouched image and channels.
    Prints the number of non-zero singular values per channel.

    Args:
        n: number of leading singular values to keep per channel.
    """
    # Work on a copy so the planes can be overwritten even if imread hands
    # back a read-only array.
    img = np.array(mpimg.imread('mypicture.jpg'))
    # r, g, b are views into img, so once the planes are overwritten below
    # they display the reconstructed data.
    [r, g, b] = [img[:, :, i] for i in range(3)]
    # Use the real image size instead of assuming an 800 x 1000 picture.
    rows, cols = r.shape

    r1, r2, r3 = sp.svd(r)
    g1, g2, g3 = sp.svd(g)
    b1, b2, b3 = sp.svd(b)

    r2_nonzero = (r2 != 0).sum()
    g2_nonzero = (g2 != 0).sum()
    b2_nonzero = (b2 != 0).sum()
    print("The number of non zero elements in decompose sigma of red, green, blue matrices are", r2_nonzero,"," ,g2_nonzero,"and" ,b2_nonzero, "respectively.")

    # Drop every singular value after the first n.
    r2[n:] = 0
    g2[n:] = 0
    b2[n:] = 0

    r2 = sp.diagsvd(r2, rows, cols)
    g2 = sp.diagsvd(g2, rows, cols)
    b2 = sp.diagsvd(b2, rows, cols)

    r_new = np.dot(r1, np.dot(r2, r3))
    g_new = np.dot(g1, np.dot(g2, g3))
    b_new = np.dot(b1, np.dot(b2, b3))

    img[:, :, 0] = r_new
    img[:, :, 1] = g_new
    img[:, :, 2] = b_new

    fig = plt.figure(2)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r, cmap = 'Reds')
    ax3.imshow(g, cmap = 'Greens')
    ax4.imshow(b, cmap = 'Blues')
    plt.show()

    # Re-read the file to display the unmodified image for comparison.
    img = mpimg.imread('mypicture.jpg')
    [r, g, b] = [img[:, :, i] for i in range(3)]
    fig = plt.figure(1)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r, cmap = 'Reds')
    ax3.imshow(g, cmap = 'Greens')
    ax4.imshow(b, cmap = 'Blues')
    plt.show()
def approximation(m, k=1):
    '''Rebuild m from a truncated singular value decomposition.

    k is the fraction of singular values to keep: the leading
    int(k * number_of_singular_values) components are retained and the
    remaining ones are discarded before the factors are multiplied back
    together.
    '''
    left, values, right_h = linalg.svd(m, False)
    keep = int(k * len(values))
    values = values[:keep]
    size = len(values)
    core = linalg.diagsvd(values, size, size)
    return dot(left[:, :keep], dot(core, right_h[:keep]))
def _calc_invJ(J, epsilon=0.01):
    """Return a truncated SVD pseudo-inverse of J as an np.matrix.

    Singular values greater than epsilon are inverted; the others are
    replaced by 0 so that near-singular directions are dropped rather than
    amplified.  For J of shape (r, c) the result has shape (c, r).
    """
    u, sigma, v = np.linalg.svd(J, full_matrices=True)

    inverted = []
    for value in sigma:
        inverted.append(1 / value if value > epsilon else 0)

    n_rows = np.shape(J)[0]
    n_cols = np.shape(J)[1]
    # The inverted Sigma has the transposed shape of the original one.
    sigma_inv = np.matrix(linalg.diagsvd(inverted, n_cols, n_rows))
    return np.matrix(v.transpose()) * sigma_inv * np.matrix(u.transpose())
def pca(X):
    """Run PCA through an SVD of X.T @ X / m, where m is the number of rows.

    X is used as given (no centring is done here).

    Returns:
        (U, S) as np.matrix objects: U holds the left singular vectors and
        S is the square diagonal matrix of singular values.
    """
    n_samples = X.shape[0]
    sigma = (X.T @ X) / n_samples
    U, values, _ = np.linalg.svd(sigma)
    # Convert the vector of singular values into matrix form.
    # https://docs.scipy.org/doc/scipy-0.19.1/reference/generated/scipy.linalg.diagsvd.html#scipy.linalg.diagsvd
    size = len(values)
    return np.matrix(U), np.matrix(diagsvd(values, size, size))
def ApplyWtmk(dwtDom2Wtmk, word):
    """Embed a watermark into the singular values of a 2-D array.

    The singular-value matrix S of the input is perturbed by the watermark
    generated for ``word``, the perturbed matrix is decomposed again, and
    the array is rebuilt from the ORIGINAL singular vectors and the NEW
    singular values.

    Args:
        dwtDom2Wtmk: 2-D array to watermark (by its name, a DWT sub-band).
        word: passed through to GenerateWatermark together with S's shape.
    Returns:
        Array of the same shape as dwtDom2Wtmk with the watermark embedded.
    """
    # Perform first SVD
    Ui, si, Vhi = linalg.svd(dwtDom2Wtmk, full_matrices=True)
    # With full_matrices=True Ui is (M, M) and Vhi is (N, N), so S must be
    # (M, N) -- also for tall inputs, where min/max of the shapes would
    # yield the transposed (N, M) layout and break the final product.
    Si = linalg.diagsvd(si, Ui.shape[0], Vhi.shape[0])

    # NOTE(review): GenerateWatermark is defined elsewhere; it is expected
    # to return an array that can be added in place to Si -- confirm.
    watermark = GenerateWatermark(Si.shape, word)

    # Apply watermark
    Si += watermark

    # Perform second SVD; only the singular values are used afterwards.
    swi = linalg.svd(Si, compute_uv=False)
    Swi = linalg.diagsvd(swi, Si.shape[0], Si.shape[1])

    # Restore chosen dwt domain with watermark embedded
    wtmkdDom = np.dot(Ui, np.dot(Swi, Vhi))
    return wtmkdDom
def resiGeneralised(A, B):
    """Solve the generalised eigenproblem A x = lambda B x and print the
    eigenvalues.

    B is factored as B = R^H R (upper-triangular Cholesky factor), which
    turns the problem into the standard one C y = lambda y with
    C = R^-H A R^-1 and y = R x.  R is inverted through its SVD.

    Args:
        A: square matrix.
        B: Hermitian positive-definite matrix of the same size (required by
           the Cholesky factorisation); it is left unmodified.
    Returns:
        The eigenvalues of C (also printed).
    """
    # No overwrite_a here: it would let the factorisation clobber the
    # caller's B.
    R = lin.cholesky(B)
    U, s, Vt = lin.svd(R)
    # R = U S Vt  =>  R^-1 = Vt^H S^-1 U^H
    InvS = lin.diagsvd(1 / s, U[0].size, U[0].size)
    InvR = np.matrix(Vt).H * InvS * np.matrix(U).H
    # The Cholesky factor is not a symmetric square root, so the left
    # factor must be the conjugate transpose of R^-1.
    C = InvR.H * np.matrix(A) * InvR
    eigenvalues = lin.eig(C)[0]
    print(eigenvalues)
    return eigenvalues
def svd(R):
    '''
    Returns the thin singular value decomposition of the ratings matrix as
    (U, S, Vt), with the singular values expanded into a square diagonal
    matrix S so that U . S . Vt reproduces R.
    '''
    left, values, right_t = linalg.svd(R, full_matrices=False)
    size = len(values)
    return left, linalg.diagsvd(values, size, size), right_t
def remove_constants(matrix, n=5):
    """Rebuild ``matrix`` with its n largest singular values set to zero.

    Args:
        matrix: 2-D array (any shape).
        n: number of leading singular components to remove.
    Returns:
        Array of the same shape as ``matrix``.
    """
    U, W, V_t = svd(matrix, full_matrices=True)
    # Singular values come back in descending order, so the first n are the
    # dominant components.
    W[:n] = 0.0
    # Sigma has to be (rows of U) x (columns of V_t) to sit between the full
    # factors; len(W) is only min(rows, cols) and breaks tall inputs.
    M = U.shape[0]
    N = V_t.shape[1]
    S = diagsvd(W, M, N)
    return np.dot(np.dot(U, S), V_t)
def lsa(document_word_matrix, dimension):
    """
    Take a document-word matrix and retrieve document-concept and
    word-concept matrices from it using latent semantic analysis (LSA).

    Args:
        document_word_matrix: 2-D array, one row per document and one
            column per word.
        dimension: number of concepts ("topics") to keep; must be between 1
            and min(rows, cols).
    Returns:
        (document_concept, word_concept): the first `dimension` left
        singular vectors (rows x dimension) and the first `dimension` right
        singular vectors (cols x dimension).
    Raises:
        ValueError: if `dimension` is outside the valid range.

    Also prints the full list of singular values and the squared
    reconstruction error of the truncated decomposition.
    """
    # We need to know the shape of our starting document-word matrix in
    # terms of number of rows and columns in order to run LSA.
    rows, cols = document_word_matrix.shape

    # There are only min(rows, cols) singular values, so we can't keep more
    # concepts than that.
    if dimension > min(rows, cols):
        raise ValueError("Dimension {} too big!".format(dimension))
    # Dimensions also have to be positive
    elif dimension < 1:
        raise ValueError("Dimension {} too small!".format(dimension))

    # Singular value decomposition splits the document-word matrix into
    # three factors that, multiplied together, recreate the original:
    #   - document_topic: documents as rows, "concept" proportions as
    #     columns,
    #   - singular_values: the diagonal of a nonnegative diagonal matrix,
    #     returned as a plain vector ordered from greatest to least, and
    #   - topic_word: "concepts" as rows, words as columns.
    document_topic, singular_values, topic_word = linalg.svd(document_word_matrix)
    print(singular_values)

    # Reduce to the number of concepts we want by discarding every column
    # (and row) past that dimension.
    lsa_singular_values = singular_values[:dimension]
    lsa_document_topic = document_topic[:, :dimension]

    # The right factor has concepts as rows; we'd rather return words as
    # rows and concepts as columns, so transpose it.
    lsa_topic_word = topic_word[:dimension, :]
    lsa_word_topic = np.transpose(lsa_topic_word)

    # Check the truncation by multiplying the reduced factors back together.
    new_singular_matrix = linalg.diagsvd(lsa_singular_values, dimension, dimension)
    transformed_matrix = dot(dot(lsa_document_topic, new_singular_matrix), lsa_topic_word)
    print("Representation error: {}".format(np.sum((document_word_matrix - transformed_matrix)**2)))

    return lsa_document_topic, lsa_word_topic
def compute_svd(self, matrix) :
    """Decompose ``matrix`` with a full SVD and multiply the factors back.

    No singular values are dropped, so the result is the reconstruction
    U . SIGMA . VT of the input (equal to it up to rounding).
    """
    left, values, right_t = linalg.svd(matrix)
    n_rows = len(matrix)
    n_cols = len(right_t)
    # Sigma comes back as a vector; expand it to an (n_rows x n_cols) matrix.
    core = linalg.diagsvd(values, n_rows, n_cols)
    rebuilt = numpy.dot(left, core)
    return numpy.dot(rebuilt, right_t)
def exsh(quest):
    """Find corpus entries related to ``quest`` with an LSA-style projection.

    Builds a term-count matrix from ``preprocess_wiki()``, decomposes it
    with SVD, projects the preprocessed query through the (pseudo-)inverse
    singular-value matrix and ``Vh``, and compares the first ``k = 5``
    components of each row of ``U`` with the projected query using cosine
    similarity.

    Args:
        quest: comma-separated query string.
    Returns:
        ``[quest, matches]`` where ``matches`` starts with up to 10
        comma-separated tokens of the preprocessed query that contain at
        least one alphanumeric character, followed by the entries of
        ``preprocess_wiki_values()`` whose row scored a similarity of at
        least 0.8.

    NOTE(review): relies on Python 2 semantics -- ``zip`` results are
    indexed and passed to ``np.matrix`` as lists, and a print statement is
    used.  ``preprocess_wiki``, ``preprocess_wiki_values`` and
    ``preprocess_q`` are defined elsewhere; their return types are assumed
    from how they are used here.
    """
    qtxt=quest
    mean=preprocess_wiki()
    vectorizer = CountVectorizer(min_df=0,stop_words=None,ngram_range=(1 , 1))
    X = vectorizer.fit_transform(mean)
    #analyze = vectorizer.build_analyzer()
    matrix = X.toarray()
    values=preprocess_wiki_values()
    mlist=values
    qtxt_q=preprocess_q(quest)
    idevent=quest.split(',')
    noua= qtxt_q.split(',')
    # The next two statements have no lasting effect: fcontent is rebuilt
    # from scratch by the comprehension right after them.
    fcontent=[]
    fcontent.append(idevent[0])
    # Keep only the tokens that still contain an alphanumeric character.
    fcontent = [wip for wip in noua if re.sub(r'[^0-9A-Za-z]', "", wip)]
    # Query term counts as a column vector (transposed through zip).
    q=vectorizer.transform([qtxt_q]).toarray()
    qtr= zip(*q)
    qt = np.matrix(qtr)
    M,N = matrix.shape
    U,s,Vh = linalg.svd(matrix)
    Sig = linalg.diagsvd(s,M,N)
    U, Vh = U, Vh
    # print U # -> is Vh
    # print Vh # -> is U
    # Reconstruction of the term-count matrix; the result is not used below.
    ur=U.dot(Sig.dot(Vh))
    Uk= np.matrix(Vh)
    # If the keyword matrix were to be saved, this would be the place to do it.
    #Sigk=np.matrix([[ 1/4.0989,0., 0., 0., 0., 0., 0., 0., 0., 0.,0. ], [ 0., 1/2.3616, 0., 0., 0., 0., 0., 0., 0., 0.,0. ],[ 0.,0.,1/1.27197841,0.,0.,0.,0.,0.,0.,0.,0. ]])
    Sig=np.matrix(Sig)
    # Inverse of the singular-value matrix, transposed back to Sig's
    # orientation so it can be multiplied with Uk.
    Sigk=Sig.getI()
    Sigk= Sigk.T
    qr=Sigk.dot(Uk)
    r=qr.dot(qt)
    # Projected query as a tuple of components (first zip entry).
    qqr=zip(*r.tolist())
    qlen=len(qqr[0])
    k = 5 # cut-off rank
    rus=fcontent[:10]
    #rus=[]
    if k < qlen:
        #print "'-._.-'"
        for tr in range(len(U)):
            # Cosine similarity between row tr of U and the projected
            # query, both truncated to the first k components.
            res=1-scipy.spatial.distance.cosine(U[tr][:k], qqr[0][:k])
            if res >= 0.8:
                rus.append(mlist[tr])
    else:
        print "Pruna menor de k ...determina un elemento mayor"
    tot_res=[qtxt]
    tot_res.append(rus)
    return tot_res # results: title + correlated words
#qtxt='12363 , Carabas Presents: Alternative Drinks (Music, Sci-fi, Film, Comics etc),enjoy along come friends invite ticket book etc art events bands comics books projects work attendees recommendations nostalgia clips wonderful weird screen plasma reel Film Visuals day present others Dance Rock Indie crowdpleaser system sound mix playlist Alternative Soundtrack room function venue Excellent drinks folk meeting enjoys thing kind like sounds thinks anyone frankly woes share ideas bounce work promote others meet professionals artists writers musicians creators like Comics fi Sci Style Music Alternative Film Cult life side alternative loves anyone night social ,1,Career and Life Balance,98,Music , 0'
#print exsh(qtxt)
def _make_data(n_samples, n_features, n_tasks, n_components):
    """Generate a random multi-output linear problem with low-rank weights.

    A random (n_tasks x n_features) weight matrix is drawn from the
    module-level ``rng``, all but its first ``n_components`` singular values
    are zeroed, and targets are computed as ``Y = X . W^T`` for a random
    design matrix X.

    Returns:
        (X, Y, W) with shapes (n_samples, n_features),
        (n_samples, n_tasks) and (n_tasks, n_features).
    """
    weights = rng.rand(n_tasks, n_features) - 0.5
    left, spectrum, right = svd(weights, full_matrices=True)
    # Truncate the spectrum so the rebuilt weights have rank <= n_components.
    spectrum[n_components:] = 0
    core = diagsvd(spectrum, left.shape[0], right.shape[0])
    W = np.dot(np.dot(left, core), right)

    X = rng.rand(n_samples, n_features) - 0.5
    return X, np.dot(X, W.T), W
def train_complete_SVD(M, K=9):
    """Factor the normalised ratings matrix into two rank-K factors.

    The matrix is normalised with the user and item averages, decomposed
    with a thin SVD, and the square roots of the K largest singular values
    are folded into both sides, so that ``U_tilde . V_tilde`` is the rank-K
    approximation of the normalised matrix.

    Args:
        M: ratings matrix, passed to find_user_and_item_avg / norm_matrix.
        K: number of factors to keep; if fewer singular values exist, all
           of them are used.
    Returns:
        (U_tilde, V_tilde): arrays with K columns and K rows respectively
        (fewer when the matrix has fewer than K singular values).
    """
    useravg, itemavg = find_user_and_item_avg(M)
    R_norm = norm_matrix(M, useravg, itemavg)
    U, s, V = linalg.svd(R_norm, full_matrices=False)
    # Slicing clamps K to the number of available singular values, and
    # broadcasting applies sqrt(Sigma) without a K x K matrix that would
    # not match a shorter spectrum.
    root = np.sqrt(s[:K])
    U_tilde = U[:, :K] * root
    V_tilde = root[:, np.newaxis] * V[:K, :]
    return U_tilde, V_tilde
def image_svd(n):
    """Show a rank-n SVD approximation of 'rainbow.jpg' and its channels.

    Each of the first three colour planes is decomposed with SVD, all but
    the first n singular values are zeroed, and the plane is rebuilt.  The
    recombined image and the three rebuilt planes are shown in figure 2.
    Prints the number of non-zero singular values per channel.

    Args:
        n: number of leading singular values to keep per channel.
    """
    # Work on a copy so the planes can be overwritten even if imread hands
    # back a read-only array.
    img = np.array(mpimg.imread('rainbow.jpg'))
    [j, k, m] = [img[:, :, i] for i in range(3)]
    # Use the real image size instead of assuming an 800 x 1000 picture.
    rows, cols = j.shape

    j1, j2, j3 = linalg.svd(j)
    k1, k2, k3 = linalg.svd(k)
    m1, m2, m3 = linalg.svd(m)

    j2_nonzero = (j2 != 0).sum()
    k2_nonzero = (k2 != 0).sum()
    m2_nonzero = (m2 != 0).sum()
    print("The number of non zero elements in decompose sigma of red, green, blue matrices are", j2_nonzero,"," ,k2_nonzero,"and" ,m2_nonzero, "respectively.")

    # Drop every singular value after the first n.
    j2[n:] = 0
    k2[n:] = 0
    m2[n:] = 0

    j2 = linalg.diagsvd(j2, rows, cols)
    k2 = linalg.diagsvd(k2, rows, cols)
    m2 = linalg.diagsvd(m2, rows, cols)

    j_new = np.dot(j1, np.dot(j2, j3))
    k_new = np.dot(k1, np.dot(k2, k3))
    m_new = np.dot(m1, np.dot(m2, m3))

    img[:, :, 0] = j_new
    img[:, :, 1] = k_new
    img[:, :, 2] = m_new

    fig2 = plt.figure(2)
    ax1 = fig2.add_subplot(2, 2, 1)
    ax2 = fig2.add_subplot(2, 2, 2)
    ax3 = fig2.add_subplot(2, 2, 3)
    ax4 = fig2.add_subplot(2, 2, 4)
    ax1.imshow(img)
    # Planes 0, 1, 2 are treated as red, green, blue (the order used in the
    # message above), so each one gets the matching colormap.
    ax2.imshow(j_new, cmap = 'Reds')
    ax3.imshow(k_new, cmap = 'Greens')
    ax4.imshow(m_new, cmap = 'Blues')
    plt.show()
def mySVD(matrix, doCheck=0, kk=False):
    '''
    Singular Value decomposition method to compute the (pseudo-)inverse of
    matrix.  Singular values not larger than 1e-14 are treated as zero, so
    singular and non-square inputs yield the pseudo-inverse.
    NOTE(review): the factors are combined with plain transposes, i.e. real
    input is assumed.

    option: doCheck=0,1,2
        0: return only the inverted SVD matrix
        1: return previous plus composite matrices
           (invSVD, U, Ut, V, Vh, Sig, invSig)
        2: return previous plus the reconstruction U.Sig.Vh as a check;
           with kk=True the products V.Vh, U.Ut and Sig.invSig are printed
    Any other doCheck value prints a hint and returns None.

    USAGE:
        s,sU,sUt,sV,sVh,sSig,sinvSig,sCheck = numMath.mySVD(a,doCheck=2)
    '''
    ### Compute the SVD parts
    M, N = matrix.shape
    U, s, Vh = linalg.svd(matrix)
    Sig = linalg.diagsvd(s, M, N)
    V = np.matrix(Vh).H
    Ut = np.matrix(U).T

    ### Invert Sigma, correcting for ill-conditioning: only singular values
    ### above the threshold are inverted, the rest stay 0.  Building the
    ### (N x M) inverse from the vector avoids inverting a singular matrix.
    inv_s = np.zeros_like(s)
    significant = s > 10**(-14)
    inv_s[significant] = 1.0 / s[significant]
    invSig = np.matrix(linalg.diagsvd(inv_s, N, M))

    ### Compute the Inverse of the matrix
    invSVD = V.dot(invSig).dot(Ut)

    ### and check the matrix
    checkMatrix = U.dot(Sig.dot(Vh))

    if doCheck == 0:
        return invSVD
    elif doCheck == 1:
        return (invSVD, U, Ut, V, Vh, Sig, invSig)
    elif doCheck == 2:
        if kk == True:
            print('V.dot(Vh)= \n%s' % (V.dot(Vh),))
            print('U.dot(Ut)= \n%s' % (U.dot(Ut),))
            print('Sig.dot(invSig)= \n%s' % (Sig.dot(invSig),))
        return (invSVD, U, Ut, V, Vh, Sig, invSig, checkMatrix)
    else:
        print('Read the description of numMath.mySVD')
def doLSA(self):
    """Compute the rank-limited LSA reconstruction of self.TM.

    Stores the full SVD factors in self.U and self.Vt, the diagonal matrix
    with every singular value from index self.k onward zeroed in
    self.sigma, and the product U . sigma . Vt in self.CM.
    """
    left, values, right_t = linalg.svd(self.TM)
    self.U = left
    self.Vt = right_t

    # Reduce Sig: zero every singular value from index self.k onward.
    for position in range(self.k, len(values)):
        values[position] = 0

    n_rows, _ = self.U.shape
    _, n_cols = self.Vt.shape
    self.sigma = array(linalg.diagsvd(values, n_rows, n_cols))

    partial = dot(self.U, self.sigma)
    self.CM = dot(partial, self.Vt)
def transform(self, dimensions=1):
    """Return self.matrix rebuilt with its smallest singular values removed.

    Calculates the SVD of self.matrix (U . SIGMA . VT), zeroes the
    ``dimensions`` smallest singular values and multiplies the factors back
    together.  self.matrix itself is not modified.

    Args:
        dimensions: how many singular values to drop; at most
            min(rows, cols), the number of singular values available.
    Returns:
        The reduced matrix, or None (after printing a message) when
        ``dimensions`` is too large.
    """
    rows, cols = self.matrix.shape
    # SVD yields min(rows, cols) singular values; indexing up to `rows`
    # runs past the end of sigma for tall matrices.
    limit = min(rows, cols)
    if dimensions > limit:
        print("dimension reduction cannot be greater than %s" % (limit))
        return None

    u, sigma, vt = linalg.svd(self.matrix)
    # Singular values are sorted in descending order: the tail holds the
    # smallest ones.
    sigma[limit - dimensions:] = 0
    transformed_matrix = dot(dot(u, linalg.diagsvd(sigma, len(self.matrix), len(vt))), vt)
    return transformed_matrix