def img_compr_separate(image, k):
    img = np.array(Image.open(image))
    img = img / 255
    row, col, _ = img.shape
    img_red = img[:, :, 0]
    img_green = img[:, :, 1]
    img_blue = img[:, :, 2]
    U_r, D_r, V_r = svd(img_red)
    U_g, D_g, V_g = svd(img_green)
    U_b, D_b, V_b = svd(img_blue)
    bytes_stored = sum([
        matrix.nbytes for matrix in
        [U_r, D_r, V_r, U_g, D_g, V_g, U_b, D_b, V_b]
    ])
    U_r_k = U_r[:, 0:k]
    U_g_k = U_g[:, 0:k]
    U_b_k = U_b[:, 0:k]
    V_r_k = V_r[0:k, :]
    V_g_k = V_g[0:k, :]
    V_b_k = V_b[0:k, :]
    D_r_k = D_r[0:k]
    D_g_k = D_g[0:k]
    D_b_k = D_b[0:k]
    compressed_bytes = sum([
        matrix.nbytes for matrix in
        [U_r_k, D_r_k, V_r_k, U_g_k, D_g_k, V_g_k, U_b_k, D_b_k, V_b_k]
    ])
    img_red_compr = np.dot(U_r_k, np.dot(np.diag(D_r_k), V_r_k))
    img_green_compr = np.dot(U_g_k, np.dot(np.diag(D_g_k), V_g_k))
    img_blue_compr = np.dot(U_b_k, np.dot(np.diag(D_b_k), V_b_k))
    img_compr = np.zeros((row, col, 3))
    img_compr[:, :, 0] = img_red_compr
    img_compr[:, :, 1] = img_green_compr
    img_compr[:, :, 2] = img_blue_compr
    img_compr[img_compr < 0] = 0
    img_compr[img_compr > 1] = 1
    plt.xlabel('rank = {}'.format(k))
    plt.imshow(img_compr)
    plt.show()
    matplotlib.image.imsave('compressed_{}.{}'.format(k, image.split('.')[-1]), img_compr)
    mse = ((img - img_compr) ** 2).mean(axis=None)
    return bytes_stored, compressed_bytes, mse
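# A minimal driver for img_compr_separate, spelling out the imports its body
# relies on. The filename 'photo.jpg' and the sweep of ranks are illustrative
# assumptions, not part of the original source.
import numpy as np
import matplotlib.image
import matplotlib.pyplot as plt
from PIL import Image
from numpy.linalg import svd  # any svd with the (U, D, V) return convention works here

for rank in (5, 20, 50):
    full_bytes, compr_bytes, mse = img_compr_separate('photo.jpg', rank)
    print('rank {}: {:.1%} of original size, MSE {:.5f}'.format(
        rank, compr_bytes / full_bytes, mse))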
def testSVD():
    '''SVD-based feature selection.'''
    features = trainingDataWithoutLabel()
    New_features = svd(features)
    test_features = svd(testDataWithoutLabel())
    print('SVD', New_features.shape)
    testModel(New_features, test_features)
def generate_models(self, train_matrix):
    scores_by_movie = score_matrix(train_matrix, avg='movie')
    scores_by_user = score_matrix(train_matrix, avg='user')
    self.movie_normalized_models = []
    self.user_normalized_models = []
    for i in range(self.start, self.stop, self.step):
        self.movie_normalized_models.append(svd(scores_by_movie, i))
        self.user_normalized_models.append(svd(scores_by_user, i))
def kabsch(X, Y):
    """Apply the Kabsch algorithm to two sets of paired points X and Y.

    Args:
        X (np.ndarray): NxD matrix, where N is the number of points and D is the dimension.
        Y (np.ndarray): NxD matrix, where N is the number of points and D is the dimension.

    Returns:
        np.ndarray: Rotation matrix (D, D)
    """
    # Computation of the covariance matrix
    C = np.dot(np.transpose(X), Y)

    # Computation of the optimal rotation matrix, see:
    # http://en.wikipedia.org/wiki/Kabsch_algorithm
    V, S, W = svd(C)
    # Correct for a reflection (improper rotation) if the determinants call for it
    d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0
    if d:
        V[:, -1] = -V[:, -1]

    # Create Rotation matrix U
    U = np.dot(V, W)
    return U
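# A quick check of kabsch on synthetic data: rotate a centred 2-D point cloud by
# a known rotation and confirm the recovered matrix matches. numpy (as np) and
# np.linalg.svd are assumed, matching the call pattern inside the function.
import numpy as np
from numpy.linalg import svd

theta = 0.3
R_true = np.array([[np.cos(theta), -np.sin(theta)],
                   [np.sin(theta),  np.cos(theta)]])
X = np.random.randn(50, 2)        # N x D point set, already centred
Y = X @ R_true                    # the same points after rotation
R_est = kabsch(X, Y)
print(np.allclose(R_est, R_true))  # expected: True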
def main():
    semeval_dir = 'data/maui-semeval2010-test/'
    manual_keywords = []
    total_precision = 0
    total_recall = 0
    total_docs = 0
    method = str(sys.argv[1])
    fdir = str(sys.argv[2])
    single = False
    num_top = -1
    if (len(sys.argv) > 3) and (str(sys.argv[3]) == 'single'):
        single = True
    if len(sys.argv) > 4:
        num_top = int(sys.argv[4])
    filenames = sorted(os.listdir(fdir))
    for filename in filenames:
        if filename[0] == '.':
            continue
        print(filename)
        with open(fdir + filename, 'r') as f:
            content = f.read()
        if method == 'svd':
            keywords = svd(content, 1, single, num_top)
        elif method == 'raketr':
            keywords = raketr.main(content, single, num_top)
        elif method == 'cluster':
            keywords = kcluster(content, 6, 15, single, num_top)
        else:
            print('methods accepted: svd raketr cluster, please specify')
            exit(0)
        print('keyphrases found')
        print(keywords)
def UserRecommend(name):
    # data.json is GB2312-encoded, so decode it explicitly when opening
    with open("data.json", encoding="GB2312") as fp:
        u = json.load(fp)
    i = 0
    try:
        for z in range(1, len(u['user'])):
            if u['user'][z]['name'] == name:
                i = z
    except Exception:
        print('err')
    temp = svd.svd()
    userDmkForm = temp[i]
    a = [abs(x - int(u['user'][i]['dmk_log'])) for x in userDmkForm]
    result1 = a.index(min(a))
    result1 = u['bangumi'][result1]['title']
    result2 = findmin(a, 1)
    result2 = u['bangumi'][result2]['title']
    result3 = findmin(a, 2)
    result3 = u['bangumi'][result3]['title']
    result4 = findmin(a, 3)
    result4 = u['bangumi'][result4]['title']
    result = [result1, result2, result3, result4]
    return result
def WeightedPrinComp(self, Mat, Rep=-1):
    """Takes a matrix and row-weights and manually computes the statistical
    procedure known as Principal Components Analysis (PCA).

    This version of the procedure is so basic that it can also be thought of
    as merely a singular-value decomposition on a weighted covariance matrix.
    """
    wCVM = self.WeightedCov(Mat, Rep)
    SVD = svd.svd(wCVM['Cov'])
    L = SVD[0].T[0]                        # First loading
    S = dot(wCVM['Center'], SVD[0]).T[0]   # First score
    return (L, S)
def WeightedPrinComp(M, Weights):
    if len(Weights) != len(M):
        print('Weights must be equal in length to rows')
        return 'error'
    if type(Weights[0]) != list:
        # promote a flat weight vector to a column (list of one-element rows);
        # wrap in list() so this also works on Python 3, where map is lazy
        Weights = list(map(lambda x: [x], Weights))
    wCVM = WeightedCov(M, Weights)
    SVD = svd.svd(wCVM['Cov'])
    L = switch_row_cols(SVD[0])[0]
    S = switch_row_cols(dot(wCVM['Center'], L))[0]
    return {'Scores': S, 'Loadings': L}
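# A self-contained numpy sketch of the same idea: weighted PCA as an SVD of a
# weighted covariance matrix. WeightedCov and switch_row_cols are not shown in
# the source, so the weighting scheme here (weights normalised to sum to 1) is
# an assumption for illustration only.
import numpy as np

def weighted_princomp(M, weights):
    w = np.asarray(weights, dtype=float)
    w = w / w.sum()
    center = M - np.average(M, axis=0, weights=w)  # weighted mean-centering
    cov = center.T @ (center * w[:, None])         # weighted covariance
    U, s, Vt = np.linalg.svd(cov)
    loadings = U[:, 0]                             # first principal direction
    scores = center @ loadings                     # first principal score
    return {'Scores': scores, 'Loadings': loadings}

M = np.random.randn(100, 3) @ np.diag([3.0, 1.0, 0.2])
out = weighted_princomp(M, np.ones(100))
print(out['Loadings'])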
def test_factorization(self):
    r2 = np.sqrt(2)
    u = np.array([[1 / r2, 1 / r2], [-1 / r2, 1 / r2]])
    d = np.array([[4, 0], [0, 1]])
    v_t = np.eye(2)
    mat = np.dot(np.dot(u, d), v_t)
    result = svd.svd(mat)
    assert_array_almost_equal(mat, np.dot(np.dot(result[0], result[1]), result[2]))
    assert_array_almost_equal(u, result[0], decimal=10)
    assert_array_almost_equal(d, result[1], decimal=10)
    assert_array_almost_equal(v_t, result[2], decimal=10)
def recommendation():
    selected_option = request.args.get('type')
    if selected_option == '1':
        k = ["This option is yet to come..!"]
    if selected_option == '2':
        k = knn.knn(bookname)
    if selected_option == '3':
        k = svd.svd(bookname)
    if selected_option == '4':
        k = ["The Lovely Bones: A Novel",
             "The Da Vinci Code",
             "The Red Tent (Bestselling Backlist)",
             "Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))",
             "The Secret Life of Bees",
             "Wild Animus",
             "Divine Secrets of the Ya-Ya Sisterhood: A Novel",
             "Where the Heart Is (Oprah's Book Club (Paperback))",
             "Girl with a Pearl Earring",
             "Angels & Demons"]
    return render_template('recommendations.html', k=k)
def test_svd_with_zero_singular_values(self):
    matrix = [[3, -2], [-3, 2]]
    result = svd(matrix, max_eigenvalues=2, iterations=10)
    assert len(result) == 1
    assert_allclose(result[0][0], [[0.7071067811865475], [-0.7071067811865475]])
    assert result[0][1] == approx(5.099019513592785)
    assert_allclose(result[0][2], [[0.8320502943378437], [-0.5547001962252291]])
def test_1_by_1():
    matrix = numpy.array([[2]], dtype='float64')
    singular_values, us, vs = svd(matrix)
    assert_that(singular_values).is_equal_to([2])
    assert_that(us).is_length(1)
    assert_that(vs).is_length(1)
    # the sign of a singular vector pair is arbitrary, but must be consistent
    if us[0] == [-1.0]:
        assert_that(vs[0]).is_equal_to([-1.0])
    else:
        assert_that(us[0]).is_equal_to([1.0])
        assert_that(vs[0]).is_equal_to([1.0])
def problem():
    # Initialize parameters of the problem
    m = 15
    n = 100
    c = 2006.787453080206

    # Initialize matrix A and vector b
    b = empty(n)
    A = empty((n, m))

    # Calculate A and b according to the parameters
    i = 0
    while i < n:
        A[i, 0] = 1
        A[i, 1] = i / (n - 1)
        j = 2
        while j < m:
            A[i, j] = power(A[i, 1], j)
            j = j + 1
        b[i] = (1 / c) * exp(sin(4 * A[i, 1]))
        i = i + 1

    print('Matrix A:')
    print(A)
    print('\nVector b:')
    print(b)

    # Calculate x by solving the least squares problem using the normal equations
    # NOTE: This won't execute because A is very ill conditioned (det ~= 1E-90)
    # ne_x = normal_equations(A, b)
    # print(ne_x)

    # Calculate x by solving the least squares problem using Householder transformations
    h_x = householder(A, b)
    print('\nValue of x using Householder:')
    print(h_x)

    # Calculate x by solving the least squares problem using Gram-Schmidt
    gs_x = gram_schmidt(A, b)
    print('\nValue of x using Gram-Schmidt:')
    print(gs_x)

    # Calculate x by solving the least squares problem using modified Gram-Schmidt
    mgs_x = modified_gram_schmidt(A, b)
    print('\nValue of x using modified Gram-Schmidt:')
    print(mgs_x)

    # Calculate x by solving the least squares problem using the SVD decomposition (pseudoinverse)
    svd_x = svd(A, b)
    print('\nValue of x using the SVD decomposition (pseudoinverse):')
    print(svd_x)
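# Why the normal equations fail here: forming A.T @ A squares the condition
# number of this Vandermonde-like matrix. A short numpy check with the same m
# and n as problem(); numpy is an assumed import, since the snippet itself
# relies on names like empty and power from elsewhere.
import numpy as np

n, m = 100, 15
t = np.arange(n) / (n - 1)
A = np.vander(t, m, increasing=True)      # same monomial basis as problem()
print('cond(A)       = {:.3e}'.format(np.linalg.cond(A)))
print('cond(A.T @ A) = {:.3e}'.format(np.linalg.cond(A.T @ A)))
# cond(A.T @ A) ~= cond(A)**2, far beyond double precision here, while
# Householder QR and the SVD work on A directly and stay usable.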
def test_2by4_factorization(self):
    svd.TEST = True
    r3 = np.sqrt(3)
    r2 = np.sqrt(2)
    u = np.array(
        [
            [1.0 / 4.0, 1.0 / 4.0, 1.0 / 4.0],
            [1.0 / 4.0, -1.0 / 4.0, 1.0 / 4.0],
            [1.0 / 4.0, 1.0 / 4.0, -1.0 / 4.0],
            [1.0 / 4.0, -1.0 / 4.0, -1.0 / 4.0],
        ]
    )
    d = np.zeros((3, 3), dtype=np.float64)
    d[0][0] = 10
    d[1][1] = 5
    d[2][2] = 2
    v = np.array([[1 / r3, 1 / r3, 1 / r3], [1 / r2, -1 / r2, 0]])
    # Now we transpose it
    mat = np.dot(np.dot(v, d), u.T)
    result = svd.svd(mat)
    trans_res = svd.svd(mat.T)
    assert_array_almost_equal(result[0].T, trans_res[2])
    assert_array_almost_equal(mat, np.dot(np.dot(result[0], result[1]), result[2]))
def rSVD(A, rank):
    """
    Implementation of randomized SVD.

    :param A: input matrix
    :param rank: rank of the SVD decomposition
    :return: SVD decomposition matrices
    """
    n, m = A.shape
    # random sketch: project A onto a random low-dimensional subspace
    P = np.random.randn(m, rank)
    Z = A @ P
    # orthonormal basis for the range of the sketch
    q, r = np.linalg.qr(Z, mode="reduced")
    # project A into that basis and take the (cheap) SVD there
    Y = q.T @ A
    s, uy, v = svd(Y, min(min(Y.shape), rank))
    # lift the left singular vectors back to the original space
    u = q @ uy
    return s, u, v
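# A self-contained check of the same randomized-SVD idea, swapping in
# np.linalg.svd for the inner step (the custom svd(Y, k) above returns
# (s, u, v), but its exact convention is not shown in the source).
import numpy as np

def rsvd_np(A, rank):
    P = np.random.randn(A.shape[1], rank)
    Q, _ = np.linalg.qr(A @ P, mode="reduced")
    U_small, s, Vt = np.linalg.svd(Q.T @ A, full_matrices=False)
    return Q @ U_small[:, :rank], s[:rank], Vt[:rank]

A = np.random.randn(200, 50) @ np.random.randn(50, 120)  # rank <= 50
U, s, Vt = rsvd_np(A, 50)
print(np.allclose(A, (U * s) @ Vt))  # expected: True, the rank was captured fully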
def test_svd(self):
    matrix = np.array([[2, -1], [-1, 2]])
    result = svd(matrix, max_eigenvalues=2, iterations=10)
    assert len(result) == 2
    assert_allclose(result[0][0], [[0.7071067812541463], [-0.7071067811189488]])
    assert result[0][1] == approx(3)
    assert_allclose(result[0][2], [[0.7071067813893437], [-0.7071067809837513]])
    assert_allclose(result[1][0], [[0.7071067805781592], [0.7071067817949359]])
    assert result[1][1] == approx(1)
    assert_allclose(result[1][2], [[0.7071067809837512], [0.7071067813893438]])
def test_4by2_factorization(self):
    r3 = np.sqrt(3)
    r2 = np.sqrt(2)
    u = np.array(
        [
            [1.0 / 4.0, 1.0 / 4.0, 1.0 / 4.0],
            [1.0 / 4.0, -1.0 / 4.0, 1.0 / 4.0],
            [1.0 / 4.0, 1.0 / 4.0, -1.0 / 4.0],
            [1.0 / 4.0, -1.0 / 4.0, -1.0 / 4.0],
        ]
    )
    d = np.zeros((3, 3), dtype=np.float64)
    d[0][0] = 10
    d[1][1] = 5
    d[2][2] = 2
    v = np.array([[1 / r3, 1 / r3, 1 / r3], [1 / r2, -1 / r2, 0]])
    mat = np.dot(np.dot(u, d), v.T)
    result = svd.svd(mat)
    assert_array_almost_equal(mat, np.dot(np.dot(result[0], result[1]), result[2]))
def equipoints(k, state0, state1, num):
    # 2x2 marginal covariance of the two selected states at time k
    mat = [[EKF.Ps[k][state0, state0], EKF.Ps[k][state0, state1]],
           [EKF.Ps[k][state1, state0], EKF.Ps[k][state1, state1]]]
    decomp = svd.svd(mat)
    U = decomp[0]
    std0 = math.sqrt(decomp[1][0])
    std1 = math.sqrt(decomp[1][1])
    SLam = [[std0, 0.0], [0.0, std1]]
    m = [[EKF.ms[k][state0, 0]], [EKF.ms[k][state1, 0]]]
    m = numpy.matrix(m)
    SLam = numpy.matrix(SLam)
    U = numpy.matrix(U)
    xy = []
    # map the unit circle through U * sqrt(Lambda) to trace the covariance ellipse
    for angle in numpy.arange(0, 2 * math.pi + 2 * math.pi / num, 2 * math.pi / num):
        circlepoint = numpy.matrix([[math.cos(angle)], [math.sin(angle)]])
        xypoints = m + U * SLam * circlepoint
        xy.append([xypoints[0, 0], xypoints[1, 0]])
    return xy
def compress(self):
    print('Compressing', self.img_name, 'with k =', self.k, '...')
    RGB = ['R', 'G', 'B']
    # iterate over all three colour channels; the original
    # range(self.img_ori_arr.shape[2] - 1) silently skipped the last one
    for i in range(len(RGB)):
        channel = self.img_ori_arr[..., i]
        print(RGB[i], 'channel shape: ', channel.shape)
        u, s, vh = svd(channel)
        # zero out all singular values beyond the first k; the original slice
        # s[self.k:-1] left the smallest singular value in place
        s[self.k:] = 0
        s = np.diag(s)
        print(u.shape)
        print(s.shape)
        print(vh.shape)
        # uncomment and modify the lines below if the input is not square,
        # to pad s to fit the svd output shapes
        # s_new = np.zeros((9, 220))
        # s = np.vstack((s, s_new))
        self.output[..., i] = np.clip(np.matmul(np.matmul(u, s), vh), 0, 255)
    self.compressed = True
    print('Image compressed!\n')
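# A small numpy aside on the truncation idiom used above: zeroing the trailing
# singular values gives the best rank-k approximation, and its spectral-norm
# error equals the first discarded singular value (Eckart-Young). Illustrative
# only; the matrix here is random.
import numpy as np

A = np.random.randn(60, 40)
U, s, Vh = np.linalg.svd(A, full_matrices=False)
k = 10
s_k = s.copy()
s_k[k:] = 0                            # keep only the k largest singular values
A_k = (U * s_k) @ Vh                   # rank-k reconstruction
print(np.isclose(np.linalg.norm(A - A_k, 2), s[k]))  # expected: True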
def cluster_stories(documents, k=10):
    '''Cluster a set of documents using a simple SVD-based topic model.

    Arguments:
        documents: a list of dictionaries of the form
            {
                'words': [string]
                'text': string
            }
        k: the number of singular values to compute.

    Returns:
        A pair of (word_clusters, document_clusters), where word_clusters is
        a clustering over the set of all words in all documents, and
        document_clusters is a clustering over the set of documents.
    '''
    matrix, (index_to_word, index_to_document) = make_document_term_matrix(documents)
    matrix = normalize(matrix)
    sigma, U, V = svd(matrix, k=k)
    projected_documents = np.dot(matrix.T, U)
    projected_words = np.dot(matrix, V.T)

    document_centers, document_clustering = cluster(projected_documents)
    word_centers, word_clustering = cluster(projected_words)

    word_clusters = tuple(
        tuple(index_to_word[i] for (i, x) in enumerate(word_clustering) if x == j)
        for j in range(len(set(word_clustering))))
    document_clusters = tuple(
        tuple(index_to_document[i]['text']
              for (i, x) in enumerate(document_clustering) if x == j)
        for j in range(len(set(document_clustering))))

    return word_clusters, document_clusters
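# A toy end-to-end run of the same latent-semantic idea, self-contained so the
# helpers (make_document_term_matrix, normalize, cluster) aren't needed: build a
# tiny term-document matrix by hand, project through a truncated SVD, and show
# that related documents land close together. All data here is made up.
import numpy as np

docs = ["cat dog pet", "dog pet animal", "stock market trade", "market trade price"]
vocab = sorted({w for d in docs for w in d.split()})
M = np.array([[d.split().count(w) for d in docs] for w in vocab], dtype=float)

U, s, Vt = np.linalg.svd(M, full_matrices=False)
doc_vecs = Vt[:2].T * s[:2]          # documents in the top-2 latent space

def cos(a, b):
    return a @ b / (np.linalg.norm(a) * np.linalg.norm(b))

print(cos(doc_vecs[0], doc_vecs[1]))  # pets vs pets: near 1
print(cos(doc_vecs[0], doc_vecs[2]))  # pets vs finance: near 0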
def test_reconstruct_matrix_from_svd():
    matrix = numpy.array([
        [2, 5, 3],
        [1, 2, 1],
        [4, 1, 1],
        [3, 5, 2],
        [5, 3, 1],
        [4, 5, 5],
        [2, 4, 2],
        [2, 2, 5],
    ], dtype='float64')
    singular_values, us, vs = svd(matrix)
    singular_value_matrix = numpy.diag(singular_values)
    reconstructed_matrix = numpy.dot(us, numpy.dot(singular_value_matrix, vs))
    flattened_original = matrix.flatten()
    flattened_actual = reconstructed_matrix.flatten()
    for (a, b) in zip(flattened_actual, flattened_original):
        assert_that(a).is_close_to(b, EPSILON)
def main():
    # load all matrices
    all_orig = load_sparse_matrix('all')
    all_norm = load_sparse_matrix('all', normalized=True)
    train_orig = load_sparse_matrix('train')
    train_norm = load_sparse_matrix('train', normalized=True)
    test_orig = load_sparse_matrix('test')
    test_norm = load_sparse_matrix('test', normalized=True)
    test_txt_path = get_txt_path_by_type('test')

    # perform collaborative filtering with and without baseline approach
    collab_matrix = collaborative_filtering(train_norm, train_orig, test_orig,
                                            collaborative_neighbours)
    rmse_spearman(collab_matrix, test_orig, test_txt_path)
    precision_on_top_k(collab_matrix, all_orig)
    collab_matrix_baseline = collaborative_filtering(train_norm, train_orig, test_orig,
                                                     collaborative_neighbours, baseline=True)
    rmse_spearman(collab_matrix_baseline, all_orig, test_txt_path)
    precision_on_top_k(collab_matrix_baseline, all_orig)

    # perform svd
    for energy in [1, 0.9]:
        svd_matrix = svd(train_norm, concepts, energy)
        rmse_spearman(svd_matrix, test_norm, test_txt_path)
        precision_on_top_k(svd_matrix, all_norm)

    # perform cur
    for energy in [1, 0.9]:
        cur_matrix = cur(train_norm, CUR_no_cols, concepts, energy)
        rmse_spearman(cur_matrix, test_norm, test_txt_path)
        precision_on_top_k(cur_matrix, all_norm)
import numpy as np
import pandas as pd

from utility import create_utility_matrix, rmse
from svd import svd

data = pd.read_csv('../data/ratings7kusers.csv')
data['userId'] = data['userId'].astype('str')
data['movieId'] = data['movieId'].astype('str')
users = data['userId'].unique()
movies = data['movieId'].unique()

dataset = pd.DataFrame(data=data)
utilMat, users_index, items_index = create_utility_matrix(dataset)
svd_out = svd(utilMat, k=10)

while True:
    user = input("Enter user ID: ")
    u_index = users_index[user]
    films_ratings = utilMat.loc[user].copy()
    unwatched_films_ratings = films_ratings[films_ratings.isnull()]
    unwatched_films = list(unwatched_films_ratings.index.values)
    unwatched_films_predicts = []
    for item in unwatched_films:
        if item in items_index:
            i_index = items_index[item]
            unwatched_films_predicts.append([item, svd_out[u_index, i_index]])
        else:
            unwatched_films_predicts.append([item, np.mean(svd_out[u_index, :])])
    unwatched_films_predicts.sort(key=lambda x: x[1], reverse=True)
print(Control_matrix)
# This is the controllability matrix I get:
#
# [[-0.70710678 -0.70710678  0.70710678  0.70710678 -0.70710678 -0.70710678  0.70710678  0.          0.          0.        ]
#  [-0.70710678  0.70710678  0.70710678 -0.70710678 -0.70710678  0.70710678  0.70710678  0.          0.          0.        ]
#  [ 0.70710678  0.70710678 -0.70710678 -0.70710678  0.70710678  0.70710678 -0.70710678 -0.70710678  0.70710678  0.        ]
#  [ 0.70710678 -0.70710678 -0.70710678  0.70710678  0.70710678 -0.70710678 -0.70710678  0.70710678  0.70710678  0.        ]
#  [ 0.          0.          0.          0.          0.          0.          0.          0.          0.          1.        ]]

Control_rank = matrix_rank(Control_matrix)
print("rank = " + str(Control_rank))
# rank = 5 --> fully controllable.

print("(2) find steering input sequence")
# we need to transpose first before feeding into the svd function
# due to a shape constraint
a, b, c = svd.svd(svd.transpose(Control_matrix))
# This is the "a" I get:
#
# [[ -0.48324982438233488,     7.6327832942979512e-16, -1.0178532794652785e-14,  0.12833396757851195,    0.0],
#  [  6.9388939039072284e-16,  0.55105881968666859,     0.17224259223220284,     1.385436904088877e-14,  0.0],
#  [  0.48324982438233482,    -8.6042284408449632e-16,  1.0440476039887174e-14, -0.12833396757851209,    0.0],
#  [ -7.4940054162198066e-16, -0.55105881968666848,    -0.17224259223220295,    -1.3783679059242715e-14, 0.0],
#  [ -0.48324982438233488,     7.4940054162198066e-16, -1.0398842676101839e-14,  0.12833396757851209,    0.0],
#  [  7.4940054162198066e-16,  0.55105881968666848,     0.17224259223220295,     1.3783679059242715e-14, 0.0],
#  [  0.48324982438233488,    -7.4940054162198066e-16,  1.0398842676101839e-14, -0.12833396757851209,    0.0],
#  [ -5.134781488891349e-16,  -0.29833292097354391,     0.95446187365624646,     7.6645287339083268e-14, 0.0],
#  [ -0.25666793515702424,     4.8572257327350599e-16,  7.7440328390661301e-14, -0.96649964876466921,    0.0],
#  [  0.0,                     0.0,                    -0.0,                     0.0,                   -1.0]]
# This is the "b" I get:
Go = X[:, :, 1]
Bo = X[:, :, 2]

# compute the storage space for X - image
storage_o = X.nbytes / 1024.0 / 1024.0
index = array([[10, 25, 50, 100, 250]])

# initialize comparison metrics (Frobenius norm, stress, variance)
Fros = zeros((3, index.shape[1]))
var = zeros((3, index.shape[1]))
Stress = zeros((3, index.shape[1]))
storages = zeros((3, index.shape[1]))

# svd
tfros, tvars, tstr, mbs = svd(Ro, Go, Bo, index)
Fros[0, :] = transpose(tfros)
var[0, :] = transpose(tvars)
Stress[0, :] = transpose(tstr)
storages[0, :] = transpose(mbs)

# pca
tfros, tvars, tstr, mbs = pca(Ro, Go, Bo, index)
Fros[1, :] = transpose(tfros)
var[1, :] = transpose(tvars)
Stress[1, :] = transpose(tstr)
storages[1, :] = transpose(mbs)

# mds
tfros, tvars, tstr, mbs = mds(Ro, Go, Bo, index)
Fros[2, :] = transpose(tfros)
 [ 0.00000000e+00  0.00000000e+00  1.00000000e+00]
 [ 1.32506500e+02 -8.34400000e+00 -8.00000000e-04]
 [ 1.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  1.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  1.00000000e+00]]
'''

Observe_rank = matrix_rank(Observe_matrix)
print("rank = " + str(Observe_rank))
# rank = 3 --> fully observable.

print("(2) find steering input sequence")
# we need to transpose first before feeding into the svd function
# due to a shape constraint
a, b, c = svd.svd(Observe_matrix)
# This is the "a" I get: (30*30)
'''
[[ -2.06752633e-01 -6.60343207e-01 -6.34190843e-01]
 [ -3.52274770e-02  4.94296295e-01 -5.02801802e-01]
 [ -9.77093922e-01  1.03529721e-01  1.69907217e-01]
 [ -7.45412132e-03  1.19624786e-02  1.22650501e-02]
 [ -1.27006499e-03  1.77514504e-01  1.74459959e-01]
 [ -3.52274770e-02  4.94296295e-01 -5.02801802e-01]
 [ -2.68745749e-04  5.28837683e-03 -5.77741481e-03]
 [ -4.57892527e-05  2.47250717e-02 -3.15056066e-02]
 [ -1.27006499e-03  1.77514504e-01  1.74459959e-01]
 [ -9.68914976e-06  1.88272669e-03  2.02969877e-03]
 [ -1.65073168e-06  8.62167304e-03  1.13271243e-02]
def compute(self, nchan, seed, n_trajec, run=4):
    # The "run" parameter controls when the fitting stops.
    # If run == 0, it doesn't fit.
    self.lastrun = run
    self.lastnchan = nchan
    self.lastseed = seed
    self.lastn_trajec = n_trajec
    self.datagen.fill(nchan, seed, n_trajec)
    printSomeInfo()
    # self.tl = self.N.likelihood()
    # print(self.tl)
    if run == 0:
        return

    # Initial Guess
    self.usingArgs(True, True)
    self.N.setParmVal(self.guess)

    # Nuisance Fit At True
    global preG, preH, postG, postH
    self.usingArgs(False, True)
    h.attr_praxis(seed)
    # passes here: assert(False)
    # print('SIZE =', self.N.getParm().size())
    self.preTrueParm = self.N.getParmVal()
    # passes here: assert(False)
    self.preTruef = self.ef()
    # fails here: assert(False)
    if preG:
        self.preTrueGrad = numpy.matrix(self.Gradient())
    if preH:
        self.preTrueHess = numpy.matrix(self.Hessian())
    print("USE 0", cvodewrap.fs.use_fixed_step)
    self.mrf.efun()
    print("USE 1", cvodewrap.fs.use_fixed_step)
    self.postTrueParm = self.N.getParmVal()
    self.postTruef = self.ef()
    self.otle = self.N.getParmVal()
    self.otml = self.N.likelihood()  # optimized true maximum likelihood
    if postG:
        self.TrueGrad = numpy.matrix(self.Gradient())
    if postH:
        self.TrueHess = numpy.matrix(self.Hessian())
    if run == 1:
        return

    # Square Norm Fit
    self.usingArgs(True, False)
    self.generator.use_likelihood = 0
    h.attr_praxis(seed)
    self.preSNFParm = self.N.getParmVal()
    self.preSNFf = self.ef()
    self.mrf.efun()
    self.postSNFParm = self.N.getParmVal()
    self.postSNFf = self.ef()
    self.generator.use_likelihood = 1
    if run == 2:
        return

    # Main Parm Fit:
    global MPF
    if MPF:
        h.attr_praxis(seed)
        self.preMPFParm = self.N.getParmVal()
        self.preMPFf = self.ef()
        self.mrf.efun()
        self.postMPFParm = self.N.getParmVal()
        self.postMPFf = self.ef()
        self.MPFpValue = self.get_pValue(self.postTruef, self.postMPFf,
                                         self.trueParm.size())
    if run == 3:
        return

    # All Parm Fit
    self.usingArgs(True, True)
    h.attr_praxis(seed)
    self.preAPFParm = self.N.getParmVal()
    self.preAPFf = self.ef()
    if preG:
        self.preAPFGrad = numpy.matrix(self.Gradient())
    if preH:
        self.preAPFHess = numpy.matrix(self.Hessian())
    print("USE 2", cvodewrap.fs.use_fixed_step)
    self.mrf.efun()
    print("USE 3", cvodewrap.fs.use_fixed_step)
    self.postAPFParm = self.N.getParmVal()
    self.postAPFf = self.ef()
    self.APFpValue = self.get_pValue(self.postTruef, self.postAPFf,
                                     self.trueParm.size())
    self.mle = self.N.getParmVal()
    self.ml = self.N.likelihood()
    if postG:
        self.APFGrad = numpy.matrix(self.Gradient())
    if postH:
        self.APFHess = numpy.matrix(self.Hessian())
    if run == 4:
        return

    self.H = numpy.matrix(self.Hessian())
    # log-determinant of the Hessian via its singular values, floored at 1e-14
    # to guard against log(0)
    svdList = svd.svd(numpy.array(self.H))[1]
    self.precision = 0.0
    for sl in svdList:
        sl_positive = max(sl, 1e-14)
        self.precision += math.log(sl_positive)

    # THESE STATEMENTS are INTENDED to always run but don't
    # (the early returns above skip them)
    self.N.setParm(self.saveParm)
    self.likefailed = self.N.likefailed
    self.N.likefailed = False
    return compact_matrix, rows_selected


ratings = handle_input("ratings.txt")
start_time = time.time()
R, rows_selected = selection(ratings)
C, columns_selected = selection(ratings.T)

# build the intersection matrix from the selected rows and columns
intersection = np.zeros((len(rows_selected), len(columns_selected)))
for i in range(len(rows_selected)):
    for j in range(len(columns_selected)):
        intersection[i][j] = ratings[rows_selected[i]][columns_selected[j]]

# pseudoinverse of the intersection matrix via its SVD
u, sigma, v = svd(intersection)
u = u.T
v = v.T
for j in range(len(sigma)):
    if abs(sigma[j][j]) != 0:
        sigma[j][j] = 1 / sigma[j][j]
intersection = np.dot(v, np.dot(sigma, u))

C = np.matrix(C)
C = C.T
R = np.matrix(R)
final_matrix = np.dot(C, np.dot(intersection, R))
print(final_matrix)
def SVD(M, use_np=False):
    if use_np:
        return np.linalg.svd(M)
    else:
        return svd(M)
from svd import svd
import time

starttime = time.time()
alpha = 0.003
lamba = 0.01  # presumably a regularization weight; "lambda" is a reserved word, hence the spelling
svd2 = svd(3, alpha, lamba, 10)
svd2.printData()
svd2.saveData()
endtime = time.time()
print('\n-----------csv data load finished, cost time %f -------------\n' % (
    endtime - starttime))
def main():
    semeval_dir = 'data/maui-semeval2010-test/'
    filenames = sorted(os.listdir(semeval_dir))
    manual_keywords = []
    total_precision = 0
    total_recall = 0
    total_docs = 0
    method = str(sys.argv[1])
    for filename in filenames:
        if filename[-3:] == 'key':
            # ignored due to issue on Mac or empty keyfile
            if filename == "H-5.key" or filename == "C-86.key":
                continue
            with open(semeval_dir + filename, 'r') as f:
                last_key_file = filename
                key_lines = f.read().splitlines()
            # list of lists of keywords by line
            manual_keywords = [line.split() for line in key_lines]
            # flatten list
            manual_keywords = [word for line in manual_keywords for word in line]
            manual_keywords = list(set(manual_keywords))
            manual_keywords = [t for t in manual_keywords
                               if (len(t) > 1) and (t.lower() not in stopwords.words('english'))]
        elif filename[-3:] == 'txt':
            # ignored due to issue on Mac or empty keyfile
            if filename == "H-5.txt" or filename == "C-86.txt":
                continue
            total_docs += 1
            print(filename)
            correct = 0
            with open(semeval_dir + filename, 'r') as f:
                content = f.read()
            if method == 'svd':
                keywords = svd(content, 1, True)
            elif method == 'raketr':
                keywords = raketr.main(content, True)
            elif method == 'cluster':
                keywords = kcluster(content, 6, 10, True)
            else:
                print('methods accepted: svd raketr cluster')
                exit(0)
            keywords = list(set(keywords))
            keywords = [word.encode('ascii') for word in keywords]
            # print('--------manual keywords---------')
            # print(manual_keywords)
            print(keywords)
            print('-' * 100)
            for keyword in keywords:
                if keyword in set(manual_keywords):
                    correct += 1
            if len(manual_keywords) == 0:
                print(filename)
                print(last_key_file)
                print('^^^^ issue with this file ^^^^')
                exit(0)
            total_precision += correct / float(len(keywords))
            total_recall += correct / float(len(manual_keywords))
    total_precision /= total_docs
    total_recall /= total_docs
    total_fmeasure = round(2 * total_precision * total_recall /
                           (total_precision + total_recall), 5)
    print('total docs: ' + str(total_docs))
    print('total precision: ' + str(total_precision))
    print('total recall: ' + str(total_recall))
    print('total fmeasure: ' + str(total_fmeasure))
                csv_writer.writerow(row)
            else:
                print(mid)
        print('--------update number ', num, ' ---------------')
        out.close()
        return num

    def clean(self):
        print('--------current number of data ', self.rates.count(), ' ---------------')
        result = self.rates.delete_many({})
        print('--------clean number ', result.deleted_count, ' ---------------')


starttime = time.time()
u = update_rate()
num = u.update()
u.clean()
alpha = 0.003
lamba = 0.01
if num > 0:
    svd2 = svd(3, alpha, lamba, 30)
    svd2.printData()
    svd2.saveData()
else:
    print('------------no update data, no svd-------------------------')
endtime = time.time()
print('\n-----------update ratings data finished, cost time %f -------------\n' % (
    endtime - starttime))
Course: CS F469 Information Retrieval
"""

import numpy as np
import math
import time
from numpy import linalg as LA

from common import handle_input, calc_error, print_matrix
from svd import svd

ratings = handle_input("ratings.txt")
start_time = time.time()

U, sigma, V = svd(ratings)
final_matrix = np.dot(U, np.dot(sigma, V))
for i in range(len(final_matrix)):
    for j in range(len(final_matrix[i])):
        final_matrix[i][j] = round(final_matrix[i][j], 2)

print("Printing matrix U:")
print(U)
print("\nPrinting matrix sigma:")
print(sigma)
print("\nPrinting matrix V:")
print(V)
print("\nPrinting the final matrix obtained by multiplying U, sigma and V:")
print(final_matrix)
__author__ = 'PC-LiNing'

from svd import svd
import numpy as np

matrix = np.asarray([1.0, 1.0, 0.5,
                     1.0, 1.0, 0.25,
                     0.5, 0.25, 2.0]).reshape(3, 3)
print(matrix)
print(matrix.shape)

singularValues, us, vs = svd(matrix)
print(singularValues)
print(us)
print(vs)
print('#######')

# sum the rank-1 outer products to reconstruct the matrix
result = np.zeros(shape=(3, 3))
for i in range(3):
    singularValue = singularValues[i]
    u = us[i]
    v = vs[i]
    result += singularValue * np.outer(u, v)
print(result)
def pinv_by_svd(A):
    # Moore-Penrose pseudoinverse via the SVD: invert the singular values
    # and recompose in reverse order
    U, S, V = svd(A, retSimple=True)
    S = inv(S)
    A_pinv = np.dot(np.dot(V.T, S), U.T)
    return A_pinv
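# A numpy-only sanity check of the same construction (np.linalg.svd returns V
# transposed, hence Vt.T below); retSimple is specific to the custom svd module
# above and is not used here.
import numpy as np

def pinv_np(A):
    U, s, Vt = np.linalg.svd(A, full_matrices=False)
    return Vt.T @ np.diag(1.0 / s) @ U.T  # assumes full column/row rank

A = np.random.randn(5, 3)
print(np.allclose(pinv_np(A), np.linalg.pinv(A)))  # expected: True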
masked_arr = np.ma.masked_array(user_item_matrix, mask)
del mask
del user_item_matrix
item_means = np.mean(masked_arr, axis=0)
# user_means = np.mean(masked_arr, axis=1)
# item_means_tiled = np.tile(item_means, (user_item_matrix.shape[0], 1))  # init_dgesdd failed init
print(masked_arr)
# fill the missing entries with the per-item mean rating
utilMat = masked_arr.filled(item_means)
print(utilMat)
# utilMat = kpod(utilMat=utilMat, mask=masked_arr, iter=40, n_clusters=10, method="normal")
utilMat = svd(utilMat, k=15)

pred = []  # to store the predicted ratings
for _, row in test.iterrows():
    user = row['userId']
    item = row['movieId']
    if user in user_index:
        u_index = user_index[user]
        if item in item_index:
            i_index = item_index[item]
            pred_rating = utilMat[u_index, i_index]
        else:
            pred_rating = np.mean(utilMat[u_index, :])
    else:
    for entry in data:
        words |= set(entry['words'])
    return list(sorted(words))


def load():
    with open('all_stories.json', 'r') as infile:
        data = json.loads(infile.read())
    return data


if __name__ == "__main__":
    data = load()
    matrix, (indexToWord, indexToDocument) = makeDocumentTermMatrix(data)
    matrix = normalize(matrix)
    sigma, U, V = svd(matrix, k=10)
    projectedDocuments = np.dot(matrix.T, U)
    projectedWords = np.dot(matrix, V.T)
    documentCenters, documentClustering = cluster(projectedDocuments)
    wordCenters, wordClustering = cluster(projectedWords)
    wordClusters = [[
        indexToWord[i] for (i, x) in enumerate(wordClustering) if x == j
    ] for j in range(len(set(wordClustering)))]
    documentClusters = [[
        indexToDocument[i]['text']
        for (i, x) in enumerate(documentClustering) if x == j
    ] for j in range(len(set(documentClustering)))]
def test_simple_svd(self):
    test_data = np.array([1, -1, 1, -1, 3, 0], dtype=np.float64).reshape(3, 2)
    result = svd.svd(test_data)
    done = np.dot(np.dot(result[0], result[1]), result[2])
    assert_array_almost_equal(test_data, done, decimal=10)
import numpy as np

from svd import svd

y = np.array([4, 3, 7]).T
A = np.array([[1, -2, 3], [2, -1, 4], [-1, -4, 1]])
# A_red = [[1,-2,3],[0,3,-2],[0,0,0]] -> rank 2

####################### Rank #######################
print('Rank of A:\t\t{0:d}'.format(np.linalg.matrix_rank(A)))

####################### SVD #######################
matrices, a, error, C = svd(A, y=y)
U, w, V = matrices
a_lst, *_, s = np.linalg.lstsq(A, y, rcond=None)
print('Singular Values of A:\t{}'.format(list(np.diag(w))))
print('Solution a:\t\t{}'.format(list(a)))
print('Lstsq Solution a:\t{}'.format(list(a_lst)))
print('Residual Error:\t\t{} ({})'.format(error, error / np.linalg.norm(a)))
print('Covariance Matrix:\n{}'.format(C))
print('U Matrix:\n{}'.format(U))
print('D Matrix:\n{}'.format(w))
print('V Matrix:\n{}'.format(V))
print('U.T@U Matrix:\n{}'.format(U.T @ U))
print('V@V.T Matrix:\n{}'.format(V @ V.T))
def main():
    semeval_dir = 'data/maui-semeval2010-test/'
    filenames = sorted(os.listdir(semeval_dir))
    manual_keywords = []
    total_precision = 0
    total_recall = 0
    total_docs = 0
    method = str(sys.argv[1])
    for filename in filenames:
        if filename[-3:] == 'key':
            # ignored due to issue on Mac or empty keyfile
            if filename == "H-5.key" or filename == "C-86.key":
                continue
            with open(semeval_dir + filename, 'r') as f:
                last_key_file = filename
                key_lines = f.read().splitlines()
            key_lines = [word.encode('ascii') for word in key_lines]
            manual_keywords = get_stemmed_keywords(key_lines)
        elif filename[-3:] == 'txt':
            # ignored due to issue on Mac or empty keyfile
            if filename == "H-5.txt" or filename == "C-86.txt":
                continue
            total_docs += 1
            print(filename)
            correct = 0
            with open(semeval_dir + filename, 'r') as f:
                content = f.read()
            if method == 'svd':
                keywords = svd(content, 1, False)
            elif method == 'raketr':
                keywords = raketr.main(content, False)
            elif method == 'cluster':
                keywords = kcluster(content, 6, 15, False)
                # benchmark against RAKE
                # keywords = rake_object.run(content)[:15]
                # keywords = [word[0] for word in keywords]
                # keywords = [''.join([i if ord(i) < 128 and i != '\n' else ' '
                #                      for i in keyword]).encode('ascii')
                #             for keyword in keywords]
            else:
                print('methods accepted: svd raketr cluster')
                exit(0)
            print(keywords)
            print('-' * 100)
            # print('--------manual keywords---------')
            # print(manual_keywords)
            # print('--------extracted keywords---------')
            # print(keywords)
            stemmed_keywords = get_stemmed_keywords(keywords)
            for keyword in stemmed_keywords:
                if keyword in set(manual_keywords):
                    correct += 1
            if len(manual_keywords) == 0:
                print(filename)
                print(last_key_file)
                print('^^^^ issue with this file ^^^^')
                exit(0)
            total_precision += correct / float(len(keywords))
            total_recall += correct / float(len(manual_keywords))
    total_precision /= total_docs
    total_recall /= total_docs
    total_fmeasure = round(2 * total_precision * total_recall /
                           (total_precision + total_recall), 5)
    print('total docs: ' + str(total_docs))
    print('total precision: ' + str(total_precision))
    print('total recall: ' + str(total_recall))
    print('total fmeasure: ' + str(total_fmeasure))