def test_svd(self):
    """Compare the leading singular triplet from ``svd`` with ``ln.svd``.

    A seeded 1000 x 10 random matrix is split into a blocked RDD, a rank-1
    decomposition is requested from ``svd``, and the first left vector,
    singular value and right vector are checked against the reference
    decomposition of the same matrix.
    """
    rng = np.random.RandomState(42)
    # Dimensions must be integers: a float such as 1e3 is rejected by
    # current NumPy releases.
    mat = rng.randn(1000, 10)
    data = block_rdd(self.sc.parallelize(list(mat), 10))

    u, s, v = svd(data, 1)
    # u comes back as an RDD; gather the pieces and flatten them into one
    # vector so it can be compared with a column of u_true.
    u = np.squeeze(np.concatenate(np.array(u.collect()))).T

    u_true, s_true, v_true = ln.svd(mat)

    # Singular vectors are only defined up to sign; match_sign presumably
    # returns the reference vector with its sign aligned to the first
    # argument -- confirm against its definition.
    assert_array_almost_equal(v[0], match_sign(v[0], v_true[0, :]))
    assert_array_almost_equal(s[0], s_true[0])
    assert_array_almost_equal(u, match_sign(u, u_true[:, 0]))
def train(matrix, featureSize, labels):
    """Reduce ``matrix`` with a distributed SVD, fit an SVM and report accuracy.

    The rows of ``matrix`` are parallelized, blocked and decomposed with
    ``svd`` (100 components). The data is projected onto the resulting
    vectors, a one-vs-rest RBF SVM is fitted on the projection, and the
    accuracy on that same training data is printed. Finally the first
    sample is mapped back to feature space and plotted as a 32 x 32 image.

    Args:
        matrix: 2-D array with one sample per row; it must support ``dot``
            and ``mean`` (assumed to be a numpy array -- confirm with caller).
        featureSize: Unused by the current implementation; kept so existing
            callers keep working.
        labels: Target label for each row of ``matrix``.

    Returns:
        None. Results are printed and plotted.
    """
    # Spread the rows over 10 partitions and group them into blocks so the
    # decomposition runs on the RDD rather than on the driver.
    row_rdd = sc.parallelize(list(matrix), 10)
    blocked = block_rdd(row_rdd)
    _, _, v = svd(blocked, 100)
    print(v.shape)

    # Project every sample onto the retained components.
    transformed = matrix.dot(v.T)
    print(transformed.shape)

    classifier = OneVsRestClassifier(SVC(kernel="rbf")).fit(transformed, labels)
    predictions = classifier.predict(transformed)
    print(predictions[0])

    # Training-set accuracy: predictions are made on the data used to fit.
    correct = sum(
        1 for predicted, actual in zip(predictions, labels)
        if predicted == actual
    )
    print(float(correct) / len(predictions))

    print('plot reconstructed data')
    # Mapping back to feature space multiplies by v itself: transformed has
    # one column per component and v has one row per component.
    # NOTE(review): the SVD above is computed on uncentered data, so adding
    # the per-row mean here may not be intended -- confirm.
    recData = transformed.dot(v) + matrix.mean(axis=1)[:, None]
    plot(recData[0].reshape((32, 32)))