import numpy as np
from matrix_sketching import MatrixSketching
from pyspark import SparkContext, SparkConf

conf = SparkConf().setAppName("PysparkSVDTest")
sc = SparkContext(conf=conf)

# Load the input matrix. A context manager guarantees the file handle is
# closed even if np.loadtxt raises (the original left it open).
filename = '/home/hduser/Test_Spark/Spark_Matrix_Sketching/matrix.csv'
with open(filename, 'rt') as raw_data:
    A = np.loadtxt(raw_data, delimiter=",")

# Exact covariance A^T A and Frobenius norm of A — the reference values
# for the sketch-error metric below.
approxCovarianceMatrixA = np.dot(A.transpose(), A)
Norm_A = np.linalg.norm(A, ord='fro')

# isvd Test
l = 20
# NOTE(review): op=0.2 here vs op='isvd' in the later tests — the output
# filename ("Test_PFD02_20.csv") suggests this is a PFD run with alpha=0.2;
# confirm against the MatrixSketching API.
ms = MatrixSketching(sc=sc, rows=l, columns=500, op=0.2)

# Stream the first 10000 rows of A into the sketch, one row at a time.
# Assumes matrix.csv has at least 10000 rows — TODO confirm; slicing
# (unlike the original index loop) silently truncates if it has fewer.
for row in A[:10000]:
    ms.add(row)
B = ms.getLocalSketchMatrix()

# Relative covariance error: ||A^T A - B^T B||_2 / ||A||_F^2
approxCovarianceMatrixB = np.dot(B.transpose(), B)
testMatrix = approxCovarianceMatrixA - approxCovarianceMatrixB
Test1 = np.linalg.norm(testMatrix, ord=2) / (Norm_A ** 2)
print(Test1)
del ms

# BUG FIX: np.savetxt rejects a 0-d scalar ("Expected 1D or 2D array");
# wrap the scalar result so it is written as a one-element column.
np.savetxt("Test_PFD02_20.csv", np.atleast_1d(Test1), delimiter=",")
sc.stop()

# # FD Test
# Zero-pad A to exactly 9324 rows x 500 columns. Vectorized slice
# assignment replaces the original element-by-element copy loop, and the
# original's extra np.array(B) copy (made right before deleting B) is
# dropped — rebinding is equivalent and allocation-free.
B = np.zeros((9324, 500))
B[:len(A)] = A
A = B
del B

# Reference quantities for the relative covariance-error metric below.
approxCovarianceMatrixA = np.dot(A.transpose(), A)
Norm_A = np.linalg.norm(A, ord='fro')

# isvd Test: sweep sketch sizes l = 20, 30, ..., 100 and record the
# relative covariance error for each size.
# NOTE(review): sc.stop() was called in the earlier section of this file;
# a stopped SparkContext cannot be reused — confirm whether this section
# is meant to be a separate run.
Test1 = np.zeros(9)
for j in range(9):
    l = 20 + 10 * j
    ms = MatrixSketching(sc=sc, rows=l, columns=500, op='isvd')
    # A has exactly 9324 rows after the padding above, so iterating the
    # array directly matches the original range(9324) index loop.
    for row in A:
        ms.add(row)
    B = ms.getLocalSketchMatrix()
    approxCovarianceMatrixB = np.dot(B.transpose(), B)
    # Relative covariance error: ||A^T A - B^T B||_2 / ||A||_F^2
    testMatrix = approxCovarianceMatrixA - approxCovarianceMatrixB
    Test1[j] = np.linalg.norm(testMatrix, ord=2) / (Norm_A ** 2)
    del ms
np.savetxt("spam_Test1.csv", Test1, delimiter=",")

# FD Test
# NOTE(review): this loop's body is truncated at the end of the visible
# source — only the sketch-size computation is shown; do not assume the
# rest mirrors the isvd sweep without checking the full file.
Test2 = np.zeros(9)
for j in range(9):
    l = 20 + 10 * j