import numpy as np
from matrix_sketching import MatrixSketching
from pyspark import SparkContext, SparkConf
conf = SparkConf().setAppName("PysparkSVDTest")
sc = SparkContext(conf=conf)

filename = '/home/hduser/Test_Spark/Spark_Matrix_Sketching/matrix.csv'
A = np.loadtxt(filename, delimiter=",")  # np.loadtxt opens/closes the file itself

approxCovarianceMatrixA = np.dot(A.transpose(), A)
Norm_A = np.linalg.norm(A, ord='fro')
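
# Test metric: spectral-norm covariance error normalized by ||A||_F^2.
# FD-style sketches guarantee ||A^T A - B^T B||_2 <= ||A||_F^2 / l, so the
# normalized error computed below should be at most roughly 1/l.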

# PFD test: op=0.2 appears to select parameterized FD with alpha = 0.2
# (consistent with the output filename Test_PFD02_20.csv)
l = 20
ms = MatrixSketching(sc=sc, rows=l, columns=500, op=0.2)
for row in A:  # stream all 10,000 rows of matrix.csv into the sketch
    ms.add(row)
B = ms.getLocalSketchMatrix()
approxCovarianceMatrixB = np.dot(B.transpose(), B)
testMatrix = approxCovarianceMatrixA - approxCovarianceMatrixB
Test1 = np.linalg.norm(testMatrix, ord=2) / (Norm_A**2)
print(Test1)
del ms

# np.savetxt needs at least a 1-D array; Test1 here is a scalar.
np.savetxt("Test_PFD02_20.csv", np.atleast_1d(Test1), delimiter=",")

sc.stop()
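
# --- Reference sketch (added illustration, not part of MatrixSketching) ----
# A minimal local Frequent Directions sketch in pure NumPy, assuming the
# distributed MatrixSketching class implements an FD-style algorithm; handy
# for cross-checking results on inputs that fit in memory.
def frequent_directions(M, ell):
    """Return an ell x d sketch B with ||M^T M - B^T B||_2 <= ||M||_F^2 / ell."""
    _, d = M.shape
    B = np.zeros((ell, d))
    for row in M:
        # The last sketch row is always zero here (initially, and after
        # every shrink step), so the incoming row can overwrite it.
        B[-1, :] = row
        _, s, Vt = np.linalg.svd(B, full_matrices=False)
        # Shrink all squared singular values by the smallest one; this
        # zeroes the weakest direction and frees a row for the next insert.
        s_shrunk = np.sqrt(np.maximum(s**2 - s[-1]**2, 0.0))
        B = s_shrunk[:, None] * Vt
    return B

# Hypothetical local cross-check against the distributed sketch above:
# B_ref = frequent_directions(A, 20)
# print(np.linalg.norm(A.T @ A - B_ref.T @ B_ref, ord=2) / Norm_A**2)  # <= 1/20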

# Example #2
from scipy.io import mmread

# The previous example stopped its SparkContext; start a fresh one so this
# example can run in the same process.
conf = SparkConf().setAppName("PysparkSVDTest2")
sc = SparkContext(conf=conf)
# connectus.mtx is a sparse Matrix Market file; densify it and transpose so
# that rows are the data points to be streamed.
data = mmread(
    '/home/duynguyen/Test_Spark/Spark_Matrix_Sketching/connectus.mtx')
A = data.toarray().transpose()
del data

approxCovarianceMatrixA = np.dot(A.transpose(), A)
Norm_A = np.linalg.norm(A, ord='fro')

# isvd Test: sweep sketch sizes l = 20, 30, ..., 100
Test1 = np.zeros(9)
for j in range(9):
    l = 20 + 10 * j
    ms = MatrixSketching(sc=sc, rows=l, columns=512, op='isvd')
    for row in A:  # stream all 394,792 rows of connectus into the sketch
        ms.add(row)
    B = ms.getLocalSketchMatrix()
    approxCovarianceMatrixB = np.dot(B.transpose(), B)
    testMatrix = approxCovarianceMatrixA - approxCovarianceMatrixB
    Test1[j] = np.linalg.norm(testMatrix, ord=2) / (Norm_A**2)
    del ms

np.savetxt("connectUS_Test1.csv", Test1, delimiter=",")

# FD Test
Test2 = np.zeros(9)
for j in range(9):
    l = 20 + 10 * j