Esempio n. 1
0
    lambda x: MatrixEntry(int(x[0]) - 1,
                          int(x[1]) - 1, 1.0))
lower_entries = txt.map(
    lambda x: MatrixEntry(int(x[1]) - 1,
                          int(x[0]) - 1, 1.0))
degrees = upper_entries.map(lambda entry: (entry.i, entry.value)).reduceByKey(
    lambda a, b: a + b)
W = CoordinateMatrix(upper_entries.union(lower_entries), numCols=N, numRows=N)

# XXX:
laplacian = sys.argv[1]

if laplacian == 'unnormalized':
    entries = degrees.map(lambda x: MatrixEntry(x[0], x[0], x[1]))
    D = CoordinateMatrix(entries, numCols=N, numRows=N)
    L = D.toBlockMatrix().subtract(W.toBlockMatrix()).toCoordinateMatrix()
elif laplacian == 'normalized':
    entries = degrees.map(lambda x: MatrixEntry(x[0], x[0], 1 / x[1]))
    D_inv = CoordinateMatrix(entries, numCols=N, numRows=N).toBlockMatrix()
    I = CoordinateMatrix(sc.range(N).map(lambda i: MatrixEntry(i, i, 1.0)),
                         numCols=N,
                         numRows=N).toBlockMatrix()
    L = I.subtract(D_inv.multiply(W.toBlockMatrix())).toCoordinateMatrix()
elif laplacian == 'symmetric':
    entries = degrees.map(lambda x: MatrixEntry(x[0], x[0], 1 / sqrt(x[1])))
    D_invsq = CoordinateMatrix(entries, numCols=N, numRows=N).toBlockMatrix()
    I = sc.range(N).map(lambda i: MatrixEntry(i, i, 1.0), N, N)
    tmp = D_invsq.multiply(W.toBlockMatrix()).multiply(D_invsq)
    L = I.toBlockMatrix().subtract(tmp)
else:
    raise ValueError('Unknown type of Laplacian.')
Esempio n. 2
0
#sc = SparkContext(conf=conf).getOrCreate()

#use local spark on computer
# findspark.init()
#from pyspark.sql import SparkSession

local_file_location = 'file:///wasp/pdb1HYS.mtx.mtx'

rdd = spark.sparkContext.textFile(local_file_location)
rdd = rdd.map(lambda line: line.split(" "))
rdd = rdd.map(
    lambda line: MatrixEntry(int(line[0]), int(line[1]), float(line[2])))

mat = CoordinateMatrix(rdd)
M = mat.toRowMatrix()
A = mat.toBlockMatrix()
At = mat.transpose().toBlockMatrix()

print("SVD")
print(M.numRows(), M.numCols())
start_svd = time.time()

NUM_TIMES = 10
#do it 10 times to get mean
for i in range(NUM_TIMES):
    svd = M.computeSVD(5, computeU=True)

end_svd = time.time()
print("Time elapsed: ", (end_svd - start_svd) /
      NUM_TIMES)  # CPU seconds elapsed (floating point)