def test_netflix_sgd(ctx):
    """Smoke-test ``netflix.sgd`` on a small, synthetically filled matrix.

    Two dense float32 factor matrices and one sparse float32 value matrix
    are built, the value matrix is filled through
    ``netflix.fake_netflix_mapper``, and the SGD expression is evaluated
    twice.  Nothing is asserted about the result; the test passes as long
    as no call raises.

    ``ctx`` is not referenced in the body (presumably the test-harness
    context object -- confirm against the caller).
    """
    n_rows = 100
    n_cols = 100 * 100
    rank = 20
    n_splits = 8
    # Passed to the fake mapper as 'p_rating'; presumably a per-cell rating
    # probability, i.e. roughly 1000 filled cells overall -- TODO confirm.
    p_rating = 1000.0 / (n_rows * n_cols)

    # Random factor matrices first, then the (initially empty) value matrix.
    Mfactor = spartan.eager(spartan.rand(n_cols, rank).astype(np.float32))
    Ufactor = spartan.eager(spartan.rand(n_rows, rank).astype(np.float32))

    tile_hint = (divup(n_rows, n_splits), divup(n_cols, n_splits))
    empty = spartan.sparse_empty((n_rows, n_cols),
                                 tile_hint=tile_hint,
                                 dtype=np.float32)

    # Alternative loader kept from the original for reference:
    #   spartan.shuffle(V, netflix.load_netflix_mapper,
    #                   kw={'load_file': '/big1/netflix.zip'})
    filled = spartan.shuffle(empty,
                             netflix.fake_netflix_mapper,
                             target=empty,
                             kw={'p_rating': p_rating})
    V = spartan.eager(spartan.tocoo(filled))

    for _ in range(2):
        netflix.sgd(V, Mfactor, Ufactor).evaluate()
def test_netflix_sgd(ctx):
    """Run ``netflix.sgd`` twice on synthetic data and force each result.

    The test only checks that the pipeline executes: random float32
    factor matrices are created, a sparse float32 matrix is filled by
    ``netflix.fake_netflix_mapper`` and converted with ``spartan.tocoo``,
    and the resulting SGD expression is forced two times.  No values are
    asserted.

    ``ctx`` is unused inside this function (presumably supplied by the
    test harness -- confirm against the caller).
    """
    rows, cols = 100, 100 * 100
    rank = 20
    splits = 8
    # Forwarded to the mapper under the 'p_rating' key; looks like a
    # per-cell fill probability -- TODO confirm against the mapper.
    fill_probability = 1000.0 / (rows * cols)

    # Factor matrices are materialised before the value matrix is built.
    Mfactor = spartan.eager(spartan.rand(cols, rank).astype(np.float32))
    Ufactor = spartan.eager(spartan.rand(rows, rank).astype(np.float32))

    V = spartan.sparse_empty(
        (rows, cols),
        tile_hint=(divup(rows, splits), divup(cols, splits)),
        dtype=np.float32,
    )

    # Real-data variant retained from the original as a pointer:
    #   V = spartan.shuffle(V, netflix.load_netflix_mapper,
    #                       kw={'load_file': '/big1/netflix.zip'})
    shuffled = spartan.shuffle(
        V,
        netflix.fake_netflix_mapper,
        target=V,
        kw={'p_rating': fill_probability},
    )
    V = spartan.eager(spartan.tocoo(shuffled))

    for _ in range(2):
        netflix.sgd(V, Mfactor, Ufactor).force()
def benchmark_netflix_sgd(ctx, timer):
    """Time data preparation and one ``netflix.sgd`` run per factor rank.

    The sparse value matrix is filled by ``netflix.fake_netflix_mapper``
    under the timer label ``'prep'``; then, for ranks 25 and 50, fresh
    random float32 factor matrices are created and forcing the SGD
    expression is timed under the label ``'rank <r>'``.

    ``U``, ``M`` and ``P_RATING`` are read from the enclosing module scope
    (their definitions are not part of this block).

    Args:
        ctx: object exposing ``num_workers``; that count is used as the
            divisor for every tile hint.
        timer: object exposing ``time_op(label, callable)``; its return
            value for the ``'prep'`` step is used as the value matrix.
    """
    workers = ctx.num_workers
    value_tiles = (divup(U, workers), divup(M, workers))
    empty = spartan.sparse_empty((U, M),
                                 tile_hint=value_tiles,
                                 dtype=np.float32)

    def prepare():
        # Fill the empty matrix in place (target == source), convert it
        # with tocoo, and materialise it.
        filled = spartan.shuffle(empty,
                                 netflix.fake_netflix_mapper,
                                 target=empty,
                                 kw={'p_rating': P_RATING})
        return spartan.eager(spartan.tocoo(filled))

    V = timer.time_op('prep', prepare)

    # Alternative loader kept from the original for reference:
    #   V = spartan.shuffle(V, netflix.load_netflix_mapper,
    #                       kw={'load_file': '/big1/netflix.zip'})

    for rank in (25, 50):
        m_tiles = (divup(M, workers), rank)
        u_tiles = (divup(U, workers), rank)
        Mfactor = spartan.eager(
            spartan.rand(M, rank, tile_hint=m_tiles).astype(np.float32))
        Ufactor = spartan.eager(
            spartan.rand(U, rank, tile_hint=u_tiles).astype(np.float32))

        # Hand the bound ``force`` method to the timer so only the
        # evaluation itself runs inside the timed call.
        sgd_expr = netflix.sgd(V, Mfactor, Ufactor)
        timer.time_op('rank %d' % rank, sgd_expr.force)
def benchmark_netflix_sgd(ctx, timer):
    """Benchmark ``netflix.sgd`` for two factor ranks on synthetic data.

    Step one, timed as ``"prep"``, fills a sparse float32 matrix through
    ``netflix.fake_netflix_mapper`` and materialises its ``tocoo`` form.
    Step two, timed as ``"rank 25"`` and ``"rank 50"``, forces the SGD
    expression built from newly generated random float32 factors.

    ``U``, ``M`` and ``P_RATING`` come from module scope and are not
    defined in this block.

    Args:
        ctx: provides ``num_workers``, used to size the tile hints.
        timer: provides ``time_op(label, callable)``; the value returned
            for the ``"prep"`` step becomes the value matrix.
    """
    n_workers = ctx.num_workers
    row_tile = divup(U, n_workers)
    col_tile = divup(M, n_workers)

    V = spartan.sparse_empty((U, M), tile_hint=(row_tile, col_tile), dtype=np.float32)

    # The lambda reads ``V`` when the timer calls it, i.e. while ``V`` is
    # still the empty matrix; the name is rebound only after time_op returns.
    V = timer.time_op(
        "prep",
        lambda: spartan.eager(
            spartan.tocoo(
                spartan.shuffle(
                    V,
                    netflix.fake_netflix_mapper,
                    target=V,
                    kw={"p_rating": P_RATING},
                )
            )
        ),
    )

    # Real-data variant retained from the original as a pointer:
    #   V = spartan.shuffle(V, netflix.load_netflix_mapper,
    #                       kw={ 'load_file' : '/big1/netflix.zip' })

    for rank in (25, 50):
        m_random = spartan.rand(M, rank, tile_hint=(col_tile, rank))
        Mfactor = spartan.eager(m_random.astype(np.float32))

        u_random = spartan.rand(U, rank, tile_hint=(row_tile, rank))
        Ufactor = spartan.eager(u_random.astype(np.float32))

        label = "rank %d" % rank
        timer.time_op(label, netflix.sgd(V, Mfactor, Ufactor).force)
import time import spartan as sp import sys from datetime import datetime #argvLen = len(sys.argv); #print "total fileds in sys.argv=",argvLen; #for i,eachArg in enumerate(sys.argv): # print "[%d]=%s"%(i, eachArg); m = int(sys.argv[1]) k = int(sys.argv[2]) n = int(sys.argv[3]) sp.initialize() print "execute matrix multiplication, m: %d, k: %d, n: %d. date: %s\n" % ( m, k, n, datetime.now()) a = sp.rand(m, k) b = sp.rand(k, n) start = time.time() z = sp.dot(a, b).optimized() z.evaluate() end = time.time() print 'z: ', z print "finish matrix multiplication, diff time: %d. date: %s\n" % ( (end - start), datetime.now())
import spartan as sp from datetime import datetime import numpy as np import scipy.sparse import multiprocessing import time wrows = vrows = 18000 hcols = vcols = 480000 wcols = hrows = 200 sp.initialize() V = sp.sparse_rand((vrows, vcols), density=0.01, dtype=np.double, format=u'csr') W = sp.rand(wrows, wcols) H = sp.rand(hrows, hcols) eps = 10e-8 max_iteration = 5 i = 0 print "starts to run!" start = time.time() while i < max_iteration: begin = time.time() H = H * (sp.dot(W.T, V) / (sp.dot(sp.dot(W.T, W), H) + eps)) W = W * (sp.dot(V, H.T) / (sp.dot(W, sp.dot(H, H.T)) + eps)) i = i + 1 end = time.time()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Build the element-wise sum of two 10000 x 10000 random spartan arrays.

The sum is only constructed as ``z``; this script never calls an
evaluation method on it.
"""
import spartan as sp

sp.initialize()

# Both operands share the same square shape.
shape = (10000, 10000)

x = sp.rand(*shape)
y = sp.rand(*shape)
z = x + y
#!/usr/local/bin/python2.7 import time import spartan as sp import sys from datetime import datetime #argvLen = len(sys.argv); #print "total fileds in sys.argv=",argvLen; #for i,eachArg in enumerate(sys.argv): # print "[%d]=%s"%(i, eachArg); m = int(sys.argv[1]) k = int(sys.argv[2]) n = int(sys.argv[3]) sp.initialize() print "execute matrix multiplication, m: %d, k: %d, n: %d. date: %s\n"%(m, k , n, datetime.now()) a = sp.rand(m, k) b = sp.rand(k, n) start = time.time() z = sp.dot(a,b).optimized() z.evaluate() end = time.time() print 'z: ', z print "finish matrix multiplication, diff time: %d. date: %s\n" % ((end - start), datetime.now())