def test_transpose3(self):
  t1 = expr.sparse_diagonal((107, 401)).evaluate()
  t2 = expr.sparse_diagonal((401, 107)).evaluate()
  a = expr.transpose(t1)
  b = expr.transpose(t2)
  Assert.all_eq(a.glom().todense(),
                sp.eye(107, 401).transpose().todense())
  Assert.all_eq(b.glom().todense(),
                sp.eye(401, 107).transpose().todense())
def benchmark_cg(ctx, timer):
  print "#worker:", ctx.num_workers
  l = int(math.sqrt(ctx.num_workers))
  #n = 2000 * 16
  n = 500 * ctx.num_workers
  la = 20
  niter = 5
  #nonzer = 7
  #nz = n * (nonzer + 1) * (nonzer + 1) + n * (nonzer + 2)
  #density = 0.5 * nz / (n * n)

  A = expr.rand(n, n)
  A = (A + expr.transpose(A)) * 0.5
  I = expr.sparse_diagonal((n, n)) * la
  A = A - I

  #x1 = numpy_cg(A.glom(), niter)
  util.log_warn('begin cg!')
  t1 = datetime.now()
  x2 = conj_gradient(A, niter).force()
  t2 = datetime.now()
  cost_time = millis(t1, t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time / niter)
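# The commented-out numpy_cg baseline is not shown in this snippet. A minimal
# dense conjugate-gradient loop in NumPy (a sketch of the standard CG
# recurrence; the name numpy_cg_sketch and the all-ones right-hand side are
# illustrative assumptions, not the benchmark's actual baseline):
import numpy as np

def numpy_cg_sketch(A, niter):
  # Run niter CG steps on A x = b, starting from x = 0.
  b = np.ones(A.shape[0])
  x = np.zeros(A.shape[0])
  r = b - A.dot(x)
  p = r.copy()
  for _ in range(niter):
    Ap = A.dot(p)
    alpha = r.dot(r) / p.dot(Ap)
    x += alpha * p
    r_new = r - alpha * Ap
    p = r_new + (r_new.dot(r_new) / r.dot(r)) * p
    r = r_new
  return x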
def benchmark_cholesky(ctx, timer):
  print "#worker:", ctx.num_workers
  #n = int(math.pow(ctx.num_workers, 1.0 / 3.0))
  n = int(math.sqrt(ctx.num_workers))
  #ARRAY_SIZE = 1600 * 4
  ARRAY_SIZE = 1600 * n

  util.log_warn('prepare data!')
  #A = np.random.randn(ARRAY_SIZE, ARRAY_SIZE)
  #A = np.dot(A, A.T)
  #A = expr.force(from_numpy(A, tile_hint=(ARRAY_SIZE/n, ARRAY_SIZE/n)))
  #A = expr.randn(ARRAY_SIZE, ARRAY_SIZE, tile_hint=(ARRAY_SIZE/n, ARRAY_SIZE/n))
  A = expr.randn(ARRAY_SIZE, ARRAY_SIZE)

  # FIXME: Ideally we should be able to get rid of tile_hint. However,
  # extent.change_partition_axis currently relies on the one-dimensional
  # size to change tiling to grid tiling, and assumes that every extent
  # is partitioned in the same size. Trace extent.pyx to see how to fix it.
  A = expr.dot(A, expr.transpose(A),
               tile_hint=(ARRAY_SIZE, ARRAY_SIZE / ctx.num_workers)).force()

  util.log_warn('begin cholesky!')
  t1 = datetime.now()
  L = cholesky(A).glom()
  t2 = datetime.now()
  assert np.all(np.isclose(A.glom(), np.dot(L, L.T.conj())))
  cost_time = millis(t1, t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time / n)
def benchmark_cg(ctx, timer):
  print "#worker:", ctx.num_workers
  l = int(math.sqrt(ctx.num_workers))
  n = 2000 * 16
  #n = 4000 * l
  la = 20
  niter = 5
  tile_hint = (n, n / ctx.num_workers)
  #nonzer = 7
  #nz = n * (nonzer + 1) * (nonzer + 1) + n * (nonzer + 2)
  #density = 0.5 * nz / (n * n)

  A = expr.rand(n, n, tile_hint=tile_hint)
  A = (A + expr.transpose(A)) * 0.5
  I = expr.sparse_diagonal((n, n), tile_hint=tile_hint) * la
  I.force()
  A = expr.eager(A - I)

  #x1 = numpy_cg(A.glom(), niter)
  util.log_warn('begin cg!')
  t1 = datetime.now()
  x2 = conj_gradient(A, niter).force()
  t2 = datetime.now()
  cost_time = millis(t1, t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time / niter)
def connectedConponents(ctx, dim, numIters):
  linkMatrix = eager(
      expr.shuffle(
          expr.ndarray((dim, dim), dtype=np.int64,
                       tile_hint=(dim / ctx.num_workers, dim)),
          make_matrix,
      ))
  power = eager(
      expr.shuffle(
          expr.ndarray((dim, dim), dtype=np.int64,
                       tile_hint=(dim / ctx.num_workers, dim)),
          make_matrix,
      ))
  eye = expr.eye(dim, tile_hint=(dim / ctx.num_workers, dim))

  startCompute = time.time()
  result = expr.logical_or(eye, linkMatrix).optimized().glom()
  for i in range(numIters):
    power = expr.dot(power, linkMatrix).optimized().glom()
    result = expr.logical_or(result, power)
    result.optimized().glom()
  final = expr.logical_and(result, expr.transpose(result.optimized())).optimized().evaluate()
  endCompute = time.time()
  return endCompute - startCompute
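# The loop above computes bounded-hop reachability by repeatedly multiplying
# the adjacency matrix and OR-ing the result in; two nodes end up in the same
# component only if each reaches the other. A serial NumPy sketch of the same
# logic (make_matrix and the Spartan tiling are replaced by an arbitrary 0/1
# adjacency matrix; connected_components_sketch is an illustrative name):
import numpy as np

def connected_components_sketch(link, num_iters):
  dim = link.shape[0]
  result = np.logical_or(np.eye(dim, dtype=np.int64), link)
  power = link
  for _ in range(num_iters):
    power = (power.dot(link) > 0).astype(np.int64)
    result = np.logical_or(result, power)
  # Symmetrize: keep a pair only if reachability holds in both directions.
  return np.logical_and(result, result.T)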
def gen_dot(a, b):
  if not hasattr(a, 'shape') or not hasattr(b, 'shape') or len(a.shape) * len(b.shape) == 0:
    return [a * b]
  if a.shape[0] == b.shape[0]:
    if len(a.shape) > 1:
      return [expr.dot(expr.transpose(a), b)]
    elif len(b.shape) == 1:
      return [expr.dot(a, b)]
  if len(a.shape) > 1 and a.shape[1] == b.shape[0]:
    return [expr.dot(a, b)]
  if len(b.shape) > 1 and a.shape[0] == b.shape[1]:
    return [expr.dot(b, a)]
  if len(a.shape) > 1 and len(b.shape) > 1 and a.shape[1] == b.shape[1]:
    return [expr.dot(a, expr.transpose(b))]
  return [a, b]
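# A quick illustration of the dispatch above (shapes are arbitrary): with a of
# shape (3, 4) and b of shape (3, 5), the first branch matches on the shared
# leading dimension, so gen_dot returns [expr.dot(expr.transpose(a), b)],
# a (4, 5) product.
a = expr.rand(3, 4)
b = expr.rand(3, 5)
products = gen_dot(a, b)  # [expr.dot(expr.transpose(a), b)]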
def train_smo_1998(self, data, labels):
  '''
  Train an SVM model using the SMO (1998) algorithm.

  Args:
    data(Expr): points to be trained.
    labels(Expr): the correct labels of the training data.
  '''
  N = data.shape[0]  # Number of instances
  D = data.shape[1]  # Number of features

  self.b = 0.0
  self.alpha = expr.zeros((N, 1), dtype=np.float64,
                          tile_hint=[N / self.ctx.num_workers, 1]).force()

  # linear kernel
  kernel_results = expr.dot(data, expr.transpose(data),
                            tile_hint=[N / self.ctx.num_workers, N])

  labels = expr.force(labels)
  self.E = expr.zeros((N, 1), dtype=np.float64,
                      tile_hint=[N / self.ctx.num_workers, 1]).force()
  for i in xrange(N):
    self.E[i, 0] = self.b + expr.reduce(
        self.alpha, axis=None,
        dtype_fn=lambda input: input.dtype,
        local_reduce_fn=margin_mapper,
        accumulate_fn=np.add,
        fn_kw=dict(label=labels,
                   data=kernel_results[:, i].force())).glom() - labels[i, 0]

  util.log_info("Starting SMO")
  it = 0
  num_changed = 0
  examine_all = True
  while (num_changed > 0 or examine_all) and (it < self.maxiter):
    util.log_info("Iteration:%d", it)

    num_changed = 0
    if examine_all:
      for i in xrange(N):
        num_changed += self.examine_example(i, N, labels, kernel_results)
    else:
      for i in xrange(N):
        if self.alpha[i, 0] > 0 and self.alpha[i, 0] < self.C:
          num_changed += self.examine_example(i, N, labels, kernel_results)
    it += 1

    if examine_all:
      examine_all = False
    elif num_changed == 0:
      examine_all = True

  self.w = expr.zeros((D, 1), dtype=np.float64).force()
  for i in xrange(D):
    self.w[i, 0] = expr.reduce(self.alpha, axis=None,
                               dtype_fn=lambda input: input.dtype,
                               local_reduce_fn=margin_mapper,
                               accumulate_fn=np.add,
                               fn_kw=dict(label=labels,
                                          data=expr.force(data[:, i]))).glom()
  self.usew_ = True
  print 'iteration finish:', it
  print 'b:', self.b
  print 'w:', self.w.glom()
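# The error cache filled in above appears to implement
# E_i = b + sum_j alpha_j y_j K(x_j, x_i) - y_i, with margin_mapper doing the
# weighted sum per tile. A dense NumPy sketch of that initialization
# (illustrative shapes; with alpha all zero, every E_i starts at b - y_i):
import numpy as np

X = np.random.randn(8, 3)
y = np.sign(np.random.randn(8, 1))
alpha = np.zeros((8, 1))
b = 0.0
K = X.dot(X.T)                # linear-kernel Gram matrix
E = b + K.dot(alpha * y) - y  # (N, 1) error cache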
def svd(A, k=None):
  """
  Stochastic SVD.

  Parameters
  ----------
  A : spartan matrix
      Array to compute the SVD on, of shape (M, N).
  k : int, optional
      Number of singular values and vectors to compute.

  The operations include matrix multiplication and QR decomposition.
  We parallelize both of them.

  Returns
  -------
  U : Spartan array of shape (M, k).
  S : numpy array of shape (k,).
  V : numpy array of shape (k, N).
  """
  if k is None:
    k = A.shape[1]

  ctx = blob_ctx.get()
  Omega = expr.randn(A.shape[1], k, tile_hint=(A.shape[1] / ctx.num_workers, k))

  r = A.shape[0] / ctx.num_workers
  Y = expr.dot(A, Omega, tile_hint=(r, k)).force()

  Q, R = qr(Y)

  B = expr.dot(expr.transpose(Q), A)
  BTB = expr.dot(B, expr.transpose(B)).glom()

  S, U_ = np.linalg.eig(BTB)
  S = np.sqrt(S)

  # Sort eigenvalues from large to small.
  si = np.argsort(S)[::-1]
  S = S[si]
  U_ = U_[:, si]

  U = expr.dot(Q, U_).force()
  V = np.dot(np.dot(expr.transpose(B).glom(), U_),
             np.diag(np.ones(S.shape[0]) / S))
  return U, S, V.T
def cholesky(A):
  '''
  Cholesky matrix decomposition.

  Args:
    A(Expr): matrix to be decomposed.
  '''
  A = expr.force(A)
  n = int(math.sqrt(len(A.tiles)))
  tile_size = A.shape[0] / n
  for k in range(n):
    # A[k,k] = DPOTRF(A[k,k])
    diag_ex = get_ex(k, k, tile_size, A.shape)
    A = expr.map2(A, ((0, 1), ), fn=_cholesky_dpotrf_mapper,
                  shape=A.shape, update_region=diag_ex)

    if k == n - 1:
      break

    # A[l,k] = DTRSM(A[k,k], A[l,k])  l -> [k+1,n)
    col_ex = extent.create(((k + 1) * tile_size, k * tile_size),
                           (n * tile_size, (k + 1) * tile_size), A.shape)
    diag_tile = A.force().fetch(diag_ex)
    A = expr.map2(A, ((0, 1), ), fn=_cholesky_dtrsm_mapper,
                  fn_kw=dict(array=force(A), diag_tile=diag_tile),
                  shape=A.shape, update_region=col_ex)

    # A[m,m] = DSYRK(A[m,k], A[m,m])          m -> [k+1,n)
    # A[l,m] = DGEMM(A[l,k], A[m,k], A[l,m])  m -> [k+1,n) l -> [m+1,n)
    col_exs = list([extent.create((m * tile_size, m * tile_size),
                                  (n * tile_size, (m + 1) * tile_size),
                                  A.shape) for m in range(k + 1, n)])
    dgemm_1 = expr.transpose(A)[(k * tile_size):((k + 1) * tile_size), :]
    dgemm_2 = A[:, (k * tile_size):((k + 1) * tile_size)]
    A = expr.map2((A, dgemm_1, dgemm_2), ((0, 1), 1, 0),
                  fn=_cholesky_dsyrk_dgemm_mapper,
                  fn_kw=dict(array=force(A), k=k),
                  shape=A.shape, update_region=col_exs)

  # update the right corner to 0
  col_exs = list([extent.create((0, m * tile_size),
                                (m * tile_size, (m + 1) * tile_size),
                                A.shape) for m in range(1, n)])
  A = expr.map2(A, ((0, 1), ), fn=_zero_mapper,
                shape=A.shape, update_region=col_exs)
  return A
def svd(A, k=None):
  """
  Stochastic SVD.

  Parameters
  ----------
  A : spartan matrix
      Array to compute the SVD on, of shape (M, N).
  k : int, optional
      Number of singular values and vectors to compute.

  The operations include matrix multiplication and QR decomposition.
  We parallelize both of them.

  Returns
  -------
  U : Spartan array of shape (M, k).
  S : numpy array of shape (k,).
  V : numpy array of shape (k, N).
  """
  if k is None:
    k = A.shape[1]

  Omega = expr.randn(A.shape[1], k)
  Y = expr.dot(A, Omega)
  Q, R = qr(Y)

  B = expr.dot(expr.transpose(Q), A)
  BTB = expr.dot(B, expr.transpose(B)).optimized().glom()

  S, U_ = np.linalg.eig(BTB)
  S = np.sqrt(S)

  # Sort eigenvalues from large to small.
  si = np.argsort(S)[::-1]
  S = S[si]
  U_ = U_[:, si]

  U = expr.dot(Q, U_).optimized().force()
  V = np.dot(np.dot(expr.transpose(B).optimized().glom(), U_),
             np.diag(np.ones(S.shape[0]) / S))
  return U, S, V.T
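# A small dense sanity check for the routine above (a sketch: assumes A has
# full column rank so the randomized range-finder captures the whole column
# space, and that the eigenvalues of B B' come out real and positive):
import numpy as np

A_np = np.random.randn(100, 20)
U, S, VT = svd(expr.from_numpy(A_np), k=20)
U_np = U.glom()
assert np.allclose(U_np.T.dot(U_np), np.eye(20), atol=1e-6)        # orthonormal U
assert np.allclose(U_np.dot(np.diag(S)).dot(VT), A_np, atol=1e-5)  # A = U S V'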
def test_transpose_dot(self):
  npa1 = np.random.random((401, 97))
  npa2 = np.random.random((401, 97))
  result1 = np.dot(npa1, np.transpose(npa2))
  #result2 = np.dot(np.transpose(npa1), npa2)

  t1 = expr.from_numpy(npa1)
  t2 = expr.from_numpy(npa2)
  t3 = expr.dot(t1, expr.transpose(t2))
  #t4 = expr.dot(expr.transpose(t1), t2)
  assert np.all(np.isclose(result1, t3.glom()))
def update(self):
  """
  gradient = 2 x'x w - 2 x'y + 2 * lambda * w

  The constant factor 2 is dropped here; it can be absorbed into the
  learning rate.
  """
  xT = expr.transpose(self.x)
  g1 = expr.dot(expr.dot(xT, self.x), self.w)
  g2 = expr.dot(xT, self.y)
  g3 = self.ridge_lambda * self.w
  # Note the minus sign on g2: the x'y term is subtracted in the gradient.
  g4 = g1 - g2 + g3
  return expr.reshape(g4, (1, self.N_DIM))
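# The gradient formula can be checked numerically with a finite-difference
# probe (a NumPy sketch; all names below are illustrative):
import numpy as np

x = np.random.randn(50, 4)
y = np.random.randn(50, 1)
w = np.random.randn(4, 1)
lam = 0.1

# Loss: ||x w - y||^2 + lam ||w||^2, so grad = 2 x'x w - 2 x'y + 2 lam w.
grad = 2 * x.T.dot(x).dot(w) - 2 * x.T.dot(y) + 2 * lam * w
loss = lambda w: np.sum((x.dot(w) - y) ** 2) + lam * np.sum(w ** 2)
eps = 1e-6
for i in range(4):
  dw = np.zeros_like(w)
  dw[i, 0] = eps
  approx = (loss(w + dw) - loss(w - dw)) / (2 * eps)
  assert abs(approx - grad[i, 0]) < 1e-4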
def cholesky(A):
  '''
  Cholesky matrix decomposition.

  Args:
    A(Expr): matrix to be decomposed.
  '''
  n = int(math.sqrt(FLAGS.num_workers))
  tile_size = A.shape[0] / n
  print n, tile_size
  for k in range(n):
    # A[k,k] = DPOTRF(A[k,k])
    diag_ex = get_ex(k, k, tile_size, A.shape)
    A = expr.map2(A, ((0, 1), ), fn=_cholesky_dpotrf_mapper,
                  shape=A.shape, update_region=diag_ex)

    if k == n - 1:
      break

    # A[l,k] = DTRSM(A[k,k], A[l,k])  l -> [k+1,n)
    col_ex = extent.create(((k + 1) * tile_size, k * tile_size),
                           (n * tile_size, (k + 1) * tile_size), A.shape)
    A = expr.map2((A, A[diag_ex.to_slice()]), ((0, 1), None),
                  fn=_cholesky_dtrsm_mapper,
                  shape=A.shape, update_region=col_ex)

    # A[m,m] = DSYRK(A[m,k], A[m,m])          m -> [k+1,n)
    # A[l,m] = DGEMM(A[l,k], A[m,k], A[l,m])  m -> [k+1,n) l -> [m+1,n)
    col_exs = list([extent.create((m * tile_size, m * tile_size),
                                  (n * tile_size, (m + 1) * tile_size),
                                  A.shape) for m in range(k + 1, n)])
    dgemm = A[:, (k * tile_size):((k + 1) * tile_size)]
    A = expr.map2((A, expr.transpose(dgemm), dgemm), ((0, 1), 1, 0),
                  fn=_cholesky_dsyrk_dgemm_mapper,
                  shape=A.shape, update_region=col_exs).optimized()

  # update the right corner to 0
  col_exs = list([extent.create((0, m * tile_size),
                                (m * tile_size, (m + 1) * tile_size),
                                A.shape) for m in range(1, n)])
  A = expr.map2(A, ((0, 1), ), fn=_zero_mapper,
                shape=A.shape, update_region=col_exs)
  return A
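# The tile updates above follow the standard right-looking blocked Cholesky.
# A serial NumPy sketch of the same recurrence (ts is the tile size; block
# roles mirror the DPOTRF/DTRSM/DSYRK/DGEMM comments):
import numpy as np

def blocked_cholesky_sketch(A, ts):
  A = A.copy()
  n = A.shape[0] // ts
  for k in range(n):
    kk = slice(k * ts, (k + 1) * ts)
    A[kk, kk] = np.linalg.cholesky(A[kk, kk])                  # DPOTRF
    for l in range(k + 1, n):
      ll = slice(l * ts, (l + 1) * ts)
      A[ll, kk] = np.linalg.solve(A[kk, kk], A[ll, kk].T).T    # DTRSM
    for m in range(k + 1, n):
      mm = slice(m * ts, (m + 1) * ts)
      A[mm, mm] -= A[mm, kk].dot(A[mm, kk].T)                  # DSYRK
      for l in range(m + 1, n):
        ll = slice(l * ts, (l + 1) * ts)
        A[ll, mm] -= A[ll, kk].dot(A[mm, kk].T)                # DGEMM
  return np.tril(A)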
def predict(model, new_data):
  '''
  Predict the label of the given instance.

  Args:
    model(dict): trained naive bayes model.
    new_data(Expr or DistArray): data to be predicted.
  '''
  scores_per_label_and_feature = model['scores_per_label_and_feature']

  scoring_vector = expr.dot(scores_per_label_and_feature,
                            expr.transpose(new_data))
  # util.log_warn('scoring_vector:%s', scoring_vector.glom().T)

  return np.argmax(scoring_vector.glom())
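# In dense terms the prediction is one matrix-vector product over per-label
# feature log-scores followed by an argmax (a sketch with made-up numbers):
import numpy as np

scores = np.log(np.array([[0.6, 0.4],    # per-label feature likelihoods,
                          [0.2, 0.8]]))  # shape (num_labels, num_features)
new_data = np.array([[3.0, 1.0]])        # feature counts, shape (1, num_features)
label = np.argmax(scores.dot(new_data.T))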
def qr(Y):
  '''
  Compute the thin QR factorization of a matrix.

  Factor the matrix Y as QR, where Q is orthonormal and R is
  upper-triangular.

  Parameters
  ----------
  Y : Spartan array of shape (M, K).

  Notes
  -----
  Y'Y must fit in memory. Since this QR decomposition is mainly used in
  stochastic SVD, K is the target rank of a matrix of shape (M, N), and
  the assumption is that K is far smaller than M or N.

  Returns
  -------
  Q : Spartan array of shape (M, K).
  R : numpy array of shape (K, K).
  '''
  # Since K is far smaller than M, the matrix multiplication is the
  # bottleneck rather than the local Cholesky decomposition or the
  # inversion of R, so we only parallelize the multiplication. If K were
  # very large we could use the Spartan Cholesky decomposition, but for
  # now the numpy version works fine.

  # YTY = Y'Y. YTY has shape (K, K).
  YTY = expr.dot(expr.transpose(Y), Y).optimized().glom()

  # Do the Cholesky decomposition to get R.
  R = np.linalg.cholesky(YTY).T

  # Find the inverse of R.
  inv_R = np.linalg.inv(R)

  # Q = Y * inv(R)
  Q = expr.dot(Y, inv_R).optimized().evaluate()

  return Q, R
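# This is the classical Cholesky QR: Y'Y = R'R gives the upper-triangular R,
# and Q = Y inv(R) is then orthonormal. A dense NumPy check of the identity
# (a sketch; a well-conditioned random input is assumed):
import numpy as np

Y = np.random.randn(1000, 10)
R = np.linalg.cholesky(Y.T.dot(Y)).T   # upper-triangular
Q = Y.dot(np.linalg.inv(R))
assert np.allclose(Q.T.dot(Q), np.eye(10), atol=1e-8)
assert np.allclose(Q.dot(R), Y)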
def als(A, la=0.065, alpha=40, implicit_feedback=False, num_features=20,
        num_iter=10, M=None):
  '''
  Compute the factorization A = U M' using the alternating least-squares
  (ALS) method, where `A` is the "ratings" matrix which maps from a user
  and item to a rating score, and `U` and `M` are the factor matrices
  representing user and item preferences.

  Args:
    A(Expr or DistArray): the rating matrix which maps from a user and
      item to a rating score.
    la(float): the regularization parameter of ALS.
    alpha(int): confidence parameter used on implicit feedback.
    implicit_feedback(bool): whether to use the implicit-feedback method
      for ALS.
    num_features(int): dimension of the feature space.
    num_iter(int): max number of iterations to run.
  '''
  num_users = A.shape[0]
  num_items = A.shape[1]

  AT = expr.transpose(A)

  avg_rating = expr.sum(A, axis=0) * 1.0 / expr.count_nonzero(A, axis=0)

  M = expr.rand(num_items, num_features)
  M = expr.assign(M, np.s_[:, 0], avg_rating.reshape((avg_rating.shape[0], 1)))

  #A = expr.retile(A, tile_hint=util.calc_tile_hint(A, axis=0))
  #AT = expr.retile(AT, tile_hint=util.calc_tile_hint(AT, axis=0))
  for i in range(num_iter):
    # Recompute U.
    shape = (num_users, num_features)
    U = expr.outer((A, M), (0, None), fn=_solve_U_or_M_mapper,
                   fn_kw={'la': la, 'alpha': alpha,
                          'implicit_feedback': implicit_feedback,
                          'shape': shape},
                   shape=shape, dtype=np.float)

    # Recompute M.
    shape = (num_items, num_features)
    M = expr.outer((AT, U), (0, None), fn=_solve_U_or_M_mapper,
                   fn_kw={'la': la, 'alpha': alpha,
                          'implicit_feedback': implicit_feedback,
                          'shape': shape},
                   shape=shape, dtype=np.float)
  return U, M
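# For explicit feedback, each _solve_U_or_M_mapper call amounts to solving
# regularized normal equations per user (or per item). A dense NumPy sketch
# of one U update (the plain form, ignoring the per-row weighting some ALS
# variants apply to la; als_update_sketch is an illustrative name):
import numpy as np

def als_update_sketch(A, M, la):
  # u_i = (M'M + la I)^-1 M' a_i for every user row a_i, done in one solve.
  G = M.T.dot(M) + la * np.eye(M.shape[1])
  return np.linalg.solve(G, M.T.dot(A.T)).T  # shape (num_users, num_features)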
def qr(Y):
  '''
  Compute the thin QR factorization of a matrix.

  Factor the matrix Y as QR, where Q is orthonormal and R is
  upper-triangular.

  Parameters
  ----------
  Y : Spartan array of shape (M, K).

  Notes
  -----
  Y'Y must fit in memory. Since this QR decomposition is mainly used in
  stochastic SVD, K is the target rank of a matrix of shape (M, N), and
  the assumption is that K is far smaller than M or N.

  Returns
  -------
  Q : Spartan array of shape (M, K).
  R : numpy array of shape (K, K).
  '''
  # Since K is far smaller than M, the matrix multiplication is the
  # bottleneck rather than the local Cholesky decomposition or the
  # inversion of R, so we only parallelize the multiplication. If K were
  # very large we could use the Spartan Cholesky decomposition, but for
  # now the numpy version works fine.

  # YTY = Y'Y. YTY has shape (K, K).
  YTY = expr.dot(expr.transpose(Y), Y).optimized().glom()

  # Do the Cholesky decomposition to get R.
  R = np.linalg.cholesky(YTY).T

  # Find the inverse of R.
  inv_R = np.linalg.inv(R)

  # Q = Y * inv(R)
  Q = expr.dot(Y, inv_R).optimized().force()

  return Q, R
def benchmark_cholesky(ctx, timer):
  print "#worker:", ctx.num_workers
  #n = int(math.pow(ctx.num_workers, 1.0 / 3.0))
  n = int(math.sqrt(ctx.num_workers))
  #ARRAY_SIZE = 1600 * 4
  ARRAY_SIZE = 900 * n

  util.log_warn('prepare data!')
  #A = np.random.randn(ARRAY_SIZE, ARRAY_SIZE)
  #A = np.dot(A, A.T)
  A = expr.randn(ARRAY_SIZE, ARRAY_SIZE)
  A = expr.dot(A, expr.transpose(A))

  util.log_warn('begin cholesky!')
  t1 = datetime.now()
  L = cholesky(A).optimized().glom()
  t2 = datetime.now()
  #assert np.all(np.isclose(A.glom(), np.dot(L, L.T.conj())))
  cost_time = millis(t1, t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time / n)
def svds(A, k=6):
  """Compute the largest k singular values/vectors for a sparse matrix.

  Parameters
  ----------
  A : sparse matrix
      Array to compute the SVD on, of shape (M, N).
  k : int, optional
      Number of singular values and vectors to compute.

  Returns
  -------
  u : ndarray, shape=(M, k)
      Unitary matrix having left singular vectors as columns.
  s : ndarray, shape=(k,)
      The singular values.
  vt : ndarray, shape=(k, N)
      Unitary matrix having right singular vectors as rows.
  """
  AT = expr.transpose(A)
  d, u = lanczos.solve(AT, A, k)
  d, v = lanczos.solve(A, AT, k)
  return u, np.sqrt(d), v.T
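# The two Lanczos solves rely on the standard identity linking the SVD of A
# to the eigendecompositions of A'A and AA': the eigenvalues d are squared
# singular values, hence the np.sqrt(d) above. A dense NumPy check (a sketch):
import numpy as np

A = np.random.randn(30, 20)
evals = np.linalg.eigvalsh(A.T.dot(A))[::-1]  # descending
svals = np.linalg.svd(A, compute_uv=False)
assert np.allclose(np.sqrt(np.maximum(evals, 0)), svals, atol=1e-8)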
def benchmark_cholesky(ctx, timer):
  print "#worker:", ctx.num_workers
  #n = int(math.pow(ctx.num_workers, 1.0 / 3.0))
  n = int(math.sqrt(ctx.num_workers))
  ARRAY_SIZE = 1600 * 4
  #ARRAY_SIZE = 1600 * n

  util.log_warn("prepare data!")
  #A = np.random.randn(ARRAY_SIZE, ARRAY_SIZE)
  #A = np.dot(A, A.T)
  #A = expr.force(from_numpy(A, tile_hint=(ARRAY_SIZE/n, ARRAY_SIZE/n)))
  A = expr.randn(ARRAY_SIZE, ARRAY_SIZE,
                 tile_hint=(ARRAY_SIZE / n, ARRAY_SIZE / n))
  A = expr.dot(A, expr.transpose(A)).force()

  util.log_warn("begin cholesky!")
  t1 = datetime.now()
  L = cholesky(A).glom()
  t2 = datetime.now()
  assert np.all(np.isclose(A.glom(), np.dot(L, L.T.conj())))
  cost_time = millis(t1, t2)
  print "total cost time:%s ms, per iter cost time:%s ms" % (cost_time, cost_time / n)
def svds(A, k=6):
  """Compute the largest k singular values/vectors for a sparse matrix.

  Parameters
  ----------
  A : sparse matrix
      Array to compute the SVD on, of shape (M, N).
  k : int, optional
      Number of singular values and vectors to compute.

  Returns
  -------
  u : ndarray, shape=(M, k)
      Unitary matrix having left singular vectors as columns.
  s : ndarray, shape=(k,)
      The singular values.
  vt : ndarray, shape=(k, N)
      Unitary matrix having right singular vectors as rows.
  """
  AT = expr.transpose(A).force()
  d, u = lanczos.solve(AT, A, k)
  d, v = lanczos.solve(A, AT, k)
  return u, np.sqrt(d), v.T
def test_transpose2(self):
  t1 = expr.arange((101, 102, 103))
  t2 = np.transpose(np.reshape(np.arange(101 * 102 * 103), (101, 102, 103)))
  Assert.all_eq(expr.transpose(t1).glom(), t2)
def train_smo_2005(self, data, labels):
  '''
  Train an SVM model using the SMO (2005) algorithm.

  Args:
    data(Expr): points to be trained.
    labels(Expr): the correct labels of the training data.
  '''
  N = data.shape[0]  # Number of instances
  D = data.shape[1]  # Number of features

  self.b = 0.0
  alpha = expr.zeros((N, 1), dtype=np.float64,
                     tile_hint=[N / self.ctx.num_workers, 1]).force()

  # linear kernel
  kernel_results = expr.dot(data, expr.transpose(data),
                            tile_hint=[N / self.ctx.num_workers, N])

  gradient = expr.ones((N, 1), dtype=np.float64,
                       tile_hint=[N / self.ctx.num_workers, 1]) * -1.0

  expr_labels = expr.lazify(labels)

  util.log_info("Starting SMO")
  pv1 = pv2 = -1
  it = 0
  while it < self.maxiter:
    util.log_info("Iteration:%d", it)

    minObj = 1e100

    expr_alpha = expr.lazify(alpha)
    G = expr.multiply(labels, gradient) * -1.0

    v1_mask = ((expr_labels > self.tol) * (expr_alpha < self.C) +
               (expr_labels < -self.tol) * (expr_alpha > self.tol))
    v1 = expr.argmax(G[v1_mask - True]).glom().item()
    maxG = G[v1, 0].glom()
    print 'maxv1:', v1, 'maxG:', maxG

    v2_mask = ((expr_labels > self.tol) * (expr_alpha > self.tol) +
               (expr_labels < -self.tol) * (expr_alpha < self.C))
    min_v2 = expr.argmin(G[v2_mask - True]).glom().item()
    minG = G[min_v2, 0].glom()
    #print 'minv2:', min_v2, 'minG:', minG

    set_v2 = v2_mask.glom().nonzero()[0]
    #print 'actives:', set_v2.shape[0]
    v2 = -1
    for v in set_v2:
      b = maxG - G[v, 0].glom()
      if b > self.tol:
        na = (kernel_results[v1, v1] + kernel_results[v, v] -
              2 * kernel_results[v1, v]).glom()[0][0]
        if na < self.tol:
          na = 1e12
        obj = -(b * b) / na
        if obj <= minObj and v1 != pv1 or v != pv2:
          v2 = v
          a = na
          minObj = obj

    if v2 == -1:
      break
    if maxG - minG < self.tol:
      break

    print 'opt v1:', v1, 'v2:', v2

    pv1 = v1
    pv2 = v2

    y1 = labels[v1, 0]
    y2 = labels[v2, 0]

    oldA1 = alpha[v1, 0]
    oldA2 = alpha[v2, 0]

    # Calculate new alpha values, to reduce the objective function...
    b = y2 * expr.glom(gradient[v2, 0]) - y1 * expr.glom(gradient[v1, 0])
    if y1 != y2:
      a += 4 * kernel_results[v1, v2].glom()

    newA1 = oldA1 + y1 * b / a
    newA2 = oldA2 - y2 * b / a

    # Correct for alpha being out of range...
    sum = y1 * oldA1 + y2 * oldA2
    if newA1 < self.tol:
      newA1 = 0.0
    elif newA1 > self.C:
      newA1 = self.C
    newA2 = y2 * (sum - y1 * newA1)

    if newA2 < self.tol:
      newA2 = 0.0
    elif newA2 > self.C:
      newA2 = self.C
    newA1 = y1 * (sum - y2 * newA2)

    # Update the gradient...
    dA1 = newA1 - oldA1
    dA2 = newA2 - oldA2

    gradient += (expr.multiply(labels, kernel_results[:, v1]) * y1 * dA1 +
                 expr.multiply(labels, kernel_results[:, v2]) * y2 * dA2)

    alpha[v1, 0] = newA1
    alpha[v2, 0] = newA2
    #print 'alpha:', alpha.glom().T

    it += 1
    #print 'gradient:', gradient.glom().T

  self.w = expr.zeros((D, 1), dtype=np.float64).force()
  for i in xrange(D):
    self.w[i, 0] = expr.reduce(alpha, axis=None,
                               dtype_fn=lambda input: input.dtype,
                               local_reduce_fn=margin_mapper,
                               accumulate_fn=np.add,
                               fn_kw=dict(label=labels,
                                          data=expr.force(data[:, i]))).glom()

  self.b = 0.0
  E = (labels - self.margins(data)).force()

  minB = -1e100
  maxB = 1e100
  actualB = 0.0
  numActualB = 0
  for i in xrange(N):
    ai = alpha[i, 0]
    yi = labels[i, 0]
    Ei = E[i, 0]

    if ai < 1e-3:
      if yi < self.tol:
        maxB = min((maxB, Ei))
      else:
        minB = max((minB, Ei))
    elif ai > self.C - 1e-3:
      if yi < self.tol:
        minB = max((minB, Ei))
      else:
        maxB = min((maxB, Ei))
    else:
      numActualB += 1
      actualB += (Ei - actualB) / float(numActualB)

  if numActualB > 0:
    self.b = actualB
  else:
    self.b = 0.5 * (minB + maxB)

  self.usew_ = True
  print 'iteration finish:', it
  print 'b:', self.b
  print 'w:', self.w.glom()
def test_transpose1(self):
  t1 = expr.arange((3721, 1347))
  t2 = np.transpose(np.reshape(np.arange(3721 * 1347), (3721, 1347)))
  Assert.all_eq(expr.transpose(t1).glom(), t2)