def bfs(ctx, dim):
    '''
    Time the set-up and the iteration phases of a matrix-product BFS.

    A `(dim, 1)` vector and a `(dim, dim)` matrix are materialised eagerly
    (their contents come from the `make_current` and `make_matrix` shuffle
    mappers), then `current = linkMatrix . current` is repeated until the
    number of non-zero entries of the vector stops changing.

    Args:
      ctx: object exposing `num_workers`; used to size the tile hints.
      dim(int): side length of the link matrix / length of the vector.

    Returns:
      tuple: `(generation_seconds, compute_seconds)` measured with
        `time.time()`.
    '''
    util.log_info("start to computing......")

    generate_start = time.time()

    # Floor division keeps the tile hints integral. `/` only did that under
    # Python 2 integer semantics; with true division it produces floats.
    # NOTE(review): assumes dim >= ctx.num_workers, otherwise the hint is 0.
    per_worker = dim // ctx.num_workers

    current = eager(
        expr.shuffle(
            expr.ndarray((dim, 1),
                         dtype=np.int64,
                         tile_hint=(per_worker, 1)),
            make_current,
        ))

    link_matrix = eager(
        expr.shuffle(
            expr.ndarray((dim, dim),
                         dtype=np.int64,
                         tile_hint=(dim, per_worker)),
            make_matrix,
        ))

    generate_end = time.time()

    compute_start = time.time()
    while True:
        # Named `following` rather than `next` so the builtin is not shadowed.
        following = expr.dot(link_matrix, current)
        count_before = expr.count_nonzero(current)
        count_after = expr.count_nonzero(following)
        # True when the non-zero count did not change in this step.
        unchanged = expr.equal(count_before, count_after).glom()
        current = following
        if unchanged:
            break

    # Force evaluation of the final vector before stopping the clock.
    current.evaluate()
    compute_end = time.time()

    return (generate_end - generate_start, compute_end - compute_start)
def als(A, la=0.065, alpha=40, implicit_feedback=False, num_features=20,
        num_iter=10, M=None):
    '''
    Compute the factorization A = U M' using the alternating least-squares
    (ALS) method.

    `A` is the "ratings" matrix which maps from a user and item to a rating
    score; `U` and `M` are the factor matrices, which represent user and item
    preferences.

    Args:
      A(Expr or DistArray): the rating matrix which maps from a user and item
        to a rating score.
      la(float): the parameter of the als.
      alpha(int): confidence parameter used on implicit feedback.
      implicit_feedback(bool): whether using implicit_feedback method for als.
      num_features(int): dimension of the feature space.
      num_iter(int): max iteration to run; must be at least 1.
      M: accepted for interface compatibility but ignored -- the item factors
        are always re-initialised inside this function.

    Returns:
      tuple: `(U, M)`, requested with shapes `(num_users, num_features)` and
        `(num_items, num_features)` respectively.

    Raises:
      ValueError: if `num_iter` is less than 1.
    '''
    # Fail fast: with zero iterations `U` would never be bound and the final
    # `return` would raise UnboundLocalError.
    if num_iter < 1:
        raise ValueError('num_iter must be at least 1, got %r' % (num_iter,))

    num_users = A.shape[0]
    num_items = A.shape[1]

    AT = expr.transpose(A)

    # Column sums divided by column non-zero counts: one average per item.
    avg_rating = expr.sum(A, axis=0) * 1.0 / expr.count_nonzero(A, axis=0)

    # Random item factors whose first feature column is then overwritten with
    # the per-item average.
    M = expr.rand(num_items, num_features)
    M = expr.assign(M, np.s_[:, 0],
                    avg_rating.reshape((avg_rating.shape[0], 1)))

    # NOTE: retiling A and AT along axis 0 (expr.retile with
    # util.calc_tile_hint) was tried here and is currently disabled.

    for _ in range(num_iter):
        # Recomputing U
        shape = (num_users, num_features)
        # Builtin `float` is the dtype the removed `np.float` alias stood for.
        U = expr.outer((A, M), (0, None), fn=_solve_U_or_M_mapper,
                       fn_kw={'la': la, 'alpha': alpha,
                              'implicit_feedback': implicit_feedback,
                              'shape': shape},
                       shape=shape, dtype=float)

        # Recomputing M
        shape = (num_items, num_features)
        M = expr.outer((AT, U), (0, None), fn=_solve_U_or_M_mapper,
                       fn_kw={'la': la, 'alpha': alpha,
                              'implicit_feedback': implicit_feedback,
                              'shape': shape},
                       shape=shape, dtype=float)

    return U, M
def als(A, la=0.065, alpha=40, implicit_feedback=False, num_features=20,
        num_iter=10):
    '''
    Compute the factorization A = U M' using the alternating least-squares
    (ALS) method.

    `A` is the "ratings" matrix which maps from a user and item to a rating
    score; `U` and `M` are the factor matrices, which represent user and item
    preferences.

    Args:
      A(Expr or DistArray): the rating matrix which maps from a user and item
        to a rating score. It is forced (evaluated) on entry.
      la(float): the parameter of the als.
      alpha(int): confidence parameter used on implicit feedback.
      implicit_feedback(bool): whether using implicit_feedback method for als.
      num_features(int): dimension of the feature space.
      num_iter(int): max iteration to run; must be at least 1.

    Returns:
      tuple: `(U, M)` as produced by the last pair of shuffle steps.

    Raises:
      ValueError: if `num_iter` is less than 1.
    '''
    # Fail fast: with zero iterations `U` would never be bound and the final
    # `return` would raise UnboundLocalError.
    if num_iter < 1:
        raise ValueError('num_iter must be at least 1, got %r' % (num_iter,))

    A = expr.force(A)
    num_users = A.shape[0]
    num_items = A.shape[1]
    num_tiles = len(A.tiles)

    # Floor division keeps the tile hint integral. `/` only did that under
    # Python 2 integer semantics; with true division it produces a float.
    items_per_tile = num_items // num_tiles

    # An empty (num_items, num_users) array filled in by _transpose_mapper,
    # which receives the original array as `orig_array`.
    AT = expr.shuffle(
        expr.ndarray((num_items, num_users),
                     dtype=A.dtype,
                     tile_hint=(items_per_tile, num_users)),
        _transpose_mapper,
        kw={'orig_array': A})

    # Column sums divided by column non-zero counts: one average per item.
    avg_rating = (
        expr.sum(A, axis=0, tile_hint=(items_per_tile,)) * 1.0 /
        expr.count_nonzero(A, axis=0, tile_hint=(items_per_tile,)))

    # Initial item factors; _init_M_mapper is handed the per-item averages.
    M = expr.shuffle(
        expr.ndarray((num_items, num_features),
                     tile_hint=(items_per_tile, num_features)),
        _init_M_mapper,
        kw={'avg_rating': avg_rating})

    for _ in range(num_iter):
        # Recomputing U
        U = expr.shuffle(A, _solve_U_or_M_mapper,
                         kw={'U_or_M': M, 'la': la, 'alpha': alpha,
                             'implicit_feedback': implicit_feedback})
        # Recomputing M
        M = expr.shuffle(AT, _solve_U_or_M_mapper,
                         kw={'U_or_M': U, 'la': la, 'alpha': alpha,
                             'implicit_feedback': implicit_feedback})

    return U, M
def als(A, la=0.065, alpha=40, implicit_feedback=False, num_features=20,
        num_iter=10, M=None):
    '''
    Compute the factorization A = U M' using the alternating least-squares
    (ALS) method.

    `A` is the "ratings" matrix which maps from a user and item to a rating
    score; `U` and `M` are the factor matrices, which represent user and item
    preferences.

    Args:
      A(Expr or DistArray): the rating matrix which maps from a user and item
        to a rating score.
      la(float): the parameter of the als.
      alpha(int): confidence parameter used on implicit feedback.
      implicit_feedback(bool): whether using implicit_feedback method for als.
      num_features(int): dimension of the feature space.
      num_iter(int): max iteration to run; must be at least 1.
      M: accepted for interface compatibility but ignored -- the item factors
        are always re-initialised inside this function.

    Returns:
      tuple: `(U, M)`, requested with shapes `(num_users, num_features)` and
        `(num_items, num_features)` respectively.

    Raises:
      ValueError: if `num_iter` is less than 1.
    '''
    # Without at least one iteration `U` is never assigned, so reject the
    # request explicitly instead of failing with UnboundLocalError at the end.
    if num_iter < 1:
        raise ValueError('num_iter must be at least 1, got %r' % (num_iter,))

    num_users, num_items = A.shape[0], A.shape[1]
    user_shape = (num_users, num_features)
    item_shape = (num_items, num_features)

    # Keyword arguments shared by every solver call; 'shape' is added per call.
    solver_kw = {
        'la': la,
        'alpha': alpha,
        'implicit_feedback': implicit_feedback,
    }

    def solve(ratings, factors, shape):
        # One half-step: solve for the factors of shape `shape`.
        # Builtin `float` is the dtype the removed `np.float` alias stood for.
        return expr.outer(
            (ratings, factors), (0, None),
            fn=_solve_U_or_M_mapper,
            fn_kw=dict(solver_kw, shape=shape),
            shape=shape,
            dtype=float)

    AT = expr.transpose(A)

    # Column sums divided by column non-zero counts: one average per item.
    avg_rating = expr.sum(A, axis=0) * 1.0 / expr.count_nonzero(A, axis=0)

    # Random item factors whose first feature column is then overwritten with
    # the per-item average.
    M = expr.rand(num_items, num_features)
    M = expr.assign(M, np.s_[:, 0],
                    avg_rating.reshape((avg_rating.shape[0], 1)))

    # NOTE: retiling A and AT along axis 0 (expr.retile with
    # util.calc_tile_hint) was tried here and is currently disabled.

    for _ in range(num_iter):
        # Recomputing U
        U = solve(A, M, user_shape)
        # Recomputing M
        M = solve(AT, U, item_shape)

    return U, M
def test_count_nonzero(self):
    '''count_nonzero gives TEST_SIZE for an all-ones vector and 0 for zeros.'''
    expectations = (
        (expr.ones, TEST_SIZE),
        (expr.zeros, 0),
    )
    for build, expected_count in expectations:
        vector = build((TEST_SIZE,))
        Assert.eq(expr.count_nonzero(vector).glom(), expected_count)
def test_count_nonzero(self):
    '''Check count_nonzero on an all-ones and on an all-zeros vector.'''

    def check(make_vector, expected):
        # Build a 1-D array of TEST_SIZE elements and compare its glommed
        # non-zero count with the expected value.
        vector = make_vector((TEST_SIZE,))
        Assert.eq(expr.count_nonzero(vector).glom(), expected)

    check(expr.ones, TEST_SIZE)
    check(expr.zeros, 0)