def test_ib_recommender(self):
    """Smoke-test ItemBasedRecommender.precompute on a random rating table.

    The table is an N_USERS x N_ITEMS sparse array (CSR format, density=0.1,
    float64).  The tile hint asks for tiles that span all N_USERS rows and
    N_ITEMS / num_workers columns.  Nothing is asserted; the test passes as
    long as precompute() does not raise.
    """
    num_workers = blob_ctx.get().num_workers
    table_shape = (N_USERS, N_ITEMS)
    # NOTE(review): under Python 2 this is integer division when both
    # operands are ints -- confirm that is what tile_hint expects.
    column_tiles = (N_USERS, N_ITEMS / num_workers)
    rating_table = expr.sparse_rand(
        table_shape,
        dtype=np.float64,
        density=0.1,
        format="csr",
        tile_hint=column_tiles)
    ItemBasedRecommender(rating_table).precompute()
def test_fio_sparse(self):
    """Round-trip a 100x100 sparse array through save/load and pickle/unpickle.

    Each writer/reader pair is exercised twice, once with the trailing
    boolean argument False and once with True (presumably the zip switch --
    confirm against expr.save).  Every write must return True and every
    read must reproduce the original dense contents.
    """
    self.create_path()
    t1 = expr.sparse_rand((100, 100)).evaluate()

    round_trips = (
        (expr.save, expr.load, "fiotest3"),
        (expr.pickle, expr.unpickle, "fiotest4"),
    )
    for write, read, prefix in round_trips:
        for flag in (False, True):
            Assert.eq(write(t1, prefix, self.test_dir, flag), True)
            expected = t1.glom().todense()
            restored = read(prefix, self.test_dir, flag).glom().todense()
            Assert.all_eq(expected, restored)
def test_ib_recommender(self):
    """Smoke-test ItemBasedRecommender.precompute with opt_auto_tiling set to 0.

    Builds an N_USERS x N_ITEMS sparse rating table (CSR format, density=0.1,
    float64) with an explicit tile hint of all rows by
    N_ITEMS / num_workers columns, then runs precompute().  Nothing is
    asserted; the test passes as long as no exception is raised.
    """
    ctx = blob_ctx.get()
    # Set before the table is built; the flag is not restored afterwards.
    FLAGS.opt_auto_tiling = 0

    table_shape = (N_USERS, N_ITEMS)
    column_tiles = (N_USERS, N_ITEMS / ctx.num_workers)
    rating_table = expr.sparse_rand(
        table_shape,
        dtype=np.float64,
        density=0.1,
        format="csr",
        tile_hint=column_tiles)
    ItemBasedRecommender(rating_table).precompute()
def benchmark_ib_recommander(ctx, timer): print "#worker:", ctx.num_workers N_ITEMS = 800 N_USERS = 8000 rating_table = expr.sparse_rand((N_USERS, N_ITEMS), dtype=np.float64, density=0.1, format = "csr") t1 = datetime.now() model = ItemBasedRecommender(rating_table) model.precompute() t2 = datetime.now() cost_time = millis(t1, t2) print "total cost time:%s ms" % cost_time
def benchmark_ib_recommander(ctx, timer): print "#worker:", ctx.num_workers N_ITEMS = 800 N_USERS = 8000 rating_table = expr.sparse_rand((N_USERS, N_ITEMS), dtype=np.float64, density=0.1, format="csr") t1 = datetime.now() model = ItemBasedRecommender(rating_table) model.precompute() t2 = datetime.now() cost_time = millis(t1, t2) print "total cost time:%s ms" % cost_time
def test_svds(self):
    """Check that svds() agrees with linalg.svds() on the same matrix.

    The input is a DIM-shaped CSR array created with density=1, with a tile
    hint of DIM[0] / num_workers rows by all DIM[1] columns.  Both
    decompositions are asked for `rank` components, where rank is the
    numerical rank of the gathered matrix.  Factors are compared through
    absolute(), so differences in sign alone do not fail the test.
    """
    num_workers = blob_ctx.get().num_workers

    # Create a sparse matrix.
    row_tiles = (DIM[0] / num_workers, DIM[1])
    A = expr.sparse_rand(DIM,
                         density=1,
                         format="csr",
                         tile_hint=row_tiles,
                         dtype=np.float64)

    rank = np.linalg.matrix_rank(A.glom())
    # Unpack explicitly so a wrong number of returned factors still raises.
    got_u, got_s, got_vt = svds(A, rank)
    ref_u, ref_s, ref_vt = linalg.svds(A.glom(), rank)

    factor_pairs = ((got_u, ref_u), (got_s, ref_s), (got_vt, ref_vt))
    for got, ref in factor_pairs:
        assert np.allclose(absolute(got), absolute(ref))
def test_svds(self):
    """Compare svds() against linalg.svds() factor by factor.

    Builds a DIM-shaped CSR array with density=1 (tile hint:
    DIM[0] / num_workers rows, DIM[1] columns), takes its numerical rank
    from the gathered matrix, and requests that many components from both
    implementations.  U, S and VT are each compared after absolute(), so
    sign-only differences are tolerated.
    """
    ctx = blob_ctx.get()

    # Create a sparse matrix.
    tile_shape = (DIM[0] / ctx.num_workers, DIM[1])
    A = expr.sparse_rand(
        DIM, density=1, format="csr", tile_hint=tile_shape, dtype=np.float64)

    k = np.linalg.matrix_rank(A.glom())
    # Unpack explicitly so a wrong number of returned factors still raises.
    U, S, VT = svds(A, k)
    U2, S2, VT2 = linalg.svds(A.glom(), k)

    for computed, reference in zip((U, S, VT), (U2, S2, VT2)):
        assert np.allclose(absolute(computed), absolute(reference))
def profile2(self):
    """Log how long sparse-array save/load and pickle/unpickle take.

    A 10000x10000 sparse array is written and read back with each API, first
    without zip and then with zip (the trailing boolean argument).  Every
    step runs under util.timeit and its duration is reported through
    util.log_info together with the array shape.
    """
    self.create_path()
    t1 = expr.sparse_rand((10000, 10000)).evaluate()

    # (operation, log format) pairs, executed strictly in this order because
    # each read depends on the write just before it.
    steps = (
        (lambda: expr.save(t1, "fiotest3", self.test_dir, False),
         'Save a %s sparse array in %s without zip'),
        (lambda: expr.load("fiotest3", self.test_dir, False).evaluate(),
         'Load a %s sparse array in %s without zip'),
        (lambda: expr.save(t1, "fiotest3", self.test_dir, True),
         'Save a %s sparse array in %s with zip'),
        (lambda: expr.load("fiotest3", self.test_dir, True).evaluate(),
         'Load a %s sparse array in %s with zip'),
        (lambda: expr.pickle(t1, "fiotest4", self.test_dir, False),
         'Pickle a %s sparse array in %s without zip'),
        (lambda: expr.unpickle("fiotest4", self.test_dir, False).evaluate(),
         'Unpickle a %s sparse array in %s without zip'),
        (lambda: expr.pickle(t1, "fiotest4", self.test_dir, True),
         'Pickle a %s sparse array in %s with zip'),
        (lambda: expr.unpickle("fiotest4", self.test_dir, True).evaluate(),
         'Unpickle a %s sparse array in %s with zip'),
    )
    for operation, message in steps:
        # util.timeit returns (elapsed, result); the result is not needed.
        elapsed, _ = util.timeit(operation)
        util.log_info(message, t1.shape, elapsed)
def test_fio_partial_sparse(self):
    """Check partial_load / partial_unpickle on a 300x300 sparse array.

    The array is saved and pickled, then fully loaded once as the reference.
    For each partial reader, a mapping from every key of t1.tiles to that
    tile's worker is passed in, the returned tiles are written back into
    t1.tiles, and the dense contents are compared with the reference.
    """
    self.create_path()
    t1 = expr.sparse_rand((300, 300)).evaluate()
    expr.save(t1, "fiotest_partial1", self.test_dir, False)
    expr.pickle(t1, "fiotest_partial2", self.test_dir, False)
    t2 = expr.load("fiotest_partial1", self.test_dir, False)

    partial_readers = (
        (expr.partial_load, "fiotest_partial1"),
        (expr.partial_unpickle, "fiotest_partial2"),
    )
    for read_tiles, prefix in partial_readers:
        # Request every tile, keyed as in t1.tiles, from its current worker.
        requested = {extent: tile.worker
                     for extent, tile in t1.tiles.iteritems()}
        reloaded = read_tiles(requested, prefix, self.test_dir, False)
        for extent, tile in reloaded.iteritems():
            t1.tiles[extent] = tile
        Assert.all_eq(t1.glom().todense(), t2.glom().todense())
def test_sparse_glom(self):
    """Verify that glom() of a sparse array does not return a numpy ndarray.

    Creates an ARRAY_SIZE sparse array with density=0.5, forces it, gathers
    it with glom(), asserts the result is not an np.ndarray, and prints its
    dense form.
    """
    sparse_array = expr.sparse_rand(ARRAY_SIZE, density=0.5)
    sparse_array.force()

    gathered = sparse_array.glom()
    assert not isinstance(gathered, np.ndarray), \
        'Bad type: %s' % type(gathered)
    print(gathered.todense())
def test_sparse_create(self):
    """Smoke-test creating and forcing a sparse array with density=0.001.

    Nothing is asserted; the test passes if sparse_rand() and force()
    complete without raising.
    """
    expr.sparse_rand(ARRAY_SIZE, density=0.001).force()