def test01_most_similar(self):
    """Check that ParALS.most_similar agrees with per-key ALS.most_similar.

    The query keys are the neighbours ALS returns for
    '49.Star_Wars_(1977)' with topk=128. The parallel lookup is issued
    twice, once with the keys and once with the indexes obtained from
    ``get_index_pool``; the returned keys and scores of all three paths
    are compared pairwise.
    """
    set_log_level(2)
    data_opt = self.get_ml100k_mm_opt()
    opt = ALSOption().get_default_option()
    opt.d = 20
    opt.num_workers = 1

    als = ALS(opt, data_opt=data_opt)
    als.initialize()
    als.train()
    pals = ParALS(als)

    seed_hits = als.most_similar('49.Star_Wars_(1977)', topk=128)
    query_keys = [key for key, _ in seed_hits]
    query_indexes = als.get_index_pool(query_keys)

    # Reference results: one single-key query per key.
    reference = [als.most_similar(query, topk=10) for query in query_keys]
    expected_keys = []
    expected_rows = []
    for hits in reference:
        expected_keys.append([key for key, _ in hits])
        expected_rows.append([score for _, score in hits])
    expected_scores = np.array(expected_rows)
    # A ragged result would not produce a (128, 10) array.
    self.assertEqual(expected_scores.shape, (128, 10), msg='check even size')
    expected_scores = expected_scores.reshape(len(reference), 10)

    pals.num_workers = 1
    keys_by_key, scores_by_key = pals.most_similar(
        query_keys, topk=10, repr=True)
    keys_by_index, scores_by_index = pals.most_similar(
        query_indexes, topk=10, repr=True)

    key_results = [expected_keys, keys_by_key, keys_by_index]
    for left, right in combinations(key_results, 2):
        self.assertEqual(left, right)

    score_results = [expected_scores, scores_by_key, scores_by_index]
    for left, right in combinations(score_results, 2):
        self.assertTrue(np.allclose(left, right))
def test05_topk_MT(self):
    """Compare ParALS.topk_recommendation (4 workers) with the ALS version.

    Both are run over every user id with topk=5. The parallel call must
    return as many query keys as the naive result has entries, the same
    recommendations per key, and must take less than 1/1.5 of the naive
    wall-clock time.
    """
    set_log_level(2)
    data_opt = self.get_ml100k_mm_opt()
    opt = ALSOption().get_default_option()
    opt.d = 20
    opt.num_workers = 1

    als = ALS(opt, data_opt=data_opt)
    als.initialize()
    als.train()
    als.build_userid_map()
    user_keys = als._idmanager.userids

    # Baseline timing on the plain ALS model.
    began = time.time()
    expected = als.topk_recommendation(user_keys, topk=5)
    naive_elapsed = time.time() - began

    pals = ParALS(als)
    pals.num_workers = 4
    began = time.time()
    query_keys, recommended, _ = pals.topk_recommendation(
        user_keys, topk=5, repr=True)
    par_elapsed = time.time() - began

    self.assertEqual(len(query_keys), len(expected))
    for query, items in zip(query_keys, recommended):
        self.assertEqual(expected[query], items)
    # Wall-clock comparison: the parallel run must be over 1.5x faster.
    self.assertTrue(naive_elapsed > par_elapsed * 1.5)
def test11_train_ml_20m_on_gpu(self):
    """Run the shared ml-20m training check with ``accelerator`` enabled.

    Builds the default ALS option, overrides the settings below and hands
    the result to ``self._test7_train_ml_20m``.
    """
    overrides = {
        'num_workers': 8,
        'd': 100,
        'validation': aux.Option({'topk': 10}),
        'compute_loss_on_training': True,
        'accelerator': True,  # presumably selects the GPU path -- confirm
        'num_cg_max_iters': 3,
    }
    opt = ALSOption().get_default_option()
    # Applied in insertion order, i.e. the same order as plain assignments.
    for field, value in overrides.items():
        setattr(opt, field, value)
    self._test7_train_ml_20m(ALS, opt)
def test06_topk_pool(self):
    """Check pooled top-k recommendation parity between ALS and ParALS.

    Recommendations for the first ten user ids are requested with topk=10
    and a pool of the int32 values 0..4; for each query key the ParALS
    result must equal the ALS result.
    """
    set_log_level(2)
    data_opt = self.get_ml100k_mm_opt()
    opt = ALSOption().get_default_option()
    opt.d = 20
    opt.num_workers = 1

    als = ALS(opt, data_opt=data_opt)
    als.initialize()
    als.train()
    pals = ParALS(als)

    pool = np.arange(5, dtype=np.int32)
    als.build_userid_map()
    user_ids = als._idmanager.userids[::]
    query_keys = user_ids[:10]

    expected = als.topk_recommendation(query_keys, topk=10, pool=pool)
    returned_keys, recommended, _ = pals.topk_recommendation(
        query_keys, topk=10, pool=pool, repr=True)
    for query, items in zip(returned_keys, recommended):
        self.assertEqual(expected[query], items)
def test4_optimize(self):
    """Check that hyper-parameter optimization beats the default settings.

    Trains ALS on ml-100k with default parameters, records the validation
    RMSE, runs ``optimize()`` over the search space below and asserts that
    the best ``val_rmse`` is lower than the default one. The model is then
    loaded from ``model_path`` and its validation RMSE must match the best
    optimization loss.

    The model file is removed in a ``finally`` block so that a failing
    call or assertion does not leave 'als.bin' behind for later runs.
    """
    set_log_level(2)
    model_path = 'als.bin'

    opt = ALSOption().get_default_option()
    opt.d = 5
    opt.num_workers = 2
    opt.model_path = model_path
    opt.validation = aux.Option({'topk': 10})
    optimize_option = aux.Option({
        'loss': 'val_rmse',
        'max_trials': 10,
        'deployment': True,
        'start_with_default_parameters': True,
        'space': {
            'd': ['randint', ['d', 10, 20]],
            'reg_u': ['uniform', ['reg_u', 0.1, 0.3]],
            'reg_i': ['uniform', ['reg_i', 0.1, 0.3]],
            'alpha': ['randint', ['alpha', 8, 10]],
        },
    })
    opt.optimize = optimize_option
    opt.evaluation_period = 1
    opt.tensorboard = aux.Option({'root': './tb', 'name': 'als'})

    data_opt = MatrixMarketOptions().get_default_option()
    data_opt.input.main = self.ml_100k + 'main'
    data_opt.input.uid = self.ml_100k + 'uid'
    data_opt.input.iid = self.ml_100k + 'iid'
    data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

    als = ALS(opt, data_opt=data_opt)
    try:
        als.init_factors()
        als.train()
        default_result = als.get_validation_results()
        als.optimize()
        base_loss = default_result['rmse']  # val_rmse
        optimize_loss = als.get_optimization_data()['best']['val_rmse']
        self.assertGreater(base_loss, optimize_loss)

        # Presumably optimize() with 'deployment': True is what writes the
        # best model to model_path -- confirm against the implementation.
        als.load(model_path)
        loss = als.get_validation_results()
        self.assertAlmostEqual(loss['rmse'], optimize_loss)
    finally:
        # Guarded so a failure before the file exists is not masked by
        # a FileNotFoundError raised from the cleanup itself.
        if os.path.exists(model_path):
            os.remove(model_path)
def test02_most_similar(self):
    """Check that ParALS.most_similar with 4 workers beats per-key lookups.

    Times one ``ALS.most_similar`` call per item id against a single
    batched ``ParALS.most_similar`` call over the same ids (topk=10) and
    asserts that the batched call is more than three times faster.
    """
    set_log_level(1)
    data_opt = self.get_ml100k_mm_opt()
    opt = ALSOption().get_default_option()
    opt.d = 20
    opt.num_workers = 1

    als = ALS(opt, data_opt=data_opt)
    als.initialize()
    als.train()
    als.build_itemid_map()
    pals = ParALS(als)

    item_keys = als._idmanager.itemids[::]

    # Baseline: sequential single-key queries; results are discarded.
    began = time.time()
    for key in item_keys:
        als.most_similar(key, topk=10)
    naive_elapsed = time.time() - began

    pals.num_workers = 4
    began = time.time()
    pals.most_similar(item_keys, topk=10, repr=True)
    parals_elapsed = time.time() - began

    self.assertTrue(naive_elapsed > parals_elapsed * 3.0)
def test7_train_ml_20m(self):
    """Run the shared ml-20m training check with 8 workers.

    Uses the default ALS option with ``num_workers`` and a top-10
    validation setting overridden, then delegates to
    ``self._test7_train_ml_20m``.
    """
    als_option = ALSOption().get_default_option()
    als_option.num_workers = 8
    als_option.validation = aux.Option({'topk': 10})
    self._test7_train_ml_20m(ALS, als_option)