Esempio n. 1
0
    def test05_topk_MT(self):
        set_log_level(2)
        data_opt = self.get_ml100k_mm_opt()
        opt = ALSOption().get_default_option()
        opt.d = 20
        opt.num_workers = 1
        als = ALS(opt, data_opt=data_opt)
        als.initialize()
        als.train()

        als.build_userid_map()
        all_keys = als._idmanager.userids
        start_t = time.time()
        naive = als.topk_recommendation(all_keys, topk=5)
        naive_elapsed = time.time() - start_t

        pals = ParALS(als)
        pals.num_workers = 4
        start_t = time.time()
        qkeys1, topks1, scores1 = pals.topk_recommendation(all_keys,
                                                           topk=5,
                                                           repr=True)
        par_elapsed = time.time() - start_t
        self.assertEqual(len(qkeys1), len(naive))
        for q, t in zip(qkeys1, topks1):
            self.assertEqual(naive[q], t)
        self.assertTrue(naive_elapsed > par_elapsed * 1.5)
Esempio n. 2
0
    def test01_most_similar(self):
        set_log_level(2)
        data_opt = self.get_ml100k_mm_opt()
        opt = ALSOption().get_default_option()
        opt.d = 20
        opt.num_workers = 1
        als = ALS(opt, data_opt=data_opt)
        als.initialize()
        als.train()
        pals = ParALS(als)
        random_keys = [
            k for k, _ in als.most_similar('49.Star_Wars_(1977)', topk=128)
        ]
        random_indexes = als.get_index_pool(random_keys)
        naive = [als.most_similar(k, topk=10) for k in random_keys]
        topks0 = [[k for k, _ in result] for result in naive]
        scores0 = np.array([[v for _, v in result] for result in naive])
        self.assertEqual(scores0.shape, (
            128,
            10,
        ), msg='check even size')
        scores0 = scores0.reshape(len(naive), 10)
        pals.num_workers = 1
        topks1, scores1 = pals.most_similar(random_keys, topk=10, repr=True)
        topks2, scores2 = pals.most_similar(random_indexes, topk=10, repr=True)

        for a, b in combinations([topks0, topks1, topks2], 2):
            self.assertEqual(a, b)
        for a, b in combinations([scores0, scores1, scores2], 2):
            self.assertTrue(np.allclose(a, b))
Esempio n. 3
0
 def test5_validation(self):
     opt = ALSOption().get_default_option()
     opt.d = 5
     opt.num_iters = 20
     opt.validation = aux.Option({'topk': 10})
     opt.tensorboard = aux.Option({'root': './tb', 'name': 'als'})
     self._test5_validation(ALS, opt)
Esempio n. 4
0
 def test11_train_ml_20m_on_gpu(self):
     opt = ALSOption().get_default_option()
     opt.num_workers = 8
     opt.d = 100
     opt.validation = aux.Option({'topk': 10})
     opt.compute_loss_on_training = True
     opt.accelerator = True
     opt.num_cg_max_iters = 3
     self._test7_train_ml_20m(ALS, opt)
Esempio n. 5
0
    def test06_topk_pool(self):
        set_log_level(2)
        data_opt = self.get_ml100k_mm_opt()
        opt = ALSOption().get_default_option()
        opt.d = 20
        opt.num_workers = 1
        als = ALS(opt, data_opt=data_opt)
        als.initialize()
        als.train()
        pals = ParALS(als)

        pool = np.array([i for i in range(5)], dtype=np.int32)
        als.build_userid_map()
        all_keys = als._idmanager.userids[::][:10]
        naive = als.topk_recommendation(all_keys, topk=10, pool=pool)
        qkeys1, topks1, scores1 = pals.topk_recommendation(all_keys, topk=10, pool=pool, repr=True)
        for q, t in zip(qkeys1, topks1):
            self.assertEqual(naive[q], t)
Esempio n. 6
0
    def test00_tensorboard(self):
        set_log_level(2)
        opt = ALSOption().get_default_option()
        opt.d = 5
        opt.validation = aux.Option({'topk': 10})
        opt.tensorboard = aux.Option({'root': './tb', 'name': 'als'})

        data_opt = MatrixMarketOptions().get_default_option()
        data_opt.input.main = self.ml_100k + 'main'
        data_opt.input.uid = self.ml_100k + 'uid'
        data_opt.input.iid = self.ml_100k + 'iid'
        data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

        als = ALS(opt, data_opt=data_opt)
        als.initialize()
        als.train()
        results = als.get_validation_results()
        self.assertTrue(results['ndcg'] > 0.025)
        self.assertTrue(results['map'] > 0.015)
Esempio n. 7
0
    def test4_optimize(self):
        set_log_level(2)
        opt = ALSOption().get_default_option()
        opt.d = 5
        opt.num_workers = 2
        opt.model_path = 'als.bin'
        opt.validation = aux.Option({'topk': 10})
        optimize_option = aux.Option({
            'loss': 'val_rmse',
            'max_trials': 10,
            'deployment': True,
            'start_with_default_parameters': True,
            'space': {
                'd': ['randint', ['d', 10, 20]],
                'reg_u': ['uniform', ['reg_u', 0.1, 0.3]],
                'reg_i': ['uniform', ['reg_i', 0.1, 0.3]],
                'alpha': ['randint', ['alpha', 8, 10]]
            }
        })
        opt.optimize = optimize_option
        opt.evaluation_period = 1
        opt.tensorboard = aux.Option({'root': './tb',
                                      'name': 'als'})

        data_opt = MatrixMarketOptions().get_default_option()
        data_opt.input.main = self.ml_100k + 'main'
        data_opt.input.uid = self.ml_100k + 'uid'
        data_opt.input.iid = self.ml_100k + 'iid'
        data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

        als = ALS(opt, data_opt=data_opt)
        als.init_factors()
        als.train()
        default_result = als.get_validation_results()
        als.optimize()
        base_loss = default_result['rmse']  # val_rmse
        optimize_loss = als.get_optimization_data()['best']['val_rmse']
        self.assertTrue(base_loss > optimize_loss)

        als.load('als.bin')
        loss = als.get_validation_results()
        self.assertAlmostEqual(loss['rmse'], optimize_loss)
        os.remove('als.bin')
Esempio n. 8
0
    def test02_most_similar(self):
        set_log_level(1)
        data_opt = self.get_ml100k_mm_opt()
        opt = ALSOption().get_default_option()
        opt.d = 20
        opt.num_workers = 1
        als = ALS(opt, data_opt=data_opt)
        als.initialize()
        als.train()
        als.build_itemid_map()
        pals = ParALS(als)

        all_keys = als._idmanager.itemids[::]
        start_t = time.time()
        [als.most_similar(k, topk=10) for k in all_keys]
        naive_elapsed = time.time() - start_t

        pals.num_workers = 4
        start_t = time.time()
        pals.most_similar(all_keys, topk=10, repr=True)
        parals_elapsed = time.time() - start_t

        self.assertTrue(naive_elapsed > parals_elapsed * 3.0)
Esempio n. 9
0
 def test10_fast_most_similar(self):
     opt = ALSOption().get_default_option()
     opt.d = 5
     opt.validation = aux.Option({'topk': 10})
     self._test10_fast_most_similar(ALS, opt)
Esempio n. 10
0
 def test9_compact_serialization(self):
     opt = ALSOption().get_default_option()
     opt.d = 5
     opt.validation = aux.Option({'topk': 10})
     self._test9_compact_serialization(ALS, opt)
Esempio n. 11
0
 def test6_topk(self):
     opt = ALSOption().get_default_option()
     opt.d = 5
     opt.validation = aux.Option({'topk': 10})
     self._test6_topk(ALS, opt)
Esempio n. 12
0
 def test4_train(self):
     opt = ALSOption().get_default_option()
     opt.d = 20
     self._test4_train(ALS, opt)