Ejemplo n.º 1
0
    def __init__(self, opt_path=None, *args, **kwargs):
        """Set up a BPRMF model from an option source and optional data.

        Args:
            opt_path: Option source handed to ``self.get_option``. When it is
                ``None``, ``BPRMFOption().get_default_option()`` is used.
            *args: Forwarded to the ``Algo``, ``BPRMFOption``, ``Evaluable``,
                ``Serializable`` and ``Optimizable`` initializers.
            **kwargs: Forwarded to the same initializers. Two keys are also
                read here: ``data_opt`` (takes precedence over the
                ``data_opt`` entry of the resolved option) and ``data`` (a
                ``Data`` instance, used only when no data option is set).

        Raises:
            AssertionError: If ``self.obj.init`` reports failure for the
                option file, or (when assertions are enabled) if the attached
                data is not of type ``'matrix'``.
        """
        Algo.__init__(self, *args, **kwargs)
        BPRMFOption.__init__(self, *args, **kwargs)
        Evaluable.__init__(self, *args, **kwargs)
        Serializable.__init__(self, *args, **kwargs)
        Optimizable.__init__(self, *args, **kwargs)
        if opt_path is None:
            opt_path = BPRMFOption().get_default_option()

        self.logger = log.get_logger('BPRMF')
        self.opt, self.opt_path = self.get_option(opt_path)
        self.obj = CyBPRMF()
        # The init() call must not live inside an ``assert`` statement:
        # asserts are stripped under ``python -O``, which would silently skip
        # the initialization itself.
        if not self.obj.init(bytes(self.opt_path, 'utf-8')):
            # AssertionError is kept so callers observe the same exception
            # type as before; the message names the file actually parsed.
            raise AssertionError(
                'cannot parse option file: %s' % self.opt_path)

        self.data = None
        data = kwargs.get('data')
        # An explicit ``data_opt`` keyword wins over the one in the option.
        data_opt = kwargs.get('data_opt', self.opt.get('data_opt'))
        if data_opt:
            self.data = buffalo.data.load(data_opt)
            self.data.create()
        elif isinstance(data, Data):
            self.data = data
        self.logger.info('BPRMF(%s)' % json.dumps(self.opt, indent=2))
        if self.data:
            self.logger.info(self.data.show_info())
            assert self.data.data_type in ['matrix']
Ejemplo n.º 2
0
    def test8_serialization(self):
        """Run the shared serialization check for BPRMF.

        Uses the default option with ``num_iters=200``, ``d=5`` and a top-10
        validation setting.
        """
        bpr_opt = BPRMFOption().get_default_option()
        bpr_opt.num_iters = 200
        bpr_opt.d = 5
        bpr_opt.validation = aux.Option({'topk': 10})
        self._test8_serialization(BPRMF, bpr_opt)
Ejemplo n.º 3
0
    def __init__(self, opt_path=None, *args, **kwargs):
        """Set up a BPRMF model, choosing the CPU or CUDA backend.

        Args:
            opt_path: Option source handed to ``self.get_option``. When it is
                ``None``, ``BPRMFOption().get_default_option()`` is used.
            *args: Forwarded to the ``Algo``, ``BPRMFOption``, ``Evaluable``,
                ``Serializable`` and ``Optimizable`` initializers.
            **kwargs: Forwarded to the same initializers. Two keys are also
                read here: ``data_opt`` (takes precedence over the
                ``data_opt`` entry of the resolved option) and ``data`` (a
                ``Data`` instance, used only when no data option is set).

        Raises:
            RuntimeError: If the option enables ``accelerator`` while
                ``inited_CUBPR`` is false.
            AssertionError: If ``self.obj.init`` reports failure for the
                option file, or (when assertions are enabled) if the attached
                data is not of type ``'matrix'``.
        """
        Algo.__init__(self, *args, **kwargs)
        BPRMFOption.__init__(self, *args, **kwargs)
        Evaluable.__init__(self, *args, **kwargs)
        Serializable.__init__(self, *args, **kwargs)
        Optimizable.__init__(self, *args, **kwargs)
        if opt_path is None:
            opt_path = BPRMFOption().get_default_option()

        self.logger = log.get_logger('BPRMF')
        self.opt, self.opt_path = self.get_option(opt_path)

        if self.opt.accelerator and not inited_CUBPR:
            # Put the logged text on the exception too, so the failure is
            # understandable even when the log output is not at hand.
            message = "ImportError CuBPRMF, no cuda library exists."
            self.logger.error(message)
            raise RuntimeError(message)
        self.obj = CuBPRMF() if self.opt.accelerator else CyBPRMF()

        # The init() call must not live inside an ``assert`` statement:
        # asserts are stripped under ``python -O``, which would silently skip
        # the initialization itself.
        if not self.obj.init(bytes(self.opt_path, 'utf-8')):
            # AssertionError is kept so callers observe the same exception
            # type as before; the message names the file actually parsed.
            raise AssertionError(
                'cannot parse option file: %s' % self.opt_path)

        self.data = None
        data = kwargs.get('data')
        # An explicit ``data_opt`` keyword wins over the one in the option.
        data_opt = kwargs.get('data_opt', self.opt.get('data_opt'))
        if data_opt:
            self.data = buffalo.data.load(data_opt)
            self.data.create()
        elif isinstance(data, Data):
            self.data = data
        self.logger.info('BPRMF(%s)' % json.dumps(self.opt, indent=2))
        if self.data:
            self.logger.info(self.data.show_info())
            assert self.data.data_type in ['matrix']
Ejemplo n.º 4
0
 def test1_is_valid_option(self):
     """Check ``is_valid_option`` with different ``save_best`` values."""
     candidate = BPRMFOption().get_default_option()
     self.assertTrue(BPRMFOption().is_valid_option(candidate))

     # The integer 1 for ``save_best`` must be rejected with RuntimeError.
     candidate['save_best'] = 1
     validator = BPRMFOption()
     with self.assertRaises(RuntimeError):
         validator.is_valid_option(candidate)

     # With ``False`` the option validates again.
     candidate['save_best'] = False
     self.assertTrue(BPRMFOption().is_valid_option(candidate))
Ejemplo n.º 5
0
    def test07_topk_pool(self):
        """Compare pooled top-k output of BPRMF and ParBPRMF.

        Trains a BPRMF model, then checks that for the first ten user ids
        the ``ParBPRMF`` result equals the model's own result when both are
        given the same pool of five item indexes.
        """
        set_log_level(2)
        data_opt = self.get_ml100k_mm_opt()
        bpr_opt = BPRMFOption().get_default_option()
        bpr_opt.d = 20
        bpr_opt.num_workers = 1

        model = BPRMF(bpr_opt, data_opt=data_opt)
        model.initialize()
        model.train()
        par = ParBPRMF(model)

        # Candidate pool: indexes 0..4 as int32.
        pool = np.arange(5, dtype=np.int32)
        model.build_userid_map()
        user_keys = model._idmanager.userids[::][:10]

        expected = model.topk_recommendation(user_keys, topk=10, pool=pool)
        result_keys, result_topks, _ = par.topk_recommendation(
            user_keys, topk=10, pool=pool, repr=True)
        for key, topk_items in zip(result_keys, result_topks):
            self.assertEqual(expected[key], topk_items)
Ejemplo n.º 6
0
    def test05_validation(self):
        """Run the shared validation check for BPRMF with fixed seeds."""
        np.random.seed(7)
        bpr_opt = BPRMFOption().get_default_option()
        bpr_opt.d = 5
        bpr_opt.num_workers = 4
        bpr_opt.num_iters = 500
        bpr_opt.random_seed = 7
        bpr_opt.validation = aux.Option({'topk': 10})
        bpr_opt.tensorboard = aux.Option({'root': './tb', 'name': 'bpr'})

        # Metric values handed to the shared helper.
        thresholds = {'ndcg': 0.03, 'map': 0.02}
        self._test5_validation(BPRMF, bpr_opt, **thresholds)
Ejemplo n.º 7
0
 def test12_gpu_train_ml_20m(self):
     """Run the shared ml-20m training check with the accelerator enabled.

     The test is reported as skipped (rather than silently passing) when
     ``inited_CUBPR`` is false.
     """
     if not inited_CUBPR:
         # skipTest raises unittest.SkipTest, so nothing below runs and the
         # runner records a skip instead of a misleading pass.
         self.skipTest('CuBPRMF is not available (inited_CUBPR is false)')
     opt = BPRMFOption().get_default_option()
     opt.accelerator = True
     opt.d = 100
     opt.verify_neg = False
     opt.num_iters = 30
     opt.evaluation_period = 5
     opt.validation = aux.Option({'topk': 10})
     self._test7_train_ml_20m(BPRMF, opt)
Ejemplo n.º 8
0
 def get_option(self, lib_name, algo_name, **kwargs):
     """Build the option mapping for one (library, algorithm) pair.

     Args:
         lib_name: One of ``'buffalo'``, ``'implicit'``, ``'lightfm'`` or
             ``'pyspark'``.
         algo_name: ``'als'`` or ``'bpr'``; not every library supports both.
         **kwargs: Optional overrides. Keys read here are ``d``,
             ``num_iters``, ``num_workers``, ``use_cg``, ``gpu``,
             ``compute_loss_on_training`` (buffalo) and
             ``calculate_training_loss`` (implicit ALS).

     Returns:
         For buffalo, the library's default option object updated with the
         requested values; for the other libraries, a plain dict of keyword
         settings. ``None`` when the pair is not handled.
     """
     target = (lib_name, algo_name)
     # Settings shared by several libraries, read once with their defaults.
     rank = kwargs.get('d', 100)
     n_iters = kwargs.get('num_iters', 10)
     n_workers = kwargs.get('num_workers', 10)

     if target == ('buffalo', 'als'):
         # Imported lazily so the other libraries work without buffalo.
         from buffalo.algo.options import ALSOption
         solver_by_flag = {True: 'manual_cg', False: 'ldlt'}
         als_opt = ALSOption().get_default_option()
         als_opt.update({
             'd': rank,
             'optimizer': solver_by_flag.get(kwargs.get('use_cg', True)),
             'num_iters': n_iters,
             'num_cg_max_iters': 3,
             'accelerator': kwargs.get('gpu', False),
             'num_workers': n_workers,
             'compute_loss_on_training': kwargs.get('compute_loss_on_training', False),
         })
         return als_opt

     if target == ('buffalo', 'bpr'):
         from buffalo.algo.options import BPRMFOption
         bpr_opt = BPRMFOption().get_default_option()
         bpr_opt.update({
             'd': rank,
             'num_iters': n_iters,
             'num_workers': n_workers,
             'compute_loss_on_training': kwargs.get('compute_loss_on_training', False),
         })
         return bpr_opt

     if target == ('implicit', 'als'):
         return {
             'factors': rank,
             'dtype': np.float32,
             'use_native': True,
             'use_gpu': kwargs.get('gpu', False),
             'use_cg': kwargs.get('use_cg', True),
             'iterations': n_iters,
             'num_threads': n_workers,
             'calculate_training_loss': kwargs.get('calculate_training_loss', False),
         }

     if target == ('implicit', 'bpr'):
         return {
             'factors': rank,
             'dtype': np.float32,
             'iterations': n_iters,
             'verify_negative_samples': True,
             'num_threads': n_workers,
         }

     if target == ('lightfm', 'bpr'):
         return {
             'epochs': n_iters,
             'verbose': True,
             'num_threads': n_workers,
         }

     if target == ('pyspark', 'als'):
         return {
             'maxIter': n_iters,
             'rank': rank,
             'alpha': 8,
             'implicitPrefs': True,
             'userCol': 'row',
             'itemCol': 'col',
             'intermediateStorageLevel': 'MEMORY_ONLY',
             'finalStorageLevel': 'MEMORY_ONLY',
             'ratingCol': 'data',
         }

     # Unhandled (library, algorithm) pairs yield None.
     return None
Ejemplo n.º 9
0
    def test03_most_similar(self):
        """Check that batched ``ParBPRMF.most_similar`` beats a per-key loop.

        Trains a BPRMF model, times one ``most_similar`` call per item id on
        the model itself, then times a single ``ParBPRMF.most_similar`` call
        over all item ids with four workers. The per-key loop must take more
        than three times as long as the batched call.
        """
        set_log_level(1)
        data_opt = self.get_ml100k_mm_opt()
        opt = BPRMFOption().get_default_option()
        opt.d = 20
        opt.num_workers = 1
        bpr = BPRMF(opt, data_opt=data_opt)
        bpr.initialize()
        bpr.train()
        bpr.build_itemid_map()
        parbpr = ParBPRMF(bpr)

        all_keys = bpr._idmanager.itemids[::]
        # perf_counter is monotonic, so system clock adjustments cannot
        # distort the measured intervals.
        start_t = time.perf_counter()
        # A plain loop: the results are not needed, only the elapsed time.
        for k in all_keys:
            bpr.most_similar(k, topk=10)
        naive_elapsed = time.perf_counter() - start_t

        parbpr.num_workers = 4
        start_t = time.perf_counter()
        parbpr.most_similar(all_keys, topk=10, repr=True)
        parbpr_elapsed = time.perf_counter() - start_t

        # assertGreater reports both timings when the check fails.
        self.assertGreater(naive_elapsed, parbpr_elapsed * 3.0)
Ejemplo n.º 10
0
 def test10_fast_most_similar(self):
     """Run the shared fast most-similar check for BPRMF.

     Uses the default option with ``d=5`` and a top-10 validation setting.
     """
     bpr_opt = BPRMFOption().get_default_option()
     bpr_opt.d = 5
     bpr_opt.validation = aux.Option({'topk': 10})
     self._test10_fast_most_similar(BPRMF, bpr_opt)
Ejemplo n.º 11
0
 def test7_train_ml_20m(self):
     """Run the shared ml-20m training check for BPRMF.

     Uses the default option with eight workers and a top-10 validation
     setting.
     """
     bpr_opt = BPRMFOption().get_default_option()
     bpr_opt.num_workers = 8
     bpr_opt.validation = aux.Option({'topk': 10})
     self._test7_train_ml_20m(BPRMF, bpr_opt)
Ejemplo n.º 12
0
 def test4_train(self):
     """Run the shared training check for BPRMF with ``d=5``."""
     bpr_opt = BPRMFOption().get_default_option()
     bpr_opt.d = 5
     self._test4_train(BPRMF, bpr_opt)
Ejemplo n.º 13
0
 def test3_init(self):
     """Run the shared initialization check for BPRMF with default options."""
     default_opt = BPRMFOption().get_default_option()
     self._test3_init(BPRMF, default_opt)
Ejemplo n.º 14
0
 def test2_init_with_dict(self):
     """Smoke test: BPRMF can be built from the default option object."""
     set_log_level(3)
     # Construction itself is the check; any exception fails the test.
     BPRMF(BPRMFOption().get_default_option())
     self.assertTrue(True)
Ejemplo n.º 15
0
 def test0_get_default_option(self):
     """Smoke test: fetching the default BPRMF option must not raise."""
     option_source = BPRMFOption()
     option_source.get_default_option()
     self.assertTrue(True)
Ejemplo n.º 16
0
    def test11_gpu_validation(self):
        """Run the shared validation check with the accelerator enabled.

        The test is reported as skipped (rather than silently passing) when
        ``inited_CUBPR`` is false.
        """
        if not inited_CUBPR:
            # skipTest raises unittest.SkipTest, so nothing below runs and
            # the runner records a skip instead of a misleading pass.
            self.skipTest('CuBPRMF is not available (inited_CUBPR is false)')
        np.random.seed(7)
        opt = BPRMFOption().get_default_option()
        opt.d = 100
        opt.verify_neg = False
        opt.accelerator = True
        opt.lr = 0.01
        opt.reg_b = 10.0
        opt.num_iters = 500
        opt.evaluation_period = 50
        opt.random_seed = 777
        opt.validation = aux.Option({'topk': 10})
        opt.tensorboard = aux.Option({'root': './tb', 'name': 'bpr'})

        self._test5_validation(BPRMF, opt, ndcg=0.03, map=0.02)