def __init__(self, opt_path=None, *args, **kwargs):
    """Initialise the model: resolve options, set up the backend, attach data.

    Args:
        opt_path: Option source handed to ``self.get_option``. When ``None``,
            the result of ``BPRMFOption().get_default_option()`` is used.
        *args: Forwarded unchanged to every base-class initialiser.
        **kwargs: Forwarded unchanged to every base-class initialiser. Two
            entries are also read here: ``data_opt`` (takes precedence over
            the ``data_opt`` entry of the resolved options) and ``data`` (a
            ``Data`` instance, used only when no ``data_opt`` is available).

    Raises:
        AssertionError: If the backend object rejects the option file, or if
            the attached data reports a ``data_type`` other than ``'matrix'``.
    """
    Algo.__init__(self, *args, **kwargs)
    BPRMFOption.__init__(self, *args, **kwargs)
    Evaluable.__init__(self, *args, **kwargs)
    Serializable.__init__(self, *args, **kwargs)
    Optimizable.__init__(self, *args, **kwargs)
    if opt_path is None:
        opt_path = BPRMFOption().get_default_option()

    self.logger = log.get_logger('BPRMF')
    self.opt, self.opt_path = self.get_option(opt_path)
    self.obj = CyBPRMF()
    # The backend call is made outside the assert statement on purpose:
    # ``python -O`` strips assert statements, which would otherwise skip the
    # initialisation itself and leave ``self.obj`` unconfigured.
    initialized = self.obj.init(bytes(self.opt_path, 'utf-8'))
    assert initialized, 'cannot parse option file: %s' % opt_path

    self.data = None
    data = kwargs.get('data')
    # A ``data_opt`` keyword argument overrides the one stored in the options.
    data_opt = kwargs.get('data_opt', self.opt.get('data_opt'))
    if data_opt:
        self.data = buffalo.data.load(data_opt)
        self.data.create()
    elif isinstance(data, Data):
        self.data = data

    self.logger.info('BPRMF(%s)' % json.dumps(self.opt, indent=2))
    if self.data:
        self.logger.info(self.data.show_info())
        assert self.data.data_type in ['matrix']
def test8_serialization(self):
    """Run the shared ``_test8_serialization`` scenario for BPRMF.

    Starts from the default BPRMF options and overrides the iteration count,
    the ``d`` option and the validation settings before delegating.
    """
    settings = BPRMFOption().get_default_option()
    overrides = (
        ('num_iters', 200),
        ('d', 5),
        ('validation', aux.Option({'topk': 10})),
    )
    for field, value in overrides:
        setattr(settings, field, value)
    self._test8_serialization(BPRMF, settings)
def __init__(self, opt_path=None, *args, **kwargs):
    """Initialise the model: resolve options, pick a backend, attach data.

    ``CuBPRMF`` is used as the backend when the resolved options have a
    truthy ``accelerator`` entry; otherwise ``CyBPRMF`` is used.

    Args:
        opt_path: Option source handed to ``self.get_option``. When ``None``,
            the result of ``BPRMFOption().get_default_option()`` is used.
        *args: Forwarded unchanged to every base-class initialiser.
        **kwargs: Forwarded unchanged to every base-class initialiser. Two
            entries are also read here: ``data_opt`` (takes precedence over
            the ``data_opt`` entry of the resolved options) and ``data`` (a
            ``Data`` instance, used only when no ``data_opt`` is available).

    Raises:
        RuntimeError: If the accelerator is requested while ``inited_CUBPR``
            is falsy.
        AssertionError: If the backend object rejects the option file, or if
            the attached data reports a ``data_type`` other than ``'matrix'``.
    """
    Algo.__init__(self, *args, **kwargs)
    BPRMFOption.__init__(self, *args, **kwargs)
    Evaluable.__init__(self, *args, **kwargs)
    Serializable.__init__(self, *args, **kwargs)
    Optimizable.__init__(self, *args, **kwargs)
    if opt_path is None:
        opt_path = BPRMFOption().get_default_option()

    self.logger = log.get_logger('BPRMF')
    self.opt, self.opt_path = self.get_option(opt_path)
    if self.opt.accelerator and not inited_CUBPR:
        message = 'ImportError CuBPRMF, no cuda library exists.'
        self.logger.error(message)
        # Carry the reason in the exception too, so callers that do not see
        # the log output still learn why construction failed.
        raise RuntimeError(message)
    self.obj = CuBPRMF() if self.opt.accelerator else CyBPRMF()
    # The backend call is made outside the assert statement on purpose:
    # ``python -O`` strips assert statements, which would otherwise skip the
    # initialisation itself and leave ``self.obj`` unconfigured.
    initialized = self.obj.init(bytes(self.opt_path, 'utf-8'))
    assert initialized, 'cannot parse option file: %s' % opt_path

    self.data = None
    data = kwargs.get('data')
    # A ``data_opt`` keyword argument overrides the one stored in the options.
    data_opt = kwargs.get('data_opt', self.opt.get('data_opt'))
    if data_opt:
        self.data = buffalo.data.load(data_opt)
        self.data.create()
    elif isinstance(data, Data):
        self.data = data

    self.logger.info('BPRMF(%s)' % json.dumps(self.opt, indent=2))
    if self.data:
        self.logger.info(self.data.show_info())
        assert self.data.data_type in ['matrix']
def test1_is_valid_option(self):
    """Check ``is_valid_option`` on default options and on ``save_best`` edits.

    The defaults must validate, setting ``save_best`` to ``1`` must raise
    ``RuntimeError``, and setting it to ``False`` must validate again.
    """
    candidate = BPRMFOption().get_default_option()
    self.assertTrue(BPRMFOption().is_valid_option(candidate))

    candidate['save_best'] = 1
    validate = BPRMFOption().is_valid_option
    with self.assertRaises(RuntimeError):
        validate(candidate)

    candidate['save_best'] = False
    self.assertTrue(BPRMFOption().is_valid_option(candidate))
def test07_topk_pool(self):
    """Compare pooled top-k results of ``ParBPRMF`` against the plain model.

    Trains a BPRMF model on the data described by ``get_ml100k_mm_opt``, then
    asks both the model and its ``ParBPRMF`` wrapper for top-10
    recommendations restricted to the pool ``[0, 1, 2, 3, 4]`` for the first
    ten user ids, and requires the per-user results to be equal.
    """
    set_log_level(2)
    data_opt = self.get_ml100k_mm_opt()

    settings = BPRMFOption().get_default_option()
    settings.d = 20
    settings.num_workers = 1

    model = BPRMF(settings, data_opt=data_opt)
    model.initialize()
    model.train()
    parallel = ParBPRMF(model)

    candidate_pool = np.arange(5, dtype=np.int32)
    model.build_userid_map()
    query_users = model._idmanager.userids[::][:10]

    expected = model.topk_recommendation(query_users, topk=10, pool=candidate_pool)
    returned_keys, returned_topks, _scores = parallel.topk_recommendation(
        query_users, topk=10, pool=candidate_pool, repr=True)
    for user, recommended in zip(returned_keys, returned_topks):
        self.assertEqual(expected[user], recommended)
def test05_validation(self):
    """Run the shared ``_test5_validation`` scenario for BPRMF.

    Seeds NumPy's global RNG with 7, configures the options (including the
    validation and tensorboard entries) and delegates with the thresholds
    ``ndcg=0.03`` and ``map=0.02``.
    """
    np.random.seed(7)
    settings = BPRMFOption().get_default_option()
    overrides = (
        ('d', 5),
        ('num_workers', 4),
        ('num_iters', 500),
        ('random_seed', 7),
        ('validation', aux.Option({'topk': 10})),
        ('tensorboard', aux.Option({'root': './tb', 'name': 'bpr'})),
    )
    for field, value in overrides:
        setattr(settings, field, value)
    self._test5_validation(BPRMF, settings, ndcg=0.03, map=0.02)
def test12_gpu_train_ml_20m(self):
    """Run the shared ``_test7_train_ml_20m`` scenario with the accelerator on.

    The test is reported as skipped when ``inited_CUBPR`` is falsy, so a
    machine without the CUDA backend does not count it as a pass.
    """
    if not inited_CUBPR:
        # skipTest raises, so nothing below runs without the CUDA backend.
        self.skipTest('CuBPRMF is not available; GPU training test skipped')
    opt = BPRMFOption().get_default_option()
    opt.accelerator = True
    opt.d = 100
    opt.verify_neg = False
    opt.num_iters = 30
    opt.evaluation_period = 5
    opt.validation = aux.Option({'topk': 10})
    self._test7_train_ml_20m(BPRMF, opt)
def get_option(self, lib_name, algo_name, **kwargs):
    """Build the option set for one (library, algorithm) combination.

    Supported combinations are buffalo/als, buffalo/bpr, implicit/als,
    implicit/bpr, lightfm/bpr and pyspark/als. The buffalo variants return
    the library's default option object after updating it; the others return
    a plain dict. Any other combination yields ``None``.

    Keyword arguments read (with defaults): ``d`` (100), ``num_iters`` (10),
    ``num_workers`` (10), ``use_cg`` (True), ``gpu`` (False),
    ``compute_loss_on_training`` (False, buffalo only) and
    ``calculate_training_loss`` (False, implicit/als only).
    """
    target = (lib_name, algo_name)

    if target == ('buffalo', 'als'):
        from buffalo.algo.options import ALSOption
        # Looked up by key rather than by truthiness: a ``use_cg`` value that
        # equals neither True nor False maps to ``None``.
        optimizer_by_flag = {True: 'manual_cg', False: 'ldlt'}
        als_opt = ALSOption().get_default_option()
        als_opt.update({
            'd': kwargs.get('d', 100),
            'optimizer': optimizer_by_flag.get(kwargs.get('use_cg', True)),
            'num_iters': kwargs.get('num_iters', 10),
            'num_cg_max_iters': 3,
            'accelerator': kwargs.get('gpu', False),
            'num_workers': kwargs.get('num_workers', 10),
            'compute_loss_on_training': kwargs.get('compute_loss_on_training', False),
        })
        return als_opt

    if target == ('buffalo', 'bpr'):
        from buffalo.algo.options import BPRMFOption
        bpr_opt = BPRMFOption().get_default_option()
        bpr_opt.update({
            'd': kwargs.get('d', 100),
            'num_iters': kwargs.get('num_iters', 10),
            'num_workers': kwargs.get('num_workers', 10),
            'compute_loss_on_training': kwargs.get('compute_loss_on_training', False),
        })
        return bpr_opt

    if target == ('implicit', 'als'):
        return {
            'factors': kwargs.get('d', 100),
            'dtype': np.float32,
            'use_native': True,
            'use_gpu': kwargs.get('gpu', False),
            'use_cg': kwargs.get('use_cg', True),
            'iterations': kwargs.get('num_iters', 10),
            'num_threads': kwargs.get('num_workers', 10),
            'calculate_training_loss': kwargs.get('calculate_training_loss', False),
        }

    if target == ('implicit', 'bpr'):
        return {
            'factors': kwargs.get('d', 100),
            'dtype': np.float32,
            'iterations': kwargs.get('num_iters', 10),
            'verify_negative_samples': True,
            'num_threads': kwargs.get('num_workers', 10),
        }

    if target == ('lightfm', 'bpr'):
        return {
            'epochs': kwargs.get('num_iters', 10),
            'verbose': True,
            'num_threads': kwargs.get('num_workers', 10),
        }

    if target == ('pyspark', 'als'):
        return {
            'maxIter': kwargs.get('num_iters', 10),
            'rank': kwargs.get('d', 100),
            'alpha': 8,
            'implicitPrefs': True,
            'userCol': 'row',
            'itemCol': 'col',
            'intermediateStorageLevel': 'MEMORY_ONLY',
            'finalStorageLevel': 'MEMORY_ONLY',
            'ratingCol': 'data',
        }

    # Unknown library/algorithm combination.
    return None
def test03_most_similar(self):
    """Require bulk ``ParBPRMF.most_similar`` to beat a per-key loop by over 3x.

    Trains a BPRMF model on the data described by ``get_ml100k_mm_opt``,
    times one ``most_similar`` call per item id on the plain model, then
    times a single bulk call on the ``ParBPRMF`` wrapper with
    ``num_workers = 4`` and compares the two durations.
    """
    set_log_level(1)
    data_opt = self.get_ml100k_mm_opt()
    opt = BPRMFOption().get_default_option()
    opt.d = 20
    opt.num_workers = 1
    bpr = BPRMF(opt, data_opt=data_opt)
    bpr.initialize()
    bpr.train()
    bpr.build_itemid_map()
    parbpr = ParBPRMF(bpr)
    all_keys = bpr._idmanager.itemids[::]

    # perf_counter is monotonic, so the measured intervals cannot be skewed
    # by wall-clock adjustments during the run.
    start_t = time.perf_counter()
    # Plain loop: the results are not needed, so no throwaway list is built
    # inside the timed region.
    for key in all_keys:
        bpr.most_similar(key, topk=10)
    naive_elapsed = time.perf_counter() - start_t

    parbpr.num_workers = 4
    start_t = time.perf_counter()
    parbpr.most_similar(all_keys, topk=10, repr=True)
    parbpr_elapsed = time.perf_counter() - start_t

    # assertGreater reports both values when the expectation is not met.
    self.assertGreater(naive_elapsed, parbpr_elapsed * 3.0)
def test10_fast_most_similar(self):
    """Run the shared ``_test10_fast_most_similar`` scenario for BPRMF.

    Uses the default options with ``d`` set to 5 and validation at top-10.
    """
    settings = BPRMFOption().get_default_option()
    for field, value in (('d', 5), ('validation', aux.Option({'topk': 10}))):
        setattr(settings, field, value)
    self._test10_fast_most_similar(BPRMF, settings)
def test7_train_ml_20m(self):
    """Run the shared ``_test7_train_ml_20m`` scenario for BPRMF.

    Uses the default options with eight workers and validation at top-10.
    """
    settings = BPRMFOption().get_default_option()
    for field, value in (('num_workers', 8), ('validation', aux.Option({'topk': 10}))):
        setattr(settings, field, value)
    self._test7_train_ml_20m(BPRMF, settings)
def test4_train(self):
    """Run the shared ``_test4_train`` scenario for BPRMF with ``d`` set to 5."""
    settings = BPRMFOption().get_default_option()
    setattr(settings, 'd', 5)
    self._test4_train(BPRMF, settings)
def test3_init(self):
    """Run the shared ``_test3_init`` scenario for BPRMF with default options."""
    default_settings = BPRMFOption().get_default_option()
    self._test3_init(BPRMF, default_settings)
def test2_init_with_dict(self):
    """Construct BPRMF directly from the default option object.

    The test passes as long as construction does not raise; the final
    assertion is unconditional.
    """
    set_log_level(3)
    default_settings = BPRMFOption().get_default_option()
    BPRMF(default_settings)
    self.assertTrue(True)
def test0_get_default_option(self):
    """Check that fetching the default BPRMF options does not raise.

    The returned options are discarded; the final assertion is unconditional.
    """
    option_factory = BPRMFOption()
    option_factory.get_default_option()
    self.assertTrue(True)
def test11_gpu_validation(self):
    """Run the shared ``_test5_validation`` scenario with the accelerator on.

    The test is reported as skipped when ``inited_CUBPR`` is falsy, so a
    machine without the CUDA backend does not count it as a pass. Otherwise
    NumPy's global RNG is seeded with 7 and the scenario runs with the
    thresholds ``ndcg=0.03`` and ``map=0.02``.
    """
    if not inited_CUBPR:
        # skipTest raises, so nothing below runs without the CUDA backend.
        self.skipTest('CuBPRMF is not available; GPU validation test skipped')
    np.random.seed(7)
    opt = BPRMFOption().get_default_option()
    opt.d = 100
    opt.verify_neg = False
    opt.accelerator = True
    opt.lr = 0.01
    opt.reg_b = 10.0
    opt.num_iters = 500
    opt.evaluation_period = 50
    opt.random_seed = 777
    opt.validation = aux.Option({'topk': 10})
    opt.tensorboard = aux.Option({'root': './tb', 'name': 'bpr'})
    self._test5_validation(BPRMF, opt, ndcg=0.03, map=0.02)