def test10_fast_most_similar(self):
    """Check that ``most_similar`` queries take less time after normalizing.

    Trains a CFR model on the ml_100k stream data, times 100 rounds of
    ``most_similar`` calls over a fixed list of keys, calls
    ``c.normalize(group='item')``, times the same queries again and
    asserts that the first measurement is greater than the second.
    """
    set_log_level(1)
    opt = CFROption().get_default_option()
    data_opt = StreamOptions().get_default_option()
    data_opt.data.sppmi = {"windows": 5, "k": 10}
    data_opt.data.internal_data_type = "matrix"
    data_opt.input.main = self.ml_100k + 'stream'
    data_opt.input.uid = self.ml_100k + 'uid'
    data_opt.input.iid = self.ml_100k + 'iid'
    data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

    c = CFR(opt, data_opt=data_opt)
    c.initialize()
    c.train()

    # Keys returned for the Star Wars query are reused as the query workload.
    keys = [x for x, _ in c.most_similar('49.Star_Wars_(1977)', 10)]

    def timed_queries():
        # perf_counter is a monotonic, high-resolution clock, so the
        # measured interval cannot be distorted by system clock changes
        # the way a time.time() difference can.
        start_t = time.perf_counter()
        for _ in range(100):
            for key in keys:
                c.most_similar(key)
        return time.perf_counter() - start_t

    elapsed_a = timed_queries()
    c.normalize(group='item')
    elapsed_b = timed_queries()

    # assertGreater reports both timings when the expectation fails.
    self.assertGreater(elapsed_a, elapsed_b)
def test9_compact_serialization(self):
    """Check that a model saved without the user-id map can be partially loaded.

    Trains a CFR model, saves it with ``with_userid_map=False``, then loads
    only the ``'I'`` and ``'_idmanager'`` fields into a fresh ``CFR``
    instance.  The reloaded model must still return
    ``'180.Return_of_the_Jedi_(1983)'`` for the Star Wars query (before and
    after ``normalize(group='item')``) and must not have a ``U`` attribute.

    The ``model.bin`` file written by the test is removed when the test
    finishes, whether the assertions pass or fail.
    """
    set_log_level(1)
    opt = CFROption().get_default_option()
    data_opt = StreamOptions().get_default_option()
    data_opt.data.sppmi = {"windows": 5, "k": 10}
    data_opt.data.internal_data_type = "matrix"
    data_opt.input.main = self.ml_100k + 'stream'
    data_opt.input.uid = self.ml_100k + 'uid'
    data_opt.input.iid = self.ml_100k + 'iid'
    data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

    c = CFR(opt, data_opt=data_opt)
    c.initialize()
    c.train()
    ret_a = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
    self.assertIn('180.Return_of_the_Jedi_(1983)', ret_a)

    c.save('model.bin', with_userid_map=False)
    try:
        c = CFR(opt)
        c.load('model.bin', data_fields=['I', '_idmanager'])
        ret_a = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
        self.assertIn('180.Return_of_the_Jedi_(1983)', ret_a)
        self.assertFalse(hasattr(c, 'U'))

        c.normalize(group='item')
        ret_a = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
        self.assertIn('180.Return_of_the_Jedi_(1983)', ret_a)
    finally:
        # Clean up at the very end (not right after load) so the file is
        # still present for every use of the reloaded model, and so the
        # working directory is not polluted for later tests.
        os.remove('model.bin')
def test4_train(self):
    """Smoke test: a CFR model built on the ml_100k stream data trains.

    The test only verifies that ``initialize()`` and ``train()`` complete
    without raising; the final assertion is unconditional.
    """
    set_log_level(3)

    model_opt = CFROption().get_default_option()

    prefix = self.ml_100k
    stream_opt = StreamOptions().get_default_option()
    stream_opt.data.sppmi = {"windows": 5, "k": 10}
    stream_opt.data.internal_data_type = "matrix"
    stream_opt.input.main = prefix + 'stream'
    stream_opt.input.uid = prefix + 'uid'
    stream_opt.input.iid = prefix + 'iid'
    stream_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

    model = CFR(model_opt, data_opt=stream_opt)
    model.initialize()
    model.train()

    # Reaching this point without an exception is the success criterion.
    self.assertTrue(True)
def test3_init(self):
    """Check the factor matrix shapes after ``initialize()``.

    With ``opt.d = 20`` the test expects ``c.U`` to have shape
    ``(943, 20)`` and ``c.I`` to have shape ``(1682, 20)``.
    """
    set_log_level(3)

    model_opt = CFROption().get_default_option()
    model_opt.d = 20

    prefix = self.ml_100k
    stream_opt = StreamOptions().get_default_option()
    stream_opt.data.sppmi = {"windows": 5, "k": 10}
    stream_opt.data.internal_data_type = "matrix"
    stream_opt.input.main = prefix + 'stream'
    stream_opt.input.uid = prefix + 'uid'
    stream_opt.input.iid = prefix + 'iid'
    stream_opt.data.path = './ml100k.h5py'

    model = CFR(model_opt, data_opt=stream_opt)
    # Construction itself succeeding is treated as the first checkpoint.
    self.assertTrue(True)

    model.initialize()
    expected_user_shape = (943, 20)
    expected_item_shape = (1682, 20)
    self.assertEqual(model.U.shape, expected_user_shape)
    self.assertEqual(model.I.shape, expected_item_shape)
def test5_validation(self, ndcg=0.06, map=0.04):
    """Train with validation enabled and check the reported metrics.

    Args:
        ndcg: Threshold that ``results['ndcg']`` must exceed.
        map: Threshold that ``results['map']`` must exceed.  The name
            shadows the builtin but is kept because it is part of the
            method's existing signature.
    """
    set_log_level(3)
    opt = CFROption().get_default_option()
    opt.validation = aux.Option({'topk': 10})
    opt.tensorboard = aux.Option({'root': './tb', 'name': 'cfr'})

    data_opt = StreamOptions().get_default_option()
    data_opt.data.validation.name = "sample"
    data_opt.data.sppmi = {"windows": 5, "k": 10}
    data_opt.data.internal_data_type = "matrix"
    data_opt.input.main = self.ml_100k + 'stream'
    data_opt.input.uid = self.ml_100k + 'uid'
    data_opt.input.iid = self.ml_100k + 'iid'
    data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

    c = CFR(opt, data_opt=data_opt)
    c.initialize()
    c.train()

    results = c.get_validation_results()
    # assertGreater prints the measured metric and the threshold on
    # failure, whereas assertTrue(a > b) only reports "False is not true".
    self.assertGreater(results['ndcg'], ndcg)
    self.assertGreater(results['map'], map)
def test8_serialization(self):
    """Check that ``most_similar`` still works after a save/load cycle.

    Trains a CFR model, verifies the Star Wars query returns
    ``'180.Return_of_the_Jedi_(1983)'``, saves the model to ``model.bin``,
    loads it back into the same instance, deletes the file and repeats the
    query check.
    """
    set_log_level(1)

    query_key = '49.Star_Wars_(1977)'
    expected_key = '180.Return_of_the_Jedi_(1983)'
    model_path = 'model.bin'

    model_opt = CFROption().get_default_option()

    prefix = self.ml_100k
    stream_opt = StreamOptions().get_default_option()
    stream_opt.data.sppmi = {"windows": 5, "k": 10}
    stream_opt.data.internal_data_type = "matrix"
    stream_opt.input.main = prefix + 'stream'
    stream_opt.input.uid = prefix + 'uid'
    stream_opt.input.iid = prefix + 'iid'
    stream_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

    model = CFR(model_opt, data_opt=stream_opt)
    model.initialize()
    model.train()

    before = [key for key, _ in model.most_similar(query_key)]
    self.assertIn(expected_key, before)

    # Round-trip through disk; the file is deleted right after loading.
    model.save(model_path)
    model.load(model_path)
    os.remove(model_path)

    after = [key for key, _ in model.most_similar(query_key)]
    self.assertIn(expected_key, after)
def test6_topk(self):
    """Check top-k recommendation size and ``most_similar`` stability.

    After training, ``topk_recommendation('1', 10)`` must return exactly
    10 entries, the Star Wars query must include
    ``'180.Return_of_the_Jedi_(1983)'``, and calling ``normalize()`` must
    leave the list of keys returned by ``most_similar`` unchanged.
    """
    set_log_level(1)
    opt = CFROption().get_default_option()
    opt.validation = aux.Option({'topk': 10})

    data_opt = StreamOptions().get_default_option()
    data_opt.data.validation.name = "sample"
    data_opt.data.sppmi = {"windows": 5, "k": 10}
    data_opt.data.internal_data_type = "matrix"
    data_opt.input.main = self.ml_100k + 'stream'
    data_opt.input.uid = self.ml_100k + 'uid'
    data_opt.input.iid = self.ml_100k + 'iid'
    data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

    c = CFR(opt, data_opt=data_opt)
    c.initialize()
    c.train()

    # The original used assertTrue(len(...), 10), which treats 10 as the
    # failure message and only checks that the length is non-zero.
    # assertEqual actually compares the length against 10.
    self.assertEqual(len(c.topk_recommendation('1', 10)), 10)

    ret_a = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
    self.assertIn('180.Return_of_the_Jedi_(1983)', ret_a)

    c.normalize()
    ret_b = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
    self.assertIn('180.Return_of_the_Jedi_(1983)', ret_b)
    self.assertEqual(ret_a, ret_b)