Beispiel #1
0
    def test10_fast_most_similar(self):
        """Check that ``most_similar`` gets faster after item normalization.

        Trains a CFR model, times 100 rounds of ``most_similar`` over a fixed
        list of keys, calls ``normalize(group='item')``, repeats the same
        measurement and asserts that the second run took less time.

        NOTE(review): this compares wall-clock durations, so it may be flaky
        on a heavily loaded machine.
        """
        set_log_level(1)

        opt = CFROption().get_default_option()
        data_opt = StreamOptions().get_default_option()
        data_opt.data.sppmi = {"windows": 5, "k": 10}
        data_opt.data.internal_data_type = "matrix"
        data_opt.input.main = self.ml_100k + 'stream'
        data_opt.input.uid = self.ml_100k + 'uid'
        data_opt.input.iid = self.ml_100k + 'iid'
        data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

        c = CFR(opt, data_opt=data_opt)
        c.initialize()
        c.train()

        # Keys returned for the seed item are reused as the benchmark queries.
        keys = [x for x, _ in c.most_similar('49.Star_Wars_(1977)', 10)]

        def timed_queries():
            # perf_counter() is monotonic and high-resolution, unlike
            # time.time(), which can jump when the system clock is adjusted.
            started = time.perf_counter()
            for _ in range(100):
                for key in keys:
                    c.most_similar(key)
            return time.perf_counter() - started

        elapsed_a = timed_queries()
        c.normalize(group='item')
        elapsed_b = timed_queries()

        # assertGreater reports both durations when the expectation fails.
        self.assertGreater(elapsed_a, elapsed_b)
Beispiel #2
0
    def test9_compact_serialization(self):
        """Check that a model saved without the user-id map still serves items.

        Trains a CFR model, saves it with ``with_userid_map=False``, loads only
        the ``I`` and ``_idmanager`` fields into a fresh ``CFR`` instance and
        verifies that ``most_similar`` still returns the expected item, that
        the loaded model has no ``U`` attribute, and that the result still
        holds after ``normalize(group='item')``.
        """
        import os  # local import keeps this method self-contained

        set_log_level(1)

        opt = CFROption().get_default_option()
        data_opt = StreamOptions().get_default_option()
        data_opt.data.sppmi = {"windows": 5, "k": 10}
        data_opt.data.internal_data_type = "matrix"
        data_opt.input.main = self.ml_100k + 'stream'
        data_opt.input.uid = self.ml_100k + 'uid'
        data_opt.input.iid = self.ml_100k + 'iid'
        data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

        c = CFR(opt, data_opt=data_opt)
        c.initialize()
        c.train()
        ret_a = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
        self.assertIn('180.Return_of_the_Jedi_(1983)', ret_a)

        c.save('model.bin', with_userid_map=False)
        try:
            c = CFR(opt)
            c.load('model.bin', data_fields=['I', '_idmanager'])
        finally:
            # Do not leave the serialized model behind in the working
            # directory, whether or not loading succeeded.
            # NOTE(review): assumes load() reads the file eagerly -- confirm.
            os.remove('model.bin')

        ret_a = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
        self.assertIn('180.Return_of_the_Jedi_(1983)', ret_a)
        # Only 'I' and '_idmanager' were requested, so 'U' must be absent.
        self.assertFalse(hasattr(c, 'U'))
        c.normalize(group='item')
        ret_a = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
        self.assertIn('180.Return_of_the_Jedi_(1983)', ret_a)
Beispiel #3
0
    def test4_train(self):
        """Smoke test: a CFR model initializes and trains without raising."""
        set_log_level(3)

        algo_opt = CFROption().get_default_option()

        stream_opt = StreamOptions().get_default_option()
        stream_opt.data.sppmi = dict(windows=5, k=10)
        stream_opt.data.internal_data_type = "matrix"
        # Each input field is the dataset prefix plus a fixed suffix.
        for field, suffix in (('main', 'stream'), ('uid', 'uid'), ('iid', 'iid')):
            setattr(stream_opt.input, field, self.ml_100k + suffix)
        stream_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

        model = CFR(algo_opt, data_opt=stream_opt)
        model.initialize()
        model.train()

        # Reaching this point without an exception is the whole check.
        self.assertTrue(True)
Beispiel #4
0
    def test3_init(self):
        """Verify the shapes of ``U`` and ``I`` after ``initialize``.

        Builds a CFR model with ``d = 20`` and asserts that ``U`` has shape
        ``(943, 20)`` and ``I`` has shape ``(1682, 20)``.
        """
        set_log_level(3)

        dim = 20
        algo_opt = CFROption().get_default_option()
        algo_opt.d = dim

        stream_opt = StreamOptions().get_default_option()
        stream_opt.data.sppmi = dict(windows=5, k=10)
        stream_opt.data.internal_data_type = "matrix"
        # Each input field is the dataset prefix plus a fixed suffix.
        for field, suffix in (('main', 'stream'), ('uid', 'uid'), ('iid', 'iid')):
            setattr(stream_opt.input, field, self.ml_100k + suffix)
        stream_opt.data.path = './ml100k.h5py'

        model = CFR(algo_opt, data_opt=stream_opt)
        # Construction itself must not raise.
        self.assertTrue(True)
        model.initialize()

        # Checked in the same order as before: U first, then I.
        for attr, rows in (('U', 943), ('I', 1682)):
            self.assertEqual(getattr(model, attr).shape, (rows, dim))
Beispiel #5
0
    def test5_validation(self, ndcg=0.06, map=0.04):
        """Check that validation metrics exceed the given lower bounds.

        Trains a CFR model with validation enabled (``topk`` of 10, validation
        name ``"sample"``) and asserts on ``get_validation_results()``.

        Args:
            ndcg: Value that ``results['ndcg']`` must strictly exceed.
            map: Value that ``results['map']`` must strictly exceed. The name
                shadows the builtin but is kept so keyword callers still work.
        """
        set_log_level(3)
        opt = CFROption().get_default_option()
        opt.validation = aux.Option({'topk': 10})
        opt.tensorboard = aux.Option({'root': './tb', 'name': 'cfr'})
        data_opt = StreamOptions().get_default_option()
        data_opt.data.validation.name = "sample"
        data_opt.data.sppmi = {"windows": 5, "k": 10}
        data_opt.data.internal_data_type = "matrix"
        data_opt.input.main = self.ml_100k + 'stream'
        data_opt.input.uid = self.ml_100k + 'uid'
        data_opt.input.iid = self.ml_100k + 'iid'
        data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

        c = CFR(opt, data_opt=data_opt)
        c.initialize()
        c.train()
        results = c.get_validation_results()
        # assertGreater reports the metric and the threshold on failure,
        # whereas assertTrue(a > b) only reports "False is not true".
        self.assertGreater(results['ndcg'], ndcg)
        self.assertGreater(results['map'], map)
Beispiel #6
0
    def test8_serialization(self):
        """Check that a saved and reloaded model returns the expected item.

        Trains a CFR model, verifies a ``most_similar`` result, saves the model
        to ``model.bin``, loads it back into the same instance, removes the
        file and verifies the ``most_similar`` result again.
        """
        set_log_level(1)

        opt = CFROption().get_default_option()
        data_opt = StreamOptions().get_default_option()
        data_opt.data.sppmi = {"windows": 5, "k": 10}
        data_opt.data.internal_data_type = "matrix"
        data_opt.input.main = self.ml_100k + 'stream'
        data_opt.input.uid = self.ml_100k + 'uid'
        data_opt.input.iid = self.ml_100k + 'iid'
        data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

        c = CFR(opt, data_opt=data_opt)
        c.initialize()
        c.train()
        ret_a = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
        self.assertIn('180.Return_of_the_Jedi_(1983)', ret_a)

        c.save('model.bin')
        try:
            c.load('model.bin')
        finally:
            # Remove the artifact even when load() raises, so a failing run
            # does not leave model.bin behind.
            os.remove('model.bin')

        ret_a = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
        self.assertIn('180.Return_of_the_Jedi_(1983)', ret_a)
Beispiel #7
0
    def test6_topk(self):
        """Check top-k recommendation size and ``most_similar`` stability.

        Trains a CFR model, asserts that ``topk_recommendation('1', 10)``
        returns 10 entries, and asserts that ``most_similar`` returns the same
        keys before and after ``normalize()``.
        """
        set_log_level(1)
        opt = CFROption().get_default_option()
        opt.validation = aux.Option({'topk': 10})
        data_opt = StreamOptions().get_default_option()
        data_opt.data.validation.name = "sample"
        data_opt.data.sppmi = {"windows": 5, "k": 10}
        data_opt.data.internal_data_type = "matrix"
        data_opt.input.main = self.ml_100k + 'stream'
        data_opt.input.uid = self.ml_100k + 'uid'
        data_opt.input.iid = self.ml_100k + 'iid'
        data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

        c = CFR(opt, data_opt=data_opt)
        c.initialize()
        c.train()
        # assertEqual actually compares the length; the previous
        # assertTrue(len(...), 10) treated 10 as the failure message and
        # passed for any non-empty result.
        self.assertEqual(len(c.topk_recommendation('1', 10)), 10)
        ret_a = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
        self.assertIn('180.Return_of_the_Jedi_(1983)', ret_a)
        c.normalize()
        ret_b = [x for x, _ in c.most_similar('49.Star_Wars_(1977)')]
        self.assertIn('180.Return_of_the_Jedi_(1983)', ret_b)
        # Normalization must not change the returned keys or their order.
        self.assertEqual(ret_a, ret_b)