Beispiel #1
0
    def test01_most_similar(self):
        """Check that ParALS.most_similar matches per-key ALS.most_similar.

        A model is trained with ``d = 20`` and one worker on the data options
        returned by ``self.get_ml100k_mm_opt()``. The 128 nearest neighbours of
        one seed item are used as query keys. For every query the top-10
        neighbours are computed three ways and compared pairwise:

        * one ``als.most_similar`` call per key,
        * one batched ``pals.most_similar`` call with the keys,
        * one batched ``pals.most_similar`` call with the indexes obtained
          from ``als.get_index_pool``.
        """
        set_log_level(2)
        data_opt = self.get_ml100k_mm_opt()
        opt = ALSOption().get_default_option()
        opt.d = 20
        opt.num_workers = 1
        als = ALS(opt, data_opt=data_opt)
        als.initialize()
        als.train()
        pals = ParALS(als)

        # Use the neighbours of a single seed item as the pool of queries.
        seed_neighbours = als.most_similar('49.Star_Wars_(1977)', topk=128)
        query_keys = [key for key, _ in seed_neighbours]
        query_indexes = als.get_index_pool(query_keys)

        # Reference answers: one most_similar call per query key.
        per_key_results = []
        for key in query_keys:
            per_key_results.append(als.most_similar(key, topk=10))
        topks0 = []
        score_rows = []
        for neighbours in per_key_results:
            topks0.append([name for name, _ in neighbours])
            score_rows.append([score for _, score in neighbours])
        scores0 = np.array(score_rows)

        expected_shape = (128, 10)
        self.assertEqual(scores0.shape, expected_shape, msg='check even size')
        scores0 = scores0.reshape(len(per_key_results), 10)

        # Batched lookups, addressed once by key and once by index.
        pals.num_workers = 1
        topks1, scores1 = pals.most_similar(query_keys, topk=10, repr=True)
        topks2, scores2 = pals.most_similar(query_indexes, topk=10, repr=True)

        for left, right in combinations([topks0, topks1, topks2], 2):
            self.assertEqual(left, right)
        for left, right in combinations([scores0, scores1, scores2], 2):
            self.assertTrue(np.allclose(left, right))
Beispiel #2
0
def example1():
    """Train ALS, show similar items, then run hyper-parameter optimization.

    Steps, in order:

    1. Build default ALS and MatrixMarket options, enabling validation with
       ``topk = 10`` and pointing the inputs at ``../tests/ext/ml-100k``.
    2. Train the model and print its validation results plus the items most
       similar to one seed movie.
    3. Configure ``als.opt.optimize`` (loss ``val_ndcg``, 100 trials) and run
       ``als.optimize()``, then reload the model from the configured path.
    4. Print the similar items again, followed by the optimization summary
       and the best parameters found.
    """

    def show_similar_movies(model):
        # Print a 1-based ranking of the neighbours of the seed movie.
        print('Similar movies to Star_Wars_(1977)')
        neighbours = model.most_similar('49.Star_Wars_(1977)')
        for position, (title, similarity) in enumerate(neighbours, start=1):
            print(f'{position:02d}. {similarity:.3f} {title}')

    log.set_log_level(log.DEBUG)
    als_option = ALSOption().get_default_option()
    als_option.validation = aux.Option({'topk': 10})
    data_option = MatrixMarketOptions().get_default_option()
    data_option.input.main = '../tests/ext/ml-100k/main'
    data_option.input.iid = '../tests/ext/ml-100k/iid'

    als = ALS(als_option, data_opt=data_option)
    als.initialize()
    als.train()
    validation_report = json.dumps(als.get_validation_results(), indent=2)
    print('MovieLens 100k metrics for validations\n%s' % validation_report)

    show_similar_movies(als)

    print('Run hyper parameter optimization for val_ndcg...')
    als.opt.num_workers = 4
    als.opt.evaluation_period = 10
    search_space = {
        'd': ['randint', ['d', 10, 128]],
        'reg_u': ['uniform', ['reg_u', 0.1, 1.0]],
        'reg_i': ['uniform', ['reg_i', 0.1, 1.0]],
        'alpha': ['randint', ['alpha', 1, 10]],
    }
    als.opt.optimize = aux.Option({
        'loss': 'val_ndcg',
        'max_trials': 100,
        'deployment': True,
        'start_with_default_parameters': True,
        'space': search_space,
    })
    log.set_log_level(log.INFO)
    model_path = './example1.ml100k.als.optimize.bin'
    als.opt.model_path = model_path

    # Parameters in effect before the search starts.
    current_parameters = {
        'alpha': als.opt.alpha,
        'd': als.opt.d,
        'reg_u': als.opt.reg_u,
        'reg_i': als.opt.reg_i,
    }
    print(json.dumps(current_parameters, indent=2))
    als.optimize()
    als.load(model_path)

    show_similar_movies(als)

    optimization_res = als.get_optimization_data()
    best_parameters = optimization_res['best_parameters']

    print(json.dumps(optimization_res['best'], indent=2))
    # 'alpha' and 'd' are cast to int before printing; the others go as-is.
    tuned_parameters = {
        'alpha': int(best_parameters['alpha']),
        'd': int(best_parameters['d']),
        'reg_u': best_parameters['reg_u'],
        'reg_i': best_parameters['reg_i'],
    }
    print(json.dumps(tuned_parameters, indent=2))
Beispiel #3
0
    def test02_most_similar(self):
        """Check that batched ParALS lookup beats the serial per-key loop.

        A model is trained with ``d = 20`` and one worker on the data options
        returned by ``self.get_ml100k_mm_opt()``. Top-10 neighbours are then
        computed for every item id twice: once with one ``als.most_similar``
        call per key, and once with a single ``pals.most_similar`` call using
        4 workers. The test passes when the serial run took more than three
        times as long as the batched run.

        NOTE(review): the assertion depends on wall-clock timing, so it may be
        sensitive to machine load and available cores -- confirm the 3x
        threshold is appropriate for the CI environment.
        """
        set_log_level(1)
        data_opt = self.get_ml100k_mm_opt()
        opt = ALSOption().get_default_option()
        opt.d = 20
        opt.num_workers = 1
        als = ALS(opt, data_opt=data_opt)
        als.initialize()
        als.train()
        als.build_itemid_map()
        pals = ParALS(als)

        # Slice-copy so the key list is independent of the id manager's own.
        all_keys = als._idmanager.itemids[::]

        # perf_counter is monotonic and intended for measuring intervals;
        # time.time() can jump if the system clock is adjusted mid-test.
        start_t = time.perf_counter()
        # Plain loop: results are discarded, so no list is accumulated inside
        # the timed region.
        for k in all_keys:
            als.most_similar(k, topk=10)
        naive_elapsed = time.perf_counter() - start_t

        pals.num_workers = 4
        start_t = time.perf_counter()
        pals.most_similar(all_keys, topk=10, repr=True)
        parals_elapsed = time.perf_counter() - start_t

        # assertGreater reports both operands when the comparison fails.
        self.assertGreater(
            naive_elapsed,
            parals_elapsed * 3.0,
            msg='serial lookup should take more than 3x the batched lookup',
        )