def test01_most_similar(self):
    """Check that batched ParALS.most_similar matches per-key ALS.most_similar.

    An ALS model is trained with d=20 on the options returned by
    ``self.get_ml100k_mm_opt()``. The 128 keys returned as most similar to
    '49.Star_Wars_(1977)' are then used as queries in three ways:

    * one ``als.most_similar`` call per key,
    * a single ``pals.most_similar`` call with the list of keys,
    * a single ``pals.most_similar`` call with the output of
      ``als.get_index_pool`` for those keys.

    All three must yield equal top-10 keys and numerically close scores.
    """
    set_log_level(2)
    data_opt = self.get_ml100k_mm_opt()
    model_opt = ALSOption().get_default_option()
    model_opt.d = 20
    model_opt.num_workers = 1

    als = ALS(model_opt, data_opt=data_opt)
    als.initialize()
    als.train()
    pals = ParALS(als)

    # Query set: the 128 keys closest to a fixed seed item.
    neighbours = als.most_similar('49.Star_Wars_(1977)', topk=128)
    query_keys = [key for key, _ in neighbours]
    # presumably the internal indexes matching query_keys -- TODO confirm
    query_indexes = als.get_index_pool(query_keys)

    # Reference results: one call per key on the plain model.
    per_key_results = []
    for key in query_keys:
        per_key_results.append(als.most_similar(key, topk=10))

    topks0 = [[key for key, _ in hits] for hits in per_key_results]
    score_rows = [[score for _, score in hits] for hits in per_key_results]
    scores0 = np.array(score_rows)
    # A ragged result (some key returning fewer than 10 hits) would not
    # produce a regular (128, 10) array, so this guards the comparison below.
    expected_shape = (128, 10)
    self.assertEqual(scores0.shape, expected_shape, msg='check even size')
    scores0 = scores0.reshape(len(per_key_results), 10)

    # Batched results, queried once by key and once by index.
    pals.num_workers = 1
    topks1, scores1 = pals.most_similar(query_keys, topk=10, repr=True)
    topks2, scores2 = pals.most_similar(query_indexes, topk=10, repr=True)

    # Every pair of the three result sets must agree.
    all_topks = [topks0, topks1, topks2]
    for left, right in combinations(all_topks, 2):
        self.assertEqual(left, right)

    all_scores = [scores0, scores1, scores2]
    for left, right in combinations(all_scores, 2):
        self.assertTrue(np.allclose(left, right))
def example1():
    """Train ALS on the ml-100k files, then run hyper parameter optimization.

    Steps, in order:

    1. Train an ALS model with default options and a top-10 validation
       setting, reading '../tests/ext/ml-100k/main' and
       '../tests/ext/ml-100k/iid'.
    2. Print the validation results and the items most similar to
       '49.Star_Wars_(1977)'.
    3. Configure and run ``als.optimize()`` with loss 'val_ndcg' over
       ``d``, ``reg_u``, ``reg_i`` and ``alpha``.
    4. Load the model file written to ``model_path``, print the similar
       items again, then print the best result and best parameters.
    """

    def _print_similar_to_star_wars():
        # Shared by the before/after-optimization reports.
        print('Similar movies to Star_Wars_(1977)')
        neighbours = als.most_similar('49.Star_Wars_(1977)')
        for rank, (movie_name, score) in enumerate(neighbours):
            print(f'{rank + 1:02d}. {score:.3f} {movie_name}')

    log.set_log_level(log.DEBUG)

    # --- model and data options -------------------------------------------
    als_option = ALSOption().get_default_option()
    als_option.validation = aux.Option({'topk': 10})
    data_option = MatrixMarketOptions().get_default_option()
    data_option.input.main = '../tests/ext/ml-100k/main'
    data_option.input.iid = '../tests/ext/ml-100k/iid'

    # --- baseline training ------------------------------------------------
    als = ALS(als_option, data_opt=data_option)
    als.initialize()
    als.train()

    validation_json = json.dumps(als.get_validation_results(), indent=2)
    print('MovieLens 100k metrics for validations\n%s' % validation_json)
    _print_similar_to_star_wars()

    # --- hyper parameter optimization --------------------------------------
    print('Run hyper parameter optimization for val_ndcg...')
    als.opt.num_workers = 4
    als.opt.evaluation_period = 10
    # Each entry is [sampler name, [label, low, high]].
    search_space = {
        'd': ['randint', ['d', 10, 128]],
        'reg_u': ['uniform', ['reg_u', 0.1, 1.0]],
        'reg_i': ['uniform', ['reg_i', 0.1, 1.0]],
        'alpha': ['randint', ['alpha', 1, 10]],
    }
    optimize_config = {
        'loss': 'val_ndcg',
        'max_trials': 100,
        'deployment': True,
        'start_with_default_parameters': True,
        'space': search_space,
    }
    als.opt.optimize = aux.Option(optimize_config)
    log.set_log_level(log.INFO)

    model_path = './example1.ml100k.als.optimize.bin'
    als.opt.model_path = model_path

    # Show the starting parameters before the search overwrites them.
    starting_parameters = {
        'alpha': als.opt.alpha,
        'd': als.opt.d,
        'reg_u': als.opt.reg_u,
        'reg_i': als.opt.reg_i,
    }
    print(json.dumps(starting_parameters, indent=2))

    als.optimize()
    # presumably optimize() saved the best model to model_path because
    # 'deployment' is True -- TODO confirm
    als.load(model_path)

    _print_similar_to_star_wars()

    # --- report the search outcome ------------------------------------------
    optimization_res = als.get_optimization_data()
    best_parameters = optimization_res['best_parameters']
    print(json.dumps(optimization_res['best'], indent=2))

    # 'alpha' and 'd' are cast to int before printing; the regularization
    # terms are printed as returned.
    tuned_parameters = {
        'alpha': int(best_parameters['alpha']),
        'd': int(best_parameters['d']),
        'reg_u': best_parameters['reg_u'],
        'reg_i': best_parameters['reg_i'],
    }
    print(json.dumps(tuned_parameters, indent=2))
def test02_most_similar(self):
    """Check that batched ParALS.most_similar beats per-key lookups by 3x.

    An ALS model is trained with d=20 on the options returned by
    ``self.get_ml100k_mm_opt()``. A top-10 query is then run for every
    item id, first with one ``als.most_similar`` call per key and then with
    a single ``pals.most_similar`` call using 4 workers. The test passes
    when the per-key run takes more than three times as long as the
    batched run.

    NOTE(review): this is a wall-time comparison, so the outcome depends on
    the machine and its load.
    """
    set_log_level(1)
    data_opt = self.get_ml100k_mm_opt()
    opt = ALSOption().get_default_option()
    opt.d = 20
    opt.num_workers = 1

    als = ALS(opt, data_opt=data_opt)
    als.initialize()
    als.train()
    als.build_itemid_map()
    pals = ParALS(als)

    all_keys = als._idmanager.itemids[::]

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_t = time.perf_counter()
    # Plain loop: the results are discarded, so there is no reason to
    # accumulate them in a list inside the timed region.
    for k in all_keys:
        als.most_similar(k, topk=10)
    naive_elapsed = time.perf_counter() - start_t

    pals.num_workers = 4
    start_t = time.perf_counter()
    pals.most_similar(all_keys, topk=10, repr=True)
    parals_elapsed = time.perf_counter() - start_t

    # assertGreater reports both operands on failure; the message adds the
    # raw timings so a failure can be diagnosed from the log.
    self.assertGreater(
        naive_elapsed,
        parals_elapsed * 3.0,
        msg=(
            f'naive: {naive_elapsed:.3f}s, '
            f'parallel: {parals_elapsed:.3f}s (expected > 3x speedup)'
        ),
    )