import logging
from datetime import datetime as dt

# NOTE: the ranking metrics used below (roc_auc, precision, recall, map, rr, ndcg)
# are assumed to be imported from the project's metrics module.
logger = logging.getLogger(__name__)


def holdout_eval(recommender, train, test, at=10):
    """Train `recommender` on the train matrix and evaluate it on the holdout test matrix,
    averaging the ranking metrics over the test users."""
    # train the recommender
    logger.info('Recommender: {}'.format(recommender))
    tic = dt.now()
    logger.info('Training started')
    print(train.sum())  # debug: sum of the training matrix
    recommender.fit(train)
    logger.info('Training completed in {}'.format(dt.now() - tic))
    # evaluate the ranking quality
    roc_auc_, precision_, recall_, map_, mrr_, ndcg_ = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    n_eval = 0
    nusers = train.shape[0]
    for test_user in range(nusers):
        user_profile = train[test_user]
        relevant_items = test[test_user].indices
        if len(relevant_items) > 0:
            n_eval += 1
            # this will rank **all** items
            recommended_items = recommender.recommend(user_id=test_user, exclude_seen=True)
            # evaluate the recommendation list with ranking metrics ONLY
            roc_auc_ += roc_auc(recommended_items, relevant_items)
            precision_ += precision(recommended_items, relevant_items, at=at)
            recall_ += recall(recommended_items, relevant_items, at=at)
            map_ += map(recommended_items, relevant_items, at=at)
            mrr_ += rr(recommended_items, relevant_items, at=at)
            ndcg_ += ndcg(recommended_items, relevant_items, relevance=test[test_user].data, at=at)
    # average the accumulated metrics over the evaluated users
    roc_auc_ /= n_eval
    precision_ /= n_eval
    recall_ /= n_eval
    map_ /= n_eval
    mrr_ /= n_eval
    ndcg_ /= n_eval
    return roc_auc_, precision_, recall_, map_, mrr_, ndcg_
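# --- Usage sketch (illustration only, not part of the original code) ----------
# A minimal example of how holdout_eval can be invoked. RandomRecommender is a
# hypothetical stand-in that only exposes the fit(train) / recommend(user_id,
# exclude_seen) interface assumed above; running it still requires the project's
# ranking metrics (roc_auc, precision, ...) to be importable.
import numpy as np
import scipy.sparse as sps


class RandomRecommender(object):
    """Toy recommender that ranks all items in random order (illustration only)."""

    def fit(self, train):
        self.train = train
        self.nitems = train.shape[1]

    def recommend(self, user_id, exclude_seen=True):
        ranking = np.random.permutation(self.nitems)
        if exclude_seen:
            seen = self.train[user_id].indices
            ranking = ranking[~np.in1d(ranking, seen)]
        return ranking


if __name__ == '__main__':
    # tiny random binary interaction matrices, just to exercise the code path
    rng = np.random.RandomState(1234)
    train = sps.csr_matrix(rng.binomial(1, 0.10, size=(50, 100)))
    test = sps.csr_matrix(rng.binomial(1, 0.05, size=(50, 100)))
    print(holdout_eval(RandomRecommender(), train, test, at=10))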
    # ... inside the loop over test users (same structure as in holdout_eval) ...
    if len(relevant_items) > 0:
        n_eval += 1
        # this will rank **all** items
        recommended_items = recommender.recommend(user_id=test_user, exclude_seen=True)
        if args.prediction_file:
            # write the recommendation list to file, one user per line
            # TODO: convert user and item indices back to their original ids
            user_id = test_user
            rec_list = recommended_items[:args.rec_length]
            s = str(user_id) + ','
            s += ','.join([str(x) for x in rec_list]) + '\n'
            pfile.write(s)
        # evaluate the recommendation list with ranking metrics ONLY
        roc_auc_ += roc_auc(recommended_items, relevant_items)
        precision_ += precision(recommended_items, relevant_items, at=at)
        recall_ += recall(recommended_items, relevant_items, at=at)
        map_ += map(recommended_items, relevant_items, at=at)
        mrr_ += rr(recommended_items, relevant_items, at=at)
        ndcg_ += ndcg(recommended_items, relevant_items, relevance=test[test_user].data, at=at)

# ... after the loop: average the accumulated metrics over the evaluated users ...
roc_auc_ /= n_eval
precision_ /= n_eval
recall_ /= n_eval
map_ /= n_eval
mrr_ /= n_eval
ndcg_ /= n_eval

# close the prediction file
if args.prediction_file:
    pfile.close()
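# --- Context sketch (assumed setup, illustration only) ------------------------
# The fragment above relies on an argparse namespace `args` and an open file
# handle `pfile` created earlier in the script. A plausible setup is sketched
# below; the option names simply mirror the attributes used above and may differ
# from the script's actual CLI.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--prediction_file', type=str, default=None,
                    help='path of the CSV file with the recommendation lists (one user per line)')
parser.add_argument('--rec_length', type=int, default=10,
                    help='number of recommended items written per user')
args = parser.parse_args()

# open the prediction file only if requested; each written line then has the form
#   <user_id>,<item_1>,<item_2>,...,<item_rec_length>
pfile = open(args.prediction_file, 'w') if args.prediction_file else None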
    # ... inside the cross-validation loop, for the current fold index nfold ...
    recommender.fit(train)
    logger.info('Training completed in {}'.format(dt.now() - tic))
    # evaluate the ranking quality
    n_eval = 0
    for test_user in range(nusers):
        user_profile = train[test_user]
        relevant_items = test[test_user].indices
        if len(relevant_items) > 0:
            n_eval += 1
            # this will rank **all** items
            recommended_items = recommender.recommend(user_id=test_user, exclude_seen=True)
            # evaluate the recommendation list with ranking metrics ONLY
            roc_auc_[nfold] += roc_auc(recommended_items, relevant_items)
            precision_[nfold] += precision(recommended_items, relevant_items, at=at)
            recall_[nfold] += recall(recommended_items, relevant_items, at=at)
            map_[nfold] += map(recommended_items, relevant_items, at=at)
            mrr_[nfold] += rr(recommended_items, relevant_items, at=at)
            ndcg_[nfold] += ndcg(recommended_items, relevant_items, relevance=test[test_user].data, at=at)
    # per-fold averages over the evaluated users
    roc_auc_[nfold] /= n_eval
    precision_[nfold] /= n_eval
    recall_[nfold] /= n_eval
    map_[nfold] /= n_eval
    mrr_[nfold] /= n_eval
    ndcg_[nfold] /= n_eval
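# --- Context sketch (assumed per-fold setup, illustration only) ----------------
# The fragment above indexes the metric accumulators by fold, so the surrounding
# script presumably allocates one slot per fold and loops over the CV splits,
# roughly as below. cv_split would normally come from k_fold_cv(...) + df_to_csr(...)
# as in grid_search_cv further down; here it is faked with tiny random matrices.
import numpy as np
import scipy.sparse as sps

cv_folds = 5
rng = np.random.RandomState(1234)
cv_split = [(sps.csr_matrix(rng.binomial(1, 0.10, size=(20, 50))),
             sps.csr_matrix(rng.binomial(1, 0.05, size=(20, 50))))
            for _ in range(cv_folds)]

roc_auc_, precision_, recall_, map_, mrr_, ndcg_ = [np.zeros(cv_folds) for _ in range(6)]
for nfold, (train, test) in enumerate(cv_split):
    nusers = train.shape[0]
    # ... per-fold training and evaluation as in the fragment above ...

# the cross-validated estimate is the mean (and std) across folds,
# e.g. roc_auc_.mean(), roc_auc_.std()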
# NOTE: this function assumes numpy (np), sklearn's ParameterGrid and the project's
# k_fold_cv / df_to_csr helpers are imported at module level.
def grid_search_cv(RecommenderClass, dataset, param_space, metric=roc_auc, at=None,
                   cv_folds=5, is_binary=True, user_key='user_id', item_key='item_id',
                   rating_key='rating', rnd_seed=1234):
    """
    Finds the best hyper-parameters of a recommender algorithm with Grid Search.

    :param RecommenderClass: class of the recommender to tune (must be a subclass of Recommender)
    :param dataset: data to use for tuning
    :param param_space: space of the parameters to explore
    :param metric: metric to maximize
    :param at: optional length of the recommendation list used in the evaluation
    :param cv_folds: number of cross-validation folds
    :param is_binary: True to discard ratings, False otherwise
    :param user_key: name of the column with user ids in dataset
    :param item_key: name of the column with item ids in dataset
    :param rating_key: name of the column with ratings in dataset
    :param rnd_seed: random seed used for cross-validation
    :return: a tuple with (best configuration, best metric value)
    """
    tried_conf = []
    results = np.zeros(np.prod([len(v) for v in param_space.values()]), dtype=np.float32)
    space_size = len(results)
    logger.info('Size of the parameter space: {} ({} cv trials)'.format(
        space_size, space_size * cv_folds))
    param_grid = ParameterGrid(param_space)
    # compute the cv splits
    nusers, nitems = dataset[user_key].max() + 1, dataset[item_key].max() + 1
    cv_split = []
    for train_df, test_df in k_fold_cv(dataset, user_key=user_key, item_key=item_key,
                                       k=cv_folds, clean_test=True, seed=rnd_seed):
        train = df_to_csr(train_df, is_binary=is_binary, nrows=nusers, ncols=nitems,
                          user_key=user_key, item_key=item_key, rating_key=rating_key)
        test = df_to_csr(test_df, is_binary=is_binary, nrows=nusers, ncols=nitems,
                         user_key=user_key, item_key=item_key, rating_key=rating_key)
        cv_split.append((train, test))
    for i, params in enumerate(param_grid):
        logger.info('Iteration {}/{}: {}'.format(i + 1, space_size, params))
        tried_conf.append(params)
        cv_result = 0.0
        for f, (train, test) in enumerate(cv_split):
            # train the recommender
            recommender = RecommenderClass(**params)
            recommender.fit(train)
            # evaluate the ranking quality
            n_eval = 0
            metric_ = 0.0
            for test_user in range(nusers):
                relevant_items = test[test_user].indices
                if len(relevant_items) > 0:
                    n_eval += 1
                    # this will rank **all** items
                    recommended_items = recommender.recommend(user_id=test_user, exclude_seen=True)
                    # evaluate the recommendation list with ranking metrics ONLY
                    if metric == roc_auc:
                        metric_ += roc_auc(recommended_items, relevant_items)
                    elif metric == ndcg:
                        metric_ += ndcg(recommended_items, relevant_items,
                                        relevance=test[test_user].data, at=at)
                    else:
                        metric_ += metric(recommended_items, relevant_items, at=at)
            metric_ /= n_eval
            cv_result += metric_
        # average value of the metric in cross-validation
        results[i] = cv_result / cv_folds
        logger.info('Result: {:.4f}'.format(results[i]))
    # return the best configuration
    best = results.argsort()[-1]
    return tried_conf[best], results[best]
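# --- Usage sketch (hypothetical, illustration only) ----------------------------
# How grid_search_cv might be driven. PopularityRecommender and its single
# hyper-parameter `damping` are stand-ins for a real Recommender subclass; the
# DataFrame column names match the defaults of grid_search_cv, and running the
# sketch still requires the project's k_fold_cv, df_to_csr and roc_auc.
import numpy as np
import pandas as pd


class PopularityRecommender(object):
    """Toy recommender that ranks items by (damped) popularity. Illustration only."""

    def __init__(self, damping=0.0):
        self.damping = damping

    def fit(self, train):
        self.train = train
        pop = np.asarray(train.sum(axis=0)).ravel()
        self.scores = pop / (pop + self.damping) if self.damping > 0 else pop

    def recommend(self, user_id, exclude_seen=True):
        ranking = np.argsort(-self.scores)
        if exclude_seen:
            seen = self.train[user_id].indices
            ranking = ranking[~np.in1d(ranking, seen)]
        return ranking


if __name__ == '__main__':
    rng = np.random.RandomState(1234)
    ratings = pd.DataFrame({'user_id': rng.randint(0, 50, size=2000),
                            'item_id': rng.randint(0, 100, size=2000),
                            'rating': rng.randint(1, 6, size=2000)})
    ratings = ratings.drop_duplicates(['user_id', 'item_id'])
    best_params, best_score = grid_search_cv(PopularityRecommender, ratings,
                                             param_space={'damping': [0.0, 10.0, 100.0]},
                                             metric=roc_auc, cv_folds=3)
    print('Best configuration: {} (ROC AUC = {:.4f})'.format(best_params, best_score))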