import logging
from datetime import datetime as dt

# NOTE: the ranking metric helpers used below (roc_auc, precision, recall, map,
# rr, ndcg) are assumed to be imported from the project's metrics module.
logger = logging.getLogger(__name__)


def holdout_eval(recommender, train, test, at=10):
    # train the recommender
    logger.info('Recommender: {}'.format(recommender))
    tic = dt.now()
    logger.info('Training started')
    print(train.sum())  # debug: total number of interactions in the training set
    recommender.fit(train)
    logger.info('Training completed in {}'.format(dt.now() - tic))

    # evaluate the ranking quality over the held-out interactions
    roc_auc_, precision_, recall_, map_, mrr_, ndcg_ = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    n_eval = 0
    nusers = train.shape[0]
    for test_user in range(nusers):
        user_profile = train[test_user]
        relevant_items = test[test_user].indices
        if len(relevant_items) > 0:
            n_eval += 1
            # this will rank **all** items
            recommended_items = recommender.recommend(user_id=test_user, exclude_seen=True)
            # evaluate the recommendation list with ranking metrics ONLY
            roc_auc_ += roc_auc(recommended_items, relevant_items)
            precision_ += precision(recommended_items, relevant_items, at=at)
            recall_ += recall(recommended_items, relevant_items, at=at)
            map_ += map(recommended_items, relevant_items, at=at)
            mrr_ += rr(recommended_items, relevant_items, at=at)
            ndcg_ += ndcg(recommended_items, relevant_items, relevance=test[test_user].data, at=at)
    # average each metric over the users that had at least one relevant item
    roc_auc_ /= n_eval
    precision_ /= n_eval
    recall_ /= n_eval
    map_ /= n_eval
    mrr_ /= n_eval
    ndcg_ /= n_eval
    return roc_auc_, precision_, recall_, map_, mrr_, ndcg_
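# Illustrative smoke test for holdout_eval, assuming the metric helpers above are
# importable. The RandomRecommender class and the crude nonzero-masking split are
# hypothetical placeholders, not part of the original script; they only exercise
# the fit()/recommend() interface that holdout_eval expects.
if __name__ == '__main__':
    import numpy as np
    import scipy.sparse as sps

    class RandomRecommender(object):
        def fit(self, train):
            self.n_items = train.shape[1]

        def recommend(self, user_id, exclude_seen=True):
            # rank all items in random order; exclude_seen is ignored here
            return np.random.permutation(self.n_items)

    # build a toy interaction matrix and split its nonzeros at random
    urm = sps.random(200, 500, density=0.05, format='csr', random_state=42)
    mask = np.random.rand(urm.nnz) < 0.8
    train = sps.csr_matrix((urm.data * mask, urm.indices, urm.indptr), shape=urm.shape)
    test = sps.csr_matrix((urm.data * ~mask, urm.indices, urm.indptr), shape=urm.shape)
    train.eliminate_zeros()
    test.eliminate_zeros()

    print(holdout_eval(RandomRecommender(), train, test, at=10))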
        # (inside the per-user evaluation loop, after computing recommended_items)
        if args.prediction_file:
            # write the recommendation list to file, one user per line
            # TODO: convert user and item indices back to their original ids
            user_id = test_user
            rec_list = recommended_items[:args.rec_length]
            s = str(user_id) + ','
            s += ','.join([str(x) for x in rec_list]) + '\n'
            pfile.write(s)

        # evaluate the recommendation list with ranking metrics ONLY
        roc_auc_ += roc_auc(recommended_items, relevant_items)
        precision_ += precision(recommended_items, relevant_items, at=at)
        recall_ += recall(recommended_items, relevant_items, at=at)
        map_ += map(recommended_items, relevant_items, at=at)
        mrr_ += rr(recommended_items, relevant_items, at=at)
        ndcg_ += ndcg(recommended_items, relevant_items, relevance=test[test_user].data, at=at)

# average each metric over the evaluated users
roc_auc_ /= n_eval
precision_ /= n_eval
recall_ /= n_eval
map_ /= n_eval
mrr_ /= n_eval
ndcg_ /= n_eval

# close the prediction file
if args.prediction_file:
    pfile.close()
    logger.info('Recommendations written to {}'.format(args.prediction_file))

logger.info('Ranking quality')
logger.info('ROC-AUC: {:.4f}'.format(roc_auc_))
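# For reference, a minimal sketch of the setup assumed earlier in the script by the
# fragment above. The flag names are inferred from the attributes used
# (args.prediction_file, args.rec_length) and are not confirmed by the source.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--prediction_file', type=str, default=None,
                    help='optional path where the top-N lists are written')
parser.add_argument('--rec_length', type=int, default=10,
                    help='number of recommended items written per user')
args = parser.parse_args()

pfile = None
if args.prediction_file:
    # open the file that each user's recommendation list will be written to
    pfile = open(args.prediction_file, 'w')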
    # (inside the per-user evaluation loop of the current fold `nfold`)
    user_profile = train[test_user]
    relevant_items = test[test_user].indices
    if len(relevant_items) > 0:
        n_eval += 1
        # this will rank **all** items
        recommended_items = recommender.recommend(user_id=test_user, exclude_seen=True)
        # evaluate the recommendation list with ranking metrics ONLY
        roc_auc_[nfold] += roc_auc(recommended_items, relevant_items)
        precision_[nfold] += precision(recommended_items, relevant_items, at=at)
        recall_[nfold] += recall(recommended_items, relevant_items, at=at)
        map_[nfold] += map(recommended_items, relevant_items, at=at)
        mrr_[nfold] += rr(recommended_items, relevant_items, at=at)
        ndcg_[nfold] += ndcg(recommended_items, relevant_items, relevance=test[test_user].data, at=at)

# average this fold's metrics over the evaluated users, then move to the next fold
roc_auc_[nfold] /= n_eval
precision_[nfold] /= n_eval
recall_[nfold] /= n_eval
map_[nfold] /= n_eval
mrr_[nfold] /= n_eval
ndcg_[nfold] /= n_eval
nfold += 1

# after all folds, report the ranking quality averaged across folds
logger.info('Ranking quality')
logger.info('ROC-AUC: {:.4f}'.format(roc_auc_.mean()))
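# For reference, the cross-validation scaffolding assumed by the fragment above:
# each metric accumulator is a per-fold numpy array, filled inside a loop over the
# (train, test) matrices of every fold and averaged across folds at the end. The
# actual fold-splitting helper is not shown in this excerpt, so only the
# preallocation is sketched here.
import numpy as np

nfolds = 5
roc_auc_ = np.zeros(nfolds)
precision_ = np.zeros(nfolds)
recall_ = np.zeros(nfolds)
map_ = np.zeros(nfolds)
mrr_ = np.zeros(nfolds)
ndcg_ = np.zeros(nfolds)
nfold = 0
# for each fold: fit the recommender on that fold's train matrix, reset n_eval,
# run the per-user loop above, normalize this fold's entries, then nfold += 1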