def _read_population_sample(cfg, sample_file):
    """Return the popcon submission paths for the ids listed in a file.

    Every non-blank line of *sample_file* is taken as one user id and is
    mapped to ``<cfg.popcon_dir>/<first two characters of id>/<id>``.
    """
    submissions = []
    with open(sample_file, 'r') as f:
        # Iterate the file lazily instead of materialising readlines().
        for line in f:
            user_id = line.strip()
            if not user_id:
                # A blank (e.g. trailing) line would otherwise be joined
                # into "<popcon_dir>/", which is not a submission file.
                continue
            submissions.append(
                os.path.join(cfg.popcon_dir, user_id[:2], user_id))
    return submissions


def _hold_out_sample(user, ratio=0.9):
    """Randomly split the user's scored profile in two.

    Returns a ``(kept, held_out)`` pair of ``{pkg: score}`` dicts, where
    ``held_out`` receives ``int(len(user.pkg_profile) * ratio)`` randomly
    chosen packages and ``kept`` holds whatever remains.
    """
    kept = {pkg: user.item_score[pkg] for pkg in user.pkg_profile}
    held_out = {}
    for _ in range(int(len(user.pkg_profile) * ratio)):
        # random.choice needs an indexable sequence; dict.keys() is only a
        # list on Python 2, so build the list explicitly.
        key = random.choice(list(kept))
        held_out[key] = kept.pop(key)
    return kept, held_out


def run_strategy(cfg, sample_file):
    """Run the ROC experiment for one configuration over a user sample.

    For every submission listed in *sample_file* the user's profile is
    repeatedly split at random: 90% of the packages are held out and the
    remaining 10% are handed to the recommender as the user's profile.
    Each ranking the recommender returns is recorded together with the
    held-out packages.  Afterwards the ROC plots and a
    ``<log_file>-roc.jpg.comment`` summary are written under
    ``results/roc-sample/<sample file name>/``.

    Parameters:
        cfg: configuration object; ``popcon_dir`` and ``pkgs_filter`` are
            read here, and it is passed on to ``Recommender`` and
            ``get_label``.
        sample_file: path of a text file with one popcon user id per line.

    The number of splits per user comes from the name ``iterations``,
    which is not defined in this function (presumably a module-level
    setting).
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)

    sample_str = os.path.basename(sample_file)
    population_sample = _read_population_sample(cfg, sample_file)

    sample_dir = ("results/roc-sample/%s" % sample_str)
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    log_file = os.path.join(sample_dir, label["values"])

    # n iterations per population user
    for submission_file in population_sample:
        user = PopconSystem(submission_file)
        user.filter_pkg_profile(cfg.pkgs_filter)
        user.maximal_pkg_profile()
        for _ in range(iterations):
            item_score, sample = _hold_out_sample(user)
            iteration_user = User(item_score)
            recommendation = rec.get_recommendation(iteration_user,
                                                    repo_size)
            # Results without a "ranking" attribute are not recorded.
            if hasattr(recommendation, "ranking"):
                results.add_result(recommendation.ranking, sample)

    plot_roc(results, log_file)
    plot_roc(results, log_file, 1)

    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" %
                (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write(
            "# threshold\tmean_fpr\tdev_fpr\t\tmean_tpr\tdev_tpr\t\tcoverage\n")  # noqa
        for size in results.thresholds:
            f.write("%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                    (size,
                     numpy.mean(results.fpr[size]),
                     numpy.std(results.fpr[size]),
                     numpy.mean(results.recall[size]),
                     numpy.std(results.recall[size]),
                     numpy.mean(results.coverage(size))))
option_str) for size in sizes: c_10[size] = set() c_100[size] = set() p_10_summary[size] = [] f05_100_summary[size] = [] with open(log_file + "-%s%.3d" % (option_str, size), 'w') as f: f.write("# sample %s\n" % sample_str) f.write("# strategy %s-%s%.3d\n\n" % (cfg.strategy, option_str, size)) f.write("# p_10\tf05_100\n\n") # main loop per user for submission_file in population_sample: user = PopconSystem(submission_file) user.filter_pkg_profile(cfg.pkgs_filter) user.maximal_pkg_profile() for size in sizes: cfg.profile_size = size cfg.k_neighbors = size rec = Recommender(cfg) repo_size = rec.items_repository.get_doccount() p_10 = [] f05_100 = [] for n in range(iterations): # Fill sample profile profile_len = len(user.pkg_profile) item_score = {} for pkg in user.pkg_profile: item_score[pkg] = user.item_score[pkg]
from apprecommender.evaluation import (CrossValidation, Precision, Recall,
                                       F_score, FPR, Accuracy)
from apprecommender.recommender import Recommender
from apprecommender.user import PopconSystem

# Script entry point: cross-validate the recommender against one fixed
# popcon submission and print the validation report.
if __name__ == '__main__':
    cfg = Config()
    rec = Recommender()

    # Alternative user sources that can be swapped in for the fixed
    # popcon submission used below:
    # user = LocalSystem()
    # user = RandomPopcon(cfg.popcon_dir)
    # user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,
    #                                                 "desktopapps"))
    popcon_entries = "~/.app-recommender/popcon-entries/" \
                     "00/0001166d0737c6dffb083071e5ee69f5"
    user = PopconSystem(os.path.expanduser(popcon_entries))
    # Filter the profile with the "desktopapps" filter file, then apply
    # maximal_pkg_profile() to it.
    user.filter_pkg_profile(os.path.join(cfg.filters_dir, "desktopapps"))
    user.maximal_pkg_profile()

    begin_time = datetime.datetime.now()

    # Metrics handed to the cross-validation, in reporting order.
    metrics = [
        Precision(),
        Recall(),
        F_score(0.5),
        Accuracy(),
        FPR(),
    ]
    # NOTE(review): the meaning of the positional arguments 0.9, 20 and
    # 0.005 is not visible from this script -- confirm against the
    # CrossValidation constructor.
    validation = CrossValidation(0.9, 20, rec, metrics, 0.005)
    validation.run(user)
    # Parenthesised form prints the same text on Python 2 and also parses
    # on Python 3.
    print(validation)

    end_time = datetime.datetime.now()