def main():
    """Command-line entry point for the NYC event recommender.

    Parses CLI flags and prints either today's or the whole week's events,
    optionally as JSON; with no selection flag it prints the usage help.
    """
    # BUG FIX: the original built a second, bare ArgumentParser right after
    # this one, silently discarding the description.
    parser = argparse.ArgumentParser(description='Nyc Event Recommender')
    parser.add_argument('-t', '--today', action='store_true',
                        help='Show today\'s events')
    parser.add_argument('-a', '--all', action='store_true',
                        help='Show events all week')
    parser.add_argument('-j', '--json', action='store_true',
                        help='Show events in json format')
    args = parser.parse_args()
    if args.today or args.all:
        # os.system('cls' if os.name == 'nt' else 'clear')
        today = args.today
        # renamed from `json` to avoid shadowing the stdlib module name
        as_json = args.json
        recommender = Recommender(today, as_json)
        events = recommender.get_recommendation()
        print(events)
    else:
        parser.print_help()
class RecommenderTests(unittest2.TestCase):
    """Tests for Recommender strategy selection and recommendation output."""

    @classmethod
    def setUpClass(cls):
        # FIX: a @classmethod receives the class, so the conventional (and
        # correct) parameter name is `cls`, not `self`.
        # Build one shared recommender for all tests, backed by fixtures.
        cfg = Config()
        cfg.popcon_index = "test_data/.sample_pxi"
        cfg.popcon_dir = "test_data/popcon_dir"
        cfg.clusters_dir = "test_data/clusters_dir"
        cls.rec = Recommender(cfg)

    def test_set_strategy(self):
        """Each strategy code must map to the expected strategy object."""
        self.rec.set_strategy("cb")
        self.assertIsInstance(self.rec.strategy, ContentBasedStrategy)
        self.assertEqual(self.rec.strategy.content, "full")
        self.rec.set_strategy("cbt")
        self.assertIsInstance(self.rec.strategy, ContentBasedStrategy)
        self.assertEqual(self.rec.strategy.content, "tag")
        self.rec.set_strategy("cbd")
        self.assertIsInstance(self.rec.strategy, ContentBasedStrategy)
        self.assertEqual(self.rec.strategy.content, "desc")
        self.rec.set_strategy("col")
        self.assertIsInstance(self.rec.strategy, CollaborativeStrategy)

    def test_get_recommendation(self):
        """A non-empty profile must yield a non-empty RecommendationResult."""
        user = User({"inkscape": 1, "gimp": 1, "eog": 1})
        result = self.rec.get_recommendation(user)
        self.assertIsInstance(result, RecommendationResult)
        self.assertGreater(len(result.item_score), 0)
def run_strategy(cfg, sample_file):
    """Run the configured strategy over a popcon population sample.

    For each user listed in `sample_file`, repeatedly hides 90% of the
    package profile, recommends from the remainder, and accumulates ROC
    data, which is plotted and written next to the log file.

    :param cfg: configuration providing popcon_dir, pkgs_filter, etc.
    :param sample_file: path to a file with one popcon user id per line.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    population_sample = []
    sample_str = sample_file.split('/')[-1]
    with open(sample_file, 'r') as f:
        for line in f:
            user_id = line.strip('\n')
            # popcon submissions are sharded by the first two id chars
            population_sample.append(
                os.path.join(cfg.popcon_dir, user_id[:2], user_id))
    sample_dir = ("results/roc-sample/%s" % sample_str)
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    log_file = os.path.join(sample_dir, label["values"])
    # n iterations per population user
    for submission_file in population_sample:
        user = PopconSystem(submission_file)
        user.filter_pkg_profile(cfg.pkgs_filter)
        user.maximal_pkg_profile()
        for n in range(iterations):
            # Fill sample profile: hide 90% of it as the ground truth.
            profile_len = len(user.pkg_profile)
            item_score = {}
            for pkg in user.pkg_profile:
                item_score[pkg] = user.item_score[pkg]
            sample = {}
            sample_size = int(profile_len * 0.9)
            for i in range(sample_size):
                # BUG FIX: random.choice needs a sequence; dict.keys() is
                # not indexable on Python 3, so materialize the keys.
                key = random.choice(list(item_score))
                sample[key] = item_score.pop(key)
            iteration_user = User(item_score)
            recommendation = rec.get_recommendation(iteration_user, repo_size)
            if hasattr(recommendation, "ranking"):
                results.add_result(recommendation.ranking, sample)
    plot_roc(results, log_file)
    plot_roc(results, log_file, 1)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" % (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write(
            "# threshold\tmean_fpr\tdev_fpr\t\tmean_tpr\tdev_tpr\t\tcoverage\n")  # noqa
        for size in results.thresholds:
            f.write("%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                    (size,
                     numpy.mean(results.fpr[size]),
                     numpy.std(results.fpr[size]),
                     numpy.mean(results.recall[size]),
                     numpy.std(results.recall[size]),
                     numpy.mean(results.coverage(size))))
def run_strategy(cfg, sample_file):
    """Evaluate the configured strategy against a popcon user sample.

    Hides 90% of each user's package profile per iteration, recommends
    from the rest, and logs ROC statistics per threshold.

    :param cfg: configuration providing popcon_dir, pkgs_filter, etc.
    :param sample_file: path listing one popcon user id per line.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    population_sample = []
    sample_str = sample_file.split('/')[-1]
    with open(sample_file, 'r') as f:
        for line in f:
            user_id = line.strip('\n')
            # submissions are stored under a 2-char shard directory
            population_sample.append(
                os.path.join(cfg.popcon_dir, user_id[:2], user_id))
    sample_dir = ("results/roc-sample/%s" % sample_str)
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    log_file = os.path.join(sample_dir, label["values"])
    # n iterations per population user
    for submission_file in population_sample:
        user = PopconSystem(submission_file)
        user.filter_pkg_profile(cfg.pkgs_filter)
        user.maximal_pkg_profile()
        for n in range(iterations):
            # Fill sample profile
            profile_len = len(user.pkg_profile)
            item_score = {}
            for pkg in user.pkg_profile:
                item_score[pkg] = user.item_score[pkg]
            sample = {}
            sample_size = int(profile_len * 0.9)
            for i in range(sample_size):
                # BUG FIX: dict.keys() is a view, not a sequence, on
                # Python 3 — random.choice needs list(...) here.
                key = random.choice(list(item_score))
                sample[key] = item_score.pop(key)
            iteration_user = User(item_score)
            recommendation = rec.get_recommendation(iteration_user, repo_size)
            if hasattr(recommendation, "ranking"):
                results.add_result(recommendation.ranking, sample)
    plot_roc(results, log_file)
    plot_roc(results, log_file, 1)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" % (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write(
            "# threshold\tmean_fpr\tdev_fpr\t\tmean_tpr\tdev_tpr\t\tcoverage\n"
        )  # noqa
        for size in results.thresholds:
            f.write(
                "%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                (size, numpy.mean(results.fpr[size]),
                 numpy.std(results.fpr[size]),
                 numpy.mean(results.recall[size]),
                 numpy.std(results.recall[size]),
                 numpy.mean(results.coverage(size))))
def read_stream():
    """
    Listens to the user-stream and reacts to mention events with a
    recommendation.
    """
    stream = TwitterStream(
        auth=OAuth(Config.access_token, Config.access_token_secret,
                   Config.api_key, Config.api_secret),
        domain='userstream.twitter.com')
    # Screen name the bot is mentioned by (without the leading "@").
    bot_name = Config.name.replace("@", "")
    for msg in stream.user():
        logging.info(msg)
        # A status update counts only if its entities mention the bot.
        was_mentioned = False
        if "entities" in msg:
            was_mentioned = any(
                mention["screen_name"] == bot_name
                for mention in msg["entities"]["user_mentions"])
        if was_mentioned:
            user_id = UserDao.add_user(msg["user"]["screen_name"],
                                       msg["user"]["id"])
            UserTweetDao.create_user_tweet(user_id, msg["id"],
                                           msg["text"], msg)
            Recommender.get_recommendation()
            distribute_recommendations()
def run_strategy(cfg, user):
    """Sweep weighting schemes and sample proportions for one user.

    For every (weight, bm25_k1) pair and every sample proportion, hides
    part of the user's profile, recommends from the rest, and writes
    precision/f1/f05 summaries plus plots.

    :param cfg: experiment configuration; weight and bm25_k1 are mutated.
    :param user: user whose package profile is sampled each iteration.
    """
    for weight in weighting:
        cfg.weight = weight[0]
        cfg.bm25_k1 = weight[1]
        rec = Recommender(cfg)
        repo_size = rec.items_repository.get_doccount()
        for proportion in sample_proportions:
            results = ExperimentResults(repo_size)
            label = get_label(cfg, proportion)
            log_file = "results/strategies/" + label["values"]
            for n in range(iterations):
                # Fill sample profile: hide `proportion` of it.
                profile_size = len(user.pkg_profile)
                item_score = {}
                for pkg in user.pkg_profile:
                    item_score[pkg] = user.item_score[pkg]
                sample = {}
                sample_size = int(profile_size * proportion)
                for i in range(sample_size):
                    # BUG FIX: random.choice needs a sequence; dict.keys()
                    # is not indexable on Python 3.
                    key = random.choice(list(item_score))
                    sample[key] = item_score.pop(key)
                iteration_user = User(item_score)
                recommendation = rec.get_recommendation(
                    iteration_user, repo_size)
                write_recall_log(label, n, sample, recommendation,
                                 profile_size, repo_size, log_file)
                if hasattr(recommendation, "ranking"):
                    results.add_result(recommendation.ranking, sample)
            with open(log_file, 'w') as f:
                precision_10 = sum(results.precision[10]) / len(
                    results.precision[10])
                f1_10 = sum(results.f1[10]) / len(results.f1[10])
                f05_10 = sum(results.f05[10]) / len(results.f05[10])
                f.write("# %s\n# %s\n\ncoverage %d\n\n" %
                        (label["description"], label["values"],
                         recommendation.size))
                f.write("# best results (recommendation size; metric)\n")
                f.write(
                    "precision (%d; %.2f)\nf1 (%d; %.2f)\nf05 (%d; %.2f)\n\n"
                    % (results.best_precision()[0],
                       results.best_precision()[1],
                       results.best_f1()[0], results.best_f1()[1],
                       results.best_f05()[0], results.best_f05()[1]))
                f.write(
                    "# recommendation size 10\nprecision (10; %.2f)\nf1 (10; %.2f)\nf05 (10; %.2f)" %  # noqa
                    (precision_10, f1_10, f05_10))
            precision = results.get_precision_summary()
            recall = results.get_recall_summary()
            f1 = results.get_f1_summary()
            f05 = results.get_f05_summary()
            accuracy = results.get_accuracy_summary()
            plot_summary(precision, recall, f1, f05, accuracy, log_file)
def do_recommendation(tweet, keyword_list="", delete_fails=False):
    """Persist a tweet, run the recommender and distribute the results.

    Returns the number of recommendations that were distributed (0 when
    the tweet was already known).
    """
    # TODO only persist if there is a recommendation?
    user = UserDao.add_user(tweet["user"]["screen_name"], tweet["user"]["id"])
    distributed_count = 0
    if not UserTweetDao.is_existing_user_tweet(tweet["id"]):
        # Prefer the extracted keywords over the raw tweet text, if given.
        tweet_text = tweet["text"] if len(keyword_list) == 0 else keyword_list
        UserTweetDao.create_user_tweet(user.id, tweet["id"], tweet_text,
                                       tweet)
        Recommender.get_recommendation()
        distributed_count = distribute_recommendations()
    # TODO delete failed
    # if nr_distributed == 0 and delete_fails:
    #     # user.delete()
    #     pass
    return distributed_count
def run_strategy(cfg, user):
    """Evaluate weighting schemes and sample proportions for one user.

    Mutates cfg.weight / cfg.bm25_k1 per sweep step, hides a proportion
    of the user's profile each iteration, and writes metric summaries.

    :param cfg: experiment configuration; weight and bm25_k1 are mutated.
    :param user: user whose package profile is partitioned per iteration.
    """
    for weight in weighting:
        cfg.weight = weight[0]
        cfg.bm25_k1 = weight[1]
        rec = Recommender(cfg)
        repo_size = rec.items_repository.get_doccount()
        for proportion in sample_proportions:
            results = ExperimentResults(repo_size)
            label = get_label(cfg, proportion)
            log_file = "results/strategies/" + label["values"]
            for n in range(iterations):
                # Fill sample profile
                profile_size = len(user.pkg_profile)
                item_score = {}
                for pkg in user.pkg_profile:
                    item_score[pkg] = user.item_score[pkg]
                sample = {}
                sample_size = int(profile_size * proportion)
                for i in range(sample_size):
                    # BUG FIX: dict.keys() is a non-indexable view on
                    # Python 3; random.choice requires a sequence.
                    key = random.choice(list(item_score))
                    sample[key] = item_score.pop(key)
                iteration_user = User(item_score)
                recommendation = rec.get_recommendation(
                    iteration_user, repo_size)
                write_recall_log(
                    label, n, sample, recommendation, profile_size,
                    repo_size, log_file)
                if hasattr(recommendation, "ranking"):
                    results.add_result(recommendation.ranking, sample)
            with open(log_file, 'w') as f:
                precision_10 = sum(results.precision[10]) / len(
                    results.precision[10])
                f1_10 = sum(results.f1[10]) / len(results.f1[10])
                f05_10 = sum(results.f05[10]) / len(results.f05[10])
                f.write("# %s\n# %s\n\ncoverage %d\n\n" %
                        (label["description"], label["values"],
                         recommendation.size))
                f.write("# best results (recommendation size; metric)\n")
                f.write(
                    "precision (%d; %.2f)\nf1 (%d; %.2f)\nf05 (%d; %.2f)\n\n"
                    % (results.best_precision()[0],
                       results.best_precision()[1],
                       results.best_f1()[0], results.best_f1()[1],
                       results.best_f05()[0], results.best_f05()[1]))
                f.write("# recommendation size 10\nprecision (10; %.2f)\nf1 (10; %.2f)\nf05 (10; %.2f)" %  # noqa
                        (precision_10, f1_10, f05_10))
            precision = results.get_precision_summary()
            recall = results.get_recall_summary()
            f1 = results.get_f1_summary()
            f05 = results.get_f05_summary()
            accuracy = results.get_accuracy_summary()
            plot_summary(precision, recall, f1, f05, accuracy, log_file)
class AppRecommender:
    # Web request handler: recommends applications from an uploaded
    # package list or popularity-contest submission.

    def __init__(self):
        logging.info("Setting up AppRecommender...")
        self.cfg = Config()
        self.rec = Recommender(self.cfg)
        # Every request gets its own temporary directory under this path.
        self.requests_dir = "/var/www/AppRecommender/src/web/requests/"
        if not os.path.exists(self.requests_dir):
            os.makedirs(self.requests_dir)

    def POST(self):
        """Handle an uploaded packages file and render a recommendation.

        Stores the upload under a fresh temp dir, builds a user profile
        from it, and renders either the recommendation page or an error
        page when the profile is too small or the recommender is empty.
        """
        web_input = web.input(pkgs_file={})
        user_dir = tempfile.mkdtemp(prefix='', dir=self.requests_dir)
        # The temp dir's basename doubles as an anonymous user id.
        user_id = user_dir.split("/")[-1]
        uploaded_file = os.path.join(user_dir, "uploaded_file")
        if web_input['pkgs_file'].value:
            lines = web_input['pkgs_file'].file.readlines()
            with open(uploaded_file, "w") as uploaded:
                uploaded.writelines(lines)
        with open(uploaded_file) as uploaded:
            # Popcon submissions start with this magic header; anything
            # else is treated as a plain package list.
            if uploaded.readline().startswith('POPULARITY-CONTEST'):
                user = PopconSystem(uploaded_file, user_id)
            else:
                user = PkgsListSystem(uploaded_file, user_id)
        if len(user.pkg_profile) < 10:
            return render.error(
                ["Could not extract profile from uploaded file. It must have at least 10 applications."],  # noqa
                "/", "RECOMMENDATION")
        else:
            self.rec.set_strategy("knn_eset")
            user.maximal_pkg_profile()
            prediction = self.rec.get_recommendation(user,
                                                     12).get_prediction()
            logging.info("Prediction for user %s" % user.user_id)
            logging.info(str(prediction))
            # prediction is (package, score) pairs; keep names only.
            recommendation = [result[0] for result in prediction]
            pkgs_details = []
            for pkg_name in recommendation:
                logging.info("Getting details of package %s" % pkg_name)
                pkg = DebianPackage(pkg_name)
                pkg.load_summary()
                pkgs_details.append(pkg)
            if pkgs_details:
                logging.info("Rendering recommendation...")
                return render.apprec(pkgs_details)
            else:
                return render.error(
                    ["No recommendation produced for the uploaded file."],
                    "/", "RECOMMENDATION")  # noqa
def run_strategy(cfg, user):
    """Run one strategy for a single user and write ROC/summary logs.

    Hides 90% of the user's profile per iteration, recommends from the
    remainder, then writes per-threshold metrics and plots under
    results/roc-suite/<user>/<strategy>/.

    :param cfg: configuration carrying the strategy name.
    :param user: user whose package profile is sampled.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    user_dir = ("results/roc-suite/%s/%s" %
                (user.user_id[:8], cfg.strategy))
    if not os.path.exists(user_dir):
        os.makedirs(user_dir)
    log_file = os.path.join(user_dir, label["values"])
    for n in range(iterations):
        # Fill sample profile: hide 90% of it as the ground truth.
        profile_len = len(user.pkg_profile)
        item_score = {}
        for pkg in user.pkg_profile:
            item_score[pkg] = user.item_score[pkg]
        sample = {}
        sample_size = int(profile_len * 0.9)
        for i in range(sample_size):
            # BUG FIX: random.choice needs a sequence; dict.keys() is
            # not indexable on Python 3, so materialize the keys.
            key = random.choice(list(item_score))
            sample[key] = item_score.pop(key)
        iteration_user = User(item_score)
        recommendation = rec.get_recommendation(iteration_user, repo_size)
        write_recall_log(
            label, n, sample, recommendation, profile_len, repo_size,
            log_file)
        if hasattr(recommendation, "ranking"):
            results.add_result(recommendation.ranking, sample)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" % (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write("# threshold\tprecision\trecall\t\tf05\t\tcoverage\n")
        for size in results.thresholds:
            f.write("%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                    (size,
                     numpy.mean(results.precision[size]),
                     numpy.mean(results.recall[size]),
                     numpy.mean(results.f05[size]),
                     numpy.mean(results.coverage(size))))
    shutil.copy(log_file + "-roc.jpg.comment", log_file + ".jpg.comment")
    shutil.copy(log_file + "-roc.jpg.comment",
                log_file + "-logscale.jpg.comment")
    plot_roc(results, log_file)
    plot_summary(results, log_file)
def run_strategy(cfg, user):
    """Evaluate one strategy for a single user, logging ROC statistics.

    Per iteration, 90% of the profile is hidden as ground truth and the
    recommender runs on the remainder; metrics go to
    results/roc-suite/<user>/<strategy>/.

    :param cfg: configuration carrying the strategy name.
    :param user: user whose package profile is partitioned.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    user_dir = ("results/roc-suite/%s/%s" %
                (user.user_id[:8], cfg.strategy))
    if not os.path.exists(user_dir):
        os.makedirs(user_dir)
    log_file = os.path.join(user_dir, label["values"])
    for n in range(iterations):
        # Fill sample profile
        profile_len = len(user.pkg_profile)
        item_score = {}
        for pkg in user.pkg_profile:
            item_score[pkg] = user.item_score[pkg]
        sample = {}
        sample_size = int(profile_len * 0.9)
        for i in range(sample_size):
            # BUG FIX: dict.keys() is a non-indexable view on Python 3;
            # random.choice requires a sequence.
            key = random.choice(list(item_score))
            sample[key] = item_score.pop(key)
        iteration_user = User(item_score)
        recommendation = rec.get_recommendation(iteration_user, repo_size)
        write_recall_log(label, n, sample, recommendation, profile_len,
                         repo_size, log_file)
        if hasattr(recommendation, "ranking"):
            results.add_result(recommendation.ranking, sample)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" % (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write("# threshold\tprecision\trecall\t\tf05\t\tcoverage\n")
        for size in results.thresholds:
            f.write(
                "%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                (size, numpy.mean(results.precision[size]),
                 numpy.mean(results.recall[size]),
                 numpy.mean(results.f05[size]),
                 numpy.mean(results.coverage(size))))
    shutil.copy(log_file + "-roc.jpg.comment", log_file + ".jpg.comment")
    shutil.copy(log_file + "-roc.jpg.comment",
                log_file + "-logscale.jpg.comment")
    plot_roc(results, log_file)
    plot_summary(results, log_file)
def get_recommendation_index(user_data):
    """Return 1-based indices of the top-k recommended services for a user.

    :param user_data: the user's fixed feature profile (its length sets
        the recommender's n_fixed_feature).
    :return: list of 1-based service indices for the chatbot.
    """
    # init recommender and the weights at random
    rr = Recommender(n_fixed_feature=len(user_data))
    rr.init_weights()
    # TODO: need to add a user_goal and user_rating function?? These are
    # hard-coded placeholders for now.
    user_goal = [['time', 0.5], ['talk', 0.5], ['friendly', 0.5],
                 ['advice', 0.5]]
    user_rating = {
        'Group Therapy': (3, 5),
        'Vent Over Tea': (5, 5),
        '7 Cups': (4, 5)
    }
    # get the service index of top_k choices
    # BUG FIX: the original referenced an undefined name `user_profile`
    # (NameError at runtime); the intended value is the `user_data` arg.
    choices, service_names = rr.get_recommendation(user_data, user_goal,
                                                   verbose=False)
    # Chatbot indices are 1-based, recommender indices are 0-based.
    choices_for_chatbot = [c + 1 for c in choices]
    user_scores = rr.process_user_rating(user_rating)
    rr.update_weights(user_data, user_scores, choices, verbose=False)
    return choices_for_chatbot
class AppRecommender:
    """Produces application recommendations for the local system user."""

    def __init__(self):
        self.recommender = Recommender()

    def make_recommendation(self, recommendation_size,
                            no_auto_pkg_profile=False):
        """Compute, log and return a recommendation of the given size."""
        started_at = datetime.datetime.now()
        logging.info("Computation started at %s" % started_at)
        # user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,
        #                     "desktopapps"))
        user = self.get_user(no_auto_pkg_profile)
        recommendation = (self.recommender.get_recommendation(
            user, recommendation_size))
        logging.info("Recommending applications for user %s" % user.user_id)
        logging.info(recommendation)
        finished_at = datetime.datetime.now()
        logging.info("Computation completed at %s" % finished_at)
        elapsed = finished_at - started_at
        logging.info("Time elapsed: %d seconds." % elapsed.seconds)
        return recommendation

    def get_user(self, no_auto_pkg_profile):
        """Build the local-system user profile, filtered to desktop apps."""
        config = Config()
        user = LocalSystem()
        user.filter_pkg_profile(
            os.path.join(config.filters_dir, "desktopapps"))
        user.maximal_pkg_profile()
        if no_auto_pkg_profile:
            user.no_auto_pkg_profile()
        return user
# NOTE(review): fragment of a larger experiment loop — `rec`, `user`, `k`,
# `iterations`, `threshold`, `recommended`, `log_file` and the *_summary
# dicts are all defined in the enclosing (not shown) scope.
repo_size = rec.items_repository.get_doccount()
results = ExperimentResults(repo_size)
# n iterations for same recommender and user
for n in range(iterations):
    # Fill sample profile: hold back 90% of it as the hidden sample.
    profile_len = len(user.pkg_profile)
    item_score = {}
    for pkg in user.pkg_profile:
        item_score[pkg] = user.item_score[pkg]
    sample = {}
    sample_size = int(profile_len*0.9)
    for i in range(sample_size):
        # NOTE(review): dict.keys() as a random.choice argument only
        # works on Python 2 — confirm the target interpreter.
        key = random.choice(item_score.keys())
        sample[key] = item_score.pop(key)
    iteration_user = User(item_score)
    recommendation = rec.get_recommendation(iteration_user,threshold)
    if hasattr(recommendation,"ranking"):
        results.add_result(recommendation.ranking,sample)
        recommended[k] = recommended[k].union(recommendation.ranking)
# save summary
roc_point = results.get_roc_point()
roc_summary[k].append(roc_point)
precision = results.get_precision_summary()
precision_summary[k].append(precision)
f05 = results.get_f05_summary()
f05_summary[k].append(f05)
mcc = results.get_mcc_summary()
mcc_summary[k].append(mcc)
with open(log_file+"-k%.3d"%k,'a') as f:
    f.write("[%.2f,%.2f] \t%.4f \t%.4f \t%.4f\n" %
            (roc_point[0],roc_point[1],precision,f05,mcc))
# NOTE(review): fragment of a larger cross-validation loop — `rec`, `user`,
# `iterations`, `k`, `size`, `c_10`, `c_100` and `p_10_summary` come from
# the enclosing (not shown) scope; the trailing `if f05_100:` body also
# continues beyond this chunk.
repo_size = rec.items_repository.get_doccount()
p_10 = []
f05_100 = []
for n in range(iterations):
    # Fill sample profile: hide 90% of it as the ground truth.
    profile_len = len(user.pkg_profile)
    item_score = {}
    for pkg in user.pkg_profile:
        item_score[pkg] = user.item_score[pkg]
    sample = {}
    sample_size = int(profile_len * 0.9)
    for i in range(sample_size):
        # NOTE(review): dict.keys() with random.choice is Python-2-only;
        # confirm the target interpreter.
        key = random.choice(item_score.keys())
        sample[key] = item_score.pop(key)
    iteration_user = User(item_score)
    recommendation = rec.get_recommendation(iteration_user, repo_size)
    if hasattr(recommendation, "ranking"):
        ranking = recommendation.ranking
        real = RecommendationResult(sample)
        # Precision over the top-10 predictions.
        predicted_10 = RecommendationResult(dict.fromkeys(ranking[:10], 1))
        evaluation = Evaluation(predicted_10, real, repo_size)
        p_10.append(evaluation.run(Precision()))
        # F0.5 over the top-100 predictions.
        predicted_100 = RecommendationResult(dict.fromkeys(ranking[:100], 1))
        evaluation = Evaluation(predicted_100, real, repo_size)
        f05_100.append(evaluation.run(F_score(0.5)))
        c_10[k][size] = c_10[k][size].union(recommendation.ranking[:10])
        c_100[k][size] = c_100[k][size].union(recommendation.ranking[:100])
# save summary
if p_10:
    p_10_summary[k][size].append(numpy.mean(p_10))
if f05_100:
# NOTE(review): fragment of a larger experiment loop — `rec`, `user`, `k`,
# `iterations`, `threshold`, `recommended`, `log_file` and the *_summary
# dicts are defined in the enclosing (not shown) scope; the final f.write
# statement continues past this chunk.
repo_size = rec.items_repository.get_doccount()
results = ExperimentResults(repo_size)
# n iterations for same recommender and user
for n in range(iterations):
    # Fill sample profile: hold back 90% of it as the hidden sample.
    profile_len = len(user.pkg_profile)
    item_score = {}
    for pkg in user.pkg_profile:
        item_score[pkg] = user.item_score[pkg]
    sample = {}
    sample_size = int(profile_len * 0.9)
    for i in range(sample_size):
        # NOTE(review): dict.keys() with random.choice is Python-2-only;
        # confirm the target interpreter.
        key = random.choice(item_score.keys())
        sample[key] = item_score.pop(key)
    iteration_user = User(item_score)
    recommendation = rec.get_recommendation(
        iteration_user, threshold)
    if hasattr(recommendation, "ranking"):
        results.add_result(recommendation.ranking, sample)
        recommended[k] = recommended[
            k].union(recommendation.ranking)
# save summary
roc_point = results.get_roc_point()
roc_summary[k].append(roc_point)
precision = results.get_precision_summary()
precision_summary[k].append(precision)
f05 = results.get_f05_summary()
f05_summary[k].append(f05)
mcc = results.get_mcc_summary()
mcc_summary[k].append(mcc)
with open(log_file + "-k%.3d" % k, 'a') as f:
    f.write("[%.2f,%.2f] \t%.4f \t%.4f \t%.4f\n" %
class ContentBasedSuite(expsuite.PyExperimentSuite):
    # Experiment suite evaluating content-based strategies over
    # partitioned user profiles (PyExperimentSuite reset/iterate protocol).

    def reset(self, params, rep):
        """Prepare recommender, user profile and sample size for a run."""
        if params['name'].startswith("content"):
            cfg = Config()
            # if the index was not built yet
            # app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
            cfg.axi = "data/AppAxi"
            cfg.index_mode = "old"
            cfg.weight = params['weight']
            self.rec = Recommender(cfg)
            self.rec.set_strategy(params['strategy'])
            self.repo_size = self.rec.items_repository.get_doccount()
            self.user = LocalSystem()
            self.user.app_pkg_profile(self.rec.items_repository)
            self.user.no_auto_pkg_profile()
            # Number of profile entries to hide each iteration.
            self.sample_size = int(
                len(self.user.pkg_profile) * params['sample'])
            # iteration should be set to 10 in config file
            # self.profile_size = range(10,101,10)

    def iterate(self, params, rep, n):
        """Run one iteration: partition profile, recommend, log recall,
        plot metric curves, and return a metrics dict for the suite."""
        if params['name'].startswith("content"):
            item_score = dict.fromkeys(self.user.pkg_profile, 1)
            # Prepare partition
            sample = {}
            for i in range(self.sample_size):
                # NOTE(review): random.choice over dict.keys() is
                # Python-2-only; confirm the target interpreter.
                key = random.choice(item_score.keys())
                sample[key] = item_score.pop(key)
            # Get full recommendation
            user = User(item_score)
            recommendation = self.rec.get_recommendation(user, self.repo_size)
            # Write recall log
            recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
                (params['strategy'], params['weight'], params['sample'], n)
            output = open(recall_file, 'w')
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, len(item_score), self.sample_size))
            # Ranks of hidden packages that were recovered vs. missed.
            notfound = []
            ranks = []
            for pkg in sample.keys():
                if pkg in recommendation.ranking:
                    ranks.append(recommendation.ranking.index(pkg))
                else:
                    notfound.append(pkg)
            for r in sorted(ranks):
                output.write(str(r) + "\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg + "\n")
            output.close()
            # Plot metrics summary
            accuracy = []
            precision = []
            recall = []
            f1 = []
            g = Gnuplot.Gnuplot()
            g('set style data lines')
            g.xlabel('Recommendation size')
            # Evaluate metrics every 100 ranking positions.
            for size in range(1, len(recommendation.ranking) + 1, 100):
                predicted = RecommendationResult(
                    dict.fromkeys(recommendation.ranking[:size], 1))
                real = RecommendationResult(sample)
                evaluation = Evaluation(predicted, real, self.repo_size)
                accuracy.append([size, evaluation.run(Accuracy())])
                precision.append([size, evaluation.run(Precision())])
                recall.append([size, evaluation.run(Recall())])
                f1.append([size, evaluation.run(F1())])
            g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
                   Gnuplot.Data(precision, title="Precision"),
                   Gnuplot.Data(recall, title="Recall"),
                   Gnuplot.Data(f1, title="F1"))
            g.hardcopy(recall_file + "-plot.ps", enhanced=1, color=1)
            # Iteration log
            # NOTE(review): 'recall:' key keeps the trailing colon of the
            # original; callers apparently read it spelled this way.
            result = {'iteration': n,
                      'weight': params['weight'],
                      'strategy': params['strategy'],
                      'accuracy': accuracy[20],
                      'precision': precision[20],
                      'recall:': recall[20],
                      'f1': f1[20]}
            return result
class Survey:
    """Web handler running a recommendation survey across strategies.

    Each user is shown a recommendation from one not-yet-used strategy
    per POST; predictions are persisted under submissions_dir.
    """

    def __init__(self):
        logging.info("Setting up survey...")
        self.cfg = Config()
        self.rec = Recommender(self.cfg)
        self.submissions_dir = "/var/www/AppRecommender/src/web/submissions/"
        if not os.path.exists(self.submissions_dir):
            os.makedirs(self.submissions_dir)
        # Candidate strategies; a user sees each at most once.
        self.strategies = ["cbh", "cbh_eset", "knn", "knn_eset",
                           "knn_plus", "knnco"]

    def POST(self):
        """Render a survey slide with a recommendation for the user."""
        web_input = web.input(pkgs_file={})
        if 'user_id' in web_input:
            user_id = web_input['user_id'].encode('utf8')
            user_dir = os.path.join(self.submissions_dir, user_id)
            logging.info("New recommendation for user %s" % user_id)
            uploaded_file = os.path.join(user_dir, "uploaded_file")
            with open(uploaded_file) as uploaded:
                # Popcon submissions start with this magic header.
                if uploaded.readline().startswith('POPULARITY-CONTEST'):
                    user = PopconSystem(uploaded_file, user_id)
                else:
                    user = PkgsListSystem(uploaded_file, user_id)
            user.maximal_pkg_profile()
            if len(user.pkg_profile) < 10:
                error_msg = "Could not extract profile from uploaded file. It must have at least 10 applications."  # noqa
                logging.critical(error_msg)
                return render.error([error_msg], "/survey/", "START")
            else:
                # Check the remaining strategies and select a new one
                old_strategies = [dirs for root, dirs, files in
                                  os.walk(os.path.join(self.submissions_dir,
                                                       user_id))]
                if old_strategies:
                    strategies = [s for s in self.strategies
                                  if s not in old_strategies[0]]
                    logging.info("Already used strategies %s" %
                                 old_strategies[0])
                else:
                    strategies = self.strategies
                if not strategies:
                    # User already went through every strategy.
                    return render.thanks(user_id)
                selected_strategy = random.choice(strategies)
                logging.info("Selected \'%s\' from %s" %
                             (selected_strategy, strategies))
                self.set_rec_strategy(selected_strategy)
                prediction = self.rec.get_recommendation(
                    user, 10).get_prediction()
                logging.info("Prediction for user %s" % user_id)
                logging.info(str(prediction))
                self.save_prediction(user_id, selected_strategy, prediction)
                # Load packages details
                recommendation = [result[0] for result in prediction]
                pkgs_details = []
                for pkg_name in recommendation:
                    logging.info("Getting details of package %s" % pkg_name)
                    pkg = DebianPackage(pkg_name)
                    pkg.load_details()
                    pkgs_details.append(pkg)
                if pkgs_details:
                    logging.info("Rendering survey slide...")
                    return render.survey(pkgs_details, user_id,
                                         selected_strategy, len(strategies))
                else:
                    return render.error(
                        ["No recommendation produced for the uploaded file."],
                        "/survey/", "START")

    def set_rec_strategy(self, selected_strategy):
        """Apply the selected strategy with fixed neighborhood parameters.

        The original had a chain of `if …: pass` branches here that did
        nothing for any strategy; they were dead code and are removed.
        """
        k = 10
        n = 20
        self.rec.set_strategy(selected_strategy, k, n)
        return selected_strategy

    def save_prediction(self, user_id, strategy, prediction):
        """Write the ranked prediction as CSV and record a start timestamp."""
        strategy_dir = os.path.join(self.submissions_dir, user_id, strategy)
        if not os.path.exists(strategy_dir):
            os.makedirs(strategy_dir)
        ranking = 0
        prediction_file = open(os.path.join(strategy_dir, "prediction"), "w")
        try:
            writer = csv.writer(prediction_file)
            fieldnames = ('ranking', 'rating', 'package', 'evaluation')
            writer.writerow(fieldnames)
            for pkg, rating in prediction:
                writer.writerow((ranking, "%.4f" % rating, pkg, ""))
                ranking += 1
        except Exception:
            # FIX: narrowed from a bare `except:` so SystemExit and
            # KeyboardInterrupt are no longer swallowed.
            error_msg = "Error to write prediction to file."
            logging.critical(error_msg)
            return render.error([error_msg], "/survey/", "START")
        finally:
            prediction_file.close()
        with open(os.path.join(strategy_dir, "start"), 'w') as start:
            now = datetime.datetime.now()
            start.write(now.strftime("%Y%m%d%H%M%S"))
        logging.debug("Saved prediction to file at %s/%s" %
                      (user_id, strategy))
# NOTE(review): fragment of a larger cross-validation loop — `rec`, `user`,
# `iterations`, `k`, `size`, `c_10` and `c_100` are defined in the
# enclosing (not shown) scope, and the loop body continues past this chunk.
repo_size = rec.items_repository.get_doccount()
p_10 = []
f05_100 = []
for n in range(iterations):
    # Fill sample profile: hide 90% of it as the ground truth.
    profile_len = len(user.pkg_profile)
    item_score = {}
    for pkg in user.pkg_profile:
        item_score[pkg] = user.item_score[pkg]
    sample = {}
    sample_size = int(profile_len * 0.9)
    for i in range(sample_size):
        # NOTE(review): dict.keys() with random.choice is Python-2-only;
        # confirm the target interpreter.
        key = random.choice(item_score.keys())
        sample[key] = item_score.pop(key)
    iteration_user = User(item_score)
    recommendation = rec.get_recommendation(
        iteration_user, repo_size)
    if hasattr(recommendation, "ranking"):
        ranking = recommendation.ranking
        real = RecommendationResult(sample)
        # Precision over the top-10 predictions.
        predicted_10 = RecommendationResult(
            dict.fromkeys(ranking[:10], 1))
        evaluation = Evaluation(predicted_10, real, repo_size)
        p_10.append(evaluation.run(Precision()))
        # F0.5 over the top-100 predictions.
        predicted_100 = RecommendationResult(
            dict.fromkeys(ranking[:100], 1))
        evaluation = Evaluation(predicted_100, real, repo_size)
        f05_100.append(evaluation.run(F_score(0.5)))
        c_10[k][size] = c_10[k][size].union(
            recommendation.ranking[:10])
        c_100[k][size] = c_100[k][size].union(
            recommendation.ranking[:100])
""" import os import sys sys.path.insert(0,'../') import logging import random import datetime from config import Config from recommender import Recommender from user import LocalSystem, RandomPopcon if __name__ == '__main__': begin_time = datetime.datetime.now() cfg = Config() rec = Recommender(cfg) logging.info("Computation started at %s" % begin_time) #user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,"desktopapps")) user = LocalSystem() user.filter_pkg_profile(os.path.join(cfg.filters_dir,"desktopapps")) user.maximal_pkg_profile() logging.info("Recommending applications for user %s" % user.user_id) logging.info(rec.get_recommendation(user,20)) end_time = datetime.datetime.now() logging.info("Computation completed at %s" % end_time) delta = end_time - begin_time logging.info("Time elapsed: %d seconds." % delta.seconds)
class Survey:
    """Web handler that surveys users across recommendation strategies.

    Each POST picks a strategy the user has not seen yet, renders its
    recommendation, and persists the prediction for later evaluation.
    """

    def __init__(self):
        logging.info("Setting up survey...")
        self.cfg = Config()
        self.rec = Recommender(self.cfg)
        self.submissions_dir = "/var/www/AppRecommender/src/web/submissions/"
        if not os.path.exists(self.submissions_dir):
            os.makedirs(self.submissions_dir)
        # Strategies to survey; each is offered to a user at most once.
        self.strategies = [
            "cbh", "cbh_eset", "knn", "knn_eset", "knn_plus", "knnco"
        ]

    def POST(self):
        """Render the next survey slide for the submitting user."""
        web_input = web.input(pkgs_file={})
        if 'user_id' in web_input:
            user_id = web_input['user_id'].encode('utf8')
            user_dir = os.path.join(self.submissions_dir, user_id)
            logging.info("New recommendation for user %s" % user_id)
            uploaded_file = os.path.join(user_dir, "uploaded_file")
            with open(uploaded_file) as uploaded:
                # Popcon submissions start with this magic header.
                if uploaded.readline().startswith('POPULARITY-CONTEST'):
                    user = PopconSystem(uploaded_file, user_id)
                else:
                    user = PkgsListSystem(uploaded_file, user_id)
            user.maximal_pkg_profile()
            if len(user.pkg_profile) < 10:
                error_msg = "Could not extract profile from uploaded file. It must have at least 10 applications."  # noqa
                logging.critical(error_msg)
                return render.error([error_msg], "/survey/", "START")
            else:
                # Check the remaining strategies and select a new one
                old_strategies = [
                    dirs for root, dirs, files in os.walk(
                        os.path.join(self.submissions_dir, user_id))
                ]
                if old_strategies:
                    strategies = [
                        s for s in self.strategies
                        if s not in old_strategies[0]
                    ]
                    logging.info("Already used strategies %s" %
                                 old_strategies[0])
                else:
                    strategies = self.strategies
                if not strategies:
                    # Every strategy already surveyed for this user.
                    return render.thanks(user_id)
                selected_strategy = random.choice(strategies)
                logging.info("Selected \'%s\' from %s" %
                             (selected_strategy, strategies))
                self.set_rec_strategy(selected_strategy)
                prediction = self.rec.get_recommendation(
                    user, 10).get_prediction()
                logging.info("Prediction for user %s" % user_id)
                logging.info(str(prediction))
                self.save_prediction(user_id, selected_strategy, prediction)
                # Load packages details
                recommendation = [result[0] for result in prediction]
                pkgs_details = []
                for pkg_name in recommendation:
                    logging.info("Getting details of package %s" % pkg_name)
                    pkg = DebianPackage(pkg_name)
                    pkg.load_details()
                    pkgs_details.append(pkg)
                if pkgs_details:
                    logging.info("Rendering survey slide...")
                    return render.survey(pkgs_details, user_id,
                                         selected_strategy, len(strategies))
                else:
                    return render.error(
                        ["No recommendation produced for the uploaded file."],
                        "/survey/", "START")

    def set_rec_strategy(self, selected_strategy):
        """Apply the selected strategy with fixed neighborhood parameters.

        The original contained a chain of `if …: pass` branches that did
        nothing for any strategy value; that dead code is removed.
        """
        k = 10
        n = 20
        self.rec.set_strategy(selected_strategy, k, n)
        return selected_strategy

    def save_prediction(self, user_id, strategy, prediction):
        """Persist the ranked prediction as CSV plus a start timestamp."""
        strategy_dir = os.path.join(self.submissions_dir, user_id, strategy)
        if not os.path.exists(strategy_dir):
            os.makedirs(strategy_dir)
        ranking = 0
        prediction_file = open(os.path.join(strategy_dir, "prediction"), "w")
        try:
            writer = csv.writer(prediction_file)
            fieldnames = ('ranking', 'rating', 'package', 'evaluation')
            writer.writerow(fieldnames)
            for pkg, rating in prediction:
                writer.writerow((ranking, "%.4f" % rating, pkg, ""))
                ranking += 1
        except Exception:
            # FIX: narrowed from a bare `except:` so SystemExit and
            # KeyboardInterrupt propagate instead of being swallowed.
            error_msg = "Error to write prediction to file."
            logging.critical(error_msg)
            return render.error([error_msg], "/survey/", "START")
        finally:
            prediction_file.close()
        with open(os.path.join(strategy_dir, "start"), 'w') as start:
            now = datetime.datetime.now()
            start.write(now.strftime("%Y%m%d%H%M%S"))
        logging.debug("Saved prediction to file at %s/%s" %
                      (user_id, strategy))