def main():
    """Parse the command-line flags and print event recommendations.

    Events are fetched and printed only when ``--today`` or ``--all`` is
    given; with neither flag the usage help is printed instead.
    """
    # Build the parser exactly once so the description is not thrown away.
    parser = argparse.ArgumentParser(description='Nyc Event Recommender')
    parser.add_argument('-t',
                        '--today',
                        action='store_true',
                        help="Show today's events")
    parser.add_argument('-a',
                        '--all',
                        action='store_true',
                        help='Show events all week')
    parser.add_argument('-j',
                        '--json',
                        action='store_true',
                        help='Show events in json format')
    args = parser.parse_args()

    if not (args.today or args.all):
        parser.print_help()
        return

    # --all only enables the run; Recommender receives (today, json).
    recommender = Recommender(args.today, args.json)
    events = recommender.get_recommendation()
    print(events)
Ejemplo n.º 2
0
class RecommenderTests(unittest2.TestCase):
    """Checks Recommender strategy selection and recommendation output."""

    @classmethod
    def setUpClass(cls):
        # A single Recommender built on the sample test data is shared by
        # every test in this class.
        config = Config()
        config.popcon_index = "test_data/.sample_pxi"
        config.popcon_dir = "test_data/popcon_dir"
        config.clusters_dir = "test_data/clusters_dir"
        cls.rec = Recommender(config)

    def test_set_strategy(self):
        """Each strategy code must select the matching strategy object."""
        content_modes = (("cb", "full"), ("cbt", "tag"), ("cbd", "desc"))
        for code, expected_content in content_modes:
            self.rec.set_strategy(code)
            self.assertIsInstance(self.rec.strategy, ContentBasedStrategy)
            self.assertEqual(self.rec.strategy.content, expected_content)

        self.rec.set_strategy("col")
        self.assertIsInstance(self.rec.strategy, CollaborativeStrategy)

    def test_get_recommendation(self):
        """A small profile must yield a non-empty RecommendationResult."""
        profile = {"inkscape": 1, "gimp": 1, "eog": 1}
        outcome = self.rec.get_recommendation(User(profile))
        self.assertIsInstance(outcome, RecommendationResult)
        self.assertGreater(len(outcome.item_score), 0)
Ejemplo n.º 3
0
def run_strategy(cfg, sample_file):
    """Run the ROC experiment over every user listed in *sample_file*.

    Each line of *sample_file* is taken as a user id whose submission path is
    ``cfg.popcon_dir/<first two chars of the id>/<id>``. For every user,
    ``iterations`` (a module-level name) rounds are executed: 90% of the
    profile is randomly moved into a held-out sample, a recommendation is
    requested for the remaining items, and the ranking is recorded against
    the held-out sample. ``plot_roc`` is called twice and a
    ``-roc.jpg.comment`` summary is written under
    ``results/roc-sample/<sample file name>/``.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    population_sample = []
    sample_str = sample_file.split('/')[-1]
    with open(sample_file, 'r') as f:
        # Iterate the file lazily instead of materialising readlines().
        for line in f:
            user_id = line.strip('\n')
            population_sample.append(
                os.path.join(cfg.popcon_dir, user_id[:2], user_id))
    sample_dir = "results/roc-sample/%s" % sample_str
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    log_file = os.path.join(sample_dir, label["values"])

    # n iterations per population user
    for submission_file in population_sample:
        user = PopconSystem(submission_file)
        user.filter_pkg_profile(cfg.pkgs_filter)
        user.maximal_pkg_profile()
        for n in range(iterations):
            # Fill sample profile
            profile_len = len(user.pkg_profile)
            item_score = {}
            for pkg in user.pkg_profile:
                item_score[pkg] = user.item_score[pkg]
            sample = {}
            sample_size = int(profile_len * 0.9)
            for i in range(sample_size):
                # random.choice needs an indexable sequence; on Python 3
                # dict.keys() is a view, so copy the keys into a list.
                key = random.choice(list(item_score))
                sample[key] = item_score.pop(key)
            iteration_user = User(item_score)
            recommendation = rec.get_recommendation(iteration_user, repo_size)
            if hasattr(recommendation, "ranking"):
                results.add_result(recommendation.ranking, sample)

    plot_roc(results, log_file)
    plot_roc(results, log_file, 1)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" %
                (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write(
            "# threshold\tmean_fpr\tdev_fpr\t\tmean_tpr\tdev_tpr\t\tcoverage\n")  # noqa
        # One row per threshold: mean/std of FPR, mean/std of recall, coverage.
        for size in results.thresholds:
            f.write("%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                    (size, numpy.mean(results.fpr[size]),
                     numpy.std(results.fpr[size]),
                     numpy.mean(results.recall[size]),
                     numpy.std(results.recall[size]),
                     numpy.mean(results.coverage(size))))
Ejemplo n.º 4
0
def run_strategy(cfg, sample_file):
    """Run the ROC experiment over every user listed in *sample_file*.

    Each line of *sample_file* is taken as a user id whose submission path is
    ``cfg.popcon_dir/<first two chars of the id>/<id>``. For every user,
    ``iterations`` (a module-level name) rounds are executed: 90% of the
    profile is randomly moved into a held-out sample, a recommendation is
    requested for the remaining items, and the ranking is recorded against
    the held-out sample. ``plot_roc`` is called twice and a
    ``-roc.jpg.comment`` summary is written under
    ``results/roc-sample/<sample file name>/``.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    population_sample = []
    sample_str = sample_file.split('/')[-1]
    with open(sample_file, 'r') as f:
        # Iterate the file lazily instead of materialising readlines().
        for line in f:
            user_id = line.strip('\n')
            population_sample.append(
                os.path.join(cfg.popcon_dir, user_id[:2], user_id))
    sample_dir = "results/roc-sample/%s" % sample_str
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    log_file = os.path.join(sample_dir, label["values"])

    # n iterations per population user
    for submission_file in population_sample:
        user = PopconSystem(submission_file)
        user.filter_pkg_profile(cfg.pkgs_filter)
        user.maximal_pkg_profile()
        for n in range(iterations):
            # Fill sample profile
            profile_len = len(user.pkg_profile)
            item_score = {}
            for pkg in user.pkg_profile:
                item_score[pkg] = user.item_score[pkg]
            sample = {}
            sample_size = int(profile_len * 0.9)
            for i in range(sample_size):
                # random.choice needs an indexable sequence; on Python 3
                # dict.keys() is a view, so copy the keys into a list.
                key = random.choice(list(item_score))
                sample[key] = item_score.pop(key)
            iteration_user = User(item_score)
            recommendation = rec.get_recommendation(iteration_user, repo_size)
            if hasattr(recommendation, "ranking"):
                results.add_result(recommendation.ranking, sample)

    plot_roc(results, log_file)
    plot_roc(results, log_file, 1)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" % (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write(
            "# threshold\tmean_fpr\tdev_fpr\t\tmean_tpr\tdev_tpr\t\tcoverage\n"
        )  # noqa
        # One row per threshold: mean/std of FPR, mean/std of recall, coverage.
        for size in results.thresholds:
            f.write(
                "%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                (size, numpy.mean(results.fpr[size]),
                 numpy.std(results.fpr[size]),
                 numpy.mean(results.recall[size]),
                 numpy.std(results.recall[size]),
                 numpy.mean(results.coverage(size))))
Ejemplo n.º 5
0
def read_stream():
    """Follow the Twitter user stream and recommend when the bot is mentioned.

    Every message is logged. When a message mentions the bot (``Config.name``
    without ``@``), its author and tweet are stored, a recommendation is
    computed, and the recommendations are distributed.
    """
    credentials = OAuth(Config.access_token, Config.access_token_secret,
                        Config.api_key, Config.api_secret)
    twitter_user_stream = TwitterStream(auth=credentials,
                                        domain='userstream.twitter.com')

    for msg in twitter_user_stream.user():
        logging.info(msg)

        # Messages without entities cannot carry a mention of the bot.
        if "entities" not in msg:
            continue

        # Collect every mention that targets the bot's screen name.
        bot_mentions = [
            mention for mention in msg["entities"]["user_mentions"]
            if mention["screen_name"] == Config.name.replace("@", "")
        ]
        if not bot_mentions:
            continue

        user_id = UserDao.add_user(msg["user"]["screen_name"],
                                   msg["user"]["id"])
        UserTweetDao.create_user_tweet(user_id, msg["id"], msg["text"], msg)
        Recommender.get_recommendation()
        distribute_recommendations()
Ejemplo n.º 6
0
def run_strategy(cfg, user):
    """Evaluate *user* for every weighting scheme and sample proportion.

    For each ``(weight, bm25_k1)`` pair in the module-level ``weighting`` a
    Recommender is built, and for each value in ``sample_proportions`` the
    experiment runs ``iterations`` rounds: the given proportion of the
    profile is randomly held out, a recommendation is requested for the
    remaining items, and the ranking is scored against the held-out sample.
    A text summary is written to ``results/strategies/<label values>`` and
    ``plot_summary`` is called with the aggregated metrics.
    """
    for weight in weighting:
        cfg.weight = weight[0]
        cfg.bm25_k1 = weight[1]
        rec = Recommender(cfg)
        repo_size = rec.items_repository.get_doccount()
        for proportion in sample_proportions:
            results = ExperimentResults(repo_size)
            label = get_label(cfg, proportion)
            log_file = "results/strategies/" + label["values"]
            for n in range(iterations):
                # Fill sample profile
                profile_size = len(user.pkg_profile)
                item_score = {}
                for pkg in user.pkg_profile:
                    item_score[pkg] = user.item_score[pkg]
                sample = {}
                sample_size = int(profile_size * proportion)
                for i in range(sample_size):
                    # random.choice needs an indexable sequence; on Python 3
                    # dict.keys() is a view, so copy the keys into a list.
                    key = random.choice(list(item_score))
                    sample[key] = item_score.pop(key)
                iteration_user = User(item_score)
                recommendation = rec.get_recommendation(
                    iteration_user, repo_size)
                write_recall_log(label, n, sample, recommendation,
                                 profile_size, repo_size, log_file)
                if hasattr(recommendation, "ranking"):
                    results.add_result(recommendation.ranking, sample)
            with open(log_file, 'w') as f:
                # Averages of the per-iteration metrics at size 10.
                precision_10 = sum(results.precision[10]) / len(
                    results.precision[10])
                f1_10 = sum(results.f1[10]) / len(results.f1[10])
                f05_10 = sum(results.f05[10]) / len(results.f05[10])
                # `recommendation` is the one left over from the last
                # iteration of the loop above.
                f.write("# %s\n# %s\n\ncoverage %d\n\n" %
                        (label["description"], label["values"],
                         recommendation.size))
                f.write("# best results (recommendation size; metric)\n")
                f.write(
                    "precision (%d; %.2f)\nf1 (%d; %.2f)\nf05 (%d; %.2f)\n\n" %
                    (results.best_precision()[0], results.best_precision()[1],
                     results.best_f1()[0], results.best_f1()[1],
                     results.best_f05()[0], results.best_f05()[1]))
                f.write(
                    "# recommendation size 10\nprecision (10; %.2f)\nf1 (10; %.2f)\nf05 (10; %.2f)"
                    %  # noqa
                    (precision_10, f1_10, f05_10))
            precision = results.get_precision_summary()
            recall = results.get_recall_summary()
            f1 = results.get_f1_summary()
            f05 = results.get_f05_summary()
            accuracy = results.get_accuracy_summary()
            plot_summary(precision, recall, f1, f05, accuracy, log_file)
Ejemplo n.º 7
0
def do_recommendation(tweet, keyword_list="", delete_fails=False):
    """Store *tweet* and its author, then compute and distribute recommendations.

    When *keyword_list* is non-empty it is stored in place of the tweet text.
    Returns the value of ``distribute_recommendations()``, or 0 when the
    tweet has already been stored. *delete_fails* is currently unused.
    """
    # TODO only persist if there is a recommendation?
    author = tweet["user"]
    user = UserDao.add_user(author["screen_name"], author["id"])

    # A tweet that is already stored is not processed again.
    if UserTweetDao.is_existing_user_tweet(tweet["id"]):
        return 0

    tweet_text = keyword_list if len(keyword_list) > 0 else tweet["text"]

    UserTweetDao.create_user_tweet(user.id, tweet["id"], tweet_text, tweet)
    Recommender.get_recommendation()
    nr_distributed = distribute_recommendations()

    # TODO delete failed
    # if nr_distributed == 0 and delete_fails:
    #
    #     user.delete()
    #     pass

    return nr_distributed
Ejemplo n.º 8
0
def run_strategy(cfg, user):
    """Evaluate *user* for every weighting scheme and sample proportion.

    For each ``(weight, bm25_k1)`` pair in the module-level ``weighting`` a
    Recommender is built, and for each value in ``sample_proportions`` the
    experiment runs ``iterations`` rounds: the given proportion of the
    profile is randomly held out, a recommendation is requested for the
    remaining items, and the ranking is scored against the held-out sample.
    A text summary is written to ``results/strategies/<label values>`` and
    ``plot_summary`` is called with the aggregated metrics.
    """
    for weight in weighting:
        cfg.weight = weight[0]
        cfg.bm25_k1 = weight[1]
        rec = Recommender(cfg)
        repo_size = rec.items_repository.get_doccount()
        for proportion in sample_proportions:
            results = ExperimentResults(repo_size)
            label = get_label(cfg, proportion)
            log_file = "results/strategies/" + label["values"]
            for n in range(iterations):
                # Fill sample profile
                profile_size = len(user.pkg_profile)
                item_score = {}
                for pkg in user.pkg_profile:
                    item_score[pkg] = user.item_score[pkg]
                sample = {}
                sample_size = int(profile_size * proportion)
                for i in range(sample_size):
                    # random.choice needs an indexable sequence; on Python 3
                    # dict.keys() is a view, so copy the keys into a list.
                    key = random.choice(list(item_score))
                    sample[key] = item_score.pop(key)
                iteration_user = User(item_score)
                recommendation = rec.get_recommendation(
                    iteration_user, repo_size)
                write_recall_log(
                    label, n, sample, recommendation, profile_size, repo_size,
                    log_file)
                if hasattr(recommendation, "ranking"):
                    results.add_result(recommendation.ranking, sample)
            with open(log_file, 'w') as f:
                # Averages of the per-iteration metrics at size 10.
                precision_10 = sum(results.precision[10]) / len(
                    results.precision[10])
                f1_10 = sum(results.f1[10]) / len(results.f1[10])
                f05_10 = sum(results.f05[10]) / len(results.f05[10])
                # `recommendation` is the one left over from the last
                # iteration of the loop above.
                f.write("# %s\n# %s\n\ncoverage %d\n\n" %
                        (label["description"], label["values"],
                         recommendation.size))
                f.write("# best results (recommendation size; metric)\n")
                f.write(
                    "precision (%d; %.2f)\nf1 (%d; %.2f)\nf05 (%d; %.2f)\n\n" %
                    (results.best_precision()[0], results.best_precision()[1],
                     results.best_f1()[0], results.best_f1()[1],
                     results.best_f05()[0], results.best_f05()[1]))
                f.write("# recommendation size 10\nprecision (10; %.2f)\nf1 (10; %.2f)\nf05 (10; %.2f)" %  # noqa
                        (precision_10, f1_10, f05_10))
            precision = results.get_precision_summary()
            recall = results.get_recall_summary()
            f1 = results.get_f1_summary()
            f05 = results.get_f05_summary()
            accuracy = results.get_accuracy_summary()
            plot_summary(precision, recall, f1, f05, accuracy, log_file)
Ejemplo n.º 9
0
class AppRecommender:
    """Request handler that turns an uploaded package list into recommendations."""

    def __init__(self):
        """Build the recommender and make sure the requests directory exists."""
        logging.info("Setting up AppRecommender...")
        self.cfg = Config()
        self.rec = Recommender(self.cfg)
        self.requests_dir = "/var/www/AppRecommender/src/web/requests/"
        if not os.path.exists(self.requests_dir):
            os.makedirs(self.requests_dir)

    def POST(self):
        """Handle an upload and render recommendations or an error page.

        The uploaded ``pkgs_file`` is saved into a fresh directory under
        ``self.requests_dir`` and parsed as a popularity-contest submission
        when its first line starts with ``POPULARITY-CONTEST``, otherwise as
        a plain package list. Profiles with fewer than 10 packages, and
        empty uploads, produce the error page.
        """
        profile_error = "Could not extract profile from uploaded file. It must have at least 10 applications."  # noqa
        web_input = web.input(pkgs_file={})

        # An empty upload leaves nothing to parse; previously the handler
        # went on to open a file that had never been written and crashed.
        if not web_input['pkgs_file'].value:
            return render.error([profile_error], "/", "RECOMMENDATION")

        user_dir = tempfile.mkdtemp(prefix='', dir=self.requests_dir)
        # The temporary directory name doubles as the user id.
        user_id = os.path.basename(user_dir)
        uploaded_file = os.path.join(user_dir, "uploaded_file")
        lines = web_input['pkgs_file'].file.readlines()
        with open(uploaded_file, "w") as uploaded:
            uploaded.writelines(lines)

        with open(uploaded_file) as uploaded:
            if uploaded.readline().startswith('POPULARITY-CONTEST'):
                user = PopconSystem(uploaded_file, user_id)
            else:
                user = PkgsListSystem(uploaded_file, user_id)

        if len(user.pkg_profile) < 10:
            return render.error([profile_error], "/", "RECOMMENDATION")

        self.rec.set_strategy("knn_eset")
        user.maximal_pkg_profile()
        prediction = self.rec.get_recommendation(user, 12).get_prediction()
        logging.info("Prediction for user %s", user.user_id)
        logging.info(str(prediction))

        # The first element of each prediction entry is used as the package name.
        recommendation = [result[0] for result in prediction]
        pkgs_details = []
        for pkg_name in recommendation:
            logging.info("Getting details of package %s", pkg_name)
            pkg = DebianPackage(pkg_name)
            pkg.load_summary()
            pkgs_details.append(pkg)

        if not pkgs_details:
            return render.error(
                ["No recommendation produced for the uploaded file."], "/",
                "RECOMMENDATION")  # noqa

        logging.info("Rendering recommendation...")
        return render.apprec(pkgs_details)
Ejemplo n.º 10
0
def run_strategy(cfg, user):
    """Run the ROC experiment for a single *user* and write its reports.

    ``iterations`` (a module-level name) rounds are executed: 90% of the
    profile is randomly moved into a held-out sample, a recommendation is
    requested for the remaining items, and the ranking is recorded against
    the held-out sample. A ``-roc.jpg.comment`` summary is written under
    ``results/roc-suite/<first 8 chars of user id>/<cfg.strategy>/``, copied
    to the ``.jpg.comment`` and ``-logscale.jpg.comment`` names, and the ROC
    and summary plots are generated.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    user_dir = ("results/roc-suite/%s/%s" % (user.user_id[:8], cfg.strategy))
    if not os.path.exists(user_dir):
        os.makedirs(user_dir)
    log_file = os.path.join(user_dir, label["values"])
    for n in range(iterations):
        # Fill sample profile
        profile_len = len(user.pkg_profile)
        item_score = {}
        for pkg in user.pkg_profile:
            item_score[pkg] = user.item_score[pkg]
        sample = {}
        sample_size = int(profile_len * 0.9)
        for i in range(sample_size):
            # random.choice needs an indexable sequence; on Python 3
            # dict.keys() is a view, so copy the keys into a list.
            key = random.choice(list(item_score))
            sample[key] = item_score.pop(key)
        iteration_user = User(item_score)
        recommendation = rec.get_recommendation(iteration_user, repo_size)
        write_recall_log(
            label, n, sample, recommendation, profile_len, repo_size, log_file)
        if hasattr(recommendation, "ranking"):
            results.add_result(recommendation.ranking, sample)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" %
                (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write("# threshold\tprecision\trecall\t\tf05\t\tcoverage\n")
        # One row per threshold with the mean of each metric.
        for size in results.thresholds:
            f.write("%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                    (size, numpy.mean(results.precision[size]),
                     numpy.mean(results.recall[size]),
                     numpy.mean(results.f05[size]),
                     numpy.mean(results.coverage(size))))
    # The same comment file is reused for the other two plot names.
    shutil.copy(log_file + "-roc.jpg.comment", log_file + ".jpg.comment")
    shutil.copy(log_file + "-roc.jpg.comment",
                log_file + "-logscale.jpg.comment")
    plot_roc(results, log_file)
    plot_summary(results, log_file)
Ejemplo n.º 11
0
def run_strategy(cfg, user):
    """Run the ROC experiment for a single *user* and write its reports.

    ``iterations`` (a module-level name) rounds are executed: 90% of the
    profile is randomly moved into a held-out sample, a recommendation is
    requested for the remaining items, and the ranking is recorded against
    the held-out sample. A ``-roc.jpg.comment`` summary is written under
    ``results/roc-suite/<first 8 chars of user id>/<cfg.strategy>/``, copied
    to the ``.jpg.comment`` and ``-logscale.jpg.comment`` names, and the ROC
    and summary plots are generated.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    user_dir = ("results/roc-suite/%s/%s" % (user.user_id[:8], cfg.strategy))
    if not os.path.exists(user_dir):
        os.makedirs(user_dir)
    log_file = os.path.join(user_dir, label["values"])
    for n in range(iterations):
        # Fill sample profile
        profile_len = len(user.pkg_profile)
        item_score = {}
        for pkg in user.pkg_profile:
            item_score[pkg] = user.item_score[pkg]
        sample = {}
        sample_size = int(profile_len * 0.9)
        for i in range(sample_size):
            # random.choice needs an indexable sequence; on Python 3
            # dict.keys() is a view, so copy the keys into a list.
            key = random.choice(list(item_score))
            sample[key] = item_score.pop(key)
        iteration_user = User(item_score)
        recommendation = rec.get_recommendation(iteration_user, repo_size)
        write_recall_log(label, n, sample, recommendation, profile_len,
                         repo_size, log_file)
        if hasattr(recommendation, "ranking"):
            results.add_result(recommendation.ranking, sample)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" % (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write("# threshold\tprecision\trecall\t\tf05\t\tcoverage\n")
        # One row per threshold with the mean of each metric.
        for size in results.thresholds:
            f.write(
                "%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                (size, numpy.mean(results.precision[size]),
                 numpy.mean(results.recall[size]),
                 numpy.mean(results.f05[size]),
                 numpy.mean(results.coverage(size))))
    # The same comment file is reused for the other two plot names.
    shutil.copy(log_file + "-roc.jpg.comment", log_file + ".jpg.comment")
    shutil.copy(log_file + "-roc.jpg.comment",
                log_file + "-logscale.jpg.comment")
    plot_roc(results, log_file)
    plot_summary(results, log_file)
Ejemplo n.º 12
0
def get_recommendation_index(user_data):
    """Return the recommended service indices for *user_data*.

    A Recommender sized to ``len(user_data)`` is created with freshly
    initialised weights, asked for a recommendation using a fixed goal list,
    and then updated with a fixed set of ratings. The returned list holds
    each chosen index incremented by one.
    """
    # init recommender and the weights at random
    rr = Recommender(n_fixed_feature=len(user_data))
    rr.init_weights()

    # TODO: need to add a user_goal and user_rating function?
    user_goal = [['time', 0.5], ['talk', 0.5], ['friendly', 0.5],
                 ['advice', 0.5]]
    user_rating = {
        'Group Therapy': (3, 5),
        'Vent Over Tea': (5, 5),
        '7 Cups': (4, 5)
    }

    # get the service index of top_k choices
    # The profile is the function argument; the original referred to an
    # undefined name `user_profile` here and in update_weights below.
    choices, _service_names = rr.get_recommendation(user_data,
                                                    user_goal,
                                                    verbose=False)
    choices_for_chatbot = [c + 1 for c in choices]

    user_scores = rr.process_user_rating(user_rating)
    rr.update_weights(user_data, user_scores, choices, verbose=False)

    return choices_for_chatbot
Ejemplo n.º 13
0
class AppRecommender:
    """Produces application recommendations for the local system's user."""

    def __init__(self):
        self.recommender = Recommender()

    def make_recommendation(self, recommendation_size,
                            no_auto_pkg_profile=False):
        """Compute, log and return a recommendation for the local user.

        *recommendation_size* is passed through to the recommender;
        *no_auto_pkg_profile* is passed through to ``get_user``. Start time,
        end time and elapsed seconds are logged.
        """
        begin_time = datetime.datetime.now()
        logging.info("Computation started at %s" % begin_time)
        # user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir,
        #                                                 "desktopapps"))
        user = self.get_user(no_auto_pkg_profile)
        user_recommendation = self.recommender.get_recommendation(
            user, recommendation_size)

        logging.info("Recommending applications for user %s" % user.user_id)
        logging.info(user_recommendation)

        end_time = datetime.datetime.now()
        logging.info("Computation completed at %s" % end_time)
        delta = end_time - begin_time
        # timedelta.seconds excludes whole days; total_seconds() gives the
        # full elapsed time.
        logging.info("Time elapsed: %d seconds." % delta.total_seconds())

        return user_recommendation

    def get_user(self, no_auto_pkg_profile):
        """Build the LocalSystem user profile filtered by "desktopapps".

        When *no_auto_pkg_profile* is true, ``no_auto_pkg_profile()`` is
        additionally applied to the user.
        """
        config = Config()

        user = LocalSystem()
        user.filter_pkg_profile(
            os.path.join(config.filters_dir, "desktopapps"))
        user.maximal_pkg_profile()

        if no_auto_pkg_profile:
            user.no_auto_pkg_profile()

        return user
Ejemplo n.º 14
0
 repo_size = rec.items_repository.get_doccount()
 results = ExperimentResults(repo_size)
 # n iterations for same recommender and user
 for n in range(iterations):
     # Fill sample profile
     profile_len = len(user.pkg_profile)
     item_score = {}
     for pkg in user.pkg_profile:
         item_score[pkg] = user.item_score[pkg]
     sample = {}
     sample_size = int(profile_len*0.9)
     for i in range(sample_size):
          key = random.choice(item_score.keys())
          sample[key] = item_score.pop(key)
     iteration_user = User(item_score)
     recommendation = rec.get_recommendation(iteration_user,threshold)
     if hasattr(recommendation,"ranking"):
         results.add_result(recommendation.ranking,sample)
         recommended[k] = recommended[k].union(recommendation.ranking)
 # save summary
 roc_point = results.get_roc_point()
 roc_summary[k].append(roc_point)
 precision = results.get_precision_summary()
 precision_summary[k].append(precision)
 f05 = results.get_f05_summary()
 f05_summary[k].append(f05)
 mcc = results.get_mcc_summary()
 mcc_summary[k].append(mcc)
 with open(log_file+"-k%.3d"%k,'a') as f:
     f.write("[%.2f,%.2f] \t%.4f \t%.4f \t%.4f\n" %
             (roc_point[0],roc_point[1],precision,f05,mcc))
Ejemplo n.º 15
0
 repo_size = rec.items_repository.get_doccount()
 p_10 = []
 f05_100 = []
 for n in range(iterations):
     # Fill sample profile
     profile_len = len(user.pkg_profile)
     item_score = {}
     for pkg in user.pkg_profile:
         item_score[pkg] = user.item_score[pkg]
     sample = {}
     sample_size = int(profile_len * 0.9)
     for i in range(sample_size):
         key = random.choice(item_score.keys())
         sample[key] = item_score.pop(key)
     iteration_user = User(item_score)
     recommendation = rec.get_recommendation(iteration_user, repo_size)
     if hasattr(recommendation, "ranking"):
         ranking = recommendation.ranking
         real = RecommendationResult(sample)
         predicted_10 = RecommendationResult(dict.fromkeys(ranking[:10], 1))
         evaluation = Evaluation(predicted_10, real, repo_size)
         p_10.append(evaluation.run(Precision()))
         predicted_100 = RecommendationResult(dict.fromkeys(ranking[:100], 1))
         evaluation = Evaluation(predicted_100, real, repo_size)
         f05_100.append(evaluation.run(F_score(0.5)))
         c_10[k][size] = c_10[k][size].union(recommendation.ranking[:10])
         c_100[k][size] = c_100[k][size].union(recommendation.ranking[:100])
 # save summary
 if p_10:
     p_10_summary[k][size].append(numpy.mean(p_10))
 if f05_100:
Ejemplo n.º 16
0
 repo_size = rec.items_repository.get_doccount()
 results = ExperimentResults(repo_size)
 # n iterations for same recommender and user
 for n in range(iterations):
     # Fill sample profile
     profile_len = len(user.pkg_profile)
     item_score = {}
     for pkg in user.pkg_profile:
         item_score[pkg] = user.item_score[pkg]
     sample = {}
     sample_size = int(profile_len * 0.9)
     for i in range(sample_size):
         key = random.choice(item_score.keys())
         sample[key] = item_score.pop(key)
     iteration_user = User(item_score)
     recommendation = rec.get_recommendation(
         iteration_user, threshold)
     if hasattr(recommendation, "ranking"):
         results.add_result(recommendation.ranking, sample)
         recommended[k] = recommended[
             k].union(recommendation.ranking)
 # save summary
 roc_point = results.get_roc_point()
 roc_summary[k].append(roc_point)
 precision = results.get_precision_summary()
 precision_summary[k].append(precision)
 f05 = results.get_f05_summary()
 f05_summary[k].append(f05)
 mcc = results.get_mcc_summary()
 mcc_summary[k].append(mcc)
 with open(log_file + "-k%.3d" % k, 'a') as f:
     f.write("[%.2f,%.2f] \t%.4f \t%.4f \t%.4f\n" %
Ejemplo n.º 17
0
class ContentBasedSuite(expsuite.PyExperimentSuite):
    """Experiment suite evaluating content-based recommendation strategies."""

    def reset(self, params, rep):
        """Prepare recommender, user profile and sample size for a run.

        Nothing is done unless ``params['name']`` starts with "content".
        """
        if not params['name'].startswith("content"):
            return

        cfg = Config()
        # if the index was not built yet
        # app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
        cfg.axi = "data/AppAxi"
        cfg.index_mode = "old"
        cfg.weight = params['weight']
        self.rec = Recommender(cfg)
        self.rec.set_strategy(params['strategy'])
        self.repo_size = self.rec.items_repository.get_doccount()
        self.user = LocalSystem()
        self.user.app_pkg_profile(self.rec.items_repository)
        self.user.no_auto_pkg_profile()
        self.sample_size = int(
            len(self.user.pkg_profile) * params['sample'])
        # iteration should be set to 10 in config file
        # self.profile_size = range(10,101,10)

    def iterate(self, params, rep, n):
        """Run iteration *n*: hold out a sample, recommend, log and plot.

        ``self.sample_size`` packages are randomly removed from the profile,
        a full-size recommendation is requested for the rest, the ranks of
        the held-out packages are written to a recall file, and metric
        curves are plotted for recommendation sizes 1, 101, 201, ...
        Returns a dict holding the entry at index 20 (size 2001) of each
        metric curve, or None when ``params['name']`` does not start with
        "content".
        """
        if not params['name'].startswith("content"):
            return None

        item_score = dict.fromkeys(self.user.pkg_profile, 1)
        # Prepare partition
        sample = {}
        for i in range(self.sample_size):
            # random.choice needs an indexable sequence; on Python 3
            # dict.keys() is a view, so copy the keys into a list.
            key = random.choice(list(item_score))
            sample[key] = item_score.pop(key)
        # Get full recommendation
        user = User(item_score)
        recommendation = self.rec.get_recommendation(user, self.repo_size)
        ranking = recommendation.ranking

        # First position of every package, so each held-out package is
        # looked up once instead of scanning the ranking twice.
        first_rank = {}
        for position, pkg in enumerate(ranking):
            first_rank.setdefault(pkg, position)
        notfound = []
        ranks = []
        for pkg in sample.keys():
            if pkg in first_rank:
                ranks.append(first_rank[pkg])
            else:
                notfound.append(pkg)

        # Write recall log
        recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
                      (params['strategy'], params[
                       'weight'], params['sample'], n)
        # The context manager closes the file even if a write fails.
        with open(recall_file, 'w') as output:
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, len(item_score), self.sample_size))
            for r in sorted(ranks):
                output.write(str(r) + "\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg + "\n")

        # Plot metrics summary
        accuracy = []
        precision = []
        recall = []
        f1 = []
        g = Gnuplot.Gnuplot()
        g('set style data lines')
        g.xlabel('Recommendation size')
        for size in range(1, len(ranking) + 1, 100):
            predicted = RecommendationResult(
                dict.fromkeys(ranking[:size], 1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted, real, self.repo_size)
            accuracy.append([size, evaluation.run(Accuracy())])
            precision.append([size, evaluation.run(Precision())])
            recall.append([size, evaluation.run(Recall())])
            f1.append([size, evaluation.run(F1())])
        g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
               Gnuplot.Data(precision, title="Precision"),
               Gnuplot.Data(recall, title="Recall"),
               Gnuplot.Data(f1, title="F1"))
        g.hardcopy(recall_file + "-plot.ps", enhanced=1, color=1)

        # Iteration log
        # NOTE(review): index 20 needs a ranking of at least 2001 items,
        # otherwise this raises IndexError — confirm that is acceptable.
        # NOTE(review): the 'recall:' key carries a trailing colon, which
        # looks like a typo; confirm with consumers before renaming it.
        result = {'iteration': n,
                  'weight': params['weight'],
                  'strategy': params['strategy'],
                  'accuracy': accuracy[20],
                  'precision': precision[20],
                  'recall:': recall[20],
                  'f1': f1[20]}
        return result
Ejemplo n.º 18
0
class Survey:
    """Web handler that produces one survey slide of recommendations.

    Each POST picks a recommendation strategy the user has not been shown
    yet, computes a prediction with it, stores the prediction under the
    user's submission directory and renders the packages for evaluation.
    """

    def __init__(self):
        """Build the recommender and ensure the submissions directory exists."""
        logging.info("Setting up survey...")
        self.cfg = Config()
        self.rec = Recommender(self.cfg)
        self.submissions_dir = "/var/www/AppRecommender/src/web/submissions/"
        if not os.path.exists(self.submissions_dir):
            os.makedirs(self.submissions_dir)
        self.strategies = ["cbh", "cbh_eset",
                           "knn", "knn_eset", "knn_plus",
                           "knnco"]

    def POST(self):
        """Handle a survey request and return the rendered page.

        Returns the survey slide on success, the "thanks" page once every
        strategy has been used for this user, or an error page when the
        request or the uploaded profile cannot be processed.
        """
        web_input = web.input(pkgs_file={})
        # Without a user id there is no submission directory to read from;
        # fail with an error page instead of a NameError further down.
        if 'user_id' not in web_input:
            error_msg = "Missing user identification in the request."
            logging.critical(error_msg)
            return render.error([error_msg], "/survey/", "START")
        user_id = web_input['user_id'].encode('utf8')
        # The id comes from the client and is used as a directory name below,
        # so refuse anything that could escape the submissions directory.
        if (user_id in ('', '.', '..') or
                os.path.basename(user_id) != user_id):
            error_msg = "Invalid user identification in the request."
            logging.critical(error_msg)
            return render.error([error_msg], "/survey/", "START")
        user_dir = os.path.join(self.submissions_dir, user_id)
        logging.info("New recommendation for user %s" % user_id)

        uploaded_file = os.path.join(user_dir, "uploaded_file")
        # The first line tells popularity-contest submissions apart from
        # plain package lists.
        with open(uploaded_file) as uploaded:
            if uploaded.readline().startswith('POPULARITY-CONTEST'):
                user = PopconSystem(uploaded_file, user_id)
            else:
                user = PkgsListSystem(uploaded_file, user_id)
        user.maximal_pkg_profile()
        if len(user.pkg_profile) < 10:
            error_msg = "Could not extract profile from uploaded file. It must have at least 10 applications."  # noqa
            logging.critical(error_msg)
            return render.error([error_msg], "/survey/", "START")

        # Check the remaining strategies and select a new one. Each strategy
        # already used has its own sub-directory directly under user_dir, so
        # only the first entry produced by os.walk is needed.
        top_level = next(os.walk(user_dir), None)
        if top_level is not None:
            used_strategies = top_level[1]
            strategies = [
                s for s in self.strategies if s not in used_strategies]
            logging.info("Already used strategies %s" % used_strategies)
        else:
            strategies = self.strategies
        if not strategies:
            return render.thanks(user_id)
        selected_strategy = random.choice(strategies)
        logging.info("Selected \'%s\' from %s" %
                     (selected_strategy, strategies))
        self.set_rec_strategy(selected_strategy)
        prediction = self.rec.get_recommendation(user, 10).get_prediction()
        logging.info("Prediction for user %s" % user_id)
        logging.info(str(prediction))
        # save_prediction returns an error page on failure; propagate it
        # instead of silently carrying on with an unsaved prediction.
        error_page = self.save_prediction(user_id, selected_strategy,
                                          prediction)
        if error_page is not None:
            return error_page

        # Load packages details
        pkgs_details = []
        for pkg_name, _rating in prediction:
            logging.info("Getting details of package %s" % pkg_name)
            pkg = DebianPackage(pkg_name)
            pkg.load_details()
            pkgs_details.append(pkg)

        if not pkgs_details:
            return render.error(
                ["No recommendation produced for the uploaded file."],
                "/survey/", "START")
        logging.info("Rendering survey slide...")
        return render.survey(pkgs_details, user_id, selected_strategy,
                             len(strategies))

    def set_rec_strategy(self, selected_strategy):
        """Configure the recommender for `selected_strategy` and return it.

        Every strategy currently shares the same parameters (k=10, n=20),
        which are passed straight to the recommender's `set_strategy`.
        """
        k = 10
        n = 20
        self.rec.set_strategy(selected_strategy, k, n)
        return selected_strategy

    def save_prediction(self, user_id, strategy, prediction):
        """Store `prediction` as CSV under the user's strategy directory.

        `prediction` is an iterable of (package, rating) pairs. A
        "prediction" file (header plus one row per package, with an empty
        "evaluation" column) and a "start" file holding the current
        timestamp are written to <submissions_dir>/<user_id>/<strategy>/.

        Returns None on success, or a rendered error page when the
        prediction file cannot be written.
        """
        strategy_dir = os.path.join(self.submissions_dir, user_id, strategy)
        if not os.path.exists(strategy_dir):
            os.makedirs(strategy_dir)
        try:
            with open(os.path.join(strategy_dir, "prediction"),
                      "w") as prediction_file:
                writer = csv.writer(prediction_file)
                writer.writerow(('ranking', 'rating', 'package', 'evaluation'))
                for ranking, (pkg, rating) in enumerate(prediction):
                    writer.writerow((ranking, "%.4f" % rating, pkg, ""))
        except Exception:
            error_msg = "Error to write prediction to file."
            # Keep the traceback in the log so the failure can be diagnosed.
            logging.critical(error_msg, exc_info=True)
            return render.error([error_msg], "/survey/", "START")
        with open(os.path.join(strategy_dir, "start"), 'w') as start:
            start.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
        logging.debug("Saved prediction to file at %s/%s" %
                      (user_id, strategy))
Ejemplo n.º 19
0
 # Fragment of a cross-validation style evaluation: for each iteration, hide
 # part of the user's profile, ask for a recommendation based on the rest and
 # score how well the hidden part is recovered.
 # NOTE(review): `rec`, `user`, `iterations`, `c_10`, `c_100`, `k` and `size`
 # are defined outside this fragment.
 repo_size = rec.items_repository.get_doccount()
 p_10 = []      # one Precision result per iteration, from the top-10 ranking
 f05_100 = []   # one F_score(0.5) result per iteration, from the top-100
 for n in range(iterations):
     # Fill sample profile
     profile_len = len(user.pkg_profile)
     # Work on a copy of the scores restricted to the profile packages.
     item_score = {}
     for pkg in user.pkg_profile:
         item_score[pkg] = user.item_score[pkg]
     # Move a random 90% of the profile into `sample`; whatever is left in
     # `item_score` becomes the profile used to ask for a recommendation.
     sample = {}
     sample_size = int(profile_len * 0.9)
     for i in range(sample_size):
         # NOTE(review): random.choice needs an indexable sequence, so this
         # relies on keys() returning a list (Python 2) - confirm.
         key = random.choice(item_score.keys())
         sample[key] = item_score.pop(key)
     iteration_user = User(item_score)
     recommendation = rec.get_recommendation(
         iteration_user, repo_size)
     # Results without a `ranking` attribute are skipped for this iteration.
     if hasattr(recommendation, "ranking"):
         ranking = recommendation.ranking
         # The held-out sample is what the prediction is compared against.
         real = RecommendationResult(sample)
         predicted_10 = RecommendationResult(
             dict.fromkeys(ranking[:10], 1))
         evaluation = Evaluation(predicted_10, real, repo_size)
         p_10.append(evaluation.run(Precision()))
         predicted_100 = RecommendationResult(
             dict.fromkeys(ranking[:100], 1))
         evaluation = Evaluation(predicted_100, real, repo_size)
         f05_100.append(evaluation.run(F_score(0.5)))
         # Accumulate every item that ever appeared in a top-10 / top-100.
         # presumably c_10/c_100 map [k][size] to sets - verify in the caller.
         c_10[k][size] = c_10[k][size].union(
             recommendation.ranking[:10])
         c_100[k][size] = c_100[k][size].union(
             recommendation.ranking[:100])
Ejemplo n.º 20
0
"""

import os
import sys
sys.path.insert(0,'../')
import logging
import random
import datetime

from config import Config
from recommender import Recommender
from user import LocalSystem, RandomPopcon

if __name__ == '__main__':
    # Recommend applications for the local system and log how long the
    # whole computation took.
    begin_time = datetime.datetime.now()
    cfg = Config()
    rec = Recommender(cfg)
    logging.info("Computation started at %s", begin_time)
    # Alternative user source, kept for reference:
    # user = RandomPopcon(cfg.popcon_dir,
    #                     os.path.join(cfg.filters_dir, "desktopapps"))
    user = LocalSystem()
    user.filter_pkg_profile(os.path.join(cfg.filters_dir, "desktopapps"))
    user.maximal_pkg_profile()

    logging.info("Recommending applications for user %s", user.user_id)
    logging.info(rec.get_recommendation(user, 20))

    end_time = datetime.datetime.now()
    logging.info("Computation completed at %s", end_time)
    delta = end_time - begin_time
    # timedelta.seconds excludes whole days; total_seconds() gives the full
    # elapsed time.
    logging.info("Time elapsed: %d seconds.", delta.total_seconds())
Ejemplo n.º 21
0
class Survey:
    """Web handler that produces one survey slide of recommendations.

    Each POST picks a recommendation strategy the user has not been shown
    yet, computes a prediction with it, stores the prediction under the
    user's submission directory and renders the packages for evaluation.
    """

    def __init__(self):
        """Build the recommender and ensure the submissions directory exists."""
        logging.info("Setting up survey...")
        self.cfg = Config()
        self.rec = Recommender(self.cfg)
        self.submissions_dir = "/var/www/AppRecommender/src/web/submissions/"
        if not os.path.exists(self.submissions_dir):
            os.makedirs(self.submissions_dir)
        self.strategies = [
            "cbh", "cbh_eset", "knn", "knn_eset", "knn_plus", "knnco"
        ]

    def POST(self):
        """Handle a survey request and return the rendered page.

        Returns the survey slide on success, the "thanks" page once every
        strategy has been used for this user, or an error page when the
        request or the uploaded profile cannot be processed.
        """
        web_input = web.input(pkgs_file={})
        # Without a user id there is no submission directory to read from;
        # fail with an error page instead of a NameError further down.
        if 'user_id' not in web_input:
            error_msg = "Missing user identification in the request."
            logging.critical(error_msg)
            return render.error([error_msg], "/survey/", "START")
        user_id = web_input['user_id'].encode('utf8')
        # The id comes from the client and is used as a directory name below,
        # so refuse anything that could escape the submissions directory.
        if (user_id in ('', '.', '..') or
                os.path.basename(user_id) != user_id):
            error_msg = "Invalid user identification in the request."
            logging.critical(error_msg)
            return render.error([error_msg], "/survey/", "START")
        user_dir = os.path.join(self.submissions_dir, user_id)
        logging.info("New recommendation for user %s" % user_id)

        uploaded_file = os.path.join(user_dir, "uploaded_file")
        # The first line tells popularity-contest submissions apart from
        # plain package lists.
        with open(uploaded_file) as uploaded:
            if uploaded.readline().startswith('POPULARITY-CONTEST'):
                user = PopconSystem(uploaded_file, user_id)
            else:
                user = PkgsListSystem(uploaded_file, user_id)
        user.maximal_pkg_profile()
        if len(user.pkg_profile) < 10:
            error_msg = "Could not extract profile from uploaded file. It must have at least 10 applications."  # noqa
            logging.critical(error_msg)
            return render.error([error_msg], "/survey/", "START")

        # Check the remaining strategies and select a new one. Each strategy
        # already used has its own sub-directory directly under user_dir, so
        # only the first entry produced by os.walk is needed.
        top_level = next(os.walk(user_dir), None)
        if top_level is not None:
            used_strategies = top_level[1]
            strategies = [
                s for s in self.strategies if s not in used_strategies
            ]
            logging.info("Already used strategies %s" % used_strategies)
        else:
            strategies = self.strategies
        if not strategies:
            return render.thanks(user_id)
        selected_strategy = random.choice(strategies)
        logging.info("Selected \'%s\' from %s" %
                     (selected_strategy, strategies))
        self.set_rec_strategy(selected_strategy)
        prediction = self.rec.get_recommendation(user, 10).get_prediction()
        logging.info("Prediction for user %s" % user_id)
        logging.info(str(prediction))
        # save_prediction returns an error page on failure; propagate it
        # instead of silently carrying on with an unsaved prediction.
        error_page = self.save_prediction(user_id, selected_strategy,
                                          prediction)
        if error_page is not None:
            return error_page

        # Load packages details
        pkgs_details = []
        for pkg_name, _rating in prediction:
            logging.info("Getting details of package %s" % pkg_name)
            pkg = DebianPackage(pkg_name)
            pkg.load_details()
            pkgs_details.append(pkg)

        if not pkgs_details:
            return render.error(
                ["No recommendation produced for the uploaded file."],
                "/survey/", "START")
        logging.info("Rendering survey slide...")
        return render.survey(pkgs_details, user_id, selected_strategy,
                             len(strategies))

    def set_rec_strategy(self, selected_strategy):
        """Configure the recommender for `selected_strategy` and return it.

        Every strategy currently shares the same parameters (k=10, n=20),
        which are passed straight to the recommender's `set_strategy`.
        """
        k = 10
        n = 20
        self.rec.set_strategy(selected_strategy, k, n)
        return selected_strategy

    def save_prediction(self, user_id, strategy, prediction):
        """Store `prediction` as CSV under the user's strategy directory.

        `prediction` is an iterable of (package, rating) pairs. A
        "prediction" file (header plus one row per package, with an empty
        "evaluation" column) and a "start" file holding the current
        timestamp are written to <submissions_dir>/<user_id>/<strategy>/.

        Returns None on success, or a rendered error page when the
        prediction file cannot be written.
        """
        strategy_dir = os.path.join(self.submissions_dir, user_id, strategy)
        if not os.path.exists(strategy_dir):
            os.makedirs(strategy_dir)
        try:
            with open(os.path.join(strategy_dir, "prediction"),
                      "w") as prediction_file:
                writer = csv.writer(prediction_file)
                writer.writerow(('ranking', 'rating', 'package', 'evaluation'))
                for ranking, (pkg, rating) in enumerate(prediction):
                    writer.writerow((ranking, "%.4f" % rating, pkg, ""))
        except Exception:
            error_msg = "Error to write prediction to file."
            # Keep the traceback in the log so the failure can be diagnosed.
            logging.critical(error_msg, exc_info=True)
            return render.error([error_msg], "/survey/", "START")
        with open(os.path.join(strategy_dir, "start"), 'w') as start:
            start.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
        logging.debug("Saved prediction to file at %s/%s" %
                      (user_id, strategy))