Example #1
0
def eval_time_within_group(repo_group_names, time_range=8):
    """Plot group-averaged precision / recall / F1 curves for a repo group.

    For every repository in ``repo_group_names`` the time-aware similarity
    result is fetched from ``compute.find_similar_repos_considering_time``,
    ordered by descending similarity, and scored at every cutoff depth with
    the group itself passed as the reference set. The per-depth scores are
    summed over the group, divided by the group size, and handed to
    ``plot_f1score`` and ``plot_precision_recall``.

    Args:
        repo_group_names: Sized collection of repository names. It is both
            the set of query repositories and the reference set passed to
            the scoring function.
        time_range: Forwarded as the second argument of
            ``compute.find_similar_repos_considering_time``. Defaults to 8,
            the value that used to be hard-coded here.

    Returns:
        None. The function only produces plots. An empty group returns
        immediately without plotting.
    """
    group_size = len(repo_group_names)
    if group_size == 0:
        # Nothing to average; dividing by the group size below would fail.
        return

    # One accumulator slot per cutoff depth.
    max_depth = compute.NUM_TOP_REPOS
    sum_precision_list = [0] * max_depth
    sum_recall_list = [0] * max_depth
    sum_f1score_list = [0] * max_depth

    for current_repo_name in repo_group_names:
        res_dict = compute.find_similar_repos_considering_time(
            current_repo_name, time_range)
        rank_list = [repo_name for repo_name, _ in
                     sorted(res_dict.items(), key=lambda x: -x[1])]

        # The accumulators hold max_depth slots, so a longer ranking is
        # truncated instead of indexing past their end.
        for depth in range(min(len(rank_list), max_depth)):
            # NOTE(review): ``eval`` is called with (cutoff, ranking,
            # reference set) and unpacked into three values, so it must be
            # a project function shadowing the builtin; it is defined
            # outside this view.
            precision, recall, f1score = eval(
                depth + 1, rank_list, repo_group_names)
            sum_precision_list[depth] += precision
            sum_recall_list[depth] += recall
            sum_f1score_list[depth] += f1score

    # Turn the per-depth sums into per-depth means over the group.
    sum_precision_list = [x / group_size for x in sum_precision_list]
    sum_recall_list = [x / group_size for x in sum_recall_list]
    sum_f1score_list = [x / group_size for x in sum_f1score_list]

    title = 'repo group:' + str(repo_group_names)
    plot_f1score(f1score_list=sum_f1score_list,
                 precision_list=sum_precision_list,
                 recall_list=sum_recall_list, title=title)

    plot_precision_recall(recall_list=sum_recall_list,
                          precision_list=sum_precision_list, title=title)
Example #2
0
def find_similar_repo(repo_name, method_name, rank=True):
    """Find repositories similar to ``repo_name`` with the chosen method.

    Args:
        repo_name: Name of the query repository, forwarded to the backend.
        method_name: One of "user_based_jaccard",
            "user_based_jaccard_withtime", "user_based_lda" or
            "user_based_tfidf". "text_based_lda", "text_based_tfidf" and
            any other value select no backend and yield an empty result.
        rank: When true (the default), convert the backend result into a
            mapping of repository name to 1-based rank, where rank 1 is the
            highest similarity. When false, return the backend result
            unchanged.

    Returns:
        A dict of name -> rank when ``rank`` is true, otherwise whatever
        the backend returned (an empty dict when no backend matched).
    """
    # Lambdas defer both the backend call and the module lookup until the
    # matching method is actually selected.
    backends = {
        "user_based_jaccard":
            lambda: user_based_jaccard.find_similar_repos(repo_name),
        "user_based_jaccard_withtime":
            lambda: user_based_jaccard.find_similar_repos_considering_time(
                repo_name, 2),
        "user_based_lda":
            lambda: user_based_model.find_similar_repos(repo_name, "lda"),
        "user_based_tfidf":
            lambda: user_based_model.find_similar_repos(repo_name, "tfidf"),
    }

    # The text-based methods have no backend here, so they fall through to
    # an empty result just like an unrecognised method name.
    backend = backends.get(method_name)
    scores = backend() if backend is not None else {}

    if not rank:
        return scores

    # Highest similarity first; ties keep the backend's iteration order.
    ordered = sorted(scores.items(), key=lambda pair: -pair[1])
    return {name: position
            for position, (name, _) in enumerate(ordered, start=1)}
Example #3
0
def eval_time_with_range(test_repo_name):
    """Plot precision / recall / F1 curves for one repo per time range.

    For each time range the similarity result for ``test_repo_name`` is
    fetched from ``compute.find_similar_repos_considering_time``, ordered
    by descending similarity, and scored at every cutoff depth from 1 to
    the length of the ranking. The resulting curves are passed to
    ``plot_f1score`` and ``plot_precision_recall``, titled with the time
    range.

    Args:
        test_repo_name: Name of the repository to evaluate.

    Returns:
        None. The function only produces plots.
    """
    # Only ranges 1 and 2 are evaluated; a wider sweep over
    # [0.5, 1, 2, 4, 8] was used previously.
    for time_range in (1, 2):
        similarity_by_repo = compute.find_similar_repos_considering_time(
            test_repo_name, time_range)
        ranked_pairs = sorted(similarity_by_repo.items(),
                              key=lambda pair: -pair[1])
        rank_list = [name for name, _ in ranked_pairs]

        # One (precision, recall, f1score) triple per cutoff depth.
        # NOTE(review): ``eval`` must be a project function shadowing the
        # builtin, and ``showcase_js`` is presumably the reference set of
        # relevant repos; both are defined outside this view.
        scores = [eval(cutoff, rank_list, showcase_js)
                  for cutoff in range(1, len(rank_list) + 1)]
        precision_list = [precision for precision, _, _ in scores]
        recall_list = [recall for _, recall, _ in scores]
        f1score_list = [f1score for _, _, f1score in scores]

        title = 'time_range=' + str(time_range)
        plot_f1score(f1score_list=f1score_list,
                     precision_list=precision_list,
                     recall_list=recall_list, title=title)

        plot_precision_recall(recall_list=recall_list,
                              precision_list=precision_list, title=title)