def eval_time_within_group(repo_group_names):
    """Plot precision/recall/F1 curves averaged over every repo in a group.

    For each repo in ``repo_group_names`` the time-aware similar repos are
    fetched with a time range of 8 and ranked by descending similarity.
    Every cut-off depth of that ranking is scored with the module's
    ``eval`` helper, passing the whole group as its third argument
    (presumably the ground-truth set -- confirm against ``eval``).  The
    per-depth scores are summed over the group, divided by the group size
    and handed to ``plot_f1score`` and ``plot_precision_recall``.

    Args:
        repo_group_names: Sized iterable of repo names forming one group.

    Returns:
        None.  An empty group produces no plots.
    """
    group_size = len(repo_group_names)
    if group_size == 0:
        # Nothing to average; bail out before dividing by the group size.
        return

    # One accumulator slot per cut-off depth (the file notes
    # NUM_TOP_REPOS = 100).
    max_depth = compute.NUM_TOP_REPOS
    precision_sums = [0] * max_depth
    recall_sums = [0] * max_depth
    f1score_sums = [0] * max_depth

    for current_repo_name in repo_group_names:
        res_dict = compute.find_similar_repos_considering_time(current_repo_name, 8)
        rank_list = [
            name
            for name, _ in sorted(res_dict.items(), key=lambda item: -item[1])
        ]
        # Clamp to the accumulator size so a ranking longer than
        # NUM_TOP_REPOS cannot index past the end of the sum lists.
        for depth in range(min(len(rank_list), max_depth)):
            precision, recall, f1score = eval(depth + 1, rank_list, repo_group_names)
            precision_sums[depth] += precision
            recall_sums[depth] += recall
            f1score_sums[depth] += f1score

    # Depths a repo never reached contribute 0 to the sum but still count
    # in the divisor, exactly as before.
    avg_precision = [total / group_size for total in precision_sums]
    avg_recall = [total / group_size for total in recall_sums]
    avg_f1score = [total / group_size for total in f1score_sums]

    title = 'repo group:' + str(repo_group_names)
    plot_f1score(f1score_list=avg_f1score,
                 precision_list=avg_precision,
                 recall_list=avg_recall,
                 title=title)
    plot_precision_recall(recall_list=avg_recall,
                          precision_list=avg_precision,
                          title=title)
def find_similar_repo(repo_name, method_name, rank=True):
    """Return the repos similar to ``repo_name`` according to ``method_name``.

    Args:
        repo_name: Name of the repo to find neighbours for.
        method_name: One of ``"user_based_jaccard"``,
            ``"user_based_jaccard_withtime"``, ``"user_based_lda"``,
            ``"user_based_tfidf"``, ``"text_based_lda"`` or
            ``"text_based_tfidf"``.  The two text-based names have no
            backend wired in and, like any unknown name, give an empty
            result.
        rank: When true, similarities are replaced by 1-based ranks
            (1 = most similar).

    Returns:
        With ``rank`` false, whatever the selected backend returned (an
        empty dict when no backend applies).  With ``rank`` true, a dict
        mapping repo name to its rank by descending similarity.
    """
    # Wrapped in lambdas so only the selected backend is actually touched.
    backends = {
        "user_based_jaccard":
            lambda: user_based_jaccard.find_similar_repos(repo_name),
        "user_based_jaccard_withtime":
            lambda: user_based_jaccard.find_similar_repos_considering_time(repo_name, 2),
        "user_based_lda":
            lambda: user_based_model.find_similar_repos(repo_name, "lda"),
        "user_based_tfidf":
            lambda: user_based_model.find_similar_repos(repo_name, "tfidf"),
    }

    lookup = backends.get(method_name)
    scores = lookup() if lookup is not None else {}

    if not rank:
        return scores

    # Highest similarity first; ties keep the backend's iteration order.
    ordered = sorted(scores.items(), key=lambda pair: -pair[1])
    return {name: position for position, (name, _) in enumerate(ordered, start=1)}
def eval_time_with_range(test_repo_name):
    """Plot precision/recall/F1 curves for one repo at several time ranges.

    For each time range the time-aware similar repos of ``test_repo_name``
    are ranked by descending similarity, every cut-off depth is scored
    with the module's ``eval`` helper against ``showcase_js``, and the
    resulting curves are passed to ``plot_f1score`` and
    ``plot_precision_recall`` with the time range in the title.

    Args:
        test_repo_name: Name of the repo whose neighbours are evaluated.

    Returns:
        None.
    """
    # Only 1 and 2 are swept; a wider sweep would be [0.5, 1, 2, 4, 8].
    for time_range in [1, 2]:
        similarities = compute.find_similar_repos_considering_time(test_repo_name, time_range)
        ranked_pairs = sorted(similarities.items(), key=lambda pair: -pair[1])
        rank_list = [name for name, _ in ranked_pairs]

        # One (precision, recall, f1score) triple per cut-off depth 1..len.
        scores = [
            eval(depth, rank_list, showcase_js)
            for depth in range(1, len(rank_list) + 1)
        ]
        precision_list = [precision for precision, _, _ in scores]
        recall_list = [recall for _, recall, _ in scores]
        f1score_list = [f1score for _, _, f1score in scores]

        title = 'time_range=' + str(time_range)
        plot_f1score(f1score_list=f1score_list,
                     precision_list=precision_list,
                     recall_list=recall_list,
                     title=title)
        plot_precision_recall(recall_list=recall_list,
                              precision_list=precision_list,
                              title=title)