def main():
    """Plot per-series, average, and median performance against distance cutoff.

    Fetches the score matrix via get_score_mat() (rows indexed by distance
    cutoff, one column per series — presumably one per project version;
    verify against get_score_mat), prints it, then shows three figures in
    sequence: all individual series, their average, and their median.
    """
    score_mat = get_score_mat()
    pd_helper.print_df(score_mat)

    x_axis = list(score_mat.index)

    # Accumulate the column-wise sum while plotting each series, then
    # divide to get the average across columns.
    average_values = np.zeros(len(x_axis))
    for k in score_mat.columns:
        y_axis = score_mat[k].values
        average_values += y_axis
        plt.plot(x_axis, y_axis, '-')
    average_values /= len(score_mat.columns)
    _decorate_and_show("Distance cutoff v performance")

    # Plot the average performance
    print("Avg values {0}".format(average_values))
    plt.plot(x_axis, average_values, '-')
    _decorate_and_show("Distance cutoff v avg performance")

    # Plot median performance
    median = score_mat.median(axis=1)
    plt.plot(x_axis, median, '-')
    _decorate_and_show("Distance cutoff v median performance")


def _decorate_and_show(title):
    """Apply the shared axis labels and limits, set *title*, and show the figure.

    Factors out the decoration sequence that was repeated verbatim for each
    of the three plots in main().
    """
    plt.title(title)
    plt.xlabel("Distance cutoff")
    plt.ylabel("ratio of program not looked at")
    plt.ylim((0.0, 1.0))
    plt.show()
def compare_filter(project_name, ranker_type, filter_type, provider_type):
    """Compare filtered scores against an unfiltered baseline and print both.

    Computes scores with the requested ranker/filter, then a baseline with
    the "tarantula" ranker and no filter (NOTE(review): the baseline ranker
    is hard-coded rather than using ranker_type — presumably intentional;
    confirm). Prints a per-version diff table sorted by diff, followed by a
    comparison of the score-bin counts.
    """
    print("Computing with filter")
    with_filter = get_total_scores(project_name, ranker_type, filter_type, provider_type)
    print("Computing without filter")
    baseline = get_total_scores(project_name, "tarantula", "none", provider_type)

    # Per-version comparison, ordered so the biggest regressions come first.
    comparison = pd.DataFrame({
        'nofilter': baseline,
        'filter': with_filter,
        'diff': with_filter - baseline,
    })
    comparison.sort_values('diff', inplace=True)
    pd_helper.print_df(comparison)

    # Distribution comparison: how many versions fall in each score bin.
    binned_filter = get_bin_to_count(with_filter)
    binned_baseline = get_bin_to_count(baseline)
    bin_table = pd.DataFrame({
        'filter': binned_filter,
        'nofilter': binned_baseline,
        'diff': binned_filter - binned_baseline,
    })
    pd_helper.print_df(bin_table)
def print_total_scores(project_name, ranker_type, filter_type, provider_type, versions=None):
    """Fetch per-version total scores, print their mean, then the full table.

    *versions* optionally restricts which versions are scored; it is passed
    straight through to get_total_scores.
    """
    scores = get_total_scores(
        project_name, ranker_type, filter_type, provider_type, versions=versions
    )
    mean_score = scores.mean()
    print("Average score is {0}".format(mean_score))
    pd_helper.print_df(scores)