import numpy as np
import pandas as pd

# Project-local modules (assumed to live alongside this file).
import evaluator
import feature_computer
import projects
import run_result_provider
import spectra_filter
import tarantula


def get_score_mat():
    """Sweep the distance cutoff and collect Tarantula scores per version."""
    project_name = "totinfo"
    version = "v13"
    feature_obj = feature_computer.get_feature_vecs(project_name, version)
    score_mat = pd.DataFrame()
    # Sweep the cutoff from 1.0 down towards 0.0 in steps of 0.05.
    x_axis = list(np.arange(1.0, 0.0, -0.05))
    for cutoff in x_axis:
        ranker_obj = tarantula.TarantulaRanker()
        filter_obj = spectra_filter.SingleFailingDistanceFilter(
            feature_obj, "inv_common_execd_over_passing", cutoff)
        provider = run_result_provider.SingleFailingProvider()
        results = evaluator.get_ranker_results_with_objs(
            project_name, version, ranker_obj, filter_obj, provider)
        # Missing rank results count as a score of 0.0.
        scores = pd.Series({ver: rank_res.score if rank_res else 0.0
                            for ver, rank_res in results.items()})
        assert scores.max() <= 1.0
        score_mat[cutoff] = scores
    # After transposing, rows are cutoffs and columns are versions.
    return score_mat.transpose()
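# A minimal usage sketch (not part of the original pipeline): plot the matrix
# returned by get_score_mat(), one line per version, to see how the score
# responds to the cutoff. The matplotlib labels here are illustrative
# assumptions.
def plot_score_mat(score_mat):
    import matplotlib.pyplot as plt
    # The index holds cutoffs and each column is a version, so DataFrame.plot
    # draws one line per version against the cutoff axis.
    score_mat.plot(legend=False)
    plt.xlabel("distance cutoff")
    plt.ylabel("Tarantula score")
    plt.show()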
def get_filter(project_name, version, filter_type):
    """Construct the spectra filter selected by filter_type."""
    if filter_type == "none":
        return spectra_filter.TrivialFilter()
    feature_obj = feature_computer.get_feature_vecs(project_name, version)
    if filter_type == "heuristic":
        return spectra_filter.HeuristicFilter(feature_obj)
    elif filter_type == "direct_cutoff":
        return spectra_filter.SingleFailingDistanceFilter(
            feature_obj, 'inv_common_execd_over_passing', 0.15)
    elif filter_type == "topn":
        return spectra_filter.SingleFailingDistanceFilterTopNPercent(
            feature_obj, 'normalized_hamming', 0.3)
    raise RuntimeError("Unknown filter {0}".format(filter_type))
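# Illustrative sketch, not part of the evaluation code: instantiate each
# supported filter type once for a given project/version and report the
# backing class. The default project/version arguments are assumptions.
def demo_filters(project_name="totinfo", version="v13"):
    for filter_type in ("none", "heuristic", "direct_cutoff", "topn"):
        filter_obj = get_filter(project_name, version, filter_type)
        print("{0}: {1}".format(filter_type, type(filter_obj).__name__))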
def evaluation_fn(project_name, initial_scores, classify_vector, cutoff):
    """Sum the per-version score improvement of a DotProductFilter over the
    initial (unfiltered) scores."""
    print("Evaluating with cutoff {0} classify {1}".format(cutoff, classify_vector))
    scoresum = 0
    for version in projects.get_version_names(project_name):
        features = feature_computer.get_feature_vecs(project_name, version)
        filter_obj = spectra_filter.DotProductFilter(classify_vector, cutoff, features)
        ranker_obj = tarantula.TarantulaRanker()
        # get_res is assumed to be a helper defined elsewhere in this module.
        rank_res = get_res(project_name, version, ranker_obj, filter_obj)
        if rank_res is None:
            continue
        score = rank_res.score
        scorediff = score - initial_scores[version]
        scoresum += scorediff
        # Running total after incorporating this version's score difference.
        print("Score is {0} for version: {1}".format(scoresum, version))
    return scoresum
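# Hedged sketch of how evaluation_fn() could drive a search over classify
# vectors: a naive random search that keeps the candidate with the highest
# summed score improvement. The vector dimension, iteration count, and
# cutoff are assumptions for illustration; the search strategy actually used
# with this objective is not shown here.
def random_search(project_name, initial_scores, dims, iters=20, cutoff=0.5):
    import random
    best_vec, best_score = None, float("-inf")
    for _ in range(iters):
        candidate = [random.uniform(-1.0, 1.0) for _ in range(dims)]
        score = evaluation_fn(project_name, initial_scores, candidate, cutoff)
        if score > best_score:
            best_vec, best_score = candidate, score
    return best_vec, best_score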