def main():
    """CLI entry point: build spectra for every version of the given project."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("project_name", help="Name of project")
    arg_parser.add_argument("project_dir", help="Location of project dir")
    args = arg_parser.parse_args()

    version_names = projects.get_version_names(args.project_name)
    spectra_maker.make_spectra(
        args.project_name,
        args.project_dir,
        version_names,
        False,
    )
def compute_features(project_name):
    """Compute and persist a feature file for every version of *project_name*.

    Versions whose feature file already exists are skipped, so the function is
    safe to re-run after a partial/interrupted pass.
    """
    # makedirs(exist_ok=True) replaces the racy exists()+mkdir pair and also
    # creates any missing parent directories of FEATURE_DIR.
    os.makedirs(FEATURE_DIR, exist_ok=True)
    for version in projects.get_version_names(project_name):
        fname = get_feature_file(project_name, version)
        if os.path.exists(fname):
            print("V {0} Already exists, skipping".format(version))
            continue
        print("Computing version {0}".format(version))
        spectra_file = spectra_maker.get_spectra_file(project_name, version)
        make_feature_file(spectra_file, fname)
def get_total_scores(project_name, ranker_type, filter_type, provider_type, versions=None):
    """Collect ranker scores across versions and return them as a pandas
    Series sorted from highest to lowest score.

    If *versions* is None, all known versions of the project are evaluated.
    """
    if versions is None:
        versions = projects.get_version_names(project_name)

    # Merge the per-version result dicts into one mapping.
    combined = {}
    for version in versions:
        results = evaluator.get_ranker_results(
            project_name, version, ranker_type, filter_type, provider_type)
        combined.update(results)

    scores = pd.Series({key: res.score for key, res in combined.items()})
    return scores.sort_values(ascending=False)
def optimize_classifier(project_name):
    """Search for the classifier weight vector (plus cutoff) that maximizes the
    total score improvement over the trivial baseline for *project_name*.

    Prints the best vector/cutoff found and the full optimizer result.
    """
    # Baseline score per version, using the trivial (no-op) provider.
    initial_scores = {}
    for version in projects.get_version_names(project_name):
        rank_res_d = evaluator.get_ranker_results(
            project_name, version, "intersection", "none", TrivialProvider())
        assert len(rank_res_d) == 1
        rank_res = next(iter(rank_res_d.values()))
        initial_scores[version] = rank_res.score

    def to_optimize(classify_vector_with_cutoff):
        # Last element is the cutoff; the rest is the classifier weight vector.
        cutoff = classify_vector_with_cutoff[-1]
        classify_vector = classify_vector_with_cutoff[:-1]
        score = evaluation_fn(project_name, initial_scores, classify_vector, cutoff)
        # We want to maximize the score, so minimize its negation.
        return -1 * score

    vecsize = 14 + 1  # 14 feature weights + 1 cutoff
    bounds = [(-1.0, 1.0) for _ in range(vecsize)]
    x0 = [0.1 for _ in range(vecsize)]
    # scipy.optimize.anneal was removed in SciPy 0.15; dual_annealing is the
    # supported simulated-annealing replacement (same scipy.optimize module).
    res = scipy.optimize.dual_annealing(to_optimize, bounds, x0=x0, maxiter=10)
    best = res.x
    print("Vec is {0}, cutoff {1}".format(best[:-1], best[-1]))
    print(res)
def evaluation_fn(project_name, initial_scores, classify_vector, cutoff):
    """Return the total score improvement over *initial_scores* obtained by
    ranking every version of *project_name* with a dot-product filter built
    from *classify_vector* and *cutoff*.

    Versions for which no ranking result is available are skipped.
    """
    print("Evaluating with cutoff {0} classify {1}".format(cutoff, classify_vector))
    scoresum = 0
    for version in projects.get_version_names(project_name):
        features = feature_computer.get_feature_vecs(project_name, version)
        filter_obj = spectra_filter.DotProductFilter(classify_vector, cutoff, features)
        ranker_obj = tarantula.TarantulaRanker()
        rank_res = get_res(project_name, version, ranker_obj, filter_obj)
        if rank_res is None:
            continue
        score = rank_res.score
        scoresum += score - initial_scores[version]
        # BUG FIX: the original printed a `ver` variable that was initialized
        # to None and never assigned, so every line read "version: None".
        # Report the running total against the version just processed.
        print("Score is {0} for version: {1}".format(scoresum, version))
    return scoresum