def transfer_lessons3():
    """Evaluate bellwether-trained predictions and patches for each Apache project.

    For every project whose name does not occur in ``paths.bellw``, a random
    split of the project's data is patched with ``xtree.execute`` and both the
    patched and the unpatched split are scored with ``rforest`` trained on the
    bellwether data.

    Yields:
        dict: ``{proj[:6]: values}`` where ``values`` is the output of
        ``pred_stats`` followed by the output of ``impact``.
    """
    data = DefectData.get_all_projects()["Apache"]

    # Bellwether training frame; built once, on the first project that needs
    # it, and reused for every later project.
    train = None

    # ``items()`` instead of ``iteritems()``: the latter is Python 2 only.
    for proj, paths in data.items():
        # Never evaluate on the bellwether project itself.
        if proj in paths.bellw:
            continue

        key = proj[:6]

        # If the training data doesn't exist yet, create it.
        if train is None:
            train = list2dataframe(data[paths.bellw].data)

        # Only the first element of the split is used as the test set.
        # NOTE(review): if this is scikit-learn's train_test_split, the first
        # element is the 20% portion when test_size=0.8 -- confirm intent.
        test, _ = train_test_split(list2dataframe(paths.data), test_size=0.8)

        patched = xtree.execute(train, test)

        # How good are the patches?
        patched_pred, _ = rforest(train, patched)
        # How good are the predictions?
        test_pred, test_distr = rforest(train, test)

        # The helpers receive one-element lists here, exactly as the original
        # code built them with ``append``.
        res = {key: []}
        res[key].extend(pred_stats(before=test[test.columns[-1]],
                                   after=[test_pred],
                                   distr=[test_distr]))
        res[key].extend(impact(test, [patched_pred]))
        yield res
def transfer_lessons4():
    """Compare patches learned locally with patches learned from the bellwether.

    For every Apache project whose name does not occur in ``paths.bellw``:
    the last entry of ``paths.data`` is the test set and the earlier entries
    form the local training set. The test set is patched twice with
    ``xtree.execute`` (once from the local training data, once from the
    bellwether data) and every result is scored with ``rforest`` trained on
    the bellwether data.

    Yields:
        dict: ``{proj[:6]: values}`` where ``values`` is the output of
        ``pred_stats`` on the unpatched test set, followed by
        ``impact(test, ...)`` for the local patches and then for the
        bellwether patches.
    """
    data = DefectData.get_all_projects()["Apache"]

    # Bellwether training frame; built once, on the first project that needs
    # it, and reused for every later project.
    train_bellw = None

    # ``items()`` instead of ``iteritems()``: the latter is Python 2 only.
    for proj, paths in data.items():
        # Never evaluate on the bellwether project itself.
        if proj in paths.bellw:
            continue

        key = proj[:6]

        # If the training data doesn't exist yet, create it.
        if train_bellw is None:
            train_bellw = list2dataframe(data[paths.bellw].data)

        train_local = list2dataframe(paths.data[:-1])
        test = list2dataframe(paths.data[-1])

        patched_local = xtree.execute(train_local, test)
        patched_bellw = xtree.execute(train_bellw, test)

        # How good are the patches learned locally?
        pred_local, _ = rforest(train_bellw, patched_local)
        # How good are the patches learned from the bellwether?
        pred_bellw, _ = rforest(train_bellw, patched_bellw)
        # How good are the predictions on the unpatched test set?
        pred_test, distr_test = rforest(train_bellw, test)

        res = {key: []}
        res[key].extend(pred_stats(before=test[test.columns[-1]],
                                   after=pred_test,
                                   distr=distr_test))
        res[key].append(impact(test, pred_local))
        res[key].append(impact(test, pred_bellw))
        yield res
def test_oracles(data=None):
    """Score the ``rforest`` and ``xgboost`` predictors on each project.

    Args:
        data: Mapping of project name to a paths object exposing ``bellw`` and
            ``data``. When ``None``, the "Apache" entry of
            ``DefectData.get_all_projects()`` is used.

    Yields:
        dict: ``{proj[:6]: {"pd_rf": [...], "pf_rf": [...], "pd_xg": [...],
        "pf_xg": [...]}}``. Each list holds one value: element 0 (``pd_*``)
        or element 1 (``pf_*``) of the ``pred_stats`` result for that
        predictor.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    # ``items()`` instead of ``iteritems()``: the latter is Python 2 only.
    for proj, paths in data.items():
        # Make sure we don't test on the bellwether dataset.
        if proj in paths.bellw:
            continue

        # All but the last entry train the oracles; the last entry is held out.
        validate = list2dataframe(paths.data[:-1])
        test = list2dataframe(paths.data[-1])
        actual = test[test.columns[-1]]

        # A leftover ``set_trace()`` debugger breakpoint used to sit here and
        # halted the generator for every project; it has been removed.

        # How good are the predictions?
        rf_pred, rf_distr = rforest(validate, test)
        xg_pred, xg_distr = xgboost(validate, test)

        rf_stats = pred_stats(before=actual, after=rf_pred, distr=rf_distr)
        xg_stats = pred_stats(before=actual, after=xg_pred, distr=xg_distr)

        yield {
            proj[:6]: {
                "pd_rf": [rf_stats[0]],
                "pf_rf": [rf_stats[1]],
                "pd_xg": [xg_stats[0]],
                "pf_xg": [xg_stats[1]],
            }
        }
def transfer_lessons2(n_folds=1):
    """Print a per-project report row for locally learned patches.

    Writes a tab-separated header, then for every Apache project whose name
    does not occur in ``paths.bellw`` writes the first four characters of the
    project name and calls ``pred_stats`` and ``impact``. Their return values
    are discarded, so they presumably print the remaining columns themselves
    (verify against their definitions).

    Args:
        n_folds: Accepted for interface compatibility; not used by the body.
    """
    data = DefectData.get_all_projects()["Apache"]
    print("Name\tPd\tPf\tImprovement")

    # ``items()`` instead of ``iteritems()``: the latter is Python 2 only.
    for proj, paths in data.items():
        # Never evaluate on the bellwether project itself.
        if proj in paths.bellw:
            continue

        print(proj[:4], end="\t")

        # Only the first element of the split is used. The original code also
        # bound the second element to ``validation`` but overwrote it below
        # before any use.
        train, _ = train_test_split(list2dataframe(paths.data), test_size=0.8)

        test = paths.data[-1]
        validation = paths.data[:-1]

        # NOTE(review): ``test`` is still the raw ``paths.data[-1]`` entry
        # here, and ``validation`` stays a raw slice of ``paths.data`` in the
        # ``rforest`` calls below, whereas sibling functions pass data frames.
        # Confirm that xtree.execute / rforest accept both forms.
        patched = xtree.execute(train, test)
        test = list2dataframe(test)

        # How good are the patches?
        patched_pred, _ = rforest(validation, patched)
        # How good are the predictions?
        test_pred, test_distr = rforest(validation, test)

        pred_stats(before=test[test.columns[-1]],
                   after=test_pred,
                   distr=test_distr)
        impact(test, patched_pred)
def transfer_lessons(data=None):
    """Compare local and bellwether patches over a 5-way split of the bellwether.

    For every project whose name does not occur in ``paths.bellw``, the
    bellwether data is split with ``CrossValidation.split(..., ways=5)``. On
    each fold, ``xtree`` builds patches from the local training data and from
    the fold's bellwether training part, and ``xgboost`` trained on the fold's
    validation part scores the patched and the unpatched data.

    Args:
        data: Mapping of project name to a paths object exposing ``bellw`` and
            ``data``. When ``None``, the "Apache" entry of
            ``DefectData.get_all_projects()`` is used.

    Yields:
        dict: ``{proj[:6]: {"pd": [...], "pf": [...], "local": [...],
        "bellw": [...]}}`` with one entry per fold in each list. ``pd`` and
        ``pf`` hold elements 0 and 1 of the ``pred_stats`` result; ``local``
        and ``bellw`` hold the ``impact`` of the respective patches.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    # ``items()`` instead of ``iteritems()``: the latter is Python 2 only.
    for proj, paths in data.items():
        # Make sure we don't test on the bellwether dataset.
        if proj in paths.bellw:
            continue

        scores = {"pd": [], "pf": [], "local": [], "bellw": []}
        bellw = list2dataframe(data[paths.bellw].data)

        for train_bellw, validation in CrossValidation.split(bellw, ways=5):
            # NOTE(review): these two frames do not depend on the fold but
            # are rebuilt on every iteration; hoisting them is only safe if
            # xtree / xgboost do not mutate their inputs.
            train_local = list2dataframe(paths.data[:-1])
            test = list2dataframe(paths.data[-1])

            patched_local = xtree(train_local, test)
            # NOTE(review): the bellwether patches are built from ALL of
            # ``paths.data`` while ``impact`` below compares against ``test``
            # (the last entry only) -- confirm this is intended.
            patched_bellw = xtree(train_bellw, list2dataframe(paths.data))

            # How good are the patches from local lessons?
            pred_local, _ = xgboost(validation, patched_local)
            # How good are the patches from the bellwether lessons?
            pred_bellw, _ = xgboost(validation, patched_bellw)
            # How good are the predictions?
            pred_qual, distr_qual = xgboost(validation, test)

            stats = pred_stats(before=test[test.columns[-1]],
                               after=pred_qual,
                               distr=distr_qual)

            scores["pd"].append(stats[0])
            scores["pf"].append(stats[1])
            scores["local"].append(impact(test, pred_local))
            scores["bellw"].append(impact(test, pred_bellw))

        # One result per project, carrying the scores of every fold.
        yield {proj[:6]: scores}