def tnb(source, target, n_rep=12):
    """
    TNB: Transfer Naive Bayes
    :param source:
    :param target:
    :param n_rep: number of repeats
    :return: result
    """
    result = dict()
    plot_data = [("Xalan", "Log4j", "Lucene", "Poi", "Velocity")]
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        charts = []
        print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        val = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                # print("{} \r".format(src_name[0].upper() + src_name[1:]))
                src = list2dataframe(src_path.data)
                tgt = list2dataframe(tgt_path.data)
                pd, pf, g, auc = [], [], [], []

                for _ in xrange(n_rep):
                    lo, hi, test_mass = target_details(tgt)
                    weights = get_weights(maxs=hi, mins=lo, train_set=src, test_set=tgt)
                    _train, __test = weight_training(weights=weights,
                                                     training_instance=src,
                                                     test_instance=tgt)
                    actual, predicted, distribution = predict_defects(train=_train, test=__test)

                    # loc = tgt["$loc"].values
                    # loc = loc * 100 / np.max(loc)
                    # recall, loc, au_roc = get_curve(loc, actual, predicted, distribution)
                    # effort_plot(recall, loc,
                    #             save_dest=os.path.abspath(os.path.join(root, "plot", "plots", tgt_name)),
                    #             save_name=src_name)

                    p_d, p_f, p_r, rc, f_1, e_d, _g, auroc = abcd(actual, predicted,
                                                                  distribution, threshold=0.4)

                    pd.append(p_d)
                    pf.append(p_f)
                    g.append(_g)
                    auc.append(int(auroc))

                # One summary row per source project
                stats.append([src_name,
                              int(np.mean(pd)), int(np.std(pd)),
                              int(np.mean(pf)), int(np.std(pf)),
                              int(np.mean(auc)), int(np.std(auc))])
                # int(np.mean(g)), int(np.std(g))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[-2], reverse=True),  # Sort by AUC (Mean), descending
                                 columns=["Name", "Pd (Mean)", "Pd (Std)",
                                          "Pf (Mean)", "Pf (Std)",
                                          "AUC (Mean)", "AUC (Std)"])
        # "G (Mean)", "G (Std)"])

        print(tabulate(stats,
                       headers=["Name", "Pd (Mean)", "Pd (Std)",
                                "Pf (Mean)", "Pf (Std)",
                                "AUC (Mean)", "AUC (Std)"],
                       showindex="never", tablefmt="fancy_grid"))

        result.update({tgt_name: stats})
    return result
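# --- Illustrative usage (not part of the original module) ---
# The loops above only assume that `source` and `target` are dict-like,
# mapping a project name to an object whose `.data` attribute holds the CSV
# files that list2dataframe can read. The container class and paths below are
# hypothetical stand-ins for whatever the surrounding repo actually uses.
#
#   class Project(object):
#       def __init__(self, data):
#           self.data = data  # list of per-release CSVs for one project
#
#   projects = {
#       "ant":   Project(["data/ant/ant-1.5.csv", "data/ant/ant-1.6.csv"]),
#       "camel": Project(["data/camel/camel-1.2.csv", "data/camel/camel-1.4.csv"]),
#       "log4j": Project(["data/log4j/log4j-1.0.csv", "data/log4j/log4j-1.1.csv"]),
#   }
#   scores = tnb(source=projects, target=projects, n_rep=12)
#   scores["ant"]  # pandas.DataFrame of Pd/Pf/AUC per source project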
def tca_plus(source, target, verbose=False, n_rep=12):
    """
    TCA: Transfer Component Analysis
    :param source:
    :param target:
    :param n_rep: number of repeats
    :return: result
    """
    result = dict()
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        val = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                src = list2dataframe(src_path)
                tgt = list2dataframe(tgt_path)
                pd, pf, pr, f1, g, auc = [], [], [], [], [], []
                dcv_src, dcv_tgt = get_dcv(src, tgt)

                for _ in xrange(n_rep):
                    norm_src, norm_tgt = smart_norm(src, tgt, dcv_src, dcv_tgt)
                    _train, __test = map_transform(norm_src, norm_tgt)
                    actual, predicted, distribution = predict_defects(train=_train, test=__test)
                    p_d, p_f, p_r, rc, f_1, e_d, _g, auroc = abcd(actual, predicted, distribution)

                    pd.append(p_d)
                    pf.append(p_f)
                    pr.append(p_r)
                    f1.append(f_1)
                    g.append(_g)
                    auc.append(int(auroc))

                # Mean scores per source project, matching the column names below
                stats.append([src_name,
                              int(np.mean(pd)), int(np.mean(pf)),
                              int(np.mean(pr)), int(np.mean(f1)),
                              int(np.mean(g)), int(np.mean(auc))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[0]),  # Sort by source name
                                 columns=["Name", "Pd", "Pf", "Prec", "F1", "G", "AUC"])

        if verbose:
            print(tabulate(stats,
                           headers=["Name", "Pd", "Pf", "Prec", "F1", "G", "AUC"],
                           showindex="never", tablefmt="fancy_grid"))

        result.update({tgt_name: stats})
    return result
def tca_plus(source, target, verbose=False, n_rep=12):
    """
    TCA: Transfer Component Analysis
    :param source:
    :param target:
    :param n_rep: number of repeats
    :return: result
    """
    result = dict()
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        val = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                # set_trace()
                src = list2dataframe(src_path)
                tgt = list2dataframe(tgt_path)
                # set_trace()
                pd, pf, g, auc = [], [], [], []
                dcv_src, dcv_tgt = get_dcv(src, tgt)

                for _ in xrange(n_rep):
                    recall, loc = None, None
                    norm_src, norm_tgt = smart_norm(src, tgt, dcv_src, dcv_tgt)
                    _train, __test = map_transform(norm_src, norm_tgt)
                    # for k in np.arange(0.1, 1, 0.1):
                    actual, predicted, distribution = predict_defects(train=_train, test=__test)

                    # loc = tgt["$loc"].values
                    # loc = loc * 100 / np.max(loc)
                    # recall, loc, au_roc = get_curve(loc, actual, predicted)
                    # effort_plot(recall, loc,
                    #             save_dest=os.path.abspath(os.path.join(root, "plot", "plots", tgt_name)),
                    #             save_name=src_name)

                    p_d, p_f, p_r, rc, f_1, e_d, _g, _ = abcd(actual, predicted, distribution)

                    pd.append(p_d)
                    pf.append(p_f)
                    g.append(_g)
                    # auc.append(int(au_roc))
                    # set_trace()

                stats.append([src_name,
                              int(np.mean(pd)), int(np.std(pd)),
                              int(np.mean(pf)), int(np.std(pf))])
                # int(np.mean(auc)), int(np.std(auc)),
                # int(np.mean(g)), int(np.std(g))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[0]),  # Sort by source name
                                 columns=["Name", "Pd (Mean)", "Pd (Std)",
                                          "Pf (Mean)", "Pf (Std)"])
        # "AUC (Mean)", "AUC (Std)",
        # "G (Mean)", "G (Std)"])

        result.update({tgt_name: stats})
        # set_trace()
    return result
def seer(source, target, n_rep=20, n_redo=5):
    """
    seer: Causal Inference Learning
    :param source:
    :param target:
    :return: result: a dictionary of estimated performance statistics, one DataFrame per target
    """
    result = dict()
    t0 = time()
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                src = list2dataframe(src_path.data)
                tgt = list2dataframe(tgt_path.data)
                pd, pf, g = [], [], []
                matched_src = metrics_match(src, tgt, n_redo)

                for n in xrange(n_rep):
                    target_columns = []
                    source_columns = []

                    all_columns = [(key, val[0], val[1]) for key, val in matched_src.iteritems()
                                   if val[1] > 1]
                    all_columns = sorted(all_columns, key=lambda x: x[-1], reverse=True)  # Sort descending

                    # Filter all columns to remove dupes
                    for elem in all_columns:
                        if not elem[1] in source_columns:
                            target_columns.append(elem[0])
                            source_columns.append(elem[1])

                    _train, __test = src[source_columns + [src.columns[-1]]], \
                                     tgt[target_columns + [tgt.columns[-1]]]
                    # _train, __test = map_transform(src[source_columns + [src.columns[-1]]],
                    #                                tgt[target_columns + [tgt.columns[-1]]])
                    # set_trace()

                    actual, predicted = predict_defects(train=_train, test=__test)
                    p_d, p_f, p_r, rc, f_1, e_d, _g = abcd(actual, predicted)

                    pd.append(p_d)
                    pf.append(p_f)
                    g.append(e_d)

                stats.append([src_name,
                              round(np.mean(pd), 2), round(np.std(pd), 2),
                              round(np.mean(pf), 2), round(np.std(pf), 2),
                              round(np.mean(g), 2), round(np.std(g), 2)])
                # set_trace()

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[0]),  # Sort by source name
                                 columns=["Name", "Pd (Mean)", "Pd (Std)",
                                          "Pf (Mean)", "Pf (Std)",
                                          "G (Mean)", "G (Std)"])
        # set_trace()
        result.update({tgt_name: stats})
    return result
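# A small, self-contained sketch of the column-matching step above (added for
# illustration; the real `matched_src` comes from metrics_match). Each target
# metric maps to a (best-matching source metric, match score) pair, and the
# filter keeps each source metric only once, at its highest-scoring pairing.
def _pick_matched_columns(matched_src):
    target_columns, source_columns = [], []
    all_columns = [(key, val[0], val[1]) for key, val in matched_src.items() if val[1] > 1]
    all_columns = sorted(all_columns, key=lambda x: x[-1], reverse=True)  # best matches first
    for tgt_col, src_col, _score in all_columns:
        if src_col not in source_columns:  # use each source metric at most once
            target_columns.append(tgt_col)
            source_columns.append(src_col)
    return target_columns, source_columns

# Toy example (metric names and scores are made up):
#   _pick_matched_columns({"wmc": ("cbo", 3.2), "rfc": ("cbo", 2.1), "loc": ("sloc", 4.0)})
#   -> (["loc", "wmc"], ["sloc", "cbo"])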
def tca_plus(source, target, n_rep=12):
    """
    TCA: Transfer Component Analysis
    :param source:
    :param target:
    :param n_rep: number of repeats
    :return: result
    """
    result = dict()
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        val = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                print("{} \r".format(src_name[0].upper() + src_name[1:]))
                src = list2dataframe(src_path.data)
                tgt = list2dataframe(tgt_path.data)
                pd, pf, g, auc = [], [], [], []
                dcv_src, dcv_tgt = get_dcv(src, tgt)

                for _ in xrange(n_rep):
                    recall, loc = None, None
                    norm_src, norm_tgt = smart_norm(src, tgt, dcv_src, dcv_tgt)
                    _train, __test = map_transform(norm_src, norm_tgt)
                    try:
                        actual, predicted, distribution = predict_defects(train=_train, test=__test)
                    except:
                        set_trace()  # drop into the debugger if prediction fails (debugging aid)

                    p_d, p_f, p_r, rc, f_1, e_d, _g, auroc = abcd(actual, predicted, distribution)

                    pd.append(p_d)
                    pf.append(p_f)
                    g.append(_g)
                    auc.append(int(auroc))

                stats.append([src_name,
                              int(np.mean(pd)), int(np.std(pd)),
                              int(np.mean(pf)), int(np.std(pf)),
                              int(np.mean(auc)), int(np.std(auc))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[-2], reverse=True),  # Sort by AUC (Mean), descending
                                 columns=["Name", "Pd (Mean)", "Pd (Std)",
                                          "Pf (Mean)", "Pf (Std)",
                                          "AUC (Mean)", "AUC (Std)"])

        print(tabulate(stats,
                       headers=["Name", "Pd (Mean)", "Pd (Std)",
                                "Pf (Mean)", "Pf (Std)",
                                "AUC (Mean)", "AUC (Std)"],
                       showindex="never", tablefmt="fancy_grid"))

        result.update({tgt_name: stats})
    return result
def vcb(source, target, n_rep=12):
    """
    VCB: Value-Cognitive Boosting
    :param source:
    :param target:
    :param n_rep: number of repeats
    :return: result
    """
    result = dict()
    plot_data = [("Xalan", "Log4j", "Lucene", "Poi", "Velocity")]
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        charts = []
        print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        val = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                # print("{} \r".format(src_name[0].upper() + src_name[1:]))
                src = list2dataframe(src_path.data)
                tgt = list2dataframe(tgt_path.data)
                pd, pf, g, auc = [], [], [], []

                for _ in xrange(n_rep):
                    _train, clf_w, classifiers = weight_training(train=src, test=tgt)
                    actual, predicted, distribution = predict_defects(tgt, clf_w, classifiers)

                    loc = tgt["$loc"].values
                    loc = loc * 100 / np.max(loc)
                    recall, loc, au_roc = get_curve(loc, actual, predicted, distribution)
                    effort_plot(recall, loc,
                                save_dest=os.path.abspath(os.path.join(root, "plot", "plots", tgt_name)),
                                save_name=src_name)

                    p_d, p_f, p_r, rc, f_1, e_d, _g, auroc = abcd(actual, predicted, distribution)

                    pd.append(p_d)
                    pf.append(p_f)
                    g.append(_g)
                    auc.append(int(auroc))

                stats.append([src_name,
                              int(np.mean(pd)), int(np.std(pd)),
                              int(np.mean(pf)), int(np.std(pf)),
                              int(np.mean(auc)), int(np.std(auc))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[-2], reverse=True),  # Sort by AUC (Mean), descending
                                 columns=["Name", "Pd (Mean)", "Pd (Std)",
                                          "Pf (Mean)", "Pf (Std)",
                                          "AUC (Mean)", "AUC (Std)"])
        # "G (Mean)", "G (Std)"])

        print(tabulate(stats,
                       headers=["Name", "Pd (Mean)", "Pd (Std)",
                                "Pf (Mean)", "Pf (Std)",
                                "AUC (Mean)", "AUC (Std)"],
                       showindex="never", tablefmt="fancy_grid"))

        result.update({tgt_name: stats})
    return result
def tca_plus(source, target, verbose=True, n_rep=12):
    """
    TCA: Transfer Component Analysis
    :param source:
    :param target:
    :param n_rep: number of repeats
    :return: result
    """
    result = dict()
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        if verbose:
            print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        val = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                src = pandas.read_csv(src_path)
                tgt = pandas.read_csv(tgt_path)
                pd, pf, pr, f1, g, auc = [], [], [], [], [], []
                dcv_src, dcv_tgt = get_dcv(src, tgt)

                for _ in xrange(n_rep):
                    norm_src, norm_tgt = smart_norm(src, tgt, dcv_src, dcv_tgt)
                    _train, __test = map_transform(norm_src.dropna(axis=1, inplace=False),
                                                   norm_tgt.dropna(axis=1, inplace=False))
                    actual, predicted, distribution = predict_defects(train=_train, test=__test)
                    p_d, p_f, p_r, rc, f_1, e_d, _g, auroc = abcd(actual, predicted, distribution)

                    pd.append(p_d)
                    pf.append(p_f)
                    pr.append(p_r)
                    f1.append(f_1)
                    g.append(_g)
                    auc.append(int(auroc))

                stats.append([src_name,
                              int(np.mean(pd)), int(np.mean(pf)),
                              int(np.mean(pr)), int(np.mean(f1)),
                              int(np.mean(g)), int(np.mean(auc))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[-2], reverse=True),  # Sort by G Score
                                 columns=["Name", "Pd", "Pf", "Prec", "F1", "G", "AUC"])

        if verbose:
            print(tabulate(stats,
                           headers=["Name", "Pd", "Pf", "Prec", "F1", "G", "AUC"],
                           showindex="never", tablefmt="fancy_grid"))

        result.update({tgt_name: stats})
    return result
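# Self-contained illustration of the dropna(axis=1) pre-clean used above:
# any metric column containing NaNs is dropped before map_transform, so only
# fully-populated columns reach TCA. The frame below is made-up toy data.
def _drop_incomplete_columns_demo():
    import pandas as pd_lib  # aliased to avoid clashing with the local `pd` lists above
    frame = pd_lib.DataFrame({"wmc": [1, 2], "rfc": [3, None], "bug": [0, 1]})
    return sorted(frame.dropna(axis=1, inplace=False).columns.tolist())  # ['bug', 'wmc']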
def bellw(source, target, n_rep=12, verbose=False):
    """
    Bellwether transfer learner
    :param source:
    :param target:
    :param n_rep: number of repeats
    :return: result
    """
    result = dict()
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        charts = []
        print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        val = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                src = list2dataframe(src_path.data)
                tgt = list2dataframe(tgt_path.data)
                pd, pf, pr, f1, g, auc = [], [], [], [], [], []

                for _ in xrange(n_rep):
                    _train, __test = weight_training(test_instance=tgt, training_instance=src)
                    actual, predicted, distribution = predict_defects(train=_train, test=__test)
                    p_d, p_f, p_r, rc, f_1, e_d, _g, auroc = abcd(actual, predicted, distribution)

                    pd.append(p_d)
                    pf.append(p_f)
                    pr.append(p_r)
                    f1.append(f_1)
                    g.append(_g)
                    auc.append(int(auroc))

                stats.append([src_name,
                              int(np.mean(pd)), int(np.mean(pf)),
                              int(np.mean(pr)), int(np.mean(f1)),
                              int(np.mean(g)), int(np.mean(auc))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[-2], reverse=True),  # Sort by G Score
                                 columns=["Name", "Pd", "Pf", "Prec", "F1", "G", "AUC"])

        if verbose:
            print(tabulate(stats,
                           headers=["Name", "Pd", "Pf", "Prec", "F1", "G", "AUC"],
                           showindex="never", tablefmt="fancy_grid"))

        result.update({tgt_name: stats})
    return result
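# Illustrative helper (not in the original module): the dict returned by
# bellw() maps each target project to a DataFrame that is already sorted by
# G score in descending order, so the first row names the best source -- the
# "bellwether" candidate -- for that target.
def best_sources(result):
    """Return {target name: top-ranked source name} from bellw()'s output."""
    return dict((tgt, frame.iloc[0]["Name"]) for tgt, frame in result.items())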
def tca_plus(source, target, n_rep=12):
    """
    TCA: Transfer Component Analysis
    :param source:
    :param target:
    :param n_rep: number of repeats
    :return: result
    """
    result = dict()
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        val = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                # set_trace()
                src = list2dataframe(src_path)
                tgt = list2dataframe(tgt_path)
                # set_trace()
                pd, pf, g, auc = [], [], [], []
                dcv_src, dcv_tgt = get_dcv(src, tgt)

                for _ in xrange(n_rep):
                    recall, loc = None, None
                    norm_src, norm_tgt = smart_norm(src, tgt, dcv_src, dcv_tgt)
                    _train, __test = map_transform(norm_src, norm_tgt)
                    # for k in np.arange(0.1, 1, 0.1):
                    actual, predicted, distribution = predict_defects(train=_train, test=__test)

                    # loc = tgt["$loc"].values
                    # loc = loc * 100 / np.max(loc)
                    # recall, loc, au_roc = get_curve(loc, actual, predicted)
                    # effort_plot(recall, loc,
                    #             save_dest=os.path.abspath(os.path.join(root, "plot", "plots", tgt_name)),
                    #             save_name=src_name)

                    p_d, p_f, p_r, rc, f_1, e_d, _g, auroc = abcd(actual, predicted, distribution)

                    pd.append(p_d)
                    pf.append(p_f)
                    g.append(_g)
                    auc.append(int(auroc))
                    # set_trace()

                stats.append([src_name,
                              int(np.mean(pd)), int(np.std(pd)),
                              int(np.mean(pf)), int(np.std(pf)),
                              int(np.mean(auc)), int(np.std(auc))])
                # int(np.mean(g)), int(np.std(g))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[0]),  # Sort by source name
                                 columns=["Name", "Pd (Mean)", "Pd (Std)",
                                          "Pf (Mean)", "Pf (Std)",
                                          "AUC (Mean)", "AUC (Std)"])
        # "G (Mean)", "G (Std)"])

        print(tabulate(stats,
                       headers=["Name", "Pd (Mean)", "Pd (Std)",
                                "Pf (Mean)", "Pf (Std)",
                                "AUC (Mean)", "AUC (Std)"],
                       showindex="never", tablefmt="fancy_grid"))

        result.update({tgt_name: stats})
        # set_trace()
    return result
def tnb(source, target, verbose=False, n_rep=12):
    """
    TNB: Transfer Naive Bayes
    :param source:
    :param target:
    :param n_rep: number of repeats
    :return: result
    """
    result = dict()
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        if verbose:
            print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        val = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                src = pandas.read_csv(src_path)
                tgt = pandas.read_csv(tgt_path)
                pd, pf, pr, f1, g, auc = [], [], [], [], [], []

                for _ in xrange(n_rep):
                    lo, hi, test_mass = target_details(tgt)
                    weights = get_weights(maxs=hi, mins=lo, train_set=src, test_set=tgt)
                    _train, __test = weight_training(weights=weights,
                                                     training_instance=src,
                                                     test_instance=tgt)
                    actual, predicted, distribution = predict_defects(train=_train, test=__test)
                    p_d, p_f, p_r, rc, f_1, e_d, _g, auroc = abcd(actual, predicted, distribution)

                    pd.append(p_d)
                    pf.append(p_f)
                    pr.append(p_r)
                    f1.append(f_1)
                    g.append(_g)
                    auc.append(int(auroc))

                stats.append([src_name,
                              int(np.mean(pd)), int(np.mean(pf)),
                              int(np.mean(pr)), int(np.mean(f1)),
                              int(np.mean(g)), int(np.mean(auc))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[-2], reverse=True),  # Sort by G Score
                                 columns=["Name", "Pd", "Pf", "Prec", "F1", "G", "AUC"])

        if verbose:
            print(tabulate(stats,
                           headers=["Name", "Pd", "Pf", "Prec", "F1", "G", "AUC"],
                           showindex="never", tablefmt="fancy_grid"))

        result.update({tgt_name: stats})
    return result
def seer(source, target, n_rep=20, n_redo=5):
    """
    seer: Causal Inference Learning
    :param source:
    :param target:
    :return: result: a dictionary of estimated performance statistics, one DataFrame per target
    """
    result = dict()
    t0 = time()
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                src = list2dataframe(src_path.data)
                tgt = list2dataframe(tgt_path.data)
                pd, pf, g, auc = [], [], [], []
                matched_src = metrics_match(src, tgt, n_redo)

                for n in xrange(n_rep):
                    target_columns = []
                    source_columns = []

                    all_columns = [(key, val[0], val[1]) for key, val in matched_src.iteritems()
                                   if val[1] > 1]
                    all_columns = sorted(all_columns, key=lambda x: x[-1], reverse=True)  # Sort descending

                    # Filter all columns to remove dupes
                    for elem in all_columns:
                        if not elem[1] in source_columns:
                            target_columns.append(elem[0])
                            source_columns.append(elem[1])

                    selected_col = list(set(target_columns).intersection(source_columns))
                    _train, __test = map_transform(src[selected_col + [src.columns[-1]]],
                                                   tgt[selected_col + [tgt.columns[-1]]])
                    # _train, __test = src[source_columns + [src.columns[-1]]], \
                    #                  tgt[target_columns + [tgt.columns[-1]]]
                    # set_trace()

                    actual, predicted, distribution = predict_defects(train=_train, test=__test)
                    p_d, p_f, p_r, rc, f_1, e_d, _g, auroc = abcd(actual, predicted, distribution)

                    pd.append(p_d)
                    pf.append(p_f)
                    g.append(e_d)
                    auc.append(int(auroc))

                stats.append([src_name,
                              int(np.mean(pd)), int(np.std(pd)),
                              int(np.mean(pf)), int(np.std(pf)),
                              int(np.mean(auc)), int(np.std(auc))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[-2], reverse=True),  # Sort by AUC (Mean), descending
                                 columns=["Name", "Pd (Mean)", "Pd (Std)",
                                          "Pf (Mean)", "Pf (Std)",
                                          "AUC (Mean)", "AUC (Std)"])
        # "G (Mean)", "G (Std)"])

        print(tabulate(stats,
                       headers=["Name", "Pd (Mean)", "Pd (Std)",
                                "Pf (Mean)", "Pf (Std)",
                                "AUC (Mean)", "AUC (Std)"],
                       showindex="never", tablefmt="fancy_grid"))

        result.update({tgt_name: stats})
    return result
def tnb(source, target, verbose=False, n_rep=12):
    """
    TNB: Transfer Naive Bayes
    :param source:
    :param target:
    :param n_rep: number of repeats
    :return: result
    """
    result = dict()
    plot_data = [("Xalan", "Log4j", "Lucene", "Poi", "Velocity")]
    for tgt_name, tgt_path in target.iteritems():
        stats = []
        charts = []
        if verbose:
            print("{} \r".format(tgt_name[0].upper() + tgt_name[1:]))
        val = []
        for src_name, src_path in source.iteritems():
            if not src_name == tgt_name:
                # print("{} \r".format(src_name[0].upper() + src_name[1:]))
                src = list2dataframe(src_path.data)
                tgt = list2dataframe(tgt_path.data)
                pd, pf, g, auc = [], [], [], []

                lo, hi, test_mass = target_details(tgt)
                weights = get_weights(maxs=hi, mins=lo, train_set=src, test_set=tgt)
                _train = weight_training(weights=weights, training_instance=src)

                # Min-max normalize the target metrics; keep the class column unchanged
                __test = (tgt[tgt.columns[:-1]] - tgt[tgt.columns[:-1]].min()) / \
                         (tgt[tgt.columns[:-1]].max() - tgt[tgt.columns[:-1]].min())
                __test[tgt.columns[-1]] = tgt[tgt.columns[-1]]

                actual, predicted, distribution = predict_defects(train=_train, test=__test)

                loc = tgt["$loc"].values
                loc = loc * 100 / np.max(loc)
                recall, loc, au_roc = get_curve(loc, actual, predicted)
                effort_plot(recall, loc,
                            save_dest=os.path.abspath(os.path.join(root, "plot", "plots", tgt_name)),
                            save_name=src_name)

                p_d, p_f, p_r, rc, f_1, e_d, _g = abcd(actual, predicted, distribution)

                pd.append(p_d)
                pf.append(p_f)
                g.append(_g)
                auc.append(int(au_roc))

                stats.append([src_name,
                              int(np.mean(pd)), int(np.std(pd)),
                              int(np.mean(pf)), int(np.std(pf)),
                              int(np.mean(auc)), int(np.std(auc))])
                # int(np.mean(g)), int(np.std(g))])

        stats = pandas.DataFrame(sorted(stats, key=lambda lst: lst[0]),  # Sort by source name
                                 columns=["Name", "Pd (Mean)", "Pd (Std)",
                                          "Pf (Mean)", "Pf (Std)",
                                          "AUC (Mean)", "AUC (Std)"])
        # "G (Mean)", "G (Std)"])

        if verbose:
            print(tabulate(stats,
                           headers=["Name", "Pd (Mean)", "Pd (Std)",
                                    "Pf (Mean)", "Pf (Std)",
                                    "AUC (Mean)", "AUC (Std)"],
                           showindex="never", tablefmt="fancy_grid"))

        result.update({tgt_name: stats})
        # set_trace()
    return result
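# Hedged driver sketch (hypothetical; not part of the original module). It runs
# a few of the learners above over every target project and writes one results
# file per learner/target pair. `load_projects()` is a stand-in for whatever
# builds the name -> data mapping these functions expect.
#
#   if __name__ == "__main__":
#       projects = load_projects()          # hypothetical loader
#       for learner in (tnb, vcb, bellw):   # defined above
#           scores = learner(projects, projects, n_rep=12)
#           for tgt, frame in scores.items():
#               frame.to_csv("results_{}_{}.csv".format(learner.__name__, tgt))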