Exemple #1
0
def transfer_lessons4():
    """Yield per-project scores for local vs. bellwether ``xtree`` plans.

    Iterates over the "Apache" entry of ``DefectData.get_all_projects()``.
    Projects whose name is contained in their own ``paths.bellw`` are
    skipped. For the others, the last element of ``paths.data`` is the test
    set and all earlier elements form the local training set.

    Yields:
        dict: ``{proj[:6]: values}`` where ``values`` holds, in order, the
        items returned by ``pred_stats`` for the unpatched test set, then
        ``impact`` for the locally patched data, then ``impact`` for the
        bellwether-patched data.
    """
    # NOTE(review): ``iteritems`` is the Python 2 mapping API; kept as-is
    # because the container type returned by DefectData is not visible here.
    data = DefectData.get_all_projects()["Apache"]

    # The bellwether training frame is loaded once, on the first eligible
    # project, and reused for every later project.
    train_bellw = None

    for proj, paths in data.iteritems():
        if proj in paths.bellw:
            continue

        key = proj[:6]
        res = {key: []}

        if train_bellw is None:
            train_bellw = list2dataframe(data[paths.bellw].data)

        train_local = list2dataframe(paths.data[:-1])
        test = list2dataframe(paths.data[-1])

        patched_local = xtree.execute(train_local, test)
        patched_bellw = xtree.execute(train_bellw, test)

        # How good are the patches learned from local data?
        pred_local, _ = rforest(train_bellw, patched_local)
        # How good are the patches learned from the bellwether?
        pred_bellw, _ = rforest(train_bellw, patched_bellw)
        # How good are the predictions on the unpatched test set?
        pred_raw, distr_raw = rforest(train_bellw, test)

        res[key].extend(pred_stats(before=test[test.columns[-1]],
                                   after=pred_raw,
                                   distr=distr_raw))

        res[key].append(impact(test, pred_local))
        res[key].append(impact(test, pred_bellw))
        yield res
Exemple #2
0
def transfer_lessons3():
    """Yield per-project scores for bellwether-derived ``xtree`` plans.

    Iterates over the "Apache" entry of ``DefectData.get_all_projects()``.
    Projects whose name is contained in their own ``paths.bellw`` are
    skipped. For the others, a split of the project's full data is patched
    with ``xtree.execute`` using the bellwether frame as training data, and
    both the patched and the unpatched split are scored with ``rforest``.

    Yields:
        dict: ``{proj[:6]: values}`` where ``values`` holds the items
        returned by ``pred_stats`` followed by the items returned by
        ``impact``.
    """
    # NOTE(review): ``iteritems`` is the Python 2 mapping API; kept as-is
    # because the container type returned by DefectData is not visible here.
    data = DefectData.get_all_projects()["Apache"]

    # The bellwether training frame is loaded once, on the first eligible
    # project, and reused for every later project.
    train = None

    for proj, paths in data.iteritems():
        if proj in paths.bellw:
            continue

        key = proj[:6]
        res = {key: []}

        if train is None:
            train = list2dataframe(data[paths.bellw].data)

        # Only the first part returned by the split is used.
        # NOTE(review): if this is scikit-learn's train_test_split, with
        # test_size=0.8 the first part is the 20% "train" portion -- confirm
        # that using it as the test set is intended.
        test, _ = train_test_split(list2dataframe(paths.data),
                                   test_size=0.8)

        patched = xtree.execute(train, test)
        pred_patched, _ = rforest(train, patched)  # How good are the patches
        pred_raw, distr_raw = rforest(train, test)  # How good are the predictions

        # NOTE(review): predictions are passed on wrapped in one-element
        # lists, exactly as before; other variants of this routine pass the
        # bare values -- confirm which form pred_stats/impact expect.
        res[key].extend(pred_stats(before=test[test.columns[-1]],
                                   after=[pred_raw],
                                   distr=[distr_raw]))

        res[key].extend(impact(test, [pred_patched]))
        yield res
Exemple #3
0
def transfer_lessons(data=None):
    """Yield cross-validated scores for local vs. bellwether ``xtree`` plans.

    Args:
        data: Object exposing ``iteritems()`` that yields ``(proj, paths)``
            pairs and supports ``data[paths.bellw].data``. When ``None``,
            the "Apache" entry of ``DefectData.get_all_projects()`` is used.

    Yields:
        dict: ``{proj[:6]: {"pd": [...], "pf": [...], "local": [...],
        "bellw": [...]}}`` with one entry per cross-validation fold in each
        list. ``"pd"``/``"pf"`` are items 0 and 1 of the ``pred_stats``
        result; ``"local"``/``"bellw"`` are ``impact`` results for the
        locally and bellwether-patched data.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    # NOTE(review): ``iteritems`` is the Python 2 mapping API; kept as-is
    # because the container type of ``data`` is not visible here.
    for proj, paths in data.iteritems():
        # Make sure we don't test on the bellwether dataset.
        if proj in paths.bellw:
            continue

        scores = {
            "pd": [],
            "pf": [],
            "local": [],
            "bellw": [],
        }
        res = {proj[:6]: scores}

        bellw = list2dataframe(data[paths.bellw].data)

        # These frames do not depend on the fold, so load them once per
        # project instead of once per fold.
        train_local = list2dataframe(paths.data[:-1])
        test = list2dataframe(paths.data[-1])
        all_versions = list2dataframe(paths.data)

        for train_bellw, validation in CrossValidation.split(bellw, ways=5):
            patched_local = xtree(train_local, test)
            patched_bellw = xtree(train_bellw, all_versions)

            # How good are the patches from local lessons?
            pred_local, _ = xgboost(validation, patched_local)

            # How good are the patches from the bellwether lessons?
            pred_bellw, _ = xgboost(validation, patched_bellw)

            # How good are the predictions?
            pred_qual, distr_qual = xgboost(validation, test)

            stats = pred_stats(before=test[test.columns[-1]],
                               after=pred_qual,
                               distr=distr_qual)

            scores["pd"].append(stats[0])
            scores["pf"].append(stats[1])

            # NOTE(review): pred_bellw comes from a patch of all versions
            # while ``test`` is only the last one -- confirm impact()
            # tolerates the differing lengths.
            scores["local"].append(impact(test, pred_local))
            scores["bellw"].append(impact(test, pred_bellw))

        yield res
Exemple #4
0
def transfer_lessons2(n_folds=1):
    """Print a per-project table row for bellwether-excluded projects.

    Writes the header ``Name / Pd / Pf / Improvement`` and then, for every
    project whose name is not contained in its own ``paths.bellw``, the
    first four characters of the name followed by a tab. The return values
    of ``pred_stats`` and ``impact`` are discarded, so any further output on
    the row has to come from those helpers themselves.

    Args:
        n_folds: Not read anywhere in this function.
            NOTE(review): presumably reserved for repeated runs -- confirm.

    Returns:
        None.
    """
    projects = DefectData.get_all_projects()["Apache"]
    print("Name\tPd\tPf\tImprovement")

    for name, entry in projects.iteritems():
        if name in entry.bellw:
            continue

        print(name[:4], end="\t")

        # Only the first part of the split is kept as training data.
        train, _ = train_test_split(list2dataframe(entry.data),
                                    test_size=0.8)

        # NOTE(review): the raw ``entry.data`` elements (not dataframes) are
        # handed to xtree.execute and rforest here -- confirm those helpers
        # accept that form.
        newest = entry.data[-1]
        earlier = entry.data[:-1]

        patched = xtree.execute(train, newest)
        test = list2dataframe(newest)

        # How good are the patches?
        patched_pred, _ = rforest(earlier, patched)
        # How good are the predictions?
        raw_pred, raw_distr = rforest(earlier, test)

        pred_stats(before=test[test.columns[-1]],
                   after=raw_pred,
                   distr=raw_distr)

        impact(test, patched_pred)
Exemple #5
0
def transfer_lessons(data=None, projects=("ant", "ivy", "poi", "jedit")):
    """Yield cross-validated ``impact`` scores for several planners.

    For each selected project, five plans are generated per fold: ``alves``,
    ``shatnawi``, ``oliveira`` and ``xtree`` trained on the bellwether fold,
    plus ``xtree`` trained on the project's own earlier data. Each patched
    result is passed to ``xgboost`` with the fold's validation part, and the
    prediction is scored with ``impact``.

    Args:
        data: Object exposing ``iteritems()`` that yields ``(proj, paths)``
            pairs and supports ``data[paths.bellw].data``. When ``None``,
            the "Apache" entry of ``DefectData.get_all_projects()`` is used.
        projects: Collection of project names to evaluate; every other
            project is skipped. Pass a tuple/list/set of names, not a bare
            string. Defaults to the previously hard-coded selection.

    Yields:
        dict: ``{proj[:6]: {"xtree_local": [...], "xtree_bellw": [...],
        "alves": [...], "olive": [...], "shatw": [...]}}`` with one
        ``impact`` result per fold in each list.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    # NOTE(review): ``iteritems`` is the Python 2 mapping API; kept as-is
    # because the container type of ``data`` is not visible here.
    for proj, paths in data.iteritems():
        # Only the explicitly selected projects are evaluated.
        if proj not in projects:
            continue

        scores = {
            "xtree_local": [],
            "xtree_bellw": [],
            "alves": [],
            "olive": [],
            "shatw": [],
        }
        res = {proj[:6]: scores}

        bellw = list2dataframe(data[paths.bellw].data)
        test = list2dataframe(paths.data)
        test_local = list2dataframe(paths.data[-1])
        train_local = list2dataframe(paths.data[:-1])

        for train_bellw, validation in CrossValidation.split(bellw, ways=5):
            # (result key, planner, training frame), in evaluation order.
            planners = (
                ("alves", alves, train_bellw),
                ("shatw", shatnawi, train_bellw),
                ("olive", oliveira, train_bellw),
                ("xtree_bellw", xtree, train_bellw),
                ("xtree_local", xtree, train_local),
            )

            # Three separate passes keep the helper call order unchanged:
            # all plans first, then all predictions, then all impacts.
            patched = [(label, plan(train, test_local))
                       for label, plan, train in planners]

            predictions = []
            for label, frame in patched:
                pred, _ = xgboost(validation, frame)
                predictions.append((label, pred))

            # NOTE(review): impact() is given the frame built from all of
            # ``paths.data`` while the plans patch only the last element --
            # confirm this pairing is intended.
            for label, pred in predictions:
                scores[label].append(impact(test, pred))

        yield res