Beispiel #1
0
def transfer_lessons3():
    """Yield prediction and patch-impact results for each Apache project.

    For every project whose name is not contained in ``paths.bellw`` the
    generator:

    1. splits the project's data with ``train_test_split(test_size=0.8)``
       and keeps the first returned part as the test set,
    2. asks ``xtree.execute`` for a patched version of that test set, using
       the bellwether data as training data,
    3. runs ``rforest`` on both the patched and the unpatched test set.

    Yields:
        dict: ``{proj[:6]: [...]}`` where the list holds the items returned
        by ``pred_stats`` followed by the items returned by ``impact``.
    """
    data = DefectData.get_all_projects()["Apache"]

    # The bellwether training frame is built once, on the first project that
    # is actually processed, and reused for all later projects.
    train = None

    for proj, paths in data.iteritems():
        # Never evaluate on the bellwether dataset itself.
        if proj in paths.bellw:
            continue

        key = proj[:6]

        if train is None:
            train = list2dataframe(data[paths.bellw].data)

        # Only the first part of the split is used; the second is discarded.
        test, _ = train_test_split(list2dataframe(paths.data),
                                   test_size=0.8)

        patched = xtree.execute(train, test)
        patched_pred, _ = rforest(train, patched)  # How good are the patches
        plain_pred, plain_distr = rforest(train, test)  # How good are the predictions

        # NOTE(review): predictions are handed on wrapped in one-element
        # lists, which is what this function has always passed to
        # pred_stats/impact -- confirm that those helpers expect this shape.
        res = {key: []}
        res[key].extend(pred_stats(before=test[test.columns[-1]],
                                   after=[plain_pred],
                                   distr=[plain_distr]))
        res[key].extend(impact(test, [patched_pred]))
        yield res
Beispiel #2
0
def transfer_lessons4():
    """Yield results comparing local and bellwether XTREE patches.

    For every Apache project whose name is not contained in ``paths.bellw``:

    * ``paths.data[:-1]`` is used as the local training data and
      ``paths.data[-1]`` as the test data,
    * ``xtree.execute`` produces one patched test set from the local
      training data and one from the bellwether training data,
    * ``rforest`` (always trained on the bellwether frame) is run on both
      patched sets and on the unpatched test set.

    Yields:
        dict: ``{proj[:6]: [...]}`` where the list holds the items returned
        by ``pred_stats`` for the unpatched predictions, followed by
        ``impact`` for the local patches and ``impact`` for the bellwether
        patches (each appended as a single element).
    """
    data = DefectData.get_all_projects()["Apache"]

    # Built once, on the first processed project, and reused afterwards.
    train_bellw = None

    for proj, paths in data.iteritems():
        # Never evaluate on the bellwether dataset itself.
        if proj in paths.bellw:
            continue

        key = proj[:6]

        if train_bellw is None:
            train_bellw = list2dataframe(data[paths.bellw].data)

        train_local = list2dataframe(paths.data[:-1])
        test = list2dataframe(paths.data[-1])

        patched_local = xtree.execute(train_local, test)
        patched_bellw = xtree.execute(train_bellw, test)

        # How good are the patches learned locally?
        pred_local, _ = rforest(train_bellw, patched_local)
        # How good are the patches learned from the bellwether?
        pred_bellw, _ = rforest(train_bellw, patched_bellw)
        # How good are the predictions on the unpatched test set?
        pred_plain, distr_plain = rforest(train_bellw, test)

        res = {key: []}
        res[key].extend(pred_stats(before=test[test.columns[-1]],
                                   after=pred_plain,
                                   distr=distr_plain))
        res[key].append(impact(test, pred_local))
        res[key].append(impact(test, pred_bellw))
        yield res
Beispiel #3
0
def changes(data=None):
    """Yield, per project, how often each planner changed each attribute.

    Args:
        data: Mapping-like object exposing ``iteritems()`` that yields
            ``(project_name, paths)`` pairs and supports lookup by
            ``paths.bellw``. When ``None``, the "Apache" entry of
            ``DefectData.get_all_projects()`` is used.

    Yields:
        dict: ``{proj[:6]: {planner: [deltas_count(...), ...]}}`` with the
        planner keys ``"xtree_local"``, ``"xtree_bellw"``, ``"alves"``,
        ``"olive"`` and ``"shatw"``; each list gets one entry per fold of
        the 2-way split of the bellwether data.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Make sure we don't test on the bellwether dataset.
        if proj in paths.bellw:
            continue

        key = proj[:6]
        res = {
            key: {
                "xtree_local": [],
                "xtree_bellw": [],
                "alves": [],
                "olive": [],
                "shatw": []
            }
        }
        counts = res[key]

        bellw = list2dataframe(data[paths.bellw].data)
        # The full project frame is only needed for its column labels.
        test = list2dataframe(paths.data)
        test_local = list2dataframe(paths.data[-1])
        train_local = list2dataframe(paths.data[:-1])

        for train_bellw, _ in CrossValidation.split(bellw, ways=2):
            # Each planner returns (patched_frame, changes); only the
            # changes are needed for counting.
            _, changes_alves = alves(train_bellw, test_local)
            _, changes_shatw = shatnawi(train_bellw, test_local)
            _, changes_olive = oliveira(train_bellw, test_local)
            _, changes_xtree = xtree(train_bellw, test_local)
            _, changes_xtree_local = xtree(train_local, test_local)

            counts["alves"].append(
                deltas_count(test.columns, changes_alves))
            counts["olive"].append(
                deltas_count(test.columns, changes_olive))
            counts["shatw"].append(
                deltas_count(test.columns, changes_shatw))
            counts["xtree_bellw"].append(
                deltas_count(test.columns, changes_xtree))
            counts["xtree_local"].append(
                deltas_count(test.columns, changes_xtree_local))

        yield res
Beispiel #4
0
def transfer_lessons(data=None):
    """Yield prediction quality and patch impact per non-bellwether project.

    Args:
        data: Mapping-like object exposing ``iteritems()`` that yields
            ``(project_name, paths)`` pairs and supports lookup by
            ``paths.bellw``. When ``None``, the "Apache" entry of
            ``DefectData.get_all_projects()`` is used.

    Yields:
        dict: ``{proj[:6]: {"pd": [...], "pf": [...], "local": [...],
        "bellw": [...]}}``. Every list receives one entry per fold of the
        5-way split of the bellwether data: ``"pd"``/``"pf"`` take items 0
        and 1 of ``pred_stats``; ``"local"``/``"bellw"`` take the ``impact``
        of the locally-learned and bellwether-learned patches.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Make sure we don't test on the bellwether dataset.
        if proj in paths.bellw:
            continue

        scores = {
            "pd": [],
            "pf": [],
            "local": [],
            "bellw": [],
        }
        res = {proj[:6]: scores}

        bellw = list2dataframe(data[paths.bellw].data)
        folds = CrossValidation.split(bellw, ways=5)

        for train_bellw, validation in folds:
            train_local = list2dataframe(paths.data[:-1])
            test = list2dataframe(paths.data[-1])

            patched_local = xtree(train_local, test)
            # NOTE(review): the bellwether plan is applied to the frame built
            # from all of paths.data, not only to `test` -- confirm intended.
            patched_bellw = xtree(train_bellw, list2dataframe(paths.data))

            # How good are the patches from local lessons?
            pred_local, _ = xgboost(validation, patched_local)

            # How good are the patches from the bellwether lessons?
            pred_bellw, _ = xgboost(validation, patched_bellw)

            # How good are the predictions on the unpatched test set?
            pred_qual, distr_qual = xgboost(validation, test)

            actual = test[test.columns[-1]]
            stats = pred_stats(before=actual,
                               after=pred_qual,
                               distr=distr_qual)

            scores["pd"].append(stats[0])
            scores["pf"].append(stats[1])

            scores["local"].append(impact(test, pred_local))
            scores["bellw"].append(impact(test, pred_bellw))

        yield res
Beispiel #5
0
def test_oracles(data=None):
    """Yield pd/pf of the ``rforest`` and ``xgboost`` oracles per project.

    Both oracles are given ``paths.data[:-1]`` as their first argument and
    ``paths.data[-1]`` as the data to predict, for every project whose name
    is not contained in ``paths.bellw``.

    Args:
        data: Mapping-like object exposing ``iteritems()`` that yields
            ``(project_name, paths)`` pairs. When ``None``, the "Apache"
            entry of ``DefectData.get_all_projects()`` is used.

    Yields:
        dict: ``{proj[:6]: {"pd_rf": [...], "pf_rf": [...], "pd_xg": [...],
        "pf_xg": [...]}}``; each list holds a single entry taken from items
        0 and 1 of the corresponding ``pred_stats`` result.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Make sure we don't test on the bellwether dataset.
        if proj in paths.bellw:
            continue

        key = proj[:6]
        res = {
            key: {
                "pd_rf": [],
                "pf_rf": [],
                "pd_xg": [],
                "pf_xg": []
            }
        }

        validate = list2dataframe(paths.data[:-1])
        test = list2dataframe(paths.data[-1])

        # How good are the predictions of each oracle?
        pred_rf_raw, distr_rf = rforest(validate, test)
        pred_xg_raw, distr_xg = xgboost(validate, test)

        actual = test[test.columns[-1]]
        stats_rf = pred_stats(before=actual,
                              after=pred_rf_raw,
                              distr=distr_rf)
        stats_xg = pred_stats(before=actual,
                              after=pred_xg_raw,
                              distr=distr_xg)

        res[key]["pd_rf"].append(stats_rf[0])
        res[key]["pf_rf"].append(stats_rf[1])

        res[key]["pd_xg"].append(stats_xg[0])
        res[key]["pf_xg"].append(stats_xg[1])

        yield res
Beispiel #6
0
def transfer_lessons2(n_folds=1):
    """Print a header and one name cell per non-bellwether Apache project.

    For each such project the function plans patches with ``xtree.execute``
    and calls ``rforest``, ``pred_stats`` and ``impact``; the values those
    calls return are not used here.

    Args:
        n_folds: Accepted but not read anywhere in this function.
    """
    data = DefectData.get_all_projects()["Apache"]
    print("Name\tPd\tPf\tImprovement")
    for proj, paths in data.iteritems():
        # Skip the bellwether dataset itself.
        if proj in paths.bellw:
            continue

        print(proj[:4], end="\t")

        # Only the first part of the split is used, as training data.
        train, _ = train_test_split(list2dataframe(paths.data),
                                    test_size=0.8)
        newest = paths.data[-1]
        validation = paths.data[:-1]

        # NOTE(review): `newest` and `validation` are handed over without
        # going through list2dataframe first -- confirm the helpers accept
        # that.
        patched = xtree.execute(train, newest)
        test = list2dataframe(newest)

        # How good are the patches?
        pred, _ = rforest(validation, patched)
        # How good are the predictions?
        pred2, distr2 = rforest(validation, test)

        # NOTE(review): both results below are discarded, so the Pd / Pf /
        # Improvement columns of the header are only filled if these helpers
        # print by themselves -- verify.
        pred_stats(before=test[test.columns[-1]],
                   after=pred2,
                   distr=distr2)

        impact(test, pred)
Beispiel #7
0
def run_experiment():
    """Print, per project, the median change count of every planner.

    Consumes ``changes(data)`` and, for each yielded project, prints the
    project key followed by one tab-separated row per metric: row index,
    metric name without its first character, and the integer-truncated
    per-column medians for xtree_local, xtree_bellw, olive, alves and shatw
    (in that order). Ends by calling ``set_trace()``.
    """
    data = DefectData.get_all_projects()["Apache"]
    metrics = list2dataframe(data["ant"].data[-1]).columns

    # Column order of the printed table.
    planners = ("xtree_local", "xtree_bellw", "olive", "alves", "shatw")

    for res in changes(data):
        for key, value in res.iteritems():
            print(key)
            # Median over the recorded folds, one value per metric column.
            medians = [np.median(value[name], axis=0) for name in planners]
            for n, row in enumerate(zip(metrics, *medians)):
                attr, counts = row[0], row[1:]
                print(n,
                      attr[1:],
                      *[int(count) for count in counts],
                      sep="\t")

    set_trace()
Beispiel #8
0
    featureTot = 0
    information_gain = []
    for i in range(0, len(nz[0])):
        if (i != 0 and nz[0][i] != pre):
            for notappear in range(pre + 1, nz[0][i]):
                information_gain.append(0)
            ig = _calIg()
            information_gain.append(ig)
            pre = nz[0][i]
            classCnt = {}
            featureTot = 0
        featureTot = featureTot + 1
        yclass = y[nz[1][i]]
        if yclass not in classCnt:
            classCnt[yclass] = 1
        else:
            classCnt[yclass] = classCnt[yclass] + 1
    ig = _calIg()
    information_gain.append(ig)

    return np.asarray(information_gain)


if __name__ == "__main__":
    # Ad-hoc driver: run information_gain on the "ant" project of the Apache
    # data and then stop in the debugger.
    data = DefectData.get_all_projects()["Apache"]
    test_data = list2dataframe(data["ant"].data)
    # Every column except the last is passed as the independent variables;
    # the last column is passed as the dependent variable.
    indep_var = test_data[test_data.columns[:-1]]
    depen_var = test_data[test_data.columns[-1]]
    # NOTE(review): the return value is not stored; it is only observable
    # by stepping in the debugger or through side effects of the call.
    information_gain(indep_var.values, depen_var.values)
    # Presumably a pdb-style breakpoint for interactive inspection -- confirm
    # which set_trace is imported.
    set_trace()
Beispiel #9
0
def transfer_lessons(data=None):
    """Yield the impact of five planners' patches for selected projects.

    Only the projects named "ant", "ivy", "poi" and "jedit" are processed.
    For each fold of a 5-way split of the bellwether data, every planner
    patches ``paths.data[-1]``, ``xgboost`` predicts on the patched frame,
    and ``impact`` is computed against the frame built from all of
    ``paths.data``.

    Args:
        data: Mapping-like object exposing ``iteritems()`` that yields
            ``(project_name, paths)`` pairs and supports lookup by
            ``paths.bellw``. When ``None``, the "Apache" entry of
            ``DefectData.get_all_projects()`` is used.

    Yields:
        dict: ``{proj[:6]: {planner: [impact, ...]}}`` with the planner keys
        ``"xtree_local"``, ``"xtree_bellw"``, ``"alves"``, ``"olive"`` and
        ``"shatw"``; one entry per fold in every list.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # NOTE(review): the selection is a hard-coded list of project names,
        # not a check against paths.bellw.
        if proj not in ("ant", "ivy", "poi", "jedit"):
            continue

        key = proj[:6]
        res = {
            key: {
                "xtree_local": [],
                "xtree_bellw": [],
                "alves": [],
                "olive": [],
                "shatw": []
            }
        }

        bellw = list2dataframe(data[paths.bellw].data)
        test = list2dataframe(paths.data)
        test_local = list2dataframe(paths.data[-1])
        train_local = list2dataframe(paths.data[:-1])

        for train_bellw, validation in CrossValidation.split(bellw, ways=5):
            # (result key, planner, training data); the order fixes the
            # order in which planners, predictors and impact are invoked.
            plans = (
                ("alves", alves, train_bellw),
                ("shatw", shatnawi, train_bellw),
                ("olive", oliveira, train_bellw),
                ("xtree_bellw", xtree, train_bellw),
                ("xtree_local", xtree, train_local),
            )

            # Phase 1: let every planner patch the local test set.
            patched = [(name, planner(train, test_local))
                       for name, planner, train in plans]

            # Phase 2: how good are the patches of each planner?
            predicted = []
            for name, frame in patched:
                pred, _ = xgboost(validation, frame)
                predicted.append((name, pred))

            # Phase 3: record the impact of each planner's patches.
            for name, pred in predicted:
                res[key][name].append(impact(test, pred))

            # Not yet: a "fontana" planner is planned but not wired in.

        yield res
def run_all_tests():
    """Load the Apache "ant" project, break into the debugger, run the Alves test."""
    projects = DefectData.get_all_projects()
    data = projects["Apache"]["ant"]
    # Breakpoint before the test runs, so `data` can be inspected first.
    set_trace()
    __test_alves(data)