예제 #1
0
def changes(data=None):
    """Yield, per project, the change counts produced by each planner.

    For every ``(proj, paths)`` pair in ``data`` where ``proj`` is not
    contained in ``paths.bellw``, the bellwether data
    (``data[paths.bellw].data``) is split 2 ways with
    ``CrossValidation.split``.  On each fold, five planners are run against
    the last entry of ``paths.data``:

    * ``alves``, ``shatnawi``, ``oliveira`` and ``xtree`` trained on the
      bellwether fold, and
    * ``xtree`` trained on all but the last entry of ``paths.data``.

    Each planner returns a ``(patched, changes)`` pair; only ``changes`` is
    used here, summarised by ``deltas_count`` against the columns of the
    full project frame.

    Args:
        data: Object exposing ``iteritems()`` and item lookup by project
            name, whose values expose ``.data`` and ``.bellw``.  Defaults to
            ``DefectData.get_all_projects()["Apache"]``.

    Yields:
        dict: ``{proj[:6]: {"xtree_local": [...], "xtree_bellw": [...],
        "alves": [...], "olive": [...], "shatw": [...]}}`` with one
        ``deltas_count`` result per fold in every list.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    for proj, paths in data.iteritems():
        # Make sure we don't test on the bellwether dataset.
        # NOTE(review): paths.bellw is also used as a key into `data`, so it
        # is presumably a string and this is a substring test -- confirm that
        # equality was not intended.
        if proj in paths.bellw:
            continue

        key = proj[:6]
        counts = {
            "xtree_local": [],
            "xtree_bellw": [],
            "alves": [],
            "olive": [],
            "shatw": []
        }
        res = {key: counts}

        bellw = list2dataframe(data[paths.bellw].data)
        test = list2dataframe(paths.data)
        test_local = list2dataframe(paths.data[-1])
        train_local = list2dataframe(paths.data[:-1])
        columns = test.columns

        # The validation half of each split is not needed for counting
        # changes, so it is discarded.
        for train_bellw, _ in CrossValidation.split(bellw, ways=2):
            # Only the reported changes matter here; the patched frames
            # returned alongside them are ignored.
            _, changes_alves = alves(train_bellw, test_local)
            _, changes_shatw = shatnawi(train_bellw, test_local)
            _, changes_olive = oliveira(train_bellw, test_local)
            _, changes_xtree = xtree(train_bellw, test_local)
            _, changes_xtree_local = xtree(train_local, test_local)

            # How often does each planner touch each column?
            counts["alves"].append(deltas_count(columns, changes_alves))
            counts["olive"].append(deltas_count(columns, changes_olive))
            counts["shatw"].append(deltas_count(columns, changes_shatw))
            counts["xtree_bellw"].append(
                deltas_count(columns, changes_xtree))
            counts["xtree_local"].append(
                deltas_count(columns, changes_xtree_local))

        yield res
예제 #2
0
def transfer_lessons(data=None):
    """Yield, per project, prediction quality and patch impact scores.

    Every ``(name, paths)`` pair from ``data`` whose name is not contained in
    ``paths.bellw`` is evaluated over a 5-way ``CrossValidation.split`` of
    the bellwether data.  On each fold:

    * ``xtree`` is run on the project's own data (all but the last entry of
      ``paths.data`` as training, the last entry as test), and on the
      bellwether training fold against the whole of ``paths.data``;
    * ``xgboost`` fitted on the validation fold predicts for both patched
      results and for the unpatched test frame;
    * ``pred_stats`` scores the unpatched prediction (its first two elements
      are stored under ``"pd"`` and ``"pf"``), and ``impact`` scores each
      patched prediction against the test frame.

    Args:
        data: Object exposing ``iteritems()`` and item lookup by project
            name, whose values expose ``.data`` and ``.bellw``.  Defaults to
            ``DefectData.get_all_projects()["Apache"]``.

    Yields:
        dict: ``{name[:6]: {"pd": [...], "pf": [...], "local": [...],
        "bellw": [...]}}`` with one entry per fold in every list.
    """
    projects = data
    if projects is None:
        projects = DefectData.get_all_projects()["Apache"]

    for name, paths in projects.iteritems():
        # Make sure we don't test on the bellwether dataset
        if name in paths.bellw:
            continue

        scores = {"pd": [], "pf": [], "local": [], "bellw": []}
        report = {name[:6]: scores}

        bellwether = list2dataframe(projects[paths.bellw].data)
        folds = CrossValidation.split(bellwether, ways=5)

        for fold_train, fold_valid in folds:
            # Frames are rebuilt on every fold, exactly as the planners and
            # learners below receive them.
            history = list2dataframe(paths.data[:-1])
            latest = list2dataframe(paths.data[-1])

            plan_local = xtree(history, latest)
            everything = list2dataframe(paths.data)
            plan_bellw = xtree(fold_train, everything)

            # Predictions on the locally-patched data
            guess_local, _ = xgboost(fold_valid, plan_local)

            # Predictions on the bellwether-patched data
            guess_bellw, _ = xgboost(fold_valid, plan_bellw)

            # Predictions on the unpatched test data
            guess_plain, spread_plain = xgboost(fold_valid, latest)

            actual = latest[latest.columns[-1]]
            quality = pred_stats(before=actual,
                                 after=guess_plain,
                                 distr=spread_plain)

            scores["pd"].append(quality[0])
            scores["pf"].append(quality[1])

            scores["local"].append(impact(latest, guess_local))
            scores["bellw"].append(impact(latest, guess_bellw))

        yield report
예제 #3
0
def transfer_lessons(data=None):
    """Yield, per selected project, the impact of each planner's patches.

    Only the projects named ``"ant"``, ``"ivy"``, ``"poi"`` and ``"jedit"``
    are evaluated.  For each of them the bellwether data
    (``data[paths.bellw].data``) is split 5 ways with
    ``CrossValidation.split`` and every fold goes through three phases:

    1. ``alves``, ``shatnawi``, ``oliveira`` and ``xtree`` (bellwether fold
       as training), plus ``xtree`` trained on all but the last entry of
       ``paths.data``, each patch the last entry of ``paths.data``;
    2. ``xgboost`` fitted on the validation fold predicts for every patched
       result;
    3. ``impact`` scores each prediction against the full project frame.

    Args:
        data: Object exposing ``iteritems()`` and item lookup by project
            name, whose values expose ``.data`` and ``.bellw``.  Defaults to
            ``DefectData.get_all_projects()["Apache"]``.

    Yields:
        dict: ``{proj[:6]: {"xtree_local": [...], "xtree_bellw": [...],
        "alves": [...], "olive": [...], "shatw": [...]}}`` with one
        ``impact`` result per fold in every list.
    """
    if data is None:
        data = DefectData.get_all_projects()["Apache"]

    selected = ["ant", "ivy", "poi", "jedit"]
    labels = ("xtree_local", "xtree_bellw", "alves", "olive", "shatw")

    for proj, paths in data.iteritems():
        # Only the four selected projects are evaluated.
        if proj not in selected:
            continue

        tallies = {label: [] for label in labels}
        res = {proj[:6]: tallies}

        bellw = list2dataframe(data[paths.bellw].data)
        whole = list2dataframe(paths.data)
        newest = list2dataframe(paths.data[-1])
        older = list2dataframe(paths.data[:-1])

        for fold, held_out in CrossValidation.split(bellw, ways=5):
            # Phase 1: let every planner patch the newest data.
            plans = [
                ("alves", alves(fold, newest)),
                ("shatw", shatnawi(fold, newest)),
                ("olive", oliveira(fold, newest)),
                ("xtree_bellw", xtree(fold, newest)),
                ("xtree_local", xtree(older, newest)),
            ]

            # Phase 2: how good are the patches from each planner?
            forecasts = []
            for label, plan in plans:
                predicted, _ = xgboost(held_out, plan)
                forecasts.append((label, predicted))

            # Phase 3: record the impact of each planner's patches.
            for label, predicted in forecasts:
                tallies[label].append(impact(whole, predicted))

            # Fontana-based patches are not evaluated yet.

        yield res
예제 #4
0
def __test_xtree(data):
    """Run ``xtree`` once on ``data`` and discard the result.

    All but the last entry of ``data.data`` are passed as the training
    argument and the last entry as the test argument.

    Args:
        data: Object exposing a sequence-like ``.data`` attribute.
    """
    earlier = data.data[:-1]
    latest = data.data[-1]
    xtree(earlier, latest)