Example #1
def RF(dataset, max_depth=3):

    dataset = normalize(dataset)
    mre_list = []
    sa_list = []
    rse_list = []
    for train, test in KFold_df(dataset, 3):
        train_input = train.iloc[:, :-1]
        train_actual_effort = train.iloc[:, -1]
        test_input = test.iloc[:, :-1]
        test_actual_effort = test.iloc[:, -1]

        # RandomForestRegressor takes n_estimators as its first positional argument,
        # so max_depth has to be passed by keyword.
        model = RandomForestRegressor(max_depth=max_depth)
        model.fit(train_input, train_actual_effort)
        test_predict_effort = model.predict(test_input)
        test_predict_Y = test_predict_effort
        test_actual_Y = test_actual_effort.values

        mre_list.append(mre_calc(test_predict_Y,
                                 test_actual_Y))  ######### for MRE
        sa_list.append(
            sa_calc(test_predict_Y, test_actual_Y,
                    train_actual_effort))  ######### for SA
        # rse_list.append(rse_calc(test_predict_Y, test_actual_Y))  ######### for RSE

    return mre_list, sa_list, rse_list
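
This example, and the ones that follow, call project-specific helpers (normalize, KFold_df, mre_calc, sa_calc) that are defined elsewhere in the source. Below is a minimal sketch of plausible implementations, assuming min-max feature scaling, a shuffled k-fold split over the DataFrame, mean magnitude of relative error for MRE, and standardized accuracy against a mean-effort baseline for SA; the project's actual definitions may differ.

# Hypothetical helper sketches; the real project code may differ.
import numpy as np
from sklearn.model_selection import KFold

def normalize(dataset):
    # Min-max scale every feature column; leave the effort column (last) untouched.
    df = dataset.copy()
    features = df.columns[:-1]
    df[features] = (df[features] - df[features].min()) / \
                   (df[features].max() - df[features].min() + 1e-12)
    return df

def KFold_df(dataset, n_splits):
    # Yield (train, test) DataFrame pairs, one per fold.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=0)
    for train_idx, test_idx in kf.split(dataset):
        yield dataset.iloc[train_idx], dataset.iloc[test_idx]

def mre_calc(predicted, actual):
    # Mean magnitude of relative error, in percent.
    predicted, actual = np.asarray(predicted), np.asarray(actual)
    return float(np.mean(np.abs(actual - predicted) / np.maximum(actual, 1e-12)) * 100)

def sa_calc(predicted, actual, train_actual):
    # Standardized accuracy: 1 - MAR / MAR of a guess-the-training-mean baseline, in percent.
    predicted, actual = np.asarray(predicted), np.asarray(actual)
    mar = np.mean(np.abs(actual - predicted))
    mar_baseline = np.mean(np.abs(actual - np.mean(train_actual)))
    return float((1.0 - mar / (mar_baseline + 1e-12)) * 100)
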
Example #2
def CART(dataset, a=12, b=1, c=2):

    dataset = normalize(dataset)
    mre_list = []
    sa_list = []
    rse_list = []
    for train, test in KFold_df(dataset, 3):
        train_input = train.iloc[:, :-1]
        train_actual_effort = train.iloc[:, -1]
        test_input = test.iloc[:, :-1]
        test_actual_effort = test.iloc[:, -1]
        # max_depth: [1:12], min_samples_leaf: [1:12], min_samples_split: [2:21]

        model = DecisionTreeRegressor(max_depth=a,
                                      min_samples_leaf=b,
                                      min_samples_split=c)
        model.fit(train_input, train_actual_effort)
        test_predict_effort = model.predict(test_input)
        test_predict_Y = test_predict_effort
        test_actual_Y = test_actual_effort.values

        mre_list.append(mre_calc(test_predict_Y,
                                 test_actual_Y))  ######### for MRE
        sa_list.append(
            sa_calc(test_predict_Y, test_actual_Y,
                    train_actual_effort))  ######### for SA
        # rse_list.append(rse_calc(test_predict_Y, test_actual_Y))  ######### for RSE

    return mre_list, sa_list, rse_list
Example #3
def SVM(dataset):

    dataset = normalize(dataset)
    mre_list = []
    sa_list = []
    rse_list = []
    for train, test in KFold_df(dataset, 3):
        train_input = train.iloc[:, :-1]
        train_actual_effort = train.iloc[:, -1]
        test_input = test.iloc[:, :-1]
        test_actual_effort = test.iloc[:, -1]

        model = svm.SVR(gamma='scale')
        model.fit(train_input, train_actual_effort)
        test_predict_effort = model.predict(test_input)
        test_predict_Y = test_predict_effort
        test_actual_Y = test_actual_effort.values

        mre_list.append(mre_calc(test_predict_Y,
                                 test_actual_Y))  ######### for MRE
        sa_list.append(
            sa_calc(test_predict_Y, test_actual_Y,
                    train_actual_effort))  ######### for SA
        # rse_list.append(rse_calc(test_predict_Y, test_actual_Y))  ######### for RSE

    return mre_list, sa_list, rse_list
Example #4
def KNN(dataset, n_neighbors=3):

    dataset = normalize(dataset)
    mre_list = []
    sa_list = []
    rse_list = []
    for train, test in KFold_df(dataset, 3):
        train_input = train.iloc[:, :-1]
        train_actual_effort = train.iloc[:, -1]
        test_input = test.iloc[:, :-1]
        test_actual_effort = test.iloc[:, -1]

        model = neighbors.KNeighborsRegressor(n_neighbors)
        model.fit(train_input, train_actual_effort)
        test_predict_effort = model.predict(test_input)
        test_predict_Y = test_predict_effort
        test_actual_Y = test_actual_effort.values

        mre_list.append(mre_calc(test_predict_Y,
                                 test_actual_Y))  ######### for MRE
        sa_list.append(
            sa_calc(test_predict_Y, test_actual_Y,
                    train_actual_effort))  ######### for SA
        # rse_list.append(rse_calc(test_predict_Y, test_actual_Y))  ######### for RSE

    return mre_list, sa_list, rse_list
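
A usage sketch for the four baseline learners above; the CSV filename and the column layout (feature columns first, actual effort in the last column) are assumptions about how the data is stored.

import pandas as pd

# Hypothetical dataset file: feature columns first, actual effort in the last column.
dataset = pd.read_csv("effort_data.csv")

for name, learner in [("RF", RF), ("CART", CART), ("SVM", SVM), ("KNN", KNN)]:
    mre_list, sa_list, rse_list = learner(dataset)
    print(name, "MRE per fold:", mre_list, "SA per fold:", sa_list)
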
Example #5
# Nested helper from CART_DE (Example #7): it reads validate_train_input,
# validate_train_output, validate_test_input, validate_test_output, and
# metrics from its enclosing scope.
def cart_builder_future(a, b, c):
    model = DecisionTreeRegressor(
        max_depth=a,
        min_samples_leaf=b,
        min_samples_split=c
    )
    model.fit(validate_train_input, validate_train_output)
    validate_test_predict = model.predict(validate_test_input)
    validate_test_actual = validate_test_output.values
    if metrics == 0:
        return mre_calc(validate_test_predict, validate_test_actual)
    if metrics == 1:
        return sa_calc(validate_test_predict, validate_test_actual, validate_train_output)
Example #6
def flash(train_input, train_actual_effort, test_input, test_actual_effort,
          metrics, pop_size):
    def convert(index):  # decode an index in [0, 2880) on a 12 x 12 x 20 grid
        a = int(index / 240 + 1)       # max_depth: 1..12
        b = int(index % 240 / 20 + 1)  # min_samples_leaf: 1..12
        c = int(index % 20 + 2)        # min_samples_split: 2..21
        return a, b, c

    def convert_lr(index):  # decoder for a 30 x 2 x 2 x 100 grid; unused in this function
        a = int(index / 400 + 1)
        b = int(index % 400 / 200 + 1)
        c = int(index % 200 / 100 + 1)
        d = int(index % 100 + 1)
        return a, b, c, d

    # 12 * 12 * 20 = 2,880 candidate configurations in total.
    all_case = set(range(0, 2880))
    # random.sample() no longer accepts a set (Python 3.11+), so sample from a sorted copy.
    modeling_pool = random.sample(sorted(all_case), pop_size)

    List_X = []
    List_Y = []

    for i in range(len(modeling_pool)):
        temp = convert(modeling_pool[i])
        List_X.append(temp)
        model = DecisionTreeRegressor(max_depth=temp[0],
                                      min_samples_leaf=temp[1],
                                      min_samples_split=temp[2])
        model.fit(train_input, train_actual_effort)
        test_predict_effort = model.predict(test_input)
        test_predict_Y = test_predict_effort
        test_actual_Y = test_actual_effort.values

        if metrics == 0:
            List_Y.append(mre_calc(test_predict_Y,
                                   test_actual_Y))  ######### for MRE
        if metrics == 1:
            List_Y.append(
                sa_calc(test_predict_Y, test_actual_Y,
                        train_actual_effort))  ######### for SA

    remain_pool = all_case - set(modeling_pool)
    test_list = []
    for i in list(remain_pool):
        test_list.append(convert(i))

    upper_model = DecisionTreeRegressor()
    life = 20

    while len(List_X) < 201 and life > 0:  # stop at the evaluation budget or when patience ("life") runs out
        upper_model.fit(List_X, List_Y)
        candidate = random.sample(test_list, 1)
        test_list.remove(candidate[0])
        candi_pred_value = upper_model.predict(candidate)
        if metrics == 0:
            if candi_pred_value < np.median(List_Y):  ######### for MRE
                List_X.append(candidate[0])
                candi_config = candidate[0]
                candi_model = DecisionTreeRegressor(
                    max_depth=candi_config[0],
                    min_samples_leaf=candi_config[1],
                    min_samples_split=candi_config[2])
                candi_model.fit(train_input, train_actual_effort)
                candi_pred_Y = candi_model.predict(test_input)
                candi_actual_Y = test_actual_effort.values

                List_Y.append(mre_calc(candi_pred_Y,
                                       candi_actual_Y))  ######### for MRE

            else:
                life -= 1

        if metrics == 1:
            if candi_pred_value > np.median(List_Y):  ######### for SA
                List_X.append(candidate[0])
                candi_config = candidate[0]
                candi_model = DecisionTreeRegressor(
                    max_depth=candi_config[0],
                    min_samples_leaf=candi_config[1],
                    min_samples_split=candi_config[2])
                candi_model.fit(train_input, train_actual_effort)
                candi_pred_Y = candi_model.predict(test_input)
                candi_actual_Y = test_actual_effort.values

                List_Y.append(
                    sa_calc(candi_pred_Y, candi_actual_Y,
                            train_actual_effort))  ######### for SA

            else:
                life -= 1

    # temp_tree = candi_model
    # tree.plot_tree(temp_tree, feature_names=list(train_input.columns.values))
    # plt.show()

    if metrics == 0:
        return np.median(List_Y)  ########## MRE
    if metrics == 1:
        return np.median(List_Y)  ########## SA
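
A sketch of how flash might be driven from a cross-validation loop like the ones in Examples #1 to #4, reusing the same assumed helpers; the wrapper name CART_FLASH, the default pop_size, and the fold count are illustrative choices, not part of the original code.

def CART_FLASH(dataset, metrics=0, pop_size=20):
    # Hypothetical driver: tune the CART hyperparameters with flash() inside each fold.
    dataset = normalize(dataset)
    results = []
    for train, test in KFold_df(dataset, 3):
        train_input = train.iloc[:, :-1]
        train_actual_effort = train.iloc[:, -1]
        test_input = test.iloc[:, :-1]
        test_actual_effort = test.iloc[:, -1]
        results.append(flash(train_input, train_actual_effort,
                             test_input, test_actual_effort,
                             metrics, pop_size))
    return results
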
Example #7
def CART_DE(dataset, metrics, month):

    dataset = normalize(dataset)

    for trainset, testset in df_split(dataset, month):
        train_input = trainset.iloc[:, :-1]
        train_output = trainset.iloc[:, -1]
        test_input = testset.iloc[:, :-1]
        test_output = testset.iloc[:, -1]

    for validate_trainset, validate_testset in df_split(trainset, 1):
        validate_train_input = validate_trainset.iloc[:, :-1]
        validate_train_output = validate_trainset.iloc[:, -1]
        validate_test_input = validate_testset.iloc[:, :-1]
        validate_test_output = validate_testset.iloc[:, -1]

    def cart_builder(a, b, c):
        model = DecisionTreeRegressor(
            max_depth=a,
            min_samples_leaf=b,
            min_samples_split=c
        )
        model.fit(train_input, train_output)
        test_predict = model.predict(test_input)
        test_actual = test_output.values
        if metrics == 0:
            return mre_calc(test_predict, test_actual)
        if metrics == 1:
            return sa_calc(test_predict, test_actual, train_output)

    def cart_builder_future(a, b, c):
        model = DecisionTreeRegressor(
            max_depth=a,
            min_samples_leaf=b,
            min_samples_split=c
        )
        model.fit(validate_train_input, validate_train_output)
        validate_test_predict = model.predict(validate_test_input)
        validate_test_actual = validate_test_output.values
        if metrics == 0:
            return mre_calc(validate_test_predict, validate_test_actual)
        if metrics == 1:
            return sa_calc(validate_test_predict, validate_test_actual, validate_train_output)

    # config_optimized = de(cart_builder, metrics, bounds=[(10,20), (1,10), (2,12)])[1]
    config_optimized = de(cart_builder_future, metrics, bounds=[(10, 20), (1, 10), (2, 12)])[1]
    # print(config_optimized[0], config_optimized[1], config_optimized[2])
    model_touse = DecisionTreeRegressor(
        max_depth=config_optimized[0],
        min_samples_leaf=config_optimized[1],
        min_samples_split=config_optimized[2]
    )

    model_touse.fit(train_input, train_output)
    test_predict = np.rint(model_touse.predict(test_input))
    test_actual = test_output.values

    result_list_mre = []
    result_list_sa = []

    result_list_mre.append(mre_calc(test_predict, test_actual))
    result_list_sa.append(sa_calc(test_predict, test_actual, train_output))
    # print("pre", test_predict, "act", test_actual)
    if metrics == 0:
        return result_list_mre
    if metrics == 1:
        return result_list_sa
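
CART_DE also depends on df_split and de, which are not shown. Judging from how they are called, df_split yields chronological train/test splits and de is a differential-evolution tuner whose result is indexed as (best_score, best_config). Below is a minimal sketch under those assumptions; the project's real implementations may differ.

# Hypothetical sketches; the project's real df_split and de may differ.
import random
import numpy as np

def df_split(dataset, month):
    # Assumed behaviour: rows are in chronological order; hold out the last
    # `month` rows as the test window and yield a single (train, test) pair.
    yield dataset.iloc[:-month], dataset.iloc[-month:]

def de(score_func, metrics, bounds, pop_size=10, generations=10, f=0.8, cr=0.9):
    # Plain DE/rand/1/bin over integer configurations.
    # metrics == 0 minimizes (MRE); metrics == 1 maximizes (SA).
    sign = 1.0 if metrics == 0 else -1.0
    lo = np.array([b[0] for b in bounds], dtype=float)
    hi = np.array([b[1] for b in bounds], dtype=float)

    def evaluate(vec):
        # Round each dimension to an integer hyperparameter before scoring.
        args = [int(round(v)) for v in vec]
        return sign * score_func(*args)

    pop = [lo + np.random.rand(len(bounds)) * (hi - lo) for _ in range(pop_size)]
    scores = [evaluate(p) for p in pop]
    for _ in range(generations):
        for i in range(pop_size):
            r1, r2, r3 = random.sample([j for j in range(pop_size) if j != i], 3)
            mutant = np.clip(pop[r1] + f * (pop[r2] - pop[r3]), lo, hi)
            cross = np.random.rand(len(bounds)) < cr
            cross[random.randrange(len(bounds))] = True  # keep at least one mutant gene
            trial = np.where(cross, mutant, pop[i])
            trial_score = evaluate(trial)
            if trial_score < scores[i]:
                pop[i], scores[i] = trial, trial_score

    best = int(np.argmin(scores))
    best_config = tuple(int(round(v)) for v in pop[best])
    return sign * scores[best], best_config
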