import random

import numpy as np
from sklearn import neighbors, svm
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor


def RF(dataset, max_depth=3):
    """Random forest baseline: 3-fold cross-validation, collecting MRE and SA."""
    dataset = normalize(dataset)
    mre_list = []
    sa_list = []
    rse_list = []
    for train, test in KFold_df(dataset, 3):
        train_input = train.iloc[:, :-1]
        train_actual_effort = train.iloc[:, -1]
        test_input = test.iloc[:, :-1]
        test_actual_effort = test.iloc[:, -1]
        # Pass max_depth by keyword: the first positional parameter of
        # RandomForestRegressor is n_estimators, not max_depth.
        model = RandomForestRegressor(max_depth=max_depth)
        model.fit(train_input, train_actual_effort)
        test_predict_Y = model.predict(test_input)
        test_actual_Y = test_actual_effort.values
        mre_list.append(mre_calc(test_predict_Y, test_actual_Y))                     # MRE
        sa_list.append(sa_calc(test_predict_Y, test_actual_Y, train_actual_effort))  # SA
        # rse_list.append(rse_calc(test_predict_Y, test_actual_Y))                   # RSE
    return mre_list, sa_list, rse_list


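# ---------------------------------------------------------------------------
# The learners in this file call four project helpers that are not defined in
# this section: normalize, KFold_df, mre_calc and sa_calc.  What follows is a
# minimal sketch of plausible implementations, added only so the section runs
# on its own; the project's own versions may differ in detail.
# ---------------------------------------------------------------------------
from sklearn.model_selection import KFold


def normalize(dataset):
    # Assumed behaviour: min-max scale every column into [0, 1].
    return (dataset - dataset.min()) / (dataset.max() - dataset.min() + 1e-12)


def KFold_df(dataset, n_splits):
    # Assumed behaviour: yield (train, test) DataFrame pairs from a k-fold split.
    for train_idx, test_idx in KFold(n_splits=n_splits, shuffle=True).split(dataset):
        yield dataset.iloc[train_idx], dataset.iloc[test_idx]


def mre_calc(predict, actual):
    # Mean magnitude of relative error; lower is better.
    predict, actual = np.asarray(predict, dtype=float), np.asarray(actual, dtype=float)
    return np.mean(np.abs(predict - actual) / np.maximum(np.abs(actual), 1e-12))


def sa_calc(predict, actual, train_actual):
    # Standardized accuracy: 1 - MAR / MAR_guess, where MAR_guess is the error of
    # always predicting the mean training effort; higher is better.
    predict, actual = np.asarray(predict, dtype=float), np.asarray(actual, dtype=float)
    mar = np.mean(np.abs(predict - actual))
    mar_guess = np.mean(np.abs(np.mean(np.asarray(train_actual, dtype=float)) - actual))
    return 1 - mar / max(mar_guess, 1e-12)

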
def CART(dataset, a=12, b=1, c=2):
    """CART baseline: 3-fold cross-validation with fixed hyperparameters."""
    dataset = normalize(dataset)
    mre_list = []
    sa_list = []
    rse_list = []
    for train, test in KFold_df(dataset, 3):
        train_input = train.iloc[:, :-1]
        train_actual_effort = train.iloc[:, -1]
        test_input = test.iloc[:, :-1]
        test_actual_effort = test.iloc[:, -1]
        # Tuning ranges: max_depth [1:12], min_samples_leaf [1:12], min_samples_split [2:21]
        model = DecisionTreeRegressor(max_depth=a, min_samples_leaf=b, min_samples_split=c)
        model.fit(train_input, train_actual_effort)
        test_predict_Y = model.predict(test_input)
        test_actual_Y = test_actual_effort.values
        mre_list.append(mre_calc(test_predict_Y, test_actual_Y))                     # MRE
        sa_list.append(sa_calc(test_predict_Y, test_actual_Y, train_actual_effort))  # SA
        # rse_list.append(rse_calc(test_predict_Y, test_actual_Y))                   # RSE
    return mre_list, sa_list, rse_list


def SVM(dataset):
    """Support-vector regression baseline (RBF kernel, gamma='scale')."""
    dataset = normalize(dataset)
    mre_list = []
    sa_list = []
    rse_list = []
    for train, test in KFold_df(dataset, 3):
        train_input = train.iloc[:, :-1]
        train_actual_effort = train.iloc[:, -1]
        test_input = test.iloc[:, :-1]
        test_actual_effort = test.iloc[:, -1]
        model = svm.SVR(gamma='scale')
        model.fit(train_input, train_actual_effort)
        test_predict_Y = model.predict(test_input)
        test_actual_Y = test_actual_effort.values
        mre_list.append(mre_calc(test_predict_Y, test_actual_Y))                     # MRE
        sa_list.append(sa_calc(test_predict_Y, test_actual_Y, train_actual_effort))  # SA
        # rse_list.append(rse_calc(test_predict_Y, test_actual_Y))                   # RSE
    return mre_list, sa_list, rse_list


def KNN(dataset, n_neighbors=3):
    """k-nearest-neighbours regression baseline."""
    dataset = normalize(dataset)
    mre_list = []
    sa_list = []
    rse_list = []
    for train, test in KFold_df(dataset, 3):
        train_input = train.iloc[:, :-1]
        train_actual_effort = train.iloc[:, -1]
        test_input = test.iloc[:, :-1]
        test_actual_effort = test.iloc[:, -1]
        model = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors)
        model.fit(train_input, train_actual_effort)
        test_predict_Y = model.predict(test_input)
        test_actual_Y = test_actual_effort.values
        mre_list.append(mre_calc(test_predict_Y, test_actual_Y))                     # MRE
        sa_list.append(sa_calc(test_predict_Y, test_actual_Y, train_actual_effort))  # SA
        # rse_list.append(rse_calc(test_predict_Y, test_actual_Y))                   # RSE
    return mre_list, sa_list, rse_list


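# Illustrative usage of the four baselines above (assumption: the dataset is a
# pandas DataFrame whose last column is the actual effort; run_baselines and the
# CSV path are placeholders, not part of the original code).
def run_baselines(csv_path="effort_data.csv"):
    import pandas as pd

    data = pd.read_csv(csv_path)
    for name, learner in [("RF", RF), ("CART", CART), ("SVM", SVM), ("KNN", KNN)]:
        mre_scores, sa_scores, _ = learner(data.copy())
        print(name, "median MRE:", np.median(mre_scores), "median SA:", np.median(sa_scores))

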
def cart_builder_future(a, b, c):
    # Objective for tuning on a validation window.  This module-level copy reads
    # validate_train_input, validate_train_output, validate_test_input,
    # validate_test_output and metrics from the enclosing scope; the version
    # nested inside CART_DE below is the one actually bound to those names.
    model = DecisionTreeRegressor(max_depth=a, min_samples_leaf=b, min_samples_split=c)
    model.fit(validate_train_input, validate_train_output)
    validate_test_predict = model.predict(validate_test_input)
    validate_test_actual = validate_test_output.values
    if metrics == 0:
        return mre_calc(validate_test_predict, validate_test_actual)
    if metrics == 1:
        return sa_calc(validate_test_predict, validate_test_actual, validate_train_output)


def flash(train_input, train_actual_effort, test_input, test_actual_effort, metrics, pop_size):
    """FLASH-style sequential model-based tuning of a CART learner.

    metrics == 0 optimises (minimises) MRE; metrics == 1 optimises (maximises) SA.
    """
    def convert(index):
        # Decode a flat index into (max_depth, min_samples_leaf, min_samples_split)
        # over a 12 x 12 x 20 grid.
        a = int(index / 240 + 1)
        b = int(index % 240 / 20 + 1)
        c = int(index % 20 + 2)
        return a, b, c

    def convert_lr(index):
        # Decoder for a 30 x 2 x 2 x 100 grid; kept from the original code but not used by flash().
        a = int(index / 400 + 1)
        b = int(index % 400 / 200 + 1)
        c = int(index % 200 / 100 + 1)
        d = int(index % 100 + 1)
        return a, b, c, d

    def evaluate(config):
        # Train a CART with the given configuration and score it on the test fold.
        model = DecisionTreeRegressor(max_depth=config[0],
                                      min_samples_leaf=config[1],
                                      min_samples_split=config[2])
        model.fit(train_input, train_actual_effort)
        predict_Y = model.predict(test_input)
        actual_Y = test_actual_effort.values
        if metrics == 0:
            return mre_calc(predict_Y, actual_Y)                              # MRE
        return sa_calc(predict_Y, actual_Y, train_actual_effort)              # SA

    all_case = set(range(0, 2880))                              # 12 * 12 * 20 configurations
    modeling_pool = random.sample(sorted(all_case), pop_size)   # sample needs a sequence, not a set

    List_X = [convert(i) for i in modeling_pool]                # evaluated configurations
    List_Y = [evaluate(x) for x in List_X]                      # their scores

    test_list = [convert(i) for i in all_case - set(modeling_pool)]  # unevaluated pool

    upper_model = DecisionTreeRegressor()                       # surrogate over configurations
    life = 20
    while len(List_X) < 201 and life > 0:                       # evaluation budget
        upper_model.fit(List_X, List_Y)
        candidate = random.sample(test_list, 1)[0]
        test_list.remove(candidate)
        candi_pred_value = upper_model.predict([candidate])[0]
        # Evaluate the candidate for real only if the surrogate predicts it beats
        # the median score so far (lower is better for MRE, higher for SA).
        if (candi_pred_value < np.median(List_Y)) if metrics == 0 else (candi_pred_value > np.median(List_Y)):
            List_X.append(candidate)
            List_Y.append(evaluate(candidate))
        else:
            life -= 1
    # tree.plot_tree(model, feature_names=list(train_input.columns.values))
    # plt.show()
    return np.median(List_Y)   # median MRE (metrics == 0) or median SA (metrics == 1)


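# Worked example of flash()'s convert() encoding: the flat index enumerates the
# 12 x 12 x 20 grid, e.g. index 500 -> (500 // 240 + 1, 500 % 240 // 20 + 1,
# 500 % 20 + 2) = (3, 2, 2).  Below is a sketch of driving flash() with the same
# 3-fold protocol as the baselines above; flash_cart is an illustrative name,
# not part of the original code.
def flash_cart(dataset, metrics=0, pop_size=20):
    dataset = normalize(dataset)
    results = []
    for train, test in KFold_df(dataset, 3):
        results.append(flash(train.iloc[:, :-1], train.iloc[:, -1],
                             test.iloc[:, :-1], test.iloc[:, -1],
                             metrics, pop_size))
    return results

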
def CART_DE(dataset, metrics, month):
    """CART tuned by differential evolution on a nested validation split.

    Hyperparameters are chosen on a validation window carved out of the training
    rows, then the tuned model is scored on the held-out test window.
    """
    dataset = normalize(dataset)
    for trainset, testset in df_split(dataset, month):
        train_input = trainset.iloc[:, :-1]
        train_output = trainset.iloc[:, -1]
        test_input = testset.iloc[:, :-1]
        test_output = testset.iloc[:, -1]

        # Nested split of the training window, used only for hyperparameter tuning.
        for validate_trainset, validate_testset in df_split(trainset, 1):
            validate_train_input = validate_trainset.iloc[:, :-1]
            validate_train_output = validate_trainset.iloc[:, -1]
            validate_test_input = validate_testset.iloc[:, :-1]
            validate_test_output = validate_testset.iloc[:, -1]

        def cart_builder(a, b, c):
            # Objective scored directly on the test window (used only by the
            # commented-out call below).
            model = DecisionTreeRegressor(max_depth=a, min_samples_leaf=b, min_samples_split=c)
            model.fit(train_input, train_output)
            test_predict = model.predict(test_input)
            test_actual = test_output.values
            if metrics == 0:
                return mre_calc(test_predict, test_actual)
            if metrics == 1:
                return sa_calc(test_predict, test_actual, train_output)

        def cart_builder_future(a, b, c):
            # Objective scored on the validation window; this is what DE tunes.
            model = DecisionTreeRegressor(max_depth=a, min_samples_leaf=b, min_samples_split=c)
            model.fit(validate_train_input, validate_train_output)
            validate_test_predict = model.predict(validate_test_input)
            validate_test_actual = validate_test_output.values
            if metrics == 0:
                return mre_calc(validate_test_predict, validate_test_actual)
            if metrics == 1:
                return sa_calc(validate_test_predict, validate_test_actual, validate_train_output)

        # config_optimized = de(cart_builder, metrics, bounds=[(10, 20), (1, 10), (2, 12)])[1]
        config_optimized = de(cart_builder_future, metrics, bounds=[(10, 20), (1, 10), (2, 12)])[1]

        model_touse = DecisionTreeRegressor(max_depth=config_optimized[0],
                                            min_samples_leaf=config_optimized[1],
                                            min_samples_split=config_optimized[2])
        model_touse.fit(train_input, train_output)
        test_predict = np.rint(model_touse.predict(test_input))
        test_actual = test_output.values

        result_list_mre = [mre_calc(test_predict, test_actual)]
        result_list_sa = [sa_calc(test_predict, test_actual, train_output)]
        # print("pre", test_predict, "act", test_actual)
        # Scores are returned after the first split yielded by df_split.
        if metrics == 0:
            return result_list_mre
        if metrics == 1:
            return result_list_sa


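# ---------------------------------------------------------------------------
# CART_DE relies on two further project helpers that are not defined in this
# section: df_split and de.  The sketches below are assumptions, included only
# to make the call sites above concrete.  df_split is read here as "hold out
# the last `month` rows as the test window"; de is a minimal differential-
# evolution tuner whose return value is indexed as de(...)[1] to obtain the best
# (max_depth, min_samples_leaf, min_samples_split) configuration.
# ---------------------------------------------------------------------------
def df_split(dataset, month):
    # Single chronological split: everything before the window vs. the window.
    yield dataset.iloc[:-month, :], dataset.iloc[-month:, :]


def de(objective, metrics, bounds, pop_size=10, f=0.75, cr=0.3, iterations=10):
    # Minimise MRE (metrics == 0) or maximise SA (metrics == 1) by flipping sign.
    sign = 1 if metrics == 0 else -1
    lo = np.array([b[0] for b in bounds], dtype=float)
    hi = np.array([b[1] for b in bounds], dtype=float)

    def score(x):
        # Hyperparameters are integers, so round before calling the objective.
        return sign * objective(*[int(round(v)) for v in x])

    pop = lo + np.random.rand(pop_size, len(bounds)) * (hi - lo)
    scores = np.array([score(x) for x in pop])
    for _ in range(iterations):
        for i in range(pop_size):
            r1, r2, r3 = pop[np.random.choice(pop_size, 3, replace=False)]
            trial = np.clip(r1 + f * (r2 - r3), lo, hi)                    # mutation
            trial = np.where(np.random.rand(len(bounds)) < cr, trial, pop[i])  # crossover
            trial_score = score(trial)
            if trial_score < scores[i]:                                    # selection
                pop[i], scores[i] = trial, trial_score
    best = pop[np.argmin(scores)]
    return sign * scores.min(), [int(round(v)) for v in best]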