def findNextTick(df, type): df["nextClose"] = df[predictionLabel].shift( -1) #Creating a column for next value (This is what we are predicting) #df["nextIndex"] = df.index #Creating a new index column #df["nextIndex"] = df["nextIndex"].shift(-1) #Shifting new index column back #df.at[len(df)-1, 'nextIndex'] = df.iloc[len(df) - 2]["nextIndex"] + 1 #df = df[0:len(df) - 2] # X_pred = df[-1:].drop(["nextClose"], axis=1) #Setting up a variable for prediction. df = df[0:-1] #Taking all but the last value for training X = df.drop(["nextClose"], axis=1) #Dropping the answers y = df["nextClose"] #Creating an answer list r1 = LinearRegression(n_jobs=-1) r2 = tree.DecisionTreeRegressor() r3 = ensemble.RandomForestRegressor(n_jobs=-1) #r4 = svm.LinearSVR() estimators = [('r1', r1), ('r2', r2), ('r3', r3)] if (type == 0): regressor = ensemble.StackingRegressor( estimators=estimators, final_estimator=ensemble.RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)) elif (type == 1): regressor = ensemble.VotingRegressor(estimators=estimators) regressor.fit(X, y) #training the algorithm y_pred = list(regressor.predict(X_pred)) y_pred.insert(0, X_pred.iloc[0][predictionLabel]) y_pred = np.asarray(y_pred) x_predTime = list(X_pred.index) x_predTime.append(x_predTime[0] + 1) x_predTime = np.asarray(x_predTime) print(y_pred) print(x_predTime) return {"Y": y_pred, "X": x_predTime}
def findNextTick(df, type): df["nextClose"] = df[predictionLabel].shift(-1) #df["nextTime"] = df["time"].shift(-1) df["nextIndex"] = df.index df["nextIndex"] = df["nextIndex"].shift(-1) df.at[len(df)-1, 'nextIndex'] = df.iloc[len(df) - 2]["nextIndex"] + 1 df = df[0:len(df) - 2] #df.to_csv("test3.csv") X_pred = df[-1:].drop(["nextClose"], axis=1) print(X_pred) df = df[0:-1] X = df.drop(["nextClose"], axis=1) #X.to_csv("test4.csv") y = df["nextClose"] r1 = LinearRegression(n_jobs=-1) r2 = tree.DecisionTreeRegressor() r3 = ensemble.RandomForestRegressor(n_jobs=-1) r4 = svm.LinearSVR() #r4 = ensemble.AdaBoostRegressor() #r5 = ensemble.BaggingRegressor(n_jobs=-1) #r6 = ensemble.GradientBoostingRegressor() estimators = [ ('r1', r1), ('r2', r2), ('r3', r3), ('r4', r4) ] if(type == 0): regressor = ensemble.StackingRegressor( estimators=estimators, final_estimator=ensemble.RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1) ) elif(type == 1): regressor = ensemble.VotingRegressor( estimators=estimators ) regressor.fit(X, y) #training the algorithm y_pred = list(regressor.predict(X_pred)) y_pred.insert(0,X_pred.iloc[0][predictionLabel]) y_pred = np.asarray(y_pred) x_predTime = list(X_pred.index) x_predTime.append(x_predTime[0] + 1) x_predTime = np.asarray(x_predTime) print(y_pred) print(x_predTime) return {"Y":y_pred,"X":x_predTime}
import numpy as np
from sklearn import ensemble, tree
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import RegressorChain

def findNextTick(df, type):
    nextStrings = []  # Column names for the next values (this is what we predict)
    for i in predictionLabels:
        nextString = "next" + str(i)
        df[nextString] = df[i].shift(-1)
        nextStrings.append(nextString)
    X_pred = df[-1:].drop(nextStrings, axis=1)  # Most recent row, used as the prediction input
    df = df[0:-1]                               # Keep all but the last row for training
    X = df.drop(nextStrings, axis=1)            # Features: everything except the answer columns
    y = df[nextStrings]                         # Targets, one column per predicted label
    r1 = LinearRegression(n_jobs=-1)
    r2 = tree.DecisionTreeRegressor()
    r3 = ensemble.RandomForestRegressor(n_jobs=-1)
    estimators = [('r1', r1), ('r2', r2), ('r3', r3)]
    if type == 0:
        regressor = ensemble.StackingRegressor(
            estimators=estimators,
            final_estimator=ensemble.RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1))
    elif type == 1:
        regressor = ensemble.VotingRegressor(estimators=estimators)
    print("I got here!")
    regressor = RegressorChain(regressor)  # Chain the single-output ensemble across the targets
    regressor.fit(X, y)  # Train the ensemble
    y_pred = list(regressor.predict(X_pred))
    y_pred.insert(0, X_pred.iloc[0][predictionLabels])  # Prepend the last known values
    y_pred = np.asarray(y_pred)
    x_predTime = list(X_pred.index)
    x_predTime.append(x_predTime[0] + 1)
    x_predTime = np.asarray(x_predTime)
    print(y_pred)
    print(x_predTime)
    return {"Y": y_pred, "X": x_predTime}
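# Multi-target usage sketch (hypothetical data): `predictionLabels` is a
# module-level global in the source; here it is assumed to list two price columns.
import numpy as np
import pandas as pd

predictionLabels = ["close", "high"]  # assumed global, not from the source

n = 12
df = pd.DataFrame({
    "close": np.linspace(1.0, 2.1, n),
    "high":  np.linspace(1.1, 2.2, n),
})
result = findNextTick(df, type=0)  # type=0 selects the stacking branch
print(result["Y"])  # row 0: last known [close, high]; row 1: predicted next [close, high]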
# r7 = linear_model.BayesianRidge()
# r8 = linear_model.ARDRegression()
# r9 = linear_model.HuberRegressor()
r10 = linear_model.Lasso()
r11 = svm.LinearSVR()
# r12 = gaussian_process.GaussianProcessRegressor()  # overfitting
# r13 = linear_model.PassiveAggressiveRegressor()    # takes okay-ish time
# r14 = linear_model.RANSACRegressor()               # overfitting?
r15 = linear_model.SGDRegressor()
# r16 = linear_model.TheilSenRegressor()             # tends to make things worse

# r10, r11 and r15 are kept live because they are used below; r1-r4 are defined
# earlier in the script.
rs = [r1, r2, r3, r4, r10, r11, r15]
regressor_list = []
for idx, r in enumerate(rs):
    regressor_list.append((f'r{idx}', r))
estimator = ensemble.VotingRegressor(regressor_list)
#estimator = ensemble.RandomForestRegressor(max_depth=3, min_samples_split=2, random_state=0, n_estimators=700)

# # 37 features
# estimator_optimization = [
#     {
#         'estimator': ensemble.RandomForestRegressor(random_state=0),
#         'params_dist': {"max_depth": [3, None],
#                         "max_features": sp_randint(1, 38),
#                         "min_samples_split": sp_randint(2, 11),
#                         "bootstrap": [True, False],
#                         "n_estimators": [100, 700]}
#     },
# ]
#
# estimator_list = [RandomizedSearchCV(rand_search_cv_config['estimator'], param_distributions=rand_search_cv_config['params_dist'], n_iter=10, cv=5, iid=False)
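# A minimal, runnable sketch of the RandomizedSearchCV idea commented out above,
# assuming a modern scikit-learn (the `iid` argument was removed in 0.24). The
# data here is a synthetic placeholder; the feature count 37 matches the
# "# 37 features" note in the source.
import numpy as np
from scipy.stats import randint as sp_randint
from sklearn import ensemble
from sklearn.model_selection import RandomizedSearchCV

rng = np.random.RandomState(0)
X_demo, y_demo = rng.rand(200, 37), rng.rand(200)  # placeholder data

search = RandomizedSearchCV(
    ensemble.RandomForestRegressor(random_state=0),
    param_distributions={"max_depth": [3, None],
                         "max_features": sp_randint(1, 38),
                         "min_samples_split": sp_randint(2, 11),
                         "bootstrap": [True, False],
                         "n_estimators": [100, 700]},
    n_iter=10, cv=5, random_state=0)
search.fit(X_demo, y_demo)
print(search.best_params_)  # the tuned forest is available as search.best_estimator_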
from sklearn import ensemble, linear_model, neighbors, svm, tree
from sklearn.ensemble import AdaBoostRegressor
from sklearn.naive_bayes import GaussianNB

def model_selector(model):
    # Ye good ol' ugly if-elif switch to choose the model
    if model == "linear":
        regr = linear_model.LinearRegression(n_jobs=-1)
    elif model == "lasso":
        regr = linear_model.Lasso(random_state=17)
    elif model == "elasticnet" or model == "elastic":
        regr = linear_model.ElasticNet(random_state=17)
    elif model == "bayesian":
        regr = linear_model.BayesianRidge()
    elif model == "decision tree regressor" or model == "dtr":
        regr = tree.DecisionTreeRegressor(max_depth=8, min_samples_leaf=17, random_state=17)
    elif model == "tweedie regressor 0" or model == "normal distribution":
        regr = linear_model.TweedieRegressor(power=0)
    elif model == "tweedie regressor 1" or model == "poisson distribution":
        regr = linear_model.TweedieRegressor(power=1)
    elif model == "extra trees regressor" or model == "etr":
        regr = ensemble.ExtraTreesRegressor(max_depth=8, min_samples_leaf=17, random_state=17)
    elif model == "random forest regressor" or model == "rfr":
        regr = ensemble.RandomForestRegressor(n_estimators=500, oob_score=True, random_state=17, n_jobs=-1)
    elif model == "adaboost extra trees" or model == "boost et":
        regr = AdaBoostRegressor(ensemble.ExtraTreesRegressor(max_depth=8, min_samples_leaf=17, random_state=17),
                                 n_estimators=500, random_state=17)
    elif model == "k neighbours" or model == "k neighbor":
        regr = neighbors.KNeighborsRegressor(n_jobs=-1)
    elif model == "gradient boosting regressor" or model == "gbr":
        regr = ensemble.GradientBoostingRegressor(random_state=17)
    elif model == "voting":
        clf1 = linear_model.LinearRegression(n_jobs=-1)
        clf2 = ensemble.RandomForestRegressor(max_depth=8, min_samples_leaf=17, random_state=17, n_jobs=-1)
        clf3 = ensemble.GradientBoostingRegressor(random_state=17)
        regr = ensemble.VotingRegressor(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], n_jobs=-1)
    elif model == "logistic":
        regr = linear_model.LogisticRegression(max_iter=250, random_state=17, n_jobs=-1)
    elif model == "gaussian":
        regr = GaussianNB()
    elif model == "decision tree classifier" or model == "dtc":
        regr = tree.DecisionTreeClassifier(max_depth=8, min_samples_leaf=17, random_state=17)
    elif model == "extra tree classifier" or model == "etc":
        regr = ensemble.ExtraTreesClassifier(max_depth=8, min_samples_leaf=17, random_state=17)
    elif model == "random forest classifier" or model == "rfc":
        regr = ensemble.RandomForestClassifier(max_depth=8, min_samples_leaf=17, random_state=17)
    elif model == "linear svc":
        regr = svm.LinearSVC(random_state=17)
    elif model == "k neighbour classifier" or model == "k neighbor classifier":
        regr = neighbors.KNeighborsClassifier(n_jobs=-1, n_neighbors=2)
    elif model == "svc":
        regr = svm.SVC(kernel="rbf", probability=True, random_state=17)
    else:
        # Previously an unknown name fell through and raised UnboundLocalError on return
        raise ValueError(f"unknown model name: {model!r}")
    return regr
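# Usage sketch for model_selector on synthetic data (the real features and
# targets come from elsewhere in the source):
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=300, n_features=10, noise=0.1, random_state=17)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=17)

regr = model_selector("rfr")  # short alias resolves to the RandomForestRegressor branch
regr.fit(X_train, y_train)
print(regr.score(X_test, y_test))  # R^2 for regressors, accuracy for classifiers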
from sklearn import ensemble
from sklearn.linear_model import LinearRegression

#RandomForestRegressor model
rfr = ensemble.RandomForestRegressor(max_depth=20, random_state=0)
rfr.fit(X_train, y_train)
# print("RandomForestRegressor train score: ", rfr.score(X_train, y_train))
# print("RandomForestRegressor test score: ", rfr.score(X_test, y_test))

#ExtraTreesRegressor model
rfr = ensemble.ExtraTreesRegressor(n_estimators=400, random_state=5)
rfr.fit(X_train, y_train)
# print("ExtraTreesRegressor train score: ", rfr.score(X_train, y_train))
# print("ExtraTreesRegressor test score: ", rfr.score(X_test, y_test))

#VotingRegressor model
rfr = ensemble.VotingRegressor([
    ('lr', LinearRegression()),
    ('rf', ensemble.RandomForestRegressor(n_estimators=200, random_state=0))
])
rfr.fit(X_train, y_train)
# print("VotingRegressor train score: ", rfr.score(X_train, y_train))
# print("VotingRegressor test score: ", rfr.score(X_test, y_test))

#GradientBoostingRegressor model
clf = ensemble.GradientBoostingRegressor(n_estimators=400, max_depth=5, min_samples_split=7,
                                         learning_rate=0.1, loss='squared_error')  # 'ls' was renamed in scikit-learn 1.0
clf.fit(X_train, y_train)
print("GradientBoostingRegressor train score: ", clf.score(X_train, y_train))
print("GradientBoostingRegressor test score: ", clf.score(X_test, y_test))
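# The block above re-binds `rfr` per model and assumes X_train/X_test exist in
# scope. A self-contained sketch of the same comparison, with synthetic
# placeholder data and a single fit-and-score loop:
from sklearn import ensemble
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=8, noise=0.2, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

models = {
    "RandomForestRegressor": ensemble.RandomForestRegressor(max_depth=20, random_state=0),
    "ExtraTreesRegressor": ensemble.ExtraTreesRegressor(n_estimators=400, random_state=5),
    "VotingRegressor": ensemble.VotingRegressor([
        ('lr', LinearRegression()),
        ('rf', ensemble.RandomForestRegressor(n_estimators=200, random_state=0))]),
    "GradientBoostingRegressor": ensemble.GradientBoostingRegressor(
        n_estimators=400, max_depth=5, min_samples_split=7, learning_rate=0.1),
}
for name, model in models.items():
    model.fit(X_train, y_train)
    print(f"{name} test score: {model.score(X_test, y_test):.3f}")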