Example #1
import numpy as np
from sklearn import ensemble, tree
from sklearn.linear_model import LinearRegression

def findNextTick(df, model_type):
    # `predictionLabel` is assumed to be defined at module level.
    df["nextClose"] = df[predictionLabel].shift(-1)  # Next value (this is what we predict)
    X_pred = df[-1:].drop(["nextClose"], axis=1)  # Last row, used for the prediction
    df = df[0:-1]  # All but the last row, used for training
    X = df.drop(["nextClose"], axis=1)  # Features: drop the answers
    y = df["nextClose"]  # Targets
    r1 = LinearRegression(n_jobs=-1)
    r2 = tree.DecisionTreeRegressor()
    r3 = ensemble.RandomForestRegressor(n_jobs=-1)
    #r4 = svm.LinearSVR()
    estimators = [('r1', r1), ('r2', r2), ('r3', r3)]
    if model_type == 0:
        regressor = ensemble.StackingRegressor(
            estimators=estimators,
            final_estimator=ensemble.RandomForestRegressor(n_estimators=100,
                                                           random_state=42,
                                                           n_jobs=-1))
    elif model_type == 1:
        regressor = ensemble.VotingRegressor(estimators=estimators)
    else:
        raise ValueError("model_type must be 0 (stacking) or 1 (voting)")
    regressor.fit(X, y)  # Train the ensemble
    y_pred = list(regressor.predict(X_pred))
    y_pred.insert(0, X_pred.iloc[0][predictionLabel])  # Prepend the last known value
    y_pred = np.asarray(y_pred)
    x_predTime = list(X_pred.index)
    x_predTime.append(x_predTime[0] + 1)  # Next time step
    x_predTime = np.asarray(x_predTime)
    print(y_pred)
    print(x_predTime)
    return {"Y": y_pred, "X": x_predTime}
Example #2
import numpy as np
from sklearn import ensemble, svm, tree
from sklearn.linear_model import LinearRegression

def findNextTick(df, model_type):
    # `predictionLabel` is assumed to be defined at module level.
    df["nextClose"] = df[predictionLabel].shift(-1)
    #df["nextTime"] = df["time"].shift(-1)
    df["nextIndex"] = df.index  # Assumes a 0-based RangeIndex
    df["nextIndex"] = df["nextIndex"].shift(-1)
    df.at[len(df)-1, 'nextIndex'] = df.iloc[len(df) - 2]["nextIndex"] + 1  # Fill the shifted-out last row
    df = df[0:len(df) - 2]
    #df.to_csv("test3.csv")
    X_pred = df[-1:].drop(["nextClose"], axis=1)
    print(X_pred)
    df = df[0:-1]
    X = df.drop(["nextClose"], axis=1)
    #X.to_csv("test4.csv")
    y = df["nextClose"]
    r1 = LinearRegression(n_jobs=-1)
    r2 = tree.DecisionTreeRegressor()
    r3 = ensemble.RandomForestRegressor(n_jobs=-1)
    r4 = svm.LinearSVR()
    #r4 = ensemble.AdaBoostRegressor()
    #r5 = ensemble.BaggingRegressor(n_jobs=-1)
    #r6 = ensemble.GradientBoostingRegressor()
    estimators = [
       ('r1', r1),
       ('r2', r2),
       ('r3', r3),
       ('r4', r4)
    ]
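    # model_type 0 stacks the base models under a random-forest meta-learner;
    # model_type 1 simply averages the base models' predictions.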
    if model_type == 0:
        regressor = ensemble.StackingRegressor(
            estimators=estimators,
            final_estimator=ensemble.RandomForestRegressor(n_estimators=100,
                                                           random_state=42,
                                                           n_jobs=-1))
    elif model_type == 1:
        regressor = ensemble.VotingRegressor(estimators=estimators)
    else:
        raise ValueError("model_type must be 0 (stacking) or 1 (voting)")
    regressor.fit(X, y)  # Train the ensemble
    y_pred = list(regressor.predict(X_pred))
    y_pred.insert(0, X_pred.iloc[0][predictionLabel])  # Prepend the last known value
    y_pred = np.asarray(y_pred)
    x_predTime = list(X_pred.index)
    x_predTime.append(x_predTime[0] + 1)  # Next time step
    x_predTime = np.asarray(x_predTime)
    print(y_pred)
    print(x_predTime)
    return {"Y": y_pred, "X": x_predTime}
Example #3
import numpy as np
from sklearn import ensemble, tree
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import RegressorChain

def findNextTick(df, model_type):
    # `predictionLabels` (a list of target column names) is assumed to be defined at module level.
    nextStrings = []
    # Create one "next" column per target (these are what we predict)
    for i in predictionLabels:
        nextString = "next" + str(i)
        df[nextString] = df[i].shift(-1)
        nextStrings.append(nextString)

    X_pred = df[-1:].drop(nextStrings, axis=1)  # Last row, used for the prediction
    df = df[0:-1]  # All but the last row, used for training
    X = df.drop(nextStrings, axis=1)  # Features: drop the answers
    y = df[nextStrings]  # Targets (one column per label)
    r1 = LinearRegression(n_jobs=-1)
    r2 = tree.DecisionTreeRegressor()
    r3 = ensemble.RandomForestRegressor(n_jobs=-1)
    estimators = [
       ('r1', r1),
       ('r2', r2),
       ('r3', r3)
    ]
    if model_type == 0:
        regressor = ensemble.StackingRegressor(
            estimators=estimators,
            final_estimator=ensemble.RandomForestRegressor(n_estimators=100,
                                                           random_state=42,
                                                           n_jobs=-1))
    elif model_type == 1:
        regressor = ensemble.VotingRegressor(estimators=estimators)
    else:
        raise ValueError("model_type must be 0 (stacking) or 1 (voting)")
    # RegressorChain fits one copy of the ensemble per target, feeding each
    # prediction into the next model in the chain (multi-output regression).
    regressor = RegressorChain(regressor)
    regressor.fit(X, y)  # Train the chained ensemble
    y_pred = list(regressor.predict(X_pred))  # One row of predictions across all targets
    y_pred.insert(0, X_pred.iloc[0][predictionLabels])  # Prepend the last known values
    y_pred = np.asarray(y_pred)
    x_predTime = list(X_pred.index)
    x_predTime.append(x_predTime[0] + 1)  # Next time step
    x_predTime = np.asarray(x_predTime)
    print(y_pred)
    print(x_predTime)
    return {"Y": y_pred, "X": x_predTime}
Example #4
# r7 = linear_model.BayesianRidge()
# r8 = linear_model.ARDRegression()
# r9 = linear_model.HuberRegressor()
# r10 = linear_model.Lasso()
# r11 = svm.LinearSVR()
# r12 = gaussian_process.GaussianProcessRegressor() # overfitting
# r13 = linear_model.PassiveAggressiveRegressor() # takes acceptable time
# r14 = linear_model.RANSACRegressor() # overfitting?
# r15 = linear_model.SGDRegressor()
# r16 = linear_model.TheilSenRegressor() # tends to make things worse

# r10, r11 and r15 correspond to the commented-out definitions above;
# r1-r4 are assumed to be defined earlier in the original script.
rs = [r1, r2, r3, r4, r10, r11, r15]
regressor_list = []
for idx, r in enumerate(rs):
    regressor_list.append((f'r{idx}', r))
estimator = ensemble.VotingRegressor(regressor_list)
#estimator = ensemble.RandomForestRegressor(max_depth=3, min_samples_split=2, random_state=0, n_estimators=700)

# # 37 features
# estimator_optimization= [
#     {
#         'estimator':  ensemble.RandomForestRegressor(random_state=0),
#         'params_dist': {"max_depth": [3, None],
#               "max_features": sp_randint(1, 38),
#               "min_samples_split": sp_randint(2, 11),
#               "bootstrap": [True, False],
#               "n_estimators": [100, 700]}
#     },
# ]
#
# estimator_list = [RandomizedSearchCV(rand_search_cv_config['estimator'], param_distributions=rand_search_cv_config['params_dist'], n_iter=10, cv=5, iid=False)
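The commented-out block above sketches a randomized hyperparameter search. A runnable version under the same settings might look like the following (the iid argument used above was removed from scikit-learn in 0.24; X and y are assumed to be the training data):

from scipy.stats import randint as sp_randint
from sklearn import ensemble
from sklearn.model_selection import RandomizedSearchCV

params_dist = {"max_depth": [3, None],
               "max_features": sp_randint(1, 38),
               "min_samples_split": sp_randint(2, 11),
               "bootstrap": [True, False],
               "n_estimators": [100, 700]}
search = RandomizedSearchCV(ensemble.RandomForestRegressor(random_state=0),
                            param_distributions=params_dist, n_iter=10, cv=5)
# search.fit(X, y)
# print(search.best_params_)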
Example #5
from sklearn import ensemble, linear_model, neighbors, svm, tree
from sklearn.ensemble import AdaBoostRegressor
from sklearn.naive_bayes import GaussianNB

def model_selector(model):
    # Ye good ol' ugly if-elif switch to choose the model
    if model == "linear":
        regr = linear_model.LinearRegression(n_jobs=-1)

    elif model == "lasso":
        regr = linear_model.Lasso(random_state=17)

    elif model == "elasticnet" or model == "elastic":
        regr = linear_model.ElasticNet(random_state=17)

    elif model == "bayesian":
        regr = linear_model.BayesianRidge()

    elif model == "decision tree regressor" or model == "dtr":
        regr = tree.DecisionTreeRegressor(max_depth=8, min_samples_leaf=17, random_state=17)

    elif model == "tweedie regressor 0" or model == "normal distribution":
        regr = linear_model.TweedieRegressor(power=0)

    elif model == "tweedie regressor 1" or model == "poisson distribution":
        regr = linear_model.TweedieRegressor(power=1)

    elif model == "extra trees regressor" or model == "etr":
        regr = ensemble.ExtraTreesRegressor(max_depth=8, min_samples_leaf=17, random_state=17)

    elif model == "random forest regressor" or model == "rfr":
        regr = ensemble.RandomForestRegressor(n_estimators=500, oob_score=True, random_state=17, n_jobs=-1)

    elif model == "adaboost extra trees" or model == "boost et":
        regr = AdaBoostRegressor(ensemble.ExtraTreesRegressor(max_depth=8, min_samples_leaf=17, random_state=17),
                                 n_estimators=500, random_state=17)

    elif model == "k neighbours" or model == "k neighbor":
        regr = neighbors.KNeighborsRegressor(n_jobs=-1)

    elif model == "gradient boosting regressor" or model == "gbr":
        regr = ensemble.GradientBoostingRegressor(random_state=17)

    elif model == "voting":
        clf1 = linear_model.LinearRegression(n_jobs=-1)
        clf2 = ensemble.RandomForestRegressor(max_depth=8, min_samples_leaf=17, random_state=17, n_jobs=-1)
        clf3 = ensemble.GradientBoostingRegressor(random_state=17)
        regr = ensemble.VotingRegressor(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], n_jobs=-1)

    elif model == "logistic":
        regr = linear_model.LogisticRegression(max_iter=250, random_state=17, n_jobs=-1)

    elif model == "gaussian":
        regr = GaussianNB()

    elif model == "decision tree classifier" or model == "dtc":
        regr = tree.DecisionTreeClassifier(max_depth=8, min_samples_leaf=17, random_state=17)

    elif model == "extra tree classifier" or model == "etc":
        regr = ensemble.ExtraTreesClassifier(max_depth=8, min_samples_leaf=17, random_state=17)

    elif model == "random forest classifier" or model == "rfc":
        regr = ensemble.RandomForestClassifier(max_depth=8, min_samples_leaf=17, random_state=17)

    elif model == "linear svc":
        regr = svm.LinearSVC(random_state=17)

    elif model == "k neighbour classifier" or model == "k neighbor classifier":
        regr = neighbors.KNeighborsClassifier(n_jobs=-1, n_neighbors=2)

    elif model == "svc":
        regr = svm.SVC(kernel="rbf", probability=True, random_state=17)

    return regr
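A usage sketch; the training data is assumed to be prepared elsewhere, and the returned estimator is unfitted:

regr = model_selector("rfr")        # shorthand for the random forest regressor
# regr.fit(X_train, y_train)        # X_train, y_train assumed to exist
# print(regr.score(X_test, y_test))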
Example #6
from sklearn import ensemble
from sklearn.linear_model import LinearRegression
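The snippet below assumes X_train, X_test, y_train and y_test already exist. A hypothetical setup on synthetic data, purely so the example runs end to end:

import numpy as np
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 5))                            # hypothetical features
y = X @ rng.normal(size=5) + 0.1 * rng.normal(size=500)  # hypothetical target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)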

rfr = ensemble.RandomForestRegressor(max_depth=20, random_state=0)
rfr.fit(X_train, y_train)
# print("RandomForestRegressor train score: ",rfr.score(X_train, y_train))
# print("RandomForestRegressor test score: ",rfr.score(X_test, y_test))

# ExtraTreesRegressor model
etr = ensemble.ExtraTreesRegressor(n_estimators=400, random_state=5)
etr.fit(X_train, y_train)
# print("ExtraTreesRegressor train score: ", etr.score(X_train, y_train))
# print("ExtraTreesRegressor test score: ", etr.score(X_test, y_test))

# VotingRegressor model
vr = ensemble.VotingRegressor([
    ('lr', LinearRegression()),
    ('rf', ensemble.RandomForestRegressor(n_estimators=200, random_state=0))
])
vr.fit(X_train, y_train)
# print("VotingRegressor train score: ", vr.score(X_train, y_train))
# print("VotingRegressor test score: ", vr.score(X_test, y_test))

# GradientBoostingRegressor model
clf = ensemble.GradientBoostingRegressor(n_estimators=400,
                                         max_depth=5,
                                         min_samples_split=7,
                                         learning_rate=0.1,
                                         loss='squared_error')  # 'ls' before scikit-learn 1.2
clf.fit(X_train, y_train)
print("GradientBoostingRegressor train score: ", clf.score(X_train, y_train))
print("GradientBoostingRegressor test score: ", clf.score(X_test, y_test))