Exemple #1
0
class LassoModel(RegModels):
    """Lasso regressor wrapped in a ``RegressorChain``."""

    def __init__(self, params):
        """Pass ``params`` to the base class and label this model "Lasso"."""
        super().__init__(params)
        self.name = "Lasso"

    def train(self):
        """Fit the chained Lasso on the training data and store its predictions.

        Reads ``self.train_x`` and ``self.train_y``; leaves the fitted chain in
        ``self.model`` and its predictions for ``self.train_x`` in
        ``self.train_output``.
        """
        # Each link of the chain is a fresh clone of this base Lasso.
        self.model = RegressorChain(Lasso())
        self.model.fit(self.train_x, self.train_y)
        self.train_output = self.model.predict(self.train_x)
Exemple #2
0
def predict(ticker, interval):
    """Forecast upcoming closing prices for ``ticker`` at the given ``interval``.

    The price history is fetched with ``yf.Ticker(ticker).history``. Sliding
    windows of ``LOOK_BACK`` consecutive closes are paired with the
    ``PREDICT_FORWARD`` closes that follow them, a ``RegressorChain`` of
    ``LinearSVR`` models is fitted on all windows, and a forecast is made
    from the most recent ``LOOK_BACK`` closes.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.
        interval: History interval string. "1d" and "5d" request the "max"
            period, anything else "1mo". Only "5m", "30m", "1d" and "5d"
            have a timestamp step; any other value leaves the step at 0.

    Returns:
        A dict ``{"response_code": 200, "payload": [...]}`` where each payload
        entry is ``{"date": <timestamp>, "price": <predicted close>}``. Dates
        start from the last history timestamp (converted to milliseconds) and
        advance by the interval's step for every prediction.
    """
    period = "max" if interval in ("1d", "5d") else "1mo"
    df = pd.DataFrame(
        yf.Ticker(ticker).history(interval=interval, period=period))
    df.dropna(inplace=True)
    last_timestamp = int(df.index[-1].timestamp() * 1000)
    print("LAST_TIMESTAMP:", last_timestamp)

    # Reshape the closing prices into (look-back window, forward window) pairs.
    data = df["Close"].values
    # The "+ 1" keeps the final window, whose targets are the last
    # PREDICT_FORWARD closes; without it the newest sample is never trained on.
    n_windows = len(data) - LOOK_BACK - PREDICT_FORWARD + 1
    X = [data[i:i + LOOK_BACK] for i in range(n_windows)]
    y = [data[i + LOOK_BACK:i + LOOK_BACK + PREDICT_FORWARD]
         for i in range(n_windows)]

    print("X_LENGTH:", len(X))
    print("y_LENGTH:", len(y))

    # Base single-output model, chained so each step sees earlier predictions.
    model = LinearSVR(dual=False, loss="squared_epsilon_insensitive")
    wrapper = RegressorChain(model)
    # Fit on the whole dataset.
    wrapper.fit(X, y)

    # Predict from the most recent LOOK_BACK closes.
    historic_data = data[len(data) - LOOK_BACK:]
    predictions = wrapper.predict([historic_data])[0].tolist()

    # Timestamp step per supported interval; unknown intervals keep step 0.
    # NOTE(review): the constants are presumably milliseconds, matching
    # last_timestamp -- confirm where they are defined.
    increments = {
        "5m": FIVE_MINUTES,
        "30m": THIRTY_MINUTES,
        "1d": ONE_DAY,
        "5d": FIVE_DAYS,
    }
    time_increment = increments.get(interval, 0)

    print("TIME_INCREMENT:", time_increment)

    payload = []
    for p in predictions:
        last_timestamp += time_increment
        payload.append({"date": last_timestamp, "price": p})

    return {"response_code": 200, "payload": payload}
Exemple #3
0
def build_model_and_evaluate_rms(prev_pred=None):
    """Train a chained XGBoost regressor for personality and report RMSE.

    Features and targets come from ``Model3().combined_features``. The data
    is split 80/20 with a fixed seed, a ``RegressorChain`` of
    ``XGBRegressor`` models is fitted in the target order ``[0, 3, 1, 4, 2]``,
    and the root-mean-squared error on the test split is computed for each
    label in ``utils.regressor_labels``.

    Args:
        prev_pred: Optional predictions from previous tasks, appended to the
            feature matrix as extra columns. Assumed to be a pandas object
            indexed like the features -- TODO confirm with callers.

    Returns:
        A tuple ``(rmse, reg)``: the list of per-label RMSE values, in the
        order of ``utils.regressor_labels``, and the fitted chain.
    """
    model = Model3()
    X_combined, y = model.combined_features(target="personality")

    # Previous-task predictions are extra features, so they must be joined
    # column-wise; stacking them as rows would leave X and y with different
    # lengths and make train_test_split fail.
    if prev_pred is not None:
        X_combined = pd.concat([X_combined, prev_pred], axis=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X_combined, y, test_size=0.20, random_state=2)

    reg = RegressorChain(
        XGBRegressor(n_estimators=200,
                     max_depth=2,
                     objective="reg:squarederror"),
        order=[0, 3, 1, 4, 2],
    )
    reg = reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # RMSE per personality label; column i of y_pred is compared with the
    # i-th label of utils.regressor_labels.
    rmse = [
        sqrt(mean_squared_error(y_test[label], y_pred[:, i]))
        for i, label in enumerate(utils.regressor_labels)
    ]

    return rmse, reg
def findNextTick(df, type):
    """Predict the next row's values for every column in ``predictionLabels``.

    For each label a ``next<label>`` target column is built by shifting the
    label column up one row. All rows but the last are used to train a
    ``RegressorChain`` around an ensemble of a linear regression, a decision
    tree and a random forest; the last row is then used to predict.

    The caller's ``df`` is not modified; the target columns are added to a
    new frame.

    Args:
        df: DataFrame containing at least the ``predictionLabels`` columns.
            The index is presumably numeric, since the predicted point is
            placed at ``index + 1`` -- confirm with callers.
        type: Ensemble selector: 0 for ``StackingRegressor`` (with a random
            forest final estimator), 1 for ``VotingRegressor``.

    Returns:
        A dict ``{"Y": ..., "X": ...}``. "Y" is an array holding the last
        observed label values followed by the predicted ones; "X" is an array
        holding the last index value followed by that value plus one.

    Raises:
        ValueError: If ``type`` is neither 0 nor 1.
    """
    # Fail fast: previously an unknown selector left `regressor` unbound and
    # only surfaced as an UnboundLocalError after the frame had been modified.
    if type not in (0, 1):
        raise ValueError(
            "Unsupported ensemble type {!r}; expected 0 (stacking) or "
            "1 (voting).".format(type))

    # Target columns: the value each label takes on the following row.
    nextStrings = ["next" + str(label) for label in predictionLabels]
    shifted = {
        name: df[label].shift(-1)
        for name, label in zip(nextStrings, predictionLabels)
    }
    # assign() returns a new frame, so the caller's DataFrame stays untouched.
    df = df.assign(**shifted)

    X_pred = df[-1:].drop(nextStrings, axis=1)  # last row: features to predict from
    df = df[0:-1]  # the last row has no "next" value, so it cannot be trained on
    X = df.drop(nextStrings, axis=1)
    y = df[nextStrings]

    estimators = [
        ('r1', LinearRegression(n_jobs=-1)),
        ('r2', tree.DecisionTreeRegressor()),
        ('r3', ensemble.RandomForestRegressor(n_jobs=-1)),
    ]
    if type == 0:
        regressor = ensemble.StackingRegressor(
            estimators=estimators,
            final_estimator=ensemble.RandomForestRegressor(
                n_estimators=100, random_state=42, n_jobs=-1),
        )
    else:
        regressor = ensemble.VotingRegressor(estimators=estimators)
        print("I got here!")

    regressor = RegressorChain(regressor)
    regressor.fit(X, y)
    y_pred = list(regressor.predict(X_pred))

    # Prepend the last observed values so the result starts at the known point.
    y_pred.insert(0, X_pred.iloc[0][predictionLabels])
    y_pred = np.asarray(y_pred)
    x_predTime = list(X_pred.index)
    x_predTime.append(x_predTime[0] + 1)
    x_predTime = np.asarray(x_predTime)
    print(y_pred)
    print(x_predTime)
    return {"Y": y_pred, "X": x_predTime}
Exemple #5
0
def test_regressor_chain_w_fit_params():
    """Check that ``RegressorChain.fit`` hands fit params to each sub-estimator."""

    class WeightRecordingSGD(SGDRegressor):
        """SGDRegressor that remembers the ``sample_weight`` it was fitted with."""

        def fit(self, X, y, **fit_params):
            self.sample_weight_ = fit_params['sample_weight']
            super().fit(X, y, **fit_params)

    X, y = datasets.make_regression(n_targets=3)
    weight = np.random.RandomState(0).rand(y.shape[0])

    # Fit the chain with the weights supplied as a fit parameter.
    model = RegressorChain(WeightRecordingSGD())
    model.fit(X, y, sample_weight=weight)

    # Every link of the chain must have received the very same array object.
    assert all(est.sample_weight_ is weight for est in model.estimators_)
Exemple #6
0
    def test_sklearn_regressor_chain(self):
        """Convert fitted ``RegressorChain`` models to torch and compare predictions.

        Runs for 2, 3 and 4 targets and for four base regressors, each with a
        freshly drawn seed that drives the chain order, the base estimator
        (where it accepts ``random_state``) and the synthetic dataset.
        """
        base_regressors = [
            DecisionTreeRegressor,
            ExtraTreesRegressor,
            RandomForestRegressor,
            LinearRegression,
        ]
        for n_targets in [2, 3, 4]:
            for model_class in base_regressors:
                seed = random.randint(0, 2**32 - 1)
                order = list(range(n_targets))
                random.Random(seed).shuffle(order)

                # LinearRegression is built without a random_state argument.
                if model_class == LinearRegression:
                    base = model_class()
                else:
                    base = model_class(random_state=seed)
                model = RegressorChain(base, order=order)

                X, y = datasets.make_regression(
                    n_samples=50,
                    n_features=10,
                    n_informative=5,
                    n_targets=n_targets,
                    random_state=seed,
                )
                X, y = X.astype("float32"), y.astype("float32")
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(
                    model,
                    "torch",
                    extra_config={constants.TREE_OP_PRECISION_DTYPE: "float64"},
                )
                self.assertTrue(torch_model is not None)

                # The failure message carries everything needed to reproduce.
                failure_context = "{}/{}/{}".format(n_targets, model_class, seed)
                np.testing.assert_allclose(
                    model.predict(X),
                    torch_model.predict(X),
                    rtol=1e-4,
                    atol=1e-4,
                    err_msg=failure_context,
                )
Exemple #7
0
def chainregressor(X, Y):
    """Cross-validate several base regressors wrapped in ``RegressorChain``.

    Each candidate (extra trees, k-NN, linear regression, RidgeCV) is wrapped
    in a ``RegressorChain`` and evaluated with shuffled 5-fold
    cross-validation. The mean of the per-fold mean squared errors and R2
    scores is collected per candidate, and both result dicts are printed.

    Args:
        X: Feature matrix. Must expose ``shape[1]`` and accept integer index
            arrays (``X[train_index]``) -- presumably a NumPy array.
        Y: Target matrix, indexable the same way as ``X``.

    Returns:
        None. The results are only printed.
    """
    ESTIMATORS = {
        "Extra trees + chain":
        ExtraTreesRegressor(n_estimators=10,
                            max_features=X.shape[1],
                            random_state=0),
        "K-nn + chain":
        KNeighborsRegressor(),
        "Linear regression + chain":
        LinearRegression(),
        "Ridge + chain":
        RidgeCV(),
    }
    # No random_state is set, so the fold assignment differs between runs.
    kf = KFold(n_splits=5, shuffle=True)

    meansquared_error_es = {}
    r2score_es = {}
    for name, estimator in ESTIMATORS.items():
        meansquared_error = []
        r2score = []
        estimator = RegressorChain(estimator)
        # kf.split is called afresh for every candidate.
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            estimator.fit(X_train, y_train)
            y_pred = estimator.predict(X_test)
            meansquared_error.append(mean_squared_error(y_test, y_pred))
            r2score.append(r2_score(y_test, y_pred))
        meansquared_error_es[name] = statistics.mean(meansquared_error)
        r2score_es[name] = statistics.mean(r2score)
    print(meansquared_error_es)
    print(r2score_es)
    def test_sklearn_regressor_chain(self):
        """Convert fitted ``RegressorChain`` models to torch and compare predictions.

        Runs for 2, 3 and 4 targets and for four base regressors, each with a
        randomly shuffled chain order on a fixed synthetic dataset.
        """
        base_regressors = [
            DecisionTreeRegressor,
            ExtraTreesRegressor,
            RandomForestRegressor,
            LinearRegression,
        ]
        for n_targets in [2, 3, 4]:
            for model_class in base_regressors:
                # Random chain order, drawn from the global `random` state.
                order = list(range(n_targets))
                random.shuffle(order)
                model = RegressorChain(model_class(), order=order)

                X, y = datasets.make_regression(
                    n_samples=50,
                    n_features=10,
                    n_informative=5,
                    n_targets=n_targets,
                    random_state=2021,
                )
                X, y = X.astype('float32'), y.astype('float32')
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(model, "torch")
                self.assertTrue(torch_model is not None)

                expected = model.predict(X)
                actual = torch_model.predict(X)
                np.testing.assert_allclose(expected, actual,
                                           rtol=1e-5, atol=1e-5)
Exemple #9
0
# In[ ]:

# Chained Models for Each Output (RegressorChain)
# https://machinelearningmastery.com/multi-output-regression-models-with-python/
# Another approach to using single-output regression models for multioutput regression is to create a linear
# sequence of models.

# The first model in the sequence uses the input and predicts one output; the second model uses the input and
# the output from the first model to make a prediction; the third model uses the input and output from the
# first two models to make a prediction, and so on.

from sklearn.multioutput import RegressorChain

# Chain copies of the base regressor `rf` and fit them on the training split.
wrapper = RegressorChain(rf)
wrapper.fit(X_train, y_train)

# Show the first five test predictions as returned, then truncated to
# integers; only the integer version is kept.
rf_y_test_pred = wrapper.predict(X_test)
print(rf_y_test_pred[:5])
rf_y_test_pred = rf_y_test_pred.astype('int')
print(rf_y_test_pred[:5])

# In[ ]:

# Predict the test split with `rf` itself (no chain wrapper) and show the
# first five rows as returned, then truncated to integers.
rf_y_test_pred = rf.predict(X_test)
print(rf_y_test_pred[:5])
rf_y_test_pred = rf_y_test_pred.astype('int')
print(rf_y_test_pred[:5])

# Multi-output SVR: the RBF-kernel SVR reuses the best hyper-parameters
# found for the single-step-ahead regression (`svr.best_params_`).
multi_svr = MultiOutputRegressor(
    estimator=SVR(kernel='rbf', **svr.best_params_),
    n_jobs=-1,
)
multi_svr.fit(X_train.values, y_train.values)


# In[ ]:


# A multi-step model that arranges regressions into a chain. Each model makes a prediction
# in the order specified by the chain (i.e. the order of columns in the target matrix) using
# all of the features provided to the model plus the predictions of the models that come
# earlier in the chain. The columns are ordered by time lag. The base model is an RBF-kernel SVR.
# The base estimator is passed positionally: the keyword was renamed from `base_estimator`
# to `estimator` in recent scikit-learn releases, and the positional form works with both.
chain_svr = RegressorChain(SVR(kernel='rbf', **svr.best_params_))
chain_svr.fit(X_train.values, y_train.values)


# ## DecisionTree multi-step regressor

# In[ ]:


# DecisionTreeRegressor supports multi-step output out-of-the-box!
# Grid search with cross-validation
# Candidate grid: 2 criteria x 3 depths x 3 feature limits x 2 leaf-node caps
# = 36 hyper-parameter combinations.
# NOTE(review): 'mse'/'mae' and max_features='auto' are legacy option names
# that newer scikit-learn releases renamed or removed -- confirm against the
# installed version.
parameters = [{'criterion':['mse', 'mae'],
              'max_depth':[1, 5, None],
              'max_features':['auto', 'log2', 0.5],
              'max_leaf_nodes':[2, None]}]
tree = GridSearchCV(estimator=DecisionTreeRegressor(), 
                          param_grid=parameters,