class LassoModel(RegModels):
    """Lasso regressor wrapped in a ``RegressorChain``."""

    def __init__(self, params):
        """Forward ``params`` to the base class and label this model "Lasso"."""
        super().__init__(params)
        self.name = "Lasso"

    def train(self):
        """Fit a chained Lasso and cache its predictions on the training inputs.

        Reads ``self.train_x`` and ``self.train_y`` (not set in this class;
        presumably provided by ``RegModels`` -- confirm against the base
        class). Stores the fitted chain on ``self.model`` and its
        predictions for ``self.train_x`` on ``self.train_output``.
        """
        # Build the base estimator and wrap it in one step.
        self.model = RegressorChain(Lasso())
        self.model.fit(self.train_x, self.train_y)
        self.train_output = self.model.predict(self.train_x)
def predict(ticker, interval):
    """Forecast the next ``PREDICT_FORWARD`` closing prices for ``ticker``.

    Downloads the price history through yfinance, turns the "Close" series
    into sliding windows (``LOOK_BACK`` closes as features, the following
    ``PREDICT_FORWARD`` closes as targets), fits a ``RegressorChain`` around
    a ``LinearSVR`` on every window, and predicts from the most recent
    ``LOOK_BACK`` closes.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.
        interval: Interval string handed to ``history``. "1d" and "5d" use
            the full history ("max"); everything else uses the last month.
            "5m", "30m", "1d" and "5d" have a timestamp step; any other
            value leaves the step at 0, so every predicted point carries the
            last observed timestamp.

    Returns:
        ``{"response_code": 200, "payload": [...]}`` where each payload item
        is ``{"date": <timestamp>, "price": <predicted close>}``. The first
        timestamp is the last observed bar (epoch milliseconds) plus one
        step.
    """
    period = "max" if interval in ("1d", "5d") else "1mo"
    df = pd.DataFrame(
        yf.Ticker(ticker).history(interval=interval, period=period))
    df.dropna(inplace=True)

    last_timestamp = int(df.index[-1].timestamp() * 1000)
    print("LAST_TIMESTAMP:", last_timestamp)

    # Reshape the close series into (window -> following values) pairs.
    data = df["Close"].values
    # "+ 1" keeps the newest complete window: the last target slice must be
    # allowed to end exactly at len(data). Without it the most recent
    # training sample is dropped (and no sample at all is produced when
    # exactly one window fits).
    n_windows = len(data) - LOOK_BACK - PREDICT_FORWARD + 1
    X = [data[i:i + LOOK_BACK] for i in range(n_windows)]
    y = [data[i + LOOK_BACK:i + LOOK_BACK + PREDICT_FORWARD]
         for i in range(n_windows)]
    print("X_LENGTH:", len(X))
    print("y_LENGTH:", len(y))

    # Base single-output model, chained so each step sees earlier outputs.
    model = LinearSVR(dual=False, loss="squared_epsilon_insensitive")
    wrapper = RegressorChain(model)
    wrapper.fit(X, y)

    # Predict from the most recent LOOK_BACK closes.
    historic_data = data[len(data) - LOOK_BACK:]
    predictions = wrapper.predict([historic_data])[0].tolist()

    # NOTE(review): the step constants are added to a millisecond timestamp,
    # so they are presumably expressed in milliseconds -- confirm.
    step_by_interval = {
        "5m": FIVE_MINUTES,
        "30m": THIRTY_MINUTES,
        "1d": ONE_DAY,
        "5d": FIVE_DAYS,
    }
    time_increment = step_by_interval.get(interval, 0)
    print("TIME_INCREMENT:", time_increment)

    payload = []
    for p in predictions:
        last_timestamp += time_increment
        payload.append({"date": last_timestamp, "price": p})

    return {"response_code": 200, "payload": payload}
def build_model_and_evaluate_rms(prev_pred=None):
    """Train a chained XGBoost regressor for "personality" and report RMSE.

    Args:
        prev_pred: Optional predictions from earlier tasks to use as extra
            features. When given, they are appended to the feature frame as
            additional columns.
            NOTE(review): assumes ``prev_pred`` is a pandas object whose
            index matches the feature frame row for row -- confirm against
            the caller.

    Returns:
        ``(rmse, reg)`` where ``rmse`` is a list with one root-mean-squared
        error per entry of ``utils.regressor_labels`` (same order), computed
        on a 20% hold-out split, and ``reg`` is the fitted
        ``RegressorChain``.
    """
    model = Model3()
    X_combined, y = model.combined_features(target="personality")

    # Combine the predictions of previous tasks as extra feature columns.
    # The default axis=0 would append them as extra *rows*, leaving X and y
    # with different lengths for train_test_split.
    if prev_pred is not None:
        X_combined = pd.concat([X_combined, prev_pred], axis=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X_combined, y, test_size=0.20, random_state=2)

    # The explicit chain order hard-codes five target columns.
    reg = RegressorChain(
        XGBRegressor(n_estimators=200, max_depth=2,
                     objective="reg:squarederror"),
        order=[0, 3, 1, 4, 2])
    reg = reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # One RMSE per label; column i of y_pred is compared with the label at
    # position i of utils.regressor_labels.
    rmse = [
        sqrt(mean_squared_error(y_test[label], y_pred[:, i]))
        for i, label in enumerate(utils.regressor_labels)
    ]
    return rmse, reg
def findNextTick(df, type):
    """Predict the next row of ``predictionLabels`` values from ``df``.

    For every label in ``predictionLabels`` a "next<label>" target column is
    built by shifting that column up one row. All rows but the last are used
    to fit an ensemble wrapped in a ``RegressorChain``; the last row (which
    has no known "next" values) is the prediction input.

    Args:
        df: DataFrame containing the ``predictionLabels`` columns. It is not
            modified; the function works on a copy.
        type: Ensemble selector (name kept for caller compatibility even
            though it shadows the builtin). ``0`` selects a
            ``StackingRegressor`` with a random-forest final estimator,
            ``1`` selects a ``VotingRegressor``.

    Returns:
        ``{"Y": ndarray, "X": ndarray}``. "Y" holds the last observed
        ``predictionLabels`` values followed by the predicted next values;
        "X" holds the last index value followed by that value plus one.

    Raises:
        ValueError: If ``type`` is neither 0 nor 1.
    """
    # Copy first so the caller's frame does not silently gain "next*" columns.
    df = df.copy()

    # One target column per label: the label's value on the following row.
    nextStrings = []
    for label in predictionLabels:
        nextString = "next" + str(label)
        df[nextString] = df[label].shift(-1)
        nextStrings.append(nextString)

    # The last row has no known target, so it becomes the prediction input.
    X_pred = df.iloc[-1:].drop(nextStrings, axis=1)
    df = df.iloc[:-1]
    X = df.drop(nextStrings, axis=1)
    y = df[nextStrings]

    estimators = [
        ('r1', LinearRegression(n_jobs=-1)),
        ('r2', tree.DecisionTreeRegressor()),
        ('r3', ensemble.RandomForestRegressor(n_jobs=-1)),
    ]

    if type == 0:
        regressor = ensemble.StackingRegressor(
            estimators=estimators,
            final_estimator=ensemble.RandomForestRegressor(
                n_estimators=100, random_state=42, n_jobs=-1),
        )
    elif type == 1:
        regressor = ensemble.VotingRegressor(estimators=estimators)
    else:
        # Previously this fell through to an UnboundLocalError further down.
        raise ValueError(
            "type must be 0 (stacking) or 1 (voting), got {!r}".format(type))

    # Debug trace retained from the original implementation.
    print("I got here!")

    regressor = RegressorChain(regressor)
    regressor.fit(X, y)

    # Prepend the last observed values so the result runs current -> next.
    y_pred = list(regressor.predict(X_pred))
    y_pred.insert(0, X_pred.iloc[0][predictionLabels])
    y_pred = np.asarray(y_pred)

    # Matching positions: the last index value and the one after it.
    x_predTime = list(X_pred.index)
    x_predTime.append(x_predTime[0] + 1)
    x_predTime = np.asarray(x_predTime)

    print(y_pred)
    print(x_predTime)
    return {"Y": y_pred, "X": x_predTime}
def test_regressor_chain_w_fit_params():
    """Check that ``RegressorChain.fit`` forwards fit params to sub-estimators.

    Every estimator in the fitted chain must have received the very same
    ``sample_weight`` object that was passed to the chain's ``fit``.
    """
    rng = np.random.RandomState(0)
    # Seed the dataset as well so the whole test is reproducible.
    X, y = datasets.make_regression(n_targets=3, random_state=0)
    weight = rng.rand(y.shape[0])

    class MySGD(SGDRegressor):
        """SGDRegressor that records the ``sample_weight`` it was fitted with."""

        def fit(self, X, y, **fit_params):
            self.sample_weight_ = fit_params['sample_weight']
            # Return the fitted estimator, as the parent ``fit`` does.
            return super().fit(X, y, **fit_params)

    model = RegressorChain(MySGD())

    # Fitting with params
    fit_param = {'sample_weight': weight}
    model.fit(X, y, **fit_param)

    for est in model.estimators_:
        assert est.sample_weight_ is weight
def test_sklearn_regressor_chain(self):
    """Converted ``RegressorChain`` models must match scikit-learn's output.

    Covers 2, 3 and 4 targets for several base regressors, each with a
    randomly drawn seed and a seed-shuffled chain order. The seed is part
    of the assertion message so a failure can be replayed.
    """
    base_classes = [
        DecisionTreeRegressor,
        ExtraTreesRegressor,
        RandomForestRegressor,
        LinearRegression,
    ]
    for n_targets in [2, 3, 4]:
        for model_class in base_classes:
            seed = random.randint(0, 2**32 - 1)

            chain_order = list(range(n_targets))
            random.Random(seed).shuffle(chain_order)

            # LinearRegression is built without random_state; the other
            # regressors are seeded.
            if model_class == LinearRegression:
                base_estimator = model_class()
            else:
                base_estimator = model_class(random_state=seed)
            model = RegressorChain(base_estimator, order=chain_order)

            X, y = datasets.make_regression(
                n_samples=50,
                n_features=10,
                n_informative=5,
                n_targets=n_targets,
                random_state=seed,
            )
            X = X.astype("float32")
            y = y.astype("float32")
            model.fit(X, y)

            conversion_config = {constants.TREE_OP_PRECISION_DTYPE: "float64"}
            torch_model = hummingbird.ml.convert(
                model, "torch", extra_config=conversion_config)
            self.assertTrue(torch_model is not None)

            expected = model.predict(X)
            actual = torch_model.predict(X)
            np.testing.assert_allclose(
                expected,
                actual,
                rtol=1e-4,
                atol=1e-4,
                err_msg="{}/{}/{}".format(n_targets, model_class, seed),
            )
def chainregressor(X, Y):
    """Cross-validate several base regressors wrapped in ``RegressorChain``.

    Each estimator is evaluated with 5-fold shuffled cross-validation. The
    folds are generated once and shared, so every estimator is scored on
    the same train/test partitions.

    Args:
        X: Feature array, indexed by integer row arrays (``X[train_index]``)
            and providing ``shape[1]`` -- presumably a 2-D NumPy array.
        Y: Target array, indexed the same way as ``X``.

    Returns:
        ``(meansquared_error_es, r2score_es)``: two dicts keyed by estimator
        name, holding the mean MSE and the mean R^2 over the folds. Both
        dicts are also printed.
    """
    ESTIMATORS = {
        "Extra trees + chain": ExtraTreesRegressor(n_estimators=10,
                                                   max_features=X.shape[1],
                                                   random_state=0),
        "K-nn + chain": KNeighborsRegressor(),
        "Linear regression + chain": LinearRegression(),
        "Ridge + chain": RidgeCV(),
    }

    kf = KFold(n_splits=5, shuffle=True)
    # Materialise the folds once: with shuffle=True and no random_state,
    # every call to kf.split() yields a different partition, which would
    # score each estimator on different data.
    folds = list(kf.split(X))

    meansquared_error_es = {}
    r2score_es = {}
    for name, estimator in ESTIMATORS.items():
        chain = RegressorChain(estimator)
        fold_mse = []
        fold_r2 = []
        for train_index, test_index in folds:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            chain.fit(X_train, y_train)
            y_pred = chain.predict(X_test)
            fold_mse.append(mean_squared_error(y_test, y_pred))
            fold_r2.append(r2_score(y_test, y_pred))
        meansquared_error_es[name] = statistics.mean(fold_mse)
        r2score_es[name] = statistics.mean(fold_r2)

    print(meansquared_error_es)
    print(r2score_es)
    return meansquared_error_es, r2score_es
def test_sklearn_regressor_chain(self):
    """Converted ``RegressorChain`` models must match scikit-learn's output.

    Runs 2, 3 and 4 targets against several base regressors, each with a
    randomly shuffled chain order. The shuffled order is included in the
    assertion message so a failing configuration can be identified.
    """
    for n_targets in [2, 3, 4]:
        for model_class in [
                DecisionTreeRegressor,
                ExtraTreesRegressor,
                RandomForestRegressor,
                LinearRegression,
        ]:
            order = list(range(n_targets))
            random.shuffle(order)

            model = RegressorChain(model_class(), order=order)

            X, y = datasets.make_regression(n_samples=50,
                                            n_features=10,
                                            n_informative=5,
                                            n_targets=n_targets,
                                            random_state=2021)
            X = X.astype('float32')
            y = y.astype('float32')
            model.fit(X, y)

            torch_model = hummingbird.ml.convert(model, "torch")
            self.assertTrue(torch_model is not None)
            # The order is random and unseeded, so report it on failure.
            np.testing.assert_allclose(
                model.predict(X),
                torch_model.predict(X),
                rtol=1e-5,
                atol=1e-5,
                err_msg="{}/{}/{}".format(
                    n_targets, model_class.__name__, order),
            )
# In[ ]:


# Chained models for each output (RegressorChain)
# https://machinelearningmastery.com/multi-output-regression-models-with-python/
#
# A second way to reuse single-output regressors for multi-output
# regression is to arrange them in a linear sequence:
#   * the first model predicts one output from the input;
#   * the second model predicts the next output from the input plus the
#     first model's output;
#   * the third model uses the input plus the first two outputs, and so on.
from sklearn.multioutput import RegressorChain

wrapper = RegressorChain(rf)
wrapper.fit(X_train, y_train)
rf_y_test_pred = wrapper.predict(X_test)

# Summarize the prediction: first five rows as predicted, then after the
# integer cast that is kept as the final result.
print(rf_y_test_pred[:5])
rf_y_test_pred = rf_y_test_pred.astype('int')
print(rf_y_test_pred[:5])


# In[ ]:


# Predict on the test data with the random forest's own predict method.
rf_y_test_pred = rf.predict(X_test)

print(rf_y_test_pred[:5])
rf_y_test_pred = rf_y_test_pred.astype('int')
print(rf_y_test_pred[:5])
# Using best hyper-parameters from the single-step ahead regression multi_svr = MultiOutputRegressor(estimator=SVR(kernel='rbf', **svr.best_params_), n_jobs=-1) multi_svr.fit(X_train.values, y_train.values) # In[ ]: # A multi-step model that arranges regressions into a chain. Each model makes a prediction # in the order specified by the chain (i.e. order of columns in the target matrix) using # all of the available features provided to the model plus the predictions of models that # are earlier in the chain. Order of columns is arranged by time-lags. Base model is SVM! chain_svr = RegressorChain(base_estimator=SVR(kernel='rbf', **svr.best_params_)) chain_svr.fit(X_train.values, y_train.values) # ## DecisionTree multi-step regressor # In[ ]: # DecisionTreeRegressor supports multi-step output out-of-the-box! # Grid search with cross-validation parameters = [{'criterion':['mse', 'mae'], 'max_depth':[1, 5, None], 'max_features':['auto', 'log2', 0.5], 'max_leaf_nodes':[2, None]}] tree = GridSearchCV(estimator=DecisionTreeRegressor(), param_grid=parameters,