Example #1
def build_model_and_evaluate_rms(prev_pred=None):
    model = Model3()
    X_combined, y = model.combined_features(target="personality")

    # Chain tasks: append the previous task's predictions as extra feature columns
    if prev_pred is not None:
        X_combined = pd.concat([X_combined, prev_pred], axis=1)

    # X, y = utils.extract_data(X_combined, label="personality")
    X_train, X_test, y_train, y_test = train_test_split(
        X_combined, y, test_size=0.20, random_state=2)

    reg = RegressorChain(XGBRegressor(n_estimators=200,
                                      max_depth=2,
                                      objective="reg:squarederror"),
                         order=[0, 3, 1, 4, 2])

    reg = reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    
    # Calculate RMSE for each personality trait; predict() returns columns in
    # the original target order, regardless of the chain order above
    rmse = []
    for i, value in enumerate(utils.regressor_labels):
        rmse.append(sqrt(mean_squared_error(y_test[value], y_pred[:, i])))

    return rmse, reg
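For context, a hedged sketch of how prev_pred might be supplied on a follow-up task. X_other and the alignment step are assumptions for illustration, not part of the original snippet.

# First task: fit without chained predictions.
rmse_base, reg_base = build_model_and_evaluate_rms()

# Hypothetical follow-up task: wrap the fitted chain's predictions in a
# DataFrame aligned on the feature index, so pd.concat(..., axis=1) above
# can join them as extra columns.
# prev = pd.DataFrame(reg_base.predict(X_other), index=X_other.index)
# rmse_next, reg_next = build_model_and_evaluate_rms(prev_pred=prev)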
Example #2
class LassoModel(RegModels):
    def __init__(self, params):
        super().__init__(params)
        self.name = "Lasso"

    def train(self):
        self.model = Lasso()
        self.model = RegressorChain(self.model)
        self.model.fit(self.train_x, self.train_y)
        self.train_output = self.model.predict(self.train_x)
Example #3
def predict(ticker, interval):
    period = "max" if interval in ["1d", "5d"] else "1mo"
    # history() already returns a DataFrame
    df = yf.Ticker(ticker).history(interval=interval, period=period)
    df.dropna(inplace=True)
    last_timestamp = int(df.index[-1].timestamp() * 1000)
    print("LAST_TIMESTAMP:", last_timestamp)
    # Build sliding windows: LOOK_BACK closes as features, the next
    # PREDICT_FORWARD closes as targets
    data = df["Close"].values
    X = []
    y = []

    # "+ 1" so the most recent complete window is also used for training
    for i in range(0, len(data) - LOOK_BACK - PREDICT_FORWARD + 1):
        X.append(data[i:i + LOOK_BACK])
        y.append(data[i + LOOK_BACK:i + LOOK_BACK + PREDICT_FORWARD])

    print("X_LENGTH:", len(X))
    print("y_LENGTH:", len(y))

    # define base model
    model = LinearSVR(dual=False, loss="squared_epsilon_insensitive")
    # define the chained multioutput wrapper model
    wrapper = RegressorChain(model)
    # fit the model on the whole dataset
    wrapper.fit(X, y)
    # predict PREDICT_FORWARD steps ahead from the most recent LOOK_BACK closes
    historic_data = data[-LOOK_BACK:]
    predictions = wrapper.predict([historic_data])[0].tolist()

    payload = []
    time_increment = {"5m": FIVE_MINUTES, "30m": THIRTY_MINUTES,
                      "1d": ONE_DAY, "5d": FIVE_DAYS}.get(interval, 0)

    print("TIME_INCREMENT:", time_increment)

    for p in predictions:
        last_timestamp += time_increment
        payload.append({"date": last_timestamp, "price": p})

    return {"response_code": 200, "payload": payload}
Example #4
    def __init__(self, X, Y, model_type, plot_individual_bool=False, plot_summary_one_bool=False, output_path=False):
        """
        Initiate the RunRegression class

        Args:
            X: <np.Array> input independent variable(s) data
            Y: <np.Array> input dependent variable(s) data
            model_type: <str> regression type, must be one in dict_reg_type variable above
            plot_individual_bool: <bool> whether to also plot the individual x vs y series
            plot_summary_one_bool: <bool> whether to plot the summary comparison
                chart on one graph or to separate the different y-series into
                their own charts
            output_path: <str> path to save output figure results to, if False results are not saved
        """
        self.X = X
        self.Y = Y
        self.model = dict_reg_type[model_type]()
        self.model_type = model_type
        self.plot_individual_bool = plot_individual_bool
        self.plot_summary_one_bool = plot_summary_one_bool
        self.output_path = output_path

        # check whether the model type requests a multi-output wrapper
        multi_output_wrapper = model_type.split('_')[0]
        if multi_output_wrapper == DirectMultiOutput:
            self.model = MultiOutputRegressor(self.model)
        elif multi_output_wrapper == ChainedMultiOutput:
            self.model = RegressorChain(self.model)
Example #5
def test_multioutput():
    from sklearn.datasets import make_regression
    from sklearn.model_selection import train_test_split
    from sklearn.multioutput import MultiOutputRegressor, RegressorChain

    # create regression data
    X, y = make_regression(n_targets=3)

    # split into train and test data
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.30,
                                                        random_state=42)

    # train the model
    model = MultiOutputRegressor(AutoML(task="regression", time_budget=1))
    model.fit(X_train, y_train)

    # predict
    print(model.predict(X_test))

    # train the chained model
    model = RegressorChain(AutoML(task="regression", time_budget=1))
    model.fit(X_train, y_train)

    # predict
    print(model.predict(X_test))
Example #6
def findNextTick(df, regressor_type):
    nextStrings = []
    # Create a "next value" column for each label; these are the prediction targets
    for i in predictionLabels:
        nextString = "next" + str(i)
        df[nextString] = df[i].shift(-1)
        nextStrings.append(nextString)

    X_pred = df[-1:].drop(nextStrings, axis=1)  # last row, reserved for prediction
    df = df[0:-1]  # all but the last row is used for training
    X = df.drop(nextStrings, axis=1)  # features: drop the answer columns
    y = df[nextStrings]  # targets
    r1 = LinearRegression(n_jobs=-1)
    r2 = tree.DecisionTreeRegressor()
    r3 = ensemble.RandomForestRegressor(n_jobs=-1)
    estimators = [
       ('r1', r1),
       ('r2', r2),
       ('r3', r3)
    ]
    if regressor_type == 0:
        regressor = ensemble.StackingRegressor(
            estimators=estimators,
            final_estimator=ensemble.RandomForestRegressor(n_estimators=100,
                                                           random_state=42,
                                                           n_jobs=-1))
    elif regressor_type == 1:
        regressor = ensemble.VotingRegressor(estimators=estimators)
    else:
        raise ValueError("regressor_type must be 0 (stacking) or 1 (voting)")
    regressor = RegressorChain(regressor)
    regressor.fit(X, y) #training the algorithm
    y_pred = list(regressor.predict(X_pred))

    y_pred.insert(0,X_pred.iloc[0][predictionLabels])
    y_pred = np.asarray(y_pred)
    x_predTime = list(X_pred.index)
    x_predTime.append(x_predTime[0] + 1)
    x_predTime = np.asarray(x_predTime)
    print(y_pred)
    print(x_predTime)
    return {"Y":y_pred,"X":x_predTime}
Example #7
def test_base_chain_fit_and_predict_with_sparse_data_and_cv():
    # Fit base chain with sparse data cross_val_predict
    X, Y = generate_multilabel_dataset_with_correlations()
    X_sparse = sp.csr_matrix(X)
    base_chains = [ClassifierChain(LogisticRegression(), cv=3),
                   RegressorChain(Ridge(), cv=3)]
    for chain in base_chains:
        chain.fit(X_sparse, Y)
        Y_pred = chain.predict(X_sparse)
        assert_equal(Y_pred.shape, Y.shape)
Example #8
def test_regressor_chain_w_fit_params():
    # Make sure fit_params are properly propagated to the sub-estimators
    rng = np.random.RandomState(0)
    X, y = datasets.make_regression(n_targets=3)
    weight = rng.rand(y.shape[0])

    class MySGD(SGDRegressor):
        def fit(self, X, y, **fit_params):
            self.sample_weight_ = fit_params['sample_weight']
            super().fit(X, y, **fit_params)

    model = RegressorChain(MySGD())

    # Fitting with params
    fit_param = {'sample_weight': weight}
    model.fit(X, y, **fit_param)

    for est in model.estimators_:
        assert est.sample_weight_ is weight
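Outside the test, the same mechanism means fit parameters such as sample_weight pass straight through the chain to every link; a minimal sketch, assuming default metadata routing:

import numpy as np
from sklearn import datasets
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import RegressorChain

X_w, y_w = datasets.make_regression(n_targets=2, random_state=0)
w = np.abs(np.random.RandomState(0).randn(X_w.shape[0]))
# sample_weight is forwarded to the fit of each per-target SGDRegressor
RegressorChain(SGDRegressor()).fit(X_w, y_w, sample_weight=w)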
Example #9
def chainregressor(X, Y):
    # Fit estimators
    ESTIMATORS = {
        "Extra trees + chain":
        ExtraTreesRegressor(n_estimators=10,
                            max_features=X.shape[1],
                            random_state=0),
        "K-nn + chain":
        KNeighborsRegressor(),
        "Linear regression + chain":
        LinearRegression(),
        "Ridge + chain":
        RidgeCV(),
    }
    kf = KFold(n_splits=5, shuffle=True)  # 5-fold cross-validation
    meansquared_error_es = dict()
    r2score_es = dict()
    for name, estimator in ESTIMATORS.items():
        meansquared_error = []
        r2score = []
        estimator = RegressorChain(estimator)
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = Y[train_index], Y[test_index]
            estimator.fit(X_train, y_train)
            y_pred = estimator.predict(X_test)
            meansquared_error.append(mean_squared_error(y_test, y_pred))
            r2score.append(r2_score(y_test, y_pred))
        meansquared_error_es[name] = statistics.mean(meansquared_error)
        r2score_es[name] = statistics.mean(r2score)
    print(meansquared_error_es)
    print(r2score_es)
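The manual KFold loop above can also be written with cross_validate; a hedged equivalent sketch for one of the estimators, assuming the same X and Y arrays:

from sklearn.model_selection import cross_validate

cv_res = cross_validate(RegressorChain(RidgeCV()), X, Y, cv=5,
                        scoring=('neg_mean_squared_error', 'r2'))
print(-cv_res['test_neg_mean_squared_error'].mean(),
      cv_res['test_r2'].mean())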
Example #10
    def test_sklearn_regressor_chain(self):
        for n_targets in [2, 3, 4]:
            for model_class in [
                    DecisionTreeRegressor, ExtraTreesRegressor,
                    RandomForestRegressor, LinearRegression
            ]:
                order = [i for i in range(n_targets)]
                random.shuffle(order)
                model = RegressorChain(model_class(), order=order)
                X, y = datasets.make_regression(n_samples=50,
                                                n_features=10,
                                                n_informative=5,
                                                n_targets=n_targets,
                                                random_state=2021)
                X = X.astype('float32')
                y = y.astype('float32')
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(model, "torch")
                self.assertTrue(torch_model is not None)
                np.testing.assert_allclose(model.predict(X),
                                           torch_model.predict(X),
                                           rtol=1e-5,
                                           atol=1e-5)
Example #11
def test_base_chain_random_order():
    # Fit base chain with random order
    X, Y = generate_multilabel_dataset_with_correlations()
    for chain in [ClassifierChain(LogisticRegression()), RegressorChain(Ridge())]:
        chain_random = clone(chain).set_params(order="random", random_state=42)
        chain_random.fit(X, Y)
        chain_fixed = clone(chain).set_params(order=chain_random.order_)
        chain_fixed.fit(X, Y)
        assert_array_equal(chain_fixed.order_, chain_random.order_)
        assert list(chain_random.order_) != list(range(4))
        assert len(chain_random.order_) == 4
        assert len(set(chain_random.order_)) == 4
        # Randomly ordered chain should behave identically to a fixed order
        # chain with the same order.
        for est1, est2 in zip(chain_random.estimators_, chain_fixed.estimators_):
            assert_array_almost_equal(est1.coef_, est2.coef_)
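In application code the same option is just order="random"; a minimal self-contained sketch:

from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.multioutput import RegressorChain

X_r, Y_r = make_regression(n_samples=80, n_features=6, n_targets=4, random_state=0)
rand_chain = RegressorChain(Ridge(), order="random", random_state=42).fit(X_r, Y_r)
print(rand_chain.order_)  # the permutation the chain actually used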
Example #12
def test_base_chain_fit_and_predict():
    # Fit base chain and verify predict performance
    X, Y = generate_multilabel_dataset_with_correlations()
    chains = [RegressorChain(Ridge()), ClassifierChain(LogisticRegression())]
    for chain in chains:
        chain.fit(X, Y)
        Y_pred = chain.predict(X)
        assert Y_pred.shape == Y.shape
        assert ([c.coef_.size for c in chain.estimators_] ==
                list(range(X.shape[1], X.shape[1] + Y.shape[1])))

    Y_prob = chains[1].predict_proba(X)
    Y_binary = (Y_prob >= .5)
    assert_array_equal(Y_binary, Y_pred)

    assert isinstance(chains[1], ClassifierMixin)
Example #13
def test_base_chain_crossval_fit_and_predict():
    # Fit chain with cross_val_predict and verify predict
    # performance
    X, Y = generate_multilabel_dataset_with_correlations()

    for chain in [ClassifierChain(LogisticRegression()), RegressorChain(Ridge())]:
        chain.fit(X, Y)
        chain_cv = clone(chain).set_params(cv=3)
        chain_cv.fit(X, Y)
        Y_pred_cv = chain_cv.predict(X)
        Y_pred = chain.predict(X)

        assert Y_pred_cv.shape == Y_pred.shape
        assert not np.all(Y_pred == Y_pred_cv)
        if isinstance(chain, ClassifierChain):
            assert jaccard_score(Y, Y_pred_cv, average="samples") > 0.4
        else:
            assert mean_squared_error(Y, Y_pred_cv) < 0.25
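What cv changes: during fit, later links receive out-of-fold (cross_val_predict) estimates of the earlier targets instead of the chain's own in-sample predictions, which is why Y_pred_cv differs from Y_pred above. A minimal sketch of the contrast on synthetic data:

from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.multioutput import RegressorChain

X_c, Y_c = make_regression(n_samples=120, n_features=6, n_targets=3, random_state=0)
plain = RegressorChain(Ridge()).fit(X_c, Y_c)
oof = RegressorChain(Ridge(), cv=3).fit(X_c, Y_c)
# Later links trained on different feature values, so the fits differ.
print((plain.predict(X_c) == oof.predict(X_c)).all())  # typically False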
Example #14
    def __init__(self, fl, max_depth=8, num_est=300, chain=False):
        """
        Initialises new DTR model
        :param fl: fl class
        :param max_depth: max depth of each tree
        :param num_est: number of estimators in the ensemble of trees
        :param chain: regressor chain (True) or independent multi-output (False)
        """
        self.labels_dim = fl.labels_dim
        self.labels_scaler = fl.labels_scaler
        if chain:
            self.model = RegressorChain(
                AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                                  n_estimators=num_est))
        else:
            self.model = MultiOutputRegressor(
                AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                                  n_estimators=num_est))
        self.normalise_labels = fl.normalise_labels
Example #15
    def test_sklearn_regressor_chain(self):
        for n_targets in [2, 3, 4]:
            for model_class in [DecisionTreeRegressor, ExtraTreesRegressor, RandomForestRegressor, LinearRegression]:
                seed = random.randint(0, 2**32 - 1)
                order = [i for i in range(n_targets)]
                random.Random(seed).shuffle(order)
                if model_class != LinearRegression:
                    model = RegressorChain(model_class(random_state=seed), order=order)
                else:
                    model = RegressorChain(model_class(), order=order)
                X, y = datasets.make_regression(
                    n_samples=50, n_features=10, n_informative=5, n_targets=n_targets, random_state=seed
                )
                X = X.astype("float32")
                y = y.astype("float32")
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(model, "torch", extra_config={constants.TREE_OP_PRECISION_DTYPE: "float64"})
                self.assertTrue(torch_model is not None)
                np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-4, atol=1e-4, err_msg="{}/{}/{}".format(n_targets, model_class, seed))
Example #16
    kneighborsregressor=KNeighborsRegressor,
    mlpregressor=MLPRegressor,
    stackingregressor=StackingRegressor,
    mlxtendstackingregressor=StackingRegressor,
    mlxtendstackingcvregressor=StackingCVRegressor,
    votingregressor=VotingRegressor)

_ESTIMATOR_DICT = dict(regression=_REGRESSOR_DICT)

_KERNEL_DICT = dict(dotproduct=DotProduct, rbf=RBF, whitekernel=WhiteKernel)

# We need to identify chaining because a chain's fit method capitalizes
# the target Y, whereas other estimators conventionally write
# the target as y.
_CHAIN_FLAG = [
    RegressorChain(base_estimator=DummyRegressor()).__class__,
    ClassifierChain(base_estimator=DummyClassifier()).__class__
]
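# A hedged sketch of how this flag might be consulted downstream; "estimator",
# "X", and "y" are hypothetical stand-ins, not names from this module:
#
#     if type(estimator) in _CHAIN_FLAG:
#         estimator.fit(X, Y=y)   # chains name their target parameter Y
#     else:
#         estimator.fit(X, y=y)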

# We need to identify CatBoost as the predict method utilizes the
# parameter data for the conventional design matrix parameter X.
_CATBOOST_FLAG = cat.CatBoostRegressor().__class__

_MULTI_TARGET = ['continuous-multioutput', 'multiclass-multioutput']

_OPTIMIZE_METHOD = [
    'Nelder-Mead', 'Powell', 'CG', 'BFGS', 'Newton-CG', 'L-BFGS-B', 'TNC',
    'COBYLA', 'SLSQP', 'trust-constr', 'dogleg', 'trust-ncg', 'trust-exact',
    'trust-krylov', 'custom'
]
Example #17
print('test Score: ', rf.score(X_test, rf_y_test))

# In[ ]:

# Chained Models for Each Output (RegressorChain)
# https://machinelearningmastery.com/multi-output-regression-models-with-python/
# Another approach to using single-output regression models for multioutput regression is to create a linear
# sequence of models.

# The first model in the sequence uses the input and predicts one output; the second model uses the input and
# the output from the first model to make a prediction; the third model uses the input and output from the
# first two models to make a prediction, and so on.

from sklearn.multioutput import RegressorChain

wrapper = RegressorChain(rf)
wrapper.fit(X_train, y_train)

rf_y_test_pred = wrapper.predict(X_test)
# summarize prediction
print(rf_y_test_pred[0:5])
print(rf_y_test_pred.astype('int')[0:5])
rf_y_test_pred = rf_y_test_pred.astype('int')

# In[ ]:

# Use the random forest's predict method on the test data
rf_y_test_pred = rf.predict(X_test)
print(rf_y_test_pred[0:5])
print(rf_y_test_pred.astype('int')[0:5])
rf_y_test_pred = rf_y_test_pred.astype('int')
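To make the linear sequence concrete, a minimal self-contained sketch (synthetic data, not the rf pipeline above) showing that each link in the chain sees one extra feature per earlier target:

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import RegressorChain

X_demo, y_demo = make_regression(n_samples=100, n_features=5, n_targets=3,
                                 random_state=0)
chain = RegressorChain(LinearRegression()).fit(X_demo, y_demo)
# The first link sees 5 features, the second 5 + 1, the third 5 + 2, because
# each link appends the previous links' predictions to its inputs.
print([est.coef_.shape[0] for est in chain.estimators_])  # [5, 6, 7]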
Example #18
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

POTENTIAL_TRANSFORMER = {
    'OneHotEncoder': ce.OneHotEncoder,
    'OrdinalEncoder': ce.OrdinalEncoder,
    'TargetEncoder': ce.TargetEncoder,
    'JamesSteinEncoder': ce.JamesSteinEncoder,
    'MinMaxScaler': MinMaxScaler,
    'StandardScaler': StandardScaler
}

POTENTIAL_MODELS = {
    'XGBRegressor': XGBRegressor,
    # RegressorChain needs an estimator instance, not a class; expose it as a
    # factory so _initialize_model below can still call it like a class
    'XGBRegressorChain': lambda **kw: RegressorChain(XGBRegressor(), **kw),
    'ExtraTreesRegressor': ExtraTreesRegressor,
}


def _initialize_model(fc_model, params):
    initialized_params = {
        key: value
        for key, value in params.items() if key in fc_model().get_params()
    }
    model = fc_model(**initialized_params)
    return model


def get_model(params):
    params = params if params else {}
Example #19
# sklearn.preprocessing.Imputer has been removed; SimpleImputer replaces it
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')  # missing_values defaults to NaN
dataset_r8 = imputer.fit_transform(dataset)

from sklearn.preprocessing import MinMaxScaler
sc_X = MinMaxScaler()
dataset_r8 = sc_X.fit_transform(dataset_r8)
dataset_r8 = pd.DataFrame(dataset_r8)
X = dataset_r8.iloc[:, :4]
y = dataset_r8.iloc[:, 4:]

# define base model
model = LinearSVR(max_iter=5000)
#model = LinearRegression()
# define the chained multioutput wrapper model
wrapper = RegressorChain(model)

# split into train and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.1,
                                                    random_state=1)

# fit model
wrapper.fit(X_train, y_train)

y_pred = wrapper.predict(X_test)
from sklearn.metrics import mean_squared_error
# The snippet is truncated here; a natural completion of the RMSE line:
rmse = mean_squared_error(y_true=y_test, y_pred=y_pred, squared=False)
Example #20
File: model.py, Project: sarajcev/Seminar
        'forest__max_features': [50, 100, 150, 200, 250, None],
    }
elif multi_model == 'DecisionTree':
    # Decision tree regressor (supports multi-output natively)
    tree = DecisionTreeRegressor()
    # Creating a pipeline
    pipe = Pipeline(steps=[('tree', tree)])
    param_dists = {
        'tree__criterion': ['mse', 'mae'],
        'tree__max_depth': [2, 4, 6, 8, None],
        'tree__min_samples_leaf': stats.randint(low=1, high=9),
    }
elif multi_model == 'ChainSVR':
    # Support Vector Regression (does NOT support multi-output natively)
    # Building a regressor chain from the SVM base estimators.
    svr = RegressorChain(base_estimator=SVR(kernel='rbf', cache_size=512))
    # Creating a pipeline
    pipe = Pipeline(steps=[('preprocess', 'passthrough'), ('svr', svr)])
    # Parameters of pipeline for the randomized search with cross-validation
    param_dists = {
        'preprocess': [None, StandardScaler()],
        'svr__base_estimator__C': stats.loguniform(1e0, 1e3),
        'svr__base_estimator__epsilon': stats.loguniform(1e-5, 1e-2),
        'svr__base_estimator__gamma': ['scale', 'auto'],
    }
elif multi_model == 'MultiSVR':
    # Support Vector Regression (does NOT support multi-output natively)
    # Creating a multi-output regressor from the SVR base estimators.
    svr = MultiOutputRegressor(estimator=SVR(kernel='rbf'))
    # Creating a pipeline
    pipe = Pipeline(steps=[('preprocess', 'passthrough'), ('svr', svr)])
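The doubled prefixes above (svr__base_estimator__C) route first through the pipeline step name and then into the chain's base estimator; a self-contained hedged sketch of running such a search. The base_estimator name follows this snippet's scikit-learn version; newer releases may name the parameter estimator.

from scipy import stats
from sklearn.datasets import make_regression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.multioutput import RegressorChain
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR

X_s, y_s = make_regression(n_samples=100, n_features=5, n_targets=2, random_state=0)
pipe_demo = Pipeline(steps=[('svr', RegressorChain(base_estimator=SVR()))])
search = RandomizedSearchCV(pipe_demo,
                            {'svr__base_estimator__C': stats.loguniform(1e0, 1e3)},
                            n_iter=5, cv=3, random_state=0)
search.fit(X_s, y_s)
print(search.best_params_)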
Example #21
                                      alpha_g_fit=alpha_g_fit,
                                      alpha_g_split=alpha_g_split,
                                      g_cap=g_cap,
                                      realize_iter=realize_iter,
                                      median_predict=median_predict,
                                      bij_novelty=bij_novelty)
elif estimator_name == 'TBAB':
    regressor = AdaBoostRegressor(base_estimator=base_estimator,
                                  n_estimators=n_estimators,
                                  learning_rate=learning_rate,
                                  loss=loss,
                                  random_state=seed,
                                  bij_novelty=bij_novelty)
elif estimator_name == 'TBRC':
    regressor = RegressorChain(base_estimator=base_estimator,
                               order=order,
                               cv=cv,
                               random_state=seed)

else:
    regressor = base_estimator

t0 = t.time()
regressor.fit(x_train, y_train, tb=tb_train)
t1 = t.time()
print('\nFinished fitting {} in {:.4f} s'.format(estimator_name, t1 - t0))

# Save estimator
dump(regressor, case.resultPaths[time] + estimator_name + '.joblib')

score_test = regressor.score(x_test, y_test, tb=tb_test)
score_train = regressor.score(x_train, y_train, tb=tb_train)
Example #22
def training(X_train, Y_train, flag, combine_type="multi_output"):
    """
    :param combine_type: "multi_output" or "chain"
    """

    algo_clf = None
    if flag == 0:
        algo_clf = KernelRidge()
    elif flag == 1:
        algo_clf = LinearSVR()
    elif flag == 2:
        algo_clf = SVR()
    elif flag == 3:
        algo_clf = NuSVR()
    elif flag == 4:
        algo_clf = LinearRegression()
    elif flag == 5:
        algo_clf = Ridge()
    elif flag == 6:
        algo_clf = Lasso()
    elif flag == 7:
        algo_clf = ElasticNet()
    elif flag == 8:
        algo_clf = Lars()
    elif flag == 9:
        algo_clf = LassoLars()
    elif flag == 10:
        algo_clf = BayesianRidge()
    elif flag == 11:
        algo_clf = SGDRegressor(loss="squared_error")  # "squared_loss" in older sklearn
    elif flag == 12:
        algo_clf = SGDRegressor(loss="huber")
    elif flag == 13:
        algo_clf = SGDRegressor(loss="epsilon_insensitive")
    elif flag == 14:
        algo_clf = KNeighborsRegressor()
    elif flag == 15:
        algo_clf = GaussianProcessRegressor()
    elif flag == 16:
        algo_clf = DecisionTreeRegressor()
    elif flag == 17:
        algo_clf = RandomForestRegressor(n_estimators=500)
    elif flag == 18:
        algo_clf = ExtraTreesRegressor(n_estimators=500)
    elif flag == 19:
        algo_clf = BaggingRegressor(n_estimators=500)
    elif flag == 20:
        algo_clf = AdaBoostRegressor(n_estimators=500)
    elif flag == 21:
        algo_clf = GradientBoostingRegressor(n_estimators=500)
    elif flag == 22:
        algo_clf = HistGradientBoostingRegressor()

    if algo_clf is None:
        raise ValueError("Unknown flag: {}".format(flag))

    if combine_type == "multi_output":
        clf = MultiOutputRegressor(algo_clf).fit(X_train, Y_train)
    elif combine_type == "chain":
        clf = RegressorChain(algo_clf).fit(X_train, Y_train)
    else:
        raise ValueError('combine_type must be "multi_output" or "chain"')

    return clf, algo_clf.__class__.__name__
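A hedged usage sketch of training() on synthetic data; per the ladder above, flag 5 selects Ridge:

from sklearn.datasets import make_regression

X_tr, Y_tr = make_regression(n_samples=100, n_features=6, n_targets=3,
                             random_state=0)
clf, name = training(X_tr, Y_tr, flag=5, combine_type="chain")
print(name, clf.predict(X_tr[:2]).shape)  # Ridge (2, 3)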
Example #23
feature_cols = ['day', 'hour', 'minute', 'weekday']  # FEATURES
#feature_cols = ['hour','minute','weekday']

X = dataframe.loc[:, feature_cols]
if not args.all_ap:
    print("One label")
    y = dataframe.AcadBldg18AP2  # Only one AP first as target
else:
    print("Multi-label")
    y = dataframe.loc[:, "AcadBldg10AP1":"SocBldg9AP1"]  # ALL APS

#print(dataframe.head(-1))

if args.chained:
    print("Using Regressor Chain")
    model = RegressorChain(model, random_state=1)
    mode = "rc"
elif args.binary:
    print("Using Binary Relevance")
    model = MultiOutputRegressor(model)
    mode = "br"
else:
    print("Using Raw Model")

timeSeriesSplitCV(model, X, y, args.split_num, args.show_plot)

#real = y.tolist()

# Scatter plot
# fig, ax = plt.subplots()
# ax.scatter(real, predicted)
Example #24
print(f'Result: {mean(n_scores):.3f}, ({std(n_scores):.3f})')


# Wrapper for algorithms that don't support Multi-Output Regression
# Support Vector Machine
from sklearn.svm import LinearSVR
model = LinearSVR()

# Won't work:
# model.fit(X, y)  # Exception! ValueError, bad input shape

# Option 1: MultiOutputRegressor. Creates a separate model for each output.
# Works well if the outputs are independent or mostly independent.
from sklearn.multioutput import MultiOutputRegressor
wrapper = MultiOutputRegressor(model)
# fit model
wrapper.fit(X, y)
# predict
yhat = wrapper.predict(data_in)
print('MultiOutputRegressor with Support Vector Regressor Model')
print(yhat[0])

# Option 2: RegressorChain
from sklearn.multioutput import RegressorChain
model = LinearSVR()
wrapper = RegressorChain(model)  # optional order=[...] sets the chain sequence
wrapper.fit(X, y)
yhat = wrapper.predict(data_in)
print('RegressorChain with Support Vector Regressor Model')
print(yhat[0])
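The chain's order defaults to the target column order; a hedged sketch of overriding it on synthetic data (X, y, and data_in above come from earlier in the original script):

from sklearn.datasets import make_regression
from sklearn.svm import LinearSVR
from sklearn.multioutput import RegressorChain

X_toy, y_toy = make_regression(n_samples=200, n_features=8, n_targets=3,
                               random_state=1)
# Predict target 2 first, then 0, then 1; earlier predictions become features.
ordered_chain = RegressorChain(LinearSVR(max_iter=5000), order=[2, 0, 1])
ordered_chain.fit(X_toy, y_toy)
print(ordered_chain.predict(X_toy[:1]))  # columns return in the original order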
Example #25
# In[ ]:


# Using best hyper-parameters from the single-step ahead regression
multi_svr = MultiOutputRegressor(estimator=SVR(kernel='rbf', **svr.best_params_), n_jobs=-1)
multi_svr.fit(X_train.values, y_train.values)


# In[ ]:


# A multi-step model that arranges regressions into a chain. Each model makes a prediction
# in the order specified by the chain (i.e. order of columns in the target matrix) using
# all of the available features provided to the model plus the predictions of models that
# are earlier in the chain. Order of columns is arranged by time-lags. Base model is SVM!
chain_svr = RegressorChain(base_estimator=SVR(kernel='rbf', **svr.best_params_))
chain_svr.fit(X_train.values, y_train.values)


# ## DecisionTree multi-step regressor

# In[ ]:


# DecisionTreeRegressor supports multi-step output out-of-the-box!
# Grid search with cross-validation
parameters = [{'criterion':['mse', 'mae'],
              'max_depth':[1, 5, None],
              'max_features':['auto', 'log2', 0.5],
              'max_leaf_nodes':[2, None]}]
tree = GridSearchCV(estimator=DecisionTreeRegressor(), 
Example #26
    def fit(self, X, y, sample_weight=None):

        if sample_weight is not None:
            raise NotImplementedError()

        if self.base_estimator is None:
            base_estimator = BoostedRegressor([
                LinearRegression(frame_out=True),
                AGPR(n_restarts_optimizer=250),
            ])
        else:
            base_estimator = self.base_estimator
        random_state = None
        if isinstance(self.random_state, int):
            random_state = self.random_state
        elif self.random_state is not None:
            random_state = self.random_state.randint(2**30)

        self.estimators = []
        rc = RegressorChain(
            base_estimator,
            order='random',
            cv=self.cv,
        )
        for n in range(self.n_chains):
            n_rc = clone(rc)
            if random_state is not None:
                n_rc.random_state = random_state + n
            i = (f'chain{n}', n_rc)
            self.estimators.append(i)

        self._pre_fit(X, y)
        # return super().fit(X, y, sample_weight=sample_weight)
        # common fit operations
        if self.estimators is None or len(self.estimators) == 0:
            raise AttributeError('Invalid `estimators` attribute, `estimators`'
                                 ' should be a list of (string, estimator)'
                                 ' tuples')

        if (self.weights is not None
                and len(self.weights) != len(self.estimators)):
            raise ValueError('Number of `estimators` and weights must be equal'
                             '; got %d weights, %d estimators' %
                             (len(self.weights), len(self.estimators)))

        names, clfs = zip(*self.estimators)
        self._validate_names(names)

        n_isnone = np.sum(
            [clf in (None, 'drop') for _, clf in self.estimators])
        if n_isnone == len(self.estimators):
            raise ValueError(
                'All estimators are None or "drop". At least one is required!')

        # self.estimators_ = Parallel(n_jobs=self.n_jobs)(
        # 	delayed(_parallel_fit_estimator)(clone(clf), X, y,
        # 									 sample_weight=sample_weight)
        # 	for clf in clfs if clf not in (None, 'drop')
        # )

        self.estimators_ = []
        for (clfname, clf) in self.estimators:
            e = clone(clf).fit(X, y)
            self.estimators_.append(e)

        self.named_estimators_ = Bunch()
        for k, e in zip(self.estimators, self.estimators_):
            self.named_estimators_[k[0]] = e
        return self
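Example #26 builds an ensemble of randomly ordered chains; a compact hedged sketch of the same idea with stock scikit-learn pieces:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.multioutput import RegressorChain

X_e, Y_e = make_regression(n_samples=150, n_features=8, n_targets=3, random_state=0)
chains = [RegressorChain(Ridge(), order="random", random_state=s).fit(X_e, Y_e)
          for s in range(5)]
# Averaging across random orders smooths out the arbitrary choice of chain order.
avg_pred = np.mean([c.predict(X_e) for c in chains], axis=0)
print(avg_pred.shape)  # (150, 3)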
Example #27
    y = df[target_columns].values

    chain_order = [4, 3, 1, 2, 0]
    estimators = {
        "K-nn": KNeighborsRegressor(),
        "Ridge": Ridge(),
        "Lasso": Lasso(),
        "ElasticNet": ElasticNet(random_state=0),
        "MultiO/P AdaB": MultiOutputRegressor(AdaBoostRegressor(n_estimators=5)),
        "RegChain K-nn": RegressorChain(KNeighborsRegressor(), order=chain_order),
        "RandomForestRegressor": RandomForestRegressor(max_depth=4),
        "Decision Tree Regressor": DecisionTreeRegressor(max_depth=5),
        "Extra trees": ExtraTreesRegressor(n_estimators=10),
        "MultiO/P GBR": MultiOutputRegressor(GradientBoostingRegressor(n_estimators=5)),
    }
    scores_df = defined_cross_val_score(x,
                                        y,
                                        estimators,
                                        n_folds=3,
                                        std_threshold=None)
    scores_df = scores_df.sort_values(by='mean_score')