Example #1
 def test_num_channels(self):
     auto_tcn = AutoTCN(input_feature_num=input_feature_dim,
                        output_target_num=output_feature_dim,
                        past_seq_len=past_seq_len,
                        future_seq_len=future_seq_len,
                        optimizer='Adam',
                        loss=torch.nn.MSELoss(),
                        metric="mse",
                        hidden_units=4,
                        levels=hp.randint(1, 3),
                        num_channels=[8] * 2,
                        kernel_size=hp.choice([2, 3]),
                        lr=hp.choice([0.001, 0.003, 0.01]),
                        dropout=hp.uniform(0.1, 0.2),
                        logs_dir="/tmp/auto_tcn",
                        cpus_per_trial=2,
                        name="auto_tcn")
     auto_tcn.fit(
         data=train_dataloader_creator,
         epochs=1,
         batch_size=hp.choice([32, 64]),
         validation_data=valid_dataloader_creator,
         n_sampling=1,
     )
     assert auto_tcn.get_best_model()
     best_config = auto_tcn.get_best_config()
     assert best_config['num_channels'] == [8] * 2
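These examples mix fixed values with hp search-space primitives. As a quick reference, here is a minimal sketch of the primitives used throughout, with semantics assumed to follow bigdl.orca.automl.hp (a thin wrapper over Ray Tune's samplers):

    from bigdl.orca.automl import hp

    space = {
        "levels": hp.randint(1, 3),              # random integer in the range
        "kernel_size": hp.choice([2, 3]),        # one element of the list, at random
        "dropout": hp.uniform(0.1, 0.2),         # uniform float between the bounds
        "lr": hp.loguniform(1e-4, 1e-1),         # float sampled uniformly on a log scale
        "hidden_dim": hp.grid_search([32, 64]),  # every listed value is tried
    }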
Example #2
    def test_fit_lstm_data_creator(self):
        input_feature_dim = 4
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'layer_num': hp.randint(1, 3),
            'lr': hp.choice([0.001, 0.003, 0.01]),
            'dropout': hp.uniform(0.1, 0.2)
        }
        auto_estimator = AutoTSEstimator(model='lstm',
                                         search_space=search_space,
                                         past_seq_len=7,
                                         future_seq_len=1,
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         logs_dir="/tmp/auto_trainer",
                                         cpus_per_trial=2,
                                         name="auto_trainer")
        auto_estimator.fit(data=get_data_creator(),
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=get_data_creator(),
                           n_sampling=1)
        config = auto_estimator.get_best_config()
        assert config["past_seq_len"] == 7
Example #3
 def search_space(self):
     return {
         "model": "MTNet",
         "lr": 0.001,
         "batch_size": 16,
         "epochs": 1,
         "cnn_dropout": 0.2,
         "rnn_dropout": 0.2,
         "time_step": hp.choice([3, 4]),
         "cnn_height": 2,
         "long_num": hp.choice([3, 4]),
         "ar_size": hp.choice([2, 3]),
         "past_seq_len": hp.sample_from(
             lambda spec: (spec.config.long_num + 1) * spec.config.time_step),
     }
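Here hp.sample_from makes past_seq_len a derived parameter: it is computed from the values already drawn for long_num and time_step in the same trial, so e.g. long_num=3 and time_step=4 give past_seq_len = (3 + 1) * 4 = 16, which keeps the input window consistent with the chunked layout MTNet expects.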
Example #4
def create_linear_search_space():
    from bigdl.orca.automl import hp
    return {
        "hidden_size": hp.choice([5, 10]),
        "lr": hp.choice([0.001, 0.003, 0.01]),
        "batch_size": hp.choice([32, 64])
    }
Example #5
def create_linear_search_space():
    return {
        "dropout": hp.uniform(0.2, 0.3),
        "fc1_size": hp.choice([50, 64]),
        "fc2_size": hp.choice([100, 128]),
        LR_NAME: hp.choice([0.001, 0.003, 0.01]),
        "batch_size": hp.choice([32, 64])
    }
Example #6
 def search_space(self):
     return {
         "model": "LSTM",
         "lstm_1_units": hp.choice([32, 64]),
         "dropout_1": hp.uniform(0.2, 0.5),
         "lstm_2_units": hp.choice([32, 64]),
         "dropout_2": hp.uniform(0.2, 0.5),
         "lr": 0.001,
         "batch_size": 1024,
         "epochs": 1,
         "past_seq_len": 2,
     }
Example #7
    def __init__(
            self,
            num_rand_samples=10,
            n_estimators_range=(50, 1000),
            max_depth_range=(2, 15),
            lr=(1e-4, 1e-1),
            min_child_weight=[1, 2, 3],
    ):
        """
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled
          randomly
        :param n_estimators_range: range of number of gradient boosted trees.
        :param max_depth_range: range of max tree depth
        :param lr: learning rate
        :param min_child_weight: minimum sum of instance weight (hessian)
          needed in a child.
        """
        super(self.__class__, self).__init__()

        self.num_samples = num_rand_samples

        self.n_estimators_range = n_estimators_range
        self.max_depth_range = max_depth_range
        self.lr = hp.loguniform(lr[0], lr[1])
        self.min_child_weight = hp.choice(min_child_weight)
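The learning rate above is searched on a log scale. A self-contained sketch of what log-uniform sampling does, assuming Ray-Tune-like semantics (the exponent, not the value, is uniform):

    import math
    import random

    def loguniform_sample(low=1e-4, high=1e-1):
        # draw the exponent uniformly, so each decade is equally likely
        return math.exp(random.uniform(math.log(low), math.log(high)))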
Example #8
 def test_fit(self):
     data, validation_data = get_data()
     auto_arima = AutoARIMA(metric="mse",
                            p=hp.randint(0, 4),
                            q=hp.randint(0, 4),
                            seasonality_mode=hp.choice([True, False]),
                            P=hp.randint(5, 12),
                            Q=hp.randint(5, 12),
                            m=hp.choice([4, 7]))
     auto_arima.fit(
         data=data,
         validation_data=validation_data,
         epochs=1,
         n_sampling=1,
     )
     best_model = auto_arima.get_best_model()
Example #9
    def test_fit_third_party_data_creator(self):
        input_feature_dim = 4
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        auto_estimator = AutoTSEstimator(model=model_creator,
                                         search_space=search_space,
                                         past_seq_len=7,
                                         future_seq_len=1,
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2)

        auto_estimator.fit(data=get_data_creator(),
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=get_data_creator(),
                           n_sampling=1)

        config = auto_estimator.get_best_config()
        assert config["past_seq_len"] == 7
Example #10
    def test_fit_third_party_feature(self):
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
        tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler,
                                                              fit=False)

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        auto_estimator = AutoTSEstimator(model=model_creator,
                                         search_space=search_space,
                                         past_seq_len=hp.randint(4, 6),
                                         future_seq_len=1,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2)

        ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                         epochs=1,
                                         batch_size=hp.choice([32, 64]),
                                         validation_data=tsdata_valid,
                                         n_sampling=1)
        best_config = auto_estimator.get_best_config()
        best_model = auto_estimator._get_best_automl_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        assert isinstance(ts_pipeline, TSPipeline)

        # use the raw base model to predict and evaluate
        tsdata_valid.roll(lookback=best_config["past_seq_len"],
                          horizon=0,
                          feature_col=best_config["selected_features"])
        x_valid, y_valid = tsdata_valid.to_numpy()
        y_pred_raw = best_model.predict(x_valid)
        y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

        # use the tspipeline to predict and evaluate
        eval_result = ts_pipeline.evaluate(tsdata_valid)
        y_pred = ts_pipeline.predict(tsdata_valid)

        # check if they are the same
        np.testing.assert_almost_equal(y_pred, y_pred_raw)

        # save and load
        ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_3rdparty")
        new_ts_pipeline = TSPipeline.load(
            "/tmp/auto_trainer/autots_tmp_model_3rdparty")

        # check that the loaded pipeline is the same as the previous one
        eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
        y_pred_new = new_ts_pipeline.predict(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
        np.testing.assert_almost_equal(y_pred, y_pred_new)

        # use tspipeline to incrementally train
        new_ts_pipeline.fit(tsdata_valid)
Example #11
def get_auto_estimator():
    auto_tcn = AutoTCN(input_feature_num=input_feature_dim,
                       output_target_num=output_feature_dim,
                       past_seq_len=past_seq_len,
                       future_seq_len=future_seq_len,
                       optimizer='Adam',
                       loss=torch.nn.MSELoss(),
                       metric="mse",
                       hidden_units=8,
                       levels=hp.randint(1, 3),
                       kernel_size=hp.choice([2, 3]),
                       lr=hp.choice([0.001, 0.003, 0.01]),
                       dropout=hp.uniform(0.1, 0.2),
                       logs_dir="/tmp/auto_tcn",
                       cpus_per_trial=2,
                       name="auto_tcn")
    return auto_tcn
Example #12
    def __init__(self,
                 num_rand_samples=1,
                 n_estimators=[8, 15],
                 max_depth=[10, 15],
                 n_jobs=-1,
                 tree_method='hist',
                 random_state=2,
                 seed=0,
                 lr=(1e-4, 1e-1),
                 subsample=0.8,
                 colsample_bytree=0.8,
                 min_child_weight=[1, 2, 3],
                 gamma=0,
                 reg_alpha=0,
                 reg_lambda=1):
        """
        Constructor. For XGBoost hyperparameters, refer to
        https://xgboost.readthedocs.io/en/latest/python/python_api.html for
        details.

        :param num_rand_samples: number of hyper-param configurations sampled
          randomly
        :param n_estimators: number of gradient boosted trees.
        :param max_depth: max tree depth
        :param n_jobs: number of parallel threads used to run xgboost.
        :param tree_method: specify which tree method to use.
        :param random_state: random number seed.
        :param seed: seed used to generate the folds
        :param lr: learning rate
        :param subsample: subsample ratio of the training instance
        :param colsample_bytree: subsample ratio of columns when constructing
          each tree.
        :param min_child_weight: minimum sum of instance weight (hessian)
          needed in a child.
        :param gamma: minimum loss reduction required to make a further
          partition on a leaf node of the tree.
        :param reg_alpha: L1 regularization term on weights (xgb’s alpha).
        :param reg_lambda: L2 regularization term on weights (xgb’s lambda).

        """
        super(self.__class__, self).__init__()

        self.num_samples = num_rand_samples
        self.n_jobs = n_jobs
        self.tree_method = tree_method
        self.random_state = random_state
        self.seed = seed

        self.colsample_bytree = colsample_bytree
        self.gamma = gamma
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda

        self.n_estimators = hp.grid_search(n_estimators)
        self.max_depth = hp.grid_search(max_depth)
        self.lr = hp.loguniform(lr[0], lr[-1])
        self.subsample = subsample
        self.min_child_weight = hp.choice(min_child_weight)
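Note the split above: n_estimators and max_depth use hp.grid_search (every listed value is tried, and grid parameters combine as a Cartesian product), while min_child_weight uses hp.choice (one value drawn per sample). Under that assumed Ray-Tune-style expansion, the defaults give:

    # grid params multiply; random params are drawn once per sample
    trials = num_rand_samples * len(n_estimators) * len(max_depth)  # 1 * 2 * 2 = 4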
Example #13
    def test_select_feature(self):
        sample_num = np.random.randint(100, 200)
        df = pd.DataFrame({
            "datetime": pd.date_range('1/1/2019', periods=sample_num),
            "value": np.random.randn(sample_num),
            "id": np.array(['00'] * sample_num)
        })
        train_ts, val_ts, _ = TSDataset.from_pandas(df,
                                                    target_col=['value'],
                                                    dt_col='datetime',
                                                    id_col='id',
                                                    with_split=True,
                                                    val_ratio=0.1)

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'layer_num': hp.randint(1, 3),
            'lr': hp.choice([0.001, 0.003, 0.01]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        input_feature_dim, output_feature_dim = 1, 1
        auto_estimator = AutoTSEstimator(model='lstm',
                                         search_space=search_space,
                                         past_seq_len=6,
                                         future_seq_len=1,
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2,
                                         name="auto_trainer")

        auto_estimator.fit(data=train_ts,
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=val_ts,
                           n_sampling=1)
        config = auto_estimator.get_best_config()
        assert config['past_seq_len'] == 6
Example #14
    def __init__(self,
                 num_rand_samples=1,
                 epochs=5,
                 training_iteration=10,
                 time_step=[3, 4],
                 long_num=[3, 4],
                 cnn_height=[2, 3],
                 cnn_hid_size=[32, 50, 100],
                 ar_size=[2, 3],
                 batch_size=[32, 64]):
        """
        __init__()
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param epochs: no. of epochs to train in each iteration
        :param time_step: random search candidates for model param "time_step"
        :param long_num: random search candidates for model param "long_num"
        :param ar_size: random search candidates for model param "ar_size"
        :param batch_size: grid search candidates for batch size
        :param cnn_height: random search candidates for model param "cnn_height"
        :param cnn_hid_size: random search candidates for model param "cnn_hid_size"
        """
        super(self.__class__, self).__init__()
        # -- run time params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- optimization params
        self.lr = hp.uniform(0.001, 0.01)
        self.batch_size = hp.grid_search(batch_size)
        self.epochs = epochs

        # ---- model params
        self.cnn_dropout = hp.uniform(0.2, 0.5)
        self.rnn_dropout = hp.uniform(0.2, 0.5)
        self.time_step = hp.choice(time_step)
        self.long_num = hp.choice(long_num)
        self.cnn_height = hp.choice(cnn_height)
        self.cnn_hid_size = hp.choice(cnn_hid_size)
        self.ar_size = hp.choice(ar_size)
        self.past_seq_len = hp.sample_from(
            lambda spec: (spec.config.long_num + 1) * spec.config.time_step)
Example #15
    def search_space(self):
        return {
            "model": hp.choice(["LSTM", "Seq2seq"]),
            # --------- Vanilla LSTM model parameters
            "lstm_1_units": hp.choice([8, 16, 32, 64, 128]),
            "dropout_1": hp.uniform(0.2, 0.5),
            "lstm_2_units": hp.choice([8, 16, 32, 64, 128]),
            "dropout_2": hp.uniform(0.2, 0.5),

            # ----------- Seq2Seq model parameters
            "latent_dim": hp.choice([32, 64, 128, 256]),
            "dropout": hp.uniform(0.2, 0.5),

            # ----------- optimization parameters
            "lr": hp.uniform(0.001, 0.01),
            "batch_size": hp.choice([32, 64, 1024]),
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }
Example #16
 def search_space(self):
     return {
         # -------- feature related parameters
         "model": "XGBRegressor",
         "imputation": hp.choice(["LastFillImpute", "FillZeroImpute"]),
         "n_estimators": self.n_estimators,
         "max_depth": self.max_depth,
         "min_child_weight": self.min_child_weight,
         "lr": self.lr
     }
Example #17
 def test_fit_np(self):
     auto_lstm = get_auto_estimator()
     auto_lstm.fit(data=get_x_y(size=1000),
                   epochs=1,
                   batch_size=hp.choice([32, 64]),
                   validation_data=get_x_y(size=400),
                   n_sampling=1)
     assert auto_lstm.get_best_model()
     best_config = auto_lstm.get_best_config()
     assert 0.1 <= best_config['dropout'] <= 0.2
     assert best_config['batch_size'] in (32, 64)
     assert 1 <= best_config['layer_num'] < 3
Example #18
    def search_space(self):
        return {
            # -------- model selection TODO add MTNet
            "model": hp.choice(["LSTM", "Seq2seq"]),

            # --------- Vanilla LSTM model parameters
            "lstm_1_units": hp.grid_search([16, 32]),
            "dropout_1": 0.2,
            "lstm_2_units": hp.grid_search([16, 32]),
            "dropout_2": hp.uniform(0.2, 0.5),

            # ----------- Seq2Seq model parameters
            "latent_dim": hp.grid_search([32, 64]),
            "dropout": hp.uniform(0.2, 0.5),

            # ----------- optimization parameters
            "lr": hp.uniform(0.001, 0.01),
            "batch_size": hp.choice([32, 64]),
            "epochs": self.epochs,
            "past_seq_len": self.past_seq_config,
        }
Example #19
 def test_fit_data_creator(self):
     auto_lstm = get_auto_estimator()
     auto_lstm.fit(data=train_dataloader_creator,
                   epochs=1,
                   batch_size=hp.choice([32, 64]),
                   validation_data=valid_dataloader_creator,
                   n_sampling=1)
     assert auto_lstm.get_best_model()
     best_config = auto_lstm.get_best_config()
     assert 0.1 <= best_config['dropout'] <= 0.2
     assert best_config['batch_size'] in (32, 64)
     assert 1 <= best_config['layer_num'] < 3
Example #20
    def __init__(self,
                 num_rand_samples=1,
                 training_iteration=40,
                 batch_size=[256, 512],
                 hidden_size=[32, 48],
                 levels=[6, 8],
                 kernel_size=[3, 5],
                 dropout=[0, 0.1],
                 lr=[0.001, 0.003]):
        """
        __init__()
        Constructor.

        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param batch_size: grid search candidates for batch size
        :param hidden_size: grid search candidates for hidden size of each layer
        :param levels: the number of layers
        :param kernel_size: the kernel size of each layer
        :param dropout: dropout rate (1 - keep probability)
        :param lr: learning rate
        """
        super(self.__class__, self).__init__()
        # -- run time params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- optimization params
        self.lr = hp.choice(lr)
        self.batch_size = hp.grid_search(batch_size)

        # ---- model params
        self.hidden_size = hp.grid_search(hidden_size)
        self.levels = hp.grid_search(levels)
        self.kernel_size = hp.grid_search(kernel_size)
        self.dropout = hp.choice(dropout)
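With four grid-searched lists of two values each, a single random sample already expands to 2 * 2 * 2 * 2 = 16 trials under the same Cartesian-product assumption as in Example #12, so the grid dimensions, not num_rand_samples, dominate the search budget here.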
Example #21
def get_auto_estimator():
    auto_lstm = AutoLSTM(input_feature_num=input_feature_dim,
                         output_target_num=output_feature_dim,
                         past_seq_len=5,
                         optimizer='Adam',
                         loss=torch.nn.MSELoss(),
                         metric="mse",
                         hidden_dim=hp.grid_search([32, 64]),
                         layer_num=hp.randint(1, 3),
                         lr=hp.choice([0.001, 0.003, 0.01]),
                         dropout=hp.uniform(0.1, 0.2),
                         logs_dir="/tmp/auto_lstm",
                         cpus_per_trial=2,
                         name="auto_lstm")
    return auto_lstm
Example #22
 def test_fit_np(self):
     auto_seq2seq = get_auto_estimator()
     auto_seq2seq.fit(
         data=get_x_y(size=1000),
         epochs=1,
         batch_size=hp.choice([32, 64]),
         validation_data=get_x_y(size=400),
         n_sampling=1,
     )
     assert auto_seq2seq.get_best_model()
     best_config = auto_seq2seq.get_best_config()
     assert 0.1 <= best_config['dropout'] <= 0.3
     assert best_config['batch_size'] in (32, 64)
     assert best_config['lstm_hidden_dim'] in (32, 64, 128)
     assert best_config['lstm_layer_num'] in (1, 2, 3, 4)
Example #23
def get_auto_estimator():
    auto_seq2seq = AutoSeq2Seq(input_feature_num=input_feature_dim,
                               output_target_num=output_feature_dim,
                               past_seq_len=past_seq_len,
                               future_seq_len=future_seq_len,
                               optimizer='Adam',
                               loss=torch.nn.MSELoss(),
                               metric="mse",
                               lr=hp.choice([0.001, 0.003, 0.01]),
                               lstm_hidden_dim=hp.grid_search([32, 64, 128]),
                               lstm_layer_num=hp.randint(1, 4),
                               dropout=hp.uniform(0.1, 0.3),
                               teacher_forcing=False,
                               logs_dir="/tmp/auto_seq2seq",
                               cpus_per_trial=2,
                               name="auto_seq2seq")
    return auto_seq2seq
Example #24
    def test_auto_prophet_save_load(self):
        data, expect_horizon = get_data()
        auto_prophet = AutoProphet(
            metric="mse",
            changepoint_prior_scale=hp.loguniform(0.001, 0.5),
            seasonality_prior_scale=hp.loguniform(0.01, 10),
            holidays_prior_scale=hp.loguniform(0.01, 10),
            seasonality_mode=hp.choice(['additive', 'multiplicative']),
            changepoint_range=hp.uniform(0.8, 0.95))

        auto_prophet.fit(
            data=data,
            expect_horizon=expect_horizon,
            n_sampling=1,
        )
        with tempfile.TemporaryDirectory() as tmp_dir_name:
            ckpt_name = os.path.join(tmp_dir_name, "json")
            auto_prophet.save(ckpt_name)
            auto_prophet.restore(ckpt_name)
Example #25
    def test_auto_prophet_fit(self):
        data, expect_horizon = get_data()
        auto_prophet = AutoProphet(
            metric="mse",
            changepoint_prior_scale=hp.loguniform(0.001, 0.5),
            seasonality_prior_scale=hp.loguniform(0.01, 10),
            holidays_prior_scale=hp.loguniform(0.01, 10),
            seasonality_mode=hp.choice(['additive', 'multiplicative']),
            changepoint_range=hp.uniform(0.8, 0.95))

        auto_prophet.fit(
            data=data,
            expect_horizon=expect_horizon,
            n_sampling=1,
        )
        best_model = auto_prophet.get_best_model()
        assert 0.001 <= best_model.changepoint_prior_scale <= 0.5
        assert 0.01 <= best_model.seasonality_prior_scale <= 10
        assert 0.01 <= best_model.holidays_prior_scale <= 10
        assert best_model.seasonality_mode in ['additive', 'multiplicative']
        assert 0.8 <= best_model.changepoint_range <= 0.95
Example #26
    def test_auto_prophet_predict_evaluate(self):
        data, expect_horizon = get_data()
        auto_prophet = AutoProphet(
            metric="mse",
            changepoint_prior_scale=hp.loguniform(0.001, 0.5),
            seasonality_prior_scale=hp.loguniform(0.01, 10),
            holidays_prior_scale=hp.loguniform(0.01, 10),
            seasonality_mode=hp.choice(['additive', 'multiplicative']),
            changepoint_range=hp.uniform(0.8, 0.95))

        auto_prophet.fit(
            data=data,
            cross_validation=False,
            expect_horizon=expect_horizon,
            n_sampling=1,
        )

        auto_prophet.predict(horizon=1, freq="D")
        test_data = pd.DataFrame(pd.date_range('20150101', periods=10),
                                 columns=['ds'])
        test_data.insert(1, 'y', np.random.rand(10))
        auto_prophet.evaluate(test_data)
Example #27
    def test_future_list_input(self):
        sample_num = np.random.randint(100, 200)
        df = pd.DataFrame({
            "datetime": pd.date_range('1/1/2019', periods=sample_num),
            "value": np.random.randn(sample_num),
            "id": np.array(['00'] * sample_num)
        })
        train_ts, val_ts, _ = TSDataset.from_pandas(df,
                                                    target_col=['value'],
                                                    dt_col='datetime',
                                                    id_col='id',
                                                    with_split=True,
                                                    val_ratio=0.1)

        input_feature_dim, output_feature_dim = 1, 1
        auto_estimator = AutoTSEstimator(model='seq2seq',
                                         search_space="minimal",
                                         past_seq_len=6,
                                         future_seq_len=[1, 3],
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2,
                                         name="auto_trainer")

        auto_estimator.fit(data=train_ts,
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=val_ts,
                           n_sampling=1)
        config = auto_estimator.get_best_config()
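        # a list-valued future_seq_len is kept verbatim on _future_seq_len,
        # while the model's effective future_seq_len becomes its length (2)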
        assert config['future_seq_len'] == 2
        assert auto_estimator._future_seq_len == [1, 3]
Example #28
    def __init__(self,
                 num_rand_samples=1,
                 epochs=5,
                 training_iteration=10,
                 look_back=2,
                 lstm_1_units=[16, 32, 64, 128],
                 lstm_2_units=[16, 32, 64],
                 batch_size=[32, 64]):
        """
        __init__()
        Constructor.

        :param lstm_1_units: random search candidates for the number of lstm_1_units
        :param lstm_2_units: grid search candidates for the number of lstm_2_units
        :param batch_size: grid search candidates for batch size
        :param num_rand_samples: number of hyper-param configurations sampled randomly
        :param look_back: the length to look back, either a tuple with 2 int values,
          which is in format is (min len, max len), or a single int, which is
          a fixed length to look back.
        :param training_iteration: no. of iterations for training (n epochs) in trials
        :param epochs: no. of epochs to train in each iteration
        """
        super(self.__class__, self).__init__()
        # -- runtime params
        self.num_samples = num_rand_samples
        self.training_iteration = training_iteration

        # -- model params
        self.past_seq_config = PastSeqParamHandler.get_past_seq_config(
            look_back)
        self.lstm_1_units_config = hp.choice(lstm_1_units)
        self.lstm_2_units_config = hp.grid_search(lstm_2_units)
        self.dropout_2_config = hp.uniform(0.2, 0.5)

        # -- optimization params
        self.lr = hp.uniform(0.001, 0.01)
        self.batch_size = hp.grid_search(batch_size)
        self.epochs = epochs
Example #29
    def test_fit_seq2seq_feature(self):
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
        tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler,
                                                              fit=False)

        auto_estimator = AutoTSEstimator(model='seq2seq',
                                         search_space="minimal",
                                         past_seq_len=hp.randint(4, 6),
                                         future_seq_len=1,
                                         selected_features="auto",
                                         metric="mse",
                                         optimizer="Adam",
                                         loss=torch.nn.MSELoss(),
                                         logs_dir="/tmp/auto_trainer",
                                         cpus_per_trial=2,
                                         name="auto_trainer")
        ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                         epochs=1,
                                         batch_size=hp.choice([32, 64]),
                                         validation_data=tsdata_valid,
                                         n_sampling=1)
        best_config = auto_estimator.get_best_config()
        best_model = auto_estimator._get_best_automl_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        assert isinstance(ts_pipeline, TSPipeline)

        # use the raw base model to predict and evaluate
        tsdata_valid.roll(lookback=best_config["past_seq_len"],
                          horizon=0,
                          feature_col=best_config["selected_features"])
        x_valid, y_valid = tsdata_valid.to_numpy()
        y_pred_raw = best_model.predict(x_valid)
        y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

        # use the tspipeline to predict and evaluate
        eval_result = ts_pipeline.evaluate(tsdata_valid)
        y_pred = ts_pipeline.predict(tsdata_valid)

        # check if they are the same
        np.testing.assert_almost_equal(y_pred, y_pred_raw)

        # save and load
        ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_seq2seq")
        new_ts_pipeline = TSPipeline.load(
            "/tmp/auto_trainer/autots_tmp_model_seq2seq")

        # check that the loaded pipeline is the same as the previous one
        eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
        y_pred_new = new_ts_pipeline.predict(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
        np.testing.assert_almost_equal(y_pred, y_pred_new)

        # check that the loaded pipeline gives the same results with onnx
        try:
            import onnx
            import onnxruntime
            eval_result_new_onnx = new_ts_pipeline.evaluate_with_onnx(
                tsdata_valid)
            y_pred_new_onnx = new_ts_pipeline.predict_with_onnx(tsdata_valid)
            np.testing.assert_almost_equal(eval_result[0],
                                           eval_result_new_onnx[0],
                                           decimal=5)
            np.testing.assert_almost_equal(y_pred, y_pred_new_onnx, decimal=5)
        except ImportError:
            pass

        # use tspipeline to incrementally train
        new_ts_pipeline.fit(tsdata_valid)
Example #30
    num_nodes = 1 if args.cluster_mode == "local" else args.num_workers
    init_orca_context(cluster_mode=args.cluster_mode,
                      cores=args.cores,
                      memory=args.memory,
                      num_nodes=num_nodes,
                      init_ray_on_spark=True)

    tsdata_train, tsdata_valid, tsdata_test = get_tsdata()

    auto_lstm = AutoLSTM(input_feature_num=1,
                         output_target_num=1,
                         past_seq_len=20,
                         hidden_dim=hp.grid_search([32, 64]),
                         layer_num=hp.randint(1, 3),
                         lr=hp.choice([0.01, 0.03, 0.1]),
                         dropout=hp.uniform(0.1, 0.2),
                         optimizer='Adam',
                         loss=torch.nn.MSELoss(),
                         metric="mse")

    x_train, y_train = tsdata_train.roll(lookback=20, horizon=1).to_numpy()
    x_val, y_val = tsdata_valid.roll(lookback=20, horizon=1).to_numpy()
    x_test, y_test = tsdata_test.roll(lookback=20, horizon=1).to_numpy()

    auto_lstm.fit(data=(x_train, y_train),
                  epochs=args.epochs,
                  validation_data=(x_val, y_val))

    yhat = auto_lstm.predict(x_test)
    unscale_y_test = tsdata_test.unscale_numpy(y_test)
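The snippet stops after unscaling the ground truth. A natural continuation (sketched with plain NumPy rather than a library metric, and assuming stop_orca_context was imported from bigdl.orca alongside init_orca_context) would unscale the prediction the same way, score it, and shut the context down:

    # unscale the prediction with the same dataset statistics, then score it
    unscale_yhat = tsdata_test.unscale_numpy(yhat)
    mse = np.mean((unscale_y_test - unscale_yhat) ** 2)
    print(f"test MSE: {mse:.4f}")
    stop_orca_context()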