Example 1
def search_space(self):
    space = {
        "n_estimators": hp.randint(self.n_estimators_range[0],
                                   self.n_estimators_range[1]),
        "max_depth": hp.randint(self.max_depth_range[0],
                                self.max_depth_range[1]),
        "min_child_weight": self.min_child_weight,
        "lr": self.lr
    }
    return space
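For reference, each hp call above declares how a value is drawn per trial rather than a concrete value. Below is a minimal standalone sketch with made-up literal bounds; note that hp.randint appears to treat the upper bound as exclusive, judging by the look_back[1] + 1 call in Example 2:

import bigdl.orca.automl.hp as hp

space = {
    "n_estimators": hp.randint(100, 500),  # an integer drawn from the range each trial
    "max_depth": hp.randint(3, 10),
    "min_child_weight": 1,                 # plain values stay fixed across trials
    "lr": 0.01,
}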
Example 2
    def get_past_seq_config(look_back):
        """
        get_past_seq_config(look_back)
        Generate pass sequence config based on look_back.

        :param look_back: look_back configuration
        :return: search configuration for past sequence
        """
        if isinstance(look_back, tuple) and len(look_back) == 2 and isinstance(
                look_back[0], int) and isinstance(look_back[1], int):
            if look_back[1] < 2:
                raise ValueError(
                    "The max look back value should be at least 2")
            if look_back[0] < 2:
                print("The input min look back value is smaller than 2. "
                      "We sample from range (2, {}) instead.".format(
                          look_back[1]))
            # clamp the lower bound to 2 so the sampled range matches the message above
            past_seq_config = hp.randint(max(look_back[0], 2), look_back[1] + 1)
        elif isinstance(look_back, int):
            if look_back < 2:
                raise ValueError(
                    "look back value should not be smaller than 2. "
                    "Current value is {}.".format(look_back))
            past_seq_config = look_back
        else:
            raise ValueError(
                "look back is {}.\n "
                "look_back should be either a tuple with 2 int values:"
                " (min_len, max_len) or a single int".format(look_back))
        return past_seq_config
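A few hypothetical calls (ignoring the enclosing class) illustrating the three branches:

get_past_seq_config((2, 10))  # tuple -> hp.randint(2, 11): look_back sampled from 2 to 10
get_past_seq_config(5)        # int   -> the fixed value 5
get_past_seq_config("5")      # anything else -> ValueError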
Example 3
def test_fit(self):
    data, validation_data = get_data()
    auto_arima = AutoARIMA(metric="mse",
                           p=hp.randint(0, 4),
                           q=hp.randint(0, 4),
                           seasonality_mode=hp.choice([True, False]),
                           P=hp.randint(5, 12),
                           Q=hp.randint(5, 12),
                           m=hp.choice([4, 7]))
    auto_arima.fit(
        data=data,
        validation_data=validation_data,
        epochs=1,
        n_sampling=1,
    )
    best_model = auto_arima.get_best_model()
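Here p and q are the non-seasonal AR and MA orders, while P, Q and m configure the seasonal component that seasonality_mode toggles, so the search in this example is over model structure rather than training hyperparameters.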
Example 4
def test_num_channels(self):
    auto_tcn = AutoTCN(input_feature_num=input_feature_dim,
                       output_target_num=output_feature_dim,
                       past_seq_len=past_seq_len,
                       future_seq_len=future_seq_len,
                       optimizer='Adam',
                       loss=torch.nn.MSELoss(),
                       metric="mse",
                       hidden_units=4,
                       levels=hp.randint(1, 3),
                       num_channels=[8] * 2,
                       kernel_size=hp.choice([2, 3]),
                       lr=hp.choice([0.001, 0.003, 0.01]),
                       dropout=hp.uniform(0.1, 0.2),
                       logs_dir="/tmp/auto_tcn",
                       cpus_per_trial=2,
                       name="auto_tcn")
    auto_tcn.fit(
        data=train_dataloader_creator,
        epochs=1,
        batch_size=hp.choice([32, 64]),
        validation_data=valid_dataloader_creator,
        n_sampling=1,
    )
    assert auto_tcn.get_best_model()
    best_config = auto_tcn.get_best_config()
    assert best_config['num_channels'] == [8] * 2
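Passing an explicit num_channels list appears to pin the per-level channel widths, which is why the best config still reports [8] * 2 even though levels and hidden_units were also supplied.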
Example 5
    def test_fit_lstm_data_creator(self):
        input_feature_dim = 4
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'layer_num': hp.randint(1, 3),
            'lr': hp.choice([0.001, 0.003, 0.01]),
            'dropout': hp.uniform(0.1, 0.2)
        }
        auto_estimator = AutoTSEstimator(model='lstm',
                                         search_space=search_space,
                                         past_seq_len=7,
                                         future_seq_len=1,
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         logs_dir="/tmp/auto_trainer",
                                         cpus_per_trial=2,
                                         name="auto_trainer")
        auto_estimator.fit(data=get_data_creator(),
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=get_data_creator(),
                           n_sampling=1)
        config = auto_estimator.get_best_config()
        assert config["past_seq_len"] == 7
Example 6
    def test_fit_third_party_feature(self):
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
        tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler,
                                                              fit=False)

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        auto_estimator = AutoTSEstimator(model=model_creator,
                                         search_space=search_space,
                                         past_seq_len=hp.randint(4, 6),
                                         future_seq_len=1,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2)

        ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                         epochs=1,
                                         batch_size=hp.choice([32, 64]),
                                         validation_data=tsdata_valid,
                                         n_sampling=1)
        best_config = auto_estimator.get_best_config()
        best_model = auto_estimator._get_best_automl_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        assert isinstance(ts_pipeline, TSPipeline)

        # use the raw base model to predict and evaluate
        tsdata_valid.roll(lookback=best_config["past_seq_len"],
                          horizon=0,
                          feature_col=best_config["selected_features"])
        x_valid, y_valid = tsdata_valid.to_numpy()
        y_pred_raw = best_model.predict(x_valid)
        y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

        # use the tspipeline to predict and evaluate
        eval_result = ts_pipeline.evaluate(tsdata_valid)
        y_pred = ts_pipeline.predict(tsdata_valid)

        # check if they are the same
        np.testing.assert_almost_equal(y_pred, y_pred_raw)

        # save and load
        ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_3rdparty")
        new_ts_pipeline = TSPipeline.load(
            "/tmp/auto_trainer/autots_tmp_model_3rdparty")

        # check that the loaded pipeline behaves the same as before
        eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
        y_pred_new = new_ts_pipeline.predict(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
        np.testing.assert_almost_equal(y_pred, y_pred_new)

        # use tspipeline to incrementally train
        new_ts_pipeline.fit(tsdata_valid)
Example 7
def get_auto_estimator():
    auto_lstm = AutoLSTM(input_feature_num=input_feature_dim,
                         output_target_num=output_feature_dim,
                         past_seq_len=5,
                         optimizer='Adam',
                         loss=torch.nn.MSELoss(),
                         metric="mse",
                         hidden_dim=hp.grid_search([32, 64]),
                         layer_num=hp.randint(1, 3),
                         lr=hp.choice([0.001, 0.003, 0.01]),
                         dropout=hp.uniform(0.1, 0.2),
                         logs_dir="/tmp/auto_lstm",
                         cpus_per_trial=2,
                         name="auto_lstm")
    return auto_lstm
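A hypothetical use of this helper, mirroring the fit calls in the surrounding examples (the numpy tuples and the exact keyword combination are assumptions):

auto_lstm = get_auto_estimator()
auto_lstm.fit(data=(x_train, y_train),           # numpy tuples, as in Example 12
              epochs=1,
              batch_size=hp.choice([32, 64]),
              validation_data=(x_val, y_val),
              n_sampling=1)
best_model = auto_lstm.get_best_model()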
Example 8
def _gen_sample_func(self, ranges, param_name):
    if isinstance(ranges, tuple):
        assert len(ranges) == 2, \
            f"length of tuple {param_name} should be 2, got {len(ranges)} instead."
        assert param_name != "teacher_forcing", \
            f"{param_name} can only be a list, got a tuple instead."
        if param_name in ["lr"]:
            return hp.loguniform(lower=ranges[0], upper=ranges[1])
        if param_name in [
                "lstm_hidden_dim", "lstm_layer_num", "batch_size"
        ]:
            return hp.randint(lower=ranges[0], upper=ranges[1])
        if param_name in ["dropout"]:
            return hp.uniform(lower=ranges[0], upper=ranges[1])
    if isinstance(ranges, list):
        return hp.grid_search(ranges)
    raise RuntimeError(f"{param_name} should be either a list or a tuple.")
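Hypothetical calls on an instance (here named est) showing how each range type is dispatched:

est._gen_sample_func((1e-4, 1e-1), "lr")           # tuple + "lr"  -> hp.loguniform(1e-4, 1e-1)
est._gen_sample_func((32, 128), "batch_size")      # tuple         -> hp.randint(32, 128)
est._gen_sample_func([32, 64], "lstm_hidden_dim")  # list          -> hp.grid_search([32, 64])
est._gen_sample_func(0.5, "dropout")               # anything else -> RuntimeError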
Example 9
def get_auto_estimator():
    auto_tcn = AutoTCN(input_feature_num=input_feature_dim,
                       output_target_num=output_feature_dim,
                       past_seq_len=past_seq_len,
                       future_seq_len=future_seq_len,
                       optimizer='Adam',
                       loss=torch.nn.MSELoss(),
                       metric="mse",
                       hidden_units=8,
                       levels=hp.randint(1, 3),
                       kernel_size=hp.choice([2, 3]),
                       lr=hp.choice([0.001, 0.003, 0.01]),
                       dropout=hp.uniform(0.1, 0.2),
                       logs_dir="/tmp/auto_tcn",
                       cpus_per_trial=2,
                       name="auto_tcn")
    return auto_tcn
Example 10
def get_auto_estimator():
    auto_seq2seq = AutoSeq2Seq(input_feature_num=input_feature_dim,
                               output_target_num=output_feature_dim,
                               past_seq_len=past_seq_len,
                               future_seq_len=future_seq_len,
                               optimizer='Adam',
                               loss=torch.nn.MSELoss(),
                               metric="mse",
                               lr=hp.choice([0.001, 0.003, 0.01]),
                               lstm_hidden_dim=hp.grid_search([32, 64, 128]),
                               lstm_layer_num=hp.randint(1, 4),
                               dropout=hp.uniform(0.1, 0.3),
                               teacher_forcing=False,
                               logs_dir="/tmp/auto_seq2seq",
                               cpus_per_trial=2,
                               name="auto_seq2seq")
    return auto_seq2seq
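Note that teacher_forcing is fixed to False here, consistent with Example 8, which rejects a tuple range for teacher_forcing and only grid-searches it from a list.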
Example 11
    def test_select_feature(self):
        sample_num = np.random.randint(100, 200)
        df = pd.DataFrame({
            "datetime": pd.date_range('1/1/2019', periods=sample_num),
            "value": np.random.randn(sample_num),
            "id": np.array(['00'] * sample_num)
        })
        train_ts, val_ts, _ = TSDataset.from_pandas(df,
                                                    target_col=['value'],
                                                    dt_col='datetime',
                                                    id_col='id',
                                                    with_split=True,
                                                    val_ratio=0.1)

        search_space = {
            'hidden_dim': hp.grid_search([32, 64]),
            'layer_num': hp.randint(1, 3),
            'lr': hp.choice([0.001, 0.003, 0.01]),
            'dropout': hp.uniform(0.1, 0.2)
        }

        input_feature_dim, output_feature_dim = 1, 1
        auto_estimator = AutoTSEstimator(model='lstm',
                                         search_space=search_space,
                                         past_seq_len=6,
                                         future_seq_len=1,
                                         input_feature_num=input_feature_dim,
                                         output_target_num=output_feature_dim,
                                         selected_features="auto",
                                         metric="mse",
                                         loss=torch.nn.MSELoss(),
                                         cpus_per_trial=2,
                                         name="auto_trainer")

        auto_estimator.fit(data=train_ts,
                           epochs=1,
                           batch_size=hp.choice([32, 64]),
                           validation_data=val_ts,
                           n_sampling=1)
        config = auto_estimator.get_best_config()
        assert config['past_seq_len'] == 6
Example 12
    args = parser.parse_args()

    num_nodes = 1 if args.cluster_mode == "local" else args.num_workers
    init_orca_context(cluster_mode=args.cluster_mode,
                      cores=args.cores,
                      memory=args.memory,
                      num_nodes=num_nodes,
                      init_ray_on_spark=True)

    tsdata_train, tsdata_valid, tsdata_test = get_tsdata()

    auto_lstm = AutoLSTM(input_feature_num=1,
                         output_target_num=1,
                         past_seq_len=20,
                         hidden_dim=hp.grid_search([32, 64]),
                         layer_num=hp.randint(1, 3),
                         lr=hp.choice([0.01, 0.03, 0.1]),
                         dropout=hp.uniform(0.1, 0.2),
                         optimizer='Adam',
                         loss=torch.nn.MSELoss(),
                         metric="mse")

    x_train, y_train = tsdata_train.roll(lookback=20, horizon=1).to_numpy()
    x_val, y_val = tsdata_valid.roll(lookback=20, horizon=1).to_numpy()
    x_test, y_test = tsdata_test.roll(lookback=20, horizon=1).to_numpy()

    auto_lstm.fit(data=(x_train, y_train),
                  epochs=args.epochs,
                  validation_data=(x_val, y_val))

    yhat = auto_lstm.predict(x_test)
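A minimal follow-up check using plain NumPy (no additional BigDL API assumed) to score the predictions against the held-out targets:

import numpy as np

mse = np.mean((yhat - y_test) ** 2)  # the same metric the tuner optimized
print("test mse: {:.4f}".format(mse))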
Example 13
    def test_fit_seq2seq_feature(self):
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        tsdata_train = get_tsdataset().gen_dt_feature().scale(scaler, fit=True)
        tsdata_valid = get_tsdataset().gen_dt_feature().scale(scaler,
                                                              fit=False)

        auto_estimator = AutoTSEstimator(model='seq2seq',
                                         search_space="minimal",
                                         past_seq_len=hp.randint(4, 6),
                                         future_seq_len=1,
                                         selected_features="auto",
                                         metric="mse",
                                         optimizer="Adam",
                                         loss=torch.nn.MSELoss(),
                                         logs_dir="/tmp/auto_trainer",
                                         cpus_per_trial=2,
                                         name="auto_trainer")
        ts_pipeline = auto_estimator.fit(data=tsdata_train,
                                         epochs=1,
                                         batch_size=hp.choice([32, 64]),
                                         validation_data=tsdata_valid,
                                         n_sampling=1)
        best_config = auto_estimator.get_best_config()
        best_model = auto_estimator._get_best_automl_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        assert isinstance(ts_pipeline, TSPipeline)

        # use the raw base model to predict and evaluate
        tsdata_valid.roll(lookback=best_config["past_seq_len"],
                          horizon=0,
                          feature_col=best_config["selected_features"])
        x_valid, y_valid = tsdata_valid.to_numpy()
        y_pred_raw = best_model.predict(x_valid)
        y_pred_raw = tsdata_valid.unscale_numpy(y_pred_raw)

        # use the tspipeline to predict and evaluate
        eval_result = ts_pipeline.evaluate(tsdata_valid)
        y_pred = ts_pipeline.predict(tsdata_valid)

        # check if they are the same
        np.testing.assert_almost_equal(y_pred, y_pred_raw)

        # save and load
        ts_pipeline.save("/tmp/auto_trainer/autots_tmp_model_seq2seq")
        new_ts_pipeline = TSPipeline.load(
            "/tmp/auto_trainer/autots_tmp_model_seq2seq")

        # check that the loaded pipeline behaves the same as before
        eval_result_new = new_ts_pipeline.evaluate(tsdata_valid)
        y_pred_new = new_ts_pipeline.predict(tsdata_valid)
        np.testing.assert_almost_equal(eval_result[0], eval_result_new[0])
        np.testing.assert_almost_equal(y_pred, y_pred_new)

        # check that the loaded pipeline gives the same results with onnx
        try:
            import onnx
            import onnxruntime
            eval_result_new_onnx = new_ts_pipeline.evaluate_with_onnx(
                tsdata_valid)
            y_pred_new_onnx = new_ts_pipeline.predict_with_onnx(tsdata_valid)
            np.testing.assert_almost_equal(eval_result[0],
                                           eval_result_new_onnx[0],
                                           decimal=5)
            np.testing.assert_almost_equal(y_pred, y_pred_new_onnx, decimal=5)
        except ImportError:
            pass

        # use tspipeline to incrementally train
        new_ts_pipeline.fit(tsdata_valid)
Example 14
                                                      random_state=2)

    config = {'random_state': 2}

    recipe = None

    num_rand_samples = 1
    n_estimators_range = (800, 1000)
    max_depth_range = (10, 15)
    lr = (1e-4, 1e-1)
    min_child_weight = [1, 2, 3]

    if opt.mode == 'skopt':
        search_space = {
            "n_estimators": hp.randint(n_estimators_range[0],
                                       n_estimators_range[1]),
            "max_depth": hp.randint(max_depth_range[0], max_depth_range[1]),
            "lr": hp.loguniform(lr[0], lr[1]),
            "min_child_weight": hp.choice(min_child_weight),
        }
        search_alg = "skopt"
        search_alg_params = None
        scheduler = "AsyncHyperBand"
        scheduler_params = dict(
            max_t=50,
            grace_period=1,
            reduction_factor=3,
            brackets=3,
Example 15
import bigdl.orca.automl.hp as hp


AUTO_MODEL_SUPPORT_LIST = ["lstm", "tcn", "seq2seq"]

AUTO_MODEL_DEFAULT_SEARCH_SPACE = {
    "lstm": {"minimal": {"hidden_dim": hp.grid_search([16, 32]),
                         "layer_num": hp.randint(1, 2),
                         "lr": hp.loguniform(0.001, 0.005),
                         "dropout": hp.uniform(0.1, 0.2)},
             "normal": {"hidden_dim": hp.grid_search([16, 32, 64]),
                        "layer_num": hp.grid_search([1, 2]),
                        "lr": hp.loguniform(0.0005, 0.01),
                        "dropout": hp.uniform(0, 0.2)},
             "large": {"hidden_dim": hp.grid_search([16, 32, 64, 128]),
                       "layer_num": hp.grid_search([1, 2, 3, 4]),
                       "lr": hp.loguniform(0.0005, 0.01),
                       "dropout": hp.uniform(0, 0.3)}},

    "tcn": {"minimal": {"hidden_units": hp.grid_search([16, 32]),
                        "levels": hp.randint(4, 6),
                        "kernel_size": 3,
                        "lr": hp.loguniform(0.001, 0.005),
Example 16
def get_xgb_search_space():
    return {
        "n_estimators": hp.randint(5, 10),
        "max_depth": hp.randint(2, 5),
        "lr": hp.loguniform(1e-4, 1e-1),
    }
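With this space, n_estimators and max_depth are drawn as integers from their ranges (the upper bound presumably being exclusive, as the other examples suggest), while lr is sampled log-uniformly between 1e-4 and 1e-1, spreading trials evenly across orders of magnitude.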