def __init__(self,
             num_rand_samples=1,
             epochs=5,
             training_iteration=10,
             look_back=2,
             latent_dim=[32, 64, 128, 256],
             batch_size=[32, 64]):
    """
    Constructor.

    :param num_rand_samples: number of hyper-param configurations sampled randomly
    :param epochs: no. of epochs to train in each iteration
    :param training_iteration: no. of iterations for training (n epochs) in trials
    :param look_back: the length to look back, either a tuple with 2 int values,
        in the format (min len, max len), or a single int, which is
        a fixed length to look back.
    :param latent_dim: random search candidates for model param "latent_dim"
    :param batch_size: grid search candidates for batch size
    """
    super(self.__class__, self).__init__()

    # -- runtime params
    self.num_samples = num_rand_samples
    self.training_iteration = training_iteration

    # -- model params
    self.past_seq_config = PastSeqParamHandler.get_past_seq_config(look_back)
    self.latent_dim = hp.choice(latent_dim)
    self.dropout_config = hp.uniform(0.2, 0.5)

    # -- optimization params
    self.lr = hp.uniform(0.001, 0.01)
    self.batch_size = hp.grid_search(batch_size)
    self.epochs = epochs
def search_space(self):
    """
    Build the search space for a two-layer LSTM.

    Only the layer widths and the dropout rates are search candidates; the
    learning rate, batch size, epochs and look-back length are fixed values.

    :return: dict mapping config names to fixed values or hp samplers
    """
    space = {"model": "LSTM"}

    # first LSTM layer
    space["lstm_1_units"] = hp.choice([32, 64])
    space["dropout_1"] = hp.uniform(0.2, 0.5)

    # second LSTM layer
    space["lstm_2_units"] = hp.choice([32, 64])
    space["dropout_2"] = hp.uniform(0.2, 0.5)

    # fixed training settings
    space["lr"] = 0.001
    space["batch_size"] = 1024
    space["epochs"] = 1
    space["past_seq_len"] = 2
    return space
def search_space(self, all_available_features):
    """
    Build the search space for a two-layer LSTM that uses every feature.

    :param all_available_features: features to use; stored in the config
        as a JSON string under "selected_features"
    :return: dict mapping config names to fixed values or hp samplers
    """
    space = {"selected_features": json.dumps(all_available_features)}
    space["model"] = "LSTM"

    # first LSTM layer
    space["lstm_1_units"] = hp.choice([32, 64])
    space["dropout_1"] = hp.uniform(0.2, 0.5)

    # second LSTM layer
    space["lstm_2_units"] = hp.choice([32, 64])
    space["dropout_2"] = hp.uniform(0.2, 0.5)

    # fixed training settings
    space["lr"] = 0.001
    space["batch_size"] = 1024
    space["epochs"] = 1
    space["past_seq_len"] = 2
    return space
def test_fit_lstm_data_creator(self):
    """Fit an 'lstm' AutoTSTrainer from data creators and check past_seq_len."""
    lookback = 7
    n_features = 4
    n_targets = 2  # 2 targets are generated in get_tsdataset

    lstm_space = {
        'hidden_dim': hp.grid_search([32, 64]),
        'layer_num': hp.randint(1, 3),
        'lr': hp.choice([0.001, 0.003, 0.01]),
        'dropout': hp.uniform(0.1, 0.2),
    }

    trainer = AutoTSTrainer(
        model='lstm',
        search_space=lstm_space,
        past_seq_len=lookback,
        future_seq_len=1,
        input_feature_num=n_features,
        output_target_num=n_targets,
        selected_features="auto",
        metric="mse",
        loss=torch.nn.MSELoss(),
        logs_dir="/tmp/auto_trainer",
        cpus_per_trial=2,
        name="auto_trainer",
    )
    trainer.fit(
        data=get_data_creator(),
        epochs=1,
        batch_size=hp.choice([32, 64]),
        validation_data=get_data_creator(),
        n_sampling=1,
    )

    # the fixed look-back passed to the trainer must show up in the best config
    best_config = trainer.get_best_config()
    assert best_config["past_seq_len"] == lookback
def search_space(self):
    """
    Build a search space over learning rate and batch size.

    :return: dict with samplers for "lr" and "batch_size" plus the fixed
        "input_dim" and "output_dim" entries
    """
    # NOTE(review): `input_dim` is neither a parameter nor a local of this
    # method; it has to resolve from an enclosing or module scope -- confirm.
    space = dict(
        lr=hp.uniform(0.001, 0.01),
        batch_size=hp.choice([32, 64]),
        input_dim=input_dim,
        output_dim=1,
    )
    return space
def test_num_channels(self):
    """Check that an explicit num_channels list is kept in AutoTCN's best config."""
    # input_feature_dim, output_feature_dim, past_seq_len, future_seq_len and
    # the dataloader creators are defined outside this function.
    expected_channels = [8, 8]

    tcn = AutoTCN(
        input_feature_num=input_feature_dim,
        output_target_num=output_feature_dim,
        past_seq_len=past_seq_len,
        future_seq_len=future_seq_len,
        optimizer='Adam',
        loss=torch.nn.MSELoss(),
        metric="mse",
        hidden_units=4,
        levels=hp.randint(1, 3),
        num_channels=[8] * 2,
        kernel_size=hp.choice([2, 3]),
        lr=hp.choice([0.001, 0.003, 0.01]),
        dropout=hp.uniform(0.1, 0.2),
        logs_dir="/tmp/auto_tcn",
        cpus_per_trial=2,
        name="auto_tcn",
    )
    tcn.fit(
        data=train_dataloader_creator,
        epochs=1,
        batch_size=hp.choice([32, 64]),
        validation_data=valid_dataloader_creator,
        n_sampling=1,
    )

    assert tcn.get_best_model()
    config = tcn.get_best_config()
    assert config['num_channels'] == expected_channels
def create_lstm_search_space(input_dim):
    """
    Build a search space over learning rate and batch size.

    :param input_dim: value stored unchanged under the "input_dim" key
    :return: dict with samplers for "lr" and "batch_size" plus the fixed
        "input_dim" and "output_dim" entries
    """
    space = {}
    space["lr"] = hp.uniform(0.001, 0.01)
    space["batch_size"] = hp.choice([32, 64])
    space["input_dim"] = input_dim
    space["output_dim"] = 1
    return space
def test_fit_data_creator(self):
    """Fit AutoLSTM from dataloader creators and check the best config's ranges."""
    # input_feature_dim, output_feature_dim and the dataloader creators are
    # defined outside this function.
    estimator = AutoLSTM(
        input_feature_num=input_feature_dim,
        output_target_num=output_feature_dim,
        past_seq_len=5,
        optimizer='Adam',
        loss=torch.nn.MSELoss(),
        metric="mse",
        hidden_dim=hp.grid_search([32, 64]),
        layer_num=hp.randint(1, 3),
        lr=hp.choice([0.001, 0.003, 0.01]),
        dropout=hp.uniform(0.1, 0.2),
        logs_dir="/tmp/auto_lstm",
        cpus_per_trial=2,
        name="auto_lstm",
    )
    estimator.fit(
        data=train_dataloader_creator,
        epochs=1,
        batch_size=hp.choice([32, 64]),
        validation_data=valid_dataloader_creator,
        n_sampling=1,
    )
    assert estimator.get_best_model()

    # every sampled value must lie inside the range it was drawn from
    config = estimator.get_best_config()
    assert 0.1 <= config['dropout'] <= 0.2
    assert config['batch_size'] in (32, 64)
    assert 1 <= config['layer_num'] < 3
def test_fit_third_party_data_creator(self):
    """Fit AutoTSEstimator with a model creator on data-creator inputs."""
    lookback = 7
    n_features = 4
    n_targets = 2  # 2 targets are generated in get_tsdataset

    space = {
        'hidden_dim': hp.grid_search([32, 64]),
        'dropout': hp.uniform(0.1, 0.2),
    }

    estimator = AutoTSEstimator(
        model=model_creator,
        search_space=space,
        past_seq_len=lookback,
        future_seq_len=1,
        input_feature_num=n_features,
        output_target_num=n_targets,
        selected_features="auto",
        metric="mse",
        loss=torch.nn.MSELoss(),
        cpus_per_trial=2,
    )
    estimator.fit(
        data=get_data_creator(),
        epochs=1,
        batch_size=hp.choice([32, 64]),
        validation_data=get_data_creator(),
        n_sampling=1,
    )

    # the fixed look-back passed to the estimator must show up in the best config
    best_config = estimator.get_best_config()
    assert best_config["past_seq_len"] == lookback
def test_fit_third_party_feature(self):
    """
    End-to-end check of AutoTSEstimator with a third-party model creator.

    Fits on scaled TSDataset inputs, compares the returned TSPipeline's
    predictions with those of the raw best model, then saves and reloads
    the pipeline, checks that it evaluates and predicts identically, and
    finally fits the reloaded pipeline again.
    """
    from sklearn.preprocessing import StandardScaler

    pipeline_path = "/tmp/auto_trainer/autots_tmp_model_3rdparty"

    # the scaler is fitted on the training set and re-used for validation
    std_scaler = StandardScaler()
    train_ts = get_tsdataset().gen_dt_feature().scale(std_scaler, fit=True)
    valid_ts = get_tsdataset().gen_dt_feature().scale(std_scaler, fit=False)

    space = {
        'hidden_dim': hp.grid_search([32, 64]),
        'dropout': hp.uniform(0.1, 0.2),
    }
    estimator = AutoTSEstimator(
        model=model_creator,
        search_space=space,
        past_seq_len=hp.randint(4, 6),
        future_seq_len=1,
        selected_features="auto",
        metric="mse",
        loss=torch.nn.MSELoss(),
        cpus_per_trial=2,
    )
    pipeline = estimator.fit(
        data=train_ts,
        epochs=1,
        batch_size=hp.choice([32, 64]),
        validation_data=valid_ts,
        n_sampling=1,
    )

    config = estimator.get_best_config()
    raw_model = estimator._get_best_automl_model()
    assert 4 <= config["past_seq_len"] <= 6
    assert isinstance(pipeline, TSPipeline)

    # use the raw base model to predict and evaluate
    valid_ts.roll(
        lookback=config["past_seq_len"],
        horizon=0,
        feature_col=config["selected_features"],
    )
    x_valid, _ = valid_ts.to_numpy()
    raw_pred = raw_model.predict(x_valid)
    raw_pred = valid_ts.unscale_numpy(raw_pred)

    # use the pipeline to predict and evaluate
    metrics = pipeline.evaluate(valid_ts)
    pipeline_pred = pipeline.predict(valid_ts)

    # check that both prediction paths agree
    np.testing.assert_almost_equal(pipeline_pred, raw_pred)

    # save and load
    pipeline.save(pipeline_path)
    reloaded = TSPipeline.load(pipeline_path)

    # check that the loaded pipeline behaves like the original one
    reloaded_metrics = reloaded.evaluate(valid_ts)
    reloaded_pred = reloaded.predict(valid_ts)
    np.testing.assert_almost_equal(metrics[0], reloaded_metrics[0])
    np.testing.assert_almost_equal(pipeline_pred, reloaded_pred)

    # use the loaded pipeline to train incrementally
    reloaded.fit(valid_ts)
def create_linear_search_space():
    """
    Build a search space over dropout, the two fc layer sizes, the learning
    rate and the batch size.

    :return: dict mapping config names to hp samplers; the learning-rate
        entry is keyed by LR_NAME, which is defined outside this function
    """
    space = {}
    space["dropout"] = hp.uniform(0.2, 0.3)
    space["fc1_size"] = hp.choice([50, 64])
    space["fc2_size"] = hp.choice([100, 128])
    space[LR_NAME] = hp.choice([0.001, 0.003, 0.01])
    space["batch_size"] = hp.choice([32, 64])
    return space
def search_space(self, all_available_features):
    """
    Build a search space over dropout, the two fc layer sizes, the learning
    rate and the batch size.

    :param all_available_features: accepted but not read by this method
    :return: dict mapping config names to hp samplers; the learning-rate
        entry is keyed by LR_NAME, which is defined outside this method
    """
    from zoo.orca.automl import hp

    space = {}
    space["dropout"] = hp.uniform(0.2, 0.3)
    space["fc1_size"] = hp.choice([50, 64])
    space["fc2_size"] = hp.choice([100, 128])
    space[LR_NAME] = hp.choice([0.001, 0.003, 0.01])
    space["batch_size"] = hp.choice([32, 64])
    return space
def __init__(self,
             num_rand_samples=1,
             epochs=5,
             training_iteration=10,
             time_step=[3, 4],
             long_num=[3, 4],
             cnn_height=[2, 3],
             cnn_hid_size=[32, 50, 100],
             ar_size=[2, 3],
             batch_size=[32, 64]):
    """
    Constructor.

    :param num_rand_samples: number of hyper-param configurations sampled randomly
    :param epochs: no. of epochs to train in each iteration
    :param training_iteration: no. of iterations for training (n epochs) in trials
    :param time_step: random search candidates for model param "time_step"
    :param long_num: random search candidates for model param "long_num"
    :param cnn_height: random search candidates for model param "cnn_height"
    :param cnn_hid_size: random search candidates for model param "cnn_hid_size"
    :param ar_size: random search candidates for model param "ar_size"
    :param batch_size: grid search candidates for batch size
    """
    super(self.__class__, self).__init__()

    def _derive_past_seq_len(spec):
        # look-back length computed from the sampled long_num and time_step
        return (spec.config.long_num + 1) * spec.config.time_step

    # -- run time params
    self.num_samples = num_rand_samples
    self.training_iteration = training_iteration

    # -- optimization params
    self.lr = hp.uniform(0.001, 0.01)
    self.batch_size = hp.grid_search(batch_size)
    self.epochs = epochs

    # -- model params
    self.cnn_dropout = hp.uniform(0.2, 0.5)
    self.rnn_dropout = hp.uniform(0.2, 0.5)
    self.time_step = hp.choice(time_step)
    self.long_num = hp.choice(long_num)
    self.cnn_height = hp.choice(cnn_height)
    self.cnn_hid_size = hp.choice(cnn_hid_size)
    self.ar_size = hp.choice(ar_size)
    self.past_seq_len = hp.sample_from(_derive_past_seq_len)
def search_space(self):
    """
    Build a search space that chooses between "LSTM" and "Seq2seq" and
    carries the parameters of both models.

    "epochs" and "past_seq_len" are taken from self.epochs and
    self.past_seq_config.

    :return: dict mapping config names to fixed values or hp samplers
    """
    space = {"model": hp.choice(["LSTM", "Seq2seq"])}

    # --------- Vanilla LSTM model parameters
    space["lstm_1_units"] = hp.choice([8, 16, 32, 64, 128])
    space["dropout_1"] = hp.uniform(0.2, 0.5)
    space["lstm_2_units"] = hp.choice([8, 16, 32, 64, 128])
    space["dropout_2"] = hp.uniform(0.2, 0.5)

    # ----------- Seq2Seq model parameters
    space["latent_dim"] = hp.choice([32, 64, 128, 256])
    space["dropout"] = hp.uniform(0.2, 0.5)

    # ----------- optimization parameters
    space["lr"] = hp.uniform(0.001, 0.01)
    space["batch_size"] = hp.choice([32, 64, 1024])
    space["epochs"] = self.epochs
    space["past_seq_len"] = self.past_seq_config
    return space
def search_space(self, all_available_features):
    """
    Build a search space with random feature selection and a choice
    between the "LSTM" and "Seq2seq" models.

    :param all_available_features: sequence of features from which a
        random subset is drawn each time "selected_features" is sampled
    :return: dict mapping config names to fixed values or hp samplers;
        "epochs" and "past_seq_len" come from self.epochs and
        self.past_seq_config
    """

    def _sample_selected_features(spec):
        # Draw a random subset (without replacement) and encode it as JSON.
        total = len(all_available_features)
        if total > 3:
            subset_size = np.random.randint(low=3, high=total)
        else:
            # The range [3, total) is empty here and np.random.randint
            # would raise, so fall back to selecting every feature.
            subset_size = total
        chosen = np.random.choice(all_available_features,
                                  size=subset_size,
                                  replace=False)
        return json.dumps(list(chosen))

    return {
        # -------- feature related parameters
        "selected_features": hp.sample_from(_sample_selected_features),

        # -------- model selection TODO add MTNet
        "model": hp.choice(["LSTM", "Seq2seq"]),

        # --------- Vanilla LSTM model parameters
        "lstm_1_units": hp.grid_search([16, 32]),
        "dropout_1": 0.2,
        "lstm_2_units": hp.grid_search([16, 32]),
        "dropout_2": hp.uniform(0.2, 0.5),

        # ----------- Seq2Seq model parameters
        "latent_dim": hp.grid_search([32, 64]),
        "dropout": hp.uniform(0.2, 0.5),

        # ----------- optimization parameters
        "lr": hp.uniform(0.001, 0.01),
        "batch_size": hp.choice([32, 64]),
        "epochs": self.epochs,
        "past_seq_len": self.past_seq_config,
    }
def search_space(self):
    """
    Build a search space that chooses between "LSTM" and "Seq2seq" and
    grid-searches the layer sizes of both models.

    "epochs" and "past_seq_len" are taken from self.epochs and
    self.past_seq_config.

    :return: dict mapping config names to fixed values or hp samplers
    """
    # -------- model selection TODO add MTNet
    space = {"model": hp.choice(["LSTM", "Seq2seq"])}

    # --------- Vanilla LSTM model parameters
    space["lstm_1_units"] = hp.grid_search([16, 32])
    space["dropout_1"] = 0.2
    space["lstm_2_units"] = hp.grid_search([16, 32])
    space["dropout_2"] = hp.uniform(0.2, 0.5)

    # ----------- Seq2Seq model parameters
    space["latent_dim"] = hp.grid_search([32, 64])
    space["dropout"] = hp.uniform(0.2, 0.5)

    # ----------- optimization parameters
    space["lr"] = hp.uniform(0.001, 0.01)
    space["batch_size"] = hp.choice([32, 64])
    space["epochs"] = self.epochs
    space["past_seq_len"] = self.past_seq_config
    return space
def test_fit_tcn_feature(self):
    """
    Fit a 'tcn' AutoTSTrainer on TSDataset inputs, then check that the best
    model gives the same predictions after a save / rebuild / restore cycle.

    The checkpoint is written into a temporary directory so that the test
    leaves no file behind in the current working directory.
    """
    import os
    import tempfile

    input_feature_dim = 11  # This param will not be used
    output_feature_dim = 2  # 2 targets are generated in get_tsdataset

    tsdata_train = get_tsdataset().gen_dt_feature()
    tsdata_valid = get_tsdataset().gen_dt_feature()
    tsdata_test = get_tsdataset().gen_dt_feature()

    search_space = {
        'hidden_units': hp.grid_search([32, 64]),
        'levels': hp.randint(4, 6),
        'kernel_size': hp.randint(3, 5),
        'dropout': hp.uniform(0.1, 0.2),
        'lr': hp.loguniform(0.001, 0.01),
    }
    auto_trainer = AutoTSTrainer(model='tcn',
                                 search_space=search_space,
                                 past_seq_len=hp.randint(4, 6),
                                 future_seq_len=1,
                                 input_feature_num=input_feature_dim,
                                 output_target_num=output_feature_dim,
                                 selected_features="auto",
                                 metric="mse",
                                 optimizer="Adam",
                                 loss=torch.nn.MSELoss(),
                                 logs_dir="/tmp/auto_trainer",
                                 cpus_per_trial=2,
                                 name="auto_trainer")
    auto_trainer.fit(data=tsdata_train,
                     epochs=1,
                     batch_size=hp.choice([32, 64]),
                     validation_data=tsdata_valid,
                     n_sampling=1)
    best_config = auto_trainer.get_best_config()
    best_model = auto_trainer.get_best_model()
    assert 4 <= best_config["past_seq_len"] <= 6

    # really difficult to use the model currently...
    tsdata_test.roll(lookback=best_config["past_seq_len"],
                     horizon=1,
                     feature_col=best_config["selected_features"])
    x_test, y_test = tsdata_test.to_numpy()
    y_pred = best_model.predict(x_test)

    with tempfile.TemporaryDirectory() as ckpt_dir:
        ckpt_path = os.path.join(ckpt_dir, "best.ckpt")
        best_model.save(ckpt_path)

        from zoo.automl.model.base_pytorch_model import PytorchModelBuilder
        restore_model = PytorchModelBuilder(
            model_creator=best_model.model_creator,
            optimizer_creator="Adam",
            loss_creator=torch.nn.MSELoss()).build(best_config)
        restore_model.restore(ckpt_path)
        y_pred_restore = restore_model.predict(x_test)

    np.testing.assert_almost_equal(y_pred, y_pred_restore)
def search_space(self, all_available_features):
    """
    Build a search space over learning rate and batch size for a fixed
    feature set.

    :param all_available_features: features to use; stored as a JSON
        string under "selected_features"
    :return: dict of samplers and fixed values; "input_dim" is the number
        of features plus one, or 1 when the feature collection is empty
        or None
    """
    space = {}
    space["lr"] = hp.uniform(0.001, 0.01)
    space["batch_size"] = hp.choice([32, 64])
    space["selected_features"] = json.dumps(all_available_features)

    if all_available_features:
        space["input_dim"] = len(all_available_features) + 1
    else:
        space["input_dim"] = 1

    space["output_dim"] = 1
    return space
def get_auto_estimator():
    """
    Create the AutoLSTM instance used by the tests.

    input_feature_dim and output_feature_dim are defined outside this
    function.

    :return: a newly constructed AutoLSTM
    """
    return AutoLSTM(
        input_feature_num=input_feature_dim,
        output_target_num=output_feature_dim,
        past_seq_len=5,
        optimizer='Adam',
        loss=torch.nn.MSELoss(),
        metric="mse",
        hidden_dim=hp.grid_search([32, 64]),
        layer_num=hp.randint(1, 3),
        lr=hp.choice([0.001, 0.003, 0.01]),
        dropout=hp.uniform(0.1, 0.2),
        logs_dir="/tmp/auto_lstm",
        cpus_per_trial=2,
        name="auto_lstm",
    )
def _gen_sample_func(self, ranges, param_name):
    """
    Turn a user-supplied range into an hp sampler.

    A list becomes a grid search over its elements. A 2-tuple is read as
    (lower, upper) and mapped to a sampler chosen by parameter name:
    loguniform for "lr", randint for "lstm_hidden_dim", "lstm_layer_num"
    and "batch_size", uniform for "dropout".

    :param ranges: list of candidates or (lower, upper) tuple
    :param param_name: name of the hyper-parameter being configured
    :return: the hp sampler for the parameter
    :raises AssertionError: if a tuple does not have length 2, or a tuple
        is given for "teacher_forcing"
    :raises RuntimeError: if `ranges` is neither a list nor a tuple, or a
        tuple is given for a parameter name not listed above
    """
    if isinstance(ranges, list):
        return hp.grid_search(ranges)

    if isinstance(ranges, tuple):
        assert len(ranges) == 2, \
            f"length of tuple {param_name} should be 2 while get {len(ranges)} instead."
        assert param_name != "teacher_forcing", \
            f"type of {param_name} can only be a list while get a tuple"

        if param_name in ("lr",):
            return hp.loguniform(lower=ranges[0], upper=ranges[1])
        if param_name in ("lstm_hidden_dim", "lstm_layer_num", "batch_size"):
            return hp.randint(lower=ranges[0], upper=ranges[1])
        if param_name in ("dropout",):
            return hp.uniform(lower=ranges[0], upper=ranges[1])

    # also reached for a tuple whose parameter name has no sampler above
    raise RuntimeError(f"{param_name} should be either a list or a tuple.")
def get_auto_estimator():
    """
    Create the AutoTCN instance used by the tests.

    input_feature_dim, output_feature_dim, past_seq_len and future_seq_len
    are defined outside this function.

    :return: a newly constructed AutoTCN
    """
    return AutoTCN(
        input_feature_num=input_feature_dim,
        output_target_num=output_feature_dim,
        past_seq_len=past_seq_len,
        future_seq_len=future_seq_len,
        optimizer='Adam',
        loss=torch.nn.MSELoss(),
        metric="mse",
        hidden_units=8,
        levels=hp.randint(1, 3),
        kernel_size=hp.choice([2, 3]),
        lr=hp.choice([0.001, 0.003, 0.01]),
        dropout=hp.uniform(0.1, 0.2),
        logs_dir="/tmp/auto_tcn",
        cpus_per_trial=2,
        name="auto_tcn",
    )
def get_auto_estimator():
    """
    Create the AutoSeq2Seq instance used by the tests.

    input_feature_dim, output_feature_dim, past_seq_len and future_seq_len
    are defined outside this function.

    :return: a newly constructed AutoSeq2Seq
    """
    return AutoSeq2Seq(
        input_feature_num=input_feature_dim,
        output_target_num=output_feature_dim,
        past_seq_len=past_seq_len,
        future_seq_len=future_seq_len,
        optimizer='Adam',
        loss=torch.nn.MSELoss(),
        metric="mse",
        lr=hp.choice([0.001, 0.003, 0.01]),
        lstm_hidden_dim=hp.grid_search([32, 64, 128]),
        lstm_layer_num=hp.randint(1, 4),
        dropout=hp.uniform(0.1, 0.3),
        teacher_forcing=False,
        logs_dir="/tmp/auto_seq2seq",
        cpus_per_trial=2,
        name="auto_seq2seq",
    )
def __init__(self,
             num_samples=1,
             look_back=2,
             epochs=5,
             reward_metric=-0.05,
             training_iteration=5):
    """
    Constructor.

    :param num_samples: number of hyper-param configurations sampled
    :param look_back: the length to look back, either a tuple with 2 int values,
        in the format (min len, max len), or a single int, which is
        a fixed length to look back.
    :param epochs: no. of epochs to train in each iteration
    :param reward_metric: the rewarding metric value, when reached, stop trial
    :param training_iteration: no. of iterations for training (n epochs) in trials
    :raises ValueError: if look_back is an int smaller than 2, a tuple whose
        max value is smaller than 2, or neither an int nor a tuple of 2 ints
    """
    super(self.__class__, self).__init__()
    self.num_samples = num_samples
    self.reward_metric = reward_metric
    self.training_iteration = training_iteration
    self.epochs = epochs

    is_int_pair = (isinstance(look_back, tuple)
                   and len(look_back) == 2
                   and isinstance(look_back[0], int)
                   and isinstance(look_back[1], int))
    if is_int_pair:
        min_len, max_len = look_back
        if max_len < 2:
            raise ValueError("The max look back value should be at least 2")
        if min_len < 2:
            print("The input min look back value is smaller than 2. "
                  "We sample from range (2, {}) instead.".format(max_len))
            # Actually apply the lower bound announced in the message above.
            min_len = 2
        self.bayes_past_seq_config = \
            {"past_seq_len_float": hp.uniform(min_len, max_len)}
    elif isinstance(look_back, int):
        if look_back < 2:
            # Format the value into the message instead of passing it as a
            # second exception argument.
            raise ValueError(
                "look back value should not be smaller than 2. "
                "Current value is {}".format(look_back))
        self.bayes_past_seq_config = {"past_seq_len": look_back}
    else:
        raise ValueError(
            "look back is {}.\n "
            "look_back should be either a tuple with 2 int values:"
            " (min_len, max_len) or a single int".format(look_back))
def test_auto_prophet_save_load(self):
    """Fit AutoProphet, then save it to and restore it from a temporary path."""
    train_data, horizon = get_data()

    prophet_space = dict(
        changepoint_prior_scale=hp.loguniform(0.001, 0.5),
        seasonality_prior_scale=hp.loguniform(0.01, 10),
        holidays_prior_scale=hp.loguniform(0.01, 10),
        seasonality_mode=hp.choice(['additive', 'multiplicative']),
        changepoint_range=hp.uniform(0.8, 0.95),
    )
    prophet = AutoProphet(metric="mse", **prophet_space)
    prophet.fit(data=train_data, expect_horizon=horizon, n_sampling=1)

    # the checkpoint lives in a temporary directory
    with tempfile.TemporaryDirectory() as workdir:
        checkpoint = os.path.join(workdir, "json")
        prophet.save(checkpoint)
        prophet.restore(checkpoint)
def test_select_feature(self):
    """Fit an 'lstm' AutoTSEstimator on a single-target random series."""
    lookback = 6
    n_features, n_targets = 1, 1

    # build a random daily series with a single id
    n_rows = np.random.randint(100, 200)
    frame = pd.DataFrame({
        "datetime": pd.date_range('1/1/2019', periods=n_rows),
        "value": np.random.randn(n_rows),
        "id": np.array(['00'] * n_rows),
    })
    train_ts, val_ts, _ = TSDataset.from_pandas(
        frame,
        target_col=['value'],
        dt_col='datetime',
        id_col='id',
        with_split=True,
        val_ratio=0.1,
    )

    space = {
        'hidden_dim': hp.grid_search([32, 64]),
        'layer_num': hp.randint(1, 3),
        'lr': hp.choice([0.001, 0.003, 0.01]),
        'dropout': hp.uniform(0.1, 0.2),
    }
    estimator = AutoTSEstimator(
        model='lstm',
        search_space=space,
        past_seq_len=lookback,
        future_seq_len=1,
        input_feature_num=n_features,
        output_target_num=n_targets,
        selected_features="auto",
        metric="mse",
        loss=torch.nn.MSELoss(),
        cpus_per_trial=2,
        name="auto_trainer",
    )
    estimator.fit(
        data=train_ts,
        epochs=1,
        batch_size=hp.choice([32, 64]),
        validation_data=val_ts,
        n_sampling=1,
    )

    best_config = estimator.get_best_config()
    assert best_config['past_seq_len'] == lookback
def test_auto_prophet_fit(self):
    """Fit AutoProphet and check the best model's params lie in their search ranges."""
    train_data, horizon = get_data()

    prophet_space = dict(
        changepoint_prior_scale=hp.loguniform(0.001, 0.5),
        seasonality_prior_scale=hp.loguniform(0.01, 10),
        holidays_prior_scale=hp.loguniform(0.01, 10),
        seasonality_mode=hp.choice(['additive', 'multiplicative']),
        changepoint_range=hp.uniform(0.8, 0.95),
    )
    prophet = AutoProphet(metric="mse", **prophet_space)
    prophet.fit(data=train_data, expect_horizon=horizon, n_sampling=1)

    # each parameter of the best model must lie inside its search range
    model = prophet.get_best_model()
    assert 0.001 <= model.changepoint_prior_scale <= 0.5
    assert 0.01 <= model.seasonality_prior_scale <= 10
    assert 0.01 <= model.holidays_prior_scale <= 10
    assert model.seasonality_mode in ['additive', 'multiplicative']
    assert 0.8 <= model.changepoint_range <= 0.95
def search_space(self):
    """
    Build the LSTM search space made of float-valued samplers, merged with
    the look-back configuration stored in self.bayes_past_seq_config.

    :return: dict mapping config names to fixed values or hp samplers
    """
    general = {"epochs": self.epochs, "model": "LSTM"}

    # --------- model parameters
    model_params = {
        "lstm_1_units_float": hp.uniform(8, 128),
        "dropout_1": hp.uniform(0.2, 0.5),
        "lstm_2_units_float": hp.uniform(8, 128),
        "dropout_2": hp.uniform(0.2, 0.5),
    }

    # ----------- optimization parameters
    optim_params = {
        "lr": hp.uniform(0.001, 0.1),
        "batch_size_float": hp.uniform(32, 128),
    }

    # the look-back config is merged last, so its entries win on a key clash
    return {**general, **model_params, **optim_params,
            **self.bayes_past_seq_config}
def test_auto_prophet_predict_evaluate(self):
    """Fit AutoProphet without cross validation, then call predict and evaluate."""
    train_data, horizon = get_data()

    prophet_space = dict(
        changepoint_prior_scale=hp.loguniform(0.001, 0.5),
        seasonality_prior_scale=hp.loguniform(0.01, 10),
        holidays_prior_scale=hp.loguniform(0.01, 10),
        seasonality_mode=hp.choice(['additive', 'multiplicative']),
        changepoint_range=hp.uniform(0.8, 0.95),
    )
    prophet = AutoProphet(metric="mse", **prophet_space)
    prophet.fit(
        data=train_data,
        cross_validation=False,
        expect_horizon=horizon,
        n_sampling=1,
    )

    prophet.predict(horizon=1, freq="D")

    # evaluation frame: 10 consecutive dates in 'ds' and random values in 'y'
    eval_frame = pd.DataFrame(pd.date_range('20150101', periods=10),
                              columns=['ds'])
    eval_frame.insert(1, 'y', np.random.rand(10))
    prophet.evaluate(eval_frame)
def test_fit(self):
    """
    Fit AutoProphet with an explicit search space and check that every
    parameter of the best model lies inside the range it was searched over.
    """
    auto_prophet = AutoProphet()
    data = get_data()
    search_space = {
        "changepoint_prior_scale": hp.loguniform(0.001, 0.5),
        "seasonality_prior_scale": hp.loguniform(0.01, 10),
        "holidays_prior_scale": hp.loguniform(0.01, 10),
        "seasonality_mode": hp.choice(['additive', 'multiplicative']),
        "changepoint_range": hp.uniform(0.8, 0.95),
    }
    auto_prophet.fit(data=data,
                     epochs=1,
                     metric="mse",
                     n_sampling=10,
                     search_space=search_space,
                     )
    best_model = auto_prophet.get_best_model()

    # All parameters are read through `best_model.model`; the
    # holidays_prior_scale check previously skipped `.model`, unlike the
    # other four assertions.
    fitted = best_model.model
    assert 0.001 <= fitted.changepoint_prior_scale <= 0.5
    assert 0.01 <= fitted.seasonality_prior_scale <= 10
    assert 0.01 <= fitted.holidays_prior_scale <= 10
    assert fitted.seasonality_mode in ['additive', 'multiplicative']
    assert 0.8 <= fitted.changepoint_range <= 0.95
def create_simple_recipe():
    """
    Build a minimal search space over learning rate and batch size.

    :return: dict with hp samplers for "lr" and "batch_size"
    """
    recipe = {}
    recipe["lr"] = hp.uniform(0.01, 0.02)
    recipe["batch_size"] = hp.choice([16, 32, 64])
    return recipe