def test_predict(self):
    train_data = pd.DataFrame(data=np.random.randn(64, 4))
    test_data = pd.DataFrame(data=np.random.randn(16, 4))
    future_seq_len = 1
    past_seq_len = 6

    # use roll method in time_sequence
    tsft = TimeSequenceFeatureTransformer()
    x_train, y_train = tsft._roll_train(train_data,
                                        past_seq_len=past_seq_len,
                                        future_seq_len=future_seq_len)
    x_test = tsft._roll_test(test_data, past_seq_len=past_seq_len)

    config = {
        'epochs': 2,
        "lr": 0.001,
        "lstm_1_units": 16,
        "dropout_1": 0.2,
        "lstm_2_units": 10,
        "dropout_2": 0.2,
        "batch_size": 32,
    }
    model = VanillaLSTM(check_optional_config=False,
                        future_seq_len=future_seq_len)
    model.fit_eval(x_train, y_train, **config)
    y_pred = model.predict(x_test)
    assert y_pred.shape == (x_test.shape[0], 1)
def test_evaluate(self):
    train_data = pd.DataFrame(data=np.random.randn(64, 4))
    val_data = pd.DataFrame(data=np.random.randn(16, 4))
    future_seq_len = 1
    past_seq_len = 6

    # use roll method in time_sequence
    tsft = TimeSequenceFeatureTransformer()
    x_train, y_train = tsft._roll_train(train_data,
                                        past_seq_len=past_seq_len,
                                        future_seq_len=future_seq_len)
    x_val, y_val = tsft._roll_train(val_data,
                                    past_seq_len=past_seq_len,
                                    future_seq_len=future_seq_len)

    config = {
        'epochs': 1,
        "lr": 0.001,
        "lstm_1_units": 16,
        "dropout_1": 0.2,
        "lstm_2_units": 10,
        "dropout_2": 0.2,
        "batch_size": 32,
    }
    model = VanillaLSTM(check_optional_config=False,
                        future_seq_len=future_seq_len)
    model.fit_eval(x_train, y_train, **config)
    print("evaluate:", model.evaluate(x_val, y_val))
def test_save_restore(self):
    train_data = pd.DataFrame(data=np.random.randn(64, 4))
    test_data = pd.DataFrame(data=np.random.randn(16, 4))
    future_seq_len = 1
    past_seq_len = 6

    # use roll method in time_sequence
    tsft = TimeSequenceFeatureTransformer()
    x_train, y_train = tsft._roll_train(train_data,
                                        past_seq_len=past_seq_len,
                                        future_seq_len=future_seq_len)
    x_test = tsft._roll_test(test_data, past_seq_len=past_seq_len)

    config = {
        'epochs': 2,
        "lr": 0.001,
        "lstm_1_units": 16,
        "dropout_1": 0.2,
        "lstm_2_units": 10,
        "dropout_2": 0.2,
        "batch_size": 32,
    }

    dirname = tempfile.mkdtemp(prefix="automl_test_vanilla")
    try:
        model = VanillaLSTM(check_optional_config=False,
                            future_seq_len=future_seq_len)
        model.fit_eval(x_train, y_train, **config)
        predict_before = model.predict(x_test)

        model_path = os.path.join(dirname, "testmodel.h5")
        config_path = os.path.join(dirname, "local_config.json")
        model.save(model_path=model_path, config_path=config_path)

        local_config = load_config(config_path)
        config.update(local_config)
        model.restore(model_path=model_path, **config)
        predict_after = model.predict(x_test)
        assert np.allclose(predict_before, predict_after)
    finally:
        shutil.rmtree(dirname)
class TestVanillaLSTM(ZooTestCase):

    def setup_method(self, method):
        # super().setup_method(method)
        train_data = pd.DataFrame(data=np.random.randn(64, 4))
        val_data = pd.DataFrame(data=np.random.randn(16, 4))
        test_data = pd.DataFrame(data=np.random.randn(16, 4))
        future_seq_len = 1
        past_seq_len = 6

        # use roll method in time_sequence
        tsft = TimeSequenceFeatureTransformer()
        self.x_train, self.y_train = tsft._roll_train(
            train_data, past_seq_len=past_seq_len, future_seq_len=future_seq_len)
        self.x_val, self.y_val = tsft._roll_train(
            val_data, past_seq_len=past_seq_len, future_seq_len=future_seq_len)
        self.x_test = tsft._roll_test(test_data, past_seq_len=past_seq_len)

        self.config = {
            'epochs': 1,
            "lr": 0.001,
            "lstm_1_units": 16,
            "dropout_1": 0.2,
            "lstm_2_units": 10,
            "dropout_2": 0.2,
            "batch_size": 32,
        }
        self.model = VanillaLSTM(check_optional_config=False,
                                 future_seq_len=future_seq_len)

    def teardown_method(self, method):
        pass

    def test_fit_eval(self):
        print("fit_eval:",
              self.model.fit_eval(self.x_train, self.y_train, **self.config))

    def test_fit_eval_mc(self):
        print("fit_eval:",
              self.model.fit_eval(self.x_train, self.y_train, mc=True, **self.config))

    def test_evaluate(self):
        self.model.fit_eval(self.x_train, self.y_train, **self.config)
        mse, rs = self.model.evaluate(self.x_val, self.y_val, metric=['mse', 'r2'])
        print("Mean squared error is:", mse)
        print("R square is:", rs)

    def test_predict(self):
        self.model.fit_eval(self.x_train, self.y_train, **self.config)
        self.y_pred = self.model.predict(self.x_test)
        assert self.y_pred.shape == (self.x_test.shape[0], 1)

    def test_save_restore(self):
        new_model = VanillaLSTM(check_optional_config=False)
        self.model.fit_eval(self.x_train, self.y_train, **self.config)
        predict_before = self.model.predict(self.x_test)

        dirname = tempfile.mkdtemp(prefix="automl_test_vanilla")
        try:
            save(dirname, model=self.model)
            restore(dirname, model=new_model, config=self.config)
            predict_after = new_model.predict(self.x_test)
            assert_array_almost_equal(predict_before, predict_after, decimal=2)
            new_config = {'epochs': 2}
            new_model.fit_eval(self.x_train, self.y_train, **new_config)
        finally:
            shutil.rmtree(dirname)

    def test_predict_with_uncertainty(self):
        self.model.fit_eval(self.x_train, self.y_train, mc=True, **self.config)
        prediction, uncertainty = self.model.predict_with_uncertainty(
            self.x_test, n_iter=10)
        assert prediction.shape == (self.x_test.shape[0], 1)
        assert uncertainty.shape == (self.x_test.shape[0], 1)
        assert np.any(uncertainty)

        new_model = VanillaLSTM(check_optional_config=False)
        dirname = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(dirname, model=self.model)
            restore(dirname, model=new_model, config=self.config)
            prediction, uncertainty = new_model.predict_with_uncertainty(
                self.x_test, n_iter=2)
            assert prediction.shape == (self.x_test.shape[0], 1)
            assert uncertainty.shape == (self.x_test.shape[0], 1)
            assert np.any(uncertainty)
        finally:
            shutil.rmtree(dirname)
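# Note on test_predict_with_uncertainty above: the model is fitted with mc=True and
# then queried n_iter times with dropout left active (Monte Carlo dropout), so the
# spread of the stochastic forward passes serves as the uncertainty estimate. The
# snippet below is a minimal, self-contained sketch of that idea using a stand-in
# stochastic predictor in plain NumPy; it is a conceptual illustration only, not
# VanillaLSTM's actual implementation.
import numpy as np


def mc_dropout_predict(predict_fn, x, n_iter=10):
    # run n_iter stochastic forward passes and summarise them:
    # mean -> prediction, standard deviation -> uncertainty
    preds = np.stack([predict_fn(x) for _ in range(n_iter)], axis=0)
    return preds.mean(axis=0), preds.std(axis=0)


# stand-in for a model whose dropout stays active at inference time
_rng = np.random.default_rng(0)


def _noisy_model(x):
    return x.sum(axis=(1, 2))[:, None] + _rng.normal(scale=0.1, size=(x.shape[0], 1))


_x_test = _rng.normal(size=(16, 6, 4))  # (samples, past_seq_len, features)
prediction, uncertainty = mc_dropout_predict(_noisy_model, _x_test, n_iter=10)
assert prediction.shape == (16, 1) and uncertainty.shape == (16, 1)
assert np.any(uncertainty)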
def _hp_search(self, input_df, validation_df, metric):
    # features
    # feature_list = ["WEEKDAY(datetime)", "HOUR(datetime)",
    #                 "PERCENTILE(value)", "IS_WEEKEND(datetime)",
    #                 "IS_AWAKE(datetime)", "IS_BUSY_HOURS(datetime)",
    #                 # "DAY(datetime)", "MONTH(datetime)",  # probably not useful
    #                 ]
    # target_list = ["value"]
    # ft = TimeSequenceFeatures(self.future_seq_len, self.dt_col,
    #                           self.target_col, self.extra_features_col)
    # ft = DummyTimeSequenceFeatures(
    #     file_path='../../../../data/nyc_taxi_rolled_split.npz')
    ft = TimeSequenceFeatureTransformer(self.future_seq_len,
                                        self.dt_col,
                                        self.target_col,
                                        self.extra_features_col,
                                        self.drop_missing)
    feature_list = ft.get_feature_list(input_df)

    # model
    model = VanillaLSTM(check_optional_config=False,
                        future_seq_len=self.future_seq_len)

    search_space = {
        # -------- feature related parameters
        "selected_features": RandomSample(
            lambda spec: np.random.choice(
                feature_list,
                size=np.random.randint(low=3, high=len(feature_list), size=1),
                replace=False)),

        # --------- model related parameters
        # 'input_shape_x': x_train.shape[1],
        # 'input_shape_y': x_train.shape[-1],
        'out_units': self.future_seq_len,
        "lr": 0.001,
        "lstm_1_units": GridSearch([16, 32]),
        "dropout_1": 0.2,
        "lstm_2_units": 10,
        "dropout_2": RandomSample(lambda spec: np.random.uniform(0.2, 0.5)),
        "batch_size": 1024,
    }

    stop = {
        "reward_metric": -0.05,
        "training_iteration": 10,
    }

    searcher = RayTuneSearchEngine(logs_dir=self.logs_dir,
                                   ray_num_cpus=6,
                                   resources_per_trial={"cpu": 2})
    searcher.compile(input_df,
                     search_space=search_space,
                     stop=stop,
                     # feature_transformers=TimeSequenceFeatures,
                     feature_transformers=ft,  # use dummy features for testing the rest
                     model=model,
                     validation_df=validation_df,
                     metric=metric)
    # searcher.test_run()

    trials = searcher.run()
    best = searcher.get_best_trials(k=1)[0]  # get the best trial; later this could be the top n
    pipeline = self._make_pipeline(
        best,
        feature_transformers=ft,
        # feature_transformers=TimeSequenceFeatures(
        #     file_path='../../../../data/nyc_taxi_rolled_split.npz'),
        model=VanillaLSTM(check_optional_config=False))
    return pipeline
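# The search space above mixes fixed values with sampled ones: as used there,
# RandomSample wraps a callable that is evaluated per trial to draw a candidate
# value, and GridSearch enumerates the listed values. The sketch below only shows
# what such sampling callables produce when drawn a few times; it does not touch
# the search engine, the feature names are copied from the commented-out list above
# purely for illustration, and the size argument is simplified to a scalar.
import numpy as np

feature_list = ["WEEKDAY(datetime)", "HOUR(datetime)", "PERCENTILE(value)",
                "IS_WEEKEND(datetime)", "IS_AWAKE(datetime)", "IS_BUSY_HOURS(datetime)"]

# candidate feature subsets: 3 to len(feature_list) - 1 features, no repeats
sample_features = lambda spec: np.random.choice(
    feature_list,
    size=np.random.randint(low=3, high=len(feature_list)),
    replace=False)

# candidate dropout rates drawn uniformly from [0.2, 0.5)
sample_dropout = lambda spec: np.random.uniform(0.2, 0.5)

for _ in range(3):
    print(sorted(sample_features(None)), round(sample_dropout(None), 3))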
class TimeSequencePipeline(Pipeline):

    def __init__(self, feature_transformers=None, model=None, config=None):
        """
        Initialize a pipeline.
        :param feature_transformers: the feature transformers
        :param model: the internal model
        :param config: the trial config
        """
        if feature_transformers is None:
            assert model is None and config is None
            self.feature_transformers = TimeSequenceFeatureTransformer()
            self.model = VanillaLSTM(check_optional_config=False)
            print("Initialize new time sequence pipeline.")
        else:
            self.feature_transformers = feature_transformers
            self.model = model
            self.config = config

    def evaluate(self, input_df, metric=["mean_squared_error"]):
        """
        Evaluate the pipeline.
        :param input_df: the input dataframe
        :param metric: a list of metric names
        :return: the evaluation result
        """
        x, y = self.feature_transformers.transform(input_df, is_train=True)
        return self.model.evaluate(x, y, metric)

    def predict(self, input_df):
        # there might be no y in the data, TODO needs to fix in TimeSequenceFeatures
        x = self.feature_transformers.transform(input_df, is_train=False)
        y_pred = self.model.predict(x)
        y_output = self.feature_transformers.post_processing(y_pred)
        return y_output

    def save(self, file):
        """
        Save the pipeline to a directory, including the feature transformer,
        the model and the trial config.
        :param file: the directory to save to
        :return:
        """
        if not os.path.isdir(file):
            os.mkdir(file)
        model_path = os.path.join(file, "weights_tune.h5")
        config_path = os.path.join(file, "all_config.json")
        self.feature_transformers.save(config_path, replace=True)
        self.model.save(model_path, config_path)
        # check if ** is needed
        save_config(config_path, self.config)

    def restore(self, file):
        """
        Restore the pipeline from a directory.
        :param file: the directory to restore from
        :return:
        """
        model_path = os.path.join(file, "weights_tune.h5")
        config_path = os.path.join(file, "all_config.json")
        all_config = load_config(config_path)
        self.model.restore(model_path, **all_config)
        self.feature_transformers.restore(**all_config)