def test_predict(self):
    """Fit a small VanillaLSTM on random data and check the prediction shape."""
    future_seq_len = 1
    past_seq_len = 6

    train_df = pd.DataFrame(data=np.random.randn(64, 4))
    test_df = pd.DataFrame(data=np.random.randn(16, 4))

    # Build the rolled train/test arrays with the time-sequence transformer.
    transformer = TimeSequenceFeatureTransformer()
    x_train, y_train = transformer._roll_train(
        train_df,
        past_seq_len=past_seq_len,
        future_seq_len=future_seq_len,
    )
    x_test = transformer._roll_test(test_df, past_seq_len=past_seq_len)

    config = {
        'epochs': 2,
        "lr": 0.001,
        "lstm_1_units": 16,
        "dropout_1": 0.2,
        "lstm_2_units": 10,
        "dropout_2": 0.2,
        "batch_size": 32,
    }

    model = VanillaLSTM(check_optional_config=False,
                        future_seq_len=future_seq_len)
    model.fit_eval(x_train, y_train, **config)

    y_pred = model.predict(x_test)
    expected_shape = (x_test.shape[0], 1)
    assert y_pred.shape == expected_shape
def test_save_restore(self):
    """Check that predictions are unchanged after a save/restore round trip.

    The model weights are written to ``testmodel.h5`` and its config to
    ``local_config.json`` inside a temporary directory that is removed
    afterwards, whether or not the assertions pass.
    """
    future_seq_len = 1
    past_seq_len = 6

    train_df = pd.DataFrame(data=np.random.randn(64, 4))
    test_df = pd.DataFrame(data=np.random.randn(16, 4))

    # Build the rolled train/test arrays with the time-sequence transformer.
    transformer = TimeSequenceFeatureTransformer()
    x_train, y_train = transformer._roll_train(
        train_df,
        past_seq_len=past_seq_len,
        future_seq_len=future_seq_len,
    )
    x_test = transformer._roll_test(test_df, past_seq_len=past_seq_len)

    config = {
        'epochs': 2,
        "lr": 0.001,
        "lstm_1_units": 16,
        "dropout_1": 0.2,
        "lstm_2_units": 10,
        "dropout_2": 0.2,
        "batch_size": 32,
    }

    tmp_dir = tempfile.mkdtemp(prefix="automl_test_vanilla")
    try:
        model = VanillaLSTM(check_optional_config=False,
                            future_seq_len=future_seq_len)
        model.fit_eval(x_train, y_train, **config)
        predict_before = model.predict(x_test)

        model_path = os.path.join(tmp_dir, "testmodel.h5")
        config_path = os.path.join(tmp_dir, "local_config.json")
        model.save(model_path=model_path, config_path=config_path)

        # Values loaded from the saved config take precedence over the
        # training config when restoring.
        config.update(load_config(config_path))
        model.restore(model_path=model_path, **config)

        predict_after = model.predict(x_test)
        assert np.allclose(predict_before, predict_after)
    finally:
        shutil.rmtree(tmp_dir)
def test_save_restore(self):
    """Save the fitted model, restore it into a fresh one and compare outputs.

    After the comparison the restored model is fitted again with a new
    config to check that it can still be trained.
    """
    restored = VanillaLSTM(check_optional_config=False)

    self.model.fit_eval(self.x_train, self.y_train, **self.config)
    predict_before = self.model.predict(self.x_test)

    tmp_dir = tempfile.mkdtemp(prefix="automl_test_vanilla")
    try:
        save(tmp_dir, model=self.model)
        restore(tmp_dir, model=restored, config=self.config)

        predict_after = restored.predict(self.x_test)
        assert_array_almost_equal(predict_before, predict_after, decimal=2)

        # The restored model is fitted once more with a different config.
        retrain_config = {'epochs': 2}
        restored.fit_eval(self.x_train, self.y_train, **retrain_config)
    finally:
        shutil.rmtree(tmp_dir)
class TestVanillaLSTM(ZooTestCase):
    """Tests for VanillaLSTM run against small random data sets."""

    def setup_method(self, method):
        """Create random rolled train/val/test data, a config and a model."""
        # NOTE: the call to super().setup_method(method) is left disabled,
        # exactly as in the original code.
        # super().setup_method(method)
        future_seq_len = 1
        past_seq_len = 6

        train_df = pd.DataFrame(data=np.random.randn(64, 4))
        val_df = pd.DataFrame(data=np.random.randn(16, 4))
        test_df = pd.DataFrame(data=np.random.randn(16, 4))

        # Build the rolled arrays with the time-sequence transformer.
        transformer = TimeSequenceFeatureTransformer()
        self.x_train, self.y_train = transformer._roll_train(
            train_df,
            past_seq_len=past_seq_len,
            future_seq_len=future_seq_len,
        )
        self.x_val, self.y_val = transformer._roll_train(
            val_df,
            past_seq_len=past_seq_len,
            future_seq_len=future_seq_len,
        )
        self.x_test = transformer._roll_test(test_df,
                                             past_seq_len=past_seq_len)

        self.config = {
            'epochs': 1,
            "lr": 0.001,
            "lstm_1_units": 16,
            "dropout_1": 0.2,
            "lstm_2_units": 10,
            "dropout_2": 0.2,
            "batch_size": 32,
        }
        self.model = VanillaLSTM(check_optional_config=False,
                                 future_seq_len=future_seq_len)

    def teardown_method(self, method):
        """Nothing to clean up."""

    def test_fit_eval(self):
        """fit_eval runs with the default settings; its result is printed."""
        result = self.model.fit_eval(self.x_train, self.y_train,
                                     **self.config)
        print("fit_eval:", result)

    def test_fit_eval_mc(self):
        """fit_eval runs with ``mc=True``; its result is printed."""
        result = self.model.fit_eval(self.x_train, self.y_train,
                                     mc=True, **self.config)
        print("fit_eval:", result)

    def test_evaluate(self):
        """evaluate returns one value per requested metric."""
        self.model.fit_eval(self.x_train, self.y_train, **self.config)
        mse, rs = self.model.evaluate(self.x_val, self.y_val,
                                      metric=['mse', 'r2'])
        print("Mean squared error is:", mse)
        print("R square is:", rs)

    def test_predict(self):
        """predict yields one single-column row per test sample."""
        self.model.fit_eval(self.x_train, self.y_train, **self.config)
        self.y_pred = self.model.predict(self.x_test)
        expected_shape = (self.x_test.shape[0], 1)
        assert self.y_pred.shape == expected_shape

    def test_save_restore(self):
        """A restored model predicts like the saved one and can be refitted."""
        restored = VanillaLSTM(check_optional_config=False)

        self.model.fit_eval(self.x_train, self.y_train, **self.config)
        predict_before = self.model.predict(self.x_test)

        tmp_dir = tempfile.mkdtemp(prefix="automl_test_vanilla")
        try:
            save(tmp_dir, model=self.model)
            restore(tmp_dir, model=restored, config=self.config)

            predict_after = restored.predict(self.x_test)
            assert_array_almost_equal(predict_before, predict_after,
                                      decimal=2)

            # The restored model is fitted once more with a different config.
            retrain_config = {'epochs': 2}
            restored.fit_eval(self.x_train, self.y_train, **retrain_config)
        finally:
            shutil.rmtree(tmp_dir)

    def test_predict_with_uncertainty(self):
        """predict_with_uncertainty works before and after save/restore."""
        self.model.fit_eval(self.x_train, self.y_train, mc=True,
                            **self.config)

        prediction, uncertainty = self.model.predict_with_uncertainty(
            self.x_test, n_iter=10)
        assert prediction.shape == (self.x_test.shape[0], 1)
        assert uncertainty.shape == (self.x_test.shape[0], 1)
        assert np.any(uncertainty)

        restored = VanillaLSTM(check_optional_config=False)
        tmp_dir = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(tmp_dir, model=self.model)
            restore(tmp_dir, model=restored, config=self.config)

            prediction, uncertainty = restored.predict_with_uncertainty(
                self.x_test, n_iter=2)
            assert prediction.shape == (self.x_test.shape[0], 1)
            assert uncertainty.shape == (self.x_test.shape[0], 1)
            assert np.any(uncertainty)
        finally:
            shutil.rmtree(tmp_dir)
class TimeSequencePipeline(Pipeline):
    """Pipeline combining a feature transformer, a model and a trial config."""

    def __init__(self, feature_transformers=None, model=None, config=None):
        """
        initialize a pipeline

        When ``feature_transformers`` is None a fresh
        TimeSequenceFeatureTransformer and VanillaLSTM are created, and
        ``model`` and ``config`` must be None as well.

        :param feature_transformers: the feature transformers
        :param model: the internal model
        :param config: the trial config written out by ``save``
        """
        if feature_transformers is None:
            assert model is None and config is None, \
                "model and config must be None when feature_transformers is None"
            self.feature_transformers = TimeSequenceFeatureTransformer()
            self.model = VanillaLSTM(check_optional_config=False)
            print("Initialize new time sequence pipeline.")
        else:
            self.feature_transformers = feature_transformers
            self.model = model
        # Always define the attribute: previously a default-constructed
        # pipeline had no ``config`` and ``save`` raised AttributeError.
        self.config = config

    def evaluate(self, input_df, metric=None):
        """
        evaluate the pipeline

        :param input_df: input data, passed to the feature transformer with
            ``is_train=True`` to obtain both x and y
        :param metric: list of metric names handed to the model; defaults to
            ``["mean_squared_error"]``
        :return: whatever the model's ``evaluate`` returns
        """
        # Build the default per call instead of sharing one mutable list
        # object across all calls.
        if metric is None:
            metric = ["mean_squared_error"]
        x, y = self.feature_transformers.transform(input_df, is_train=True)
        return self.model.evaluate(x, y, metric)

    def predict(self, input_df):
        """
        predict with the pipeline

        :param input_df: input data, passed to the feature transformer with
            ``is_train=False``
        :return: the model prediction after the transformer's post processing
        """
        # there might be no y in the data, TODO needs to fix in
        # TimeSequenceFeatures
        x = self.feature_transformers.transform(input_df, is_train=False)
        y_pred = self.model.predict(x)
        y_output = self.feature_transformers.post_processing(y_pred)
        return y_output

    def save(self, file):
        """
        save pipeline to file, contains feature transformer, model, trial config.

        :param file: directory to save into; created (including missing
            parents) when it does not exist yet
        :return:
        """
        # exist_ok avoids the check-then-create race of isdir() + mkdir()
        # and also supports nested target directories.
        os.makedirs(file, exist_ok=True)
        model_path = os.path.join(file, "weights_tune.h5")
        config_path = os.path.join(file, "all_config.json")
        self.feature_transformers.save(config_path, replace=True)
        self.model.save(model_path, config_path)  # check if ** is needed
        # Without a trial config there is nothing pipeline-level to add to
        # what the transformer and the model have already written.
        if self.config is not None:
            save_config(config_path, self.config)

    def restore(self, file):
        """
        restore pipeline from file

        :param file: directory previously written by ``save``
        :return:
        """
        model_path = os.path.join(file, "weights_tune.h5")
        config_path = os.path.join(file, "all_config.json")
        all_config = load_config(config_path)
        self.model.restore(model_path, **all_config)
        self.feature_transformers.restore(**all_config)