コード例 #1
0
    def test_predict(self):
        """Fit a VanillaLSTM on random data and check the prediction shape."""
        horizon = 1
        lookback = 6
        train_df = pd.DataFrame(data=np.random.randn(64, 4))
        test_df = pd.DataFrame(data=np.random.randn(16, 4))

        # Build model inputs with the transformer's roll helpers.
        transformer = TimeSequenceFeatureTransformer()
        x_train, y_train = transformer._roll_train(
            train_df, past_seq_len=lookback, future_seq_len=horizon)
        x_test = transformer._roll_test(test_df, past_seq_len=lookback)

        hyper_params = dict(
            epochs=2,
            lr=0.001,
            lstm_1_units=16,
            dropout_1=0.2,
            lstm_2_units=10,
            dropout_2=0.2,
            batch_size=32,
        )
        lstm = VanillaLSTM(check_optional_config=False, future_seq_len=horizon)
        lstm.fit_eval(x_train, y_train, **hyper_params)

        # One output column is expected for every rolled test sample.
        predicted = lstm.predict(x_test)
        expected_shape = (x_test.shape[0], 1)
        assert predicted.shape == expected_shape
コード例 #2
0
    def setup_method(self, method):
        """Prepare rolled train/val/test data, a config dict and a new model."""
        # NOTE: the parent hook is not invoked here; the original call is
        # left disabled: super().setup_method(method)
        train_df, val_df, test_df = [
            pd.DataFrame(data=np.random.randn(rows, 4))
            for rows in (64, 16, 16)
        ]
        window = {"past_seq_len": 6, "future_seq_len": 1}

        # Build model inputs with the transformer's roll helpers.
        roller = TimeSequenceFeatureTransformer()
        self.x_train, self.y_train = roller._roll_train(train_df, **window)
        self.x_val, self.y_val = roller._roll_train(val_df, **window)
        self.x_test = roller._roll_test(
            test_df, past_seq_len=window["past_seq_len"])

        self.config = dict(
            epochs=1,
            lr=0.001,
            lstm_1_units=16,
            dropout_1=0.2,
            lstm_2_units=10,
            dropout_2=0.2,
            batch_size=32,
        )
        self.model = VanillaLSTM(
            check_optional_config=False,
            future_seq_len=window["future_seq_len"])
コード例 #3
0
 def test_evaluate(self):
     """Fit a VanillaLSTM on random data and print its evaluation result."""
     horizon, lookback = 1, 6
     train_df = pd.DataFrame(data=np.random.randn(64, 4))
     val_df = pd.DataFrame(data=np.random.randn(16, 4))

     # Both frames go through the same training-style roll helper.
     roller = TimeSequenceFeatureTransformer()
     roll_kwargs = {"past_seq_len": lookback, "future_seq_len": horizon}
     x_train, y_train = roller._roll_train(train_df, **roll_kwargs)
     x_val, y_val = roller._roll_train(val_df, **roll_kwargs)

     hyper_params = dict(
         epochs=1,
         lr=0.001,
         lstm_1_units=16,
         dropout_1=0.2,
         lstm_2_units=10,
         dropout_2=0.2,
         batch_size=32,
     )
     lstm = VanillaLSTM(check_optional_config=False, future_seq_len=horizon)
     lstm.fit_eval(x_train, y_train, **hyper_params)

     score = lstm.evaluate(x_val, y_val)
     print("evaluate:", score)
コード例 #4
0
 def __init__(self, feature_transformers=None, model=None, config=None):
     """
     initialize a pipeline
     :param feature_transformers: the feature transformers; when None, a
         default TimeSequenceFeatureTransformer and VanillaLSTM are created
         and both model and config must also be None
     :param model: the internal model
     :param config: the configuration stored on the pipeline
     """
     if feature_transformers is None:
         assert model is None and config is None
         self.feature_transformers = TimeSequenceFeatureTransformer()
         self.model = VanillaLSTM(check_optional_config=False)
         print("Initialize new time sequence pipeline.")
     else:
         self.feature_transformers = feature_transformers
         self.model = model
     # Assign config on every path so the attribute always exists; the
     # default branch previously left it undefined, which made any later
     # read of self.config raise AttributeError.
     self.config = config
コード例 #5
0
    def test_save_restore(self):
        """Check predictions match after save/restore into a second model."""
        restored = VanillaLSTM(check_optional_config=False)
        self.model.fit_eval(self.x_train, self.y_train, **self.config)
        expected = self.model.predict(self.x_test)

        workdir = tempfile.mkdtemp(prefix="automl_test_vanilla")
        try:
            save(workdir, model=self.model)
            restore(workdir, model=restored, config=self.config)
            actual = restored.predict(self.x_test)
            assert_array_almost_equal(expected, actual, decimal=2)
            # Train the restored model again with a different epoch count.
            restored.fit_eval(self.x_train, self.y_train, epochs=2)
        finally:
            # Remove the temporary directory whether or not the checks pass.
            shutil.rmtree(workdir)
コード例 #6
0
    def test_save_restore(self):
        """Check predictions are unchanged after model.save / model.restore."""
        horizon = 1
        lookback = 6
        train_df = pd.DataFrame(data=np.random.randn(64, 4))
        test_df = pd.DataFrame(data=np.random.randn(16, 4))

        # Build model inputs with the transformer's roll helpers.
        roller = TimeSequenceFeatureTransformer()
        x_train, y_train = roller._roll_train(
            train_df, past_seq_len=lookback, future_seq_len=horizon)
        x_test = roller._roll_test(test_df, past_seq_len=lookback)

        hyper_params = dict(
            epochs=2,
            lr=0.001,
            lstm_1_units=16,
            dropout_1=0.2,
            lstm_2_units=10,
            dropout_2=0.2,
            batch_size=32,
        )

        workdir = tempfile.mkdtemp(prefix="automl_test_vanilla")
        try:
            lstm = VanillaLSTM(check_optional_config=False,
                               future_seq_len=horizon)
            lstm.fit_eval(x_train, y_train, **hyper_params)
            expected = lstm.predict(x_test)

            weights_file = os.path.join(workdir, "testmodel.h5")
            config_file = os.path.join(workdir, "local_config.json")
            lstm.save(model_path=weights_file, config_path=config_file)

            # Merge the saved config over the training config, then restore
            # into the same model instance.
            hyper_params.update(load_config(config_file))
            lstm.restore(model_path=weights_file, **hyper_params)

            actual = lstm.predict(x_test)
            assert np.allclose(expected, actual)
        finally:
            # Remove the temporary directory whether or not the checks pass.
            shutil.rmtree(workdir)
コード例 #7
0
    def test_predict_with_uncertainty(self):
        """Check predict_with_uncertainty output before and after restore."""
        expected_shape = (self.x_test.shape[0], 1)

        def check_outputs(model, n_iter):
            # Both outputs must have the expected shape, and the uncertainty
            # must contain at least one non-zero entry.
            prediction, uncertainty = model.predict_with_uncertainty(
                self.x_test, n_iter=n_iter)
            assert prediction.shape == expected_shape
            assert uncertainty.shape == expected_shape
            assert np.any(uncertainty)

        self.model.fit_eval(self.x_train, self.y_train, mc=True, **self.config)
        check_outputs(self.model, 10)

        restored = VanillaLSTM(check_optional_config=False)
        workdir = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(workdir, model=self.model)
            restore(workdir, model=restored, config=self.config)
            check_outputs(restored, 2)
        finally:
            # Remove the temporary directory whether or not the checks pass.
            shutil.rmtree(workdir)
コード例 #8
0
class TestVanillaLSTM(ZooTestCase):
    """Tests for VanillaLSTM fitting, evaluation, prediction and persistence."""

    def setup_method(self, method):
        """Prepare rolled train/val/test data, a config dict and a new model."""
        # NOTE: the parent hook is not invoked here; the original call is
        # left disabled: super().setup_method(method)
        train_df, val_df, test_df = [
            pd.DataFrame(data=np.random.randn(rows, 4))
            for rows in (64, 16, 16)
        ]
        window = {"past_seq_len": 6, "future_seq_len": 1}

        # Build model inputs with the transformer's roll helpers.
        roller = TimeSequenceFeatureTransformer()
        self.x_train, self.y_train = roller._roll_train(train_df, **window)
        self.x_val, self.y_val = roller._roll_train(val_df, **window)
        self.x_test = roller._roll_test(
            test_df, past_seq_len=window["past_seq_len"])

        self.config = dict(
            epochs=1,
            lr=0.001,
            lstm_1_units=16,
            dropout_1=0.2,
            lstm_2_units=10,
            dropout_2=0.2,
            batch_size=32,
        )
        self.model = VanillaLSTM(
            check_optional_config=False,
            future_seq_len=window["future_seq_len"])

    def teardown_method(self, method):
        """Nothing to clean up after a test."""

    def test_fit_eval(self):
        """Run fit_eval with the default config and print its result."""
        outcome = self.model.fit_eval(self.x_train, self.y_train,
                                      **self.config)
        print("fit_eval:", outcome)

    def test_fit_eval_mc(self):
        """Run fit_eval with mc=True and print its result."""
        outcome = self.model.fit_eval(self.x_train, self.y_train, mc=True,
                                      **self.config)
        print("fit_eval:", outcome)

    def test_evaluate(self):
        """Fit, then evaluate on the validation data with two metrics."""
        self.model.fit_eval(self.x_train, self.y_train, **self.config)
        scores = self.model.evaluate(self.x_val, self.y_val,
                                     metric=['mse', 'r2'])
        # Exactly two values are unpacked, one per requested metric.
        mse, rs = scores
        print("Mean squared error is:", mse)
        print("R square is:", rs)

    def test_predict(self):
        """Fit, predict on the test data and check the output shape."""
        self.model.fit_eval(self.x_train, self.y_train, **self.config)
        self.y_pred = self.model.predict(self.x_test)
        expected_shape = (self.x_test.shape[0], 1)
        assert self.y_pred.shape == expected_shape

    def test_save_restore(self):
        """Check predictions match after save/restore into a second model."""
        restored = VanillaLSTM(check_optional_config=False)
        self.model.fit_eval(self.x_train, self.y_train, **self.config)
        expected = self.model.predict(self.x_test)

        workdir = tempfile.mkdtemp(prefix="automl_test_vanilla")
        try:
            save(workdir, model=self.model)
            restore(workdir, model=restored, config=self.config)
            actual = restored.predict(self.x_test)
            assert_array_almost_equal(expected, actual, decimal=2)
            # Train the restored model again with a different epoch count.
            restored.fit_eval(self.x_train, self.y_train, epochs=2)
        finally:
            # Remove the temporary directory whether or not the checks pass.
            shutil.rmtree(workdir)

    def test_predict_with_uncertainty(self):
        """Check predict_with_uncertainty output before and after restore."""
        expected_shape = (self.x_test.shape[0], 1)

        def check_outputs(model, n_iter):
            # Both outputs must have the expected shape, and the uncertainty
            # must contain at least one non-zero entry.
            prediction, uncertainty = model.predict_with_uncertainty(
                self.x_test, n_iter=n_iter)
            assert prediction.shape == expected_shape
            assert uncertainty.shape == expected_shape
            assert np.any(uncertainty)

        self.model.fit_eval(self.x_train, self.y_train, mc=True, **self.config)
        check_outputs(self.model, 10)

        restored = VanillaLSTM(check_optional_config=False)
        workdir = tempfile.mkdtemp(prefix="automl_test_feature")
        try:
            save(workdir, model=self.model)
            restore(workdir, model=restored, config=self.config)
            check_outputs(restored, 2)
        finally:
            # Remove the temporary directory whether or not the checks pass.
            shutil.rmtree(workdir)
コード例 #9
0
    def _hp_search(self, input_df, validation_df, metric):
        """
        Search hyper-parameters and build a pipeline from the best trial.

        :param input_df: training data passed to the search engine; also used
            to derive the candidate feature list
        :param validation_df: validation data passed to the search engine
        :param metric: metric passed to the search engine
        :return: the result of self._make_pipeline for the best trial
        """
        ft = TimeSequenceFeatureTransformer(self.future_seq_len, self.dt_col,
                                            self.target_col,
                                            self.extra_features_col,
                                            self.drop_missing)
        feature_list = ft.get_feature_list(input_df)

        model = VanillaLSTM(check_optional_config=False,
                            future_seq_len=self.future_seq_len)

        def sample_features(spec):
            # Draw a random subset of the candidate features without
            # replacement. np.random.randint needs low < high, so with three
            # or fewer candidates (where the original call raised ValueError)
            # every feature is selected instead.
            total = len(feature_list)
            if total > 3:
                count = np.random.randint(low=3, high=total)
            else:
                count = total
            return np.random.choice(feature_list, size=count, replace=False)

        search_space = {
            # -------- feature related parameters
            "selected_features": RandomSample(sample_features),

            # --------- model related parameters
            'out_units': self.future_seq_len,
            "lr": 0.001,
            "lstm_1_units": GridSearch([16, 32]),
            "dropout_1": 0.2,
            "lstm_2_units": 10,
            "dropout_2": RandomSample(
                lambda spec: np.random.uniform(0.2, 0.5)),
            "batch_size": 1024,
        }

        stop = {"reward_metric": -0.05, "training_iteration": 10}

        searcher = RayTuneSearchEngine(logs_dir=self.logs_dir,
                                       ray_num_cpus=6,
                                       resources_per_trial={"cpu": 2})
        searcher.compile(
            input_df,
            search_space=search_space,
            stop=stop,
            feature_transformers=ft,
            model=model,
            validation_df=validation_df,
            metric=metric)

        # The return value of run() is not needed; the best trial is read
        # back from the engine below.
        searcher.run()
        # Only the single best trial is used for now.
        best = searcher.get_best_trials(k=1)[0]
        pipeline = self._make_pipeline(
            best,
            feature_transformers=ft,
            model=VanillaLSTM(check_optional_config=False))
        return pipeline
コード例 #10
0
class TimeSequencePipeline(Pipeline):
    """Pipeline bundling a feature transformer, a model and a config."""

    def __init__(self, feature_transformers=None, model=None, config=None):
        """
        initialize a pipeline
        :param feature_transformers: the feature transformers; when None, a
            default TimeSequenceFeatureTransformer and VanillaLSTM are created
            and both model and config must also be None
        :param model: the internal model
        :param config: the configuration stored on the pipeline
        """
        if feature_transformers is None:
            assert model is None and config is None
            self.feature_transformers = TimeSequenceFeatureTransformer()
            self.model = VanillaLSTM(check_optional_config=False)
            print("Initialize new time sequence pipeline.")
        else:
            self.feature_transformers = feature_transformers
            self.model = model
        # Assign config on every path so the attribute always exists; the
        # default branch previously left it undefined, which made save()
        # raise AttributeError.
        self.config = config

    def evaluate(self, input_df, metric=None):
        """
        evaluate the pipeline
        :param input_df: data passed to the feature transformer with
            is_train=True to obtain (x, y)
        :param metric: metric(s) forwarded to the model's evaluate; defaults
            to ["mean_squared_error"] when None
        :return: whatever the model's evaluate returns
        """
        # A None sentinel replaces the shared mutable list default.
        if metric is None:
            metric = ["mean_squared_error"]
        x, y = self.feature_transformers.transform(input_df, is_train=True)
        return self.model.evaluate(x, y, metric)

    def predict(self, input_df):
        """
        predict with the pipeline
        :param input_df: data passed to the feature transformer with
            is_train=False
        :return: model predictions after the transformer's post_processing
        """
        # there might be no y in the data, TODO needs to fix in
        # TimeSequenceFeatures
        x = self.feature_transformers.transform(input_df, is_train=False)
        y_pred = self.model.predict(x)
        y_output = self.feature_transformers.post_processing(y_pred)
        return y_output

    def save(self, file):
        """
        save pipeline to file, contains feature transformer, model, trial config.
        :param file: target directory; created (with parents) if missing
        :return:
        """
        if not os.path.isdir(file):
            # makedirs also creates missing parent directories, which
            # os.mkdir would fail on.
            os.makedirs(file)
        model_path = os.path.join(file, "weights_tune.h5")
        config_path = os.path.join(file, "all_config.json")
        self.feature_transformers.save(config_path, replace=True)
        self.model.save(model_path, config_path)
        # A default-constructed pipeline has no trial config to write.
        if self.config is not None:
            # check if ** is needed
            save_config(config_path, self.config)

    def restore(self, file):
        """
        restore pipeline from file
        :param file: directory previously written by save
        :return:
        """
        model_path = os.path.join(file, "weights_tune.h5")
        config_path = os.path.join(file, "all_config.json")
        all_config = load_config(config_path)
        # The same loaded config is handed to both the model and the
        # feature transformer.
        self.model.restore(model_path, **all_config)
        self.feature_transformers.restore(**all_config)