def test_evaluate(self):
     modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                        optimizer_creator=optimizer_creator,
                                        loss_creator=loss_creator)
     model = modelBuilder.build(config={
         "lr": 1e-2,
         "batch_size": 32,
     })
     model.fit_eval(data=(self.data["x"], self.data["y"]),
                    validation_data=(self.data["val_x"],
                                     self.data["val_y"]),
                    epochs=20)
     mse_eval = model.evaluate(x=self.data["val_x"], y=self.data["val_y"])
     try:
         import onnx
         import onnxruntime
         mse_eval_onnx = model.evaluate_with_onnx(x=self.data["val_x"],
                                                  y=self.data["val_y"])
         np.testing.assert_almost_equal(mse_eval, mse_eval_onnx)
     except ImportError:
         pass
     # incremental training test
     model.fit_eval(data=(self.data["x"], self.data["y"]),
                    validation_data=(self.data["val_x"],
                                     self.data["val_y"]),
                    epochs=20)
     mse_eval = model.evaluate(x=self.data["val_x"], y=self.data["val_y"])
     try:
         import onnx
         import onnxruntime
         mse_eval_onnx = model.evaluate_with_onnx(x=self.data["val_x"],
                                                  y=self.data["val_y"])
         np.testing.assert_almost_equal(mse_eval, mse_eval_onnx)
     except ImportError:
         pass
    def test_create_not_torch_model(self):
        def model_creator(config):
            return torch.Tensor(3, 5)

        modelBuilder = PytorchModelBuilder(model_creator=model_creator,
                                           optimizer_creator=optimizer_creator,
                                           loss_creator=loss_creator)
        with pytest.raises(ValueError):
            model = modelBuilder.build(config={
                "lr": 1e-2,
                "batch_size": 32,
            })
Example #3
    def test_fit_tcn_feature(self):
        input_feature_dim = 11  # This param will not be used
        output_feature_dim = 2  # 2 targets are generated in get_tsdataset

        tsdata_train = get_tsdataset().gen_dt_feature()
        tsdata_valid = get_tsdataset().gen_dt_feature()
        tsdata_test = get_tsdataset().gen_dt_feature()

        search_space = {
            'hidden_units': hp.grid_search([32, 64]),
            'levels': hp.randint(4, 6),
            'kernel_size': hp.randint(3, 5),
            'dropout': hp.uniform(0.1, 0.2),
            'lr': hp.loguniform(0.001, 0.01)
        }
        auto_trainer = AutoTSTrainer(model='tcn',
                                     search_space=search_space,
                                     past_seq_len=hp.randint(4, 6),
                                     future_seq_len=1,
                                     input_feature_num=input_feature_dim,
                                     output_target_num=output_feature_dim,
                                     selected_features="auto",
                                     metric="mse",
                                     optimizer="Adam",
                                     loss=torch.nn.MSELoss(),
                                     logs_dir="/tmp/auto_trainer",
                                     cpus_per_trial=2,
                                     name="auto_trainer")
        auto_trainer.fit(data=tsdata_train,
                         epochs=1,
                         batch_size=hp.choice([32, 64]),
                         validation_data=tsdata_valid,
                         n_sampling=1)
        best_config = auto_trainer.get_best_config()
        best_model = auto_trainer.get_best_model()
        assert 4 <= best_config["past_seq_len"] <= 6

        # really difficult to use the model currently...
        tsdata_test.roll(lookback=best_config["past_seq_len"],
                         horizon=1,
                         feature_col=best_config["selected_features"])
        x_test, y_test = tsdata_test.to_numpy()
        y_pred = best_model.predict(x_test)
        best_model.save("best.ckpt")
        from zoo.automl.model.base_pytorch_model import PytorchModelBuilder
        restore_model = PytorchModelBuilder(
            model_creator=best_model.model_creator,
            optimizer_creator="Adam",
            loss_creator=torch.nn.MSELoss()).build(best_config)
        restore_model.restore("best.ckpt")
        y_pred_restore = restore_model.predict(x_test)
        np.testing.assert_almost_equal(y_pred, y_pred_restore)
 def test_fit_evaluate(self):
     modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                        optimizer_creator=optimizer_creator,
                                        loss_creator=loss_creator)
     model = modelBuilder.build(config={
         "lr": 1e-2,
         "batch_size": 32,
     })
     val_result = model.fit_eval(data=(self.data["x"], self.data["y"]),
                                 validation_data=(self.data["val_x"],
                                                  self.data["val_y"]),
                                 epochs=20)
     assert val_result is not None
Example #5
    def __init__(self,
                 input_feature_num,
                 output_target_num,
                 optimizer,
                 loss,
                 metric,
                 hidden_dim=32,
                 layer_num=1,
                 lr=0.001,
                 dropout=0.2,
                 backend="torch",
                 logs_dir="/tmp/auto_lstm",
                 cpus_per_trial=1,
                 name="auto_lstm"):
        """
        Create an AutoLSTM.

        :param input_feature_num: Int. The number of features in the input
        :param output_target_num: Int. The number of targets in the output
        :param optimizer: String or PyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param loss: String or PyTorch/tf.keras loss instance or PyTorch loss creator function.
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param hidden_dim: Int or hp sampling function from an integer space. The number of features
               in the hidden state `h`. For hp sampling, see zoo.orca.automl.hp for more
               details. e.g. hp.grid_search([32, 64]).
        :param layer_num: Int or hp sampling function from an integer space. Number of recurrent
               layers. e.g. hp.randint(1, 3)
        :param lr: float or hp sampling function from a float space. Learning rate.
               e.g. hp.choice([0.001, 0.003, 0.01])
        :param dropout: float or hp sampling function from a float space. Dropout rate.
               e.g. hp.uniform(0.1, 0.3)
        :param backend: The backend of the lstm model. We only support backend as "torch" for now.
        :param logs_dir: Local directory to save logs and results. It defaults to "/tmp/auto_lstm"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the AutoLSTM. It defaults to "auto_lstm"
        """
        # todo: support backend = 'keras'
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")
        self.search_space = dict(
            hidden_dim=hidden_dim,
            layer_num=layer_num,
            lr=lr,
            dropout=dropout,
            input_feature_num=input_feature_num,
            output_feature_num=output_target_num,
        )
        self.metric = metric
        model_builder = PytorchModelBuilder(
            model_creator=model_creator,
            optimizer_creator=optimizer,
            loss_creator=loss,
        )
        self.auto_est = AutoEstimator(
            model_builder=model_builder,
            logs_dir=logs_dir,
            resources_per_trial={"cpu": cpus_per_trial},
            name=name)
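
A minimal construction sketch for the AutoLSTM above, assuming `hp` is importable from zoo.orca.automl as the docstring suggests; the feature counts are hypothetical and the hp expressions mirror the docstring examples.

import torch
from zoo.orca.automl import hp

auto_lstm = AutoLSTM(
    input_feature_num=10,                 # hypothetical input width
    output_target_num=2,                  # hypothetical target width
    optimizer="Adam",
    loss=torch.nn.MSELoss(),
    metric="mse",
    hidden_dim=hp.grid_search([32, 64]),  # docstring example
    layer_num=hp.randint(1, 3),           # docstring example
    lr=hp.choice([0.001, 0.003, 0.01]),   # docstring example
    dropout=hp.uniform(0.1, 0.3),         # docstring example
)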
Example #6
    def from_torch(*,
                   model_creator,
                   optimizer,
                   loss,
                   logs_dir="/tmp/auto_estimator_logs",
                   resources_per_trial=None,
                   name=None,
                   ):
        """
        Create an AutoEstimator for torch.

        :param model_creator: PyTorch model creator function.
        :param optimizer: PyTorch optimizer creator function or PyTorch optimizer name (string).
            Note that if you pass an optimizer name, you should specify the learning rate search
            space with the key "lr" or LR_NAME
            (from zoo.orca.automl.pytorch_utils import LR_NAME).
            If no learning rate search space is specified, the default learning rate of 1e-3
            will be used for all estimators.
        :param loss: PyTorch loss instance or PyTorch loss creator function
            or PyTorch loss name (string).
        :param logs_dir: Local directory to save logs and results. It defaults to
            "/tmp/auto_estimator_logs"
        :param resources_per_trial: Dict. Resources for each trial. e.g. {"cpu": 2}.
        :param name: Name of the auto estimator.

        :return: an AutoEstimator object.
        """
        from zoo.automl.model.base_pytorch_model import PytorchModelBuilder
        model_builder = PytorchModelBuilder(model_creator=model_creator,
                                            optimizer_creator=optimizer,
                                            loss_creator=loss)

        return AutoEstimator(model_builder=model_builder,
                             logs_dir=logs_dir,
                             resources_per_trial=resources_per_trial,
                             name=name)
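
A sketch of the creator-function shapes from_torch accepts, assuming the (config) and (model, config) creator signatures used by the other examples on this page; the toy linear model is hypothetical, and AutoEstimator is the class defined above.

import torch

def toy_model_creator(config):
    # hypothetical single-layer model
    return torch.nn.Linear(1, 1)

def toy_optimizer_creator(model, config):
    # assumed (model, config) signature, mirroring optimizer_creator elsewhere on this page
    return torch.optim.Adam(model.parameters(), lr=config.get("lr", 1e-3))

auto_est = AutoEstimator.from_torch(model_creator=toy_model_creator,
                                    optimizer=toy_optimizer_creator,
                                    loss=torch.nn.MSELoss(),
                                    resources_per_trial={"cpu": 2},
                                    name="toy_estimator")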
Example #7
def prepare_searcher(data,
                     search_space,
                     stop,
                     validation_data=None,
                     model_creator=linear_model_creator,
                     optimizer_creator=optimizer_creator,
                     loss_creator=loss_creator,
                     metric="mse",
                     metric_mode="min",
                     name="demo"):
    modelBuilder = PytorchModelBuilder(model_creator=model_creator,
                                       optimizer_creator=optimizer_creator,
                                       loss_creator=loss_creator)
    searcher = SearchEngineFactory.create_engine(
        backend="ray",
        logs_dir="~/zoo_automl_logs",
        resources_per_trial={"cpu": 2},
        name=name)
    searcher.compile(data=data,
                     validation_data=validation_data,
                     model_builder=modelBuilder,
                     search_space=search_space,
                     n_sampling=2,
                     epochs=stop["training_iteration"],
                     metric_threshold=stop["reward_metric"],
                     metric_mode=metric_mode,
                     metric=metric)
    return searcher
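
A hypothetical invocation of the prepare_searcher helper above. The arrays and search space are made up; the stop dict carries the two keys the helper reads, and searcher.run() is an assumption about the SearchEngine interface.

import numpy as np
from zoo.orca.automl import hp

x = np.random.randn(100, 2).astype(np.float32)  # hypothetical inputs
y = np.random.randn(100, 1).astype(np.float32)  # hypothetical targets
searcher = prepare_searcher(data=(x, y),
                            validation_data=(x, y),
                            search_space={"lr": hp.loguniform(0.001, 0.01),
                                          "batch_size": hp.choice([32, 64])},
                            stop={"training_iteration": 5,
                                  "reward_metric": 0.05})
searcher.run()  # assumed SearchEngine method; compile() already ran inside prepare_searcher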
 def test_dataloader_fit_evaluate(self):
     modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                        optimizer_creator=optimizer_creator,
                                        loss_creator=loss_creator)
     model = modelBuilder.build(
         config={
             "lr": 1e-2,
             "batch_size": 32,
             "train_size": 500,
             "valid_size": 100,
             "shuffle": True
         })
     val_result = model.fit_eval(data=train_dataloader_creator,
                                 validation_data=valid_dataloader_creator,
                                 epochs=20)
     assert model.config["train_size"] == 500
     assert model.config["valid_size"] == 100
     assert model.config["shuffle"] is True
     assert val_result is not None
 def test_predict(self):
     modelBuilder = PytorchModelBuilder(model_creator=model_creator_pytorch,
                                        optimizer_creator=optimizer_creator,
                                        loss_creator=loss_creator)
     model = modelBuilder.build(config={
         "lr": 1e-2,
         "batch_size": 32,
     })
     model.fit_eval(data=(self.data["x"], self.data["y"]),
                    validation_data=(self.data["val_x"],
                                     self.data["val_y"]),
                    epochs=20)
     pred = model.predict(x=self.data["val_x"])
     pred_full_batch = model.predict(x=self.data["val_x"],
                                     batch_size=len(self.data["val_x"]))
     np.testing.assert_almost_equal(pred, pred_full_batch)
     try:
         import onnx
         import onnxruntime
         pred_onnx = model.predict_with_onnx(x=self.data["val_x"])
         np.testing.assert_almost_equal(pred, pred_onnx)
     except ImportError:
         pass
    def __init__(self,
                 input_feature_num,
                 output_target_num,
                 past_seq_len,
                 future_seq_len,
                 optimizer,
                 loss,
                 metric,
                 lr=0.001,
                 lstm_hidden_dim=128,
                 lstm_layer_num=2,
                 dropout=0.25,
                 teacher_forcing=False,
                 backend="torch",
                 logs_dir="/tmp/auto_seq2seq",
                 cpus_per_trial=1,
                 name="auto_seq2seq"):
        """
        Create an AutoSeq2Seq.

        :param input_feature_num: Int. The number of features in the input
        :param output_target_num: Int. The number of targets in the output
        :param past_seq_len: Int. The number of historical steps used for forecasting.
        :param future_seq_len: Int. The number of future steps to forecast.
        :param optimizer: String or PyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param loss: String or PyTorch/tf.keras loss instance or PyTorch loss creator function.
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param lr: float or hp sampling function from a float space. Learning rate.
               e.g. hp.choice([0.001, 0.003, 0.01])
        :param lstm_hidden_dim: Int or hp sampling function from an integer space. LSTM hidden
               channel for decoder and encoder. e.g. hp.grid_search([32, 64, 128])
        :param lstm_layer_num: Int or hp sampling function from an integer space. LSTM layer
               number for decoder and encoder. e.g. hp.grid_search([1, 4])
        :param dropout: float or hp sampling function from a float space. Dropout rate.
               e.g. hp.uniform(0.1, 0.3)
        :param teacher_forcing: Bool. Whether to use teacher forcing in training.
        :param backend: The backend of the Seq2Seq model. We only support backend as "torch"
               for now.
        :param logs_dir: Local directory to save logs and results. It defaults to
               "/tmp/auto_seq2seq"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the AutoSeq2Seq. It defaults to "auto_seq2seq"
        """
        # todo: support search for past_seq_len.
        # todo: add input check.
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")
        self.search_space = dict(input_feature_num=input_feature_num,
                                 output_feature_num=output_target_num,
                                 past_seq_len=past_seq_len,
                                 future_seq_len=future_seq_len,
                                 lstm_hidden_dim=lstm_hidden_dim,
                                 lstm_layer_num=lstm_layer_num,
                                 lr=lr,
                                 dropout=dropout,
                                 teacher_forcing=teacher_forcing)
        self.metric = metric
        model_builder = PytorchModelBuilder(
            model_creator=model_creator,
            optimizer_creator=optimizer,
            loss_creator=loss,
        )
        self.auto_est = AutoEstimator(
            model_builder=model_builder,
            logs_dir=logs_dir,
            resources_per_trial={"cpu": cpus_per_trial},
            name=name)
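
A construction sketch for the AutoSeq2Seq above, with hypothetical dimensions and sequence lengths; the hp expressions reuse the docstring examples.

import torch
from zoo.orca.automl import hp

auto_s2s = AutoSeq2Seq(
    input_feature_num=10,                           # hypothetical
    output_target_num=2,                            # hypothetical
    past_seq_len=24,                                # hypothetical lookback
    future_seq_len=5,                               # hypothetical horizon
    optimizer="Adam",
    loss=torch.nn.MSELoss(),
    metric="mse",
    lr=hp.choice([0.001, 0.003, 0.01]),             # docstring example
    lstm_hidden_dim=hp.grid_search([32, 64, 128]),  # docstring example
    lstm_layer_num=hp.grid_search([1, 4]),          # docstring example
    dropout=hp.uniform(0.1, 0.3),                   # docstring example
)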
Example #11
    def __init__(self,
                 input_feature_num,
                 output_target_num,
                 past_seq_len,
                 future_seq_len,
                 optimizer,
                 loss,
                 metric,
                 hidden_units=None,
                 levels=None,
                 num_channels=None,
                 kernel_size=7,
                 lr=0.001,
                 dropout=0.2,
                 backend="torch",
                 logs_dir="/tmp/auto_tcn",
                 cpus_per_trial=1,
                 name="auto_tcn"):
        """
        Create an AutoTCN.

        :param input_feature_num: Int. The number of features in the input
        :param output_target_num: Int. The number of targets in the output
        :param past_seq_len: Int. The number of historical steps used for forecasting.
        :param future_seq_len: Int. The number of future steps to forecast.
        :param optimizer: String or PyTorch optimizer creator function or
               tf.keras optimizer instance.
        :param loss: String or PyTorch/tf.keras loss instance or PyTorch loss creator function.
        :param metric: String. The evaluation metric name to optimize. e.g. "mse"
        :param hidden_units: Int or hp sampling function from an integer space. The number of hidden
               units or filters for each convolutional layer. It is similar to `units` for LSTM.
               It defaults to 30. The hidden_units value is ignored if num_channels is specified.
               For hp sampling, see zoo.orca.automl.hp for more details.
               e.g. hp.grid_search([32, 64]).
        :param levels: Int or hp sampling function from an integer space. The number of levels of
               TemporalBlocks to use. It defaults to 8. The levels value is ignored if
               num_channels is specified.
        :param num_channels: List of integers. A list of hidden_units for each level. You could
               specify num_channels if you want different hidden_units for different levels.
               By default, num_channels equals
               [hidden_units] * (levels - 1) + [output_target_num].
        :param kernel_size: Int or hp sampling function from an integer space.
               The size of the kernel to use in each convolutional layer.
        :param lr: float or hp sampling function from a float space. Learning rate.
               e.g. hp.choice([0.001, 0.003, 0.01])
        :param dropout: float or hp sampling function from a float space. Dropout rate.
               e.g. hp.uniform(0.1, 0.3)
        :param backend: The backend of the TCN model. We only support backend as "torch" for now.
        :param logs_dir: Local directory to save logs and results. It defaults to "/tmp/auto_tcn"
        :param cpus_per_trial: Int. Number of cpus for each trial. It defaults to 1.
        :param name: name of the AutoTCN. It defaults to "auto_tcn"
        """
        # todo: support search for past_seq_len.
        # todo: add input check.
        if backend != "torch":
            raise ValueError(
                f"We only support backend as torch. Got {backend}")
        self.search_space = dict(
            input_feature_num=input_feature_num,
            output_feature_num=output_target_num,
            past_seq_len=past_seq_len,
            future_seq_len=future_seq_len,
            nhid=hidden_units,
            levels=levels,
            num_channels=num_channels,
            kernel_size=kernel_size,
            lr=lr,
            dropout=dropout,
        )
        self.metric = metric
        model_builder = PytorchModelBuilder(
            model_creator=model_creator,
            optimizer_creator=optimizer,
            loss_creator=loss,
        )
        self.auto_est = AutoEstimator(
            model_builder=model_builder,
            logs_dir=logs_dir,
            resources_per_trial={"cpu": cpus_per_trial},
            name=name)
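
A construction sketch for the AutoTCN above, with hypothetical dimensions; the hp expressions follow the docstring and the search space used in test_fit_tcn_feature earlier on this page.

import torch
from zoo.orca.automl import hp

auto_tcn = AutoTCN(
    input_feature_num=10,                   # hypothetical
    output_target_num=2,                    # hypothetical
    past_seq_len=24,                        # hypothetical lookback
    future_seq_len=5,                       # hypothetical horizon
    optimizer="Adam",
    loss=torch.nn.MSELoss(),
    metric="mse",
    hidden_units=hp.grid_search([32, 64]),  # from test_fit_tcn_feature above
    levels=hp.randint(4, 6),                # from test_fit_tcn_feature above
    kernel_size=hp.randint(3, 5),           # from test_fit_tcn_feature above
    lr=hp.loguniform(0.001, 0.01),          # from test_fit_tcn_feature above
    dropout=hp.uniform(0.1, 0.2),           # from test_fit_tcn_feature above
)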