Example #1
    def test_continuous_mountaincar(self, Setting: Type[Setting],
                                    observe_state: bool):
        method = self.Method()
        setting = Setting(
            dataset="MountainCarContinuous-v0",
            nb_tasks=2,
            steps_per_task=1_000,
            test_steps_per_task=1_000,
        )
        results: ContinualRLSetting.Results = setting.apply(
            method, config=Config(debug=True))
        print(results.summary())
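The `self.Method()` call in Example #1 implies that this test lives in a test class which exposes the Method under test as a class attribute. A minimal sketch of that layout, reusing DDPGMethod from Example #2 as the attribute (an assumption; Example #1 does not name its Method), would be:

class TestDDPGMethod:
    # Hypothetical test-class layout: `self.Method()` resolves to this attribute.
    Method = DDPGMethod

    def test_continuous_mountaincar(self, Setting, observe_state):
        ...  # body as in Example #1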
Example #2
def test_continuous_mountaincar(Setting: Type[Setting], observe_state: bool):
    method = DDPGMethod()
    setting = Setting(
        dataset="MountainCarContinuous-v0",
        observe_state_directly=True,
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    results: ContinualRLSetting.Results = setting.apply(
        method, config=Config(debug=True)
    )
    # TODO: Add some bounds on the expected performance here:
    print(results.summary())
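Example #2 receives the Setting class and the observe_state flag as test parameters. A hypothetical pytest parametrization that could supply them is sketched below; ContinualRLSetting is the only setting class named in these examples, so it stands in for whatever list of setting classes the real test suite iterates over.

import pytest

# Hypothetical wiring only: the parametrize values are placeholders.
@pytest.mark.parametrize("observe_state", [True, False])
@pytest.mark.parametrize("Setting", [ContinualRLSetting])
def test_continuous_mountaincar(Setting, observe_state):
    ...  # body as in Example #2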
Example #3
    def configure(self, setting: Setting):
        """ Called before the method is applied on a setting (before training). 

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """

        input_space: Box = setting.observation_space["x"]

        # For now all Settings have `Discrete` (i.e. classification) action spaces.
        action_space: spaces.Discrete = setting.action_space

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.num_actions = action_space.n
        self.num_inputs = np.prod(input_space.shape)

        self.added_tasks = []
        if not (setting.task_labels_at_train_time
                and setting.task_labels_at_test_time):
            logger.warning(
                RuntimeWarning(
                    "TODO: PNN doesn't have 'proper' task inference, and task labels "
                    "aren't always available! This will use an output head at random."
                ))
        if isinstance(setting, RLSetting):
            # If we're applied to an RL setting:

            # Use these as the default hparams in RL:
            self.hparams = self.hparams or self.HParams()
            assert self.hparams
            self.train_steps_per_task = setting.steps_per_task

            # We want a batch_size of None, i.e. only one observation at a time.
            setting.batch_size = None

            self.num_steps = self.hparams.num_steps
            # Otherwise, we can train basically as long as we want on each task.
            self.loss_function = {
                "gamma": self.hparams.gamma,
            }
            if is_image(setting.observation_space.x):
                # Observing pixel input.
                self.arch = "conv"
            else:
                # Observing state input (e.g. the 4 floats in cartpole rather than images)
                self.arch = "mlp"
            self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size)

        else:
            # If we're applied to a Supervised Learning setting:
            # Use these as the default hparams in SL:
            self.hparams = self.hparams or self.HParams(
                learning_rate=0.0001,
                batch_size=32,
            )
            if self.hparams.batch_size is None:
                self.hparams.batch_size = 32

            # Set the batch size on the setting.
            setting.batch_size = self.hparams.batch_size
            # For now all Settings on the supervised side of the tree have images as
            # inputs, so the observation spaces are of type `Image` (same as Box, but with
            # additional `h`, `w`, `c` and `b` attributes).
            assert isinstance(input_space, Image)
            assert (
                setting.increment == setting.test_increment
            ), "Assuming same number of classes per task for training and testing."
            # TODO: (@lebrice): Temporarily 'fixing' this by making it so each output
            # head has as many outputs as there are classes in total, which might make
            # no sense, but currently works.
            # It would be better to refactor this so that each output head only has
            # as many outputs as are required, and then reshape / offset the predictions.
            n_outputs = setting.increment
            # Deliberately overridden below: use the total number of classes rather
            # than the per-task increment (see the TODO above).
            n_outputs = setting.action_space.n
            self.layer_size = [self.num_inputs, 256, n_outputs]
            self.model = PnnClassifier(n_layers=len(self.layer_size) - 1)
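The branch on is_image(setting.observation_space.x) above chooses between a convolutional and an MLP architecture, but the helper itself is not shown in these examples. A minimal sketch, assuming it only inspects the shape of a gym Box space (the real implementation may differ), could look like this:

from gym import spaces

def is_image(space) -> bool:
    # Treat 3-dimensional Box spaces with a plausible channel axis (1, 3 or 4,
    # channels-first or channels-last) as image observations.
    if not isinstance(space, spaces.Box) or len(space.shape) != 3:
        return False
    return space.shape[0] in (1, 3, 4) or space.shape[-1] in (1, 3, 4)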
Example #4
def run_track(method: Method, setting: Setting, yamlfile: str) -> Results:
    setting = SettingProxy(setting, yamlfile)
    results = setting.apply(method)
    print(f"Results summary:\n{results.summary()}")
    print("=====================")
    print(results.to_log_dict())
    return results
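A hypothetical call site for run_track, reusing names from the earlier examples; the YAML path is a placeholder, and the call follows the `setting: Setting` annotation by passing a setting instance (whether SettingProxy also accepts a setting class is not shown here).

# Placeholder usage sketch; ContinualRLSetting and the yaml path are assumptions.
method = DDPGMethod()
setting = ContinualRLSetting(dataset="MountainCarContinuous-v0", nb_tasks=2)
results = run_track(method, setting, yamlfile="configs/rl_track.yaml")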