def test_continuous_mountaincar(self, Setting: Type[Setting], observe_state: bool):
    """Smoke-test `self.Method` on a two-task "MountainCarContinuous-v0" setting.

    Instantiates the method from `self.Method`, builds the given `Setting` class
    with 1000 training steps and 1000 test steps per task, applies the method
    with a debug `Config`, and prints the summary of the results.

    NOTE(review): `observe_state` is accepted but never read in this body --
    presumably it is supplied by test parametrization; confirm.
    """
    method = self.Method()

    # Keyword arguments for the setting, gathered in one place.
    setting_kwargs = {
        "dataset": "MountainCarContinuous-v0",
        "nb_tasks": 2,
        "steps_per_task": 1_000,
        "test_steps_per_task": 1_000,
    }
    setting = Setting(**setting_kwargs)

    run_results: ContinualRLSetting.Results = setting.apply(
        method, config=Config(debug=True)
    )
    summary_text = run_results.summary()
    print(summary_text)
def test_continuous_mountaincar(Setting: Type[Setting], observe_state: bool):
    """Smoke-test `DDPGMethod` on a two-task "MountainCarContinuous-v0" setting.

    Builds the given `Setting` class with `observe_state_directly=True`, 1000
    training steps and 1000 test steps per task, applies a fresh `DDPGMethod`
    with a debug `Config`, and prints the summary of the results.

    NOTE(review): `observe_state` is accepted but never read here, while
    `observe_state_directly` is fixed to True -- confirm this is intended.
    """
    method = DDPGMethod()

    setting_kwargs = dict(
        dataset="MountainCarContinuous-v0",
        observe_state_directly=True,
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    setting = Setting(**setting_kwargs)

    run_results: ContinualRLSetting.Results = setting.apply(
        method, config=Config(debug=True)
    )
    # TODO: Add some bounds on the expected performance here:
    summary_text = run_results.summary()
    print(summary_text)
def configure(self, setting: Setting):
    """Called before the method is applied on a setting (before training).

    You can use this to instantiate your model, for instance, since this is
    where you get access to the observation & action spaces.

    Attributes set for every setting: `device`, `num_actions`, `num_inputs`,
    `added_tasks`, `hparams` and `model`.
    Additionally, for an `RLSetting`: `train_steps_per_task`, `num_steps`,
    `loss_function` and `arch`; otherwise: `layer_size`.

    Side effects on `setting`: `setting.batch_size` is overwritten (with None
    for an `RLSetting`, with `self.hparams.batch_size` otherwise).

    Args:
        setting: The setting this method is about to be applied on.
    """
    input_space: Box = setting.observation_space["x"]

    # For now all Settings have `Discrete` (i.e. classification) action spaces.
    action_space: spaces.Discrete = setting.action_space

    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.num_actions = action_space.n
    self.num_inputs = np.prod(input_space.shape)

    self.added_tasks = []

    if not (setting.task_labels_at_train_time and setting.task_labels_at_test_time):
        logger.warning(
            RuntimeWarning(
                "TODO: PNN doesn't have 'propper' task inference, and task labels "
                "arent always available! This will use an output head at random."
            )
        )

    if isinstance(setting, RLSetting):
        # If we're applied to an RL setting:

        # Use these as the default hparams in RL:
        self.hparams = self.hparams or self.HParams()
        assert self.hparams
        self.train_steps_per_task = setting.steps_per_task
        # We want a batch_size of None, i.e. only one observation at a time.
        setting.batch_size = None
        self.num_steps = self.hparams.num_steps
        # Otherwise, we can train basically as long as we want on each task.

        self.loss_function = {
            "gamma": self.hparams.gamma,
        }
        if is_image(setting.observation_space.x):
            # Observing pixel input.
            self.arch = "conv"
        else:
            # Observing state input (e.g. the 4 floats in cartpole rather than images)
            self.arch = "mlp"
        self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size)

    else:
        # If we're applied to a Supervised Learning setting:

        # Use these as the default hparams in SL:
        self.hparams = self.hparams or self.HParams(
            learning_rate=0.0001,
            batch_size=32,
        )
        if self.hparams.batch_size is None:
            self.hparams.batch_size = 32

        # Set the batch size on the setting.
        setting.batch_size = self.hparams.batch_size
        # For now all Settings on the supervised side of the tree have images as
        # inputs, so the observation spaces are of type `Image` (same as Box, but with
        # additional `h`, `w`, `c` and `b` attributes).
        assert isinstance(input_space, Image)
        assert (
            setting.increment == setting.test_increment
        ), "Assuming same number of classes per task for training and testing."

        # TODO: (@lebrice): Temporarily 'fixing' this by making it so each output
        # head has as many outputs as there are classes in total, which might make
        # no sense, but currently works.
        # It would be better to refactor this so that each output head can have only
        # as many outputs as is required (i.e. `setting.increment`), and then
        # reshape / offset the predictions.
        # (A dead `n_outputs = setting.increment` assignment that was immediately
        # overwritten by the line below has been removed.)
        n_outputs = setting.action_space.n

        self.layer_size = [self.num_inputs, 256, n_outputs]
        self.model = PnnClassifier(
            n_layers=len(self.layer_size) - 1,
        )
def run_track(method: Method, setting: Setting, yamlfile: str) -> Results:
    """Apply `method` to `setting` through a `SettingProxy` and report the results.

    The setting is wrapped in a `SettingProxy` built from `setting` and
    `yamlfile`, the method is applied on that proxy, and the results summary
    and log dict are printed to stdout.

    Args:
        method: The method handed to the proxy's `apply`.
        setting: Passed as the first argument to `SettingProxy`.
        yamlfile: Passed as the second argument to `SettingProxy`
            (presumably the path of a YAML config for the setting -- confirm).

    Returns:
        The results object produced by `apply`. Previously the function fell
        off the end and returned None despite its `-> Results` annotation.
    """
    proxy = SettingProxy(setting, yamlfile)
    results = proxy.apply(method)

    print(f"Results summary:\n{results.summary()}")
    print("=====================")
    print(results.to_log_dict())

    # Hand the results back so callers can use them, as the annotation promises.
    return results