def test_cartpole_state(config: Config, trainer: Trainer):
    """ Test that the baseline method can learn cartpole (state input). """
    # TODO: Actually remove the trainer_config class from the BaselineMethod?
    method = BaselineMethod(config=config)
    method.trainer = trainer
    method.hparams.learning_rate = 0.01

    setting = RLSetting(dataset="cartpole", max_steps=5000)
    results: ContinualRLResults = setting.apply(method)

    print(results.to_log_dict())
    # The method should normally reach the maximum episode length (200), but we
    # check against 100 to leave some slack for randomness.
    assert results.mean_episode_length > 100.
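# NOTE: A minimal sketch of the `config` and `trainer` pytest fixtures assumed
# by the tests in this module; in the real repo they would typically live in a
# shared conftest.py. The arguments simply mirror the ones used inline in the
# tests below, and are illustrative assumptions rather than the project's
# actual fixture definitions.
import pytest
from pytorch_lightning import Trainer


@pytest.fixture
def config() -> Config:
    # Same Config arguments as used by the multitask tests below.
    return Config(debug=True, verbose=True, seed=123)


@pytest.fixture
def trainer() -> Trainer:
    # fast_dev_run runs a single batch through fit, keeping the test cheap.
    return Trainer(fast_dev_run=True)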
def test_task_inference_rl_hard(config: Config):
    """ Test the BaselineMethod on a multi-task RL setting. """
    from sequoia.methods.baseline_method import BaselineMethod
    method = BaselineMethod(config=config)

    from sequoia.settings.active import RLSetting
    setting = RLSetting(
        dataset="cartpole",
        observe_state_directly=True,
        nb_tasks=2,
        max_steps=1000,
        test_steps_per_task=1000,
    )
    results = setting.apply(method)
    assert results
def test_multitask_rl_bug_with_PL(monkeypatch):
    """ TODO: `on_task_switch` is called on the new observation, but we also
    need to produce a loss for the output head that was just being used!
    """
    # NOTE: Tasks don't have anything to do with the task schedule: a new task
    # is sampled at each episode.
    max_episode_steps = 5
    setting = RLSetting(
        dataset="cartpole",
        batch_size=1,
        nb_tasks=2,
        max_episode_steps=max_episode_steps,
        add_done_to_observations=True,
        observe_state_directly=True,
    )
    assert setting._new_random_task_on_reset
    # setting = RLSetting.load_benchmark("monsterkong")

    config = Config(debug=True, verbose=True, seed=123)
    config.seed_everything()

    model = BaselineModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(
            multihead=True,
            output_head=EpisodicA2C.HParams(accumulate_losses_before_backward=True),
        ),
        config=config,
    )
    # TODO: Maybe add some kind of "hook" to check which losses get returned when?
    model.train()
    assert not model.automatic_optimization

    from pytorch_lightning import Trainer
    trainer = Trainer(fast_dev_run=True)
    trainer.fit(model, train_dataloader=setting.train_dataloader())

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    episodes = 0
    max_episodes = 5
    # Dict mapping from step to the losses produced at that step.
    losses: Dict[int, List[Loss]] = defaultdict(list)

    with setting.train_dataloader() as env:
        env.seed(123)
        # env = TimeLimit(env, max_episode_steps=max_episode_steps)

        # Iterate over the environment, which yields one observation at a time:
        for step, obs in enumerate(env):
            assert isinstance(obs, RLSetting.Observations)

            step_results = model.training_step(batch=obs, batch_idx=step)
            loss_tensor: Optional[Tensor] = None

            if step > 0 and step % 5 == 0:
                assert all(obs.done), step  # Since batch_size == 1 for now.
                assert step_results is not None, (step, obs.task_labels)
                loss_tensor = step_results["loss"]
                loss: Loss = step_results["loss_object"]
                print(f"Loss at step {step}: {loss}")
                losses[step].append(loss)

                # # Manually perform the optimization step.
                # output_head_loss = loss.losses.get(model.output_head.name)
                # update_model = output_head_loss is not None and output_head_loss.requires_grad
                # assert update_model
                # model.manual_backward(loss_tensor, optimizer, retain_graph=not update_model)
                # model.optimizer_step()
                # if update_model:
                #     optimizer.step()
                #     optimizer.zero_grad()
                # else:
                #     assert False, (loss, output_head_loss, model.output_head.name)
            else:
                assert step_results is None

            print(
                f"Step {step}, episode {episodes}: x={obs[0]}, done={obs.done}, "
                f"task labels: {obs.task_labels}, loss_tensor: {loss_tensor}"
            )
            if step > 100:
                break

    for step, step_losses in losses.items():
        print(f"Losses at step {step}:")
        for loss in step_losses:
            print(f"\t{loss}")
def test_multitask_rl_bug_without_PL(monkeypatch):
    """ TODO: `on_task_switch` is called on the new observation, but we also
    need to produce a loss for the output head that was just being used!
    """
    # NOTE: Tasks don't have anything to do with the task schedule: a new task
    # is sampled at each episode.
    max_episode_steps = 5
    setting = RLSetting(
        dataset="cartpole",
        batch_size=1,
        nb_tasks=2,
        max_episode_steps=max_episode_steps,
        add_done_to_observations=True,
        observe_state_directly=True,
    )
    assert setting._new_random_task_on_reset
    # setting = RLSetting.load_benchmark("monsterkong")

    config = Config(debug=True, verbose=True, seed=123)
    config.seed_everything()

    model = BaselineModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(
            multihead=True,
            output_head=EpisodicA2C.HParams(accumulate_losses_before_backward=True),
        ),
        config=config,
    )
    # TODO: Maybe add some kind of "hook" to check which losses get returned when?
    model.train()
    # from pytorch_lightning import Trainer
    # trainer = Trainer(fast_dev_run=True)
    # trainer.fit(model, train_dataloader=setting.train_dataloader())
    # trainer.setup(model, stage="fit")

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    episodes = 0
    max_episodes = 5
    # Dict mapping from step to the loss at that step.
    losses: Dict[int, Loss] = {}

    with setting.train_dataloader() as env:
        env.seed(123)
        # env = TimeLimit(env, max_episode_steps=max_episode_steps)

        # Iterate over the environment, which yields one observation at a time:
        for step, obs in enumerate(env):
            assert isinstance(obs, RLSetting.Observations)
            if step == 0:
                assert not any(obs.done)
            start_task_label = obs[1][0]

            stored_steps_in_each_head_before = {
                task_key: output_head.num_stored_steps(0)
                for task_key, output_head in model.output_heads.items()
            }
            forward_pass: ForwardPass = model.forward(observations=obs)
            rewards = env.send(forward_pass.actions)

            loss: Loss = model.get_loss(
                forward_pass=forward_pass, rewards=rewards, loss_name="debug"
            )
            stored_steps_in_each_head_after = {
                task_key: output_head.num_stored_steps(0)
                for task_key, output_head in model.output_heads.items()
            }
            # if step == 5:
            #     assert False, (loss, stored_steps_in_each_head_before, stored_steps_in_each_head_after)

            if any(obs.done):
                assert loss.loss != 0., step
                assert loss.loss.requires_grad
                # Backpropagate the loss, update the model, etc.
                loss.loss.backward()
                model.on_after_backward()
                optimizer.step()
                model.on_before_zero_grad(optimizer)
                optimizer.zero_grad()
                # TODO: Need to let the model know that an update is happening,
                # so it can clear its buffers, etc.
                episodes += sum(obs.done)
                losses[step] = loss
            else:
                assert loss.loss == 0.  # TODO:

            print(
                f"Step {step}, episode {episodes}: x={obs[0]}, done={obs.done}, "
                f"reward={rewards}, task labels: {obs.task_labels}, "
                f"loss: {loss.losses.keys()}: {loss.loss}"
            )
            if episodes > max_episodes:
                break
""" import wandb from sequoia.common import Config from sequoia.methods.baseline_method import BaselineMethod from sequoia.settings import TraditionalSLSetting, Results, Setting from sequoia.utils.logging_utils import get_logger logger = get_logger(__file__) if __name__ == "__main__": from simple_parsing import ArgumentParser ## Create the Setting: from sequoia.settings import RLSetting setting = RLSetting(dataset="monsterkong") # from sequoia.settings import TaskIncrementalSLSetting # setting = TaskIncrementalSLSetting(dataset="cifar10") ## Create the BaselineMethod: # Option 1: Create the method manually: # method = BaselineMethod() # Option 2: From the command-line: method, unused_args = BaselineMethod.from_known_args( ) # allow unused args. # parser = ArgumentParser(description=__doc__) # BaselineMethod.add_argparse_args(parser, dest="method") # args, unused_args = parser.parse_known_args() # method: BaselineMethod = BaselineMethod.from_argparse_args(args, dest="method")