def demo_manual():
    """ Apply the custom method to a Setting, creating both manually in code. """
    # Create any Setting from the tree:
    from sequoia.settings import TaskIncrementalRLSetting, TaskIncrementalSetting

    # setting = TaskIncrementalSetting(dataset="mnist", nb_tasks=5)  # SL
    setting = TaskIncrementalRLSetting(  # RL
        dataset="cartpole",
        train_task_schedule={
            0: {"gravity": 10, "length": 0.5},
            5000: {"gravity": 10, "length": 1.0},
        },
        observe_state_directly=True,  # state input, rather than pixel input.
        max_steps=10_000,
    )

    ## Create the BaselineMethod:
    config = Config(debug=True)
    trainer_options = TrainerConfig(max_epochs=1)
    hparams = BaselineModel.HParams()
    base_method = BaselineMethod(hparams=hparams, config=config, trainer_options=trainer_options)

    ## Get the results of the baseline method:
    base_results = setting.apply(base_method, config=config)

    ## Create the CustomMethod:
    config = Config(debug=True)
    trainer_options = TrainerConfig(max_epochs=1)
    hparams = CustomizedBaselineModel.HParams()
    new_method = CustomMethod(hparams=hparams, config=config, trainer_options=trainer_options)

    ## Get the results for the 'improved' method:
    new_results = setting.apply(new_method, config=config)

    print("\n\nComparison: BaselineMethod vs CustomMethod")
    print("\n BaselineMethod results: ")
    print(base_results.summary())
    print("\n CustomMethod results: ")
    print(new_results.summary())
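# A minimal, hedged sketch of running the demo above as a script. The
# `__main__` guard is an assumption about how this module is used; it is not
# part of the original code.
if __name__ == "__main__":
    demo_manual()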
def test_launch_sweep_with_constructor(
    method_type: Optional[Type[Method]],
    setting_type: Optional[Type[Setting]],
    tmp_path: Path,
):
    if not method_type.is_applicable(setting_type):
        pytest.skip(
            f"Skipping test since Method {method_type} isn't applicable on "
            f"settings of type {setting_type}."
        )
    if issubclass(method_type, RandomBaselineMethod):
        # NOTE: pytest.skip raises, so no explicit `return` is needed here.
        pytest.skip(
            "BUG: RandomBaselineMethod has a hparam space that causes the HPO "
            "algo to go into an infinite loop."
        )

    experiment = HPOSweep(
        method=method_type,
        setting=setting_type,
        database_path=tmp_path / "debug.pkl",
        config=Config(debug=True),
        max_runs=3,
    )
    best_hparams, best_performance = experiment.launch(["--debug"])
    assert best_hparams
    assert best_performance
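# Hedged illustration: invoking the sweep test above with a single
# (method, setting) pair. The classes come from elsewhere in this document;
# the specific pairing is an assumption, not the repository's actual pytest
# parametrization.
def test_launch_sweep_baseline_on_task_incremental_rl(tmp_path: Path):
    test_launch_sweep_with_constructor(
        method_type=BaselineMethod,
        setting_type=TaskIncrementalRLSetting,
        tmp_path=tmp_path,
    )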
def test_monsterkong():
    method = SACMethod()
    setting = IncrementalRLSetting(
        dataset="monsterkong",
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    results: IncrementalRLSetting.Results = setting.apply(method, config=Config(debug=True))
    print(results.summary())
def test_cartpole_state():
    method = PPOMethod(hparams=PPOModel.HParams(n_steps=64))
    setting = RLSetting(
        dataset="cartpole",
        observe_state_directly=True,
        steps_per_task=5_000,
        test_steps_per_task=1_000,
    )
    results = setting.apply(method, config=Config(debug=True))
    print(results.summary())
    assert 135 < results.average_final_performance.mean_episode_reward
def test_cartpole_state():
    method = SACMethod()
    setting = IncrementalRLSetting(
        dataset="cartpole",
        observe_state_directly=True,
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    results: IncrementalRLSetting.Results = setting.apply(method, config=Config(debug=True))
    print(results.summary())
def test_continuous_mountaincar(self, Setting: Type[Setting], observe_state: bool):
    method = self.Method()
    setting = Setting(
        dataset="MountainCarContinuous-v0",
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    results: ContinualRLSetting.Results = setting.apply(method, config=Config(debug=True))
    print(results.summary())
def test_continuous_mountaincar(Setting: Type[Setting], observe_state: bool):
    method = DDPGMethod()
    setting = Setting(
        dataset="MountainCarContinuous-v0",
        observe_state_directly=True,
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    results: ContinualRLSetting.Results = setting.apply(method, config=Config(debug=True))
    # TODO: Add some bounds on the expected performance here (see the sketch below):
    print(results.summary())
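# A hedged follow-up for the TODO above: one way to bound the expected
# performance, mirroring the cartpole PPO assertion earlier in this section.
# The helper name and the threshold below are assumed placeholders, not
# measured values from the original tests.
def _assert_mountaincar_performance(
    results: ContinualRLSetting.Results, lower_bound: float = -200.0
) -> None:
    # MountainCarContinuous-v0 yields negative rewards until the goal is
    # reached, so this is a loose sanity check rather than a strict target.
    assert lower_bound < results.average_final_performance.mean_episode_reward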
def config(tmp_path: Path):
    # TODO: Set the results dir somehow with the value of this `tmp_path` fixture.
    return Config(
        debug=True,
        data_dir=Path(os.environ.get("SLURM_TMPDIR", "data")),
        seed=123,
    )
def test_multitask_rl_bug_with_PL(monkeypatch):
    """ TODO: on_task_switch is called on the new observation, but we need to
    produce a loss for the output head that we were just using!
    """
    # NOTE: Tasks don't have anything to do with the task schedule. They are
    # sampled at each episode.
    max_episode_steps = 5
    setting = RLSetting(
        dataset="cartpole",
        batch_size=1,
        nb_tasks=2,
        max_episode_steps=max_episode_steps,
        add_done_to_observations=True,
        observe_state_directly=True,
    )
    assert setting._new_random_task_on_reset
    # setting = RLSetting.load_benchmark("monsterkong")

    config = Config(debug=True, verbose=True, seed=123)
    config.seed_everything()
    model = BaselineModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(
            multihead=True,
            output_head=EpisodicA2C.HParams(accumulate_losses_before_backward=True),
        ),
        config=config,
    )
    # TODO: Maybe add some kind of "hook" to check which losses get returned when?
    model.train()
    assert not model.automatic_optimization

    from pytorch_lightning import Trainer

    trainer = Trainer(fast_dev_run=True)
    trainer.fit(model, train_dataloader=setting.train_dataloader())

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    episodes = 0
    max_episodes = 5
    # Dict mapping from step to loss at that step.
    losses: Dict[int, List[Loss]] = defaultdict(list)

    with setting.train_dataloader() as env:
        env.seed(123)
        # env = TimeLimit(env, max_episode_steps=max_episode_steps)
        # Iterate over the environment, which yields one observation at a time:
        for step, obs in enumerate(env):
            assert isinstance(obs, RLSetting.Observations)
            step_results = model.training_step(batch=obs, batch_idx=step)
            loss_tensor: Optional[Tensor] = None

            if step > 0 and step % 5 == 0:
                assert all(obs.done), step  # Since batch_size == 1 for now.
                assert step_results is not None, (step, obs.task_labels)
                loss_tensor = step_results["loss"]
                loss: Loss = step_results["loss_object"]
                print(f"Loss at step {step}: {loss}")
                losses[step].append(loss)

                # # Manually perform the optimization step.
                # output_head_loss = loss.losses.get(model.output_head.name)
                # update_model = output_head_loss is not None and output_head_loss.requires_grad
                # assert update_model
                # model.manual_backward(loss_tensor, optimizer, retain_graph=not update_model)
                # model.optimizer_step()
                # if update_model:
                #     optimizer.step()
                #     optimizer.zero_grad()
                # else:
                #     assert False, (loss, output_head_loss, model.output_head.name)
            else:
                assert step_results is None

            print(
                f"Step {step}, episode {episodes}: x={obs[0]}, done={obs.done}, "
                f"task labels: {obs.task_labels}, loss_tensor: {loss_tensor}"
            )
            if step > 100:
                break

    for step, step_losses in losses.items():
        print(f"Losses at step {step}:")
        for loss in step_losses:
            print(f"\t{loss}")
def test_multitask_rl_bug_without_PL(monkeypatch):
    """ TODO: on_task_switch is called on the new observation, but we need to
    produce a loss for the output head that we were just using!
    """
    # NOTE: Tasks don't have anything to do with the task schedule. They are
    # sampled at each episode.
    max_episode_steps = 5
    setting = RLSetting(
        dataset="cartpole",
        batch_size=1,
        nb_tasks=2,
        max_episode_steps=max_episode_steps,
        add_done_to_observations=True,
        observe_state_directly=True,
    )
    assert setting._new_random_task_on_reset
    # setting = RLSetting.load_benchmark("monsterkong")

    config = Config(debug=True, verbose=True, seed=123)
    config.seed_everything()
    model = BaselineModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(
            multihead=True,
            output_head=EpisodicA2C.HParams(accumulate_losses_before_backward=True),
        ),
        config=config,
    )
    # TODO: Maybe add some kind of "hook" to check which losses get returned when?
    model.train()

    # from pytorch_lightning import Trainer
    # trainer = Trainer(fast_dev_run=True)
    # trainer.fit(model, train_dataloader=setting.train_dataloader())
    # trainer.setup(model, stage="fit")

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    episodes = 0
    max_episodes = 5
    # Dict mapping from step to loss at that step.
    losses: Dict[int, Loss] = {}

    with setting.train_dataloader() as env:
        env.seed(123)
        # env = TimeLimit(env, max_episode_steps=max_episode_steps)
        # Iterate over the environment, which yields one observation at a time:
        for step, obs in enumerate(env):
            assert isinstance(obs, RLSetting.Observations)
            if step == 0:
                assert not any(obs.done)
                start_task_label = obs[1][0]

            stored_steps_in_each_head_before = {
                task_key: output_head.num_stored_steps(0)
                for task_key, output_head in model.output_heads.items()
            }
            forward_pass: ForwardPass = model.forward(observations=obs)
            rewards = env.send(forward_pass.actions)
            loss: Loss = model.get_loss(
                forward_pass=forward_pass, rewards=rewards, loss_name="debug"
            )
            stored_steps_in_each_head_after = {
                task_key: output_head.num_stored_steps(0)
                for task_key, output_head in model.output_heads.items()
            }
            # if step == 5:
            #     assert False, (loss, stored_steps_in_each_head_before, stored_steps_in_each_head_after)

            if any(obs.done):
                assert loss.loss != 0., step
                assert loss.loss.requires_grad
                # Backpropagate the loss, update the models, etc etc.
                loss.loss.backward()
                model.on_after_backward()
                optimizer.step()
                model.on_before_zero_grad(optimizer)
                optimizer.zero_grad()
                # TODO: Need to let the model know that an update is happening
                # so it can clear buffers etc.
                episodes += sum(obs.done)
                losses[step] = loss
            else:
                assert loss.loss == 0.  # TODO:

            print(
                f"Step {step}, episode {episodes}: x={obs[0]}, done={obs.done}, "
                f"reward={rewards} task labels: {obs.task_labels}, "
                f"loss: {loss.losses.keys()}: {loss.loss}"
            )
            if episodes > max_episodes:
                break
def launch_batch_of_runs(
    setting: Optional[Setting],
    method: Optional[Method],
    argv: Union[str, List[str]] = None,
) -> List[Tuple[Dict, Results]]:
    if argv is None:
        argv = sys.argv[1:]
    if isinstance(argv, str):
        argv = shlex.split(argv)
    argv_copy = argv.copy()

    experiment: Experiment
    experiment, argv = Experiment.from_known_args(argv)

    setting: Optional[Type[Setting]] = experiment.setting
    method: Optional[Type[Method]] = experiment.method
    config = experiment.config

    # TODO: Maybe if everything stays exactly identical, we could 'cache'
    # the results of some experiments, so we don't re-run them all the time?
    all_results: List[Tuple[Dict, Results]] = []

    # The lists of arguments for each 'job'.
    method_types: List[Type[Method]] = []
    setting_types: List[Type[Setting]] = []
    run_configs: List[Config] = []

    if setting:
        logger.info(f"Evaluating all applicable methods on Setting {setting}.")
        method_types = setting.get_applicable_methods()
        setting_types = [setting for _ in method_types]
    elif method:
        logger.info(f"Applying Method {method} on all its applicable settings.")
        setting_types = method.get_applicable_settings()
        method_types = [method for _ in setting_types]

    # Create a 'config' for each experiment.
    # Use a log_dir for each run using the 'base' log_dir (passed
    # when creating the Experiment), the name of the Setting, and
    # the name of the Method.
    for setting_type, method_type in zip(setting_types, method_types):
        run_log_dir = config.log_dir / setting_type.get_name() / method_type.get_name()
        run_config_kwargs = config.to_dict()
        run_config_kwargs["log_dir"] = run_log_dir
        run_config = Config(**run_config_kwargs)
        run_configs.append(run_config)

    arguments_of_each_run: List[Dict] = []
    results_of_each_run: List[Result] = []

    # Create one 'job' per setting-method combination:
    for setting_type, method_type, run_config in zip(
        setting_types, method_types, run_configs
    ):
        # NOTE: Some methods might use all the values in `argv`, and some
        # might not, so we set `strict=False`.
        arguments_of_each_run.append(
            dict(
                setting=setting_type,
                method=method_type,
                config=run_config,
                argv=argv,
                strict_args=False,
            )
        )

    # TODO: Use submitit or something like it, to run each of these in parallel:
    # See https://github.com/lebrice/Sequoia/issues/87 for more info.
    for run_arguments in arguments_of_each_run:
        result = Experiment.run_experiment(**run_arguments)
        logger.info(f"Results for arguments {run_arguments}: {result}")
        results_of_each_run.append(result)

    all_results = list(zip(arguments_of_each_run, results_of_each_run))
    logger.info("All results: ")
    for run_arguments, run_results in all_results:
        print(f"Arguments: {run_arguments}")
        print(f"Results: {run_results}")
    return all_results
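# Hedged usage sketch: `launch_batch_of_runs` parses `argv` itself through
# `Experiment.from_known_args`, so a caller can simply forward the process
# arguments. This entry-point guard is illustrative and not part of the
# original module.
if __name__ == "__main__":
    launch_batch_of_runs(setting=None, method=None, argv=sys.argv[1:])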
def config(tmp_path_factory: Path):
    test_log_dir = tmp_path_factory.mktemp("test_log_dir")
    # TODO: Set the results dir somehow with the value of this `tmp_path` fixture.
    data_dir = Path(os.environ.get("SLURM_TMPDIR", os.environ.get("DATA_DIR", "data")))
    return Config(debug=True, data_dir=data_dir, seed=123, log_dir=test_log_dir)
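# Hedged usage sketch for the `config` fixture above: a test that simply
# checks the values the fixture sets. The test itself is illustrative and not
# part of the original test suite.
def test_config_fixture_values(config: Config):
    assert config.debug
    assert config.seed == 123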