def demo_manual():
    """ Apply the custom method to a Setting, creating both manually in code. """
    # Create any Setting from the tree:
    from sequoia.settings import TaskIncrementalRLSetting, TaskIncrementalSetting

    # setting = TaskIncrementalSetting(dataset="mnist", nb_tasks=5)  # SL
    setting = TaskIncrementalRLSetting(  # RL
        dataset="cartpole",
        train_task_schedule={
            0: {
                "gravity": 10,
                "length": 0.5
            },
            5000: {
                "gravity": 10,
                "length": 1.0
            },
        },
        observe_state_directly=True,  # state input, rather than pixel input.
        max_steps=10_000,
    )

    ## Create the BaselineMethod:
    config = Config(debug=True)
    trainer_options = TrainerConfig(max_epochs=1)
    hparams = BaselineModel.HParams()
    base_method = BaselineMethod(hparams=hparams,
                                 config=config,
                                 trainer_options=trainer_options)

    ## Get the results of the baseline method:
    base_results = setting.apply(base_method, config=config)

    ## Create the CustomMethod:
    config = Config(debug=True)
    trainer_options = TrainerConfig(max_epochs=1)
    hparams = CustomizedBaselineModel.HParams()
    new_method = CustomMethod(hparams=hparams,
                              config=config,
                              trainer_options=trainer_options)

    ## Get the results for the 'improved' method:
    new_results = setting.apply(new_method, config=config)

    print(f"\n\nComparison: BaselineMethod vs CustomMethod")
    print("\n BaselineMethod results: ")
    print(base_results.summary())

    print("\n CustomMethod results: ")
    print(new_results.summary())
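
# Hypothetical entry point (an assumption, not part of the original demo file):
# run the comparison above directly as a script.
if __name__ == "__main__":
    demo_manual()
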
    def apply(self,
              method: Method,
              config: Config = None) -> ClassIncrementalResults:
        """Apply the given method on this setting to producing some results."""
        # TODO: It still isn't super clear what should be in charge of creating
        # the config, and how to create it, when it isn't passed explicitly.
        self.config: Config
        if config is not None:
            self.config = config
            logger.debug(f"Using Config {self.config}")
        elif isinstance(getattr(method, "config", None), Config):
            # If the Method has a `config` attribute that is a Config, use that.
            self.config = method.config
            logger.debug(f"Using Config from the Method: {self.config}")
        else:
            logger.debug("Parsing the Config from the command-line.")
            self.config = Config.from_args(self._argv, strict=False)
            logger.debug(f"Resulting Config: {self.config}")

        method.configure(setting=self)

        # Run the main loop (which is defined in IncrementalSetting).
        results: ClassIncrementalResults = super().main_loop(method)
        logger.info(results.summary())
        method.receive_results(self, results=results)
        return results
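
# Hedged sketch of the config-resolution order implemented in `apply` above
# (assumes BaselineMethod's remaining constructor arguments have usable defaults):
#   1. an explicitly passed `config` wins,
#   2. otherwise a `Config` found on the Method is reused,
#   3. otherwise one is parsed from the command-line arguments.
setting = TaskIncrementalSetting(dataset="mnist", nb_tasks=5)
method = BaselineMethod(config=Config(debug=True))
results = setting.apply(method)  # case 2: falls back to `method.config`.
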
def test_launch_sweep_with_constructor(
    method_type: Optional[Type[Method]],
    setting_type: Optional[Type[Setting]],
    tmp_path: Path,
):
    if not method_type.is_applicable(setting_type):
        pytest.skip(
            msg=f"Skipping test since Method {method_type} isn't applicable "
            f"to settings of type {setting_type}."
        )

    if issubclass(method_type, RandomBaselineMethod):
        pytest.skip(
            "BUG: RandomBaselineMethod has a hparam space that causes the HPO "
            "algo to go into an infinite loop."
        )

    experiment = HPOSweep(
        method=method_type,
        setting=setting_type,
        database_path=tmp_path / "debug.pkl",
        config=Config(debug=True),
        max_runs=3,
    )
    best_hparams, best_performance = experiment.launch(["--debug"])
    assert best_hparams
    assert best_performance
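
# Hedged sketch: launching the same kind of sweep outside of pytest, reusing
# types that appear in the examples above (assumes BaselineMethod is applicable
# to TaskIncrementalSetting; the database path is illustrative).
sweep = HPOSweep(
    method=BaselineMethod,
    setting=TaskIncrementalSetting,
    database_path=Path("debug.pkl"),
    config=Config(debug=True),
    max_runs=3,
)
best_hparams, best_performance = sweep.launch(["--debug"])
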
Example #4
def test_monsterkong():
    method = SACMethod()
    setting = IncrementalRLSetting(
        dataset="monsterkong",
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    results: IncrementalRLSetting.Results = setting.apply(
        method, config=Config(debug=True))
    print(results.summary())
Example #5
def test_cartpole_state():
    method = PPOMethod(hparams=PPOModel.HParams(n_steps=64))
    setting = RLSetting(
        dataset="cartpole",
        observe_state_directly=True,
        steps_per_task=5_000,
        test_steps_per_task=1_000,
    )
    results = setting.apply(method, config=Config(debug=True))
    print(results.summary())
    assert 135 < results.average_final_performance.mean_episode_reward
Example #6
def test_cartpole_state():
    method = SACMethod()
    setting = IncrementalRLSetting(
        dataset="cartpole",
        observe_state_directly=True,
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    results: IncrementalRLSetting.Results = setting.apply(
        method, config=Config(debug=True))
    print(results.summary())
Example #7
    def test_continuous_mountaincar(self, Setting: Type[Setting],
                                    observe_state: bool):
        method = self.Method()
        setting = Setting(
            dataset="MountainCarContinuous-v0",
            nb_tasks=2,
            steps_per_task=1_000,
            test_steps_per_task=1_000,
        )
        results: ContinualRLSetting.Results = setting.apply(
            method, config=Config(debug=True))
        print(results.summary())
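
# Hedged sketch of the test-class pattern assumed by the method above: the
# concrete Method type is supplied as a class attribute, so `self.Method()`
# instantiates it (SACMethod is an illustrative choice).
class TestSAC:
    Method = SACMethod
    # `test_continuous_mountaincar` (above) would be defined here as a method.
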
Example #8
def test_continuous_mountaincar(Setting: Type[Setting], observe_state: bool):
    method = DDPGMethod()
    setting = Setting(
        dataset="MountainCarContinuous-v0",
        observe_state_directly=True,
        nb_tasks=2,
        steps_per_task=1_000,
        test_steps_per_task=1_000,
    )
    results: ContinualRLSetting.Results = setting.apply(
        method, config=Config(debug=True)
    )
    # TODO: Add some bounds on the expected performance here:
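    # A hedged sketch of such a bound, modeled on the cartpole example above
    # (the threshold is illustrative, not a measured value):
    # assert results.average_final_performance.mean_episode_reward > -20.0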
    print(results.summary())
Example #9
    def prepare_data(self, data_dir: Path = None, **kwargs):
        self.config = self.config or Config.from_args(self._argv, strict=False)
        # if self.batch_size is None:
        #     logger.warning(UserWarning(
        #         f"Using the default batch size of 32. (You can set the "
        #         f"batch size by passing a value to the Setting constructor, or "
        #         f"by setting the attribute inside your 'configure' method) "
        #     ))
        #     self.batch_size = 32

        # data_dir = data_dir or self.data_dir or self.config.data_dir
        # self.make_dataset(data_dir, download=True)
        # self.data_dir = data_dir
        return super().prepare_data(data_dir=data_dir, **kwargs)
Example #10
    def apply_all(self, argv: Union[str, List[str]] = None) -> Dict[Type["Method"], Results]:
        applicable_methods = self.get_applicable_methods()
        from sequoia.methods import Method
        all_results: Dict[Type[Method], Results] = {}
        config = Config.from_args(argv)
        for method_type in applicable_methods:
            method = method_type.from_args(argv)
            results = self.apply(method, config)
            all_results[method_type] = results
        logger.info(f"All results for setting of type {type(self)}:")
        logger.info({
            method.get_name(): (results.get_metric() if results else "crashed")
            for method, results in all_results.items()
        })
        return all_results
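
# Hedged usage sketch for `apply_all`: evaluate every applicable Method on a
# single Setting instance (the constructor arguments and the "--debug" flag
# below are illustrative).
setting = TaskIncrementalRLSetting(dataset="cartpole", observe_state_directly=True)
all_results = setting.apply_all(argv=["--debug"])
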
Example #11
    def _setup_config(self, method: Method) -> Config:
        config: Config
        if isinstance(getattr(method, "config", None), Config):
            config = method.config
            logger.debug(f"Using Config from the Method: {config}")
        else:
            argv = self._argv
            if argv:
                logger.debug(
                    f"Parsing the Config from the command-line arguments ({argv})"
                )
            else:
                logger.debug(
                    "Parsing the Config from the current command-line arguments."
                )
            config = Config.from_args(argv, strict=False)
        return config
Example #12
def config(tmp_path: Path):
    # TODO: Set the results dir somehow with the value of this `tmp_path` fixture.
    return Config(debug=True, data_dir=Path(os.environ.get("SLURM_TMPDIR", "data")), seed=123)
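
# Hedged sketch: a test consuming the `config` fixture above (pytest injects
# fixtures by parameter name).
def test_uses_debug_config(config):
    assert config.debug
    assert config.seed == 123
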
def test_multitask_rl_bug_with_PL(monkeypatch):
    """ TODO: on_task_switch is called on the new observation, but we need to produce a
    loss for the output head that we were just using!
    """
    # NOTE: Tasks don't have anything to do with the task schedule. They are sampled at
    # each episode.
    max_episode_steps = 5
    setting = RLSetting(
        dataset="cartpole",
        batch_size=1,
        nb_tasks=2,
        max_episode_steps=max_episode_steps,
        add_done_to_observations=True,
        observe_state_directly=True,
    )
    assert setting._new_random_task_on_reset

    # setting = RLSetting.load_benchmark("monsterkong")
    config = Config(debug=True, verbose=True, seed=123)
    config.seed_everything()
    model = BaselineModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(
            multihead=True,
            output_head=EpisodicA2C.HParams(
                accumulate_losses_before_backward=True)),
        config=config,
    )

    # TODO: Maybe add some kind of "hook" to check which losses get returned when?
    model.train()
    assert not model.automatic_optimization

    from pytorch_lightning import Trainer
    trainer = Trainer(fast_dev_run=True)
    trainer.fit(model, train_dataloader=setting.train_dataloader())

    # from pytorch_lightning import Trainer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    episodes = 0
    max_episodes = 5

    # Dict mapping from step to loss at that step.
    losses: Dict[int, List[Loss]] = defaultdict(list)

    with setting.train_dataloader() as env:
        env.seed(123)

        # env = TimeLimit(env, max_episode_steps=max_episode_steps)
        # Iterate over the environment, which yields one observation at a time:
        for step, obs in enumerate(env):
            assert isinstance(obs, RLSetting.Observations)

            step_results = model.training_step(batch=obs, batch_idx=step)
            loss_tensor: Optional[Tensor] = None

            if step > 0 and step % 5 == 0:
                assert all(obs.done), step  # Since batch_size == 1 for now.
                assert step_results is not None, (step, obs.task_labels)
                loss_tensor = step_results["loss"]
                loss: Loss = step_results["loss_object"]
                print(f"Loss at step {step}: {loss}")
                losses[step].append(loss)

                # # Manually perform the optimization step.
                # output_head_loss = loss.losses.get(model.output_head.name)
                # update_model = output_head_loss is not None and output_head_loss.requires_grad

                # assert update_model
                # model.manual_backward(loss_tensor, optimizer, retain_graph=not update_model)
                # model.optimizer_step()
                # if update_model:
                #     optimizer.step()
                #     optimizer.zero_grad()
                # else:
                #     assert False, (loss, output_head_loss, model.output_head.name)

            else:
                assert step_results is None

            print(
                f"Step {step}, episode {episodes}: x={obs[0]}, done={obs.done}, task labels: {obs.task_labels}, loss_tensor: {loss_tensor}"
            )

            if step > 100:
                break

    for step, step_losses in losses.items():
        print(f"Losses at step {step}:")
        for loss in step_losses:
            print(f"\t{loss}")

def test_multitask_rl_bug_without_PL(monkeypatch):
    """ TODO: on_task_switch is called on the new observation, but we need to produce a
    loss for the output head that we were just using!
    """
    # NOTE: Tasks don't have anything to do with the task schedule. They are sampled at
    # each episode.
    max_episode_steps = 5
    setting = RLSetting(
        dataset="cartpole",
        batch_size=1,
        nb_tasks=2,
        max_episode_steps=max_episode_steps,
        add_done_to_observations=True,
        observe_state_directly=True,
    )
    assert setting._new_random_task_on_reset

    # setting = RLSetting.load_benchmark("monsterkong")
    config = Config(debug=True, verbose=True, seed=123)
    config.seed_everything()
    model = BaselineModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(
            multihead=True,
            output_head=EpisodicA2C.HParams(
                accumulate_losses_before_backward=True)),
        config=config,
    )
    # TODO: Maybe add some kind of "hook" to check which losses get returned when?
    model.train()
    # from pytorch_lightning import Trainer
    # trainer = Trainer(fast_dev_run=True)
    # trainer.fit(model, train_dataloader=setting.train_dataloader())
    # trainer.setup(model, stage="fit")

    # from pytorch_lightning import Trainer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    episodes = 0
    max_episodes = 5

    # Dict mapping from step to loss at that step.
    losses: Dict[int, Loss] = {}

    with setting.train_dataloader() as env:
        env.seed(123)
        # env = TimeLimit(env, max_episode_steps=max_episode_steps)
        # Iterate over the environment, which yields one observation at a time:
        for step, obs in enumerate(env):
            assert isinstance(obs, RLSetting.Observations)

            if step == 0:
                assert not any(obs.done)
            start_task_label = obs[1][0]

            stored_steps_in_each_head_before = {
                task_key: output_head.num_stored_steps(0)
                for task_key, output_head in model.output_heads.items()
            }
            forward_pass: ForwardPass = model.forward(observations=obs)
            rewards = env.send(forward_pass.actions)

            loss: Loss = model.get_loss(forward_pass=forward_pass,
                                        rewards=rewards,
                                        loss_name="debug")
            stored_steps_in_each_head_after = {
                task_key: output_head.num_stored_steps(0)
                for task_key, output_head in model.output_heads.items()
            }
            # if step == 5:
            #     assert False, (loss, stored_steps_in_each_head_before, stored_steps_in_each_head_after)

            if any(obs.done):
                assert loss.loss != 0., step
                assert loss.loss.requires_grad

                # Backpropagate the loss, update the models, etc etc.
                loss.loss.backward()
                model.on_after_backward()
                optimizer.step()
                model.on_before_zero_grad(optimizer)
                optimizer.zero_grad()

                # TODO: Need to let the model know than an update is happening so it can clear
                # buffers etc.

                episodes += sum(obs.done)
                losses[step] = loss
            else:
                assert loss.loss == 0.
            # TODO:
            print(
                f"Step {step}, episode {episodes}: x={obs[0]}, done={obs.done}, reward={rewards} task labels: {obs.task_labels}, loss: {loss.losses.keys()}: {loss.loss}"
            )

            if episodes > max_episodes:
                break
Example #15
def launch_batch_of_runs(
    setting: Optional[Setting],
    method: Optional[Method],
    argv: Union[str, List[str]] = None,
) -> List[Tuple[Dict, Results]]:
    if argv is None:
        argv = sys.argv[1:]
    if isinstance(argv, str):
        argv = shlex.split(argv)
    argv_copy = argv.copy()

    experiment: Experiment
    experiment, argv = Experiment.from_known_args(argv)

    setting: Optional[Type[Setting]] = experiment.setting
    method: Optional[Type[Method]] = experiment.method
    config = experiment.config

    # TODO: Maybe if everything stays exactly identical, we could 'cache'
    # the results of some experiments, so we don't re-run them all the time?
    all_results: Dict[Tuple[Type[Setting], Type[Method]], Results] = {}

    # The lists of arguments for each 'job'.
    method_types: List[Type[Method]] = []
    setting_types: List[Type[Setting]] = []
    run_configs: List[Config] = []

    if setting:
        logger.info(f"Evaluating all applicable methods on Setting {setting}.")
        method_types = setting.get_applicable_methods()
        setting_types = [setting for _ in method_types]

    elif method:
        logger.info(f"Applying Method {method} on all its applicable settings.")
        setting_types = method.get_applicable_settings()
        method_types = [method for _ in setting_types]

    # Create a 'config' for each experiment.
    # Use a log_dir for each run using the 'base' log_dir (passed
    # when creating the Experiment), the name of the Setting, and
    # the name of the Method.
    for setting_type, method_type in zip(setting_types, method_types):
        run_log_dir = config.log_dir / setting_type.get_name() / method_type.get_name()

        run_config_kwargs = config.to_dict()
        run_config_kwargs["log_dir"] = run_log_dir
        run_config = Config(**run_config_kwargs)

        run_configs.append(run_config)

    arguments_of_each_run: List[Dict] = []
    results_of_each_run: List[Results] = []
    # Create one 'job' per setting-method combination:
    for setting_type, method_type, run_config in zip(
        setting_types, method_types, run_configs
    ):
        # NOTE: Some methods might use all the values in `argv`, and some
        # might not, so we set `strict=False`.
        arguments_of_each_run.append(
            dict(
                setting=setting_type,
                method=method_type,
                config=run_config,
                argv=argv,
                strict_args=False,
            )
        )

    # TODO: Use submitit or something like it, to run each of these in parallel:
    # See https://github.com/lebrice/Sequoia/issues/87 for more info.
    for run_arguments in arguments_of_each_run:
        result = Experiment.run_experiment(**run_arguments)
        logger.info(f"Results for arguments {run_arguments}: {result}")
        results_of_each_run.append(result)

    all_results = list(zip(arguments_of_each_run, results_of_each_run))
    logger.info(f"All results: ")
    for run_arguments, run_results in all_results:
        print(f"Arguments: {run_arguments}")
        print(f"Results: {run_results}")
    return all_results
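
# Hedged usage sketch: `launch_batch_of_runs` re-derives the setting and method
# from `argv` via `Experiment.from_known_args`, so a caller can simply forward
# the command-line arguments.
if __name__ == "__main__":
    launch_batch_of_runs(setting=None, method=None, argv=sys.argv[1:])
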
Example #16
def config(tmp_path_factory):  # NOTE: pytest's TempPathFactory fixture, not a Path.
    test_log_dir = tmp_path_factory.mktemp("test_log_dir")
    # TODO: Set the results dir somehow with the value of this `tmp_path` fixture.
    data_dir = Path(os.environ.get("SLURM_TMPDIR", os.environ.get("DATA_DIR", "data")))
    return Config(debug=True, data_dir=data_dir, seed=123, log_dir=test_log_dir)