Code example #1
File: sac.py Project: gopeshh/Sequoia
class SACMethod(OffPolicyMethod):
    """ Method that uses the SAC model from stable-baselines3. """

    Model: ClassVar[Type[SACModel]] = SACModel

    # Hyper-parameters of the SAC model.
    hparams: SACModel.HParams = mutable_field(SACModel.HParams)

    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> SACModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(self, observations: ContinualRLSetting.Observations,
                    action_space: spaces.Space) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.
Code example #2
class CustomPPOMethod(PPOMethod):
    Model: ClassVar[Type[PPOModel]] = PPOModel
    # Hyper-parameters of the PPO Model.
    hparams: PPOModel.HParams = mutable_field(PPOModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting=setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> PPOModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(self, observations: ContinualRLSetting.Observations,
                    action_space: spaces.Space) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """

    def get_search_space(
            self,
            setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        return super().get_search_space(setting)
Code example #3
class DDPGMethod(StableBaselines3Method):
    """ Method that uses the DDPG model from stable-baselines3. """

    Model: ClassVar[Type[DDPGModel]] = DDPGModel

    # Hyper-parameters of the DDPG model.
    hparams: DDPGModel.HParams = mutable_field(DDPGModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting=setting)

    def create_model(self, train_env: gym.Env,
                     valid_env: gym.Env) -> DDPGModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(self, observations: ContinualRLSetting.Observations,
                    action_space: spaces.Space) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.
Code example #4
class TD3Method(OffPolicyMethod):
    """ Method that uses the TD3 model from stable-baselines3. """

    Model: ClassVar[Type[TD3Model]] = TD3Model
    hparams: TD3Model.HParams = mutable_field(TD3Model.HParams)

    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> TD3Model:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations, action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)
Code example #5
File: ewc_method.py Project: optimass/Sequoia
class EwcMethod(BaselineMethod, target_setting=IncrementalSetting):
    """ Subclass of the BaselineMethod, which adds the EWCTask to the `BaselineModel`.

    This Method is applicable to any CL setting (RL or SL) where there are clear task
    boundaries, regardless of if the task labels are given or not.
    """
    hparams: EwcModel.HParams = mutable_field(EwcModel.HParams)

    def __init__(
        self,
        hparams: EwcModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        **kwargs,
    ):
        super().__init__(hparams=hparams,
                         config=config,
                         trainer_options=trainer_options,
                         **kwargs)

    def configure(self, setting: Setting):
        """ Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        super().configure(setting)
        # self.model.add_auxiliary_task(EWCTask(options=self.hparams.ewc))

    def on_task_switch(self, task_id: Optional[int]):
        super().on_task_switch(task_id)

    def create_model(self, setting: Setting) -> EwcModel:
        """Create the Model to use for the given Setting.

        In this case, we want to return an `EwcModel` (our customized version of the
        BaselineModel).

        Parameters
        ----------
        setting : Setting
            The experimental Setting this Method will be applied to.

        Returns
        -------
        EwcModel
            The Model that will be trained and used for evaluation.
        """
        return EwcModel(setting=setting,
                        hparams=self.hparams,
                        config=self.config)
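For context, the `EWCTask` referenced in `configure` above implements a regularizer of the standard EWC form, L_ewc = (lambda / 2) * sum_i F_i * (theta_i - theta*_i)^2. Below is a generic PyTorch sketch of that penalty, not Sequoia's actual EWCTask implementation:

from typing import Dict

import torch
from torch import Tensor, nn


def ewc_penalty(model: nn.Module,
                old_params: Dict[str, Tensor],  # weight snapshot theta* from the previous task
                fisher: Dict[str, Tensor],      # diagonal Fisher information estimates F
                ewc_lambda: float = 1.0) -> Tensor:
    """ Quadratic penalty anchoring the weights to their previous-task values. """
    loss = torch.zeros(())
    for name, param in model.named_parameters():
        if name in old_params:
            loss = loss + (fisher[name] * (param - old_params[name]) ** 2).sum()
    return 0.5 * ewc_lambda * loss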
Code example #6
File: a2c.py Project: ryanlindeborg/Sequoia
class A2CMethod(StableBaselines3Method):
    """ Method that uses the A2C model from stable-baselines3. """

    # Change the 'name' here, because the default auto-generated name would be
    # 'a_2_c'.
    name: ClassVar[str] = "a2c"
    Model: ClassVar[Type[A2CModel]] = A2CModel

    # Hyper-parameters of the A2C model.
    hparams: A2CModel.HParams = mutable_field(A2CModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting=setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> A2CModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(self, observations: ContinualRLSetting.Observations,
                    action_space: spaces.Space) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """

    def get_search_space(
            self,
            setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        search_space = super().get_search_space(setting)
        if isinstance(setting.action_space, spaces.Discrete):
            # From stable_baselines3/common/base_class.py", line 170:
            # > Generalized State-Dependent Exploration (gSDE) can only be used with
            #   continuous actions
            # Therefore we remove related entries in the search space, so they keep
            # their default values.
            search_space.pop("use_sde", None)
            search_space.pop("sde_sample_freq", None)
        return search_space
Code example #7
class Config(FlattenedAccess):
    """Overall Configuration."""
    dataset: DatasetConfig = mutable_field(DatasetConfig)
    model: ModelConfig = mutable_field(ModelConfig)
    dpmmoe: DPMoEConfig = mutable_field(DPMoEConfig)
    train: TrainConfig = mutable_field(TrainConfig)
    eval: EvalConfig = mutable_field(EvalConfig)
    summary: SummaryConfig = mutable_field(SummaryConfig)
    et: float = 1.23
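The `mutable_field` helper used throughout these examples exists because dataclasses reject plain mutable defaults (e.g. a list or dict), and a single default instance shared by every `Config` would usually be a bug. A minimal sketch of the idea using only the standard library (the real simple_parsing helper may differ in details):

import functools
from dataclasses import field


def mutable_field(cls, *args, **kwargs):
    # Give each dataclass instance its own fresh `cls(*args, **kwargs)` default,
    # instead of one default object shared across all instances.
    return field(default_factory=functools.partial(cls, *args, **kwargs))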
Code example #8
File: cndpm_method.py Project: ryanlindeborg/CN-DPM
class HParams(HyperParameters, FlattenedAccess):
    """ Hyper-parameters of the CN-DPM model. """

    # Denotes whether to use CPU instead of CUDA device
    disable_cuda: bool = False
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

    dataset: DatasetConfig = mutable_field(DatasetConfig)
    model: ModelConfig = mutable_field(ModelConfig)
    dpmoe: DPMoEConfig = mutable_field(DPMoEConfig)
    train: TrainConfig = mutable_field(TrainConfig)
    eval: EvalConfig = mutable_field(EvalConfig)
    summary: SummaryConfig = mutable_field(SummaryConfig)
Code example #9
File: base_hparams.py Project: gopeshh/Sequoia
class BaseHParams(HyperParameters):
    """ Set of 'base' Hyperparameters for the 'base' LightningModule. """
    # Class variable versions of the above dicts, for easier subclassing.
    # NOTE: These don't get parsed from the command-line.
    available_optimizers: ClassVar[Dict[
        str, Type[Optimizer]]] = available_optimizers.copy()
    available_encoders: ClassVar[Dict[
        str, Type[nn.Module]]] = available_encoders.copy()

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
    # L2 regularization term for the model weights.
    weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)
    # Which optimizer to use.
    optimizer: Type[Optimizer] = categorical(available_optimizers,
                                             default=optim.Adam)
    # Use an encoder architecture from the torchvision.models package.
    encoder: Type[nn.Module] = categorical(
        available_encoders,
        default=tv_models.resnet18,
        # TODO: Only using these two by default when performing a sweep.
        probabilities={
            "resnet18": 0.5,
            "simple_convnet": 0.5
        },
    )

    # Batch size to use during training and evaluation.
    batch_size: Optional[int] = None

    # Number of hidden units (before the output head).
    # When left to None (default), the hidden size from the pretrained
    # encoder model will be used. When set to an integer value, an
    # additional Linear layer will be placed between the outputs of the
    # encoder in order to map from the pretrained encoder's output size H_e
    # to this new hidden size `new_hidden_size`.
    new_hidden_size: Optional[int] = None
    # Retrain the encoder from scratch.
    train_from_scratch: bool = False
    # Whether we should keep the weights of the pretrained encoder frozen.
    freeze_pretrained_encoder_weights: bool = False

    # Settings for the output head.
    # TODO: This could be overwritten in a subclass to do classification or
    # regression or RL, etc.
    output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)

    # Whether the output head should be detached from the representations.
    # In other words, whether the gradients from the downstream task should be
    # allowed to affect the representations.
    detach_output_head: bool = False

    def __post_init__(self):
        """Use this to initialize (or fix) any fields parsed from the
        command-line.
        """
        super().__post_init__()

    def make_optimizer(self, *args, **kwargs) -> Optimizer:
        """ Creates the Optimizer object from the options. """
        optimizer_class = self.optimizer
        options = {
            "lr": self.learning_rate,
            "weight_decay": self.weight_decay,
        }
        options.update(kwargs)
        return optimizer_class(*args, **options)

    @property
    def encoder_model(self) -> Type[nn.Module]:
        return self.encoder

    def make_encoder(self, encoder_name: Optional[str] = None) -> Tuple[nn.Module, int]:
        """Creates an Encoder model and returns the resulting hidden size.

        Returns:
            Tuple[nn.Module, int]: the encoder and the hidden size.
        """
        if encoder_name and encoder_name not in self.available_encoders:
            raise KeyError(
                f"No encoder with name {encoder_name} found! "
                f"(available encoders: {list(self.available_encoders.keys())})."
            )
        if encoder_name:
            encoder_model = self.available_encoders[encoder_name]
        else:
            encoder_model = self.encoder
        encoder, hidden_size = get_pretrained_encoder(
            encoder_model=encoder_model,
            pretrained=not self.train_from_scratch,
            freeze_pretrained_weights=self.freeze_pretrained_encoder_weights,
            new_hidden_size=self.new_hidden_size,
        )
        return encoder, hidden_size
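A hedged usage sketch for `make_optimizer` above, assuming `BaseHParams` can be instantiated with its defaults; the Linear model is just a placeholder:

from torch import nn

hparams = BaseHParams()
model = nn.Linear(128, 10)
# With the defaults above, this is equivalent to:
# optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
optimizer = hparams.make_optimizer(model.parameters())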
Code example #10
class Experiment(Parseable, Serializable):
    """ Applies a Method to an experimental Setting to obtain Results.

    When the `setting` is not set, this will apply the chosen method on all of
    its "applicable" settings. (i.e. all subclasses of its target setting).

    When the `method` is not set, this will apply all applicable methods on the
    chosen setting.
    """

    # Which experimental setting to use. When left unset, will evaluate the
    # provided method on all applicable settings.
    setting: Optional[Union[Setting, Type[Setting]]] = choice(
        {setting.get_name(): setting for setting in all_settings},
        default=None,
        type=str,
    )
    # Path to a json/yaml file containing preset options for the chosen setting.
    # Can also be one of the keys of the `setting_presets` dictionary,
    # for convenience.
    benchmark: Optional[Union[str, Path]] = None

    # Which experimental method to use. When left unset, will evaluate all
    # compatible methods on the provided setting.
    method: Optional[Union[str, Method, Type[Method]]] = choice(get_method_names(), default=None)

    # All the other configuration options, which are independent of the choice
    # of Setting or of Method, go in this next dataclass here! For example,
    # things like the log directory, whether CUDA is used, etc.
    config: Config = mutable_field(Config)

    wandb: Optional[WandbConfig] = None

    def __post_init__(self):
        if not (self.setting or self.method):
            raise RuntimeError("One of `setting` or `method` must be set!")

        # All settings have a unique name.
        if isinstance(self.setting, str):
            self.setting = get_class_with_name(self.setting, all_settings)

        # Each Method also has a unique name.
        if isinstance(self.method, str):
            self.method = get_class_with_name(self.method, all_methods)

        if self.benchmark:
            # If the provided benchmark isn't a path, try to get the value from
            # the `setting_presets` dict. If it isn't in the dict, raise an
            # error.
            if not Path(self.benchmark).is_file():
                if self.benchmark in setting_presets:
                    self.benchmark = setting_presets[self.benchmark]
                else:
                    raise RuntimeError(
                        f"Could not find benchmark '{self.benchmark}': it "
                        f"is neither a path to a file or a key of the "
                        f"`setting_presets` dictionary. \n\n"
                        f"Available presets: \n"
                        + "\n".join(
                            f"- {preset_name}: \t{preset_file.relative_to(os.getcwd())}"
                            for preset_name, preset_file in setting_presets.items()
                        )
                    )
            # Creating an experiment for the given setting, loaded from the
            # config file.
            # TODO: IDEA: Do the same thing for loading the Method?
            logger.info(
                f"Will load the options for the setting from the file "
                f"at path {self.benchmark}."
            )
            drop_extras = True
            if self.setting is None:
                logger.warning(
                    UserWarning(
                        f"You didn't specify which setting to use, so this will "
                        f"try to infer the correct type of setting to use from the "
                        f"contents of the file, which might not work!\n (Consider "
                        f"running this with the `--setting` option instead."
                    )
                )
                # Find the first type of setting that fits the given file.
                drop_extras = False
                self.setting = Setting

            # Raise an error if any of the args in sys.argv would have been used
            # up by the Setting, just to prevent any ambiguities.
            try:
                _, unused_args = self.setting.from_known_args()
            except ImportError as exc:
                # NOTE: An ImportError can occur here because of a missing OpenGL
                # dependency, since when no arguments are passed, the default RL setting
                # is created (cartpole with pixel observations), which requires a render
                # wrapper to be added (which itself uses pyglet, which uses OpenGL).
                logger.warning(
                    RuntimeWarning(f"Unable to check for unused args: {exc}")
                )
                # In this case, we just pretend that no arguments would have been used.
                unused_args = sys.argv[1:]

            ignored_args = list(set(sys.argv[1:]) - set(unused_args))

            if ignored_args:
                # TODO: This could also be triggered if there were arguments
                # in the method with the same name as some from the Setting.
                raise RuntimeError(
                    f"Cannot pass command-line arguments for the Setting when "
                    f"loading a preset, since these arguments whould have been "
                    f"ignored when creating the setting of type {self.setting} "
                    f"anyway: {ignored_args}"
                )

            assert isclass(self.setting) and issubclass(self.setting, Setting)
            # Actually load the setting from the file.
            # TODO: Why isn't this using `load_benchmark`?
            self.setting = self.setting.load(
                path=self.benchmark, drop_extra_fields=drop_extras
            )
            self.setting.wandb = self.wandb

            if self.method is None:
                raise NotImplementedError(
                    f"For now, you need to specify a Method to use using the "
                    f"`--method` argument when loading the setting from a file."
                )

        if self.setting is not None and self.method is not None:
            if not self.method.is_applicable(self.setting):
                raise RuntimeError(
                    f"Method {self.method} isn't applicable to "
                    f"setting {self.setting}!"
                )

        assert (
            self.setting is None
            or isinstance(self.setting, Setting)
            or issubclass(self.setting, Setting)
        )
        assert (
            self.method is None
            or isinstance(self.method, Method)
            or issubclass(self.method, Method)
        )

    @staticmethod
    def run_experiment(
        setting: Union[Setting, Type[Setting]],
        method: Union[Method, Type[Method]],
        config: Config,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> Results:
        """ Launches an experiment, applying `method` onto `setting`
        and returning the corresponding results.
        
        This assumes that both `setting` and `method` are not None.
        This always returns a single `Results` object.
        
        If either `setting` or `method` is a class, then an instance of that
        class is created from the command-line arguments `argv`.
        
        If `strict_args` is True and there are leftover arguments (not consumed
        by either the Setting or the Method), a RuntimeError is raised.
        
        This then returns the result of `setting.apply(method)`.

        Parameters
        ----------
        argv : Union[str, List[str]], optional
            List of command-line args. When not set, uses the contents of
            `sys.argv`. Defaults to `None`.
        strict_args : bool, optional
            Whether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Results
            The results of applying `method` onto `setting`.
        """
        assert setting is not None and method is not None
        assert isinstance(setting, Setting), f"TODO: Fix this, need to pass a wandb config to the Setting from the experiment!"
        if not (isinstance(setting, Setting) and isinstance(method, Method)):
            setting, method = parse_setting_and_method_instances(
                setting=setting, method=method, argv=argv, strict_args=strict_args
            )

        assert isinstance(setting, Setting)
        assert isinstance(method, Method)
        assert isinstance(config, Config)

        return setting.apply(method, config=config)

    def launch(
        self, argv: Union[str, List[str]] = None, strict_args: bool = False,
    ) -> Results:
        """ Launches the experiment, applying `self.method` onto `self.setting`
        and returning the corresponding results.
        
        This differs from `main` in that this assumes that both `self.setting`
        and `self.method` are not None, and so this always returns a single
        `Results` object.
        
        NOTE: Internally, this is equivalent to calling `run_experiment`,
        passing in the `setting`, `method` and `config` arguments from `self`.
        
        Parameters
        ----------
        argv : Union[str, List[str]], optional
            List of command-line args. When not set, uses the contents of
            `sys.argv`. Defaults to `None`.
        strict_args : bool, optional
            Whether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Results
            An object describing the results of applying Method `self.method` onto
            the Setting `self.setting`.
        """
        assert self.setting is not None
        assert self.method is not None
        assert self.config is not None

        setting, method = self.setting, self.method
        if not (isinstance(setting, Setting) and isinstance(method, Method)):
            setting, method = parse_setting_and_method_instances(
                setting=setting, method=method, argv=argv, strict_args=strict_args
            )

        setting.wandb = self.wandb
        setting.config = self.config

        return setting.apply(method, config=self.config)


    @classmethod
    def main(
        cls, argv: Union[str, List[str]] = None, strict_args: bool = False,
    ) -> Union[Results, Tuple[Dict, Any], List[Tuple[Dict, Results]]]:
        """Launches one or more experiments from the command-line.

        First, we get the choice of method and setting using a first parser.
        Then, we parse the Setting and Method objects using the remaining args
        with two other parsers.

        Parameters
        ----------
        argv : Union[str, List[str]], optional
            Command-line arguments to use. When None (default), uses sys.argv.

        Returns
        -------
        Union[Results,
              Dict[Tuple[Type[Setting], Type[Method], Config], Results]]
            Results of the experiment, if only applying a method to a setting.
            Otherwise, if either of `--setting` or `--method` aren't set, this
            will be a dictionary mapping from
            (setting_type, method_type) tuples to Results.
        """

        if argv is None:
            argv = sys.argv[1:]
        if isinstance(argv, str):
            argv = shlex.split(argv)
        argv_copy = argv.copy()

        experiment: Experiment
        experiment, argv = cls.from_known_args(argv)

        setting: Optional[Type[Setting]] = experiment.setting
        method: Optional[Type[Method]] = experiment.method
        config: Config = experiment.config

        if method is None and setting is None:
            raise RuntimeError(f"One of setting or method must be set.")

        if setting and method:
            # One 'job': Launch it directly.
            setting, method = parse_setting_and_method_instances(
                setting=setting, method=method, argv=argv, strict_args=strict_args
            )
            assert isinstance(setting, Setting)
            assert isinstance(method, Method)
            setting.wandb = experiment.wandb

            results = experiment.launch(argv, strict_args=strict_args)
            print("\n\n EXPERIMENT IS DONE \n\n")
            print(f"Results: {results}")
            return results

        else:
            # TODO: Test out this other case. Haven't used it in a while.
            # TODO: Move this to something like a BatchExperiment?
            all_results = launch_batch_of_runs(
                setting=setting, method=method, argv=argv
            )
            return all_results
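A hedged command-line sketch based on the signatures above: `main` accepts either a string (split with `shlex`) or a list of arguments. The setting and method names here are illustrative placeholders, not necessarily registered names:

if __name__ == "__main__":
    # Apply one method to one setting (a single "job"):
    results = Experiment.main("--setting task_incremental --method baseline")
    # Or leave `--setting` unset to sweep the method over all applicable settings:
    # all_results = Experiment.main("--method baseline")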
Code example #11
File: test_utils.py Project: lebrice/SimpleParsing
class B:
    # # shared_list: List = [] # not allowed.
    # different_list: List = field(default_factory=list)
    shared: A = A()
    different: A = mutable_field(A, a="123")
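What this test fixture illustrates, assuming `A` and `B` are dataclasses and `A` has a field `a`: a plain instance default is evaluated once and shared by every `B`, while `mutable_field` builds a fresh `A` per instance:

b1, b2 = B(), B()
assert b1.shared is b2.shared            # one A() instance shared by all B's
assert b1.different is not b2.different  # a fresh A(a="123") per instance
assert b1.different.a == "123"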
Code example #12
class DQNMethod(StableBaselines3Method):
    """ Method that uses a DQN model from the stable-baselines3 package. """
    Model: ClassVar[Type[DQNModel]] = DQNModel

    # Hyper-parameters of the DQN model.
    hparams: DQNModel.HParams = mutable_field(DQNModel.HParams)

    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 50.

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)

        # The default value for the buffer size in the DQN model is WAY too
        # large, so we re-size it depending on the size of the observations.

        flattened_observation_space = flatten_space(setting.observation_space)
        observation_size_bytes = flattened_observation_space.sample().nbytes

        # If there are more than a few dimensions per observation, then we
        # should probably reduce the size of the replay buffer according to
        # the size of the observations.
        max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024
        max_buffer_length = max_buffer_size_bytes // observation_size_bytes

        if max_buffer_length == 0:
            raise RuntimeError(
                f"Couldn't even fit a single observation in the buffer, "
                f"given the  specified max_buffer_size_megabytes "
                f"({self.max_buffer_size_megabytes}) and the size of a "
                f"single observation ({observation_size_bytes} bytes)!")

        if self.hparams.buffer_size > max_buffer_length:
            calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size
            calculated_size_gb = calculated_size_bytes / 1024**3
            warnings.warn(
                RuntimeWarning(
                    f"The selected buffer size ({self.hparams.buffer_size}) is "
                    f"too large! It would take roughly "
                    f"{calculated_size_gb:.3f} GB to hold that many observations "
                    f"alone. The buffer size will be capped at {max_buffer_length} "
                    f"entries."))

            self.hparams.buffer_size = int(max_buffer_length)

        # Don't use up too many of the observations from the task to fill up the buffer.
        # Truth is, we should probably get this to work first.

        # NOTE: Need to change some attributes depending on the maximal number of steps
        # in the environment allowed in the given Setting.
        if setting.max_steps:
            logger.info(
                f"Total training steps are limited to {setting.steps_per_task} steps "
                f"per task, {setting.max_steps} steps in total.")
            ten_percent_of_step_budget = setting.steps_per_task // 10

            if self.hparams.buffer_size > ten_percent_of_step_budget:
                warnings.warn(
                    RuntimeWarning(
                        "Reducing max buffer size to ten percent of the step budget."
                    ))
                self.hparams.buffer_size = ten_percent_of_step_budget

            if self.hparams.learning_starts > ten_percent_of_step_budget:
                logger.info(
                    f"The model was originally going to use the first "
                    f"{self.hparams.learning_starts} steps for pure random "
                    f"exploration, but the setting has a max number of steps set to "
                    f"{setting.max_steps}, therefore we will limit the number of "
                    f"exploration steps to 10% of that 'step budget' = "
                    f"{ten_percent_of_step_budget} steps.")
                self.hparams.learning_starts = ten_percent_of_step_budget

            if self.hparams.target_update_interval > ten_percent_of_step_budget:
                # Same for the 'update target network' interval.
                self.hparams.target_update_interval = ten_percent_of_step_budget // 2
                logger.info(
                    f"Reducing the target network update interval to "
                    f"{self.hparams.target_update_interval}, because of the limit on "
                    f"training steps imposed by the Setting.")

        logger.info(
            f"Will use a Replay buffer of size {self.hparams.buffer_size}.")

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> DQNModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(self, observations: ContinualRLSetting.Observations,
                    action_space: spaces.Space) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.
Code example #13
class StableBaselines3Method(Method, ABC, target_setting=ContinualRLSetting):
    """ Base class for the methods that use models from the stable_baselines3
    repo.
    """
    family: ClassVar[str] = "sb3"

    # Class variable that represents what kind of Model will be used.
    # (This is just here so we can easily create one Method class per model type
    # by just changing this class attribute.)
    Model: ClassVar[Type[BaseAlgorithm]]

    # HyperParameters of the Method.
    hparams: SB3BaseHParams = mutable_field(SB3BaseHParams)

    # The number of training steps to run per task.
    # NOTE: This shouldn't be set higher than the task length when applying this
    # method on a ContinualRLSetting, because we don't currently have a way of
    # "resetting" the non-stationarity in the environment (there is only one
    # task). For example, if we trained for 10 million steps while the
    # non-stationarity only lasts 10_000 steps, we would mostly have seen an
    # almost-stationary distribution, since the environment stops changing after
    # 10_000 steps.
    train_steps_per_task: int = 10_000

    # Evaluate the agent every ``eval_freq`` timesteps (this may vary a little)
    eval_freq: int = -1
    # callback(s) called at every step with state of the algorithm.
    callback: MaybeCallback = None
    # The number of timesteps before logging.
    log_interval: int = 100
    # the name of the run for TensorBoard logging
    tb_log_name: str = "run"
    # Number of episodes over which to evaluate the agent.
    n_eval_episodes: int = 5
    # Path to a folder where the evaluations will be saved
    eval_log_path: Optional[str] = None

    def __post_init__(self):
        self.model: Optional[BaseAlgorithm] = None
        # Extra wrappers to add to the train_env and valid_env before passing
        # them to the `learn` method from stable-baselines3.
        from sequoia.common.gym_wrappers import TransformObservation, TransformAction, TransformReward
        import operator
        from functools import partial
        self.extra_train_wrappers: List[Callable[[gym.Env], gym.Env]] = [
            partial(TransformObservation, f=operator.itemgetter("x")),
            # partial(TransformAction, f=operator.itemgetter("y_pred")),
            partial(TransformReward, f=operator.itemgetter("y")),
        ]
        self.extra_valid_wrappers: List[Callable[[gym.Env], gym.Env]] = [
            partial(TransformObservation, f=operator.itemgetter("x")),
            partial(TransformReward, f=operator.itemgetter("y")),
        ]
        # Number of timesteps to train on for each task.
        self.total_timesteps_per_task: int = 0

    def configure(self, setting: ContinualRLSetting):
        # Delete the model, if present.
        self.model = None
        # For now, we don't batch the space because stablebaselines3 will add an
        # additional batch dimension if we do.
        # TODO: Still need to debug the batching stuff with stablebaselines,
        # some methods support it, some don't, and it doesn't recognize
        # VectorEnvs from gym.
        setting.batch_size = None

        # BUG: Need to fix an issue when using the CnnPolicy with Atari envs: the
        # input shape isn't what they expect (only 2 channels instead of three,
        # apparently).
        # from sequoia.common.transforms import Transforms
        # NOTE: Important to not use any transforms, since the SB3 methods want to get
        # the 'raw' np.uint8 image as an input.
        transforms = [
            # Transforms.to_tensor,
            # Transforms.three_channels,
            # Transforms.channels_first_if_needed,
        ]
        setting.transforms = transforms
        setting.train_transforms = transforms
        setting.val_transforms = transforms
        setting.test_transforms = transforms

        if self.hparams.policy is None:
            if is_image(setting.observation_space.x):
                self.hparams.policy = "CnnPolicy"
            else:
                self.hparams.policy = "MlpPolicy"

        logger.debug(f"Will use {self.hparams.policy} as the policy.")
        # TODO: Double check that some settings might not impose a limit on
        # number of training steps per environment (e.g. task-incremental RL?)
        if setting.steps_per_phase:
            if self.train_steps_per_task > setting.steps_per_phase:
                warnings.warn(
                    RuntimeWarning(
                        f"Can't train for the requested {self.train_steps_per_task} "
                        f"steps, since we're (currently) only allowed a maximum of "
                        f"{setting.steps_per_phase} steps.)"))
            # Use as many training steps as possible.
            self.train_steps_per_task = setting.steps_per_phase - 1
        # Otherwise, we can train basically as long as we want on each task.

    def create_model(self, train_env: gym.Env,
                     valid_env: gym.Env) -> BaseAlgorithm:
        """ Create a Model given the training and validation environments. """
        model_kwargs = self.hparams.to_dict()
        assert "clear_buffers_between_tasks" not in model_kwargs
        return self.Model(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        # Remove the extra information that the Setting gives us.
        for wrapper in self.extra_train_wrappers:
            train_env = wrapper(train_env)

        for wrapper in self.extra_valid_wrappers:
            valid_env = wrapper(valid_env)

        if self.model is None:
            self.model = self.create_model(train_env, valid_env)
        else:
            # TODO: "Adapt"/re-train the model on the new environment.
            self.model.set_env(train_env)

        # Decide how many steps to train on.
        total_timesteps = self.train_steps_per_task
        logger.info(
            f"Starting training, for a maximum of {total_timesteps} steps.")
        # todo: Customize the parameters of the model and/or of this "learn"
        # method if needed.
        self.model = self.model.learn(
            # The total number of samples (env steps) to train on
            total_timesteps=total_timesteps,
            eval_env=valid_env,
            callback=self.callback,
            log_interval=self.log_interval,
            tb_log_name=self.tb_log_name,
            eval_freq=self.eval_freq,
            n_eval_episodes=self.n_eval_episodes,
            eval_log_path=self.eval_log_path,
            # whether or not to reset the current timestep number (used in logging)
            reset_num_timesteps=True,
        )

    def get_actions(self, observations: ContinualRLSetting.Observations,
                    action_space: spaces.Space) -> ContinualRLSetting.Actions:
        obs = observations.x
        predictions = self.model.predict(obs)
        action, _ = predictions
        assert action in action_space, (observations, action, action_space)
        return action

    def get_search_space(self,
                         setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        return {
            "algo_hparams": self.hparams.get_orion_space(),
        }

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested values
        # leaving other fields unchanged.
        # NOTE: These new hyper-parameters will be used in the next run in the sweep,
        # since each call to `configure` will create a new Model.
        self.hparams = self.hparams.replace(**new_hparams["algo_hparams"])

    def setup_wandb(self, run: Run) -> None:
        """ Called by the Setting when using Weights & Biases, after `wandb.init`.

        This method is here to provide Methods with the opportunity to log some of their
        configuration options or hyper-parameters to wandb.

        NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
        this point.

        Parameters
        ----------
        run : wandb.Run
            Current wandb Run.
        """
        run.config["hparams"] = self.hparams.to_dict()

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        if self.hparams.clear_buffers_between_tasks:
            self.clear_buffers()

    def clear_buffers(self):
        """ Clears out the experience buffer of the Policy. """
        # NOTE: This seems like the right way to do it, but it's not certain.
        if self.model:
            # TODO: These are really interesting methods!
            # self.model.save_replay_buffer
            # self.model.load_replay_buffer
            self.model.replay_buffer.reset()
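The wrappers added in `__post_init__` strip the Setting's structured observations down to the raw arrays that SB3 expects. A minimal sketch of that idea using a plain `gym.ObservationWrapper` as a stand-in for `sequoia.common.gym_wrappers.TransformObservation`:

import gym


class ExtractX(gym.ObservationWrapper):
    """ Returns only the `x` field of a dict-like observation. """

    def __init__(self, env: gym.Env, key: str = "x"):
        super().__init__(env)
        self.key = key

    def observation(self, observation):
        return observation[self.key]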
Code example #14
File: off_policy_method.py Project: gopeshh/Sequoia
class OffPolicyMethod(StableBaselines3Method, ABC):
    """ ABC for a Method that uses an off-policy Algorithm from SB3. """

    # Type of model to use. This has to be overwritten in a subclass.
    Model: ClassVar[Type[OffPolicyModel]] = OffPolicyModel
    # Hyper-parameters of the off-policy model.
    hparams: OffPolicyModel.HParams = mutable_field(OffPolicyModel.HParams)
    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)
        # The default value for the buffer size in the DQN model is WAY too
        # large, so we re-size it depending on the size of the observations.
        # NOTE: (issue #156) Only consider the images, not the task labels for these
        # buffer size calculations (since the task labels might be None and have the
        # np.object dtype).
        x_space = setting.observation_space.x
        flattened_observation_space = flatten_space(x_space)
        observation_size_bytes = flattened_observation_space.sample().nbytes

        # If there are more than a few dimensions per observation, then we
        # should probably reduce the size of the replay buffer according to
        # the size of the observations.
        max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024
        max_buffer_length = max_buffer_size_bytes // observation_size_bytes

        if max_buffer_length == 0:
            raise RuntimeError(
                f"Couldn't even fit a single observation in the buffer, "
                f"given the  specified max_buffer_size_megabytes "
                f"({self.max_buffer_size_megabytes}) and the size of a "
                f"single observation ({observation_size_bytes} bytes)!"
            )

        if self.hparams.buffer_size > max_buffer_length:
            calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size
            calculated_size_gb = calculated_size_bytes / 1024 ** 3
            warnings.warn(
                RuntimeWarning(
                    f"The selected buffer size ({self.hparams.buffer_size}) is "
                    f"too large! It would take roughly "
                    f"{calculated_size_gb:.3f} GB to hold that many observations "
                    f"alone. The buffer size will be capped at {max_buffer_length} "
                    f"entries."
                )
            )

            self.hparams.buffer_size = int(max_buffer_length)

        # NOTE: Need to change some attributes depending on the maximal number of steps
        # in the environment allowed in the given Setting.
        if setting.max_steps:
            logger.info(
                f"Total training steps are limited to {setting.steps_per_task} steps "
                f"per task, {setting.max_steps} steps in total."
            )
            ten_percent_of_step_budget = setting.steps_per_phase // 10

            if self.hparams.buffer_size > ten_percent_of_step_budget:
                warnings.warn(
                    RuntimeWarning(
                        "Reducing max buffer size to ten percent of the step budget."
                    )
                )
                self.hparams.buffer_size = ten_percent_of_step_budget

            if self.hparams.learning_starts > ten_percent_of_step_budget:
                logger.info(
                    f"The model was originally going to use the first "
                    f"{self.hparams.learning_starts} steps for pure random "
                    f"exploration, but the setting has a max number of steps set to "
                    f"{setting.max_steps}, therefore we will limit the number of "
                    f"exploration steps to 10% of that 'step budget' = "
                    f"{ten_percent_of_step_budget} steps."
                )
                self.hparams.learning_starts = ten_percent_of_step_budget
                if self.hparams.train_freq != -1:
                    # Update the model at least 2 times during each task, and at most
                    # once per step.
                    self.hparams.train_freq = min(
                        self.hparams.train_freq, int(0.5 * ten_percent_of_step_budget),
                    )
                    self.hparams.train_freq = max(self.hparams.train_freq, 1)

                logger.info(f"Training frequency: {self.hparams.train_freq}")

        logger.info(f"Will use a Replay buffer of size {self.hparams.buffer_size}.")

        if setting.steps_per_phase:
            if not isinstance(self.hparams.train_freq, int):
                if self.hparams.train_freq[1] == "step":
                    self.hparams.train_freq = self.hparams.train_freq[0]
                else:
                    assert self.hparams.train_freq[1] == "episode"

                    # Use some value based of the maximum episode length if available,
                    # else use a "reasonable" default value.
                    # TODO: Double-check that this makes sense.
                    if setting.max_episode_steps:
                        self.hparams.train_freq = setting.max_episode_steps
                    else:
                        self.hparams.train_freq = 10

                    warnings.warn(
                        RuntimeWarning(
                            f"Need the training frequency units to be steps for now! "
                            f"(Train freq has been changed to every "
                            f"{self.hparams.train_freq} steps)."
                        )
                    )

            # NOTE: We limit the number of training steps per task, such that we never
            # attempt to fill the buffer using more samples than the environment allows.
            if self.hparams.train_freq > setting.steps_per_phase:
                self.hparams.n_steps = math.ceil(0.1 * setting.steps_per_phase)
                logger.info(
                    f"Capping the n_steps to 10% of step budget length: "
                    f"{self.hparams.n_steps}"
                )

            self.train_steps_per_task = min(
                self.train_steps_per_task,
                setting.steps_per_phase - self.hparams.train_freq - 1,
            )
            logger.info(
                f"Limitting training steps per task to {self.train_steps_per_task}"
            )

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> OffPolicyModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations, action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.
Code example #15
class ParentWithOptionalChildrenWithFriends(Serializable):
    name: str = "Consuela"
    children: Mapping[str, Optional[ChildWithFriends]] = mutable_field(
        OrderedDict)
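A hedged round-trip sketch for this fixture, assuming simple_parsing's `Serializable` provides `to_dict`/`from_dict` (which is what makes nested `Optional` children like these serializable):

parent = ParentWithOptionalChildrenWithFriends()
parent.children["jim"] = None  # Optional children may be absent.
restored = ParentWithOptionalChildrenWithFriends.from_dict(parent.to_dict())
assert restored == parent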
Code example #16
File: knn_callback.py Project: ryanlindeborg/Sequoia
class KnnCallback(Callback):
    """ Addon that adds the option of evaluating representations with a KNN.
    
    TODO: Perform the KNN evaluations in different processes using multiprocessing.
    TODO: We could even evaluate the representations of a DIFFERENT dataset with
    the KNN, if the shapes were compatible with the model! For example, we could
    train the model on some CL/RL/etc task, like Omniglot or something, and at
    the same time, evaluate how good the model's representations are at
    disentangling the classes from MNIST or Fashion-MNIST or something else
    entirely! This could be nice when trying to argue about better generalization
    in the model's representations.
    """
    # Options for the KNN classifier
    knn_options: KnnClassifierOptions = mutable_field(KnnClassifierOptions)
    # Maximum number of samples to take from the dataloaders. When 0 (the
    # default), the KNN evaluation is skipped.
    knn_samples: int = 0

    def __post_init__(self):
        self.max_num_batches: int = 0

        self.model: LightningModule
        self.trainer: Trainer

    def on_train_start(self, trainer, pl_module):
        """Called when the train begins."""
        self.trainer = trainer
        self.model = pl_module
        self.setting: ClassIncrementalSetting

    def setup(self, trainer, pl_module, stage: str):
        """Called when fit or test begins"""
        super().setup(trainer, pl_module, stage)

    def on_epoch_end(self, trainer: Trainer, pl_module: LightningModule):
        self.trainer = trainer
        self.model = pl_module
        self.setting = self.model.setting
        config = self.model.config

        if self.knn_samples > 0:
            batch_size = pl_module.batch_size
            # We round this up so we always take at least one batch_size of
            # samples from each dataloader.
            self.max_num_batches = math.ceil(self.knn_samples / batch_size)
            logger.debug(
                f"Taking a maximum of {self.max_num_batches} batches from each dataloader."
            )

            if config.debug:
                self.knn_samples = min(self.knn_samples, 100)

            valid_knn_loss, test_knn_loss = self.evaluate_knn(pl_module)

            # assert False, trainer.callback_metrics.keys()
            loss: Optional[Loss] = trainer.callback_metrics.get("loss_object")
            if loss:
                assert "knn/valid" not in loss.losses
                assert "knn/test" not in loss.losses
                loss.losses["knn/valid"] = valid_knn_loss
                loss.losses["knn/test"] = test_knn_loss

    def log(self, loss_object: Loss):
        if self.trainer.logger:
            self.trainer.logger.log_metrics(loss_object.to_log_dict())

    def get_dataloaders(self, model: LightningModule,
                        mode: str) -> List[DataLoader]:
        """ Retrieve the train/val/test dataloaders for all 'tasks'. """
        setting = model.datamodule
        assert setting, "The LightningModule must have its 'datamodule' attribute set for now."
        # if the setting defines a dataloaders() method, those are for each of the tasks, which is what we want!
        fn = getattr(setting, f"{mode}_dataloaders",
                     getattr(setting, f"{mode}_dataloader"))
        loaders = fn()
        if isinstance(loaders, DataLoader):
            return [loaders]
        assert isinstance(loaders, list)
        return loaders

    def evaluate_knn(self, model: LightningModule) -> Tuple[Loss, Loss]:
        """ Evaluate the representations with a KNN in the context of CL.

        We shorten the train dataloaders to take only the first
        `knn_samples` samples in order to save some compute.
        TODO: Figure out a way to cleanly add the metrics from the callback to
        the `log dict` which is returned by the model. Right now they are
        only printed / logged to wandb directly from here.
        """
        setting = model.datamodule
        assert isinstance(setting, Setting)
        # TODO: Remove this if we want to use this for something else than a
        # Continual setting in the future.
        assert isinstance(setting, ClassIncrementalSetting)
        num_classes = setting.num_classes

        # Check whether the method has access to the task labels at train/test time.
        task_labels_at_test_time: bool = False
        from sequoia.settings import TaskIncrementalSetting
        if isinstance(setting, TaskIncrementalSetting):
            if setting.task_labels_at_test_time:
                task_labels_at_test_time = True
        # TODO: Figure out a way to make sure that we get at least one example
        # of each class to fit the KNN.
        self.knn_samples = max(self.knn_samples, num_classes**2)
        self.max_num_batches = math.ceil(self.knn_samples / model.batch_size)
        logger.info(f"number of classes: {num_classes}")
        logger.info(f"Number of KNN samples: {self.knn_samples}")
        logger.debug(
            f"Taking a maximum of {self.max_num_batches} batches from each dataloader."
        )

        train_loaders: List[DataLoader] = self.get_dataloaders(model,
                                                               mode="train")
        valid_loaders: List[DataLoader] = self.get_dataloaders(model,
                                                               mode="val")
        test_loaders: List[DataLoader] = self.get_dataloaders(model,
                                                              mode="test")

        # Only take the first `knn_samples` samples from each dataloader.
        def shorten(dataloader: DataLoader):
            return take(dataloader, n=self.max_num_batches)

        if self.max_num_batches:
            train_loaders = list(map(shorten, train_loaders))
            valid_loaders = list(map(shorten, valid_loaders))
            test_loaders = list(map(shorten, test_loaders))

        # Create an iterator that alternates between each of the train dataloaders.
        # NOTE: we shortened each of the dataloaders just to be sure that we get
        # at least one batch from each of them.
        train_loader = roundrobin(*train_loaders)

        h_x, y = get_hidden_codes_array(model=model,
                                        dataloader=train_loader,
                                        description="KNN (Train)")
        train_loss, scaler, knn_classifier = fit_knn(x=h_x,
                                                     y=y,
                                                     options=self.knn_options,
                                                     num_classes=num_classes,
                                                     loss_name="knn/train")
        logger.info(f"KNN Train Acc: {train_loss.accuracy:.2%}")
        self.log(train_loss)
        total_valid_loss = Loss("knn/valid")

        # Save the current task ID so we can reset it after testing.
        starting_task_id = model.setting.current_task_id

        for i, dataloader in enumerate(valid_loaders):
            if task_labels_at_test_time:
                model.on_task_switch(i, training=False)
            loss_i = evaluate(model=model,
                              dataloader=dataloader,
                              loss_name=f"[{i}]",
                              scaler=scaler,
                              knn_classifier=knn_classifier,
                              num_classes=setting.num_classes_in_task(i))
            # We use `.absorb(loss_i)` here so that the metrics get merged.
            # That way, if we access `total_valid_loss.accuracy`, this gives the
            # accuracy over all the validation tasks.
            # If we instead used `+= loss_i`, then loss_i would become a subloss
            # of `total_valid_loss`, since they have different names.
            # TODO: Explain this in more detail somewhere else.
            total_valid_loss.absorb(loss_i)
            logger.info(f"KNN Valid[{i}] Acc: {loss_i.accuracy:.2%}")
            self.log(loss_i)

        logger.info(f"KNN Average Valid Acc: {total_valid_loss.accuracy:.2%}")
        self.log(total_valid_loss)

        total_test_loss = Loss("knn/test")
        for i, dataloader in enumerate(test_loaders):
            if task_labels_at_test_time:
                model.on_task_switch(i, training=False)

            # TODO: Should we set the number of classes to the number of
            # classes in the current task?

            loss_i = evaluate(
                model=model,
                dataloader=dataloader,
                loss_name=f"[{i}]",
                scaler=scaler,
                knn_classifier=knn_classifier,
                num_classes=num_classes,
            )
            total_test_loss.absorb(loss_i)
            logger.info(f"KNN Test[{i}] Acc: {loss_i.accuracy:.2%}")
            self.log(loss_i)

        if task_labels_at_test_time:
            model.on_task_switch(starting_task_id, training=False)

        logger.info(f"KNN Average Test Acc: {total_test_loss.accuracy:.2%}")
        self.log(total_test_loss)
        return total_valid_loss, total_test_loss
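For reference, here is a minimal, self-contained sketch of roughly what `fit_knn` and `evaluate` above boil down to, written with scikit-learn. The helper names, the scaling step, and n_neighbors=5 are assumptions for illustration, not Sequoia's exact implementation:

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

def fit_knn_sketch(x: np.ndarray, y: np.ndarray, n_neighbors: int = 5):
    """ Scale the hidden codes, then fit a KNN classifier on them. """
    scaler = StandardScaler().fit(x)
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(scaler.transform(x), y)
    return scaler, knn

def evaluate_sketch(scaler, knn, x: np.ndarray, y: np.ndarray) -> float:
    """ Accuracy of the fitted KNN on (scaled) hidden codes. """
    return knn.score(scaler.transform(x), y)

# Illustrative usage with random "hidden codes":
x_train = np.random.randn(200, 32)
y_train = np.random.randint(0, 10, size=200)
scaler, knn = fit_knn_sketch(x_train, y_train)
accuracy = evaluate_sketch(scaler, knn, x_train, y_train)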
Code example #17
    class HParams(SemiSupervisedModel.HParams, SelfSupervisedModel.HParams,
                  MultiHeadModel.HParams):
        """ HParams of the Model. """
        # NOTE: All the fields below were just copied from the BaseHParams class, just
        # to improve visibility a bit.

        # Class variables that hold the available optimizers and encoders.
        # NOTE: These don't get parsed from the command-line.
        available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = {
            "sgd": optim.SGD,
            "adam": optim.Adam,
            "rmsprop": optim.RMSprop,
        }

        # Which optimizer to use.
        optimizer: Type[Optimizer] = categorical(available_optimizers,
                                                 default=optim.Adam)

        available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = {
            "vgg16": tv_models.vgg16,
            "resnet18": tv_models.resnet18,
            "resnet34": tv_models.resnet34,
            "resnet50": tv_models.resnet50,
            "resnet101": tv_models.resnet101,
            "resnet152": tv_models.resnet152,
            "alexnet": tv_models.alexnet,
            "densenet": tv_models.densenet161,
            # TODO: Add the self-supervised pl modules here!
            "simple_convnet": SimpleConvNet,
        }
        # Which encoder to use.
        encoder: Type[nn.Module] = choice(
            available_encoders,
            default=SimpleConvNet,
            # # TODO: Only considering these two for now when performing an HPO sweep.
            # probabilities={"resnet18": 0., "simple_convnet": 1.0},
        )

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
        # L2 regularization term for the model weights.
        weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)

        # Batch size to use during training and evaluation.
        batch_size: Optional[int] = None

        # Number of hidden units (before the output head).
        # When left to None (default), the hidden size from the pretrained
        # encoder model will be used. When set to an integer value, an
        # additional Linear layer will be placed between the outputs of the
        # encoder in order to map from the pretrained encoder's output size H_e
        # to this new hidden size `new_hidden_size`.
        new_hidden_size: Optional[int] = None
        # Retrain the encoder from scratch.
        train_from_scratch: bool = False
        # Whether we should keep the weights of the pretrained encoder frozen.
        freeze_pretrained_encoder_weights: bool = False

        # Hyper-parameters of the output head.
        output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)

        # Whether the output head should be detached from the representations.
        # In other words, if the gradients from the downstream task should be
        # allowed to affect the representations.
        detach_output_head: bool = False
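As a minimal sketch of how dataclass-style hyper-parameters like the ones above are typically parsed from the command-line with simple_parsing (the `MyHParams` class and its fields are illustrative, not part of Sequoia):

from dataclasses import dataclass
from simple_parsing import ArgumentParser, choice

@dataclass
class MyHParams:
    # Learning rate of the optimizer (generates a --learning_rate argument).
    learning_rate: float = 1e-3
    # Which encoder to use, restricted to a fixed set of choices.
    encoder: str = choice("resnet18", "simple_convnet", default="simple_convnet")

parser = ArgumentParser()
parser.add_arguments(MyHParams, dest="hparams")
args = parser.parse_args(["--learning_rate", "0.01"])
print(args.hparams)  # MyHParams(learning_rate=0.01, encoder='simple_convnet')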
Code example #18
class ParentWithOptionalChildren(Parent):
    name: str = "Consuela"
    children: Dict[str, Optional[Child]] = mutable_field(OrderedDict)
Code example #19
class Parent(Serializable):
    name: str = "Consuela"
    children: Dict[str, Child] = mutable_field(OrderedDict)
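A quick usage sketch (assuming `Child` is a `Serializable` dataclass with a `name` field, like the other classes in these examples; `to_dict`/`from_dict` are the same round-trip methods used elsewhere in this document):

parent = Parent(name="Consuela", children={"bobby": Child(name="Bobby")})
d = parent.to_dict()            # nested Serializable fields become nested dicts
restored = Parent.from_dict(d)  # reconstructs an equal Parent from the dict
assert restored == parent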
Code example #20
class Bob(Serializable):
    cats: Dict[str, Cat] = mutable_field(dict)
Code example #21
File: baseline_method.py Project: optimass/Sequoia
class BaselineMethod(Method, Serializable, Parseable, target_setting=Setting):
    """ Versatile Baseline method which targets all settings.

    Uses pytorch-lightning's Trainer for training and LightningModule as model.

    Uses a [BaselineModel](methods/models/baseline_model/baseline_model.py), which
    can be used for:
    - Self-Supervised training with modular auxiliary tasks;
    - Semi-Supervised training on partially labeled batches;
    - Multi-Head prediction (e.g. in task-incremental scenario);
    """

    # NOTE: these two fields are also used to create the command-line arguments.
    # HyperParameters of the method.
    hparams: BaselineModel.HParams = mutable_field(BaselineModel.HParams)
    # Configuration options.
    config: Config = mutable_field(Config)
    # Options for the Trainer object.
    trainer_options: TrainerConfig = mutable_field(TrainerConfig)

    def __init__(
        self,
        hparams: BaselineModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        **kwargs,
    ):
        """ Creates a new BaselineMethod, using the provided configuration options.

        Parameters
        ----------
        hparams : BaselineModel.HParams, optional
            Hyper-parameters of the BaselineModel used by this Method. Defaults to None.

        config : Config, optional
            Configuration dataclass with options like log_dir, device, etc. Defaults to
            None.

        trainer_options : TrainerConfig, optional
            Dataclass which holds all the options for creating the `pl.Trainer` which
            will be used for training. Defaults to None.

        **kwargs :
            If any of the above arguments are left as `None`, then they will be created
            using any appropriate value from `kwargs`, if present.

        ## Examples:
        ```
        method = BaselineMethod(hparams=BaselineModel.HParams(learning_rate=0.01))
        method = BaselineMethod(learning_rate=0.01) # Same as above

        method = BaselineMethod(config=Config(debug=True))
        method = BaselineMethod(debug=True) # Same as above

        method = BaselineMethod(hparams=BaselineModel.HParams(learning_rate=0.01),
                                config=Config(debug=True))
        method = BaselineMethod(learning_rate=0.01, debug=True) # Same as above
        ```
        """
        # TODO: When creating a Method from a script, like `BaselineMethod()`,
        # should we expect the hparams to be passed? Should we create them from
        # the **kwargs? Should we parse them from the command-line?

        # Option 2: Try to use the keyword arguments to create the hparams,
        # config and trainer options.
        if kwargs:
            logger.info(
                f"using keyword arguments {kwargs} to populate the corresponding "
                f"values in the hparams, config and trainer_options.")
            self.hparams = hparams or BaselineModel.HParams.from_dict(
                kwargs, drop_extra_fields=True)
            self.config = config or Config.from_dict(kwargs,
                                                     drop_extra_fields=True)
            self.trainer_options = trainer_options or TrainerConfig.from_dict(
                kwargs, drop_extra_fields=True)

        elif self._argv:
            # Since the method was parsed from the command-line, parse those as
            # well from the argv that were used to create the Method.
            # Option 3: Parse them from the command-line.
            # assert not kwargs, "Don't pass any extra kwargs to the constructor!"
            self.hparams = hparams or BaselineModel.HParams.from_args(
                self._argv, strict=False)
            self.config = config or Config.from_args(self._argv, strict=False)
            self.trainer_options = trainer_options or TrainerConfig.from_args(
                self._argv, strict=False)

        else:
            # Option 1: Use the default values:
            self.hparams = hparams or BaselineModel.HParams()
            self.config = config or Config()
            self.trainer_options = trainer_options or TrainerConfig()
        assert self.hparams
        assert self.config
        assert self.trainer_options

        if self.config.debug:
            # Disable wandb logging if debug is True.
            self.trainer_options.no_wandb = True

        # The model and Trainer objects will be created in `self.configure`.
        # NOTE: This right here doesn't create the fields, it just gives some
        # type information for static type checking.
        self.trainer: Trainer
        self.model: BaselineModel

        self.additional_train_wrappers: List[Callable] = []
        self.additional_valid_wrappers: List[Callable] = []

        self.setting: Setting

    def configure(self, setting: SettingType) -> None:
        """Configures the method for the given Setting.

        Concretely, this creates the model and Trainer objects which will be
        used to train and test a model for the given `setting`.

        Args:
            setting (SettingType): The setting the method will be evaluated on.

        TODO: For the Challenge, this should be some kind of read-only proxy to the
        actual Setting.
        """
        # Note: this here is temporary, just tinkering with wandb atm.
        method_name: str = self.get_name()

        # Set the default batch size to use, depending on the kind of Setting.
        if self.hparams.batch_size is None:
            if isinstance(setting, ActiveSetting):
                # Default batch size of 1 in RL
                self.hparams.batch_size = 1
            elif isinstance(setting, PassiveSetting):
                self.hparams.batch_size = 32
            else:
                warnings.warn(
                    UserWarning(
                        f"Dont know what batch size to use by default for setting "
                        f"{setting}, will try 16."))
                self.hparams.batch_size = 16
        # Set the batch size on the setting.
        setting.batch_size = self.hparams.batch_size

        # TODO: Should we set the 'config' on the setting from here?
        if setting.config and setting.config == self.config:
            pass
        elif self.config != Config():
            assert (
                setting.config is None or setting.config == Config()
            ), "method.config has been modified, and so has setting.config!"
            setting.config = self.config
        elif setting.config:
            assert (setting.config !=
                    Config()), "Weird, both configs have default values.."
            self.config = setting.config

        setting_name: str = setting.get_name()
        dataset = setting.dataset

        if isinstance(setting, IncrementalSetting):
            if self.hparams.multihead is None:
                # Use a multi-head model by default if the task labels are
                # available at both train and test time.
                if setting.task_labels_at_test_time:
                    assert setting.task_labels_at_train_time
                self.hparams.multihead = setting.task_labels_at_test_time

        if isinstance(setting, ContinualRLSetting):
            setting.add_done_to_observations = True

            if not setting.observe_state_directly:
                if self.hparams.encoder is None:
                    self.hparams.encoder = "simple_convnet"
                # TODO: Add 'proper' transforms for cartpole, specifically?
                from sequoia.common.transforms import Transforms

                setting.train_transforms.append(Transforms.resize_64x64)
                setting.val_transforms.append(Transforms.resize_64x64)
                setting.test_transforms.append(Transforms.resize_64x64)

            # Configure the baseline specifically for an RL setting.
            # TODO: Select which output head to use from the command-line?
            # Limit the number of epochs so we never iterate on a closed env.
            # TODO: Would multiple "epochs" be possible?
            if setting.max_steps is not None:
                self.trainer_options.max_epochs = 1
                self.trainer_options.limit_train_batches = setting.max_steps // (
                    setting.batch_size or 1)
                self.trainer_options.limit_val_batches = min(
                    setting.max_steps // (setting.batch_size or 1), 1000)
                # TODO: Test batch size is limited to 1 for now.
                # NOTE: This isn't used, since we don't call `trainer.test()`.
                self.trainer_options.limit_test_batches = setting.max_steps

        self.model = self.create_model(setting)
        assert self.hparams is self.model.hp

        # The PolicyHead actually does its own backward pass, so we disable
        # automatic optimization when using it.
        from .models.output_heads import PolicyHead

        if isinstance(self.model.output_head, PolicyHead):
            # Doing the backward pass manually, since there might not be a loss
            # at each step.
            self.trainer_options.automatic_optimization = False

        self.trainer = self.create_trainer(setting)
        self.setting = setting

    def fit(
        self,
        train_env: Environment[Observations, Actions, Rewards],
        valid_env: Environment[Observations, Actions, Rewards],
    ):
        """Called by the Setting to train the method.
        Could be called more than once before training is 'over', for instance
        when training on a series of tasks.
        Override this to customize training.
        """
        assert self.model is not None, (
            "Setting should have been called method.configure(setting=self) "
            "before calling `fit`!")
        # TODO: Figure out if there is a smarter way to reset the state of the Trainer,
        # rather than just creating a new one every time.
        self.trainer = self.create_trainer(self.setting)

        # NOTE: It doesn't seem sufficient to just do this, since for instance the
        # early-stopping callback would prevent training on future tasks, since they
        # have higher validation loss:
        # self.trainer.current_epoch = 0

        success = self.trainer.fit(
            model=self.model,
            train_dataloader=train_env,
            val_dataloaders=valid_env,
        )
        # BUG: After `fit`, it seems like the output head of the model is on the CPU?
        self.model.to(self.config.device)

        return success

    def get_actions(self, observations: Observations,
                    action_space: gym.Space) -> Actions:
        """ Get a batch of predictions (actions) for a batch of observations.

        This gets called by the Setting during the test loop.

        TODO: There is a mismatch here between the type of the output of this
        method (`Actions`) and the type of `action_space`: we should either have
        a `Discrete` action space, and this method should return ints, or this
        method should return `Actions`, and the `action_space` should be a
        `NamedTupleSpace` or something similar.
        Either way, `get_actions(obs, action_space) in action_space` should
        always be `True`.
        """
        self.model.eval()
        with torch.no_grad():
            forward_pass = self.model.forward(observations)
        actions: Actions = forward_pass.actions
        action_numpy = actions.actions_np
        assert action_numpy in action_space, (action_numpy, action_space)
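        # E.g. for a `spaces.Discrete(n)` action space, `action_numpy` holds
        # integer(s) in [0, n), and the membership check above is equivalent
        # to `action_space.contains(action_numpy)`.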
        return actions

    def create_model(self, setting: SettingType) -> BaselineModel[SettingType]:
        """Creates the BaselineModel (a LightningModule) for the given Setting.

        You could extend this to customize which model is used depending on the
        setting.

        TODO: As @oleksost pointed out, this might allow the creation of weird
        'frankenstein' methods that are super-specific to each setting, without
        really having anything in common.

        Args:
            setting (SettingType): An experimental setting.

        Returns:
            BaselineModel[SettingType]: The BaselineModel that is to be applied
            to that setting.
        """
        # Create the model, passing the setting, hparams and config.
        return BaselineModel(setting=setting,
                             hparams=self.hparams,
                             config=self.config)

    def create_trainer(self, setting: SettingType) -> Trainer:
        """Creates a Trainer object from pytorch-lightning for the given setting.

        NOTE: At the moment, uses the KNN and VAE callbacks.
        To use different callbacks, override this method.

        Args:

        Returns:
            Trainer: the Trainer object.
        """
        # We use this here to create loggers!
        callbacks = self.create_callbacks(setting)
        loggers = []
        if setting.wandb:
            wandb_logger = setting.wandb.make_logger()
            loggers.append(wandb_logger)
        trainer = self.trainer_options.make_trainer(
            config=self.config,
            callbacks=callbacks,
            loggers=loggers,
        )
        return trainer

    def get_experiment_name(self,
                            setting: Setting,
                            experiment_id: str = None) -> str:
        """Gets a unique name for the experiment where `self` is applied to `setting`.

        This experiment name will be passed to `orion` when performing a run of
        Hyper-Parameter Optimization.

        Parameters
        ----------
        - setting : Setting

            The `Setting` onto which this method will be applied.

        - experiment_id: str, optional

            A custom hash to append to the experiment name. When `None` (default), a
            unique hash will be created based on the values of the Setting's fields.

        Returns
        -------
        str
            The name for the experiment.
        """
        if not experiment_id:
            setting_dict = setting.to_dict()
            # BUG: Some settings have non-string keys/values or something?
            from sequoia.utils.utils import flatten_dict

            d = flatten_dict(setting_dict)
            experiment_id = compute_identity(size=5, **d)
        assert isinstance(setting.dataset,
                          str), "assuming that dataset is a str for now."
        return (
            f"{self.get_name()}-{setting.get_name()}_{setting.dataset}_{experiment_id}"
        )

    def get_search_space(self,
                         setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        return {
            "hparams": self.hparams.get_orion_space(),
            "trainer_options": self.trainer_options.get_orion_space(),
        }

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.
        
        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested values
        # leaving other fields unchanged.
        self.hparams = self.hparams.replace(**new_hparams["hparams"])
        # BUG with the `replace` function and Union[int, float] type, it doesn't
        # preserve the type of the field when serializing/deserializing!
        self.trainer_options.max_epochs = new_hparams["trainer_options"][
            "max_epochs"]

    def hparam_sweep(
        self,
        setting: Setting,
        search_space: Dict[str, Union[str, Dict]] = None,
        experiment_id: str = None,
        database_path: Union[str, Path] = None,
        max_runs: int = None,
        debug: bool = False,
    ) -> Tuple[BaselineModel.HParams, float]:
        # Setting max epochs to 1, just to keep runs somewhat short.
        # NOTE: Now we're actually going to have the max_epochs as a tunable
        # hyper-parameter, so we're not hard-setting this value anymore.
        # self.trainer_options.max_epochs = 1

        # Call 'configure', so that we create `self.model` at least once, which will
        # update the hparams.output_head field to be of the right type. This is
        # necessary in order for the `get_orion_space` to retrieve all the hparams
        # of the output head.
        self.configure(setting)

        return super().hparam_sweep(
            setting=setting,
            search_space=search_space,
            experiment_id=experiment_id,
            database_path=database_path,
            max_runs=max_runs,
            debug=debug or self.config.debug,
        )

    def receive_results(self, setting: Setting, results: Results):
        """ Receives the results of an experiment, where `self` was applied to Setting
        `setting`, which produced results `results`.
        """
        # TODO: Reset the run name so a new one is used for each experiment.

    def create_callbacks(self, setting: SettingType) -> List[Callback]:
        """Create the PytorchLightning Callbacks for this Setting.

        These callbacks will get added to the Trainer in `create_trainer`.

        Parameters
        ----------
        setting : SettingType
            The `Setting` on which this Method is going to be applied.

        Returns
        -------
        List[Callback]
            A list of `Callback` objects to use during training.
        """
        # TODO: Move this to something like a `configure_callbacks` method in the model,
        # once PL adds it.
        # from sequoia.common.callbacks.vae_callback import SaveVaeSamplesCallback
        return [
            EarlyStopping(monitor="val Loss")
            # self.hparams.knn_callback,
            # SaveVaeSamplesCallback(),
        ]

    def apply_all(
            self,
            argv: Union[str,
                        List[str]] = None) -> Dict[Type[Setting], Results]:
        """(WIP): Runs this Method on all its applicable settings.

        Returns
        -------

            Dict mapping from setting type to the Results produced by this method.
        """
        applicable_settings = self.get_applicable_settings()

        all_results: Dict[Type[Setting], Results] = {}
        for setting_type in applicable_settings:
            setting = setting_type.from_args(argv)
            results = setting.apply(self)
            all_results[setting_type] = results
        print(f"All results for method of type {type(self)}:")
        print({
            setting_type.get_name(): (results.get_metric() if results else "crashed")
            for setting_type, results in all_results.items()
        })
        return all_results

    def __init_subclass__(cls,
                          target_setting: Type[SettingType] = Setting,
                          **kwargs) -> None:
        """Called when creating a new subclass of Method.

        Args:
            target_setting (Type[Setting], optional): The target setting.
                Defaults to None, in which case the method will inherit the
                target setting of its parent class.
        """
        if not is_dataclass(cls):
            logger.critical(
                UserWarning(
                    f"The BaselineMethod subclass {cls} should be decorated with "
                    f"@dataclass!\n"
                    f"While this isn't strictly necessary for things to work, it is"
                    f"highly recommended, as any dataclass-style class attributes "
                    f"won't have the corresponding command-line arguments "
                    f"generated, which can cause a lot of subtle bugs."))
        super().__init_subclass__(target_setting=target_setting, **kwargs)

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching between tasks.
        
        Args:
            task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
        """
        self.model.on_task_switch(task_id)

    def setup_wandb(self, run: Run) -> None:
        """ Called by the Setting when using Weights & Biases, after `wandb.init`.
Code example #22
class Bob(FrozenSerializable if frozen else Serializable):
    cats: Dict[str, Cat] = mutable_field(dict)
Code example #23
class ChildWithFriends(Child):
    friends: List[Optional[Child]] = mutable_field(list)
Code example #24
class A2CMethod(OnPolicyMethod):
    """ Method that uses the A2C model from stable-baselines3. """

    # Changing the 'name' here, because the default auto-generated name would
    # be 'a_2_c'.
    name: ClassVar[str] = "a2c"
    Model: ClassVar[Type[A2CModel]] = A2CModel

    # Hyper-parameters of the A2C model.
    hparams: A2CModel.HParams = mutable_field(A2CModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        super().configure(setting=setting)
        if setting.steps_per_phase:
            if self.hparams.n_steps > setting.steps_per_phase:
                self.hparams.n_steps = math.ceil(0.1 * setting.steps_per_phase)
                logger.info(
                    f"Capping the n_steps to 10% of step budget length: "
                    f"{self.hparams.n_steps}")
            # NOTE: We limit the number of training steps per task, such that we never
            # attempt to fill the buffer using more samples than the environment allows.
            self.train_steps_per_task = min(
                self.train_steps_per_task,
                setting.steps_per_phase - self.hparams.n_steps - 1,
            )
            logger.info(
                f"Limitting training steps per task to {self.train_steps_per_task}"
            )
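            # E.g. (illustrative numbers): with steps_per_phase=1_000 and
            # n_steps=2048, n_steps gets capped to ceil(0.1 * 1_000) = 100, and
            # train_steps_per_task to at most 1_000 - 100 - 1 = 899.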

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> A2CModel:
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(self, observations: ContinualRLSetting.Observations,
                    action_space: spaces.Space) -> ContinualRLSetting.Actions:
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)

    def get_search_space(
            self,
            setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        search_space = super().get_search_space(setting)
        if isinstance(setting.action_space, spaces.Discrete):
            # From stable_baselines3/common/base_class.py", line 170:
            # > Generalized State-Dependent Exploration (gSDE) can only be used with
            #   continuous actions
            # Therefore we remove related entries in the search space, so they keep
            # their default values.
            search_space.pop("use_sde", None)
            search_space.pop("sde_sample_freq", None)
        return search_space