class SACMethod(OffPolicyMethod): """ Method that uses the SAC model from stable-baselines3. """ Model: ClassVar[Type[SACModel]] = SACModel # Hyper-parameters of the SAC model. hparams: SACModel.HParams = mutable_field(SACModel.HParams) # Approximate limit on the size of the replay buffer, in megabytes. max_buffer_size_megabytes: float = 2_048.0 def configure(self, setting: ContinualRLSetting): super().configure(setting) def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> SACModel: return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions(self, observations: ContinualRLSetting.Observations, action_space: spaces.Space) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """ Called when switching tasks in a CL setting.
class CustomPPOMethod(PPOMethod):
    """ Template subclass of `PPOMethod`.

    Every hook below simply forwards to the parent implementation; they are
    spelled out so that each one can be customized individually.
    """

    Model: ClassVar[Type[PPOModel]] = PPOModel

    # Hyper-parameters of the PPO Model.
    hparams: PPOModel.HParams = mutable_field(PPOModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        """ Delegates the configuration for `setting` to the parent class. """
        super().configure(setting=setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> PPOModel:
        """ Instantiates `self.Model` on the training env.

        The hyper-parameters are forwarded as keyword arguments; `valid_env`
        is not used here.
        """
        model_kwargs = self.hparams.to_dict()
        return self.Model(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """ Delegates training to the parent class. """
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self,
        observations: ContinualRLSetting.Observations,
        action_space: spaces.Space,
    ) -> ContinualRLSetting.Actions:
        """ Returns the actions chosen by the parent class for `observations`. """
        actions = super().get_actions(
            observations=observations,
            action_space=action_space,
        )
        return actions

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Hook invoked when the task changes in a CL setting.

        `task_id` is the index of the new task when task labels are
        available, and `None` otherwise.

        This override intentionally does nothing (it does not call the parent
        implementation).
        todo: use this to customize how your method handles task transitions.
        """
        return None

    def get_search_space(
        self, setting: ContinualRLSetting
    ) -> Mapping[str, Union[str, Dict]]:
        """ Returns the HPO search space produced by the parent class. """
        search_space = super().get_search_space(setting)
        return search_space
class DDPGMethod(StableBaselines3Method): """ Method that uses the DDPG model from stable-baselines3. """ Model: ClassVar[Type[DDPGModel]] = DDPGModel # Hyper-parameters of the DDPG model. hparams: DDPGModel.HParams = mutable_field(DDPGModel.HParams) def configure(self, setting: ContinualRLSetting): super().configure(setting=setting) def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> DDPGModel: return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions(self, observations: ContinualRLSetting.Observations, action_space: spaces.Space) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """ Called when switching tasks in a CL setting.
class TD3Method(OffPolicyMethod):
    """ Method that uses the TD3 model from stable-baselines3. """

    Model: ClassVar[Type[TD3Model]] = TD3Model

    # Hyper-parameters of the TD3 model.
    hparams: TD3Model.HParams = mutable_field(TD3Model.HParams)

    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def configure(self, setting: ContinualRLSetting):
        """ Delegates the configuration for `setting` to the parent class. """
        super().configure(setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> TD3Model:
        """ Instantiates `self.Model` on the training env.

        The hyper-parameters are forwarded as keyword arguments; `valid_env`
        is not used here.
        """
        model_kwargs = self.hparams.to_dict()
        return self.Model(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """ Delegates training to the parent class. """
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self,
        observations: ContinualRLSetting.Observations,
        action_space: spaces.Space,
    ) -> ContinualRLSetting.Actions:
        """ Returns the actions chosen by the parent class for `observations`. """
        actions = super().get_actions(
            observations=observations,
            action_space=action_space,
        )
        return actions

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Hook invoked when the task changes in a CL setting.

        `task_id` is the index of the new task when task labels are
        available, and `None` otherwise.

        The call is forwarded unchanged to the parent class.
        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)
class EwcMethod(BaselineMethod, target_setting=IncrementalSetting):
    """ BaselineMethod variant whose model is an `EwcModel`.

    Subclass of the BaselineMethod, which adds the EWCTask to the
    `BaselineModel`. This Method is applicable to any CL setting (RL or SL)
    where there are clear task boundaries, regardless of if the task labels
    are given or not.
    """

    # Hyper-parameters of the EWC model.
    hparams: EwcModel.HParams = mutable_field(EwcModel.HParams)

    def __init__(
        self,
        hparams: EwcModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        **kwargs,
    ):
        """ Forwards every argument, unchanged, to `BaselineMethod.__init__`. """
        super().__init__(
            hparams=hparams,
            config=config,
            trainer_options=trainer_options,
            **kwargs,
        )

    def configure(self, setting: Setting):
        """ Called before the method is applied on a setting (before training).

        This is the point where the observation & action spaces become
        available, so it can be used to instantiate the model. Here the work
        is entirely delegated to the parent class.
        """
        super().configure(setting)
        # self.model.add_auxiliary_task(EWCTask(options=self.hparams.ewc))

    def on_task_switch(self, task_id: Optional[int]):
        """ Forwards the task-switch notification to the parent class. """
        super().on_task_switch(task_id)

    def create_model(self, setting: Setting) -> EwcModel:
        """ Create the Model to use for the given Setting.

        In this case an `EwcModel` (our customized version of the
        BaselineModel) is built from this method's `hparams` and `config`.

        Parameters
        ----------
        setting : Setting
            The experimental Setting this Method will be applied to.

        Returns
        -------
        EwcModel
            The Model that will be trained and used for evaluation.
        """
        model = EwcModel(setting=setting, hparams=self.hparams, config=self.config)
        return model
class A2CMethod(StableBaselines3Method):
    """ Method that uses the A2C model from stable-baselines3. """

    # changing the 'name' in this case here, because the default name would be
    # 'a_2_c'.
    name: ClassVar[str] = "a2c"

    Model: ClassVar[Type[A2CModel]] = A2CModel

    # Hyper-parameters of the A2C model.
    hparams: A2CModel.HParams = mutable_field(A2CModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        """ Delegates the configuration for `setting` to the parent class. """
        super().configure(setting=setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> A2CModel:
        """ Instantiates `self.Model` on the training env.

        The hyper-parameters are forwarded as keyword arguments; `valid_env`
        is not used here.
        """
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """ Delegates training to the parent class. """
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self,
        observations: ContinualRLSetting.Observations,
        action_space: spaces.Space,
    ) -> ContinualRLSetting.Actions:
        """ Returns the actions chosen by the parent class for `observations`. """
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        This override intentionally does nothing (it does not call the parent
        implementation).
        todo: use this to customize how your method handles task transitions.
        """

    def get_search_space(
        self, setting: ContinualRLSetting
    ) -> Mapping[str, Union[str, Dict]]:
        """ Returns the HPO search space to use in the given Setting.

        Starts from the parent's search space and, when the setting has a
        discrete action space, removes the gSDE-related entries so that those
        hyper-parameters keep their default values.

        Parameters
        ----------
        setting : ContinualRLSetting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            The (possibly pruned) search space dictionary.
        """
        search_space = super().get_search_space(setting)
        if isinstance(setting.action_space, spaces.Discrete):
            # From stable_baselines3/common/base_class.py", line 170:
            # > Generalized State-Dependent Exploration (gSDE) can only be used with
            # continuous actions
            # Therefore we remove related entries in the search space, so they keep
            # their default values.
            gsde_keys = ("use_sde", "sde_sample_freq")
            for key in gsde_keys:
                search_space.pop(key, None)
            # StableBaselines3Method.get_search_space nests the hyper-parameter
            # priors under the "algo_hparams" key, so the entries also have to
            # be removed from that nested dict; popping them only from the top
            # level leaves them in the search space.
            algo_space = search_space.get("algo_hparams")
            if isinstance(algo_space, dict):
                for key in gsde_keys:
                    algo_space.pop(key, None)
        return search_space
class Config(FlattenedAccess):
    """Overall Configuration.

    Groups one nested configuration object per concern; each nested field is
    declared through `mutable_field` with its configuration class.
    """

    # Nested `DatasetConfig` group.
    dataset: DatasetConfig = mutable_field(DatasetConfig)
    # Nested `ModelConfig` group.
    model: ModelConfig = mutable_field(ModelConfig)
    # Nested `DPMoEConfig` group.
    # NOTE(review): this attribute is spelled `dpmmoe` here, while the `HParams`
    # class names the equivalent field `dpmoe` -- confirm which is intended.
    dpmmoe: DPMoEConfig = mutable_field(DPMoEConfig)
    # Nested `TrainConfig` group.
    train: TrainConfig = mutable_field(TrainConfig)
    # Nested `EvalConfig` group.
    eval: EvalConfig = mutable_field(EvalConfig)
    # Nested `SummaryConfig` group.
    summary: SummaryConfig = mutable_field(SummaryConfig)
    # NOTE(review): the purpose of `et` is not evident from this class -- confirm.
    et: float = 1.23
class HParams(HyperParameters, FlattenedAccess): """ Hyper-parameters of the CN-DPM model. """ # Denotes whether to use CPU instead of CUDA device disable_cuda: bool = False device: str = "cuda" if torch.cuda.is_available() else "cpu" dataset: DatasetConfig = mutable_field(DatasetConfig) model: ModelConfig = mutable_field(ModelConfig) dpmoe: DPMoEConfig = mutable_field(DPMoEConfig) train: TrainConfig = mutable_field(TrainConfig) eval: EvalConfig = mutable_field(EvalConfig) summary: SummaryConfig = mutable_field(SummaryConfig)
class BaseHParams(HyperParameters):
    """ Set of 'base' Hyperparameters for the 'base' LightningModule. """

    # Class variable versions of the above dicts, for easier subclassing.
    # NOTE: These don't get parsed from the command-line.
    available_optimizers: ClassVar[
        Dict[str, Type[Optimizer]]
    ] = available_optimizers.copy()
    available_encoders: ClassVar[
        Dict[str, Type[nn.Module]]
    ] = available_encoders.copy()

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
    # L2 regularization term for the model weights.
    weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)
    # Which optimizer to use.
    optimizer: Type[Optimizer] = categorical(available_optimizers, default=optim.Adam)
    # Use an encoder architecture from the torchvision.models package.
    encoder: Type[nn.Module] = categorical(
        available_encoders,
        default=tv_models.resnet18,
        # TODO: Only using these two by default when performing a sweep.
        probabilities={"resnet18": 0.5, "simple_convnet": 0.5},
    )

    # Batch size to use during training and evaluation.
    batch_size: Optional[int] = None

    # Number of hidden units (before the output head).
    # When left to None (default), the hidden size from the pretrained
    # encoder model will be used. When set to an integer value, an
    # additional Linear layer will be placed between the outputs of the
    # encoder in order to map from the pretrained encoder's output size H_e
    # to this new hidden size `new_hidden_size`.
    new_hidden_size: Optional[int] = None
    # Retrain the encoder from scratch.
    train_from_scratch: bool = False
    # Whether we should keep the weights of the pretrained encoder frozen.
    freeze_pretrained_encoder_weights: bool = False

    # Settings for the output head.
    # TODO: This could be overwritten in a subclass to do classification or
    # regression or RL, etc.
    output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)

    # Whether the output head should be detached from the representations.
    # In other words, if the gradients from the downstream task should be
    # allowed to affect the representations.
    detach_output_head: bool = False

    def __post_init__(self):
        """Use this to initialize (or fix) any fields parsed from the
        command-line.
        """
        super().__post_init__()

    def make_optimizer(self, *args, **kwargs) -> Optimizer:
        """ Creates the Optimizer object from the options.

        Positional arguments are passed through to the optimizer class. The
        keyword arguments default to `lr=self.learning_rate` and
        `weight_decay=self.weight_decay`; any entry of `kwargs` overrides or
        extends these defaults.
        """
        optimizer_class = self.optimizer
        options = {
            "lr": self.learning_rate,
            "weight_decay": self.weight_decay,
        }
        options.update(kwargs)
        return optimizer_class(*args, **options)

    @property
    def encoder_model(self) -> Type[nn.Module]:
        """ Alias for the `encoder` field. """
        return self.encoder

    def make_encoder(self, encoder_name: str = None) -> Tuple[nn.Module, int]:
        """Creates an Encoder model and returns the resulting hidden size.

        Parameters
        ----------
        encoder_name : str, optional
            Key of the encoder to use in `available_encoders`. When not set
            (or empty), the `encoder` field is used instead.

        Returns
        -------
        Tuple[nn.Module, int]
            The encoder and the hidden size.

        Raises
        ------
        KeyError
            If `encoder_name` is given but isn't a key of `available_encoders`.
        """
        if encoder_name:
            if encoder_name not in self.available_encoders:
                raise KeyError(
                    f"No encoder with name {encoder_name} found! "
                    f"(available encoders: {list(self.available_encoders.keys())}."
                )
            # A valid name was requested explicitly: it takes precedence over
            # the `encoder` field.
            encoder_model = self.available_encoders[encoder_name]
        else:
            encoder_model = self.encoder

        encoder, hidden_size = get_pretrained_encoder(
            encoder_model=encoder_model,
            pretrained=not self.train_from_scratch,
            freeze_pretrained_weights=self.freeze_pretrained_encoder_weights,
            new_hidden_size=self.new_hidden_size,
        )
        return encoder, hidden_size
class Experiment(Parseable, Serializable):
    """ Applies a Method to an experimental Setting to obtain Results.

    When the `setting` is not set, this will apply the chosen method on all of
    its "applicable" settings. (i.e. all subclasses of its target setting).

    When the `method` is not set, this will apply all applicable methods on the
    chosen setting.
    """

    # Which experimental setting to use. When left unset, will evaluate the
    # provided method on all applicable settings.
    setting: Optional[Union[Setting, Type[Setting]]] = choice(
        {setting.get_name(): setting for setting in all_settings},
        default=None,
        type=str,
    )
    # Path to a json/yaml file containing preset options for the chosen setting.
    # Can also be one of the key from the `setting_presets` dictionary,
    # for convenience.
    benchmark: Optional[Union[str, Path]] = None

    # Which experimental method to use. When left unset, will evaluate all
    # compatible methods on the provided setting.
    method: Optional[Union[str, Method, Type[Method]]] = choice(
        get_method_names(), default=None
    )

    # All the other configuration options, which are independant of the choice
    # of Setting or of Method, go in this next dataclass here! For example,
    # things like the log directory, wether Cuda is used, etc.
    config: Config = mutable_field(Config)

    wandb: Optional[WandbConfig] = None

    @staticmethod
    def _relative_to_cwd(path: Union[str, Path]) -> str:
        """ Formats `path` relative to the cwd, or unchanged if it lies outside it. """
        try:
            return str(Path(path).relative_to(os.getcwd()))
        except ValueError:
            # `relative_to` raises ValueError when `path` isn't inside the cwd.
            return str(path)

    def __post_init__(self):
        """ Resolves the `setting`, `method` and `benchmark` fields.

        - String values of `setting` / `method` are replaced by the class with
          that name.
        - When `benchmark` is set, it is resolved to a file (directly, or
          through the `setting_presets` dictionary) and the setting is loaded
          from that file.
        - Finally, the method is checked for applicability on the setting.

        Raises
        ------
        RuntimeError
            If neither `setting` nor `method` is set, if the benchmark can't be
            found, if command-line arguments destined to the Setting are passed
            along with a benchmark, or if the method isn't applicable to the
            setting.
        NotImplementedError
            If a benchmark is given without a method.
        """
        if not (self.setting or self.method):
            raise RuntimeError("One of `setting` or `method` must be set!")

        # All settings have a unique name.
        if isinstance(self.setting, str):
            self.setting = get_class_with_name(self.setting, all_settings)

        # Each Method also has a unique name.
        if isinstance(self.method, str):
            self.method = get_class_with_name(self.method, all_methods)

        if self.benchmark:
            # If the provided benchmark isn't a path, try to get the value from
            # the `setting_presets` dict. If it isn't in the dict, raise an
            # error.
            if not Path(self.benchmark).is_file():
                if self.benchmark in setting_presets:
                    self.benchmark = setting_presets[self.benchmark]
                else:
                    raise RuntimeError(
                        f"Could not find benchmark '{self.benchmark}': it "
                        f"is neither a path to a file or a key of the "
                        f"`setting_presets` dictionary. \n\n"
                        f"Available presets: \n"
                        + "\n".join(
                            f"- {preset_name}: \t{self._relative_to_cwd(preset_file)}"
                            for preset_name, preset_file in setting_presets.items()
                        )
                    )
            # Creating an experiment for the given setting, loaded from the
            # config file.
            # TODO: IDEA: Do the same thing for loading the Method?
            logger.info(
                f"Will load the options for the setting from the file "
                f"at path {self.benchmark}."
            )
            drop_extras = True
            if self.setting is None:
                logger.warning(
                    UserWarning(
                        f"You didn't specify which setting to use, so this will "
                        f"try to infer the correct type of setting to use from the "
                        f"contents of the file, which might not work!\n (Consider "
                        f"running this with the `--setting` option instead."
                    )
                )
                # Find the first type of setting that fits the given file.
                drop_extras = False
                self.setting = Setting

            # Raise an error if any of the args in sys.argv would have been used
            # up by the Setting, just to prevent any ambiguities.
            try:
                _, unused_args = self.setting.from_known_args()
            except ImportError as exc:
                # NOTE: An ImportError can occur here because of a missing OpenGL
                # dependency, since when no arguments are passed, the default RL
                # setting is created (cartpole with pixel observations), which
                # requires a render wrapper to be added (which itself uses
                # pyglet, which uses OpenGL).
                logger.warning(
                    RuntimeWarning(f"Unable to check for unused args: {exc}")
                )
                # In this case, we just pretend that no arguments would have
                # been used.
                unused_args = sys.argv[1:]

            ignored_args = list(set(sys.argv[1:]) - set(unused_args))
            if ignored_args:
                # TODO: This could also be trigerred if there were arguments
                # in the method with the same name as some from the Setting.
                raise RuntimeError(
                    f"Cannot pass command-line arguments for the Setting when "
                    f"loading a preset, since these arguments whould have been "
                    f"ignored when creating the setting of type {self.setting} "
                    f"anyway: {ignored_args}"
                )

            assert isclass(self.setting) and issubclass(self.setting, Setting)
            # Actually load the setting from the file.
            # TODO: Why isn't this using `load_benchmark`?
            self.setting = self.setting.load(
                path=self.benchmark, drop_extra_fields=drop_extras
            )
            self.setting.wandb = self.wandb

            if self.method is None:
                raise NotImplementedError(
                    f"For now, you need to specify a Method to use using the "
                    f"`--method` argument when loading the setting from a file."
                )

        if self.setting is not None and self.method is not None:
            if not self.method.is_applicable(self.setting):
                raise RuntimeError(
                    f"Method {self.method} isn't applicable to "
                    f"setting {self.setting}!"
                )

        assert (
            self.setting is None
            or isinstance(self.setting, Setting)
            or issubclass(self.setting, Setting)
        )
        assert (
            self.method is None
            or isinstance(self.method, Method)
            or issubclass(self.method, Method)
        )

    @staticmethod
    def run_experiment(
        setting: Union[Setting, Type[Setting]],
        method: Union[Method, Type[Method]],
        config: Config,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> Results:
        """ Launches an experiment, applying `method` onto `setting`
        and returning the corresponding results.

        This assumes that both `setting` and `method` are not None.
        This always returns a single `Results` object.

        If `method` is a class, an instance of it is created from the
        command-line arguments `argv`. (NOTE: `setting` currently has to be an
        instance already, see the assertion below.)
        If `strict_args` is True and there are leftover arguments (not consumed
        by either the Setting or the Method), a RuntimeError is raised.

        This then returns the result of `setting.apply(method)`.

        Parameters
        ----------
        setting : Union[Setting, Type[Setting]]
            The setting to apply the method onto.
        method : Union[Method, Type[Method]]
            The method (instance or class) to apply.
        config : Config
            Configuration options passed to `setting.apply`.
        argv : Union[str, List[str]], optional
            List of command-line args. When not set, uses the contents of
            `sys.argv`. Defaults to `None`.
        strict_args : bool, optional
            Whether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Results
        """
        assert setting is not None and method is not None
        assert isinstance(setting, Setting), (
            "TODO: Fix this, need to pass a wandb config to the Setting from "
            "the experiment!"
        )

        if not (isinstance(setting, Setting) and isinstance(method, Method)):
            setting, method = parse_setting_and_method_instances(
                setting=setting, method=method, argv=argv, strict_args=strict_args
            )

        assert isinstance(setting, Setting)
        assert isinstance(method, Method)
        assert isinstance(config, Config)

        return setting.apply(method, config=config)

    def launch(
        self, argv: Union[str, List[str]] = None, strict_args: bool = False,
    ) -> Results:
        """ Launches the experiment, applying `self.method` onto `self.setting`
        and returning the corresponding results.

        This differs from `main` in that this assumes that both `self.setting`
        and `self.method` are not None, and so this always returns a single
        `Results` object.

        When `self.setting` and `self.method` are already instances they are
        used as-is; otherwise instances are created from `argv`.

        Parameters
        ----------
        argv : Union[str, List[str]], optional
            List of command-line args. When not set, uses the contents of
            `sys.argv`. Defaults to `None`.
        strict_args : bool, optional
            Whether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Results
            An object describing the results of applying Method `self.method`
            onto the Setting `self.setting`.
        """
        assert self.setting is not None
        assert self.method is not None
        assert self.config is not None

        # Start from the stored objects, so that the names below are bound even
        # when no parsing is needed (both are already instances).
        setting, method = self.setting, self.method
        if not (isinstance(setting, Setting) and isinstance(method, Method)):
            setting, method = parse_setting_and_method_instances(
                setting=setting,
                method=method,
                argv=argv,
                strict_args=strict_args,
            )

        setting.wandb = self.wandb
        setting.config = self.config
        return setting.apply(method, config=self.config)

    @classmethod
    def main(
        cls, argv: Union[str, List[str]] = None, strict_args: bool = False,
    ) -> Union[Results, Tuple[Dict, Any], List[Tuple[Dict, Results]]]:
        """Launches one or more experiments from the command-line.

        First, we get the choice of method and setting using a first parser.
        Then, we parse the Setting and Method objects using the remaining args
        with two other parsers.

        Parameters
        ----------
        argv : Union[str, List[str]], optional, by default None
            command-line arguments to use. When None (default), uses sys.argv.
        strict_args : bool, optional
            Whether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Union[Results, Tuple[Dict, Any], List[Tuple[Dict, Results]]]
            Results of the experiment, if only applying a method to a setting.
            Otherwise, if either of `--setting` or `--method` aren't set, this
            is whatever `launch_batch_of_runs` returns for the batch of runs.

        Raises
        ------
        RuntimeError
            If neither the setting nor the method is set.
        """
        if argv is None:
            argv = sys.argv[1:]
        if isinstance(argv, str):
            argv = shlex.split(argv)

        experiment: Experiment
        experiment, argv = cls.from_known_args(argv)

        setting: Optional[Type[Setting]] = experiment.setting
        method: Optional[Type[Method]] = experiment.method
        config: Config = experiment.config

        if method is None and setting is None:
            raise RuntimeError("One of setting or method must be set.")

        if setting and method:
            # One 'job': Launch it directly.
            setting, method = parse_setting_and_method_instances(
                setting=setting, method=method, argv=argv, strict_args=strict_args
            )
            assert isinstance(setting, Setting)
            assert isinstance(method, Method)
            setting.wandb = experiment.wandb

            # Hand the parsed instances over to the experiment, so that `launch`
            # uses them directly rather than parsing `argv` a second time.
            experiment.setting = setting
            experiment.method = method

            results = experiment.launch(argv, strict_args=strict_args)
            print("\n\n EXPERIMENT IS DONE \n\n")
            print(f"Results: {results}")
            return results

        # TODO: Test out this other case. Haven't used it in a while.
        # TODO: Move this to something like a BatchExperiment?
        all_results = launch_batch_of_runs(
            setting=setting, method=method, argv=argv
        )
        return all_results
class B:
    # Illustrates two ways of giving a field an `A` default value.
    # # shared_list: List = []  # not allowed.
    # different_list: List = field(default_factory=list)

    # `A()` is evaluated once, when this class body runs, so this single `A`
    # object is the class-level default value.
    shared: A = A()
    # Default declared through `mutable_field(A, a="123")`.
    # NOTE(review): presumably this builds a new `A(a="123")` for every `B`
    # instance (like `default_factory`) -- confirm against `mutable_field`.
    different: A = mutable_field(A, a="123")
class DQNMethod(StableBaselines3Method): """ Method that uses a DQN model from the stable-baselines3 package. """ Model: ClassVar[Type[DQNModel]] = DQNModel # Hyper-parameters of the DQN model. hparams: DQNModel.HParams = mutable_field(DQNModel.HParams) # Approximate limit on the size of the replay buffer, in megabytes. max_buffer_size_megabytes: float = 50. def configure(self, setting: ContinualRLSetting): super().configure(setting) # The default value for the buffer size in the DQN model is WAY too # large, so we re-size it depending on the size of the observations. flattened_observation_space = flatten_space(setting.observation_space) observation_size_bytes = flattened_observation_space.sample().nbytes # IF there are more than a few dimensions per observation, then we # should probably reduce the size of the replay buffer according to # the size of the observations. max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024 max_buffer_length = max_buffer_size_bytes // observation_size_bytes if max_buffer_length == 0: raise RuntimeError( f"Couldn't even fit a single observation in the buffer, " f"given the specified max_buffer_size_megabytes " f"({self.max_buffer_size_megabytes}) and the size of a " f"single observation ({observation_size_bytes} bytes)!") if self.hparams.buffer_size > max_buffer_length: calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size calculated_size_gb = calculated_size_bytes / 1024**3 warnings.warn( RuntimeWarning( f"The selected buffer size ({self.hparams.buffer_size} is " f"too large! (It would take roughly around " f"{calculated_size_gb:.3f}Gb to hold many observations alone! " f"The buffer size will be capped at {max_buffer_length} " f"entries.")) self.hparams.buffer_size = int(max_buffer_length) # Don't use up too many of the observations from the task to fill up the buffer. # Truth is, we should probably get this to work first. 
# NOTE: Need to change some attributes depending on the maximal number of steps # in the environment allowed in the given Setting. if setting.max_steps: logger.info( f"Total training steps are limited to {setting.steps_per_task} steps " f"per task, {setting.max_steps} steps in total.") ten_percent_of_step_budget = setting.steps_per_task // 10 if self.hparams.buffer_size > ten_percent_of_step_budget: warnings.warn( RuntimeWarning( "Reducing max buffer size to ten percent of the step budget." )) self.hparams.buffer_size = ten_percent_of_step_budget if self.hparams.learning_starts > ten_percent_of_step_budget: logger.info( f"The model was originally going to use the first " f"{self.hparams.learning_starts} steps for pure random " f"exploration, but the setting has a max number of steps set to " f"{setting.max_steps}, therefore we will limit the number of " f"exploration steps to 10% of that 'step budget' = " f"{ten_percent_of_step_budget} steps.") self.hparams.learning_starts = ten_percent_of_step_budget if self.hparams.target_update_interval > ten_percent_of_step_budget: # Same for the 'update target network' interval. 
self.hparams.target_update_interval = ten_percent_of_step_budget // 2 logger.info( f"Reducing the target network update interval to " f"{self.hparams.target_update_interval}, because of the limit on " f"training steps imposed by the Setting.") logger.info( f"Will use a Replay buffer of size {self.hparams.buffer_size}.") def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> DQNModel: return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions(self, observations: ContinualRLSetting.Observations, action_space: spaces.Space) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """ Called when switching tasks in a CL setting.
class StableBaselines3Method(Method, ABC, target_setting=ContinualRLSetting):
    """ Base class for the methods that use models from the stable_baselines3
    repo.
    """

    family: ClassVar[str] = "sb3"

    # Class variable that represents what kind of Model will be used.
    # (This is just here so we can easily create one Method class per model type
    # by just changing this class attribute.)
    Model: ClassVar[Type[BaseAlgorithm]]

    # HyperParameters of the Method.
    hparams: SB3BaseHParams = mutable_field(SB3BaseHParams)

    # The number of training steps to run per task.
    # NOTE: This shouldn't be set to more than the task length when applying this
    # method on a ContinualRLSetting, because we don't currently have a way of
    # "resetting" the nonstationarity in the environment, and there is only one
    # task, therefore if we trained for say 10 million steps, while the
    # non-stationarity only lasts for 10_000 steps, we'd have seen an almost
    # stationary distribution, since the environment would have stopped changing
    # after 10_000 steps.
    train_steps_per_task: int = 10_000

    # Evaluate the agent every ``eval_freq`` timesteps (this may vary a little)
    eval_freq: int = -1
    # callback(s) called at every step with state of the algorithm.
    callback: MaybeCallback = None
    # The number of timesteps before logging.
    log_interval: int = 100
    # the name of the run for TensorBoard logging
    tb_log_name: str = "run"
    # Number of episode to evaluate the agent
    # NOTE(review): this attribute has no type annotation, unlike its
    # neighbours -- confirm whether that is intentional.
    n_eval_episodes = 5
    # Path to a folder where the evaluations will be saved
    eval_log_path: Optional[str] = None

    def __post_init__(self):
        """ Initializes the non-field attributes: the model and the env wrappers. """
        self.model: Optional[BaseAlgorithm] = None
        # Extra wrappers to add to the train_env and valid_env before passing
        # them to the `learn` method from stable-baselines3.
        from sequoia.common.gym_wrappers import (
            TransformObservation,
            TransformAction,
            TransformReward,
        )
        import operator
        from functools import partial

        self.extra_train_wrappers: List[Callable[[gym.Env], gym.Env]] = [
            partial(TransformObservation, f=operator.itemgetter("x")),
            # partial(TransformAction, f=operator.itemgetter("y_pred"),
            partial(TransformReward, f=operator.itemgetter("y")),
        ]
        self.extra_valid_wrappers: List[Callable[[gym.Env], gym.Env]] = [
            partial(TransformObservation, f=operator.itemgetter("x")),
            partial(TransformReward, f=operator.itemgetter("y")),
        ]
        # Number of timesteps to train on for each task.
        self.total_timesteps_per_task: int = 0

    def configure(self, setting: ContinualRLSetting):
        """ Prepares this Method (and adjusts `setting`) before training.

        Discards any existing model, disables batching and transforms on the
        setting, picks a default policy when none was chosen, and limits the
        number of training steps per task to what the setting allows.
        """
        # Delete the model, if present.
        self.model = None
        # For now, we don't batch the space because stablebaselines3 will add an
        # additional batch dimension if we do.
        # TODO: Still need to debug the batching stuff with stablebaselines,
        # some methods support it, some don't, and it doesn't recognize
        # VectorEnvs from gym.
        setting.batch_size = None
        # BUG: Need to fix an issue when using the CnnPolicy and Atary envs, the
        # input shape isn't what they expect (only 2 channels instead of three
        # apparently.)
        # from sequoia.common.transforms import Transforms
        # NOTE: Important to not use any transforms, since the SB3 methods want to
        # get the 'raw' np.uint8 image as an input.
        transforms = [
            # Transforms.to_tensor,
            # Transforms.three_channels,
            # Transforms.channels_first_if_needed,
        ]
        setting.transforms = transforms
        setting.train_transforms = transforms
        setting.val_transforms = transforms
        setting.test_transforms = transforms

        if self.hparams.policy is None:
            if is_image(setting.observation_space.x):
                self.hparams.policy = "CnnPolicy"
            else:
                self.hparams.policy = "MlpPolicy"

        logger.debug(f"Will use {self.hparams.policy} as the policy.")
        # TODO: Double check that some settings might not impose a limit on
        # number of training steps per environment (e.g. task-incremental RL?)
        if setting.steps_per_phase:
            if self.train_steps_per_task > setting.steps_per_phase:
                warnings.warn(
                    RuntimeWarning(
                        f"Can't train for the requested {self.train_steps_per_task} "
                        f"steps, since we're (currently) only allowed a maximum of "
                        f"{setting.steps_per_phase} steps.)"
                    )
                )
            # Use as many training steps as possible.
            self.train_steps_per_task = setting.steps_per_phase - 1
        # Otherwise, we can train basically as long as we want on each task.

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm:
        """ Create a Model given the training and validation environments.

        The hyper-parameters are forwarded as keyword arguments to `self.Model`;
        `valid_env` is not used here.
        """
        model_kwargs = self.hparams.to_dict()
        assert "clear_buffers_between_tasks" not in model_kwargs
        return self.Model(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """ Trains the model on `train_env` for `train_steps_per_task` steps.

        The model is created on the first call, and re-used (with its
        environment swapped for the new one) on the following calls.
        """
        # Remove the extra information that the Setting gives us.
        for wrapper in self.extra_train_wrappers:
            train_env = wrapper(train_env)

        for wrapper in self.extra_valid_wrappers:
            valid_env = wrapper(valid_env)

        if self.model is None:
            self.model = self.create_model(train_env, valid_env)
        else:
            # TODO: "Adapt"/re-train the model on the new environment.
            self.model.set_env(train_env)

        # Decide how many steps to train on.
        total_timesteps = self.train_steps_per_task
        logger.info(f"Starting training, for a maximum of {total_timesteps} steps.")
        # todo: Customize the parametrers of the model and/or of this "learn"
        # method if needed.
        self.model = self.model.learn(
            # The total number of samples (env steps) to train on
            total_timesteps=total_timesteps,
            eval_env=valid_env,
            callback=self.callback,
            log_interval=self.log_interval,
            tb_log_name=self.tb_log_name,
            eval_freq=self.eval_freq,
            n_eval_episodes=self.n_eval_episodes,
            eval_log_path=self.eval_log_path,
            # whether or not to reset the current timestep number (used in logging)
            reset_num_timesteps=True,
        )

    def get_actions(
        self,
        observations: ContinualRLSetting.Observations,
        action_space: spaces.Space,
    ) -> ContinualRLSetting.Actions:
        """ Returns the action predicted by the model for `observations.x`. """
        obs = observations.x
        predictions = self.model.predict(obs)
        # `predict` returns a pair; only its first item (the action) is used.
        action, _ = predictions
        assert action in action_space, (observations, action, action_space)
        return action

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from
            hyper-parameter names (str) to their priors (str), or to nested
            dicts of the same form.
        """
        return {
            "algo_hparams": self.hparams.get_orion_space(),
        }

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a
        new run.

        It is required that this method be implemented if you want to perform
        HPO sweeps with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm.
            These will have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested
        # values leaving other fields unchanged.
        # NOTE: These new hyper-paramers will be used in the next run in the
        # sweep, since each call to `configure` will create a new Model.
        self.hparams = self.hparams.replace(**new_hparams["algo_hparams"])

    def setup_wandb(self, run: Run) -> None:
        """ Called by the Setting when using Weights & Biases, after
        `wandb.init`.

        This method is here to provide Methods with the opportunity to log some
        of their configuration options or hyper-parameters to wandb.

        NOTE: The Setting has already set the `"setting"` entry in the
        `wandb.config` by this point.

        Parameters
        ----------
        run : wandb.Run
            Current wandb Run.
        """
        run.config["hparams"] = self.hparams.to_dict()

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        Clears the experience buffer when `hparams.clear_buffers_between_tasks`
        is set.
        todo: use this to customize how your method handles task transitions.
        """
        if self.hparams.clear_buffers_between_tasks:
            self.clear_buffers()

    def clear_buffers(self):
        """ Clears out the experience buffer of the Policy.

        Does nothing when there is no model yet, or when the model doesn't have
        a `replay_buffer` attribute.
        """
        # I think that's the right way to do it.. not sure.
        if self.model is None:
            return
        # TODO: These are really interesting methods!
        # self.model.save_replay_buffer
        # self.model.load_replay_buffer
        replay_buffer = getattr(self.model, "replay_buffer", None)
        if replay_buffer is not None:
            replay_buffer.reset()
class OffPolicyMethod(StableBaselines3Method, ABC):
    """ ABC for a Method that uses an off-policy Algorithm from SB3.

    On top of the parent's configuration, `configure` shrinks the replay buffer
    and adjusts `learning_starts` / `train_freq` so that they fit within the
    memory limit and the step budget of the Setting.
    """

    # Type of model to use. This has to be overwritten in a subclass.
    Model: ClassVar[Type[OffPolicyModel]] = OffPolicyModel
    # Hyper-parameters of the off-policy model.
    hparams: OffPolicyModel.HParams = mutable_field(OffPolicyModel.HParams)
    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def configure(self, setting: ContinualRLSetting):
        """Configures the method for `setting`, adapting the off-policy hparams.

        Steps, in order:
        1. Cap `hparams.buffer_size` so that the observations alone fit within
           `max_buffer_size_megabytes`.
        2. When the setting has a `max_steps` budget, cap `buffer_size` and
           `learning_starts` to 10% of the per-phase budget and bound
           `train_freq`.
        3. When the setting has `steps_per_phase`, convert `train_freq` to a
           number of steps, cap it, and limit `train_steps_per_task`.

        Raises
        ------
        RuntimeError
            If not even one observation fits in the allowed buffer size.
        """
        super().configure(setting)
        # The default value for the buffer size in the DQN model is WAY too
        # large, so we re-size it depending on the size of the observations.
        # NOTE: (issue #156) Only consider the images, not the task labels for these
        # buffer size calculations (since the task labels might be None and have the
        # np.object dtype).
        x_space = setting.observation_space.x
        flattened_observation_space = flatten_space(x_space)
        observation_size_bytes = flattened_observation_space.sample().nbytes

        # IF there are more than a few dimensions per observation, then we
        # should probably reduce the size of the replay buffer according to
        # the size of the observations.
        max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024
        max_buffer_length = max_buffer_size_bytes // observation_size_bytes

        if max_buffer_length == 0:
            raise RuntimeError(
                f"Couldn't even fit a single observation in the buffer, "
                f"given the specified max_buffer_size_megabytes "
                f"({self.max_buffer_size_megabytes}) and the size of a "
                f"single observation ({observation_size_bytes} bytes)!"
            )

        if self.hparams.buffer_size > max_buffer_length:
            calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size
            calculated_size_gb = calculated_size_bytes / 1024 ** 3
            warnings.warn(
                RuntimeWarning(
                    f"The selected buffer size ({self.hparams.buffer_size} is "
                    f"too large! (It would take roughly around "
                    f"{calculated_size_gb:.3f}Gb to hold many observations alone! "
                    f"The buffer size will be capped at {max_buffer_length} "
                    f"entries."
                )
            )
            # `max_buffer_length` is a float (megabytes limit is a float).
            self.hparams.buffer_size = int(max_buffer_length)

        # NOTE: Need to change some attributes depending on the maximal number of steps
        # in the environment allowed in the given Setting.
        if setting.max_steps:
            logger.info(
                f"Total training steps are limited to {setting.steps_per_task} steps "
                f"per task, {setting.max_steps} steps in total."
            )
            ten_percent_of_step_budget = setting.steps_per_phase // 10

            if self.hparams.buffer_size > ten_percent_of_step_budget:
                warnings.warn(
                    RuntimeWarning(
                        "Reducing max buffer size to ten percent of the step budget."
                    )
                )
                self.hparams.buffer_size = ten_percent_of_step_budget

            if self.hparams.learning_starts > ten_percent_of_step_budget:
                logger.info(
                    f"The model was originally going to use the first "
                    f"{self.hparams.learning_starts} steps for pure random "
                    f"exploration, but the setting has a max number of steps set to "
                    f"{setting.max_steps}, therefore we will limit the number of "
                    f"exploration steps to 10% of that 'step budget' = "
                    f"{ten_percent_of_step_budget} steps."
                )
                self.hparams.learning_starts = ten_percent_of_step_budget

            # NOTE(review): `train_freq` is compared with ints here, but the code
            # further below also handles a (frequency, unit) pair. If it can still
            # be a pair at this point, `min` would raise a TypeError - confirm.
            if self.hparams.train_freq != -1:
                # Update the model at least 2 times during each task, and at most
                # once per step.
                self.hparams.train_freq = min(
                    self.hparams.train_freq, int(0.5 * ten_percent_of_step_budget),
                )
                self.hparams.train_freq = max(self.hparams.train_freq, 1)

            logger.info(f"Training frequency: {self.hparams.train_freq}")

        logger.info(f"Will use a Replay buffer of size {self.hparams.buffer_size}.")

        if setting.steps_per_phase:
            if not isinstance(self.hparams.train_freq, int):
                # `train_freq` is a (frequency, unit) pair: convert it to steps.
                if self.hparams.train_freq[1] == "step":
                    self.hparams.train_freq = self.hparams.train_freq[0]
                else:
                    assert self.hparams.train_freq[1] == "episode"
                    # Use some value based of the maximum episode length if available,
                    # else use a "reasonable" default value.
                    # TODO: Double-check that this makes sense.
                    if setting.max_episode_steps:
                        self.hparams.train_freq = setting.max_episode_steps
                    else:
                        self.hparams.train_freq = 10
                    warnings.warn(
                        RuntimeWarning(
                            f"Need the training frequency units to be steps for now! "
                            f"(Train freq has been changed to every "
                            f"{self.hparams.train_freq} steps)."
                        )
                    )

            # NOTE: We limit the number of training steps per task, such that we never
            # attempt to fill the buffer using more samples than the environment allows.
            if self.hparams.train_freq > setting.steps_per_phase:
                # Cap the training frequency itself. (This used to assign `n_steps`,
                # which is an on-policy hyper-parameter and left `train_freq`
                # larger than the step budget.)
                self.hparams.train_freq = math.ceil(0.1 * setting.steps_per_phase)
                logger.info(
                    f"Capping the train_freq to 10% of step budget length: "
                    f"{self.hparams.train_freq}"
                )

            self.train_steps_per_task = min(
                self.train_steps_per_task,
                setting.steps_per_phase - self.hparams.train_freq - 1,
            )
            logger.info(
                f"Limitting training steps per task to {self.train_steps_per_task}"
            )

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> OffPolicyModel:
        """Creates the model from `self.Model`, `train_env` and the hparams."""
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """Trains the model; delegates to the parent implementation."""
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        """Returns the actions for `observations`; delegates to the parent."""
        return super().get_actions(
            observations=observations, action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting. """
        # NOTE(review): the remainder of this method is cut off in the available
        # source. Delegating to the parent keeps the inherited behaviour and
        # matches what A2CMethod does - confirm against the original body.
        super().on_task_switch(task_id=task_id)
class ParentWithOptionalChildrenWithFriends(Serializable):
    """ Serializable with a name and a mapping of optional `ChildWithFriends`. """

    name: str = "Consuela"
    # Maps a key to a child entry; an entry may be `None`. Defaults to a new
    # `OrderedDict` per instance.
    children: Mapping[str, Optional[ChildWithFriends]] = mutable_field(
        OrderedDict)
class KnnCallback(Callback):
    """ Callback which evaluates the model's representations with a KNN classifier.

    TODO: Perform the KNN evaluations in different processes using multiprocessing.
    TODO: We could even evaluate the representations of a DIFFERENT dataset with
    the KNN, if the shapes were compatible with the model! For example, we could
    train the model on some CL/RL/etc task, like Omniglot or something, and at the
    same time, evaluate how good the model's representations are at disentangling
    the classes from MNIST or Fashion-MNIST or something else entirely! This could
    be nice when trying to argue about better generalization in the model's
    representations.
    """

    # Options for the KNN classifier
    knn_options: KnnClassifierOptions = mutable_field(KnnClassifierOptions)
    # Maximum number of examples to take from the dataloaders. When None, uses
    # the entire training/validaton/test datasets.
    knn_samples: int = 0

    def __post_init__(self):
        """Initializes the batch limit; `model` and `trainer` are only declared."""
        self.max_num_batches: int = 0
        self.model: LightningModule
        self.trainer: Trainer

    def on_train_start(self, trainer, pl_module):
        """Called when the train begins: keeps references to trainer and model."""
        self.trainer = trainer
        self.model = pl_module
        self.setting: ClassIncrementalSetting

    def setup(self, trainer, pl_module, stage: str):
        """Called when fit or test begins; simply defers to the parent hook."""
        super().setup(trainer, pl_module, stage)

    def on_epoch_end(self, trainer: Trainer, pl_module: LightningModule):
        """Runs the KNN evaluation and stores its losses in the 'loss_object' metric.

        The validation / test KNN losses are added under the "knn/valid" and
        "knn/test" keys of the `Loss` found in `trainer.callback_metrics`, if any.
        """
        self.trainer = trainer
        self.model = pl_module
        self.setting = self.model.setting
        config = self.model.config

        if self.knn_samples > 0:
            # Rounded up, so that at least one batch is always taken from each
            # dataloader.
            self.max_num_batches = math.ceil(self.knn_samples / pl_module.batch_size)
            logger.debug(
                f"Taking a maximum of {self.max_num_batches} batches from each dataloader."
            )

        if config.debug:
            self.knn_samples = min(self.knn_samples, 100)

        knn_losses = self.evaluate_knn(pl_module)
        valid_knn_loss, test_knn_loss = knn_losses

        loss: Optional[Loss] = trainer.callback_metrics.get("loss_object")
        if not loss:
            return
        # The KNN entries must not have been added already.
        for loss_key in ("knn/valid", "knn/test"):
            assert loss_key not in loss.losses
        loss.losses["knn/valid"] = valid_knn_loss
        loss.losses["knn/test"] = test_knn_loss

    def log(self, loss_object: Loss):
        """Sends the log dict of `loss_object` to the trainer's logger, if any."""
        trainer_logger = self.trainer.logger
        if trainer_logger:
            trainer_logger.log_metrics(loss_object.to_log_dict())

    def get_dataloaders(self, model: LightningModule, mode: str) -> List[DataLoader]:
        """ Retrieve the train/val/test dataloaders for all 'tasks'.

        `mode` is the prefix of the method looked up on the datamodule
        ("train", "val" or "test" in this class). The result is always a list.
        """
        setting = model.datamodule
        assert setting, "The LightningModule must have its 'datamodule' attribute set for now."
        # The `<mode>_dataloaders()` method, when the setting defines one, gives
        # the loaders of each task, which is what we want. The singular
        # `<mode>_dataloader` is the fallback (and is always looked up).
        single_loader_fn = getattr(setting, f"{mode}_dataloader")
        fn = getattr(setting, f"{mode}_dataloaders", single_loader_fn)
        loaders = fn()
        if not isinstance(loaders, DataLoader):
            assert isinstance(loaders, list)
            return loaders
        return [loaders]

    def evaluate_knn(self, model: LightningModule) -> Tuple[Loss, Loss]:
        """ Evaluate the representations with a KNN in the context of CL.

        The dataloaders are shortened so that only (roughly) the first
        `knn_samples` samples of each are used, in order to save some compute.

        Returns the total validation loss and the total test loss.

        TODO: Figure out a way to cleanly add the metrics from the callback to
        the ``log dict'' which is returned by the model. Right now they are only
        printed / logged to wandb directly from here.
        """
        setting = model.datamodule
        assert isinstance(setting, Setting)
        # TODO: Remove this if we want to use this for something else than a
        # Continual setting in the future.
        assert isinstance(setting, ClassIncrementalSetting)
        num_classes = setting.num_classes

        # Check wether the method has access to the task labels at train/test time.
        from sequoia.settings import TaskIncrementalSetting

        task_labels_at_test_time: bool = bool(
            isinstance(setting, TaskIncrementalSetting)
            and setting.task_labels_at_test_time
        )

        # TODO: Figure out a way to make sure that we get at least one example
        # of each class to fit the KNN.
        self.knn_samples = max(self.knn_samples, num_classes**2)
        self.max_num_batches = math.ceil(self.knn_samples / model.batch_size)
        logger.info(f"number of classes: {num_classes}")
        logger.info(f"Number of KNN samples: {self.knn_samples}")
        logger.debug(
            f"Taking a maximum of {self.max_num_batches} batches from each dataloader."
        )

        train_loaders: List[DataLoader] = self.get_dataloaders(model, mode="train")
        valid_loaders: List[DataLoader] = self.get_dataloaders(model, mode="val")
        test_loaders: List[DataLoader] = self.get_dataloaders(model, mode="test")

        # Only keep the first `max_num_batches` batches of each dataloader.
        if self.max_num_batches:
            train_loaders = [
                take(loader, n=self.max_num_batches) for loader in train_loaders
            ]
            valid_loaders = [
                take(loader, n=self.max_num_batches) for loader in valid_loaders
            ]
            test_loaders = [
                take(loader, n=self.max_num_batches) for loader in test_loaders
            ]

        # Alternate between the training dataloaders of each task.
        train_loader = roundrobin(*train_loaders)
        h_x, y = get_hidden_codes_array(
            model=model, dataloader=train_loader, description="KNN (Train)"
        )
        train_loss, scaler, knn_classifier = fit_knn(
            x=h_x,
            y=y,
            options=self.knn_options,
            num_classes=num_classes,
            loss_name="knn/train",
        )
        logger.info(f"KNN Train Acc: {train_loss.accuracy:.2%}")
        self.log(train_loss)

        total_valid_loss = Loss("knn/valid")
        # Remember the current task so that it can be restored after testing.
        starting_task_id = model.setting.current_task_id

        for task_index, valid_loader in enumerate(valid_loaders):
            if task_labels_at_test_time:
                model.on_task_switch(task_index, training=False)
            task_loss = evaluate(
                model=model,
                dataloader=valid_loader,
                loss_name=f"[{task_index}]",
                scaler=scaler,
                knn_classifier=knn_classifier,
                num_classes=setting.num_classes_in_task(task_index),
            )
            # `.absorb` merges the metrics, so that `total_valid_loss.accuracy`
            # is the accuracy over all the validation tasks. Using `+=` instead
            # would make `task_loss` a subloss of `total_valid_loss`, since they
            # have different names.
            # TODO: Explain this in more detail somewhere else.
            total_valid_loss.absorb(task_loss)
            logger.info(f"KNN Valid[{task_index}] Acc: {task_loss.accuracy:.2%}")
            self.log(task_loss)

        logger.info(f"KNN Average Valid Acc: {total_valid_loss.accuracy:.2%}")
        self.log(total_valid_loss)

        total_test_loss = Loss("knn/test")
        for task_index, test_loader in enumerate(test_loaders):
            if task_labels_at_test_time:
                model.on_task_switch(task_index, training=False)
            # TODO Should we set the number of classes to be the number of
            # classes in the current task?
            task_loss = evaluate(
                model=model,
                dataloader=test_loader,
                loss_name=f"[{task_index}]",
                scaler=scaler,
                knn_classifier=knn_classifier,
                num_classes=num_classes,
            )
            total_test_loss.absorb(task_loss)
            logger.info(f"KNN Test[{task_index}] Acc: {task_loss.accuracy:.2%}")
            self.log(task_loss)

        # Go back to the task the model was on before the evaluation.
        if task_labels_at_test_time:
            model.on_task_switch(starting_task_id, training=False)

        logger.info(f"KNN Average Test Acc: {total_test_loss.accuracy:.2%}")
        self.log(total_test_loss)
        return total_valid_loss, total_test_loss
class HParams(SemiSupervisedModel.HParams, SelfSupervisedModel.HParams, MultiHeadModel.HParams): """ HParams of the Model. """ # NOTE: All the fields below were just copied from the BaseHParams class, just # to improve visibility a bit. # Class variables that hold the available optimizers and encoders. # NOTE: These don't get parsed from the command-line. available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = { "sgd": optim.SGD, "adam": optim.Adam, "rmsprop": optim.RMSprop, } # Which optimizer to use. optimizer: Type[Optimizer] = categorical(available_optimizers, default=optim.Adam) available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = { "vgg16": tv_models.vgg16, "resnet18": tv_models.resnet18, "resnet34": tv_models.resnet34, "resnet50": tv_models.resnet50, "resnet101": tv_models.resnet101, "resnet152": tv_models.resnet152, "alexnet": tv_models.alexnet, "densenet": tv_models.densenet161, # TODO: Add the self-supervised pl modules here! "simple_convnet": SimpleConvNet, } # Which encoder to use. encoder: Type[nn.Module] = choice( available_encoders, default=SimpleConvNet, # # TODO: Only considering these two for now when performing an HPO sweep. # probabilities={"resnet18": 0., "simple_convnet": 1.0}, ) # Learning rate of the optimizer. learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3) # L2 regularization term for the model weights. weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6) # Batch size to use during training and evaluation. batch_size: Optional[int] = None # Number of hidden units (before the output head). # When left to None (default), the hidden size from the pretrained # encoder model will be used. When set to an integer value, an # additional Linear layer will be placed between the outputs of the # encoder in order to map from the pretrained encoder's output size H_e # to this new hidden size `new_hidden_size`. new_hidden_size: Optional[int] = None # Retrain the encoder from scratch. 
train_from_scratch: bool = False # Wether we should keep the weights of the pretrained encoder frozen. freeze_pretrained_encoder_weights: bool = False # Hyper-parameters of the output head. output_head: OutputHead.HParams = mutable_field(OutputHead.HParams) # Wether the output head should be detached from the representations. # In other words, if the gradients from the downstream task should be # allowed to affect the representations. detach_output_head: bool = False
class ParentWithOptionalChildren(Parent):
    """ Variant of `Parent` whose `children` entries may be `None`. """

    name: str = "Consuela"
    # Maps a key to a child entry; an entry may be `None`. Defaults to a new
    # `OrderedDict` per instance.
    children: Dict[str, Optional[Child]] = mutable_field(OrderedDict)
class Parent(Serializable):
    """ Serializable with a name and a dictionary of `Child` objects. """

    name: str = "Consuela"
    # Maps a key to a `Child`. Defaults to a new `OrderedDict` per instance.
    children: Dict[str, Child] = mutable_field(OrderedDict)
class Bob(Serializable):
    """ Serializable holding a dictionary of `Cat` objects. """

    # Maps a key to a `Cat`. Defaults to a new `dict` per instance.
    cats: Dict[str, Cat] = mutable_field(dict)
class BaselineMethod(Method, Serializable, Parseable, target_setting=Setting):
    """ Versatile Baseline method which targets all settings.

    Uses pytorch-lightning's Trainer for training and LightningModule as model.

    Uses a [BaselineModel](methods/models/baseline_model/baseline_model.py), which
    can be used for:
    - Self-Supervised training with modular auxiliary tasks;
    - Semi-Supervised training on partially labeled batches;
    - Multi-Head prediction (e.g. in task-incremental scenario);
    """

    # NOTE: these two fields are also used to create the command-line arguments.
    # HyperParameters of the method.
    hparams: BaselineModel.HParams = mutable_field(BaselineModel.HParams)
    # Configuration options.
    config: Config = mutable_field(Config)
    # Options for the Trainer object.
    trainer_options: TrainerConfig = mutable_field(TrainerConfig)

    def __init__(
        self,
        hparams: BaselineModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        **kwargs,
    ):
        """ Creates a new BaselineMethod, using the provided configuration options.

        Parameters
        ----------
        hparams : BaselineModel.HParams, optional
            Hyper-parameters of the BaselineModel used by this Method. Defaults to
            None.
        config : Config, optional
            Configuration dataclass with options like log_dir, device, etc. Defaults
            to None.
        trainer_options : TrainerConfig, optional
            Dataclass which holds all the options for creating the `pl.Trainer`
            which will be used for training. Defaults to None.
        **kwargs :
            If any of the above arguments are left as `None`, then they will be
            created using any appropriate value from `kwargs`, if present.

        ## Examples:
        ```
        method = BaselineMethod(hparams=BaselineModel.HParams(learning_rate=0.01))
        method = BaselineMethod(learning_rate=0.01) # Same as above

        method = BaselineMethod(config=Config(debug=True))
        method = BaselineMethod(debug=True) # Same as above

        method = BaselineMethod(hparams=BaselineModel.HParams(learning_rate=0.01),
                                config=Config(debug=True))
        method = BaselineMethod(learning_rate=0.01, debug=True) # Same as above
        ```
        """
        # TODO: When creating a Method from a script, like `BaselineMethod()`,
        # should we expect the hparams to be passed? Should we create them from
        # the **kwargs? Should we parse them from the command-line?

        if kwargs:
            # Option 2: Try to use the keyword arguments to create the hparams,
            # config and trainer options.
            logger.info(
                f"using keyword arguments {kwargs} to populate the corresponding "
                f"values in the hparams, config and trainer_options.")
            self.hparams = hparams or BaselineModel.HParams.from_dict(
                kwargs, drop_extra_fields=True)
            self.config = config or Config.from_dict(kwargs, drop_extra_fields=True)
            self.trainer_options = trainer_options or TrainerConfig.from_dict(
                kwargs, drop_extra_fields=True)
        elif self._argv:
            # Since the method was parsed from the command-line, parse those as
            # well from the argv that were used to create the Method.
            # Option 3: Parse them from the command-line.
            # assert not kwargs, "Don't pass any extra kwargs to the constructor!"
            self.hparams = hparams or BaselineModel.HParams.from_args(
                self._argv, strict=False)
            self.config = config or Config.from_args(self._argv, strict=False)
            self.trainer_options = trainer_options or TrainerConfig.from_args(
                self._argv, strict=False)
        else:
            # Option 1: Use the default values:
            self.hparams = hparams or BaselineModel.HParams()
            self.config = config or Config()
            self.trainer_options = trainer_options or TrainerConfig()

        assert self.hparams
        assert self.config
        assert self.trainer_options

        if self.config.debug:
            # Disable wandb logging if debug is True.
            self.trainer_options.no_wandb = True

        # The model and Trainer objects will be created in `self.configure`.
        # NOTE: This right here doesn't create the fields, it just gives some
        # type information for static type checking.
        self.trainer: Trainer
        self.model: BaselineModel

        self.additional_train_wrappers: List[Callable] = []
        self.additional_valid_wrappers: List[Callable] = []

        self.setting: Setting

    def configure(self, setting: SettingType) -> None:
        """Configures the method for the given Setting.

        Concretely, this creates the model and Trainer objects which will be
        used to train and test a model for the given `setting`.

        Args:
            setting (SettingType): The setting the method will be evaluated on.

        TODO: For the Challenge, this should be some kind of read-only proxy to the
        actual Setting.
        """
        # Set the default batch size to use, depending on the kind of Setting.
        if self.hparams.batch_size is None:
            if isinstance(setting, ActiveSetting):
                # Default batch size of 1 in RL
                self.hparams.batch_size = 1
            elif isinstance(setting, PassiveSetting):
                self.hparams.batch_size = 32
            else:
                warnings.warn(
                    UserWarning(
                        f"Dont know what batch size to use by default for setting "
                        f"{setting}, will try 16."))
                self.hparams.batch_size = 16
        # Set the batch size on the setting.
        setting.batch_size = self.hparams.batch_size

        # TODO: Should we set the 'config' on the setting from here?
        # Make the method and the setting share a single Config: whichever of the
        # two differs from the default `Config()` wins.
        if setting.config and setting.config == self.config:
            pass
        elif self.config != Config():
            assert (
                setting.config is None or setting.config == Config()
            ), "method.config has been modified, and so has setting.config!"
            setting.config = self.config
        elif setting.config:
            assert (setting.config != Config()), "Weird, both configs have default values.."
            self.config = setting.config

        if isinstance(setting, IncrementalSetting):
            if self.hparams.multihead is None:
                # Use a multi-head model by default if the task labels are
                # available at both train and test time.
                if setting.task_labels_at_test_time:
                    assert setting.task_labels_at_train_time
                self.hparams.multihead = setting.task_labels_at_test_time

        if isinstance(setting, ContinualRLSetting):
            setting.add_done_to_observations = True

            if not setting.observe_state_directly:
                if self.hparams.encoder is None:
                    self.hparams.encoder = "simple_convnet"
                # TODO: Add 'proper' transforms for cartpole, specifically?
                from sequoia.common.transforms import Transforms

                setting.train_transforms.append(Transforms.resize_64x64)
                setting.val_transforms.append(Transforms.resize_64x64)
                setting.test_transforms.append(Transforms.resize_64x64)

            # Configure the baseline specifically for an RL setting.
            # TODO: Select which output head to use from the command-line?
            # Limit the number of epochs so we never iterate on a closed env.
            # TODO: Would multiple "epochs" be possible?
            if setting.max_steps is not None:
                self.trainer_options.max_epochs = 1
                self.trainer_options.limit_train_batches = setting.max_steps // (
                    setting.batch_size or 1)
                self.trainer_options.limit_val_batches = min(
                    setting.max_steps // (setting.batch_size or 1), 1000)
                # TODO: Test batch size is limited to 1 for now.
                # NOTE: This isn't used, since we don't call `trainer.test()`.
                self.trainer_options.limit_test_batches = setting.max_steps

        self.model = self.create_model(setting)
        assert self.hparams is self.model.hp

        # The PolicyHead actually does its own backward pass, so we disable
        # automatic optimization when using it.
        from .models.output_heads import PolicyHead

        if isinstance(self.model.output_head, PolicyHead):
            # Doing the backward pass manually, since there might not be a loss
            # at each step.
            self.trainer_options.automatic_optimization = False

        self.trainer = self.create_trainer(setting)
        self.setting = setting

    def fit(
        self,
        train_env: Environment[Observations, Actions, Rewards],
        valid_env: Environment[Observations, Actions, Rewards],
    ):
        """Called by the Setting to train the method.

        Could be called more than once before training is 'over', for instance
        when training on a series of tasks.
        Overwrite this to customize training.

        Returns whatever `Trainer.fit` returns.
        """
        assert self.model is not None, (
            "Setting should have been called method.configure(setting=self) "
            "before calling `fit`!")
        # TODO: Figure out if there is a smarter way to reset the state of the Trainer,
        # rather than just creating a new one every time.
        self.trainer = self.create_trainer(self.setting)
        # NOTE: It doesn't seem sufficient to just do this, since for instance the
        # early-stopping callback would prevent training on future tasks, since they
        # have higher validation loss:
        # self.trainer.current_epoch = 0
        success = self.trainer.fit(
            model=self.model,
            train_dataloader=train_env,
            val_dataloaders=valid_env,
        )
        # BUG: After `fit`, it seems like the output head of the model is on the CPU?
        self.model.to(self.config.device)
        return success

    def get_actions(self, observations: Observations,
                    action_space: gym.Space) -> Actions:
        """ Get a batch of predictions (actions) for a batch of observations.

        This gets called by the Setting during the test loop. The model is put
        in eval mode and the forward pass is done without gradients.

        TODO: There is a mismatch here between the type of the output of this
        method (`Actions`) and the type of `action_space`: we should either have
        a `Discrete` action space, and this method should return ints, or this
        method should return `Actions`, and the `action_space` should be a
        `NamedTupleSpace` or something similar.
        Either way, `get_actions(obs, action_space) in action_space` should
        always be `True`.
        """
        self.model.eval()
        with torch.no_grad():
            forward_pass = self.model.forward(observations)
        actions: Actions = forward_pass.actions
        action_numpy = actions.actions_np
        assert action_numpy in action_space, (action_numpy, action_space)
        return actions

    def create_model(self, setting: SettingType) -> BaselineModel[SettingType]:
        """Creates the BaselineModel (a LightningModule) for the given Setting.

        You could extend this to customize which model is used depending on the
        setting.

        TODO: As @oleksost pointed out, this might allow the creation of weird
        'frankenstein' methods that are super-specific to each setting, without
        really having anything in common.

        Args:
            setting (SettingType): An experimental setting.

        Returns:
            BaselineModel[SettingType]: The BaselineModel that is to be applied
            to that setting.
        """
        # Create the model, passing the setting, hparams and config.
        return BaselineModel(setting=setting, hparams=self.hparams, config=self.config)

    def create_trainer(self, setting: SettingType) -> Trainer:
        """Creates a Trainer object from pytorch-lightning for the given setting.

        The callbacks come from `create_callbacks`. To use different callbacks,
        overwrite that method.

        Args:
            setting (SettingType): The setting; its `wandb` attribute, when set,
            is used to create a logger.

        Returns:
            Trainer: the Trainer object.
        """
        # We use this here to create loggers!
        callbacks = self.create_callbacks(setting)
        loggers = []
        if setting.wandb:
            wandb_logger = setting.wandb.make_logger()
            loggers.append(wandb_logger)
        trainer = self.trainer_options.make_trainer(
            config=self.config, callbacks=callbacks, loggers=loggers,
        )
        return trainer

    def get_experiment_name(self, setting: Setting, experiment_id: str = None) -> str:
        """Gets a unique name for the experiment where `self` is applied to `setting`.

        This experiment name will be passed to `orion` when performing a run of
        Hyper-Parameter Optimization.

        Parameters
        ----------
        - setting : Setting

            The `Setting` onto which this method will be applied.

        - experiment_id: str, optional

            A custom hash to append to the experiment name. When `None` (default), a
            unique hash will be created based on the values of the Setting's fields.

        Returns
        -------
        str
            The name for the experiment.
        """
        if not experiment_id:
            setting_dict = setting.to_dict()
            # BUG: Some settings have non-string keys/value or something?
            from sequoia.utils.utils import flatten_dict

            d = flatten_dict(setting_dict)
            experiment_id = compute_identity(size=5, **d)
        assert isinstance(setting.dataset, str), "assuming that dataset is a str for now."
        return (
            f"{self.get_name()}-{setting.get_name()}_{setting.dataset}_{experiment_id}"
        )

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        return {
            "hparams": self.hparams.get_orion_space(),
            "trainer_options": self.trainer_options.get_orion_space(),
        }

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO
        sweeps with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These
            will have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested values
        # leaving other fields unchanged.
        self.hparams = self.hparams.replace(**new_hparams["hparams"])
        # BUG with the `replace` function and Union[int, float] type, it doesn't
        # preserve the type of the field when serializing/deserializing!
        self.trainer_options.max_epochs = new_hparams["trainer_options"][
            "max_epochs"]

    def hparam_sweep(
        self,
        setting: Setting,
        search_space: Dict[str, Union[str, Dict]] = None,
        experiment_id: str = None,
        database_path: Union[str, Path] = None,
        max_runs: int = None,
        debug: bool = False,
    ) -> Tuple[BaselineModel.HParams, float]:
        """Configures the method for `setting`, then runs the parent's HPO sweep.

        All arguments are forwarded to the parent implementation. Debug mode is
        used when either `debug` or `self.config.debug` is set.
        """
        # Setting max epochs to 1, just to keep runs somewhat short.
        # NOTE: Now we're actually going to have the max_epochs as a tunable
        # hyper-parameter, so we're not hard-setting this value anymore.
        # self.trainer_options.max_epochs = 1

        # Call 'configure', so that we create `self.model` at least once, which will
        # update the hparams.output_head field to be of the right type. This is
        # necessary in order for the `get_orion_space` to retrieve all the hparams
        # of the output head.
        self.configure(setting)

        return super().hparam_sweep(
            setting=setting,
            search_space=search_space,
            experiment_id=experiment_id,
            database_path=database_path,
            max_runs=max_runs,
            debug=debug or self.config.debug,
        )

    def receive_results(self, setting: Setting, results: Results):
        """ Receives the results of an experiment, where `self` was applied to Setting
        `setting`, which produced results `results`.
        """
        # TODO: Reset the run name so a new one is used for each experiment.

    def create_callbacks(self, setting: SettingType) -> List[Callback]:
        """Create the PytorchLightning Callbacks for this Setting.

        These callbacks will get added to the Trainer in `create_trainer`.

        Parameters
        ----------
        setting : SettingType
            The `Setting` on which this Method is going to be applied.

        Returns
        -------
        List[Callback]
            A List of `Callaback` objects to use during training.
        """
        # TODO: Move this to something like a `configure_callbacks` method in the model,
        # once PL adds it.
        # from sequoia.common.callbacks.vae_callback import SaveVaeSamplesCallback
        return [
            EarlyStopping(monitor="val Loss")
            # self.hparams.knn_callback,
            # SaveVaeSamplesCallback(),
        ]

    def apply_all(
        self, argv: Union[str, List[str]] = None) -> Dict[Type[Setting], Results]:
        """(WIP): Runs this Method on all its applicable settings.

        Returns
        -------
            Dict mapping from setting type to the Results produced by this method.
        """
        applicable_settings = self.get_applicable_settings()

        all_results: Dict[Type[Setting], Results] = {}
        for setting_type in applicable_settings:
            setting = setting_type.from_args(argv)
            results = setting.apply(self)
            all_results[setting_type] = results
        print(f"All results for method of type {type(self)}:")
        # The keys of `all_results` are setting types, not methods.
        print({
            setting_type.get_name(): (results.get_metric() if results else "crashed")
            for setting_type, results in all_results.items()
        })
        return all_results

    def __init_subclass__(cls,
                          target_setting: Type[SettingType] = Setting,
                          **kwargs) -> None:
        """Called when creating a new subclass of Method.

        Logs a message when the subclass isn't a dataclass, then forwards
        everything to the parent's `__init_subclass__`.

        Args:
            target_setting (Type[Setting], optional): The target setting.
                Defaults to `Setting`.
        """
        if not is_dataclass(cls):
            logger.critical(
                UserWarning(
                    f"The BaselineMethod subclass {cls} should be decorated with "
                    f"@dataclass!\n"
                    f"While this isn't strictly necessary for things to work, it is "
                    f"highly recommended, as any dataclass-style class attributes "
                    f"won't have the corresponding command-line arguments "
                    f"generated, which can cause a lot of subtle bugs."))
        super().__init_subclass__(target_setting=target_setting, **kwargs)

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching between tasks.

        Args:
            task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
        """
        self.model.on_task_switch(task_id)

    def setup_wandb(self, run: Run) -> None:
        """ Called by the Setting when using Weights & Biases, after `wandb.init`. """
        # NOTE(review): the remainder of this method is cut off in the available
        # source; only the visible part of the docstring is reproduced here.
        # Restore the original body when merging.
# Serializable container for a string-keyed mapping of `Cat` objects. The base
# class is picked by the `frozen` flag from the enclosing scope:
# `FrozenSerializable` when it is truthy, `Serializable` otherwise.
class Bob(FrozenSerializable if frozen else Serializable):
    cats: Dict[str, Cat] = mutable_field(dict)
# A `Child` that additionally holds a list of friends; each entry is either
# another `Child` or None. The default value comes from `mutable_field(list)`.
class ChildWithFriends(Child):
    friends: List[Optional[Child]] = mutable_field(list)
class A2CMethod(OnPolicyMethod):
    """ Method that uses the A2C model from stable-baselines3. """
    # The name is set explicitly here, since the default name would otherwise
    # be 'a_2_c'.
    name: ClassVar[str] = "a2c"
    Model: ClassVar[Type[A2CModel]] = A2CModel

    # Hyper-parameters of the A2C model.
    hparams: A2CModel.HParams = mutable_field(A2CModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        """Adapt `n_steps` and the per-task training budget to the setting.

        Does nothing beyond the parent's `configure` when the setting has no
        (truthy) `steps_per_phase`.
        """
        super().configure(setting=setting)
        if not setting.steps_per_phase:
            return

        if self.hparams.n_steps > setting.steps_per_phase:
            # The rollout length exceeds the step budget: shrink it to 10% of
            # the budget (rounded up).
            self.hparams.n_steps = math.ceil(0.1 * setting.steps_per_phase)
            capped_message = (
                f"Capping the n_steps to 10% of step budget length: "
                f"{self.hparams.n_steps}"
            )
            logger.info(capped_message)

        # NOTE: We limit the number of training steps per task, such that we
        # never attempt to fill the buffer using more samples than the
        # environment allows.
        budget_after_rollout = setting.steps_per_phase - self.hparams.n_steps - 1
        self.train_steps_per_task = min(
            self.train_steps_per_task, budget_after_rollout
        )
        logger.info(
            f"Limitting training steps per task to {self.train_steps_per_task}"
        )

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> A2CModel:
        """Instantiate `self.Model` on `train_env` with the hyper-parameters.

        `valid_env` is not used here.
        """
        model_type = self.Model
        model_kwargs = self.hparams.to_dict()
        return model_type(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """Train on `train_env`; fully delegated to the parent class."""
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(self,
                    observations: ContinualRLSetting.Observations,
                    action_space: spaces.Space) -> ContinualRLSetting.Actions:
        """Return the actions chosen by the parent class' `get_actions`."""
        actions = super().get_actions(
            observations=observations,
            action_space=action_space,
        )
        return actions

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.

        When task labels are available, `task_id` is the index of the new task;
        when they aren't, `task_id` is `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)

    def get_search_space(
            self,
            setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        """Return the parent's search space, minus the gSDE entries when the
        setting's action space is discrete.
        """
        space = super().get_search_space(setting)
        if not isinstance(setting.action_space, spaces.Discrete):
            return space

        # From stable_baselines3/common/base_class.py", line 170:
        # > Generalized State-Dependent Exploration (gSDE) can only be used with
        #   continuous actions
        # Therefore the related entries are dropped from the search space, so
        # that they keep their default values.
        for sde_entry in ("use_sde", "sde_sample_freq"):
            space.pop(sde_entry, None)
        return space