class HParams(HyperParameters):
    """ Hyper-parameters of the demo model. """

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
    # L2 regularization coefficient.
    weight_decay: float = log_uniform(1e-9, 1e-3, default=1e-6)
    # Maximum number of training epochs per task.
    max_epochs_per_task: int = 10
    # Number of epochs with increasing validation loss after which we stop training.
    early_stop_patience: int = 2
class HParams(SB3BaseHParams):
    """ Hyper-parameters of the DQN model from `stable_baselines3`.

    The command-line arguments for these are created with simple-parsing.
    """

    # The learning rate. It can be a function of the current progress
    # remaining (from 1 to 0).
    learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-4)
    # Size of the replay buffer.
    buffer_size: int = uniform(100, 10_000_000, default=1_000_000)
    # How many steps of the model to collect transitions for before learning
    # starts.
    learning_starts: int = 50_000
    # learning_starts: int = uniform(1_000, 100_000, default=50_000)
    # Minibatch size for each gradient update.
    batch_size: int = 32
    # batch_size: Optional[int] = categorical(1, 2, 4, 8, 16, 32, 128, default=32)
    # The soft update coefficient ("Polyak update", between 0 and 1).
    # Default is 1 for a hard update.
    tau: float = 1.0
    # tau: float = uniform(0., 1., default=1.0)
    # The discount factor.
    gamma: float = 0.99
    # gamma: float = uniform(0.9, 0.9999, default=0.99)
    # Update the model every ``train_freq`` steps. Set to `-1` to disable.
    train_freq: int = categorical(1, 10, 100, 1_000, 10_000, default=10)
    # How many gradient steps to do after each rollout (see ``train_freq``
    # and ``n_episodes_rollout``). Set to ``-1`` to do as many gradient
    # steps as steps done in the environment during the rollout.
    gradient_steps: int = 1
    # gradient_steps: int = categorical(1, -1, default=1)
    # Enable a memory-efficient variant of the replay buffer at the cost of
    # more complexity.
    # See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
    optimize_memory_usage: bool = False
    # Update the target network every ``target_update_interval`` environment
    # steps.
    target_update_interval: int = categorical(1, 10, 100, 1_000, 10_000, default=10_000)
    # Fraction of the entire training period over which the exploration rate is
    # reduced.
    exploration_fraction: float = 0.1
    # exploration_fraction: float = uniform(0.05, 0.3, default=0.1)
    # Initial value of the random action probability.
    exploration_initial_eps: float = 1.0
    # exploration_initial_eps: float = uniform(0.5, 1.0, default=1.0)
    # Final value of the random action probability.
    exploration_final_eps: float = 0.05
    # exploration_final_eps: float = uniform(0, 0.1, default=0.05)
    # The maximum value for the gradient clipping.
    max_grad_norm: float = 10
    # max_grad_norm: float = uniform(1, 100, default=10)
    # Whether to create a second environment that will be used for
    # evaluating the agent periodically. (Only available when passing a string
    # for the environment.)
    create_eval_env: bool = False
    # Whether or not to build the network at the creation of the instance.
    _init_setup_model: bool = True
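# --- Usage sketch (not part of the original class above) ---
# The docstring above says the command-line arguments are created with
# simple-parsing; below is a minimal sketch of how that could look. It assumes
# `HParams` refers to the DQN hyper-parameters defined above and that no extra
# wiring is needed from `HyperParameters` itself.
from simple_parsing import ArgumentParser

if __name__ == "__main__":
    parser = ArgumentParser()
    # Registers one option per field, e.g. --learning_rate, --buffer_size, --batch_size, ...
    parser.add_arguments(HParams, dest="hparams")
    args = parser.parse_args()
    hparams: HParams = args.hparams
    print(hparams)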
class HParams(HyperParameters):
    """ Hyper-parameters of the Settings. """

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
    # Batch size.
    batch_size: int = categorical(16, 32, 64, 128, default=128)
    # Weight/importance of the task embedding to the gate function.
    s_hat: float = uniform(1.0, 100.0, default=50.0)
    # Maximum number of training epochs per task.
    max_epochs_per_task: int = uniform(1, 20, default=10, discrete=True)
class HParams(SB3BaseHParams):
    """ Hyper-parameters of the A2C Model.

    TODO: Set actual 'good' priors for these hyper-parameters, as these were set
    somewhat randomly.
    """

    # Discount factor.
    gamma: float = uniform(0.9, 0.9999, default=0.99)
    # Factor for trade-off of bias vs variance for the Generalized Advantage Estimator.
    # Equivalent to the classic advantage when set to 1.
    gae_lambda: float = uniform(0.5, 1.0, default=1.0)
    # Entropy coefficient for the loss calculation.
    ent_coef: float = uniform(0.0, 1.0, default=0.0)
    # Value function coefficient for the loss calculation.
    vf_coef: float = uniform(0.01, 1.0, default=0.5)
    # The maximum value for the gradient clipping.
    max_grad_norm: float = uniform(0.1, 10, default=0.5)
    # RMSProp epsilon. It stabilizes the square root computation in the
    # denominator of the RMSProp update.
    rms_prop_eps: float = log_uniform(1e-7, 1e-3, default=1e-5)
    # Whether to use RMSprop (default) or Adam as the optimizer.
    use_rms_prop: bool = categorical(True, False, default=True)
    # Whether to use generalized State-Dependent Exploration (gSDE) instead of
    # action noise exploration. (default: False)
    use_sde: bool = categorical(True, False, default=False)
    # Sample a new noise matrix every n steps when using gSDE.
    # Default: -1 (only sample at the beginning of the rollout)
    sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)
    # Whether or not to normalize the advantage.
    normalize_advantage: bool = categorical(True, False, default=False)
    # The log location for tensorboard (if None, no logging).
    tensorboard_log: Optional[str] = None
    # # Whether to create a second environment that will be used for evaluating the
    # # agent periodically. (Only available when passing a string for the environment.)
    # create_eval_env: bool = False
    # # Additional arguments to be passed to the policy on creation.
    # policy_kwargs: Optional[Dict[str, Any]] = None
    # The verbosity level: 0 no output, 1 info, 2 debug.
    verbose: int = 0
    # Seed for the pseudo-random generators.
    seed: Optional[int] = None
    # Device (cpu, cuda, ...) on which the code should be run.
    # Setting it to "auto", the code will be run on the GPU if possible.
    device: Union[torch.device, str] = "auto"
class HParams(HyperParameters):
    """ Hyper-parameters of the Pnn method. """

    # Learning rate of the optimizer. Defaults to 0.0001 when in SL.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=2e-4)
    num_steps: int = 200  # (Only applicable in RL settings.)
    # Discount factor (only used in RL settings).
    gamma: float = uniform(0.9, 0.999, default=0.99)
    # Number of hidden units (only used in RL settings).
    hidden_size: int = categorical(64, 128, 256, default=256)
    # Batch size in SL, and number of parallel environments in RL.
    # Defaults to None in RL, and 32 when in SL.
    batch_size: Optional[int] = None
    # Maximum number of training epochs per task (only used in SL settings).
    max_epochs_per_task: int = uniform(1, 20, default=10)
class HParams(SB3BaseHParams):
    """ Hyper-parameters common to all off-policy algos from SB3. """

    # The learning rate. It can be a function of the current progress
    # remaining (from 1 to 0).
    learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-4)
    # Size of the replay buffer.
    buffer_size: int = uniform(100, 10_000_000, default=1_000_000)
    # How many steps of the model to collect transitions for before learning
    # starts.
    learning_starts: int = 100
    # Minibatch size for each gradient update.
    batch_size: int = 256
    # batch_size: int = categorical(1, 2, 4, 8, 16, 32, 128, default=32)
    # The soft update coefficient ("Polyak update", between 0 and 1).
    # Default is 1 for a hard update.
    tau: float = 0.005
    # tau: float = uniform(0., 1., default=1.0)
    # The discount factor.
    gamma: float = 0.99
    # gamma: float = uniform(0.9, 0.9999, default=0.99)
    # Update the model every ``train_freq`` steps. Set to `-1` to disable.
    train_freq: int = 1
    # train_freq: int = categorical(1, 10, 100, 1_000, 10_000, default=10)
    # How many gradient steps to do after each rollout (see ``train_freq``
    # and ``n_episodes_rollout``). Set to ``-1`` to do as many gradient
    # steps as steps done in the environment during the rollout.
    gradient_steps: int = 1
    # gradient_steps: int = categorical(1, -1, default=1)
    # Enable a memory-efficient variant of the replay buffer at the cost of
    # more complexity.
    # See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
    optimize_memory_usage: bool = False
    # Whether to create a second environment that will be used for
    # evaluating the agent periodically. (Only available when passing a string
    # for the environment.)
    create_eval_env: bool = False
    # The verbosity level: 0 no output, 1 info, 2 debug.
    verbose: int = 1
class HParams(HyperParameters):
    """ Hyper-Parameters of the model, as a dataclass.

    Fields get command-line arguments with simple-parsing.
    """

    # Hidden size (representation size).
    hidden_size: int = 256
    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4)
    # Discount factor.
    gamma: float = 0.99
    # Coefficient for the entropy term in the loss formula.
    entropy_term_coefficient: float = 0.001
    # Maximum length of an episode, when desired. (Generally not needed.)
    max_episode_steps: Optional[int] = None
class HParams(OffPolicyModel.HParams):
    """ Hyper-parameters of the TD3 model. """

    # TODO: Add HParams specific to TD3 here, if any, and also check that the
    # default values are correct.

    # The learning rate. It can be a function of the current progress
    # remaining (from 1 to 0).
    learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-3)
    # Minibatch size for each gradient update.
    batch_size: int = 100
    # batch_size: int = categorical(1, 2, 4, 8, 16, 32, 128, default=32)
    # Update the model once per episode (rather than every step).
    train_freq: TrainFreq = (1, "episode")
    # How many gradient steps to do after each rollout (see ``train_freq``
    # and ``n_episodes_rollout``). Set to ``-1`` to do as many gradient
    # steps as steps done in the environment during the rollout.
    gradient_steps: int = -1
class SB3BaseHParams(HyperParameters):
    """ Hyper-parameters of a model from the `stable_baselines3` package.

    The command-line arguments for these are created with simple-parsing.
    """

    # The policy model to use (MlpPolicy, CnnPolicy, ...).
    policy: Optional[Union[str, Type[BasePolicy]]] = choice("MlpPolicy", "CnnPolicy", default=None)
    # # The base policy used by this method
    # policy_base: Type[BasePolicy]
    # Learning rate for the optimizer. It can be a function of the current
    # progress remaining (from 1 to 0).
    learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=1e-4)
    # Additional arguments to be passed to the policy on creation.
    policy_kwargs: Optional[Dict[str, Any]] = None
    # The log location for tensorboard (if None, no logging).
    tensorboard_log: Optional[str] = None
    # The verbosity level: 0 none, 1 training information, 2 debug.
    verbose: int = 1
    # Device on which the code should run. By default, it will try to use a
    # CUDA-compatible device and fall back to CPU if that is not possible.
    device: Union[torch.device, str] = "auto"
    # # Whether the algorithm supports training with multiple environments (as in A2C)
    # support_multi_env: bool = False
    # Whether to create a second environment that will be used for evaluating
    # the agent periodically. (Only available when passing a string for the
    # environment.)
    create_eval_env: bool = False
    # # When creating an environment, whether to wrap it or not in a Monitor wrapper.
    # monitor_wrapper: bool = True
    # Seed for the pseudo-random generators.
    seed: Optional[int] = None
class HParams(OffPolicyModel.HParams):
    """ Hyper-parameters of the SAC Model. """

    # The learning rate. It can be a function of the current progress
    # (from 1 to 0).
    learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=3e-4)
    # Size of the replay buffer.
    buffer_size: int = 1_000_000
    # Steps to collect transitions for before learning starts.
    learning_starts: int = 100
    # Minibatch size for each gradient update.
    batch_size: int = 256
    # Soft update coefficient ("Polyak update", between 0 and 1).
    tau: float = 0.005
    # Discount factor.
    gamma: float = 0.99
    # Update the model every ``train_freq`` steps.
    train_freq: int = 1
    # Gradient steps to do after each rollout.
    gradient_steps: int = 1
    # action_noise: Optional[ActionNoise] = None
    # Memory-efficient replay buffer variant.
    optimize_memory_usage: bool = False
    # Entropy regularization coefficient ("auto": learn it automatically).
    ent_coef: Union[str, float] = "auto"
    # Update the target network every ``target_update_interval`` environment steps.
    target_update_interval: int = 1
    # Target entropy when learning ``ent_coef`` ("auto": use a heuristic value).
    target_entropy: Union[str, float] = "auto"
    # Whether to use generalized State-Dependent Exploration (gSDE).
    use_sde: bool = False
    # Sample a new noise matrix every n steps when using gSDE (-1: only at rollout start).
    sde_sample_freq: int = -1
class HParams(OffPolicyModel.HParams):
    """ Hyper-parameters of the DDPG Model. """

    # TODO: Add hparams specific to DDPG here.

    # The learning rate. It can be a function of the current progress
    # remaining (from 1 to 0).
    learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-3)
    # The verbosity level: 0 none, 1 training information, 2 debug.
    verbose: int = 0
    # Update the model once per episode (rather than every step).
    train_freq: TrainFreq = TrainFreq(frequency=1, unit="episode")
    # Minibatch size for each gradient update.
    batch_size: int = 100
    # How many gradient steps to do after each rollout (see ``train_freq``
    # and ``n_episodes_rollout``). Set to ``-1`` to do as many gradient
    # steps as steps done in the environment during the rollout.
    gradient_steps: int = -1
class HParams(SB3BaseHParams):
    """ Hyper-parameters of the PPO Model. """

    # # The policy model to use (MlpPolicy, CnnPolicy, ...)
    # policy: Union[str, Type[ActorCriticPolicy]]
    # # The environment to learn from (if registered in Gym, can be str)
    # env: Union[GymEnv, str]

    # The learning rate. It can be a function of the current progress remaining
    # (from 1 to 0).
    learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4)
    # The number of steps to run for each environment per update (i.e. batch size
    # is n_steps * n_env where n_env is the number of environment copies running in
    # parallel).
    # TODO: Limit this, as is done in A2C, based on the value of setting.max_steps.
    n_steps: int = categorical(32, 128, 256, 1024, 2048, 4096, 8192, default=2048)
    # Minibatch size.
    batch_size: Optional[int] = categorical(16, 32, 64, 128, default=64)
    # Number of epochs when optimizing the surrogate loss.
    n_epochs: int = 10
    # Discount factor.
    gamma: float = uniform(0.9, 0.9999, default=0.99)
    # Factor for trade-off of bias vs variance for the Generalized Advantage Estimator.
    gae_lambda: float = uniform(0.8, 1.0, default=0.95)
    # Clipping parameter. It can be a function of the current progress remaining
    # (from 1 to 0).
    clip_range: float = uniform(0.05, 0.4, default=0.2)
    # Clipping parameter for the value function. It can be a function of the current
    # progress remaining (from 1 to 0). This is a parameter specific to the OpenAI
    # implementation. If None is passed (default), no clipping will be done on the
    # value function. IMPORTANT: this clipping depends on the reward scaling.
    clip_range_vf: Optional[float] = None
    # Entropy coefficient for the loss calculation.
    ent_coef: float = uniform(0., 1., default=0.0)
    # Value function coefficient for the loss calculation.
    vf_coef: float = uniform(0.01, 1.0, default=0.5)
    # The maximum value for the gradient clipping.
    max_grad_norm: float = uniform(0.1, 10, default=0.5)
    # Whether to use generalized State-Dependent Exploration (gSDE) instead of
    # action noise exploration. (default: False)
    use_sde: bool = categorical(True, False, default=False)
    # Sample a new noise matrix every n steps when using gSDE. Default: -1 (only
    # sample at the beginning of the rollout).
    sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)
    # Limit the KL divergence between updates, because the clipping is not enough
    # to prevent large updates; see issue #213
    # (cf https://github.com/hill-a/stable-baselines/issues/213).
    # By default, there is no limit on the KL div.
    target_kl: Optional[float] = None
    # The log location for tensorboard (if None, no logging).
    tensorboard_log: Optional[str] = None
    # # Whether to create a second environment that will be used for evaluating the
    # # agent periodically. (Only available when passing a string for the environment.)
    # create_eval_env: bool = False
    # # Additional arguments to be passed to the policy on creation.
    # policy_kwargs: Optional[Dict[str, Any]] = None
    # The verbosity level: 0 no output, 1 info, 2 debug.
    verbose: int = 1
    # Seed for the pseudo-random generators.
    seed: Optional[int] = None
    # Device (cpu, cuda, ...) on which the code should be run. Setting it to "auto",
    # the code will be run on the GPU if possible.
    device: Union[torch.device, str] = "auto"
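# --- Worked example (illustrative, not from the original code) ---
# How the PPO rollout/minibatch fields above interact, using the defaults
# n_steps=2048, batch_size=64, n_epochs=10 and a hypothetical n_envs=4:
n_envs = 4                                   # hypothetical number of parallel environments
rollout_size = 2048 * n_envs                 # transitions collected per update (n_steps * n_env)
minibatches_per_epoch = rollout_size // 64   # split into minibatches of `batch_size`
gradient_updates_per_rollout = minibatches_per_epoch * 10  # each sample is revisited `n_epochs` times
assert (rollout_size, minibatches_per_epoch, gradient_updates_per_rollout) == (8192, 128, 1280)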
class HParams(SB3BaseHParams):
    """ Hyper-parameters common to all on-policy algos from SB3. """

    # Learning rate for the optimizer. It can be a function of the current
    # progress remaining (from 1 to 0).
    learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=1e-3)
    # The number of steps to run for each environment per update (i.e. batch size
    # is n_steps * n_env where n_env is the number of environment copies running in
    # parallel).
    # NOTE: The default value here is much lower than in PPO, which might indicate
    # that A2C is more "on-policy" (i.e. that it requires the data to be very "fresh").
    n_steps: int = uniform(3, 64, default=5, discrete=True)
    # Discount factor.
    gamma: float = 0.99
    # gamma: float = uniform(0.9, 0.9999, default=0.99)
    # Factor for trade-off of bias vs variance for the Generalized Advantage Estimator.
    # Equivalent to the classic advantage when set to 1.
    gae_lambda: float = 1.0
    # gae_lambda: float = uniform(0.5, 1.0, default=1.0)
    # Entropy coefficient for the loss calculation.
    ent_coef: float = 0.0
    # ent_coef: float = uniform(0.0, 1.0, default=0.0)
    # Value function coefficient for the loss calculation.
    vf_coef: float = 0.5
    # vf_coef: float = uniform(0.01, 1.0, default=0.5)
    # The maximum value for the gradient clipping.
    max_grad_norm: float = 0.5
    # max_grad_norm: float = uniform(0.1, 10, default=0.5)
    # Whether to use generalized State-Dependent Exploration (gSDE) instead of
    # action noise exploration. (default: False)
    use_sde: bool = False
    # use_sde: bool = categorical(True, False, default=False)
    # Sample a new noise matrix every n steps when using gSDE.
    # Default: -1 (only sample at the beginning of the rollout)
    sde_sample_freq: int = -1
    # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)
    # The log location for tensorboard (if None, no logging).
    tensorboard_log: Optional[str] = None
    # # Whether to create a second environment that will be used for evaluating the
    # # agent periodically. (Only available when passing a string for the environment.)
    # create_eval_env: bool = False
    # # Additional arguments to be passed to the policy on creation.
    # policy_kwargs: Optional[Dict[str, Any]] = None
    # The verbosity level: 0 no output, 1 info, 2 debug.
    verbose: int = 1
    # Seed for the pseudo-random generators.
    seed: Optional[int] = None
    # Device (cpu, cuda, ...) on which the code should be run.
    # Setting it to "auto", the code will be run on the GPU if possible.
    device: Union[torch.device, str] = "auto"
class BaseHParams(HyperParameters):
    """ Set of 'base' Hyperparameters for the 'base' LightningModule. """

    # Class variable versions of the above dicts, for easier subclassing.
    # NOTE: These don't get parsed from the command-line.
    available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = available_optimizers.copy()
    available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = available_encoders.copy()

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
    # L2 regularization term for the model weights.
    weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)
    # Which optimizer to use.
    optimizer: Type[Optimizer] = categorical(available_optimizers, default=optim.Adam)
    # Use an encoder architecture from the torchvision.models package.
    encoder: Type[nn.Module] = categorical(
        available_encoders,
        default=tv_models.resnet18,
        # TODO: Only using these two by default when performing a sweep.
        probabilities={"resnet18": 0.5, "simple_convnet": 0.5},
    )
    # Batch size to use during training and evaluation.
    batch_size: Optional[int] = None
    # Number of hidden units (before the output head).
    # When left to None (default), the hidden size from the pretrained
    # encoder model will be used. When set to an integer value, an
    # additional Linear layer will be placed between the outputs of the
    # encoder in order to map from the pretrained encoder's output size H_e
    # to this new hidden size `new_hidden_size`.
    new_hidden_size: Optional[int] = None
    # Retrain the encoder from scratch.
    train_from_scratch: bool = False
    # Whether we should keep the weights of the pretrained encoder frozen.
    freeze_pretrained_encoder_weights: bool = False
    # Settings for the output head.
    # TODO: This could be overwritten in a subclass to do classification or
    # regression or RL, etc.
    output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)
    # Whether the output head should be detached from the representations.
    # In other words, if the gradients from the downstream task should be
    # allowed to affect the representations.
    detach_output_head: bool = False

    def __post_init__(self):
        """ Use this to initialize (or fix) any fields parsed from the command-line. """
        super().__post_init__()

    def make_optimizer(self, *args, **kwargs) -> Optimizer:
        """ Creates the Optimizer object from the options. """
        optimizer_class = self.optimizer
        options = {
            "lr": self.learning_rate,
            "weight_decay": self.weight_decay,
        }
        options.update(kwargs)
        return optimizer_class(*args, **options)

    @property
    def encoder_model(self) -> Type[nn.Module]:
        return self.encoder

    def make_encoder(self, encoder_name: Optional[str] = None) -> Tuple[nn.Module, int]:
        """ Creates an Encoder model and returns the resulting hidden size.

        Returns:
            Tuple[nn.Module, int]: the encoder and the hidden size.
        """
        if encoder_name:
            if encoder_name not in self.available_encoders:
                raise KeyError(
                    f"No encoder with name {encoder_name} found! "
                    f"(available encoders: {list(self.available_encoders.keys())})."
                )
            encoder_model = self.available_encoders[encoder_name]
        else:
            encoder_model = self.encoder
        encoder, hidden_size = get_pretrained_encoder(
            encoder_model=encoder_model,
            pretrained=not self.train_from_scratch,
            freeze_pretrained_weights=self.freeze_pretrained_encoder_weights,
            new_hidden_size=self.new_hidden_size,
        )
        return encoder, hidden_size
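# --- Usage sketch for the helper methods above (illustrative only) ---
# Assumes the surrounding imports (`optim`, the encoder registry, etc.) are
# available; the variable names below are hypothetical.
hparams = BaseHParams(learning_rate=1e-3, optimizer=optim.SGD)
# `make_encoder()` falls back to `hparams.encoder` when no name is given and
# returns the encoder module together with its output (hidden) size.
encoder, hidden_size = hparams.make_encoder()
# `make_optimizer` forwards `learning_rate` and `weight_decay` to the chosen
# optimizer class.
optimizer = hparams.make_optimizer(encoder.parameters())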
class HParams(SemiSupervisedModel.HParams, SelfSupervisedModel.HParams, MultiHeadModel.HParams):
    """ HParams of the Model. """

    # NOTE: All the fields below were just copied from the BaseHParams class, just
    # to improve visibility a bit.

    # Class variables that hold the available optimizers and encoders.
    # NOTE: These don't get parsed from the command-line.
    available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = {
        "sgd": optim.SGD,
        "adam": optim.Adam,
        "rmsprop": optim.RMSprop,
    }
    # Which optimizer to use.
    optimizer: Type[Optimizer] = categorical(available_optimizers, default=optim.Adam)

    available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = {
        "vgg16": tv_models.vgg16,
        "resnet18": tv_models.resnet18,
        "resnet34": tv_models.resnet34,
        "resnet50": tv_models.resnet50,
        "resnet101": tv_models.resnet101,
        "resnet152": tv_models.resnet152,
        "alexnet": tv_models.alexnet,
        "densenet": tv_models.densenet161,
        # TODO: Add the self-supervised pl modules here!
        "simple_convnet": SimpleConvNet,
    }
    # Which encoder to use.
    encoder: Type[nn.Module] = choice(
        available_encoders,
        default=SimpleConvNet,
        # # TODO: Only considering these two for now when performing an HPO sweep.
        # probabilities={"resnet18": 0., "simple_convnet": 1.0},
    )

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
    # L2 regularization term for the model weights.
    weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)
    # Batch size to use during training and evaluation.
    batch_size: Optional[int] = None
    # Number of hidden units (before the output head).
    # When left to None (default), the hidden size from the pretrained
    # encoder model will be used. When set to an integer value, an
    # additional Linear layer will be placed between the outputs of the
    # encoder in order to map from the pretrained encoder's output size H_e
    # to this new hidden size `new_hidden_size`.
    new_hidden_size: Optional[int] = None
    # Retrain the encoder from scratch.
    train_from_scratch: bool = False
    # Whether we should keep the weights of the pretrained encoder frozen.
    freeze_pretrained_encoder_weights: bool = False
    # Hyper-parameters of the output head.
    output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)
    # Whether the output head should be detached from the representations.
    # In other words, if the gradients from the downstream task should be
    # allowed to affect the representations.
    detach_output_head: bool = False
class HParams(HyperParameters):
    """ Hyper-parameters of the demo model. """

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
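# --- Sketch of what the `log_uniform` prior above means (illustrative) ---
# Outside of a sweep the field simply takes its default (0.001); during a sweep,
# values would be drawn uniformly in log-space between 1e-6 and 1e-2. The manual
# sampling below only mirrors that behaviour; the real sweep logic lives in the
# `HyperParameters` machinery, not here.
import math
import random

def sample_log_uniform(low: float, high: float) -> float:
    """Draw a value whose logarithm is uniform on [log(low), log(high)]."""
    return math.exp(random.uniform(math.log(low), math.log(high)))

default_lr = HParams().learning_rate         # 0.001, the declared default
sampled_lr = sample_log_uniform(1e-6, 1e-2)  # e.g. 3.2e-4, different on each call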