class GanParams:
    """Parameters of the GAN training procedure."""

    # GAN Training criterion
    criterion: str = choice("GAN", "WGAN", default="WGAN")
    # Add gradient penalty
    gp: str = choice("None", "original", default="original")
    # GP lambda
    gp_lambda: float = 10.0
    # Number of critic iterations
    critic_iters: int = 5
    # clamp the weights for WGAN
    clamp: float = 0.01
class NetworkParams:
    # Network parameters
    gen_type: str = choice(
        "dcgan", "mlp", "cnn", "resnet", default="dcgan"
    )  # One of: mlp, cnn, dcgan, resnet (try resnet :))
    gen_norm: str = choice(
        "batchnorm", "instancenorm", default="batchnorm"
    )  # One of: batchnorm, instancenorm
    ngf: int = 75  # number of features in the generator network
    nef: int = 65  # number of features in the encoder network (presumably, cf. `netE` -- TODO confirm)
    gen_nextra_layers: int = 0  # number of extra layers in the generator network
    gen_bias_type: Optional[str] = choice(
        None, "plane", default=None
    )  # One of: None, plane
    netG: str = ""  # path to netG (to continue training)
    netG2: str = ""  # path to netG2 (normal generator to continue training)
    fix_splat_pos: bool = True  # X and Y coordinates are fixed
    zloss: float = 0.0  # use Z loss
    unit_normalloss: float = 0.0  # use unit_normal loss
    norm_sph_coord: bool = True  # Use spherical coordinates for the normal
    max_gnorm: float = 500.0  # max grad norm to which it will be clipped (if exceeded)

    disc_type: str = choice("cnn", "dcgan", default="cnn")  # One of: cnn, dcgan
    disc_norm: str = choice(
        "None", "batchnorm", "instancenorm", default="None"
    )  # One of: None, batchnorm, instancenorm
    ndf: int = 75  # number of features in the discriminator network
    disc_nextra_layers: int = 0  # number of extra layers in the discriminator network
    nz: int = 100  # size of the latent z vector
    netD: str = ""  # path to netD (to continue training)
    netE: str = ""  # path to netE (to continue training)
class HyperParameters(TestSetup, Serializable):
    """Hyperparameters of a multi-headed model."""

    batch_size: int = 128  # the batch size
    learning_rate: float = 0.001  # Learning Rate
    optimizer: str = choice("SGD", "ADAM", default="SGD")  # Which optimizer to use during training.

    # number of individual 'pages' that were kept during preprocessing of the 'likes'.
    # This corresponds to the number of entries in the multi-hot like vector.
    num_like_pages: int = 10_000

    gender_loss_weight: float = 1.0  # relative weight of the gender loss
    age_loss_weight: float = 1.0  # relative weight of the age_group loss

    num_text_features: ClassVar[int] = 91
    num_image_features: ClassVar[int] = 65

    max_number_of_likes: int = 2000
    embedding_dim: int = 8

    shared_likes_embedding: bool = True

    # Whether or not to use the better filtering of liked pages
    use_custom_likes: bool = True

    # NOTE(review): the three task settings below use instances as field defaults.
    # If `TaskHyperParameters` is an unhashable dataclass, recent Python versions
    # may reject these as mutable defaults -- confirm against the supported versions.

    # Gender model settings
    gender: TaskHyperParameters = TaskHyperParameters(
        "gender",
        num_layers=1,
        num_units=32,
        use_batchnorm=False,
        use_dropout=True,
        dropout_rate=0.1,
        use_image_features=True,
        use_likes=True,
    )

    # Age Group Model settings
    age_group: TaskHyperParameters = TaskHyperParameters(
        "age_group",
        num_layers=2,
        num_units=64,
        use_batchnorm=False,
        use_dropout=True,
        dropout_rate=0.1,
        use_image_features=True,
        use_likes=True,
    )

    # Personality Model(s) settings:
    personality: TaskHyperParameters = TaskHyperParameters(
        "personality",
        num_layers=1,
        num_units=8,
        use_batchnorm=False,
        use_dropout=True,
        dropout_rate=0.1,
        use_image_features=False,
        use_likes=False,
    )
class EWCExampleMethod(StableBaselines3Method):
    # Example method: wraps the policy of a stable-baselines3 algorithm with an
    # EWC-like regularizer (see `EWCPolicy.wrap_algorithm` in `create_model`).

    # Model = A2CModel  # Works great! (fastest)
    Model: ClassVar[Type[BaseAlgorithm]] = DQNModel  # Works great! (fastest)

    # Coefficient for the EWC-like loss.
    reg_coefficient: float = 1.0

    # Number of observations to use for FIM calculation
    total_steps_fim: int = 1000

    # Fisher information type (diagonal or block diagonal)
    # The field holds the matrix *class* (not an instance), hence `Type[...]`.
    fim_representation: Type[PMatAbstract] = choice(
        {
            "diagonal": PMatDiag,
            "block_diagonal": PMatKFAC,
        },
        default=PMatKFAC,
    )

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm:
        """Create the algorithm as usual, then wrap its policy with the EWC wrapper."""
        # Create the model, as usual:
        model = super().create_model(train_env, valid_env)
        # 'Wrap' the algorithm's policy with the EWC wrapper.
        model = EWCPolicy.wrap_algorithm(
            model,
            reg_coefficient=self.reg_coefficient,
            fim_representation=self.fim_representation,
        )
        return model

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """ Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        When a model exists, observations are collected by rolling out the
        current policy in `self.model.env`, and passed (as a DataLoader) to the
        policy's own `on_task_switch`, together with the name of the RL method.

        Raises:
            NotImplementedError: if the model's class is neither an 'a2c' nor a
                'dqn' model (based on the string representation of its class).
        """
        if not self.model:
            # Nothing to consolidate yet: no model has been created.
            return

        # Create the observation collection to use for the FIM calculation.
        # Episodes are capped at 1000 steps each, and the size of the collection
        # is only checked between episodes, so it may end up holding more than
        # `total_steps_fim` observations.
        observation_collection = []
        while len(observation_collection) < self.total_steps_fim:
            state = self.model.env.reset()
            for _ in range(1000):
                action = self.get_actions(
                    Observations(state), self.model.env.action_space
                )
                state, _, done, _ = self.model.env.step(action)
                observation_collection.append(
                    torch.tensor(state).to(self.model.device)
                )
                if done:
                    break

        dataloader = DataLoader(
            TensorDataset(torch.cat(observation_collection)),
            batch_size=100,
            shuffle=False,
        )

        model_class_repr = str(self.model.__class__)
        if "a2c" in model_class_repr:
            rl_method = "a2c"
        elif "dqn" in model_class_repr:
            rl_method = "dqn"
        else:
            raise NotImplementedError(
                f"EWC is only supported for a2c and dqn models, got {model_class_repr}"
            )
        self.model.policy.on_task_switch(task_id, dataloader, method=rl_method)
class Options(AuxiliaryTask.Options):
    """Options of the EWC auxiliary task."""

    # Coefficient of the EWC auxiliary task.
    # NOTE: It seems to be the case that, at least just for EWC, the coefficient
    # can often be much greater than 1, hence why we overwrite the prior over
    # that hyper-parameter here.
    coefficient: float = uniform(0.0, 100.0, default=1.0)

    # Batchsize to be used when computing FIM (unused atm)
    batch_size_fim: int = 32

    # Number of observations to use for FIM calculation
    sample_size_fim: int = categorical(
        2, 4, 8, 16, 32, 64, 128, 256, 512, default=8
    )

    # Fisher information representation type (diagonal or block diagonal).
    fim_representation: Type[PMatAbstract] = choice(
        {"diagonal": PMatDiag, "block_diagonal": PMatKFAC},
        default=PMatDiag,
    )
class PassiveSetting(
    Setting[PassiveEnvironment[ObservationType, ActionType, RewardType]]
):
    """Setting in which the actions do not influence the future observations.

    Supervised learning is an example of a Passive setting: predicting a label
    changes neither the reward you are given (the label) nor the samples you
    observe next.
    """

    # @dataclass(frozen=True)
    # class Observations(Setting.Observations):
    #     pass

    # @dataclass(frozen=True)
    # class Actions(Setting.Actions):
    #     pass

    # @dataclass(frozen=True)
    # class Rewards(Setting.Rewards):
    #     pass

    # TODO: rename/remove this, as it isn't used, and there could be some
    # confusion with the available_datasets in task-incremental and iid.
    # Also, since those are already LightningDataModules, what should we do?
    available_datasets: ClassVar[Dict[str, Type[LightningDataModule]]] = {
        # "mnist": MNISTDataModule,
        # "fashion_mnist": FashionMNISTDataModule,
        # "cifar10": CIFAR10DataModule,
        # "imagenet": ImagenetDataModule,
    }

    # Which setup / dataset to use.
    # The setups/dataset are implemented as `LightningDataModule`s.
    dataset: str = choice(available_datasets.keys(), default="mnist")
class HParams:
    """Options controlling how a Model is trained."""

    num_layers: int = 4
    num_units: int = 64
    optimizer: str = choice(
        "ADAM",
        "SGD",
        "RMSPROP",
        default="ADAM",
    )
    learning_rate: float = 0.001
class C(TestSetup):
    # Test fixture: a single field whose value is picked, by key, from a dict
    # of pre-built `AA` / `BB` instances.
    option: Union[AA, BB] = choice(
        {"a": AA("aaa"), "b": BB("bbb"), "bob": AA("bobobo")},
        default="a",
    )
class D(TestSetup):
    # Test fixture: the dict of choices mixes a list of `AA`, a float and a
    # `BB` instance, while the field itself is annotated as `List[Base]`.
    option: List[Base] = choice(
        {"a": [AA("aa1"), AA("aa2")], "b": 1.23, "bob": BB("bobobo")},
        default="a",
    )
class HyperParameters(Serializable):
    """Hyperparameters of the Generator and Discriminator networks."""

    learning_rate: float = 1e-4
    optimizer: str = choice("ADAM", "RMSPROP", "SGD", default="ADAM")
    # Number of Discriminator iterations per Generator iteration.
    n_disc_iters_per_g_iter: int = 1
class CameraOpts:
    """ Camera model settings """

    fix_aspect: bool = False  # Fix aspect ratio of cameras
    allow_skew: bool = False  # Allow skew parameter in camera intrinsics
    distortion_model: str = choice(
        "standard", "rational", "thin_prism", "tilted", default="standard"
    )  # Camera distortion model to use
    # The choices are strings, so the field is a `str` (it was annotated `bool`).
    motion_model: str = choice(
        "rolling", "static", default="static"
    )  # Camera motion model to use
    limit_intrinsic: Optional[int] = 50  # Limit intrinsic images to enable faster initialisation
    calibration: Optional[str] = None  # Initialise from previous (or single camera) calibration
class D(TestSetup):
    # Test fixture: one of the available choices ("a") is a list of `AA`
    # instances, alongside a float and a `BB` instance.
    option: Union[AA, BB, float] = choice(
        {"a": [AA("aa1"), AA("aa2")], "b": 1.23, "bob": BB("bobobo")},
        default="a",
    )
class IncrementalRLSetting(ContinualRLSetting):
    """Continual RL setting where the data is divided into 'tasks' with clear boundaries.

    By default, the task labels are given at train time, but not at test time.

    TODO: Decide how to implement the train procedure, if we give a single
    dataloader, we might need to call the agent's `on_task_switch` when we reach
    the task boundary.. Or, we could produce one dataloader per task, and then
    implement a custom `fit` procedure in the CLTrainer class, that loops over
    the tasks and calls the `on_task_switch` when needed.
    """

    # Number of tasks.
    nb_tasks: int = 1
    # Whether the task boundaries are smooth or sudden.
    smooth_task_boundaries: bool = constant(False)
    # Whether to give access to the task labels at train time.
    task_labels_at_train_time: bool = True
    # Whether to give access to the task labels at test time.
    task_labels_at_test_time: bool = False

    # Class variable that holds the dict of available environments.
    available_datasets: ClassVar[Dict[str, str]] = dict_union(
        ContinualRLSetting.available_datasets,
        {"monsterkong": "MetaMonsterKong-v0"},
    )
    dataset: str = choice(available_datasets, default="cartpole")

    def __post_init__(self, *args, **kwargs):
        """Run the parent initialization, then apply the MonsterKong defaults."""
        super().__post_init__(*args, **kwargs)
        if self.dataset != "MetaMonsterKong-v0":
            return
        # TODO: Limit the episode length in monsterkong?
        # TODO: Actually end episodes when reaching a task boundary, to force the
        # level to change?
        # Fall back to 500 steps per episode when no (non-zero) limit was set.
        self.max_episode_steps = self.max_episode_steps or 500

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will
        be called.

        In this Incremental-RL Setting, fit is called once per task.
        (Same as ClassIncrementalSetting in SL).
        """
        return self.nb_tasks

    def create_task_schedule(
        self, temp_env: MultiTaskEnvironment, change_steps: List[int]
    ) -> Dict[int, Dict]:
        """Build the mapping from step to task for the given environment.

        For a MonsterKong environment, the i-th step of `change_steps` is mapped
        to `{"level": i}`. Any other environment is delegated to the parent class.
        """
        # `MetaMonsterKongEnv` is only looked up when monsterkong is installed.
        is_monsterkong = monsterkong_installed and isinstance(
            temp_env.unwrapped, MetaMonsterKongEnv
        )
        if not is_monsterkong:
            return super().create_task_schedule(
                temp_env=temp_env, change_steps=change_steps
            )
        return {
            task_step: {"level": level}
            for level, task_step in enumerate(change_steps)
        }
class SLSetting(
    Setting[PassiveEnvironment[ObservationType, ActionType, RewardType]]
):
    """Supervised Learning Setting.

    Core assumptions:
    - Current actions have no influence on future observations.
    - The environment gives back "dense feedback", (the 'reward' associated
      with all possible actions at each step, rather than a single action)

    For example, supervised learning is a Passive setting, since predicting a
    label has no effect on the reward you're given (the label) or on the next
    samples you observe.
    """

    @dataclass(frozen=True)
    class Observations(Setting.Observations):
        x: Tensor

    @dataclass(frozen=True)
    class Actions(Setting.Actions):
        pass

    @dataclass(frozen=True)
    class Rewards(Setting.Rewards):
        pass

    Environment: ClassVar[Type[PassiveEnvironment]] = PassiveEnvironment

    # TODO: rename/remove this, as it isn't used, and there could be some
    # confusion with the available_datasets in task-incremental and iid.
    # Also, since those are already LightningDataModules, what should we do?
    available_datasets: ClassVar[Dict[str, Type[LightningDataModule]]] = {
        # "mnist": MNISTDataModule,
        # "fashion_mnist": FashionMNISTDataModule,
        # "cifar10": CIFAR10DataModule,
        # "imagenet": ImagenetDataModule,
    }

    # Which setup / dataset to use.
    # The setups/dataset are implemented as `LightningDataModule`s.
    dataset: str = choice(available_datasets.keys(), default="mnist")

    # Transforms to be applied to the observations of the train/valid/test
    # environments.
    transforms: List[Transforms] = list_field()

    # Transforms to be applied to the training datasets.
    train_transforms: List[Transforms] = list_field(
        Transforms.to_tensor, Transforms.three_channels
    )
    # Transforms to be applied to the validation datasets.
    val_transforms: List[Transforms] = list_field(
        Transforms.to_tensor, Transforms.three_channels
    )
    # Transforms to be applied to the testing datasets.
    test_transforms: List[Transforms] = list_field(
        Transforms.to_tensor, Transforms.three_channels
    )

    # Whether to drop the last batch (during training). Useful if you use
    # batchnorm, to avoid having an error when the batch_size is 1.
    drop_last: bool = False
class C(TestSetup):
    # Test fixture: the same dict of choices is used for a single-valued field
    # (`option`) and for a list-valued field (`options`).
    option: Union[AA, BB, float] = choice(
        {"a": AA("aaa"), "b": BB("bbb"), "bob": AA("bobobo"), "f": 1.23},
        default="a",
    )
    # The default list is produced by copying a `["a"]` literal.
    options: List[Union[AA, BB, float]] = choice(
        {"a": AA("aaa"), "b": BB("bbb"), "bob": AA("bobobo"), "f": 1.23},
        default_factory=["a"].copy,
    )
class SB3BaseHParams(HyperParameters):
    """Hyper-parameters of a model from the `stable_baselines3` package.

    The command-line arguments for these are created with simple-parsing.
    """

    # The policy model to use (MlpPolicy, CnnPolicy, ...)
    policy: Optional[Union[str, Type[BasePolicy]]] = choice(
        "MlpPolicy", "CnnPolicy", default=None
    )
    # # The base policy used by this method
    # policy_base: Type[BasePolicy]

    # learning rate for the optimizer, it can be a function of the current
    # progress remaining (from 1 to 0)
    learning_rate: Union[float, Callable] = log_uniform(
        1e-7, 1e-2, default=1e-4
    )
    # Additional arguments to be passed to the policy on creation
    policy_kwargs: Optional[Dict[str, Any]] = None
    # the log location for tensorboard (if None, no logging)
    tensorboard_log: Optional[str] = None
    # The verbosity level: 0 none, 1 training information, 2 debug
    verbose: int = 1
    # Device on which the code should run. By default, it will try to use a Cuda
    # compatible device and fallback to cpu if it is not possible.
    device: Union[torch.device, str] = "auto"

    # # Whether the algorithm supports training with multiple environments (as in A2C)
    # support_multi_env: bool = False

    # Whether to create a second environment that will be used for evaluating
    # the agent periodically. (Only available when passing string for the
    # environment)
    create_eval_env: bool = False

    # # When creating an environment, whether to wrap it or not in a Monitor wrapper.
    # monitor_wrapper: bool = True

    # Seed for the pseudo random generators
    seed: Optional[int] = None

    # # Whether to use generalized State Dependent Exploration (gSDE) instead of
    # action noise exploration (default: False)
    # use_sde: bool = False

    # # Sample a new noise matrix every n steps when using gSDE Default: -1
    # (only sample at the beginning of the rollout)
    # sde_sample_freq: int = -1

    # Whether to clear the experience buffer at the beginning of a new task.
    # NOTE: We use to_dict here so that it doesn't get passed to the Policy class.
    clear_buffers_between_tasks: bool = categorical(
        True, False, default=False, to_dict=False
    )
class HParams(BaseHParams):
    """Hyper-parameters of the Model."""

    # Which algorithm to use for the output head when in an RL setting.
    # TODO: Run the PolicyHead in the following conditions:
    # - Compare the big backward pass vs many small ones
    # - Try to have it learn from pixel input, if possible
    # - Try to have it learn on a multi-task RL setting,
    # TODO: Finish the ActorCritic and EpisodicA2C heads.
    rl_output_head_algo: Type[OutputHead] = choice(
        {
            "reinforce": PolicyHead,
            "a2c_online": ActorCriticHead,
            "a2c_episodic": EpisodicA2C,
        },
        default=EpisodicA2C,
    )
class Boards:
    """ Generate boards and show/detect for configuration file """

    boards: str  # Configuration file (YAML) for calibration boards
    detect: Optional[str] = None  # Show detections from an example image
    show: bool = False  # Show image of boards
    write: Optional[str] = None  # Directory to write board images
    pixels_mm: int = 1  # Pixels per mm of pattern
    margin_mm: int = 20  # Border width in mm
    paper_size_mm: Optional[str] = None  # Paper size in mm WxH
    # The field is Optional and follows fields that have defaults, so it is
    # given an explicit default of None (i.e. no named paper size selected).
    paper_size: Optional[str] = choice(
        *standard_sizes.keys(), default=None
    )  # Paper size by name (one of the keys of `standard_sizes`)

    def execute(self):
        """Run the boards command by passing these options to `show_boards`."""
        show_boards(self)
class OptimizerOpts:
    """ Optimizer settings including outlier rejection settings and parameters to fix/adjust """

    iter: int = 3  # Iterations of bundle adjustment/outlier rejection
    loss: str = choice(
        "linear", "soft_l1", "huber", "arctan", default="linear"
    )  # Loss function to use in bundle adjustment
    outlier_quantile: float = 0.75  # Quantile for outlier rejection (multiplied by threshold factor)
    outlier_threshold: float = 5.0  # Threshold for outliers (factor of quartile of reprojection error)
    auto_scale: Optional[float] = None  # Threshold for auto_scale to reduce outlier influence (factor of upper quartile of reprojection error) - requires non-linear loss

    fix_intrinsic: bool = False  # Constant camera intrinsic parameters
    fix_camera_poses: bool = False  # Constant camera pose (extrinsic) parameters
    fix_board_poses: bool = False  # Constant poses between boards
    fix_motion: bool = False  # Constant camera motion estimates
    adjust_board: bool = False  # Enable optimization for board non-planarity
class TaskHyperParameters(TestSetup, Serializable):
    """HyperParameters for a task-specific model."""

    name: str  # name of the task
    num_layers: int = 1  # number of dense layers
    num_units: int = 8  # units per layer
    activation: str = choice(
        "tanh", "relu", "linear", default="tanh"
    )  # activation function
    use_batchnorm: bool = False  # whether or not to use batch normalization after each dense layer
    use_dropout: bool = True  # whether or not to use dropout after each dense layer
    dropout_rate: float = 0.1  # the dropout rate
    use_image_features: bool = True  # whether or not image features should be used as input
    use_likes: bool = True  # whether or not 'likes' features should be used as input
    l1_reg: float = 0.005  # L1 regularization coefficient
    l2_reg: float = 0.005  # L2 regularization coefficient

    # Whether or not a task-specific Embedding layer should be used on the 'likes' features.
    # When set to 'True', it is expected that no shared embedding is used.
    embed_likes: bool = False
class SB3BaseHParams(HyperParameters):
    """Hyper-parameters of a model from the `stable_baselines3` package.

    The command-line arguments for these are created with simple-parsing.
    """

    # The policy model to use (MlpPolicy, CnnPolicy, ...)
    policy: Optional[Union[str, Type[BasePolicy]]] = choice(
        "MlpPolicy", "CnnPolicy", default=None
    )
    # # The base policy used by this method
    # policy_base: Type[BasePolicy]

    # learning rate for the optimizer, it can be a function of the current
    # progress remaining (from 1 to 0)
    learning_rate: Union[float, Callable] = log_uniform(
        1e-7, 1e-2, default=1e-4
    )
    # Additional arguments to be passed to the policy on creation
    policy_kwargs: Optional[Dict[str, Any]] = None
    # the log location for tensorboard (if None, no logging)
    tensorboard_log: Optional[str] = None
    # The verbosity level: 0 none, 1 training information, 2 debug
    verbose: int = 1
    # Device on which the code should run. By default, it will try to use a Cuda
    # compatible device and fallback to cpu if it is not possible.
    device: Union[torch.device, str] = "auto"

    # # Whether the algorithm supports training with multiple environments (as in A2C)
    # support_multi_env: bool = False

    # Whether to create a second environment that will be used for evaluating
    # the agent periodically. (Only available when passing string for the
    # environment)
    create_eval_env: bool = False

    # # When creating an environment, whether to wrap it or not in a Monitor wrapper.
    # monitor_wrapper: bool = True

    # Seed for the pseudo random generators
    seed: Optional[int] = None
class ClassIncrementalSetting(PassiveSetting, IncrementalSetting): """Supervised Setting where the data is a sequence of 'tasks'. This class is basically is the supervised version of an Incremental Setting The current task can be set at the `current_task_id` attribute. """ Results: ClassVar[Type[Results]] = ClassIncrementalResults # (NOTE: commenting out PassiveSetting.Observations as it is the same class # as Setting.Observations, and we want a consistent method resolution order. @dataclass(frozen=True) class Observations( #PassiveSetting.Observations, IncrementalSetting.Observations): """ Incremental Observations, in a supervised context. """ pass # @dataclass(frozen=True) # class Actions(PassiveSetting.Actions, # IncrementalSetting.Actions): # """Incremental Actions, in a supervised (passive) context.""" # pass # @dataclass(frozen=True) # class Rewards(PassiveSetting.Rewards, # IncrementalSetting.Rewards): # """Incremental Rewards, in a supervised context.""" # pass # Class variable holding a dict of the names and types of all available # datasets. # TODO: Issue #43: Support other datasets than just classification available_datasets: ClassVar[Dict[str, Type[_ContinuumDataset]]] = { c.__name__.lower(): c for c in [ CIFARFellowship, MNISTFellowship, ImageNet100, ImageNet1000, CIFAR10, CIFAR100, EMNIST, KMNIST, MNIST, QMNIST, FashionMNIST, Synbols, ] # "synbols": Synbols, # "synbols_font": partial(Synbols, task="fonts"), } # A continual dataset to use. (Should be taken from the continuum package). dataset: str = choice(available_datasets.keys(), default="mnist") # Transformations to use. See the Transforms enum for the available values. transforms: List[Transforms] = list_field( Transforms.to_tensor, # BUG: The input_shape given to the Model doesn't have the right number # of channels, even if we 'fixed' them here. However the images are fine # after. 
Transforms.three_channels, Transforms.channels_first_if_needed, ) # Either number of classes per task, or a list specifying for # every task the amount of new classes. increment: Union[int, List[int]] = list_field(2, type=int, nargs="*", alias="n_classes_per_task") # The scenario number of tasks. # If zero, defaults to the number of classes divied by the increment. nb_tasks: int = 0 # A different task size applied only for the first task. # Desactivated if `increment` is a list. initial_increment: int = 0 # An optional custom class order, used for NC. class_order: Optional[List[int]] = None # Either number of classes per task, or a list specifying for # every task the amount of new classes (defaults to the value of # `increment`). test_increment: Optional[Union[List[int], int]] = None # A different task size applied only for the first test task. # Desactivated if `test_increment` is a list. Defaults to the # value of `initial_increment`. test_initial_increment: Optional[int] = None # An optional custom class order for testing, used for NC. # Defaults to the value of `class_order`. test_class_order: Optional[List[int]] = None # TODO: Need to put num_workers in only one place. batch_size: int = field(default=32, cmd=False) num_workers: int = field(default=4, cmd=False) # Wether or not to relabel the images to be within the [0, n_classes_per_task] # range. Floating (False by default) in Class-Incremental Setting, but set to True # in domain_incremental Setting. relabel: bool = False def __post_init__(self): """Initializes the fields of the Setting (and LightningDataModule), including the transforms, shapes, etc. """ if isinstance(self.increment, list) and len(self.increment) == 1: # This can happen when parsing a list from the command-line. 
self.increment = self.increment[0] base_reward_space = reward_spaces[self.dataset] # action space = reward space by default base_action_space = base_reward_space if isinstance(base_action_space, spaces.Discrete): # Classification dataset self.num_classes = base_action_space.n # Set the number of tasks depending on the increment, and vice-versa. # (as only one of the two should be used). if self.nb_tasks == 0: self.nb_tasks = self.num_classes // self.increment else: self.increment = self.num_classes // self.nb_tasks else: raise NotImplementedError("TODO: (issue #43)") if not self.class_order: self.class_order = list(range(self.num_classes)) # Test values default to the same as train. self.test_increment = self.test_increment or self.increment self.test_initial_increment = self.test_initial_increment or self.test_increment self.test_class_order = self.test_class_order or self.class_order # TODO: For now we assume a fixed, equal number of classes per task, for # sake of simplicity. We could take out this assumption, but it might # make things a bit more complicated. assert isinstance(self.increment, int) assert isinstance(self.test_increment, int) self.n_classes_per_task: int = self.increment action_space = spaces.Discrete(self.n_classes_per_task) reward_space = spaces.Discrete(self.n_classes_per_task) super().__post_init__( # observation_space=observation_space, action_space=action_space, reward_space=reward_space, # the labels have shape (1,) always. ) self.train_datasets: List[_ContinuumDataset] = [] self.val_datasets: List[_ContinuumDataset] = [] self.test_datasets: List[_ContinuumDataset] = [] # This will be set by the Experiment, or passed to the `apply` method. # TODO: This could be a bit cleaner. self.config: Config # Default path to which the datasets will be downloaded. 
self.data_dir: Optional[Path] = None self.train_env: PassiveEnvironment = None # type: ignore self.val_env: PassiveEnvironment = None # type: ignore self.test_env: PassiveEnvironment = None # type: ignore @property def observation_space(self) -> NamedTupleSpace: """ The un-batched observation space, based on the choice of dataset and the transforms at `self.transforms` (which apply to the train/valid/test environments). The returned spaces is a NamedTupleSpace, with the following properties: - `x`: observation space (e.g. `Image` space) - `task_labels`: Union[Discrete, Sparse[Discrete]] The task labels for each sample. When task labels are not available, the task labels space is Sparse, and entries will be `None`. """ x_space = base_observation_spaces[self.dataset] if not self.transforms: # NOTE: When we don't pass any transforms, continuum scenarios still # at least use 'to_tensor'. x_space = Transforms.to_tensor(x_space) # apply the transforms to the observation space. for transform in self.transforms: x_space = transform(x_space) x_space = add_tensor_support(x_space) task_label_space = spaces.Discrete(self.nb_tasks) if not self.task_labels_at_train_time: task_label_space = Sparse(task_label_space, 1.0) task_label_space = add_tensor_support(task_label_space) return NamedTupleSpace( x=x_space, task_labels=task_label_space, dtype=self.Observations, ) @property def action_space(self) -> spaces.Discrete: """ Action space for this setting. """ if self.relabel: return spaces.Discrete(self.n_classes_per_task) return spaces.Discrete(self.num_classes) # TODO: IDEA: Have the action space only reflect the number of 'current' classes # in order to create a "true" class-incremental learning setting. 
n_classes_seen_so_far = 0 for task_id in range(self.current_task_id): n_classes_seen_so_far += self.num_classes_in_task(task_id) return spaces.Discrete(n_classes_seen_so_far) @property def reward_space(self) -> spaces.Discrete: return self.action_space def apply(self, method: Method, config: Config = None) -> ClassIncrementalResults: """Apply the given method on this setting to producing some results.""" # TODO: It still isn't super clear what should be in charge of creating # the config, and how to create it, when it isn't passed explicitly. self.config: Config if config is not None: self.config = config logger.debug(f"Using Config {self.config}") elif isinstance(getattr(method, "config", None), Config): # If the Method has a `config` attribute that is a Config, use that. self.config = method.config logger.debug(f"Using Config from the Method: {self.config}") else: logger.debug("Parsing the Config from the command-line.") self.config = Config.from_args(self._argv, strict=False) logger.debug(f"Resulting Config: {self.config}") method.configure(setting=self) # Run the main loop (which is defined in IncrementalSetting). results: ClassIncrementalResults = super().main_loop(method) logger.info(results.summary()) method.receive_results(self, results=results) return results def prepare_data(self, data_dir: Path = None, **kwargs): self.config = self.config or Config.from_args(self._argv, strict=False) # if self.batch_size is None: # logger.warning(UserWarning( # f"Using the default batch size of 32. (You can set the " # f"batch size by passing a value to the Setting constructor, or " # f"by setting the attribute inside your 'configure' method) " # )) # self.batch_size = 32 data_dir = data_dir or self.data_dir or self.config.data_dir self.make_dataset(data_dir, download=True) self.data_dir = data_dir super().prepare_data(**kwargs) def setup(self, stage: Optional[str] = None, *args, **kwargs): """ Creates the datasets for each task. 
TODO: Figure out a way of setting data_dir elsewhere maybe? """ assert self.config # self.config = self.config or Config.from_args(self._argv) logger.debug( f"data_dir: {self.data_dir}, setup args: {args} kwargs: {kwargs}") self.train_cl_dataset = self.make_dataset(self.data_dir, download=False, train=True) self.test_cl_dataset = self.make_dataset(self.data_dir, download=False, train=False) self.train_cl_loader: _BaseScenario = self.make_train_cl_loader( self.train_cl_dataset) self.test_cl_loader: _BaseScenario = self.make_test_cl_loader( self.test_cl_dataset) logger.info(f"Number of train tasks: {self.train_cl_loader.nb_tasks}.") logger.info(f"Number of test tasks: {self.train_cl_loader.nb_tasks}.") self.train_datasets.clear() self.val_datasets.clear() self.test_datasets.clear() for task_id, train_dataset in enumerate(self.train_cl_loader): train_dataset, val_dataset = split_train_val( train_dataset, val_split=self.val_fraction) self.train_datasets.append(train_dataset) self.val_datasets.append(val_dataset) for task_id, test_dataset in enumerate(self.test_cl_loader): self.test_datasets.append(test_dataset) super().setup(stage, *args, **kwargs) # TODO: Adding this temporarily just for the competition self.test_boundary_steps = [0] + list( itertools.accumulate(map(len, self.test_datasets)))[:-1] self.test_steps = sum(map(len, self.test_datasets)) # self.test_steps = [0] + list( # itertools.accumulate(map(len, self.test_datasets)) # )[:-1] def get_train_dataset(self) -> Dataset: return self.train_datasets[self.current_task_id] def get_val_dataset(self) -> Dataset: return self.val_datasets[self.current_task_id] def get_test_dataset(self) -> Dataset: return ConcatDataset(self.test_datasets) def train_dataloader(self, batch_size: int = None, num_workers: int = None) -> PassiveEnvironment: """Returns a DataLoader for the train dataset of the current task. 
""" if not self.has_prepared_data: self.prepare_data() if not self.has_setup_fit: self.setup("fit") if self.train_env: self.train_env.close() batch_size = batch_size if batch_size is not None else self.batch_size num_workers = num_workers if num_workers is not None else self.num_workers dataset = self.get_train_dataset() # TODO: Add some kind of Wrapper around the dataset to make it # semi-supervised. env = PassiveEnvironment( dataset, split_batch_fn=self.split_batch_function(training=True), observation_space=self.observation_space, action_space=self.action_space, reward_space=self.reward_space, pin_memory=True, batch_size=batch_size, num_workers=num_workers, # Since the dataset only contains data from the current task(s), it's fine # to shuffle here. TODO: Double-check this. shuffle=True, ) if self.config.render: # TODO: Add a callback wrapper that calls 'env.render' at each step? env = RenderEnvWrapper(env) if self.train_transforms: # TODO: Check that the transforms aren't already being applied in the # 'dataset' portion. env = TransformObservation(env, f=self.train_transforms) if self.monitor_training_performance: env = MeasureSLPerformanceWrapper( env, first_epoch_only=True, wandb_prefix=f"Train/Task {self.current_task_id}", ) self.train_env = env return self.train_env def val_dataloader(self, batch_size: int = None, num_workers: int = None) -> PassiveEnvironment: """Returns a DataLoader for the validation dataset of the current task. 
""" if not self.has_prepared_data: self.prepare_data() if not self.has_setup_fit: self.setup("fit") dataset = self.get_val_dataset() batch_size = batch_size if batch_size is not None else self.batch_size num_workers = num_workers if num_workers is not None else self.num_workers env = PassiveEnvironment( dataset, split_batch_fn=self.split_batch_function(training=True), observation_space=self.observation_space, action_space=self.action_space, reward_space=self.reward_space, pin_memory=True, batch_size=batch_size, num_workers=num_workers, # Since the dataset only contains data from the current task(s), it's fine # to shuffle here. TODO: Double-check this. shuffle=True, ) if self.val_transforms: env = TransformObservation(env, f=self.val_transforms) if self.val_env: self.val_env.close() del self.val_env self.val_env = env return self.val_env def test_dataloader( self, batch_size: int = None, num_workers: int = None ) -> PassiveEnvironment["ClassIncrementalSetting.Observations", Actions, Rewards]: """Returns a DataLoader for the test dataset of the current task. """ if not self.has_prepared_data: self.prepare_data() if not self.has_setup_test: self.setup("test") # Testing this out, we're gonna have a "test schedule" like this to try # to imitate the MultiTaskEnvironment in RL. transition_steps = [0] + list( itertools.accumulate(map(len, self.test_datasets)))[:-1] # Join all the test datasets. 
dataset = self.get_test_dataset() batch_size = batch_size if batch_size is not None else self.batch_size num_workers = num_workers if num_workers is not None else self.num_workers env = PassiveEnvironment( dataset, batch_size=batch_size, num_workers=num_workers, split_batch_fn=self.split_batch_function(training=False), observation_space=self.observation_space, action_space=self.action_space, reward_space=self.reward_space, pretend_to_be_active=True, shuffle=False, ) if self.test_transforms: env = TransformObservation(env, f=self.test_transforms) # NOTE: Two ways of removing the task labels: Either using a different # 'split_batch_fn' at train and test time, or by using this wrapper # which is also used in the RL side of the tree: # TODO: Maybe remove/simplify the 'split_batch_function'. from sequoia.settings.active.continual.wrappers import HideTaskLabelsWrapper if not self.task_labels_at_test_time: env = HideTaskLabelsWrapper(env) # FIXME: Creating a 'task schedule' for the TestEnvironment, mimicing what's in # the RL settings. test_task_schedule = dict.fromkeys( [step // (env.batch_size or 1) for step in transition_steps], range(len(transition_steps)), ) # TODO: Configure the 'monitoring' dir properly. test_dir = "results" test_loop_max_steps = len(dataset) // (env.batch_size or 1) # TODO: Fix this: iteration doesn't ever end for some reason. test_env = ClassIncrementalTestEnvironment( env, directory=test_dir, step_limit=test_loop_max_steps, task_schedule=test_task_schedule, force=True, config=self.config, ) if self.test_env: self.test_env.close() self.test_env = test_env return self.test_env def split_batch_function( self, training: bool ) -> Callable[[Tuple[Tensor, ...]], Tuple[Observations, Rewards]]: """ Returns a callable that is used to split a batch into observations and rewards. 
""" task_classes = { i: self.task_classes(i, train=training) for i in range(self.nb_tasks) } def split_batch( batch: Tuple[Tensor, ...]) -> Tuple[Observations, Rewards]: """Splits the batch into a tuple of Observations and Rewards. Parameters ---------- batch : Tuple[Tensor, ...] A batch of data coming from the dataset. Returns ------- Tuple[Observations, Rewards] A tuple of Observations and Rewards. """ # In this context (class_incremental), we will always have 3 items per # batch, because we use the ClassIncremental scenario from Continuum. assert len(batch) == 3 x, y, t = batch # Relabel y so it is always in [0, n_classes_per_task) for each task. if self.relabel: y = relabel(y, task_classes) if (training and not self.task_labels_at_train_time) or ( not training and not self.task_labels_at_test_time): # Remove the task labels if we're not currently allowed to have # them. # TODO: Using None might cause some issues. Maybe set -1 instead? t = None observations = self.Observations(x=x, task_labels=t) rewards = self.Rewards(y=y) return observations, rewards return split_batch def make_train_cl_loader( self, train_dataset: _ContinuumDataset) -> _BaseScenario: """ Creates a train ClassIncremental object from continuum. """ return ClassIncremental( train_dataset, nb_tasks=self.nb_tasks, increment=self.increment, initial_increment=self.initial_increment, class_order=self.class_order, transformations=self.transforms, ) def make_test_cl_loader(self, test_dataset: _ContinuumDataset) -> _BaseScenario: """ Creates a test ClassIncremental object from continuum. 
""" return ClassIncremental( test_dataset, nb_tasks=self.nb_tasks, increment=self.test_increment, initial_increment=self.test_initial_increment, class_order=self.test_class_order, transformations=self.transforms, ) def make_dataset(self, data_dir: Path, download: bool = True, train: bool = True, **kwargs) -> _ContinuumDataset: # TODO: #7 Use this method here to fix the errors that happen when # trying to create every single dataset from continuum. data_dir = Path(data_dir) if not data_dir.exists(): data_dir.mkdir(parents=True, exist_ok=True) if self.dataset in self.available_datasets: dataset_class = self.available_datasets[self.dataset] return dataset_class(data_path=data_dir, download=download, train=train, **kwargs) elif self.dataset in self.available_datasets.values(): dataset_class = self.dataset return dataset_class(data_path=data_dir, download=download, train=train, **kwargs) elif isinstance(self.dataset, Dataset): logger.info(f"Using a custom dataset {self.dataset}") return self.dataset else: raise NotImplementedError # These methods below are used by the MultiHeadModel, mostly when # using a multihead model, to figure out how to relabel the batches, or how # many classes there are in the current task (since we support a different # number of classes per task). # TODO: Remove this? Since I'm simplifying to a fixed number of classes per # task for now... def num_classes_in_task(self, task_id: int, train: bool) -> Union[int, List[int]]: """ Returns the number of classes in the given task. """ increment = self.increment if train else self.test_increment if isinstance(increment, list): return increment[task_id] return increment def num_classes_in_current_task(self, train: bool = None) -> int: """ Returns the number of classes in the current task. """ # TODO: Its ugly to have the 'method' tell us if we're currently in # train/eval/test, no? Maybe just make a method for each? 
return self.num_classes_in_task(self._current_task_id, train=train) def task_classes(self, task_id: int, train: bool) -> List[int]: """ Gives back the 'true' labels present in the given task. """ start_index = sum( self.num_classes_in_task(i, train) for i in range(task_id)) end_index = start_index + self.num_classes_in_task(task_id, train) if train: return self.class_order[start_index:end_index] else: return self.test_class_order[start_index:end_index] def current_task_classes(self, train: bool) -> List[int]: """ Gives back the labels present in the current task. """ return self.task_classes(self._current_task_id, train) def _check_environments(self): """ Do a quick check to make sure that the dataloaders give back the right observations / reward types. """ for loader_method in [ self.train_dataloader, self.val_dataloader, self.test_dataloader, ]: logger.debug(f"Checking loader method {loader_method.__name__}") env = loader_method(batch_size=5) obs = env.reset() assert isinstance(obs, self.Observations) # Convert the observation to numpy arrays, to make it easier to # check if the elements are in the spaces. obs = obs.numpy() # take a slice of the first batch, to get sample tensors. first_obs = obs[:, 0] # TODO: Here we'd like to be able to check that the first observation # is inside the observation space, but we can't do that because the # task label might be None, and so that would make it fail. x, task_label = first_obs if task_label is None: assert x in self.observation_space[0] for i in range(5): actions = env.action_space.sample() observations, rewards, done, info = env.step(actions) assert isinstance(observations, self.Observations), type(observations) assert isinstance(rewards, self.Rewards), type(rewards) actions = env.action_space.sample() if done: observations = env.reset() env.close()
# Options class with a single string field, `color`, declared through `choice`
# with the candidates "red", "green" and "blue"; "red" is the declared default.
class A(TestSetup):
    color: str = choice(
        "red",
        "green",
        "blue",
        default="red",
    )
# Options class whose `favorite_color` field is declared through `choice` over
# the `Color` type, with `Color.orange` as the declared default.
class Something2(TestSetup):
    favorite_color: Color = choice(
        Color,
        default=Color.orange,
    )
class Experiment(Parseable, Serializable):
    """Applies a Method to an experimental Setting to obtain Results.

    When the `setting` is not set, this will apply the chosen method on all of
    its "applicable" settings (i.e. all subclasses of its target setting).

    When the `method` is not set, this will apply all applicable methods on the
    chosen setting.
    """

    # Which experimental setting to use. When left unset, will evaluate the
    # provided method on all applicable settings.
    setting: Optional[Union[Setting, Type[Setting]]] = choice(
        {setting.get_name(): setting for setting in all_settings},
        default=None,
        type=str,
    )
    # Path to a json/yaml file containing preset options for the chosen setting.
    # Can also be one of the keys from the `setting_presets` dictionary,
    # for convenience.
    benchmark: Optional[Union[str, Path]] = None

    # Which experimental method to use. When left unset, will evaluate all
    # compatible methods on the provided setting.
    method: Optional[Union[str, Method, Type[Method]]] = choice(
        get_method_names(), default=None
    )

    # All the other configuration options, which are independent of the choice
    # of Setting or of Method, go in this next dataclass here! For example,
    # things like the log directory, whether Cuda is used, etc.
    config: Config = mutable_field(Config)

    wandb: Optional[WandbConfig] = None

    @staticmethod
    def _display_path(path: Union[str, Path]) -> Path:
        """Return `path` relative to the working directory when possible.

        `Path.relative_to` raises a ValueError when `path` is not located under
        the working directory; in that case `path` is returned unchanged so
        that building an error message can never fail.
        """
        path = Path(path)
        try:
            return path.relative_to(os.getcwd())
        except ValueError:
            return path

    def __post_init__(self):
        """Resolve `setting`, `method` and `benchmark` into usable objects.

        - String values of `setting` / `method` are replaced by the class with
          that name.
        - When `benchmark` is given, it is resolved (file path or key of
          `setting_presets`) and the Setting is loaded from that file.

        Raises
        ------
        RuntimeError
            If neither `setting` nor `method` is set, if the benchmark cannot
            be found, if command-line arguments meant for the Setting are
            passed along with a benchmark, or if the method isn't applicable
            to the setting.
        NotImplementedError
            If a benchmark is given without a method.
        """
        if not (self.setting or self.method):
            raise RuntimeError("One of `setting` or `method` must be set!")

        # All settings have a unique name.
        if isinstance(self.setting, str):
            self.setting = get_class_with_name(self.setting, all_settings)

        # Each Method also has a unique name.
        if isinstance(self.method, str):
            self.method = get_class_with_name(self.method, all_methods)

        if self.benchmark:
            # If the provided benchmark isn't a path, try to get the value from
            # the `setting_presets` dict. If it isn't in the dict, raise an
            # error.
            if not Path(self.benchmark).is_file():
                if self.benchmark in setting_presets:
                    self.benchmark = setting_presets[self.benchmark]
                else:
                    # NOTE: `_display_path` is used rather than a bare
                    # `relative_to`, so that a preset stored outside of the
                    # working directory doesn't turn this error into an
                    # unrelated ValueError.
                    raise RuntimeError(
                        f"Could not find benchmark '{self.benchmark}': it "
                        f"is neither a path to a file or a key of the "
                        f"`setting_presets` dictionary. \n\n"
                        f"Available presets: \n"
                        + "\n".join(
                            f"- {preset_name}: \t{self._display_path(preset_file)}"
                            for preset_name, preset_file in setting_presets.items()
                        )
                    )
            # Creating an experiment for the given setting, loaded from the
            # config file.
            # TODO: IDEA: Do the same thing for loading the Method?
            logger.info(
                f"Will load the options for the setting from the file "
                f"at path {self.benchmark}."
            )
            drop_extras = True
            if self.setting is None:
                # `Logger.warn` is a deprecated alias of `Logger.warning`.
                logger.warning(
                    UserWarning(
                        f"You didn't specify which setting to use, so this will "
                        f"try to infer the correct type of setting to use from the "
                        f"contents of the file, which might not work!\n (Consider "
                        f"running this with the `--setting` option instead."
                    )
                )
                # Find the first type of setting that fits the given file.
                drop_extras = False
                self.setting = Setting

            # Raise an error if any of the args in sys.argv would have been used
            # up by the Setting, just to prevent any ambiguities.
            try:
                _, unused_args = self.setting.from_known_args()
            except ImportError as exc:
                # NOTE: An ImportError can occur here because of a missing OpenGL
                # dependency, since when no arguments are passed, the default RL
                # setting is created (cartpole with pixel observations), which
                # requires a render wrapper to be added (which itself uses
                # pyglet, which uses OpenGL).
                logger.warning(
                    RuntimeWarning(f"Unable to check for unused args: {exc}")
                )
                # In this case, we just pretend that no arguments would have
                # been used.
                unused_args = sys.argv[1:]
            ignored_args = list(set(sys.argv[1:]) - set(unused_args))

            if ignored_args:
                # TODO: This could also be triggered if there were arguments
                # in the method with the same name as some from the Setting.
                raise RuntimeError(
                    f"Cannot pass command-line arguments for the Setting when "
                    f"loading a preset, since these arguments whould have been "
                    f"ignored when creating the setting of type {self.setting} "
                    f"anyway: {ignored_args}"
                )

            assert isclass(self.setting) and issubclass(self.setting, Setting)
            # Actually load the setting from the file.
            # TODO: Why isn't this using `load_benchmark`?
            self.setting = self.setting.load(
                path=self.benchmark, drop_extra_fields=drop_extras
            )
            self.setting.wandb = self.wandb

            if self.method is None:
                raise NotImplementedError(
                    f"For now, you need to specify a Method to use using the "
                    f"`--method` argument when loading the setting from a file."
                )

        if self.setting is not None and self.method is not None:
            if not self.method.is_applicable(self.setting):
                raise RuntimeError(
                    f"Method {self.method} isn't applicable to "
                    f"setting {self.setting}!"
                )

        assert (
            self.setting is None
            or isinstance(self.setting, Setting)
            or issubclass(self.setting, Setting)
        )
        assert (
            self.method is None
            or isinstance(self.method, Method)
            or issubclass(self.method, Method)
        )

    @staticmethod
    def run_experiment(
        setting: Union[Setting, Type[Setting]],
        method: Union[Method, Type[Method]],
        config: Config,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> Results:
        """Launches an experiment, applying `method` onto `setting` and returning
        the corresponding results.

        This assumes that both `setting` and `method` are not None.
        This always returns a single `Results` object.

        If `method` is a class, an instance of it is created from the
        command-line arguments `argv`. NOTE: for now `setting` has to already
        be a `Setting` instance (see the assertion below).

        If `strict_args` is True and there are leftover arguments (not consumed
        by either the Setting or the Method), a RuntimeError is raised.

        This then returns the result of `setting.apply(method, config=config)`.

        Parameters
        ----------
        setting : Union[Setting, Type[Setting]]
            The setting to apply the method on.
        method : Union[Method, Type[Method]]
            The method (or type of method) to apply.
        config : Config
            Configuration options passed to `setting.apply`.
        argv : Union[str, List[str]], optional
            List of command-line args. When not set, uses the contents of
            `sys.argv`. Defaults to `None`.
        strict_args : bool, optional
            Whether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Results
        """
        assert setting is not None and method is not None
        assert isinstance(setting, Setting), f"TODO: Fix this, need to pass a wandb config to the Setting from the experiment!"

        if not (isinstance(setting, Setting) and isinstance(method, Method)):
            setting, method = parse_setting_and_method_instances(
                setting=setting, method=method, argv=argv, strict_args=strict_args
            )

        assert isinstance(setting, Setting)
        assert isinstance(method, Method)
        assert isinstance(config, Config)

        return setting.apply(method, config=config)

    def launch(
        self, argv: Union[str, List[str]] = None, strict_args: bool = False,
    ) -> Results:
        """Launches the experiment, applying `self.method` onto `self.setting`
        and returning the corresponding results.

        This differs from `main` in that this assumes that both `self.setting`
        and `self.method` are not None, and so this always returns a single
        `Results` object.

        When `self.setting` and `self.method` are already instances they are
        used as-is; otherwise instances are created from `argv`.

        Parameters
        ----------
        argv : Union[str, List[str]], optional
            List of command-line args. When not set, uses the contents of
            `sys.argv`. Defaults to `None`.
        strict_args : bool, optional
            Whether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Results
            An object describing the results of applying Method `self.method`
            onto the Setting `self.setting`.
        """
        assert self.setting is not None
        assert self.method is not None
        assert self.config is not None

        # Bind the locals up-front: they used to be assigned only inside the
        # `if` below, which raised an UnboundLocalError whenever both
        # attributes already were instances.
        setting, method = self.setting, self.method
        if not (isinstance(setting, Setting) and isinstance(method, Method)):
            setting, method = parse_setting_and_method_instances(
                setting=setting, method=method, argv=argv, strict_args=strict_args
            )

        setting.wandb = self.wandb
        setting.config = self.config
        return setting.apply(method, config=self.config)

    @classmethod
    def main(
        cls, argv: Union[str, List[str]] = None, strict_args: bool = False,
    ) -> Union[Results, Tuple[Dict, Any], List[Tuple[Dict, Results]]]:
        """Launches one or more experiments from the command-line.

        First, we get the choice of method and setting using a first parser.
        Then, we parse the Setting and Method objects using the remaining args
        with two other parsers.

        Parameters
        ----------
        argv : Union[str, List[str]], optional
            Command-line arguments to use. When None (default), uses
            `sys.argv[1:]`. A string is split with `shlex.split`.
        strict_args : bool, optional
            Whether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Union[Results, Tuple[Dict, Any], List[Tuple[Dict, Results]]]
            Results of the experiment, if only applying a method to a setting.
            Otherwise, if either of `--setting` or `--method` aren't set, this
            is whatever `launch_batch_of_runs` returns.

        Raises
        ------
        RuntimeError
            If neither a setting nor a method was chosen.
        """
        if argv is None:
            argv = sys.argv[1:]
        if isinstance(argv, str):
            argv = shlex.split(argv)

        experiment: Experiment
        experiment, argv = cls.from_known_args(argv)

        setting: Optional[Type[Setting]] = experiment.setting
        method: Optional[Type[Method]] = experiment.method
        config: Config = experiment.config

        if method is None and setting is None:
            raise RuntimeError("One of setting or method must be set.")

        if setting and method:
            # One 'job': Launch it directly.
            setting, method = parse_setting_and_method_instances(
                setting=setting, method=method, argv=argv, strict_args=strict_args
            )
            assert isinstance(setting, Setting)
            assert isinstance(method, Method)
            setting.wandb = experiment.wandb

            # Hand the parsed instances over to the experiment, so that
            # `launch` reuses them rather than creating a second Setting and
            # Method from the same command-line arguments.
            experiment.setting = setting
            experiment.method = method

            results = experiment.launch(argv, strict_args=strict_args)
            print("\n\n EXPERIMENT IS DONE \n\n")
            print(f"Results: {results}")
            return results

        else:
            # TODO: Test out this other case. Haven't used it in a while.
            # TODO: Move this to something like a BatchExperiment?
            all_results = launch_batch_of_runs(
                setting=setting, method=method, argv=argv
            )
            return all_results
# Options class whose `favorite_color` field is declared through `choice` over
# the `Color` type; the default is given as the plain string "blue".
class Something(TestSetup):
    favorite_color: Color = choice(
        Color,
        default="blue",
    )
# Options class whose `favorite_color` field is declared through `choice` over
# the `Color` type, without passing any default.
class Something(TestSetup):
    favorite_color: Color = choice(
        Color,
    )
# Options class whose `favorite_color` field is declared through `choice` over
# the `Color` type, with `Color.green` as the declared default.
class Something(TestSetup):
    favorite_color: Color = choice(
        Color,
        default=Color.green,
    )
class ContinualRLSetting(ActiveSetting, IncrementalSetting): """ Reinforcement Learning Setting where the environment changes over time. This is an Active setting which uses gym environments as sources of data. These environments' attributes could change over time following a task schedule. An example of this could be that the gravity increases over time in cartpole, making the task progressively harder as the agent interacts with the environment. """ # The type of results returned by an RL experiment. Results: ClassVar[Type[Results]] = RLResults @dataclass(frozen=True) class Observations(IncrementalSetting.Observations): """ Observations in a continual RL Setting. """ # Just as a reminder, these are the fields defined in the base classes: # x: Tensor # task_labels: Union[Optional[Tensor], Sequence[Optional[Tensor]]] = None # The 'done' part of the 'step' method. We add this here in case a # method were to iterate on the environments in the dataloader-style so # they also have access to those (i.e. for the BaselineMethod). done: Optional[Sequence[bool]] = None # Same, for the 'info' portion of the result of 'step'. # TODO: If we add the 'task space' (with all the attributes, for instance # then add it to the observations using the `AddInfoToObservations`. # info: Optional[Sequence[Dict]] = None # Image transforms to use. transforms: List[Transforms] = list_field() # Class variable that holds the dict of available environments. available_datasets: ClassVar[Dict[str, str]] = { "cartpole": "CartPole-v0", "pendulum": "Pendulum-v0", "breakout": "Breakout-v0", # "duckietown": "Duckietown-straight_road-v0" } # TODO: Add breakout to 'available_datasets' only when atari_py is installed. # Which environment (a.k.a. "dataset") to learn on. # The dataset could be either a string (env id or a key from the # available_datasets dict), a gym.Env, or a callable that returns a single environment. # If self.dataset isn't one of those, an error will be raised. 
dataset: str = choice(available_datasets, default="cartpole") # The number of tasks. By default 1 for this setting. nb_tasks: int = field(1, alias=["n_tasks", "num_tasks"]) # Max number of steps per task. (Also acts as the "length" of the training # and validation "Datasets") max_steps: int = 100_000 # Maximum episodes per task. # TODO: Test that the limit on the number of episodes actually works. max_episodes: Optional[int] = None # Number of steps per task. When left unset and when `max_steps` is set, # takes the value of `max_steps` divided by `nb_tasks`. steps_per_task: Optional[int] = None # (WIP): Number of episodes per task. episodes_per_task: Optional[int] = None # Total number of steps in the test loop. (Also acts as the "length" of the testing # environment.) test_steps: int = 10_000 # Number of steps per task in the test loop. When left unset and when `test_steps` # is set, takes the value of `test_steps` divided by `nb_tasks`. test_steps_per_task: Optional[int] = None # Standard deviation of the multiplicative Gaussian noise that is used to # create the values of the env attributes for each task. task_noise_std: float = 0.2 # Wether the task boundaries are smooth or sudden. smooth_task_boundaries: bool = True # Wether to observe the state directly, rather than pixels. This can be # useful to debug environments like CartPole, for instance. observe_state_directly: bool = False # Path to a json file from which to read the train task schedule. train_task_schedule_path: Optional[Path] = None # Path to a json file from which to read the validation task schedule. valid_task_schedule_path: Optional[Path] = None # Path to a json file from which to read the test task schedule. test_task_schedule_path: Optional[Path] = None # Wether observations from the environments whould include # the end-of-episode signal. Only really useful if your method will iterate # over the environments in the dataloader style # (as does the baseline method). 
add_done_to_observations: bool = False # The maximum number of steps per episode. When None, there is no limit. max_episode_steps: Optional[int] = None # NOTE: Added this `cmd=False` option to mark that we don't want to generate # any command-line arguments for these fields. train_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False) valid_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False) test_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False) train_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False) valid_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False) test_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False) batch_size: Optional[int] = field(default=None, cmd=False) num_workers: Optional[int] = field(default=None, cmd=False) def __post_init__(self, *args, **kwargs): super().__post_init__(*args, **kwargs) self._new_random_task_on_reset: bool = False # Post processing of the 'dataset' field: if self.dataset in self.available_datasets.keys(): # the environment name was passed, rather than an id # (e.g. 'cartpole' -> 'CartPole-v0"). self.dataset = self.available_datasets[self.dataset] elif self.dataset not in self.available_datasets.values(): # The passed dataset is assumed to be an environment ID, but it # wasn't in the dict of available datasets! We issue a warning, but # proceed to let the user use whatever environment they want to. logger.warning( UserWarning( f"The chosen dataset/environment ({self.dataset}) isn't in the " f"available_datasets dict, so we can't garantee this will work!" ) ) if isinstance(self.dataset, gym.Env) and self.batch_size: raise RuntimeError( f"Batch size should be None when a gym.Env " f"object is passed as `dataset`." ) if not isinstance(self.dataset, (str, gym.Env)) and not callable(self.dataset): raise RuntimeError( f"`dataset` must be either a string, a gym.Env, or a callable. 
" f"(got {self.dataset})" ) # Set the number of tasks depending on the increment, and vice-versa. # (as only one of the two should be used). assert self.max_steps, "assuming this should always be set, for now." # TODO: Clean this up, not super clear what options take precedence on # which other options. # Load the task schedules from the corresponding files, if present. if self.train_task_schedule_path: self.train_task_schedule = self.load_task_schedule( self.train_task_schedule_path ) if self.valid_task_schedule_path: self.valid_task_schedule = self.load_task_schedule( self.valid_task_schedule_path ) if self.test_task_schedule_path: self.test_task_schedule = self.load_task_schedule( self.test_task_schedule_path ) if self.train_task_schedule: if self.steps_per_task is not None: # If steps per task was passed, then we overwrite the keys of the tasks # schedule. self.train_task_schedule = { i * self.steps_per_task: self.train_task_schedule[step] for i, step in enumerate(sorted(self.train_task_schedule.keys())) } else: # A task schedule was passed: infer the number of tasks from it. change_steps = sorted(self.train_task_schedule.keys()) assert 0 in change_steps, "Schedule needs a task at step 0." # TODO: @lebrice: I guess we have to assume that the interval # between steps is constant for now? Do we actually depend on this # being the case? I think steps_per_task is only really ever used # for creating the task schedule, which we already have in this # case. assert ( len(change_steps) >= 2 ), "WIP: need a minimum of two tasks in the task schedule for now." self.steps_per_task = change_steps[1] - change_steps[0] # Double-check that this is the case. for i in range(len(change_steps) - 1): if change_steps[i + 1] - change_steps[i] != self.steps_per_task: raise NotImplementedError( f"WIP: This might not work yet if the tasks aren't " f"equally spaced out at a fixed interval." 
) nb_tasks = len(self.train_task_schedule) if self.smooth_task_boundaries: # NOTE: When in a ContinualRLSetting with smooth task boundaries, # the last entry in the schedule represents the state of the env at # the end of the "task". When there are clear task boundaries (i.e. # when in 'Class'/Task-Incremental RL), the last entry is the start # of the last task. nb_tasks -= 1 if self.nb_tasks != 1: if self.nb_tasks != nb_tasks: raise RuntimeError( f"Passed number of tasks {self.nb_tasks} doesn't match the " f"number of tasks deduced from the task schedule ({nb_tasks})" ) self.nb_tasks = nb_tasks self.max_steps = max(self.train_task_schedule.keys()) if not self.smooth_task_boundaries: # See above note about the last entry. self.max_steps += self.steps_per_task elif self.nb_tasks: if self.steps_per_task: self.max_steps = self.nb_tasks * self.steps_per_task elif self.max_steps: self.steps_per_task = self.max_steps // self.nb_tasks elif self.steps_per_task: if self.nb_tasks: self.max_steps = self.nb_tasks * self.steps_per_task elif self.max_steps: self.nb_tasks = self.max_steps // self.steps_per_task elif self.max_steps: if self.nb_tasks: self.steps_per_task = self.max_steps // self.nb_tasks elif self.steps_per_task: self.nb_tasks = self.max_steps // self.steps_per_task if not all([self.nb_tasks, self.max_steps, self.steps_per_task]): raise RuntimeError( f"You need to provide at least two of 'max_steps', " f"'nb_tasks', or 'steps_per_task'." ) assert self.max_steps == self.nb_tasks * self.steps_per_task if self.test_task_schedule: if 0 not in self.test_task_schedule: raise RuntimeError("Task schedules needs to include an initial task.") if self.test_steps_per_task is not None: # If steps per task was passed, then we overwrite the number of steps # for each task in the schedule to match. 
self.test_task_schedule = { i * self.test_steps_per_task: self.test_task_schedule[step] for i, step in enumerate(sorted(self.test_task_schedule.keys())) } change_steps = sorted(self.test_task_schedule.keys()) assert 0 in change_steps, "Schedule needs to include task at step 0." nb_test_tasks = len(change_steps) if self.smooth_task_boundaries: nb_test_tasks -= 1 assert ( nb_test_tasks == self.nb_tasks ), "nb of tasks should be the same for train and test." self.test_steps_per_task = change_steps[1] - change_steps[0] for i in range(self.nb_tasks - 1): if change_steps[i + 1] - change_steps[i] != self.test_steps_per_task: raise NotImplementedError( "WIP: This might not work yet if the test tasks aren't " "equally spaced out at a fixed interval." ) self.test_steps = max(change_steps) if not self.smooth_task_boundaries: # See above note about the last entry. self.test_steps += self.test_steps_per_task elif self.test_steps_per_task is None: # This is basically never the case, since the test_steps defaults to 10_000. assert ( self.test_steps ), "need to set one of test_steps or test_steps_per_task" self.test_steps_per_task = self.test_steps // self.nb_tasks else: # FIXME: This is too complicated for what is is. # Check that the test steps must either be the default value, or the right # value to use in this case. assert self.test_steps in {10_000, self.test_steps_per_task * self.nb_tasks} assert ( self.test_steps_per_task ), "need to set one of test_steps or test_steps_per_task" self.test_steps = self.test_steps_per_task * self.nb_tasks assert self.test_steps // self.test_steps_per_task == self.nb_tasks if self.smooth_task_boundaries: # If we're operating in the 'Online/smooth task transitions' "regime", # then there is only one "task", and we don't have task labels. # TODO: HOWEVER, the task schedule could/should be able to have more # than one non-stationarity! This indicates a need for a distinction # between 'tasks' and 'non-stationarities' (changes in the env). 
self.known_task_boundaries_at_train_time = False self.known_task_boundaries_at_test_time = False self.task_labels_at_train_time = False self.task_labels_at_test_time = False # self.steps_per_task = self.max_steps # Task schedules for training / validation and testing. # Create a temporary environment so we can extract the spaces and create # the task schedules. with self._make_env( self.dataset, self._temp_wrappers(), self.observe_state_directly ) as temp_env: # FIXME: Replacing the observation space dtypes from their original # 'generated' NamedTuples to self.Observations. The alternative # would be to add another argument to the MultiTaskEnv wrapper, to # pass down a dtype to be set on its observation_space's `dtype` # attribute, which would be ugly. assert isinstance(temp_env.observation_space, NamedTupleSpace) temp_env.observation_space.dtype = self.Observations # Populate the task schedules created above. if not self.train_task_schedule: train_change_steps = list(range(0, self.max_steps, self.steps_per_task)) if self.smooth_task_boundaries: # Add a last 'task' at the end of the 'epoch', so that the # env changes smoothly right until the end. train_change_steps.append(self.max_steps) self.train_task_schedule = self.create_task_schedule( temp_env, train_change_steps, ) assert self.train_task_schedule is not None # The validation task schedule is the same as the one used in # training by default. if not self.valid_task_schedule: self.valid_task_schedule = deepcopy(self.train_task_schedule) if not self.test_task_schedule: # The test task schedule is by default the same as in validation # except that the interval between the tasks may be different, # depending on the value of `self.test_steps_per_task`. 
valid_steps = sorted(self.valid_task_schedule.keys()) valid_tasks = [self.valid_task_schedule[step] for step in valid_steps] self.test_task_schedule = { i * self.test_steps_per_task: deepcopy(task) for i, task in enumerate(valid_tasks) } # Set the spaces using the temp env. self.observation_space = temp_env.observation_space self.action_space = temp_env.action_space self.reward_range = temp_env.reward_range self.reward_space = getattr( temp_env, "reward_space", spaces.Box( low=self.reward_range[0], high=self.reward_range[1], shape=() ), ) del temp_env self.train_env: gym.Env self.valid_env: gym.Env self.test_env: gym.Env def create_task_schedule( self, temp_env: MultiTaskEnvironment, change_steps: List[int] ) -> Dict[int, Dict]: """ Create the task schedule, which maps from a step to the changes that will occur in the environment when that step is reached. Uses the provided `temp_env` to generate the random tasks at the steps given in `change_steps` (a list of integers). Returns a dictionary mapping from integers (the steps) to the changes that will occur in the env at that step. TODO: IDEA: Instead of just setting env attributes, use the `methodcaller` or `attrsetter` from the `operator` built-in package, that way later when we want to add support for Meta-World, we can just use `partial(methodcaller("set_task"), task="new_task")(env)` or something like that (i.e. generalize from changing an attribute to applying a function on the env, which would allow calling methods in addition to setting attributes.) """ task_schedule: Dict[int, Dict] = {} # Start with the default task (step 0) and then add a new task at # intervals of `self.steps_per_task` for task_step in change_steps: if task_step == 0: # Start with the default task, so that we can recover the 'iid' # case with standard env dynamics when there is only one task # and no non-stationarity. 
task_schedule[task_step] = temp_env.default_task else: task_schedule[task_step] = temp_env.random_task() return task_schedule def apply( self, method: Method, config: Config = None ) -> "ContinualRLSetting.Results": """Apply the given method on this setting to producing some results. """ # Use the supplied config, or parse one from the arguments that were # used to create `self`. self.config: Config if config is not None: self.config = config logger.debug(f"Using Config {self.config}") elif isinstance(getattr(method, "config", None), Config): self.config = method.config logger.debug(f"Using Config from the Method: {self.config}") else: logger.debug(f"Parsing the Config from the command-line.") self.config = Config.from_args(self._argv, strict=False) logger.debug(f"Resulting Config: {self.config}") # TODO: Test to make sure that this doesn't cause any other bugs with respect to # the display of stuff: # Call this method, which creates a virtual display if necessary. self.config.get_display() # TODO: Should we really overwrite the method's 'config' attribute here? if not getattr(method, "config", None): method.config = self.config # TODO: Remove `Setting.configure(method)` entirely, from everywhere, # and use the `prepare_data` or `setup` methods instead (since these # `configure` methods aren't using the `method` anyway.) method.configure(setting=self) # BUG This won't work if the task schedule uses callables as the values (as # they aren't json-serializable.) if self._new_random_task_on_reset: logger.info( f"Train tasks: " + json.dumps(list(self.train_task_schedule.values()), indent="\t") ) else: logger.info( f"Train task schedule:" + json.dumps(self.train_task_schedule, indent="\t") ) if self.config.debug: logger.debug( f"Test task schedule:" + json.dumps(self.test_task_schedule, indent="\t") ) # Run the Training loop (which is defined in IncrementalSetting). 
results = self.main_loop(method) logger.info("Results summary:") logger.info(results.to_log_dict()) logger.info(results.summary()) method.receive_results(self, results=results) return results # Run the Test loop (which is defined in IncrementalSetting). # results: RlResults = self.test_loop(method) def setup(self, stage: str = None) -> None: # Called before the start of each task during training, validation and # testing. super().setup(stage=stage) if stage in {"fit", None}: self.train_wrappers = self.create_train_wrappers() self.valid_wrappers = self.create_valid_wrappers() elif stage in {"test", None}: self.test_wrappers = self.create_test_wrappers() def prepare_data(self, *args, **kwargs) -> None: # We don't really download anything atm. if self.config is None: self.config = Config() super().prepare_data(*args, **kwargs) def train_dataloader( self, batch_size: int = None, num_workers: int = None ) -> ActiveEnvironment: """Create a training gym.Env/DataLoader for the current task. Parameters ---------- batch_size : int, optional The batch size, which in this case is the number of environments to run in parallel. When `None`, the env won't be vectorized. Defaults to None. num_workers : int, optional The number of workers (processes) to use in the vectorized env. When None, the envs are run in sequence, which could be very slow. Only applies when `batch_size` is not None. Defaults to None. Returns ------- GymDataLoader A (possibly vectorized) environment/dataloader for the current task. """ if not self.has_prepared_data: self.prepare_data() # NOTE: We actually want to call setup every time, so we re-create the # wrappers for each task. 
# if not self.has_setup_fit: self.setup("fit") batch_size = batch_size or self.batch_size num_workers = num_workers if num_workers is not None else self.num_workers env_factory = partial( self._make_env, base_env=self.dataset, wrappers=self.train_wrappers, observe_state_directly=self.observe_state_directly, ) env_dataloader = self._make_env_dataloader( env_factory, batch_size=batch_size, num_workers=num_workers, max_steps=self.steps_per_task, max_episodes=self.episodes_per_task, ) if self.monitor_training_performance: from sequoia.settings.passive.cl.measure_performance_wrapper import ( MeasureRLPerformanceWrapper, ) env_dataloader = MeasureRLPerformanceWrapper( env_dataloader, wandb_prefix=f"Train/Task {self.current_task_id}" ) self.train_env = env_dataloader # BUG: There is a mismatch between the train env's observation space and the # shape of its observations. self.observation_space = self.train_env.observation_space return self.train_env def val_dataloader( self, batch_size: int = None, num_workers: int = None ) -> Environment: """Create a validation gym.Env/DataLoader for the current task. Parameters ---------- batch_size : int, optional The batch size, which in this case is the number of environments to run in parallel. When `None`, the env won't be vectorized. Defaults to None. num_workers : int, optional The number of workers (processes) to use in the vectorized env. When None, the envs are run in sequence, which could be very slow. Only applies when `batch_size` is not None. Defaults to None. Returns ------- GymDataLoader A (possibly vectorized) environment/dataloader for the current task. 
""" if not self.has_prepared_data: self.prepare_data() self.setup("fit") env_factory = partial( self._make_env, base_env=self.dataset, wrappers=self.valid_wrappers, observe_state_directly=self.observe_state_directly, ) env_dataloader = self._make_env_dataloader( env_factory, batch_size=batch_size or self.batch_size, num_workers=num_workers if num_workers is not None else self.num_workers, max_steps=self.steps_per_task, max_episodes=self.episodes_per_task, ) self.val_env = env_dataloader return self.val_env def test_dataloader( self, batch_size: int = None, num_workers: int = None ) -> TestEnvironment: """Create the test 'dataloader/gym.Env' for all tasks. NOTE: This test environment isn't just for the current task, it actually contains the sequence of all tasks. This is different than the train or validation environments, since if the task labels are available at train time, then calling train/valid_dataloader` returns the envs for the current task only, and the `.fit` method is called once per task. This environment is also different in that it is wrapped with a Monitor, which we might eventually use to save the results/gifs/logs of the testing runs. Parameters ---------- batch_size : int, optional The batch size, which in this case is the number of environments to run in parallel. When `None`, the env won't be vectorized. Defaults to None. num_workers : int, optional The number of workers (processes) to use in the vectorized env. When None, the envs are run in sequence, which could be very slow. Only applies when `batch_size` is not None. Defaults to None. Returns ------- TestEnvironment A testing environment which keeps track of the performance of the actor and accumulates logs/statistics that are used to eventually create the 'Result' object. """ if not self.has_prepared_data: self.prepare_data() self.setup("test") # BUG: gym.wrappers.Monitor doesn't want to play nice when applied to # Vectorized env, it seems.. 
# FIXME: Remove this when the Monitor class works correctly with # batched environments. batch_size = batch_size or self.batch_size if batch_size is not None: logger.warn( UserWarning( colorize( f"WIP: Only support batch size of `None` (i.e., a single env) " f"for the test environments of RL Settings at the moment, " f"because the Monitor class from gym doesn't work with " f"VectorEnvs. (batch size was {batch_size})", "yellow", ) ) ) batch_size = None num_workers = num_workers if num_workers is not None else self.num_workers env_factory = partial( self._make_env, base_env=self.dataset, wrappers=self.test_wrappers, observe_state_directly=self.observe_state_directly, ) # TODO: Pass the max_steps argument to this `_make_env_dataloader` method, # rather than to a `step_limit` on the TestEnvironment. env_dataloader = self._make_env_dataloader( env_factory, batch_size=batch_size, num_workers=num_workers, ) # TODO: We should probably change the max_steps depending on the # batch size of the env. test_loop_max_steps = self.test_steps // (batch_size or 1) # TODO: Find where to configure this 'test directory' for the outputs of # the Monitor. test_dir = "results" # TODO: Debug wandb Monitor integration. self.test_env = ContinualRLTestEnvironment( env_dataloader, task_schedule=self.test_task_schedule, directory=test_dir, step_limit=test_loop_max_steps, config=self.config, force=True, video_callable=None if self.config.render else False, ) return self.test_env @property def phases(self) -> int: """The number of training 'phases', i.e. how many times `method.fit` will be called. In the case of ContinualRL, fit is only called once, with an environment that shifts between all the tasks. """ return 1 @staticmethod def _make_env( base_env: Union[str, gym.Env, Callable[[], gym.Env]], wrappers: List[Callable[[gym.Env], gym.Env]] = None, observe_state_directly: bool = False, ) -> gym.Env: """ Helper function to create a single (non-vectorized) environment. 
""" env: gym.Env if isinstance(base_env, str): if base_env.startswith("MetaMonsterKong") and observe_state_directly: env = gym.make(base_env, observe_state=True) else: env = gym.make(base_env) elif isinstance(base_env, gym.Env): env = base_env elif callable(base_env): env = base_env() else: raise RuntimeError( f"base_env should either be a string, a callable, or a gym " f"env. (got {base_env})." ) for wrapper in wrappers: env = wrapper(env) return env def _make_env_dataloader( self, env_factory: Callable[[], gym.Env], batch_size: Optional[int], num_workers: Optional[int] = None, seed: Optional[int] = None, max_steps: Optional[int] = None, max_episodes: Optional[int] = None, ) -> GymDataLoader: """ Helper function for creating a (possibly vectorized) environment. """ logger.debug( f"batch_size: {batch_size}, num_workers: {num_workers}, seed: {seed}" ) env: Union[gym.Env, gym.vector.VectorEnv] if batch_size is None: env = env_factory() else: env = make_batched_env( env_factory, batch_size=batch_size, num_workers=num_workers, # TODO: Still debugging shared memory + custom spaces (e.g. Sparse). shared_memory=False, ) ## Apply the "post-batch" wrappers: # from sequoia.common.gym_wrappers import ConvertToFromTensors # TODO: Only the BaselineMethod requires this, we should enable it only # from the BaselineMethod, and leave it 'off' by default. if self.add_done_to_observations: env = AddDoneToObservation(env) # # Convert the samples to tensors and move them to the right device. # env = ConvertToFromTensors(env) # env = ConvertToFromTensors(env, device=self.config.device) # Add a wrapper that converts numpy arrays / etc to Observations/Rewards # and from Actions objects to numpy arrays. env = TypedObjectsWrapper( env, observations_type=self.Observations, rewards_type=self.Rewards, actions_type=self.Actions, ) # Create an IterableDataset from the env using the EnvDataset wrapper. 
dataset = EnvDataset(env, max_steps=max_steps, max_episodes=max_episodes,) # Create a GymDataLoader for the EnvDataset. env_dataloader = GymDataLoader(dataset) if batch_size and seed: # Seed each environment with its own seed (based on the base seed). env.seed([seed + i for i in range(env_dataloader.num_envs)]) else: env.seed(seed) return env_dataloader def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]: """Get the list of wrappers to add to each training environment. The result of this method must be pickleable when using multiprocessing. Returns ------- List[Callable[[gym.Env], gym.Env]] [description] """ # We add a restriction to prevent users from getting data from # previous or future tasks. # TODO: This assumes that tasks all have the same length. starting_step = self.current_task_id * self.steps_per_task max_steps = starting_step + self.steps_per_task - 1 return self._make_wrappers( task_schedule=self.train_task_schedule, sharp_task_boundaries=self.known_task_boundaries_at_train_time, task_labels_available=self.task_labels_at_train_time, transforms=self.train_transforms, starting_step=starting_step, max_steps=max_steps, new_random_task_on_reset=self._new_random_task_on_reset, ) def create_valid_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]: """Get the list of wrappers to add to each validation environment. The result of this method must be pickleable when using multiprocessing. Returns ------- List[Callable[[gym.Env], gym.Env]] [description] TODO: Decide how this 'validation' environment should behave in comparison with the train and test environments. """ # We add a restriction to prevent users from getting data from # previous or future tasks. # TODO: Should the validation environment only be for the current task? 
starting_step = self.current_task_id * self.steps_per_task max_steps = starting_step + self.steps_per_task - 1 return self._make_wrappers( task_schedule=self.valid_task_schedule, sharp_task_boundaries=self.known_task_boundaries_at_train_time, task_labels_available=self.task_labels_at_train_time, transforms=self.val_transforms, starting_step=starting_step, max_steps=max_steps, new_random_task_on_reset=self._new_random_task_on_reset, ) def create_test_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]: """Get the list of wrappers to add to a single test environment. The result of this method must be pickleable when using multiprocessing. Returns ------- List[Callable[[gym.Env], gym.Env]] [description] """ return self._make_wrappers( task_schedule=self.test_task_schedule, sharp_task_boundaries=self.known_task_boundaries_at_test_time, task_labels_available=self.task_labels_at_test_time, transforms=self.test_transforms, starting_step=0, max_steps=self.max_steps, new_random_task_on_reset=self._new_random_task_on_reset, ) def load_task_schedule(self, file_path: Path) -> Dict[int, Dict]: """ Load a task schedule from the given path. """ with open(file_path) as f: task_schedule = json.load(f) return {int(k): task_schedule[k] for k in sorted(task_schedule.keys())} def _make_wrappers( self, task_schedule: Dict[int, Dict], sharp_task_boundaries: bool, task_labels_available: bool, transforms: List[Transforms], starting_step: int, max_steps: int, new_random_task_on_reset: bool, ) -> List[Callable[[gym.Env], gym.Env]]: """ helper function for creating the train/valid/test wrappers. These wrappers get applied *before* the batching, if applicable. """ wrappers: List[Callable[[gym.Env], gym.Env]] = [] # NOTE: When transitions are smooth, there are no "task boundaries". assert sharp_task_boundaries == (not self.smooth_task_boundaries) # TODO: Add some kind of Wrapper around the dataset to make it # semi-supervised? 
if self.max_episode_steps: wrappers.append( partial(TimeLimit, max_episode_steps=self.max_episode_steps) ) if is_classic_control_env(self.dataset) and not self.observe_state_directly: # If we are in a classic control env, and we dont want the state to # be fully-observable (i.e. we want pixel observations rather than # getting the pole angle, velocity, etc.), then add the # PixelObservation wrapper to the list of wrappers. wrappers.append(PixelObservationWrapper) wrappers.append(ImageObservations) if ( isinstance(self.dataset, str) and self.dataset.lower().startswith("metamonsterkong") and not self.observe_state_directly ): # TODO: Do we need the AtariPreprocessing wrapper on MonsterKong? # wrappers.append(partial(AtariPreprocessing, frame_skip=1)) pass elif is_atari_env(self.dataset): # TODO: Test & Debug this: Adding the Atari preprocessing wrapper. # TODO: Figure out the differences (if there are any) between the # AtariWrapper from SB3 and the AtariPreprocessing wrapper from gym. wrappers.append(AtariWrapper) # wrappers.append(AtariPreprocessing) wrappers.append(ImageObservations) # Apply image transforms if the env will have image-like obs space if not self.observe_state_directly: # wrappers.append(ImageObservations) # Wrapper to apply the image transforms to the env. wrappers.append(partial(TransformObservation, f=transforms)) # Add a wrapper which will add non-stationarity to the environment. # The "task" transitions will either be sharp or smooth. # In either case, the task ids for each sample are added to the # observations, and the dicts containing the task information (i.e. the # current values of the env attributes from the task schedule) get added # to the 'info' dicts. if sharp_task_boundaries: assert self.nb_tasks >= 1 # Add a wrapper that creates sharp tasks. cl_wrapper = MultiTaskEnvironment else: # Add a wrapper that creates smooth tasks. 
cl_wrapper = SmoothTransitions wrappers.append( partial( cl_wrapper, noise_std=self.task_noise_std, task_schedule=task_schedule, add_task_id_to_obs=True, add_task_dict_to_info=True, starting_step=starting_step, new_random_task_on_reset=new_random_task_on_reset, max_steps=max_steps, ) ) # If the task labels aren't available, we then add another wrapper that # hides that information (setting both of them to None) and also marks # those spaces as `Sparse`. if not task_labels_available: # NOTE: This sets the task labels to None, rather than removing # them entirely. # wrappers.append(RemoveTaskLabelsWrapper) wrappers.append(HideTaskLabelsWrapper) return wrappers def _temp_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]: """ Gets the minimal wrappers needed to figure out the Spaces of the train/valid/test environments. This is called in the 'constructor' (__post_init__) to set the Setting's observation/action/reward spaces, so this should depend on as little state from `self` as possible, since not all attributes have been defined at the time when this is called. """ return self._make_wrappers( task_schedule=self.train_task_schedule, sharp_task_boundaries=self.known_task_boundaries_at_train_time, task_labels_available=self.task_labels_at_train_time, transforms=self.train_transforms, # These two shouldn't matter really: starting_step=0, max_steps=self.max_steps, new_random_task_on_reset=self._new_random_task_on_reset, )
class DiscriminatorHParams(ConvBlock):
    """Settings of the Discriminator model.

    Extends `ConvBlock` with the choice of optimizer.
    """

    # Name of the optimizer: one of "ADAM", "RMSPROP" or "SGD" (defaults to "ADAM").
    optimizer: str = choice("ADAM", "RMSPROP", "SGD", default="ADAM")