def check_is_multitask_env(env: Environment, has_rewards: bool):
    # dataloader-style:
    for i, (observations, rewards) in itertools.islice(enumerate(env), 10):
        assert isinstance(observations, MultiTaskSetting.Observations)
        assert len(set(observations.task_labels.cpu().tolist())) > 1
        if has_rewards:
            assert isinstance(rewards, MultiTaskSetting.Rewards)
            # Check that there is no relabelling happening, by checking that there
            # are more distinct y's than there usually are classes in each batch.
            assert len(set(rewards.y.cpu().tolist())) > 2
        else:
            assert rewards is None

    # gym-style interaction:
    obs = env.reset()
    assert env.observation_space.contains(obs.numpy())
    done = False
    steps = 0
    while not done and steps < 10:
        action = Actions(y_pred=torch.randint(10, [env.batch_size]))
        # BUG: convert_tensors seems to be causing issues again: We shouldn't have
        # to manually convert obs to numpy before checking `obs in obs_space`.
        # TODO: Also not super clean that we can't just do `action in action_space`.
        # assert action.numpy() in env.action_space
        assert action.y_pred.numpy() in env.action_space
        obs, reward, done, info = env.step(action)
        assert obs.numpy() in env.observation_space
        assert reward.y in env.reward_space
        steps += 1
    assert done is False
    assert steps == 10
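# A minimal sketch of how the check above might be invoked from a test. The
# setting construction, the `prepare_data`/`setup` calls, and the
# `train_dataloader` arguments below are assumptions made for illustration,
# not the confirmed API of the setting:
def test_train_env_is_multitask():
    setting = MultiTaskSetting(dataset="mnist")
    setting.prepare_data()
    setting.setup()
    train_env = setting.train_dataloader(batch_size=32)
    check_is_multitask_env(train_env, has_rewards=True)
    train_env.close()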
def shared_step(
    self,
    batch: Tuple[Observations, Rewards],
    batch_idx: int,
    environment: Environment,
    loss_name: str,
    dataloader_idx: int = None,
    optimizer_idx: int = None,
) -> Dict:
    """ This is the shared step for this 'example' LightningModule.

    Feel free to customize/change it if you want!
    """
    if dataloader_idx is not None:
        assert isinstance(dataloader_idx, int)
        loss_name += f"/{dataloader_idx}"

    # Split the batch into observations and rewards.
    # NOTE: Only in the case of the Supervised settings do we ever get the
    # Rewards at the same time as the Observations.
    # TODO: It would be nice if we could actually do the same things for
    # both sides of the tree here..
    observations, rewards = self.split_batch(batch)

    # FIXME: Remove this, debugging:
    assert isinstance(observations, Observations), observations
    assert isinstance(observations.x, Tensor), observations.shapes

    # Get the forward pass results, containing:
    # - "observation": the augmented/transformed/processed observation.
    # - "representations": the representations for the observations.
    # - "actions": The actions (predictions)
    forward_pass: ForwardPass = self(observations)

    # Get the actions from the forward pass:
    actions = forward_pass.actions

    if rewards is None:
        # Get the reward from the environment (the dataloader).
        if self.config.debug and self.config.render:
            environment.render("human")
            # import matplotlib.pyplot as plt
            # plt.waitforbuttonpress(10)
        rewards = environment.send(actions)
        assert rewards is not None

    loss: Loss = self.get_loss(forward_pass, rewards, loss_name=loss_name)
    return {
        "loss": loss.loss,
        "loss_object": loss,
    }
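# A sketch of how the shared step above could be reused from the usual
# LightningModule hooks. The `self.train_environment` / `self.val_environment`
# attributes and the "train" / "val" loss names are assumptions made for this
# example, not something defined by the snippet above:
def training_step(self, batch, batch_idx: int, optimizer_idx: int = None) -> Dict:
    return self.shared_step(
        batch,
        batch_idx,
        environment=self.train_environment,  # assumed attribute holding the train env
        loss_name="train",
        optimizer_idx=optimizer_idx,
    )

def validation_step(self, batch, batch_idx: int, dataloader_idx: int = None) -> Dict:
    return self.shared_step(
        batch,
        batch_idx,
        environment=self.val_environment,  # assumed attribute holding the validation env
        loss_name="val",
        dataloader_idx=dataloader_idx,
    )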
def fit(self, train_env: Environment, valid_env: Environment):
    for i, batch in enumerate(train_env):
        if isinstance(batch, Observations):
            observations, rewards = batch, None
        else:
            assert isinstance(batch, tuple) and len(batch) == 2
            observations, rewards = batch

        y_preds = train_env.action_space.sample()

        if rewards is None:
            action_space = train_env.action_space
            if train_env.action_space.shape:
                # This is a bit complicated, but it's needed because the last batch
                # might have a different batch dimension than the env's action
                # space (this only happens on the last batch in supervised learning).
                # TODO: Should we perhaps drop the last batch?
                batch_size = getattr(train_env, "num_envs", getattr(train_env, "batch_size", 0))
                env_is_batched = batch_size is not None and batch_size >= 1
                if env_is_batched:
                    # NOTE: Need to pass an action space that actually reflects the
                    # batch size, even for the last batch!
                    obs_batch_size = observations.x.shape[0] if observations.x.shape else None
                    action_space_batch_size = (
                        train_env.action_space.shape[0] if train_env.action_space.shape else None
                    )
                    if obs_batch_size is not None and obs_batch_size != action_space_batch_size:
                        action_space = batch_space(train_env.single_action_space, obs_batch_size)

            y_preds = action_space.sample()
            rewards = train_env.send(Actions(y_pred=y_preds))
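# A self-contained sketch of the `batch_space` trick used above: when the final
# batch is smaller than the environment's batch size, a fresh action space is
# built so that `.sample()` yields exactly one action per remaining sample.
# The concrete spaces below are made up for illustration:
from gym.spaces import Discrete
from gym.vector.utils import batch_space

single_action_space = Discrete(10)                      # one class prediction per sample
regular_space = batch_space(single_action_space, n=32)  # matches the usual batch size
last_space = batch_space(single_action_space, n=17)     # matches a smaller final batch
assert regular_space.sample().shape == (32,)
assert last_space.sample().shape == (17,)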
def fit(self, train_env: Environment, valid_env: Environment): """ Example train loop. You can do whatever you want with train_env and valid_env here. NOTE: In the Settings where task boundaries are known (in this case all the supervised CL settings), this will be called once per task. """ # configure() will have been called by the setting before we get here. episodes = 0 with tqdm.tqdm(desc="training") as train_pbar: while not train_env.is_closed(): for i, batch in enumerate(train_env): if isinstance(batch, Observations): observations, rewards = batch, None else: observations, rewards = batch batch_size = observations.x.shape[0] y_pred = train_env.action_space.sample() # If we're at the last batch, it might have a different size, so w # give only the required number of values. if isinstance(y_pred, (np.ndarray, Tensor)): if y_pred.shape[0] != batch_size: y_pred = y_pred[:batch_size] if rewards is None: rewards = train_env.send(y_pred) train_pbar.set_postfix({ "Episode": episodes, "Step": i, }) # train as you usually would. episodes += 1 if self.max_train_episodes and episodes >= self.max_train_episodes: train_env.close() break
def shared_step(
    self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment
) -> Tuple[Tensor, Dict]:
    """Shared step used for both training and validation.

    Parameters
    ----------
    batch : Tuple[Observations, Optional[Rewards]]
        Batch containing Observations, and optional Rewards. When the Rewards are
        None, it means that we'll need to provide the Environment with actions
        before we can get the Rewards (e.g. image labels) back.

        This happens for example when being applied in a Setting which cares
        about sample efficiency or training performance.

    environment : Environment
        The environment we're currently interacting with. Used to provide the
        rewards when they aren't already part of the batch, for example when our
        performance is being monitored during training.

    Returns
    -------
    Tuple[Tensor, Dict]
        The Loss tensor, and a dict of metrics to be logged.
    """
    # Since we're training on a Passive environment, we will get both observations
    # and rewards, unless we're being evaluated based on our training performance,
    # in which case we will need to send actions to the environments before we can
    # get the corresponding rewards (image labels) back.
    observations: Observations = batch[0]
    rewards: Optional[Rewards] = batch[1]

    # Get the predictions:
    logits, _ = self(observations)
    y_pred = logits.argmax(-1)

    if rewards is None:
        # If the rewards in the batch were None, it means we're expected to give
        # actions before we can get rewards back from the environment.
        # This happens when the Setting is monitoring our training performance.
        rewards = environment.send(Actions(y_pred))

    assert rewards is not None
    image_labels = rewards.y

    loss = self.loss(logits, image_labels)
    accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
    metrics_dict = {"accuracy": accuracy}
    return loss, metrics_dict
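# A minimal sketch of how the (loss, metrics) pair returned above might be
# consumed in a plain training loop; `self.optimizer` is an assumption made for
# this example, not part of the snippet above:
def training_loop(self, train_env: Environment) -> None:
    for batch in train_env:
        loss, metrics = self.shared_step(batch, environment=train_env)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        print(f"loss={loss.item():.4f} accuracy={metrics['accuracy']:.3f}")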
def fit(self, train_env: Environment, valid_env: Environment):
    for i, batch in enumerate(train_env):
        if isinstance(batch, Observations):
            observations, rewards = batch, None
        else:
            assert isinstance(batch, tuple) and len(batch) == 2
            observations, rewards = batch

        y_preds = train_env.action_space.sample()

        if rewards is None:
            action_space = train_env.action_space
            if train_env.action_space.shape:
                obs_batch_size = observations.x.shape[0]
                # BUG: Fix the `batch_size` attribute on `Batch` so it works
                # even when task labels are None, by checking whether there is
                # one or more shapes, and then, if there are, that the first
                # dimensions match between those.
                action_space_batch_size = action_space.shape[0]
                if obs_batch_size != action_space_batch_size:
                    action_space = batch_space(train_env.single_action_space, obs_batch_size)

            rewards = train_env.send(Actions(action_space.sample()))
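# A sketch of the `batch_size` fix hinted at in the BUG comment above: gather the
# shapes of all non-None fields and, if there are any, check that their first
# dimensions agree before returning that dimension. This assumes `Batch` behaves
# like a mapping over its fields; it illustrates the idea, not the actual
# implementation:
from typing import Optional

@property
def batch_size(self) -> Optional[int]:
    # Shapes of every field that is set and array-like (e.g. x, task_labels, y).
    shapes = [v.shape for v in self.values() if v is not None and hasattr(v, "shape")]
    first_dims = {shape[0] for shape in shapes if len(shape) > 0}
    if not first_dims:
        return None
    assert len(first_dims) == 1, f"Mismatched batch dimensions: {first_dims}"
    return first_dims.pop()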