    def compute_advantage(self, batch: Batch, last_r: float, gamma: float = 0.9,
                          lamda: float = 1.0, use_gae: bool = True, use_critic: bool = True):
        """
         Given a rollout, compute its value targets and the advantage
         Args: batch (Batch): batch of a single trajectory
               last_r (float): value estimation for the last observation
               gamma (float): Discount factor
               lambda (float): parameter for GAE
               use_gae (bool): using Generalized Advantage Estimation
               use_critic (bool): whether to use critic (value estimation), setting this to false will use 0 as baseline
        
         Returns: batch (Batch): object with experience from batch and processed rewards
        """  

        assert "vf_preds" in batch or not use_critic, \
            "use_critic=True requires value estimates (vf_preds) in the batch!"
        assert use_critic or not use_gae, "Can't use GAE without a critic!"

        if use_gae:
            # Append the bootstrap value estimate so V(s_{t+1}) exists for the last step.
            vpred_t = np.concatenate([batch.vf_preds, np.array([last_r])])
            # TD residuals: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t).
            delta_t = batch.rew + gamma * vpred_t[1:] - vpred_t[:-1]
            # This formula for the advantage comes from "Generalized Advantage
            # Estimation": https://arxiv.org/abs/1506.02438
            batch.advantages = self.discount_cumsum(delta_t, gamma * lamda)
            batch.value_targets = (batch.advantages + batch.vf_preds).astype(np.float32)

        else:
            # No GAE: fall back to plain discounted returns, bootstrapped with last_r.
            rewards_plus_v = np.concatenate([batch.rew, np.array([last_r])])
            discounted_returns = self.discount_cumsum(rewards_plus_v, gamma)[:-1].astype(np.float32)

            if use_critic:
                # Advantage = return minus the value-function baseline.
                batch.advantages = discounted_returns - batch.vf_preds
                batch.value_targets = discounted_returns
            else:
                # No critic: use the raw returns as advantages and a zero baseline.
                batch.advantages = discounted_returns
                batch.value_targets = np.zeros_like(batch.advantages)

        batch.advantages = batch.advantages.astype(np.float32)

        return batch
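
    # NOTE: the helper below is not part of the original snippet. It is a minimal
    # sketch of the discounted cumulative sum that compute_advantage calls as
    # self.discount_cumsum(...). The scipy lfilter trick is a common way to compute
    # it, but treat this method (and its placement on the class) as an assumption,
    # not the repository's actual implementation.
    def discount_cumsum(self, x: np.ndarray, gamma: float) -> np.ndarray:
        """Reverse discounted cumulative sum: y[t] = x[t] + gamma * y[t + 1].

        Example: discount_cumsum([1., 1., 1.], 0.5) -> [1.75, 1.5, 1.0]
        """
        from scipy.signal import lfilter  # assumed to be available in this environment
        # lfilter with coefficients b=[1], a=[1, -gamma] realizes y[t] = x[t] + gamma * y[t - 1];
        # applying it to the reversed input and reversing the output gives the backward sum.
        return lfilter([1], [1, float(-gamma)], x[::-1], axis=0)[::-1]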