def forward(  # type:ignore
    self,
    observations: ObservationType,
    memory: Memory,
    prev_actions: torch.Tensor,
    masks: torch.FloatTensor,
) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
    """Run one rollout step (or a sequence of steps) of the two-phase
    walkthrough/unshuffle policy with attention-pooled visual features.

    The method:
      1. Builds a per-step phase mask from observations and derives two reset
         masks (one that additionally suppresses resets at the start of the
         unshuffle phase).
      2. Computes attention-weighted visual features from the current and
         "unshuffled" (goal) images.
      3. Steps the recurrent state encoder one timestep at a time, maintaining
         a running `walkthrough_encoding` that is only updated while in the
         walkthrough phase.
      4. Mixes the walkthrough and unshuffle actor-critic heads according to
         the current phase.

    # Parameters
    observations : mapping of sensor uuid -> tensor; must contain
        `self.in_walkthrough_phase_uuid`, `self.rgb_uuid`, and
        `self.unshuffled_rgb_uuid`.
        # NOTE(review): image tensors appear to be CNN feature maps laid out
        # as (..., channels, height, width) given the `dim=-3` concat — confirm.
    memory : recurrent memory; must provide the "rnn" and
        "walkthrough_encoding" tensors.
    prev_actions : integer tensor of the previous actions.
    masks : float tensor, 0 at episode starts and 1 otherwise; leading
        dimension indexes rollout steps.

    # Returns
    A tuple of the `ActorCriticOutput` (phase-mixed distribution and values)
    and the updated `Memory` (with "rnn" and "walkthrough_encoding" refreshed).
    """
    # Boolean mask: True while the agent is in the walkthrough phase.
    in_walkthrough_phase_mask = observations[self.in_walkthrough_phase_uuid]
    in_unshuffle_phase_mask = ~in_walkthrough_phase_mask
    in_walkthrough_float = in_walkthrough_phase_mask.float()
    in_unshuffle_float = in_unshuffle_phase_mask.float()

    # Don't reset hidden state at start of the unshuffle task: a step in the
    # unshuffle phase forces the mask to 1 even if `masks` says "reset".
    masks_no_unshuffle_reset = (masks.bool() | in_unshuffle_phase_mask).float()
    masks_with_unshuffle_reset = masks.float()
    del masks  # Just to make sure we don't accidentally use `masks` when we want `masks_no_unshuffle_reset`

    # Visual features: concatenate current image, goal ("unshuffled") image,
    # and their elementwise product along the channel dimension.
    cur_img_resnet = observations[self.rgb_uuid]
    unshuffled_img_resnet = observations[self.unshuffled_rgb_uuid]
    concat_img = torch.cat(
        (
            cur_img_resnet,
            unshuffled_img_resnet,
            cur_img_resnet * unshuffled_img_resnet,
        ),
        dim=-3,
    )
    # Flatten all leading (step/sampler) dims so conv modules see a 4D batch.
    batch_shape, features_shape = concat_img.shape[:-3], concat_img.shape[-3:]
    concat_img_reshaped = concat_img.view(-1, *features_shape)
    # Spatial attention: softmax over all spatial positions, reshaped back to
    # a (batch, 1, H, W) map that broadcasts over encoder channels.
    attention_probs = torch.softmax(
        self.visual_attention(concat_img_reshaped).view(
            concat_img_reshaped.shape[0], -1
        ),
        dim=-1,
    ).view(
        concat_img_reshaped.shape[0], 1, *concat_img_reshaped.shape[-2:]
    )
    # Attention-weighted spatial average of the encoded features.
    vis_features = (
        (self.visual_encoder(concat_img_reshaped) * attention_probs)
        .mean(-1)
        .mean(-1)
    )
    vis_features = vis_features.view(*batch_shape, -1)

    # Various embeddings. Previous action is offset by 1 and zeroed at episode
    # starts (mask == 0) so index 0 means "no previous action".
    prev_action_embeddings = self.prev_action_embedder(
        (
            (~masks_with_unshuffle_reset.bool()).long()
            * (prev_actions.unsqueeze(-1) + 1)
        )
    ).squeeze(-2)
    is_walkthrough_phase_embedding = self.is_walkthrough_phase_embedder(
        in_walkthrough_phase_mask.long()
    ).squeeze(-2)

    to_cat = [
        vis_features,
        prev_action_embeddings,
        is_walkthrough_phase_embedding,
    ]

    rnn_hidden_states = memory.tensor("rnn")
    rnn_outs = []
    obs_for_rnn = torch.cat(to_cat, dim=-1)
    last_walkthrough_encoding = memory.tensor("walkthrough_encoding")

    # Step the recurrent encoder one timestep at a time because each step's
    # input depends on the walkthrough encoding produced by the previous step.
    for step in range(masks_with_unshuffle_reset.shape[0]):
        rnn_out, rnn_hidden_states = self.state_encoder(
            torch.cat(
                (
                    obs_for_rnn[step : step + 1],
                    # Zero the carried walkthrough encoding at true episode
                    # starts (but not at the walkthrough->unshuffle handoff).
                    last_walkthrough_encoding
                    * masks_no_unshuffle_reset[step : step + 1],
                ),
                dim=-1,
            ),
            rnn_hidden_states,
            masks_with_unshuffle_reset[step : step + 1],
        )
        rnn_outs.append(rnn_out)
        walkthrough_encoding, _ = self.walkthrough_encoder(
            rnn_out,
            last_walkthrough_encoding,
            masks_no_unshuffle_reset[step : step + 1],
        )
        # Only refresh the walkthrough encoding while in the walkthrough
        # phase; during unshuffle it is held fixed.
        last_walkthrough_encoding = (
            last_walkthrough_encoding * in_unshuffle_float[step : step + 1]
            + walkthrough_encoding * in_walkthrough_float[step : step + 1]
        )

    memory = memory.set_tensor("walkthrough_encoding", last_walkthrough_encoding)

    rnn_out = torch.cat(rnn_outs, dim=0)
    # Evaluate both heads everywhere; the phase masks select between them.
    walkthrough_dist, walkthrough_vals = self.walkthrough_ac(rnn_out)
    unshuffle_dist, unshuffle_vals = self.unshuffle_ac(rnn_out)

    assert len(in_walkthrough_float.shape) == len(walkthrough_dist.logits.shape)

    # Optional fixed per-action bias added to the walkthrough head's logits.
    if self.walkthrough_good_action_logits is not None:
        walkthrough_logits = (
            walkthrough_dist.logits
            + self.walkthrough_good_action_logits.view(
                *((1, ) * (len(walkthrough_dist.logits.shape) - 1)), -1
            )
        )
    else:
        walkthrough_logits = walkthrough_dist.logits

    # Phase-mixed policy and value: exactly one of the two masks is 1 at
    # each step, so this is a per-step selection, not a blend.
    actor = CategoricalDistr(
        logits=in_walkthrough_float * walkthrough_logits
        + in_unshuffle_float * unshuffle_dist.logits
    )
    values = (
        in_walkthrough_float * walkthrough_vals
        + in_unshuffle_float * unshuffle_vals
    )

    ac_output = ActorCriticOutput(distributions=actor, values=values, extras={})

    return ac_output, memory.set_tensor("rnn", rnn_hidden_states)
def forward(  # type:ignore
    self,
    observations: ObservationType,
    memory: Memory,
    prev_actions: torch.Tensor,
    masks: torch.FloatTensor,
) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]:
    """Run one rollout step (or a sequence of steps) of the two-phase
    walkthrough/unshuffle policy using a dict-input visual encoder.

    Variant of the attention-based forward: here the current and goal images
    are concatenated along the last dimension and passed to
    `self.visual_encoder` as a single `{self.concat_rgb_uuid: ...}` observation
    rather than being attention-pooled.

    # Parameters
    observations : mapping of sensor uuid -> tensor; must contain
        `self.in_walkthrough_phase_uuid`, `self.rgb_uuid`, and
        `self.unshuffled_rgb_uuid`.
        # NOTE(review): the `dim=-1` concat suggests channels-last image
        # tensors here (unlike the `dim=-3` attention variant) — confirm.
    memory : recurrent memory; must provide the "rnn" and
        "walkthrough_encoding" tensors.
    prev_actions : integer tensor of the previous actions.
    masks : float tensor, 0 at episode starts and 1 otherwise; leading
        dimension indexes rollout steps.

    # Returns
    A tuple of the `ActorCriticOutput` (phase-mixed distribution and values)
    and the updated `Memory` (with "rnn" and "walkthrough_encoding" refreshed).
    """
    # Boolean mask: True while the agent is in the walkthrough phase.
    in_walkthrough_phase_mask = observations[self.in_walkthrough_phase_uuid]
    in_unshuffle_phase_mask = ~in_walkthrough_phase_mask
    in_walkthrough_float = in_walkthrough_phase_mask.float()
    in_unshuffle_float = in_unshuffle_phase_mask.float()

    # Don't reset hidden state at start of the unshuffle task: a step in the
    # unshuffle phase forces the mask to 1 even if `masks` says "reset".
    masks_no_unshuffle_reset = (masks.bool() | in_unshuffle_phase_mask).float()

    cur_img = observations[self.rgb_uuid]
    unshuffled_img = observations[self.unshuffled_rgb_uuid]
    concat_img = torch.cat((cur_img, unshuffled_img), dim=-1)

    # Various embeddings. Previous action is offset by 1 and zeroed at episode
    # starts (mask == 0) so index 0 means "no previous action".
    # NOTE(review): this variant uses raw `masks` here (the attention variant
    # uses the unshuffle-reset mask) — presumably intentional; confirm.
    vis_features = self.visual_encoder({self.concat_rgb_uuid: concat_img})
    prev_action_embeddings = self.prev_action_embedder(
        ((~masks.bool()).long() * (prev_actions.unsqueeze(-1) + 1))
    ).squeeze(-2)
    is_walkthrough_phase_embedding = self.is_walkthrough_phase_embedder(
        in_walkthrough_phase_mask.long()
    ).squeeze(-2)

    to_cat = [
        vis_features,
        prev_action_embeddings,
        is_walkthrough_phase_embedding,
    ]

    rnn_hidden_states = memory.tensor("rnn")
    rnn_outs = []
    obs_for_rnn = torch.cat(to_cat, dim=-1)
    last_walkthrough_encoding = memory.tensor("walkthrough_encoding")

    # Step the recurrent encoder one timestep at a time because each step's
    # input depends on the walkthrough encoding produced by the previous step.
    # NOTE(review): unlike the attention variant, the carried walkthrough
    # encoding is NOT zeroed at episode starts before being fed to the state
    # encoder — confirm this asymmetry is intended.
    for step in range(masks.shape[0]):
        rnn_out, rnn_hidden_states = self.state_encoder(
            torch.cat(
                (obs_for_rnn[step : step + 1], last_walkthrough_encoding),
                dim=-1,
            ),
            rnn_hidden_states,
            masks[step : step + 1],
        )
        rnn_outs.append(rnn_out)
        walkthrough_encoding, _ = self.walkthrough_encoder(
            rnn_out,
            last_walkthrough_encoding,
            masks_no_unshuffle_reset[step : step + 1],
        )
        # Only refresh the walkthrough encoding while in the walkthrough
        # phase; during unshuffle it is held fixed.
        last_walkthrough_encoding = (
            last_walkthrough_encoding * in_unshuffle_float[step : step + 1]
            + walkthrough_encoding * in_walkthrough_float[step : step + 1]
        )

    memory = memory.set_tensor("walkthrough_encoding", last_walkthrough_encoding)

    rnn_out = torch.cat(rnn_outs, dim=0)
    # Evaluate both heads everywhere; the phase masks select between them.
    walkthrough_dist, walkthrough_vals = self.walkthrough_ac(rnn_out)
    unshuffle_dist, unshuffle_vals = self.unshuffle_ac(rnn_out)

    assert len(in_walkthrough_float.shape) == len(walkthrough_dist.logits.shape)

    # Optional fixed per-action bias added to the walkthrough head's logits.
    if self.walkthrough_good_action_logits is not None:
        walkthrough_logits = (
            walkthrough_dist.logits
            + self.walkthrough_good_action_logits.view(
                *((1, ) * (len(walkthrough_dist.logits.shape) - 1)), -1
            )
        )
    else:
        walkthrough_logits = walkthrough_dist.logits

    # Phase-mixed policy and value: exactly one of the two masks is 1 at
    # each step, so this is a per-step selection, not a blend.
    actor = CategoricalDistr(
        logits=in_walkthrough_float * walkthrough_logits
        + in_unshuffle_float * unshuffle_dist.logits
    )
    values = (
        in_walkthrough_float * walkthrough_vals
        + in_unshuffle_float * unshuffle_vals
    )

    ac_output = ActorCriticOutput(distributions=actor, values=values, extras={})

    return ac_output, memory.set_tensor("rnn", rnn_hidden_states)