def create_state_encoders(
    self,
    obs_embed_size: int,
    prev_action_embed_size: int,
    num_rnn_layers: int,
    rnn_type: str,
    add_prev_actions: bool,
    trainable_masked_hidden_state=False,
):
    rnn_input_size = obs_embed_size
    self.prev_action_embedder = FeatureEmbedding(
        input_size=self.action_space.n,
        output_size=prev_action_embed_size if add_prev_actions else 0,
    )
    if add_prev_actions:
        rnn_input_size += prev_action_embed_size

    state_encoders = OrderedDict()  # preserve insertion order in py3.6
    if self.multiple_beliefs:  # multiple belief model
        for aux_uuid in self.auxiliary_uuids:
            state_encoders[aux_uuid] = RNNStateEncoder(
                rnn_input_size,
                self._hidden_size,
                num_layers=num_rnn_layers,
                rnn_type=rnn_type,
                trainable_masked_hidden_state=trainable_masked_hidden_state,
            )
        # create the fusion model that combines the per-task beliefs
        self.fusion_model = self.beliefs_fusion(
            hidden_size=self._hidden_size,
            obs_embed_size=obs_embed_size,
            num_tasks=len(self.auxiliary_uuids),
        )
    else:  # single belief model
        state_encoders["single_belief"] = RNNStateEncoder(
            rnn_input_size,
            self._hidden_size,
            num_layers=num_rnn_layers,
            rnn_type=rnn_type,
            trainable_masked_hidden_state=trainable_masked_hidden_state,
        )

    self.state_encoders = nn.ModuleDict(state_encoders)
    self.belief_names = list(self.state_encoders.keys())
    get_logger().info(
        "there are {} belief models: {}".format(len(self.belief_names), self.belief_names)
    )

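# Illustrative sketch (assumption, not from the original source): the RNN input
# size grows by prev_action_embed_size only when previous actions are embedded,
# mirroring the bookkeeping above. The numbers are placeholder values.
obs_embed_size, prev_action_embed_size, add_prev_actions = 512, 16, True
rnn_input_size = obs_embed_size + (prev_action_embed_size if add_prev_actions else 0)
assert rnn_input_size == 528
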
def __init__(
    self,
    action_space: gym.spaces.Tuple,
    observation_space: SpaceDict,
    rgb_uuid: Optional[str] = "rgb",
    hidden_size=512,
    num_rnn_layers=1,
    rnn_type="GRU",
):
    super().__init__(action_space=action_space, observation_space=observation_space)

    self._hidden_size = hidden_size
    self.rgb_uuid = rgb_uuid

    self.visual_encoder = SimpleCNN(
        observation_space=observation_space,
        output_size=hidden_size,
        rgb_uuid=self.rgb_uuid,
        depth_uuid=None,
    )

    self.state_encoder = RNNStateEncoder(
        0 if self.is_blind else self.recurrent_hidden_state_size,
        self._hidden_size,
        num_layers=num_rnn_layers,
        rnn_type=rnn_type,
    )

    self.actor_critic = TupleLinearActorCriticHead(self._hidden_size, action_space[0].n)

    self.train()

def __init__(
    self,
    action_space: gym.spaces.Discrete,
    observation_space: SpaceDict,
    hidden_size=512,
    obj_state_embedding_size=512,
    trainable_masked_hidden_state: bool = False,
    num_rnn_layers=1,
    rnn_type="GRU",
):
    """Initializer.

    See class documentation for parameter definitions.
    """
    super().__init__(action_space=action_space, observation_space=observation_space)

    self._hidden_size = hidden_size
    self.object_type_embedding_size = obj_state_embedding_size

    # Separate visual encoders for the pick and drop phases of the task.
    self.visual_encoder_pick = SimpleCNN(
        self.observation_space,
        self._hidden_size,
        rgb_uuid=None,
        depth_uuid="depth_lowres",
    )
    self.visual_encoder_drop = SimpleCNN(
        self.observation_space,
        self._hidden_size,
        rgb_uuid=None,
        depth_uuid="depth_lowres",
    )

    self.state_encoder = RNNStateEncoder(
        self._hidden_size + obj_state_embedding_size,
        self._hidden_size,
        trainable_masked_hidden_state=trainable_masked_hidden_state,
        num_layers=num_rnn_layers,
        rnn_type=rnn_type,
    )

    self.actor_pick = LinearActorHeadNoCategory(self._hidden_size, action_space.n)
    self.critic_pick = LinearCriticHead(self._hidden_size)
    self.actor_drop = LinearActorHeadNoCategory(self._hidden_size, action_space.n)
    self.critic_drop = LinearCriticHead(self._hidden_size)

    # self.object_state_embedding = nn.Embedding(num_embeddings=6, embedding_dim=obj_state_embedding_size)

    # MLP layer sizes for embedding the 3-D relative distance:
    # 3 -> 100 -> obj_state_embedding_size.
    relative_dist_embedding_sizes = [3, 100, obj_state_embedding_size]
    self.relative_dist_embedding_pick = input_embedding_net(
        relative_dist_embedding_sizes, dropout=0
    )
    self.relative_dist_embedding_drop = input_embedding_net(
        relative_dist_embedding_sizes, dropout=0
    )

    self.train()

def __init__(
    self,
    action_space: gym.spaces.Discrete,
    observation_space: SpaceDict,
    goal_sensor_uuid: str,
    rgb_resnet_preprocessor_uuid: Optional[str],
    depth_resnet_preprocessor_uuid: Optional[str] = None,
    hidden_size: int = 512,
    goal_dims: int = 32,
    resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
    combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
    include_auxiliary_head: bool = False,
):
    super().__init__(
        action_space=action_space,
        observation_space=observation_space,
    )

    self._hidden_size = hidden_size
    self.include_auxiliary_head = include_auxiliary_head

    if rgb_resnet_preprocessor_uuid is None or depth_resnet_preprocessor_uuid is None:
        resnet_preprocessor_uuid = (
            rgb_resnet_preprocessor_uuid
            if rgb_resnet_preprocessor_uuid is not None
            else depth_resnet_preprocessor_uuid
        )
        self.goal_visual_encoder = ResnetTensorGoalEncoder(
            self.observation_space,
            goal_sensor_uuid,
            resnet_preprocessor_uuid,
            goal_dims,
            resnet_compressor_hidden_out_dims,
            combiner_hidden_out_dims,
        )
    else:
        self.goal_visual_encoder = ResnetDualTensorGoalEncoder(  # type:ignore
            self.observation_space,
            goal_sensor_uuid,
            rgb_resnet_preprocessor_uuid,
            depth_resnet_preprocessor_uuid,
            goal_dims,
            resnet_compressor_hidden_out_dims,
            combiner_hidden_out_dims,
        )

    self.state_encoder = RNNStateEncoder(
        self.goal_visual_encoder.output_dims,
        self._hidden_size,
    )

    self.actor = LinearActorHead(self._hidden_size, action_space.n)
    self.critic = LinearCriticHead(self._hidden_size)
    if self.include_auxiliary_head:
        self.auxiliary_actor = LinearActorHead(self._hidden_size, action_space.n)

    self.train()

def __init__(
    self,
    action_space: gym.spaces.Discrete,
    observation_space: SpaceDict,
    rgb_uuid: Optional[str],
    depth_uuid: Optional[str],
    goal_sensor_uuid: str,
    hidden_size=512,
    embed_coordinates=False,
    coordinate_embedding_dim=8,
    coordinate_dims=2,
    num_rnn_layers=1,
    rnn_type="GRU",
):
    super().__init__(action_space=action_space, observation_space=observation_space)

    self.goal_sensor_uuid = goal_sensor_uuid
    self._hidden_size = hidden_size
    self.embed_coordinates = embed_coordinates
    if self.embed_coordinates:
        self.coordinate_embedding_size = coordinate_embedding_dim
    else:
        self.coordinate_embedding_size = coordinate_dims

    self.sensor_fusion = False
    if rgb_uuid is not None and depth_uuid is not None:
        self.sensor_fuser = nn.Linear(hidden_size * 2, hidden_size)
        self.sensor_fusion = True

    self.visual_encoder = SimpleCNN(
        observation_space=observation_space,
        output_size=hidden_size,
        rgb_uuid=rgb_uuid,
        depth_uuid=depth_uuid,
    )

    self.state_encoder = RNNStateEncoder(
        (0 if self.is_blind else self.recurrent_hidden_state_size)
        + self.coordinate_embedding_size,
        self._hidden_size,
        num_layers=num_rnn_layers,
        rnn_type=rnn_type,
    )

    self.actor = LinearActorHead(self._hidden_size, action_space.n)
    self.critic = LinearCriticHead(self._hidden_size)

    if self.embed_coordinates:
        self.coordinate_embedding = nn.Linear(coordinate_dims, coordinate_embedding_dim)

    self.train()

def __init__(
    self,
    action_space: gym.spaces.Discrete,
    observation_space: SpaceDict,
    hidden_size=512,
    obj_state_embedding_size=512,
    trainable_masked_hidden_state: bool = False,
    num_rnn_layers=1,
    rnn_type="GRU",
):
    """Initializer.

    See class documentation for parameter definitions.
    """
    super().__init__(action_space=action_space, observation_space=observation_space)

    self._hidden_size = hidden_size
    self.object_type_embedding_size = obj_state_embedding_size

    sensor_names = self.observation_space.spaces.keys()
    self.visual_encoder = SimpleCNN(
        self.observation_space,
        self._hidden_size,
        rgb_uuid="rgb_lowres" if "rgb_lowres" in sensor_names else None,
        depth_uuid="depth_lowres" if "depth_lowres" in sensor_names else None,
    )

    # Count the visual modalities feeding the RNN; fail fast if neither an
    # RGB nor a depth sensor is available.
    if "rgb_lowres" in sensor_names and "depth_lowres" in sensor_names:
        input_visual_feature_num = 2
    elif "rgb_lowres" in sensor_names or "depth_lowres" in sensor_names:
        input_visual_feature_num = 1
    else:
        raise ValueError(
            "Expected at least one of 'rgb_lowres' or 'depth_lowres' in the observation space."
        )

    self.state_encoder = RNNStateEncoder(
        self._hidden_size * input_visual_feature_num + obj_state_embedding_size,
        self._hidden_size,
        trainable_masked_hidden_state=trainable_masked_hidden_state,
        num_layers=num_rnn_layers,
        rnn_type=rnn_type,
    )

    self.actor = LinearActorHead(self._hidden_size, action_space.n)
    self.critic = LinearCriticHead(self._hidden_size)

    # MLP layer sizes for embedding the 3-D relative distance:
    # 3 -> 100 -> obj_state_embedding_size.
    self.relative_dist_embedding = input_embedding_net(
        [3, 100, obj_state_embedding_size], dropout=0
    )

    self.train()

def __init__(
    self,
    input_uuid: str,
    action_space: gym.spaces.Dict,
    observation_space: gym.spaces.Dict,
    hidden_size: int = 128,
    num_layers: int = 1,
    rnn_type: str = "GRU",
    head_type: Callable[
        ..., ActorCriticModel[SequentialDistr]
    ] = ConditionedLinearActorCritic,
):
    super().__init__(action_space=action_space, observation_space=observation_space)
    self.hidden_size = hidden_size
    self.rnn_type = rnn_type

    assert (
        input_uuid in observation_space.spaces
    ), "RNNActorCritic expects only a single observational input."
    self.input_uuid = input_uuid

    box_space: gym.spaces.Box = observation_space[self.input_uuid]
    assert isinstance(box_space, gym.spaces.Box), (
        "RNNActorCritic requires that the observation space"
        " corresponding to the input uuid is a Box space."
    )
    assert len(box_space.shape) == 1
    self.in_dim = box_space.shape[0]

    self.state_encoder = RNNStateEncoder(
        input_size=self.in_dim,
        hidden_size=hidden_size,
        num_layers=num_layers,
        rnn_type=rnn_type,
        trainable_masked_hidden_state=True,
    )

    self.head_uuid = "{}_{}".format("rnn", input_uuid)

    self.ac_nonrecurrent_head: ActorCriticModel[SequentialDistr] = head_type(
        input_uuid=self.head_uuid,
        action_space=action_space,
        observation_space=gym.spaces.Dict(
            {
                self.head_uuid: gym.spaces.Box(
                    low=np.float32(0.0), high=np.float32(1.0), shape=(hidden_size,)
                )
            }
        ),
    )

    self.memory_key = "rnn"

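# Illustrative sketch (assumption, not from the original source): the recurrent
# encoder's hidden state is handed to the nonrecurrent head through a synthetic
# observation space keyed by head_uuid, mirroring the construction above. The
# uuid "gps" and the hidden size are placeholder assumptions.
import gym
import numpy as np

hidden_size = 128
head_uuid = "{}_{}".format("rnn", "gps")  # e.g. input_uuid == "gps" (hypothetical)
head_obs_space = gym.spaces.Dict(
    {
        head_uuid: gym.spaces.Box(
            low=np.float32(0.0), high=np.float32(1.0), shape=(hidden_size,)
        )
    }
)
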
def __init__(
    self,
    action_space: gym.spaces.Discrete,
    observation_space: SpaceDict,
    goal_sensor_uuid: str,
    rgb_uuid: Optional[str],
    depth_uuid: Optional[str],
    hidden_size=512,
    object_type_embedding_dim=8,
    trainable_masked_hidden_state: bool = False,
    num_rnn_layers=1,
    rnn_type="GRU",
):
    """Initializer.

    See class documentation for parameter definitions.
    """
    super().__init__(action_space=action_space, observation_space=observation_space)

    self.goal_sensor_uuid = goal_sensor_uuid
    self._n_object_types = self.observation_space.spaces[self.goal_sensor_uuid].n
    self._hidden_size = hidden_size
    self.object_type_embedding_size = object_type_embedding_dim

    self.visual_encoder = SimpleCNN(
        observation_space=self.observation_space,
        output_size=self._hidden_size,
        rgb_uuid=rgb_uuid,
        depth_uuid=depth_uuid,
    )

    self.state_encoder = RNNStateEncoder(
        (0 if self.is_blind else self._hidden_size) + object_type_embedding_dim,
        self._hidden_size,
        trainable_masked_hidden_state=trainable_masked_hidden_state,
        num_layers=num_rnn_layers,
        rnn_type=rnn_type,
    )

    self.actor = LinearActorHead(self._hidden_size, action_space.n)
    self.critic = LinearCriticHead(self._hidden_size)

    self.object_type_embedding = nn.Embedding(
        num_embeddings=self._n_object_types,
        embedding_dim=object_type_embedding_dim,
    )

    self.train()

def __init__(
    self,
    action_space: gym.spaces.Discrete,
    observation_space: gym.spaces.Dict,
    rgb_uuid: str,
    unshuffled_rgb_uuid: str,
    hidden_size=512,
    num_rnn_layers=1,
    rnn_type="GRU",
):
    """Initialize a `RearrangeActorCriticSimpleConvRNN` object.

    # Parameters
    action_space : The action space of the agent.
        Should equal `gym.spaces.Discrete(# actions available to the agent)`.
    observation_space : The observation space available to the agent.
    rgb_uuid : The unique id of the RGB image sensor (see `RGBSensor`).
    unshuffled_rgb_uuid : The unique id of the `UnshuffledRGBRearrangeSensor`
        available to the agent.
    hidden_size : The size of the hidden layer of the RNN.
    num_rnn_layers : The number of hidden layers in the RNN.
    rnn_type : The RNN type, should be "GRU" or "LSTM".
    """
    super().__init__(action_space=action_space, observation_space=observation_space)
    self._hidden_size = hidden_size

    self.rgb_uuid = rgb_uuid
    self.unshuffled_rgb_uuid = unshuffled_rgb_uuid
    self.concat_rgb_uuid = "concat_rgb"
    assert self.concat_rgb_uuid not in observation_space

    self.visual_encoder = self._create_visual_encoder()

    self.state_encoder = RNNStateEncoder(
        self.recurrent_hidden_state_size,
        self._hidden_size,
        num_layers=num_rnn_layers,
        rnn_type=rnn_type,
    )

    self.actor = LinearActorHead(self._hidden_size, action_space.n)
    self.critic = LinearCriticHead(self._hidden_size)

    self.train()

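# Construction sketch (assumption, not from the original source); the sensor
# uuids and action count below are hypothetical placeholders, and the
# observation space must contain image spaces for both RGB sensors.
#
#   model = RearrangeActorCriticSimpleConvRNN(
#       action_space=gym.spaces.Discrete(10),
#       observation_space=observation_space,
#       rgb_uuid="rgb",
#       unshuffled_rgb_uuid="unshuffled_rgb",
#   )
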
def __init__(
    self,
    action_space: gym.spaces.Discrete,
    observation_space: SpaceDict,
    goal_sensor_uuid: str,
    resnet_preprocessor_uuid: str,
    detector_preprocessor_uuid: str,
    rnn_hidden_size=512,
    goal_dims: int = 32,
    max_dets: int = 3,
    resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
    box_embedder_hidden_out_dims: Tuple[int, int] = (128, 32),
    class_embedder_hidden_out_dims: Tuple[int, int] = (128, 32),
    combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
):
    super().__init__(
        action_space=action_space,
        observation_space=observation_space,
    )

    self.hidden_size = rnn_hidden_size

    self.goal_visual_encoder = ResnetFasterRCNNTensorsGoalEncoder(
        self.observation_space,
        goal_sensor_uuid,
        resnet_preprocessor_uuid,
        detector_preprocessor_uuid,
        goal_dims,
        max_dets,
        resnet_compressor_hidden_out_dims,
        box_embedder_hidden_out_dims,
        class_embedder_hidden_out_dims,
        combiner_hidden_out_dims,
    )

    self.state_encoder = RNNStateEncoder(
        self.goal_visual_encoder.output_dims,
        rnn_hidden_size,
    )

    self.actor_critic = LinearActorCriticHead(self.hidden_size, action_space.n)

    self.train()

def __init__(
    self,
    action_space: gym.spaces.Discrete,
    observation_space: gym.spaces.Dict,
    rgb_uuid: str,
    unshuffled_rgb_uuid: str,
    in_walkthrough_phase_uuid: str,
    is_walkthrough_phase_embedding_dim: int,
    done_action_index: int,
    walkthrougher_should_ignore_action_mask: Optional[Sequence[float]] = None,
    prev_action_embedding_dim: int = 32,
    hidden_size=512,
    num_rnn_layers=1,
    rnn_type="GRU",
):
    """A CNN->RNN model for joint training of the Walkthrough and Unshuffle
    tasks.

    Similar to `RearrangeActorCriticSimpleConvRNN` but with some additional
    sensor inputs (e.g. the `InWalkthroughPhaseSensor` is used to tell the
    agent which phase it is in).
    """
    super().__init__(action_space=action_space, observation_space=observation_space)
    self._hidden_size = hidden_size

    self.rgb_uuid = rgb_uuid
    self.unshuffled_rgb_uuid = unshuffled_rgb_uuid
    self.in_walkthrough_phase_uuid = in_walkthrough_phase_uuid
    self.done_action_index = done_action_index

    # n + 1 embedding rows: one per action plus one for the start-of-episode
    # case where there is no previous action.
    self.prev_action_embedder = nn.Embedding(
        action_space.n + 1, embedding_dim=prev_action_embedding_dim
    )

    self.is_walkthrough_phase_embedder = nn.Embedding(
        num_embeddings=2, embedding_dim=is_walkthrough_phase_embedding_dim
    )

    # Actions the walkthrougher should ignore are assigned a large negative
    # logit so they are (effectively) never sampled.
    self.walkthrough_good_action_logits: Optional[torch.Tensor]
    if walkthrougher_should_ignore_action_mask is not None:
        self.register_buffer(
            "walkthrough_good_action_logits",
            -1000 * torch.FloatTensor(walkthrougher_should_ignore_action_mask),
            persistent=False,
        )
    else:
        self.walkthrough_good_action_logits = None

    self.concat_rgb_uuid = "concat_rgb"
    assert self.concat_rgb_uuid not in observation_space

    self.visual_encoder = self._create_visual_encoder()

    self.state_encoder = RNNStateEncoder(
        prev_action_embedding_dim
        + is_walkthrough_phase_embedding_dim
        + 2 * self.recurrent_hidden_state_size,
        self._hidden_size,
        num_layers=num_rnn_layers,
        rnn_type=rnn_type,
    )

    self.walkthrough_encoder = RNNStateEncoder(
        self._hidden_size,
        self._hidden_size,
        num_layers=1,
        rnn_type="GRU",
    )

    self.apply(simple_conv_and_linear_weights_init)

    self.walkthrough_ac = LinearActorCriticHead(self._hidden_size, action_space.n)
    # Bias the "done" action's initial logit downward so the agent does not
    # end the walkthrough phase prematurely early in training.
    self.walkthrough_ac.actor_and_critic.bias.data[self.done_action_index] -= 3
    self.unshuffle_ac = LinearActorCriticHead(self._hidden_size, action_space.n)

    self.train()

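# Illustrative sketch (assumption, not from the original source) of how the
# walkthrough_good_action_logits buffer can suppress ignored actions: adding
# -1000 to an action's logit drives its softmax probability to ~0. The mask
# values here are placeholder assumptions.
import torch

ignore_mask = torch.FloatTensor([0.0, 1.0, 0.0])  # 1.0 marks an action to ignore
good_action_logits = -1000 * ignore_mask          # same construction as the buffer above
logits = torch.zeros(3) + good_action_logits      # masked action's logit becomes -1000
probs = torch.softmax(logits, dim=-1)             # ~[0.5, 0.0, 0.5]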