def __init__(
    self,
    observation_space: gym.Space,
    action_space: gym.Space,
    use_state: bool,
    use_action: bool,
    use_next_state: bool,
    use_done: bool,
    **kwargs,
):
    """Builds reward MLP.

    Args:
        observation_space: The observation space.
        action_space: The action space.
        use_state: should the current state be included as an input to the MLP?
        use_action: should the current action be included as an input to the MLP?
        use_next_state: should the next state be included as an input to the MLP?
        use_done: should the "done" flag be included as an input to the MLP?
        kwargs: passed straight through to `build_mlp`.
    """
    super().__init__()
    combined_size = 0

    self.use_state = use_state
    if self.use_state:
        combined_size += preprocessing.get_flattened_obs_dim(observation_space)

    self.use_action = use_action
    if self.use_action:
        combined_size += preprocessing.get_flattened_obs_dim(action_space)

    self.use_next_state = use_next_state
    if self.use_next_state:
        combined_size += preprocessing.get_flattened_obs_dim(observation_space)

    self.use_done = use_done
    if self.use_done:
        combined_size += 1

    full_build_mlp_kwargs = {"hid_sizes": (32, 32)}
    full_build_mlp_kwargs.update(kwargs)
    full_build_mlp_kwargs.update({
        # we do not want these overridden
        "in_size": combined_size,
        "out_size": 1,
        "squeeze_output": True,
    })

    self.mlp = networks.build_mlp(**full_build_mlp_kwargs)
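# A minimal usage sketch for the reward MLP above. It assumes the enclosing
# class is the `BasicRewardMLP` referenced later in this section and that
# `networks.build_mlp` behaves like the imitation library's helper (with
# squeeze_output=True collapsing the trailing size-1 dimension); calling
# `reward_net.mlp` directly is only for illustration.
import gym
import torch as th

obs_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,))
act_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,))
reward_net = BasicRewardMLP(
    obs_space,
    act_space,
    use_state=True,
    use_action=True,
    use_next_state=False,
    use_done=False,
)
# combined_size = 4 (flattened obs) + 2 (flattened action) = 6, so the MLP
# maps a (batch, 6) input to a (batch,) vector of scalar rewards.
inputs = th.cat([th.zeros(8, 4), th.zeros(8, 2)], dim=1)
rewards = reward_net.mlp(inputs)  # shape: (8,)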
def __init__(
    self, action_space: gym.Space, observation_space: gym.Space, **mlp_kwargs
):
    super().__init__()
    in_size = preprocessing.get_flattened_obs_dim(
        observation_space
    ) + preprocessing.get_flattened_obs_dim(action_space)
    self.mlp = build_mlp(**{"in_size": in_size, "out_size": 1, **mlp_kwargs})
def __init__(self, observation_space: gym.Space):
    super(FlattenBatchNormExtractor, self).__init__(
        observation_space, get_flattened_obs_dim(observation_space)
    )
    self.flatten = nn.Flatten()
    self.batch_norm = nn.BatchNorm1d(self._features_dim)
    self.dropout = nn.Dropout(0.5)
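# The matching forward pass is not shown above; a plausible sketch (an
# assumption, not the original code) applies the three modules in order:
# flatten to (batch, features), normalize per feature, then apply dropout.
def forward(self, observations: th.Tensor) -> th.Tensor:
    return self.dropout(self.batch_norm(self.flatten(observations)))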
def _initialize(self):
    self.dense1 = tf.keras.layers.Dense(
        400,
        input_shape=(
            get_flattened_obs_dim(TaskEnv.observation_space)
            + get_action_dim(TaskEnv.action_space),
        ),
        activation='relu',
    )
    self.dense2 = tf.keras.layers.Dense(300, activation='relu')
    self.dense3 = tf.keras.layers.Dense(1)
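# The `call` method paired with `_initialize` is not shown; a plausible
# sketch (an assumption, not the original code) chains the three dense
# layers over the concatenated state-action input, yielding one scalar
# value per batch element.
def call(self, inputs):
    x = self.dense1(inputs)
    x = self.dense2(x)
    return self.dense3(x)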
def __init__(self):
    super().__init__()
    self._initialize()
    # Initialize parameters by calling the model once on a dummy observation.
    self.call(
        tf.constant(
            np.zeros(shape=(1, get_flattened_obs_dim(TaskEnv.observation_space)))
        )
    )
    self.loss_function_id = ACTOR_LOSS
def __init__(
    self,
    observation_space: gym.Space,
    action_space: gym.Space,
    *,
    base_reward_net: Optional[nn.Module] = None,
    potential_net: Optional[nn.Module] = None,
    **kwargs,
):
    """Builds a simple shaped reward network.

    Args:
        observation_space: The observation space.
        action_space: The action space.
        base_reward_net: Network responsible for computing the "base" reward.
        potential_net: Network responsible for computing a potential function
            that will be used to provide additional potential-based shaping,
            in addition to the reward produced by `base_reward_net`.
        kwargs: Passed through to `RewardNetShaped`.
    """
    super().__init__(observation_space, action_space, **kwargs)

    if base_reward_net is None:
        self._base_reward_net = BasicRewardMLP(
            observation_space=self.observation_space,
            action_space=self.action_space,
            use_state=self.use_state,
            use_action=self.use_action,
            use_next_state=self.use_next_state,
            use_done=self.use_done,
            hid_sizes=(32, 32),
        )
    else:
        self._base_reward_net = base_reward_net

    if potential_net is None:
        potential_in_size = preprocessing.get_flattened_obs_dim(self.observation_space)
        self._potential_net = networks.build_mlp(
            in_size=potential_in_size,
            hid_sizes=(32, 32),
            squeeze_output=True,
            flatten_input=True,
        )
    else:
        self._potential_net = potential_net
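# For reference, the two sub-networks are combined via standard potential-based
# shaping (Ng et al., 1999): r'(s, a, s') = r(s, a, s') + gamma * phi(s') - phi(s).
# A minimal sketch, assuming the parent `RewardNetShaped` stores
# `self.discount_factor`, that `done` is a float-able tensor, and that both
# sub-networks are callable as shown (their exact signatures are not visible
# here); zeroing the next-state potential at episode ends is a common convention.
def _shaped_reward_sketch(self, state, action, next_state, done):
    base_reward = self._base_reward_net(state, action, next_state, done)
    new_potential = (1 - done.float()) * self._potential_net(next_state)
    old_potential = self._potential_net(state)
    return base_reward + self.discount_factor * new_potential - old_potential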
def __init__(self, observation_space: gym.Space):
    super().__init__(observation_space, get_flattened_obs_dim(observation_space))
    self.total_available_modules = 8
    _pns = []
    for _ in range(self.total_available_modules):
        alignment_matrix = nn.Linear(26, 26)
        if common.args.pns_init:
            # The first 8 inputs should be the global observation (a reasonable
            # prior), so initialize that block to the identity; this might make
            # learning faster.
            with th.no_grad():
                alignment_matrix.weight[:, :8] = 0.
                alignment_matrix.weight[:8, :] = 0.
                for i in range(8):
                    alignment_matrix.weight[i, i] = 1.
        _pns.append(alignment_matrix)
    self.pns = nn.ModuleList(_pns)
    self.robot_id_2_idx = {}
def _setup_airl_undiscounted_shaped_reward_net(venv):
    potential_in_size = preprocessing.get_flattened_obs_dim(venv.observation_space)
    potential_net = networks.build_mlp(
        in_size=potential_in_size,
        hid_sizes=(32, 32),
        squeeze_output=True,
    )
    reward_net = reward_nets.BasicShapedRewardNet(
        venv.observation_space,
        venv.action_space,
        discount_factor=1.0,
        use_next_state=True,
        use_done=True,
        potential_net=potential_net,
    )
    return discrim_nets.DiscrimNetAIRL(reward_net)
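# Hedged usage sketch: the helper above only needs a vectorized environment
# exposing `observation_space` and `action_space`. This assumes
# stable-baselines3's DummyVecEnv is available; the env id is illustrative.
import gym
from stable_baselines3.common.vec_env import DummyVecEnv

venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
airl_discrim = _setup_airl_undiscounted_shaped_reward_net(venv)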
def __init__(self, observation_space: gym.spaces.Dict, cnn_output_dim: int = 256):
    # TODO we do not know features-dim here before going over all the items,
    # so put something there. This is dirty!
    super(CombinedExtractor, self).__init__(observation_space, features_dim=1)

    extractors = {}
    total_concat_size = 0
    for key, subspace in observation_space.spaces.items():
        if is_image_space(subspace):
            extractors[key] = NatureCNN(subspace, features_dim=cnn_output_dim)
            total_concat_size += cnn_output_dim
        else:
            # The observation key is a vector, flatten it if needed
            extractors[key] = nn.Flatten()
            total_concat_size += get_flattened_obs_dim(subspace)

    self.extractors = nn.ModuleDict(extractors)

    # Update the features dim manually
    self._features_dim = total_concat_size
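# The forward pass that pairs with this constructor (as in stable-baselines3's
# CombinedExtractor) runs each sub-extractor on its dict key and concatenates
# the results along the feature dimension:
def forward(self, observations) -> th.Tensor:
    encoded_tensor_list = []
    for key, extractor in self.extractors.items():
        encoded_tensor_list.append(extractor(observations[key]))
    return th.cat(encoded_tensor_list, dim=1)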
def __init__(self, observation_space: gym.Space):
    super(FlattenExtractor, self).__init__(
        observation_space, get_flattened_obs_dim(observation_space)
    )
    self.flatten = nn.Flatten()
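# The corresponding forward pass (as in stable-baselines3's FlattenExtractor)
# simply flattens the observation batch:
def forward(self, observations: th.Tensor) -> th.Tensor:
    return self.flatten(observations)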
def __init__(
    self,
    pg_agent_config: PolicyGradientAgentConfig,
    observation_space: gym.Space,
    node_net: bool = False,
    at_net: bool = False,
):
    super(FlattenExtractor, self).__init__(
        pg_agent_config,
        observation_space,
        get_flattened_obs_dim(observation_space),
        at_net=at_net,
        node_net=node_net,
    )
    self.flatten = nn.Flatten()