def __init__(self,
             obs_space,
             action_space,
             num_outputs,
             model_config,
             name,
             true_obs_shape=(4, ),
             action_embed_size=2,
             **kw):
    """Set up the DQN base model plus a separate action-embedding network.

    Args:
        obs_space: Observation space forwarded to ``DQNTorchModel``.
        action_space: Action space forwarded to both the base model and
            the embedding network.
        num_outputs: Output size forwarded to ``DQNTorchModel``.
        model_config: Model config forwarded to both networks.
        name: Model name; the embedding network is named
            ``name + "_action_embed"``.
        true_obs_shape: Shape of the ``Box(-1, 1)`` space handed to the
            embedding network as its observation space.
        action_embed_size: Passed to ``TorchFC`` in the output-size
            position.
        **kw: Extra keyword arguments forwarded to ``DQNTorchModel``.
    """
    DQNTorchModel.__init__(
        self, obs_space, action_space, num_outputs, model_config, name, **kw)

    # The embedding network sees a bounded Box of the requested shape
    # rather than the (possibly structured) obs_space given to the model.
    embed_input_space = Box(-1, 1, shape=true_obs_shape)
    embed_name = name + "_action_embed"
    self.action_embed_model = TorchFC(
        embed_input_space,
        action_space,
        action_embed_size,
        model_config,
        embed_name,
    )
def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
        # Dimensionality of sentence embeddings
        # TODO don't make this hard-coded
        action_embed_size=2,
        **kw):
    """Set up the DQN base model, a "true_obs" preprocessor and an embedding net.

    Args:
        obs_space: Observation space; its ``original_space["true_obs"]``
            entry is wrapped in a ``DictFlatteningPreprocessor``.
        action_space: Action space forwarded to both networks.
        num_outputs: Output size forwarded to ``DQNTorchModel``.
        model_config: Model config forwarded to both networks.
        name: Model name; the embedding network is named
            ``name + "_action_embed"``.
        action_embed_size: Passed to ``TorchFC`` in the output-size
            position.
        **kw: Extra keyword arguments forwarded to ``DQNTorchModel``.
    """
    DQNTorchModel.__init__(
        self, obs_space, action_space, num_outputs, model_config, name, **kw)

    true_obs_space = obs_space.original_space["true_obs"]
    self.true_obs_preprocessor = DictFlatteningPreprocessor(true_obs_space)

    # The embedding network's input space takes its shape from the
    # preprocessor and is bounded to [-10, 10].
    embed_input_space = Box(-10, 10, self.true_obs_preprocessor.shape)
    embed_name = name + "_action_embed"
    self.action_embed_model = TorchFC(
        embed_input_space,
        action_space,
        action_embed_size,
        model_config,
        embed_name,
    )
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build a DQN model that is fed by a small convolutional filter.

    The base ``DQNTorchModel`` is initialised with a flat space of size
    ``obs_space.shape[0] * obs_space.shape[1] * 4`` instead of the raw
    ``obs_space``; ``self.convfilter`` is the conv stack created here.

    Args:
        obs_space: Observation space; only ``shape[0]`` and ``shape[1]``
            are read.
        action_space: Action space forwarded to ``DQNTorchModel``.
        num_outputs: Output size forwarded to ``DQNTorchModel``.
        model_config: Model config forwarded to ``DQNTorchModel``.
        name: Model name forwarded to ``DQNTorchModel``.
    """
    # nn.Module.__init__ must run before anything registers sub-modules:
    # it (re)creates the empty parameter/module registries, so calling it
    # after the base constructor would discard whatever that constructor
    # had registered on this instance.
    nn.Module.__init__(self)

    flat_size = obs_space.shape[0] * obs_space.shape[1] * 4

    # np.float64 replaces the removed ``np.float`` alias (which was the
    # builtin float, i.e. the same float64 dtype).
    DQNTorchModel.__init__(
        self,
        Space(shape=(flat_size, ), dtype=np.float64),
        action_space,
        num_outputs,
        model_config,
        name)

    # NOTE(review): with kernel_size=3 and padding=2 each conv enlarges
    # both spatial dims by 2 -- confirm that the Linear in_features below
    # matches the flattened conv output for the actual input layout.
    self.convfilter = nn.Sequential(
        nn.Conv2d(21, 8, kernel_size=3, padding=2),
        nn.BatchNorm2d(8),
        nn.ReLU(),
        nn.Conv2d(8, 4, kernel_size=3, padding=2),
        nn.BatchNorm2d(4),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(flat_size, 32),
        nn.ReLU(),
    )
def __init__(self, obs_space: Any, action_space: spaces.Space, num_outputs: int, model_config: Dict, name: str,
             maze_model_composer_config: ConfigType, spaces_config_dump_file: str, state_dict_dump_file: str,
             dueling: bool = True, num_atoms: int = 1, **kwargs):
    """Wrap a Maze policy/critic pair as an RLlib dueling DQN torch model.

    :param obs_space: Observation space; its ``original_space`` is handed to the Maze base model.
    :param action_space: Action space; must be ``spaces.Discrete``.
    :param num_outputs: Ignored; replaced by ``action_space.n``.
    :param model_config: Model config forwarded to both base initialisers.
    :param name: Model name; ``'_maze_wrapper'`` is appended for the DQN base model.
    :param maze_model_composer_config: Forwarded to ``MazeRLlibBaseModel``.
    :param spaces_config_dump_file: Forwarded to ``MazeRLlibBaseModel``.
    :param state_dict_dump_file: Forwarded to ``MazeRLlibBaseModel``.
    :param dueling: Must be ``True``.
    :param num_atoms: Must be ``1``.
    :param kwargs: Extra keyword arguments forwarded to ``DQNTorchModel``.
    """
    unused(num_outputs)

    maze_obs_space = obs_space.original_space

    assert isinstance(action_space, spaces.Discrete), f'Only discrete spaces supported but got {action_space}'
    num_outputs = action_space.n
    # The Maze base model receives the discrete action wrapped in a dict space under the key 'action'.
    maze_action_space = spaces.Dict({'action': action_space})

    assert dueling is True, 'Only dueling==True is supported at this point'
    assert num_atoms == 1, 'Only num_atoms == 1 is suported at this point'

    DQNTorchModel.__init__(
        self,
        obs_space=obs_space,
        action_space=action_space,
        model_config=model_config,
        num_outputs=num_outputs,
        name=name + '_maze_wrapper',
        dueling=dueling,
        num_atoms=num_atoms,
        **kwargs,
    )

    # Fix all RNG seeds before the Maze base model is initialised.
    import random
    import numpy as np
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(42)

    MazeRLlibBaseModel.__init__(
        self,
        observation_space=maze_obs_space,
        action_space=maze_action_space,
        model_config=model_config,
        maze_model_composer_config=maze_model_composer_config,
        spaces_config_dump_file=spaces_config_dump_file,
        state_dict_dump_file=state_dict_dump_file,
    )

    # The first Maze policy network is kept under a private name, while the public
    # ``advantage_module`` attribute is set to an identity module.
    policy_networks = list(self.model_composer.policy.networks.values())
    self._advantage_module: nn.Module = policy_networks[0]
    self.advantage_module = nn.Identity()

    # A critic must exist and consist of exactly one network.
    critic = self.model_composer.critic
    assert critic is not None and len(critic.networks) == 1
    critic_networks = list(self.model_composer.critic.networks.values())
    self._value_module: nn.Module = critic_networks[0]

    # Init class values
    self._value_module_input = None
    self._model_maze_input: Optional[Dict[str, torch.Tensor]] = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             **kwargs):
    """Create the DQN base model and a two-layer Conv3d feature network.

    Args:
        obs_space: Observation space forwarded to ``DQNTorchModel``.
        action_space: Action space forwarded to ``DQNTorchModel``.
        num_outputs: Output size of the final linear layer; also
            forwarded to ``DQNTorchModel``.
        model_config: Model config; ``custom_options["shape"]`` and
            ``custom_options["num_stack"]`` are read from it.
        name: Model name forwarded to ``DQNTorchModel``.
        **kwargs: Extra keyword arguments forwarded to ``DQNTorchModel``.
    """
    nn.Module.__init__(self)
    DQNTorchModel.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name, **kwargs)

    custom_options = model_config["custom_options"]
    self.shape = custom_options["shape"]
    self.num_stack = custom_options["num_stack"]

    # The third entry of the configured shape is used as the number of
    # input channels for the first convolution.
    in_channels = self.shape[2]

    # Layers in registration order, keyed by the names they are
    # registered under in the Sequential container.
    named_layers = (
        ("conv_1",
         nn.Conv3d(in_channels, 16, kernel_size=3, stride=2, padding=2)),
        ("relu_1", nn.ReLU()),
        ("conv_2", nn.Conv3d(16, 16, kernel_size=3, stride=2)),
        ("relu_2", nn.ReLU()),
        ("flatten", Flatten()),
        # NOTE(review): in_features is fixed at 2400 -- presumably tied
        # to one specific input size; confirm.
        ("output", nn.Linear(in_features=2400, out_features=num_outputs)),
    )

    self.module = nn.Sequential()
    for layer_name, layer in named_layers:
        self.module.add_module(layer_name, layer)
def forward(self, input_dict, state, seq_lens):
    """Run the observation through ``self.net``, then delegate to the DQN forward pass.

    The transformed observation is written into a copy obtained from
    ``input_dict.copy()``, so the ``"obs"`` entry of the object passed
    in is not reassigned.

    Args:
        input_dict: Input mapping containing an ``"obs"`` entry.
        state: Forwarded unchanged to ``DQNTorchModel.forward``.
        seq_lens: Forwarded unchanged to ``DQNTorchModel.forward``.

    Returns:
        Whatever ``DQNTorchModel.forward`` returns for the modified input.
    """
    patched_input = input_dict.copy()
    net_output = self.net(patched_input["obs"])
    patched_input['obs'] = net_output
    return DQNTorchModel.forward(self, patched_input, state, seq_lens)
def with_updates(self, *args, **kwargs):
    """Forward all arguments unchanged to ``DQNTorchModel.with_updates``.

    NOTE(review): the result of the parent call is discarded, so this
    method always returns ``None`` -- confirm that no caller relies on a
    return value.
    """
    DQNTorchModel.with_updates(self, *args, **kwargs)