def __init__(self, obs_space, action_space, num_outputs, model_config,
             name, **kwargs):
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    layers = []
    prev_layer_size = int(np.prod(obs_space.shape))
    self._logits = None

    # Create layers 0 to second-last.
    for size in [256, 256]:
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size
        # Add a batch norm layer.
        layers.append(nn.BatchNorm1d(prev_layer_size))

    self._logits = SlimFC(
        in_size=prev_layer_size,
        out_size=self.num_outputs,
        initializer=torch_normc_initializer(0.01),
        activation_fn=None)
    self._value_branch = SlimFC(
        in_size=prev_layer_size,
        out_size=1,
        initializer=torch_normc_initializer(1.0),
        activation_fn=None)
    self._hidden_layers = nn.Sequential(*layers)
    self._hidden_out = None
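# A minimal sketch of the forward/value passes that would pair with the
# constructor above, assuming the RLlib TorchModelV2 interface (an
# assumption; the original forward is not shown). The key detail is toggling
# the BatchNorm1d layers between train and eval mode via `is_training`, so
# running statistics are only updated during training passes.
def forward(self, input_dict, state, seq_lens):
    # Put the batch-norm layers into the correct mode for this pass.
    self._hidden_layers.train(
        mode=bool(input_dict.get("is_training", False)))
    self._hidden_out = self._hidden_layers(input_dict["obs"])
    logits = self._logits(self._hidden_out)
    return logits, state

def value_function(self):
    assert self._hidden_out is not None, "must call forward() first!"
    return torch.reshape(self._value_branch(self._hidden_out), [-1])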
def __init__(self, num_states, num_actions, num_nodes=100):
    """Initialize a deep Q-learning network for algorithm testing.

    Args:
        num_states: Number of features in the input state.
        num_actions: Number of action values to output, corresponding
            one-to-one to the actions in the game.
    """
    super(DQNModule, self).__init__()
    layers = []
    prev_layer_size = num_states
    self.num_actions = num_actions
    self.num_nodes = num_nodes

    # Create layers 0 to second-last.
    self.hidden_out_size = 32
    for i, size in enumerate([512, 128, self.hidden_out_size]):
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size
        # if i == 0:
        #     layers.append(nn.Dropout(p=0.3))
        # Add a batch norm layer.
        # layers.append(nn.BatchNorm1d(prev_layer_size))

    self._value_branch = SlimFC(
        in_size=prev_layer_size,
        out_size=num_actions,
        initializer=torch_normc_initializer(1.0),
        activation_fn=None)
    self._hidden_layers = nn.Sequential(*layers)
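# A plausible forward pass for this Q-network (an assumption; the original
# is not shown): the hidden stack produces a feature vector and the value
# branch maps it to one Q-value per action.
def forward(self, x):
    features = self._hidden_layers(x)
    return self._value_branch(features)  # shape: [batch, num_actions]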
def __init__(self, num_states=4, num_actions=18):
    """Initialize a deep Q-learning network for algorithm testing.

    Args:
        num_states: Number of features in the input state.
        num_actions: Number of action values to output, corresponding
            one-to-one to the actions in the game.
    """
    super(DQNModule, self).__init__()
    # Note: `dqn_config` is not a parameter here; it must be available at
    # module scope (e.g., imported or defined globally).
    self.device = torch.device(
        f"cuda:{dqn_config['cuda_id']}"
        if torch.cuda.is_available() else "cpu")

    layers = []
    prev_layer_size = num_states
    self.num_actions = num_actions

    # Create layers 0 to second-last.
    self.hidden_out_size = 64
    for size in [512, 256, 128, self.hidden_out_size]:
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size
        # Add a batch norm layer.
        # layers.append(nn.BatchNorm1d(prev_layer_size))

    self._value_branch = SlimFC(
        in_size=prev_layer_size,
        out_size=num_actions,
        initializer=torch_normc_initializer(1.0),
        activation_fn=None)
    self._hidden_layers = nn.Sequential(*layers)
def __init__(self, device, num_states=4, num_actions=18):
    """Initialize a deep Q-learning network for algorithm testing.

    Args:
        device: Torch device the module's tensors should live on.
        num_states: Number of features in the input state.
        num_actions: Number of action values to output, corresponding
            one-to-one to the actions in the game.
    """
    super(DQNActionModule, self).__init__()
    self.device = device
    state_layers = []
    action_layers = []
    self.num_states = num_states
    self.num_actions = num_actions

    # State branch: layers 0 to second-last.
    state_prev_layer_size = num_states
    self.state_hidden_out_size = 32
    for size in [256, 128, self.state_hidden_out_size]:
        state_layers.append(
            SlimFC(
                in_size=state_prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        state_prev_layer_size = size

    # Action branch.
    action_prev_layer_size = num_actions
    self.action_hidden_out_size = 32
    for size in [64, self.action_hidden_out_size]:
        action_layers.append(
            SlimFC(
                in_size=action_prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        action_prev_layer_size = size

    # Joint stack over the concatenated state/action embeddings.
    layers = []
    prev_layer_size = (self.state_hidden_out_size
                       + self.action_hidden_out_size)
    self.hidden_out_size = 32
    for size in [64, self.hidden_out_size]:
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size

    self._value_branch = SlimFC(
        in_size=self.hidden_out_size,
        out_size=1,
        initializer=torch_normc_initializer(1.0),
        activation_fn=None)
    self._state_hidden_layers = nn.Sequential(*state_layers)
    self._action_hidden_layers = nn.Sequential(*action_layers)
    self._hidden_layers = nn.Sequential(*layers)
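# A hedged sketch of how the three stacks would plausibly be combined (the
# original forward is not shown): embed state and action separately,
# concatenate the two 32-dim embeddings, and score the pair with the joint
# stack and the scalar value branch.
def forward(self, state, action):
    state_emb = self._state_hidden_layers(state)      # [batch, 32]
    action_emb = self._action_hidden_layers(action)   # [batch, 32]
    joint = torch.cat([state_emb, action_emb], dim=-1)
    return self._value_branch(self._hidden_layers(joint))  # [batch, 1]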
def __init__(self, num_states, num_actions, dqn_config):
    """Initialize a deep Q-learning network for algorithm testing.

    Args:
        num_states: Number of features in the input state.
        num_actions: Number of action values to output, corresponding
            one-to-one to the actions in the game.
        dqn_config: Config dict; `dqn_config['cuda_id']` selects the GPU.
    """
    super(DQNTransitionModule, self).__init__()
    self.num_states = num_states
    self.num_actions = num_actions
    self.device = torch.device(
        f"cuda:{dqn_config['cuda_id']}"
        if torch.cuda.is_available() else "cpu")

    # State-embedding sub-model.
    layers = []
    prev_layer_size = num_states
    self.state_emb_size = 32
    for size in [128, 64, self.state_emb_size]:
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size
    self.state_emb_model = nn.Sequential(*layers).to(self.device)

    # Action-embedding sub-model.
    layers = []
    prev_layer_size = num_actions
    self.action_emb_size = 16
    for size in [64, self.action_emb_size]:
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size
    self.action_emb_model = nn.Sequential(*layers).to(self.device)

    # Transition sub-model: maps the concatenated (state, action)
    # embeddings back into the state-embedding space.
    layers = []
    prev_layer_size = self.state_emb_size + self.action_emb_size
    self.transition_emb_size = self.state_emb_size
    for size in [128, self.transition_emb_size]:
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size
    self.transition_emb_model = nn.Sequential(*layers).to(self.device)
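# A sketch of the transition step implied by the three sub-models (an
# assumption; the original forward is not shown). Because
# `transition_emb_size == state_emb_size`, the predicted next-state
# embedding can be fed back in for multi-step latent rollouts.
def forward(self, state, action):
    state_emb = self.state_emb_model(state)
    action_emb = self.action_emb_model(action)  # e.g., one-hot actions
    return self.transition_emb_model(
        torch.cat([state_emb, action_emb], dim=-1))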
def __init__(self, num_states, num_actions, transition_model, max_num_nodes,
             min_num_nodes, dqn_config, num_nodes_delta=1):
    """Initialize a deep Q-learning network for algorithm testing.

    Args:
        num_states: Number of features in the input state.
        num_actions: Number of action values to output, corresponding
            one-to-one to the actions in the game.
        transition_model: Pre-built transition model stored on the module.
        max_num_nodes: Largest node count for which a Q-head is built.
        min_num_nodes: Smallest node count for which a Q-head is built.
        num_nodes_delta: Step between successive node counts.
    """
    super(DQNDPModule, self).__init__()
    self.num_actions = num_actions
    self.max_num_nodes = max_num_nodes
    self.min_num_nodes = min_num_nodes
    self.num_nodes_delta = num_nodes_delta
    self.device = torch.device(
        f"cuda:{dqn_config['cuda_id']}"
        if torch.cuda.is_available() else "cpu")
    self.transition_model = transition_model

    # One Q-head per node count. Caveat: a plain dict does not register
    # these sub-modules with nn.Module, so their parameters won't appear in
    # self.parameters() (nn.ModuleDict with string keys would fix this).
    self.dp_models = {}
    num_nodes = self.min_num_nodes
    while num_nodes <= self.max_num_nodes:
        layers = []
        prev_layer_size = num_states
        for size in [64, 32]:
            layers.append(
                SlimFC(
                    in_size=prev_layer_size,
                    out_size=size,
                    initializer=torch_normc_initializer(1.0),
                    activation_fn=nn.ReLU))
            prev_layer_size = size
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=num_actions,
                initializer=torch_normc_initializer(1.0),
                activation_fn=None))
        self.dp_models[num_nodes] = nn.Sequential(*layers).to(self.device)
        num_nodes += self.num_nodes_delta
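# Hypothetical usage sketch (names assumed, not from the original): the DP
# table is indexed by the current node count, so each problem size gets its
# own Q-head. Because `self.dp_models` is a plain dict (see caveat above),
# its parameters must also be handed to the optimizer explicitly.
def q_values(self, states, num_nodes):
    return self.dp_models[num_nodes](states.to(self.device))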
def __init__(self, num_states, num_actions, num_nodes, device):
    super().__init__()
    self.num_actions = num_actions
    self.num_states = num_states
    self.device = device
    self.num_nodes = num_nodes

    layers = []
    prev_layer_size = num_states
    for size in [256, 128, 64]:
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size
    layers.append(
        SlimFC(
            in_size=prev_layer_size,
            out_size=num_actions,
            initializer=torch_normc_initializer(1.0),
            activation_fn=None))
    # Shared memory so multiple processes can train/act on the same weights.
    self.model = nn.Sequential(*layers).to(device).share_memory()
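# `share_memory()` places the parameter tensors in shared memory so that
# torch.multiprocessing workers can read and update the same weights without
# copying (the usual Hogwild!-style setup). A minimal usage sketch with
# illustrative names, assuming a CPU model and the default "fork" start
# method on Linux (sharing CUDA tensors requires "spawn"):
def _hogwild_worker(shared_model, num_states):
    # Every process sees (and can update) the same parameter storage.
    _ = shared_model(torch.zeros(1, num_states))

def run_shared_workers(module, num_states, num_workers=4):
    import torch.multiprocessing as mp
    procs = [mp.Process(target=_hogwild_worker,
                        args=(module.model, num_states))
             for _ in range(num_workers)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()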
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    self.original_space = obs_space.original_space if \
        hasattr(obs_space, "original_space") else obs_space
    assert isinstance(self.original_space, (Dict, Tuple)), \
        "`obs_space.original_space` must be [Dict|Tuple]!"
    self.processed_obs_space = self.original_space if \
        model_config.get("_disable_preprocessor_api") else obs_space

    nn.Module.__init__(self)
    TorchModelV2.__init__(self, self.original_space, action_space,
                          num_outputs, model_config, name)

    self.flattened_input_space = flatten_space(self.original_space)

    # Atari type CNNs or IMPALA type CNNs (with residual layers)?
    # self.cnn_type = self.model_config["custom_model_config"].get(
    #     "conv_type", "atari")

    # Build the CNN(s) given obs_space's image components.
    self.cnns = {}
    self.one_hot = {}
    self.flatten = {}
    concat_size = 0
    for i, component in enumerate(self.flattened_input_space):
        # Image space.
        if len(component.shape) == 3:
            config = {
                "conv_filters": model_config["conv_filters"]
                if "conv_filters" in model_config
                else get_filter_config(obs_space.shape),
                "conv_activation": model_config.get("conv_activation"),
                "post_fcnet_hiddens": [],
            }
            # if self.cnn_type == "atari":
            cnn = ModelCatalog.get_model_v2(
                component,
                action_space,
                num_outputs=None,
                model_config=config,
                framework="torch",
                name="cnn_{}".format(i))
            # TODO (sven): add IMPALA-style option.
            # else:
            #     cnn = TorchImpalaVisionNet(
            #         component,
            #         action_space,
            #         num_outputs=None,
            #         model_config=config,
            #         name="cnn_{}".format(i))
            concat_size += cnn.num_outputs
            self.cnns[i] = cnn
            self.add_module("cnn_{}".format(i), cnn)
        # Discrete|MultiDiscrete inputs -> One-hot encode.
        elif isinstance(component, Discrete):
            self.one_hot[i] = True
            concat_size += component.n
        elif isinstance(component, MultiDiscrete):
            self.one_hot[i] = True
            concat_size += sum(component.nvec)
        # Everything else (1D Box).
        else:
            self.flatten[i] = int(np.prod(component.shape))
            concat_size += self.flatten[i]

    # Optional post-concat FC-stack.
    post_fc_stack_config = {
        "fcnet_hiddens": model_config.get("post_fcnet_hiddens", []),
        "fcnet_activation": model_config.get("post_fcnet_activation",
                                             "relu"),
    }
    self.post_fc_stack = ModelCatalog.get_model_v2(
        Box(float("-inf"),
            float("inf"),
            shape=(concat_size, ),
            dtype=np.float32),
        self.action_space,
        None,
        post_fc_stack_config,
        framework="torch",
        name="post_fc_stack")

    # Actions and value heads.
    self.logits_layer = None
    self.value_layer = None
    self._value_out = None

    if num_outputs:
        # Action-distribution head.
        self.logits_layer = SlimFC(
            in_size=self.post_fc_stack.num_outputs,
            out_size=num_outputs,
            activation_fn=None,
        )
        # Create the value branch model.
        self.value_layer = SlimFC(
            in_size=self.post_fc_stack.num_outputs,
            out_size=1,
            activation_fn=None,
            initializer=torch_normc_initializer(0.01))
    else:
        self.num_outputs = concat_size
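# A condensed sketch of the matching forward pass, modeled on RLlib's
# ComplexInputNetwork but abbreviated here (treat it as an outline under
# stated assumptions, not the library code; it assumes dm-tree imported as
# `tree` and RLlib's torch `one_hot` helper). Each flattened component is
# routed to its CNN, one-hot encoder, or flattener; the results are
# concatenated and pushed through the post-FC stack and the two heads.
def forward(self, input_dict, state, seq_lens):
    outs = []
    for i, component in enumerate(tree.flatten(input_dict["obs"])):
        if i in self.cnns:
            cnn_out, _ = self.cnns[i]({"obs": component}, [], None)
            outs.append(cnn_out)
        elif i in self.one_hot:
            outs.append(one_hot(component.long(),
                                self.flattened_input_space[i]))
        else:
            outs.append(torch.reshape(component, [-1, self.flatten[i]]))
    out = torch.cat(outs, dim=1)
    out, _ = self.post_fc_stack({"obs": out}, [], None)
    if self.logits_layer is None:
        return out, []
    logits = self.logits_layer(out)
    self._value_out = torch.reshape(self.value_layer(out), [-1])
    return logits, []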
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    self.original_space = obs_space.original_space if \
        hasattr(obs_space, "original_space") else obs_space
    assert isinstance(self.original_space, Tuple), \
        "`obs_space.original_space` must be Tuple!"

    nn.Module.__init__(self)
    TorchModelV2.__init__(self, self.original_space, action_space,
                          num_outputs, model_config, name)
    self.new_obs_space = obs_space

    # Atari type CNNs or IMPALA type CNNs (with residual layers)?
    # self.cnn_type = self.model_config["custom_model_config"].get(
    #     "conv_type", "atari")

    # Build the CNN(s) given obs_space's image components. Separate concat
    # sizes are tracked for the policy and value towers.
    self.cnns = {}
    self.one_hot = {}
    self.flatten = {}
    concat_size_p, concat_size_v = 0, 0
    for i, component in enumerate(self.original_space[:-1]):
        # Image space.
        if len(component.shape) == 3:
            config = {
                "conv_filters": model_config["conv_filters"]
                if "conv_filters" in model_config
                else get_filter_config(obs_space.shape),
                "conv_activation": model_config.get("conv_activation"),
                "post_fcnet_hiddens": [],
            }
            # if self.cnn_type == "atari":
            cnn = TorchBatchNormModel(component, action_space, None, config,
                                      "cnn_{}".format(i))
            print(cnn)
            concat_size_p += cnn.num_outputs_p
            concat_size_v += cnn.num_outputs_v
            self.cnns[i] = cnn
            self.add_module("cnn_{}".format(i), cnn)
        # Discrete inputs -> One-hot encode.
        elif isinstance(component, Discrete):
            self.one_hot[i] = True
            concat_size_p += component.n
            concat_size_v += component.n
        # Everything else (1D Box).
        else:
            self.flatten[i] = int(np.prod(component.shape))
            concat_size_p += self.flatten[i]
            concat_size_v += self.flatten[i]

    hidden_size = model_config.get("post_fcnet_hiddens", [])
    self.post_fc_stack = nn.Sequential(
        SlimFC(concat_size_p,
               hidden_size[0],
               initializer=torch_normc_initializer(1.0),
               activation_fn=None),
        nn.BatchNorm1d(hidden_size[0]),
        nn.ReLU())
    self.post_fc_stack_vf = nn.Sequential(
        SlimFC(concat_size_v,
               hidden_size[0],
               initializer=torch_normc_initializer(1.0),
               activation_fn=None),
        nn.BatchNorm1d(hidden_size[0]),
        nn.ReLU())

    # Actions and value heads.
    self.logits_layer = None
    self.value_layer = None
    self._value_out = None

    if num_outputs:
        # Action-distribution head.
        self.logits_layer = SlimFC(
            in_size=hidden_size[0],
            out_size=num_outputs,
            initializer=torch_normc_initializer(0.01),
            activation_fn=None,
        )
        # Create the value branch model.
        self.value_layer = SlimFC(
            in_size=hidden_size[0],
            out_size=1,
            initializer=torch_normc_initializer(1.0),
            activation_fn="tanh",
        )
    else:
        raise NotImplementedError()
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    # TODO: (sven) Support Dicts as well.
    assert isinstance(obs_space.original_space, Tuple), \
        "`obs_space.original_space` must be Tuple!"

    nn.Module.__init__(self)
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)

    # Atari type CNNs or IMPALA type CNNs (with residual layers)?
    self.cnn_type = self.model_config["custom_model_config"].get(
        "conv_type", "atari")

    # Build the CNN(s) given obs_space's image components.
    self.cnns = {}
    concat_size = 0
    for i, component in enumerate(obs_space.original_space):
        # Image space.
        if len(component.shape) == 3:
            config = {
                "conv_filters": model_config.get(
                    "conv_filters", get_filter_config(component.shape)),
                "conv_activation": model_config.get("conv_activation"),
            }
            if self.cnn_type == "atari":
                cnn = ModelCatalog.get_model_v2(
                    component,
                    action_space,
                    num_outputs=None,
                    model_config=config,
                    framework="torch",
                    name="cnn_{}".format(i))
            else:
                cnn = TorchImpalaVisionNet(
                    component,
                    action_space,
                    num_outputs=None,
                    model_config=config,
                    name="cnn_{}".format(i))
            concat_size += cnn.num_outputs
            self.cnns[i] = cnn
            self.add_module("cnn_{}".format(i), cnn)
        # Discrete inputs -> One-hot encode.
        elif isinstance(component, Discrete):
            concat_size += component.n
        # TODO: (sven) MultiDiscrete (see e.g. our auto-LSTM wrappers).
        # Everything else (1D Box).
        else:
            assert len(component.shape) == 1, \
                "Only input Box 1D or 3D spaces allowed!"
            concat_size += component.shape[-1]

    self.logits_layer = None
    self.value_layer = None
    self._value_out = None

    if num_outputs:
        # Action-distribution head.
        self.logits_layer = SlimFC(
            in_size=concat_size,
            out_size=num_outputs,
            activation_fn=None,
        )
        # Create the value branch model.
        self.value_layer = SlimFC(
            in_size=concat_size,
            out_size=1,
            activation_fn=None,
            initializer=torch_normc_initializer(0.01))
    else:
        self.num_outputs = concat_size