def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Fully-connected policy/value network.

    Builds hidden SlimFC layers from ``fcnet_hiddens``, an action-logits
    head of size ``num_outputs``, and a scalar value head.

    Fix: removed a stray ``print(model_config)`` debug statement; the
    module logger already reports the constructed architecture.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    hiddens = model_config.get("fcnet_hiddens")
    activation = _get_activation_fn(model_config.get("fcnet_activation"))
    logger.debug("Constructing fcnet {} {}".format(hiddens, activation))

    layers = []
    # Input is the flattened observation.
    last_layer_size = np.product(obs_space.shape)
    for size in hiddens:
        layers.append(
            SlimFC(in_size=last_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        last_layer_size = size
    self._hidden_layers = nn.Sequential(*layers)

    # Small (0.01) init keeps the initial policy close to uniform.
    self._logits = SlimFC(in_size=last_layer_size,
                          out_size=num_outputs,
                          initializer=normc_initializer(0.01),
                          activation_fn=None)
    self._value_branch = SlimFC(in_size=last_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Cached value output from the last forward pass.
    self._cur_value = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Wraps a TorchFC sub-model and adds a separate scalar "safe" branch.

    Fix: the body referenced an undefined name ``custom_input_space``;
    the constructor parameter is ``obs_space``, so use it throughout.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # Standard fully-connected policy/value sub-model.
    self.torch_sub_model = TorchFC(obs_space, action_space, num_outputs,
                                   model_config, name)

    # Separate branch producing a single "safety" value from the
    # flattened observation.
    prev_safe_layer_size = int(np.product(obs_space.shape))
    vf_layers = []
    activation = model_config.get("fcnet_activation")
    hiddens = [32]
    for size in hiddens:
        vf_layers.append(
            SlimFC(in_size=prev_safe_layer_size,
                   out_size=size,
                   activation_fn=activation,
                   initializer=normc_initializer(1.0)))
        prev_safe_layer_size = size
    # Final scalar output, no activation.
    vf_layers.append(
        SlimFC(in_size=prev_safe_layer_size,
               out_size=1,
               initializer=normc_initializer(0.01),
               activation_fn=None))
    self.safe_branch_separate = nn.Sequential(*vf_layers)
    # Holds the last flat input for the separate branch.
    self.last_in = None
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Transformer-encoder policy/value model.

    A 2-layer TransformerEncoder attends over a sensor sequence, then two
    FC layers feed separate action-logits and value heads.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    custom_configs = model_config.get("custom_model_config")
    # Length of the sensor history the encoder attends over (default 10).
    self._sensor_seq_len = custom_configs.get("sensor_seq_len", 10)
    activation = model_config.get("fcnet_activation", "tanh")

    # d_model=3: presumably 3 features per sensor timestep — TODO confirm
    # against the caller's observation layout.
    encoder_layer = nn.TransformerEncoderLayer(d_model=3, nhead=3,
                                               batch_first=True,
                                               dim_feedforward=128)
    self._transformer_encoder = nn.TransformerEncoder(encoder_layer,
                                                      num_layers=2)
    self._all_fc1 = SlimFC(in_size=3, out_size=64,
                           initializer=normc_initializer(1.0),
                           activation_fn=activation)
    self._all_fc2 = SlimFC(in_size=64, out_size=16,
                           initializer=normc_initializer(1.0),
                           activation_fn=activation)
    # Heads: small (0.01) init keeps initial outputs near zero.
    self._action_layer = SlimFC(in_size=16, out_size=num_outputs,
                                initializer=normc_initializer(0.01),
                                activation_fn=None)
    self._value_layer = SlimFC(in_size=16, out_size=1,
                               initializer=normc_initializer(0.01),
                               activation_fn=None)
    # Cached features from the last forward pass.
    self._features = None
def __init__(self, input_size, fe_hidden_sizes=None, cls_hidden_sizes=None):
    """Feature extractor plus pairwise classifier.

    Args:
        input_size: Size of one input sample.
        fe_hidden_sizes: Hidden sizes of the feature extractor
            (default ``[128]``); must be non-empty.
        cls_hidden_sizes: Hidden sizes of the classifier that consumes
            the concatenated features of two samples
            (default ``[128, 64]``); must be non-empty.

    Fix: replaced mutable default arguments (lists) with the
    ``None``-sentinel idiom; the old defaults were shared objects.
    """
    super().__init__()
    fe_hidden_sizes = [128] if fe_hidden_sizes is None else fe_hidden_sizes
    cls_hidden_sizes = ([128, 64]
                        if cls_hidden_sizes is None else cls_hidden_sizes)
    assert len(fe_hidden_sizes) > 0
    assert len(cls_hidden_sizes) > 0

    layers = []
    for size in fe_hidden_sizes:
        layers.append(
            SlimFC(in_size=input_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=nn.ReLU))
        input_size = size
    self.feature_extractor = nn.Sequential(*layers)

    input_size = fe_hidden_sizes[
        -1] * 2  # Concatenate the features from the two samples.
    layers = []
    for size in cls_hidden_sizes:
        layers.append(
            SlimFC(in_size=input_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=nn.ReLU))
        input_size = size
    # Final scalar score (no activation argument -> SlimFC default).
    layers.append(
        SlimFC(in_size=input_size,
               out_size=1,
               initializer=normc_initializer(1.0)))
    self.classifier = nn.Sequential(*layers)
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Card-game policy/value model with a learned card embedding.

    Embeds card ids (vocabulary size taken from ``obs_space.high``), runs
    shared or separate hidden stacks for policy and value, and exposes
    logits and scalar value heads.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # Nonlinearity for fully connected net (tanh, relu). Default: "tanh"
    activation = model_config.get("fcnet_activation")
    # Number of hidden layers for fully connected net. Default: [256, 256]
    hiddens = [256, 256]  # model_config.get("fcnet_hiddens", [])
    # Whether to skip the final linear layer used to resize the hidden layer
    # outputs to size `num_outputs`. If True, then the last hidden layer
    # should already match num_outputs.
    # no_final_linear = False
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = False

    # Embedding vocabulary size: presumably obs_space.high[0][-1] is the
    # maximum card id — TODO confirm against the observation encoding.
    self._embedd = nn.Embedding(
        int(obs_space.high[0][-1]) + 1, CARD_EMBEDD_SIZE)
    # Player Hot Encoded = 3 * Number of Cards Played per trick = 4
    # CARD_EMBEDD_SIZE * Number of Cards Played per trick = 4
    self._hidden_layers = self._build_hidden_layers(
        first_layer_size=FIRST_LAYER_SIZE, hiddens=hiddens,
        activation=activation)

    self._value_branch_separate = None
    self._value_embedding = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        self._value_embedding = nn.Embedding(
            int(obs_space.high[0][-1]) + 1, CARD_EMBEDD_SIZE)
        self._value_branch_separate = self._build_hidden_layers(
            first_layer_size=FIRST_LAYER_SIZE, hiddens=hiddens,
            activation=activation)

    self._logits = SlimFC(in_size=hiddens[-1], out_size=num_outputs,
                          initializer=normc_initializer(0.01),
                          activation_fn=None)
    self._value_branch = SlimFC(in_size=hiddens[-1], out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._cards_in = None
    self._players_in = None
def __init__(self, size_in, size_out, hiddens, activations, init_weights,
             append_log_std=False, log_std_type='constant', sample_std=1.0):
    """MLP with per-layer activation and weight-init scale.

    Layer i maps the previous size to ``(hiddens + [size_out])[i]`` using
    ``init_weights[i]`` and ``activations[i]``. Optionally appends an
    AppendLogStd layer of dimension ``size_out``.
    """
    super().__init__()
    out_sizes = list(hiddens) + [size_out]
    in_sizes = [size_in] + out_sizes[:-1]
    modules = [
        SlimFC(in_size=n_in,
               out_size=n_out,
               initializer=normc_initializer(init_weights[idx]),
               activation_fn=get_activation_fn(activations[idx],
                                               framework="torch"))
        for idx, (n_in, n_out) in enumerate(zip(in_sizes, out_sizes))
    ]
    if append_log_std:
        # Trailing layer that appends (state-independent) log-stds.
        modules.append(
            AppendLogStd(type=log_std_type,
                         init_val=np.log(sample_std),
                         dim=size_out))
    self._model = nn.Sequential(*modules)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Conv2d vision network with logits and value heads.

    Builds a conv stack from ``conv_filters`` (or a default derived from
    the observation shape); the last conv uses no padding ("valid") so
    the spatial dimensions collapse before the FC heads.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    filters = model_config.get("conv_filters")
    if not filters:
        # Fall back to a default filter spec for this obs shape.
        filters = _get_filter_config(obs_space.shape)

    layers = []
    (w, h, in_channels) = obs_space.shape
    in_size = [w, h]
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = valid_padding(in_size, kernel, [stride, stride])
        layers.append(
            SlimConv2d(in_channels, out_channels, kernel, stride, padding))
        in_channels = out_channels
        in_size = out_size
    out_channels, kernel, stride = filters[-1]
    # Final conv: padding=None ("valid").
    layers.append(
        SlimConv2d(in_channels, out_channels, kernel, stride, None))
    self._convs = nn.Sequential(*layers)

    self._logits = SlimFC(out_channels, num_outputs,
                          initializer=nn.init.xavier_uniform_)
    self._value_branch = SlimFC(out_channels, 1,
                                initializer=normc_initializer())
    # Cached value output from the last forward pass.
    self._cur_value = None
def create_actor(self):
    """Build the two-stage recurrent actor.

    Returns:
        (stage1, gru, stage2): pre-GRU FC stack, the GRU cell, and the
        post-GRU FC stack ending in an ``nbr_actions``-sized linear layer.
    """
    model_config = self.model_config
    layers = []
    activation_stage1 = model_config.get("fcnet_activation_stage1")
    hiddens_stage1 = model_config.get("fcnet_hiddens_stage1")
    self.gru_cell_size = model_config.get("gru_cell_size")
    activation_stage2 = model_config.get("fcnet_activation_stage2")
    hiddens_stage2 = model_config.get("fcnet_hiddens_stage2")

    prev_layer_size = self.true_obs_space.shape[1]  # obs
    prev_layer_size += self.nbr_agents  # one hot encoding of the agent id
    for size in hiddens_stage1:
        layers.append(
            SlimFC(in_size=prev_layer_size, out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation_stage1))
        prev_layer_size = size
    stage1 = nn.Sequential(*layers)

    # batch_first depends on whether inputs are time-major.
    gru = nn.GRU(input_size=prev_layer_size,
                 hidden_size=self.gru_cell_size, num_layers=1,
                 batch_first=not self.is_time_major())

    prev_layer_size = self.gru_cell_size
    layers = []
    for size in hiddens_stage2:
        layers.append(
            SlimFC(in_size=prev_layer_size, out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation_stage2))
        prev_layer_size = size
    # Final action layer (SlimFC default activation).
    layers.append(
        SlimFC(in_size=prev_layer_size, out_size=self.nbr_actions,
               initializer=normc_initializer(1.0)))
    stage2 = nn.Sequential(*layers)
    return stage1, gru, stage2
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """LSTM-over-data model with a private-feature concat and FC trunk.

    The dict observation has 'data' (N, T, L), reshaped to (T, N*L) for
    the LSTM, and flat 'privates' concatenated with the LSTM output.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    obs_space_ = obs_space.original_space
    data, privates = obs_space_.spaces['data'], obs_space_.spaces['privates']
    N, T, L = data.shape
    # Fold the N series into the feature axis: (T, N*L).
    adjusted_data_shape = (T, N*L)

    activation = model_config.get("fcnet_activation")
    hiddens = model_config.get("fcnet_hiddens", [100, 100])
    lstm_dim = model_config.get("lstm_cell_size", 128)
    self.lstm_net = LSTM(input_dim=adjusted_data_shape[-1],
                         hidden_dim=lstm_dim, num_layers=2)

    # Trunk input = LSTM output + flattened privates.
    prev_layer_size = lstm_dim + int(np.product(privates.shape))
    layers = []
    for size in hiddens:
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=normc_initializer(1.0),
                activation_fn=activation))
        prev_layer_size = size
    self._hidden_layers = nn.Sequential(*layers)
    self._features = None

    # NOTE(review): both heads apply `activation` on their outputs —
    # unusual for logits/value heads; confirm this is intended.
    self._policy_net = SlimFC(
        in_size=prev_layer_size,
        out_size=num_outputs,
        initializer=normc_initializer(1.0),
        activation_fn=activation)
    self._value_net = SlimFC(
        in_size=prev_layer_size,
        out_size=1,
        initializer=normc_initializer(1.0),
        activation_fn=activation)
def __init__(self, in_size, out_size, out_lens, at_hiddens, ap_hiddens,
             initializer=None, activation=None, use_bias=True, bias_init=0.0):
    """Two-branch FC net for composite actions.

    Args:
        in_size: Input feature size for both branches.
        out_size: Total output size; must equal ``sum(out_lens)``.
        out_lens: Per-component output lengths.
        at_hiddens: Hidden sizes of the action-type branch.
        ap_hiddens: Sizes of the action-parameter branch (last entry is
            its output size).
    """
    super(MultiActionFC, self).__init__()
    assert sum(out_lens) == out_size
    prev_vf_layer_size = in_size
    at_layers = []
    # Action-type branch; its layers may use an activation function.
    for size in at_hiddens:
        at_layers.append(
            SlimFC(in_size=prev_vf_layer_size,
                   out_size=size,
                   activation_fn=activation,
                   initializer=normc_initializer(0.5)))
        prev_vf_layer_size = size
    self._at_branch_separate = nn.Sequential(*at_layers)

    # Action-parameter branch; no activation on its last layer
    # (action parameters can be large).
    prev_vf_layer_size = in_size
    ap_layers = []
    for size in ap_hiddens[:-1]:
        ap_layers.append(
            SlimFC(in_size=prev_vf_layer_size,
                   out_size=size,
                   activation_fn=activation,
                   initializer=normc_initializer(0.5)))
        prev_vf_layer_size = size
    ap_layers.append(
        SlimFC(in_size=prev_vf_layer_size,
               out_size=ap_hiddens[-1],
               activation_fn=None,
               initializer=normc_initializer(0.5)))
    self._ap_branch_separate = nn.Sequential(*ap_layers)
def create_critic(self):
    """Build the centralized critic MLP.

    Input concatenates: own obs, (optionally) the real state, the one-hot
    agent id, and two action blocks per agent. Output has one Q-value per
    action.
    """
    layers = []
    input_size = np.prod(self.true_obs_space.shape)
    if self.has_real_state:
        input_size += np.prod(self.state_space.shape)
    input_size += self.nbr_agents
    # Presumably own + other agents' actions, 2 blocks — TODO confirm.
    input_size += 2 * self.nbr_agents * self.nbr_actions
    prev_layer_size = input_size
    activation = self.model_config['fcnet_activation_critic']
    for size in self.model_config['fcnet_hiddens_critic']:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   activation_fn=activation,
                   initializer=normc_initializer(1.0)))
        prev_layer_size = size
    # Output layer (SlimFC default activation).
    layers.append(
        SlimFC(in_size=prev_layer_size,
               out_size=self.nbr_actions,
               initializer=normc_initializer(1.0)))
    return nn.Sequential(*layers)
def _create_model(self):
    """Build one Conv1d branch per dict-obs key, plus logits/value heads.

    All branches share the same ``self.filters`` spec; branch outputs are
    concatenated channel-wise (hence ``out_channels *= len(...)``).
    """
    filters = self.filters
    activation = self.activation
    branches = {}
    for obs_name, space in self.obs_space.original_space.spaces.items():
        layers = []
        w, in_channels = space.shape
        in_size = w
        for i, (out_channels, kernel, stride) in enumerate(filters):
            padding, out_size = same_padding_1d(in_size, kernel, stride)
            # Last filter uses padding=None ("valid").
            layers.append(
                SlimConv1d(in_channels, out_channels, kernel, stride,
                           None if i == (len(filters) - 1) else padding,
                           activation_fn=activation))
            in_channels = out_channels
            in_size = out_size
        branches[obs_name] = nn.Sequential(*layers)
    self._convs = nn.ModuleDict(branches)
    # NOTE(review): from here on, `out_channels`, `kernel`, `stride`,
    # `in_size` and `layers` are leftovers of the LAST loop iteration —
    # correct only because all branches share one filter spec.
    out_channels *= len(self._convs)
    # num_outputs defined. Use that to create an exact
    # `num_output`-sized (1,1)-Conv2D.
    if self.num_outputs:
        in_size = np.ceil((in_size - kernel) / stride)
        padding, _ = same_padding_1d(in_size, 1, 1)
        self._logits = SlimConv1d(out_channels, self.num_outputs, 1, 1,
                                  padding, activation_fn=None)
    # num_outputs not known -> Flatten, then set self.num_outputs
    # to the resulting number of nodes.
    else:
        self.last_layer_is_flattened = True
        # NOTE(review): this append happens AFTER the branch was wrapped
        # in nn.Sequential above, so the Flatten never becomes part of
        # the model — likely dead code; flattening must happen in
        # forward() instead. Verify before relying on it.
        layers.append(nn.Flatten())
        self.num_outputs = out_channels
    # Build the value layers
    self._value_branch = SlimFC(out_channels, 1,
                                initializer=normc_initializer(0.01),
                                activation_fn=None)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Conv2d vision network with configurable conv activation.

    Same layout as the plain vision net: padded conv stack, a final
    "valid" conv, then logits and value FC heads.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = get_activation_fn(
        model_config.get("conv_activation"), framework="torch")
    filters = model_config.get("conv_filters")
    if not filters:
        # Fall back to a default filter spec for this obs shape.
        filters = _get_filter_config(obs_space.shape)
    # no_final_linear = model_config.get("no_final_linear")
    # vf_share_layers = model_config.get("vf_share_layers")

    layers = []
    (w, h, in_channels) = obs_space.shape
    in_size = [w, h]
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = valid_padding(in_size, kernel, [stride, stride])
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                padding,
                activation_fn=activation))
        in_channels = out_channels
        in_size = out_size
    out_channels, kernel, stride = filters[-1]
    # Final conv: padding=None ("valid") collapses spatial dims.
    layers.append(
        SlimConv2d(
            in_channels,
            out_channels,
            kernel,
            stride,
            None,
            activation_fn=activation))
    self._convs = nn.Sequential(*layers)

    self._logits = SlimFC(
        out_channels, num_outputs, initializer=nn.init.xavier_uniform_)
    self._value_branch = SlimFC(
        out_channels, 1, initializer=normc_initializer())
    # Holds the current "base" output (before logits layer).
    self._features = None
def _build_hidden_layers(self, first_layer_size: int, hiddens: list,
                         activation: str):
    """Return an ``nn.Sequential`` of SlimFC hidden layers.

    Sizes run first_layer_size -> hiddens[0] -> ... -> hiddens[-1].
    Assumes no_final_linear = False.
    """
    in_sizes = [first_layer_size] + list(hiddens[:-1])
    fc_stack = [
        SlimFC(in_size=n_in,
               out_size=n_out,
               initializer=normc_initializer(1.0),
               activation_fn=activation)
        for n_in, n_out in zip(in_sizes, hiddens)
    ]
    return nn.Sequential(*fc_stack)
def __init__(self, activation, action_size, channels=256):
    """Dynamics network with a reward head (MuZero-style layout).

    Ten 1x1 convs (the first also consumes the encoded action channels),
    a conv head, and a 5-layer FC reward stack ending in a scalar.
    """
    nn.Module.__init__(self)
    self.activation = activation
    self.channels = channels
    self.action_size = action_size
    # NOTE: kept as a plain list; parameters are registered via the
    # nn.Sequential below.
    self.dynamic_layers = [
        SlimConv2d(
            self.channels + self.action_size if i == 0 else self.channels,  # encode actions for first layer
            self.channels,
            kernel=1,
            stride=1,
            padding=None,
            activation_fn=self.activation
        ) for i in range(10)
    ]
    self.dynamic_head = SlimConv2d(
        self.channels,
        self.channels,
        kernel=1,
        stride=1,
        padding=None,
        activation_fn=None
    )
    self.dynamic = nn.Sequential(*self.dynamic_layers)
    self.flatten = nn.Flatten()
    # Reward stack: 256 -> 256 ... -> 1; the last layer has no activation.
    self.reward_layers = [
        SlimFC(
            256 if i == 0 else 256,  # could make different later
            256 if i != 4 else 1,
            initializer=normc_initializer(0.01),
            activation_fn=self.activation if i != 4 else None
        ) for i in range(5)
    ]
    self.reward_head = nn.Sequential(*self.reward_layers)
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """1D-convolutional network (Conv1d analogue of RLlib's VisionNet).

    Builds a padded Conv1d stack, then either a final "valid" conv sized
    to ``num_outputs`` (no_final_linear), a (1,1)-conv logits layer, or a
    Flatten whose width becomes ``self.num_outputs``. The value branch is
    shared or a parallel conv stack depending on ``vf_share_layers``.
    """
    if not model_config.get("conv_filters"):
        raise ValueError("Config for conv_filters is required")
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"
    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None

    layers = []
    # FIXME add stacking here
    # NOTE(review): here obs_space.shape is unpacked as rank-2 (w, c),
    # but the separate value branch below unpacks rank-3 (h, w, c) —
    # these cannot both succeed; confirm the intended obs rank.
    (w, in_channels) = obs_space.shape
    in_size = w
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding_1d(in_size, kernel, stride)
        layers.append(
            SlimConv1d(in_channels, out_channels, kernel, stride, padding,
                       activation_fn=activation))
        in_channels = out_channels
        in_size = out_size
    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer is a Conv2D and uses num_outputs.
    if no_final_linear and num_outputs:
        layers.append(
            SlimConv1d(
                in_channels,
                num_outputs,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))
        out_channels = num_outputs
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv1d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))
        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            in_size = np.ceil((in_size - kernel) / stride)
            padding, _ = same_padding_1d(in_size, 1, 1)
            self._logits = SlimConv1d(out_channels, num_outputs, 1, 1,
                                      padding, activation_fn=None)
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())
            self.num_outputs = out_channels

    self._convs = nn.Sequential(*layers)

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        self._value_branch = SlimFC(out_channels, 1,
                                    initializer=normc_initializer(0.01),
                                    activation_fn=None)
    else:
        # Parallel conv stack for the value function.
        vf_layers = []
        (h, w, in_channels) = obs_space.shape
        assert h == 1
        in_size = w
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding_1d(in_size, kernel, stride)
            vf_layers.append(
                SlimConv1d(in_channels, out_channels, kernel, stride,
                           padding, activation_fn=activation))
            in_channels = out_channels
            in_size = out_size
        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv1d(in_channels, out_channels, kernel, stride, None,
                       activation_fn=activation))
        # Final (1,1)-conv producing the scalar value channel.
        vf_layers.append(
            SlimConv1d(in_channels=out_channels, out_channels=1, kernel=1,
                       stride=1, padding=None, activation_fn=None))
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Fully-connected network (RLlib fcnet layout).

    Builds ``fcnet_hiddens`` FC layers; depending on ``no_final_linear``
    and ``num_outputs``, either the last hidden layer is resized to
    ``num_outputs`` (with activation) or a separate linear logits layer
    is added.

    Fixes:
    * ``hiddens[-1:-1]`` is always an empty slice, so the inferred
      ``self.num_outputs`` ignored the last hidden size; use
      ``hiddens[-1:]`` (matches upstream RLlib).
    * The logits layer used ``hiddens[-1]`` as its input size, which
      raises IndexError for empty ``hiddens``; ``prev_layer_size`` is
      identical when hiddens exist and correct when they don't.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = get_activation_fn(model_config.get("fcnet_activation"),
                                   framework="torch")
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    # TODO(sven): implement case: vf_shared_layers = False.
    # vf_share_layers = model_config.get("vf_share_layers")
    logger.debug("Constructing fcnet {} {}".format(hiddens, activation))

    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and self.num_outputs:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=self.num_outputs,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = self.num_outputs
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=hiddens[-1],
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = hiddens[-1]
        if self.num_outputs:
            self._logits = SlimFC(in_size=prev_layer_size,
                                  out_size=self.num_outputs,
                                  initializer=normc_initializer(0.01),
                                  activation_fn=None)
        else:
            # Infer num_outputs: last hidden size, or flattened obs size
            # if there are no hidden layers.
            self.num_outputs = ([np.product(obs_space.shape)] +
                                hiddens[-1:])[-1]

    self._hidden_layers = nn.Sequential(*layers)

    # TODO(sven): Implement non-shared value branch.
    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current value output.
    self._cur_value = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             control_input_size, control_hidden_size,
             interaction_hidden_size):
    """Negotiation model: control, interaction and cooperation branches.

    The observation splits into a control part (``control_input_size``)
    and a 2-dim interaction part. Optional std heads are built when the
    action distribution requires them (``num_outputs`` is twice the
    action size) and ``free_log_std`` is off.

    Fixes:
    * ``assert (cond, "msg")`` asserts a non-empty tuple, which is always
      true — both asserts could never fire. Rewritten as real asserts.
    * ``raise NotImplemented()`` raises a TypeError (``NotImplemented``
      is not callable); replaced with ``NotImplementedError``.
    * Removed the no-op self-assignments of the two hidden-size params.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    no_final_linear = model_config.get(
        "no_final_linear")  # TODO Handle no_final_linear
    assert not no_final_linear, "Not Implemented yet bro"
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.vf_hiddens = model_config.get("vf_hiddens", [10, 10])
    self.free_log_std = model_config.get("free_log_std")
    self.control_input_size = control_input_size
    self.interaction_input_size = 2
    assert np.product(obs_space.shape) == \
        self.control_input_size + self.interaction_input_size, \
        "Wrong size of obs space"
    activation = get_activation_fn(model_config.get("fcnet_activation"),
                                   framework="torch")

    # Are the std required as output for the action
    self.std = ((num_outputs / 2) == np.product(action_space.shape))
    # Are the log_std varies with state or not
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    self._logits = None  # Output of the network, called logits for consistency with the rest of RLlib

    # Build the Negotiate model
    self.linear_1 = SlimFC(self.control_input_size,
                           control_hidden_size,
                           initializer=normc_initializer(1.0),
                           activation_fn=activation)
    self.linear_2_mean = SlimFC(control_hidden_size,
                                2,
                                initializer=normc_initializer(0.01),
                                activation_fn=None)
    self.linear_accept_1 = SlimFC(self.interaction_input_size,
                                  interaction_hidden_size,
                                  initializer=normc_initializer(1.0),
                                  activation_fn=activation)
    self.linear_accept_2_mean = SlimFC(interaction_hidden_size,
                                       1,
                                       initializer=normc_initializer(0.01),
                                       activation_fn=None)
    self.control = nn.Sequential(self.linear_1, self.linear_2_mean)
    self.interaction = nn.Sequential(self.linear_accept_1,
                                     self.linear_accept_2_mean)
    self.linear_coop_mean = AppendBiasLayer(1)

    if self.std:
        if not self.free_log_std:
            # State-dependent std heads mirroring the mean heads.
            self.linear_2_std = SlimFC(control_hidden_size,
                                       2,
                                       initializer=normc_initializer(0.01),
                                       activation_fn=None)
            self.linear_accept_2_std = SlimFC(
                interaction_hidden_size,
                1,
                initializer=normc_initializer(0.01),
                activation_fn=None)
            self.linear_coop_std = AppendBiasLayer(1)
            self.control_std = nn.Sequential(self.linear_1,
                                             self.linear_2_std)
            self.interaction_std = nn.Sequential(self.linear_accept_1,
                                                 self.linear_accept_2_std)
            self.coop_std = AppendBiasLayer(1)
        else:
            # Single free (state-independent) log-std vector.
            self._append_free_log_std = AppendBiasLayer(num_outputs)

    # value function
    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        vf_layers = []
        for size in self.vf_hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        prev_layer_size = prev_vf_layer_size
        self._value_branch_separate = nn.Sequential(*vf_layers)
    else:
        raise NotImplementedError()
    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    self._value_module = nn.Sequential(self._value_branch_separate,
                                       self._value_branch)

    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """LSTM + ResNet model over a dict obs of data, images and privates.

    'data' (N, T, L) is reshaped to (T, N*L) for the LSTM; 'images'
    (N, w, h, c) are stacked channel-wise to (c*N, w, h) for the ResNet
    stack; flat 'privates' are concatenated with both outputs.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    obs_space_ = obs_space.original_space
    data, images, privates = obs_space_.spaces['data'], obs_space_.spaces['images'], \
        obs_space_.spaces['privates']
    N, T, L = data.shape
    adjusted_data_shape = (T, N * L)
    _, w, h, c = images.shape
    # Stack the N image sets along the channel axis.
    shape = (c * N, w, h)
    self.img_shape = shape

    conv_filters = model_config.get('conv_filters')
    activation = model_config.get("fcnet_activation")
    hiddens = model_config.get("fcnet_hiddens", [100, 100])
    lstm_dim = model_config.get("lstm_cell_size", 128)
    if not conv_filters:
        conv_filters = [16, 32, 32]
    # One max-pool size per conv block.
    # NOTE(review): the flattened original is ambiguous about whether
    # these two lines were inside the `if`; defined unconditionally here
    # so a user-supplied conv_filters does not hit a NameError — confirm.
    max_pool = [3] * len(conv_filters)
    conv_seqs = []

    self.lstm_net = LSTM(input_dim=adjusted_data_shape[-1],
                         hidden_dim=lstm_dim, num_layers=2)
    for (out_channels, mp) in zip(conv_filters, max_pool):
        conv_seq = ResNet(shape, out_channels, mp)
        shape = conv_seq.get_output_shape()
        conv_seqs.append(conv_seq)
    conv_seqs.append(nn.Flatten())
    self.conv_seqs = nn.ModuleList(conv_seqs)

    # Trunk input = LSTM output + flat privates + flattened conv output.
    prev_layer_size = lstm_dim + int(np.product(privates.shape)) + int(
        np.product(shape))
    layers = []
    for size in hiddens:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size
    self._hidden_layers = nn.Sequential(*layers)
    self._features = None

    # NOTE(review): both heads apply `activation` on their outputs —
    # unusual for logits/value heads; confirm this is intended.
    self._policy_net = SlimFC(in_size=prev_layer_size,
                              out_size=num_outputs,
                              initializer=normc_initializer(1.0),
                              activation_fn=activation)
    self._value_net = SlimFC(in_size=prev_layer_size,
                             out_size=1,
                             initializer=normc_initializer(1.0),
                             activation_fn=activation)
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Conv2d vision network with optional post-FC stack and trajectory-
    view Atari framestacking.

    Depending on ``no_final_linear``/``num_outputs``/``post_fcnet_hiddens``
    the head is a final "valid" conv, a (1,1)-conv logits layer, an FC
    stack ending in logits, or a Flatten whose width becomes
    ``self.num_outputs``.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="torch")

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None
    self.traj_view_framestacking = False

    layers = []
    # Perform Atari framestacking via traj. view API.
    if model_config.get("num_framestacks") != "auto" and \
            model_config.get("num_framestacks", 0) > 1:
        # Frames are delivered as separate rows -> channels = num stacks.
        (w, h) = obs_space.shape
        in_channels = model_config["num_framestacks"]
        self.traj_view_framestacking = True
    else:
        (w, h, in_channels) = obs_space.shape

    in_size = [w, h]
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding(in_size, kernel, [stride, stride])
        layers.append(
            SlimConv2d(in_channels, out_channels, kernel, stride, padding,
                       activation_fn=activation))
        in_channels = out_channels
        in_size = out_size
    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        out_channels = out_channels if post_fcnet_hiddens else num_outputs
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))
        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else [])
        for i, out_size in enumerate(layer_sizes):
            layers.append(
                SlimFC(in_size=out_channels,
                       out_size=out_size,
                       activation_fn=post_fcnet_activation,
                       initializer=normc_initializer(1.0)))
            out_channels = out_size
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))
        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            in_size = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride)
            ]
            padding, _ = same_padding(in_size, [1, 1], [1, 1])
            if post_fcnet_hiddens:
                layers.append(nn.Flatten())
                in_size = out_channels
                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens +
                                             [num_outputs]):
                    layers.append(
                        SlimFC(in_size=in_size,
                               out_size=out_size,
                               activation_fn=post_fcnet_activation
                               if i < len(post_fcnet_hiddens) - 1 else None,
                               initializer=normc_initializer(1.0)))
                    in_size = out_size
                # Last layer is logits layer.
                self._logits = layers.pop()
            else:
                self._logits = SlimConv2d(out_channels, num_outputs, [1, 1],
                                          1, padding, activation_fn=None)
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())
            self.num_outputs = out_channels

    self._convs = nn.Sequential(*layers)

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        self._value_branch = SlimFC(out_channels, 1,
                                    initializer=normc_initializer(0.01),
                                    activation_fn=None)
    else:
        # Parallel conv stack for the value function (same filter spec).
        vf_layers = []
        if self.traj_view_framestacking:
            (w, h) = obs_space.shape
            in_channels = model_config["num_framestacks"]
        else:
            (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel,
                                             [stride, stride])
            vf_layers.append(
                SlimConv2d(in_channels, out_channels, kernel, stride,
                           padding, activation_fn=activation))
            in_channels = out_channels
            in_size = out_size
        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv2d(in_channels, out_channels, kernel, stride, None,
                       activation_fn=activation))
        # Final (1,1)-conv producing the scalar value channel.
        vf_layers.append(
            SlimConv2d(in_channels=out_channels, out_channels=1, kernel=1,
                       stride=1, padding=None, activation_fn=None))
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None

    # Optional: framestacking obs/new_obs for Atari.
    if self.traj_view_framestacking:
        # Request the last `num_framestacks` observations per timestep.
        from_ = model_config["num_framestacks"] - 1
        self.view_requirements[SampleBatch.OBS].shift = \
            "-{}:0".format(from_)
        self.view_requirements[SampleBatch.OBS].shift_from = -from_
        self.view_requirements[SampleBatch.OBS].shift_to = 0
        self.view_requirements[SampleBatch.NEXT_OBS] = ViewRequirement(
            data_col=SampleBatch.OBS,
            shift="-{}:1".format(from_ - 1),
            space=self.view_requirements[SampleBatch.OBS].space,
        )
def __init__(
    self,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    num_outputs: int,
    model_config: ModelConfigDict,
    name: str,
):
    """Build a Conv2D policy stack plus a value branch from `model_config`.

    Layout is driven by `conv_filters`, `conv_activation`,
    `post_fcnet_hiddens`, `post_fcnet_activation`, `no_final_linear` and
    `vf_share_layers`. If `conv_filters` is missing, a default filter
    setup is derived from the observation shape.

    Args:
        obs_space: Observation space; the `(w, h, in_channels)` unpacking
            below requires an image-like rank-3 shape.
        action_space: Action space (passed through to the base class).
        num_outputs: Size of the policy output. If falsy/None, the net
            ends in a Flatten and `self.num_outputs` is inferred via a
            dummy forward pass.
        model_config: Model config dict.
        name: Model name (passed through to the base class).
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    TorchModelV2.__init__(
        self, obs_space, action_space, num_outputs, model_config, name
    )
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="torch"
    )

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None

    # All conv layers except the last use "same" padding.
    layers = []
    (w, h, in_channels) = obs_space.shape
    in_size = [w, h]
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding(in_size, kernel, stride)
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                padding,
                activation_fn=activation,
            )
        )
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        # If a post-FC stack follows, keep the conv's own channel count;
        # the FC stack will end in `num_outputs` instead.
        out_channels = out_channels if post_fcnet_hiddens else num_outputs
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation,
            )
        )

        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else []
        )
        for i, out_size in enumerate(layer_sizes):
            layers.append(
                SlimFC(
                    in_size=out_channels,
                    out_size=out_size,
                    activation_fn=post_fcnet_activation,
                    initializer=normc_initializer(1.0),
                )
            )
            out_channels = out_size

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation,
            )
        )

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            # Spatial size after the valid-padded last conv above.
            in_size = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride),
            ]
            padding, _ = same_padding(in_size, [1, 1], [1, 1])
            if post_fcnet_hiddens:
                layers.append(nn.Flatten())
                in_size = out_channels
                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens + [num_outputs]):
                    layers.append(
                        SlimFC(
                            in_size=in_size,
                            out_size=out_size,
                            activation_fn=post_fcnet_activation
                            if i < len(post_fcnet_hiddens) - 1
                            else None,
                            initializer=normc_initializer(1.0),
                        )
                    )
                    in_size = out_size
                # Last layer is logits layer.
                self._logits = layers.pop()
            else:
                self._logits = SlimConv2d(
                    out_channels,
                    num_outputs,
                    [1, 1],
                    1,
                    padding,
                    activation_fn=None,
                )
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())

    self._convs = nn.Sequential(*layers)

    # If our num_outputs still unknown, we need to do a test pass to
    # figure out the output dimensions. This could be the case, if we have
    # the Flatten layer at the end.
    if self.num_outputs is None:
        # Create a B=1 dummy sample and push it through out conv-net.
        dummy_in = (
            torch.from_numpy(self.obs_space.sample())
            .permute(2, 0, 1)
            .unsqueeze(0)
            .float()
        )
        dummy_out = self._convs(dummy_in)
        self.num_outputs = dummy_out.shape[1]

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        # Shared trunk: single linear head on top of the conv features.
        self._value_branch = SlimFC(
            out_channels, 1, initializer=normc_initializer(0.01), activation_fn=None
        )
    else:
        # Separate value conv stack mirroring the policy filters, ending
        # in a 1-channel (1,1)-conv.
        vf_layers = []
        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, stride)
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation,
                )
            )
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,
                activation_fn=activation,
            )
        )

        vf_layers.append(
            SlimConv2d(
                in_channels=out_channels,
                out_channels=1,
                kernel=1,
                stride=1,
                padding=None,
                activation_fn=None,
            )
        )
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             num_decompose=2):
    """Fully-connected net whose logits and value outputs are decomposed.

    Same layout as the standard FC network, except the logits layer emits
    `num_outputs * num_decompose` nodes and the value head emits
    `num_decompose` values (one per decomposition component).

    Args:
        obs_space: Observation space; flattened to
            int(np.product(obs_space.shape)) inputs.
        action_space: Action space (passed through to the base class).
        num_outputs: Action logits per decomposition component. Must be
            truthy here, else ValueError is raised below.
        model_config: Model config dict (`fcnet_hiddens`,
            `fcnet_activation`, `no_final_linear`, `vf_share_layers`,
            `free_log_std`).
        name: Model name.
        num_decompose: Number of decomposition components (default 2).
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = get_activation_fn(model_config.get("fcnet_activation"),
                                   framework="torch")
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = model_config.get("free_log_std")
    self.num_decompose = num_decompose

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=num_outputs,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = num_outputs
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=hiddens[-1],
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = hiddens[-1]
        if num_outputs:
            # One flat logits layer covering all decomposition components;
            # presumably reshaped to (num_decompose, num_outputs)
            # downstream — confirm in forward().
            self._logits = SlimFC(in_size=prev_layer_size,
                                  out_size=num_outputs * self.num_decompose,
                                  initializer=normc_initializer(0.01),
                                  activation_fn=None)
        else:
            raise ValueError("No num_outputs")

    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std and self._logits:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    self._hidden_layers = nn.Sequential(*layers)

    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        self._value_branch_separate = []
        for size in hiddens:
            self._value_branch_separate.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        self._value_branch_separate = nn.Sequential(
            *self._value_branch_separate)

    # NOTE(review): in_size is the policy trunk's width. With a separate
    # value branch this only matches because both stacks end at
    # hiddens[-1]; if `no_final_linear` were set, the widths would
    # diverge — confirm the config never combines the two.
    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=self.num_decompose,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Simple fully-connected trunk with a shared (single) value branch.

    Args:
        obs_space: Observation space; flattened to
            int(np.product(obs_space.shape)) inputs.
        action_space: Action space (passed through to the base class).
        num_outputs: Size of the logits layer. If falsy/None, no logits
            layer is built and `self.num_outputs` is set to the last
            hidden size (or the flattened obs size if no hiddens).
        model_config: Model config dict (`fcnet_hiddens`,
            `fcnet_activation`, `no_final_linear`, `free_log_std`).
        name: Model name.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = get_activation_fn(model_config.get("fcnet_activation"),
                                   framework="torch")
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    self.free_log_std = model_config.get("free_log_std")
    # TODO(sven): implement case: vf_shared_layers = False.
    # vf_share_layers = model_config.get("vf_share_layers")

    logger.debug("Constructing fcnet {} {}".format(hiddens, activation))
    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Maybe generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=num_outputs,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = num_outputs
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=hiddens[-1],
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = hiddens[-1]
        if num_outputs:
            self._logits = SlimFC(in_size=prev_layer_size,
                                  out_size=num_outputs,
                                  initializer=normc_initializer(0.01),
                                  activation_fn=None)
        else:
            # BUGFIX: was `hiddens[-1:-1]`, which is always the empty
            # slice, so self.num_outputs silently ignored the hidden
            # stack and always equaled the flattened obs size. Use
            # `hiddens[-1:]` (last hidden size, if any) and cast to int,
            # matching the sibling FullyConnectedNetwork in this file.
            self.num_outputs = ([int(np.product(obs_space.shape))] +
                                hiddens[-1:])[-1]

    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    self._hidden_layers = nn.Sequential(*layers)

    # TODO(sven): Implement non-shared value branch.
    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Conv2D policy stack plus value branch (pre-post-fcnet layout).

    Layout is driven by `conv_filters`, `conv_activation`,
    `no_final_linear` and `vf_share_layers`. If `conv_filters` is
    missing, a default filter setup is derived from the observation
    shape.

    Args:
        obs_space: Observation space; the `(w, h, in_channels)`
            unpacking requires an image-like rank-3 shape.
        action_space: Action space (passed through to the base class).
        num_outputs: Policy output size. If falsy/None, the net ends in
            a Flatten and `self.num_outputs` is set to the last conv's
            channel count.
        model_config: Model config dict.
        name: Model name.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None

    # All conv layers except the last use "same" padding.
    layers = []
    (w, h, in_channels) = obs_space.shape
    in_size = [w, h]
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding(in_size, kernel, [stride, stride])
        layers.append(
            SlimConv2d(in_channels,
                       out_channels,
                       kernel,
                       stride,
                       padding,
                       activation_fn=activation))
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer is a Conv2D and uses num_outputs.
    if no_final_linear and num_outputs:
        layers.append(
            SlimConv2d(
                in_channels,
                num_outputs,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))
        out_channels = num_outputs
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            # Spatial size after the valid-padded last conv above.
            in_size = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride)
            ]
            padding, _ = same_padding(in_size, [1, 1], [1, 1])
            self._logits = SlimConv2d(out_channels,
                                      num_outputs, [1, 1],
                                      1,
                                      padding,
                                      activation_fn=None)
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())
            self.num_outputs = out_channels

    self._convs = nn.Sequential(*layers)

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        # Shared trunk: single linear head on top of the conv features.
        self._value_branch = SlimFC(out_channels,
                                    1,
                                    initializer=normc_initializer(0.01),
                                    activation_fn=None)
    else:
        # Separate value conv stack mirroring the policy filters, ending
        # in a 1-channel (1,1)-conv.
        vf_layers = []
        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel,
                                             [stride, stride])
            vf_layers.append(
                SlimConv2d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           padding,
                           activation_fn=activation))
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv2d(in_channels,
                       out_channels,
                       kernel,
                       stride,
                       None,
                       activation_fn=activation))

        vf_layers.append(
            SlimConv2d(in_channels=out_channels,
                       out_channels=1,
                       kernel=1,
                       stride=1,
                       padding=None,
                       activation_fn=None))
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """FC trunk ending in a MultiActionFC logits head.

    Same trunk layout as the standard FC network, but the logits layer
    is a MultiActionFC with hard-coded sub-head sizes.

    Args:
        obs_space: Observation space; flattened to
            int(np.product(obs_space.shape)) inputs.
        action_space: Action space (passed through to the base class).
        num_outputs: Total logits size handed to MultiActionFC. If
            falsy/None, no logits head is built and `self.num_outputs`
            is the last hidden size (or flattened obs size).
        model_config: Model config dict (`fcnet_hiddens`,
            `fcnet_activation`, `vf_share_layers`, `free_log_std`).
        name: Model name.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = model_config.get("fcnet_activation")
    hiddens = model_config.get("fcnet_hiddens")
    # NOTE: removed unused `no_final_linear` read — this network has no
    # no-final-linear branch, so the config key was never acted upon.
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = model_config.get("free_log_std")

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create ALL hidden layers (unlike the standard FC net, the full
    # `hiddens` list is consumed here; the logits head follows).
    for size in hiddens:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    if num_outputs:
        # Hard-coded sub-head split — presumably two action components
        # of sizes 3 and 10; confirm against MultiActionFC's contract.
        self._logits = MultiActionFC(in_size=prev_layer_size,
                                     out_size=num_outputs,
                                     out_lens=[3, 10],
                                     at_hiddens=[32, 3],
                                     ap_hiddens=[32, 10],
                                     initializer=normc_initializer(0.01),
                                     activation=activation)
    else:
        # Expose the last hidden size (or flattened obs size) as this
        # model's output size.
        self.num_outputs = ([int(np.product(obs_space.shape))] +
                            hiddens[-1:])[-1]

    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std and self._logits:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    self._hidden_layers = nn.Sequential(*layers)

    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        vf_layers = []
        for size in hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        self._value_branch_separate = nn.Sequential(*vf_layers)

    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Standard fully-connected policy net with optional separate value branch.

    Layout is driven by `fcnet_hiddens`, `fcnet_activation`,
    `no_final_linear`, `vf_share_layers` and `free_log_std`.

    Args:
        obs_space: Observation space; flattened to
            int(np.product(obs_space.shape)) inputs.
        action_space: Action space (passed through to the base class).
        num_outputs: Size of the logits layer. If falsy/None, no logits
            layer is built and `self.num_outputs` is set to the last
            hidden size (or the flattened obs size if no hiddens).
        model_config: Model config dict.
        name: Model name.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = model_config.get("fcnet_activation")
    hiddens = model_config.get("fcnet_hiddens", [])
    no_final_linear = model_config.get("no_final_linear")
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = model_config.get("free_log_std")

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if self.free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2

    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = size

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=num_outputs,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        prev_layer_size = num_outputs
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=hiddens[-1],
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            prev_layer_size = hiddens[-1]
        if num_outputs:
            self._logits = SlimFC(in_size=prev_layer_size,
                                  out_size=num_outputs,
                                  initializer=normc_initializer(0.01),
                                  activation_fn=None)
        else:
            # No num_outputs -> expose the last hidden size (or the
            # flattened obs size if there are no hiddens).
            self.num_outputs = ([int(np.product(obs_space.shape))] +
                                hiddens[-1:])[-1]

    # Layer to add the log std vars to the state-dependent means.
    if self.free_log_std and self._logits:
        self._append_free_log_std = AppendBiasLayer(num_outputs)

    self._hidden_layers = nn.Sequential(*layers)

    self._value_branch_separate = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        prev_vf_layer_size = int(np.product(obs_space.shape))
        vf_layers = []
        for size in hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_vf_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_vf_layer_size = size
        self._value_branch_separate = nn.Sequential(*vf_layers)

    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._last_flat_in = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Multi-agent CNN + graph-filter model with separate coop/greedy nets.

    Builds, from `custom_model_config` merged over DEFAULT_OPTIONS:
    per-agent map CNN encoders, a stack of graph-filter layers (GFL),
    logits MLP heads, and global-state value CNN/MLP branches. The
    "greedy" (and value-obs) variants are deep copies of the "coop"
    nets, so they start with identical weights but train independently.
    Sub-network freezing is applied at the end per the config flags.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # Merge user config over defaults (deep copy so DEFAULT_OPTIONS is
    # never mutated).
    self.cfg = copy.deepcopy(DEFAULT_OPTIONS)
    self.cfg.update(model_config['custom_model_config'])

    self.n_agents = len(obs_space.original_space['agents'])
    self.graph_features = self.cfg['graph_features']
    self.cnn_compression = self.cfg['cnn_compression']
    # Activation CLASS (instantiated per layer where needed below).
    self.activation = {
        'relu': nn.ReLU,
        'leakyrelu': nn.LeakyReLU
    }[self.cfg['activation']]

    # --- Per-agent map CNN encoders -----------------------------------
    layers = []
    input_shape = obs_space.original_space['agents'][0]['map'].shape
    (w, h, in_channels) = input_shape  # assumes (w, h, channels) layout
    in_size = [w, h]
    for out_channels, kernel, stride in self.cfg['cnn_filters'][:-1]:
        padding, out_size = same_padding(in_size, kernel, [stride, stride])
        layers.append(
            SlimConv2d(in_channels, out_channels, kernel, stride, padding,
                       activation_fn=self.activation))
        in_channels = out_channels
        in_size = out_size
    out_channels, kernel, stride = self.cfg['cnn_filters'][-1]
    # NOTE(review): the last conv relies on SlimConv2d's default
    # activation_fn rather than self.activation — confirm intentional.
    layers.append(
        SlimConv2d(in_channels, out_channels, kernel, stride, None))
    layers.append(nn.Flatten(1, -1))

    self.coop_convs = nn.Sequential(*layers)
    # Deep copies: same architecture and initial weights, independent
    # parameters thereafter.
    self.greedy_convs = copy.deepcopy(self.coop_convs)
    self.coop_value_obs_convs = copy.deepcopy(self.coop_convs)
    self.greedy_value_obs_convs = copy.deepcopy(self.coop_convs)
    summary(self.coop_convs, device="cpu",
            input_size=(input_shape[2], input_shape[0], input_shape[1]))

    # --- Graph filter stack (GFL) -------------------------------------
    gfl = []
    for i in range(self.cfg['graph_layers']):
        gfl.append(
            gml_adv.GraphFilterBatchGSOA(self.graph_features,
                                         self.graph_features,
                                         self.cfg['graph_tabs'],
                                         self.cfg['agent_split'],
                                         self.cfg['graph_edge_features'],
                                         False))
        gfl.append(self.activation())
    self.GFL = nn.Sequential(*gfl)

    # --- Logits heads -------------------------------------------------
    logits_inp_features = self.graph_features
    if self.cfg['cnn_residual']:
        # Residual path: compressed CNN features are fed past the GFL
        # into the logits head as extra inputs.
        logits_inp_features += self.cnn_compression

    post_logits = [
        nn.Linear(logits_inp_features, 64),
        self.activation(),
        nn.Linear(64, 32),
        self.activation()
    ]
    # NOTE(review): hard-coded 5 outputs — presumably the discrete
    # action count; confirm against the action space.
    logit_linear = nn.Linear(32, 5)
    nn.init.xavier_uniform_(logit_linear.weight)
    nn.init.constant_(logit_linear.bias, 0)
    post_logits.append(logit_linear)
    self.coop_logits = nn.Sequential(*post_logits)
    self.greedy_logits = copy.deepcopy(self.coop_logits)
    summary(self.coop_logits, device="cpu",
            input_size=(logits_inp_features, ))

    # --- Global-state value CNNs --------------------------------------
    layers = []
    input_shape = np.array(obs_space.original_space['state'].shape)
    (w, h, in_channels) = input_shape
    in_size = [w, h]
    for out_channels, kernel, stride in self.cfg['value_cnn_filters'][:-1]:
        padding, out_size = same_padding(in_size, kernel, [stride, stride])
        layers.append(
            SlimConv2d(in_channels, out_channels, kernel, stride, padding,
                       activation_fn=self.activation))
        in_channels = out_channels
        in_size = out_size
    out_channels, kernel, stride = self.cfg['value_cnn_filters'][-1]
    layers.append(
        SlimConv2d(in_channels, out_channels, kernel, stride, None))
    layers.append(nn.Flatten(1, -1))
    self.coop_value_cnn = nn.Sequential(*layers)
    self.greedy_value_cnn = copy.deepcopy(self.coop_value_cnn)
    summary(self.greedy_value_cnn, device="cpu",
            input_size=(input_shape[2], input_shape[0], input_shape[1]))

    # --- Value MLP heads ----------------------------------------------
    layers = [
        nn.Linear(self.cnn_compression + self.cfg['value_cnn_compression'],
                  64),
        self.activation(),
        nn.Linear(64, 32),
        self.activation()
    ]
    values_linear = nn.Linear(32, 1)
    normc_initializer()(values_linear.weight)
    nn.init.constant_(values_linear.bias, 0)
    layers.append(values_linear)
    self.coop_value_branch = nn.Sequential(*layers)
    self.greedy_value_branch = copy.deepcopy(self.coop_value_branch)
    summary(self.coop_value_branch, device="cpu",
            input_size=(self.cnn_compression +
                        self.cfg['value_cnn_compression'], ))
    # Set during forward passes for value_function() to return.
    self._cur_value = None

    # Apply optional sub-network freezing per config flags.
    self.freeze_coop_value(self.cfg['freeze_coop_value'])
    self.freeze_greedy_value(self.cfg['freeze_greedy_value'])
    self.freeze_coop(self.cfg['freeze_coop'])
    self.freeze_greedy(self.cfg['freeze_greedy'])