Example 1
    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **custom_model_config):
        #call the parent constructors
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        HP = custom_model_config["hyperparameters"]

        #check if HP is of type LSTMHyperParameters
        if not isinstance(HP, self.hyperparameter_type):
            raise Exception("Make sure to pass the correct HyperParameter-Type for your model")
        
        #save values
        self.feature_size = len(HP.features)
        self.hidden_size = HP.hidden_size
        self.num_layers = HP.num_layers
        self.dropout = HP.dropout

        #create the lstm layer
        self.lstm1 = nn.LSTM(input_size=self.feature_size, hidden_size=self.hidden_size, batch_first=True, num_layers=self.num_layers, dropout=self.dropout)

        #create the activation
        if HP.activation is hp.Activation.TANH:
            self.activation = nn.Tanh()
        elif HP.activation is hp.Activation.RELU:
            self.activation = nn.ReLU()

        #create the linear output (logits) layer
        self.linear = nn.Linear(self.hidden_size, 3)

        #create the value function head
        self.vf = nn.Linear(self.hidden_size, 1)
Example 2
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        h, w, c = obs_space.shape
        shape = (c, h, w)

        conv_seqs = []
        for out_channels in [16, 32, 32]:
            conv_seq = ConvSequence(shape, out_channels)
            shape = conv_seq.get_output_shape()
            conv_seqs.append(conv_seq)
        self.conv_seqs = nn.ModuleList(conv_seqs)
        self._embed_shape = (shape[0], shape[1], shape[2])
        self._n_embed_shape = (self._embed_shape[0] * num_outputs, ) + self._embed_shape[1:]
        self.hidden_fc = nn.Linear(in_features=shape[0] * shape[1] * shape[2], out_features=256)
        self.logits_fc = nn.Linear(in_features=256, out_features=num_outputs)
        self.value_fc = nn.Linear(in_features=256, out_features=1)

        self.rew_hid = nn.Linear(in_features=shape[0] * shape[1] * shape[2], out_features=256)
        self.rew_out = nn.Linear(in_features=256, out_features=num_outputs)
        self.trans_conv = nn.Conv2d(in_channels=shape[0]*num_outputs, out_channels=shape[0]*num_outputs,
                                    kernel_size=3, padding=1, stride=1, groups=num_outputs)
Example 3
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, num_objects, object_state_size):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        h, w, c = obs_space.shape
        shape = (c, h, w)

        # Note: the original model seems to assume channels-first (NCHW) input,
        # which is why the channels-last (HWC) obs_space shape is flipped to
        # CHW above.
        self.obs_encoder = ObsEncoder(input_dim=c,
                                      hidden_dim=16,
                                      num_objects=num_objects)
        self.mask_encoder = MaskEncoder(input_dim=num_objects,
                                        hidden_dim=16,
                                        fc_hidden_dim=128,
                                        output_dim=object_state_size)

        self.gnn = GNN(input_dim=object_state_size,
                       hidden_dim=16,
                       num_objects=num_objects)

        gnn_flat_output_dim = num_objects * object_state_size

        output_hidden_dim = 128
        self.hidden_fc = nn.Linear(in_features=gnn_flat_output_dim,
                                   out_features=output_hidden_dim)
        self.logits_fc = nn.Linear(in_features=output_hidden_dim,
                                   out_features=num_outputs)
        self.value_fc = nn.Linear(in_features=output_hidden_dim,
                                  out_features=1)

        self._obj_masks = None
Example 4
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                model_config, name)
        nn.Module.__init__(self)

        self.obs_size = _get_size(obs_space)
        self.rnn_hidden_dim = model_config["lstm_cell_size"]
        self.free_log_std = model_config.get("free_log_std")
        if self.free_log_std:
            assert num_outputs % 2 == 0, (
                "num_outputs must be divisible by two", num_outputs)
            num_outputs = num_outputs // 2
            self.log_std = torch.nn.Parameter(torch.as_tensor([0.0] * num_outputs))

        self.fc1 = nn.Linear(self.obs_size, self.rnn_hidden_dim)
        self.rnn = nn.GRU(self.rnn_hidden_dim, self.rnn_hidden_dim, batch_first=True)
        self.fc2 = nn.Linear(self.rnn_hidden_dim, num_outputs)

        self.mu = nn.Linear(self.rnn_hidden_dim, 7)
        self.alpha = nn.Sequential(
            nn.Linear(self.rnn_hidden_dim, 7),
            nn.Softmax(dim=-1)
        )
        

        # self.value_branch = nn.Linear(self.rnn_hidden_dim, 1)
        self._cur_value = None        
Example 5
    def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
        **kwargs,
    ):
        orig_space = getattr(obs_space, "original_space", obs_space)
        assert (isinstance(orig_space, Dict)
                and "action_mask" in orig_space.spaces
                and "observations" in orig_space.spaces)

        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name, **kwargs)
        nn.Module.__init__(self)

        self.internal_model = TorchFC(
            orig_space["observations"],
            action_space,
            num_outputs,
            model_config,
            name + "_internal",
        )

        # Optionally disable action masking (doing so will likely lead to
        # invalid actions being selected).
        self.no_masking = False
        if "no_masking" in model_config["custom_model_config"]:
            self.no_masking = model_config["custom_model_config"]["no_masking"]
Example 6
    def __init__(self, observation_space, action_space, num_outputs,
                 model_config, name):
        TorchModelV2.__init__(self, observation_space, action_space,
                              num_outputs, model_config, name)
        nn.Module.__init__(self)

        # Non-shared initial layer.
        self.first_layer = SlimFC(
            int(np.product(observation_space.shape)),
            64,
            activation_fn=nn.ReLU,
            initializer=torch.nn.init.xavier_uniform_)

        # Non-shared final layer.
        self.last_layer = SlimFC(
            64,
            self.num_outputs,
            activation_fn=None,
            initializer=torch.nn.init.xavier_uniform_)
        self.vf = SlimFC(
            64,
            1,
            activation_fn=None,
            initializer=torch.nn.init.xavier_uniform_,
        )
        self._global_shared_layer = TORCH_GLOBAL_SHARED_LAYER
        self._output = None
Example 7
    def __init__(self, obs_space, action_space, num_outputs, model_config, name): 
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
   
        self.counter = 0
        self.nn_layers = nn.ModuleList()
        self.conv_filters_dict = model_config["custom_model_config"]["conv_filters"]
        input_c = model_config["custom_model_config"]["inChannel"]
        for i in range(len(self.conv_filters_dict)):
            # Each conv spec: [out_channels, kernel_size, stride, padding,
            #                  [use_max_pool, pool_kernel_size, pool_stride]]
            spec = self.conv_filters_dict[i]
            self.nn_layers.append(nn.Conv2d(input_c, spec[0], kernel_size=spec[1],
                                            stride=spec[2], padding=spec[3]))
            if spec[4][0] == 1:
                self.nn_layers.append(nn.MaxPool2d(kernel_size=spec[4][1],
                                                   stride=spec[4][2]))
                self.counter += 1
            input_c = spec[0]
            self.counter += 1

        if(model_config["custom_model_config"]["conv_activation"] == "relu"):
            self.conv_activ = F.relu
        
        self.fc_hidden_dict = model_config["custom_model_config"]["fcnet_hiddens"]
        for i in range(len(self.fc_hidden_dict)):
            # Each entry: [in_features, out_features]
            self.nn_layers.append(nn.Linear(self.fc_hidden_dict[i][0],
                                            self.fc_hidden_dict[i][1]))

        if(model_config["custom_model_config"]["fcnet_activation"] == "relu"):
            self.fully_connect_activation = F.relu
Example 8
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        """
        The model of the network will be defined here
        """
        self.obs_space = obs_space
        self.action_space = action_space
        self.model_config = model_config
        self.name = name
        self.network_size = model_config["custom_model_config"]["network_size"]

        if isinstance(self.obs_space, Box):
            self.obs_shape = obs_space.shape[0]
        else:
            self.obs_shape = self.obs_space

        self.layers = nn.Sequential()
        last_size = self.obs_space.shape[0]
        i = 0
        for layer_size in self.network_size:
            self.layers.add_module("linear_{}".format(i),
                                   nn.Linear(last_size, layer_size))
            self.layers.add_module("relu_{}".format(i), nn.ReLU())
            last_size = layer_size
            i += 1
        self.layers.add_module("linear_{}".format(i),
                               nn.Linear(last_size, num_outputs))
Example 9
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        filters = model_config.get("conv_filters")
        if not filters:
            filters = _get_filter_config(obs_space.shape)
        layers = []

        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = valid_padding(in_size, kernel,
                                              [stride, stride])
            layers.append(
                SlimConv2d(in_channels, out_channels, kernel, stride, padding))
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        layers.append(
            SlimConv2d(in_channels, out_channels, kernel, stride, None))
        self._convs = nn.Sequential(*layers)

        self._logits = SlimFC(out_channels,
                              num_outputs,
                              initializer=nn.init.xavier_uniform_)
        self._value_branch = SlimFC(out_channels,
                                    1,
                                    initializer=normc_initializer())
        self._cur_value = None
Example 10
 def __init__(self, obs_space, action_space, num_outputs, model_config, name):
     
     TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
     nn.Module.__init__(self)
     # `action_model` and `value_model` are assumed to be pre-built models
     # available in the enclosing scope (e.g. at module level).
     self.action_model = action_model
     self.value_model = value_model
     self._model_in = None
Example 11
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        self.obs_size = _get_size(obs_space)
        self.hidden_size = model_config["hidden_size"]
        # number of discrete actions
        self.num_category = model_config["num_category"]
        # number of parameters for each action
        self.num_params = model_config['num_params']

        self.rnn = nn.GRUCell(self.obs_size, self.hidden_size)

        # category head (for discrete action)
        self.h2c = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size), nn.Tanh(),
            nn.Linear(self.hidden_size, self.num_category))

        # parameter head (for continuous parameters of the action)
        self.h2p = nn.Sequential(nn.Linear(self.hidden_size, self.hidden_size),
                                 nn.Tanh(),
                                 nn.Linear(self.hidden_size, self.num_params))

        # critic
        self.critic = nn.Sequential(
            nn.Linear(self.hidden_size, self.hidden_size), nn.Tanh(),
            nn.Linear(self.hidden_size, 1))
Example 12
    def __init__(
        self,
        obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: int,
        model_config: ModelConfigDict,
        name: str,
    ):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        self._is_action_discrete = isinstance(action_space,
                                              gym.spaces.Discrete)

        # TODO: I don't know why this is true yet? (in = num_outputs)
        self.obs_ins = num_outputs
        self.action_dim = np.product(self.action_space.shape)
        self.actor_model = self._build_actor_net("actor")
        twin_q = self.model_config["twin_q"]
        self.q_model = self._build_q_net("q")
        if twin_q:
            self.twin_q_model = self._build_q_net("twin_q")
        else:
            self.twin_q_model = None
Example 13
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name):
     TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)
     nn.Module.__init__(self)
     custom_model_config = model_config.get("custom_model_config")
     # activation = custom_model_config.get("activation")
     # no_final_layer = custom_model_config.get("no_final_layer")
     sse_hiddens = [256, 128, 64]
     hiddens = [256, 256, 256, 32]
     self.num_slice = 3
     self.vf_share_layers = True
     sse_layers = []
     prev_size = int(np.product(obs_space.shape) / self.num_slice)
     for i, size in enumerate(sse_hiddens):
         if i != len(sse_hiddens) - 1:
             sse_layers.append(nn.Linear(prev_size, size))
             sse_layers.append(nn.Tanh())
         else:
             sse_layers.append(nn.Linear(prev_size, size))
         prev_size = size
     self.sse_encode_block = nn.Sequential(*sse_layers)
     prev_size *= self.num_slice
     hidden_layers = []
     for i, size in enumerate(hiddens):
         hidden_layers.append(nn.Linear(prev_size, size))
         hidden_layers.append(nn.LeakyReLU(0.1))
         prev_size = size
     self.hidden_layers = nn.Sequential(*hidden_layers)
     self.logits = nn.Linear(prev_size, num_outputs)
     if self.vf_share_layers:
         self.value_branch = nn.Linear(prev_size, 1)
     else:
         raise NotImplementedError
     self.features = None
Example 14
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        if not model_config.get("conv_filters"):
            raise ValueError("Config for conv_filters is required")
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        activation = self.model_config.get("conv_activation")
        filters = self.model_config["conv_filters"]
        assert len(filters) > 0,\
            "Must provide at least 1 entry in `conv_filters`!"

        # Whether the last layer is the output of a Flattened (rather than
        # a n x (1,1) Conv2D).
        self.last_layer_is_flattened = False
        self._logits = None
        # Holds the current "base" output (before logits layer).
        self._features = None
        self.num_outputs = num_outputs if num_outputs else action_space.shape[0]
        self.filters = filters
        self.activation = activation
        self.obs_space = obs_space

        self._create_model()
Example 15
    def __init__(
            self,
            obs_space,
            action_space,
            num_outputs,
            model_config,
            name,
            # customs
            embed_dim=256,
            encoder_type="impala",
            **kwargs):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        self.action_dim = action_space.n
        self.discrete = True
        self.action_outs = q_outs = self.action_dim
        self.action_ins = None  # No action inputs for the discrete case.
        self.embed_dim = embed_dim

        h, w, c = obs_space.shape
        shape = (c, h, w)
        # obs embedding
        self.encoder = make_encoder(encoder_type,
                                    shape,
                                    out_features=embed_dim)

        self.logits_fc = nn.Linear(in_features=embed_dim,
                                   out_features=num_outputs)
        self.value_fc = nn.Linear(in_features=embed_dim, out_features=1)
Example 16
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        # `custom_input_space` is assumed to be defined in the enclosing scope
        # (e.g. a module-level gym.spaces.Box for the preprocessed observations).
        TorchModelV2.__init__(self, custom_input_space, action_space,
                              num_outputs, model_config, name)
        nn.Module.__init__(self)

        self.torch_sub_model = TorchFC(custom_input_space, action_space,
                                       num_outputs, model_config, name)
        prev_safe_layer_size = int(np.product(custom_input_space.shape))
        vf_layers = []
        activation = model_config.get("fcnet_activation")
        hiddens = [32]
        for size in hiddens:
            vf_layers.append(
                SlimFC(in_size=prev_safe_layer_size,
                       out_size=size,
                       activation_fn=activation,
                       initializer=normc_initializer(1.0)))
            prev_safe_layer_size = size
        vf_layers.append(
            SlimFC(in_size=prev_safe_layer_size,
                   out_size=1,
                   initializer=normc_initializer(0.01),
                   activation_fn=None))
        self.safe_branch_separate = nn.Sequential(*vf_layers)
        self.last_in = None
Example 17
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        custom_config = model_config["custom_options"]
        latent_size = custom_config['latent_size']

        self.main = Encoder(latent_size=latent_size)

        if custom_config['encoder_path'] is not None:
            # saved checkpoints could contain extra weights such as linear_logsigma
            weights = torch.load(custom_config['encoder_path'],
                                 map_location=torch.device('cpu'))
            for k in list(weights.keys()):
                if k not in self.main.state_dict().keys():
                    del weights[k]
            self.main.load_state_dict(weights)
            print("Loaded Weights")
        else:
            print("No Load Weights")

        self.critic = nn.Sequential(nn.Linear(latent_size, 400), nn.ReLU(),
                                    nn.Linear(400, 300), nn.ReLU(),
                                    nn.Linear(300, 1))
        self.actor = nn.Sequential(nn.Linear(latent_size, 400), nn.ReLU(),
                                   nn.Linear(400, 300), nn.ReLU())
        self.alpha_head = nn.Sequential(nn.Linear(300, 3), nn.Softplus())
        self.beta_head = nn.Sequential(nn.Linear(300, 3), nn.Softplus())
        self._cur_value = None
        self.train_encoder = custom_config['train_encoder']
        print("Train Encoder: ", self.train_encoder)
Example 18
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        # c, h, w = obs_space.shape     # Frame stack
        h, w, c = obs_space.shape
        shape = (c, h, w)
        embed_size = 256

        conv_seqs = []
        for out_channels in [16, 32, 32]:
            conv_seq = ConvSequence(shape, out_channels)
            shape = conv_seq.get_output_shape()
            conv_seqs.append(conv_seq)
        self.conv_seqs = nn.ModuleList(conv_seqs)
        self.hidden_fc_1 = nn.Linear(in_features=shape[0] * shape[1] * shape[2], out_features=embed_size)
        # self.hidden_fc_2 = nn.Linear(in_features=256, out_features=256)
        self.logits_fc = nn.Linear(in_features=256, out_features=num_outputs)
        self.value_fc = nn.Linear(in_features=256, out_features=1)
        # ICM Layers
        self.idm_hidden = nn.Linear(in_features=embed_size * 2, out_features=256)
        self.idm_logits = nn.Linear(in_features=256, out_features=num_outputs)
        self.fdm_hidden = nn.Linear(in_features=embed_size + num_outputs, out_features=256)
        self.fdm_output = nn.Linear(in_features=256, out_features=embed_size)
Example 19
    def __init__(self, obs_space: gym.spaces.Space, action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str, base_model: ModelV2):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        self.activation = base_model.model_config.get("conv_activation")
        self.output_size = base_model.num_outputs
        self.base_model = base_model

        filters = self.model_config["conv_filters"]
        out_channels, kernel, stride = filters[-1]
        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]

        self.prediction = MuZeroPredictionModel(self.activation, in_size, kernel, stride, self.output_size)
        self.dynamics = MuZeroDynamicsModel(self.activation, self.output_size)

        out_conv = SlimConv2d(
            out_channels,
            out_channels,
            kernel=1,
            stride=1,
            padding=None,
            activation_fn=None
        )

        self.representation = nn.Sequential(base_model._convs, out_conv)  # assumes you're using vision network not fc

        self.hidden = None
Example 20
    def __init__(self, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space, num_outputs: int,
                 model_config: ModelConfigDict, name: str):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        custom_configs = model_config.get("custom_model_config")
        self._sensor_seq_len = custom_configs.get("sensor_seq_len", 10)

        activation = model_config.get("fcnet_activation", "tanh")

        encoder_layer = nn.TransformerEncoderLayer(d_model=3, nhead=3, batch_first=True, dim_feedforward=128)
        self._transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)

        self._all_fc1 = SlimFC(in_size=3,
                               out_size=64,
                               initializer=normc_initializer(1.0),
                               activation_fn=activation)

        self._all_fc2 = SlimFC(in_size=64,
                               out_size=16,
                               initializer=normc_initializer(1.0),
                               activation_fn=activation)

        self._action_layer = SlimFC(in_size=16,
                                    out_size=num_outputs,
                                    initializer=normc_initializer(0.01),
                                    activation_fn=None)
        self._value_layer = SlimFC(in_size=16,
                                   out_size=1,
                                   initializer=normc_initializer(0.01),
                                   activation_fn=None)

        self._features = None
Example 21
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):

        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        h, w, c = obs_space.shape
        shape = (c, h, w)

        conv_seqs = []
        for out_channels in [16, 32, 32]:
            conv_seq = ConvSequence(shape, out_channels)
            shape = conv_seq.get_output_shape()
            conv_seqs.append(conv_seq)
        self.conv_seqs = nn.ModuleList(conv_seqs)
        self.hidden_fc = nn.Linear(in_features=shape[0] * shape[1] * shape[2],
                                   out_features=256)
        self.hidden_fc.weight.data *= 1.4 / self.hidden_fc.weight.norm(
            dim=1, p=2, keepdim=True)
        self.hidden_fc.bias.data *= 0

        self.logits_fc = nn.Linear(in_features=256, out_features=num_outputs)
        self.logits_fc.weight.data *= 0.1 / self.logits_fc.weight.norm(
            dim=1, p=2, keepdim=True)
        self.logits_fc.bias.data *= 0

        self.value_fc = nn.Linear(in_features=256, out_features=1)
        self.value_fc.weight.data *= 0.1 / self.value_fc.weight.norm(
            dim=1, p=2, keepdim=True)
        self.value_fc.bias.data *= 0

        self.aux_vf_head = NormedLinear(256, 1, scale=0.1)
Example 22
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        num_options = model_config.get('oc_num_options')
        TorchModelV2.__init__(self, obs_space, action_space,
                              num_outputs * num_options, model_config, name)
        nn.Module.__init__(self)

        activation = get_activation_fn(model_config.get("fcnet_activation"),
                                       framework="torch")
        hiddens = model_config.get("fcnet_hiddens")
        self.option_epsilon = model_config.get('oc_option_epsilon')

        layers = []
        prev_layer_size = int(np.product(obs_space.shape))

        # Create layers
        for size in hiddens:
            layers.append(nn.Linear(prev_layer_size, size))
            layers.append(activation)
            prev_layer_size = size
        self._body = nn.Sequential(*layers)
        self.q = nn.Linear(prev_layer_size,
                           num_options)  # Value for each option
        self.pi = nn.Sequential(
            nn.Linear(prev_layer_size, num_options * num_outputs),
            View((num_options, num_outputs)),
            nn.Softmax(dim=-1))  # Action probabilities for each option
        self.beta = nn.Sequential(nn.Linear(prev_layer_size, num_options),
                                  nn.Sigmoid())  # Termination probabilities
        # Holds the current "base" output (before logits layer).
        self._features = self._q = self._v = self._pi = self._beta = None
        # Holds the last input, in case value branch is separate.
        self._last_flat_in = None
Example 23
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        self.obs_space = obs_space
        self.action_space = action_space
        self.model_config = model_config
        self.name = name

        if isinstance(self.obs_space, Box):
            self.obs_shape = obs_space.shape[0]
        else:
            self.obs_shape = self.obs_space

        # DQN Network:
        self.layers = nn.Sequential()
        self.layers.add_module("linear_1", nn.Linear(self.obs_space.shape[0], 32))
        self.layers.add_module("relu_1", nn.ReLU())
        self.layers.add_module("linear_2", nn.Linear(32, 64))
        self.layers.add_module("relu_2", nn.ReLU())
        self.layers.add_module("linear_3", nn.Linear(64, 32))
        self.layers.add_module("relu_3", nn.ReLU())
        self.layers.add_module("linear_4", nn.Linear(32, num_outputs))
Example 24
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        custom_config = model_config['custom_options']

        self.main = Encoder()

        if custom_config['encoder_path'] is not None:
            print("Load Trained Encoder")
            # saved checkpoints could contain extra weights such as linear_logsigma
            weights = torch.load(custom_config['encoder_path'],
                                 map_location={'cuda:0': 'cpu'})
            for k in list(weights.keys()):
                if k not in self.main.state_dict().keys():
                    del weights[k]
            self.main.load_state_dict(weights)

        self.critic = nn.Sequential(nn.Linear(32, 1024), nn.ReLU(),
                                    nn.Linear(1024, 256), nn.ReLU(),
                                    nn.Linear(256, 1))
        self.actor = nn.Sequential(nn.Linear(32, 1024), nn.ReLU(),
                                   nn.Linear(1024, 256), nn.ReLU(),
                                   nn.Linear(256, 3), nn.Sigmoid())
        self.actor_logstd = nn.Parameter(torch.zeros(3), requires_grad=True)
        self._cur_value = None
        print("Train Encoder:", custom_config['train_encoder'])
        self.train_encoder = custom_config['train_encoder']
Example 25
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name):
     num_options = model_config.get('oc_num_options')
     TorchModelV2.__init__(self, obs_space, action_space, num_outputs * num_options,
                           model_config, name)
     nn.Module.__init__(self)
     layers = []
     (w, h, in_channels) = obs_space.shape
     in_size = [w, h]
     # Convolutional layers
     for out_channels, kernel, stride in OCNET_FILTERS:
         padding, out_size = same_padding(in_size, kernel, [stride, stride])
         layers.append(nn.Conv2d(in_channels, out_channels, kernel, stride, padding))
         layers.append(nn.ReLU())
         in_channels = out_channels
         in_size = out_size
     # Dense layer after flattening output, using ReLU
     hSize = OCNET_DENSE
     self.option_epsilon = model_config.get('oc_option_epsilon')
     layers.append(nn.Flatten())
     # Flattened conv output: channels * width * height.
     layers.append(nn.Linear(in_channels * in_size[0] * in_size[1], hSize))
     layers.append(nn.ReLU())
     self._convs = nn.Sequential(*layers)
     # q, pi, beta, and v
     self.q = nn.Linear(hSize, num_options)  # Value for each option
     #self.v = nn.Linear(hSize, 1)  # Value for state alone? Or do
     self.pi = nn.Sequential(nn.Linear(hSize, num_options * num_outputs), View((num_options, num_outputs)), nn.Softmax(dim=-1))  # Action probabilities for each option
     self.beta = nn.Sequential(nn.Linear(hSize, num_options), nn.Sigmoid())  # Termination probabilities
     # Holds the current "base" output (before heads).
     self._features = self._q = self._v = self._pi = self._beta = None
Example 26
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kwargs):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        layers = []
        prev_layer_size = int(np.product(obs_space.shape))
        self._logits = None

        # Create layers 0 to second-last.
        for size in [256, 256]:
            layers.append(
                SlimFC(in_size=prev_layer_size,
                       out_size=size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            prev_layer_size = size
            # Add a batch norm layer.
            layers.append(nn.BatchNorm1d(prev_layer_size))

        self._logits = SlimFC(in_size=prev_layer_size,
                              out_size=self.num_outputs,
                              initializer=torch_normc_initializer(0.01),
                              activation_fn=None)

        self._value_branch = SlimFC(in_size=prev_layer_size,
                                    out_size=1,
                                    initializer=torch_normc_initializer(1.0),
                                    activation_fn=None)

        self._hidden_layers = nn.Sequential(*layers)
        self._hidden_out = None
Example 27
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        print(model_config)
        hiddens = model_config.get("fcnet_hiddens")
        activation = _get_activation_fn(model_config.get("fcnet_activation"))
        logger.debug("Constructing fcnet {} {}".format(hiddens, activation))
        layers = []
        last_layer_size = np.product(obs_space.shape)
        for size in hiddens:
            layers.append(
                SlimFC(in_size=last_layer_size,
                       out_size=size,
                       initializer=normc_initializer(1.0),
                       activation_fn=activation))
            last_layer_size = size

        self._hidden_layers = nn.Sequential(*layers)

        self._logits = SlimFC(in_size=last_layer_size,
                              out_size=num_outputs,
                              initializer=normc_initializer(0.01),
                              activation_fn=None)
        self._value_branch = SlimFC(in_size=last_layer_size,
                                    out_size=1,
                                    initializer=normc_initializer(1.0),
                                    activation_fn=None)
        self._cur_value = None
Example 28
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        self.obs_space = obs_space
        # Conv-net block
        self.conv1 = torch.nn.Conv2d(16,
                                     32,
                                     kernel_size=3,
                                     stride=1,
                                     padding=1)
        self.pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = torch.nn.Conv2d(32,
                                     32,
                                     kernel_size=3,
                                     stride=1,
                                     padding=1)
        # Calculating output neurons after conv layers
        self.neurons = self.linear_input_neurons()
        # FC-net block
        self.fc1 = torch.nn.Linear(self.neurons, 1024)
        self.fc2 = torch.nn.Linear(1024, 256)

        # Value function branch
        self.value_function_fc = nn.Linear(256, 1)
        # Advantage function branch
        self.advantage_function_fc = nn.Linear(256, num_outputs)
Example 29
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        self.torch_sub_model = TorchFC(obs_space, action_space, num_outputs,
                                       model_config, name)
Example 30
 def __init__(self, obs_space, action_space, num_outputs, model_config,
              name):
     TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                           model_config, name)
     nn.Module.__init__(self)
     self.fc = FullyConnectedNetwork(
         obs_space.original_space.child_space["location"], action_space,
         num_outputs, model_config, name)