Ejemplo n.º 1
0
 def __init__(self, vector_dim, action_dim, hidden_units):
     """Critic network that processes the state first, then mixes in the action.

     Args:
         vector_dim: dimensionality of the state/observation vector.
         action_dim: dimensionality of the action vector.
         hidden_units: list of hidden-layer sizes; the first entry builds the
             state feature net, the remaining entries build the Q-value trunk.

     Raises:
         ValueError: if fewer than two hidden layers are specified.
     """
     # Explicit raise instead of `assert`: asserts are stripped under
     # `python -O`, which would silently skip this validation.
     if len(hidden_units) <= 1:
         raise ValueError(
             "if you want to use this architecture of critic network, "
             "the number of layers must be greater than 1")
     super().__init__()
     # First hidden layer transforms the raw state features on their own.
     self.feature_net = mlp(hidden_units[0:1])
     # Remaining layers output a single scalar (no output activation).
     self.net = mlp(hidden_units[1:], output_shape=1, out_activation=None)
     # One dummy forward pass to build the weights (keras lazy build);
     # NOTE(review): `I` presumably aliases tf.keras.Input — confirm.
     self(I(shape=vector_dim), I(shape=action_dim))
Ejemplo n.º 2
0
 def __init__(self, vector_dim, output_shape, hidden_units):
     """Shared-trunk network with a logits head and a scalar value head.

     `hidden_units` is a dict with 'share', 'logits' and 'v' layer specs.
     """
     super().__init__()
     # Common trunk feeding both heads (no output layer of its own).
     self.share = mlp(hidden_units['share'], out_layer=False)
     # Policy-logits head: raw outputs, no activation.
     self.logits = mlp(hidden_units['logits'], output_shape=output_shape, out_activation=None)
     # Scalar value head.
     self.v = mlp(hidden_units['v'], output_shape=1, out_activation=None)
     # Dummy forward pass to build the weights.
     self(I(shape=vector_dim))
Ejemplo n.º 3
0
 def __init__(self, vector_dim, output_shape, hidden_units):
     """Shared-trunk network with `mu` and `log_std` heads (Gaussian-policy
     style — NOTE(review): inferred from the head names, confirm usage).
     """
     super().__init__()
     # Trunk shared by both heads.
     self.share = mlp(hidden_units['share'], out_layer=False)
     # Mean head: unbounded outputs.
     self.mu = mlp(hidden_units['mu'], output_shape=output_shape, out_activation=None)
     # Log-std head: tanh keeps the outputs in a bounded range.
     self.log_std = mlp(hidden_units['log_std'], output_shape=output_shape, out_activation='tanh')
     # Dummy forward pass to build the weights.
     self(I(shape=vector_dim))
Ejemplo n.º 4
0
 def __init__(self, vector_dim, action_dim, quantiles_idx, hidden_units):
     """Quantile-value network: separate state and quantile embeddings are
     combined by a final per-action head (IQN-like — NOTE(review): inferred
     from the structure, confirm against the caller).
     """
     super().__init__()
     self.action_dim = action_dim
     # State-feature trunk.  [B, vector_dim]
     self.q_net_head = mlp(hidden_units['q_net'], out_layer=False)
     # Quantile-fraction embedding.  [N*B, quantiles_idx]
     self.quantile_net = mlp(hidden_units['quantile'], out_layer=False)
     # Final head producing one value per action.  [N*B, hidden_units['quantile'][-1]]
     self.q_net_tile = mlp(hidden_units['tile'], output_shape=action_dim, out_activation=None)
     # Dummy forward pass over both inputs to build the weights.
     self(I(shape=vector_dim), I(shape=quantiles_idx))
Ejemplo n.º 5
0
 def __init__(self, vector_dim, action_dim, atoms, hidden_units):
     """Dueling distributional network built from Noisy layers.

     Value head emits `atoms` outputs; advantage head emits
     `action_dim * atoms` outputs (one atom distribution per action).
     """
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     # Shared Noisy-layer trunk (no output layer).
     self.share = mlp(hidden_units['share'], layer=Noisy, out_layer=False)
     # State-value head: one set of `atoms` outputs.
     self.v = mlp(hidden_units['v'], layer=Noisy, output_shape=atoms, out_activation=None)
     # Advantage head: `atoms` outputs for each action.
     self.adv = mlp(hidden_units['adv'], layer=Noisy, output_shape=action_dim * atoms, out_activation=None)
     # Dummy forward pass to build the weights.
     self(I(shape=vector_dim))
Ejemplo n.º 6
0
 def __init__(self, vector_dim, output_shape, head_num, hidden_units):
     """Multi-head network: `head_num` independent MLPs with identical specs
     (bootstrapped-ensemble style — NOTE(review): inferred, confirm usage).
     """
     super().__init__()
     # Build each head separately; all share the same layer spec.
     heads = []
     for _ in range(head_num):
         heads.append(mlp(hidden_units, output_shape=output_shape, out_activation=None))
     self.nets = heads
     # Dummy forward pass to build the weights.
     self(I(shape=vector_dim))
Ejemplo n.º 7
0
 def __init__(self, vector_dim, action_dim, nums, hidden_units):
     """Network emitting `nums` values for each action as one flat vector.

     The caller presumably reshapes the [B, nums * action_dim] output —
     verify downstream.
     """
     super().__init__()
     self.action_dim = action_dim
     self.nums = nums
     # Single trunk; flat output, no activation.
     self.net = mlp(hidden_units, output_shape=nums * action_dim, out_activation=None)
     # Dummy forward pass to build the weights.
     self(I(shape=vector_dim))
Ejemplo n.º 8
0
 def __init__(self, vector_dim, action_dim, atoms, hidden_units):
     """Categorical distributional network: `atoms` outputs per action.

     NOTE(review): softmax is applied to the flat atoms * action_dim output;
     confirm the downstream reshape normalizes per action as intended.
     """
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     # Single trunk with a softmax over the flat output vector.
     self.net = mlp(hidden_units, output_shape=atoms * action_dim, out_activation='softmax')
     # Dummy forward pass to build the weights.
     self(I(shape=vector_dim))
Ejemplo n.º 9
0
 def __init__(self, vector_dim, output_shape, hidden_units, is_continuous):
     """MLP whose per-layer weight counts are precomputed for external use
     (e.g. flattening/assigning weights — NOTE(review): inferred, confirm).

     Args:
         vector_dim: input vector dimensionality.
         output_shape: number of network outputs.
         hidden_units: list of hidden-layer sizes.
         is_continuous: if True, outputs are tanh-squashed; else raw.
     """
     super().__init__()
     self.is_continuous = is_continuous
     # tanh bounds outputs to [-1, 1] for continuous action spaces.
     out_activation = 'tanh' if self.is_continuous else None
     self.net = mlp(hidden_units, act_fn='tanh', output_shape=output_shape,
                    out_activation=out_activation, out_layer=True)
     # (fan_in, fan_out) pair for every consecutive layer, input included.
     self.weights_2dim = [[i, j] for i, j in zip([vector_dim] + hidden_units, hidden_units + [output_shape])]
     # Hoisted: the per-layer weight-matrix sizes were computed twice before.
     layer_weight_counts = np.asarray(self.weights_2dim).prod(axis=-1)
     self.weights_nums = layer_weight_counts.tolist()
     # Total parameters = all weight matrices + all biases (hidden + output).
     self.weights_total_nums = layer_weight_counts.sum() + np.asarray(hidden_units).sum() + output_shape
     self(tf.keras.Input(shape=vector_dim))  # initialize the network weights
Ejemplo n.º 10
0
 def __init__(self, vector_dim, output_shape, hidden_units, out_activation='tanh'):
     """Simple MLP with a configurable output activation (default tanh)."""
     super().__init__()
     # One trunk; the output activation is caller-selectable.
     net = mlp(hidden_units, output_shape=output_shape, out_activation=out_activation)
     self.net = net
     # Dummy forward pass to build the weights.
     self(I(shape=vector_dim))
Ejemplo n.º 11
0
 def __init__(self, vector_dim, action_dim, options_num, hidden_units, is_continuous=True):
     """Shared-trunk network with option-value, intra-option-policy and
     termination heads (option-critic style — NOTE(review): inferred from
     the head names, confirm against the caller).
     """
     super().__init__()
     self.actions_num = action_dim
     self.options_num = options_num
     # Trunk shared by all three heads.
     self.share = mlp(hidden_units['share'], out_layer=False)
     # One value per option, no activation.
     self.q = mlp(hidden_units['q'], output_shape=options_num, out_activation=None)
     # action_dim outputs per option; tanh-squashed when continuous.
     self.pi = mlp(hidden_units['intra_option'],
                   output_shape=options_num * action_dim,
                   out_activation='tanh' if is_continuous else None)
     # Per-option termination probability via sigmoid.
     self.beta = mlp(hidden_units['termination'],
                     output_shape=options_num,
                     out_activation='sigmoid')
     # Dummy forward pass to build the weights.
     self(I(shape=vector_dim))
Ejemplo n.º 12
0
 def __init__(self, vector_dim, output_shape, options_num, hidden_units, out_activation=None):
     """Per-option policy network: `output_shape` outputs for each of the
     `options_num` options, emitted as one flat vector.
     """
     super().__init__()
     self.actions_num = output_shape
     self.options_num = options_num
     # Flat options_num * output_shape output; activation is caller-selectable.
     self.pi = mlp(hidden_units,
                   output_shape=options_num * output_shape,
                   out_activation=out_activation)
     # Dummy forward pass to build the weights.
     self(I(shape=vector_dim))
Ejemplo n.º 13
0
 def __init__(self, vector_dim, output_shape, hidden_units):
     """Plain categorical network: raw logits, no output activation."""
     super().__init__()
     # Single trunk producing `output_shape` logits.
     self.logits = mlp(hidden_units, output_shape=output_shape, out_activation=None)
     # Dummy forward pass to build the weights.
     self(I(shape=vector_dim))
Ejemplo n.º 14
0
 def __init__(self, vector_dim, action_dim, hidden_units):
     """State-action network emitting a single scalar (critic-style —
     NOTE(review): inferred from the two inputs and scalar output, confirm).
     """
     super().__init__()
     # Scalar output, no activation.
     self.net = mlp(hidden_units, output_shape=1, out_activation=None)
     # Dummy forward pass over both inputs to build the weights.
     self(I(shape=vector_dim), I(shape=action_dim))