Example #1
 def __init__(self, vector_dim, action_dim, quantiles_idx, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.q_net_head = mlp(hidden_units['q_net'], out_layer=False)   # [B, vector_dim]
     self.quantile_net = mlp(hidden_units['quantile'], out_layer=False)  # [N*B, quantiles_idx]
     self.q_net_tile = mlp(hidden_units['tile'], output_shape=action_dim, out_activation=None)   # [N*B, hidden_units['quantile'][-1]]
     self(I(shape=vector_dim), I(shape=quantiles_idx))
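All of these examples rely on an `mlp` helper and an `I` alias defined elsewhere in the source project. A minimal sketch of plausible definitions, inferred from the call sites in this listing (the signature and defaults are assumptions, not the project's actual code):

 import tensorflow as tf
 from tensorflow.keras import Input as I
 from tensorflow.keras.layers import Dense

 def mlp(hidden_units, layer=Dense, act_fn='relu', output_shape=1,
         out_layer=True, out_activation=None):
     # Stack fully connected layers; optionally append an output layer.
     net = tf.keras.Sequential()
     for units in hidden_units:
         net.add(layer(units, activation=act_fn))
     if out_layer:
         net.add(layer(output_shape, activation=out_activation))
     return net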
Example #2
 def __init__(self, vector_dim, action_dim, atoms, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     self.share = mlp(hidden_units['share'], layer=Noisy, out_layer=False)
     self.v = mlp(hidden_units['v'], layer=Noisy, output_shape=atoms, out_activation=None)
     self.adv = mlp(hidden_units['adv'], layer=Noisy, output_shape=action_dim * atoms, out_activation=None)
     self(I(shape=vector_dim))
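A plausible forward pass for this dueling, distributional head (inferred from the attribute shapes; the project's actual `call` may differ):

 def call(self, x):
     x = self.share(x)
     v = tf.reshape(self.v(x), (-1, 1, self.atoms))                    # [B, 1, atoms]
     adv = tf.reshape(self.adv(x), (-1, self.action_dim, self.atoms))  # [B, A, atoms]
     q = v + adv - tf.reduce_mean(adv, axis=1, keepdims=True)          # dueling aggregation
     return tf.nn.softmax(q, axis=-1)                                  # per-action atom probabilities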
Example #3
 def __init__(self, vector_dim, action_dim, options_num, hidden_units, is_continuous=True):
     super().__init__()
     self.actions_num = action_dim
     self.options_num = options_num
     self.share = mlp(hidden_units['share'], out_layer=False)
     self.q = mlp(hidden_units['q'], output_shape=options_num, out_activation=None)
     self.pi = mlp(hidden_units['intra_option'], output_shape=options_num * action_dim, out_activation='tanh' if is_continuous else None)
     self.beta = mlp(hidden_units['termination'], output_shape=options_num, out_activation='sigmoid')
     self(I(shape=vector_dim))
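The three heads map naturally onto the option-critic quantities; a sketch of the assumed forward pass:

 def call(self, x):
     x = self.share(x)
     q = self.q(x)                                                          # [B, options]: option values
     pi = tf.reshape(self.pi(x), (-1, self.options_num, self.actions_num))  # intra-option policies
     beta = self.beta(x)                                                    # [B, options]: termination probabilities
     return q, pi, beta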
Example #4
 def __init__(self, vector_dim, output_shape, network_settings):
     super().__init__()
     self.share = mlp(network_settings['share'], out_layer=False)
     self.mu = mlp(network_settings['mu'],
                   output_shape=output_shape,
                   out_activation=None)
     self.log_std = mlp(network_settings['log_std'],
                        output_shape=output_shape,
                        out_activation='tanh')
     self(I(shape=vector_dim))
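A sketch of how the two heads are typically consumed for a Gaussian policy; the sampling lines are illustrative (`model` and `obs` are hypothetical names), not from the source:

 def call(self, x):
     x = self.share(x)
     return self.mu(x), self.log_std(x)

 # illustrative: reparameterized sample from N(mu, exp(log_std)^2)
 mu, log_std = model(obs)
 action = mu + tf.exp(log_std) * tf.random.normal(tf.shape(mu))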
Example #5
 def __init__(self, vector_dim, action_dim, network_settings):
     assert len(network_settings) > 1, \
         "to use this critic architecture, the number of layers must be greater than 1"
     super().__init__()
     self.feature_net = mlp(network_settings[0:1])
     self.net = mlp(network_settings[1:],
                    output_shape=1,
                    out_activation=None)
     self(I(shape=vector_dim), I(shape=action_dim))
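The two-input build call suggests a forward pass that encodes the state and concatenates the action before the value head (an assumption based on the constructor, not the project's actual code):

 def call(self, x, a):
     feat = self.feature_net(x)                      # state features
     return self.net(tf.concat([feat, a], axis=-1))  # Q(s, a), shape [B, 1]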
Example #6
 def __init__(self, vector_dim, output_shape, network_settings):
     super().__init__()
     self.share = mlp(network_settings['share'], out_layer=False)
     self.logits = mlp(network_settings['logits'],
                       output_shape=output_shape,
                       out_activation=None)
     self.v = mlp(network_settings['v'],
                  output_shape=1,
                  out_activation=None)
     self(I(shape=vector_dim))
Example #7
 def __init__(self, vector_dim, output_shape, network_settings):
     super().__init__()
     self.soft_clip = network_settings['soft_clip']
     self.log_std_min, self.log_std_max = network_settings['log_std_bound']
     self.share = mlp(network_settings['share'], out_layer=False)
     self.mu = mlp(network_settings['mu'],
                   output_shape=output_shape,
                   out_activation=None)
     self.log_std = mlp(network_settings['log_std'],
                        output_shape=output_shape,
                        out_activation=None)
     self(I(shape=vector_dim))
Example #8
 def __init__(self, vector_dim, output_shape, head_num, network_settings):
     super().__init__()
     self.nets = [
         mlp(network_settings,
             output_shape=output_shape,
             out_activation=None) for _ in range(head_num)
     ]
     self(I(shape=vector_dim))
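Presumably an ensemble/bootstrapped head; the natural forward pass returns one output per head:

 def call(self, x):
     return [net(x) for net in self.nets]  # head_num tensors, each [B, output_shape]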
Example #9
 def __init__(self, vector_dim, action_dim, atoms, network_settings):
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     self.net = mlp(network_settings,
                    output_shape=atoms * action_dim,
                    out_activation='softmax')
     self(I(shape=vector_dim))
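For a C51-style head like this, the flat output is reshaped per action. Note that the softmax above normalizes across all action_dim * atoms entries jointly; a per-action atom distribution (what Example 13 produces head by head) would instead softmax over the last axis after the reshape. A sketch:

 def call(self, x):
     flat = self.net(x)                                           # [B, A*atoms]
     return tf.reshape(flat, (-1, self.action_dim, self.atoms))

 # illustrative: expected Q under an atom support z of shape [atoms]
 # q = tf.reduce_sum(dist * z, axis=-1)                           # [B, A]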
Example #10
 def __init__(self, vector_dim, action_dim, nums, network_settings):
     super().__init__()
     self.action_dim = action_dim
     self.nums = nums
     self.net = mlp(network_settings,
                    output_shape=nums * action_dim,
                    out_activation=None)
     self(I(shape=vector_dim))
Example #11
 def __init__(self, vector_dim, output_shape, condition_sigma,
              network_settings):
     super().__init__()
     self.condition_sigma = condition_sigma
     self.log_std_min, self.log_std_max = network_settings['log_std_bound']

     self.share = mlp(network_settings['hidden_units'], out_layer=False)
     self.mu = mlp([], output_shape=output_shape, out_activation='tanh')
     if self.condition_sigma:
         self.log_std = mlp([],
                            output_shape=output_shape,
                            out_activation=None)
     else:
         self.log_std = tf.Variable(
             initial_value=-0.5 * tf.ones(output_shape, dtype=tf.float32),
             trainable=True)
     self(I(shape=vector_dim))
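The clip bounds read from network_settings are presumably applied in the forward pass; a sketch of the assumed `call`:

 def call(self, x):
     x = self.share(x)
     mu = self.mu(x)
     if self.condition_sigma:
         log_std = self.log_std(x)                                       # state-dependent sigma
     else:
         log_std = tf.tile(self.log_std[None, :], [tf.shape(mu)[0], 1])  # shared sigma, broadcast to batch
     log_std = tf.clip_by_value(log_std, self.log_std_min, self.log_std_max)
     return mu, log_std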
Example #12
 def __init__(self, vector_dim, output_shape, hidden_units, is_continuous):
     super().__init__()
     self.is_continuous = is_continuous
     out_activation = 'tanh' if self.is_continuous else None
     self.net = mlp(hidden_units, act_fn='tanh', output_shape=output_shape, out_activation=out_activation, out_layer=True)
     self.weights_2dim = [[i, j] for i, j in zip([vector_dim] + hidden_units, hidden_units + [output_shape])]
     self.weights_nums = np.asarray(self.weights_2dim).prod(axis=-1).tolist()
     self.weights_total_nums = np.asarray(self.weights_2dim).prod(axis=-1).sum() + np.asarray(hidden_units).sum() + output_shape
     self(tf.keras.Input(shape=vector_dim))  # initialize the network weights
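The per-layer shape bookkeeping suggests the network's parameters are meant to be loaded from a flat vector (e.g., for an evolutionary or hypernetwork-style update). A hypothetical helper, assuming Keras's kernel-then-bias ordering per Dense layer; `set_weights_from_flat` is not from the source:

 import numpy as np

 def set_weights_from_flat(model, flat):
     # flat: np.ndarray of length model.weights_total_nums
     idx, new_weights = 0, []
     for (in_dim, out_dim), n in zip(model.weights_2dim, model.weights_nums):
         new_weights.append(flat[idx:idx + n].reshape(in_dim, out_dim))  # kernel
         idx += n
         new_weights.append(flat[idx:idx + out_dim])                     # bias
         idx += out_dim
     model.net.set_weights(new_weights)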
Example #13
 def __init__(self, vector_dim, action_dim, atoms, network_settings):
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     self.net = mlp(network_settings, out_layer=False)
     self.outputs = []
     for _ in range(action_dim):
         self.outputs.append(Dense(atoms, activation='softmax'))
     self(I(shape=vector_dim))
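Each Dense head emits a per-action softmax over atoms; stacking them recovers the usual [B, A, atoms] distribution (a sketch of the assumed forward pass):

 def call(self, x):
     feat = self.net(x)
     return tf.stack([head(feat) for head in self.outputs], axis=1)  # [B, A, atoms]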
Example #14
 def __init__(self,
              vector_dim,
              output_shape,
              network_settings,
              out_activation='tanh'):
     super().__init__()
     self.net = mlp(network_settings,
                    output_shape=output_shape,
                    out_activation=out_activation)
     self(I(shape=vector_dim))
Example #15
 def __init__(self,
              vector_dim,
              output_shape,
              options_num,
              network_settings,
              out_activation=None):
     super().__init__()
     self.actions_num = output_shape
     self.options_num = options_num
     self.pi = mlp(network_settings,
                   output_shape=options_num * output_shape,
                   out_activation=out_activation)
     self(I(shape=vector_dim))
Example #16
 def __init__(self,
              vector_dim,
              action_dim,
              options_num,
              network_settings,
              is_continuous=True):
     super().__init__()
     self.actions_num = action_dim
     self.options_num = options_num
     self.share = mlp(network_settings['share'], out_layer=False)
     self.q = mlp(network_settings['q'],
                  output_shape=options_num,
                  out_activation=None)
     self.pi = mlp(network_settings['intra_option'],
                   output_shape=options_num * action_dim,
                   out_activation='tanh' if is_continuous else None)
     self.beta = mlp(network_settings['termination'],
                     output_shape=options_num,
                     out_activation='sigmoid')
     self.o = mlp(network_settings['o'],
                  output_shape=options_num,
                  out_activation=tf.nn.log_softmax)
     self(I(shape=vector_dim))
Example #17
 def __init__(self, vector_dim, output_shape, hidden_units):
     super().__init__()
     self.logits = mlp(hidden_units, output_shape=output_shape, out_activation=None)
     self(I(shape=vector_dim))
Example #18
 def __init__(self, vector_dim, action_dim, hidden_units):
     super().__init__()
     self.net = mlp(hidden_units, output_shape=1, out_activation=None)
     self(I(shape=vector_dim), I(shape=action_dim))
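As in Example 5, the two-input build call implies state and action are joined before the single-unit output; a minimal assumed forward pass:

 def call(self, x, a):
     return self.net(tf.concat([x, a], axis=-1))  # Q(s, a), shape [B, 1]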
Example #19
 def __init__(self, vector_dim, output_shape, head_num, hidden_units):
     super().__init__()
     self.nets = [mlp(hidden_units, output_shape=output_shape, out_activation=None) for _ in range(head_num)]
     self(I(shape=vector_dim))
Example #20
 def __init__(self, vector_dim, output_shape, options_num, hidden_units, out_activation=None):
     super().__init__()
     self.actions_num = output_shape
     self.options_num = options_num
     self.pi = mlp(hidden_units, output_shape=options_num * output_shape, out_activation=out_activation)
     self(I(shape=vector_dim))
Example #21
 def __init__(self, vector_dim, output_shape, hidden_units):
     super().__init__()
     self.share = mlp(hidden_units['share'], out_layer=False)
     self.logits = mlp(hidden_units['logits'], output_shape=output_shape, out_activation=None)
     self.v = mlp(hidden_units['v'], output_shape=1, out_activation=None)
     self(I(shape=vector_dim))
Example #22
 def __init__(self, vector_dim, network_settings):
     super().__init__()
     self.net = mlp(network_settings, output_shape=1, out_activation=None)
     self(I(shape=vector_dim))
Example #23
 def __init__(self, vector_dim, action_dim, atoms, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     self.net = mlp(hidden_units, output_shape=atoms * action_dim, out_activation='softmax')
     self(I(shape=vector_dim))
Example #24
 def __init__(self, vector_dim, action_dim, nums, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.nums = nums
     self.net = mlp(hidden_units, output_shape=nums * action_dim, out_activation=None)
     self(I(shape=vector_dim))
Example #25
 def __init__(self, vector_dim, output_shape, hidden_units):
     super().__init__()
     self.net = mlp(hidden_units, output_shape=output_shape, out_activation='tanh')
     self(I(shape=vector_dim))