def __init__(self, vector_dim, action_dim, hidden_units):
    """Critic that encodes the state first, then scores it jointly with the action.

    Args:
        vector_dim: dimension of the state/observation input vector.
        action_dim: dimension of the action input vector.
        hidden_units: list of hidden-layer sizes; the first entry builds the
            state feature extractor, the remaining entries build the Q head.

    Raises:
        ValueError: if ``hidden_units`` has fewer than 2 entries, since this
            architecture needs at least one feature layer plus one head layer.
    """
    # Explicit raise instead of assert: asserts are stripped under `python -O`,
    # so input validation must not rely on them.
    if len(hidden_units) <= 1:
        raise ValueError(
            "if you want to use this architecture of critic network, "
            "the number of layers must be greater than 1")
    super().__init__()
    # Feature extractor built from the first hidden size only.
    self.feature_net = mlp(hidden_units[0:1])
    # Q head: remaining layers down to a single scalar output.
    self.net = mlp(hidden_units[1:], output_shape=1, out_activation=None)
    # Call once with symbolic inputs so Keras builds the weights immediately.
    self(I(shape=vector_dim), I(shape=action_dim))
def __init__(self, vector_dim, output_shape, hidden_units):
    """Network with a shared trunk feeding two heads: `logits` and a scalar `v`.

    `hidden_units` is a dict with keys 'share', 'logits' and 'v', each a list
    of layer sizes for the corresponding sub-network.
    """
    super().__init__()
    # Shared feature trunk; both heads branch off its output.
    self.share = mlp(hidden_units['share'], out_layer=False)
    # Raw (unactivated) logits head of size `output_shape`.
    self.logits = mlp(hidden_units['logits'],
                      output_shape=output_shape,
                      out_activation=None)
    # Scalar value head.
    self.v = mlp(hidden_units['v'], output_shape=1, out_activation=None)
    # Build the model eagerly with a symbolic input.
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, hidden_units):
    """Gaussian-style policy head: shared trunk with `mu` and `log_std` outputs.

    `hidden_units` is a dict with keys 'share', 'mu' and 'log_std'.
    """
    super().__init__()
    # Common trunk shared by both output heads.
    self.share = mlp(hidden_units['share'], out_layer=False)
    # Mean head: no output activation.
    self.mu = mlp(hidden_units['mu'],
                  output_shape=output_shape,
                  out_activation=None)
    # Log-std head: tanh keeps the output bounded.
    self.log_std = mlp(hidden_units['log_std'],
                       output_shape=output_shape,
                       out_activation='tanh')
    # Trigger weight creation with a symbolic input.
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, quantiles_idx, hidden_units):
    """Quantile Q-network: separate encoders for states and quantile embeddings.

    `hidden_units` is a dict with keys 'q_net', 'quantile' and 'tile'.
    Shape comments below reflect the original author's annotations
    (B = batch, N = number of quantiles) — confirm against the caller.
    """
    super().__init__()
    self.action_dim = action_dim
    # State encoder; input is [B, vector_dim].
    self.q_net_head = mlp(hidden_units['q_net'], out_layer=False)
    # Quantile-embedding encoder; input is [N*B, quantiles_idx].
    self.quantile_net = mlp(hidden_units['quantile'], out_layer=False)
    # Output head producing per-action values from
    # [N*B, hidden_units['quantile'][-1]] features.
    self.q_net_tile = mlp(hidden_units['tile'],
                          output_shape=action_dim,
                          out_activation=None)
    # Build weights now with symbolic inputs.
    self(I(shape=vector_dim), I(shape=quantiles_idx))
def __init__(self, vector_dim, action_dim, atoms, hidden_units):
    """Dueling distributional head built from Noisy layers.

    `hidden_units` is a dict with keys 'share', 'v' and 'adv'. The value
    head emits `atoms` values; the advantage head emits `action_dim * atoms`.
    """
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    # All sub-networks use the Noisy layer type for exploration.
    self.share = mlp(hidden_units['share'], layer=Noisy, out_layer=False)
    # State-value branch over the atom dimension.
    self.v = mlp(hidden_units['v'], layer=Noisy,
                 output_shape=atoms, out_activation=None)
    # Advantage branch: one atom block per action.
    self.adv = mlp(hidden_units['adv'], layer=Noisy,
                   output_shape=action_dim * atoms, out_activation=None)
    # Build the model with a symbolic input.
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, head_num, hidden_units):
    """Ensemble of `head_num` structurally identical, independently weighted MLPs."""
    super().__init__()
    # Each head is a full MLP with the same architecture but its own weights.
    self.nets = [
        mlp(hidden_units, output_shape=output_shape, out_activation=None)
        for _head in range(head_num)
    ]
    # Build the model with a symbolic input.
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, nums, hidden_units):
    """Single MLP whose flat output holds `nums` values per action."""
    super().__init__()
    self.action_dim = action_dim
    self.nums = nums
    # Flat output of size nums * action_dim; reshaping is the caller's job.
    self.net = mlp(hidden_units,
                   output_shape=nums * action_dim,
                   out_activation=None)
    # Build the model with a symbolic input.
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, atoms, hidden_units):
    """Distributional Q-network: softmax output over `atoms` values per action."""
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    # Softmax output activation — NOTE(review): applied over the flattened
    # atoms * action_dim output as written; confirm mlp handles the reshape.
    self.net = mlp(hidden_units,
                   output_shape=atoms * action_dim,
                   out_activation='softmax')
    # Build the model with a symbolic input.
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, hidden_units, is_continuous):
    """Policy network that also records its flattened weight layout.

    The layout attributes (`weights_2dim`, `weights_nums`,
    `weights_total_nums`) describe the dense-layer kernel/bias sizes,
    presumably so an external component can inject a flat weight vector —
    confirm against the caller.

    Args:
        vector_dim: input vector dimension.
        output_shape: output dimension.
        hidden_units: list of hidden-layer sizes.
        is_continuous: if True, squash the output with tanh; otherwise emit
            raw (unactivated) values.
    """
    super().__init__()
    self.is_continuous = is_continuous
    out_activation = 'tanh' if self.is_continuous else None
    self.net = mlp(hidden_units, act_fn='tanh', output_shape=output_shape,
                   out_activation=out_activation, out_layer=True)
    # [fan_in, fan_out] pair for every dense layer in the stack.
    self.weights_2dim = [[i, j] for i, j in zip([vector_dim] + hidden_units,
                                                hidden_units + [output_shape])]
    # Hoist the per-layer kernel parameter counts: the original computed
    # prod(axis=-1) twice over the same array.
    kernel_nums = np.asarray(self.weights_2dim).prod(axis=-1)
    self.weights_nums = kernel_nums.tolist()
    # Total parameters = all kernel weights + all biases
    # (one bias per hidden unit plus one per output unit).
    self.weights_total_nums = (kernel_nums.sum()
                               + np.asarray(hidden_units).sum()
                               + output_shape)
    self(tf.keras.Input(shape=vector_dim))  # build the model to initialize network weights
def __init__(self, vector_dim, output_shape, hidden_units, out_activation='tanh'):
    """Plain MLP wrapper; output activation defaults to tanh but is configurable."""
    super().__init__()
    self.net = mlp(hidden_units,
                   output_shape=output_shape,
                   out_activation=out_activation)
    # Build the model with a symbolic input.
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, options_num, hidden_units, is_continuous=True):
    """Shared trunk with three heads: option values, intra-option policy, termination.

    `hidden_units` is a dict with keys 'share', 'q', 'intra_option' and
    'termination'.
    """
    super().__init__()
    self.actions_num = action_dim
    self.options_num = options_num
    # Common feature trunk.
    self.share = mlp(hidden_units['share'], out_layer=False)
    # One Q value per option, unactivated.
    self.q = mlp(hidden_units['q'],
                 output_shape=options_num,
                 out_activation=None)
    # Intra-option policy: one action block per option; tanh-squashed only
    # for continuous action spaces.
    self.pi = mlp(hidden_units['intra_option'],
                  output_shape=options_num * action_dim,
                  out_activation='tanh' if is_continuous else None)
    # Termination probability per option (sigmoid output).
    self.beta = mlp(hidden_units['termination'],
                    output_shape=options_num,
                    out_activation='sigmoid')
    # Build the model with a symbolic input.
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, options_num, hidden_units, out_activation=None):
    """Per-option policy head: a single MLP emitting one output block per option."""
    super().__init__()
    self.actions_num = output_shape
    self.options_num = options_num
    # Flat output of options_num * output_shape; reshaping is the caller's job.
    self.pi = mlp(hidden_units,
                  output_shape=options_num * output_shape,
                  out_activation=out_activation)
    # Build the model with a symbolic input.
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, hidden_units):
    """Minimal categorical head: a single MLP producing raw logits."""
    super().__init__()
    # No output activation — downstream code consumes raw logits.
    self.logits = mlp(hidden_units,
                      output_shape=output_shape,
                      out_activation=None)
    # Build the model with a symbolic input.
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, hidden_units):
    """State-action critic: one MLP over both inputs, yielding a scalar Q value."""
    super().__init__()
    # Single scalar output, unactivated.
    self.net = mlp(hidden_units, output_shape=1, out_activation=None)
    # Build the model with symbolic state and action inputs.
    self(I(shape=vector_dim), I(shape=action_dim))