def __init__(self, vector_dim, output_shape, options_num, network_settings, out_activation=None):
    """Single MLP head mapping a `vector_dim` input to `options_num * output_shape`
    outputs (one flat slice of `output_shape` values per option)."""
    super().__init__()
    self.options_num = options_num
    self.actions_num = output_shape
    # one flat head covering every (option, action) pair
    joint_dim = options_num * output_shape
    self.pi = mlp(network_settings, output_shape=joint_dim, out_activation=out_activation)
    # dummy forward pass so the layers materialize their variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, options_num, hidden_units, out_activation=None):
    """MLP producing `options_num * output_shape` values from a `vector_dim` input."""
    super().__init__()
    self.actions_num, self.options_num = output_shape, options_num
    self.pi = mlp(hidden_units,
                  output_shape=output_shape * options_num,
                  out_activation=out_activation)
    # build the network's variables with a dummy call
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, atoms, hidden_units):
    """Noisy-layer network with a shared trunk, a `v` head over `atoms` outputs,
    and an `adv` head over `action_dim * atoms` outputs."""
    super().__init__()
    self.action_dim = action_dim
    self.atoms = atoms
    # per-head layer widths come from the settings dict
    trunk_units = hidden_units['share']
    v_units = hidden_units['v']
    adv_units = hidden_units['adv']
    self.share = mlp(trunk_units, layer=Noisy, out_layer=False)
    self.v = mlp(v_units, layer=Noisy, output_shape=atoms, out_activation=None)
    self.adv = mlp(adv_units, layer=Noisy, output_shape=atoms * action_dim, out_activation=None)
    # dummy forward pass builds all variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim=None):  # TODO
    """Pass-through encoder over a list of vector inputs.

    Args:
        vector_dim: iterable of per-input vector dimensions; ``None`` (or an
            empty iterable) means no vector input at all.

    Fix: the original signature used ``vector_dim=[]`` — a shared mutable
    default argument — replaced with a ``None`` sentinel.
    """
    super().__init__()
    vector_dim = [] if vector_dim is None else vector_dim
    self.nets = []
    for _ in vector_dim:
        # placeholder identity "network" for each vector input
        def net(x):
            return x
        self.nets.append(net)
    # concatenated feature width is just the sum of the raw input widths
    self.h_dim = sum(vector_dim)
    self.use_vector = self.h_dim != 0
    if vector_dim:
        # dummy call so any downstream building hooks run
        self(*(I(shape=dim) for dim in vector_dim))
def __init__(self, visual_dim=None, visual_feature=128, network_type=VisualNetworkType.NATURE):
    """One visual encoder + projection Dense per camera input.

    Args:
        visual_dim: iterable of per-camera input shapes; ``None`` (or empty)
            means no visual input.
        visual_feature: output feature width of each per-camera Dense.
        network_type: which visual backbone `get_visual_network_from_type` builds.

    Fix: the original signature used ``visual_dim=[]`` — a shared mutable
    default argument — replaced with a ``None`` sentinel.
    """
    super().__init__()
    visual_dim = [] if visual_dim is None else visual_dim
    self.nets = []
    self.dense_nets = []
    for _ in visual_dim:
        # each camera gets its own backbone and projection layer
        net = get_visual_network_from_type(network_type)()
        self.nets.append(net)
        self.dense_nets.append(
            Dense(visual_feature, default_activation, **initKernelAndBias))
    # total feature width: one `visual_feature` slice per camera
    self.h_dim = visual_feature * len(self.nets)
    if visual_dim:
        # dummy call to materialize variables
        self(*(I(shape=dim) for dim in visual_dim))
def __init__(self, vector_dim, output_shape, condition_sigma, network_settings):
    """Shared trunk with a tanh `mu` head; `log_std` is either a linear head
    (when `condition_sigma`) or a single trainable vector initialized at -0.5."""
    super().__init__()
    self.condition_sigma = condition_sigma
    # clipping bounds for the log standard deviation
    self.log_std_min, self.log_std_max = network_settings['log_std_bound']
    self.share = mlp(network_settings['hidden_units'], out_layer=False)
    self.mu = mlp([], output_shape=output_shape, out_activation='tanh')
    if condition_sigma:
        # log-std predicted from the shared features
        self.log_std = mlp([], output_shape=output_shape, out_activation=None)
    else:
        # state-independent learned log-std vector
        self.log_std = tf.Variable(
            initial_value=-0.5 * tf.ones(output_shape, dtype=tf.dtypes.float32),
            trainable=True)
    # dummy forward pass builds the variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, options_num, network_settings, is_continuous=True):
    """Shared trunk with `q` (per option), `pi` (per option x action, tanh when
    continuous) and `beta` (sigmoid per option) heads."""
    super().__init__()
    self.actions_num = action_dim
    self.options_num = options_num
    # continuous actions get a tanh squash; discrete actions stay as raw logits
    intra_activation = 'tanh' if is_continuous else None
    self.share = mlp(network_settings['share'], out_layer=False)
    self.q = mlp(network_settings['q'],
                 output_shape=options_num,
                 out_activation=None)
    self.pi = mlp(network_settings['intra_option'],
                  output_shape=options_num * action_dim,
                  out_activation=intra_activation)
    self.beta = mlp(network_settings['termination'],
                    output_shape=options_num,
                    out_activation='sigmoid')
    # dummy call materializes all variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, options_num, hidden_units, is_continuous=True):
    """Shared trunk with `q`, `pi` (per option x action), `beta` (sigmoid) and
    `o` (log-softmax over options) heads."""
    super().__init__()
    self.actions_num = action_dim
    self.options_num = options_num
    pi_activation = 'tanh' if is_continuous else None
    self.share = mlp(hidden_units['share'], out_layer=False)
    self.q = mlp(hidden_units['q'],
                 output_shape=options_num,
                 out_activation=None)
    self.pi = mlp(hidden_units['intra_option'],
                  output_shape=options_num * action_dim,
                  out_activation=pi_activation)
    self.beta = mlp(hidden_units['termination'],
                    output_shape=options_num,
                    out_activation='sigmoid')
    # option head emits log-probabilities directly
    self.o = mlp(hidden_units['o'],
                 output_shape=options_num,
                 out_activation=tf.nn.log_softmax)
    # dummy call materializes all variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, hidden_units):
    """Single MLP with tanh output over `output_shape` values."""
    super().__init__()
    self.net = mlp(hidden_units, out_activation='tanh', output_shape=output_shape)
    # dummy call builds the variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, network_settings):
    """Single MLP with tanh output over `output_shape` values."""
    super().__init__()
    self.net = mlp(network_settings,
                   output_shape=output_shape,
                   out_activation='tanh')
    # dummy forward pass to create the variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, network_settings):
    """Scalar-output MLP over a (state, action) input pair."""
    super().__init__()
    self.net = mlp(network_settings, output_shape=1, out_activation=None)
    # dummy (state, action) call builds the variables
    state_in = I(shape=vector_dim)
    action_in = I(shape=action_dim)
    self(state_in, action_in)
def __init__(self, is_continuous, vector_dim, action_dim, visual_dim=None, visual_feature=128,
             *, eta=0.2, lr=1.0e-3, beta=0.2, loss_weight=10., encoder_type='simple'):
    '''
    params:
        is_continuous: specify whether action space is continuous(True) or discrete(False)
        vector_dim: dimension of vector state input
        action_dim: dimension of action
        visual_dim: dimension of visual state input; None means no visual input
        visual_feature: dimension of visual feature map
        eta: weight of intrinsic reward
        lr: the learning rate of curiosity model
        beta: weight factor of loss between inverse_dynamic_net and forward_net
        loss_weight: weight factor of loss between policy gradient and curiosity model
    '''
    super().__init__()
    self.device = get_device()
    self.eta = eta
    self.beta = beta
    self.loss_weight = loss_weight
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    self.is_continuous = is_continuous

    # Fixes: the original used the mutable default `visual_dim=[]`, and
    # `visual_dim[0]` raised IndexError whenever that default was used.
    visual_dim = [] if visual_dim is None else visual_dim
    self.camera_num = visual_dim[0] if visual_dim else 0
    self.use_visual = self.camera_num > 0
    if self.use_visual:
        self.nets = MultiCameraCNN(n=self.camera_num,
                                   feature_dim=visual_feature,
                                   activation_fn=default_activation,
                                   encoder_type=encoder_type)
    # total state feature width: vector part plus one feature slice per camera
    self.s_dim = vector_dim + (visual_feature * self.camera_num) * (self.camera_num > 0)

    if self.use_visual:
        # S, S' => A
        self.inverse_dynamic_net = Sequential([
            Dense(self.s_dim * 2, default_activation),
            Dense(action_dim, 'tanh' if is_continuous else None)
        ])
        # S, A => S'
        self.forward_net = Sequential([
            Dense(self.s_dim + action_dim, default_activation),
            Dense(self.s_dim, None)
        ])
        self.initial_weights(I(shape=vector_dim), I(shape=visual_dim), I(shape=action_dim))

    # NOTE(review): trainable-variable collection reconstructed as visual-only,
    # since `nets`/`inverse_dynamic_net`/`forward_net` only exist when
    # use_visual is True — confirm against callers.
    self.tv = []
    if self.use_visual:
        for net in self.nets:
            self.tv += net.trainable_variables
        self.tv += self.inverse_dynamic_net.trainable_variables
        self.tv += self.forward_net.trainable_variables
def __init__(self, vector_dim, output_shape, network_settings):
    """MLP emitting raw (unactivated) logits over `output_shape` values."""
    super().__init__()
    self.logits = mlp(network_settings,
                      output_shape=output_shape,
                      out_activation=None)
    # dummy call creates the variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, hidden_units):
    """MLP emitting raw (unactivated) logits over `output_shape` values."""
    super().__init__()
    self.logits = mlp(hidden_units, out_activation=None, output_shape=output_shape)
    # dummy forward pass materializes the variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, visual_dim=None, visual_feature=128, encoder_type='nature'):
    """Observation encoder: multi-camera CNN features concatenated with the
    raw vector input.

    Args:
        vector_dim: width of the vector state input.
        visual_dim: per-camera input shapes, first entry is the camera count;
            ``None`` (or empty) means no visual input.
        visual_feature: feature width produced per camera.
        encoder_type: backbone selector forwarded to MultiCameraCNN.

    Fixes: the original used the mutable default ``visual_dim=[]`` and then
    indexed ``visual_dim[0]``, which raised IndexError whenever the default
    was used.
    """
    super().__init__()
    visual_dim = [] if visual_dim is None else visual_dim
    self.camera_num = visual_dim[0] if visual_dim else 0
    self.nets = MultiCameraCNN(n=self.camera_num,
                               feature_dim=visual_feature,
                               activation_fn=default_activation,
                               encoder_type=encoder_type)
    # combined feature width: vector part plus one feature slice per camera
    self.hdim = vector_dim + (visual_feature * self.camera_num) * (self.camera_num > 0)
    # dummy call builds the variables
    self(I(shape=vector_dim), I(shape=visual_dim))
def __init__(self, vector_dim, action_dim, hidden_units):
    """Scalar-output MLP over a (state, action) input pair."""
    super().__init__()
    self.net = mlp(hidden_units, out_activation=None, output_shape=1)
    # dummy (state, action) forward pass creates the variables
    self(I(shape=vector_dim), I(shape=action_dim))
def __init__(self, vector_dim, output_shape, hidden_units):
    """Shared trunk with a linear `mu` head and a tanh `log_std` head."""
    super().__init__()
    trunk_units = hidden_units['share']
    self.share = mlp(trunk_units, out_layer=False)
    self.mu = mlp(hidden_units['mu'],
                  output_shape=output_shape,
                  out_activation=None)
    self.log_std = mlp(hidden_units['log_std'],
                       output_shape=output_shape,
                       out_activation='tanh')
    # dummy call materializes the variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, nums, hidden_units):
    """MLP with a flat `nums * action_dim` unactivated output."""
    super().__init__()
    self.action_dim = action_dim
    self.nums = nums
    # one flat head over every (num, action) pair
    out_dim = nums * action_dim
    self.net = mlp(hidden_units, output_shape=out_dim, out_activation=None)
    # dummy forward pass builds the variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, atoms, hidden_units):
    """MLP with a softmax over a flat `atoms * action_dim` output."""
    super().__init__()
    self.atoms = atoms
    self.action_dim = action_dim
    self.net = mlp(hidden_units,
                   output_shape=atoms * action_dim,
                   out_activation='softmax')
    # dummy call creates the variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, hidden_units):
    """Shared trunk with a `logits` head (`output_shape` values) and a scalar
    `v` head."""
    super().__init__()
    cfg = hidden_units
    self.share = mlp(cfg['share'], out_layer=False)
    self.logits = mlp(cfg['logits'],
                      output_shape=output_shape,
                      out_activation=None)
    self.v = mlp(cfg['v'], output_shape=1, out_activation=None)
    # dummy forward pass materializes the variables
    self(I(shape=vector_dim))
def __init__(self, vector_dim, output_shape, head_num, hidden_units):
    """`head_num` independent MLP heads sharing the same architecture."""
    super().__init__()
    self.nets = []
    for _ in range(head_num):
        # each head gets its own, separately-initialized MLP
        self.nets.append(mlp(hidden_units, output_shape=output_shape, out_activation=None))
    # dummy call builds the variables of every head
    self(I(shape=vector_dim))
def __init__(self, vector_dim, action_dim, hidden_units):
    """Two-stage critic: the first layer width builds a feature net, the
    remaining widths map (features, action) to a scalar. Needs >= 2 layers."""
    assert len(hidden_units) > 1, "if you want to use this architecture of critic network, the number of layers must greater than 1"
    super().__init__()
    # split the layer list: first entry feeds the feature net, the rest the head
    feature_units = hidden_units[:1]
    head_units = hidden_units[1:]
    self.feature_net = mlp(feature_units)
    self.net = mlp(head_units, output_shape=1, out_activation=None)
    # dummy (state, action) forward pass builds the variables
    self(I(shape=vector_dim), I(shape=action_dim))