Example #1
 def __init__(self,
              vector_dim,
              output_shape,
              options_num,
              network_settings,
              out_activation=None):
     super().__init__()
     self.actions_num = output_shape
     self.options_num = options_num
     self.pi = mlp(network_settings,
                   output_shape=options_num * output_shape,
                   out_activation=out_activation)
     self(I(shape=vector_dim))
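
All of these snippets are __init__ methods of tf.keras model subclasses, and they lean on two helpers the listing never shows: mlp, which stacks Dense-style layers into a small Sequential network, and I, a shortcut for building a symbolic Keras input. The sketch below is a reconstruction guessed from the call sites alone; the names, defaults, and the ReLU hidden activation are assumptions, not the library's actual code.

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense


def I(shape):
    # Assumed helper: wrap tf.keras.Input so both int and tuple shapes work.
    return tf.keras.Input(shape=(shape,) if isinstance(shape, int) else shape)


def mlp(hidden_units, *, layer=Dense, output_shape=None,
        out_layer=True, out_activation=None):
    # Assumed helper: stack the hidden layers, then optionally an output layer.
    net = Sequential([layer(u, activation='relu') for u in hidden_units])
    if out_layer:
        net.add(layer(output_shape, activation=out_activation))
    return net

With helpers like these, the trailing self(I(shape=vector_dim)) call runs the freshly built model once on a symbolic input, which forces Keras to create all of its weights immediately.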
Example #2
 def __init__(self,
              vector_dim,
              output_shape,
              options_num,
              hidden_units,
              out_activation=None):
     super().__init__()
     self.actions_num = output_shape
     self.options_num = options_num
     self.pi = mlp(hidden_units,
                   output_shape=options_num * output_shape,
                   out_activation=out_activation)
     self(I(shape=vector_dim))
Example #3
 def __init__(self, vector_dim, action_dim, atoms, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     self.share = mlp(hidden_units['share'], layer=Noisy, out_layer=False)
     self.v = mlp(hidden_units['v'],
                  layer=Noisy,
                  output_shape=atoms,
                  out_activation=None)
     self.adv = mlp(hidden_units['adv'],
                    layer=Noisy,
                    output_shape=action_dim * atoms,
                    out_activation=None)
     self(I(shape=vector_dim))
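
Example #3 is a dueling, distributional Q head in the Rainbow style: self.v scores the atoms support points once per state and self.adv once per action, both through Noisy (noisy-dense) layers. The call below is a hedged sketch of how the two streams are usually merged; it is illustrative, not the class's actual forward pass, and reuses the imports from the sketch under Example #1.

 def call(self, x):
     x = self.share(x)
     v = tf.reshape(self.v(x), (-1, 1, self.atoms))  # (B, 1, atoms)
     adv = tf.reshape(self.adv(x),
                      (-1, self.action_dim, self.atoms))  # (B, A, atoms)
     # Standard dueling merge: subtract the mean advantage per atom.
     q = v + adv - tf.reduce_mean(adv, axis=1, keepdims=True)
     return tf.nn.softmax(q, axis=-1)  # one distribution over atoms per action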
Example #4
    def __init__(self, vector_dim=[]):
        # TODO
        super().__init__()
        self.nets = []
        for _ in vector_dim:

            def net(x):  # placeholder: identity mapping (see TODO above)
                return x

            self.nets.append(net)
        self.h_dim = sum(vector_dim)
        self.use_vector = self.h_dim != 0
        if vector_dim:
            self(*(I(shape=dim) for dim in vector_dim))
Example #5
 def __init__(self,
              visual_dim=[],
              visual_feature=128,
              network_type=VisualNetworkType.NATURE):
     super().__init__()
     self.nets = []
     self.dense_nets = []
     for _ in visual_dim:
         net = get_visual_network_from_type(network_type)()
         self.nets.append(net)
         self.dense_nets.append(
             Dense(visual_feature, default_activation, **initKernelAndBias))
     self.h_dim = visual_feature * len(self.nets)
     if visual_dim:
         self(*(I(shape=dim) for dim in visual_dim))
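
Example #5 gives every camera stream its own CNN plus a Dense projection to visual_feature units, so h_dim is the width of all per-camera features concatenated. A hedged sketch of the matching forward pass (illustrative only):

 def call(self, *visuals):
     # One CNN plus projection per camera; concatenate the features.
     feats = [dense(net(v)) for net, dense, v in
              zip(self.nets, self.dense_nets, visuals)]
     return tf.concat(feats, axis=-1)  # (B, visual_feature * len(self.nets))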
Example #6
    def __init__(self, vector_dim, output_shape, condition_sigma,
                 network_settings):
        super().__init__()
        self.condition_sigma = condition_sigma
        self.log_std_min, self.log_std_max = network_settings['log_std_bound']

        self.share = mlp(network_settings['hidden_units'], out_layer=False)
        self.mu = mlp([], output_shape=output_shape, out_activation='tanh')
        if self.condition_sigma:
            self.log_std = mlp([],
                               output_shape=output_shape,
                               out_activation=None)
        else:
            self.log_std = tf.Variable(
                initial_value=-0.5 *
                tf.ones(output_shape, dtype=tf.dtypes.float32),
                trainable=True)
        self(I(shape=vector_dim))
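
Example #6 is a squashed Gaussian actor: mu is bounded by tanh, and the standard deviation is either state-dependent (condition_sigma) or a single trainable variable. A hedged sketch of how an action would typically be sampled, with log_std clipped into the configured bounds; the method name and details are illustrative:

    def sample(self, x):
        feat = self.share(x)
        mu = self.mu(feat)
        log_std = self.log_std(feat) if self.condition_sigma else self.log_std
        log_std = tf.clip_by_value(log_std, self.log_std_min, self.log_std_max)
        # Reparameterization trick: mu + sigma * eps with eps ~ N(0, 1).
        return mu + tf.exp(log_std) * tf.random.normal(tf.shape(mu))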
Example #7
 def __init__(self,
              vector_dim,
              action_dim,
              options_num,
              network_settings,
              is_continuous=True):
     super().__init__()
     self.actions_num = action_dim
     self.options_num = options_num
     self.share = mlp(network_settings['share'], out_layer=False)
     self.q = mlp(network_settings['q'],
                  output_shape=options_num,
                  out_activation=None)
     self.pi = mlp(network_settings['intra_option'],
                   output_shape=options_num * action_dim,
                   out_activation='tanh' if is_continuous else None)
     self.beta = mlp(network_settings['termination'],
                     output_shape=options_num,
                     out_activation='sigmoid')
     self(I(shape=vector_dim))
Example #8
 def __init__(self,
              vector_dim,
              action_dim,
              options_num,
              hidden_units,
              is_continuous=True):
     super().__init__()
     self.actions_num = action_dim
     self.options_num = options_num
     self.share = mlp(hidden_units['share'], out_layer=False)
     self.q = mlp(hidden_units['q'],
                  output_shape=options_num,
                  out_activation=None)
     self.pi = mlp(hidden_units['intra_option'],
                   output_shape=options_num * action_dim,
                   out_activation='tanh' if is_continuous else None)
     self.beta = mlp(hidden_units['termination'],
                     output_shape=options_num,
                     out_activation='sigmoid')
     self.o = mlp(hidden_units['o'],
                  output_shape=options_num,
                  out_activation=tf.nn.log_softmax)
     self(I(shape=vector_dim))
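
Examples #7 and #8 carry the usual option-critic heads: q values the options, pi is the intra-option policy (one action head per option), beta gives per-option termination probabilities, and #8 adds o, a log-softmax distribution over options. A hedged sketch of how the current option's action head would be picked out (illustrative only):

 def call(self, x, options_onehot):
     feat = self.share(x)
     q = self.q(feat)  # (B, options_num)
     pi = tf.reshape(self.pi(feat),
                     (-1, self.options_num, self.actions_num))
     # Keep only the action head that belongs to the current option.
     pi = tf.reduce_sum(pi * options_onehot[..., None], axis=1)  # (B, actions_num)
     beta = self.beta(feat)  # (B, options_num) termination probabilities
     return q, pi, beta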
Example #9
 def __init__(self, vector_dim, output_shape, hidden_units):
     super().__init__()
     self.net = mlp(hidden_units, output_shape=output_shape, out_activation='tanh')
     self(I(shape=vector_dim))
Example #10
 def __init__(self, vector_dim, output_shape, network_settings):
     super().__init__()
     self.net = mlp(network_settings,
                    output_shape=output_shape,
                    out_activation='tanh')
     self(I(shape=vector_dim))
Example #11
 def __init__(self, vector_dim, action_dim, network_settings):
     super().__init__()
     self.net = mlp(network_settings, output_shape=1, out_activation=None)
     self(I(shape=vector_dim), I(shape=action_dim))
Example #12
    def __init__(self,
                 is_continuous,
                 vector_dim,
                 action_dim,
                 visual_dim=[],
                 visual_feature=128,
                 *,
                 eta=0.2,
                 lr=1.0e-3,
                 beta=0.2,
                 loss_weight=10.,
                 encoder_type='simple'):
        '''
        params:
            is_continuous: specify whether the action space is continuous (True) or discrete (False)
            vector_dim: dimension of the vector state input
            action_dim: dimension of the action
            visual_dim: dimension of the visual state input
            visual_feature: dimension of the visual feature map
            eta: weight of the intrinsic reward
            lr: learning rate of the curiosity model
            beta: weight factor of the loss between inverse_dynamic_net and forward_net
            loss_weight: weight factor of the loss between the policy gradient and the curiosity model
            encoder_type: type of the visual encoder used by MultiCameraCNN
        '''
        super().__init__()
        self.device = get_device()
        self.eta = eta
        self.beta = beta
        self.loss_weight = loss_weight
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        self.is_continuous = is_continuous

        self.camera_num = visual_dim[0] if visual_dim else 0  # guard: default visual_dim is empty
        self.use_visual = self.camera_num > 0

        self.nets = MultiCameraCNN(n=self.camera_num,
                                   feature_dim=visual_feature,
                                   activation_fn=default_activation,
                                   encoder_type=encoder_type)
        self.s_dim = vector_dim + (visual_feature *
                                   self.camera_num) * (self.camera_num > 0)

        if self.use_visual:
            # S, S' => A
            self.inverse_dynamic_net = Sequential([
                Dense(self.s_dim * 2, default_activation),
                Dense(action_dim, 'tanh' if is_continuous else None)
            ])

        # S, A => S'
        self.forward_net = Sequential([
            Dense(self.s_dim + action_dim, default_activation),
            Dense(self.s_dim, None)
        ])
        self.initial_weights(I(shape=vector_dim), I(shape=visual_dim),
                             I(shape=action_dim))

        self.tv = []
        if self.use_visual:
            for net in self.nets:
                self.tv += net.trainable_variables
            self.tv += self.inverse_dynamic_net.trainable_variables
        self.tv += self.forward_net.trainable_variables
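
Example #12 is an ICM-style curiosity module: the inverse model predicts the action from consecutive state features, the forward model predicts the next state feature from state and action, and the forward model's prediction error serves as the intrinsic reward. A hedged sketch of that computation, assuming s, a, and s_next are already encoded feature tensors:

    def intrinsic_reward(self, s, a, s_next):
        # ICM intrinsic reward: scaled prediction error of the forward model.
        s_next_pred = self.forward_net(tf.concat([s, a], axis=-1))
        return self.eta * 0.5 * tf.reduce_sum(
            tf.square(s_next_pred - s_next), axis=-1)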
Example #13
 def __init__(self, vector_dim, output_shape, network_settings):
     super().__init__()
     self.logits = mlp(network_settings,
                       output_shape=output_shape,
                       out_activation=None)
     self(I(shape=vector_dim))
Example #14
 def __init__(self, vector_dim, output_shape, hidden_units):
     super().__init__()
     self.logits = mlp(hidden_units, output_shape=output_shape, out_activation=None)
     self(I(shape=vector_dim))
Example #15
 def __init__(self,
              vector_dim,
              visual_dim=[],
              visual_feature=128,
              encoder_type='nature'):
     super().__init__()
     self.camera_num = visual_dim[0] if visual_dim else 0  # guard: default visual_dim is empty
     self.nets = MultiCameraCNN(n=self.camera_num,
                                feature_dim=visual_feature,
                                activation_fn=default_activation,
                                encoder_type=encoder_type)
     self.hdim = vector_dim + (visual_feature *
                               self.camera_num) * (self.camera_num > 0)
     self(I(shape=vector_dim), I(shape=visual_dim))
Example #16
 def __init__(self, vector_dim, action_dim, hidden_units):
     super().__init__()
     self.net = mlp(hidden_units, output_shape=1, out_activation=None)
     self(I(shape=vector_dim), I(shape=action_dim))
Example #17
 def __init__(self, vector_dim, output_shape, hidden_units):
     super().__init__()
     self.share = mlp(hidden_units['share'], out_layer=False)
     self.mu = mlp(hidden_units['mu'], output_shape=output_shape, out_activation=None)
     self.log_std = mlp(hidden_units['log_std'], output_shape=output_shape, out_activation='tanh')
     self(I(shape=vector_dim))
Example #18
 def __init__(self, vector_dim, action_dim, nums, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.nums = nums
     self.net = mlp(hidden_units, output_shape=nums * action_dim, out_activation=None)
     self(I(shape=vector_dim))
Example #19
 def __init__(self, vector_dim, action_dim, atoms, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     self.net = mlp(hidden_units, output_shape=atoms * action_dim, out_activation='softmax')
     self(I(shape=vector_dim))
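
Example #19 is a C51-style head: the flat atoms * action_dim output is reshaped into one probability distribution per action, and the expected Q value is the dot product with the fixed support. A hedged sketch, assuming a support tensor z of shape (atoms,) spanning [v_min, v_max] and per-action normalized distributions:

 def q_values(self, x, z):
     dist = tf.reshape(self.net(x),
                       (-1, self.action_dim, self.atoms))  # (B, A, atoms)
     return tf.reduce_sum(dist * z, axis=-1)  # expected Q values: (B, A)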
Example #20
 def __init__(self, vector_dim, output_shape, hidden_units):
     super().__init__()
     self.share = mlp(hidden_units['share'], out_layer=False)
     self.logits = mlp(hidden_units['logits'], output_shape=output_shape, out_activation=None)
     self.v = mlp(hidden_units['v'], output_shape=1, out_activation=None)
     self(I(shape=vector_dim))
Example #21
 def __init__(self, vector_dim, output_shape, head_num, hidden_units):
     super().__init__()
     self.nets = [mlp(hidden_units, output_shape=output_shape, out_activation=None) for _ in range(head_num)]
     self(I(shape=vector_dim))
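
Example #21 builds several independent heads over the same input, as in bootstrapped DQN. A hedged, illustrative sketch of the forward pass:

 def call(self, x):
     # Run every head and stack the results: (B, head_num, output_shape).
     return tf.stack([net(x) for net in self.nets], axis=1)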
Example #22
 def __init__(self, vector_dim, action_dim, hidden_units):
     assert len(hidden_units) > 1, "if you want to use this architecture of critic network, the number of layers must be greater than 1"
     super().__init__()
     self.feature_net = mlp(hidden_units[0:1])
     self.net = mlp(hidden_units[1:], output_shape=1, out_activation=None)
     self(I(shape=vector_dim), I(shape=action_dim))
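
Example #22 splits off hidden_units[0] as a state-only feature layer, so the critic can encode the state before the action is injected; the assert guarantees at least one layer remains for the joint network. A hedged, illustrative sketch of the matching forward pass:

 def call(self, s, a):
     feat = self.feature_net(s)  # state-only encoding from the first layer
     return self.net(tf.concat([feat, a], axis=-1))  # Q(s, a)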