Example #1
    def __init__(self,
                 feat_dim=64,
                 rnn_units=8,
                 *,
                 use_rnn=False,
                 network_type=MemoryNetworkType.LSTM):
        super().__init__()
        # self.masking = tf.keras.layers.Masking(mask_value=0.)

        # ValueError: Tried to convert 'tensor' to a tensor and failed. Error: None values not supported.
        # https://github.com/tensorflow/tensorflow/issues/31998
        self.use_rnn = use_rnn
        self.h_dim = rnn_units if use_rnn else feat_dim
        self.network_type = network_type
        if use_rnn:
            if self.network_type == MemoryNetworkType.GRU:
                self.cell_nums = 1
                cell = tf.keras.layers.GRUCell(rnn_units)
            elif self.network_type == MemoryNetworkType.LSTM:
                self.cell_nums = 2
                cell = tf.keras.layers.LSTMCell(rnn_units)
            self.rnn_net = tf.keras.layers.RNN(cell,
                                               return_state=True,
                                               return_sequences=True)
            self(*([I(shape=(None, feat_dim))] +
                   [I(shape=rnn_units) for _ in range(self.cell_nums)]))
        else:
            self.cell_nums = 1
            self.rnn_net = lambda x, initial_state: (x, initial_state)
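For reference, a minimal standalone sketch (assuming TensorFlow 2.x; names and shapes below are illustrative, not from RLs) of how tf.keras.layers.RNN with return_state=True and return_sequences=True behaves. A GRU cell carries a single state tensor while an LSTM cell carries two (h and c), which is why cell_nums is 1 or 2 above:

import numpy as np
import tensorflow as tf

batch, time_steps, feat_dim, rnn_units = 4, 7, 64, 8
x = np.random.rand(batch, time_steps, feat_dim).astype(np.float32)
h0 = np.zeros((batch, rnn_units), dtype=np.float32)
c0 = np.zeros((batch, rnn_units), dtype=np.float32)

# LSTM cell: the RNN returns (sequence_output, state_h, state_c), i.e. two state tensors
lstm_rnn = tf.keras.layers.RNN(tf.keras.layers.LSTMCell(rnn_units),
                               return_state=True, return_sequences=True)
seq, h, c = lstm_rnn(x, initial_state=[h0, c0])
print(seq.shape, h.shape, c.shape)   # (4, 7, 8) (4, 8) (4, 8)

# GRU cell: only a single hidden state tensor
gru_rnn = tf.keras.layers.RNN(tf.keras.layers.GRUCell(rnn_units),
                              return_state=True, return_sequences=True)
seq, h = gru_rnn(x, initial_state=[h0])
print(seq.shape, h.shape)            # (4, 7, 8) (4, 8)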
Example #2
 def __init__(self, vector_dim, action_dim, quantiles_idx, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.q_net_head = mlp(hidden_units['q_net'], out_layer=False)   # [B, vector_dim]
     self.quantile_net = mlp(hidden_units['quantile'], out_layer=False)  # [N*B, quantiles_idx]
     self.q_net_tile = mlp(hidden_units['tile'], output_shape=action_dim, out_activation=None)   # [N*B, hidden_units['quantile'][-1]]
     self(I(shape=vector_dim), I(shape=quantiles_idx))
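This looks like an IQN-style head: q_net_head embeds the state, quantile_net embeds sampled quantile fractions, and q_net_tile maps their combination to per-action quantile values. A hedged sketch of the cosine quantile embedding that such a quantile_net usually consumes (assuming quantiles_idx is the number of cosine features; nothing here is taken from the snippet itself):

import numpy as np
import tensorflow as tf

batch, n_quantiles, quantiles_idx = 4, 8, 64

# Sample quantile fractions tau ~ U(0, 1)
tau = np.random.rand(batch, n_quantiles, 1).astype(np.float32)        # [B, N, 1]
# Standard IQN cosine embedding: phi_j(tau) = cos(pi * j * tau), j = 0..quantiles_idx-1
j = np.arange(quantiles_idx, dtype=np.float32).reshape(1, 1, -1)      # [1, 1, idx]
cos_embedding = np.cos(np.pi * j * tau)                               # [B, N, idx]
# Flattened to a 2-D batch it matches the [N*B, quantiles_idx] comment above
quantile_input = tf.reshape(cos_embedding, (batch * n_quantiles, quantiles_idx))
print(quantile_input.shape)   # (32, 64)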
Example #3
 def __init__(self, vector_dim, action_dim, hidden_units):
     assert len(
         hidden_units
     ) > 1, "if you want to use this architecture of critic network, the number of layers must be greater than 1"
     super().__init__()
     self.feature_net = mlp(hidden_units[0:1])
     self.net = mlp(hidden_units[1:], output_shape=1, out_activation=None)
     self(I(shape=vector_dim), I(shape=action_dim))
Example #4
File: networks.py Project: yyht/RLs
    def __init__(self, dim, hidden_units):
        super().__init__()
        self.rnn_type = 'lstm'
        # self.masking = tf.keras.layers.Masking(mask_value=0.)

        # ValueError: Tried to convert 'tensor' to a tensor and failed. Error: None values not supported.
        # https://github.com/tensorflow/tensorflow/issues/31998
        cell = tf.keras.layers.LSTMCell(hidden_units)
        self.lstm_net = tf.keras.layers.RNN(cell, return_state=True, return_sequences=True)
        self(I(shape=(None, dim)), I(shape=(hidden_units,)), I(shape=(hidden_units,)))
Example #5
    def __init__(self,
                 vector_dims,
                 visual_dims,
                 vector_net_kwargs,
                 visual_net_kwargs,
                 encoder_net_kwargs,
                 memory_net_kwargs,
                 is_continuous,
                 action_dim,
                 *,
                 eta=0.2, lr=1.0e-3, beta=0.2, loss_weight=10., network_type=VisualNetworkType.SIMPLE):
        '''
        params:
            is_continuous: specify whether the action space is continuous (True) or discrete (False)
            vector_dims: dimensions of the vector state input
            action_dim: dimension of the action
            visual_dims: dimensions of the visual state input

            eta: weight of the intrinsic reward
            lr: learning rate of the curiosity model
            beta: weight factor of the loss between inverse_dynamic_net and forward_net
            loss_weight: weight factor of the loss between the policy gradient and the curiosity model
        '''
        super().__init__()
        self.device = get_device()
        self.eta = eta
        self.beta = beta
        self.loss_weight = loss_weight
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        self.is_continuous = is_continuous

        self.net = DefaultRepresentationNetwork(
            name='curiosity_model',
            vec_dims=vector_dims,
            vis_dims=visual_dims,
            vector_net_kwargs=vector_net_kwargs,
            visual_net_kwargs=visual_net_kwargs,
            encoder_net_kwargs=encoder_net_kwargs,
            memory_net_kwargs=memory_net_kwargs
        )

        self.feat_dim = self.net.h_dim

        # S, S' => A
        self.inverse_dynamic_net = Sequential([
            Dense(self.feat_dim * 2, default_activation, **initKernelAndBias),
            Dense(action_dim, 'tanh' if is_continuous else None, **initKernelAndBias)
        ])

        # S, A => S'
        self.forward_net = Sequential([
            Dense(self.feat_dim + action_dim, default_activation, **initKernelAndBias),
            Dense(self.feat_dim, None, **initKernelAndBias)
        ])
        self.initial_weights(I(shape=self.feat_dim), I(shape=action_dim))
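The docstring describes an ICM-style curiosity module: the inverse-dynamics net predicts the action from features of (s, s'), the forward net predicts the next-state feature from (s, a), and the forward prediction error, scaled by eta, serves as the intrinsic reward. A minimal self-contained sketch of that reward (plain Dense layers stand in for the representation network; eta and the shapes are illustrative assumptions):

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

feat_dim, action_dim, eta, batch = 16, 4, 0.2, 8

# Stand-in forward model: (state feature, action) -> predicted next-state feature
forward_net = Sequential([
    Dense(feat_dim + action_dim, activation='relu'),
    Dense(feat_dim, activation=None),
])

feat_s = tf.random.normal((batch, feat_dim))        # phi(s)
feat_s_next = tf.random.normal((batch, feat_dim))   # phi(s')
action = tf.random.normal((batch, action_dim))

pred_s_next = forward_net(tf.concat([feat_s, action], axis=-1))
# Intrinsic reward: scaled forward prediction error, one scalar per transition
intrinsic_reward = eta * 0.5 * tf.reduce_sum(tf.square(pred_s_next - feat_s_next), axis=-1)
print(intrinsic_reward.shape)   # (8,)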
Example #6
File: networks.py Project: yyht/RLs
    def __init__(self, is_continuous, vector_dim, action_dim, visual_dim=[], visual_feature=128,
                 *, eta=0.2, lr=1.0e-3, beta=0.2, loss_weight=10., encoder_type='simple'):
        '''
        params:
            is_continuous: specify whether the action space is continuous (True) or discrete (False)
            vector_dim: dimension of the vector state input
            action_dim: dimension of the action
            visual_dim: dimension of the visual state input
            visual_feature: dimension of the visual feature map
            eta: weight of the intrinsic reward
            lr: learning rate of the curiosity model
            beta: weight factor of the loss between inverse_dynamic_net and forward_net
            loss_weight: weight factor of the loss between the policy gradient and the curiosity model
        '''
        super().__init__()
        self.device = get_device()
        self.eta = eta
        self.beta = beta
        self.loss_weight = loss_weight
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        self.is_continuous = is_continuous

        self.camera_num = visual_dim[0]
        if self.camera_num == 0:
            self.use_visual = False
        else:
            self.use_visual = True

        self.nets = MultiCameraCNN(n=self.camera_num, feature_dim=visual_feature, activation_fn=default_activation, encoder_type=encoder_type)
        self.s_dim = vector_dim + (visual_feature * self.camera_num) * (self.camera_num > 0)

        if self.use_visual:
            # S, S' => A
            self.inverse_dynamic_net = Sequential([
                Dense(self.s_dim * 2, default_activation),
                Dense(action_dim, 'tanh' if is_continuous else None)
            ])

        # S, A => S'
        self.forward_net = Sequential([
            Dense(self.s_dim + action_dim, default_activation),
            Dense(self.s_dim, None)
        ])
        self.initial_weights(I(shape=vector_dim), I(shape=visual_dim), I(shape=action_dim))

        self.tv = []
        if self.use_visual:
            for net in self.nets:
                self.tv += net.trainable_variables
            self.tv += self.inverse_dynamic_net.trainable_variables
        self.tv += self.forward_net.trainable_variables
Example #7
 def __init__(self,
              vector_dim,
              visual_dim=[],
              visual_feature=128,
              encoder_type='nature'):
     super().__init__()
     self.camera_num = visual_dim[0]
     self.nets = MultiCameraCNN(n=self.camera_num,
                                feature_dim=visual_feature,
                                activation_fn=default_activation,
                                encoder_type=encoder_type)
     self.hdim = vector_dim + (visual_feature *
                               self.camera_num) * (self.camera_num > 0)
     self(I(shape=vector_dim), I(shape=visual_dim))
Example #8
 def __init__(self, vector_dim, output_shape, head_num, hidden_units):
     super().__init__()
     self.nets = [
         mlp(hidden_units, output_shape=output_shape, out_activation=None)
         for _ in range(head_num)
     ]
     self(I(shape=vector_dim))
Example #9
 def __init__(self, vector_dim, action_dim, nums, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.nums = nums
     self.net = mlp(hidden_units,
                    output_shape=nums * action_dim,
                    out_activation=None)
     self(I(shape=vector_dim))
Example #10
 def __init__(self, vector_dim, action_dim, atoms, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     self.net = mlp(hidden_units,
                    output_shape=atoms * action_dim,
                    out_activation='softmax')
     self(I(shape=vector_dim))
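This is a categorical (C51-style) head: the output of size atoms * action_dim is interpreted as a distribution over support points per action, and Q-values are the expectation over that support. A hedged sketch of how such an output is typically reshaped and reduced to Q-values (the softmax is applied per action after reshaping here, and the support bounds are assumptions):

import tensorflow as tf

batch, action_dim, atoms = 4, 3, 51
v_min, v_max = -10.0, 10.0                               # assumed support bounds
z = tf.linspace(v_min, v_max, atoms)                     # support points, shape [atoms]

# Pretend `logits` is the flat [B, action_dim * atoms] network output
logits = tf.random.normal((batch, action_dim * atoms))
probs = tf.nn.softmax(tf.reshape(logits, (batch, action_dim, atoms)), axis=-1)

# Q(s, a) = sum_i p_i(s, a) * z_i
q_values = tf.reduce_sum(probs * z, axis=-1)             # [B, action_dim]
print(q_values.shape)   # (4, 3)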
Example #11
 def __init__(self, vector_dim, output_shape, hidden_units):
     super().__init__()
     self.share = mlp(hidden_units['share'], out_layer=False)
     self.logits = mlp(hidden_units['logits'],
                       output_shape=output_shape,
                       out_activation=None)
     self.v = mlp(hidden_units['v'], output_shape=1, out_activation=None)
     self(I(shape=vector_dim))
Example #12
 def __init__(self, feat_dim=64, output_dim=64, *, use_encoder=False):
     # TODO
     super().__init__()
     self.use_encoder = use_encoder
     self.h_dim = output_dim if use_encoder else feat_dim
     self.net = Dense(output_dim, default_activation,
                      **initKernelAndBias) if use_encoder else (lambda x: x)
     self(I(shape=feat_dim))
Example #13
 def __init__(self, vector_dim, action_dim, atoms, hidden_units):
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     self.share = mlp(hidden_units['share'], layer=Noisy, out_layer=False)
     self.v = mlp(hidden_units['v'], layer=Noisy, output_shape=atoms, out_activation=None)
     self.adv = mlp(hidden_units['adv'], layer=Noisy, output_shape=action_dim * atoms, out_activation=None)
     self(I(shape=vector_dim))
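Example #13 combines Noisy layers with a dueling, distributional head: the value and advantage streams each emit atom logits, and the usual dueling aggregation subtracts the mean advantage before a per-action softmax. A hedged sketch of that aggregation (shapes are illustrative; the snippet itself does not show this step):

import tensorflow as tf

batch, action_dim, atoms = 4, 6, 51

v = tf.random.normal((batch, atoms))                   # value stream output: [B, atoms]
adv = tf.random.normal((batch, action_dim * atoms))    # advantage stream output
adv = tf.reshape(adv, (batch, action_dim, atoms))      # [B, A, atoms]

# Dueling aggregation: q_logits(s, a) = v(s) + adv(s, a) - mean_a adv(s, a)
q_logits = v[:, None, :] + adv - tf.reduce_mean(adv, axis=1, keepdims=True)
q_dist = tf.nn.softmax(q_logits, axis=-1)              # per-action atom probabilities
print(q_dist.shape)   # (4, 6, 51)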
Example #14
 def __init__(self, vector_dim, action_dim, nums, network_settings):
     super().__init__()
     self.action_dim = action_dim
     self.nums = nums
     self.net = mlp(network_settings,
                    output_shape=nums * action_dim,
                    out_activation=None)
     self(I(shape=vector_dim))
Example #15
 def __init__(self, img_dim, fc_dim):
     super().__init__()
     self.net = Sequential([
         get_visual_network_from_type(VisualNetworkType.NATURE)(),
         Dense(fc_dim, **initKernelAndBias),
         LayerNormalization()
     ])
     self(I(shape=img_dim))
Example #16
 def __init__(self, vector_dim, action_dim, atoms, network_settings):
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     self.net = mlp(network_settings,
                    output_shape=atoms * action_dim,
                    out_activation='softmax')
     self(I(shape=vector_dim))
Example #17
 def __init__(self, vector_dim, output_shape, head_num, network_settings):
     super().__init__()
     self.nets = [
         mlp(network_settings,
             output_shape=output_shape,
             out_activation=None) for _ in range(head_num)
     ]
     self(I(shape=vector_dim))
Example #18
 def __init__(self, img_dim, fc_dim):
     super().__init__()
     self.net = Sequential(
         [NatureCNN(),
          Flatten(),
          Dense(fc_dim),
          LayerNormalization()])
     self(I(shape=img_dim))
Example #19
 def __init__(self, vector_dim, action_dim, atoms, network_settings):
     super().__init__()
     self.action_dim = action_dim
     self.atoms = atoms
     self.net = mlp(network_settings, out_layer=False)
     self.outputs = []
     for _ in range(action_dim):
         self.outputs.append(Dense(atoms, activation='softmax'))
     self(I(shape=vector_dim))
Example #20
 def __init__(self, vector_dim=[], network_type=VectorNetworkType.CONCAT):
     super().__init__()
     self.nets = []
     for in_dim in vector_dim:
         self.nets.append(
             get_vector_network_from_type(network_type)(in_dim=in_dim))
     self.h_dim = sum([net.h_dim for net in self.nets])
     if vector_dim:
         self(*(I(shape=dim) for dim in vector_dim))
Example #21
 def __init__(self, vector_dim, action_dim, options_num, hidden_units, is_continuous=True):
     super().__init__()
     self.actions_num = action_dim
     self.options_num = options_num
     self.share = mlp(hidden_units['share'], out_layer=False)
     self.q = mlp(hidden_units['q'], output_shape=options_num, out_activation=None)
     self.pi = mlp(hidden_units['intra_option'], output_shape=options_num * action_dim, out_activation='tanh' if is_continuous else None)
     self.beta = mlp(hidden_units['termination'], output_shape=options_num, out_activation='sigmoid')
     self(I(shape=vector_dim))
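Example #21 is an option-critic head: q scores options, pi holds the intra-option policies (flattened to options_num * action_dim), and beta gives per-option termination probabilities. A hedged sketch of how such outputs are typically reshaped and indexed by the currently active option (the option indices are illustrative):

import tensorflow as tf

batch, options_num, action_dim = 4, 3, 2

q = tf.random.normal((batch, options_num))                      # Q(s, w) per option
pi_flat = tf.random.normal((batch, options_num * action_dim))   # intra-option policy params
beta = tf.sigmoid(tf.random.normal((batch, options_num)))       # termination prob per option

pi = tf.reshape(pi_flat, (batch, options_num, action_dim))      # [B, options, actions]
current_option = tf.constant([0, 2, 1, 0])                      # assumed active option per env

# Select the intra-option policy and termination probability of the active option
pi_w = tf.gather(pi, current_option, axis=1, batch_dims=1)      # [B, action_dim]
beta_w = tf.gather(beta, current_option, axis=1, batch_dims=1)  # [B]
print(pi_w.shape, beta_w.shape)   # (4, 2) (4,)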
Example #22
 def __init__(self,
              vector_dim,
              output_shape,
              network_settings,
              out_activation='tanh'):
     super().__init__()
     self.net = mlp(network_settings,
                    output_shape=output_shape,
                    out_activation=out_activation)
     self(I(shape=vector_dim))
Example #23
 def __init__(self, vector_dim, output_shape, network_settings):
     super().__init__()
     self.share = mlp(network_settings['share'], out_layer=False)
     self.logits = mlp(network_settings['logits'],
                       output_shape=output_shape,
                       out_activation=None)
     self.v = mlp(network_settings['v'],
                  output_shape=1,
                  out_activation=None)
     self(I(shape=vector_dim))
Example #24
 def __init__(self, vector_dim, output_shape, network_settings):
     super().__init__()
     self.share = mlp(network_settings['share'], out_layer=False)
     self.mu = mlp(network_settings['mu'],
                   output_shape=output_shape,
                   out_activation=None)
     self.log_std = mlp(network_settings['log_std'],
                        output_shape=output_shape,
                        out_activation='tanh')
     self(I(shape=vector_dim))
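Example #24 is a Gaussian policy head: a shared trunk feeds separate mu and log_std branches, with log_std squashed by tanh. A hedged sketch of drawing a reparameterized action from such a head (the log_std scaling range is an assumption, not taken from the snippet):

import tensorflow as tf

batch, action_dim = 4, 2
log_std_min, log_std_max = -20.0, 2.0                            # assumed bounds

mu = tf.random.normal((batch, action_dim))                       # mean branch output
log_std_raw = tf.tanh(tf.random.normal((batch, action_dim)))     # tanh branch, in (-1, 1)
# Map the tanh output into [log_std_min, log_std_max]
log_std = log_std_min + 0.5 * (log_std_max - log_std_min) * (log_std_raw + 1.0)

# Reparameterized sample: a = mu + std * eps, eps ~ N(0, I)
eps = tf.random.normal(tf.shape(mu))
action = mu + tf.exp(log_std) * eps
print(action.shape)   # (4, 2)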
Example #25
 def __init__(self,
              vector_dim,
              output_shape,
              hidden_units,
              out_activation='tanh'):
     super().__init__()
     self.net = mlp(hidden_units,
                    output_shape=output_shape,
                    out_activation=out_activation)
     self(I(shape=vector_dim))
Example #26
 def __init__(self, dim, hidden_units, use_rnn):
     super().__init__()
     self.use_rnn = use_rnn
     if use_rnn:
         self.dim = dim
         # self.masking = tf.keras.layers.Masking(mask_value=0.)
         self.lstm_net = tf.keras.layers.LSTM(hidden_units, return_state=True, return_sequences=True)
         self(I(shape=(None, self.dim)))
         self.hdim = hidden_units
     else:
         self.hdim = dim
Example #27
 def __init__(self, vector_dim, output_shape, network_settings):
     super().__init__()
     self.soft_clip = network_settings['soft_clip']
     self.log_std_min, self.log_std_max = network_settings['log_std_bound']
     self.share = mlp(network_settings['share'], out_layer=False)
     self.mu = mlp(network_settings['mu'],
                   output_shape=output_shape,
                   out_activation=None)
     self.log_std = mlp(network_settings['log_std'],
                        output_shape=output_shape,
                        out_activation=None)
     self(I(shape=vector_dim))
Example #28
 def __init__(self,
              vector_dim,
              output_shape,
              options_num,
              network_settings,
              out_activation=None):
     super().__init__()
     self.actions_num = output_shape
     self.options_num = options_num
     self.pi = mlp(network_settings,
                   output_shape=options_num * output_shape,
                   out_activation=out_activation)
     self(I(shape=vector_dim))
Example #29
 def __init__(self,
              vector_dim,
              output_shape,
              options_num,
              hidden_units,
              out_activation=None):
     super().__init__()
     self.actions_num = output_shape
     self.options_num = options_num
     self.pi = mlp(hidden_units,
                   output_shape=options_num * output_shape,
                   out_activation=out_activation)
     self(I(shape=vector_dim))
Example #30
    def __init__(self, vector_dim=[]):
        # TODO
        super().__init__()
        self.nets = []
        for _ in vector_dim:

            def net(x):
                return x

            self.nets.append(net)
        self.h_dim = sum(vector_dim)
        self.use_vector = not self.h_dim == 0
        if vector_dim:
            self(*(I(shape=dim) for dim in vector_dim))