Ejemplo n.º 1
0
 def _build_model(self):
   """Instantiate all networks and optimizers, then run one training step.

   The world model consists of the encoder, the RSSM dynamics, the decoder
   and the reward head, plus a `pcont` head when `self._c.pcont` is truthy.
   A value head and an actor are created next to it. Three Adam optimizers
   (model, value, actor) share the weight-decay and gradient-clipping
   settings from the config and differ only in name, modules and learning
   rate. The method finishes by calling `self.train` on the next batch from
   `self._dataset` (presumably to create all variables; confirm).

   Raises:
     KeyError: if `self._c.cnn_act` or `self._c.dense_act` is not one of
       'elu', 'relu', 'swish' or 'leaky_relu'.
   """
   cfg = self._c
   activations = {
       'elu': tf.nn.elu,
       'relu': tf.nn.relu,
       'swish': tf.nn.swish,
       'leaky_relu': tf.nn.leaky_relu,
   }
   conv_act = activations[cfg.cnn_act]
   dense_act = activations[cfg.dense_act]

   # World-model components; everything collected in `model_modules` is
   # handed to the 'model' optimizer below.
   self._encode = models.LaserConvEncoder(cfg.cnn_depth, conv_act)
   self._dynamics = models.RSSM(
       cfg.stoch_size, cfg.deter_size, cfg.deter_size)
   self._decode = models.ConvDecoder(cfg.cnn_depth, conv_act)
   self._reward = models.DenseDecoder((), 2, cfg.num_units, act=dense_act)
   model_modules = [self._encode, self._dynamics, self._decode, self._reward]
   if cfg.pcont:
     self._pcont = models.DenseDecoder(
         (), 3, cfg.num_units, 'binary', act=dense_act)
     model_modules.append(self._pcont)

   # Value head and actor each get their own optimizer.
   self._value = models.DenseDecoder((), 3, cfg.num_units, act=dense_act)
   self._actor = models.ActionDecoder(
       self._actdim, 4, cfg.num_units, cfg.action_dist,
       init_std=cfg.action_init_std, act=dense_act)

   # Keyword arguments common to every optimizer.
   adam_kwargs = dict(
       wd=cfg.weight_decay, clip=cfg.grad_clip,
       wdpattern=cfg.weight_decay_pattern)
   self._model_opt = tools.Adam(
       'model', model_modules, cfg.model_lr, **adam_kwargs)
   self._value_opt = tools.Adam(
       'value', [self._value], cfg.value_lr, **adam_kwargs)
   self._actor_opt = tools.Adam(
       'actor', [self._actor], cfg.actor_lr, **adam_kwargs)

   self.train(next(self._dataset))
Ejemplo n.º 2
0
 def _build_model(self):
   """Build the world model, value and actor networks and their optimizers.

   Creates the conv encoder/decoder, the RSSM dynamics, the reward head and
   (when `self._c.pcont` is truthy) the `pcont` head as the world model,
   followed by a value head and an actor. One Adam optimizer is created for
   each of the three groups, and a single training step is run at the end.

   Raises:
     KeyError: if `self._c.cnn_act` or `self._c.dense_act` is not one of
       'elu', 'relu', 'swish' or 'leaky_relu'.
   """
   config = self._c
   act_by_name = {
       'elu': tf.nn.elu,
       'relu': tf.nn.relu,
       'swish': tf.nn.swish,
       'leaky_relu': tf.nn.leaky_relu,
   }
   # Activation function used by the conv encoder and decoder.
   cnn_act = act_by_name[config.cnn_act]
   # Activation function used by the dense heads and the actor.
   dense_act = act_by_name[config.dense_act]

   self._encode = models.ConvEncoder(config.cnn_depth, cnn_act)
   self._dynamics = models.RSSM(
       config.stoch_size, config.deter_size, config.deter_size)
   self._decode = models.ConvDecoder(config.cnn_depth, cnn_act)
   self._reward = models.DenseDecoder(
       (), 2, config.num_units, act=dense_act)
   model_modules = [self._encode, self._dynamics, self._decode, self._reward]
   if config.pcont:
     self._pcont = models.DenseDecoder(
         (), 3, config.num_units, 'binary', act=dense_act)
     model_modules.append(self._pcont)
   self._value = models.DenseDecoder((), 3, config.num_units, act=dense_act)
   self._actor = models.ActionDecoder(
       self._actdim, 4, config.num_units, config.action_dist,
       init_std=config.action_init_std, act=dense_act)

   def make_optimizer(name, modules, lr):
     # Optimizer factory: the weight-decay and gradient-clipping settings
     # are fixed here for every tools.Adam instance; the remaining
     # tools.Adam inputs (name, modules, learning rate) are supplied by
     # the caller.
     return tools.Adam(
         name, modules, lr, wd=config.weight_decay, clip=config.grad_clip,
         wdpattern=config.weight_decay_pattern)

   self._model_opt = make_optimizer('model', model_modules, config.model_lr)
   self._value_opt = make_optimizer('value', [self._value], config.value_lr)
   self._actor_opt = make_optimizer('actor', [self._actor], config.actor_lr)

   # Do a train step to initialize all variables, including optimizer
   # statistics. Ideally, we would use batch size zero, but that doesn't work
   # in multi-GPU mode.
   first_batch = next(self._dataset)
   self.train(first_batch)
Ejemplo n.º 3
0
 def _build_model(self):
   """Create encoder, dynamics and actor, then run one dummy step through them.

   Only the conv encoder, the RSSM dynamics and the actor are built here; no
   decoder, reward/value head or optimizer is created. A dummy observation
   (an all-ones image of shape (1, 64, 64, 3) and an all-ones reward of
   shape (1, 1)) is then passed through encoder -> dynamics -> actor, with
   a progress message printed after each stage. The forward pass is
   presumably what creates the modules' variables (confirm).

   Raises:
     KeyError: if `self._c.cnn_act` or `self._c.dense_act` is not one of
       'elu', 'relu', 'swish' or 'leaky_relu'.
   """
   cfg = self._c
   activations = {
       'elu': tf.nn.elu,
       'relu': tf.nn.relu,
       'swish': tf.nn.swish,
       'leaky_relu': tf.nn.leaky_relu,
   }
   conv_act = activations[cfg.cnn_act]
   dense_act = activations[cfg.dense_act]

   self._encode = models.ConvEncoder(cfg.cnn_depth, conv_act)
   self._dynamics = models.RSSM(
       cfg.stoch_size, cfg.deter_size, cfg.deter_size)
   self._actor = models.ActionDecoder(
       self._actdim, 4, cfg.num_units, cfg.action_dist,
       init_std=cfg.action_init_std, act=dense_act)

   # Batch-of-one dummy observation used only to drive the forward pass.
   dummy_obs = {
       'image': np.ones((1, 64, 64, 3)),
       'reward': np.ones((1, 1)),
   }
   print('model built ok')

   embedding = self._encode(preprocess(dummy_obs, self._c))
   print('encoder initialized')

   prev_state = self._dynamics.initial(1)
   zero_action = tf.zeros((1, self._actdim), self._float)
   post_state, _ = self._dynamics.obs_step(prev_state, zero_action, embedding)
   features = self._dynamics.get_feat(post_state)
   print('dynamics initialized')

   # Only the call matters here; the resulting action is discarded.
   self._actor(features).mode()
   print('actor initialized')
Ejemplo n.º 4
0
    def _build_model(self):
        """Build networks and optimizers, then run one initializing train step.

        The dynamics model is selected by `self._c.model_num`:

        * 'Dreamer' builds `models.RSSM`.
        * 'ED2_Dreamer' builds `models.RSSM_action_separate_with_group`, with
          the group looked up as `group_separate[env][schema]`, where `env`
          is the second '_'-separated token of `self._c.task` and `schema`
          is `str(self._c.separate_schema)`.

        Besides the dynamics, the world model contains the conv encoder and
        decoder, the reward head and (when `self._c.pcont` is truthy) the
        `pcont` head. A value head and an actor are created as well, and
        each of the three groups gets its own Adam optimizer.

        Raises:
            KeyError: if `self._c.cnn_act` or `self._c.dense_act` is not one
                of 'elu', 'relu', 'swish' or 'leaky_relu', or if the
                environment/schema is missing from `group_separate`.
            ValueError: if `self._c.model_num` is neither 'Dreamer' nor
                'ED2_Dreamer'.
        """
        acts = dict(elu=tf.nn.elu,
                    relu=tf.nn.relu,
                    swish=tf.nn.swish,
                    leaky_relu=tf.nn.leaky_relu)
        cnn_act = acts[self._c.cnn_act]
        act = acts[self._c.dense_act]
        self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)

        if self._c.model_num == 'Dreamer':
            self._dynamics = models.RSSM(self._c.stoch_size,
                                         self._c.deter_size,
                                         self._c.deter_size)
            print("bulid RSSM")
        elif self._c.model_num == 'ED2_Dreamer':
            train_environ = self._c.task.split('_')[1]
            # Look the action grouping up once and reuse it for both the
            # model constructor and the log message.
            action_groups = group_separate[train_environ][str(
                self._c.separate_schema)]
            self._dynamics = models.RSSM_action_separate_with_group(
                self._c.stoch_size, self._c.deter_size, self._c.deter_size,
                action_groups)
            print("bulid action separate RSSM with schema{}, {}".format(
                self._c.separate_schema, action_groups))
        else:
            # Without this branch `self._dynamics` would stay unset and the
            # failure would only surface later as an AttributeError.
            raise ValueError(
                "Unsupported model_num {!r}; expected 'Dreamer' or "
                "'ED2_Dreamer'".format(self._c.model_num))

        self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
        self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act)
        if self._c.pcont:
            self._pcont = models.DenseDecoder((),
                                              3,
                                              self._c.num_units,
                                              'binary',
                                              act=act)
        self._value = models.DenseDecoder((), 3, self._c.num_units, act=act)
        self._actor = models.ActionDecoder(self._actdim,
                                           4,
                                           self._c.num_units,
                                           self._c.action_dist,
                                           init_std=self._c.action_init_std,
                                           act=act)
        # Modules trained jointly by the 'model' optimizer.
        model_modules = [
            self._encode, self._dynamics, self._decode, self._reward
        ]
        if self._c.pcont:
            model_modules.append(self._pcont)
        # Pre-bind the settings shared by all three optimizers.
        Optimizer = functools.partial(tools.Adam,
                                      wd=self._c.weight_decay,
                                      clip=self._c.grad_clip,
                                      wdpattern=self._c.weight_decay_pattern)
        self._model_opt = Optimizer('model', model_modules, self._c.model_lr)
        self._value_opt = Optimizer('value', [self._value], self._c.value_lr)
        self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr)
        # Do a train step to initialize all variables, including optimizer
        # statistics. Ideally, we would use batch size zero, but that doesn't work
        # in multi-GPU mode.
        self.train(next(self._dataset))
Ejemplo n.º 5
0
    def _build_model(self):
        """Create every network and optimizer, then run one training step.

        World-model modules: conv encoder, RSSM dynamics, conv decoder,
        contrastive observation model, reward head and (when `self._c.pcont`
        is truthy) the `pcont` head. Behaviour modules: a value head,
        `self._c.num_Qs` Q-networks and an actor. Adam optimizers are created
        for the model, the value head, the actor and each Q-network. When
        `self._c.use_sac` is truthy a `soft_actor_critic.SAC` object is built
        from the actor, the Q-networks and their optimizers. The method ends
        by calling `self.train` on the next batch from `self._dataset`
        (presumably to create all variables; confirm).

        Raises:
            KeyError: if `self._c.cnn_act` or `self._c.dense_act` is not one
                of 'elu', 'relu', 'swish' or 'leaky_relu'.
        """
        cfg = self._c
        activations = {
            'elu': tf.nn.elu,
            'relu': tf.nn.relu,
            'swish': tf.nn.swish,
            'leaky_relu': tf.nn.leaky_relu,
        }
        conv_act = activations[cfg.cnn_act]
        dense_act = activations[cfg.dense_act]

        # --- world model -------------------------------------------------
        self._encode = models.ConvEncoder(cfg.cnn_depth, conv_act)
        self._dynamics = models.RSSM(
            cfg.stoch_size, cfg.deter_size, cfg.deter_size)
        self._decode = models.ConvDecoder(cfg.cnn_depth, conv_act)
        self._contrastive = models.ContrastiveObsModel(
            cfg.deter_size, cfg.deter_size * 2)
        self._reward = models.DenseDecoder(
            (), 2, cfg.num_units, act=dense_act)
        if cfg.pcont:
            self._pcont = models.DenseDecoder(
                (), 3, cfg.num_units, 'binary', act=dense_act)

        # --- behaviour ---------------------------------------------------
        self._value = models.DenseDecoder((), 3, cfg.num_units, act=dense_act)
        q_networks = []
        for _ in range(cfg.num_Qs):
            q_networks.append(models.QNetwork(3, cfg.num_units, act=dense_act))
        self._Qs = q_networks
        self._actor = models.ActionDecoder(
            self._actdim, 4, cfg.num_units, cfg.action_dist,
            init_std=cfg.action_init_std, act=dense_act)

        # Modules trained jointly by the 'model' optimizer.
        model_modules = [
            self._encode,
            self._dynamics,
            self._contrastive,
            self._reward,
            self._decode,
        ]
        if cfg.pcont:
            model_modules.append(self._pcont)

        # --- optimizers --------------------------------------------------
        make_adam = functools.partial(
            tools.Adam,
            wd=cfg.weight_decay,
            clip=cfg.grad_clip,
            wdpattern=cfg.weight_decay_pattern)
        self._model_opt = make_adam('model', model_modules, cfg.model_lr)
        self._value_opt = make_adam('value', [self._value], cfg.value_lr)
        self._actor_opt = make_adam('actor', [self._actor], cfg.actor_lr)
        # NOTE(review): every Q optimizer is created with the same name 'qs';
        # confirm tools.Adam tolerates duplicate names.
        self._q_opts = [
            make_adam('qs', [q_network], cfg.value_lr)
            for q_network in self._Qs
        ]

        if cfg.use_sac:
            self._sac = soft_actor_critic.SAC(
                self._actor, self._Qs, self._actor_opt, self._q_opts,
                self._actspace)

        first_batch = next(self._dataset)
        self.train(first_batch)
Ejemplo n.º 6
0
 def _build_model(self):
     """Build the networks, the RE3 helpers and the optimizers, then train once.

     World-model modules (encoder, RSSM dynamics, decoder, reward head and,
     when `self._c.pcont` is truthy, the `pcont` head) are given to the
     'model' optimizer. The RE3 random encoder and the running-mean-std
     module are created too but are not added to `model_modules`. A value
     head and an actor each get their own optimizer.

     Raises:
         KeyError: if `self._c.cnn_act` or `self._c.dense_act` is not one of
             'elu', 'relu', 'swish' or 'leaky_relu'.
     """
     cfg = self._c
     activations = {
         "elu": tf.nn.elu,
         "relu": tf.nn.relu,
         "swish": tf.nn.swish,
         "leaky_relu": tf.nn.leaky_relu,
     }
     conv_act = activations[cfg.cnn_act]
     dense_act = activations[cfg.dense_act]

     self._encode = models.ConvEncoder(cfg.cnn_depth, conv_act)
     ######################################################################
     #  RE3: Random Encoder / RunningMeanStd Modules
     self._rand_encode = models.ConvRandEncoder(cfg.cnn_depth, conv_act)
     self._rms = models.RMS()
     ######################################################################
     self._dynamics = models.RSSM(
         cfg.stoch_size, cfg.deter_size, cfg.deter_size
     )
     self._decode = models.ConvDecoder(cfg.cnn_depth, conv_act)
     self._reward = models.DenseDecoder((), 2, cfg.num_units, act=dense_act)

     # Modules trained jointly by the "model" optimizer; the optional pcont
     # head joins the list as soon as it is created.
     model_modules = [self._encode, self._dynamics, self._decode, self._reward]
     if cfg.pcont:
         self._pcont = models.DenseDecoder(
             (), 3, cfg.num_units, "binary", act=dense_act
         )
         model_modules.append(self._pcont)

     self._value = models.DenseDecoder((), 3, cfg.num_units, act=dense_act)
     self._actor = models.ActionDecoder(
         self._actdim,
         4,
         cfg.num_units,
         cfg.action_dist,
         init_std=cfg.action_init_std,
         act=dense_act,
     )

     # Settings shared by all three optimizers are bound once.
     make_adam = functools.partial(
         tools.Adam,
         wd=cfg.weight_decay,
         clip=cfg.grad_clip,
         wdpattern=cfg.weight_decay_pattern,
     )
     self._model_opt = make_adam("model", model_modules, cfg.model_lr)
     self._value_opt = make_adam("value", [self._value], cfg.value_lr)
     self._actor_opt = make_adam("actor", [self._actor], cfg.actor_lr)

     # Do a train step to initialize all variables, including optimizer
     # statistics. Ideally, we would use batch size zero, but that doesn't work
     # in multi-GPU mode.
     first_batch = next(self._dataset)
     self.train(first_batch)
Ejemplo n.º 7
0
            torch.save({ 'epoch': i, 'encoder': encoder, 'acts_encoder': acts_encoder, 'acts_decoder': acts_decoder, 'trans': trans, 'inverse_trans': inverse_trans, 'train_loss': train_loss, 'valid_loss': valid_loss}, opt.model_filename + '.model')
            torch.save(optimizer, opt.model_filename + '.optim')

        log_string = ('iter: {:d}, train_loss: {:0.6f}, valid_loss: {:0.6f}, best_valid_loss: {:0.6f}, lr: {:0.5f}').format(
                      (i+1)*opt.epoch_size, train_loss[-1], valid_loss[-1], best_valid_loss, opt.lrt)
        print(log_string)
        utils.log(opt.model_filename + '.log', log_string)


if __name__ == '__main__':
    # Script entry point: seed the RNGs, build the five networks, create one
    # Adam optimizer over all their parameters and start training.
    numpy.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    # Safe on CPU-only machines: PyTorch silently ignores this call when CUDA
    # is not available.
    torch.cuda.manual_seed(opt.seed)

    obs_dim = (3, 64, 64)  # presumably (channels, height, width) - TODO confirm
    action_dim = 5

    # Use the GPU when one is present; otherwise fall back to the CPU instead
    # of failing on the first `.to(device)` call below.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    encoder = models.Encoder(opt.z_dim, obs_dim[0]).to(device)
    acts_encoder = models.ActionEncoder(opt.z_dim, action_dim).to(device)
    acts_decoder = models.ActionDecoder(opt.z_dim, action_dim).to(device)
    trans = models.TransitionNoise(opt.z_dim, action_dim, 4).to(device)
    inverse_trans = models.InverseModel(opt.z_dim, action_dim).to(device)

    # A single optimizer updates every network; the concatenation order is
    # kept as in the original script.
    parameters = (
        list(encoder.parameters())
        + list(trans.parameters())
        + list(acts_encoder.parameters())
        + list(inverse_trans.parameters())
        + list(acts_decoder.parameters())
    )
    optimizer = optim.Adam(parameters, lr=opt.lrt, weight_decay=1e-5)

    print('training...')
    utils.log(opt.model_filename + '.log', '[training]')
    # First argument is presumably the number of epochs - confirm against
    # the definition of `train`.
    train(500, encoder, acts_encoder, acts_decoder, trans, inverse_trans, optimizer)