def _build_model(self):
    """Instantiate the world model, value, and actor networks plus optimizers.

    Side effects: creates all `self._*` module attributes and runs a single
    training step on one batch from `self._dataset` so that every variable
    (including optimizer statistics) is created.
    """
    activations = {
        'elu': tf.nn.elu,
        'relu': tf.nn.relu,
        'swish': tf.nn.swish,
        'leaky_relu': tf.nn.leaky_relu,
    }
    cnn_act = activations[self._c.cnn_act]
    dense_act = activations[self._c.dense_act]
    # World-model components.
    self._encode = models.LaserConvEncoder(self._c.cnn_depth, cnn_act)
    self._dynamics = models.RSSM(
        self._c.stoch_size, self._c.deter_size, self._c.deter_size)
    self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=dense_act)
    if self._c.pcont:
        # Discount-prediction head, only when episode-continuation is modeled.
        self._pcont = models.DenseDecoder(
            (), 3, self._c.num_units, 'binary', act=dense_act)
    # Behavior components.
    self._value = models.DenseDecoder((), 3, self._c.num_units, act=dense_act)
    self._actor = models.ActionDecoder(
        self._actdim, 4, self._c.num_units, self._c.action_dist,
        init_std=self._c.action_init_std, act=dense_act)
    model_modules = [self._encode, self._dynamics, self._decode, self._reward]
    if self._c.pcont:
        model_modules.append(self._pcont)
    # Factory with shared optimizer hyperparameters pre-bound; each call
    # supplies only the name, module list, and learning rate.
    make_optimizer = functools.partial(
        tools.Adam, wd=self._c.weight_decay, clip=self._c.grad_clip,
        wdpattern=self._c.weight_decay_pattern)
    self._model_opt = make_optimizer('model', model_modules, self._c.model_lr)
    self._value_opt = make_optimizer('value', [self._value], self._c.value_lr)
    self._actor_opt = make_optimizer('actor', [self._actor], self._c.actor_lr)
    # One train step to force variable creation, including optimizer state.
    self.train(next(self._dataset))
def _build_model(self):
    """Build encoder/dynamics/decoder/reward (plus optional pcont), value and
    actor networks, create their optimizers, and run one warm-up train step."""
    # Map config activation names to TF activation functions.
    acts = dict(
        elu=tf.nn.elu, relu=tf.nn.relu, swish=tf.nn.swish,
        leaky_relu=tf.nn.leaky_relu)
    cnn_act = acts[self._c.cnn_act]
    # Activation used by the dense (fully connected) heads.
    act = acts[self._c.dense_act]
    self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)
    self._dynamics = models.RSSM(
        self._c.stoch_size, self._c.deter_size, self._c.deter_size)
    self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act)
    if self._c.pcont:
        # Optional discount/continuation predictor with a binary output.
        self._pcont = models.DenseDecoder(
            (), 3, self._c.num_units, 'binary', act=act)
    self._value = models.DenseDecoder((), 3, self._c.num_units, act=act)
    self._actor = models.ActionDecoder(
        self._actdim, 4, self._c.num_units, self._c.action_dist,
        init_std=self._c.action_init_std, act=act)
    model_modules = [self._encode, self._dynamics, self._decode, self._reward]
    if self._c.pcont:
        model_modules.append(self._pcont)
    # Build the optimizer factory: tools.Adam with the shared hyperparameters
    # (weight decay, gradient clip, decay pattern) pre-bound; the remaining
    # arguments (name, modules, learning rate) are supplied per call below.
    Optimizer = functools.partial(
        tools.Adam, wd=self._c.weight_decay, clip=self._c.grad_clip,
        wdpattern=self._c.weight_decay_pattern)
    self._model_opt = Optimizer('model', model_modules, self._c.model_lr)
    self._value_opt = Optimizer('value', [self._value], self._c.value_lr)
    self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr)
    # Do a train step to initialize all variables, including optimizer
    # statistics. Ideally, we would use batch size zero, but that doesn't work
    # in multi-GPU mode.
    self.train(next(self._dataset))
def _build_model(self):
    """Build encoder, dynamics, and actor, then run one dummy observation
    through them so that all network variables get created eagerly."""
    activation_map = dict(
        elu=tf.nn.elu, relu=tf.nn.relu, swish=tf.nn.swish,
        leaky_relu=tf.nn.leaky_relu)
    cnn_act = activation_map[self._c.cnn_act]
    act = activation_map[self._c.dense_act]
    self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)
    self._dynamics = models.RSSM(
        self._c.stoch_size, self._c.deter_size, self._c.deter_size)
    self._actor = models.ActionDecoder(
        self._actdim, 4, self._c.num_units, self._c.action_dist,
        init_std=self._c.action_init_std, act=act)
    # Dummy batch-of-one observation used only to trigger variable creation.
    dummy_obs = {}
    dummy_obs['image'] = np.ones((1, 64, 64, 3))
    dummy_obs['reward'] = np.ones((1, 1))
    print('model built ok')
    embed = self._encode(preprocess(dummy_obs, self._c))
    print('encoder initialized')
    # Step the latent dynamics once from its initial state with a zero action.
    state = self._dynamics.initial(1)
    zero_action = tf.zeros((1, self._actdim), self._float)
    state, _ = self._dynamics.obs_step(state, zero_action, embed)
    feat = self._dynamics.get_feat(state)
    print('dynamics initialized')
    # Query the actor once on the resulting features.
    _ = self._actor(feat).mode()
    print('actor initialized')
def _build_model(self):
    """Build all networks and optimizers, then run one warm-up train step.

    The dynamics model is selected by ``self._c.model_num``:
      * 'Dreamer'     — standard RSSM.
      * 'ED2_Dreamer' — action-separated RSSM whose action grouping is looked
                        up from ``group_separate`` by environment and schema.
    Raises ValueError for any other ``model_num`` instead of silently leaving
    ``self._dynamics`` unset (which would surface later as AttributeError).
    """
    # Map config activation names to TF activation functions.
    acts = dict(elu=tf.nn.elu,
                relu=tf.nn.relu,
                swish=tf.nn.swish,
                leaky_relu=tf.nn.leaky_relu)
    cnn_act = acts[self._c.cnn_act]
    act = acts[self._c.dense_act]
    self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)
    if self._c.model_num == 'Dreamer':
        self._dynamics = models.RSSM(self._c.stoch_size, self._c.deter_size,
                                     self._c.deter_size)
        print("build RSSM")  # fixed typo: was "bulid RSSM"
    elif self._c.model_num == 'ED2_Dreamer':
        # Task names appear to be '<domain>_<environ>'; the environ part keys
        # the action-group table. NOTE(review): confirm task naming scheme.
        train_environ = self._c.task.split('_')[1]
        schema = group_separate[train_environ][str(self._c.separate_schema)]
        self._dynamics = models.RSSM_action_separate_with_group(
            self._c.stoch_size, self._c.deter_size, self._c.deter_size,
            schema)
        print("build action separate RSSM with schema{}, {}".format(
            self._c.separate_schema, schema))
    else:
        # Fail fast on unknown configuration instead of continuing with an
        # unset self._dynamics.
        raise ValueError(
            'Unknown model_num: {!r}'.format(self._c.model_num))
    self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act)
    if self._c.pcont:
        # Optional discount/continuation predictor with a binary output.
        self._pcont = models.DenseDecoder((), 3, self._c.num_units, 'binary',
                                          act=act)
    self._value = models.DenseDecoder((), 3, self._c.num_units, act=act)
    self._actor = models.ActionDecoder(self._actdim, 4, self._c.num_units,
                                       self._c.action_dist,
                                       init_std=self._c.action_init_std,
                                       act=act)
    model_modules = [
        self._encode, self._dynamics, self._decode, self._reward
    ]
    if self._c.pcont:
        model_modules.append(self._pcont)
    # tools.Adam with shared hyperparameters pre-bound; name/modules/lr are
    # supplied per optimizer below.
    Optimizer = functools.partial(tools.Adam,
                                  wd=self._c.weight_decay,
                                  clip=self._c.grad_clip,
                                  wdpattern=self._c.weight_decay_pattern)
    self._model_opt = Optimizer('model', model_modules, self._c.model_lr)
    self._value_opt = Optimizer('value', [self._value], self._c.value_lr)
    self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr)
    # Do a train step to initialize all variables, including optimizer
    # statistics. Ideally, we would use batch size zero, but that doesn't work
    # in multi-GPU mode.
    self.train(next(self._dataset))
def _build_model(self):
    """Build world-model (incl. contrastive head), value/Q ensemble, actor,
    their optimizers, and optionally a SAC wrapper; then warm up with one
    train step so all variables are created."""
    activation_table = {
        'elu': tf.nn.elu,
        'relu': tf.nn.relu,
        'swish': tf.nn.swish,
        'leaky_relu': tf.nn.leaky_relu,
    }
    cnn_act = activation_table[self._c.cnn_act]
    dense_act = activation_table[self._c.dense_act]
    self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)
    self._dynamics = models.RSSM(
        self._c.stoch_size, self._c.deter_size, self._c.deter_size)
    self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
    self._contrastive = models.ContrastiveObsModel(
        self._c.deter_size, self._c.deter_size * 2)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=dense_act)
    if self._c.pcont:
        # Optional discount/continuation predictor with a binary output.
        self._pcont = models.DenseDecoder(
            (), 3, self._c.num_units, 'binary', act=dense_act)
    self._value = models.DenseDecoder((), 3, self._c.num_units, act=dense_act)
    # Ensemble of Q-networks (size comes from config).
    self._Qs = [models.QNetwork(3, self._c.num_units, act=dense_act)
                for _ in range(self._c.num_Qs)]
    self._actor = models.ActionDecoder(
        self._actdim, 4, self._c.num_units, self._c.action_dist,
        init_std=self._c.action_init_std, act=dense_act)
    # Keep this ordering — it defines the parameter grouping for 'model'.
    model_modules = [self._encode, self._dynamics, self._contrastive,
                     self._reward, self._decode]
    if self._c.pcont:
        model_modules.append(self._pcont)
    # tools.Adam factory with shared hyperparameters pre-bound.
    make_opt = functools.partial(
        tools.Adam, wd=self._c.weight_decay, clip=self._c.grad_clip,
        wdpattern=self._c.weight_decay_pattern)
    self._model_opt = make_opt('model', model_modules, self._c.model_lr)
    self._value_opt = make_opt('value', [self._value], self._c.value_lr)
    self._actor_opt = make_opt('actor', [self._actor], self._c.actor_lr)
    # One optimizer per Q-network, all sharing the value learning rate.
    self._q_opts = [make_opt('qs', [qnet], self._c.value_lr)
                    for qnet in self._Qs]
    if self._c.use_sac:
        self._sac = soft_actor_critic.SAC(
            self._actor, self._Qs, self._actor_opt, self._q_opts,
            self._actspace)
    # One train step to force variable creation, including optimizer state.
    self.train(next(self._dataset))
def _build_model(self):
    """Build the standard Dreamer networks plus the RE3 random-encoder
    modules, create optimizers, and run one warm-up train step."""
    # Map config activation names to TF activation functions.
    acts = dict(
        elu=tf.nn.elu,
        relu=tf.nn.relu,
        swish=tf.nn.swish,
        leaky_relu=tf.nn.leaky_relu,
    )
    cnn_act = acts[self._c.cnn_act]
    # Activation used by the dense (fully connected) heads.
    act = acts[self._c.dense_act]
    self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)
    ######################################################################
    # RE3: Random Encoder / RunningMeanStd Modules
    # NOTE(review): the random encoder is presumably frozen (it is excluded
    # from model_modules below, so it receives no optimizer) — confirm.
    self._rand_encode = models.ConvRandEncoder(self._c.cnn_depth, cnn_act)
    self._rms = models.RMS()
    ######################################################################
    self._dynamics = models.RSSM(
        self._c.stoch_size, self._c.deter_size, self._c.deter_size
    )
    self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act)
    if self._c.pcont:
        # Optional discount/continuation predictor with a binary output.
        self._pcont = models.DenseDecoder(
            (), 3, self._c.num_units, "binary", act=act
        )
    self._value = models.DenseDecoder((), 3, self._c.num_units, act=act)
    self._actor = models.ActionDecoder(
        self._actdim,
        4,
        self._c.num_units,
        self._c.action_dist,
        init_std=self._c.action_init_std,
        act=act,
    )
    model_modules = [self._encode, self._dynamics, self._decode, self._reward]
    if self._c.pcont:
        model_modules.append(self._pcont)
    # tools.Adam with shared hyperparameters pre-bound; name/modules/lr are
    # supplied per optimizer below.
    Optimizer = functools.partial(
        tools.Adam,
        wd=self._c.weight_decay,
        clip=self._c.grad_clip,
        wdpattern=self._c.weight_decay_pattern,
    )
    self._model_opt = Optimizer("model", model_modules, self._c.model_lr)
    self._value_opt = Optimizer("value", [self._value], self._c.value_lr)
    self._actor_opt = Optimizer("actor", [self._actor], self._c.actor_lr)
    # Do a train step to initialize all variables, including optimizer
    # statistics. Ideally, we would use batch size zero, but that doesn't work
    # in multi-GPU mode.
    self.train(next(self._dataset))
# NOTE(review): this span is the tail of a training loop whose enclosing
# `train` definition is outside this view, followed by the script entry point.

# Checkpoint all modules and the loss history for epoch `i`. The whole module
# objects are pickled (not just state_dicts), so loading requires the same
# code version.
torch.save({
    'epoch': i,
    'encoder': encoder,
    'acts_encoder': acts_encoder,
    'acts_decoder': acts_decoder,
    'trans': trans,
    'inverse_trans': inverse_trans,
    'train_loss': train_loss,
    'valid_loss': valid_loss}, opt.model_filename + '.model')
# Optimizer is saved as a separate file alongside the model checkpoint.
torch.save(optimizer, opt.model_filename + '.optim')
log_string = ('iter: {:d}, train_loss: {:0.6f}, valid_loss: {:0.6f}, best_valid_loss: {:0.6f}, lr: {:0.5f}').format(
    (i+1)*opt.epoch_size, train_loss[-1], valid_loss[-1], best_valid_loss, opt.lrt)
print(log_string)
utils.log(opt.model_filename + '.log', log_string)


if __name__ == '__main__':
    # Seed all RNG sources for reproducibility.
    numpy.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)
    # Fixed observation/action sizes for this experiment.
    obs_dim = (3, 64, 64)
    action_dim = 5
    # NOTE(review): device is hard-coded to CUDA; this script fails on
    # CPU-only machines.
    device = torch.device('cuda')
    encoder = models.Encoder(opt.z_dim, obs_dim[0]).to(device)
    acts_encoder = models.ActionEncoder(opt.z_dim, action_dim).to(device)
    acts_decoder = models.ActionDecoder(opt.z_dim, action_dim).to(device)
    # Third argument 4 is a model hyperparameter of TransitionNoise —
    # meaning not visible here; TODO confirm against models.TransitionNoise.
    trans = models.TransitionNoise(opt.z_dim, action_dim, 4).to(device)
    inverse_trans = models.InverseModel(opt.z_dim, action_dim).to(device)
    # Jointly optimize all five modules with a single Adam instance.
    parameters = list(encoder.parameters()) + list(trans.parameters()) + list(acts_encoder.parameters()) + list(inverse_trans.parameters()) + list(acts_decoder.parameters())
    optimizer = optim.Adam(parameters, lr=opt.lrt, weight_decay=1e-5)
    print('training...')
    utils.log(opt.model_filename + '.log', '[training]')
    train(500, encoder, acts_encoder, acts_decoder, trans, inverse_trans, optimizer)