def _build_model(self):
    """Build the world model (encoder/RSSM/decoder/reward), value and actor
    networks, and one optimizer per group, then run a single train step to
    force variable creation."""
    activations = dict(
        elu=tf.nn.elu,
        relu=tf.nn.relu,
        swish=tf.nn.swish,
        leaky_relu=tf.nn.leaky_relu)
    cnn_act = activations[self._c.cnn_act]
    # Activation used by all dense (fully connected) heads.
    dense_act = activations[self._c.dense_act]
    self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)
    self._dynamics = models.RSSM(
        self._c.stoch_size, self._c.deter_size, self._c.deter_size)
    self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=dense_act)
    if self._c.pcont:
        self._pcont = models.DenseDecoder(
            (), 3, self._c.num_units, 'binary', act=dense_act)
    self._value = models.DenseDecoder((), 3, self._c.num_units, act=dense_act)
    self._actor = models.ActionDecoder(
        self._actdim, 4, self._c.num_units, self._c.action_dist,
        init_std=self._c.action_init_std, act=dense_act)
    model_modules = [self._encode, self._dynamics, self._decode, self._reward]
    if self._c.pcont:
        model_modules.append(self._pcont)
    # Shared optimizer factory: weight decay / gradient clipping settings are
    # fixed here; name, modules, and learning rate are supplied per group.
    make_optimizer = functools.partial(
        tools.Adam,
        wd=self._c.weight_decay,
        clip=self._c.grad_clip,
        wdpattern=self._c.weight_decay_pattern)
    self._model_opt = make_optimizer('model', model_modules, self._c.model_lr)
    self._value_opt = make_optimizer('value', [self._value], self._c.value_lr)
    self._actor_opt = make_optimizer('actor', [self._actor], self._c.actor_lr)
    # Do a train step to initialize all variables, including optimizer
    # statistics. Ideally, we would use batch size zero, but that doesn't work
    # in multi-GPU mode.
    self.train(next(self._dataset))
def _build_model(self):
    """Build the laser-observation world model, value and actor networks,
    their optimizers, and run one train step to create all variables."""
    activations = dict(
        elu=tf.nn.elu,
        relu=tf.nn.relu,
        swish=tf.nn.swish,
        leaky_relu=tf.nn.leaky_relu)
    cnn_act = activations[self._c.cnn_act]
    dense_act = activations[self._c.dense_act]
    # Encoder specialized for laser-scan observations.
    self._encode = models.LaserConvEncoder(self._c.cnn_depth, cnn_act)
    self._dynamics = models.RSSM(
        self._c.stoch_size, self._c.deter_size, self._c.deter_size)
    self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=dense_act)
    if self._c.pcont:
        self._pcont = models.DenseDecoder(
            (), 3, self._c.num_units, 'binary', act=dense_act)
    self._value = models.DenseDecoder((), 3, self._c.num_units, act=dense_act)
    self._actor = models.ActionDecoder(
        self._actdim, 4, self._c.num_units, self._c.action_dist,
        init_std=self._c.action_init_std, act=dense_act)
    model_modules = [self._encode, self._dynamics, self._decode, self._reward]
    if self._c.pcont:
        model_modules.append(self._pcont)
    # One factory, three optimizers: shared regularization settings, distinct
    # name / module list / learning rate per parameter group.
    make_optimizer = functools.partial(
        tools.Adam,
        wd=self._c.weight_decay,
        clip=self._c.grad_clip,
        wdpattern=self._c.weight_decay_pattern)
    self._model_opt = make_optimizer('model', model_modules, self._c.model_lr)
    self._value_opt = make_optimizer('value', [self._value], self._c.value_lr)
    self._actor_opt = make_optimizer('actor', [self._actor], self._c.actor_lr)
    # Single train step to initialize all variables (incl. optimizer state).
    self.train(next(self._dataset))
def _build_model(self):
    """Build the world model, value, and actor networks and their optimizers.

    Selects the dynamics model by ``self._c.model_num``: plain RSSM for
    'Dreamer', or an action-separated grouped RSSM for 'ED2_Dreamer' (the
    grouping schema is looked up in the module-level ``group_separate`` table
    by environment name and ``separate_schema``). Finishes with one train
    step so all variables, including optimizer statistics, get created.
    """
    acts = dict(elu=tf.nn.elu, relu=tf.nn.relu, swish=tf.nn.swish,
                leaky_relu=tf.nn.leaky_relu)
    cnn_act = acts[self._c.cnn_act]
    act = acts[self._c.dense_act]
    self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)
    if self._c.model_num == 'Dreamer':
        self._dynamics = models.RSSM(self._c.stoch_size, self._c.deter_size,
                                     self._c.deter_size)
        # FIX: message previously read "bulid RSSM".
        print("build RSSM")
    elif self._c.model_num == 'ED2_Dreamer':
        # Task names look like "<domain>_<environ>"; the environ selects the
        # action-group schema. TODO(review): confirm against task naming.
        train_environ = self._c.task.split('_')[1]
        self._dynamics = models.RSSM_action_separate_with_group(
            self._c.stoch_size, self._c.deter_size, self._c.deter_size,
            group_separate[train_environ][str(self._c.separate_schema)])
        # FIX: message previously read "bulid action separate RSSM...".
        print("build action separate RSSM with schema{}, {}".format(
            self._c.separate_schema,
            group_separate[train_environ][str(self._c.separate_schema)]))
    self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act)
    if self._c.pcont:
        self._pcont = models.DenseDecoder((), 3, self._c.num_units, 'binary',
                                          act=act)
    self._value = models.DenseDecoder((), 3, self._c.num_units, act=act)
    self._actor = models.ActionDecoder(self._actdim, 4, self._c.num_units,
                                       self._c.action_dist,
                                       init_std=self._c.action_init_std,
                                       act=act)
    model_modules = [
        self._encode, self._dynamics, self._decode, self._reward
    ]
    if self._c.pcont:
        model_modules.append(self._pcont)
    # Shared Adam settings (weight decay, clipping) bound once; each call
    # supplies the group name, modules, and learning rate.
    Optimizer = functools.partial(tools.Adam,
                                  wd=self._c.weight_decay,
                                  clip=self._c.grad_clip,
                                  wdpattern=self._c.weight_decay_pattern)
    self._model_opt = Optimizer('model', model_modules, self._c.model_lr)
    self._value_opt = Optimizer('value', [self._value], self._c.value_lr)
    self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr)
    # Do a train step to initialize all variables, including optimizer
    # statistics. Ideally, we would use batch size zero, but that doesn't work
    # in multi-GPU mode.
    self.train(next(self._dataset))
def _build_model(self):
    """Build the contrastive world model, value/actor heads, a Q-network
    ensemble (optionally wrapped in SAC), and their optimizers, then run one
    train step to create all variables."""
    activations = dict(
        elu=tf.nn.elu,
        relu=tf.nn.relu,
        swish=tf.nn.swish,
        leaky_relu=tf.nn.leaky_relu)
    cnn_act = activations[self._c.cnn_act]
    dense_act = activations[self._c.dense_act]
    self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)
    self._dynamics = models.RSSM(
        self._c.stoch_size, self._c.deter_size, self._c.deter_size)
    self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
    # Contrastive observation model trained alongside the reconstruction path.
    self._contrastive = models.ContrastiveObsModel(
        self._c.deter_size, self._c.deter_size * 2)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=dense_act)
    if self._c.pcont:
        self._pcont = models.DenseDecoder(
            (), 3, self._c.num_units, 'binary', act=dense_act)
    self._value = models.DenseDecoder((), 3, self._c.num_units, act=dense_act)
    # Ensemble of Q networks (size is configurable via num_Qs).
    self._Qs = [
        models.QNetwork(3, self._c.num_units, act=dense_act)
        for _ in range(self._c.num_Qs)
    ]
    self._actor = models.ActionDecoder(
        self._actdim, 4, self._c.num_units, self._c.action_dist,
        init_std=self._c.action_init_std, act=dense_act)
    model_modules = [
        self._encode, self._dynamics, self._contrastive, self._reward,
        self._decode
    ]
    if self._c.pcont:
        model_modules.append(self._pcont)
    # Optimizer factory with shared weight-decay / clipping configuration.
    make_optimizer = functools.partial(
        tools.Adam,
        wd=self._c.weight_decay,
        clip=self._c.grad_clip,
        wdpattern=self._c.weight_decay_pattern)
    self._model_opt = make_optimizer('model', model_modules, self._c.model_lr)
    self._value_opt = make_optimizer('value', [self._value], self._c.value_lr)
    self._actor_opt = make_optimizer('actor', [self._actor], self._c.actor_lr)
    # One optimizer per ensemble member, all at the value learning rate.
    self._q_opts = [
        make_optimizer('qs', [q_net], self._c.value_lr) for q_net in self._Qs
    ]
    if self._c.use_sac:
        self._sac = soft_actor_critic.SAC(
            self._actor, self._Qs, self._actor_opt, self._q_opts,
            self._actspace)
    # Single train step to initialize all variables (incl. optimizer state).
    self.train(next(self._dataset))
def _build_model(self):
    """Build the world model plus the RE3 exploration modules (random encoder
    and running mean/std), the value and actor networks, and their
    optimizers; finish with one train step to create all variables."""
    activations = dict(
        elu=tf.nn.elu,
        relu=tf.nn.relu,
        swish=tf.nn.swish,
        leaky_relu=tf.nn.leaky_relu,
    )
    cnn_act = activations[self._c.cnn_act]
    dense_act = activations[self._c.dense_act]
    self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)
    # --- RE3: fixed random encoder and running statistics used for the
    # state-entropy intrinsic reward (not part of the trained model). ---
    self._rand_encode = models.ConvRandEncoder(self._c.cnn_depth, cnn_act)
    self._rms = models.RMS()
    # ----------------------------------------------------------------------
    self._dynamics = models.RSSM(
        self._c.stoch_size, self._c.deter_size, self._c.deter_size
    )
    self._decode = models.ConvDecoder(self._c.cnn_depth, cnn_act)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=dense_act)
    if self._c.pcont:
        self._pcont = models.DenseDecoder(
            (), 3, self._c.num_units, "binary", act=dense_act
        )
    self._value = models.DenseDecoder((), 3, self._c.num_units, act=dense_act)
    self._actor = models.ActionDecoder(
        self._actdim,
        4,
        self._c.num_units,
        self._c.action_dist,
        init_std=self._c.action_init_std,
        act=dense_act,
    )
    model_modules = [self._encode, self._dynamics, self._decode, self._reward]
    if self._c.pcont:
        model_modules.append(self._pcont)
    # Optimizer factory sharing weight-decay and clipping configuration.
    make_optimizer = functools.partial(
        tools.Adam,
        wd=self._c.weight_decay,
        clip=self._c.grad_clip,
        wdpattern=self._c.weight_decay_pattern,
    )
    self._model_opt = make_optimizer("model", model_modules, self._c.model_lr)
    self._value_opt = make_optimizer("value", [self._value], self._c.value_lr)
    self._actor_opt = make_optimizer("actor", [self._actor], self._c.actor_lr)
    # Do a train step to initialize all variables, including optimizer
    # statistics. Ideally, we would use batch size zero, but that doesn't work
    # in multi-GPU mode.
    self.train(next(self._dataset))
def _build_model(self):
    """Build the ensemble world model and twin-Q actor-critic networks.

    The encoder and decoder are chosen from the observation configuration
    (proprioception on/off, 64px vs. larger images); the dynamics model is an
    RSSM ensemble of ``num_models`` members. The critic is a twin-Q pair with
    target copies, trained with a Huber loss.
    """
    acts = dict(elu=tf.nn.elu, relu=tf.nn.relu, swish=tf.nn.swish,
                leaky_relu=tf.nn.leaky_relu)
    cnn_act = acts[self._c.cnn_act]
    act = acts[self._c.dense_act]
    # Create encoder based on environment observations.
    if self._c.proprio:
        if self._c.im_size == 64:
            self._encode = models.ConvEncoderProprio(
                self._c.cnn_depth, cnn_act)
        else:
            self._encode = models.ConvEncoderProprioLarge(
                self._c.cnn_depth, cnn_act)
    else:
        if self._c.im_size == 64:
            self._encode = models.ConvEncoder(self._c.cnn_depth, cnn_act)
        else:
            self._encode = models.ConvEncoderLarge(self._c.cnn_depth, cnn_act)
    # RSSM model with ensembles.
    self._dynamics = models.RSSME(self._c.stoch_size, self._c.deter_size,
                                  self._c.deter_size,
                                  num_models=self._c.num_models)
    # Create decoder based on image size.
    if self._c.im_size == 64:
        self._decode = models.ConvDecoder(
            self._c.cnn_depth, cnn_act,
            shape=(self._c.im_size, self._c.im_size, 3))
    else:
        self._decode = models.ConvDecoderLarge(
            self._c.cnn_depth, cnn_act,
            shape=(self._c.im_size, self._c.im_size, 3))
    if self._c.proprio:
        self._proprio = models.DenseDecoder((self._propriodim, ), 3,
                                            self._c.num_units, act=act)
    if self._c.pcont:
        self._pcont = models.DenseDecoder((), 3, self._c.num_units, 'binary',
                                          act=act)
    self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act)
    model_modules = [
        self._encode, self._dynamics, self._decode, self._reward
    ]
    if self._c.proprio:
        model_modules.append(self._proprio)
    if self._c.pcont:
        model_modules.append(self._pcont)
    # Build actor-critic networks.
    self._qf1 = models.DenseNetwork(1, 3, self._c.num_units, act=act)
    self._qf2 = models.DenseNetwork(1, 3, self._c.num_units, act=act)
    # FIX: targets were cross-initialized (target_qf1 copied qf2 and vice
    # versa); each target must start from its own online network so that
    # target updates track the matching Q function.
    self._target_qf1 = deepcopy(self._qf1)
    self._target_qf2 = deepcopy(self._qf2)
    self._qf_criterion = tf.keras.losses.Huber()
    self._actor = models.ActorNetwork(self._actdim, 4, self._c.num_units,
                                      act=act)
    # Initialize optimizers: shared weight-decay / clipping settings, one
    # optimizer per parameter group.
    Optimizer = functools.partial(tools.Adam,
                                  wd=self._c.weight_decay,
                                  clip=self._c.grad_clip,
                                  wdpattern=self._c.weight_decay_pattern)
    self._model_opt = Optimizer('model', model_modules, self._c.model_lr)
    self._qf_opt = Optimizer('qf', [self._qf1, self._qf2], self._c.q_lr)
    self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr)