def __init__(self,
             num_actions=None,
             dim_obs=None,
             memory_size=100,
             memory_word_size=32,
             name="TrainableAgent"):
    """Record the environment description and instantiate the agent's modules.

    The constructor stores the action/observation/memory sizes and then
    builds, in order: the encoder/decoder models, the controller and its
    aggregator, the actor-critic model, the abstract and external memories,
    the memory reader/eraser/writer, and the memory reconstructor.

    Args:
        num_actions: Size of the action space. Stored as ``self.num_actions``
            and forwarded to ``Controller.ControllerCore`` and ``AC``.
        dim_obs: Iterable of observation dimension sizes, e.g. ``(84, 84, 3)``.
            The product of its entries is stored as ``self._obs_size``.
            Required in practice even though the signature defaults to
            ``None``.
        memory_size: Stored as ``self.memory_size`` and forwarded to the
            controller, the external memory and the memory
            reader/eraser/writer.
        memory_word_size: Stored as ``self.memory_word_size`` and forwarded
            to the encoders/decoders, controller, aggregator, memory
            reader/eraser/writer and reconstructor.
        name: Accepted for interface compatibility; not used by this
            constructor.

    Raises:
        TypeError: If ``dim_obs`` is ``None`` (or otherwise not iterable).
    """
    # Fail early with a message that names the argument. The exception type
    # matches what iterating over None raised before, so callers that catch
    # TypeError keep working.
    if dim_obs is None:
        raise TypeError(
            "dim_obs is required: pass an iterable of observation "
            "dimension sizes, e.g. (84, 84, 3)")

    # The agent's basic knowledge of the environment: action space and
    # state space.
    self.num_actions = num_actions
    # Flattened observation size is the product of all dimensions, e.g.
    # (2, 3, 4) -> 24. The initial value 1 leaves non-empty shapes unchanged
    # and makes an empty shape (scalar observation) yield 1 instead of
    # raising.
    self._obs_size = reduce(lambda x, y: x * y, dim_obs, 1)
    self.memory_size = memory_size
    self.memory_word_size = memory_word_size

    # 1. Instantiate each class (bring in the five modules).

    # Encoder / decoder instantiation.
    # NOTE(review): the trailing 64 is presumably a hidden-layer width;
    # confirm against EncoderDecoder.
    self._im2state = EncoderDecoder.ImEncoder(
        self._obs_size, self.memory_word_size, 64)
    self._state2im = EncoderDecoder.ImDecoder(
        self._obs_size, self.memory_word_size, 64)
    self._vae = EncoderDecoder.VAE(
        self._obs_size, self.memory_word_size, 64)
    self._optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
    # No batching is used, so the output is one-dimensional: a single V value.
    self._vdecoder = EncoderDecoder.VDecoder(1, 200)
    self._vaev = EncoderDecoder.VAEV(
        self._obs_size, self.memory_word_size, 64)

    # Controller instantiation.
    self._controllercore = Controller.ControllerCore(
        num_actions=self.num_actions,
        num_node=10,
        memory_size=self.memory_size,
        memory_word_size=self.memory_word_size)
    # Input and output dimensions of the aggregator are the same.
    self._aggregator = Controller.MeanAggregator(
        self.memory_word_size,
        self.memory_word_size,
        name="aggregator",
        concat=False)
    self._aggmodel = Controller.AggModel(self.memory_word_size)
    # Batch start offset, used when training the aggregator parameters.
    self.batch_num = 0

    # Earlier A2C variant, kept for reference (original author's note: this
    # version of A2C is not written clearly enough).
    # self.a2cparams = {
    #     'gamma': 0.99,
    #     'value': 0.5,
    #     'entropy': 0.0001
    # }
    # self.a2cmodel = Controller.a2cModel(num_actions=self.num_actions)
    # self.a2cmodel.compile(
    #     optimizer = ko.RMSprop(lr=0.0007),
    #     loss = [self._logits_loss, self._value_loss]
    # )
    self.a2cmodel = AC(self._obs_size, self.num_actions)

    # Memory instantiation. The abstract memory is the controller's own
    # AbstractG attribute, not a separate object.
    self._abstract_memory = self._controllercore.AbstractG
    self._external_memory = Memory.ExternalMemory(
        memory_size=self.memory_size)

    # Memory reader / eraser / writer instantiation.
    memory_num_reads = 2
    memory_top_k = 3
    self._memory_reader = MemReadWrite.MemReader(
        # Same dimensionality as the hidden layer.
        memory_word_size=self.memory_word_size,
        num_read_heads=memory_num_reads,
        top_k=memory_top_k,
        memory_size=self.memory_size)
    self._memory_eraser = MemReadWrite.MemErase(
        memory_word_size=self.memory_word_size,
        memory_size=self.memory_size)
    self._memory_writer = MemReadWrite.MemWriter(
        memory_word_size=self.memory_word_size,
        memory_size=self.memory_size)

    # Reconstruction mechanism instantiation.
    self._memory_reconstructor = MemReconstruction.MemReconstructor(
        memory_word_size=self.memory_word_size)
def __init__(self,
             num_actions=None,
             dim_obs=None,
             memory_size=100,
             memory_word_size=32,
             name="TrainableAgent"):
    """Record the environment description and instantiate the agent's modules.

    The constructor stores the action/observation/memory sizes and then
    builds, in order: the encoder/decoder models, the controller and its
    aggregator, the abstract and external memories, the memory
    reader/eraser/writer, and the memory reconstructor.

    Args:
        num_actions: Size of the action space. Stored as ``self.num_actions``
            and forwarded to ``Controller.ControllerCore``.
        dim_obs: Iterable of observation dimension sizes, e.g. ``(84, 84, 3)``.
            The product of its entries is stored as ``self._obs_size``.
            Required in practice even though the signature defaults to
            ``None``.
        memory_size: Stored as ``self.memory_size`` and forwarded to the
            controller, the external memory and the memory
            reader/eraser/writer.
        memory_word_size: Stored as ``self.memory_word_size`` and forwarded
            to the encoders/decoders, controller, aggregator, memory
            reader/eraser/writer and reconstructor.
        name: Accepted for interface compatibility; not used by this
            constructor.

    Raises:
        TypeError: If ``dim_obs`` is ``None`` (or otherwise not iterable).
    """
    # Fail early with a message that names the argument. The exception type
    # matches what iterating over None raised before, so callers that catch
    # TypeError keep working.
    if dim_obs is None:
        raise TypeError(
            "dim_obs is required: pass an iterable of observation "
            "dimension sizes, e.g. (84, 84, 3)")

    # The agent's basic knowledge of the environment: action space and
    # state space.
    self.num_actions = num_actions
    # Flattened observation size is the product of all dimensions, e.g.
    # (2, 3, 4) -> 24. The initial value 1 leaves non-empty shapes unchanged
    # and makes an empty shape (scalar observation) yield 1 instead of
    # raising.
    self._obs_size = reduce(lambda x, y: x * y, dim_obs, 1)
    self.memory_size = memory_size
    self.memory_word_size = memory_word_size

    # 1. Instantiate each class (bring in the five modules).

    # Encoder / decoder instantiation.
    # NOTE(review): the trailing 64 is presumably a hidden-layer width;
    # confirm against EncoderDecoder.
    self._im2state = EncoderDecoder.ImEncoder(
        self._obs_size, self.memory_word_size, 64)
    self._state2im = EncoderDecoder.ImDecoder(
        self._obs_size, self.memory_word_size, 64)
    self._vae = EncoderDecoder.VAE(
        self._obs_size, self.memory_word_size, 64)
    self._optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
    # No batching is used, so the output is one-dimensional: a single V value.
    self._vdecoder = EncoderDecoder.VDecoder(1, 200)
    self._vaev = EncoderDecoder.VAEV(
        self._obs_size, self.memory_word_size, 64)

    # Controller instantiation.
    self._controllercore = Controller.ControllerCore(
        num_actions=self.num_actions,
        num_node=10,
        memory_size=self.memory_size,
        memory_word_size=self.memory_word_size)
    # Input and output dimensions of the aggregator are the same.
    self._aggregator = Controller.MeanAggregator(
        self.memory_word_size,
        self.memory_word_size,
        name="aggregator",
        concat=False)
    self._aggmodel = Controller.AggModel(self.memory_word_size)
    # Batch start offset, used when training the aggregator parameters.
    self.batch_num = 0

    # Memory instantiation. The abstract memory is the controller's own
    # AbstractG attribute, not a separate object.
    self._abstract_memory = self._controllercore.AbstractG
    self._external_memory = Memory.ExternalMemory(
        memory_size=self.memory_size)

    # Memory reader / eraser / writer instantiation.
    memory_num_reads = 2
    memory_top_k = 3
    self._memory_reader = MemReadWrite.MemReader(
        # Same dimensionality as the hidden layer.
        memory_word_size=self.memory_word_size,
        num_read_heads=memory_num_reads,
        top_k=memory_top_k,
        memory_size=self.memory_size)
    self._memory_eraser = MemReadWrite.MemErase(
        memory_word_size=self.memory_word_size,
        memory_size=self.memory_size)
    self._memory_writer = MemReadWrite.MemWriter(
        memory_word_size=self.memory_word_size,
        memory_size=self.memory_size)

    # Reconstruction mechanism instantiation.
    self._memory_reconstructor = MemReconstruction.MemReconstructor(
        memory_word_size=self.memory_word_size)