コード例 #1
0
    def __init__(self,
                 num_actions=None,
                 dim_obs=None,
                 memory_size=100,
                 memory_word_size=32,
                 name="TrainableAgent"):
        """Set up the agent's view of the environment and build its modules.

        Records the action-space and observation-space sizes, then
        instantiates the encoder/decoder, controller, memory, memory
        reader/eraser/writer and memory-reconstruction components.

        Args:
            num_actions: Number of actions; forwarded to the controller core
                and to the ``AC`` model.
            dim_obs: Iterable of observation dimensions. Its entries are
                multiplied together to obtain the flattened observation
                size. The ``None`` default is not usable as-is, because
                ``list(None)`` raises ``TypeError``.
            memory_size: Forwarded as ``memory_size`` to the controller core,
                the external memory and the memory reader/eraser/writer.
            memory_word_size: Forwarded as the word size to the codecs, the
                controller components, the memory reader/eraser/writer and
                the memory reconstructor.
            name: Not used anywhere in this constructor.
        """
        self.num_actions = num_actions
        # Flattened observation size is the PRODUCT of all dimensions,
        # e.g. reduce(lambda a, b: a * b, [1, 2, 3]) gives 6.
        obs_size = reduce(lambda acc, dim: acc * dim, list(dim_obs))
        self._obs_size = obs_size
        self.memory_size = memory_size
        self.memory_word_size = memory_word_size

        # Step 1: instantiate every component class (the five modules).

        # --- Encoders / decoders ---
        # All image codecs share the same three positional arguments.
        # NOTE(review): the meaning of the trailing 64 is not visible here;
        # presumably a hidden-layer width -- confirm in EncoderDecoder.
        codec_args = (obs_size, memory_word_size, 64)
        self._im2state = EncoderDecoder.ImEncoder(*codec_args)
        self._state2im = EncoderDecoder.ImDecoder(*codec_args)
        self._vae = EncoderDecoder.VAE(*codec_args)
        self._optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
        # No batching is used, so the output is one-dimensional: a single
        # v value.
        self._vdecoder = EncoderDecoder.VDecoder(1, 200)
        self._vaev = EncoderDecoder.VAEV(*codec_args)

        # --- Controller ---
        self._controllercore = Controller.ControllerCore(
            num_actions=num_actions,
            num_node=10,
            memory_size=memory_size,
            memory_word_size=memory_word_size,
        )
        # Aggregator input and output dimensions are the same.
        self._aggregator = Controller.MeanAggregator(
            memory_word_size,
            memory_word_size,
            name="aggregator",
            concat=False,
        )
        self._aggmodel = Controller.AggModel(memory_word_size)
        # Batch starting point used when training the aggregator parameters.
        self.batch_num = 0
        # An earlier A2C setup (Controller.a2cModel compiled with RMSprop and
        # separate logits/value losses) was judged not clear enough and is
        # no longer wired in; the AC model below is used instead.
        self.a2cmodel = AC(obs_size, num_actions)

        # --- Memories ---
        self._abstract_memory = self._controllercore.AbstractG
        self._external_memory = Memory.ExternalMemory(memory_size=memory_size)

        # --- Memory reader / eraser / writer ---
        # memory_word_size has the same dimension as the hidden layer.
        shared_dims = {
            "memory_word_size": memory_word_size,
            "memory_size": memory_size,
        }
        read_heads = 2
        top_k = 3
        self._memory_reader = MemReadWrite.MemReader(
            num_read_heads=read_heads, top_k=top_k, **shared_dims)
        self._memory_eraser = MemReadWrite.MemErase(**shared_dims)
        self._memory_writer = MemReadWrite.MemWriter(**shared_dims)

        # --- Reconstruction mechanism ---
        self._memory_reconstructor = MemReconstruction.MemReconstructor(
            memory_word_size=memory_word_size)
コード例 #2
0
ファイル: GBMRagent.py プロジェクト: kangyongxin/GBMRcode
    def __init__(self,
                 num_actions=None,
                 dim_obs=None,
                 memory_size=100,
                 memory_word_size=32,
                 name="TrainableAgent"):
        """Set up the agent's view of the environment and build its modules.

        Records the action-space and observation-space sizes, then
        instantiates the encoder/decoder, controller, memory, memory
        reader/eraser/writer and memory-reconstruction components.

        Args:
            num_actions: Number of actions; forwarded to the controller core.
            dim_obs: Iterable of observation dimensions. Its entries are
                multiplied together to obtain the flattened observation
                size. The ``None`` default is not usable as-is, because
                ``list(None)`` raises ``TypeError``.
            memory_size: Forwarded as ``memory_size`` to the controller core,
                the external memory and the memory reader/eraser/writer.
            memory_word_size: Forwarded as the word size to the codecs, the
                controller components, the memory reader/eraser/writer and
                the memory reconstructor.
            name: Not used anywhere in this constructor.
        """
        self.num_actions = num_actions
        # Flattened observation size: product of every entry in dim_obs.
        flat_obs = reduce(lambda acc, dim: acc * dim, list(dim_obs))
        self._obs_size = flat_obs
        self.memory_size = memory_size
        self.memory_word_size = memory_word_size

        # Step 1: instantiate every component class (the five modules).

        # --- Encoders / decoders ---
        # All image codecs share the same three positional arguments.
        # NOTE(review): the meaning of the trailing 64 is not visible here;
        # presumably a hidden-layer width -- confirm in EncoderDecoder.
        codec_args = (flat_obs, memory_word_size, 64)
        self._im2state = EncoderDecoder.ImEncoder(*codec_args)
        self._state2im = EncoderDecoder.ImDecoder(*codec_args)
        self._vae = EncoderDecoder.VAE(*codec_args)
        self._optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
        # No batching is used, so the output is one-dimensional: a single
        # v value.
        self._vdecoder = EncoderDecoder.VDecoder(1, 200)
        self._vaev = EncoderDecoder.VAEV(*codec_args)

        # --- Controller ---
        self._controllercore = Controller.ControllerCore(
            num_actions=num_actions,
            num_node=10,
            memory_size=memory_size,
            memory_word_size=memory_word_size,
        )
        # Aggregator input and output dimensions are the same.
        self._aggregator = Controller.MeanAggregator(
            memory_word_size,
            memory_word_size,
            name="aggregator",
            concat=False,
        )
        self._aggmodel = Controller.AggModel(memory_word_size)
        # Batch starting point used when training the aggregator parameters.
        self.batch_num = 0

        # --- Memories ---
        self._abstract_memory = self._controllercore.AbstractG
        self._external_memory = Memory.ExternalMemory(memory_size=memory_size)

        # --- Memory reader / eraser / writer ---
        # memory_word_size has the same dimension as the hidden layer.
        shared_dims = {
            "memory_word_size": memory_word_size,
            "memory_size": memory_size,
        }
        read_heads = 2
        top_k = 3
        self._memory_reader = MemReadWrite.MemReader(
            num_read_heads=read_heads, top_k=top_k, **shared_dims)
        self._memory_eraser = MemReadWrite.MemErase(**shared_dims)
        self._memory_writer = MemReadWrite.MemWriter(**shared_dims)

        # --- Reconstruction mechanism ---
        self._memory_reconstructor = MemReconstruction.MemReconstructor(
            memory_word_size=memory_word_size)