Example #1
 def work(self):
     try:
         self._loop()
     except ConnectionResetError as e:
         logger.error(f"<{self.worker_id}> 与客户端的连接已断开,连接信息: {self.addr}")
     except TimeoutError as e:
         logger.error(f"<{self.worker_id}> 与客户端的连接超时,连接信息: {self.addr}")
     except Exception as e:
         logger.exception(f"<{self.worker_id}> Worker内部发生异常,循环计数: {self.loop_counter}")
     finally:
         logger.info(f"<{self.worker_id}> Worker已停止运行")
         self.handler.handle_stop()
         self.conn.close()
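The loop in _loop() (Example #2) runs while self._running is truthy, and Example #3 calls worker.stop() during shutdown. stop() itself is not shown; a minimal sketch under the assumption that it only clears the flag:

    def stop(self):
        # Ask _loop() to exit after the current iteration; note that a blocking
        # recv_json() call will still wait for one more message before the flag is checked.
        self._running = False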
Example #2
    def _loop(self):
        logger.info(f"<{self.worker_id}> Worker正在启动")

        while self._running:
            recv = self.conn.recv_json()

            if self.record_transmission:
                self.transmission_logger.debug(json.dumps(recv))
                
            if recv['type'] == INITIALIZE:
                self.handler.handle_init_data(recv['data'])
            elif recv['type'] == RESET:
                self.handler.handle_reset_data()
            elif recv['type'] == STEP:
                result = self.handler.handle_step_data(recv['data'])
                self.conn.send_json(result)
            elif recv['type'] == STOP:
                break
            self.loop_counter += 1
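From the dispatch above, the handler object must provide handle_init_data(data), handle_reset_data(), handle_step_data(data) (whose return value is sent back with send_json), plus handle_stop(), which work() in Example #1 calls on shutdown. A minimal stub that satisfies this interface; the echo behaviour is purely illustrative:

    class EchoHandler:
        """Minimal handler matching the interface Worker expects (illustrative only)."""

        def handle_init_data(self, data):
            # INITIALIZE message: store the payload.
            self.init_data = data

        def handle_reset_data(self):
            # RESET message: no payload is passed.
            pass

        def handle_step_data(self, data):
            # STEP message: whatever is returned goes back to the client via send_json().
            return {'echo': data}

        def handle_stop(self):
            # Called from work()'s finally block when the worker shuts down.
            pass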
Example #3
 def run(self):
     listener = socket.socket()
     listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
     listener.bind((self.host, self.port))
     listener.listen(10)
     logger.info(f"准备接受远程连接,监听地址{self.host}:{self.port}")
     try:
         if self.multi_worker:
             self.thread_controller = ThreadController()
             while True:
                 conn, addr = listener.accept()
                 self.thread_controller.create_work(addr, conn,
                                                    self.handler_factory)
         else:
             conn, addr = listener.accept()
             worker = Worker(addr, conn, self.handler_factory)
             worker.work()
     except KeyboardInterrupt:
         logger.info("AIServer was terminated by the user")
         if self.thread_controller:
             self.thread_controller.request_stop()
         elif worker is not None:  # single-worker mode; still None if interrupted during accept()
             worker.stop()
     finally:
         logger.info("AIServer正在尝试退出,请耐心等待...")
         listener.close()
         if self.thread_controller:
             self.thread_controller.join()
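ThreadController is not shown in these examples; one possible sketch, assuming create_work() simply runs a Worker (Examples #1 and #2) on a daemon thread and request_stop()/join() fan out over all workers:

    import threading

    class ThreadController:
        """Hypothetical sketch of the controller used by run() above."""

        def __init__(self):
            self._workers = []
            self._threads = []

        def create_work(self, addr, conn, handler_factory):
            # One Worker per accepted connection, each on its own thread.
            worker = Worker(addr, conn, handler_factory)
            thread = threading.Thread(target=worker.work, daemon=True)
            self._workers.append(worker)
            self._threads.append(thread)
            thread.start()

        def request_stop(self):
            for worker in self._workers:
                worker.stop()

        def join(self):
            for thread in self._threads:
                thread.join()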
Example #4
 def load_model(self):
     ckpt = tf.train.get_checkpoint_state(self.model_path_move +
                                          str(self.load_model_num) + '/')
     if ckpt and ckpt.model_checkpoint_path:
         self.saver.restore(self.sess, ckpt.model_checkpoint_path)
     logger.info('Loading the neural network')
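load_model() only restores; the matching save path is not shown here. A hedged counterpart using the same tf.train.Saver, assuming a save_model_num attribute analogous to the save_model_num variable in Example #5:

    def save_model(self, global_step=None):
        # Hypothetical counterpart to load_model(): writes a checkpoint that
        # tf.train.get_checkpoint_state() above can later discover.
        save_dir = self.model_path_move + str(self.save_model_num) + '/'
        save_path = self.saver.save(self.sess, save_dir + 'model.ckpt',
                                    global_step=global_step)
        logger.info('Saved the neural network to %s', save_path)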
Example #5
    save_net = False
    save_model_num = '2010211129_red_new'
    load_net = False
    load_model_num = '2010211129_red_new'
    save_interval_steps = 10000
    is_Train = True
    max_map_size_x = 77 * 2
    max_map_size_y = 92 * 2
    step_num = 2
    move_space = sum(range(step_num + 1)) * 6
    map_chanel = 1
    input_entity_size = 384
    batch = 128
    IS_MULTI_PROCESSING = True

    logger.info("*** SERVER RUNNING ***")
    cf = configparser.ConfigParser()
    cf.read("./config/server.ini")

    host = cf.get('Server', 'host')
    port = cf.getint('Server', 'port')
    transmission_test = cf.getboolean('Handler', 'transmission_test')

    ppo = None

    if transmission_test:
        func = lambda: TransmissionTest()
    else:
        ppo = ParallelPPO(max_map_size_x=max_map_size_x,
                          max_map_size_y=max_map_size_y,
                          move_space=move_space,
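The script above expects ./config/server.ini with a [Server] section providing host and port, and a [Handler] section providing transmission_test. A plausible example of that file; the values are placeholders:

    [Server]
    host = 0.0.0.0
    port = 9999

    [Handler]
    transmission_test = false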
Example #6
 def update(self, current_step, spatial_actor, spatial_critic, entity_actor,
            entity_critic, scalar_state, action, my_id, obj_id, move,
            move_type, r):
     if self.is_ppo_move:
         self.ppo_move.update(current_step, entity_critic, move_type, r)
         # return
     # with tf.device("/cpu:0"):
     logger.info('Updating the network')
     with tf.device(self._device_str2):
         self.sess.run(self.update_oldpi_op)
         adv = self.sess.run(
             self.advantage, {
                 self.tf_entity_critic: entity_critic,
                 self.tf_scalar_state: scalar_state,
                 self.tfdc_r: r
             })
     # adv = (adv - adv.mean()) / (adv.std() + 1e-6)  # sometimes helpful
     # update actor
     with tf.device(self._device_str1):
         if self.METHOD['name'] == 'kl_pen':
             for _ in range(self.A_UPDATE_STEPS):
                 _, kl = self.sess.run(
                     [self.atrain_op, self.kl_mean], {
                         self.tf_spatial_critic: spatial_critic,
                         self.tf_entity_critic: entity_critic,
                         self.tfaction_type: action,
                         self.tfadv: adv,
                         self.tflam: self.METHOD['lam']
                     })
                 if kl > 4 * self.METHOD['kl_target']:  # this is in Google's paper
                     break
             if kl < self.METHOD['kl_target'] / 1.5:  # adaptive lambda, this is in OpenAI's paper
                 self.METHOD['lam'] /= 2
             elif kl > self.METHOD['kl_target'] * 1.5:
                 self.METHOD['lam'] *= 2
             # sometimes explodes, this clipping is my solution
             self.METHOD['lam'] = np.clip(self.METHOD['lam'], 1e-4, 10)
         else:  # clipping method, find this is better (OpenAI's paper)
             for _ in range(self.A_UPDATE_STEPS):
                 self.sess.run(
                     self.atrain_op, {
                         self.tf_entity_critic: entity_critic,
                         self.tf_scalar_state: scalar_state,
                         # self.tfaction_type: np.squeeze(a, axis=1),
                         self.tfaction_type: action,
                         self.tfmy_id: my_id,
                         self.tfobj_id: obj_id,
                         self.tfmove: move,
                         self.tfadv: adv
                     })
     # update critic
     with tf.device(self._device_str2):
         for _ in range(self.C_UPDATE_STEPS):
             self.sess.run(
                 self.ctrain_op, {
                     # self.tf_spatial_critic: spatial_critic,
                     self.tf_entity_critic: entity_critic,
                     self.tf_scalar_state: scalar_state,
                     self.tfdc_r: r
                 })
     # display the loss
     adv_lis = np.squeeze(np.square(adv), axis=1)
     self.adv_list += adv_lis.tolist()
     critic_list = np.squeeze(self.get_v(entity_critic, scalar_state),
                              axis=1)
     self.critic_list += critic_list.tolist()
     if self.dislpay_loss_counter == self.display_loss_interval:
         self.display_loss(self.adv_list, self.critic_list)
         self.dislpay_loss_counter = 0
     self.dislpay_loss_counter += 1
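For reference, the actor update above minimizes the negative of PPO's clipped surrogate for each action head (Example #7 builds the graph). A small NumPy sketch of that objective for a single head, using the same epsilon=0.2 and the same 1e-5 floor on the old probability; illustrative only:

    import numpy as np

    def clipped_surrogate(prob_new, prob_old, adv, epsilon=0.2):
        # ratio = pi(a|s) / max(pi_old(a|s), 1e-5), as in the TF graph of Example #7
        ratio = prob_new / np.maximum(prob_old, 1e-5)
        clipped = np.clip(ratio, 1. - epsilon, 1. + epsilon)
        # The loss handed to AdamOptimizer is the negative of this mean.
        return np.mean(np.minimum(ratio * adv, clipped * adv))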
Example #7
    def __init__(self, actions_space, max_map_size_x, max_map_size_y, move,
                 map_chanel, input_entity_size, load_net, is_train, batch,
                 load_model_num, use_gpu):
        self._device_str1 = '/gpu:0' if use_gpu else '/cpu:0'
        self._device_str2 = '/gpu:1' if use_gpu else '/cpu:0'
        logger.info(f"PPO初始化")
        logger.info(f"使用设备:{self._device_str1}")
        logger.info(f"使用设备:{self._device_str2}")
        self.A_LR = 0.0001
        self.C_LR = 0.0002
        self.batch = batch
        self.A_UPDATE_STEPS = 10
        self.C_UPDATE_STEPS = 10
        self.METHOD = [
            dict(name='kl_pen', kl_target=0.01, lam=0.5),  # KL penalty
            dict(name='clip', epsilon=0.2
                 ),  # Clipped surrogate objective, find this is better
        ][1]  # choose the method for optimization

        # configure the TF session
        config = tf.ConfigProto()
        # config.gpu_options.per_process_gpu_memory_fraction = 0.5  # fraction of GPU memory to use
        config.gpu_options.allow_growth = True  # allocate GPU memory on demand
        config.log_device_placement = False
        # config.device_count = {'cpu': 0}
        # config.allow_soft_placement = True
        self.sess = tf.Session(config=config)
        self.C_DIM = 1

        # additional parameters
        self.memory_entity_critic, self.memory_scalar_state, self.memory_r = None, None, None
        self.memory_action, self.memory_my_id, self.memory_obj_id, self.memory_move = None, None, None, None
        self.actions_space = actions_space
        self.max_map_size_x = max_map_size_x
        self.max_map_size_y = max_map_size_y
        self.move = move
        self.map_chanel = map_chanel
        self.action_type_size = 14
        self.my_id_size = 34
        self.obj_id_size = 34
        self.input_entity_size = input_entity_size
        self.move_type = 10
        self.input_scalar_size = 20
        self.update_interval = 1
        self.lstm_batch_size = 1
        self.eps = 0.000001

        # define the input of the state
        # self.tfs = tf.placeholder(tf.float32, [None, self.S_DIM], "state")

        # saving and loading the network
        self.model_path = 'PredictAI_model/'
        self.load_model_num = load_model_num
        self.save_interval = 0

        self.adv_list = []
        self.critic_list = []
        self.fig = plt.figure()
        self.display_loss_interval = 10
        self.dislpay_loss_counter = 0

        # # define the ppo_move
        # self.ppo_move = PPO_MOVE(batch, step_num=2)
        self.ppo_move = PPONet(step_num=3)
        self.is_ppo_move = False
        # self.loss_display = fig.add_subplot(1, 1, 1)
        # self.loss_display.scatter(self.current_step, self.loss)
        plt.ion()  # comment this out for a single run; leave it in when running globally
        plt.show()

        self.get_on_flage = False

        # actor & critic
        logger.info('Initializing the neural network')
        with tf.variable_scope('ACnet'):
            self.ACNet = ACNet(self.max_map_size_y, self.max_map_size_x,
                               self.map_chanel, self.lstm_batch_size,
                               self.actions_space, self.action_type_size,
                               self.my_id_size, self.obj_id_size, self.move,
                               self.A_LR, use_gpu)
            # self.tf_spatial_critic = tf.placeholder(tf.float32, [None, self.max_map_size_y, self.max_map_size_x,
            #                                                      self.map_chanel], name='inputs')
            self.tf_entity_critic = tf.placeholder(
                tf.float32, [None, self.input_entity_size],
                name='inputs_entity')
            self.tf_scalar_state = tf.placeholder(
                tf.float32, [None, self.input_scalar_size],
                name='inputs_scale')
            self.tf_spatial_critic = tf.placeholder(tf.float32, [
                None, self.max_map_size_y, self.max_map_size_x, self.map_chanel
            ],
                                                    name='actor_inputs')
            # self.tf_entity_actor = tf.placeholder(tf.float32, [None, self.input_entity_size], name='actor_inputs_scale')

            self.tfdc_r = tf.placeholder(tf.float32, [None, self.C_DIM],
                                         'discounted_r')
            self.ANet, self.old_ANet, self.critic = self.ACNet.build_ACNet(
                self.tf_entity_critic, self.tf_scalar_state, True)
            self.advantage = self.tfdc_r - self.critic
            self.closs = tf.reduce_mean(tf.square(self.advantage))
            with tf.device('/cpu:0'):
                tf.summary.scalar('closs', self.closs)  # tensorflow >= 0.12
            self.ctrain_op = tf.train.AdamOptimizer(self.C_LR).minimize(
                self.closs)

            # with tf.device(self._device_str1):

        # used in the discrete case
        with tf.variable_scope('update_oldpi'):
            self.update_oldpi_op = [
                oldp.assign(p) for p, oldp in zip(
                    self.ANet['actor_params'], self.old_ANet['actor_params'])
            ]

        # pick the chosen action from the action-type probability distribution for the update
        # with tf.device(self._device_str):
        # fixme
        self.tfaction_type = tf.placeholder(tf.int32, [None, 1], 'action')
        # a_tfa = tf.reshape(self.tfaction_type, [self.A_DIM, -1])
        action_type_indices = tf.stack([
            tf.range(tf.shape(self.tfaction_type)[0], dtype=tf.int32),
            tf.squeeze(self.tfaction_type, axis=1)
        ],
                                       axis=1)
        action_type_prob = tf.gather_nd(
            params=self.ANet['action_type_prob'],
            indices=action_type_indices)  # shape=(None, )
        oldaction_type_prob = tf.gather_nd(
            params=self.old_ANet['action_type_prob'],
            indices=action_type_indices)  # shape=(None, )

        # pick our own unit from the friendly-unit probability distribution for the update
        self.tfmy_id = tf.placeholder(tf.int32, [None, 1], 'action')
        my_id_indices = tf.stack([
            tf.range(tf.shape(self.tfmy_id)[0], dtype=tf.int32),
            tf.squeeze(self.tfmy_id, axis=1)
        ],
                                 axis=1)
        my_id_prob = tf.gather_nd(params=self.ANet['my_id_prob'],
                                  indices=my_id_indices)  # shape=(None, )
        oldmy_id_prob = tf.gather_nd(params=self.old_ANet['my_id_prob'],
                                     indices=my_id_indices)  # shape=(None, )

        # pick the enemy unit from the enemy-unit probability distribution for the update
        self.tfobj_id = tf.placeholder(tf.int32, [None, 1], 'action')
        obj_id_indices = tf.stack([
            tf.range(tf.shape(self.tfobj_id)[0], dtype=tf.int32),
            tf.squeeze(self.tfobj_id, axis=1)
        ],
                                  axis=1)
        obj_id_prob = tf.gather_nd(params=self.ANet['obj_id_prob'],
                                   indices=obj_id_indices)  # shape=(None, )
        oldobj_id_prob = tf.gather_nd(params=self.old_ANet['obj_id_prob'],
                                      indices=obj_id_indices)  # shape=(None, )

        # pick the chosen position from the move distribution
        self.tfmove = tf.placeholder(tf.int32, [None, 1], 'move_x')
        move_indices = tf.stack([
            tf.range(tf.shape(self.tfmove)[0], dtype=tf.int32),
            tf.squeeze(self.tfmove, axis=1)
        ],
                                axis=1)
        move_prob = tf.gather_nd(params=self.ANet['move_prob'],
                                 indices=move_indices)  # shape=(None, )
        oldmove_prob = tf.gather_nd(params=self.old_ANet['move_prob'],
                                    indices=move_indices)  # shape=(None, )

        # loss
        self.tfadv = tf.placeholder(tf.float32, [None, 1],
                                    'advantage')  # the TD-error
        with tf.variable_scope('loss'):
            with tf.variable_scope('surrogate'):
                # ratio = tf.exp(pi.log_prob(self.tfaction_type) - oldpi.log_prob(self.tfaction_type))      # continuous case, OpenAI's formulation
                # ratio = pi.tf.divide(self.tfaction_type) / oldpi.prob(self.tfaction_type)                 # continuous case, DeepMind's formulation

                # compute the per-head ratios separately, then sum them for the overall loss
                # used in the discrete case
                ratio_action_type = tf.divide(
                    action_type_prob, tf.maximum(oldaction_type_prob, 1e-5))

                ratio_my_id = tf.divide(my_id_prob,
                                        tf.maximum(oldmy_id_prob, 1e-5))
                ratio_obj_id = tf.divide(obj_id_prob,
                                         tf.maximum(oldobj_id_prob, 1e-5))
                ratio_move = tf.divide(move_prob,
                                       tf.maximum(oldmove_prob, 1e-5))

                surr_action_type = ratio_action_type * self.tfadv
                surr_my_id = ratio_my_id * self.tfadv
                surr_obj_id = ratio_obj_id * self.tfadv
                surr_move = ratio_move * self.tfadv

            if self.METHOD['name'] == 'kl_pen':
                self.tflam = tf.placeholder(tf.float32, None, 'lambda')
                kl = tf.distributions.kl_divergence(
                    self.old_ANet['action_type_prob'],
                    self.ANet['action_type_prob'])
                self.kl_mean = tf.reduce_mean(kl)
                with tf.name_scope('aloss'):
                    self.aloss = -(tf.reduce_mean(surr_action_type -
                                                  self.tflam * kl))
            else:  # clipping method, find this is better
                self.aloss = -(
                    tf.reduce_mean(
                        tf.minimum(
                            surr_action_type,
                            tf.clip_by_value(
                                ratio_action_type, 1. - self.METHOD['epsilon'],
                                1. + self.METHOD['epsilon']) * self.tfadv)) +
                    tf.reduce_mean(
                        tf.minimum(
                            surr_my_id,
                            tf.clip_by_value(ratio_my_id,
                                             1. - self.METHOD['epsilon'],
                                             1. + self.METHOD['epsilon']) *
                            self.tfadv)) +
                    tf.reduce_mean(
                        tf.minimum(
                            surr_obj_id,
                            tf.clip_by_value(
                                ratio_obj_id, 1. - self.METHOD['epsilon'],
                                1. + self.METHOD['epsilon']) * self.tfadv)) +
                    tf.reduce_mean(
                        tf.minimum(
                            surr_move,
                            tf.clip_by_value(
                                ratio_move, 1. - self.METHOD['epsilon'],
                                1. + self.METHOD['epsilon']) * self.tfadv)))
        with tf.variable_scope('atrain'):
            with tf.device(self._device_str1):
                self.atrain_op = tf.train.AdamOptimizer(self.A_LR).minimize(
                    self.aloss)
            self.merged = tf.summary.merge_all()
            self.writer = tf.summary.FileWriter("log/tensorflow/",
                                                self.sess.graph)

        # initializing the global components
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()

        # loading the network model
        if load_net is True:
            self.load_model()
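The tf.stack/tf.gather_nd pattern repeated for every action head above selects, per batch row, the probability of the action that was actually taken. The same indexing in NumPy, with made-up numbers:

    import numpy as np

    # Batch of 3 samples, 4 possible action types (made-up probabilities).
    action_type_prob = np.array([[0.1, 0.2, 0.3, 0.4],
                                 [0.7, 0.1, 0.1, 0.1],
                                 [0.25, 0.25, 0.25, 0.25]])
    chosen = np.array([[3], [0], [1]])  # shape (None, 1), like tfaction_type

    # Equivalent of tf.stack([tf.range(n), tf.squeeze(chosen, axis=1)], axis=1)
    indices = np.stack([np.arange(chosen.shape[0]), chosen[:, 0]], axis=1)
    # Equivalent of tf.gather_nd(params, indices): one probability per row.
    picked = action_type_prob[indices[:, 0], indices[:, 1]]  # -> [0.4, 0.7, 0.25]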