Example #1
    def get_action(self, obs_dict, step_cnt):
        """
        get actions
        :param detector_obs_list:
        :param fighter_obs_list:
        :param joint_obs_dict:
        :param step_cnt:
        :return:
        """
        detector_action = []
        fighter_action = []
        for y in range(self.fighter_num):
            tmp_course = obs_dict['fighter'][y]['course']  # (1, )
            tmp_pos = obs_dict['fighter'][y]['pos']  # (2, )
            tmp_r_visible_pos = obs_dict['fighter'][y][
                'r_visible_pos']  # (10, 2)
            tmp_l_missile = obs_dict['fighter'][y]['l_missile']  # rule use
            tmp_s_missile = obs_dict['fighter'][y]['s_missile']  # rule use
            tmp_j_visible_fp = obs_dict['fighter'][y][
                'j_visible_fp']  # rule use
            tmp_j_visible_dir = obs_dict['fighter'][y][
                'j_visible_dir']  # (10, 1)
            tmp_g_visible_pos = obs_dict['fighter'][y][
                'g_visible_pos']  # (10, 2)  # rule use
            tmp_striking_id = obs_dict['fighter'][y]['striking_id']  # (10, 1)
            # build the model observation (normalize each field)
            course = tmp_course / 359.
            pos = tmp_pos / self.size_x
            r_visible_pos = tmp_r_visible_pos.reshape(
                1, -1)[0] / self.size_x  # (20,)
            j_visible_dir = tmp_j_visible_dir.reshape(1, -1)[0] / 359.  # (10,)
            striking_id = tmp_striking_id.reshape(1, -1)[0] / 1.  # already binary
            obs = np.concatenate(
                (course, pos, r_visible_pos, j_visible_dir, striking_id),
                axis=0)
            logger.debug('obs: {}'.format(obs))

            true_action = np.array([0, 1, 0, 0], dtype=np.int32)  # default for dead fighters
            if obs_dict['fighter'][y]['alive']:
                # rule policy
                true_action = fighter_rule(tmp_course, tmp_pos, tmp_l_missile,
                                           tmp_s_missile, tmp_r_visible_pos,
                                           tmp_j_visible_dir, tmp_j_visible_fp,
                                           tmp_g_visible_pos, step_cnt)
                logger.debug('true action rule out: {}'.format(true_action))
                # model policy
                # nothing visible (invisible slots are negative): let the model steer
                if not (np.any(r_visible_pos >= 0)
                        or np.any(j_visible_dir >= 0)):
                    tmp_action = self.maddpg.select_action(y, obs)
                    logger.debug('tmp action: {}'.format(tmp_action))
                    # apply the model action: convert the chosen index into a course deflection
                    tmp_action_i = np.argmax(tmp_action)
                    logger.info('tmp action i: {}'.format(tmp_action_i))
                    true_action[0] = action2direction(true_action[0],
                                                      tmp_action_i, ACTION_NUM)

            logger.info('true action: {}'.format(true_action))
            fighter_action.append(true_action)
        fighter_action = np.array(fighter_action)

        return detector_action, fighter_action
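
The helper action2direction is not shown in this example; a minimal sketch of what it might do, assuming it nudges the current rule-chosen course by a deflection derived from the model's discrete action index (the implementation below is hypothetical, only the call signature comes from the example):

    def action2direction(cur_course, action_idx, action_num):
        # Hypothetical sketch: spread the action_num discrete indices evenly
        # over a [-180, 180) degree deflection around the current course.
        offset = (float(action_idx) / action_num) * 360.0 - 180.0
        return int(cur_course + offset) % 360
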
Example #2
    def get_action(self, obs_dict, step_cnt):
        """
        get actions
        :param detector_obs_list:
        :param fighter_obs_list:
        :param joint_obs_dict:
        :param step_cnt:
        :return:
        """

        detector_action = []
        fighter_action = []
        for y in range(self.fighter_num):
            tmp_course = obs_dict['fighter'][y]['course']  # (1, )
            tmp_pos = obs_dict['fighter'][y]['pos']  # (2, )
            tmp_r_visible_pos = obs_dict['fighter'][y][
                'r_visible_pos']  # (10, 2)
            tmp_j_visible_fp = obs_dict['fighter'][y][
                'j_visible_fp']  # (10, 1)  # rule use
            tmp_l_missile = obs_dict['fighter'][y]['l_missile']  # rule use
            tmp_s_missile = obs_dict['fighter'][y]['s_missile']  # rule use
            tmp_j_visible_dir = obs_dict['fighter'][y][
                'j_visible_dir']  # (10, 1)  # rule use
            tmp_g_visible_pos = obs_dict['fighter'][y][
                'g_visible_pos']  # (10, 2)
            # build the model observation (normalize each field)
            course = tmp_course / 359.
            pos = tmp_pos / self.size_x
            r_visible_pos = tmp_r_visible_pos.reshape(
                1, -1)[0] / self.size_x  # (20,)
            j_visible_fp = tmp_j_visible_fp.reshape(1, -1)[0] / 359.  # (10,)
            g_visible_pos = tmp_g_visible_pos.reshape(
                1, -1)[0] / self.size_x  # (20,)
            obs = np.concatenate(
                (course, pos, r_visible_pos, j_visible_fp, g_visible_pos),
                axis=0)
            logger.debug('obs: {}'.format(obs))

            true_action = np.array([0, 1, 0, 0], dtype=np.int32)  # default for dead fighters
            if obs_dict['fighter'][y]['alive']:
                # model policy
                tmp_action_i = self.maddpg.select_action(y, obs)
                logger.debug('tmp action i: {}'.format(tmp_action_i))
                # rule policy
                true_action = fighter_rule(tmp_course, tmp_pos, tmp_l_missile,
                                           tmp_s_missile, tmp_r_visible_pos,
                                           tmp_j_visible_dir, tmp_j_visible_fp,
                                           tmp_g_visible_pos, step_cnt)
                logger.debug('true action rule out: {}'.format(true_action))
                # apply the model action to the attack slot  # todo
                if IS_DISPERSED:
                    # discrete head: take the argmax action index
                    true_action[2] = np.argmax(tmp_action_i)
                else:
                    # continuous head: map the raw output into [0, 11]
                    true_action[2] = range_transfer(tmp_action_i, 11)
                if true_action[2] == 11:
                    logger.info('agent {}: right'.format(y + 1))
                    logger.wait()
                logger.info('true action: {}'.format(true_action))

            fighter_action.append(true_action)
        fighter_action = np.array(fighter_action)

        return detector_action, fighter_action
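
range_transfer is likewise external to the example; a plausible sketch, assuming the continuous policy head emits a value in [-1, 1] (e.g. from a tanh activation) that must be mapped to an integer index in [0, n]:

    def range_transfer(raw_action, n):
        # Hypothetical sketch: clip to [-1, 1], then scale to [0, n] and round.
        clipped = max(-1.0, min(1.0, float(raw_action)))
        return int(round((clipped + 1.0) / 2.0 * n))
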
Example #3
                # temp: default action (also used when the fighter is dead)
                true_action = np.array([0, 1, 0, 0], dtype=np.int32)
                if not red_obs_dict['fighter'][y]['alive']:
                    # if the agent has died, output the default dead-agent action
                    action_list.append(
                        np.array([-1 for _ in range(ACTION_NUM)],
                                 dtype=np.float32))
                else:
                    # model policy
                    tmp_action = maddpg.select_action(y, obs)
                    logger.debug('tmp action: {}'.format(tmp_action))
                    # rule policy
                    true_action = fighter_rule(tmp_course, tmp_pos,
                                               tmp_l_missile, tmp_s_missile,
                                               tmp_r_visible_pos,
                                               tmp_j_visible_dir,
                                               tmp_j_visible_fp,
                                               tmp_g_visible_pos, step_cnt)
                    logger.debug(
                        'true action rule out: {}'.format(true_action))
                    # apply the model action to the attack slot
                    if IS_DISPERSED:
                        true_action[2] = np.argmax(tmp_action)
                    else:
                        true_action[2] = range_transfer(tmp_action, ACTION_NUM)
                    logger.debug('true action: {}'.format(true_action))

                    # keep the raw model action (not the rule-mixed one) in action_list
                    action_list.append(tmp_action)
                red_fighter_action.append(true_action)

            # env step
            logger.info('agent0 true action: {}'.format(
                red_fighter_action[0]))
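
All three examples assemble the model observation the same way; a condensed, self-contained version of that normalization (field names and shapes come from the examples; SIZE_X standing in for self.size_x is an assumption):

    import numpy as np

    SIZE_X = 1000  # assumed map width; the examples use self.size_x

    def build_obs(fighter):
        # Mirrors Example #1: flatten each field and scale it into [0, 1]
        # (invisible slots stay negative, which the visibility check relies on).
        course = fighter['course'] / 359.                          # (1,)
        pos = fighter['pos'] / SIZE_X                              # (2,)
        r_visible = fighter['r_visible_pos'].reshape(-1) / SIZE_X  # (20,)
        j_visible = fighter['j_visible_dir'].reshape(-1) / 359.    # (10,)
        striking = fighter['striking_id'].reshape(-1) / 1.         # (10,)
        return np.concatenate(
            (course, pos, r_visible, j_visible, striking), axis=0)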