Beispiel #1
0
def run(agent1_name,
        agent2_name,
        map_name,
        round_num,
        max_step,
        random_pos=False):
    """Play ``round_num`` head-to-head rounds between two agents.

    :param agent1_name: red-side agent name (directory under ``agent/``)
    :param agent2_name: blue-side agent name (directory under ``agent/``)
    :param map_name: map name (file ``maps/<map_name>.map``)
    :param round_num: number of rounds to play
    :param max_step: maximum steps per round
    :param random_pos: whether to use random starting positions
    :return: agent1_win_times, agent2_win_times, draw_times, agent1_crash_times, agent2_crash_times, agent1_timeout_times, agent2_timeout_times, agent1_launch_failure_times, agent2_launch_failure_times
    """
    side1_win_times = 0
    side2_win_times = 0
    draw_times = 0
    log_flag = agent1_name + '_vs_' + agent2_name
    round_cnt = 0
    agent1_crash_list = []
    agent2_crash_list = []
    agent1_timeout_list = []
    agent2_timeout_list = []

    # file path constructing
    map_path = 'maps/' + map_name + '.map'
    agent1_path = 'agent/' + agent1_name + '/agent.py'
    agent2_path = 'agent/' + agent2_name + '/agent.py'

    if not os.path.exists(map_path):
        print('Error: map file not exist!')
        exit(-1)
    if not os.path.exists(agent1_path):
        print('Error: agent1 file not exist!')
        exit(-1)
    if not os.path.exists(agent2_path):
        print('Error: agent2 file not exist!')
        exit(-1)
    # make env
    env = Environment(map_path,
                      'raw',
                      'raw',
                      max_step=max_step,
                      render=True,
                      random_pos=random_pos,
                      log=log_flag)
    # get map info
    size_x, size_y = env.get_map_size()
    side1_detector_num, side1_fighter_num, side2_detector_num, side2_fighter_num = env.get_unit_num(
    )
    # create agent
    agent1 = AgentCtrl(agent1_name, size_x, size_y, side1_detector_num,
                       side1_fighter_num)
    agent2 = AgentCtrl(agent2_name, size_x, size_y, side2_detector_num,
                       side2_fighter_num)
    # BUGFIX: initialize BOTH agents before terminating anything. The
    # previous version terminated both controllers as soon as agent1
    # failed, so the subsequent agent2.agent_init() ran against an
    # already-terminated controller and mis-reported agent2 as failed
    # too, turning a one-sided launch failure into a mutual one.
    agent1_launch_failed = not agent1.agent_init()
    if agent1_launch_failed:
        print('ERROR: Agent1 ' + agent1_name + ' init failed!')
    agent2_launch_failed = not agent2.agent_init()
    if agent2_launch_failed:
        print('ERROR: Agent2 ' + agent2_name + ' init failed!')
    # Launch-failure policy: a side that fails to launch loses every
    # round (counted as round_num launch failures); if both sides fail,
    # every round is a draw. Otherwise continue as normal.
    if agent1_launch_failed or agent2_launch_failed:
        agent1.terminate()
        agent2.terminate()
        if agent1_launch_failed and agent2_launch_failed:
            return 0, 0, round_num, 0, 0, 0, 0, round_num, round_num
        elif agent1_launch_failed:
            return 0, round_num, 0, 0, 0, 0, 0, round_num, 0
        else:
            return round_num, 0, 0, 0, 0, 0, 0, 0, round_num
    # execution
    # input("Press the <ENTER> key to continue...")
    for x in range(round_num):
        if x != 0:
            env.reset()
        step_cnt = 0
        round_cnt += 1
        while True:
            step_cnt += 1
            # get obs
            side1_obs_dict, side2_obs_dict = env.get_obs()
            # get action; result code: 0 = ok, 1 = crash, 2 = timeout
            agent1_action, agent1_result = agent1.get_action(
                side1_obs_dict, step_cnt)
            if agent1_result == 0:
                side1_detector_action = agent1_action['detector_action']
                side1_fighter_action = agent1_action['fighter_action']
            elif agent1_result == 1:
                agent1_crash_list.append(round_cnt)
            elif agent1_result == 2:
                agent1_timeout_list.append(round_cnt)
            agent2_action, agent2_result = agent2.get_action(
                side2_obs_dict, step_cnt)
            if agent2_result == 0:
                side2_detector_action = agent2_action['detector_action']
                side2_fighter_action = agent2_action['fighter_action']
            elif agent2_result == 1:
                agent2_crash_list.append(round_cnt)
            elif agent2_result == 2:
                agent2_timeout_list.append(round_cnt)
            # execution: a failed side surrenders (2 = both surrender)
            if agent1_result == 0 and agent2_result == 0:
                env.step(side1_detector_action, side1_fighter_action,
                         side2_detector_action, side2_fighter_action)
            elif agent1_result != 0 and agent2_result != 0:
                env.set_surrender(2)
            elif agent1_result != 0:
                env.set_surrender(0)
            else:
                env.set_surrender(1)
            # get done
            if env.get_done():
                # decide the round by comparing the two game rewards
                o_detector_reward, o_fighter_reward, o_game_reward, e_detector_reward, e_fighter_reward, e_game_reward = env.get_reward(
                )
                if o_game_reward > e_game_reward:
                    side1_win_times += 1
                elif o_game_reward < e_game_reward:
                    side2_win_times += 1
                else:
                    draw_times += 1
                break
    agent1.terminate()
    agent2.terminate()
    return side1_win_times, side2_win_times, draw_times, len(
        agent1_crash_list), len(agent2_crash_list), len(
            agent1_timeout_list), len(agent2_timeout_list), 0, 0
Beispiel #2
0
    round_cnt = 0
    agent1_crash_list = []
    agent2_crash_list = []
    # input("Press the <ENTER> key to continue...")
    for x in range(args.round):
        side1_total_reward = 0
        side2_total_reward = 0
        if x != 0:
            env.reset()
        step_cnt = 0
        round_cnt += 1
        while True:
            time.sleep(step_delay)
            step_cnt += 1
            # get obs
            side1_obs_dict, side2_obs_dict = env.get_obs()
            # get action
            try:
                side1_detector_action, side1_fighter_action = agent1.get_action(
                    side1_obs_dict, step_cnt)

                # todo 更改我方动作
                # for i in range(len(side1_fighter_action)):
                #     if args.agent1 == 'fix_rule':
                #         # 规则
                #         side1_fighter_action[i]['r_fre_point'] = i + 1
                #     else:
                #         # model
                #         # side1_fighter_action[i][1] = i+1
                #         side1_fighter_action[i][1] = random.randint(1, 10)
                #         # side1_fighter_action[i][2] = 11
Beispiel #3
0
    red_detector_action = []
    fighter_model = dqn.RLFighter(ACTION_NUM)

    # execution
    for x in range(MAX_EPOCH):
        step_cnt = 0
        env.reset()
        episodic_reward = 0
        while True:
            obs_list = []
            action_list = []
            red_fighter_action = []
            # get obs
            if step_cnt == 0:
                red_obs_dict, blue_obs_dict = env.get_obs()

            # get action
            # get blue action
            blue_detector_action, blue_fighter_action = blue_agent.get_action(
                blue_obs_dict, step_cnt)
            # get red action
            obs_got_ind = [False] * red_fighter_num
            for y in range(red_fighter_num):
                true_action = np.array([0, 1, 0, 0], dtype=np.int32)
                if red_obs_dict['fighter'][y]['alive']:
                    obs_got_ind[y] = True
                    # tmp_img_obs = red_obs_dict['fighter'][y]['screen']
                    # tmp_img_obs = tmp_img_obs.transpose(2, 0, 1)
                    tmp_info_obs = red_obs_dict['fighter'][y]['info']
                    tmp_action = fighter_model.choose_action(tmp_info_obs)
Beispiel #4
0
                      max_step=MAX_STEP,
                      random_pos=True)
    # get map info
    size_x, size_y = env.get_map_size()  # size_x == size_y == 1000
    red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num(
    )
    red_detector_action, blue_detector_action = [], []  # temp

    for i_episode in range(MAX_EPOCH):
        step_cnt = 0
        env.reset()
        total_reward = 0.0  # 每回合所有智能体的总体奖励
        rr = np.zeros((FIGHTER_NUM, ))  # 每回合每个智能体的奖励

        # get obs
        red_obs_dict, blue_obs_dict = env.get_obs()  # output: raw obs结构体
        obs_dict_list = [red_obs_dict, blue_obs_dict]

        while True:
            # obs_list = []
            obs_list = []  # len == n agents
            action_list = []  # # len == n agents
            fighter_action = []  # # len == n agents

            # get red action
            for obs_dict in obs_dict_list:
                for y in range(red_fighter_num):
                    tmp_course = obs_dict['fighter'][y]['course']  # (1, )
                    tmp_pos = obs_dict['fighter'][y]['pos']  # (2, )
                    tmp_l_missile = obs_dict['fighter'][y][
                        'l_missile']  # (1, )
Beispiel #5
0
        step_cnt = 0
        num_agents = 10
        num_units = 20
        env.reset()

        obs_list, en_obs_list = [], []
        behavior_value_list = []
        action_head0_list, action_head1_list, action_head2_list = [], [], []
        probs_head0_list, logits_head1_list, probs_head2_list = [], [], []
        memory_mask1_list, memory_mask2_list, action_mask_list = [], [], []
        next_obs_list, next_en_obs_list = [], []
        reward_list = []

        while True:
            red_detector_action, red_fighter_action = [], []
            red_obs_dict, blue_obs_dict = env.get_obs()

            fighter_tmp_obs = np.asarray(red_obs_dict['fighter'], dtype=np.float32)   # 在同质化的设定中,只有 fighter
            enemy_tmp_obs = np.asarray(red_obs_dict['enemy'], dtype=np.float32)
            fighter_visible_enemys_dict = red_obs_dict['fighter_visible_enemys_dict']
            fighter_data_obs_list = red_obs_dict['fighter_raw']
            # get obs
            if step_cnt == 0:
                red_obs_dict, blue_obs_dict = env.get_obs()     # 此处得到 obs
            # get red and blue action
            blue_detector_action, blue_fighter_action = blue_agent.get_action(blue_obs_dict, step_cnt)
            obs_got_ind = [False] * red_fighter_num

            red_obs, red_en_obs = np.asarray(red_obs_dict['fighter']), np.asarray(red_obs_dict['enemy'])
            agent_obs = np.zeros([num_agents] + list(red_obs.shape))
Beispiel #6
0
                            blue_fighter_num)

    red_detector_action = []
    fighter_model = dqn.RLFighter(ACTION_NUM)

    # execution
    for x in range(MAX_EPOCH):
        step_cnt = 0
        env.reset()
        while True:
            obs_list = []
            action_list = []
            red_fighter_action = []
            # get obs
            if step_cnt == 0:
                red_obs_dict, blue_obs_dict = env.get_obs()  # 此处得到 obs
            # get action
            # get blue action
            blue_detector_action, blue_fighter_action = blue_agent.get_action(
                blue_obs_dict, step_cnt)
            # get red action
            obs_got_ind = [False] * red_fighter_num
            for y in range(red_fighter_num):
                true_action = np.array([0, 1, 0, 0], dtype=np.int32)
                if red_obs_dict['fighter'][y]['alive']:
                    obs_got_ind[y] = True
                    tmp_img_obs = red_obs_dict['fighter'][y]['screen']
                    tmp_img_obs = tmp_img_obs.transpose(2, 0, 1)
                    tmp_info_obs = red_obs_dict['fighter'][y]['info']
                    tmp_action = fighter_model.choose_action(
                        tmp_img_obs, tmp_info_obs)