agent1 = agent1_module.Agent()
agent2 = agent2_module.Agent()
agent1_obs_ind = agent1.get_obs_ind()
agent2_obs_ind = agent2.get_obs_ind()
# environment initiation
if args.log:
    if args.log_path == 'default_log':
        log_flag = args.agent1 + '_vs_' + args.agent2
    else:
        log_flag = args.log_path
else:
    log_flag = False
env = Environment(map_path, agent1_obs_ind, agent2_obs_ind, max_step=args.max_step,
                  render=True, random_pos=args.random_pos, log=log_flag)
# get map info
size_x, size_y = env.get_map_size()
side1_detector_num, side1_fighter_num, side2_detector_num, side2_fighter_num = env.get_unit_num()
agent1.set_map_info(size_x, size_y, side1_detector_num, side1_fighter_num)
agent2.set_map_info(size_x, size_y, side2_detector_num, side2_fighter_num)
# execution
step_cnt = 0
round_cnt = 0
agent1_crash_list = []
agent2_crash_list = []
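# A minimal sketch of the execution loop this setup leads into, mirroring the
# AgentCtrl-based run() below. Assumptions: args.round holds the number of
# rounds (hypothetical flag), and each Agent's get_action(obs_dict, step_cnt)
# returns a (detector_action, fighter_action) pair; the crash lists suggest
# the real loop guards each call with try/except.
for r in range(args.round):
    if r != 0:
        env.reset()
    step_cnt = 0
    round_cnt += 1
    while True:
        step_cnt += 1
        side1_obs_dict, side2_obs_dict = env.get_obs()
        side1_ok = side2_ok = True
        try:
            side1_detector_action, side1_fighter_action = agent1.get_action(side1_obs_dict, step_cnt)
        except Exception:
            agent1_crash_list.append(round_cnt)
            side1_ok = False
        try:
            side2_detector_action, side2_fighter_action = agent2.get_action(side2_obs_dict, step_cnt)
        except Exception:
            agent2_crash_list.append(round_cnt)
            side2_ok = False
        if side1_ok and side2_ok:
            env.step(side1_detector_action, side1_fighter_action,
                     side2_detector_action, side2_fighter_action)
        elif not side1_ok and not side2_ok:
            env.set_surrender(2)  # both sides failed this step
        elif not side1_ok:
            env.set_surrender(0)  # side 1 surrenders
        else:
            env.set_surrender(1)  # side 2 surrenders
        if env.get_done():
            break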
def run(agent1_name, agent2_name, map_name, round_num, max_step, random_pos=False):
    """
    :param agent1_name: red side name
    :param agent2_name: blue side name
    :param map_name: map name
    :param round_num: number of rounds to play
    :param max_step: maximum steps per round
    :param random_pos: randomize starting positions
    :return: agent1_win_times, agent2_win_times, draw_times, agent1_crash_times, agent2_crash_times,
             agent1_timeout_times, agent2_timeout_times, agent1_launch_failure_times, agent2_launch_failure_times
    """
    side1_win_times = 0
    side2_win_times = 0
    draw_times = 0
    log_flag = agent1_name + '_vs_' + agent2_name
    agent1_launch_failed = False
    agent2_launch_failed = False
    round_cnt = 0
    agent1_crash_list = []
    agent2_crash_list = []
    agent1_timeout_list = []
    agent2_timeout_list = []
    # file path construction
    map_path = 'maps/' + map_name + '.map'
    agent1_path = 'agent/' + agent1_name + '/agent.py'
    agent2_path = 'agent/' + agent2_name + '/agent.py'
    if not os.path.exists(map_path):
        print('Error: map file does not exist!')
        exit(-1)
    if not os.path.exists(agent1_path):
        print('Error: agent1 file does not exist!')
        exit(-1)
    if not os.path.exists(agent2_path):
        print('Error: agent2 file does not exist!')
        exit(-1)
    # make env
    env = Environment(map_path, 'raw', 'raw', max_step=max_step, render=True,
                      random_pos=random_pos, log=log_flag)
    # get map info
    size_x, size_y = env.get_map_size()
    side1_detector_num, side1_fighter_num, side2_detector_num, side2_fighter_num = env.get_unit_num()
    # create agents
    agent1 = AgentCtrl(agent1_name, size_x, size_y, side1_detector_num, side1_fighter_num)
    agent2 = AgentCtrl(agent2_name, size_x, size_y, side2_detector_num, side2_fighter_num)
    if not agent1.agent_init():
        print('ERROR: Agent1 ' + agent1_name + ' init failed!')
        agent1.terminate()
        agent2.terminate()
        agent1_launch_failed = True
    if not agent2.agent_init():
        print('ERROR: Agent2 ' + agent2_name + ' init failed!')
        agent1.terminate()
        agent2.terminate()
        agent2_launch_failed = True
    # If one side fails to launch, that side is scored as losing every round and its
    # launch-failure count is round_num. If both sides fail to launch, all round_num
    # rounds are scored as draws. Everything else follows the rules above.
    if agent1_launch_failed and agent2_launch_failed:
        return 0, 0, round_num, 0, 0, 0, 0, round_num, round_num
    elif agent1_launch_failed:
        return 0, round_num, 0, 0, 0, 0, 0, round_num, 0
    elif agent2_launch_failed:
        return round_num, 0, 0, 0, 0, 0, 0, 0, round_num
    # execution
    # input("Press the <ENTER> key to continue...")
    for x in range(round_num):
        if x != 0:
            env.reset()
        step_cnt = 0
        round_cnt += 1
        while True:
            step_cnt += 1
            # get obs
            side1_obs_dict, side2_obs_dict = env.get_obs()
            # get action
            agent1_action, agent1_result = agent1.get_action(side1_obs_dict, step_cnt)
            if agent1_result == 0:
                side1_detector_action = agent1_action['detector_action']
                side1_fighter_action = agent1_action['fighter_action']
            elif agent1_result == 1:
                agent1_crash_list.append(round_cnt)
            elif agent1_result == 2:
                agent1_timeout_list.append(round_cnt)
            agent2_action, agent2_result = agent2.get_action(side2_obs_dict, step_cnt)
            if agent2_result == 0:
                side2_detector_action = agent2_action['detector_action']
                side2_fighter_action = agent2_action['fighter_action']
            elif agent2_result == 1:
                agent2_crash_list.append(round_cnt)
            elif agent2_result == 2:
                agent2_timeout_list.append(round_cnt)
            # execution
            if agent1_result == 0 and agent2_result == 0:
                env.step(side1_detector_action, side1_fighter_action,
                         side2_detector_action, side2_fighter_action)
            elif agent1_result != 0 and agent2_result != 0:
                env.set_surrender(2)
            elif agent1_result != 0:
                env.set_surrender(0)
            else:
                env.set_surrender(1)
            # get done
            if env.get_done():
                # reward
                o_detector_reward, o_fighter_reward, o_game_reward, \
                    e_detector_reward, e_fighter_reward, e_game_reward = env.get_reward()
                if o_game_reward > e_game_reward:
                    side1_win_times += 1
                elif o_game_reward < e_game_reward:
                    side2_win_times += 1
                else:
                    draw_times += 1
                break
    agent1.terminate()
    agent2.terminate()
    return (side1_win_times, side2_win_times, draw_times,
            len(agent1_crash_list), len(agent2_crash_list),
            len(agent1_timeout_list), len(agent2_timeout_list), 0, 0)
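# A minimal sketch of how run() might be invoked from the command line. The
# flag names and default values below are assumptions for illustration, not
# this project's actual CLI.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='batch fight runner (sketch)')
    parser.add_argument('--agent1', type=str, default='fix_rule')      # hypothetical default
    parser.add_argument('--agent2', type=str, default='fix_rule')      # hypothetical default
    parser.add_argument('--map', type=str, default='1000_1000_2_10')   # hypothetical default
    parser.add_argument('--round', type=int, default=10)
    parser.add_argument('--max_step', type=int, default=5000)
    args = parser.parse_args()
    results = run(args.agent1, args.agent2, args.map, args.round, args.max_step)
    print('win/lose/draw:', results[0], results[1], results[2])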
ATTACK_IND_NUM = (DETECTOR_NUM + FIGHTER_NUM) * 2 + 1  # long missile attack + short missile attack + no attack
ACTION_NUM = COURSE_NUM * ATTACK_IND_NUM
LEARN_INTERVAL = 100

if __name__ == "__main__":
    # create blue agent
    blue_agent = Agent()
    # get agent obs type
    red_agent_obs_ind = 'feature'
    blue_agent_obs_ind = blue_agent.get_obs_ind()
    # make env
    env = Environment(MAP_PATH, red_agent_obs_ind, blue_agent_obs_ind, max_step=1000, render=RENDER)
    # get map info
    size_x, size_y = env.get_map_size()
    red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num()
    # set map info to blue agent
    blue_agent.set_map_info(size_x, size_y, blue_detector_num, blue_fighter_num)
    red_detector_action = []
    fighter_model = dqn.RLFighter(ACTION_NUM)
    # execution
    for x in range(MAX_EPOCH):
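        # A minimal sketch of the epoch body that typically follows, under
        # stated assumptions: blue_agent.get_action(obs_dict, step_cnt) returns
        # a (detector_action, fighter_action) pair, and dqn.RLFighter exposes
        # choose_action / store_replay / learn. These method names are
        # assumptions for illustration, not confirmed by this file.
        step_cnt = 0
        env.reset()
        while True:
            step_cnt += 1
            red_obs_dict, blue_obs_dict = env.get_obs()
            blue_detector_action, blue_fighter_action = blue_agent.get_action(blue_obs_dict, step_cnt)
            red_fighter_action = fighter_model.choose_action(red_obs_dict)  # hypothetical API
            env.step(red_detector_action, red_fighter_action,
                     blue_detector_action, blue_fighter_action)
            red_detector_reward, red_fighter_reward, red_game_reward, *_ = env.get_reward()
            fighter_model.store_replay(red_obs_dict, red_fighter_action, red_fighter_reward)  # hypothetical API
            if step_cnt % LEARN_INTERVAL == 0:
                fighter_model.learn()  # hypothetical API
            if env.get_done():
                break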
ATTACK_IND_NUM = (DETECTOR_NUM + FIGHTER_NUM) * 2 + 1  # long missile attack + short missile attack + no attack
ACTION_NUM = COURSE_NUM * ATTACK_IND_NUM
LEARN_INTERVAL = TARGET_REPLACE_ITER
BETAS = (0.9, 0.999)
EPS_clip = 0.2
K_epochs = 4
max_timesteps = 300

if __name__ == "__main__":
    # create blue agent
    blue_agent = Agent()
    # get agent obs type
    red_agent_obs_ind = 'ppo'
    blue_agent_obs_ind = blue_agent.get_obs_ind()
    # make env
    env = Environment(MAP_PATH, red_agent_obs_ind, blue_agent_obs_ind, render=RENDER)
    # get map info
    size_x, size_y = env.get_map_size()
    red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num()
    # set map info to blue agent
    blue_agent.set_map_info(size_x, size_y, blue_detector_num, blue_fighter_num)
    red_detector_action = []
    fighter_model = ppo.PPOFighter(ACTION_NUM, LR, BETAS, GAMMA, EPS_clip, K_epochs,
                                   MAX_EPOCH, LEARN_INTERVAL, max_timesteps)
    # resume both the current and the old policy from the same checkpoint
    fighter_model.policy.load_state_dict(torch.load('model/ppo/model_000026500.pkl', map_location='cpu'))
    fighter_model.policy_old.load_state_dict(torch.load('model/ppo/model_000026500.pkl', map_location='cpu'))
    reward_sum = []
    round_sum = []
    # execution
    for x in range(MAX_EPOCH):
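        # A minimal sketch of the PPO epoch body, under stated assumptions:
        # fighter_model is assumed to expose choose_action(), a memory buffer
        # with rewards/is_terminals lists, and update(), as in compact PPO
        # implementations. None of these names are confirmed by this file.
        env.reset()
        step_cnt = 0
        while True:
            step_cnt += 1
            red_obs_dict, blue_obs_dict = env.get_obs()
            blue_detector_action, blue_fighter_action = blue_agent.get_action(blue_obs_dict, step_cnt)
            red_fighter_action = fighter_model.choose_action(red_obs_dict)  # hypothetical API
            env.step(red_detector_action, red_fighter_action,
                     blue_detector_action, blue_fighter_action)
            red_detector_reward, red_fighter_reward, red_game_reward, *_ = env.get_reward()
            done = env.get_done()
            fighter_model.memory.rewards.append(red_fighter_reward)  # hypothetical API
            fighter_model.memory.is_terminals.append(done)           # hypothetical API
            if step_cnt % LEARN_INTERVAL == 0:
                fighter_model.update()  # hypothetical API
            if done:
                break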
else:
    step_delay = 1 / args.fps
# environment initiation
if args.log:
    if args.log_path == 'default_log':
        log_flag = args.agent1 + '_vs_' + args.agent2
    else:
        log_flag = args.log_path
else:
    log_flag = False
env = Environment(map_path, 'raw', 'raw', max_step=args.max_step, render=True,
                  random_pos=args.random_pos, log=log_flag, external_render=args.ext_render,
                  side1_name=args.agent1, side2_name=args.agent2)
# get map info
size_x, size_y = env.get_map_size()
side1_detector_num, side1_fighter_num, side2_detector_num, side2_fighter_num = env.get_unit_num()
# create agents
agent1 = AgentCtrl(args.agent1, size_x, size_y, side1_detector_num, side1_fighter_num, args.agent1_gpu)
agent2 = AgentCtrl(args.agent2, size_x, size_y, side2_detector_num, side2_fighter_num, args.agent2_gpu)
if not agent1.agent_init():
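    # The failure branch presumably mirrors the variant of this script shown
    # further below: report the error, tear down both agent processes, and exit.
    print('ERROR: Agent1 init failed!')
    agent1.terminate()
    agent2.terminate()
    exit(-1)
else:
    print('Agent1 init success!')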
                dim_obs=OBS_NUM,
                dim_act=ACTION_NUM,
                batch_size=BATCH_SIZE,
                capacity=CAPACITY,
                replace_target_iter=TARGET_REPLACE_ITER,
                episodes_before_train=EPISODES_BEFORE_TRAIN,
                learning_rate=LR,
                gamma=GAMMA,
                scale_reward=SCALE_REWARD,
                is_dispersed=IS_DISPERSED)
# gpu
FloatTensor = th.cuda.FloatTensor if maddpg.use_cuda else th.FloatTensor
# make env
env = Environment(MAP_PATH, red_agent_obs_ind, blue_agent_obs_ind, render=RENDER,
                  max_step=MAX_STEP, random_pos=True)
# get map info
size_x, size_y = env.get_map_size()  # size_x == size_y == 1000
red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num()
red_detector_action, blue_detector_action = [], []  # temp
for i_episode in range(MAX_EPOCH):
    step_cnt = 0
    env.reset()
    total_reward = 0.0  # overall reward across all agents this episode
    rr = np.zeros((FIGHTER_NUM,))  # per-agent reward this episode
    # get obs
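    # A minimal sketch of the episode body that typically follows, under
    # stated assumptions: blue_agent (a scripted opponent assumed to be
    # created in the truncated part of this file) follows the
    # get_action(obs_dict, step_cnt) interface used by the other scripts,
    # maddpg.select_actions() is a hypothetical name for the joint actor
    # forward pass, and red_fighter_reward is a per-fighter array.
    while True:
        step_cnt += 1
        red_obs_dict, blue_obs_dict = env.get_obs()
        _, blue_fighter_action = blue_agent.get_action(blue_obs_dict, step_cnt)
        red_fighter_action = maddpg.select_actions(red_obs_dict)  # hypothetical API
        env.step(red_detector_action, red_fighter_action,
                 blue_detector_action, blue_fighter_action)
        red_detector_reward, red_fighter_reward, red_game_reward, *_ = env.get_reward()
        rr += np.asarray(red_fighter_reward)              # accumulate per-fighter rewards
        total_reward += float(np.sum(red_fighter_reward))  # episode-level total
        if env.get_done():
            break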
    exit(-1)
# delay calc
if args.fps == 0:
    step_delay = 0
else:
    step_delay = 1 / args.fps
# environment initiation
if args.log:
    if args.log_path == 'default_log':
        log_flag = args.agent1 + '_vs_' + args.agent2
    else:
        log_flag = args.log_path
else:
    log_flag = False
env = Environment(map_path, 'raw', 'raw', max_step=args.max_step, render=True,
                  random_pos=args.random_pos, log=log_flag, external_render=args.ext_render)
# get map info
size_x, size_y = env.get_map_size()
side1_detector_num, side1_fighter_num, side2_detector_num, side2_fighter_num = env.get_unit_num()
# create agents
agent1 = AgentCtrl(args.agent1, size_x, size_y, side1_detector_num, side1_fighter_num)
agent2 = AgentCtrl(args.agent2, size_x, size_y, side2_detector_num, side2_fighter_num)
if not agent1.agent_init():
    print('ERROR: Agent1 init failed!')
    agent1.terminate()
    agent2.terminate()
    exit(-1)
else:
    print('Agent1 init success!')
if not agent2.agent_init():
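    # Presumably mirrors the agent1 handling just above:
    print('ERROR: Agent2 init failed!')
    agent1.terminate()
    agent2.terminate()
    exit(-1)
else:
    print('Agent2 init success!')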