Esempio n. 1
0
    # NOTE(review): this is an excerpt -- the enclosing function header and the
    # remainder of the step loop are outside the visible source.
    #
    # Create the environment from MAP_PATH and the red/blue observation
    # indicators, with max_step fixed at 1000 and rendering controlled by
    # RENDER.
    env = Environment(MAP_PATH,
                      red_agent_obs_ind,
                      blue_agent_obs_ind,
                      max_step=1000,
                      render=RENDER)
    # Query the environment for the map size and the four unit counts
    # (red/blue detectors and fighters).
    size_x, size_y = env.get_map_size()
    red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num(
    )
    # Hand the map size and the blue side's unit counts to the blue agent.
    blue_agent.set_map_info(size_x, size_y, blue_detector_num,
                            blue_fighter_num)

    # Red detector actions start as an empty list; nothing in this excerpt
    # fills it.
    red_detector_action = []
    # Fighter model built with ACTION_NUM as its only argument.
    # NOTE(review): presumably a DQN learner for the red fighters -- confirm
    # against the dqn module.
    fighter_model = dqn.RLFighter(ACTION_NUM)

    # Run MAX_EPOCH episodes; the loop index is not used in the visible part.
    for x in range(MAX_EPOCH):
        # Per-episode state: reset the step counter, the environment and the
        # reward accumulator.
        step_cnt = 0
        env.reset()
        episodic_reward = 0
        # Step loop; its exit condition lies beyond this excerpt.
        while True:
            # Per-step buffers, recreated empty on every iteration.
            obs_list = []
            action_list = []
            red_fighter_action = []
            # Observations are pulled from the environment only on the first
            # step of an episode.
            # NOTE(review): later steps presumably reuse observations obtained
            # further down the loop -- not visible here, confirm.
            if step_cnt == 0:
                red_obs_dict, blue_obs_dict = env.get_obs()

            # get action
Esempio n. 2
0
                      blue_agent_obs_ind,
                      render=RENDER,
                      max_step=MAX_STEP)
    # NOTE(review): this is an excerpt -- the enclosing function header, the
    # start of the environment construction and the remainder of the step loop
    # are outside the visible source.
    #
    # Query the environment for the map size and the four unit counts
    # (red/blue detectors and fighters).
    size_x, size_y = env.get_map_size()
    red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num(
    )
    # Hand the map size and the blue side's unit counts to the blue agent.
    blue_agent.set_map_info(size_x, size_y, blue_detector_num,
                            blue_fighter_num)

    # Red detector actions start as an empty list; nothing in this excerpt
    # fills it.
    red_detector_action = []
    # Fighter model configured entirely through keyword arguments taken from
    # constants defined elsewhere (LR, GAMMA, EPSILON, ...).
    # NOTE(review): the exact meaning of each hyper-parameter is defined by
    # dqn.RLFighter -- confirm there.
    fighter_model = dqn.RLFighter(n_actions=ACTION_NUM,
                                  learning_rate=LR,
                                  reward_decay=GAMMA,
                                  e_greedy=EPSILON,
                                  e_greedy_increment=EPSILON_INCREMENT,
                                  capacity=CAPACITY,
                                  batch_size=BATCH_SIZE,
                                  replace_target_iter=TARGET_REPLACE_ITER)

    # Run MAX_EPOCH episodes.
    for i_episode in range(MAX_EPOCH):
        # Per-episode state: reset the step counter and reward accumulator
        # before resetting the environment.
        step_cnt = 0
        total_reward = 0.0  # overall reward of all agents in this episode
        env.reset()
        # Step loop; its exit condition lies beyond this excerpt.
        while True:
            # Per-step buffers pre-filled with one 0 per red fighter.
            # NOTE(review): presumably overwritten by fighter index further
            # down the loop -- not visible here, confirm.
            obs_list = [0 for _ in range(red_fighter_num)]
            action_list = [0 for _ in range(red_fighter_num)]
            red_fighter_action = []
            # get obs
            if step_cnt == 0: