Example #1
GAMMA = 0.99                # reward discount
TARGET_REPLACE_ITER = 999   # target update frequency
DETECTOR_NUM = 0
FIGHTER_NUM = 10
COURSE_NUM = 16
ATTACK_IND_NUM = (DETECTOR_NUM + FIGHTER_NUM) * 2 + 1 # long missile attack + short missile attack + no attack
ACTION_NUM = COURSE_NUM * ATTACK_IND_NUM
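# with DETECTOR_NUM = 0 and FIGHTER_NUM = 10: ATTACK_IND_NUM = 21 (10 long-missile
# targets + 10 short-missile targets + 1 "no attack"), so ACTION_NUM = 16 * 21 = 336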
LEARN_INTERVAL = TARGET_REPLACE_ITER
BETAS = (0.9, 0.999)
EPS_clip = 0.2
K_epochs = 4
max_timesteps = 300
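# --- illustrative sketch, not from the original project: the clipped surrogate
# --- objective that EPS_clip parameterizes (applied K_epochs times per update).
# --- `ratios` and `advantages` are placeholder tensors standing in for the
# --- pi_new/pi_old probability ratios and advantage estimates; torch is assumed
# --- to be imported elsewhere in this file (it is used below for torch.load).
def ppo_clip_loss(ratios, advantages, eps_clip=EPS_clip):
    surr1 = ratios * advantages
    surr2 = torch.clamp(ratios, 1 - eps_clip, 1 + eps_clip) * advantages
    return -torch.min(surr1, surr2).mean()  # negate so the optimizer minimizes it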

if __name__ == "__main__":
    # create blue agent
    blue_agent = Agent()
    # get agent obs type
    red_agent_obs_ind = 'ppo'
    blue_agent_obs_ind = blue_agent.get_obs_ind()
    # make env
    env = Environment(MAP_PATH, red_agent_obs_ind, blue_agent_obs_ind, render=RENDER)
    # get map info
    size_x, size_y = env.get_map_size()
    red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num()
    # set map info to blue agent
    blue_agent.set_map_info(size_x, size_y, blue_detector_num, blue_fighter_num)

    red_detector_action = []
    fighter_model = ppo.PPOFighter(ACTION_NUM, LR, BETAS, GAMMA, EPS_clip, K_epochs, MAX_EPOCH, LEARN_INTERVAL, max_timesteps)
    fighter_model.policy.load_state_dict(torch.load('model/ppo/model_000026500.pkl', map_location='cpu'))
    fighter_model.policy_old.load_state_dict(torch.load('model/ppo/model_000026500.pkl', map_location='cpu'))
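
A minimal sketch (not from the original snippet) of how the composite action index could be split back into a course choice and an attack choice; the ordering below, with the attack index varying fastest, is an assumption rather than something taken from the environment code:

def decode_action(action_id):
    course_id = action_id // ATTACK_IND_NUM  # 0..COURSE_NUM-1, discretized heading
    attack_id = action_id % ATTACK_IND_NUM   # long/short missile target or "no attack" (ordering assumed)
    return course_id, attack_id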
Example #2
# OBS_NUM = 2 + 1 + 1 + 1 + 2 * 10 + 10 + 2 * 10
# obs: own position, own course, short missiles remaining, long missiles remaining,
#      positions of targets in the active-observation list, courses of passively observed targets, strike ids
OBS_NUM = 2 + 1 + 1 + 1 + 2 * 10 + 10 + 10  # TODO: previously used global observation positions
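# evaluates to 2 + 1 + 1 + 1 + 20 + 10 + 10 = 45 observation features per fighter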
# action
DETECTOR_NUM = 0
FIGHTER_NUM = 10
AGENT_NUM = 2
COURSE_NUM = 16
ACTION_NUM = 360 if IS_DISPERSED else 1  # number of discrete actions if dispersed, else a 1-dim continuous action (DDPG)

if __name__ == '__main__':
    # reward_record = []  # record the reward of each training episode
    agent0_c_loss = []
    agent0_a_loss = []
    # get agent obs type
    blue_agent = Agent()  # blue agent
    red_agent_obs_ind = AGENT_NAME  # red agent
    blue_agent_obs_ind = blue_agent.get_obs_ind()
    # init model
    maddpg = MADDPG(n_agents=FIGHTER_NUM, dim_obs=OBS_NUM, dim_act=ACTION_NUM,
                    batch_size=BATCH_SIZE, capacity=CAPACITY, replace_target_iter=TARGET_REPLACE_ITER,
                    episodes_before_train=EPISODES_BEFORE_TRAIN, learning_rate=LR, gamma=GAMMA,
                    scale_reward=SCALE_REWARD, is_dispersed=IS_DISPERSED)
    # gpu
    FloatTensor = th.cuda.FloatTensor if maddpg.use_cuda else th.FloatTensor
    # make env
    env = Environment(MAP_PATH, red_agent_obs_ind, blue_agent_obs_ind, render=RENDER, max_step=MAX_STEP,
                      random_pos=True)
    # get map info
    size_x, size_y = env.get_map_size()  # size_x == size_y == 1000
    red_detector_num, red_fighter_num, blue_detector_num, blue_fighter_num = env.get_unit_num()
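
A minimal continuation sketch, not taken from the original snippet, showing only the tensor shapes implied by the MADDPG setup above; it assumes th is `import torch as th`, that numpy is available, and that each of the FIGHTER_NUM fighters yields a flat OBS_NUM-dimensional observation. The zero arrays are placeholders, not real environment output.

    import numpy as np

    obs_batch = np.zeros((FIGHTER_NUM, OBS_NUM), dtype=np.float32)  # placeholder observations
    obs_tensor = FloatTensor(obs_batch)        # shape (10, 45); on GPU when maddpg.use_cuda is True

    # dispersed case (IS_DISPERSED == True): actions are one-hot over the 360 discrete choices
    action_onehot = th.zeros(FIGHTER_NUM, ACTION_NUM)
    action_onehot[:, 0] = 1.0                  # every fighter picks action index 0 in this placeholder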