model.download(f"models/{experiment_name}/") agent.load_state_dict( torch.load(f"models/{experiment_name}/agent.pt", map_location=device)) agent.eval() print(f"resumed at update {starting_update}") for update in range(starting_update, num_updates + 1): # Annealing the rate if instructed to do so. if args.anneal_lr: frac = 1.0 - (update - 1.0) / num_updates lrnow = lr(frac) optimizer.param_groups[0]['lr'] = lrnow # TRY NOT TO MODIFY: prepare the execution of the game. for step in range(0, args.num_steps): envs.render() global_step += 1 * args.num_envs obs[step] = next_obs dones[step] = next_done # ALGO LOGIC: put action logic here with torch.no_grad(): values[step] = agent.get_value(obs[step]).flatten() action, logproba, _, invalid_action_masks[step] = agent.get_action( obs[step], envs=envs) actions[step] = action logprobs[step] = logproba # TRY NOT TO MODIFY: execute the game and log data. # the real action adds the source units real_action = torch.cat([
import traceback

import numpy as np
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions.categorical import Categorical

from gym_microrts import microrts_ai
from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv

try:
    env = MicroRTSGridModeVecEnv(
        num_envs=1,
        render_theme=2,
        ai2s=[microrts_ai.passiveAI],
        map_path="maps/16x16/basesWorkersTestAttack16x16.xml",
        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]))
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception:
    traceback.print_exc()

# print("reward is", env.step([[[17, 2, 0, 3, 0, 1, 2, 0]]])[1])
# env.render()
# print("reward is", env.step([[[34, 4, 1, 2, 1, 2, 3, 0]]])[1])
# env.render()
# print("reward is", env.step([[[14*16+14, 1, 0, 0, 0, 0, 0, 0]]])[1])
# env.render()
# for _ in range(100):
#     env.step([[[0, 0, 0, 0, 0, 0, 0, 0]]])
#     env.render()
# print("relative target position:", np.where(np.array(env.vec_client.getMasks(0))[0, 13, 14][1+6+4+4+4+4+7:] == 1)[0])
# print("reward is", env.step([[[13*16+14, 5, 0, 0, 0, 0, 0, 23]]])[1])
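# A hedged reading of the 8-number unit actions in the commented-out env.step(...)
# calls above (the component names below are descriptive, not taken from this file,
# and the exact encoding may differ between gym-microrts versions):
#   [source unit cell (roughly y * map_width + x), action type
#    (NOOP / move / harvest / return / produce / attack), move direction,
#    harvest direction, return direction, produce direction, produce unit type,
#    relative attack position]
# For example, [13*16+14, 5, 0, 0, 0, 0, 0, 23] asks the unit at grid cell
# 13*16+14 to attack relative target position 23, and the getMasks(...) slice in
# the comment above appears to read off which relative attack positions are
# currently valid for that unit.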