# --- PPO rollout storage ---------------------------------------------------
# Pre-allocate one tensor per tracked quantity, shaped (num_steps, num_envs, ...),
# so each step of the rollout writes into a fixed slot instead of appending.
obs = torch.zeros((args.num_steps, args.num_envs) + envs.observation_space.shape).to(device)
actions = torch.zeros((args.num_steps, args.num_envs) + envs.action_space.shape).to(device)
logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device)
rewards = torch.zeros((args.num_steps, args.num_envs)).to(device)
dones = torch.zeros((args.num_steps, args.num_envs)).to(device)
values = torch.zeros((args.num_steps, args.num_envs)).to(device)
# One mask bit per discrete sub-action: flattened size is the sum of the
# MultiDiscrete nvec entries.
invalid_action_masks = torch.zeros((args.num_steps, args.num_envs) + (envs.action_space.nvec.sum(), )).to(device)

# TRY NOT TO MODIFY: start the game
global_step = 0
start_time = time.time()
# Note how `next_obs` and `next_done` are used; their usage is equivalent to
# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/84a7582477fb0d5c82ad6d850fe476829dddd2e1/a2c_ppo_acktr/storage.py#L60
next_obs = envs.reset()
next_done = torch.zeros(args.num_envs).to(device)
# Total number of policy updates implied by the timestep budget.
num_updates = args.total_timesteps // args.batch_size
sw = stopwatch.StopWatch()

## CRASH AND RESUME LOGIC:
# When a wandb run is resumed, restore the update counter / global step from
# the run summary and reload the saved agent weights.
starting_update = 1
if args.prod_mode and wandb.run.resumed:
    # NOTE(review): `run` is read here before being rebound below —
    # presumably it was bound earlier in the file (e.g. by wandb.init);
    # confirm, otherwise this is a NameError on resume.
    print("previous run.summary", run.summary)
    starting_update = run.summary['charts/update'] + 1
    global_step = starting_update * args.batch_size
    api = wandb.Api()
    # Re-fetch the run through the public API (path derived from its URL)
    # so we can download files from it.
    run = api.run(run.get_url()[len("https://app.wandb.ai/"):])
    model = run.file('agent.pt')
    model.download(f"models/{experiment_name}/")
    agent.load_state_dict(torch.load(f"models/{experiment_name}/agent.pt"))
from gym_microrts.envs.vec_env import MicroRTSVecEnv
from gym_microrts import microrts_ai
from gym.envs.registration import register
from gym_microrts import Config

# --- Demo script: random agent against coacAI on a 16x16 map ---------------
try:
    env = MicroRTSVecEnv(
        num_envs=1,
        render_theme=2,
        ai2s=[microrts_ai.coacAI],
        map_path="maps/16x16/basesWorkers16x16.xml",
        # Shaped-reward weights: win, resource, produce-worker, construct,
        # attack, produce-combat-unit (order per gym-microrts convention —
        # TODO confirm against the library version in use).
        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]))
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception:
    # BUG FIX: the original called `e.printStackTrace()` — a Java idiom that
    # does not exist on Python exceptions, so the handler itself raised
    # AttributeError and hid the real error. Print the traceback instead.
    import traceback
    traceback.print_exc()

env.action_space.seed(0)
env.reset()

for i in range(10000):
    env.render()
    # Bit mask over grid cells marking units that can currently act.
    action_mask = np.array(env.vec_client.getUnitLocationMasks()).flatten()
    time.sleep(0.001)
    action = env.action_space.sample()
    # optional: selecting only valid units.
    # Hoisted: compute the valid-unit indices once instead of twice.
    valid_units = action_mask.nonzero()[0]
    if len(valid_units) != 0:
        action[0] = valid_units[0]