import time
import traceback

import numpy as np

from gym_microrts import microrts_ai
from gym_microrts.envs.vec_env import MicroRTSVecEnv

try:
    env = MicroRTSVecEnv(
        num_envs=1,
        render_theme=2,
        ai2s=[microrts_ai.coacAI],
        map_path="maps/16x16/basesWorkers16x16.xml",
        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]))
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception as e:
    traceback.print_exc()

env.action_space.seed(0)
env.reset()
for i in range(10000):
    env.render()
    # mask of grid cells that contain a unit allowed to act this step
    action_mask = np.array(env.vec_client.getUnitLocationMasks()).flatten()
    time.sleep(0.001)
    action = env.action_space.sample()
    # optional: selecting only valid units.
    if len(action_mask.nonzero()[0]) != 0:
        action[0] = action_mask.nonzero()[0][0]
    next_obs, reward, done, info = env.step([action])
    if done:
        env.reset()
env.close()
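# A minimal sketch (not in the original script; only numpy is assumed): the
# loop above always steers the *first* valid unit, so most of the army sits
# idle under the random policy. Sampling uniformly from the unit-location
# mask instead spreads random actions across all units that can act.
def pick_random_valid_unit(action, action_mask):
    # indices where the mask is nonzero, i.e. cells holding a unit that can act
    valid_units = action_mask.nonzero()[0]
    if len(valid_units) != 0:
        action[0] = np.random.choice(valid_units)
    return action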
for step in range(args.num_steps):  # collect one rollout of num_steps transitions
    # ALGO LOGIC: put action logic here
    with torch.no_grad():
        with sw.timer('inference_value'):
            values[step] = agent.get_value(obs[step]).flatten()
        with sw.timer('inference_action'):
            action, logproba, _, invalid_action_masks[step] = agent.get_action(
                obs[step], envs=envs, sw=sw)
    actions[step] = action.T
    logprobs[step] = logproba

    with sw.timer('step'):
        # TRY NOT TO MODIFY: execute the game and log data.
        next_obs, rs, ds, infos = envs.step(action.T)
        rewards[step], next_done = rs.view(-1), torch.Tensor(ds).to(device)

    for info in infos:
        if 'episode' in info.keys():
            print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
            writer.add_scalar("charts/episode_reward",
                              info['episode']['r'], global_step)
            # for key in info['microrts_stats']:
            #     writer.add_scalar(f"charts/episode_reward/{key}",
            #                       info['microrts_stats'][key], global_step)
            break

with sw.timer('train'):
    # bootstrap the value estimate if the episode is not done
    # (we reached the batch limit)
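# A minimal sketch of the invalid-action masking that `agent.get_action` above
# depends on (which is why it returns `invalid_action_masks`). This follows the
# common gym-microrts/CleanRL pattern of overwriting the logits of invalid
# actions with a large negative constant before sampling; the class below is
# illustrative, not the original implementation.
import torch
from torch.distributions.categorical import Categorical

class CategoricalMasked(Categorical):
    def __init__(self, logits, masks):
        # masks: boolean tensor, True where the corresponding action is valid
        self.masks = masks.bool()
        # invalid actions get a huge negative logit, so softmax assigns them
        # (numerically) zero probability and they are never sampled
        logits = torch.where(self.masks, logits, torch.full_like(logits, -1e8))
        super().__init__(logits=logits)

# Example: 4 actions, only actions 0 and 2 are valid.
dist = CategoricalMasked(logits=torch.zeros(4),
                         masks=torch.tensor([True, False, True, False]))
print(dist.sample())  # always 0 or 2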
try:
    # NOTE: the constructor name is cut off in the source; unlike the
    # vectorized snippet above, this env takes a single `ai2` opponent, so a
    # single-agent env class name is assumed here as a placeholder.
    env = MicroRTSEnv(
        render_theme=2,
        ai2=microrts_ai.coacAI,
        map_path="maps/16x16/basesWorkers16x16.xml",
        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0, 0.0]))
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception as e:
    traceback.print_exc()

# print("unit locations are at", np.where(env.get_unit_location_mask() == 1)[0])
print("reward is", env.step([[17, 2, 0, 3, 0, 1, 2, 123]])[1])
env.render()

# print("unit locations are at", np.where(env.get_unit_location_mask() == 1)[0])
print("reward is", env.step([[34, 4, 1, 2, 1, 2, 3, 109]])[1])
env.render()

# print("unit locations are at", np.where(env.get_unit_location_mask() == 1)[0])
# print("reward is", env.step([12, 0, 0, 0, 0, 0, 0, 0, 0])[1])
# env.render()
# print("unit locations are at", np.where(env.get_unit_location_mask() == 1)[0])
# for i in range(9):
#     env.step([12, 0, 0, 0, 0, 0, 0, 0, 0])
#     env.render()
#     print("unit locations are at", np.where(env.get_unit_location_mask() == 1)[0])

# harvest
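# For reference, the 8 components of the hard-coded actions above (e.g.
# [17, 2, 0, 3, 0, 1, 2, 123]) follow gym-microrts' decomposed action space.
# The ordering below is taken from the gym-microrts documentation and should
# be treated as an assumption, not a spec:
#   [source_unit,    # flattened grid cell of the unit that acts (17 = cell 17)
#    action_type,    # 0=NOOP, 1=move, 2=harvest, 3=return, 4=produce, 5=attack
#    move_param,     # direction: 0=north, 1=east, 2=south, 3=west
#    harvest_param,  # direction, same encoding
#    return_param,   # direction, same encoding
#    produce_param,  # direction, same encoding
#    produce_type,   # unit type to produce (resource/base/barracks/worker/...)
#    attack_param]   # grid cell of the attack target (absolute in this version)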