Example #1
# Imports needed by this example; MicroRTSVecEnv lives in gym_microrts.envs.vec_env
# in the gym-microrts releases that ship it.
import time

import numpy as np

from gym_microrts import microrts_ai
from gym_microrts.envs.vec_env import MicroRTSVecEnv

try:
    env = MicroRTSVecEnv(num_envs=1,
                         render_theme=2,
                         ai2s=[microrts_ai.coacAI],
                         map_path="maps/16x16/basesWorkers16x16.xml",
                         reward_weight=np.array(
                             [10.0, 1.0, 1.0, 0.2, 1.0, 4.0]))
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception as e:
    # gym-microrts talks to the Java engine through JPype, so the caught exception
    # is typically a Java exception exposing printStackTrace(); a plain Python
    # exception would not have this method.
    e.printStackTrace()
env.action_space.seed(0)
env.reset()
for i in range(10000):
    env.render()
    # Flattened 0/1 mask over the map cells; a 1 marks a cell whose unit can be issued an action.
    action_mask = np.array(env.vec_client.getUnitLocationMasks()).flatten()
    time.sleep(0.001)
    action = env.action_space.sample()

    # optional: selecting only valid units.
    if len(action_mask.nonzero()[0]) != 0:
        action[0] = action_mask.nonzero()[0][0]

    # Vectorized step: wrap the single action in a list because num_envs=1;
    # `done` correspondingly holds one flag per environment.
    next_obs, reward, done, info = env.step([action])
    if done:
        env.reset()
env.close()
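In the loop above, the sampled action's unit-position component is always overridden with the first valid cell in the mask. A small variation, assuming the same flattened 0/1 mask layout, picks a random valid unit instead so random actions are spread across all controllable units, e.g. replacing the "selecting only valid units" block inside the loop:

    # optional: selecting a random valid unit instead of the first one.
    valid_units = action_mask.nonzero()[0]
    if valid_units.size > 0:
        action[0] = np.random.choice(valid_units)
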
Example #2

                # ALGO LOGIC: put action logic here
                with torch.no_grad():
                    with sw.timer('inference_value'):
                        values[step] = agent.get_value(obs[step]).flatten()
                    with sw.timer('inference_action'):
                        action, logproba, _, invalid_action_masks[step] = agent.get_action(
                            obs[step], envs=envs, sw=sw)

                actions[step] = action.T
                logprobs[step] = logproba

                with sw.timer('step'):
                    # TRY NOT TO MODIFY: execute the game and log data.
                    next_obs, rs, ds, infos = envs.step(action.T)
                rewards[step], next_done = rs.view(-1), torch.Tensor(ds).to(device)

                for info in infos:
                    if 'episode' in info.keys():
                        print(f"global_step={global_step}, episode_reward={info['episode']['r']}")
                        writer.add_scalar("charts/episode_reward", info['episode']['r'], global_step)
                        # for key in info['microrts_stats']:
                        #     writer.add_scalar(f"charts/episode_reward/{key}", info['microrts_stats'][key], global_step)
                        break
        with sw.timer('train'):
            # bootstrap the value estimate if the rollout is not done (batch limit reached)
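The rollout loop above writes into pre-allocated per-step buffers (`values`, `actions`, `logprobs`, `rewards`, `invalid_action_masks`). Below is a minimal sketch of how such storage is commonly allocated; every size and shape is an assumption for illustration, not taken from the example:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_steps, num_envs = 256, 8        # rollout length and number of parallel envs (assumed)
obs_shape = (16, 16, 27)            # per-env observation shape for a 16x16 map (assumed)
action_dims = 8                     # components per gym-microrts unit action (assumed)

obs = torch.zeros((num_steps, num_envs) + obs_shape, device=device)
actions = torch.zeros((num_steps, num_envs, action_dims), device=device)
logprobs = torch.zeros((num_steps, num_envs), device=device)
rewards = torch.zeros((num_steps, num_envs), device=device)
dones = torch.zeros((num_steps, num_envs), device=device)
values = torch.zeros((num_steps, num_envs), device=device)
# invalid_action_masks would be allocated similarly; its width depends on the
# env's action-space encoding, so it is left out of this sketch.
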
Example #3
                         render_theme=2,
                         ai2=microrts_ai.coacAI,
                         map_path="maps/16x16/basesWorkers16x16.xml",
                         reward_weight=np.array(
                             [10.0, 1.0, 1.0, 0.2, 1.0, 4.0, 0.0]))
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception as e:
    e.printStackTrace()

# print("unit_locatiuons are at", np.where(env.get_unit_location_mask()==1)[0])

print("reward is", env.step([[17, 2, 0, 3, 0, 1, 2, 123]])[1])
env.render()
# print("unit_locatiuons are at", np.where(env.get_unit_location_mask()==1)[0])
print("reward is", env.step([[34, 4, 1, 2, 1, 2, 3, 109]])[1])
env.render()
# print("unit_locatiuons are at", np.where(env.get_unit_location_mask()==1)[0])
# print("reward is", env.step([12, 0, 0, 0, 0, 0, 0, 0, 0])[1])
# env.render()
# print("unit_locatiuons are at", np.where(env.get_unit_location_mask()==1)[0])

# for i in range(9):
#     env.step([12, 0, 0, 0, 0, 0, 0, 0, 0])
#     env.render()

# print("unit_locatiuons are at", np.where(env.get_unit_location_mask()==1)[0])
# # # harvest
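The step() calls in this example each pass one 8-component unit action, following the usual gym-microrts encoding of [unit position, action type, move direction, harvest direction, return direction, produce direction, produce unit type, attack target]; treat the exact meaning of each slot as an assumption when adapting this. A small decoder sketch for readability (the coordinate math and name tables are illustrative, not part of the library):

ACTION_TYPES = ["NOOP", "move", "harvest", "return", "produce", "attack"]
DIRECTIONS = ["north", "east", "south", "west"]

def describe_action(a, map_width=16):
    unit_pos, atype, move_d, harvest_d, return_d, produce_d, produce_t, attack_pos = a
    x, y = unit_pos % map_width, unit_pos // map_width
    return (f"unit at cell {unit_pos} (x={x}, y={y}) -> {ACTION_TYPES[atype]}; "
            f"move={DIRECTIONS[move_d]}, harvest={DIRECTIONS[harvest_d]}, "
            f"return={DIRECTIONS[return_d]}, produce_dir={DIRECTIONS[produce_d]}, "
            f"produce_type={produce_t}, attack_target={attack_pos}")

print(describe_action([17, 2, 0, 3, 0, 1, 2, 123]))
print(describe_action([34, 4, 1, 2, 1, 2, 3, 109]))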