Example #1
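    # NOTE: this excerpt starts inside a checkpoint-resume branch (the enclosing
    # condition is not shown); it downloads a saved model file and restores the
    # agent's weights before training continues.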
    model.download(f"models/{experiment_name}/")
    agent.load_state_dict(torch.load(f"models/{experiment_name}/agent.pt"))
    agent.eval()
    print(f"resumed at update {starting_update}")
for update in range(starting_update, num_updates + 1):
    with sw.timer('main'):
        # Annealing the rate if instructed to do so.
        if args.anneal_lr:
            frac = 1.0 - (update - 1.0) / num_updates
            lrnow = lr(frac)
            optimizer.param_groups[0]['lr'] = lrnow

        with sw.timer('rollouts'):
            # TRY NOT TO MODIFY: prepare the execution of the game.
            for step in range(0, args.num_steps):
                envs.render()
                global_step += 1 * args.num_envs
                obs[step] = next_obs
                dones[step] = next_done

                # ALGO LOGIC: put action logic here
                with torch.no_grad():
                    with sw.timer('inference_value'):
                        values[step] = agent.get_value(obs[step]).flatten()
                    with sw.timer('inference_action'):
                        action, logproba, _, invalid_action_masks[step] = agent.get_action(
                            obs[step], envs=envs, sw=sw)

                actions[step] = action.T
Example #2
import time
import traceback

import numpy as np

from gym_microrts import microrts_ai
# NOTE: the import path for MicroRTSVecEnv may differ across gym_microrts versions.
from gym_microrts.envs.vec_env import MicroRTSVecEnv
from gym.envs.registration import register
from gym_microrts import Config

try:
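    # Create a single vectorized MicroRTS environment played against the built-in
    # coacAI opponent, with a shaped reward defined by reward_weight.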
    env = MicroRTSVecEnv(num_envs=1,
                         render_theme=2,
                         ai2s=[microrts_ai.coacAI],
                         map_path="maps/16x16/basesWorkers16x16.xml",
                         reward_weight=np.array(
                             [10.0, 1.0, 1.0, 0.2, 1.0, 4.0]))
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception:
    # Print the full traceback; the original Java-style e.printStackTrace()
    # only works for jpype-wrapped Java exceptions.
    traceback.print_exc()
env.action_space.seed(0)
env.reset()
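# Run the environment for 10000 steps with randomly sampled actions, rendering each frame.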
for i in range(10000):
    env.render()
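    # Mask over map cells that contain a unit the agent can currently issue an action to.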
    action_mask = np.array(env.vec_client.getUnitLocationMasks()).flatten()
    time.sleep(0.001)
    action = env.action_space.sample()

    # optional: selecting only valid units.
    if len(action_mask.nonzero()[0]) != 0:
        action[0] = action_mask.nonzero()[0][0]

    next_obs, reward, done, info = env.step([action])