Example #1
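    # Resume from a saved run: fetch the checkpoint and restore the agent's weights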
    model.download(f"models/{experiment_name}/")
    agent.load_state_dict(
        torch.load(f"models/{experiment_name}/agent.pt", map_location=device))
    agent.eval()
    print(f"resumed at update {starting_update}")

for update in range(starting_update, num_updates + 1):
    # Annealing the rate if instructed to do so.
    if args.anneal_lr:
        frac = 1.0 - (update - 1.0) / num_updates
        lrnow = lr(frac)
        optimizer.param_groups[0]['lr'] = lrnow

    # TRY NOT TO MODIFY: prepare the execution of the game.
    for step in range(0, args.num_steps):
        envs.render()
        global_step += 1 * args.num_envs
        obs[step] = next_obs
        dones[step] = next_done
        # ALGO LOGIC: put action logic here
        with torch.no_grad():
            values[step] = agent.get_value(obs[step]).flatten()
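            # sample an action; the env supplies an invalid-action mask so
            # only legal unit commands can be drawn (assumed gym-microrts agent)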
            action, logproba, _, invalid_action_masks[step] = agent.get_action(
                obs[step], envs=envs)

        actions[step] = action
        logprobs[step] = logproba

        # TRY NOT TO MODIFY: execute the game and log data.
        # the real action adds the source units
        real_action = torch.cat([
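The loop above calls lr(frac) without showing its definition; in CleanRL-style scripts this is typically a linear learning-rate schedule. A minimal sketch, with a hypothetical base rate standing in for args.learning_rate:

learning_rate = 2.5e-4  # hypothetical stand-in for args.learning_rate
lr = lambda f: f * learning_rate  # linear decay: f runs from 1.0 down to ~0

num_updates = 1000
frac = 1.0 - (500 - 1.0) / num_updates  # halfway through training
print(lr(frac))  # ~1.25e-04
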
Example #2
import traceback

import numpy as np
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions.categorical import Categorical

from gym_microrts import microrts_ai
from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv

try:
    env = MicroRTSGridModeVecEnv(
        num_envs=1,
        render_theme=2,
        ai2s=[microrts_ai.passiveAI],
        map_path="maps/16x16/basesWorkersTestAttack16x16.xml",
        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]))
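    # reward_weight order (gym-microrts shaped rewards, assumed): win/loss,
    # resource gather, produce worker, produce building, attack, produce combat unit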
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception:
    # print the full Python traceback (e.printStackTrace() is Java idiom)
    traceback.print_exc()
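# Each commented-out action below is an 8-component vector (gym-microrts
# MultiDiscrete encoding, assumed): [source unit index, action type, move dir,
# harvest dir, return dir, produce dir, produce unit type, relative attack position]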
# print("reward is", env.step([[[ 17,   2 ,  0 ,  3 ,  0 ,  1 ,  2, 0]]])[1])
# env.render()
# print("reward is", env.step([[[ 34  , 4 ,  1   ,2  , 1 ,  2  , 3 ,0]]])[1])
# env.render()

# print("reward is", env.step([[[ 14*16+14  , 1 ,  0   ,0  , 0 ,  0  , 0 ,0]]])[1])
# env.render()
# for _ in range(100):
#     env.step([[[ 0  , 0 ,  0   ,0  , 0 ,  0  , 0 ,0]]])
#     env.render()

# print("relative target position:", np.where(np.array(env.vec_client.getMasks(0))[0,13,14][1+6+4+4+4+4+7:]==1)[0])
# print("reward is", env.step([[[ 13*16+14  , 5 ,  0   ,0  , 0 ,  0  , 0 ,23]]])[1])