Code example #1
from time import sleep

import gym
import torch

from model import Agent


def main():
    # Load the trained agent and put it in evaluation mode.
    env = gym.make("LunarLander-v2")
    agent = Agent(env)
    agent.load_state_dict(torch.load("./models/agent.pt"))
    agent.eval()

    # Roll the policy out, resetting the environment whenever an episode ends.
    obs = env.reset()
    done = False
    for i in range(10000):
        env.render()
        obs = torch.from_numpy(obs).float()
        action, _, _ = agent.get_action(obs)
        obs, rew, done, info = env.step(action.cpu().numpy())
        sleep(0.001)
        if done:
            obs = env.reset()


if __name__ == "__main__":
    main()
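The listings in this section import `Agent` from `model.py`, which is not shown. The sketch below is a hypothetical reconstruction of the interface they rely on, inferred only from the calls made in the code (a `forward` that returns action logits and a `get_action` that returns three values); the architecture and layer sizes are illustrative, not the actual contents of `model.py`.

# Hypothetical sketch of the Agent interface assumed by these listings.
import torch
import torch.nn as nn
from torch.distributions import Categorical


class Agent(nn.Module):
    def __init__(self, env):
        super().__init__()
        # A small MLP policy head; the layer sizes are illustrative only.
        self.network = nn.Sequential(
            nn.Linear(env.observation_space.shape[0], 64),
            nn.Tanh(),
            nn.Linear(64, env.action_space.n),
        )

    def forward(self, obs):
        # Action logits for a single observation.
        return self.network(obs)

    def get_action(self, obs):
        # Sample an action and return (action, log_prob, entropy),
        # matching the three values unpacked in code example #1.
        logits = self.forward(obs)
        dist = Categorical(logits=logits)
        action = dist.sample()
        return action, dist.log_prob(action), dist.entropy()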
Code example #2
from gym import spaces
import cv2
cv2.ocl.setUseOpenCL(False)

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import gym
from model import Agent

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
env = gym.make("LunarLander-v2")

# Load the pre-trained policy that will act as the expert for collecting rollouts.
expert = Agent(env)
expert.load_state_dict(torch.load("./models/agent.pt"))
expert.eval()


def generate_rollout(agent, env):
    # Run one episode with the given policy and return its states, actions, and rewards.
    agent.to("cpu")
    states = []
    actions = []
    rewards = []
    obs = env.reset()
    steps = 0
    while True:
        obs = torch.from_numpy(obs).float()
        states.append(obs)
        logits = agent.forward(obs)
        probs = torch.softmax(logits, dim=0)
        action = probs.argmax()
        # Assumed continuation (sketch): record the transition, step the
        # environment, and stop when the episode ends.
        actions.append(action)
        obs, rew, done, info = env.step(action.item())
        rewards.append(rew)
        steps += 1
        if done:
            break
    return states, actions, rewards
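For context, here is a short, hypothetical usage sketch of `generate_rollout`: it simply rolls the loaded `expert` out for a few episodes and keeps the trajectories. The episode count and the `dataset` list are illustrative names, not taken from the original script.

# Hypothetical usage: collect a handful of expert episodes.
dataset = []
for episode in range(10):  # the episode count is an arbitrary choice
    states, actions, rewards = generate_rollout(expert, env)
    dataset.append((states, actions, rewards))
    print(f"episode {episode}: length = {len(states)}, return = {sum(rewards):.1f}")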
Code example #3
# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/84a7582477fb0d5c82ad6d850fe476829dddd2e1/a2c_ppo_acktr/storage.py#L60
next_obs = envs.reset()
next_done = torch.zeros(args.num_envs).to(device)
num_updates = args.total_timesteps // args.batch_size
## CRASH AND RESUME LOGIC:
starting_update = 1
if args.prod_mode and wandb.run.resumed:
    # `run` is assumed to be the object returned by wandb.init(...) earlier in the script.
    print("previous run.summary", run.summary)
    starting_update = run.summary['charts/update'] + 1
    global_step = starting_update * args.batch_size
    api = wandb.Api()
    run = api.run(run.get_url()[len("https://app.wandb.ai/"):])
    model = run.file('agent.pt')
    model.download(f"models/{experiment_name}/")
    agent.load_state_dict(torch.load(f"models/{experiment_name}/agent.pt"))
    agent.eval()
    print(f"resumed at update {starting_update}")
for update in range(starting_update, num_updates + 1):
    # Annealing the rate if instructed to do so.
    if args.anneal_lr:
        frac = 1.0 - (update - 1.0) / num_updates
        lrnow = lr(frac)
        optimizer.param_groups[0]['lr'] = lrnow

    # TRY NOT TO MODIFY: prepare the execution of the game.
    for step in range(0, args.num_steps):
        envs.render()
        global_step += 1 * args.num_envs
        obs[step] = next_obs
        dones[step] = next_done
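The resume branch above assumes that each update also saved `agent.pt` to the run and recorded the current update number under `charts/update`. That save step is not part of this excerpt; the sketch below is one way it could look at the end of the update loop, using only standard wandb calls, and is an assumption rather than the original code.

# Sketch (assumed, not shown in the excerpt): persist the model and the update
# counter at the end of each update so a resumed run can read
# run.summary['charts/update'] and download 'agent.pt'.
if args.prod_mode:
    # Writing into wandb.run.dir makes the file appear as 'agent.pt' in run.file('agent.pt').
    torch.save(agent.state_dict(), f"{wandb.run.dir}/agent.pt")
    wandb.save(f"{wandb.run.dir}/agent.pt", base_path=wandb.run.dir)
    wandb.log({"charts/update": update}, step=global_step)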