Code example #1
from time import sleep

import gym
import torch

from model import Agent


def main():
    # Load the trained agent and put it in evaluation mode.
    env = gym.make("LunarLander-v2")
    agent = Agent(env)
    agent.load_state_dict(torch.load("./models/agent.pt"))
    agent.eval()

    # Roll the policy out, resetting the environment whenever an episode ends.
    obs = env.reset()
    done = False
    for i in range(10000):
        env.render()
        obs = torch.from_numpy(obs).float()
        action, _, _ = agent.get_action(obs)
        obs, rew, done, info = env.step(action.cpu().numpy())
        sleep(0.001)
        if done:
            obs = env.reset()


if __name__ == "__main__":
    main()
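The listings in this section import `Agent` from `model.py`, which is not shown. The sketch below is a hypothetical reconstruction of the interface they rely on, inferred only from the calls made in the code (a `forward` that returns action logits and a `get_action` that returns three values); the architecture and layer sizes are illustrative, not the actual contents of `model.py`.

# Hypothetical sketch of the Agent interface assumed by these listings.
import torch
import torch.nn as nn
from torch.distributions import Categorical


class Agent(nn.Module):
    def __init__(self, env):
        super().__init__()
        # A small MLP policy head; the layer sizes are illustrative only.
        self.network = nn.Sequential(
            nn.Linear(env.observation_space.shape[0], 64),
            nn.Tanh(),
            nn.Linear(64, env.action_space.n),
        )

    def forward(self, obs):
        # Action logits for a single observation.
        return self.network(obs)

    def get_action(self, obs):
        # Sample an action and return (action, log_prob, entropy),
        # matching the three values unpacked in code example #1.
        logits = self.forward(obs)
        dist = Categorical(logits=logits)
        action = dist.sample()
        return action, dist.log_prob(action), dist.entropy()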
Code example #2
from gym import spaces
import cv2
cv2.ocl.setUseOpenCL(False)

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import gym
from model import Agent

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
env = gym.make("LunarLander-v2")

# Load the pre-trained policy that will act as the expert for collecting rollouts.
expert = Agent(env)
expert.load_state_dict(torch.load("./models/agent.pt"))
expert.eval()


def generate_rollout(agent, env):
    # Run one episode with the given policy and return its states, actions, and rewards.
    agent.to("cpu")
    states = []
    actions = []
    rewards = []
    obs = env.reset()
    steps = 0
    while True:
        obs = torch.from_numpy(obs).float()
        states.append(obs)
        logits = agent.forward(obs)
        probs = torch.softmax(logits, dim=0)
        action = probs.argmax()
        # Assumed continuation (sketch): record the transition, step the
        # environment, and stop when the episode ends.
        actions.append(action)
        obs, rew, done, info = env.step(action.item())
        rewards.append(rew)
        steps += 1
        if done:
            break
    return states, actions, rewards
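For context, here is a short, hypothetical usage sketch of `generate_rollout`: it simply rolls the loaded `expert` out for a few episodes and keeps the trajectories. The episode count and the `dataset` list are illustrative names, not taken from the original script.

# Hypothetical usage: collect a handful of expert episodes.
dataset = []
for episode in range(10):  # the episode count is an arbitrary choice
    states, actions, rewards = generate_rollout(expert, env)
    dataset.append((states, actions, rewards))
    print(f"episode {episode}: length = {len(states)}, return = {sum(rewards):.1f}")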
Code example #3
# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/84a7582477fb0d5c82ad6d850fe476829dddd2e1/a2c_ppo_acktr/storage.py#L60
next_obs = envs.reset()
next_done = torch.zeros(args.num_envs).to(device)
num_updates = args.total_timesteps // args.batch_size
## CRASH AND RESUME LOGIC:
starting_update = 1
if args.prod_mode and wandb.run.resumed:
    # `run` is assumed to be the object returned by wandb.init(...) earlier in the script.
    print("previous run.summary", run.summary)
    starting_update = run.summary['charts/update'] + 1
    global_step = starting_update * args.batch_size
    api = wandb.Api()
    run = api.run(run.get_url()[len("https://app.wandb.ai/"):])
    model = run.file('agent.pt')
    model.download(f"models/{experiment_name}/")
    agent.load_state_dict(torch.load(f"models/{experiment_name}/agent.pt"))
    agent.eval()
    print(f"resumed at update {starting_update}")
for update in range(starting_update, num_updates + 1):
    # Annealing the rate if instructed to do so.
    if args.anneal_lr:
        frac = 1.0 - (update - 1.0) / num_updates
        lrnow = lr(frac)
        optimizer.param_groups[0]['lr'] = lrnow

    # TRY NOT TO MODIFY: prepare the execution of the game.
    for step in range(0, args.num_steps):
        envs.render()
        global_step += 1 * args.num_envs
        obs[step] = next_obs
        dones[step] = next_done
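The resume branch above assumes that each update also saved `agent.pt` to the run and recorded the current update number under `charts/update`. That save step is not part of this excerpt; the sketch below is one way it could look at the end of the update loop, using only standard wandb calls, and is an assumption rather than the original code.

# Sketch (assumed, not shown in the excerpt): persist the model and the update
# counter at the end of each update so a resumed run can read
# run.summary['charts/update'] and download 'agent.pt'.
if args.prod_mode:
    # Writing into wandb.run.dir makes the file appear as 'agent.pt' in run.file('agent.pt').
    torch.save(agent.state_dict(), f"{wandb.run.dir}/agent.pt")
    wandb.save(f"{wandb.run.dir}/agent.pt", base_path=wandb.run.dir)
    wandb.log({"charts/update": update}, step=global_step)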