Example 1
from collections import deque

import torch
import torch.nn.functional as F
import torch.optim as optim
from tensorboardX import SummaryWriter

from atari_wrappers import get_env
from models import *
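# NOTE: models is assumed to provide NoisyDQN, ReplayBuffer, to_tensor and
# DEVICE via the wildcard import above.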

LR = 2e-5
GAMMA = 0.99
N_FRAMES = 10**8
BATCH_SIZE = 32
REPLAY_SIZE = 10000
SYNC_TARGET_FRAMES = 1000
MODEL_NAME = 'dqn_noisy'

env_id = "SpaceInvadersNoFrameskip-v4"
env = get_env(env_id)
save_file_name = env_id + "-" + MODEL_NAME + ".pth"

replay_buffer = ReplayBuffer(REPLAY_SIZE)
net = NoisyDQN(env.observation_space.shape, env.action_space.n).to(DEVICE)
tgt_net = NoisyDQN(env.observation_space.shape, env.action_space.n).to(DEVICE)
trainer = optim.Adam(net.parameters(), lr=LR, betas=(0.5, 0.99))
writer = SummaryWriter(comment=MODEL_NAME)

episode_reward = 0
last_100_rewards = deque(maxlen=100)
best_mean_reward = None


def calc_td_loss(batch):
    state, action, reward, next_state, done = to_tensor(batch)
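    # The body below is a minimal sketch of the standard DQN TD loss, not the
    # original author's code: it assumes to_tensor() returns batched tensors
    # already on DEVICE, with action as int64 and done as a boolean mask.
    q_values = net(state).gather(1, action.unsqueeze(-1)).squeeze(-1)
    with torch.no_grad():
        next_q_values = tgt_net(next_state).max(1)[0]
        next_q_values[done] = 0.0  # terminal transitions have no bootstrap term
    expected_q = reward + GAMMA * next_q_values
    return F.mse_loss(q_values, expected_q)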
Example 2
from atari_wrappers import get_env


def make_env(env_id):
    # The enclosing factory is an assumption: the original one-space
    # indentation suggests _thunk was nested inside one (the usual pattern
    # when a callable, rather than an env instance, must be handed off).
    def _thunk():
        env = get_env(env_id)
        return env
    return _thunk
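
# Hypothetical usage: each call to the returned thunk constructs a fresh,
# independently wrapped environment, which is what vectorized-env helpers
# expect to receive.
env_fns = [make_env("SpaceInvadersNoFrameskip-v4") for _ in range(4)]
envs = [fn() for fn in env_fns]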