# DQN (NoisyNet variant) training-script preamble: imports, hyperparameters,
# and the construction of the environment, networks, optimizer, replay buffer,
# and TensorBoard writer. Reformatted from a single collapsed line; tokens unchanged.
import torch.optim as optim
from tensorboardX import SummaryWriter
from atari_wrappers import get_env
# NOTE(review): star import presumably supplies ReplayBuffer, NoisyDQN, DEVICE,
# deque, and to_tensor used below — confirm against models.py.
from models import *

# --- Hyperparameters ---
LR = 2e-5                    # Adam learning rate
GAMMA = 0.99                 # discount factor (not used in this chunk; presumably used in the TD target)
N_FRAMES = 10**8             # total training frames budget
BATCH_SIZE = 32              # minibatch size sampled from the replay buffer
REPLAY_SIZE = 10000          # replay buffer capacity
SYNC_TARGET_FRAMES = 1000    # presumably how often tgt_net is synced to net — confirm in the training loop
MODEL_NAME = 'dqn_noisy'

# --- Environment and checkpoint path ---
env_id = "SpaceInvadersNoFrameskip-v4"
env = get_env(env_id)
save_file_name = env_id + "-" + MODEL_NAME + ".pth"

# --- Replay buffer, online network, and target network ---
replay_buffer = ReplayBuffer(REPLAY_SIZE)
net = NoisyDQN(env.observation_space.shape, env.action_space.n).to(DEVICE)
# Target network: same architecture, separate weights. NOTE(review): it is not
# synced to net here, so until the first sync it holds its own random init —
# verify the training loop copies net's weights before use.
tgt_net = NoisyDQN(env.observation_space.shape, env.action_space.n).to(DEVICE)
# NOTE(review): betas=[0.5, 0.99] departs from Adam's (0.9, 0.999) default —
# 0.5 is a GAN-style choice; confirm this is intentional for DQN.
trainer = optim.Adam(net.parameters(), lr=LR, betas=[0.5, 0.99])
writer = SummaryWriter(comment=MODEL_NAME)

# --- Bookkeeping for episode statistics ---
episode_reward = 0
last_100_rewards = deque(maxlen=100)   # rolling window for mean-reward tracking
best_mean_reward = None                # best rolling mean seen so far; None until first episode completes

def calc_td_loss(batch):
    # Unpack a sampled transition batch into device tensors.
    # NOTE(review): this function is truncated in this chunk — the TD-loss
    # computation continues beyond the visible source.
    state, action, reward, next_state, done = to_tensor(batch)
def _thunk():
    """Factory closure: build and return a fresh wrapped env for `env_id`."""
    return get_env(env_id)