def create_env(flags):
    """Build the Atari environment named by ``flags.env``.

    Applies the standard DeepMind preprocessing (frame stacking enabled,
    reward clipping and pixel scaling disabled), then converts observations
    to a PyTorch-friendly layout.
    """
    base_env = make_atari(flags.env)
    preprocessed = wrap_deepmind(
        base_env,
        clip_rewards=False,
        frame_stack=True,
        scale=False,
    )
    return wrap_pytorch(preprocessed)
def create_deepmind_env(flags):
    """Build the DeepMind-preprocessed Atari environment for ``flags.env``.

    Same wrapper chain as ``create_env`` but addressed through the
    ``atari_wrappers`` module: frame stacking on, reward clipping off,
    pixel scaling off, then a PyTorch observation wrapper.
    """
    raw_env = atari_wrappers.make_atari(flags.env)
    deepmind_env = atari_wrappers.wrap_deepmind(
        raw_env,
        clip_rewards=False,
        frame_stack=True,
        scale=False,
    )
    return atari_wrappers.wrap_pytorch(deepmind_env)
    # Tail of a loss/update function whose `def` lies before this chunk (not
    # visible here): after the loss is computed, re-sample the noise of the
    # NoisyNet layers in both networks so the next forward pass uses fresh
    # exploration noise. NOTE(review): enclosing function signature unknown.
    current_model.reset_noise()
    target_model.reset_noise()
    return loss


def plot(frame_idx, rewards, losses):
    """Report training progress at `frame_idx`.

    Prints the mean of the last 10 entries of `rewards`. `losses` is
    accepted but unused here — presumably consumed by a fuller plotting
    routine elsewhere; TODO confirm.
    """
    print('frame %s. reward: %s' % (frame_idx, np.mean(rewards[-10:])))


# --- script-level setup: build a preprocessed Pong environment ------------
from atari_wrappers import make_atari, wrap_deepmind, wrap_pytorch

env_id = "PongNoFrameskip-v4"
env = make_atari(env_id)
env = wrap_deepmind(env)
env = wrap_pytorch(env)


class RainbowCnnDQN(nn.Module):
    # Rainbow DQN convolutional network. The (num_atoms, Vmin, Vmax)
    # constructor arguments suggest a distributional (C51-style) value head
    # over `num_atoms` support points in [Vmin, Vmax] — TODO confirm against
    # the rest of the class, which is cut off at the end of this chunk (the
    # `features` Sequential is left unterminated here).
    def __init__(self, input_shape, num_actions, num_atoms, Vmin, Vmax):
        super(RainbowCnnDQN, self).__init__()
        self.input_shape = input_shape  # channels-first: input_shape[0] feeds Conv2d in_channels below
        self.num_actions = num_actions  # size of the discrete action space
        self.num_atoms = num_atoms      # number of distribution support atoms
        self.Vmin = Vmin                # lower bound of the value support
        self.Vmax = Vmax                # upper bound of the value support
        # Convolutional feature extractor (Nature-DQN style first two layers).
        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
def create_env(flags):
    """Build the environment for ``flags.env`` via ``atari_wrappers``.

    NOTE(review): this re-defines ``create_env`` (a function of the same
    name appears earlier in the file) and calls
    ``atari_wrappers.wrap_interp`` — confirm that helper exists; the sibling
    factories use ``wrap_deepmind`` instead.
    """
    raw_env = atari_wrappers.make_atari(flags.env)
    interp_env = atari_wrappers.wrap_interp(raw_env)
    return atari_wrappers.wrap_pytorch(interp_env)