def __init__(self, env_id="PongNoFrameskip-v4"):
    """Create the wrapped Atari env and the conv-net Q-function.

    Parameters
    ----------
    env_id : str
        Gym Atari environment id (a ``NoFrameskip`` variant, as
        expected by ``make_atari``).
    """
    super().__init__()
    self.env_id = env_id
    # DeepMind-style preprocessing, then channel-first tensors for PyTorch.
    self.env = wrap_pytorch(wrap_deepmind(make_atari(env_id)))
    self.input_shape = self.env.observation_space.shape
    self.num_actions = self.env.action_space.n
    # Conv stack: 8x8/4 -> 4x4/2 -> 3x3/1, each followed by ReLU.
    self.features = nn.Sequential(
        nn.Conv2d(self.input_shape[0], 32, kernel_size=8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1),
        nn.ReLU(),
    )
    # feature_size() presumably runs a dummy forward pass to size the
    # flattened conv output — TODO confirm against its definition.
    self.fc = nn.Sequential(
        nn.Linear(self.feature_size(), 512),
        nn.ReLU(),
        nn.Linear(512, self.num_actions),
    )
    self.optimizer = optim.Adam(self.parameters(), lr=0.00001)
    self.replay_buffer = ReplayBuffer2(100000)
    self.losses = []  # per-update training losses, appended elsewhere
def __init__(self, env_id="PongNoFrameskip-v4", num_atoms=51, Vmin=-10, Vmax=10):
    """Set up env, current/target distributional models, optimizer, buffer.

    Parameters
    ----------
    env_id : str
        Gym Atari environment id (``NoFrameskip`` variant).
    num_atoms : int
        Number of atoms in the value distribution support.
    Vmin, Vmax : float
        Lower / upper bound of the value distribution support.
    """
    self.num_atoms = num_atoms
    self.Vmin = Vmin
    self.Vmax = Vmax
    self.env_id = env_id
    # DeepMind preprocessing, then channel-first tensors.
    self.env = wrap_pytorch(wrap_deepmind(make_atari(env_id)))

    obs_shape = self.env.observation_space.shape
    n_actions = self.env.action_space.n
    self.current_model = TinyRainbowCnnDQN(obs_shape, n_actions, num_atoms, Vmin, Vmax)
    self.target_model = TinyRainbowCnnDQN(obs_shape, n_actions, num_atoms, Vmin, Vmax)
    if USE_CUDA:
        self.current_model = self.current_model.cuda()
        self.target_model = self.target_model.cuda()

    # Only the current model is trained; the target is synced from it.
    self.optimizer = optim.Adam(self.current_model.parameters(), lr=0.0001)
    self.update_target(self.current_model, self.target_model)
    self.replay_buffer = ReplayBuffer(100000)
    self.losses = []  # per-update training losses, appended elsewhere
def __init__(self, env_id="PongNoFrameskip-v4"):
    """Set up env, current/target DQN models, optimizer, prioritized buffer.

    Parameters
    ----------
    env_id : str
        Gym Atari environment id (``NoFrameskip`` variant).
    """
    self.env_id = env_id
    # DeepMind preprocessing, then channel-first tensors.
    self.env = wrap_pytorch(wrap_deepmind(make_atari(env_id)))

    obs_shape = self.env.observation_space.shape
    n_actions = self.env.action_space.n
    self.current_model = CnnDQN(obs_shape, n_actions)
    self.target_model = CnnDQN(obs_shape, n_actions)
    if USE_CUDA:
        self.current_model = self.current_model.cuda()
        self.target_model = self.target_model.cuda()

    # Only the current model is trained; the target is synced from it.
    self.optimizer = optim.Adam(self.current_model.parameters(), lr=0.0001)
    self.replay_buffer = NaivePrioritizedBuffer(100000)
    self.update_target(self.current_model, self.target_model)
    self.losses = []  # per-update training losses, appended elsewhere
def __init__(self, env_id="PongNoFrameskip-v4", replay_buffer_size=100000):
    """Set up env, current/target DQN models, optimizer, and replay buffer.

    Parameters
    ----------
    env_id : str
        Gym Atari environment id (``NoFrameskip`` variant).
    replay_buffer_size : int
        Capacity of the experience replay buffer.
    """
    self.env_id = env_id
    # DeepMind preprocessing, then channel-first tensors.
    self.env = make_atari(self.env_id)
    self.env = wrap_deepmind(self.env)
    self.env = wrap_pytorch(self.env)
    self.current_model = CnnDQN()
    self.target_model = CnnDQN()
    # Use the module-level USE_CUDA flag, consistent with the sibling
    # agent constructors in this file, instead of querying
    # torch.cuda.is_available() directly.
    if USE_CUDA:
        self.current_model = self.current_model.cuda()
        self.target_model = self.target_model.cuda()
    # Only the current model is trained; the target starts as a copy of it.
    self.optimizer = optim.Adam(self.current_model.parameters(), lr=0.0001)
    self.replay_buffer = ReplayBuffer2(replay_buffer_size)
    self.losses = []  # per-update training losses, appended elsewhere
    self.target_model.load_state_dict(self.current_model.state_dict())
def __init__(self, env_id="PongNoFrameskip-v4"):
    """Set up env, current/target dueling DQN models, optimizer, buffer.

    Parameters
    ----------
    env_id : str
        Gym Atari environment id (``NoFrameskip`` variant).
    """
    self.env_id = env_id
    # DeepMind preprocessing, then channel-first tensors.
    self.env = make_atari(self.env_id)
    self.env = wrap_deepmind(self.env)
    self.env = wrap_pytorch(self.env)
    self.current_model = TinyDuelingCnnDQN(
        self.env.observation_space.shape, self.env.action_space.n)
    self.target_model = TinyDuelingCnnDQN(
        self.env.observation_space.shape, self.env.action_space.n)
    # Use the module-level USE_CUDA flag, consistent with the sibling
    # agent constructors in this file, instead of querying
    # torch.cuda.is_available() directly.
    if USE_CUDA:
        self.current_model = self.current_model.cuda()
        self.target_model = self.target_model.cuda()
    # Only the current model is trained; the target is synced from it.
    self.optimizer = optim.Adam(self.current_model.parameters(), lr=0.0001)
    self.replay_buffer = ReplayBuffer2(100000)
    self.update_target(self.current_model, self.target_model)
    self.losses = []  # per-update training losses, appended elsewhere