Example #1
0
    def __init__(self, env_id="PongNoFrameskip-v4"):
        """Build a Nature-style CNN DQN bound to a wrapped Atari environment.

        Args:
            env_id: Gym Atari environment id (frameskip handled by wrappers).
        """
        super(CnnDQN, self).__init__()

        self.env_id = env_id
        # Standard Atari preprocessing chain: base Atari wrappers, then
        # DeepMind-style observation processing, then channel-first tensors.
        self.env = make_atari(self.env_id)
        self.env = wrap_deepmind(self.env)
        self.env = wrap_pytorch(self.env)

        self.input_shape = self.env.observation_space.shape
        self.num_actions = self.env.action_space.n

        # Convolutional torso (Nature-DQN layout). Use the cached
        # input_shape instead of re-reading the observation space, so the
        # network definition and the cached attribute cannot drift apart.
        self.features = nn.Sequential(
            nn.Conv2d(self.input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )

        # Fully-connected head mapping flattened conv features to Q-values.
        self.fc = nn.Sequential(
            nn.Linear(self.feature_size(), 512),
            nn.ReLU(),
            nn.Linear(512, self.num_actions)
        )

        # NOTE(review): lr=1e-5 differs from the 1e-4 used by the sibling
        # agents in this file — confirm the lower rate is intentional here.
        self.optimizer = optim.Adam(self.parameters(), lr=0.00001)
        self.replay_buffer = ReplayBuffer2(100000)
        self.losses = []
Example #2
0
    def __init__(self,
                 env_id="PongNoFrameskip-v4",
                 num_atoms=51,
                 Vmin=-10,
                 Vmax=10):
        """Set up a tiny Rainbow agent: env, online/target nets, optimizer, buffer.

        Args:
            env_id: Gym Atari environment id.
            num_atoms: number of bins of the value distribution.
            Vmin: lower bound of the distribution support.
            Vmax: upper bound of the distribution support.
        """
        # Distributional-RL support: num_atoms bins over [Vmin, Vmax].
        self.num_atoms = num_atoms
        self.Vmin = Vmin
        self.Vmax = Vmax

        # Wrapped Atari environment (DeepMind preprocessing, PyTorch layout).
        self.env_id = env_id
        self.env = make_atari(self.env_id)
        self.env = wrap_deepmind(self.env)
        self.env = wrap_pytorch(self.env)

        obs_shape = self.env.observation_space.shape
        n_actions = self.env.action_space.n
        self.current_model = TinyRainbowCnnDQN(obs_shape, n_actions,
                                               num_atoms, Vmin, Vmax)
        self.target_model = TinyRainbowCnnDQN(obs_shape, n_actions,
                                              num_atoms, Vmin, Vmax)
        if USE_CUDA:
            self.current_model = self.current_model.cuda()
            self.target_model = self.target_model.cuda()

        self.optimizer = optim.Adam(self.current_model.parameters(), lr=0.0001)
        # Start the target network in sync with the online network.
        self.update_target(self.current_model, self.target_model)

        self.replay_buffer = ReplayBuffer(100000)
        self.losses = []
Example #3
0
    def __init__(self, env_id="PongNoFrameskip-v4"):
        """Prioritized-replay DQN agent: env, twin networks, optimizer, buffer.

        Args:
            env_id: Gym Atari environment id.
        """
        self.env_id = env_id
        self.env = make_atari(self.env_id)
        self.env = wrap_deepmind(self.env)
        self.env = wrap_pytorch(self.env)

        obs_shape = self.env.observation_space.shape
        n_actions = self.env.action_space.n
        self.current_model = CnnDQN(obs_shape, n_actions)
        self.target_model = CnnDQN(obs_shape, n_actions)

        if USE_CUDA:
            self.current_model = self.current_model.cuda()
            self.target_model = self.target_model.cuda()

        self.optimizer = optim.Adam(self.current_model.parameters(), lr=0.0001)
        self.replay_buffer = NaivePrioritizedBuffer(100000)
        # Sync the target network with the freshly built online network.
        self.update_target(self.current_model, self.target_model)
        self.losses = []
Example #4
0
    def __init__(self, env_id="PongNoFrameskip-v4", replay_buffer_size=100000):
        """DQN trainer: wrapped Atari env, online/target CnnDQNs, Adam, replay.

        Args:
            env_id: Gym Atari environment id.
            replay_buffer_size: capacity of the experience replay buffer.
        """
        self.env_id = env_id
        self.env = make_atari(self.env_id)
        self.env = wrap_deepmind(self.env)
        self.env = wrap_pytorch(self.env)

        # CnnDQN() builds its own env internally, so no shape args here.
        self.current_model = CnnDQN()
        self.target_model = CnnDQN()
        if torch.cuda.is_available():
            self.current_model = self.current_model.cuda()
            self.target_model = self.target_model.cuda()

        self.optimizer = optim.Adam(self.current_model.parameters(), lr=0.0001)
        self.replay_buffer = ReplayBuffer2(replay_buffer_size)
        self.losses = []

        # Copy the online weights into the target network so both start equal.
        self.target_model.load_state_dict(self.current_model.state_dict())
Example #5
0
    def __init__(self, env_id="PongNoFrameskip-v4"):
        """Dueling-DQN trainer: env, online/target nets, optimizer, buffer.

        Args:
            env_id: Gym Atari environment id.
        """
        self.env_id = env_id
        self.env = make_atari(self.env_id)
        self.env = wrap_deepmind(self.env)
        self.env = wrap_pytorch(self.env)

        obs_shape = self.env.observation_space.shape
        n_actions = self.env.action_space.n
        self.current_model = TinyDuelingCnnDQN(obs_shape, n_actions)
        self.target_model = TinyDuelingCnnDQN(obs_shape, n_actions)
        if torch.cuda.is_available():
            self.current_model = self.current_model.cuda()
            self.target_model = self.target_model.cuda()

        self.optimizer = optim.Adam(self.current_model.parameters(), lr=0.0001)
        self.replay_buffer = ReplayBuffer2(100000)

        # Initialize the target network to match the online network.
        self.update_target(self.current_model, self.target_model)
        self.losses = []