Example #1
    def test_get_item_not_full(self):
        buf = RingBuffer(3)
        buf.append("test1")
        buf.append("test2")
        buf.append("test3")
        self.assertEqual(buf[0], "test1")
        self.assertEqual(buf[1], "test2")
        self.assertEqual(buf[2], "test3")
Example #2
    def test_threading(self):
        buf = RingBuffer(3)
        buf.append("test1")
        buf.append("test2")
        buf.append("test3")

        thread1 = self.__class__.TestIterThreading(buf)
        thread2 = self.__class__.TestIterThreading(buf)
        thread1.start()
        thread2.start()
        thread1.join(3)
        thread2.join(3)
        self.assertListEqual(thread1.actual, ["test1", "test2", "test3"])
        self.assertListEqual(thread2.actual, ["test1", "test2", "test3"])
Example #3
    def test_len(self):
        buf = RingBuffer(3)
        buf.append("test1")
        self.assertEqual(len(buf), 1)
        buf.append("test2")
        self.assertEqual(len(buf), 2)
        buf.append("test3")
        self.assertEqual(len(buf), 3)
        buf.append("test4")
        self.assertEqual(len(buf), 3)
Example #4
    def test_iter(self):
        buf = RingBuffer(3)
        buf.append("test1")
        buf.append("test2")
        buf.append("test3")
        buf.append("test4")
        buf.append("test5")
        actual1 = []
        for i in buf:
            actual1.append(i)
        self.assertListEqual(actual1, ["test3", "test4", "test5"])

        actual2 = []
        for i in buf:
            actual2.append(i)
        self.assertListEqual(actual2, ["test3", "test4", "test5"])
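
The tests above pin down the RingBuffer contract: appends overwrite the oldest entry once the capacity is reached, len() is capped at the capacity, index 0 is the oldest surviving element, and the buffer can be iterated repeatedly (and from multiple threads) from oldest to newest. The class below is a minimal sketch that satisfies those tests, not the project's actual implementation; the internal lock and the snapshot taken in __iter__ are assumptions made to keep concurrent readers consistent.

import threading


class RingBuffer:
    """Fixed-size buffer that overwrites its oldest entry when full."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.data = []
        self.start = 0  # index of the oldest element
        self.lock = threading.Lock()

    def append(self, item):
        with self.lock:
            if len(self.data) < self.capacity:
                self.data.append(item)
            else:
                self.data[self.start] = item
                self.start = (self.start + 1) % self.capacity

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        # buf[0] is the oldest element still held by the buffer.
        return self.data[(self.start + index) % len(self.data)]

    def __iter__(self):
        # Snapshot under the lock so concurrent iterations stay consistent.
        with self.lock:
            items = [self[i] for i in range(len(self.data))]
        return iter(items)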
Example #5
import random
from random import randint

import numpy as np

# RingBuffer and DQNModel are defined elsewhere in this project.


class DQNAgent:
    def __init__(self, env, action_size, config):
        self.memory = RingBuffer(int(
            config.config_section_map()['memorysize']))
        self.gamma = float(
            config.config_section_map()['gamma'])  # discount rate
        self.epsilon = float(
            config.config_section_map()['epsilon'])  # exploration rate
        self.epsilon_min = float(config.config_section_map()['epsilonmin'])
        self.epsilon_decay = float(config.config_section_map()['epsilondecay'])
        self.learning_rate = float(config.config_section_map()['learningrate'])
        self.action_size = action_size
        self.env = env
        self.dqn_model = DQNModel(self.learning_rate, action_size)

    def remember(self, state, action, reward, next_state, done):
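        # Frames are stored as uint8 to keep the replay memory compact, and the
        # reward is clipped to {-1, 0, +1} via np.sign.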
        state = state.astype('uint8')
        next_state = next_state.astype('uint8')

        reward = np.sign(reward)

        self.memory.append((state, action, reward, next_state, done))

    def action(self, fi_t, env_sample, csv_handler):

        num_random = random.uniform(0, 1)

        if num_random <= self.epsilon:  # with probability epsilon do a random action
            return env_sample
        else:
            fi_t = np.expand_dims(fi_t, axis=0)
            action = self.dqn_model.model.predict(
                [fi_t, np.ones([1, self.action_size])])
            csv_handler.write_q_values(action)
            return np.argmax(action[0])

    def replay(self, batch_size, csv_logger):
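        # Sample a random minibatch from replay memory, build one-step TD targets
        # with the target network, and fit the online network to them.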

        states = np.zeros((batch_size, 4, 84, 84), dtype='float32')
        actions = np.zeros((batch_size, 4), dtype='uint8')
        rewards = np.zeros(batch_size, dtype='float32')
        next_states = np.zeros((batch_size, 4, 84, 84), dtype='float32')
        dones = np.ones((batch_size, 4), dtype=bool)

        mini_batch = self.get_minibatch(
            batch_size)  # sample random mini_batch from D

        # Unpack the sampled transitions into the preallocated arrays.
        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            states[i] = state.astype('float32')
            next_states[i] = next_state.astype('float32')
            actions[i][action] = 1
            rewards[i] = reward
            dones[i] = [done, done, done, done]

        # Bootstrap Q-values for the next states from the target network; the
        # all-ones mask requests Q-values for every action.
        next_state_q_values = self.dqn_model.target_model.predict(
            [next_states, np.ones(actions.shape)])

        # Terminal transitions contribute only their immediate reward.
        next_state_q_values[dones] = 0

        q_values = rewards + self.gamma * np.max(next_state_q_values, axis=1)

        # Fit the online network to the TD targets; the one-hot action mask keeps
        # the loss restricted to the Q-value of the action actually taken.
        self.dqn_model.model.fit([states, actions],
                                 actions * q_values[:, None],
                                 batch_size=batch_size,
                                 verbose=0,
                                 callbacks=[csv_logger])

    def get_minibatch(self, batch_size):
        # Sample uniformly at random (with replacement) from the replay memory.
        mini_batch = []
        for _ in range(batch_size):
            index = randint(0, len(self.memory) - 1)
            mini_batch.append(self.memory[index])
        return mini_batch

    def load(self, name):
        self.dqn_model.model.load_weights(name)
        self.dqn_model.update_target_model()

    def save(self, name):
        self.dqn_model.model.save_weights(name)

    def decrease_epsilone(self):
        if self.epsilon > self.epsilon_min:
            self.epsilon -= self.epsilon_decay
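
For context, a driver loop for DQNAgent might look like the sketch below. It is a hypothetical illustration, not code from this project: the environment is assumed to follow the classic Gym reset/step API, preprocess() stands in for whatever produces the (4, 84, 84) frame stacks that replay() expects, and csv_handler / csv_logger stand in for the project's CSV logging helpers.

def train(agent, env, episodes, batch_size, csv_handler, csv_logger):
    for episode in range(episodes):
        state = preprocess(env.reset())  # assumed to yield a (4, 84, 84) stack
        done = False
        while not done:
            # Epsilon-greedy step: a random sample is passed in case exploration wins.
            action = agent.action(state, env.action_space.sample(), csv_handler)
            observation, reward, done, _ = env.step(action)
            next_state = preprocess(observation)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            # Learn once enough transitions have been collected.
            if len(agent.memory) >= batch_size:
                agent.replay(batch_size, csv_logger)
        agent.decrease_epsilone()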