# Example #1
 def __init__(self, *args, **kwargs):
     """Set up the shared fixture: a bootstrapping replay memory."""
     super(Tester, self).__init__(*args, **kwargs)
     # Fixture parameters reused by the individual test cases.
     self.nb_nets = 10
     self.adding_prob = 0.5
     self.limit = 1000
     self.memory = BootstrappingMemory(
         self.nb_nets, self.limit,
         adding_prob=self.adding_prob, window_length=1)
# Example #2
 def test_window_length(self):
     """Sampling must still return a full batch when window_length > 1."""
     win_len = 5
     self.memory = BootstrappingMemory(self.nb_nets,
                                       self.limit,
                                       adding_prob=self.adding_prob,
                                       window_length=win_len)
     # Overfill the buffer so sampling draws from a full memory.
     self.append(int(self.limit * 1.5))
     batch = self.memory.sample(net=5, batch_size=32)
     self.assertEqual(len(batch), 32)
# Example #3
 def test_append_full_memory(self):
     """Overfilled memory is capped; each net holds ~adding_prob of it."""
     self.memory = BootstrappingMemory(self.nb_nets,
                                       self.limit,
                                       adding_prob=self.adding_prob,
                                       window_length=1)
     self.append(int(self.limit * 9.5))
     self.assertEqual(self.memory.nb_entries, self.limit)
     # Rescale both sides by limit/10 so assertAlmostEqual(places=0)
     # tolerates the statistical spread of the per-net index lists.
     scale = self.limit / 10
     expected = self.limit * self.adding_prob / scale
     for net_idx in range(self.nb_nets):
         observed = len(self.memory.index_refs[net_idx]) / scale
         self.assertAlmostEqual(expected, observed, 0)
# Example #4
 def test_append(self):
     """Appended entries are counted; nets hold ~adding_prob of them."""
     self.memory = BootstrappingMemory(self.nb_nets,
                                       self.limit,
                                       adding_prob=self.adding_prob,
                                       window_length=1)
     half = int(self.limit / 2)
     self.append(half)
     self.assertEqual(self.memory.nb_entries, half)
     # Statistical check: this can fail with a low probability.
     for net_idx in range(self.nb_nets):
         share = len(self.memory.index_refs[net_idx]) / half
         self.assertLess(np.abs(self.adding_prob - share), 0.1)
 def __init__(self, *args, **kwargs):
     """Set up the shared fixture: an ensemble IQN-RPF agent with CNN nets."""
     super(Tester, self).__init__(*args, **kwargs)
     self.nb_nets = 3
     # One distributional CNN per ensemble member; only the underlying
     # Keras model object is kept.
     models = [
         NetworkCNNDistributional(nb_ego_states=1,
                                  nb_states_per_vehicle=3,
                                  nb_vehicles=3,
                                  nb_actions=4,
                                  nb_conv_layers=2,
                                  nb_conv_filters=32,
                                  nb_hidden_fc_layers=2,
                                  nb_hidden_neurons=100,
                                  duel=True,
                                  prior=True,
                                  nb_quantiles=32,
                                  nb_cos_embeddings=64,
                                  prior_scale_factor=1).model
         for _ in range(self.nb_nets)
     ]
     eps_greedy = DistributionalEpsGreedyPolicy(eps=0)
     ensemble_test = DistributionalEnsembleTestPolicy()
     replay_memory = BootstrappingMemory(nb_nets=self.nb_nets,
                                         limit=10000,
                                         adding_prob=0.5,
                                         window_length=1)
     self.agent = IqnRpfAgent(models=models,
                              policy=eps_greedy,
                              test_policy=ensemble_test,
                              enable_double_dqn=True,
                              nb_samples_policy=32,
                              nb_sampled_quantiles=32,
                              cvar_eta=1,
                              nb_actions=4,
                              memory=replay_memory,
                              gamma=0.99,
                              batch_size=64,
                              nb_steps_warmup=1000,
                              train_interval=1,
                              memory_interval=1,
                              target_model_update=1000,
                              delta_clip=10)
 def __init__(self, *args, **kwargs):
     """Set up the shared fixture: a parallel ensemble IQN-RPF agent."""
     super(Tester, self).__init__(*args, **kwargs)
     self.nb_nets = 3
     eps_greedy = DistributionalEpsGreedyPolicy(eps=0)
     ensemble_test = DistributionalEnsembleTestPolicy()
     replay_memory = BootstrappingMemory(nb_nets=self.nb_nets,
                                         limit=10000,
                                         adding_prob=0.5,
                                         window_length=1)
     # The parallel agent builds its own networks internally from the
     # architecture keywords below.
     self.agent = IqnRpfAgentParallel(nb_models=self.nb_nets,
                                      nb_actions=4,
                                      memory=replay_memory,
                                      cnn_architecture=True,
                                      learning_rate=0.01,
                                      nb_ego_states=1,
                                      nb_states_per_vehicle=3,
                                      nb_vehicles=3,
                                      nb_conv_layers=2,
                                      nb_conv_filters=32,
                                      nb_hidden_fc_layers=2,
                                      nb_hidden_neurons=100,
                                      nb_cos_embeddings=64,
                                      network_seed=13,
                                      policy=eps_greedy,
                                      test_policy=ensemble_test,
                                      enable_double_dqn=True,
                                      enable_dueling_dqn=True,
                                      nb_samples_policy=32,
                                      nb_sampled_quantiles=32,
                                      cvar_eta=1,
                                      gamma=0.99,
                                      batch_size=64,
                                      nb_steps_warmup=1000,
                                      train_interval=1,
                                      memory_interval=1,
                                      window_length=1,
                                      target_model_update=1000,
                                      delta_clip=10,
                                      prior_scale_factor=1)
                                   write_graph=True,
                                   write_images=False)
# Callbacks handed to the training loop; the three callback objects are
# presumably constructed just above — their definitions are outside this
# chunk, so verify against the full file.
callbacks = [
    tensorboard_callback, save_weights_callback, evaluate_agent_callback
]

# This structure initializes the agent. The different options allows the choice of using a
# convolutional or fully connected neural network architecture,
# and to run the backpropagation of the ensemble members in parallel or sequential.
if p.agent_par['distributional'] and p.agent_par['ensemble']:
    if p.agent_par['parallel']:
        greedy_policy = DistributionalEpsGreedyPolicy(eps=0)
        test_policy = DistributionalEnsembleTestPolicy()
        memory = BootstrappingMemory(
            nb_nets=p.agent_par['number_of_networks'],
            limit=p.agent_par['buffer_size'],
            adding_prob=p.agent_par["adding_prob"],
            window_length=p.agent_par["window_length"])
        agent = IqnRpfAgentParallel(
            nb_models=p.agent_par['number_of_networks'],
            cnn_architecture=p.agent_par['cnn'],
            learning_rate=p.agent_par['learning_rate'],
            nb_ego_states=env.nb_ego_states,
            nb_states_per_vehicle=env.nb_states_per_vehicle,
            nb_vehicles=ps.sim_params['sensor_nb_vehicles'],
            nb_conv_layers=p.agent_par['nb_conv_layers'],
            nb_conv_filters=p.agent_par['nb_conv_filters'],
            nb_hidden_fc_layers=p.agent_par['nb_hidden_fc_layers'],
            nb_hidden_neurons=p.agent_par['nb_hidden_neurons'],
            nb_cos_embeddings=p.agent_par['nb_cos_embeddings'],
            cvar_eta=p.agent_par['cvar_eta'],
    def test_trainable_model(self):
        """End-to-end check of the compiled trainable model's loss.

        Builds a small MLP ensemble IQN agent, then verifies that the
        loss produced by the first trainable model matches a NumPy
        re-implementation of the quantile Huber loss, and that two of
        the reported metrics agree with directly computed average Q
        values.
        """
        nb_inputs = 10
        nb_actions = 5
        nb_quantiles = 32
        batch_size = 64
        delta_clip = 1
        nb_nets = 3
        # One distributional MLP per ensemble member; only the Keras
        # model object is kept.
        models = []
        for _ in range(nb_nets):
            models.append(
                NetworkMLPDistributional(nb_inputs=nb_inputs,
                                         nb_outputs=nb_actions,
                                         nb_hidden_layers=2,
                                         nb_hidden_neurons=100,
                                         nb_quantiles=nb_quantiles,
                                         nb_cos_embeddings=64,
                                         duel=True,
                                         prior=True,
                                         activation='relu',
                                         duel_type='avg',
                                         window_length=1,
                                         prior_scale_factor=1).model)
        greedy_policy = DistributionalEpsGreedyPolicy(eps=0)
        test_policy = DistributionalEnsembleTestPolicy()
        # NOTE(review): uses self.nb_nets, so this relies on the class
        # __init__ having set it — confirm against the enclosing class.
        memory = BootstrappingMemory(nb_nets=self.nb_nets,
                                     limit=10000,
                                     adding_prob=0.5,
                                     window_length=1)
        agent = IqnRpfAgent(models=models,
                            policy=greedy_policy,
                            test_policy=test_policy,
                            enable_double_dqn=True,
                            nb_samples_policy=nb_quantiles,
                            nb_sampled_quantiles=nb_quantiles,
                            cvar_eta=1,
                            nb_actions=nb_actions,
                            memory=memory,
                            gamma=0.99,
                            batch_size=batch_size,
                            nb_steps_warmup=1000,
                            train_interval=1,
                            memory_interval=1,
                            target_model_update=1000,
                            delta_clip=delta_clip)

        # Compiling builds agent.trainable_models; the architecture is
        # also dumped to a PNG for manual inspection.
        agent.compile(Adam(lr=0.01))
        plot_model(agent.trainable_models[0],
                   to_file='iqn_ensemble_trainable_model_2.png',
                   show_shapes=True)

        # Test input: random states, actions, sampled quantile fractions
        # and per-quantile regression targets.
        states = np.random.rand(batch_size, 1, nb_inputs)
        actions = np.random.randint(nb_actions, size=batch_size)
        quantiles = np.random.rand(batch_size, 1, nb_quantiles)
        targets = np.random.rand(batch_size, nb_quantiles)

        predictions = agent.models[0].predict_on_batch([states, quantiles])

        def huber(deltas, quantile):
            # Quantile Huber loss for a single TD error: quadratic below
            # delta_clip, linear above, asymmetrically weighted by the
            # quantile fraction.
            if np.abs(deltas) < delta_clip:
                loss = 0.5 * deltas**2
            else:
                loss = delta_clip * (np.abs(deltas) - 0.5 * delta_clip)
            if deltas > 0:
                loss *= quantile / delta_clip
            else:
                loss *= (1 - quantile) / delta_clip
            if loss < 0:
                raise Exception("Loss should always be positive")
            return loss

        # Reference loss: sum over all target/prediction quantile pairs,
        # averaged over the predicted quantiles.
        true_loss = np.zeros(batch_size)
        for idx in range(batch_size):
            for i in range(nb_quantiles):
                for j in range(nb_quantiles):
                    true_loss[idx] += huber(
                        targets[idx, j] - predictions[idx, i, actions[idx]],
                        quantiles[idx, 0, i])
            true_loss[idx] *= 1 / nb_quantiles

        # One-hot action masks and targets broadcast to the action that
        # was taken; other action slots stay zero.
        masks = np.zeros((batch_size, nb_actions))
        masks[range(batch_size), actions] = 1
        targets_expanded = np.zeros((batch_size, nb_quantiles, nb_actions))
        targets_expanded[range(batch_size), :,
                         actions] = targets[range(batch_size), :]
        out = agent.trainable_models[0].predict_on_batch(
            [states, quantiles, targets_expanded, masks])

        # First output is the per-sample loss, second the raw predictions.
        self.assertTrue(np.isclose(true_loss, out[0]).all())
        self.assertTrue((predictions == out[1]).all())

        # train_on_batch must report the same mean loss (weights are
        # updated after the loss is computed).
        metrics = agent.trainable_models[0].train_on_batch(
            [states, quantiles, targets_expanded, masks],
            [targets, targets_expanded])
        self.assertTrue(np.isclose(np.mean(true_loss), metrics[0]))

        # NOTE(review): metrics[3]/metrics[4] are assumed to be the mean
        # Q and mean max-Q metrics — confirm against the agent's metric
        # ordering.
        average_q_value = np.mean(predictions)
        average_max_q_value = np.mean(
            np.max(np.mean(predictions, axis=1), axis=-1))
        self.assertTrue(np.isclose(average_q_value, metrics[3]))
        self.assertTrue(np.isclose(average_max_q_value, metrics[4]))
# Example #9
class Tester(unittest.TestCase):
    """Unit tests for BootstrappingMemory.

    The memory keeps one index list per ensemble member ("net"); each
    appended sample is added to a member's list with probability
    ``adding_prob``.
    """

    def __init__(self, *args, **kwargs):
        super(Tester, self).__init__(*args, **kwargs)
        # Shared default fixture used by tests that do not build their own.
        self.limit = 1000
        self.adding_prob = 0.5
        self.nb_nets = 10
        self.memory = BootstrappingMemory(self.nb_nets,
                                          self.limit,
                                          adding_prob=self.adding_prob,
                                          window_length=1)

    def test_init(self):
        """A new memory reports its configured limit and starts empty."""
        config = self.memory.get_config()
        self.assertEqual(config['limit'], self.limit)
        # assertEqual instead of assertTrue(... == 0) for a useful
        # failure message.
        self.assertEqual(self.memory.nb_entries, 0)

    def append(self, nb_samples):
        """Helper: append nb_samples random transitions (~10% terminal)."""
        for _ in range(nb_samples):
            observation = np.random.rand()
            action = np.random.rand()
            reward = np.random.rand()
            terminal = np.random.rand() < 0.1
            self.memory.append(observation, action, reward, terminal)

    def test_append(self):
        """Each net's index list holds ~adding_prob of appended samples."""
        self.memory = BootstrappingMemory(self.nb_nets,
                                          self.limit,
                                          adding_prob=self.adding_prob,
                                          window_length=1)
        nb_samples = int(self.limit / 2)
        self.append(nb_samples)
        self.assertEqual(self.memory.nb_entries, nb_samples)
        # Statistical check: this can fail with a low probability.
        for i in range(self.nb_nets):
            self.assertLess(
                np.abs(self.adding_prob -
                       len(self.memory.index_refs[i]) / nb_samples), 0.1)

    def test_append_full_memory(self):
        """Entries are capped at the limit when the memory overflows."""
        self.memory = BootstrappingMemory(self.nb_nets,
                                          self.limit,
                                          adding_prob=self.adding_prob,
                                          window_length=1)
        nb_samples = int(self.limit * 9.5)
        self.append(nb_samples)
        self.assertEqual(self.memory.nb_entries, self.limit)
        # Rescale both sides by limit/10 so assertAlmostEqual(places=0)
        # tolerates the statistical spread of the index-list sizes.
        for i in range(self.nb_nets):
            self.assertAlmostEqual(
                self.limit * self.adding_prob / (self.limit / 10),
                len(self.memory.index_refs[i]) / (self.limit / 10), 0)

    def test_get_recent_state(self):
        """With window_length 1 the recent state is just [observation]."""
        state_in = 5
        state_out = self.memory.get_recent_state(state_in)
        self.assertEqual([state_in], state_out)

    def test_sample(self):
        """Sampling returns exactly batch_size entries from a full memory."""
        nb_samples = int(self.limit * 1.5)
        self.append(nb_samples)
        sample = self.memory.sample(net=5, batch_size=32)
        self.assertEqual(len(sample), 32)

    def test_window_length(self):
        """Sampling also works with a window length greater than one."""
        window_length = 5
        self.memory = BootstrappingMemory(self.nb_nets,
                                          self.limit,
                                          adding_prob=self.adding_prob,
                                          window_length=window_length)
        nb_samples = int(self.limit * 1.5)
        self.append(nb_samples)
        sample = self.memory.sample(net=5, batch_size=32)
        self.assertEqual(len(sample), 32)