Code example #1
    def test_save_load_batch_norm(self):
        state_dim = 8
        action_dim = 4
        model = FullyConnectedParametricDQN(
            state_dim,
            action_dim,
            sizes=[8, 4],
            activations=["relu", "relu"],
            use_batch_norm=True,
        )
        # Freeze batch norm (eval mode uses the running statistics)
        model.eval()
        expected_num_params, expected_num_inputs, expected_num_outputs = 21, 2, 1
        check_save_load(
            self, model, expected_num_params, expected_num_inputs, expected_num_outputs
        )
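For context, one plausible reading of expected_num_params = 21 is the number of saved tensors. A minimal sketch, assuming the network places a BatchNorm1d before each of its three Linear layers (two hidden layers from sizes=[8, 4] plus the scalar Q-value head); the layout is an assumption, not taken from the model's actual source:

import torch.nn as nn

state_dim, action_dim = 8, 4
# Assumed layout: BatchNorm1d before every Linear layer, hidden sizes [8, 4],
# then a final Linear producing the single Q-value.
net = nn.Sequential(
    nn.BatchNorm1d(state_dim + action_dim),
    nn.Linear(state_dim + action_dim, 8),
    nn.ReLU(),
    nn.BatchNorm1d(8),
    nn.Linear(8, 4),
    nn.ReLU(),
    nn.BatchNorm1d(4),
    nn.Linear(4, 1),
)
# Each Linear stores 2 tensors; each BatchNorm1d stores 5
# (weight, bias, running_mean, running_var, num_batches_tracked),
# so 3 * 2 + 3 * 5 = 21 entries in the state dict.
print(len(net.state_dict()))  # 21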
Code example #2
    def test_basic(self):
        state_dim = 8
        action_dim = 4
        model = FullyConnectedParametricDQN(
            state_dim,
            action_dim,
            sizes=[8, 4],
            activations=["relu", "relu"],
            use_batch_norm=True,
        )
        input = model.input_prototype()
        self.assertEqual((1, state_dim), input.state.float_features.shape)
        self.assertEqual((1, action_dim), input.action.float_features.shape)
        # Batch norm needs more than one example in training mode, so
        # switch to eval mode before running the single prototype input
        model.eval()
        single_q_value = model(input)
        self.assertEqual((1, 1), single_q_value.q_value.shape)
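The model.eval() call matters here: in training mode, PyTorch's BatchNorm1d cannot compute batch statistics from a single example and raises an error. A minimal standalone demonstration in plain PyTorch, independent of the model above:

import torch
import torch.nn as nn

bn = nn.BatchNorm1d(4)
x = torch.randn(1, 4)  # a batch of one, like input_prototype() produces

bn.train()
try:
    bn(x)
except ValueError as err:
    print(err)  # "Expected more than 1 value per channel when training, ..."

bn.eval()
print(bn(x).shape)  # torch.Size([1, 4]); eval mode uses running statistics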
Code example #3
    def test_slate_q_trainer(self):
        recsim = RecSim(num_users=10)

        # Build memory pool with a random policy
        memory_pool = OpenAIGymMemoryPool(10000000)
        random_reward = recsim.rollout_policy(random_policy, memory_pool)

        # Train a model
        q_network = FullyConnectedParametricDQN(
            state_dim=memory_pool.state_dim,
            action_dim=memory_pool.action_dim,
            sizes=[64, 32],
            activations=["relu", "relu"],
        )

        # Baseline: roll out the untrained network's top-k policy
        q_network = q_network.eval()
        recsim.reset()
        untrained_policy_reward = recsim.rollout_policy(
            partial(top_k_policy, q_network)
        )
        q_network = q_network.train()

        q_network_target = q_network.get_target_network()
        parameters = SlateQTrainerParameters()
        trainer = SlateQTrainer(q_network, q_network_target, parameters)

        for _i in range(1000):
            tdp = memory_pool.sample_memories(
                128, model_type=ModelType.PYTORCH_PARAMETRIC_DQN.value
            )
            training_batch = tdp.as_slate_q_training_batch()
            trainer.train(training_batch)

        # Roll out the trained policy and compare rewards
        q_network = q_network.eval()
        recsim.reset()
        trained_policy_reward = recsim.rollout_policy(
            partial(top_k_policy, q_network)
        )

        print(
            f"Reward; random: {random_reward}; untrained: {untrained_policy_reward}; "
            f"trained: {trained_policy_reward}"
        )

        self.assertGreater(trained_policy_reward, untrained_policy_reward)
        self.assertGreater(trained_policy_reward, random_reward)
        self.assertEqual(random_reward, 1384.0)
        self.assertEqual(untrained_policy_reward, 1200.0)
        self.assertEqual(trained_policy_reward, 1432.0)
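The test treats top_k_policy as given. As a rough illustration only (the function body and the Q-network call signature below are assumptions for the sketch, not the test's actual definition), a top-k slate policy scores every candidate action with the Q-network and recommends the k highest-scoring ones:

import torch

def top_k_policy(q_network, state, candidate_actions, k=5):
    """Hypothetical sketch: rank candidate actions by Q-value and return
    the indices of the top k as the recommended slate."""
    num_candidates = candidate_actions.shape[0]
    # Score every candidate against the same (tiled) user state
    tiled_state = state.expand(num_candidates, -1)
    q_values = q_network(tiled_state, candidate_actions)  # assumed signature
    return torch.topk(q_values.squeeze(-1), k).indices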