Example #1
    def test_dqn_agent(self):
        config = {
            'seed': 10,
            'batch_size': 16,
            'state_shape': (2, ),
            'actions': 2,
            'action_shape': (),
            'update_rate': 1,
            'update_repeat': 4,
            'min_replay_size': 50,
            'memory_capacity': 50,
            "exploration": "epsilon_decay",
            "exploration_param": {
                "epsilon": 1,
                "epsilon_final": 0,
                "epsilon_states": 50
            },
            'target_network_update_rate': 1.0,
            'use_target_network': True,
            "alpha": 0.0005,
            "gamma": 0.99,
            "tau": 1.0
        }

        tf.reset_default_graph()
        tf.set_random_seed(10)

        config = create_config(config)
        network_builder = NeuralNetwork.layered_network(
            layers=[{
                'type': 'dense',
                'num_outputs': 16
            }, {
                'type': 'linear',
                'num_outputs': 2
            }])
        agent = DQNAgent(config=config, network_builder=network_builder)

        # Deterministic two-state task: action 1 always yields reward 1.0,
        # so a converged agent collects 100 consecutive rewards.
        state = (1, 0)
        rewards = [0.0] * 100
        for n in range(10000):
            action = agent.get_action(state=state)
            if action == 0:
                state = (1, 0)
                reward = 0.0
                terminal = False
            else:
                state = (0, 1)
                reward = 1.0
                terminal = False
            agent.add_observation(state=state,
                                  action=action,
                                  reward=reward,
                                  terminal=terminal)
            rewards[n % 100] = reward

            if sum(rewards) == 100.0:
                return

        assert sum(rewards) == 100.0
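For reference, a minimal sketch of the setup both snippets assume; the module paths follow the early (pre-1.0) TensorForce layout and, like the TestAgents class name, are assumptions rather than verified API:

# Assumed imports and test harness; module paths are a best guess at the
# pre-1.0 TensorForce layout, and TestAgents is a hypothetical class name.
import unittest

import tensorflow as tf

from tensorforce.config import create_config
from tensorforce.neural_networks import NeuralNetwork
from tensorforce.rl_agents import DQNAgent, TRPOAgent


class TestAgents(unittest.TestCase):
    # test_dqn_agent and test_trpo_agent below are defined as methods here.
    ...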
Example #2
    def test_trpo_agent(self):
        config = {
            'batch_size': 8,
            'max_episode_length': 4,
            'continuous': False,
            'state_shape': (2, ),
            'actions': 2,
            'action_shape': ()
        }
        tf.reset_default_graph()

        config = create_config(config)
        network_builder = NeuralNetwork.layered_network(
            layers=[{
                'type': 'dense',
                'num_outputs': 32
            }, {
                'type': 'linear',
                'num_outputs': 2
            }])
        agent = TRPOAgent(config=config, network_builder=network_builder)

        # Same two-state task, except action 1 also terminates the episode.
        state = (1, 0)
        rewards = [0.0] * 100
        for n in range(1000):
            action = agent.get_action(state=state)
            if action == 0:
                state = (1, 0)
                reward = 0.0
                terminal = False
            else:
                state = (0, 1)
                reward = 1.0
                terminal = True
            agent.add_observation(state=state,
                                  action=action,
                                  reward=reward,
                                  terminal=terminal)
            rewards[n % 100] = reward
            if sum(rewards) == 100.0:
                return

        assert sum(rewards) == 100.0
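Either example can then be run as an ordinary unittest; a minimal sketch, again assuming the hypothetical TestAgents class above:

if __name__ == '__main__':
    # Runs the test methods defined on the hypothetical TestAgents class.
    unittest.main()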