    def test_blogpost_introduction(self):
        """
        Test of introduction blog post examples.
        """
        import tensorflow as tf
        import numpy as np

        ### DQN agent example

        from tensorforce import Configuration
        from tensorforce.agents import DQNAgent

        # The agent is configured with a single configuration object:
        # a replay memory of 1000 transitions, updates on batches of 8
        # transitions, a first update after 100 timesteps, and a
        # target-network sync every 10 timesteps.
        config = Configuration(memory=dict(type='replay', capacity=1000),
                               batch_size=8,
                               first_update=100,
                               target_sync_frequency=10)

        # Network is an ordered list of layers
        network_spec = [
            dict(type='dense', size=32),
            dict(type='dense', size=32)
        ]

        # Define a state
        states = dict(shape=(10, ), type='float')

        # Define an action
        actions = dict(type='int', num_actions=5)

        agent = DQNAgent(states_spec=states,
                         actions_spec=actions,
                         network_spec=network_spec,
                         config=config)
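
        # Minimal usage sketch (assuming the act/observe interface shown in the
        # runner example below); not executed as part of this test:
        # action = agent.act(states=np.random.random(size=(10,)))
        # agent.observe(reward=0.0, terminal=False)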

        agent.close()

        ### Code block: multiple states
        states = dict(image=dict(shape=(64, 64, 3), type='float'),
                      caption=dict(shape=(20, ), type='int'))

        # DQNAgent with a plain layered network does not support multiple states,
        # so this configuration is not tested here.
        # agent = DQNAgent(config=config)
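        # Combining several inputs like these requires a custom network; see the
        # CustomNetwork example further below.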

        ### Code block: DQN observer function

        def observe(self, reward, terminal):
            super(DQNAgent, self).observe(reward, terminal)
            if self.timestep >= self.first_update \
                    and self.timestep % self.target_update_frequency == 0:
                self.model.update_target()
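
        # This function only illustrates how DQN's observe step triggers the
        # periodic target-network update; it is not wired into an agent here.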

        ### Code block: Network config JSON

        network_json = """
        [
            {
                "type": "conv2d",
                "size": 32,
                "window": 8,
                "stride": 4
            },
            {
                "type": "conv2d",
                "size": 64,
                "window": 4,
                "stride": 2
            },
            {
                "type": "flatten"
            },
            {
                "type": "dense",
                "size": 512
            }
        ]
        """

        ### Test json

        import json
        network_spec = json.loads(network_json)
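
        # Sanity check (added for illustration): the JSON maps directly onto the
        # list-of-dicts network specification used above.
        assert network_spec[0]['type'] == 'conv2d' and network_spec[-1]['size'] == 512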

        ### Code block: Modified dense layer

        modified_dense = """
        [
            {
                "type": "dense",
                "size": 64,
                "bias": false,
                "activation": "selu",
                "l2_regularization": 0.001
            }
        ]
        """

        ### Test json
        network_spec = json.loads(modified_dense)

        ### Code block: Own layer type
        from tensorforce.core.networks import Layer

        class BatchNormalization(Layer):
            def __init__(self,
                         variance_epsilon=1e-6,
                         scope='batchnorm',
                         summary_labels=None):
                super(BatchNormalization,
                      self).__init__(scope=scope,
                                     summary_labels=summary_labels)
                self.variance_epsilon = variance_epsilon

            def tf_apply(self, x, update):
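                # Normalize over all but the last (feature) axis, without a
                # learned offset or scale.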
                mean, variance = tf.nn.moments(x,
                                               axes=tuple(
                                                   range(x.shape.ndims - 1)))
                return tf.nn.batch_normalization(
                    x=x,
                    mean=mean,
                    variance=variance,
                    offset=None,
                    scale=None,
                    variance_epsilon=self.variance_epsilon)

        ### Test own layer

        states = dict(shape=(10, ), type='float')
        network_spec = [{
            'type': 'dense',
            'size': 32
        }, {
            'type': BatchNormalization,
            'variance_epsilon': 1e-9
        }]
        config = Configuration(memory=dict(type='replay', capacity=1000),
                               batch_size=8)
        agent = DQNAgent(states_spec=states,
                         actions_spec=actions,
                         network_spec=network_spec,
                         config=config)

        agent.close()

        ### Code block: Own network builder
        from tensorforce.core.networks import Network

        class CustomNetwork(Network):
            def tf_apply(self, x, internals, update, return_internals=False):
                image = x['image']  # 64x64x3-dim, float
                caption = x['caption']  # 20-dim, int
                initializer = tf.random_normal_initializer(mean=0.0,
                                                           stddev=0.01,
                                                           dtype=tf.float32)

                # CNN
                weights = tf.get_variable(name='W1',
                                          shape=(3, 3, 3, 16),
                                          initializer=initializer)
                image = tf.nn.conv2d(image,
                                     filter=weights,
                                     strides=(1, 1, 1, 1),
                                     padding='SAME')
                image = tf.nn.relu(image)
                image = tf.nn.max_pool(image,
                                       ksize=(1, 2, 2, 1),
                                       strides=(1, 2, 2, 1),
                                       padding='SAME')

                weights = tf.get_variable(name='W2',
                                          shape=(3, 3, 16, 32),
                                          initializer=initializer)
                image = tf.nn.conv2d(image,
                                     filter=weights,
                                     strides=(1, 1, 1, 1),
                                     padding='SAME')
                image = tf.nn.relu(image)
                image = tf.nn.max_pool(image,
                                       ksize=(1, 2, 2, 1),
                                       strides=(1, 2, 2, 1),
                                       padding='SAME')

                image = tf.reshape(image, shape=(-1, 16 * 16, 32))
                image = tf.reduce_mean(image, axis=1)

                # LSTM
                weights = tf.get_variable(name='W3',
                                          shape=(30, 32),
                                          initializer=initializer)
                caption = tf.nn.embedding_lookup(params=weights, ids=caption)
                lstm = tf.contrib.rnn.LSTMCell(num_units=32)
                caption, _ = tf.nn.dynamic_rnn(cell=lstm,
                                               inputs=caption,
                                               dtype=tf.float32)
                caption = tf.reduce_mean(caption, axis=1)

                # Combination
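                # Both branches end in 32-dim vectors (spatial mean of the CNN
                # features, time mean of the LSTM outputs), combined elementwise.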
                if return_internals:
                    return tf.multiply(image, caption), list()
                else:
                    return tf.multiply(image, caption)

        ### Test own network builder

        states = dict(image=dict(shape=(64, 64, 3), type='float'),
                      caption=dict(shape=(20, ), type='int'))
        config = Configuration(memory=dict(type='replay', capacity=1000),
                               batch_size=8)
        agent = DQNAgent(states_spec=states,
                         actions_spec=actions,
                         network_spec=CustomNetwork,
                         config=config)

        agent.close()

        ### Code block: LSTM function
        from tensorforce.core.networks import Layer

        class Lstm(Layer):
            def __init__(self, size, scope='lstm', summary_labels=()):
                self.size = size
                super(Lstm, self).__init__(num_internals=1,
                                           scope=scope,
                                           summary_labels=summary_labels)

            def tf_apply(self, x, update, state):
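                # The single internal state arrives as a (batch, 2, size) tensor
                # holding the LSTM cell and hidden states.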
                state = tf.contrib.rnn.LSTMStateTuple(c=state[:, 0, :],
                                                      h=state[:, 1, :])
                self.lstm_cell = tf.contrib.rnn.LSTMCell(num_units=self.size)

                x, state = self.lstm_cell(inputs=x, state=state)

                internal_output = tf.stack(values=(state.c, state.h), axis=1)
                return x, (internal_output, )

            def internal_inputs(self):
                return super(Lstm, self).internal_inputs() + [
                    tf.placeholder(dtype=tf.float32,
                                   shape=(None, 2, self.size))
                ]

            def internal_inits(self):
                return super(Lstm, self).internal_inits() + [
                    np.zeros(shape=(2, self.size))
                ]

        ### Test LSTM
        states = dict(shape=(10, ), type='float')
        network_spec = [{'type': 'flatten'}, {'type': Lstm, 'size': 10}]
        config = Configuration(memory=dict(type='replay', capacity=1000),
                               batch_size=8)

        agent = DQNAgent(states_spec=states,
                         actions_spec=actions,
                         network_spec=network_spec,
                         config=config)

        agent.close()

        ### Preprocessing configuration
        states = dict(shape=(84, 84, 3), type='float')
        preprocessing = [
            dict(type='image_resize', width=84, height=84),
            dict(type='grayscale'),
            dict(type='normalize'),
            dict(type='sequence', length=4)
        ]
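        # Assumed effect of this pipeline: frames are resized to 84x84,
        # converted to grayscale, normalized, and the last 4 frames are stacked
        # into a single state.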
        config = Configuration(memory=dict(type='replay', capacity=1000),
                               batch_size=8,
                               first_update=100,
                               target_sync_frequency=50,
                               preprocessing=preprocessing)

        ### Test preprocessing configuration

        agent = DQNAgent(states_spec=states,
                         actions_spec=actions,
                         network_spec=network_spec,
                         config=config)

        agent.close()

        ### Code block: Continuous action exploration

        exploration = dict(type='ornstein_uhlenbeck',
                           sigma=0.1,
                           mu=0,
                           theta=0.1)
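        # Ornstein-Uhlenbeck noise is temporally correlated: each step it drifts
        # back towards mu at rate theta, perturbed by noise of scale sigma.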
        config = Configuration(memory=dict(type='replay', capacity=1000),
                               batch_size=8,
                               exploration=exploration)

        ### Test continuous action exploration

        agent = DQNAgent(states_spec=states,
                         actions_spec=actions,
                         network_spec=network_spec,
                         config=config)

        agent.close()

        ### Code block: Discrete action exploration

        exploration = dict(type='epsilon_decay',
                           initial_epsilon=1.0,
                           final_epsilon=0.01,
                           timesteps=1e6)
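        # Epsilon-greedy exploration, annealed from 1.0 down to 0.01 over the
        # first 1e6 timesteps.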
        config = Configuration(memory=dict(type='replay', capacity=1000),
                               batch_size=8,
                               exploration=exploration)

        ### Test discrete action exploration
        agent = DQNAgent(states_spec=states,
                         actions_spec=actions,
                         network_spec=network_spec,
                         config=config)

        agent.close()

    def test_blogpost_introduction_runner(self):
        from tensorforce.tests.minimal_test import MinimalTest
        from tensorforce.agents import DQNAgent
        from tensorforce.execution import Runner

        environment = MinimalTest(specification={'int': ()})

        network_spec = [dict(type='dense', size=32)]
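
        # Unlike the first test, the agent here is built with the newer
        # constructor arguments (states/actions/network/memory) instead of a
        # Configuration object.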

        agent = DQNAgent(states=environment.states,
                         actions=environment.actions,
                         network=network_spec,
                         memory=dict(type='replay',
                                     include_next_states=True,
                                     capacity=100),
                         target_sync_frequency=50)
        runner = Runner(agent=agent, environment=environment)

        def episode_finished(runner):
            if runner.episode % 100 == 0:
                print(sum(runner.episode_rewards[-100:]) / 100)
            return runner.episode < 100 \
                or not all(reward >= 1.0 for reward in runner.episode_rewards[-100:])

        # runner.run(episodes=1000, episode_finished=episode_finished)
        runner.run(episodes=10, episode_finished=episode_finished)  # Only 10 episodes for this test
        runner.close()

        ### Code block: explicit interaction loop without the Runner
        agent = DQNAgent(states=environment.states,
                         actions=environment.actions,
                         network=network_spec,
                         memory=dict(type='replay',
                                     include_next_states=True,
                                     capacity=100),
                         target_sync_frequency=50)

        # max_episodes = 1000
        max_episodes = 10  # Only 10 episodes for this test
        max_timesteps = 2000
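
        # The explicit loop below reproduces what Runner.run does: reset the
        # environment and agent, step until terminal or the timestep limit, and
        # track episode rewards.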

        episode = 0
        episode_rewards = list()

        while True:
            state = environment.reset()
            agent.reset()

            timestep = 0
            episode_reward = 0
            while True:
                action = agent.act(states=state)
                state, terminal, reward = environment.execute(actions=action)
                agent.observe(terminal=terminal, reward=reward)

                timestep += 1
                episode_reward += reward

                if terminal or timestep == max_timesteps:
                    break

            episode += 1
            episode_rewards.append(episode_reward)

            if all(reward >= 1.0 for reward in
                   episode_rewards[-100:]) or episode == max_episodes:
                break

        agent.close()
        environment.close()
Example #3

import numpy as np

from tensorforce import Configuration
from tensorforce.agents import DQNAgent
from tensorforce.environments import Environment
from tensorforce.execution import Runner

# NOTE: these module-level constants were undefined in the original snippet;
# the values below are assumptions chosen so that the example runs.
max_seq_length = 10  # length of each half of the tape
num_vals = 9         # symbols 0..8 are writable; action index 9 means "done"


class Toy(Environment):
    """Toy copy task: a random sequence is written on the first half of a
    tape and the agent has to reproduce it on the second half."""

    def __init__(self):
        self.reset()

    def __str__(self):
        return "Toy%s" % str(self.state)

    def close(self):
        pass

    def reset(self):
        # First half of the tape holds the target sequence, second half is writable.
        self.state = [0] * (max_seq_length * 2)
        self.tape_head = max_seq_length
        self.seq_length = np.random.randint(1, max_seq_length)
        self.seq = np.random.randint(1, num_vals, size=(self.seq_length,))
        self.state[:self.seq_length] = self.seq
        return np.array(self.state)

    def execute(self, actions):
        reward = 0.0
        terminal = False

        if actions != num_vals:
            # Write the chosen symbol at the tape head and advance it.
            self.state[self.tape_head] = actions
            self.tape_head += 1
            terminal = self.tape_head >= len(self.state)
        else:
            # "Done" action: reward is the fraction of correctly copied symbols
            # (assumed interpretation; the original reward logic was incomplete).
            written = self.state[max_seq_length:max_seq_length + self.seq_length]
            correct = np.sum(np.array(written) == self.seq)
            reward += float(correct) / self.seq_length
            terminal = True

        return np.array(self.state), terminal, reward

    @property
    def states(self):
        return dict(shape=(max_seq_length * 2,), type='int')

    @property
    def actions(self):
        return dict(type='int', num_actions=num_vals + 1)



# The agent is configured with a single configuration object
config = Configuration(
    memory=dict(
        type='replay',
        capacity=1000
    ),
    batch_size=8,
    first_update=100,
    target_sync_frequency=10
)

# Network is an ordered list of layers
network_spec = [dict(type='dense', size=32), dict(type='dense', size=32)]

environment = Toy()

agent = DQNAgent(
    states_spec=environment.states,
    actions_spec=environment.actions,
    network_spec=network_spec,
    config=config
)

runner = Runner(agent=agent, environment=environment)

def episode_finished(runner):
    if runner.episode % 100 == 0:
        print(sum(runner.episode_rewards[-100:]) / 100)
    return runner.episode < 100 \
        or not all(reward >= 1.0 for reward in runner.episode_rewards[-100:])

runner.run(episodes=1000, episode_finished=episode_finished)