    def test_blogpost_introduction(self):
        """
        Tests the code examples from the introductory blog post.
        """
        import tensorflow as tf
        import numpy as np

        ### DQN agent example
        from tensorforce.agents import DQNAgent

        # Network is an ordered list of layers
        network_spec = [dict(type='dense', size=32), dict(type='dense', size=32)]

        # Define a state
        states = dict(shape=(10,), type='float')

        # Define an action
        actions = dict(type='int', num_actions=5)

        agent = DQNAgent(
            states_spec=states,
            actions_spec=actions,
            network_spec=network_spec,
            memory=dict(
                type='replay',
                capacity=1000
            ),
            batch_size=8,
            first_update=100,
            target_sync_frequency=10
        )

        agent.close()

        ### Code block: multiple states
        states = dict(
            image=dict(shape=(64, 64, 3), type='float'),
            caption=dict(shape=(20,), type='int')
        )

        # DQN does not support multiple states. Omit test for now.
        # agent = DQNAgent(config=config)

        ### Code block: DQN observer function

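        # Illustrative override from the blog post: observe() triggers a target-network update
        # every target_update_frequency timesteps. It is defined here but not wired into the agent.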
        def observe(self, reward, terminal):
            super(DQNAgent, self).observe(reward, terminal)
            if self.timestep >= self.first_update \
                    and self.timestep % self.target_update_frequency == 0:
                self.model.update_target()

        ### Code block: Network config JSON

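        # The same kind of layer list can be written as JSON and parsed into a network spec.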
        network_json = """
        [
            {
                "type": "conv2d",
                "size": 32,
                "window": 8,
                "stride": 4
            },
            {
                "type": "conv2d",
                "size": 64,
                "window": 4,
                "stride": 2
            },
            {
                "type": "flatten"
            },
            {
                "type": "dense",
                "size": 512
            }
        ]
        """

        ### Test JSON

        import json
        network_spec = json.loads(network_json)

        ### Code block: Modified dense layer

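        # A dense layer with per-layer options: no bias, SELU activation and L2 regularization.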
        modified_dense = """
        [
            {
                "type": "dense",
                "size": 64,
                "bias": false,
                "activation": "selu",
                "l2_regularization": 0.001
            }
        ]
        """

        ### Test JSON
        network_spec = json.loads(modified_dense)

        ### Code block: Own layer type
        from tensorforce.core.networks import Layer

        class BatchNormalization(Layer):

            def __init__(self, variance_epsilon=1e-6, scope='batchnorm', summary_labels=None):
                super(BatchNormalization, self).__init__(scope=scope, summary_labels=summary_labels)
                self.variance_epsilon = variance_epsilon

            def tf_apply(self, x, update):
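                # Normalize over all axes except the last (feature) dimension.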
                mean, variance = tf.nn.moments(x, axes=tuple(range(x.shape.ndims - 1)))
                return tf.nn.batch_normalization(
                    x=x,
                    mean=mean,
                    variance=variance,
                    offset=None,
                    scale=None,
                    variance_epsilon=self.variance_epsilon
                )

        ### Test own layer

        states = dict(shape=(10,), type='float')
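        # The custom layer class is passed directly as the layer 'type'.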
        network_spec = [
            {'type': 'dense', 'size': 32},
            {'type': BatchNormalization, 'variance_epsilon': 1e-9}
        ]

        agent = DQNAgent(
            states_spec=states,
            actions_spec=actions,
            network_spec=network_spec,
            memory=dict(
                type='replay',
                capacity=1000
            ),
            batch_size=8
        )

        agent.close()

        ### Code block: Own network builder
        from tensorforce.core.networks import Network

        class CustomNetwork(Network):

            def tf_apply(self, x, internals, update, return_internals=False):
                image = x['image']  # 64x64x3-dim, float
                caption = x['caption']  # 20-dim, int
                initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01, dtype=tf.float32)

                # CNN
                weights = tf.get_variable(name='W1', shape=(3, 3, 3, 16), initializer=initializer)
                image = tf.nn.conv2d(image, filter=weights, strides=(1, 1, 1, 1), padding='SAME')
                image = tf.nn.relu(image)
                image = tf.nn.max_pool(image, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1), padding='SAME')

                weights = tf.get_variable(name='W2', shape=(3, 3, 16, 32), initializer=initializer)
                image = tf.nn.conv2d(image, filter=weights, strides=(1, 1, 1, 1), padding='SAME')
                image = tf.nn.relu(image)
                image = tf.nn.max_pool(image, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1), padding='SAME')

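                # Average over the 16x16 spatial positions to get a 32-dim image embedding.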
                image = tf.reshape(image, shape=(-1, 16 * 16, 32))
                image = tf.reduce_mean(image, axis=1)

                # LSTM
                weights = tf.get_variable(name='W3', shape=(30, 32), initializer=initializer)
                caption = tf.nn.embedding_lookup(params=weights, ids=caption)
                lstm = tf.contrib.rnn.LSTMCell(num_units=32)
                caption, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=caption, dtype=tf.float32)
                caption = tf.reduce_mean(caption, axis=1)

                # Combination
                if return_internals:
                    return tf.multiply(image, caption), list()
                else:
                    return tf.multiply(image, caption)

        ### Test own network builder

        states = dict(
            image=dict(shape=(64, 64, 3), type='float'),
            caption=dict(shape=(20,), type='int')
        )

        agent = DQNAgent(
            states_spec=states,
            actions_spec=actions,
            network_spec=CustomNetwork,
            memory=dict(
                type='replay',
                capacity=1000
            ),
            batch_size=8
        )

        agent.close()

        ### Code block: LSTM function
        from tensorforce.core.networks import Layer

        class Lstm(Layer):

            def __init__(self, size, scope='lstm', summary_labels=()):
                self.size = size
                super(Lstm, self).__init__(num_internals=1, scope=scope, summary_labels=summary_labels)

            def tf_apply(self, x, update, state):
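                # Unpack the stacked internal tensor into an LSTMStateTuple of (cell state, hidden state).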
                state = tf.contrib.rnn.LSTMStateTuple(c=state[:, 0, :], h=state[:, 1, :])
                self.lstm_cell = tf.contrib.rnn.LSTMCell(num_units=self.size)

                x, state = self.lstm_cell(inputs=x, state=state)

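                # Stack cell and hidden state back into one tensor, carried over as the internal state.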
                internal_output = tf.stack(values=(state.c, state.h), axis=1)
                return x, (internal_output,)

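            # One additional internal input: a placeholder for the stacked (cell, hidden) state.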
            def internal_inputs(self):
                return super(Lstm, self).internal_inputs() + [tf.placeholder(dtype=tf.float32, shape=(None, 2, self.size))]

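            # The internal state starts out as zeros.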
            def internal_inits(self):
                return super(Lstm, self).internal_inits() + [np.zeros(shape=(2, self.size))]

        ### Test LSTM
        states = dict(shape=(10,), type='float')
        network_spec = [
            {'type': 'flatten'},
            {'type': Lstm, 'size': 10}
        ]

        agent = DQNAgent(
            states_spec=states,
            actions_spec=actions,
            network_spec=network_spec,
            memory=dict(
                type='replay',
                capacity=1000
            ),
            batch_size=8
        )

        agent.close()

        ### Preprocessing configuration
        states = dict(shape=(84, 84, 3), type='float')
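        # Preprocessing stack, applied in order: resize to 84x84, convert to grayscale,
        # normalize, then stack the last 4 frames.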
        preprocessing = [
            dict(
                type='image_resize',
                width=84,
                height=84
            ),
            dict(
                type='grayscale'
            ),
            dict(
                type='normalize'
            ),
            dict(
                type='sequence',
                length=4
            )
        ]

        ### Test preprocessing configuration

        agent = DQNAgent(
            states_spec=states,
            actions_spec=actions,
            network_spec=network_spec,
            memory=dict(
                type='replay',
                capacity=1000
            ),
            batch_size=8,
            first_update=100,
            target_sync_frequency=50,
            preprocessing=preprocessing
        )

        agent.close()

        ### Code block: Continuous action exploration

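        # Ornstein-Uhlenbeck process noise, typically used for continuous actions.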
        exploration = dict(
            type='ornstein_uhlenbeck',
            sigma=0.1,
            mu=0,
            theta=0.1
        )

        ### Test continuous action exploration
        agent = DQNAgent(
            states_spec=states,
            actions_spec=actions,
            network_spec=network_spec,
            memory=dict(
                type='replay',
                capacity=1000
            ),
            batch_size=8,
            exploration=exploration
        )

        agent.close()

        ### Code block: Discrete action exploration

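        # Epsilon-greedy exploration, decaying epsilon from 1.0 to 0.01 over 1e6 timesteps.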
        exploration = dict(
            type='epsilon_decay',
            initial_epsilon=1.0,
            final_epsilon=0.01,
            timesteps=1e6
        )

        ### Test discrete action exploration
        agent = DQNAgent(
            states_spec=states,
            actions_spec=actions,
            network_spec=network_spec,
            memory=dict(
                type='replay',
                capacity=1000
            ),
            batch_size=8,
            exploration=exploration
        )

        agent.close()

    def test_blogpost_introduction_runner(self):
        """
        Tests the runner examples from the introductory blog post.
        """
        from tensorforce.environments.minimal_test import MinimalTest
        from tensorforce.agents import DQNAgent
        from tensorforce.execution import Runner

        environment = MinimalTest(specification=[('int', ())])

        network_spec = [
            dict(type='dense', size=32)
        ]

        agent = DQNAgent(
            states_spec=environment.states,
            actions_spec=environment.actions,
            network_spec=network_spec,
            memory=dict(
                type='replay',
                capacity=1000
            ),
            batch_size=8,
            first_update=100,
            target_sync_frequency=50
        )
        runner = Runner(agent=agent, environment=environment)

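        # Callback invoked after every episode; the runner continues as long as it returns True.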
        def episode_finished(runner):
            if runner.episode % 100 == 0:
                print(sum(runner.episode_rewards[-100:]) / 100)
            return runner.episode < 100 \
                or not all(reward >= 1.0 for reward in runner.episode_rewards[-100:])

        # runner.run(episodes=1000, episode_finished=episode_finished)
        runner.run(episodes=10, episode_finished=episode_finished)  # Only 10 episodes for this test

        ### Code block: next
        agent = DQNAgent(
            states_spec=environment.states,
            actions_spec=environment.actions,
            network_spec=network_spec,
            memory=dict(
                type='replay',
                capacity=1000
            ),
            batch_size=8,
            first_update=100,
            target_sync_frequency=50
        )

        # max_episodes = 1000
        max_episodes = 10  # Only 10 episodes for this test
        max_timesteps = 2000

        episode = 0
        episode_rewards = list()

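        # Manual act/observe loop, equivalent to what the Runner does internally.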
        while True:
            state = environment.reset()
            agent.reset()

            timestep = 0
            episode_reward = 0
            while True:
                action = agent.act(states=state)
                state, terminal, reward = environment.execute(actions=action)
                agent.observe(terminal=terminal, reward=reward)

                timestep += 1
                episode_reward += reward

                if terminal or timestep == max_timesteps:
                    break

            episode += 1
            episode_rewards.append(episode_reward)

            if all(reward >= 1.0 for reward in episode_rewards[-100:]) or episode == max_episodes:
                break

        agent.close()
        environment.close()