Example No. 1
    def test_get_optimizer(self):
        """Tests get_optimizer.
        """
        default_optimizer_fn, optimizer_class = opt.get_optimizer_fn(
            opt.default_optimization_hparams()["optimizer"])
        default_optimizer = default_optimizer_fn(1.0)
        self.assertTrue(issubclass(optimizer_class, tf.train.Optimizer))
        self.assertIsInstance(default_optimizer, tf.train.AdamOptimizer)

        hparams = {
            "type": "MomentumOptimizer",
            "kwargs": {
                "learning_rate": 0.001,
                "momentum": 0.9,
                "use_nesterov": True
            }
        }
        momentum_optimizer_fn, _ = opt.get_optimizer_fn(hparams)
        momentum_optimizer = momentum_optimizer_fn()
        self.assertIsInstance(momentum_optimizer, tf.train.MomentumOptimizer)

        hparams = {
            "type": tf.train.MomentumOptimizer,
            "kwargs": {
                "momentum": 0.9,
                "use_nesterov": True
            }
        }
        momentum_optimizer_fn, _ = opt.get_optimizer_fn(hparams)
        momentum_optimizer = momentum_optimizer_fn(0.001)
        self.assertIsInstance(momentum_optimizer, tf.train.MomentumOptimizer)

        hparams = {"type": tf.train.MomentumOptimizer(0.001, 0.9)}
        momentum_optimizer, _ = opt.get_optimizer_fn(hparams)
        self.assertIsInstance(momentum_optimizer, tf.train.MomentumOptimizer)
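The test above exercises the three accepted ways of specifying the optimizer (class name string, class object, or class instance). Below is a minimal sketch of wiring the returned factory into a TF1 training step; it assumes only the `(optimizer_fn, optimizer_class)` return contract shown in the test, and the `texar.core` import path for `opt` is an assumption.

import tensorflow as tf
from texar.core import optimization as opt  # assumed import path for `opt`

# Toy loss over a single variable, for illustration only.
w = tf.Variable(0.5)
loss = tf.reduce_sum(tf.square(w - 1.0))

# Build the optimizer from hparams, as in the test above.
optimizer_fn, _ = opt.get_optimizer_fn({
    "type": "MomentumOptimizer",
    "kwargs": {"learning_rate": 0.001, "momentum": 0.9},
})
optimizer = optimizer_fn()           # learning rate already given in "kwargs"
train_op = optimizer.minimize(loss)  # standard tf.train.Optimizer API

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)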
Example No. 2
    def default_hparams():
        return {
            'policy_type': 'CategoricalPolicyNet',
            'policy_hparams': None,
            'discount_factor': 0.95,
            'normalize_reward': False,
            'optimization': opt.default_optimization_hparams(),
            'name': 'pg_agent',
        }
Example No. 3
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values:

        .. role:: python(code)
           :language: python

        .. code-block:: python

            {
                'policy_type': 'CategoricalPolicyNet',
                'policy_hparams': None,
                'discount_factor': 0.95,
                'normalize_reward': False,
                'optimization': default_optimization_hparams(),
                'name': 'pg_agent',
            }

        Here:

        "policy_type" : str or class or instance
            Policy net. Can be class, its name or module path, or a class
            instance. If class name is given, the class must be from module
            :mod:`texar.modules` or :mod:`texar.custom`. Ignored if a
            `policy` is given to the agent constructor.

        "policy_hparams" : dict, optional
            Hyperparameters for the policy net. With the :attr:`policy_kwargs`
            argument to the constructor, a network is created with
            :python:`policy_class(**policy_kwargs, hparams=policy_hparams)`.

        "discount_factor" : float
            The discount factor of reward.

        "normalize_reward" : bool
            Whether to normalize the discounted reward, by
            `(discounted_reward - mean) / std`.

        "optimization" : dict
            Hyperparameters of optimization for updating the policy net.
            See :func:`~texar.core.default_optimization_hparams` for details.

        "name" : str
            Name of the agent.
        """
        return {
            'policy_type': 'CategoricalPolicyNet',
            'policy_hparams': None,
            'discount_factor': 0.95,
            'normalize_reward': False,
            'optimization': opt.default_optimization_hparams(),
            'name': 'pg_agent',
        }
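A small sketch of how these defaults might be customized before constructing an agent, assuming `default_hparams` and the `opt` import from the example above are in scope; the overridden values and the `my_policy_net` name are purely illustrative.

# Start from the defaults shown above and override selected fields.
hparams = default_hparams()
hparams["discount_factor"] = 0.99
hparams["normalize_reward"] = True
hparams["policy_hparams"] = {"name": "my_policy_net"}  # hypothetical policy hparams

# The customized dict would then typically be passed to the agent constructor
# as its `hparams` argument; the exact constructor signature is not shown here.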
Example No. 4
    def default_hparams():
        return {
            'name': 'pg_agent',
            'discount_factor': 0.95,
            'network': {
                'type': 'PGNet',
                'hparams': None
            },
            'trainer': {
                'loss_fn': pg_loss,
                'optimization_hparams': opt.default_optimization_hparams(),
            }
        }
Example No. 5
    def default_hparams():
        return {
            'discount_factor': 0.95,
            'normalize_reward': False,
            'entropy_weight': 0.,
            'loss': {
                'average_across_batch': True,
                'average_across_timesteps': False,
                'sum_over_batch': False,
                'sum_over_timesteps': True,
                'time_major': False
            },
            'optimization': opt.default_optimization_hparams(),
            'name': 'pg_agent',
        }
Example No. 6
    def test_get_learning_rate_decay_fn(self): # pylint: disable=too-many-locals
        """Tests get_learning_rate_decay_fn.
        """
        default_lr_decay_fn = opt.get_learning_rate_decay_fn(
            opt.default_optimization_hparams()["learning_rate_decay"])
        self.assertIsNone(default_lr_decay_fn)

        boundaries = [2, 4]
        values = [0.1, 0.01, 0.001]
        hparams = {
            "type": "piecewise_constant",
            "kwargs": {
                "boundaries": boundaries,
                "values": values
            },
            "min_learning_rate": 0.05,
            "start_decay_step": 1,
            "end_decay_step": utils.MAX_SEQ_LENGTH,
        }
        pc_lr_decay_fn = opt.get_learning_rate_decay_fn(hparams)

        global_step = 1
        pc_lr = pc_lr_decay_fn(learning_rate=1., global_step=global_step)
        pc_lr_true = tf.train.piecewise_constant(
            global_step-hparams["start_decay_step"], boundaries, values)

        hparams["type"] = "natural_exp_decay"
        hparams["kwargs"] = {
            "decay_steps": 1,
            "decay_rate": 0.5
        }
        ned_lr_decay_fn = opt.get_learning_rate_decay_fn(hparams)
        ned_lr = ned_lr_decay_fn(learning_rate=1., global_step=global_step)
        ned_lr_true = tf.train.natural_exp_decay(
            1., global_step-hparams["start_decay_step"],
            hparams["kwargs"]["decay_steps"], hparams["kwargs"]["decay_rate"])

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            pc_lr_, pc_lr_true_, ned_lr_, ned_lr_true_ = sess.run(
                [pc_lr, pc_lr_true, ned_lr, ned_lr_true])
            self.assertEqual(pc_lr_, pc_lr_true_)
            self.assertEqual(ned_lr_, ned_lr_true_)
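A sketch of how such a decay function might be combined with an optimizer, assuming only the hparams keys and the `decay_fn(learning_rate=..., global_step=...)` call signature exercised in the test; the rest is plain TF1 API.

import tensorflow as tf
from texar.core import optimization as opt  # assumed import path for `opt`

hparams = {
    "type": "natural_exp_decay",
    "kwargs": {"decay_steps": 1000, "decay_rate": 0.5},
    "min_learning_rate": 1e-4,
    "start_decay_step": 0,
    "end_decay_step": 10000,
}
lr_decay_fn = opt.get_learning_rate_decay_fn(hparams)

global_step = tf.train.get_or_create_global_step()
decayed_lr = lr_decay_fn(learning_rate=0.1, global_step=global_step)

# The decayed rate feeds a standard TF1 optimizer; `loss` would come from a model,
# e.g. train_op = optimizer.minimize(loss, global_step=global_step)
optimizer = tf.train.GradientDescentOptimizer(decayed_lr)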
Example No. 7
    def test_get_gradient_clip_fn(self):    # pylint: disable=too-many-locals
        """Tests get_gradient_clip_fn.
        """
        default_grad_clip_fn = opt.get_gradient_clip_fn(
            opt.default_optimization_hparams()["gradient_clip"])
        self.assertIsNone(default_grad_clip_fn)

        grads = [tf.random_uniform([10, 10], -1., 1.) for _ in range(5)]
        grads_and_vars = list(zip(grads, range(5)))

        hparams = {
            "type": "clip_by_global_norm",
            "kwargs": {
                "clip_norm": 0.1
            }
        }
        gn_grad_clip_fn = opt.get_gradient_clip_fn(hparams)
        gn_grads_and_vars = gn_grad_clip_fn(grads_and_vars)
        gn_grads, _ = zip(*gn_grads_and_vars)
        gn_grads_true, _ = tf.clip_by_global_norm(
            grads, hparams["kwargs"]["clip_norm"])

        hparams = {
            "type": "clip_by_value",
            "kwargs": {
                "clip_value_min": -0.01,
                "clip_value_max": 0.01
            }
        }
        v_grad_clip_fn = opt.get_gradient_clip_fn(hparams)
        v_grads_and_vars = v_grad_clip_fn(grads_and_vars)
        v_grads, _ = zip(*v_grads_and_vars)
        v_grads_true = tf.clip_by_value(grads,
                                        hparams["kwargs"]["clip_value_min"],
                                        hparams["kwargs"]["clip_value_max"])

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            gn_grads_, gn_grads_true_, v_grads_, v_grads_true_ = sess.run(
                [gn_grads, gn_grads_true, v_grads, v_grads_true])
            np.testing.assert_array_equal(gn_grads_, gn_grads_true_)
            np.testing.assert_array_equal(v_grads_, v_grads_true_)
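A sketch of applying the clip function between `compute_gradients` and `apply_gradients`, assuming only the `grad_clip_fn(grads_and_vars)` contract demonstrated above; the loss, variable, and clip norm are illustrative.

import tensorflow as tf
from texar.core import optimization as opt  # assumed import path for `opt`

w = tf.Variable(tf.zeros([10]))
loss = tf.reduce_sum(tf.square(w - 1.0))
optimizer = tf.train.AdamOptimizer(1e-3)

grad_clip_fn = opt.get_gradient_clip_fn({
    "type": "clip_by_global_norm",
    "kwargs": {"clip_norm": 5.0},
})

grads_and_vars = optimizer.compute_gradients(loss)
clipped = grad_clip_fn(grads_and_vars)       # returns the same (grad, var) structure
train_op = optimizer.apply_gradients(clipped)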
Example No. 8
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values:

        .. role:: python(code)
           :language: python

        .. code-block:: python

            {
                'discount_factor': 0.95,
                'normalize_reward': False,
                'entropy_weight': 0.,
                'loss': {
                    'average_across_batch': True,
                    'average_across_timesteps': False,
                    'sum_over_batch': False,
                    'sum_over_timesteps': True,
                    'time_major': False
                },
                'optimization': default_optimization_hparams(),
                'name': 'pg_agent',
            }

        Here:

        "discount_factor" : float
            The discount factor of reward.

        "normalize_reward" : bool
            Whether to normalize the discounted reward, by
            `(discounted_reward - mean) / std`. Here `mean` and `std` are
            over all time steps and all samples in the batch.

        "entropy_weight" : float
            The weight of entropy loss of the sample distribution, to encourage
            maximizing the Shannon entropy. Set to 0 to disable the loss.

        "loss" : dict
            Extra keyword arguments for
            :func:`~texar.losses.pg_loss_with_logits`, including the
            reduce arguments (e.g., `average_across_batch`) and `time_major`.

        "optimization" : dict
            Hyperparameters of optimization for updating the policy net.
            See :func:`~texar.core.default_optimization_hparams` for details.

        "name" : str
            Name of the agent.
        """
        return {
            'discount_factor': 0.95,
            'normalize_reward': False,
            'entropy_weight': 0.,
            'loss': {
                'average_across_batch': True,
                'average_across_timesteps': False,
                'sum_over_batch': False,
                'sum_over_timesteps': True,
                'time_major': False
            },
            'optimization': opt.default_optimization_hparams(),
            'name': 'pg_agent',
        }
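The reduce flags above combine as "sum over time steps, then average over the batch". A small NumPy sketch of that arithmetic, assuming the flags follow the usual sum/average-per-dimension semantics described in the docstring; this illustrates the reduction only, not the library code.

import numpy as np

# Per-step policy-gradient losses for a batch of 2 sequences, 3 steps each
# (batch-major layout, i.e., time_major=False).
step_losses = np.array([[0.5, 0.25, 0.25],
                        [1.0, 0.5, 0.5]])

# sum_over_timesteps=True, average_across_batch=True:
loss = step_losses.sum(axis=1).mean()  # sum over time, then mean over the batch
print(loss)                            # 1.5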
Example No. 9
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values:

        .. role:: python(code)
           :language: python

        .. code-block:: python

            {
                'qnet_type': 'CategoricalQNet',
                'qnet_hparams': None,
                'replay_memory_type': 'DequeReplayMemory',
                'replay_memory_hparams': None,
                'exploration_type': 'EpsilonLinearDecayExploration',
                'exploration_hparams': None,
                'optimization': opt.default_optimization_hparams(),
                'target_update_strategy': 'copy',
                'cold_start_steps': 100,
                'sample_batch_size': 32,
                'update_period': 100,
                'discount_factor': 0.95,
                'name': 'dqn_agent'
            }

        Here:

        "qnet_type" : str or class or instance
            Q-value net. Can be class, its
            name or module path, or a class instance. If class name is given,
            the class must be from module :mod:`texar.modules` or
            :mod:`texar.custom`. Ignored if a `qnet` is given to
            the agent constructor.

        "qnet_hparams" : dict, optional
            Hyperparameters for the Q net. With the :attr:`qnet_kwargs`
            argument to the constructor, a network is created with
            :python:`qnet_class(**qnet_kwargs, hparams=qnet_hparams)`.

        "replay_memory_type" : str or class or instance
            Replay memory class. Can be class, its name or module path,
            or a class instance.
            If class name is given, the class must be from module
            :mod:`texar.core` or :mod:`texar.custom`.
            Ignored if a `replay_memory` is given to the agent constructor.

        "replay_memory_hparams" : dict, optional
            Hyperparameters for the replay memory. With the
            :attr:`replay_memory_kwargs` argument to the constructor,
            an instance is created with
            :python:`replay_memory_class(
            **replay_memory_kwargs, hparams=replay_memory_hparams)`.

        "exploration_type" : str or class or instance
            Exploration class. Can be class,
            its name or module path, or a class instance. If class name is
            given, the class must be from module :mod:`texar.core` or
            :mod:`texar.custom`. Ignored if an `exploration` is given to
            the agent constructor.

        "exploration_hparams" : dict, optional
            Hyperparameters for the exploration class.
            With the :attr:`exploration_kwargs` argument to the constructor,
            an instance is created with :python:`exploration_class(
            **exploration_kwargs, hparams=exploration_hparams)`.

        "optimization" : dict
            Hyperparameters of optimization for updating the Q-net.
            See :func:`~texar.core.default_optimization_hparams` for details.

        "cold_start_steps": int
            In the beginning, Q-net is not trained in the first few steps.

        "sample_batch_size": int
            The number of samples drawn from the replay memory for each
            training update.

        "target_update_strategy": string

            - If **"copy"**, the target network is assigned with the parameter \
            of Q-net every :attr:`"update_period"` steps.

            - If **"tau"**, target will be updated by assigning as
            ``` (1 - 1/update_period) * target + 1/update_period * qnet ```

        "update_period": int
            Frequency of updating the target network, i.e., the target is
            updated once every "update_period" steps.

        "discount_factor" : float
            The discount factor of reward.

        "name" : str
            Name of the agent.
        """
        return {
            'qnet_type': 'CategoricalQNet',
            'qnet_hparams': None,
            'replay_memory_type': 'DequeReplayMemory',
            'replay_memory_hparams': None,
            'exploration_type': 'EpsilonLinearDecayExploration',
            'exploration_hparams': None,
            'optimization': opt.default_optimization_hparams(),
            'target_update_strategy': 'copy',
            'cold_start_steps': 100,
            'sample_batch_size': 32,
            'update_period': 100,
            'discount_factor': 0.95,
            'name': 'dqn_agent'
        }
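The "tau" strategy described above is a soft (Polyak-style) update of the target parameters. A minimal NumPy sketch of that formula, taken directly from the docstring; the array stands in for one parameter tensor and the values are illustrative.

import numpy as np

update_period = 100
tau = 1.0 / update_period

target_param = np.zeros(3)
qnet_param = np.array([1.0, 2.0, 3.0])

# "tau" update: target <- (1 - 1/update_period) * target + 1/update_period * qnet
target_param = (1.0 - tau) * target_param + tau * qnet_param
print(target_param)  # [0.01 0.02 0.03]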