Code example #1
File: common_test.py Project: ruizhaogit/alf
    def test_transform_image(self):
        shape = [10]
        observation = tf.zeros(shape, dtype=tf.uint8)
        common.image_scale_transformer(observation)

        T1 = common.namedtuple('T1', ['x', 'y'])
        T2 = common.namedtuple('T2', ['a', 'b', 'c'])
        T3 = common.namedtuple('T3', ['l', 'm'])
        observation = T1(x=T2(a=tf.ones(shape, dtype=tf.uint8) * 255,
                              b=T3(l=tf.zeros(shape, dtype=tf.uint8))))
        transformed_observation = common.image_scale_transformer(
            observation, fields=["x.a", "x.b.l"])

        tf.debugging.assert_equal(transformed_observation.x.a,
                                  tf.ones(shape, dtype=tf.float32))
        tf.debugging.assert_equal(transformed_observation.x.b.l,
                                  tf.ones(shape, dtype=tf.float32) * -1)

        with self.assertRaises(Exception) as _:
            common.image_scale_transformer(observation,
                                           fields=["x.b.m"])  # empty ()

        observation = dict(x=dict(a=observation.x.a))
        common.image_scale_transformer(observation, fields=["x.a"])
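In the test above, T1 is constructed with only x and the unset field m comes back as (), which suggests ALF's namedtuple wrapper supplies a default for unspecified fields. A minimal, hypothetical sketch of such a factory in plain Python (not ALF's actual implementation):

import collections

def namedtuple_with_default(typename, field_names, default_value=()):
    # Hypothetical stand-in for alf.utils.common.namedtuple: any field that is
    # not passed at construction time falls back to `default_value`.
    cls = collections.namedtuple(typename, field_names)
    cls.__new__.__defaults__ = (default_value,) * len(cls._fields)
    return cls

T1 = namedtuple_with_default('T1', ['x', 'y'])
t = T1(x=1)
assert t.y == ()   # unspecified field defaults to ()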
Code example #2
# limitations under the License.

from absl import logging
import gin
import numpy as np
import tensorflow as tf

from tf_agents.trajectories.time_step import StepType

from alf.algorithms.algorithm import Algorithm, AlgorithmStep, LossInfo
from alf.utils import dist_utils
from alf.utils.averager import ScalarWindowAverager
from alf.utils.common import namedtuple, run_if, should_record_summaries
from alf.utils.dist_utils import calc_default_target_entropy

EntropyTargetLossInfo = namedtuple("EntropyTargetLossInfo", ["entropy_loss"])
EntropyTargetInfo = namedtuple("EntropyTargetInfo", ["step_type", "loss"])


@gin.configurable
class EntropyTargetAlgorithm(Algorithm):
    """Algorithm for adjust entropy regularization.

    It tries to adjust the entropy regularization (i.e. alpha) so that the
    entropy is not smaller than `target_entropy`.

    The algorithm has two stages:
    1. init stage. During this stage, the alpha is not changed. It transitions
       to adjust_stage once entropy drops below `target_entropy`.
    2. adjust stage. During this stage, log_alpha is adjusted using this formula:
       ((below + 0.5 * above) * decreasing - (above + 0.5 * below) * increasing) * update_rate
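The snippet is cut off before the update itself, but the docstring formula can be transcribed directly. A sketch, assuming below/above indicate whether the entropy is below/above the target and decreasing/increasing indicate its trend (variable names are assumptions, not ALF's actual code):

# Illustrative transcription of the docstring formula above.
def adjust_log_alpha(log_alpha, below, above, decreasing, increasing, update_rate):
    # Push log_alpha up when entropy is (mostly) below target and still falling,
    # and down when it is (mostly) above target and rising.
    delta = ((below + 0.5 * above) * decreasing
             - (above + 0.5 * below) * increasing) * update_rate
    return log_alpha + delta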
Code example #3
from tf_agents.trajectories.time_step import StepType

from alf.algorithms.algorithm import Algorithm
import alf.utils.common as common
from alf.utils.common import ActionTimeStep, namedtuple, LossInfo, make_action_time_step
from alf.utils.common import cast_transformer
from tf_agents.utils import eager_utils
from tf_agents.metrics import tf_metrics

import alf.utils
import gin.tf

TrainingInfo = namedtuple("TrainingInfo", [
    "action_distribution", "action", "step_type", "reward", "discount", "info",
    "collect_info", "collect_action_distribution"
],
                          default_value=())


@gin.configurable
class RLAlgorithm(Algorithm):
    """Abstract base class for  RL Algorithms.

    RLAlgorithm provides basic functions and a generic interface for RL algorithms.

    The key interface functions are:
    1. predict(): one step of action computation for evaluation.
    2. rollout(): one step of computation for rollout. Besides the action, it also
       needs to compute other information necessary for training.
    3. train_step(): only used for off-policy training.
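Earlier in this snippet, TrainingInfo is created with default_value=(), so callers can fill in only the fields relevant to the current phase. A short usage sketch (action and time_step are placeholder variables, not part of the snippet):

# Only rollout-related fields are populated; everything else defaults to ().
info = TrainingInfo(action=action,
                    step_type=time_step.step_type,
                    reward=time_step.reward,
                    discount=time_step.discount)
assert info.collect_info == ()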
Code example #4
File: algorithm.py Project: ruizhaogit/alf
# See the License for the specific language governing permissions and
# limitations under the License.
"""Algorithm base class."""

from abc import abstractmethod
from absl import logging
import copy

import tensorflow as tf

from tf_agents.utils import eager_utils

import alf.utils
from alf.utils.common import namedtuple, LossInfo

AlgorithmStep = namedtuple("AlgorithmStep", ["outputs", "state", "info"])


class Algorithm(tf.Module):
    """Algorithm base class.

    Algorithm is a generic interface for supervised training algorithms.

    Users need to implement train_step() and calc_loss()/train_complete().

    train_step() is called to generate actions for every environment step.
    It also needs to generate necessary information for training.

    train_complete() is called every train_interval steps (specified in
    PolicyDriver). All the training information collected at each previous
    train_step() is batched and provided as arguments to train_complete().
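For illustration, a minimal subclass sketch assuming train_step() takes a nest of tensors plus a state (the exact signature is not shown in this snippet) and returns the AlgorithmStep defined above:

# Illustrative only; the real Algorithm interface may require more methods
# and constructor arguments than shown here.
class EchoAlgorithm(Algorithm):
    def train_step(self, inputs, state=()):
        # Pass the inputs through and keep them as the info that will later be
        # batched and handed to train_complete()/calc_loss().
        return AlgorithmStep(outputs=inputs, state=state, info=inputs)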
Code example #5
from tf_agents.networks.network import DistributionNetwork
from tf_agents.specs.distribution_spec import nested_distributions_from_specs
from tf_agents.specs import tensor_spec
from tf_agents.distributions.utils import SquashToSpecNormal

from alf.algorithms.actor_critic_algorithm import ActorCriticAlgorithm
from alf.algorithms.off_policy_algorithm import Experience
from alf.algorithms.on_policy_algorithm import OnPolicyAlgorithm
from alf.algorithms.rl_algorithm import ActionTimeStep, StepType
from alf.optimizers.trusted_updater import TrustedUpdater
from alf.utils import common
from alf.utils.common import namedtuple

nest_map = tf.nest.map_structure

TracExperience = namedtuple(
    "TracExperience", ["observation", "step_type", "state", "action_param"])

TracInfo = namedtuple("TracInfo", ["observation", "state", "ac"])


@gin.configurable
class TracAlgorithm(OnPolicyAlgorithm):
    """Trust-region actor-critic.

    It compares the action distributions after the SGD update with the action
    distributions from the previous model. If the average distance is too big,
    the new parameters are shrunk as:
        w_new' = w_old + max_kl / kl * (w_new - w_old)

    If the distribution is Categorical, the distance is ||logits_1 - logits_2||^2,
    and if the distribution is Deterministic, the distance is ||loc_1 - loc_2||^2,
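A minimal sketch of the shrinking step described above, assuming w_old and w_new are matching nests of tensors (this is not the TrustedUpdater implementation):

# Interpolate back toward the old weights so the step scales with max_kl / kl.
def shrink_towards_old(w_old, w_new, kl, max_kl):
    ratio = tf.minimum(1.0, max_kl / kl)
    return tf.nest.map_structure(
        lambda old, new: old + ratio * (new - old), w_old, w_new)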