def test_transform_image(self):
    shape = [10]
    observation = tf.zeros(shape, dtype=tf.uint8)
    common.image_scale_transformer(observation)

    T1 = common.namedtuple('T1', ['x', 'y'])
    T2 = common.namedtuple('T2', ['a', 'b', 'c'])
    T3 = common.namedtuple('T3', ['l', 'm'])
    observation = T1(
        x=T2(
            a=tf.ones(shape, dtype=tf.uint8) * 255,
            b=T3(l=tf.zeros(shape, dtype=tf.uint8))))
    transformed_observation = common.image_scale_transformer(
        observation, fields=["x.a", "x.b.l"])
    tf.debugging.assert_equal(transformed_observation.x.a,
                              tf.ones(shape, dtype=tf.float32))
    tf.debugging.assert_equal(transformed_observation.x.b.l,
                              tf.ones(shape, dtype=tf.float32) * -1)

    with self.assertRaises(Exception) as _:
        common.image_scale_transformer(
            observation, fields=["x.b.m"])  # empty ()

    observation = dict(x=dict(a=observation.x.a))
    common.image_scale_transformer(observation, fields=["x.a"])
# limitations under the License.

from absl import logging
import gin
import numpy as np
import tensorflow as tf

from tf_agents.trajectories.time_step import StepType

from alf.algorithms.algorithm import Algorithm, AlgorithmStep, LossInfo
from alf.utils import dist_utils
from alf.utils.averager import ScalarWindowAverager
from alf.utils.common import namedtuple, run_if, should_record_summaries
from alf.utils.dist_utils import calc_default_target_entropy

EntropyTargetLossInfo = namedtuple("EntropyTargetLossInfo", ["entropy_loss"])
EntropyTargetInfo = namedtuple("EntropyTargetInfo", ["step_type", "loss"])


@gin.configurable
class EntropyTargetAlgorithm(Algorithm):
    """Algorithm for adjusting entropy regularization.

    It tries to adjust the entropy regularization (i.e. alpha) so that the
    entropy is not smaller than `target_entropy`.

    The algorithm has two stages:
    1. init stage. During this stage, alpha is not changed. It transitions to
       the adjust stage once the entropy drops below `target_entropy`.
    2. adjust stage. During this stage, log_alpha is adjusted using this
       formula:
       ((below + 0.5 * above) * decreasing
        - (above + 0.5 * below) * increasing) * update_rate
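# A minimal numeric sketch of the adjust-stage update above, assuming plain
# Python scalars. It is not EntropyTargetAlgorithm's actual code; the names
# `avg_entropy`, `prev_avg_entropy`, `target_entropy` and `update_rate` are
# hypothetical stand-ins for the quantities the docstring refers to.
def _sketch_adjust_log_alpha(log_alpha, avg_entropy, prev_avg_entropy,
                             target_entropy, update_rate):
    below = float(avg_entropy < target_entropy)          # entropy under target
    above = 1.0 - below                                   # entropy at/over target
    decreasing = float(avg_entropy < prev_avg_entropy)    # entropy moving down
    increasing = 1.0 - decreasing                         # entropy moving up
    # Raise alpha whenever entropy is falling (more strongly if it is already
    # below target); lower it whenever entropy is rising (more strongly if it
    # is already above target).
    delta = ((below + 0.5 * above) * decreasing -
             (above + 0.5 * below) * increasing) * update_rate
    return log_alpha + delta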
from tf_agents.trajectories.time_step import StepType

from alf.algorithms.algorithm import Algorithm
import alf.utils.common as common
from alf.utils.common import ActionTimeStep, namedtuple, LossInfo, make_action_time_step
from alf.utils.common import cast_transformer
from tf_agents.utils import eager_utils
from tf_agents.metrics import tf_metrics
import alf.utils
import gin.tf

TrainingInfo = namedtuple(
    "TrainingInfo", [
        "action_distribution", "action", "step_type", "reward", "discount",
        "info", "collect_info", "collect_action_distribution"
    ],
    default_value=())


@gin.configurable
class RLAlgorithm(Algorithm):
    """Abstract base class for RL Algorithms.

    RLAlgorithm provides basic functions and a generic interface for RL
    algorithms. The key interface functions are:
    1. predict(): one step of computation of action for evaluation.
    2. rollout(): one step of computation for rollout. Besides the action, it
       also needs to compute other information necessary for training.
    3. train_step(): only used for off-policy training.
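# A self-contained toy sketch (not alf code) of the division of labor among
# the three calls listed above. `ToyPolicy` and its trivial bodies are
# hypothetical and only illustrate which call produces what; the real
# RLAlgorithm methods operate on time steps / experience plus network state.
class ToyPolicy(object):
    def predict(self, observation):
        """Evaluation: compute an action and nothing else."""
        return int(observation > 0)

    def rollout(self, observation):
        """Rollout: compute the action plus the extra info training needs."""
        action = self.predict(observation)
        info = dict(log_prob=0.0, value=0.0)  # placeholders for real stats
        return action, info

    def train_step(self, replayed_observation):
        """Off-policy training: recompute the info from replayed data."""
        _, info = self.rollout(replayed_observation)
        return info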
# See the License for the specific language governing permissions and
# limitations under the License.
"""Algorithm base class."""

from abc import abstractmethod
from absl import logging
import copy

import tensorflow as tf
from tf_agents.utils import eager_utils

import alf.utils
from alf.utils.common import namedtuple, LossInfo

AlgorithmStep = namedtuple("AlgorithmStep", ["outputs", "state", "info"])


class Algorithm(tf.Module):
    """Algorithm base class.

    Algorithm is a generic interface for supervised training algorithms.

    User needs to implement train_step() and calc_loss()/train_complete().

    train_step() is called to generate actions for every environment step. It
    also needs to generate the necessary information for training.

    train_complete() is called every `train_interval` steps (specified in
    PolicyDriver). All the training information collected at each previous
    train_step() is batched and provided as arguments for train_complete().
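# A hedged sketch of the call pattern the docstring describes, not the actual
# PolicyDriver code. `env_steps`, `train_interval` and the one-argument
# train_complete() call are hypothetical simplifications of the real
# signatures; the sketch only shows when each method is invoked.
def _sketch_training_loop(algorithm, env_steps, train_interval):
    infos = []
    state = ()
    for i, env_step in enumerate(env_steps, start=1):
        step = algorithm.train_step(env_step, state)  # one call per env step
        state = step.state
        infos.append(step.info)
        if i % train_interval == 0:
            # Batch the per-step info collected since the last update and do
            # one training update with it.
            batched = tf.nest.map_structure(lambda *xs: tf.stack(xs), *infos)
            algorithm.train_complete(batched)  # signature simplified here
            infos = []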
import gin.tf
import tensorflow as tf

from tf_agents.networks.network import DistributionNetwork
from tf_agents.specs.distribution_spec import nested_distributions_from_specs
from tf_agents.specs import tensor_spec
from tf_agents.distributions.utils import SquashToSpecNormal

from alf.algorithms.actor_critic_algorithm import ActorCriticAlgorithm
from alf.algorithms.off_policy_algorithm import Experience
from alf.algorithms.on_policy_algorithm import OnPolicyAlgorithm
from alf.algorithms.rl_algorithm import ActionTimeStep, StepType
from alf.optimizers.trusted_updater import TrustedUpdater
from alf.utils import common
from alf.utils.common import namedtuple

nest_map = tf.nest.map_structure

TracExperience = namedtuple(
    "TracExperience", ["observation", "step_type", "state", "action_param"])

TracInfo = namedtuple("TracInfo", ["observation", "state", "ac"])


@gin.configurable
class TracAlgorithm(OnPolicyAlgorithm):
    """Trust-region actor-critic.

    It compares the action distributions after the SGD with the action
    distributions from the previous model. If the average distance is too big,
    the new parameters are shrunk as:

        w_new' = w_old + max_kl / kl * (w_new - w_old)

    If the distribution is Categorical, the distance is
    ||logits_1 - logits_2||^2, and if the distribution is Deterministic, the
    distance is ||loc_1 - loc_2||^2,
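# A minimal sketch of the shrinking rule quoted above; it is not the actual
# TrustedUpdater code. `w_old`/`w_new` are nests of weight tensors, `kl` is
# the measured average distance after SGD and `max_kl` is the trust-region
# bound; all of these names are illustrative only.
def _sketch_shrink_weights(w_old, w_new, kl, max_kl):
    if kl <= max_kl:
        return w_new  # the SGD step already stays inside the trust region
    scale = max_kl / kl  # in (0, 1): keep only this fraction of the step
    return nest_map(lambda old, new: old + scale * (new - old), w_old, w_new)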