def test_map_structure_up_to(self, map_structure_up_to):
    shallow_nest = [[None], None]
    inp_val = [[1], 2]
    out = map_structure_up_to(shallow_nest, lambda x: 2 * x, inp_val)
    self.assertEqual(out, [[2], 4])

    shallow_nest = [None, None]
    inp_val = [[1], 2]
    out = map_structure_up_to(shallow_nest, lambda x: 2 * x, inp_val)
    self.assertEqual(out, [[1, 1], 4])

    data_list = [[2, 4, 6, 8], [[1, 3, 5, 7, 9], [3, 5, 7]]]
    name_list = ['evens', ['odds', 'primes']]
    out = map_structure_up_to(
        name_list, lambda name, sec: "first_{}_{}".format(len(sec), name),
        name_list, data_list)
    self.assertEqual(out,
                     ['first_4_evens', ['first_5_odds', 'first_3_primes']])

    ab_tuple = namedtuple("ab_tuple", "a, b")
    op_tuple = namedtuple("op_tuple", "add, mul")
    inp_val = ab_tuple(a=2, b=3)
    inp_ops = ab_tuple(a=op_tuple(add=1, mul=2), b=op_tuple(add=2, mul=3))
    out = map_structure_up_to(
        inp_val, lambda val, ops: (val + ops.add) * ops.mul, inp_val, inp_ops)
    self.assertEqual(out, ab_tuple(a=6, b=15))
def test_transform_image(self):
    shape = [10]
    observation = tf.zeros(shape, dtype=tf.uint8)
    common.image_scale_transformer(observation)

    T1 = namedtuple('T1', ['x', 'y'])
    T2 = namedtuple('T2', ['a', 'b', 'c'])
    T3 = namedtuple('T3', ['l', 'm'])
    observation = T1(
        x=T2(
            a=tf.ones(shape, dtype=tf.uint8) * 255,
            b=T3(l=tf.zeros(shape, dtype=tf.uint8))))
    transformed_observation = common.image_scale_transformer(
        observation, fields=["x.a", "x.b.l"])
    tf.debugging.assert_equal(transformed_observation.x.a,
                              tf.ones(shape, dtype=tf.float32))
    tf.debugging.assert_equal(transformed_observation.x.b.l,
                              tf.ones(shape, dtype=tf.float32) * -1)

    with self.assertRaises(Exception) as _:
        common.image_scale_transformer(
            observation, fields=["x.b.m"])  # empty ()

    observation = dict(x=dict(a=observation.x.a))
    common.image_scale_transformer(observation, fields=["x.a"])
# limitations under the License.

import gin
import torch

import alf
from alf.algorithms.algorithm import Algorithm
from alf.data_structures import TimeStep, namedtuple, AlgStep, LossInfo
from alf.networks import EncodingNetwork
from alf.nest.utils import NestConcat
from alf.tensor_specs import TensorSpec
from alf.utils import math_ops
from alf.utils.normalizers import ScalarAdaptiveNormalizer, AdaptiveNormalizer

ICMInfo = namedtuple("ICMInfo", ["reward", "loss"])


@gin.configurable
class ICMAlgorithm(Algorithm):
    """Intrinsic Curiosity Module

    This module generates the intrinsic reward based on the prediction error
    of the observation. See Pathak et al "Curiosity-driven Exploration by
    Self-supervised Prediction"
    """

    def __init__(self,
                 action_spec,
                 observation_spec=None,
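# A minimal sketch (not ALF's implementation) of the curiosity reward the
# docstring above describes: the intrinsic reward is the forward-model
# prediction error in feature space. The names `encode` and `forward_net`
# are hypothetical callables, assumed for illustration only.
import torch


def curiosity_reward(encode, forward_net, obs, action, next_obs):
    """Intrinsic reward = squared error of the predicted next feature."""
    feature = encode(obs)
    next_feature = encode(next_obs)
    pred_next_feature = forward_net(torch.cat([feature, action], dim=-1))
    # Mean squared prediction error per batch element.
    return 0.5 * (pred_next_feature - next_feature.detach()).pow(2).mean(-1)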
            current_frame (int): not used.

        Returns:
            np.ndarray: The shape is [num_channels, image_size_y,
                image_size_x], where num_channels is 3 for the rgb sensor,
                and 1 for other sensors.
        """
        return self._image


NumpyWaypoint = namedtuple(
    "NumpyWaypoint",
    [
        'id',  # int
        'location',  # [3] (x, y, z)
        'rotation',  # [3] (pitch, yaw, roll)
        'road_id',  # int
        'section_id',  # int
        'lane_id',  # int
        'is_junction',  # bool
        'lane_width',  # float
        'lane_change',  # int (carla.LaneChange) whether lane change is
        # allowed. 0: None, 1: Right, 2: Left, 3: Both
        'lane_type',  # int (carla.LaneType)
        'right_lane_marking',  # int (carla.LaneMarking)
        'left_lane_marking',  # int (carla.LaneMarking)
    ])


def _to_numpy_loc(loc: carla.Location):
    return np.array([loc.x, loc.y, loc.z], dtype=np.float64)


def _to_carla_loc(loc):
    return carla.Location(float(loc[0]), float(loc[1]), float(loc[2]))
# limitations under the License.

import gin
import torch

import alf
from alf.algorithms.algorithm import Algorithm
from alf.data_structures import (AlgStep, LossInfo, namedtuple, TimeStep,
                                 StepType)
from alf.networks import EncodingNetwork
from alf.tensor_specs import BoundedTensorSpec, TensorSpec
from alf.utils.tensor_utils import to_tensor
from alf.utils import math_ops
from alf.utils.normalizers import AdaptiveNormalizer, ScalarAdaptiveNormalizer

DIAYNInfo = namedtuple("DIAYNInfo", ["reward", "loss"])


@gin.configurable
def create_discrete_skill_spec(num_of_skills):
    return BoundedTensorSpec((), dtype="int64", maximum=num_of_skills - 1)


@gin.configurable
class DIAYNAlgorithm(Algorithm):
    """Diversity is All You Need Module

    This module learns a set of skill-conditioned policies in an unsupervised
    way. See Eysenbach et al "Diversity is All You Need: Learning Diverse
    Skills without a Reward Function" for more details.
    """
from alf.data_structures import LossInfo, namedtuple
from alf.networks import (EncodingNetwork, StableNormalProjectionNetwork,
                          CategoricalProjectionNetwork)
from alf.utils import dist_utils, tensor_utils, summary_utils
from alf.utils.losses import element_wise_squared_loss

ModelOutput = namedtuple(
    'ModelOutput',
    [
        'value',  # [B], value for player 0
        'reward',  # [B], reward for player 0
        'game_over',  # [B], whether the game is over

        # [B, K, ...], candidate actions; () means all available discrete
        # actions
        'actions',

        # [B, K], probabilities of the candidate actions; a probability of 0
        # indicates an invalid action
        'action_probs',

        # [B, ...], latent state
        'state',

        # used by calc_loss
        'action_distribution',

        # used by calc_loss
        'game_over_logit'
    ])

ModelTarget = namedtuple(
    'ModelTarget',
    [
        # reward for the previously taken action and the next unroll_steps
        # actions
from alf.algorithms.config import TrainerConfig
from alf.algorithms.off_policy_algorithm import OffPolicyAlgorithm
from alf.algorithms.one_step_loss import OneStepTDLoss
from alf.algorithms.rl_algorithm import RLAlgorithm
from alf.data_structures import TimeStep, Experience, LossInfo, namedtuple
from alf.data_structures import AlgStep, StepType
from alf.nest import nest
import alf.nest.utils as nest_utils
from alf.networks import ActorDistributionNetwork, CriticNetwork
from alf.networks import QNetwork, QRNNNetwork
from alf.tensor_specs import TensorSpec, BoundedTensorSpec
from alf.utils import losses, common, dist_utils, math_ops

ActionType = Enum('ActionType', ('Discrete', 'Continuous', 'Mixed'))

SacActionState = namedtuple(
    "SacActionState", ["actor_network", "critic"], default_value=())
SacCriticState = namedtuple("SacCriticState", ["critics", "target_critics"])
SacState = namedtuple(
    "SacState", ["action", "actor", "critic"], default_value=())
SacCriticInfo = namedtuple("SacCriticInfo", ["critics", "target_critic"])
SacActorInfo = namedtuple(
    "SacActorInfo", ["actor_loss", "neg_entropy"], default_value=())
SacInfo = namedtuple(
    "SacInfo", ["action_distribution", "actor", "critic", "alpha"],
    default_value=())
from tf_agents.agents.ddpg.critic_rnn_network import CriticRnnNetwork
from tf_agents.networks.actor_distribution_network import ActorDistributionNetwork
from tf_agents.networks.actor_distribution_rnn_network import ActorDistributionRnnNetwork
from tf_agents.trajectories.time_step import StepType
from tf_agents.utils import common as tfa_common

from alf.algorithms.ddpg_algorithm import create_ou_process
from alf.algorithms.on_policy_algorithm import OnPolicyAlgorithm
from alf.data_structures import ActionTimeStep, LossInfo, PolicyStep, TrainingInfo
from alf.data_structures import namedtuple
from alf.utils import common, dist_utils, losses
from alf.utils.summary_utils import safe_mean_hist_summary

SarsaState = namedtuple(
    'SarsaState', [
        'prev_observation', 'prev_step_type', 'actor', 'target_actor',
        'critic', 'target_critic'
    ],
    default_value=())
SarsaInfo = namedtuple(
    'SarsaInfo', ['action_distribution', 'actor_loss', 'critic', 'returns'])
SarsaLossInfo = namedtuple(
    'SarsaLossInfo', ['actor', 'critic'], default_value=())


@gin.configurable
class SarsaAlgorithm(OnPolicyAlgorithm):
    r"""SARSA Algorithm.

    SARSA updates the Q function in an online manner using the following loss:

        ||Q(s_t, a_t) - stop_gradient(r_t + \gamma * Q(s_{t+1}, a_{t+1}))||^2

    See https://en.wikipedia.org/wiki/State-action-reward-state-action
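# A minimal sketch (assuming plain torch tensors; the names are hypothetical)
# of the TD loss in the docstring above; `detach()` plays the role of
# stop_gradient.
import torch


def sarsa_loss(q, q_next, reward, gamma):
    """||Q(s_t, a_t) - stop_gradient(r_t + gamma * Q(s_{t+1}, a_{t+1}))||^2"""
    td_target = (reward + gamma * q_next).detach()
    return (q - td_target).pow(2)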
import torch

from alf.algorithms.actor_critic_algorithm import ActorCriticAlgorithm
from alf.algorithms.algorithm import Algorithm
from alf.algorithms.agent_helpers import AgentHelper
from alf.algorithms.config import TrainerConfig
from alf.algorithms.entropy_target_algorithm import EntropyTargetAlgorithm
from alf.algorithms.icm_algorithm import ICMAlgorithm
from alf.algorithms.on_policy_algorithm import OnPolicyAlgorithm
from alf.algorithms.rl_algorithm import RLAlgorithm
from alf.data_structures import AlgStep, Experience
from alf.data_structures import TimeStep, namedtuple
from alf.utils import math_ops

AgentState = namedtuple(
    "AgentState", ["obs_trans", "rl", "irm", "goal_generator", "repr"],
    default_value=())

AgentInfo = namedtuple(
    "AgentInfo", ["rl", "irm", "goal_generator", "entropy_target", "repr"],
    default_value=())


@gin.configurable
class Agent(OnPolicyAlgorithm):
    """Agent is a master algorithm that integrates different algorithms
    together.
    """

    def __init__(self,
                 observation_spec,
                 action_spec,
                 env=None,
from tf_agents.agents.ddpg.critic_rnn_network import CriticRnnNetwork
from tf_agents.networks.q_network import QNetwork
from tf_agents.networks.q_rnn_network import QRnnNetwork
from tf_agents.networks.actor_distribution_network import ActorDistributionNetwork
from tf_agents.networks.actor_distribution_rnn_network import ActorDistributionRnnNetwork
from tf_agents.networks.network import Network, DistributionNetwork
from tf_agents.utils import common as tfa_common

from alf.algorithms.off_policy_algorithm import OffPolicyAlgorithm
from alf.algorithms.one_step_loss import OneStepTDLoss
from alf.algorithms.rl_algorithm import RLAlgorithm
from alf.data_structures import ActionTimeStep, Experience, LossInfo, namedtuple
from alf.data_structures import PolicyStep, TrainingInfo
from alf.utils import losses, common, dist_utils

SacShareState = namedtuple("SacShareState", ["actor"])

SacActorState = namedtuple("SacActorState", ["critic1", "critic2"])

SacCriticState = namedtuple(
    "SacCriticState",
    ["critic1", "critic2", "target_critic1", "target_critic2"])

SacState = namedtuple("SacState", ["share", "actor", "critic"])

SacActorInfo = namedtuple("SacActorInfo", ["loss"])

SacCriticInfo = namedtuple("SacCriticInfo",
                           ["critic1", "critic2", "target_critic"])

SacAlphaInfo = namedtuple("SacAlphaInfo", ["loss"])
import numpy as np
import torch
import torch.nn as nn

import alf
from alf import data_structures as ds
from alf.data_structures import namedtuple
from alf.nest.utils import convert_device
from alf.utils.common import warning_once
from alf.utils.data_buffer import atomic, RingBuffer
from alf.utils import checkpoint_utils
from .segment_tree import SumSegmentTree, MaxSegmentTree, MinSegmentTree

BatchInfo = namedtuple(
    "BatchInfo", ["env_ids", "positions", "importance_weights"],
    default_value=())


@gin.configurable
class ReplayBuffer(RingBuffer):
    """Replay buffer with RingBuffer as implementation.

    Terminology: consistent with RingBuffer, we use ``pos`` to refer to the
    always increasing position of an element in the infinitely long buffer,
    and ``idx`` as the actual index of the element in the underlying store
    (``_buffer``). That means ``idx == pos % _max_length`` is always true,
    and one should use ``_buffer[idx]`` to retrieve the stored data.
    """

    ONE_MINUS = np.float32(1) - np.finfo(np.float32).eps
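# A small illustration (with made-up numbers) of the pos/idx convention in
# the docstring above: with a buffer of length 4, the element written at
# position 9 of the conceptually infinite buffer lives at index 1.
max_length = 4
pos = 9
idx = pos % max_length
assert idx == 1  # _buffer[1] holds the data written at position 9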
# limitations under the License.

import gin
import numpy as np
import functools
import torch

import alf
from alf.algorithms.rl_algorithm import RLAlgorithm
from alf.data_structures import (TimeStep, Experience, LossInfo, namedtuple,
                                 AlgStep, StepType)
from alf.tensor_specs import TensorSpec, BoundedTensorSpec
import alf.utils.common as common

GoalState = namedtuple("GoalState", ["goal"], default_value=())
GoalInfo = namedtuple("GoalInfo", ["goal", "loss"], default_value=())


@gin.configurable
class RandomCategoricalGoalGenerator(RLAlgorithm):
    """Random Goal Generation Module.

    This module generates a random categorical goal for the agent at the
    beginning of every episode.
    """

    def __init__(self,
                 observation_spec,
                 num_of_goals,
                 name="RandomCategoricalGoalGenerator"):
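# A minimal sketch (hypothetical, outside of ALF) of sampling one random
# categorical goal per environment, as the docstring above describes:
import torch

num_of_goals = 5  # assumed number of goals
batch_size = 3  # one goal per parallel environment
goal = torch.randint(num_of_goals, (batch_size, ))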
import numpy as np

import alf
from alf.algorithms.algorithm import Algorithm
from alf.algorithms.off_policy_algorithm import OffPolicyAlgorithm
from alf.algorithms.sac_algorithm import SacAlgorithm
from alf.algorithms.config import TrainerConfig
from alf.data_structures import TimeStep, Experience, namedtuple, AlgStep
from alf.data_structures import make_experience, LossInfo
from alf.tensor_specs import BoundedTensorSpec, TensorSpec
from alf.utils.conditional_ops import conditional_update
from alf.utils import common, summary_utils, tensor_utils

ActionRepeatState = namedtuple(
    "ActionRepeatState", [
        "rl", "action", "steps", "k", "rl_discount", "rl_reward",
        "sample_rewards", "repr"
    ],
    default_value=())


@gin.configurable
class DynamicActionRepeatAgent(OffPolicyAlgorithm):
    """Create an agent which learns a variable action repetition duration.

    At each decision step, the agent outputs both the action to repeat and
    the number of steps to repeat it. These two quantities together
    constitute the action of the agent. We use SAC with a mixed action type
    for training.

    The core idea is similar to `Learning to Repeat: Fine Grained Action
    Repetition for Deep Reinforcement Learning
    <http://arxiv.org/abs/1702.06054>`_.
    """

    def __init__(self,
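# A minimal sketch (with assumed specs, not the agent's actual configuration)
# of a mixed action of the kind described above: a discrete "how many steps
# to repeat" component paired with the environment's continuous action.
from alf.tensor_specs import BoundedTensorSpec

K = 5  # assumed maximum repetition duration
continuous_action_spec = BoundedTensorSpec((3, ), minimum=-1., maximum=1.)
mixed_action_spec = (
    BoundedTensorSpec((), dtype='int64', maximum=K - 1),  # steps to repeat
    continuous_action_spec)  # action to repeat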
"""A generic generator.""" import gin import numpy as np import torch from alf.algorithms.algorithm import Algorithm from alf.algorithms.mi_estimator import MIEstimator from alf.data_structures import AlgStep, LossInfo, namedtuple import alf.nest as nest from alf.networks import Network, EncodingNetwork from alf.tensor_specs import TensorSpec from alf.utils import common, math_ops from alf.utils.averager import AdaptiveAverager GeneratorLossInfo = namedtuple("GeneratorLossInfo", ["generator", "mi_estimator"]) @gin.configurable class Generator(Algorithm): """Generator Generator generates outputs given `inputs` (can be None) by transforming a random noise and input using `net`: outputs = net([noise, input]) if input is not None else net(noise) The generator is trained to minimize the following objective: :math:`E(loss\_func(net([noise, input]))) - entropy\_regulariztion \cdot H(P)`
"""Unittests for nest.py""" import torch from absl.testing import parameterized import collections import alf import alf.nest as nest import cnest from alf.data_structures import namedtuple from alf.tensor_specs import TensorSpec from alf.nest.utils import NestConcat, NestSum, NestMultiply from alf.nest import transform_nest NTuple = namedtuple('NTuple', ['a', 'b']) # default value will be None class TestIsNested(parameterized.TestCase, alf.test.TestCase): @parameterized.parameters(nest.is_nested, cnest._is_nested) def test_is_nested(self, is_nested): self.assertFalse(is_nested(1)) self.assertFalse(is_nested(None)) self.assertTrue(is_nested(dict(x=1))) self.assertTrue(is_nested([1])) ntuple = NTuple(a=1, b=NTuple(a=NTuple(a=(2, ), b=[3]), b=dict(x=2))) self.assertTrue(is_nested(ntuple)) class TestFlatten(parameterized.TestCase, alf.test.TestCase): @parameterized.parameters(nest.py_flatten, cnest.flatten)
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Actor critic algorithm."""

import gin

from alf.algorithms.on_policy_algorithm import OnPolicyAlgorithm
from alf.networks import ActorDistributionNetwork, ValueNetwork
from alf.algorithms.actor_critic_loss import ActorCriticLoss
from alf.data_structures import TimeStep, AlgStep, namedtuple
from alf.utils import common, dist_utils
from .config import TrainerConfig

ActorCriticState = namedtuple(
    "ActorCriticState", ["actor", "value"], default_value=())

ActorCriticInfo = namedtuple("ActorCriticInfo",
                             ["action_distribution", "value"])


@gin.configurable
class ActorCriticAlgorithm(OnPolicyAlgorithm):
    """Actor critic algorithm."""

    def __init__(self,
                 observation_spec,
                 action_spec,
                 actor_network_ctor=ActorDistributionNetwork,
                 value_network_ctor=ValueNetwork,
                 env=None,
                 config: TrainerConfig = None,
import alf
from alf.algorithms.sac_algorithm import SacAlgorithm
from alf.algorithms.agent_helpers import AgentHelper
from alf.algorithms.config import TrainerConfig
from .skill_generator import SkillGenerator, SubTrajectory
from alf.algorithms.on_policy_algorithm import OnPolicyAlgorithm
from alf.data_structures import AlgStep, Experience
from alf.data_structures import TimeStep, namedtuple
from alf.nest.utils import transform_nest
from alf.utils import math_ops
from alf.data_structures import StepType
from alf.tensor_specs import BoundedTensorSpec, TensorSpec
from alf.networks.preprocessors import EmbeddingPreprocessor
from alf.utils.conditional_ops import conditional_update

AgentState = namedtuple(
    "AgentState", ["rl", "skill_generator"], default_value=())

AgentInfo = namedtuple(
    "AgentInfo", ["rl", "skill_generator", "skill_discount"],
    default_value=())


@gin.configurable
def get_low_rl_input_spec(observation_spec, action_spec, num_steps_per_skill,
                          skill_spec):
    assert observation_spec.ndim == 1 and action_spec.ndim == 1
    concat_observation_spec = TensorSpec(
        (num_steps_per_skill * observation_spec.shape[0], ))
    concat_action_spec = TensorSpec(
        (num_steps_per_skill * action_spec.shape[0], ))
    traj_spec = SubTrajectory(
        observation=concat_observation_spec,
import alf
from alf.algorithms.config import TrainerConfig
from alf.algorithms.off_policy_algorithm import OffPolicyAlgorithm
from alf.algorithms.one_step_loss import OneStepTDLoss
from alf.algorithms.rl_algorithm import RLAlgorithm
from alf.algorithms.sac_algorithm import _set_target_entropy
from alf.data_structures import TimeStep, Experience, LossInfo, namedtuple
from alf.data_structures import AlgStep
from alf.nest import nest
from alf.networks import MdqCriticNetwork
from alf.tensor_specs import TensorSpec, BoundedTensorSpec
from alf.utils import (losses, common, dist_utils, math_ops, spec_utils,
                       tensor_utils)

MdqCriticState = namedtuple("MdqCriticState", ['critic', 'target_critic'])
MdqCriticInfo = namedtuple("MdqCriticInfo", [
    "critic_free_form", "target_critic_free_form", "critic_adv_form",
    "distill_target", "kl_wrt_prior"
])

MdqState = namedtuple("MdqState", ['critic'])
MdqAlphaInfo = namedtuple("MdqAlphaInfo", ["alpha_loss", "neg_entropy"])
MdqInfo = namedtuple("MdqInfo", ["critic", "alpha"], default_value=())

MdqLossInfo = namedtuple('MdqLossInfo', ['critic', 'distill', 'alpha'])


@gin.configurable
class MdqAlgorithm(OffPolicyAlgorithm):
    """Multi-Dimensional Q-Learning Algorithm.
import alf
from alf.algorithms.algorithm import Algorithm
from alf.algorithms.sac_algorithm import SacAlgorithm, SacLossInfo
from alf.algorithms.config import TrainerConfig
from alf.data_structures import TimeStep, Experience, namedtuple, AlgStep
from alf.data_structures import make_experience, LossInfo, StepType
from alf.networks import EncodingNetwork
import alf.nest.utils as nest_utils
from alf.tensor_specs import BoundedTensorSpec, TensorSpec
from alf.utils.conditional_ops import conditional_update
from alf.utils import dist_utils, math_ops, common, losses, tensor_utils
from alf.nest.utils import NestConcat
from alf.networks.preprocessors import EmbeddingPreprocessor

SubTrajectory = namedtuple(
    'SubTrajectory', ["observation", "prev_action"], default_value=())

DiscriminatorTimeStep = namedtuple(
    'DiscTimeStep', [
        "step_type", "observation", "state", "env_id", "batch_info",
        "prev_action", "reward"
    ],
    default_value=())

DiscriminatorState = namedtuple(
    "DiscriminatorState",
    ["untrans_observation", "subtrajectory", "first_observation"],
from alf.algorithms.off_policy_algorithm import OffPolicyAlgorithm
from alf.algorithms.one_step_loss import OneStepTDLoss
from alf.algorithms.rl_algorithm import RLAlgorithm
from alf.data_structures import (AlgStep, Experience, LossInfo, namedtuple,
                                 TimeStep)
from alf.nest import nest
from alf.networks import ActorDistributionNetwork, CriticNetwork
from alf.tensor_specs import TensorSpec, BoundedTensorSpec
from alf.utils import losses, common, dist_utils, tensor_utils
from alf.utils.math_ops import add_ignore_empty

from alf.algorithms.dynamics_learning_algorithm import DynamicsLearningAlgorithm
from alf.algorithms.reward_learning_algorithm import RewardEstimationAlgorithm
from alf.algorithms.planning_algorithm import PlanAlgorithm

MbrlState = namedtuple("MbrlState", ["dynamics", "reward", "planner"])
MbrlInfo = namedtuple(
    "MbrlInfo", ["dynamics", "reward", "planner"], default_value=())


@gin.configurable
class MbrlAlgorithm(OffPolicyAlgorithm):
    """Model-based RL algorithm."""

    def __init__(self,
                 observation_spec,
                 feature_spec,
                 action_spec,
                 dynamics_module: DynamicsLearningAlgorithm,
                 reward_module: RewardEstimationAlgorithm,
                 planner_module: PlanAlgorithm,
from typing import Callable

import alf
from alf.algorithms.config import TrainerConfig
from alf.algorithms.off_policy_algorithm import OffPolicyAlgorithm
from alf.algorithms.one_step_loss import OneStepTDLoss
from alf.algorithms.rl_algorithm import RLAlgorithm
from alf.data_structures import TimeStep, Experience, LossInfo, namedtuple
from alf.data_structures import AlgStep, StepType
from alf.nest import nest
import alf.nest.utils as nest_utils
from alf.networks import ActorNetwork, CriticNetwork
from alf.tensor_specs import TensorSpec, BoundedTensorSpec
from alf.utils import losses, common, dist_utils, math_ops, spec_utils

DdpgCriticState = namedtuple("DdpgCriticState",
                             ['critics', 'target_actor', 'target_critics'])
DdpgCriticInfo = namedtuple("DdpgCriticInfo",
                            ["q_values", "target_q_values"])
DdpgActorState = namedtuple("DdpgActorState", ['actor', 'critics'])
DdpgState = namedtuple("DdpgState", ['actor', 'critics'])
DdpgInfo = namedtuple(
    "DdpgInfo", ["action_distribution", "actor_loss", "critic"],
    default_value=())
DdpgLossInfo = namedtuple('DdpgLossInfo', ('actor', 'critic'))


@gin.configurable
class DdpgAlgorithm(OffPolicyAlgorithm):
    """Deep Deterministic Policy Gradient (DDPG).

    Reference:
    Lillicrap et al "Continuous control with deep reinforcement learning"
import alf
from alf.algorithms.algorithm import Algorithm
from alf.algorithms.sac_algorithm import _set_target_entropy
from alf.algorithms.one_step_loss import OneStepTDLoss
from alf.algorithms.rl_algorithm import RLAlgorithm
from alf.algorithms.on_policy_algorithm import OnPolicyAlgorithm
from alf.data_structures import (AlgStep, Experience, experience_to_time_step,
                                 LossInfo, namedtuple, StepType, TimeStep)
from alf.networks import Network
from alf.utils import (common, dist_utils, losses, math_ops, spec_utils,
                       tensor_utils)
from alf.utils.summary_utils import safe_mean_hist_summary
import alf.nest.utils as nest_utils

SarsaState = namedtuple(
    'SarsaState', [
        'prev_observation', 'prev_step_type', 'actor', 'critics',
        'target_critics', 'noise'
    ],
    default_value=())

SarsaInfo = namedtuple(
    'SarsaInfo', [
        'action_distribution', 'actor_loss', 'critics', 'target_critics',
        'neg_entropy'
    ],
    default_value=())

SarsaLossInfo = namedtuple('SarsaLossInfo',
                           ['actor', 'critic', 'alpha', 'neg_entropy'])

nest_map = alf.nest.map_structure


@gin.configurable
class SarsaAlgorithm(OnPolicyAlgorithm):
import os
import glob
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter

import matplotlib
import matplotlib.pyplot as plt
# Style gallery:
# https://tonysyu.github.io/raw_content/matplotlib-style-gallery/gallery.html
plt.style.use('seaborn-colorblind')

import alf.nest as nest
from alf.data_structures import namedtuple

HOME = os.getenv("HOME")

MeanCurve = namedtuple(
    "MeanCurve", ['x', 'y', 'min_y', 'max_y', 'name'], default_value=())


class MeanCurveReader(object):
    """Read and compute a MeanCurve from one or multiple TB event files."""

    _SIZE_GUIDANCE = {
        'compressedHistograms': 10,
        'images': 0,
        # sampled points will evenly distribute over the training time
        'scalars': 100,
        'histograms': 1
    }

    def _get_metric_name(self):
        raise NotImplementedError()
from absl import logging
import gin
import numpy as np
import tensorflow as tf

from tf_agents.trajectories.time_step import StepType

from alf.algorithms.algorithm import Algorithm, AlgorithmStep
from alf.data_structures import namedtuple, LossInfo
from alf.utils import dist_utils
from alf.utils.averager import ScalarWindowAverager
from alf.utils.common import run_if, should_record_summaries
from alf.utils.dist_utils import calc_default_target_entropy
from alf.utils.dist_utils import calc_default_max_entropy

EntropyTargetLossInfo = namedtuple("EntropyTargetLossInfo", ["neg_entropy"])
EntropyTargetInfo = namedtuple("EntropyTargetInfo", ["step_type", "loss"])


@gin.configurable
class EntropyTargetAlgorithm(Algorithm):
    """Algorithm for adjusting entropy regularization.

    It tries to adjust the entropy regularization (i.e. alpha) so that the
    entropy is not smaller than `target_entropy`.

    The algorithm has three stages:

    0. init stage. This is an optional stage. If the initial entropy is
       already below `max_entropy`, then this stage is skipped. Otherwise,
       the alpha will be slowly decreased so that the entropy will land at
       `max_entropy` to trigger the next `free stage`. Basically, this stage
       lets the user choose
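# A minimal sketch (hypothetical, not ALF's implementation) of how an
# adjustable alpha enters a policy loss: a larger alpha pushes the policy
# toward higher entropy, which is the lever the algorithm above adjusts.
import torch


def entropy_regularized_loss(policy_loss, entropy, log_alpha):
    """Add the entropy term -alpha * H to the policy loss."""
    alpha = log_alpha.exp().detach()  # alpha is adjusted by a separate rule
    return policy_loss - alpha * entropy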
from alf.experience_replayers.replay_buffer import BatchInfo, ReplayBuffer
from alf.nest import nest
from alf.nest.utils import convert_device
from alf.networks import Network, LSTMEncodingNetwork
from alf.utils import common, dist_utils, spec_utils, tensor_utils
from alf.utils.normalizers import AdaptiveNormalizer
from alf.utils.summary_utils import safe_mean_hist_summary, safe_mean_summary

PredictiveRepresentationLearnerInfo = namedtuple(
    'PredictiveRepresentationLearnerInfo',
    [
        # actual actions taken in the next unroll_steps + 1 steps
        # [B, unroll_steps + 1, ...]
        'action',

        # the flag to indicate whether to include this target into the loss
        # [B, unroll_steps + 1]
        'mask',

        # nest for targets
        # [B, unroll_steps + 1, ...]
        'target'
    ])


@gin.configurable
class SimpleDecoder(Algorithm):
    """A simple decoder with elementwise loss between the target and the
    predicted value.

    It is used to predict the target value from the given representation.
    Its loss can be used to train the representation.
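# A minimal sketch (hypothetical names) of the elementwise decoder loss
# described above: decode the representation, then penalize the squared
# difference with the target at every element.
import torch


def decoder_loss(decoder, representation, target):
    predicted = decoder(representation)
    return 0.5 * (predicted - target).pow(2)  # elementwise squared loss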
import gin
import numpy as np
import torch
import torch.distributions as td

import alf
from alf.algorithms.actor_critic_algorithm import ActorCriticAlgorithm
from alf.algorithms.on_policy_algorithm import OnPolicyAlgorithm
from alf.data_structures import Experience, namedtuple, StepType, TimeStep
from alf.optimizers.trusted_updater import TrustedUpdater
from alf.utils import common, dist_utils, math_ops

nest_map = alf.nest.map_structure

TracExperience = namedtuple(
    "TracExperience",
    ["observation", "step_type", "state", "action_param", "prev_action"])

TracInfo = namedtuple(
    "TracInfo",
    ["action_distribution", "observation", "state", "ac", "prev_action"])


@gin.configurable
class TracAlgorithm(OnPolicyAlgorithm):
    """Trust-region actor-critic.

    It compares the action distributions after the SGD with the action
    distributions from the previous model. If the average distance is too
    big, the new parameters are shrunk as:
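# A minimal sketch of one plausible shrinking rule (an assumption: the exact
# formula is cut off above) that pulls the new parameters back toward the
# previous ones by a ratio in [0, 1]:
import torch


def shrink_params(old_params, new_params, ratio):
    """Interpolate: theta <- theta_old + ratio * (theta - theta_old)."""
    with torch.no_grad():
        for p_old, p_new in zip(old_params, new_params):
            p_new.copy_(p_old + ratio * (p_new - p_old))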
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from absl.testing import parameterized

import torch

import alf
from alf.data_structures import namedtuple, StepType
from alf.experience_replayers.replay_buffer import ReplayBuffer, BatchInfo
from alf.algorithms.data_transformer import FrameStacker, ImageScaleTransformer
from alf.utils import common

DataItem = namedtuple(
    'DataItem', ['step_type', 'observation', 'batch_info', 'replay_buffer'],
    default_value=())


class FrameStackerTest(parameterized.TestCase, alf.test.TestCase):
    @parameterized.parameters(-1, 0)
    def test_frame_stacker(self, stack_axis=0):
        data_spec = DataItem(
            step_type=alf.TensorSpec((), dtype=torch.int32),
            observation=dict(
                scalar=alf.TensorSpec(()),
                vector=alf.TensorSpec((7, )),
                matrix=alf.TensorSpec((5, 6)),
                tensor=alf.TensorSpec((2, 3, 4))))
        replay_buffer = ReplayBuffer(
            data_spec=data_spec,
            num_environments=2,
            max_length=1024,
import numpy as np
import torch
import torch.nn.functional as F
from typing import Callable

import alf
from alf.algorithms.algorithm import Algorithm
from alf.algorithms.config import TrainerConfig
from alf.data_structures import AlgStep, LossInfo, namedtuple
from alf.algorithms.generator import Generator
from alf.networks import EncodingNetwork, ParamNetwork
from alf.tensor_specs import TensorSpec
from alf.utils import common, math_ops, summary_utils
from alf.utils.summary_utils import record_time

HyperNetworkLossInfo = namedtuple("HyperNetworkLossInfo", ["loss", "extra"])


def classification_loss(output, target):
    pred = output.max(-1)[1]
    acc = pred.eq(target).float().mean(0)
    avg_acc = acc.mean()
    loss = F.cross_entropy(output.transpose(1, 2), target)
    return HyperNetworkLossInfo(loss=loss, extra=avg_acc)


def regression_loss(output, target):
    out_shape = output.shape[-1]
    assert (target.shape[-1] == out_shape), (
        "feature dimension of output and target does not match.")
    loss = 0.5 * F.mse_loss(
        output.reshape(-1, out_shape),
from abc import abstractmethod
from absl import logging
import copy
import json
import os

import tensorflow as tf
from tensorflow.python.util.serialization import get_json_type
from tf_agents.utils import eager_utils

from alf.data_structures import namedtuple, LossInfo
import alf.utils

AlgorithmStep = namedtuple("AlgorithmStep", ["outputs", "state", "info"])


def _is_alg(obj):
    """Only return True if obj is an instance of Algorithm."""
    return isinstance(obj, Algorithm)


def _is_trainable_module(obj):
    """Only return True if the module or var is trainable, to avoid possible
    confusion in the optimizer info."""
    return (isinstance(obj, tf.Module) and not isinstance(obj, Algorithm)
            and obj.trainable_variables)


def _is_trainable_var(obj):
        self.normalize_base = torch.where(normalize, self.minimum,
                                          self.normalize_base)

    def normalize_value(self, value, batch_index):
        return self.normalize_scale[batch_index] * (
            value - self.normalize_base[batch_index])

    def calc_value(self, nodes):
        return self.value_sum[nodes] / self.visit_count[nodes]


def _nest_slice(nested, i):
    return nest.map_structure(lambda x: x[i], nested)


MCTSState = namedtuple("MCTSState", ["steps"])
MCTSInfo = namedtuple(
    "MCTSInfo", ["candidate_actions", "value", "candidate_action_policy"])


@gin.configurable
class MCTSAlgorithm(OffPolicyAlgorithm):
    r"""Monte-Carlo Tree Search algorithm.

    The code largely follows the pseudocode of `Schrittwieser et al. Mastering
    Atari, Go, Chess and Shogi by Planning with a Learned Model
    <https://arxiv.org/abs/1911.08265>`_. The pseudocode can be downloaded
    from `<https://arxiv.org/src/1911.08265v2/anc/pseudocode.py>`_

    There are several differences:

    1. In this implementation, all values and rewards are for player 0. It
       seems