import numpy as np import logging import gym import ray from ray.rllib.agents.impala import vtrace from ray.rllib.models.tf.tf_action_dist import Categorical from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.policy.tf_policy import LearningRateSchedule, \ EntropyCoeffSchedule, ACTION_LOGP from ray.rllib.utils.explained_variance import explained_variance from ray.rllib.utils import try_import_tf tf = try_import_tf() logger = logging.getLogger(__name__) BEHAVIOUR_LOGITS = "behaviour_logits" class VTraceLoss: def __init__(self, actions, actions_logp, actions_entropy, dones, behaviour_action_logp, behaviour_logits, target_logits,
Type, TypeVar, TYPE_CHECKING, Union, ) if TYPE_CHECKING: from ray.rllib.env.env_context import EnvContext from ray.rllib.policy.dynamic_tf_policy_v2 import DynamicTFPolicyV2 from ray.rllib.policy.eager_tf_policy_v2 import EagerTFPolicyV2 from ray.rllib.policy.policy import PolicySpec from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch from ray.rllib.policy.view_requirement import ViewRequirement from ray.rllib.utils import try_import_tf, try_import_torch _, tf, _ = try_import_tf() torch, _ = try_import_torch() # Represents a generic tensor type. # This could be an np.ndarray, tf.Tensor, or a torch.Tensor. TensorType = Any # Either a plain tensor, or a dict or tuple of tensors (or StructTensors). TensorStructType = Union[TensorType, dict, tuple] # A shape of a tensor. TensorShape = Union[Tuple[int], List[int]] # Represents a fully filled out config of a Trainer class. # Note: Policy config dicts are usually the same as TrainerConfigDict, but # parts of it may sometimes be altered in e.g. a multi-agent setup,
from abc import ABC import os import copy from simulator import SimpleSim, generate_random_routing import numpy as np import gym import ray.rllib.agents.ppo as ppo import ray.rllib.agents.impala as impala import ray from ray.tune.logger import pretty_print import argparse as ap from ray.rllib.utils import try_import_tf import warnings try: _, tf, version = try_import_tf(True) assert version == 2, "TF not version 2.xx" except ImportError as e: raise e if not tf.test.is_built_with_cuda(): my_devices = tf.config.experimental.list_physical_devices( device_type='CPU') tf.config.experimental.set_visible_devices(devices=my_devices, device_type='CPU') __all__ = [ 'PPOEnv', 'PPOENV_DEFAULT_CONFIG', 'PPOExpRunner', 'binary_state_converter' ] DEFAULT_CONFIG_DIR = os.path.abspath(
from ray.rllib.agents.a3c.a3c_tf_policy import A3CLoss from ray.rllib.agents.trainer import with_common_config from ray.rllib.agents.trainer_template import build_trainer from ray.rllib.evaluation.postprocessing import Postprocessing, compute_advantages from ray.rllib.models import ModelCatalog from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.preprocessors import Preprocessor from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import TFPolicy from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.utils import try_import_tf from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable from baselines.marl_benchmark.networks import CentralizedActorCriticModel tf1, tf, tfv = try_import_tf() class CentralizedValueMixin: def __init__(self: TFPolicy): self.compute_central_vf = make_tf_callable( self.get_session(), dynamic_shape=True )(self.model.central_value_function) def build_cac_model( policy: TFPolicy, obs_space: spaces.Space, action_space: spaces.Space, config ) -> ModelV2: policy.model = ModelCatalog.get_model_v2( obs_space=obs_space, action_space=action_space,
def __init__(self):
    """Import the TF/Torch backends and run the env/model loaders.

    Stores the return value of ``try_import_tf()`` on ``self.tf`` and the
    first of the two values returned by ``try_import_torch()`` on
    ``self.torch``. Afterwards ``load_envs`` and ``load_models`` are each
    called with the directory that contains this source file.
    """
    # NOTE(review): the return value is stored unchanged. Some RLlib
    # releases return a (tf1, tf, version) tuple from try_import_tf()
    # instead of the module itself -- confirm against the pinned Ray version.
    self.tf = try_import_tf()

    # Only the first element of the pair is kept; the second is discarded.
    torch_module, _unused = try_import_torch()
    self.torch = torch_module

    # Both loaders receive the same directory (the one holding this file);
    # presumably they register the project's custom envs/models -- verify
    # against the loader implementations.
    module_dir = os.path.dirname(__file__)
    load_envs(module_dir)
    load_models(module_dir)