Esempio n. 1
0
import numpy as np
import logging
import gym

import ray
from ray.rllib.agents.impala import vtrace
from ray.rllib.models.tf.tf_action_dist import Categorical
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.policy.tf_policy import LearningRateSchedule, \
    EntropyCoeffSchedule, ACTION_LOGP
from ray.rllib.utils.explained_variance import explained_variance
from ray.rllib.utils import try_import_tf

# TensorFlow handle obtained through RLlib's import helper.
# NOTE(review): the helper's return value is bound to `tf` as-is — confirm it
# is the module itself (not a tuple) for the Ray version in use.
tf = try_import_tf()

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# String constant naming the behaviour logits.
# NOTE(review): presumably used as a dictionary/batch key — confirm at the
# use sites.
BEHAVIOUR_LOGITS = "behaviour_logits"


class VTraceLoss:
    def __init__(self,
                 actions,
                 actions_logp,
                 actions_entropy,
                 dones,
                 behaviour_action_logp,
                 behaviour_logits,
                 target_logits,
Esempio n. 2
0
    Type,
    TypeVar,
    TYPE_CHECKING,
    Union,
)

# The imports below are needed only for type annotations. `TYPE_CHECKING` is
# False at runtime, so none of these modules are imported (and neither import
# helper is called) when the program actually runs.
if TYPE_CHECKING:
    from ray.rllib.env.env_context import EnvContext
    from ray.rllib.policy.dynamic_tf_policy_v2 import DynamicTFPolicyV2
    from ray.rllib.policy.eager_tf_policy_v2 import EagerTFPolicyV2
    from ray.rllib.policy.policy import PolicySpec
    from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch
    from ray.rllib.policy.view_requirement import ViewRequirement
    from ray.rllib.utils import try_import_tf, try_import_torch

    # Bind `tf` and `torch` for use in annotations; the remaining elements of
    # the tuples returned by the import helpers are discarded.
    _, tf, _ = try_import_tf()
    torch, _ = try_import_torch()

# Represents a generic tensor type.
# This could be an np.ndarray, tf.Tensor, or a torch.Tensor.
TensorType = Any

# Either a plain tensor, or a dict or tuple of tensors (or StructTensors).
TensorStructType = Union[TensorType, dict, tuple]

# A shape of a tensor: a tuple or list of ints of any length.
# `Tuple[int, ...]` is used on purpose: a bare `Tuple[int]` would describe
# only tuples with exactly one element.
TensorShape = Union[Tuple[int, ...], List[int]]

# Represents a fully filled out config of a Trainer class.
# Note: Policy config dicts are usually the same as TrainerConfigDict, but
# parts of it may sometimes be altered in e.g. a multi-agent setup,
Esempio n. 3
0
from abc import ABC
import os
import copy
from simulator import SimpleSim, generate_random_routing
import numpy as np
import gym
import ray.rllib.agents.ppo as ppo
import ray.rllib.agents.impala as impala
import ray
from ray.tune.logger import pretty_print
import argparse as ap
from ray.rllib.utils import try_import_tf
import warnings

# Import TensorFlow through RLlib's helper. Any ImportError raised by the
# helper simply propagates; catching it only to re-raise it unchanged added
# nothing.
# NOTE(review): the positional `True` presumably asks the helper to raise
# instead of returning None when TensorFlow is missing — confirm against the
# installed Ray version.
_, tf, version = try_import_tf(True)

# Explicit check rather than `assert`, so the version guard still runs when
# Python is started with -O. The exception type and message are unchanged.
if version != 2:
    raise AssertionError("TF not version 2.xx")

# If this TensorFlow build was compiled without CUDA support, make only the
# CPU devices visible to TensorFlow.
if not tf.test.is_built_with_cuda():
    my_devices = tf.config.experimental.list_physical_devices(
        device_type='CPU')
    tf.config.experimental.set_visible_devices(devices=my_devices,
                                               device_type='CPU')

# Public names exported by a star-import of this module.
__all__ = [
    'PPOEnv',
    'PPOENV_DEFAULT_CONFIG',
    'PPOExpRunner',
    'binary_state_converter',
]

DEFAULT_CONFIG_DIR = os.path.abspath(
Esempio n. 4
0
from ray.rllib.agents.a3c.a3c_tf_policy import A3CLoss
from ray.rllib.agents.trainer import with_common_config
from ray.rllib.agents.trainer_template import build_trainer
from ray.rllib.evaluation.postprocessing import Postprocessing, compute_advantages
from ray.rllib.models import ModelCatalog
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.preprocessors import Preprocessor
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.tf_policy import TFPolicy
from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.utils import try_import_tf
from ray.rllib.utils.tf_ops import explained_variance, make_tf_callable

from baselines.marl_benchmark.networks import CentralizedActorCriticModel

# Unpack the three values returned by RLlib's TensorFlow import helper.
# NOTE(review): presumably (TF1-compat module, TF module, TF major version) —
# confirm against the installed Ray version.
tf1, tf, tfv = try_import_tf()


class CentralizedValueMixin:
    """Mixin that exposes the model's central value function as a callable.

    On construction it wraps ``self.model.central_value_function`` with
    ``make_tf_callable`` (bound to the policy's session, with
    ``dynamic_shape=True``) and stores the result as
    ``self.compute_central_vf``.

    NOTE(review): ``self`` is annotated as ``TFPolicy``, so this is presumably
    meant to be mixed into a TF policy whose ``model`` is already set when
    ``__init__`` runs — confirm at the use site.
    """

    def __init__(self: TFPolicy):
        # Build the session-bound wrapper first, then apply it to the model's
        # value-function method.
        wrap = make_tf_callable(self.get_session(), dynamic_shape=True)
        central_vf = self.model.central_value_function
        self.compute_central_vf = wrap(central_vf)


def build_cac_model(
    policy: TFPolicy, obs_space: spaces.Space, action_space: spaces.Space, config
) -> ModelV2:
    policy.model = ModelCatalog.get_model_v2(
        obs_space=obs_space,
        action_space=action_space,
Esempio n. 5
0
 def __init__(self):
     """Store the framework import results and load local envs and models.

     Keeps the results of ``try_import_tf`` and ``try_import_torch`` on the
     instance, then calls ``load_envs`` and ``load_models`` with the
     directory that contains this file.
     """
     # NOTE(review): the TF helper's return value is stored unmodified while
     # the torch helper's is unpacked — confirm the TF helper returns a single
     # module in the Ray version in use.
     self.tf = try_import_tf()
     torch_module, _ = try_import_torch()
     self.torch = torch_module

     here = os.path.dirname(__file__)
     load_envs(here)
     load_models(here)