# Code in this file is copied and adapted from
# https://github.com/openai/evolution-strategies-starter.

import gym
import numpy as np

import ray
import ray.experimental.tf_utils
from ray.rllib.agents.es.es_tf_policy import make_session
from ray.rllib.models import ModelCatalog
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.filter import get_filter
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.spaces.space_utils import unbatch

tf = try_import_tf()


class ARSTFPolicy:
    def __init__(self, obs_space, action_space, config):
        self.observation_space = obs_space
        self.action_space = action_space
        self.action_noise_std = config["action_noise_std"]
        self.preprocessor = ModelCatalog.get_preprocessor_for_space(
            self.observation_space)
        self.observation_filter = get_filter(config["observation_filter"],
                                             self.preprocessor.shape)
        self.single_threaded = config.get("single_threaded", False)
        self.sess = make_session(single_threaded=self.single_threaded)
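

# --- Usage sketch: not part of the original file. The env name and config
# values below are illustrative assumptions; the config keys are the ones
# read by ARSTFPolicy.__init__ above.
if __name__ == "__main__":
    example_config = {
        "action_noise_std": 0.01,
        "observation_filter": "MeanStdFilter",
        "single_threaded": True,
    }
    env = gym.make("CartPole-v1")
    policy = ARSTFPolicy(env.observation_space, env.action_space,
                         example_config)
    print("observation filter:", policy.observation_filter)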
import logging
from typing import Any, Dict, List, Tuple, TYPE_CHECKING, Union

import numpy as np

from ray.rllib.env.base_env import _DUMMY_AGENT_ID
from ray.rllib.evaluation.collectors.sample_collector import SampleCollector
from ray.rllib.evaluation.episode import MultiAgentEpisode
from ray.rllib.policy.policy import Policy
from ray.rllib.policy.policy_map import PolicyMap
from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch
from ray.rllib.utils.annotations import override
from ray.rllib.utils.debug import summarize
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.typing import AgentID, EpisodeID, EnvID, PolicyID, \
    TensorType, ViewRequirementsDict
from ray.util.debug import log_once

_, tf, _ = try_import_tf()
torch, _ = try_import_torch()

if TYPE_CHECKING:
    from ray.rllib.agents.callbacks import DefaultCallbacks

logger = logging.getLogger(__name__)


def to_float_np_array(v: List[Any]) -> np.ndarray:
    # Torch tensors must be converted to numpy before reaching this helper.
    if torch and torch.is_tensor(v[0]):
        raise ValueError
    arr = np.array(v)
    if arr.dtype == np.float64:
        return arr.astype(np.float32)  # save some memory
    return arr
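

# --- Usage sketch: not part of the original file. ---
# to_float_np_array() downcasts float64 input to float32 (to save memory)
# and rejects torch tensors. For example, on a typical 64-bit platform:
#
#   >>> to_float_np_array([1.0, 2.0, 3.0]).dtype
#   dtype('float32')
#   >>> to_float_np_array([1, 2, 3]).dtype   # non-float dtypes pass through
#   dtype('int64')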
import logging
from typing import Union

from ray.rllib.policy.dynamic_tf_policy_v2 import DynamicTFPolicyV2
from ray.rllib.policy.eager_tf_policy_v2 import EagerTFPolicyV2
from ray.rllib.utils.annotations import override
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.tf_policy import TFPolicy
from ray.rllib.utils.framework import get_variable, try_import_tf
from ray.rllib.utils.spaces.simplex import Simplex
from ray.rllib.utils.tf_utils import huber_loss, make_tf_callable
from ray.rllib.utils.typing import (
    AlgorithmConfigDict,
    TensorType,
    LocalOptimizer,
    ModelGradients,
)
from ray.util.debug import log_once

tf1, tf, tfv = try_import_tf()

logger = logging.getLogger(__name__)


class ComputeTDErrorMixin:
    def __init__(self: Union[DynamicTFPolicyV2, EagerTFPolicyV2]):
        @make_tf_callable(self.get_session(), dynamic_shape=True)
        def compute_td_error(obs_t, act_t, rew_t, obs_tp1, done_mask,
                             importance_weights):
            input_dict = SampleBatch({
                SampleBatch.CUR_OBS: tf.convert_to_tensor(obs_t),
                SampleBatch.ACTIONS: tf.convert_to_tensor(act_t),
                SampleBatch.REWARDS: tf.convert_to_tensor(rew_t),
                # The remaining entries are completed from the argument
                # list above; the "weights" key (the prioritized-replay
                # convention) is an assumption, as the source snippet
                # breaks off at the REWARDS entry.
                SampleBatch.NEXT_OBS: tf.convert_to_tensor(obs_tp1),
                SampleBatch.DONES: tf.convert_to_tensor(done_mask),
                "weights": tf.convert_to_tensor(importance_weights),
            })
            # The original method continues here by running the loss on
            # input_dict and returning the per-sample TD error.

        # Expose the compiled function on the policy instance.
        self.compute_td_error = compute_td_error
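

# --- Usage sketch: not part of the original file; the policy class name is
# hypothetical. The mixin is meant to be combined with a TF policy class so
# that `policy.compute_td_error(...)` can later be called on numpy inputs,
# e.g. to refresh prioritized-replay weights after a training step:
#
#   class MyDQNTFPolicy(ComputeTDErrorMixin, DynamicTFPolicyV2):
#       def __init__(self, observation_space, action_space, config):
#           DynamicTFPolicyV2.__init__(
#               self, observation_space, action_space, config)
#           ComputeTDErrorMixin.__init__(self)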
from collections import OrderedDict
from typing import List, Dict

from gym import spaces

from ray.rllib.utils.framework import try_import_tf, get_activation_fn
from ray.rllib.utils.annotations import override
from ray.rllib.utils.types import ModelConfigDict, TensorType
from ray.rllib.models import ModelCatalog
from ray.rllib.models.tf.misc import normc_initializer
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.preprocessors import get_preprocessor

# Use RLlib's guarded TF import rather than importing tensorflow directly,
# so the module still loads when TF is absent.
tf1, tf, tf_version = try_import_tf()


class CentralizedActorCriticModel(TFModelV2):
    CRITIC_OBS = "critic_obs"

    def __init__(
        self,
        obs_space: spaces.Space,
        action_space: spaces.Space,
        num_outputs: int,
        model_config: ModelConfigDict,
        name: str,
    ):
        super(CentralizedActorCriticModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name
        )
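

# --- Usage sketch: not part of the original file. ---
# A custom ModelV2 subclass like this is typically registered with the
# ModelCatalog (already imported above) and then referenced by name in a
# trainer's model config. The registration name "cc_model" is an
# assumption, not from the source:
#
#   ModelCatalog.register_custom_model("cc_model",
#                                      CentralizedActorCriticModel)
#   config = {"model": {"custom_model": "cc_model"}}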