def init_execution(self):
    """
    Creates and stores a tf server (and optionally joins it if we are a parameter-server).
    Only relevant if we are running in distributed mode.
    """
    if self.execution_mode == "distributed":
        if get_distributed_backend() == "distributed_tf":
            self.setup_distributed_tf()
        elif get_distributed_backend() == "horovod":
            self.setup_horovod_execution()
def init_execution(self):
    """
    Creates and sets up the distributed backend. Also creates the global time step variable.
    """
    if self.execution_mode == "distributed":
        if get_distributed_backend() == "distributed_tf":
            self.setup_distributed_tf()
        elif get_distributed_backend() == "horovod":
            self.setup_horovod_execution()
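# Illustrative sketch (not RLGraph's actual implementation): `init_execution` above delegates to
# `setup_distributed_tf`, which per the docstring creates and stores a tf server and joins it on
# parameter-servers. Assuming a `distributed_spec` dict with "cluster_spec", "job" and "task_index"
# keys (the "cluster_spec" and "job" key names are assumptions), such a helper could look roughly
# like this:
import tensorflow as tf

def setup_distributed_tf(self):
    spec = self.execution_spec["distributed_spec"]
    cluster = tf.train.ClusterSpec(spec["cluster_spec"])
    # Create the server for this job/task and store it for later session creation.
    self.server = tf.train.Server(
        cluster,
        job_name=spec["job"],
        task_index=spec["task_index"],
        config=self.tf_session_config
    )
    if spec["job"] == "ps":
        # Parameter-servers only serve variables; block here forever.
        self.server.join()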
def test_ray_updating(self):
    """
    Tests Ray's prioritized replay memory priority-update performance.
    """
    assert get_distributed_backend() == "ray"
    memory = PrioritizedReplayBuffer(
        size=self.capacity,
        alpha=1.0,
        clip_rewards=True
    )
    records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
    for record in records:
        memory.add(
            obs_t=record['states'],
            action=record['actions'],
            reward=record['reward'],
            obs_tp1=record['states'],
            done=record['terminals'],
            weight=None
        )
    loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(self.samples)]
    indices = [
        np.random.randint(low=0, high=self.inserts, size=self.sample_batch_size)
        for _ in range_(self.samples)
    ]

    start = time.monotonic()
    for index, loss in zip(indices, loss_values):
        memory.update_priorities(index, loss)
    end = time.monotonic() - start
    tp = len(indices) / end
    print('#### Testing Ray Prioritized Replay memory ####')
    print('Testing updating performance:')
    print('Updated {} loss batches, throughput: {} updates/s, total time: {} s'.format(
        len(indices), tp, end
    ))
def test_ray_sampling(self):
    """
    Tests Ray's prioritized replay memory sampling performance.
    """
    assert get_distributed_backend() == "ray"
    memory = PrioritizedReplayBuffer(
        size=self.capacity,
        alpha=1.0,
        clip_rewards=True
    )
    records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
    for record in records:
        memory.add(
            obs_t=ray_compress(record['states']),
            action=record['actions'],
            reward=record['reward'],
            obs_tp1=ray_compress(record['states']),
            done=record['terminals'],
            weight=None
        )

    start = time.monotonic()
    for _ in range_(self.samples):
        batch_tuple = memory.sample(self.sample_batch_size, beta=1.0)
    end = time.monotonic() - start
    tp = self.samples / end
    print('#### Testing Ray Prioritized Replay memory ####')
    print('Testing sampling performance:')
    print('Sampled {} batches, throughput: {} batches/s, total time: {} s'.format(
        self.samples, tp, end
    ))
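# Illustrative stand-in (not the actual `rlgraph.execution.ray.ray_util` implementation):
# `ray_compress` above serializes and compresses observations before they are stored in the
# replay buffer to cut memory usage. A minimal standard-library sketch of that idea:
import pickle
import zlib

def compress_state(state):
    # Serialize and compress an observation before inserting it into replay memory.
    return zlib.compress(pickle.dumps(state))

def decompress_state(blob):
    # Inverse of compress_state(); applied to sampled observations before training on them.
    return pickle.loads(zlib.decompress(blob))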
def test_ray_prioritized_replay_insert(self):
    """
    Tests Ray's prioritized replay memory insert performance.
    """
    assert get_distributed_backend() == "ray"
    memory = PrioritizedReplayBuffer(
        size=self.capacity,
        alpha=1.0,
        clip_rewards=True
    )
    # Test individual inserts.
    records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]

    start = time.monotonic()
    for record in records:
        memory.add(
            obs_t=record['states'],
            action=record['actions'],
            reward=record['reward'],
            obs_tp1=record['states'],
            done=record['terminals'],
            weight=None
        )
    end = time.monotonic() - start
    tp = len(records) / end
    print('#### Testing Ray Prioritized Replay memory ####')
    print('Testing insert performance:')
    print('Inserted {} separate records, throughput: {} records/s, total time: {} s'.format(
        len(records), tp, end
    ))

    memory = PrioritizedReplayBuffer(
        size=self.capacity,
        alpha=1.0,
        clip_rewards=True
    )
    # Test chunked inserts -> done via external for loop in Ray.
    chunks = int(self.inserts / self.chunksize)
    records = [self.record_space.sample(size=self.chunksize) for _ in range_(chunks)]

    start = time.monotonic()
    for chunk in records:
        for i in range_(self.chunksize):
            memory.add(
                obs_t=chunk['states'][i],
                action=chunk['actions'][i],
                reward=chunk['reward'][i],
                obs_tp1=chunk['states'][i],
                done=chunk['terminals'][i],
                weight=None
            )
    end = time.monotonic() - start
    tp = len(records) * self.chunksize / end
    print('Testing chunked insert performance:')
    print('Inserted {} chunks, throughput: {} records/s, total time: {} s'.format(
        len(records), tp, end
    ))
def setup_horovod_execution(self):
    """
    Sets up Horovod.
    """
    # Check the backend again to avoid importing Horovod when it is not configured
    # (the import would crash if Horovod is not installed).
    if get_distributed_backend() == "horovod":
        import horovod.tensorflow as hvd
        self.logger.info("Setting up Horovod execution.")
        hvd.init()
        # Pin this process to the GPU matching its local rank.
        config = tf.ConfigProto()
        config.gpu_options.visible_device_list = str(hvd.local_rank())
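# Illustrative sketch of how a ConfigProto like the one built above is typically wired into a
# session in plain Horovod usage (standalone example, not how RLGraph's executor does it):
import horovod.tensorflow as hvd
import tensorflow as tf

hvd.init()

# Pin each process to the GPU matching its local rank (same as the ConfigProto above).
config = tf.ConfigProto()
config.gpu_options.visible_device_list = str(hvd.local_rank())

with tf.Session(config=config) as session:
    session.run(tf.global_variables_initializer())
    # Make all workers start from rank 0's initial variable values.
    session.run(hvd.broadcast_global_variables(0))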
def test_ray_combined_ops(self):
    """
    Tests a combined workflow of insert, sample, update on the prioritized replay memory.
    """
    assert get_distributed_backend() == "ray"
    memory = PrioritizedReplayBuffer(
        size=self.capacity,
        alpha=1.0,
        clip_rewards=True
    )
    chunksize = 32

    # Test chunked inserts -> done via external for loop in Ray.
    chunks = int(self.inserts / chunksize)
    records = [self.record_space.sample(size=chunksize) for _ in range_(chunks)]
    loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(chunks)]

    start = time.monotonic()
    for chunk, loss in zip(records, loss_values):
        # Insert.
        for i in range_(chunksize):
            memory.add(
                obs_t=ray_compress(chunk['states'][i]),
                action=chunk['actions'][i],
                reward=chunk['reward'][i],
                obs_tp1=ray_compress(chunk['states'][i]),
                done=chunk['terminals'][i],
                weight=None
            )
        # Sample.
        batch_tuple = memory.sample(self.sample_batch_size, beta=1.0)
        indices = batch_tuple[-1]
        # Update priorities of the sampled batch.
        memory.update_priorities(indices, loss)
    end = time.monotonic() - start
    tp = len(records) / end
    print('Ray: testing combined insert/sample/update performance:')
    print('Ran {} combined ops, throughput: {} combined ops/s, total time: {} s'.format(
        len(records), tp, end
    ))
from __future__ import division
from __future__ import print_function

from copy import deepcopy

from rlgraph.execution.ray.ray_util import worker_exploration
from six.moves import xrange as range_
import logging
import numpy as np
import time

from rlgraph import get_distributed_backend
from rlgraph.agents import Agent
from rlgraph.environments import Environment

if get_distributed_backend() == "ray":
    import ray


class RayExecutor(object):
    """
    Abstract distributed Ray executor.

    A Ray executor implements a specific distributed learning semantic by delegating
    distributed state management and execution to the Ray execution engine.
    """
    def __init__(self, executor_spec, environment_spec, worker_spec):
        """
        Args:
            executor_spec (dict): Contains all information necessary to set up and execute
                agents on a Ray cluster.
            environment_spec (dict): Environment config used to create environments on the Ray workers.
            worker_spec (dict): Worker parameters.
        """
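# Illustrative sketch of the Ray primitives a RayExecutor builds on (the SampleWorker actor and
# its methods here are hypothetical, not RLGraph's RayWorker API):
import ray

ray.init()

@ray.remote
class SampleWorker(object):
    def __init__(self):
        self.steps = 0

    def sample(self, num_timesteps):
        # Placeholder for an environment rollout; returns how many steps were "collected".
        self.steps += num_timesteps
        return num_timesteps

# Create remote worker actors and collect their results in parallel.
workers = [SampleWorker.remote() for _ in range(4)]
results = ray.get([worker.sample.remote(100) for worker in workers])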
def setup_session(self, hooks):
    """
    Creates and then enters the session for this model. Also finalizes the graph.

    Args:
        hooks (list): A list of session hooks to use.
    """
    if self.execution_mode == "distributed":
        self.logger.info("Setting up distributed TensorFlow session.")
        if self.server is None:
            raise RLGraphError(
                "TensorflowGraphExecutor's Server is None! It could be that your DISTRIBUTED_BACKEND (currently "
                "set to '{}') is not set to 'distributed_tf'. You can do so via the RLGraph config file in your "
                "home directory or the ENV variable 'RLGRAPH_DISTRIBUTED_BACKEND=distributed_tf'.".format(
                    get_distributed_backend()
                )
            )
        if self.tf_session_type == "monitored-session":
            session_creator = tf.train.ChiefSessionCreator(
                scaffold=self.scaffold,
                master=self.server.target,
                config=self.tf_session_config,
                checkpoint_dir=None,
                checkpoint_filename_with_path=None
            )
            self.monitored_session = tf.train.MonitoredSession(
                # is_chief=self.execution_spec["distributed_spec"]["task_index"] == 0,
                session_creator=session_creator,
                hooks=hooks,
                stop_grace_period_secs=120  # Default value.
            )
        else:
            assert self.tf_session_type == "monitored-training-session", \
                "ERROR: Invalid session type: {}!".format(self.tf_session_type)
            is_chief = self.execution_spec["distributed_spec"].get(
                "is_chief", self.execution_spec["distributed_spec"]["task_index"] == 0
            )
            self.monitored_session = tf.train.MonitoredTrainingSession(
                master=self.server.target,
                is_chief=is_chief,
                checkpoint_dir=None,  # TODO: specify?
                save_checkpoint_secs=600,
                save_summaries_secs=30,
                log_step_count_steps=50000,
                # scaffold=self.scaffold,
                # Ignore other hooks.
                hooks=[hooks[-1]] if hooks else None,
                config=self.tf_session_config,
                stop_grace_period_secs=120  # Default value.
            )
    else:
        # Non-distributed mode: use a plain session if monitoring is disabled.
        if self.disable_monitoring:
            self.logger.info("Setting up default session for non-distributed mode.")
            self.monitored_session = tf.Session(config=self.tf_session_config)
        else:
            self.logger.info("Setting up singular monitored session for non-distributed mode.")
            self.monitored_session = tf.train.SingularMonitoredSession(
                hooks=hooks,
                scaffold=self.scaffold,
                master='',  # Default value.
                config=self.tf_session_config,
                checkpoint_dir=None
            )

    # Exit the graph-context and finalize the graph.
    if self.graph_default_context is not None:
        self.graph_default_context.__exit__(None, None, None)

    # TODO back in
    # self.graph.finalize()

    if self.disable_monitoring:
        # If no monitoring, both just end up being simple sessions.
        self.session = self.monitored_session
        self.session.run(self.init_op)
    else:
        # Enter the session to be ready for acting/learning.
        self.monitored_session.__enter__()
        self.session = self.monitored_session._tf_sess()

    # Set up the tf profiler.
    if self.profiling_enabled and not self.disable_monitoring:
        self.profiler = tf.profiler.Profiler(graph=self.session.graph)
def __init__(self, agent_config, worker_spec, env_spec, frameskip=1):
    """
    Creates agent and environment for Ray worker.

    Args:
        agent_config (dict): Agent configuration dict.
        worker_spec (dict): Worker parameters.
        env_spec (dict): Environment config for environment to run.
        frameskip (int): How often actions are repeated after retrieving them from the agent.
    """
    assert get_distributed_backend() == "ray"
    # Internal frameskip of env.
    self.env_frame_skip = worker_spec.get("env_internal_frame_skip", 1)
    # Worker computes weights for prioritized sampling.
    worker_spec = deepcopy(worker_spec)
    self.num_environments = worker_spec.pop("num_worker_environments", 1)
    # Make sample size proportional to num envs.
    self.worker_sample_size = worker_spec.pop("worker_sample_size") * self.num_environments
    self.worker_executes_postprocessing = worker_spec.pop("worker_executes_postprocessing", True)
    self.n_step_adjustment = worker_spec.pop("n_step_adjustment", 1)
    self.env_ids = ["env_{}".format(i) for i in range_(self.num_environments)]
    num_background_envs = worker_spec.pop("num_background_envs", 1)

    # TODO from spec once we decided on generic vectorization.
    self.vector_env = SequentialVectorEnv(self.num_environments, env_spec, num_background_envs)

    # Then update agent config.
    agent_config['state_space'] = self.vector_env.state_space
    agent_config['action_space'] = self.vector_env.action_space

    ray_exploration = worker_spec.pop("ray_exploration", None)
    self.worker_executes_exploration = worker_spec.pop("worker_executes_exploration", False)
    self.ray_exploration_set = False
    if ray_exploration is not None:
        # Update worker with worker specific constant exploration value.
        # TODO too many levels?
        assert agent_config["exploration_spec"]["epsilon_spec"]["decay_spec"]["type"] == "constant_decay", \
            "ERROR: If using Ray's constant exploration, exploration type must be 'constant_decay'."
        if self.worker_executes_exploration:
            agent_config["exploration_spec"] = None
            self.exploration_epsilon = ray_exploration
        else:
            agent_config["exploration_spec"]["epsilon_spec"]["decay_spec"]["constant_value"] = ray_exploration
            self.ray_exploration_set = True

    self.discount = agent_config.get("discount", 0.99)
    # Python based preprocessor as image resizing is broken in TF.
    self.preprocessors = {}
    preprocessing_spec = agent_config.get("preprocessing_spec", None)
    self.is_preprocessed = {}
    for env_id in self.env_ids:
        self.preprocessors[env_id] = self.setup_preprocessor(
            preprocessing_spec, self.vector_env.state_space.with_batch_rank()
        )
        self.is_preprocessed[env_id] = False
    self.agent = self.setup_agent(agent_config, worker_spec)
    self.worker_frameskip = frameskip

    # Flag for container actions.
    self.container_actions = self.agent.flat_action_space is not None
    self.action_space = self.agent.flat_action_space

    # Save these so they can be fetched after training if desired.
    self.finished_episode_rewards = [[] for _ in range_(self.num_environments)]
    self.finished_episode_timesteps = [[] for _ in range_(self.num_environments)]
    # Total times sample the "real" wallclock time from start to end for each episode.
    self.finished_episode_total_times = [[] for _ in range_(self.num_environments)]
    # Sample times stop the wallclock time counter between runs, so only the sampling time is accounted for.
    self.finished_episode_sample_times = [[] for _ in range_(self.num_environments)]

    self.total_worker_steps = 0
    self.episodes_executed = 0

    # Step time and steps done per call to execute_and_get to measure throughput of this worker.
    self.sample_times = []
    self.sample_steps = []
    self.sample_env_frames = []

    # To continue running through multiple exec calls.
    self.last_states = self.vector_env.reset_all()
    self.zero_batched_state = np.zeros((1,) + self.agent.preprocessed_state_space.shape)
    self.zero_unbatched_state = np.zeros(self.agent.preprocessed_state_space.shape)
    self.preprocessed_states_buffer = np.zeros(
        shape=(self.num_environments,) + self.agent.preprocessed_state_space.shape,
        dtype=self.agent.preprocessed_state_space.dtype
    )
    self.last_ep_timesteps = [0 for _ in range_(self.num_environments)]
    self.last_ep_rewards = [0 for _ in range_(self.num_environments)]
    self.last_ep_start_timestamps = [0.0 for _ in range_(self.num_environments)]
    self.last_ep_start_initialized = False  # Initialize on first `execute_and_get_timesteps()` call.
    self.last_ep_sample_times = [0.0 for _ in range_(self.num_environments)]

    # Was the last state a terminal state so env should be reset in next call?
    self.last_terminals = [False for _ in range_(self.num_environments)]
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from rlgraph import get_backend, get_distributed_backend
from rlgraph.components.optimizers.optimizer import Optimizer
from rlgraph.utils.decorators import rlgraph_api

if get_backend() == "tf" and get_distributed_backend() == "horovod":
    import horovod.tensorflow as hvd
elif get_backend() == "pytorch" and get_distributed_backend() == "horovod":
    # Horovod's PyTorch API lives in horovod.torch.
    import horovod.torch as hvd


class HorovodOptimizer(Optimizer):
    """
    This Optimizer provides a wrapper for the horovod optimizer package:

    https://github.com/uber/horovod

    Horovod is meant to be used as an alternative to distributed TensorFlow, as it implements
    communication in a different way, as explained in the Horovod paper: arXiv:1802.05799.
    """
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from rlgraph import get_backend, get_distributed_backend
from rlgraph.components.optimizers.optimizer import Optimizer

if get_backend() == "tf" and get_distributed_backend() == "horovod":
    import horovod.tensorflow as hvd
elif get_backend() == "pytorch" and get_distributed_backend() == "horovod":
    # Horovod's PyTorch API lives in horovod.torch.
    import horovod.torch as hvd


class HorovodOptimizer(Optimizer):
    """
    This Optimizer provides a wrapper for the horovod optimizer package:

    https://github.com/uber/horovod

    Horovod is meant to be used as an alternative to distributed TensorFlow, as it implements
    communication in a different way, as explained in the Horovod paper: arXiv:1802.05799.
    """
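# For reference, wrapping an optimizer with Horovod outside of RLGraph is a one-liner. This is
# plain Horovod TensorFlow usage, shown only to illustrate what the wrapper class above builds on:
import horovod.tensorflow as hvd
import tensorflow as tf

hvd.init()

# Scale the learning rate by the number of workers and let Horovod average gradients
# across all workers via ring all-reduce.
optimizer = hvd.DistributedOptimizer(
    tf.train.AdamOptimizer(learning_rate=0.001 * hvd.size())
)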
def __init__(self, agent_config, worker_spec, env_spec, frameskip=1):
    """
    Creates agent and environment for Ray worker.

    Args:
        agent_config (dict): Agent configuration dict.
        worker_spec (dict): Worker parameters.
        env_spec (dict): Environment config for environment to run.
        frameskip (int): How often actions are repeated after retrieving them from the agent.
    """
    assert get_distributed_backend() == "ray"
    # Internal frameskip of env.
    self.env_frame_skip = env_spec.get("frameskip", 1)
    # Worker computes weights for prioritized sampling.
    worker_spec = deepcopy(worker_spec)
    self.num_environments = worker_spec.pop("num_worker_environments", 1)
    self.worker_sample_size = worker_spec.pop("worker_sample_size") * self.num_environments
    self.worker_computes_weights = worker_spec.pop("worker_computes_weights", True)

    # Use GAE.
    self.generalized_advantage_estimation = worker_spec.pop("generalized_advantage_estimation", True)
    self.gae_lambda = worker_spec.pop("gae_lambda", 1.0)
    self.compress = worker_spec.pop("compress_states", False)

    self.env_ids = ["env_{}".format(i) for i in range_(self.num_environments)]
    num_background_envs = worker_spec.pop("num_background_envs", 1)
    self.vector_env = SequentialVectorEnv(self.num_environments, env_spec, num_background_envs)

    # Then update agent config.
    agent_config['state_space'] = self.vector_env.state_space
    agent_config['action_space'] = self.vector_env.action_space

    # Python based preprocessor as image resizing is broken in TF.
    self.preprocessors = {}
    preprocessing_spec = agent_config.get("preprocessing_spec", None)
    self.is_preprocessed = {}
    for env_id in self.env_ids:
        self.preprocessors[env_id] = self.setup_preprocessor(
            preprocessing_spec, self.vector_env.state_space.with_batch_rank()
        )
        self.is_preprocessed[env_id] = False
    self.agent = self.setup_agent(agent_config, worker_spec)
    self.worker_frameskip = frameskip

    # Save these so they can be fetched after training if desired.
    self.finished_episode_rewards = [[] for _ in range_(self.num_environments)]
    self.finished_episode_timesteps = [[] for _ in range_(self.num_environments)]
    # Total times sample the "real" wallclock time from start to end for each episode.
    self.finished_episode_total_times = [[] for _ in range_(self.num_environments)]
    # Sample times stop the wallclock time counter between runs, so only the sampling time is accounted for.
    self.finished_episode_sample_times = [[] for _ in range_(self.num_environments)]

    self.total_worker_steps = 0
    self.episodes_executed = 0

    # Step time and steps done per call to execute_and_get to measure throughput of this worker.
    self.sample_times = []
    self.sample_steps = []
    self.sample_env_frames = []

    # To continue running through multiple exec calls.
    self.last_states = self.vector_env.reset_all()
    self.zero_batched_state = np.zeros((1,) + self.agent.preprocessed_state_space.shape)
    self.zero_unbatched_state = np.zeros(self.agent.preprocessed_state_space.shape)
    self.preprocessed_states_buffer = np.zeros(
        shape=(self.num_environments,) + self.agent.preprocessed_state_space.shape,
        dtype=self.agent.preprocessed_state_space.dtype
    )
    self.last_ep_timesteps = [0 for _ in range_(self.num_environments)]
    self.last_ep_rewards = [0 for _ in range_(self.num_environments)]
    self.last_ep_start_timestamps = [0.0 for _ in range_(self.num_environments)]
    self.last_ep_start_initialized = False  # Initialize on first `execute_and_get_timesteps()` call.
    self.last_ep_sample_times = [0.0 for _ in range_(self.num_environments)]

    # Was the last state a terminal state so env should be reset in next call?
    self.last_terminals = [False for _ in range_(self.num_environments)]