Example #1
 def init_execution(self):
     """
     Creates and stores a tf server (and optionally joins it if we are a parameter-server).
     Only relevant, if we are running in distributed mode.
     """
     if self.execution_mode == "distributed":
         if get_distributed_backend() == "distributed_tf":
             self.setup_distributed_tf()
         elif get_distributed_backend() == "horovod":
             self.setup_horovod_execution()
Example #2
 def init_execution(self):
     """
     Creates and sets up the distributed backend.
     Also creates the global time step variable.
     """
     if self.execution_mode == "distributed":
         if get_distributed_backend() == "distributed_tf":
             self.setup_distributed_tf()
         elif get_distributed_backend() == "horovod":
             self.setup_horovod_execution()
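The docstring in Example #1 mentions creating a tf server and joining it when running as a parameter server. RLgraph's actual `setup_distributed_tf` is not shown here; the following is only a minimal, hypothetical sketch of that pattern using the standard TF 1.x API (the cluster definition, job name and task index are illustrative parameters):

import tensorflow as tf

def setup_distributed_tf(cluster_def, job_name, task_index):
    # Hypothetical sketch: build a tf.train.Server from a cluster definition.
    cluster = tf.train.ClusterSpec(cluster_def)
    server = tf.train.Server(cluster, job_name=job_name, task_index=task_index)
    # Parameter servers typically block here and only serve variables.
    if job_name == "ps":
        server.join()
    return server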
Example #3
    def test_ray_updating(self):
        """
        Tests Ray's memory performance.
        """
        assert get_distributed_backend() == "ray"
        memory = PrioritizedReplayBuffer(
            size=self.capacity,
            alpha=1.0,
            clip_rewards=True
        )
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
            memory.add(
                obs_t=record['states'],
                action=record['actions'],
                reward=record['reward'],
                obs_tp1=record['states'],
                done=record['terminals'],
                weight=None
            )
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(self.samples)]
        indices = [np.random.randint(low=0, high=self.inserts, size=self.sample_batch_size) for _
                   in range_(self.samples)]

        start = time.monotonic()
        for index, loss in zip(indices, loss_values):
            memory.update_priorities(index, loss)
        end = time.monotonic() - start
        tp = len(indices) / end
        print('#### Testing Ray Prioritized Replay memory ####')
        print('Testing updating performance:')
        print('Updates {} loss batches, throughput: {} updates/s, total time: {} s'.format(
            len(indices), tp, end
        ))
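The `loss_values` above are random numbers because only update throughput is being measured. In an actual training loop the new priorities passed to `update_priorities` are usually derived from per-sample TD errors; a hedged sketch of the standard proportional-prioritization rule (helper name hypothetical):

import numpy as np

def priorities_from_td_errors(td_errors, epsilon=1e-6):
    # Proportional prioritization: priority_i = |td_error_i| + epsilon.
    return np.abs(td_errors) + epsilon

# indices come from a previous memory.sample(...) call:
# memory.update_priorities(indices, priorities_from_td_errors(td_errors))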
Example #4
 def test_ray_sampling(self):
     """
     Tests Ray's memory performance.
     """
     assert get_distributed_backend() == "ray"
     memory = PrioritizedReplayBuffer(
         size=self.capacity,
         alpha=1.0,
         clip_rewards=True
     )
     records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
     for record in records:
         memory.add(
             obs_t=ray_compress(record['states']),
             action=record['actions'],
             reward=record['reward'],
             obs_tp1=ray_compress(record['states']),
             done=record['terminals'],
             weight=None
         )
     start = time.monotonic()
     for _ in range_(self.samples):
         batch_tuple = memory.sample(self.sample_batch_size, beta=1.0)
     end = time.monotonic() - start
     tp = self.samples / end
     print('#### Testing Ray Prioritized Replay memory ####')
     print('Testing sampling performance:')
     print('Sampled {} batches, throughput: {} samples/s, total time: {} s'.format(
         self.samples, tp, end
     ))
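`beta=1.0` requests full importance-sampling correction for the sampled batch. The exact return layout of `memory.sample` depends on the Ray version, but the weighting used by proportional prioritized replay (as in the PER paper) looks like this (a sketch of the formula, not Ray's exact code):

import numpy as np

def importance_weights(sample_probs, buffer_size, beta):
    # w_i = (N * P(i)) ** (-beta), normalized by the largest weight for stability.
    weights = (buffer_size * np.asarray(sample_probs)) ** (-beta)
    return weights / weights.max()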
Example #5
    def test_ray_prioritized_replay_insert(self):
        """
        Tests Ray's memory performance.
        """
        assert get_distributed_backend() == "ray"
        memory = PrioritizedReplayBuffer(
            size=self.capacity,
            alpha=1.0,
            clip_rewards=True
        )
        # Test individual inserts.
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]

        start = time.monotonic()
        for record in records:
            memory.add(
                obs_t=record['states'],
                action=record['actions'],
                reward=record['reward'],
                obs_tp1=record['states'],
                done=record['terminals'],
                weight=None
            )
        end = time.monotonic() - start
        tp = len(records) / end
        print('#### Testing Ray Prioritized Replay memory ####')
        print('Testing insert performance:')
        print('Inserted {} separate records, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, end
        ))

        memory = PrioritizedReplayBuffer(
            size=self.capacity,
            alpha=1.0,
            clip_rewards=True
        )

        # Test chunked inserts -> done via external for loop in Ray.
        chunks = int(self.inserts / self.chunksize)
        records = [self.record_space.sample(size=self.chunksize) for _ in range_(chunks)]
        start = time.monotonic()
        for chunk in records:
            for i in range_(self.chunksize):
                memory.add(
                    obs_t=chunk['states'][i],
                    action=chunk['actions'][i],
                    reward=chunk['reward'][i],
                    obs_tp1=chunk['states'][i],
                    done=chunk['terminals'][i],
                    weight=None
                )
        end = time.monotonic() - start
        tp = len(records) * self.chunksize / end
        print('Testing chunked insert performance:')
        print('Inserted {} chunks, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, end
        ))
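Examples #3 to #5 all repeat the same monotonic-clock timing pattern. A small reusable helper (hypothetical, not part of the test suite) makes the throughput measurement explicit:

import time

def measure_throughput(fn, items):
    # Applies fn to every item and returns (items_per_second, total_seconds).
    start = time.monotonic()
    count = 0
    for item in items:
        fn(item)
        count += 1
    total = time.monotonic() - start
    return count / total, total

# e.g. measure_throughput(insert_one_record, records), where insert_one_record wraps memory.add.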
Example #6
 def setup_horovod_execution(self):
     """
     Sets up Horovod.
     """
      # Check the distributed backend again so horovod is only imported when actually configured (the import would crash if horovod is not installed).
     if get_distributed_backend() == "horovod":
         import horovod.tensorflow as hvd
         self.logger.info("Setting up Horovod execution.")
         hvd.init()
         config = tf.ConfigProto()
         config.gpu_options.visible_device_list = str(hvd.local_rank())
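The snippet above only pins the visible GPU to the local Horovod rank. In Horovod's standard TF 1.x workflow, that `config` is then handed to the session together with a broadcast hook so all workers start from the same variables; a generic Horovod sketch, not RLgraph-specific code:

import horovod.tensorflow as hvd
import tensorflow as tf

hvd.init()
config = tf.ConfigProto()
config.gpu_options.visible_device_list = str(hvd.local_rank())

# Start all workers from identical variable values (broadcast from rank 0).
hooks = [hvd.BroadcastGlobalVariablesHook(0)]

# The pinned config and the hook are then passed to the (monitored) session:
# with tf.train.MonitoredTrainingSession(config=config, hooks=hooks) as sess:
#     run the training loop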
Example #7
    def test_ray_combined_ops(self):
        """
        Tests a combined workflow of insert, sample, update on the prioritized replay memory.
        """
        assert get_distributed_backend() == "ray"
        memory = PrioritizedReplayBuffer(
            size=self.capacity,
            alpha=1.0,
            clip_rewards=True
        )
        chunksize = 32

        # Test chunked inserts -> done via external for loop in Ray.
        chunks = int(self.inserts / chunksize)
        records = [self.record_space.sample(size=chunksize) for _ in range_(chunks)]
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(chunks)]
        start = time.monotonic()

        for chunk, loss_values in zip(records, loss_values):
            # Insert.
            for i in range_(chunksize):
                memory.add(
                    obs_t=ray_compress(chunk['states'][i]),
                    action=chunk['actions'][i],
                    reward=chunk['reward'][i],
                    obs_tp1=ray_compress(chunk['states'][i]),
                    done=chunk['terminals'][i],
                    weight=None
                )
            # Sample.
            batch_tuple = memory.sample(self.sample_batch_size, beta=1.0)
            indices = batch_tuple[-1]
            # Update
            memory.update_priorities(indices, loss_values)

        end = time.monotonic() - start
        tp = len(records) / end
        print('Ray: testing combined insert/sample/update performance:')
        print('Ran {} combined ops, throughput: {} combined ops/s, total time: {} s'.format(
            len(records), tp, end
        ))
Example #8
from __future__ import division
from __future__ import print_function

from copy import deepcopy

from rlgraph.execution.ray.ray_util import worker_exploration
from six.moves import xrange as range_
import logging
import numpy as np
import time

from rlgraph import get_distributed_backend
from rlgraph.agents import Agent
from rlgraph.environments import Environment

if get_distributed_backend() == "ray":
    import ray


class RayExecutor(object):
    """
    Abstract distributed Ray executor.

    A Ray executor implements a specific distributed learning semantic by delegating
    distributed state management and execution to the Ray execution engine.
    """
    def __init__(self, executor_spec, environment_spec, worker_spec):
        """
        Args:
            executor_spec (dict): Contains all information necessary to set up and execute
                agents on a Ray cluster.
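The guarded `import ray` above lets this module be imported even when Ray is not the configured distributed backend. Once it is, an executor typically connects to the cluster and delegates work to remote actors roughly like this (a generic Ray sketch with hypothetical names, not RLgraph's actual executor code):

import ray

ray.init()  # Connect to, or start, a Ray instance.

@ray.remote
class SampleWorker(object):
    def sample(self, num_steps):
        # Stub: collect and return num_steps environment transitions.
        return num_steps

workers = [SampleWorker.remote() for _ in range(4)]
results = ray.get([w.sample.remote(100) for w in workers])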
Example #9
    def setup_session(self, hooks):
        """
        Creates and then enters the session for this model. Also finalizes the graph.

        Args:
            hooks (list): A list of session hooks to use.
        """
        if self.execution_mode == "distributed":
            self.logger.info("Setting up distributed TensorFlow session.")
            if self.server is None:
                raise RLGraphError(
                    "TensorflowGraphExecutor's Server is None! It could be that your DISTRIBUTED_BACKEND (currently "
                    "set to '{}') is not set to 'distributed_tf'. You can do so via the RLGraph config file in your "
                    "home directory or the ENV variable 'RLGRAPH_DISTRIBUTED_BACKEND=distributed_tf'.".
                    format(get_distributed_backend())
                )
            if self.tf_session_type == "monitored-session":
                session_creator = tf.train.ChiefSessionCreator(
                    scaffold=self.scaffold,
                    master=self.server.target,
                    config=self.tf_session_config,
                    checkpoint_dir=None,
                    checkpoint_filename_with_path=None
                )
                self.monitored_session = tf.train.MonitoredSession(
                    #is_chief=self.execution_spec["distributed_spec"]["task_index"] == 0,
                    session_creator=session_creator,
                    hooks=hooks,
                    stop_grace_period_secs=120  # Default value.
                )
            else:
                assert self.tf_session_type == "monitored-training-session",\
                    "ERROR: Invalid session type: {}!".format(self.tf_session_type)
                is_chief = self.execution_spec["distributed_spec"].get(
                    "is_chief", self.execution_spec["distributed_spec"]["task_index"] == 0
                )
                self.monitored_session = tf.train.MonitoredTrainingSession(
                    master=self.server.target,
                    is_chief=is_chief,
                    checkpoint_dir=None,  # TODO: specify?
                    save_checkpoint_secs=600,
                    save_summaries_secs=30,
                    log_step_count_steps=50000,
                    # scaffold=self.scaffold,
                    # Ignore other hooks
                    hooks=[hooks[-1]] if hooks else None,
                    config=self.tf_session_config,
                    stop_grace_period_secs=120  # Default value.
                )
        else:
            # Non-distributed mode: use a plain tf.Session if monitoring is disabled, else a singular monitored session.
            if self.disable_monitoring:
                self.logger.info("Setting up default session for non-distributed mode.")
                self.monitored_session = tf.Session(config=self.tf_session_config)
            else:
                self.logger.info("Setting up singular monitored session for non-distributed mode.")
                self.monitored_session = tf.train.SingularMonitoredSession(
                    hooks=hooks,
                    scaffold=self.scaffold,
                    master='',  # Default value.
                    config=self.tf_session_config,
                    checkpoint_dir=None
                )

        # Exit the graph-context and finalize the graph.
        if self.graph_default_context is not None:
            self.graph_default_context.__exit__(None, None, None)

        # TODO back in
        # self.graph.finalize()

        if self.disable_monitoring:
            # If no monitoring, both just end up being simple sessions.
            self.session = self.monitored_session
            self.session.run(self.init_op)
        else:
            # Enter the session to be ready for acting/learning.
            self.monitored_session.__enter__()
            self.session = self.monitored_session._tf_sess()

        # Setup the tf Profiler.
        if self.profiling_enabled and not self.disable_monitoring:
            self.profiler = tf.profiler.Profiler(graph=self.session.graph)
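For reference, the branching above reduces to the following selection rule (an illustrative helper, not RLgraph API; the parameter names mirror the attributes used in the method):

def choose_session_kind(execution_mode, tf_session_type, disable_monitoring):
    # Mirrors setup_session's branching over session types.
    if execution_mode == "distributed":
        if tf_session_type == "monitored-session":
            return "tf.train.MonitoredSession (via ChiefSessionCreator)"
        return "tf.train.MonitoredTrainingSession"
    if disable_monitoring:
        return "plain tf.Session"
    return "tf.train.SingularMonitoredSession"

Note that in the distributed branch a missing server raises an RLGraphError before any session is created.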
Example #10
    def __init__(self, agent_config, worker_spec, env_spec, frameskip=1):
        """
        Creates agent and environment for Ray worker.

        Args:
            agent_config (dict): Agent configuration dict.
            worker_spec (dict): Worker parameters.
            env_spec (dict): Environment config for environment to run.
            frameskip (int): How often actions are repeated after retrieving them from the agent.
        """
        assert get_distributed_backend() == "ray"
        # Internal frameskip of env.
        self.env_frame_skip = worker_spec.get("env_internal_frame_skip", 1)
        # Worker computes weights for prioritized sampling.
        worker_spec = deepcopy(worker_spec)
        self.num_environments = worker_spec.pop("num_worker_environments", 1)

        # Make sample size proportional to num envs.
        self.worker_sample_size = worker_spec.pop(
            "worker_sample_size") * self.num_environments
        self.worker_executes_postprocessing = worker_spec.pop(
            "worker_executes_postprocessing", True)
        self.n_step_adjustment = worker_spec.pop("n_step_adjustment", 1)
        self.env_ids = [
            "env_{}".format(i) for i in range_(self.num_environments)
        ]
        num_background_envs = worker_spec.pop("num_background_envs", 1)

        # TODO from spec once we decided on generic vectorization.
        self.vector_env = SequentialVectorEnv(self.num_environments, env_spec,
                                              num_background_envs)

        # Then update agent config.
        agent_config['state_space'] = self.vector_env.state_space
        agent_config['action_space'] = self.vector_env.action_space

        ray_exploration = worker_spec.pop("ray_exploration", None)
        self.worker_executes_exploration = worker_spec.pop(
            "worker_executes_exploration", False)
        self.ray_exploration_set = False
        if ray_exploration is not None:
            # Update worker with worker specific constant exploration value.
            # TODO too many levels?
            assert agent_config["exploration_spec"]["epsilon_spec"]["decay_spec"]["type"] == "constant_decay", \
                "ERROR: If using Ray's constant exploration, exploration type must be 'constant_decay'."
            if self.worker_executes_exploration:
                agent_config["exploration_spec"] = None
                self.exploration_epsilon = ray_exploration
            else:
                agent_config["exploration_spec"]["epsilon_spec"]["decay_spec"][
                    "constant_value"] = ray_exploration
                self.ray_exploration_set = True

        self.discount = agent_config.get("discount", 0.99)
        # Python based preprocessor as image resizing is broken in TF.

        self.preprocessors = {}
        preprocessing_spec = agent_config.get("preprocessing_spec", None)
        self.is_preprocessed = {}
        for env_id in self.env_ids:
            self.preprocessors[env_id] = self.setup_preprocessor(
                preprocessing_spec,
                self.vector_env.state_space.with_batch_rank())
            self.is_preprocessed[env_id] = False
        self.agent = self.setup_agent(agent_config, worker_spec)
        self.worker_frameskip = frameskip

        #  Flag for container actions.
        self.container_actions = self.agent.flat_action_space is not None
        self.action_space = self.agent.flat_action_space

        # Save these so they can be fetched after training if desired.
        self.finished_episode_rewards = [[]
                                         for _ in range_(self.num_environments)
                                         ]
        self.finished_episode_timesteps = [
            [] for _ in range_(self.num_environments)
        ]
        # Total times record the "real" wallclock time from start to end for each episode.
        self.finished_episode_total_times = [
            [] for _ in range_(self.num_environments)
        ]
        # Sample times stop the wallclock time counter between runs, so only the sampling time is accounted for.
        self.finished_episode_sample_times = [
            [] for _ in range_(self.num_environments)
        ]

        self.total_worker_steps = 0
        self.episodes_executed = 0

        # Step time and steps done per call to execute_and_get to measure throughput of this worker.
        self.sample_times = []
        self.sample_steps = []
        self.sample_env_frames = []

        # To continue running through multiple exec calls.
        self.last_states = self.vector_env.reset_all()

        self.zero_batched_state = np.zeros(
            (1, ) + self.agent.preprocessed_state_space.shape)
        self.zero_unbatched_state = np.zeros(
            self.agent.preprocessed_state_space.shape)
        self.preprocessed_states_buffer = np.zeros(
            shape=(self.num_environments, ) +
            self.agent.preprocessed_state_space.shape,
            dtype=self.agent.preprocessed_state_space.dtype)
        self.last_ep_timesteps = [0 for _ in range_(self.num_environments)]
        self.last_ep_rewards = [0 for _ in range_(self.num_environments)]
        self.last_ep_start_timestamps = [
            0.0 for _ in range_(self.num_environments)
        ]
        self.last_ep_start_initialized = False  # initialize on first `execute_and_get_timesteps()` call
        self.last_ep_sample_times = [
            0.0 for _ in range_(self.num_environments)
        ]

        # Was the last state a terminal state so env should be reset in next call?
        self.last_terminals = [False for _ in range_(self.num_environments)]
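The keys popped from `worker_spec` in this constructor imply a spec roughly like the one below. Only keys actually read above are listed, and the values are illustrative rather than RLgraph's documented defaults:

worker_spec = {
    "env_internal_frame_skip": 1,           # Internal frameskip of the env.
    "num_worker_environments": 2,           # Vectorized envs per worker.
    "worker_sample_size": 50,               # Multiplied by the number of envs.
    "worker_executes_postprocessing": True,
    "n_step_adjustment": 1,
    "num_background_envs": 1,
    "ray_exploration": 0.1,                 # Constant epsilon for this worker.
    "worker_executes_exploration": False,
}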
Example #11
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from rlgraph import get_backend, get_distributed_backend
from rlgraph.components.optimizers.optimizer import Optimizer
from rlgraph.utils.decorators import rlgraph_api

if get_backend() == "tf" and get_distributed_backend() == "horovod":
    import horovod.tensorflow as hvd
elif get_backend() == "pytorch" and get_distributed_backend() == "horovod":
    import horovod.pytorch as hvd


class HorovodOptimizer(Optimizer):
    """
    This Optimizer provides a wrapper for the horovod optimizer package:

    https://github.com/uber/horovod

    Horovod is meant to be used as an alternative to distributed TensorFlow as it implements
    communication in a different way, as explained in the Horovod paper:

    arXiv:1802.05799
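`HorovodOptimizer` wraps the Horovod package; the core of that wrapping in Horovod's own TF API is `hvd.DistributedOptimizer`, which averages gradients across workers. A minimal, generic sketch of the underlying call (standard Horovod usage, not the RLgraph component itself):

import horovod.tensorflow as hvd
import tensorflow as tf

hvd.init()
# Wrap any local TF optimizer; its gradients are allreduce-averaged across workers.
local_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3 * hvd.size())
distributed_optimizer = hvd.DistributedOptimizer(local_optimizer)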
Example #13
    def __init__(self, agent_config, worker_spec, env_spec, frameskip=1):
        """
        Creates agent and environment for Ray worker.

        Args:
            agent_config (dict): Agent configuration dict.
            worker_spec (dict): Worker parameters.
            env_spec (dict): Environment config for environment to run.
            frameskip (int): How often actions are repeated after retrieving them from the agent.
        """
        assert get_distributed_backend() == "ray"
        # Internal frameskip of env.
        self.env_frame_skip = env_spec.get("frameskip", 1)
        # Worker computes weights for prioritized sampling.
        worker_spec = deepcopy(worker_spec)
        self.num_environments = worker_spec.pop("num_worker_environments", 1)
        self.worker_sample_size = worker_spec.pop("worker_sample_size") * self.num_environments
        self.worker_computes_weights = worker_spec.pop("worker_computes_weights", True)

        # Use GAE.
        self.generalized_advantage_estimation = worker_spec.pop("generalized_advantage_estimation", True)
        self.gae_lambda = worker_spec.pop("gae_lambda", 1.0)
        self.compress = worker_spec.pop("compress_states", False)

        self.env_ids = ["env_{}".format(i) for i in range_(self.num_environments)]
        num_background_envs = worker_spec.pop("num_background_envs", 1)

        self.vector_env = SequentialVectorEnv(self.num_environments, env_spec, num_background_envs)

        # Then update agent config.
        agent_config['state_space'] = self.vector_env.state_space
        agent_config['action_space'] = self.vector_env.action_space

        # Python based preprocessor as image resizing is broken in TF.
        self.preprocessors = {}
        preprocessing_spec = agent_config.get("preprocessing_spec", None)
        self.is_preprocessed = {}
        for env_id in self.env_ids:
            self.preprocessors[env_id] = self.setup_preprocessor(
                preprocessing_spec, self.vector_env.state_space.with_batch_rank()
            )
            self.is_preprocessed[env_id] = False
        self.agent = self.setup_agent(agent_config, worker_spec)
        self.worker_frameskip = frameskip

        # Save these so they can be fetched after training if desired.
        self.finished_episode_rewards = [[] for _ in range_(self.num_environments)]
        self.finished_episode_timesteps = [[] for _ in range_(self.num_environments)]
        # Total times record the "real" wallclock time from start to end for each episode.
        self.finished_episode_total_times = [[] for _ in range_(self.num_environments)]
        # Sample times stop the wallclock time counter between runs, so only the sampling time is accounted for.
        self.finished_episode_sample_times = [[] for _ in range_(self.num_environments)]

        self.total_worker_steps = 0
        self.episodes_executed = 0

        # Step time and steps done per call to execute_and_get to measure throughput of this worker.
        self.sample_times = []
        self.sample_steps = []
        self.sample_env_frames = []

        # To continue running through multiple exec calls.
        self.last_states = self.vector_env.reset_all()

        self.zero_batched_state = np.zeros((1,) + self.agent.preprocessed_state_space.shape)
        self.zero_unbatched_state = np.zeros(self.agent.preprocessed_state_space.shape)
        self.preprocessed_states_buffer = np.zeros(
            shape=(self.num_environments,) + self.agent.preprocessed_state_space.shape,
            dtype=self.agent.preprocessed_state_space.dtype
        )
        self.last_ep_timesteps = [0 for _ in range_(self.num_environments)]
        self.last_ep_rewards = [0 for _ in range_(self.num_environments)]
        self.last_ep_start_timestamps = [0.0 for _ in range_(self.num_environments)]
        self.last_ep_start_initialized = False  # initialize on first `execute_and_get_timesteps()` call
        self.last_ep_sample_times = [0.0 for _ in range_(self.num_environments)]

        # Was the last state a terminal state so env should be reset in next call?
        self.last_terminals = [False for _ in range_(self.num_environments)]
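The `generalized_advantage_estimation` and `gae_lambda` options popped above refer to GAE post-processing of collected trajectories. For reference, a self-contained sketch of the standard GAE computation (the generic formula, not RLgraph's post-processor):

import numpy as np

def gae_advantages(rewards, values, next_value, terminals, gamma=0.99, gae_lambda=1.0):
    # delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_t) - V(s_t)
    # A_t     = delta_t + gamma * lambda * (1 - done_t) * A_{t+1}
    values = np.append(np.asarray(values, dtype=np.float64), next_value)
    terminals = np.asarray(terminals, dtype=np.float64)
    advantages = np.zeros(len(rewards))
    gae = 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * values[t + 1] * (1.0 - terminals[t]) - values[t]
        gae = delta + gamma * gae_lambda * (1.0 - terminals[t]) * gae
        advantages[t] = gae
    return advantages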