def __init__(self, local_worker, minibatch_buffer_size, num_sgd_iter,
             learner_queue_size, learner_queue_timeout):
        """Build the learner thread's queues, buffer, timers and flags.

        Arguments:
            local_worker (RolloutWorker): process-local rollout worker whose
                policies this thread calls learn_on_batch() on
            minibatch_buffer_size (int): upper bound on the number of train
                batches held by the minibatching buffer
            num_sgd_iter (int): how many learning passes to make over each
                train batch
            learner_queue_size (int): capacity of the queue of train batches
                inbound to this thread
            learner_queue_timeout (int): seconds the queue may stay empty
                before an exception is raised
        """
        threading.Thread.__init__(self)
        self.daemon = True

        # Collaborator plus lifecycle / bookkeeping state.
        self.local_worker = local_worker
        self.stopped = False
        self.weights_updated = False
        self.num_steps = 0
        self.stats = {}

        # Bounded inbound queue, unbounded outbound queue, and the buffer
        # that reads from the inbound queue.
        inbound = queue.Queue(maxsize=learner_queue_size)
        self.inqueue = inbound
        self.outqueue = queue.Queue()
        self.minibatch_buffer = MinibatchBuffer(
            inqueue=inbound,
            size=minibatch_buffer_size,
            timeout=learner_queue_timeout,
            num_passes=num_sgd_iter,
            init_num_passes=num_sgd_iter,
        )

        # Statistics object and timers.
        self.learner_queue_size = WindowStat("size", 50)
        self.queue_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.load_timer = TimerStat()
        self.load_wait_timer = TimerStat()
# Example no. 2
# 0
class LearnerThread(threading.Thread):
    """Background thread that updates the local model from sample trajectories.

    The learner thread communicates with the main thread through Queues. This
    is needed since Ray operations can only be run on the main thread. In
    addition, moving heavyweight gradient ops session runs off the main thread
    improves overall throughput.
    """

    def __init__(
        self,
        local_worker: RolloutWorker,
        minibatch_buffer_size: int,
        num_sgd_iter: int,
        learner_queue_size: int,
        learner_queue_timeout: int,
    ):
        """Initialize the learner thread.

        Args:
            local_worker: process local rollout worker holding
                policies this thread will call learn_on_batch() on
            minibatch_buffer_size: max number of train batches to store
                in the minibatching buffer
            num_sgd_iter: number of passes to learn on per train batch
            learner_queue_size: max size of queue of inbound
                train batches to this thread
            learner_queue_timeout: raise an exception if the queue has
                been empty for this long in seconds
        """
        super().__init__()
        # NOTE: this attribute holds a stat object, not the integer
        # `learner_queue_size` argument (which only sizes `self.inqueue`).
        self.learner_queue_size = WindowStat("size", 50)
        self.local_worker = local_worker
        self.inqueue = queue.Queue(maxsize=learner_queue_size)
        self.outqueue = queue.Queue()
        self.minibatch_buffer = MinibatchBuffer(
            inqueue=self.inqueue,
            size=minibatch_buffer_size,
            timeout=learner_queue_timeout,
            num_passes=num_sgd_iter,
            init_num_passes=num_sgd_iter,
        )
        self.queue_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.load_timer = TimerStat()
        self.load_wait_timer = TimerStat()
        self.daemon = True
        self.weights_updated = False
        self.learner_info = {}
        self.stopped = False
        self.num_steps = 0

    def run(self) -> None:
        """Thread body: call `step()` repeatedly until `self.stopped` is set."""
        # Switch on eager mode if configured.
        if self.local_worker.policy_config.get("framework") in ("tf2", "tfe"):
            tf1.enable_eager_execution()
        while not self.stopped:
            self.step()

    def step(self) -> Optional[_NextValueNotReady]:
        """Learn on the next batch from the minibatch buffer.

        On success, stores the finalized learner info in `self.learner_info`,
        sets `self.weights_updated`, increments `self.num_steps`, and puts
        `(batch.count, batch.agent_steps(), learner_info)` into
        `self.outqueue`.

        Returns:
            A `_NextValueNotReady` instance if the minibatch buffer raised
            `queue.Empty`; otherwise None.
        """
        with self.queue_timer:
            try:
                batch, _ = self.minibatch_buffer.get()
            except queue.Empty:
                return _NextValueNotReady()
        with self.grad_timer:
            # Use LearnerInfoBuilder as a unified way to build the final
            # results dict from the `learn_on_batch` call.
            # This makes sure results dicts always have the same structure
            # no matter the setup (multi-GPU, multi-agent, minibatch SGD,
            # tf vs torch).
            learner_info_builder = LearnerInfoBuilder(num_devices=1)
            multi_agent_results = self.local_worker.learn_on_batch(batch)
            for pid, results in multi_agent_results.items():
                learner_info_builder.add_learn_on_batch_results(results, pid)
            self.learner_info = learner_info_builder.finalize()
            self.weights_updated = True

        self.num_steps += 1
        # Put tuple: env-steps, agent-steps, and learner info into the queue.
        self.outqueue.put(
            (batch.count, batch.agent_steps(), self.learner_info))
        self.learner_queue_size.push(self.inqueue.qsize())

    def add_learner_metrics(self,
                            result: Dict,
                            overwrite_learner_info: bool = True) -> Dict:
        """Add internal metrics to a trainer result dict.

        Args:
            result: Result dict; its "info" sub-dict is updated in place.
            overwrite_learner_info: If True, also write a deep copy of
                `self.learner_info` under the `LEARNER_INFO` key. If False,
                that key in `result["info"]` is left untouched.

        Returns:
            The same `result` object that was passed in.
        """
        def timer_to_ms(timer):
            return round(1000 * timer.mean, 3)

        # Build the update once; only the learner-info entry is conditional.
        # Insertion order matches the original literal dicts.
        info_update = {"learner_queue": self.learner_queue_size.stats()}
        if overwrite_learner_info:
            info_update[LEARNER_INFO] = copy.deepcopy(self.learner_info)
        info_update["timing_breakdown"] = {
            "learner_grad_time_ms": timer_to_ms(self.grad_timer),
            "learner_load_time_ms": timer_to_ms(self.load_timer),
            "learner_load_wait_time_ms": timer_to_ms(self.load_wait_timer),
            "learner_dequeue_time_ms": timer_to_ms(self.queue_timer),
        }
        result["info"].update(info_update)
        return result
# Example no. 3
# 0
    def __init__(
        self,
        local_worker: RolloutWorker,
        num_gpus: int = 1,
        lr=None,  # deprecated.
        train_batch_size: int = 500,
        num_multi_gpu_tower_stacks: int = 1,
        num_sgd_iter: int = 1,
        learner_queue_size: int = 16,
        learner_queue_timeout: int = 300,
        num_data_load_threads: int = 16,
        _fake_gpus: bool = False,
        # Deprecated arg: a truthy value only triggers a deprecation
        # warning. The ready-stacks buffer is always sized by
        # `num_multi_gpu_tower_stacks`.
        minibatch_buffer_size=None,
    ):
        """Initializes a MultiGPULearnerThread instance.

        Args:
            local_worker (RolloutWorker): Local RolloutWorker holding
                policies this thread will call `load_batch_into_buffer` and
                `learn_on_loaded_batch` on.
            num_gpus (int): Number of GPUs to use for data-parallel SGD.
                Not read by this constructor.
            lr: Deprecated. Not read by this constructor.
            train_batch_size (int): Size of batches (minibatches if
                `num_sgd_iter` > 1) to learn on.
            num_multi_gpu_tower_stacks (int): Number of buffers to parallelly
                load data into on one device. Each buffer is of size of
                `train_batch_size` and hence increases GPU memory usage
                accordingly.
            num_sgd_iter (int): Number of passes to learn on per train batch
                (minibatch if `num_sgd_iter` > 1).
            learner_queue_size (int): Max size of queue of inbound
                train batches to this thread.
            learner_queue_timeout (int): Timeout forwarded to the parent
                constructor and to the ready-stacks MinibatchBuffer.
            num_data_load_threads (int): Number of threads to use to load
                data into GPU memory in parallel.
            _fake_gpus (bool): Not read by this constructor.
            minibatch_buffer_size: Deprecated; ignored apart from the
                deprecation warning.

        Raises:
            NotImplementedError: If the local worker's policy map contains
                anything other than exactly `DEFAULT_POLICY_ID`.
            AssertionError: If `train_batch_size` is not a multiple of, or
                is smaller than, the number of policy devices.
        """
        # Deprecated: No need to specify as we don't need the actual
        # minibatch-buffer anyways.
        if minibatch_buffer_size:
            deprecation_warning(
                old="MultiGPULearnerThread.minibatch_buffer_size",
                error=False,
            )
        super().__init__(
            local_worker=local_worker,
            minibatch_buffer_size=0,
            num_sgd_iter=num_sgd_iter,
            learner_queue_size=learner_queue_size,
            learner_queue_timeout=learner_queue_timeout,
        )
        # Delete reference to parent's minibatch_buffer, which is not needed.
        # Instead, in multi-GPU mode, we pull tower stack indices from the
        # `self.ready_tower_stacks_buffer` buffer, whose size is exactly
        # `num_multi_gpu_tower_stacks`.
        self.minibatch_buffer = None

        self.train_batch_size = train_batch_size

        # Validate the policy map BEFORE indexing into it, so a multi-agent
        # setup without the default policy id fails with the intended
        # NotImplementedError rather than a lookup error.
        # TODO: (sven) Allow multi-GPU to work for multi-agent as well.
        if set(self.local_worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
            raise NotImplementedError("Multi-gpu mode for multi-agent")
        self.policy = self.local_worker.policy_map[DEFAULT_POLICY_ID]

        logger.info("MultiGPULearnerThread devices {}".format(
            self.policy.devices))
        assert self.train_batch_size % len(self.policy.devices) == 0, \
            "train_batch_size must be divisible by the number of devices"
        assert self.train_batch_size >= len(self.policy.devices),\
            "batch too small"

        self.tower_stack_indices = list(range(num_multi_gpu_tower_stacks))

        # Two queues for tower stacks:
        # a) Those that are loaded with data ("ready")
        # b) Those that are ready to be loaded with new data ("idle").
        self.idle_tower_stacks = queue.Queue()
        self.ready_tower_stacks = queue.Queue()
        # In the beginning, all stacks are idle (no loading has taken place
        # yet).
        for idx in self.tower_stack_indices:
            self.idle_tower_stacks.put(idx)
        # Start n threads that are responsible for loading data into the
        # different (idle) stacks. Only the last-created thread stays
        # referenced via `self.loader_thread`.
        for i in range(num_data_load_threads):
            self.loader_thread = _MultiGPULoaderThread(self,
                                                       share_stats=(i == 0))
            self.loader_thread.start()

        # Create a buffer that holds stack indices that are "ready"
        # (loaded with data). Those are stacks that we can call
        # "learn_on_loaded_batch" on.
        self.ready_tower_stacks_buffer = MinibatchBuffer(
            self.ready_tower_stacks, num_multi_gpu_tower_stacks,
            learner_queue_timeout, num_sgd_iter)
# Example no. 4
# 0
class MultiGPULearnerThread(LearnerThread):
    """Learner that can use multiple GPUs and parallel loading.

    This class is used for async sampling algorithms.

    Example workflow: 2 GPUs and 3 multi-GPU tower stacks.
    -> On each GPU, there are 3 slots for batches, indexed 0, 1, and 2.

    Workers collect data from env and push it into inqueue:
    Workers -> (data) -> self.inqueue

    We also have two queues, indicating which stacks are loaded and which
    are not.
    - idle_tower_stacks = [0, 1, 2]  <- all 3 stacks are free at first.
    - ready_tower_stacks = []  <- None of the 3 stacks is loaded with data.

    `ready_tower_stacks` is managed by `ready_tower_stacks_buffer` for
    possible minibatch-SGD iterations per loaded batch (this avoids a reload
    from CPU to GPU for each SGD iter).

    n _MultiGPULoaderThreads: self.inqueue -get()->
    policy.load_batch_into_buffer() -> ready_stacks = [0 ...]

    This thread: self.ready_tower_stacks_buffer -get()->
    policy.learn_on_loaded_batch() -> if SGD-iters done,
    put stack index back in idle_tower_stacks queue.
    """
    def __init__(
        self,
        local_worker: RolloutWorker,
        num_gpus: int = 1,
        lr=None,  # deprecated.
        train_batch_size: int = 500,
        num_multi_gpu_tower_stacks: int = 1,
        num_sgd_iter: int = 1,
        learner_queue_size: int = 16,
        learner_queue_timeout: int = 300,
        num_data_load_threads: int = 16,
        _fake_gpus: bool = False,
        # Deprecated arg: a truthy value only triggers a deprecation
        # warning. The ready-stacks buffer is always sized by
        # `num_multi_gpu_tower_stacks`.
        minibatch_buffer_size=None,
    ):
        """Initializes a MultiGPULearnerThread instance.

        Args:
            local_worker (RolloutWorker): Local RolloutWorker holding
                policies this thread will call `load_batch_into_buffer` and
                `learn_on_loaded_batch` on.
            num_gpus (int): Number of GPUs to use for data-parallel SGD.
                Not read by this constructor.
            lr: Deprecated. Not read by this constructor.
            train_batch_size (int): Size of batches (minibatches if
                `num_sgd_iter` > 1) to learn on.
            num_multi_gpu_tower_stacks (int): Number of buffers to parallelly
                load data into on one device. Each buffer is of size of
                `train_batch_size` and hence increases GPU memory usage
                accordingly.
            num_sgd_iter (int): Number of passes to learn on per train batch
                (minibatch if `num_sgd_iter` > 1).
            learner_queue_size (int): Max size of queue of inbound
                train batches to this thread.
            learner_queue_timeout (int): Timeout forwarded to the parent
                constructor and to the ready-stacks MinibatchBuffer.
            num_data_load_threads (int): Number of threads to use to load
                data into GPU memory in parallel.
            _fake_gpus (bool): Not read by this constructor.
            minibatch_buffer_size: Deprecated; ignored apart from the
                deprecation warning.

        Raises:
            NotImplementedError: If the local worker's policy map contains
                anything other than exactly `DEFAULT_POLICY_ID`.
            AssertionError: If `train_batch_size` is not a multiple of, or
                is smaller than, the number of policy devices.
        """
        # Deprecated: No need to specify as we don't need the actual
        # minibatch-buffer anyways.
        if minibatch_buffer_size:
            deprecation_warning(
                old="MultiGPULearnerThread.minibatch_buffer_size",
                error=False,
            )
        super().__init__(
            local_worker=local_worker,
            minibatch_buffer_size=0,
            num_sgd_iter=num_sgd_iter,
            learner_queue_size=learner_queue_size,
            learner_queue_timeout=learner_queue_timeout,
        )
        # Delete reference to parent's minibatch_buffer, which is not needed.
        # Instead, in multi-GPU mode, we pull tower stack indices from the
        # `self.ready_tower_stacks_buffer` buffer, whose size is exactly
        # `num_multi_gpu_tower_stacks`.
        self.minibatch_buffer = None

        self.train_batch_size = train_batch_size

        # Validate the policy map BEFORE indexing into it, so a multi-agent
        # setup without the default policy id fails with the intended
        # NotImplementedError rather than a lookup error.
        # TODO: (sven) Allow multi-GPU to work for multi-agent as well.
        if set(self.local_worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
            raise NotImplementedError("Multi-gpu mode for multi-agent")
        self.policy = self.local_worker.policy_map[DEFAULT_POLICY_ID]

        logger.info("MultiGPULearnerThread devices {}".format(
            self.policy.devices))
        assert self.train_batch_size % len(self.policy.devices) == 0, \
            "train_batch_size must be divisible by the number of devices"
        assert self.train_batch_size >= len(self.policy.devices),\
            "batch too small"

        self.tower_stack_indices = list(range(num_multi_gpu_tower_stacks))

        # Two queues for tower stacks:
        # a) Those that are loaded with data ("ready")
        # b) Those that are ready to be loaded with new data ("idle").
        self.idle_tower_stacks = queue.Queue()
        self.ready_tower_stacks = queue.Queue()
        # In the beginning, all stacks are idle (no loading has taken place
        # yet).
        for idx in self.tower_stack_indices:
            self.idle_tower_stacks.put(idx)
        # Start n threads that are responsible for loading data into the
        # different (idle) stacks. Only the last-created thread stays
        # referenced via `self.loader_thread`.
        for i in range(num_data_load_threads):
            self.loader_thread = _MultiGPULoaderThread(self,
                                                       share_stats=(i == 0))
            self.loader_thread.start()

        # Create a buffer that holds stack indices that are "ready"
        # (loaded with data). Those are stacks that we can call
        # "learn_on_loaded_batch" on.
        self.ready_tower_stacks_buffer = MinibatchBuffer(
            self.ready_tower_stacks, num_multi_gpu_tower_stacks,
            learner_queue_timeout, num_sgd_iter)

    @override(LearnerThread)
    def step(self) -> None:
        """Run one learning step on the next ready tower stack.

        Fetches a stack index from `ready_tower_stacks_buffer`, calls
        `policy.learn_on_loaded_batch` on it, stores the resulting stats in
        `self.stats` (keyed by `DEFAULT_POLICY_ID`), returns the stack to
        `idle_tower_stacks` if the buffer reported it as released, and puts
        `(num_samples_loaded, stats)` into `self.outqueue`.
        """
        # `self.loader_thread` was rebound on every iteration of the
        # constructor's loop, so this only checks the last-created loader.
        assert self.loader_thread.is_alive()
        with self.load_wait_timer:
            buffer_idx, released = self.ready_tower_stacks_buffer.get()

        with self.grad_timer:
            fetches = self.policy.learn_on_loaded_batch(
                offset=0, buffer_index=buffer_idx)
            self.weights_updated = True
            self.stats = {DEFAULT_POLICY_ID: get_learner_stats(fetches)}

        # The stack can be reloaded with fresh data once the buffer is done
        # with it.
        if released:
            self.idle_tower_stacks.put(buffer_idx)

        self.outqueue.put(
            (self.policy.get_num_samples_loaded_into_buffer(buffer_idx),
             self.stats))
        self.learner_queue_size.push(self.inqueue.qsize())
class LearnerThread(threading.Thread):
    """Daemon thread that trains the local model on queued sample batches.

    Data is exchanged with the main thread via Queues, because Ray
    operations can only be run on the main thread. Running the heavyweight
    gradient ops off the main thread also improves overall throughput.
    """
    def __init__(self, local_worker, minibatch_buffer_size, num_sgd_iter,
                 learner_queue_size, learner_queue_timeout):
        """Build the learner thread's queues, buffer, timers and flags.

        Arguments:
            local_worker (RolloutWorker): process-local rollout worker whose
                policies this thread calls learn_on_batch() on
            minibatch_buffer_size (int): upper bound on the number of train
                batches held by the minibatching buffer
            num_sgd_iter (int): how many learning passes to make over each
                train batch
            learner_queue_size (int): capacity of the queue of train batches
                inbound to this thread
            learner_queue_timeout (int): seconds the queue may stay empty
                before an exception is raised
        """
        threading.Thread.__init__(self)
        self.daemon = True

        # Collaborator plus lifecycle / bookkeeping state.
        self.local_worker = local_worker
        self.stopped = False
        self.weights_updated = False
        self.num_steps = 0
        self.stats = {}

        # Bounded inbound queue, unbounded outbound queue, and the buffer
        # that reads from the inbound queue.
        inbound = queue.Queue(maxsize=learner_queue_size)
        self.inqueue = inbound
        self.outqueue = queue.Queue()
        self.minibatch_buffer = MinibatchBuffer(
            inqueue=inbound,
            size=minibatch_buffer_size,
            timeout=learner_queue_timeout,
            num_passes=num_sgd_iter,
            init_num_passes=num_sgd_iter,
        )

        # Statistics object and timers.
        self.learner_queue_size = WindowStat("size", 50)
        self.queue_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.load_timer = TimerStat()
        self.load_wait_timer = TimerStat()

    def run(self):
        """Thread body: keep stepping until `self.stopped` becomes true."""
        while True:
            if self.stopped:
                return
            self.step()

    def step(self):
        """Learn on the next buffered batch and publish `(count, stats)`."""
        with self.queue_timer:
            batch, _unused = self.minibatch_buffer.get()

        with self.grad_timer:
            learn_output = self.local_worker.learn_on_batch(batch)
            self.weights_updated = True
            self.stats = get_learner_stats(learn_output)

        self.num_steps += 1
        outgoing = (batch.count, self.stats)
        self.outqueue.put(outgoing)
        self.learner_queue_size.push(self.inqueue.qsize())

    def add_learner_metrics(self, result):
        """Merge queue, learner and timing metrics into `result["info"]`.

        Returns the same `result` object that was passed in.
        """
        queue_stats = self.learner_queue_size.stats()
        learner_stats = copy.deepcopy(self.stats)
        # Mean timer values, converted from seconds to rounded milliseconds.
        named_timers = (
            ("learner_grad_time_ms", self.grad_timer),
            ("learner_load_time_ms", self.load_timer),
            ("learner_load_wait_time_ms", self.load_wait_timer),
            ("learner_dequeue_time_ms", self.queue_timer),
        )
        timing = {
            label: round(1000 * timer.mean, 3)
            for label, timer in named_timers
        }

        result["info"].update({
            "learner_queue": queue_stats,
            "learner": learner_stats,
            "timing_breakdown": timing,
        })
        return result
    def __init__(
            self,
            local_worker: RolloutWorker,
            num_gpus: int = 1,
            lr=None,  # deprecated.
            train_batch_size: int = 500,
            num_multi_gpu_tower_stacks: int = 1,
            minibatch_buffer_size: int = 1,
            num_sgd_iter: int = 1,
            learner_queue_size: int = 16,
            learner_queue_timeout: int = 300,
            num_data_load_threads: int = 16,
            _fake_gpus: bool = False):
        """Initializes a MultiGPULearnerThread instance.

        Args:
            local_worker (RolloutWorker): Local RolloutWorker holding
                policies this thread will call load_data() and optimizer() on.
            num_gpus (int): Number of GPUs to use for data-parallel SGD.
                Not read by this constructor.
            lr: Deprecated. Not read by this constructor.
            train_batch_size (int): Size of batches (minibatches if
                `num_sgd_iter` > 1) to learn on.
            num_multi_gpu_tower_stacks (int): Number of buffers to parallelly
                load data into on one device. Each buffer is of size of
                `train_batch_size` and hence increases GPU memory usage
                accordingly.
            minibatch_buffer_size (int): Max number of train batches to store
                in the minibatch buffer.
            num_sgd_iter (int): Number of passes to learn on per train batch
                (minibatch if `num_sgd_iter` > 1).
            learner_queue_size (int): Max size of queue of inbound
                train batches to this thread.
            learner_queue_timeout (int): Timeout forwarded to the parent
                constructor and to the MinibatchBuffer created here.
            num_data_load_threads (int): Number of threads to use to load
                data into GPU memory in parallel.
            _fake_gpus (bool): Not read by this constructor.

        Raises:
            NotImplementedError: If the local worker's policy map contains
                anything other than exactly `DEFAULT_POLICY_ID`.
            AssertionError: If `train_batch_size` is not a multiple of, or
                is smaller than, the number of policy devices.
        """
        LearnerThread.__init__(self, local_worker, minibatch_buffer_size,
                               num_sgd_iter, learner_queue_size,
                               learner_queue_timeout)
        self.train_batch_size = train_batch_size

        # Validate the policy map BEFORE indexing into it, so a multi-agent
        # setup without the default policy id fails with the intended
        # NotImplementedError rather than a lookup error.
        # TODO: (sven) Allow multi-GPU to work for multi-agent as well.
        if set(self.local_worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
            raise NotImplementedError("Multi-gpu mode for multi-agent")
        self.policy = self.local_worker.policy_map[DEFAULT_POLICY_ID]

        logger.info("MultiGPULearnerThread devices {}".format(
            self.policy.devices))
        assert self.train_batch_size % len(self.policy.devices) == 0, \
            "train_batch_size must be divisible by the number of devices"
        assert self.train_batch_size >= len(self.policy.devices),\
            "batch too small"

        self.tower_stack_indices = list(range(num_multi_gpu_tower_stacks))

        # All tower stacks start out idle; none is ready yet.
        self.idle_tower_stacks = queue.Queue()
        self.ready_tower_stacks = queue.Queue()
        for idx in self.tower_stack_indices:
            self.idle_tower_stacks.put(idx)
        # Start the loader threads. Only the last-created thread stays
        # referenced via `self.loader_thread`.
        for i in range(num_data_load_threads):
            self.loader_thread = _MultiGPULoaderThread(self,
                                                       share_stats=(i == 0))
            self.loader_thread.start()

        # Replace the parent's buffer with one that reads from the queue of
        # ready tower stacks.
        self.minibatch_buffer = MinibatchBuffer(self.ready_tower_stacks,
                                                minibatch_buffer_size,
                                                learner_queue_timeout,
                                                num_sgd_iter)
class MultiGPULearnerThread(LearnerThread):
    """Learner that can use multiple GPUs and parallel loading.

    This class is used for async sampling algorithms.
    """
    def __init__(
            self,
            local_worker: RolloutWorker,
            num_gpus: int = 1,
            lr=None,  # deprecated.
            train_batch_size: int = 500,
            num_multi_gpu_tower_stacks: int = 1,
            minibatch_buffer_size: int = 1,
            num_sgd_iter: int = 1,
            learner_queue_size: int = 16,
            learner_queue_timeout: int = 300,
            num_data_load_threads: int = 16,
            _fake_gpus: bool = False):
        """Initializes a MultiGPULearnerThread instance.

        Args:
            local_worker (RolloutWorker): Local RolloutWorker holding
                policies this thread will call load_data() and optimizer() on.
            num_gpus (int): Number of GPUs to use for data-parallel SGD.
                Not read by this constructor.
            lr: Deprecated. Not read by this constructor.
            train_batch_size (int): Size of batches (minibatches if
                `num_sgd_iter` > 1) to learn on.
            num_multi_gpu_tower_stacks (int): Number of buffers to parallelly
                load data into on one device. Each buffer is of size of
                `train_batch_size` and hence increases GPU memory usage
                accordingly.
            minibatch_buffer_size (int): Max number of train batches to store
                in the minibatch buffer.
            num_sgd_iter (int): Number of passes to learn on per train batch
                (minibatch if `num_sgd_iter` > 1).
            learner_queue_size (int): Max size of queue of inbound
                train batches to this thread.
            learner_queue_timeout (int): Timeout forwarded to the parent
                constructor and to the MinibatchBuffer created here.
            num_data_load_threads (int): Number of threads to use to load
                data into GPU memory in parallel.
            _fake_gpus (bool): Not read by this constructor.

        Raises:
            NotImplementedError: If the local worker's policy map contains
                anything other than exactly `DEFAULT_POLICY_ID`.
            AssertionError: If `train_batch_size` is not a multiple of, or
                is smaller than, the number of policy devices.
        """
        LearnerThread.__init__(self, local_worker, minibatch_buffer_size,
                               num_sgd_iter, learner_queue_size,
                               learner_queue_timeout)
        self.train_batch_size = train_batch_size

        # Validate the policy map BEFORE indexing into it, so a multi-agent
        # setup without the default policy id fails with the intended
        # NotImplementedError rather than a lookup error.
        # TODO: (sven) Allow multi-GPU to work for multi-agent as well.
        if set(self.local_worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
            raise NotImplementedError("Multi-gpu mode for multi-agent")
        self.policy = self.local_worker.policy_map[DEFAULT_POLICY_ID]

        logger.info("MultiGPULearnerThread devices {}".format(
            self.policy.devices))
        assert self.train_batch_size % len(self.policy.devices) == 0, \
            "train_batch_size must be divisible by the number of devices"
        assert self.train_batch_size >= len(self.policy.devices),\
            "batch too small"

        self.tower_stack_indices = list(range(num_multi_gpu_tower_stacks))

        # All tower stacks start out idle; none is ready yet.
        self.idle_tower_stacks = queue.Queue()
        self.ready_tower_stacks = queue.Queue()
        for idx in self.tower_stack_indices:
            self.idle_tower_stacks.put(idx)
        # Start the loader threads. Only the last-created thread stays
        # referenced via `self.loader_thread`.
        for i in range(num_data_load_threads):
            self.loader_thread = _MultiGPULoaderThread(self,
                                                       share_stats=(i == 0))
            self.loader_thread.start()

        # Replace the parent's buffer with one that reads from the queue of
        # ready tower stacks.
        self.minibatch_buffer = MinibatchBuffer(self.ready_tower_stacks,
                                                minibatch_buffer_size,
                                                learner_queue_timeout,
                                                num_sgd_iter)

    @override(LearnerThread)
    def step(self) -> None:
        """Run one learning step on the next ready tower stack.

        Fetches a stack index from `minibatch_buffer`, calls
        `policy.learn_on_loaded_batch` on it, stores the resulting stats in
        `self.stats` (keyed by `DEFAULT_POLICY_ID`), puts the index back
        into `idle_tower_stacks` when the buffer's second return value is
        truthy, and puts `(num_samples_loaded, stats)` into `self.outqueue`.
        """
        # `self.loader_thread` was rebound on every iteration of the
        # constructor's loop, so this only checks the last-created loader.
        assert self.loader_thread.is_alive()
        with self.load_wait_timer:
            buffer_idx, released = self.minibatch_buffer.get()

        with self.grad_timer:
            fetches = self.policy.learn_on_loaded_batch(
                offset=0, buffer_index=buffer_idx)
            self.weights_updated = True
            self.stats = {DEFAULT_POLICY_ID: get_learner_stats(fetches)}

        # NOTE(review): `released` presumably means the buffer has finished
        # all passes over this stack -- confirm against MinibatchBuffer.
        if released:
            self.idle_tower_stacks.put(buffer_idx)

        self.outqueue.put(
            (self.policy.get_num_samples_loaded_into_buffer(buffer_idx),
             self.stats))
        self.learner_queue_size.push(self.inqueue.qsize())
class MultiGPULearnerThread(LearnerThread):
    """Learner that can use multiple GPUs and parallel loading.

    This class is used for async sampling algorithms.

    Example workflow: 2 GPUs and 3 multi-GPU tower stacks.
    -> On each GPU, there are 3 slots for batches, indexed 0, 1, and 2.

    Workers collect data from env and push it into inqueue:
    Workers -> (data) -> self.inqueue

    We also have two queues, indicating which stacks are loaded and which
    are not.
    - idle_tower_stacks = [0, 1, 2]  <- all 3 stacks are free at first.
    - ready_tower_stacks = []  <- None of the 3 stacks is loaded with data.

    `ready_tower_stacks` is managed by `ready_tower_stacks_buffer` for
    possible minibatch-SGD iterations per loaded batch (this avoids a reload
    from CPU to GPU for each SGD iter).

    n _MultiGPULoaderThreads: self.inqueue -get()->
    policy.load_batch_into_buffer() -> ready_stacks = [0 ...]

    This thread: self.ready_tower_stacks_buffer -get()->
    policy.learn_on_loaded_batch() -> if SGD-iters done,
    put stack index back in idle_tower_stacks queue.
    """

    def __init__(
        self,
        local_worker: RolloutWorker,
        num_gpus: int = 1,
        lr=None,  # deprecated.
        train_batch_size: int = 500,
        num_multi_gpu_tower_stacks: int = 1,
        num_sgd_iter: int = 1,
        learner_queue_size: int = 16,
        learner_queue_timeout: int = 300,
        num_data_load_threads: int = 16,
        _fake_gpus: bool = False,
        # Deprecated arg: only triggers a deprecation warning, its value is
        # never used.
        minibatch_buffer_size=None,
    ):
        """Initializes a MultiGPULearnerThread instance.

        Note that the loader threads are started from within this
        constructor.

        Args:
            local_worker: Local RolloutWorker holding
                policies this thread will call `load_batch_into_buffer` and
                `learn_on_loaded_batch` on.
            num_gpus: Number of GPUs to use for data-parallel SGD. Not read
                by this constructor; the devices are taken from the first
                policy in the worker's policy map.
            lr: Deprecated; not read by this constructor.
            train_batch_size: Size of batches (minibatches if
                `num_sgd_iter` > 1) to learn on. Must be a multiple of the
                number of devices.
            num_multi_gpu_tower_stacks: Number of buffers to parallelly
                load data into on one device. Each buffer is of size of
                `train_batch_size` and hence increases GPU memory usage
                accordingly.
            num_sgd_iter: Number of passes to learn on per train batch
                (minibatch if `num_sgd_iter` > 1).
            learner_queue_size: Max size of queue of inbound
                train batches to this thread.
            learner_queue_timeout: Timeout passed on to the parent class and
                to the buffer of ready tower stacks.
            num_data_load_threads: Number of threads to use to load
                data into GPU memory in parallel.
            _fake_gpus: Not read by this constructor.
            minibatch_buffer_size: Deprecated. A truthy value only triggers
                a deprecation warning.
        """
        # Deprecated: No need to specify as we don't need the actual
        # minibatch-buffer anyways.
        if minibatch_buffer_size:
            deprecation_warning(
                old="MultiGPULearnerThread.minibatch_buffer_size",
                error=False,
            )
        super().__init__(
            local_worker=local_worker,
            minibatch_buffer_size=0,
            num_sgd_iter=num_sgd_iter,
            learner_queue_size=learner_queue_size,
            learner_queue_timeout=learner_queue_timeout,
        )
        # Delete reference to parent's minibatch_buffer, which is not needed.
        # Instead, in multi-GPU mode, we pull tower stack indices from the
        # `self.ready_tower_stacks_buffer` buffer, whose size is exactly
        # `num_multi_gpu_tower_stacks`.
        self.minibatch_buffer = None

        self.train_batch_size = train_batch_size

        self.policy_map = self.local_worker.policy_map
        # The devices of the first policy in the map are used for the whole
        # learner thread.
        self.devices = next(iter(self.policy_map.values())).devices

        logger.info("MultiGPULearnerThread devices {}".format(self.devices))
        assert (
            self.train_batch_size % len(self.devices) == 0
        ), "train_batch_size must be a multiple of the number of devices"
        assert self.train_batch_size >= len(self.devices), "batch too small"

        self.tower_stack_indices = list(range(num_multi_gpu_tower_stacks))

        # Two queues for tower stacks:
        # a) Those that are loaded with data ("ready")
        # b) Those that are ready to be loaded with new data ("idle").
        self.idle_tower_stacks = queue.Queue()
        self.ready_tower_stacks = queue.Queue()
        # In the beginning, all stacks are idle (no loading has taken place
        # yet).
        for idx in self.tower_stack_indices:
            self.idle_tower_stacks.put(idx)
        # Start n threads that are responsible for loading data into the
        # different (idle) stacks. Only the first thread is created with
        # `share_stats=True`; `self.loader_thread` ends up referencing the
        # last thread started.
        for i in range(num_data_load_threads):
            self.loader_thread = _MultiGPULoaderThread(self, share_stats=(i == 0))
            self.loader_thread.start()

        # Create a buffer that holds stack indices that are "ready"
        # (loaded with data). Those are stacks that we can call
        # "learn_on_loaded_batch" on.
        self.ready_tower_stacks_buffer = MinibatchBuffer(
            self.ready_tower_stacks,
            num_multi_gpu_tower_stacks,
            learner_queue_timeout,
            num_sgd_iter,
        )

    @override(LearnerThread)
    def step(self) -> None:
        """Learn once on a ready tower stack and publish the results.

        Waits for the index of a loaded tower stack, calls
        `learn_on_loaded_batch` on every policy that is marked as trainable
        by the local worker, returns the stack index to
        `self.idle_tower_stacks` when the buffer flags it as released, and
        pushes a `(env_steps, agent_steps, learner_info)` tuple into
        `self.outqueue`.

        Raises:
            RuntimeError: If `self.loader_thread` is no longer alive.
        """
        # Explicit check rather than `assert`, so that it is not stripped
        # when Python runs with `-O`.
        if not self.loader_thread.is_alive():
            raise RuntimeError(
                "The `_MultiGPULoaderThread` has died; the "
                "`MultiGPULearnerThread` cannot continue."
            )
        with self.load_wait_timer:
            buffer_idx, released = self.ready_tower_stacks_buffer.get()

        # Sum of the samples loaded into this stack over all trained policies.
        num_samples_loaded = 0
        with self.grad_timer:
            # Use LearnerInfoBuilder as a unified way to build the final
            # results dict from `learn_on_loaded_batch` call(s).
            # This makes sure results dicts always have the same structure
            # no matter the setup (multi-GPU, multi-agent, minibatch SGD,
            # tf vs torch).
            learner_info_builder = LearnerInfoBuilder(num_devices=len(self.devices))

            for pid in self.policy_map.keys():
                # Not a policy-to-train.
                if not self.local_worker.is_policy_to_train(pid):
                    continue
                policy = self.policy_map[pid]
                policy_results = policy.learn_on_loaded_batch(
                    offset=0, buffer_index=buffer_idx
                )
                # NOTE(review): the results are added without the policy id;
                # confirm that LearnerInfoBuilder attributes them to the
                # right policy in multi-agent setups.
                learner_info_builder.add_learn_on_batch_results(policy_results)
                self.weights_updated = True
                # Counted while this thread still owns the stack, i.e. before
                # the index is handed back to `idle_tower_stacks` below.
                num_samples_loaded += policy.get_num_samples_loaded_into_buffer(
                    buffer_idx
                )

            self.learner_info = learner_info_builder.finalize()

        if released:
            self.idle_tower_stacks.put(buffer_idx)

        # Put tuple: env-steps, agent-steps, and learner info into the queue.
        # The same sample count is reported for both step counts.
        self.outqueue.put(
            (
                num_samples_loaded,
                num_samples_loaded,
                self.learner_info,
            )
        )
        self.learner_queue_size.push(self.inqueue.qsize())
Esempio n. 9
0
    def __init__(self,
                 local_worker,
                 num_gpus=1,
                 lr=0.0005,
                 train_batch_size=500,
                 num_data_loader_buffers=1,
                 minibatch_buffer_size=1,
                 num_sgd_iter=1,
                 learner_queue_size=16,
                 learner_queue_timeout=300,
                 num_data_load_threads=16,
                 _fake_gpus=False):
        """Initialize a multi-gpu learner thread.

        Builds one parallel optimizer per data loader buffer inside the local
        worker's TF graph/session, starts the loader threads, and sets up the
        minibatch buffer that is fed from the queue of ready optimizers.

        Arguments:
            local_worker (RolloutWorker): process local rollout worker holding
                policies this thread will call learn_on_batch() on
            num_gpus (int): number of GPUs to use for data-parallel SGD; the
                value is rounded up to build the device list, and a falsy
                value selects a single CPU device
            lr (float): learning rate (used for the Adam optimizer)
            train_batch_size (int): size of batches to learn on; must be a
                multiple of the number of devices
            num_data_loader_buffers (int): number of buffers to load data into
                in parallel. Each buffer is of size of train_batch_size and
                increases GPU memory usage proportionally.
            minibatch_buffer_size (int): max number of train batches to store
                in the minibatching buffer
            num_sgd_iter (int): number of passes to learn on per train batch
            learner_queue_size (int): max size of queue of inbound
                train batches to this thread
            learner_queue_timeout (int): timeout handed to the base class and
                to the minibatch buffer
            num_data_load_threads (int): number of threads to use to load
                data into GPU memory in parallel
            _fake_gpus (bool): if True (and num_gpus is truthy), use "/cpu:i"
                device names instead of "/gpu:i"

        Raises:
            NotImplementedError: if the local worker's policy map contains
                anything other than exactly the default policy
        """
        # Run the base-class initializer first.
        LearnerThread.__init__(self, local_worker, minibatch_buffer_size,
                               num_sgd_iter, learner_queue_size,
                               learner_queue_timeout)
        self.lr = lr
        self.train_batch_size = train_batch_size
        # Pick the device names: one CPU when no GPUs are requested,
        # ceil(num_gpus) CPU devices when GPUs are faked, otherwise
        # ceil(num_gpus) GPU devices.
        if not num_gpus:
            self.devices = ["/cpu:0"]
        elif _fake_gpus:
            self.devices = [
                "/cpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
            ]
        else:
            self.devices = [
                "/gpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
            ]
        logger.info("TFMultiGPULearner devices {}".format(self.devices))
        # The train batch has to split evenly across the devices.
        assert self.train_batch_size % len(self.devices) == 0
        assert self.train_batch_size >= len(self.devices), "batch too small"

        # Only the single (default) policy setup is supported here.
        if set(self.local_worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
            raise NotImplementedError("Multi-gpu mode for multi-agent")
        self.policy = self.local_worker.policy_map[DEFAULT_POLICY_ID]

        # per-GPU graph copies created below must share vars with the policy
        # reuse is set to AUTO_REUSE because Adam nodes are created after
        # all of the device copies are created.
        self.par_opt = []
        with self.local_worker.tf_sess.graph.as_default():
            with self.local_worker.tf_sess.as_default():
                with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
                    # RNN inputs are the policy's state inputs plus the
                    # sequence lengths; empty when the policy has no state
                    # inputs.
                    if self.policy._state_inputs:
                        rnn_inputs = self.policy._state_inputs + [
                            self.policy._seq_lens
                        ]
                    else:
                        rnn_inputs = []
                    # A single Adam optimizer instance is shared by all
                    # parallel optimizers created in the loop below.
                    adam = tf.train.AdamOptimizer(self.lr)
                    for _ in range(num_data_loader_buffers):
                        self.par_opt.append(
                            LocalSyncParallelOptimizer(
                                adam,
                                self.devices,
                                [v for _, v in self.policy._loss_inputs],
                                rnn_inputs,
                                999999,  # it will get rounded down
                                self.policy.copy))

                # NOTE(review): this runs the initializer for all global
                # variables, not only the ones created above - confirm that
                # this is intended.
                self.sess = self.local_worker.tf_sess
                self.sess.run(tf.global_variables_initializer())

        # All parallel optimizers start out in the idle queue.
        # Presumably the loader threads move them to `ready_optimizers` once
        # data is loaded - confirm in `_LoaderThread`.
        self.idle_optimizers = queue.Queue()
        self.ready_optimizers = queue.Queue()
        for opt in self.par_opt:
            self.idle_optimizers.put(opt)
        # Only the first loader thread is created with share_stats=True.
        # `self.loader_thread` ends up referencing the last thread started.
        for i in range(num_data_load_threads):
            self.loader_thread = _LoaderThread(self, share_stats=(i == 0))
            self.loader_thread.start()

        # (Re-)assign the minibatch buffer so that it draws from the queue of
        # ready optimizers.
        self.minibatch_buffer = MinibatchBuffer(
            self.ready_optimizers, minibatch_buffer_size,
            learner_queue_timeout, num_sgd_iter)