Python MinibatchBuffer.get 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: ray.rllib.optimizers.aso_minibatch_buffer

클래스/타입: MinibatchBuffer

메소드/함수: get

hotexamples.com에서의 예제들: 3

Python MinibatchBuffer.get - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 ray.rllib.optimizers.aso_minibatch_buffer.MinibatchBuffer.get에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

MinibatchBuffer(4)

get(3)

자주 사용되는 메소드들

MinibatchBuffer (4)

get (3)

예제 #1

파일 보기

파일: aso_multi_gpu_learner.py 프로젝트: rustbeltanalytica/Web-Assets

class TFMultiGPULearner(LearnerThread):
    """Learner that can use multiple GPUs and parallel loading.

    This is for use with AsyncSamplesOptimizer.
    """

    def __init__(self,
                 local_worker,
                 num_gpus=1,
                 lr=0.0005,
                 train_batch_size=500,
                 num_data_loader_buffers=1,
                 minibatch_buffer_size=1,
                 num_sgd_iter=1,
                 learner_queue_size=16,
                 learner_queue_timeout=300,
                 num_data_load_threads=16,
                 _fake_gpus=False):
        """Initialize a multi-gpu learner thread.

        Arguments:
            local_worker (RolloutWorker): process local rollout worker holding
                policies this thread will call learn_on_batch() on
            num_gpus (int): number of GPUs to use for data-parallel SGD
            lr (float): learning rate
            train_batch_size (int): size of batches to learn on
            num_data_loader_buffers (int): number of buffers to load data into
                in parallel. Each buffer is of size of train_batch_size and
                increases GPU memory usage proportionally.
            minibatch_buffer_size (int): max number of train batches to store
                in the minibatching buffer
            num_sgd_iter (int): number of passes to learn on per train batch
            learner_queue_size (int): max size of queue of inbound
                train batches to this thread
            num_data_loader_threads (int): number of threads to use to load
                data into GPU memory in parallel
        """
        LearnerThread.__init__(self, local_worker, minibatch_buffer_size,
                               num_sgd_iter, learner_queue_size,
                               learner_queue_timeout)
        self.lr = lr
        self.train_batch_size = train_batch_size
        if not num_gpus:
            self.devices = ["/cpu:0"]
        elif _fake_gpus:
            self.devices = [
                "/cpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
            ]
        else:
            self.devices = [
                "/gpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
            ]
        logger.info("TFMultiGPULearner devices {}".format(self.devices))
        assert self.train_batch_size % len(self.devices) == 0
        assert self.train_batch_size >= len(self.devices), "batch too small"

        if set(self.local_worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
            raise NotImplementedError("Multi-gpu mode for multi-agent")
        self.policy = self.local_worker.policy_map[DEFAULT_POLICY_ID]

        # per-GPU graph copies created below must share vars with the policy
        # reuse is set to AUTO_REUSE because Adam nodes are created after
        # all of the device copies are created.
        self.par_opt = []
        with self.local_worker.tf_sess.graph.as_default():
            with self.local_worker.tf_sess.as_default():
                with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
                    if self.policy._state_inputs:
                        rnn_inputs = self.policy._state_inputs + [
                            self.policy._seq_lens
                        ]
                    else:
                        rnn_inputs = []
                    adam = tf.train.AdamOptimizer(self.lr)
                    for _ in range(num_data_loader_buffers):
                        self.par_opt.append(
                            LocalSyncParallelOptimizer(
                                adam,
                                self.devices,
                                [v for _, v in self.policy._loss_inputs],
                                rnn_inputs,
                                999999,  # it will get rounded down
                                self.policy.copy))

                self.sess = self.local_worker.tf_sess
                self.sess.run(tf.global_variables_initializer())

        self.idle_optimizers = queue.Queue()
        self.ready_optimizers = queue.Queue()
        for opt in self.par_opt:
            self.idle_optimizers.put(opt)
        for i in range(num_data_load_threads):
            self.loader_thread = _LoaderThread(self, share_stats=(i == 0))
            self.loader_thread.start()

        self.minibatch_buffer = MinibatchBuffer(
            self.ready_optimizers, minibatch_buffer_size,
            learner_queue_timeout, num_sgd_iter)

    @override(LearnerThread)
    def step(self):
        assert self.loader_thread.is_alive()
        with self.load_wait_timer:
            opt, released = self.minibatch_buffer.get()

        with self.grad_timer:
            fetches = opt.optimize(self.sess, 0)
            self.weights_updated = True
            self.stats = get_learner_stats(fetches)

        if released:
            self.idle_optimizers.put(opt)

        self.outqueue.put(opt.num_tuples_loaded)
        self.learner_queue_size.push(self.inqueue.qsize())

예제 #2

파일 보기

파일: aso_multi_gpu_learner.py 프로젝트: zzz622848/ray

class TFMultiGPULearner(LearnerThread):
    """Learner that can use multiple GPUs and parallel loading.

    This is for use with AsyncSamplesOptimizer.
    """
    def __init__(self,
                 local_evaluator,
                 num_gpus=1,
                 lr=0.0005,
                 train_batch_size=500,
                 num_data_loader_buffers=1,
                 minibatch_buffer_size=1,
                 num_sgd_iter=1,
                 learner_queue_size=16,
                 num_data_load_threads=16,
                 _fake_gpus=False):
        LearnerThread.__init__(self, local_evaluator, minibatch_buffer_size,
                               num_sgd_iter, learner_queue_size)
        self.lr = lr
        self.train_batch_size = train_batch_size
        if not num_gpus:
            self.devices = ["/cpu:0"]
        elif _fake_gpus:
            self.devices = [
                "/cpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
            ]
        else:
            self.devices = [
                "/gpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
            ]
        logger.info("TFMultiGPULearner devices {}".format(self.devices))
        assert self.train_batch_size % len(self.devices) == 0
        assert self.train_batch_size >= len(self.devices), "batch too small"

        if set(self.local_evaluator.policy_map.keys()) != {DEFAULT_POLICY_ID}:
            raise NotImplementedError("Multi-gpu mode for multi-agent")
        self.policy = self.local_evaluator.policy_map[DEFAULT_POLICY_ID]

        # per-GPU graph copies created below must share vars with the policy
        # reuse is set to AUTO_REUSE because Adam nodes are created after
        # all of the device copies are created.
        self.par_opt = []
        with self.local_evaluator.tf_sess.graph.as_default():
            with self.local_evaluator.tf_sess.as_default():
                with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
                    if self.policy._state_inputs:
                        rnn_inputs = self.policy._state_inputs + [
                            self.policy._seq_lens
                        ]
                    else:
                        rnn_inputs = []
                    adam = tf.train.AdamOptimizer(self.lr)
                    for _ in range(num_data_loader_buffers):
                        self.par_opt.append(
                            LocalSyncParallelOptimizer(
                                adam,
                                self.devices,
                                [v for _, v in self.policy._loss_inputs],
                                rnn_inputs,
                                999999,  # it will get rounded down
                                self.policy.copy))

                self.sess = self.local_evaluator.tf_sess
                self.sess.run(tf.global_variables_initializer())

        self.idle_optimizers = queue.Queue()
        self.ready_optimizers = queue.Queue()
        for opt in self.par_opt:
            self.idle_optimizers.put(opt)
        for i in range(num_data_load_threads):
            self.loader_thread = _LoaderThread(self, share_stats=(i == 0))
            self.loader_thread.start()

        self.minibatch_buffer = MinibatchBuffer(self.ready_optimizers,
                                                minibatch_buffer_size,
                                                num_sgd_iter)

    @override(LearnerThread)
    def step(self):
        assert self.loader_thread.is_alive()
        with self.load_wait_timer:
            opt, released = self.minibatch_buffer.get()

        with self.grad_timer:
            fetches = opt.optimize(self.sess, 0)
            self.weights_updated = True
            self.stats = get_learner_stats(fetches)

        if released:
            self.idle_optimizers.put(opt)

        self.outqueue.put(opt.num_tuples_loaded)
        self.learner_queue_size.push(self.inqueue.qsize())

예제 #3

파일 보기

class LearnerThread(threading.Thread):
    """Background thread that updates the local model from sample trajectories.

    This is for use with AsyncSamplesOptimizer.

    The learner thread communicates with the main thread through Queues. This
    is needed since Ray operations can only be run on the main thread. In
    addition, moving heavyweight gradient ops session runs off the main thread
    improves overall throughput.
    """
    def __init__(self, local_worker, minibatch_buffer_size, num_sgd_iter,
                 learner_queue_size, learner_queue_timeout):
        """Initialize the learner thread.

        Arguments:
            local_worker (RolloutWorker): process local rollout worker holding
                policies this thread will call learn_on_batch() on
            minibatch_buffer_size (int): max number of train batches to store
                in the minibatching buffer
            num_sgd_iter (int): number of passes to learn on per train batch
            learner_queue_size (int): max size of queue of inbound
                train batches to this thread
            learner_queue_timeout (int): raise an exception if the queue has
                been empty for this long in seconds
        """
        threading.Thread.__init__(self)
        self.learner_queue_size = WindowStat("size", 50)
        self.local_worker = local_worker
        self.inqueue = queue.Queue(maxsize=learner_queue_size)
        self.outqueue = queue.Queue()
        self.minibatch_buffer = MinibatchBuffer(inqueue=self.inqueue,
                                                size=minibatch_buffer_size,
                                                timeout=learner_queue_timeout,
                                                num_passes=num_sgd_iter,
                                                init_num_passes=num_sgd_iter)
        self.queue_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.load_timer = TimerStat()
        self.load_wait_timer = TimerStat()
        self.daemon = True
        self.weights_updated = False
        self.stats = {}
        self.stopped = False
        self.num_steps = 0

    def run(self):
        while not self.stopped:
            self.step()

    def step(self):
        with self.queue_timer:
            batch, _ = self.minibatch_buffer.get()

        with self.grad_timer:
            fetches = self.local_worker.learn_on_batch(batch)
            self.weights_updated = True
            self.stats = get_learner_stats(fetches)

        self.num_steps += 1
        self.outqueue.put(batch.count)
        self.learner_queue_size.push(self.inqueue.qsize())