Example #1
    def _init(self,
              learning_starts=1000,
              buffer_size=10000,
              prioritized_replay=True,
              prioritized_replay_alpha=0.6,
              prioritized_replay_beta=0.4,
              prioritized_replay_eps=1e-6,
              train_batch_size=32,
              sample_batch_size=4):

        self.replay_starts = learning_starts
        self.prioritized_replay_beta = prioritized_replay_beta
        self.prioritized_replay_eps = prioritized_replay_eps
        self.train_batch_size = train_batch_size

        # Stats
        self.update_weights_timer = TimerStat()
        self.sample_timer = TimerStat()
        self.replay_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()

        # Set up replay buffer
        if prioritized_replay:
            self.replay_buffer = PrioritizedReplayBuffer(
                buffer_size, alpha=prioritized_replay_alpha)
        else:
            self.replay_buffer = ReplayBuffer(buffer_size)

        assert buffer_size >= self.replay_starts
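
As a side note on how these defaults fit together: in proportional prioritized replay, prioritized_replay_alpha sharpens the sampling priorities, prioritized_replay_beta controls the importance-sampling correction, and prioritized_replay_eps keeps priorities strictly positive. A self-contained sketch of that arithmetic (the TD errors below are made up for illustration; this is not the buffer's internal code):

import numpy as np

# Hypothetical TD errors for a handful of stored transitions.
td_errors = np.array([0.5, 0.1, 2.0, 0.05])

alpha, beta, eps = 0.6, 0.4, 1e-6  # matches the defaults above

# Priorities are |TD error| + eps, sharpened by alpha.
priorities = (np.abs(td_errors) + eps) ** alpha
probs = priorities / priorities.sum()

# Importance-sampling weights correct for the non-uniform sampling
# and are conventionally normalized by the maximum weight.
n = len(td_errors)
weights = (n * probs) ** (-beta)
weights /= weights.max()
print(probs, weights)
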
Example #2
    def _init(self, num_sgd_iter=1, timesteps_per_batch=1):
        self.update_weights_timer = TimerStat()
        self.sample_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()
        self.num_sgd_iter = num_sgd_iter
        self.timesteps_per_batch = timesteps_per_batch
Example #3
    def __init__(self,
                 workers,
                 num_sgd_iter=1,
                 train_batch_size=1,
                 sgd_minibatch_size=0,
                 standardize_fields=frozenset([]),
                 aux_loss_every_k=16,
                 aux_loss_num_sgd_iter=9,
                 aux_loss_start_after_num_steps=0):
        PolicyOptimizer.__init__(self, workers)

        self.update_weights_timer = TimerStat()
        self.standardize_fields = standardize_fields
        self.sample_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()
        self.num_sgd_iter = num_sgd_iter
        self.sgd_minibatch_size = sgd_minibatch_size
        self.train_batch_size = train_batch_size
        self.learner_stats = {}
        self.policies = dict(
            self.workers.local_worker().foreach_trainable_policy(lambda p, i:
                                                                 (i, p)))
        logger.debug("Policies to train: {}".format(self.policies))

        self.aux_loss_every_k = aux_loss_every_k
        self.aux_loss_num_sgd_iter = aux_loss_num_sgd_iter
        self.aux_loss_start_after_num_steps = aux_loss_start_after_num_steps
        self.memory = []
        # Assert that train batch size is divisible by sgd minibatch size to make populating
        # policy logits simpler.
        assert train_batch_size % sgd_minibatch_size == 0, (
            f"train_batch_size: {train_batch_size}, "
            f"sgd_minibatch_size: {sgd_minibatch_size}")
Example #4
    def _init(self, num_sgd_iter=1, train_batch_size=1):
        self.update_weights_timer = TimerStat()
        self.sample_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()
        self.num_sgd_iter = num_sgd_iter
        self.train_batch_size = train_batch_size
        self.learner_stats = {}
Example #5
    def testCombiningStat(self):
        for shape in [(), (3, ), (3, 4)]:
            li = []
            rs1 = RunningStat(shape)
            rs2 = RunningStat(shape)
            rs = RunningStat(shape)
            for _ in range(5):
                val = np.random.randn(*shape)
                rs1.push(val)
                rs.push(val)
                li.append(val)
            for _ in range(9):
                rs2.push(val)
                rs.push(val)
                li.append(val)
            rs1.update(rs2)
            assert np.allclose(rs.mean, rs1.mean)
            assert np.allclose(rs.std, rs1.std)
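
rs1.update(rs2) has to merge the statistics of two separately observed streams. The standard parallel formulas for combining counts, means, and sums of squared deviations (presumably what update implements) can be checked directly against NumPy:

import numpy as np

a = np.random.randn(5)   # stream seen by rs1
b = np.random.randn(9)   # stream seen by rs2

n1, n2 = len(a), len(b)
m1, m2 = a.mean(), b.mean()
s1 = ((a - m1) ** 2).sum()
s2 = ((b - m2) ** 2).sum()

# Parallel combination (Chan et al.): merge counts, means and
# sums of squared deviations of the two streams.
n = n1 + n2
delta = m2 - m1
mean = m1 + delta * n2 / n
s = s1 + s2 + delta ** 2 * n1 * n2 / n

assert np.allclose(mean, np.concatenate([a, b]).mean())
assert np.allclose(s / (n - 1), np.var(np.concatenate([a, b]), ddof=1))
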
Example #6
    def __init__(self, workers, num_sgd_iter=1, train_batch_size=1):
        PolicyOptimizer.__init__(self, workers)

        self.update_weights_timer = TimerStat()
        self.sample_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()
        self.num_sgd_iter = num_sgd_iter
        self.train_batch_size = train_batch_size
        self.learner_stats = {}
Example #7
    def testRunningStat(self):
        for shp in ((), (3,), (3, 4)):
            li = []
            rs = RunningStat(shp)
            for _ in range(5):
                val = np.random.randn(*shp)
                rs.push(val)
                li.append(val)
                m = np.mean(li, axis=0)
                self.assertTrue(np.allclose(rs.mean, m))
                v = np.square(m) if (len(li) == 1) else np.var(li, ddof=1, axis=0)
                self.assertTrue(np.allclose(rs.var, v))
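
Together, the two tests pin down the expected interface: construction from a shape, push, and the mean/var/std/update accessors. A minimal Welford-style sketch that satisfies them, written here for illustration rather than taken from RLlib:

import numpy as np

class RunningStat:
    """Tracks mean and variance of a stream of values (Welford's algorithm)."""

    def __init__(self, shape=()):
        self._n = 0
        self._M = np.zeros(shape)   # running mean
        self._S = np.zeros(shape)   # running sum of squared deviations

    def push(self, x):
        x = np.asarray(x)
        self._n += 1
        if self._n == 1:
            self._M[...] = x
        else:
            old_M = self._M.copy()
            self._M[...] = old_M + (x - old_M) / self._n
            self._S[...] = self._S + (x - old_M) * (x - self._M)

    def update(self, other):
        # Merge another RunningStat using the parallel formulas shown above.
        n1, n2 = self._n, other._n
        n = n1 + n2
        delta = other._M - self._M
        self._S[...] = self._S + other._S + np.square(delta) * n1 * n2 / n
        self._M[...] = self._M + delta * n2 / n
        self._n = n

    @property
    def n(self):
        return self._n

    @property
    def mean(self):
        return self._M

    @property
    def var(self):
        # With a single sample, fall back to mean**2 as the tests expect.
        return self._S / (self._n - 1) if self._n > 1 else np.square(self._M)

    @property
    def std(self):
        return np.sqrt(self.var)
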
Example #8
    def _init(self,
              learning_starts=1000,
              buffer_size=10000,
              prioritized_replay=True,
              prioritized_replay_alpha=0.6,
              prioritized_replay_beta=0.4,
              schedule_max_timesteps=100000,
              beta_annealing_fraction=0.2,
              final_prioritized_replay_beta=0.4,
              prioritized_replay_eps=1e-6,
              train_batch_size=32,
              sample_batch_size=4):

        self.replay_starts = learning_starts
        # linearly annealing beta used in Rainbow paper
        self.prioritized_replay_beta = LinearSchedule(
            schedule_timesteps=int(schedule_max_timesteps *
                                   beta_annealing_fraction),
            initial_p=prioritized_replay_beta,
            final_p=final_prioritized_replay_beta)
        self.prioritized_replay_eps = prioritized_replay_eps
        self.train_batch_size = train_batch_size

        # Stats
        self.update_weights_timer = TimerStat()
        self.sample_timer = TimerStat()
        self.replay_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()
        self.learner_stats = {}

        # Set up replay buffer
        if prioritized_replay:

            def new_buffer():
                return PrioritizedReplayBuffer(buffer_size,
                                               alpha=prioritized_replay_alpha)
        else:

            def new_buffer():
                return ReplayBuffer(buffer_size)

        self.replay_buffers = collections.defaultdict(new_buffer)

        assert buffer_size >= self.replay_starts
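
With the defaults shown, the schedule is flat (initial and final beta are both 0.4), but the annealed value at timestep t is simply a linear interpolation over the first schedule_max_timesteps * beta_annealing_fraction steps. A self-contained illustration of that arithmetic (a non-default final_p of 1.0 is assumed here, as in the Rainbow paper; this is not RLlib's LinearSchedule itself):

schedule_max_timesteps = 100000
beta_annealing_fraction = 0.2
initial_beta, final_beta = 0.4, 1.0   # final value of 1.0 chosen for illustration

schedule_timesteps = int(schedule_max_timesteps * beta_annealing_fraction)

def beta_at(t):
    # Linear interpolation, clipped once the schedule ends.
    frac = min(float(t) / schedule_timesteps, 1.0)
    return initial_beta + frac * (final_beta - initial_beta)

print(beta_at(0), beta_at(10000), beta_at(50000))  # 0.4, 0.7, 1.0
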
Example #9
    def __init__(self, workers, train_batch_size=10000, microbatch_size=1000):
        PolicyOptimizer.__init__(self, workers)

        if train_batch_size <= microbatch_size:
            raise ValueError(
                "The microbatch size must be smaller than the train batch "
                "size, got {} vs {}".format(microbatch_size, train_batch_size))

        self.update_weights_timer = TimerStat()
        self.sample_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()
        self.train_batch_size = train_batch_size
        self.microbatch_size = microbatch_size
        self.learner_stats = {}
        self.policies = dict(
            self.workers.local_worker().foreach_trainable_policy(lambda p, i:
                                                                 (i, p)))
        logger.debug("Policies to train: {}".format(self.policies))
Example #10
    def __init__(self,
                 workers,
                 num_sgd_iter=1,
                 train_batch_size=1,
                 sgd_minibatch_size=0,
                 standardize_fields=frozenset([])):
        PolicyOptimizer.__init__(self, workers)

        self.update_weights_timer = TimerStat()
        self.standardize_fields = standardize_fields
        self.sample_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()
        self.num_sgd_iter = num_sgd_iter
        self.sgd_minibatch_size = sgd_minibatch_size
        self.train_batch_size = train_batch_size
        self.learner_stats = {}
        self.policies = dict(self.workers.local_worker()
                             .foreach_trainable_policy(lambda p, i: (i, p)))
        logger.debug("Policies to train: {}".format(self.policies))
Example #11
    def _init(self,
              learning_starts=1000,
              buffer_size=10000,
              prioritized_replay=True,
              prioritized_replay_alpha=0.6,
              prioritized_replay_beta=0.4,
              prioritized_replay_eps=1e-6,
              train_batch_size=32,
              sample_batch_size=4,
              clip_rewards=True):

        self.replay_starts = learning_starts
        self.prioritized_replay_beta = prioritized_replay_beta
        self.prioritized_replay_eps = prioritized_replay_eps
        self.train_batch_size = train_batch_size

        # Stats
        self.update_weights_timer = TimerStat()
        self.sample_timer = TimerStat()
        self.replay_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()

        # Set up replay buffer
        if prioritized_replay:

            def new_buffer():
                return PrioritizedReplayBuffer(
                    buffer_size,
                    alpha=prioritized_replay_alpha,
                    clip_rewards=clip_rewards)
        else:

            def new_buffer():
                return ReplayBuffer(buffer_size, clip_rewards)

        self.replay_buffers = collections.defaultdict(new_buffer)

        assert buffer_size >= self.replay_starts
Example #12
    def _init(self, batch_size=32):
        self.update_weights_timer = TimerStat()
        self.sample_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()
        self.batch_size = batch_size
Example #13
    def _init(self):
        self.update_weights_timer = TimerStat()
        self.sample_timer = TimerStat()
        self.grad_timer = TimerStat()
        self.throughput = RunningStat()