Ejemplo n.º 1
0
    def _load_params(self, json_data):
        """Read this agent's hyperparameters from ``json_data``.

        The parent class loads its own parameters first. Each setting below
        is then taken from ``json_data`` when its key is present and from the
        listed fallback otherwise. Finally the replay buffer size is checked
        against twice the per-process batch size.

        Args:
            json_data: Container supporting ``in`` and ``[]`` lookups by key.

        Raises:
            AssertionError: If ``self.replay_buffer_size`` is not strictly
                greater than twice the per-process batch size.
        """
        super()._load_params(json_data)

        # (attribute to set, key in json_data, fallback when key is absent)
        settings = (
            ('epochs', self.EPOCHS_KEY, 1),
            ('batch_size', self.BATCH_SIZE_KEY, 1024),
            ('ratio_clip', self.RATIO_CLIP_KEY, 0.2),
            ('norm_adv_clip', self.NORM_ADV_CLIP_KEY, 5),
            ('td_lambda', self.TD_LAMBDA_KEY, 0.95),
            ('tar_clip_frac', self.TAR_CLIP_FRAC, -1),
            ('actor_stepsize_decay', self.ACTOR_STEPSIZE_DECAY, 0.5),
        )
        for attr_name, key, fallback in settings:
            if key in json_data:
                setattr(self, attr_name, json_data[key])
            else:
                setattr(self, attr_name, fallback)

        # The global batch is split across the MPI processes.
        per_proc_batch = int(self.batch_size / MPIUtil.get_num_procs())
        # needed to prevent buffer overflow
        replay_floor = 2 * per_proc_batch
        # NOTE(review): replay_buffer_size is not set in this method;
        # presumably the parent's _load_params provides it - confirm.
        assert self.replay_buffer_size > replay_floor

        self.replay_buffer_size = np.maximum(replay_floor,
                                             self.replay_buffer_size)
Ejemplo n.º 2
0
  def _load_params(self, json_data):
    """Populate hyperparameters from ``json_data`` with built-in fallbacks.

    Runs the parent loader first, then reads each optional key. A missing key
    leaves the corresponding attribute at its fallback value. Afterwards the
    replay buffer size is asserted to exceed twice the per-process batch size.

    Args:
      json_data: Container supporting ``in`` and ``[]`` lookups by key.

    Raises:
      AssertionError: If ``self.replay_buffer_size`` is not strictly greater
        than twice the per-process batch size.
    """
    super()._load_params(json_data)

    def setting(key, fallback):
      # Value stored under `key`, or `fallback` when the key is absent.
      if key in json_data:
        return json_data[key]
      return fallback

    self.epochs = setting(self.EPOCHS_KEY, 1)
    self.batch_size = setting(self.BATCH_SIZE_KEY, 1024)
    self.ratio_clip = setting(self.RATIO_CLIP_KEY, 0.2)
    self.norm_adv_clip = setting(self.NORM_ADV_CLIP_KEY, 5)
    self.td_lambda = setting(self.TD_LAMBDA_KEY, 0.95)
    self.tar_clip_frac = setting(self.TAR_CLIP_FRAC, -1)
    self.actor_stepsize_decay = setting(self.ACTOR_STEPSIZE_DECAY, 0.5)

    # Each MPI process handles an equal share of the global batch.
    procs = MPIUtil.get_num_procs()
    batch_per_proc = int(self.batch_size / procs)
    # needed to prevent buffer overflow
    smallest_allowed = 2 * batch_per_proc
    assert self.replay_buffer_size > smallest_allowed

    self.replay_buffer_size = np.maximum(smallest_allowed, self.replay_buffer_size)
Ejemplo n.º 3
0
    def _load_params(self, json_data):
        """Override agent settings with whatever ``json_data`` provides.

        Keys that are absent leave the corresponding attribute untouched.
        After loading, the mini-batch size is rounded up to a multiple of the
        number of MPI processes (at least one sample per process), the current
        exploration parameters are initialised from the starting ones, and the
        normalizer-update flag is derived from ``normalizer_samples``.

        Args:
            json_data: Container supporting ``in`` and ``[]`` lookups by key.

        Raises:
            AssertionError: If the starting and ending exploration parameters
                have different ``noise`` values.
        """
        # (key in json_data, attribute to overwrite, converter or None to
        # store the raw value)
        scalar_fields = (
            (self.UPDATE_PERIOD_KEY, 'update_period', int),
            (self.ITERS_PER_UPDATE, 'iters_per_update', int),
            (self.DISCOUNT_KEY, 'discount', None),
            (self.MINI_BATCH_SIZE_KEY, 'mini_batch_size', int),
            (self.REPLAY_BUFFER_SIZE_KEY, 'replay_buffer_size', int),
            (self.INIT_SAMPLES_KEY, 'init_samples', int),
            (self.NORMALIZER_SAMPLES_KEY, 'normalizer_samples', int),
            (self.OUTPUT_ITERS_KEY, 'output_iters', None),
            (self.INT_OUTPUT_ITERS_KEY, 'int_output_iters', None),
            (self.TEST_EPISODES_KEY, 'test_episodes', int),
            (self.EXP_ANNEAL_SAMPLES_KEY, 'exp_anneal_samples', None),
        )
        for key, attr_name, convert in scalar_fields:
            if key not in json_data:
                continue
            raw = json_data[key]
            setattr(self, attr_name, raw if convert is None else convert(raw))

        # Exploration parameter objects load themselves from their sub-entry.
        if self.EXP_PARAM_BEG_KEY in json_data:
            self.exp_params_beg.load(json_data[self.EXP_PARAM_BEG_KEY])
        if self.EXP_PARAM_END_KEY in json_data:
            self.exp_params_end.load(json_data[self.EXP_PARAM_END_KEY])

        # Split the mini-batch across processes, rounding up, with a floor of
        # one sample per process; the global size is then made consistent.
        procs = MPIUtil.get_num_procs()
        per_proc = int(np.ceil(self.mini_batch_size / procs))
        self._local_mini_batch_size = np.maximum(per_proc, 1)
        self.mini_batch_size = self._local_mini_batch_size * procs

        # noise std should not change
        assert self.exp_params_beg.noise == self.exp_params_end.noise
        self.exp_params_curr = copy.deepcopy(self.exp_params_beg)
        self.exp_params_end.noise = self.exp_params_beg.noise

        self._need_normalizer_update = self.normalizer_samples > 0
Ejemplo n.º 4
0
    def _update_mode_test(self):
        """Finish the test phase once enough test episodes have been run.

        The local episode count is scaled by the number of MPI processes and
        compared against ``self.test_episodes``. When the target is reached,
        the returns and episode counts are summed via ``MPIUtil.reduce_sum``,
        their ratio is stored in ``self.avg_test_return``, and training mode
        is re-initialised if training is enabled.
        """
        episodes_done = self.test_episode_count * MPIUtil.get_num_procs()
        if episodes_done < self.test_episodes:
            # Not enough test episodes yet; nothing to update.
            return

        summed_return = MPIUtil.reduce_sum(self.test_return)
        summed_count = MPIUtil.reduce_sum(self.test_episode_count)
        self.avg_test_return = summed_return / summed_count

        if self.enable_training:
            self._init_mode_train()
Ejemplo n.º 5
0
    def _update_mode_test(self):
        """Record the average test return when the test quota is met.

        Nothing happens until the local episode count times the number of MPI
        processes reaches ``self.test_episodes``. At that point the summed
        return divided by the summed episode count is saved as
        ``self.avg_test_return``, and ``_init_mode_train`` is called when
        ``self.enable_training`` is truthy.
        """
        quota_met = (self.test_episode_count * MPIUtil.get_num_procs()
                     >= self.test_episodes)
        if quota_met:
            total_return = MPIUtil.reduce_sum(self.test_return)
            total_episodes = MPIUtil.reduce_sum(self.test_episode_count)
            self.avg_test_return = total_return / total_episodes

            if self.enable_training:
                self._init_mode_train()
Ejemplo n.º 6
0
    def _load_params(self, json_data):
        """Apply the settings found in ``json_data`` to this agent.

        Only keys that are present are applied; everything else keeps its
        current value. The mini-batch size is then adjusted to an exact
        multiple of the MPI process count, the current exploration parameters
        are copied from the starting ones, and ``_need_normalizer_update`` is
        set according to whether ``normalizer_samples`` is positive.

        Args:
            json_data: Container supporting ``in`` and ``[]`` lookups by key.

        Raises:
            AssertionError: If the starting and ending exploration parameters
                have different ``noise`` values.
        """
        def override(attr_name, key, cast=None):
            # Overwrite `attr_name` with the entry under `key`, if present,
            # optionally passing the value through `cast` first.
            if key in json_data:
                value = json_data[key]
                if cast is not None:
                    value = cast(value)
                setattr(self, attr_name, value)

        override('update_period', self.UPDATE_PERIOD_KEY, int)
        override('iters_per_update', self.ITERS_PER_UPDATE, int)
        override('discount', self.DISCOUNT_KEY)
        override('mini_batch_size', self.MINI_BATCH_SIZE_KEY, int)
        override('replay_buffer_size', self.REPLAY_BUFFER_SIZE_KEY, int)
        override('init_samples', self.INIT_SAMPLES_KEY, int)
        override('normalizer_samples', self.NORMALIZER_SAMPLES_KEY, int)
        override('output_iters', self.OUTPUT_ITERS_KEY)
        override('int_output_iters', self.INT_OUTPUT_ITERS_KEY)
        override('test_episodes', self.TEST_EPISODES_KEY, int)
        override('exp_anneal_samples', self.EXP_ANNEAL_SAMPLES_KEY)

        # The exploration parameter objects parse their own sub-entries.
        if self.EXP_PARAM_BEG_KEY in json_data:
            self.exp_params_beg.load(json_data[self.EXP_PARAM_BEG_KEY])
        if self.EXP_PARAM_END_KEY in json_data:
            self.exp_params_end.load(json_data[self.EXP_PARAM_END_KEY])

        # Per-process share of the mini-batch: rounded up, never below one.
        proc_count = MPIUtil.get_num_procs()
        local_size = int(np.ceil(self.mini_batch_size / proc_count))
        self._local_mini_batch_size = np.maximum(local_size, 1)
        # Keep the global size an exact multiple of the process count.
        self.mini_batch_size = self._local_mini_batch_size * proc_count

        # noise std should not change
        assert self.exp_params_beg.noise == self.exp_params_end.noise
        self.exp_params_curr = copy.deepcopy(self.exp_params_beg)
        self.exp_params_end.noise = self.exp_params_beg.noise

        self._need_normalizer_update = self.normalizer_samples > 0
Ejemplo n.º 7
0
  def update_flatgrad(self, flat_grad, grad_scale=1.0):
    """Average a flat gradient across MPI processes and apply one update.

    Args:
      flat_grad: Local flat gradient buffer. It is scaled in place when
        ``grad_scale`` differs from 1.0, so the caller's buffer is modified.
      grad_scale: Multiplier applied to ``flat_grad`` before the reduction.

    Raises:
      AssertionError: If the periodic ``check_synced`` test fails.
    """
    # Every CHECK_SYNC_ITERS iterations, assert that check_synced() passes.
    sync_check_due = self.iter % self.CHECK_SYNC_ITERS == 0
    if sync_check_due:
      assert self.check_synced(), Logger.print2('Network parameters desynchronized')

    scaling_needed = grad_scale != 1.0
    if scaling_needed:
      flat_grad *= grad_scale  # in place on purpose: mutates the caller's buffer

    # Sum the gradients from all processes, then divide to get the mean.
    MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
    self._global_flat_grad /= MPIUtil.get_num_procs()

    # Load the averaged gradient and run the update op with the gradient feed.
    self._load_flat_grad(self._global_flat_grad)
    self.sess.run([self._update], self._grad_feed)
    self.iter += 1
Ejemplo n.º 8
0
    def update_flatgrad(self, flat_grad, grad_scale=1.0):
        """Apply one update step using the mean gradient over all processes.

        Steps, in order: an assertion on ``check_synced()`` every
        ``CHECK_SYNC_ITERS`` iterations, optional in-place scaling of
        ``flat_grad``, a sum-reduction into ``self._global_flat_grad``
        followed by division by the process count, loading that gradient,
        running the update op, and incrementing ``self.iter``.

        Args:
            flat_grad: Local flat gradient buffer; modified in place when
                ``grad_scale`` is not 1.0.
            grad_scale: Factor applied to ``flat_grad`` before reduction.

        Raises:
            AssertionError: If ``check_synced()`` returns a falsy value on a
                check iteration.
        """
        on_check_iteration = (self.iter % self.CHECK_SYNC_ITERS) == 0
        if on_check_iteration:
            assert self.check_synced(), Logger.print2(
                'Network parameters desynchronized')

        if grad_scale != 1.0:
            # In-place multiply so the caller's buffer is what gets reduced.
            flat_grad *= grad_scale

        # Allreduce with SUM, then divide in place to turn the sum into a mean.
        world = MPI.COMM_WORLD
        world.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
        self._global_flat_grad /= MPIUtil.get_num_procs()

        self._load_flat_grad(self._global_flat_grad)
        self.sess.run([self._update], self._grad_feed)
        self.iter += 1
Ejemplo n.º 9
0
 def _get_iters_per_update(self):
     """Return ``iters_per_update`` multiplied by the number of MPI processes."""
     proc_count = MPIUtil.get_num_procs()
     return proc_count * self.iters_per_update
Ejemplo n.º 10
0
 def _build_replay_buffer(self, buffer_size):
     """Create this process's replay buffer.

     Args:
         buffer_size: Total buffer size; it is divided by the number of MPI
             processes and truncated to an int before use.
     """
     # Each process gets an equal share of the requested size.
     per_proc_size = int(buffer_size / MPIUtil.get_num_procs())
     self.replay_buffer = ReplayBuffer(buffer_size=per_proc_size)
     self.replay_buffer_initialized = False
Ejemplo n.º 11
0
 def _get_iters_per_update(self):
     """Scale the configured iterations per update by the MPI process count."""
     scaled_iters = MPIUtil.get_num_procs() * self.iters_per_update
     return scaled_iters
Ejemplo n.º 12
0
 def _build_replay_buffer(self, buffer_size):
     """Allocate a replay buffer sized for a single MPI process.

     Args:
         buffer_size: Requested overall size. The buffer that is built holds
             ``int(buffer_size / num_procs)`` entries.
     """
     procs = MPIUtil.get_num_procs()
     local_size = int(buffer_size / procs)  # truncates toward zero
     self.replay_buffer_initialized = False
     self.replay_buffer = ReplayBuffer(buffer_size=local_size)