Example #1
0
 def _valid_train_step(self):
     """Report whether the replay buffers hold enough data to train on.

     Two quantities are read from ``self.replay_buffer`` and passed through
     ``MPIUtil`` reductions (presumably combining values from all MPI
     workers -- confirm against MPIUtil):

     * the current buffer size, reduced with ``reduce_sum``;
     * the number of entries matching ``self.EXP_ACTION_FLAG``, reduced
       with ``reduce_min``.

     Returns:
         True only when the summed size is strictly greater than
         ``self.batch_size`` and the reduced minimum of flagged entries is
         greater than zero.
     """
     local_size = self.replay_buffer.get_current_size()
     local_exp_count = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)

     # Both reductions are issued before any comparison so neither is skipped
     # by short-circuit evaluation. NOTE(review): if these are collective MPI
     # calls, skipping one on some ranks would stall the others -- confirm.
     total_size = int(MPIUtil.reduce_sum(local_size))
     min_exp_count = int(MPIUtil.reduce_min(local_exp_count))

     enough_samples = total_size > self.batch_size
     has_exp_samples = min_exp_count > 0
     return enough_samples and has_exp_samples
Example #2
0
 def _valid_train_step(self):
     """Return True when a training step can be taken with the stored data.

     The check has two parts, each based on a value read from
     ``self.replay_buffer`` and then reduced through ``MPIUtil``
     (presumably across MPI processes -- verify against MPIUtil):

     1. ``reduce_sum`` of the current buffer size must be strictly greater
        than ``self.batch_size``.
     2. ``reduce_min`` of the count of entries filtered by
        ``self.EXP_ACTION_FLAG`` must be greater than zero.
     """
     # Read the local counts first ...
     buffer_size = self.replay_buffer.get_current_size()
     flagged_count = self.replay_buffer.count_filtered(self.EXP_ACTION_FLAG)

     # ... then reduce them. Both reductions always run, in this order,
     # regardless of the outcome of either check.
     summed_buffer_size = int(MPIUtil.reduce_sum(buffer_size))
     smallest_flagged_count = int(MPIUtil.reduce_min(flagged_count))

     batch_is_available = summed_buffer_size > self.batch_size
     flagged_data_present = smallest_flagged_count > 0
     return batch_is_available and flagged_data_present