def report_training(self, step, num_steps, learning_rate, report_stats, multigpu=False): """ This is the user-defined batch-level traing progress report function. Args: step(int): current step count. num_steps(int): total number of batches. learning_rate(float): current learning rate. report_stats(Statistics): old Statistics instance. Returns: report_stats(Statistics): updated Statistics instance. """ if self.start_time < 0: raise ValueError("""ReportMgr needs to be started (set 'start_time' or use 'start()'""") if step % self.report_every == 0: if multigpu: report_stats = \ Statistics.all_gather_stats(report_stats) self._report_training(step, num_steps, learning_rate, report_stats) self.progress_step += 1 return Statistics() else: return report_stats
def _maybe_gather_stats(self, stat): """ Gather statistics in multi-processes cases Args: stat(:obj:onmt.utils.Statistics): a Statistics object to gather or None (it returns None in this case) Returns: stat: the updated (or unchanged) stat object """ if stat is not None and self.n_gpu > 1: return Statistics.all_gather_stats(stat) return stat