예제 #1
0
 def _run_training_loop(self, m, curr_epoch):
     """Run one training epoch for model `m` and save the model afterwards.

     Each attempt runs inside `self._new_session(m)`. When the attempt raises
     `tf.errors.AbortedError` or `tf.errors.UnavailableError`, the error is
     logged and the whole attempt is repeated; there is no limit on the
     number of attempts. Any other exception propagates to the caller.

     Args:
       m: The model handed to `self._new_session` and to
         `helper_utils.run_epoch_training`.
       curr_epoch: Epoch identifier. It is forwarded to the training helper,
         used as the `step` argument of `self.save_model`, and logged.

     Returns:
       Whatever `helper_utils.run_epoch_training` returned on the attempt
       that completed.
     """
     epoch_began = time.time()
     epoch_done = False
     while not epoch_done:
         try:
             with self._new_session(m):
                 accuracy = helper_utils.run_epoch_training(
                     self.session, m, self.data_loader, curr_epoch)
                 tf.logging.info('Saving model after epoch')
                 self.save_model(step=curr_epoch)
                 epoch_done = True
         except (tf.errors.AbortedError, tf.errors.UnavailableError) as err:
             # The error may surface while leaving the `with` block, after
             # the flag was already set; clear it so the attempt is repeated.
             epoch_done = False
             tf.logging.info('Retryable error caught: %s.  Retrying.', err)
     tf.logging.info('Finished epoch: {}'.format(curr_epoch))
     elapsed_minutes = (time.time() - epoch_began) / 60.0
     tf.logging.info('Epoch time(min): {}'.format(elapsed_minutes))
     return accuracy
예제 #2
0
 def _run_training_loop(self, m, curr_epoch):
   """Run one training epoch for model `m` and save the model afterwards.

   Each attempt runs inside `self._new_session(m)`. When the attempt raises
   `tf.errors.AbortedError` or `tf.errors.UnavailableError`, the error is
   logged and the whole attempt is repeated; there is no limit on the number
   of attempts. Any other exception propagates to the caller.

   Args:
     m: The model handed to `self._new_session` and to
       `helper_utils.run_epoch_training`.
     curr_epoch: Epoch identifier. It is forwarded to the training helper,
       used as the `step` argument of `self.save_model`, and logged.

   Returns:
     Whatever `helper_utils.run_epoch_training` returned on the attempt that
     completed.
   """
   epoch_began = time.time()
   epoch_done = False
   while not epoch_done:
     try:
       with self._new_session(m):
         accuracy = helper_utils.run_epoch_training(
             self.session, m, self.data_loader, curr_epoch)
         tf.logging.info('Saving model after epoch')
         self.save_model(step=curr_epoch)
         epoch_done = True
     except (tf.errors.AbortedError, tf.errors.UnavailableError) as err:
       # The error may surface while leaving the `with` block, after the
       # flag was already set; clear it so the attempt is repeated.
       epoch_done = False
       tf.logging.info('Retryable error caught: %s.  Retrying.', err)
   tf.logging.info('Finished epoch: {}'.format(curr_epoch))
   elapsed_minutes = (time.time() - epoch_began) / 60.0
   tf.logging.info('Epoch time(min): {}'.format(elapsed_minutes))
   return accuracy