Example #1
0
 def setUpClass(cls):
     """Run ``run_train`` once and keep its CSV results on the class.

     The value returned by ``run_train`` is passed to ``get_csv`` for
     'validation.csv' and 'training.csv'; the results are stored as
     ``cls.validation`` and ``cls.training``.
     """
     train_args = {
         '--data-dir': cifar10_data_dir,
         '--epochs': 50,
         '--model': 'resnext',
         '--model-size': 29,
         '--batch-size': 4,
         '--warmup-epochs': 2,
         '--lr-schedule': 'cosine',
         '--label-smoothing': '0.05',
         '--base-learning-rate': -5,
         '--precision': '16.32',
     }
     train_output = run_train(**train_args)
     cls.validation = get_csv(train_output, 'validation.csv')
     cls.training = get_csv(train_output, 'training.csv')
Example #2
0
 def setUpClass(cls):
     """Run ``run_train`` once and record its log directory and CSV results.

     ``cls.logdir`` is taken from the first output line containing
     'Saving to ' (or left as ``None`` when no such line exists);
     ``cls.validation`` and ``cls.training`` hold what ``get_csv`` returns
     for 'validation.csv' and 'training.csv'.
     """
     train_args = {
         '--data-dir': cifar10_data_dir,
         '--name-suffix': 'penguin',
         '--log-dir': 'logs/walrus',
         '--iterations': 10,
         '--batches-per-step': 10,
     }
     train_output = run_train(**train_args)

     # Only the first matching line is used.
     # NOTE(review): the fixed offset 11 presumably skips a one-character
     # prefix plus 'Saving to ' -- confirm against the training log format.
     saving_lines = (
         text for text in train_output.split('\n') if 'Saving to ' in text
     )
     first_match = next(saving_lines, None)
     cls.logdir = None if first_match is None else first_match[11:]

     cls.validation = get_csv(train_output, 'validation.csv')
     cls.training = get_csv(train_output, 'training.csv')
 def setUp(self):
     """Run ``run_train`` with the options below before each test.

     The test instance itself is passed as the first positional argument
     to ``run_train``; ``self.training`` holds what ``get_csv`` returns for
     'training.csv'.
     """
     options = {
         '--iterations': 100,
         '--batches-per-step': 10,
         '--dataset': 'cifar-10',
         '--generated-data': '',
         '--model': 'EfficientNet',
         '--model-size': 'cifar',
         '--micro-batch-size': 10,
         '--no-validation': '',
         '--enable-recomputation': '',
         '--group-dim': 16,
         '--expand-ratio': 4,
     }
     train_output = run_train(self, **options)
     self.training = get_csv(train_output, 'training.csv')
Example #4
0
 def setUpClass(cls):
     """Run ``run_train`` with the squeezenet options and keep the CSV results.

     ``cls.validation`` and ``cls.training`` are set from ``get_csv`` for
     'validation.csv' and 'training.csv' respectively.
     """
     train_args = {
         '--data-dir': cifar10_data_dir,
         '--epochs': 400,
         '--model': 'squeezenet',
         '--use-bypass': '',
         '--lr-schedule': 'polynomial_decay_lr',
         '--label-smoothing': '0.05',
         '--abs-learning-rate': 0.1,
         '--abs-end-learning-rate': 0.0001,
         '--warmup-epochs': 0,
         '--precision': '16.32',
     }
     train_output = run_train(**train_args)
     cls.validation = get_csv(train_output, 'validation.csv')
     cls.training = get_csv(train_output, 'training.csv')
Example #5
0
 def setUpClass(cls):
     """Run ``run_train`` with the pipelined resnext options below.

     The raw return value is kept as ``cls.out``; ``cls.training`` holds
     what ``get_csv`` returns for 'training.csv'.
     """
     train_args = {
         '--iterations': 1000,
         '--dataset': 'imagenet',
         '--model': 'resnext',
         '--model-size': 14,
         '--synthetic-data': '',
         '--shards': 2,
         '--pipeline-depth': 128,
         '--batch-size': 1,
         '--no-validation': '',
         '--pipeline-splits': 'b2/0/relu',
     }
     train_output = run_train(**train_args)
     cls.out = train_output
     cls.training = get_csv(train_output, 'training.csv')
Example #6
0
 def setUpClass(cls):
     """Run ``run_train`` with a fixed seed and keep the CSV results.

     ``cls.validation`` and ``cls.training`` are set from ``get_csv`` for
     'validation.csv' and 'training.csv' respectively.
     """
     train_args = {
         '--dataset': 'cifar-10',
         '--epochs': 10,
         '--model-size': 14,
         '--batch-norm': '',
         '--pipeline-num-parallel': 8,
         '--generated-data': '',
         '--batch-size': 16,
         '--base-learning-rate': -4,
         '--precision': '32.32',
         '--seed': 1234,
         '--warmup-epochs': 0,
         '--no-stochastic-rounding': '',
         '--batches-per-step': 100,
     }
     train_output = run_train(**train_args)
     cls.validation = get_csv(train_output, 'validation.csv')
     cls.training = get_csv(train_output, 'training.csv')
 def setUpClass(cls):
     """Run ``run_train`` with the efficientnet B1 pipeline options below.

     The raw return value is kept as ``cls.out``; ``cls.training`` holds
     what ``get_csv`` returns for 'training.csv'.
     """
     train_args = {
         '--generated-data': '',
         '--dataset': 'ImageNet',
         '--model': 'efficientnet',
         '--model-size': 'B1',
         '--shards': 4,
         '--pipeline': '',
         '--gradient-accumulation-count': 128,
         '--batch-size': 4,
         '--no-validation': '',
         '--xla-recompute': '',
         '--available-memory-proportion': 0.2,
         '--pipeline-schedule': 'Grouped',
         '--iterations': 10,
         '--pipeline-splits': 'block2a/c',
         # NOTE(review): this entry has a non-option key. Presumably
         # run_train flattens key/value pairs in order, so 'block4a' and
         # 'block5c' become further --pipeline-splits values -- confirm
         # against run_train. It must stay directly after the entry above.
         'block4a': 'block5c',
     }
     train_output = run_train(**train_args)
     cls.out = train_output
     cls.training = get_csv(train_output, 'training.csv')
Example #8
0
 def setUpClass(cls):
     """Run ``run_train`` with the sharded, replicated pipeline options below.

     ``cls.training`` holds what ``get_csv`` returns for 'training.csv'.
     """
     train_args = {
         '--iterations': 10,
         '--batches-per-step': 10,
         '--dataset': 'imagenet',
         '--generated-data': '',
         '--model-size': 50,
         '--shards': 2,
         '--replicas': 2,
         '--pipeline': '',
         '--gradient-accumulation-count': 128,
         '--pipeline-schedule': 'Grouped',
         '--batch-size': 2,
         '--no-validation': '',
         '--xla-recompute': '',
         '--available-memory-proportion': 0.1,
         '--pipeline-splits': 'b3/0/relu',
     }
     train_output = run_train(**train_args)
     cls.training = get_csv(train_output, 'training.csv')
Example #9
0
 def setUpClass(cls):
     """Run ``run_train`` with the size-50, pipeline-depth-256 options below.

     ``cls.training`` holds what ``get_csv`` returns for 'training.csv'.
     """
     train_args = {
         '--iterations': 10,
         '--batches-per-step': 10,
         '--dataset': 'imagenet',
         '--synthetic-data': '',
         '--model-size': 50,
         '--shards': 2,
         '--pipeline-depth': 256,
         '--batch-size': 2,
         '--no-validation': '',
         '--xla-recompute': '',
         '--available-memory-proportion': 0.1,
         '--pipeline-splits': 'b3/1/relu',
     }
     train_output = run_train(**train_args)
     cls.training = get_csv(train_output, 'training.csv')
Example #10
0
 def setUpClass(cls):
     """Run ``run_train`` with the pipelined resnext options below.

     The raw return value is kept as ``cls.out``; ``cls.training`` holds
     what ``get_csv`` returns for 'training.csv'.
     """
     train_args = {
         '--iterations': 500,
         '--dataset': 'imagenet',
         '--model': 'resnext',
         '--model-size': 14,
         '--generated-data': '',
         '--shards': 2,
         '--pipeline': '',
         '--gradient-accumulation-count': 128,
         '--batch-size': 1,
         '--no-validation': '',
         '--pipeline-splits': 'b2/0/relu',
     }
     train_output = run_train(**train_args)
     cls.out = train_output
     cls.training = get_csv(train_output, 'training.csv')
Example #11
0
 def setUpClass(cls):
     """Run ``run_train`` with the EfficientNet B0 pipeline options below.

     ``cls.training`` holds what ``get_csv`` returns for 'training.csv'.
     """
     train_args = {
         '--iterations': 10,
         '--batches-per-step': 10,
         '--dataset': 'imagenet',
         '--synthetic-data': '',
         '--model': 'EfficientNet',
         '--model-size': 'B0',
         '--shards': 2,
         '--pipeline-depth': 128,
         '--batch-size': 4,
         '--no-validation': '',
         '--xla-recompute': '',
         '--pipeline-schedule': 'Grouped',
         '--group-dim': 16,
         '--expand-ratio': 4,
         '--pipeline-splits': 'block3b',
     }
     train_output = run_train(**train_args)
     cls.training = get_csv(train_output, 'training.csv')
Example #12
0
 def setUp(self):
     """Run ``run_train`` with the size-50 pipeline options before each test.

     The test instance is passed as the first positional argument to
     ``run_train``; ``self.training`` holds what ``get_csv`` returns for
     'training.csv'.
     """
     options = {
         '--iterations': 10,
         '--batches-per-step': 10,
         '--dataset': 'imagenet',
         '--generated-data': '',
         '--model-size': 50,
         '--shards': 2,
         '--pipeline': '',
         '--gradient-accumulation-count': 256,
         '--micro-batch-size': 2,
         '--no-validation': '',
         '--enable-recomputation': '',
         '--available-memory-proportion': 0.1,
         '--pipeline-splits': 'b3/1/relu',
     }
     train_output = run_train(self, **options)
     self.training = get_csv(train_output, 'training.csv')
Example #13
0
 def setUp(self):
     """Run ``run_train`` with the pipelined resnext options before each test.

     The test instance is passed as the first positional argument to
     ``run_train``. The raw return value is kept as ``self.out``;
     ``self.training`` holds what ``get_csv`` returns for 'training.csv'.
     """
     options = {
         '--iterations': 500,
         '--dataset': 'imagenet',
         '--model': 'resnext',
         '--model-size': 14,
         '--generated-data': '',
         '--shards': 2,
         '--pipeline': '',
         '--gradient-accumulation-count': 128,
         '--micro-batch-size': 1,
         '--no-validation': '',
         '--pipeline-splits': 'b2/0/relu',
         '--fused-preprocessing': '',
     }
     train_output = run_train(self, **options)
     self.out = train_output
     self.training = get_csv(train_output, 'training.csv')
 def setUp(self):
     """Run ``run_train`` with the pipelined efficientnet options before each test.

     The test instance is passed as the first positional argument to
     ``run_train``. The raw return value is kept as ``self.out``;
     ``self.training`` holds what ``get_csv`` returns for 'training.csv'.
     """
     options = {
         '--generated-data': '',
         '--dataset': 'ImageNet',
         '--model': 'efficientnet',
         '--shards': 2,
         '--pipeline': '',
         '--gradient-accumulation-count': 256,
         '--micro-batch-size': 2,
         '--no-validation': '',
         '--enable-recomputation': '',
         '--available-memory-proportion': 0.2,
         '--iterations': 10,
         '--pipeline-splits': 'block3b',
         '--fused-preprocessing': '',
     }
     train_output = run_train(self, **options)
     self.out = train_output
     self.training = get_csv(train_output, 'training.csv')
Example #15
0
 def setUp(self):
     """Run ``run_train`` with the seeded resnext options before each test.

     The test instance is passed as the first positional argument to
     ``run_train``; ``self.validation`` and ``self.training`` hold what
     ``get_csv`` returns for 'validation.csv' and 'training.csv'.
     """
     options = {
         '--dataset': 'cifar-10',
         '--model': 'resnext',
         '--epochs': 2,
         '--model-size': 29,
         '--micro-batch-size': 4,
         '--batch-norm': '',
         '--pipeline-num-parallel': 8,
         '--generated-data': '',
         '--base-learning-rate-exponent': -4,
         '--precision': '32.32',
         '--seed': 1234,
         '--warmup-epochs': 0,
         '--no-stochastic-rounding': '',
         '--batches-per-step': 100,
     }
     train_output = run_train(self, **options)
     self.validation = get_csv(train_output, 'validation.csv')
     self.training = get_csv(train_output, 'training.csv')
Example #16
0
 def setUpClass(cls):
     """Run ``run_train`` with the replicated EfficientNet B0 options below.

     ``cls.training`` holds what ``get_csv`` returns for 'training.csv'.
     """
     train_args = {
         '--iterations': 10,
         '--batches-per-step': 10,
         '--dataset': 'imagenet',
         '--synthetic-data': '',
         '--model': 'EfficientNet',
         '--model-size': 'B0',
         '--shards': 2,
         '--replicas': 2,
         '--pipeline': '',
         '--gradient-accumulation-count': 128,
         '--pipeline-schedule': 'Grouped',
         '--batch-size': 2,
         '--no-validation': '',
         '--xla-recompute': '',
         '--group-dim': 16,
         '--expand-ratio': 4,
         '--use-relu': '',
         '--available-memory-proportion': 0.2,
         '--pipeline-splits': 'block3b',
     }
     train_output = run_train(**train_args)
     cls.training = get_csv(train_output, 'training.csv')
 def setUp(self):
     """Run ``run_train`` with the EfficientNet B0 pipeline options before each test.

     The test instance is passed as the first positional argument to
     ``run_train``; ``self.training`` holds what ``get_csv`` returns for
     'training.csv'.
     """
     options = {
         '--iterations': 10,
         '--batches-per-step': 10,
         '--dataset': 'imagenet',
         '--generated-data': '',
         '--model': 'EfficientNet',
         '--model-size': 'B0',
         '--shards': 2,
         '--pipeline': '',
         '--gradient-accumulation-count': 128,
         '--micro-batch-size': 4,
         '--no-validation': '',
         '--enable-recomputation': '',
         '--pipeline-schedule': 'Grouped',
         '--group-dim': 16,
         '--expand-ratio': 4,
         '--pipeline-splits': 'block3b',
     }
     train_output = run_train(self, **options)
     self.training = get_csv(train_output, 'training.csv')
Example #18
0
    def test_cifar10_validation(self):
        # Train briefly into a temporary log directory, then run validation
        # against what was saved there and check that the validation output
        # reports exactly iterations 0, 2 and 4, in that order.
        with TemporaryDirectory() as log_dir:
            # create checkpoints for iterations 0, 2 and 4
            out = run_train(
                self, **{
                    '--data-dir': cifar10_data_dir,
                    '--name-suffix': 'test_validation',
                    '--log-dir': log_dir,
                    '--iterations': 4,
                    '--batches-per-step': 2,
                    '--no-validation': '',
                    '--ckpts-per-epoch': 1000000
                })
            # The first line containing 'Saving to ' names the directory to
            # restore from.
            # NOTE(review): the fixed offset 11 presumably skips a
            # one-character prefix plus 'Saving to ' -- confirm against the
            # training log format.
            dir_to_restore = None
            for line in out.split('\n'):
                if line.find('Saving to ') != -1:
                    dir_to_restore = line[11:]
                    break
            self.assertIsNotNone(dir_to_restore)

            # run validation on these checkpoints
            out = run_validation(
                working_path, **{
                    '--data-dir': cifar10_data_dir,
                    '--restore-path': dir_to_restore
                })

            # Collect every reported iteration first and compare the whole
            # sequence once: an unexpected extra 'iteration:' line then
            # produces a readable assertion failure instead of an IndexError
            # from indexing past the end of the expected list.
            expected_iterations = [0, 2, 4]
            prefix = 'iteration:'
            found_iterations = []
            for line in out.split('\n'):
                pos = line.find(prefix)
                if pos != -1:
                    # The number sits between the prefix and the next comma.
                    found_iterations.append(
                        int(line[pos + len(prefix):line.find(',', pos)]))
            self.assertEqual(expected_iterations, found_iterations)
Example #19
0
 def test_help(self):
     # The help output requested together with '--model efficientnet' must
     # contain the text "EfficientNet:".
     help_out = run_train(**{'--model': 'efficientnet', '--help': ''})
     # assertIn has the same pass/fail condition as comparing find() with
     # -1, but on failure it shows the text that was searched.
     self.assertIn("EfficientNet:", help_out)
Example #20
0
 def test_non_accelerated_8_bit_io(self):
     # This flag combination is expected to make run_train raise
     # CalledProcessError with return code 1.
     cli_args = (
         '--accelerator-side-preprocess', 'False',
         '--micro-batch-size', '8',
         '--validation',
         '--eight-bit-transfer', 'True',
     )
     with self.assertRaises(subprocess.CalledProcessError) as raised:
         run_train(*cli_args)
     self.assertEqual(raised.exception.returncode, 1)
Example #21
0
 def test_help(self):
     # The output of run_train with '--help' must contain "usage: train.py".
     help_out = run_train(**{'--help': ''})
     # assertIn has the same pass/fail condition as comparing find() with
     # -1, but on failure it shows the text that was searched.
     self.assertIn("usage: train.py", help_out)
Example #22
0
 def test_logs_per_epoch_neg(self):
     # Passing '-1' for --logs-per-epoch is expected to make run_train
     # raise CalledProcessError with return code 1.
     cli_args = ('--logs-per-epoch', '-1')
     with self.assertRaises(subprocess.CalledProcessError) as raised:
         run_train(*cli_args)
     self.assertEqual(raised.exception.returncode, 1)
Example #23
0
 def test_unsupported_argument(self):
     # Passing '--wrong-arg' is expected to make run_train raise
     # CalledProcessError with return code 2.
     cli_args = ('--wrong-arg', '0')
     with self.assertRaises(subprocess.CalledProcessError) as raised:
         run_train(*cli_args)
     self.assertEqual(raised.exception.returncode, 2)
Example #24
0
 def test_logs_per_epoch_not_multiple_of_epoch(self):
     # '--logs-per-epoch 0.25' combined with '--num-epochs 6' is expected to
     # make run_train raise CalledProcessError with return code 1.
     cli_args = ('--logs-per-epoch', '0.25', '--num-epochs', '6')
     with self.assertRaises(subprocess.CalledProcessError) as raised:
         run_train(*cli_args)
     self.assertEqual(raised.exception.returncode, 1)
Example #25
0
 def test_recompute_with_no_split(self):
     # Passing '--recomputation True' with no other options is expected to
     # make run_train raise CalledProcessError with return code 1.
     cli_args = ('--recomputation', 'True')
     with self.assertRaises(subprocess.CalledProcessError) as raised:
         run_train(*cli_args)
     self.assertEqual(raised.exception.returncode, 1)
Example #26
0
 def test_help(self):
     # The output of run_train('--help') must contain the word 'usage'.
     help_text = run_train('--help')
     self.assertIn('usage', help_text)