Example #1
def _instantiate_optimizers(strategy, learning_rate, beta_1, train_settings):
    LOGGER.info(" -------- Creating Optimizers --------")

    with strategy.scope():
        srfr_optimizer = NovoGrad(
            learning_rate=learning_rate,
            beta_1=beta_1,
            beta_2=train_settings["beta_2"],
            weight_decay=train_settings["weight_decay"],
            name="novograd_srfr",
        )
        srfr_optimizer = mixed_precision.LossScaleOptimizer(
            srfr_optimizer,
            loss_scale="dynamic",
        )
        discriminator_optimizer = NovoGrad(
            learning_rate=learning_rate,
            beta_1=beta_1,
            beta_2=train_settings["beta_2"],
            weight_decay=train_settings["weight_decay"],
            name="novograd_discriminator",
        )
        discriminator_optimizer = mixed_precision.LossScaleOptimizer(
            discriminator_optimizer, loss_scale="dynamic")

    return (
        srfr_optimizer,
        discriminator_optimizer,
    )
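
Example #1 is an excerpt, so the names it relies on (NovoGrad, mixed_precision, LOGGER) are not defined in the snippet. A minimal set of imports it presumably assumes: NovoGrad from TensorFlow Addons and the pre-TF-2.4 experimental mixed-precision API, since LossScaleOptimizer is called with loss_scale="dynamic". This is an assumption, not part of the original file:

# Assumed imports for the excerpt above (not shown in the original snippet).
import logging

import tensorflow as tf
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow_addons.optimizers import NovoGrad

LOGGER = logging.getLogger(__name__)

# Hypothetical call site, mirroring how main() further below builds the same
# optimizers inside a distribution strategy scope:
# strategy = tf.distribute.MirroredStrategy()
# srfr_opt, disc_opt = _instantiate_optimizers(
#     strategy,
#     learning_rate=train_settings["learning_rate"],
#     beta_1=train_settings["momentum"],
#     train_settings=train_settings,
# )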
Example #2
def test_sparse_sample(self):
    self.run_sparse_sample(
        iterations=1,
        expected=[[0.9552786425, 1.9105572849],
                  [2.9400000012, 3.9200000016]],
        optimizer=NovoGrad(lr=0.1, epsilon=1e-8),
    )
Example #3
def test_sparse_sample_with_weight_decay(self):
    self.run_sparse_sample(
        iterations=1,
        expected=[[0.945278642, 1.8905572849],
                  [2.9100000012, 3.8800000016]],
        optimizer=NovoGrad(lr=0.1, weight_decay=0.1, epsilon=1e-8),
    )
Example #4
def test_sparse_sample(dtype):
    run_sparse_sample(
        iterations=2,
        expected=[[0.71, 2.0], [3.0, 3.71]],
        optimizer=NovoGrad(lr=0.1, epsilon=1e-8),
        dtype=dtype,
    )
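
The run_sparse_sample helper used by the dtype-parametrized tests (Examples #4, #9 and #10) is not included in these excerpts. Judging by the expected values, where only element 0 of the first variable and element 1 of the second change, it presumably applies sparse IndexedSlices gradients for a fixed number of iterations and then checks the variables. A sketch of what it might look like (an assumption, not the original helper):

import numpy as np
import tensorflow as tf

def run_sparse_sample(iterations, expected, optimizer, dtype=tf.float32):
    # Two small variables; the expected values in the tests suggest these starts.
    var_0 = tf.Variable([1.0, 2.0], dtype=dtype)
    var_1 = tf.Variable([3.0, 4.0], dtype=dtype)

    # Sparse gradients: only element 0 of var_0 and element 1 of var_1 are updated.
    grad_0 = tf.IndexedSlices(
        tf.constant([0.1], dtype=dtype), tf.constant([0]), tf.constant([2])
    )
    grad_1 = tf.IndexedSlices(
        tf.constant([0.4], dtype=dtype), tf.constant([1]), tf.constant([2])
    )

    grads_and_vars = list(zip([grad_0, grad_1], [var_0, var_1]))
    for _ in range(iterations):
        optimizer.apply_gradients(grads_and_vars)

    np.testing.assert_allclose(var_0.numpy(), expected[0], atol=2e-4)
    np.testing.assert_allclose(var_1.numpy(), expected[1], atol=2e-4)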
Example #5
def test_sparse_sample_with_grad_averaging(self):
    self.run_sparse_sample(
        iterations=2,
        expected=[[0.9105572849, 1.8211145698],
                  [2.8800000024, 3.8400000032]],
        optimizer=NovoGrad(lr=0.1, grad_averaging=True, epsilon=1e-8),
    )
Example #6
def test_dense_sample_with_grad_averaging(dtype):
    run_dense_sample(
        iterations=2,
        expected=[[0.9105572849, 1.8211145698], [2.8800000024, 3.8400000032]],
        optimizer=NovoGrad(lr=0.1, grad_averaging=True, epsilon=1e-8),
        dtype=dtype,
    )
Example #7
def test_dense_sample_with_weight_decay(dtype):
    run_dense_sample(
        iterations=1,
        expected=[[0.945278642, 1.8905572849], [2.9100000012, 3.8800000016]],
        optimizer=NovoGrad(lr=0.1, weight_decay=0.1, epsilon=1e-8),
        dtype=dtype,
    )
Example #8
def test_dense_sample(dtype):
    run_dense_sample(
        iterations=1,
        expected=[[0.9552786425, 1.9105572849], [2.9400000012, 3.9200000016]],
        optimizer=NovoGrad(lr=0.1, epsilon=1e-8),
        dtype=dtype,
    )
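
The dense counterpart, run_dense_sample, is likewise not shown. A sketch under the same assumptions; the starting values and gradients below are consistent with Example #8's expected output if NovoGrad normalizes each gradient by its per-layer L2 norm on the first step (1 - 0.1 * 0.1 / sqrt(0.1**2 + 0.2**2) is roughly 0.9553):

import numpy as np
import tensorflow as tf

def run_dense_sample(iterations, expected, optimizer, dtype=tf.float32):
    # Same two variables as in the sparse sketch, but with ordinary dense gradients.
    var_0 = tf.Variable([1.0, 2.0], dtype=dtype)
    var_1 = tf.Variable([3.0, 4.0], dtype=dtype)

    grad_0 = tf.constant([0.1, 0.2], dtype=dtype)
    grad_1 = tf.constant([0.3, 0.4], dtype=dtype)

    grads_and_vars = list(zip([grad_0, grad_1], [var_0, var_1]))
    for _ in range(iterations):
        optimizer.apply_gradients(grads_and_vars)

    np.testing.assert_allclose(var_0.numpy(), expected[0], atol=2e-4)
    np.testing.assert_allclose(var_1.numpy(), expected[1], atol=2e-4)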
Example #9
def test_sparse_sample_with_grad_averaging(dtype):
    run_sparse_sample(
        iterations=2,
        expected=[[0.8, 2.0], [3.0, 3.8]],
        optimizer=NovoGrad(lr=0.1, grad_averaging=True, epsilon=1e-8),
        dtype=dtype,
    )
Example #10
def test_sparse_sample_with_weight_decay(dtype):
    run_sparse_sample(
        iterations=2,
        expected=[[0.6821, 2.0], [3.0, 3.5954]],
        optimizer=NovoGrad(lr=0.1, weight_decay=0.1, epsilon=1e-8),
        dtype=dtype,
    )
Example #11
def test_fit_simple_linear_model():
    np.random.seed(0x2020)
    tf.random.set_seed(0x2020)

    x = np.random.standard_normal((100000, 3))
    w = np.random.standard_normal((3, 1))
    y = np.dot(x, w) + np.random.standard_normal((100000, 1)) * 1e-5

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(input_shape=(3, ), units=1))
    model.compile(NovoGrad(), loss="mse")

    model.fit(x, y, epochs=2)

    x = np.random.standard_normal((100, 3))
    y = np.dot(x, w)
    predicted = model.predict(x)

    max_abs_diff = np.max(np.abs(predicted - y))
    assert max_abs_diff < 1e-2
Example #12
def test_serialization():
    optimizer = NovoGrad(lr=1e-4, weight_decay=0.0, grad_averaging=False)
    config = tf.keras.optimizers.serialize(optimizer)
    new_optimizer = tf.keras.optimizers.deserialize(config)
    assert new_optimizer.get_config() == optimizer.get_config()
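
The round trip above works without passing custom_objects, presumably because TensorFlow Addons registers its optimizers with Keras' serialization registry. With an unregistered custom optimizer the equivalent call would be tf.keras.optimizers.deserialize(config, custom_objects={"NovoGrad": NovoGrad}).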
Example #13
def test_get_config():
    opt = NovoGrad(lr=1e-4, weight_decay=0.0, grad_averaging=False)
    config = opt.get_config()
    assert config["learning_rate"] == 1e-4
    assert config["weight_decay"] == 0.0
    assert config["grad_averaging"] is False
Example #14
def main():
    """Main training function."""
    timing = TimingLogger()
    timing.start()
    network_settings, train_settings, preprocess_settings = parseConfigsFile(
        ['network', 'train', 'preprocess'])

    strategy = tf.distribute.MirroredStrategy()
    BATCH_SIZE = train_settings['batch_size'] * strategy.num_replicas_in_sync
    temp_folder = Path.cwd().joinpath('temp', 'synthetic_ds')

    LOGGER.info(' -------- Importing Datasets --------')

    vgg_dataset = VggFace2(mode='concatenated')
    synthetic_dataset = vgg_dataset.get_dataset()
    synthetic_dataset = vgg_dataset.augment_dataset()
    synthetic_dataset = vgg_dataset.normalize_dataset()
    synthetic_dataset = synthetic_dataset.cache(str(temp_folder))
    #synthetic_dataset_len = vgg_dataset.get_dataset_size()
    synthetic_dataset_len = 100_000
    synthetic_num_classes = vgg_dataset.get_number_of_classes()
    synthetic_dataset = synthetic_dataset.shuffle(
        buffer_size=2_048).repeat().batch(BATCH_SIZE).prefetch(1)

    lfw_path = Path.cwd().joinpath('temp', 'lfw')
    lfw_dataset = LFW()
    (left_pairs, left_aug_pairs, right_pairs, right_aug_pairs,
     is_same_list) = lfw_dataset.get_dataset()
    left_pairs = left_pairs.batch(BATCH_SIZE).cache(
        str(lfw_path.joinpath('left'))).prefetch(AUTOTUNE)
    left_aug_pairs = left_aug_pairs.batch(BATCH_SIZE).cache(
        str(lfw_path.joinpath('left_aug'))).prefetch(AUTOTUNE)
    right_pairs = right_pairs.batch(BATCH_SIZE).cache(
        str(lfw_path.joinpath('right'))).prefetch(AUTOTUNE)
    right_aug_pairs = right_aug_pairs.batch(BATCH_SIZE).cache(
        str(lfw_path.joinpath('right_aug'))).prefetch(AUTOTUNE)

    # Using `distribute_dataset` to distribute the batches across the GPUs
    synthetic_dataset = strategy.experimental_distribute_dataset(
        synthetic_dataset)
    left_pairs = strategy.experimental_distribute_dataset(left_pairs)
    left_aug_pairs = strategy.experimental_distribute_dataset(left_aug_pairs)
    right_pairs = strategy.experimental_distribute_dataset(right_pairs)
    right_aug_pairs = strategy.experimental_distribute_dataset(right_aug_pairs)

    LOGGER.info(' -------- Creating Models and Optimizers --------')

    EPOCHS = generate_num_epochs(
        train_settings['iterations'],
        synthetic_dataset_len,
        BATCH_SIZE,
    )

    with strategy.scope():
        srfr_model = SRFR(
            num_filters=network_settings['num_filters'],
            depth=50,
            categories=network_settings['embedding_size'],
            num_gc=network_settings['gc'],
            num_blocks=network_settings['num_blocks'],
            residual_scailing=network_settings['residual_scailing'],
            training=True,
            input_shape=preprocess_settings['image_shape_low_resolution'],
            num_classes_syn=synthetic_num_classes,
        )
        sr_discriminator_model = DiscriminatorNetwork()

        srfr_optimizer = NovoGrad(
            learning_rate=train_settings['learning_rate'],
            beta_1=train_settings['momentum'],
            beta_2=train_settings['beta_2'],
            weight_decay=train_settings['weight_decay'],
            name='novograd_srfr',
        )
        srfr_optimizer = mixed_precision.LossScaleOptimizer(
            srfr_optimizer,
            loss_scale='dynamic',
        )
        discriminator_optimizer = NovoGrad(
            learning_rate=train_settings['learning_rate'],
            beta_1=train_settings['momentum'],
            beta_2=train_settings['beta_2'],
            weight_decay=train_settings['weight_decay'],
            name='novograd_discriminator',
        )
        discriminator_optimizer = mixed_precision.LossScaleOptimizer(
            discriminator_optimizer, loss_scale='dynamic')

        train_loss = partial(
            strategy.reduce,
            reduce_op=tf.distribute.ReduceOp.MEAN,
            axis=0,
        )

    checkpoint = tf.train.Checkpoint(
        epoch=tf.Variable(1),
        step=tf.Variable(1),
        srfr_model=srfr_model,
        sr_discriminator_model=sr_discriminator_model,
        srfr_optimizer=srfr_optimizer,
        discriminator_optimizer=discriminator_optimizer,
    )
    manager = tf.train.CheckpointManager(checkpoint,
                                         directory='./training_checkpoints',
                                         max_to_keep=5)

    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_summary_writer = tf.summary.create_file_writer(
        str(Path.cwd().joinpath('logs', 'gradient_tape', current_time,
                                'train')), )
    test_summary_writer = tf.summary.create_file_writer(
        str(Path.cwd().joinpath('logs', 'gradient_tape', current_time,
                                'test')), )

    LOGGER.info(' -------- Starting Training --------')
    with strategy.scope():
        checkpoint.restore(manager.latest_checkpoint)
        if manager.latest_checkpoint:
            LOGGER.info(f' Restored from {manager.latest_checkpoint}')
        else:
            LOGGER.info(' Initializing from scratch.')

        for epoch in range(int(checkpoint.epoch), EPOCHS + 1):
            timing.start(Train.__name__)
            LOGGER.info(f' Start of epoch {epoch}')

            train = Train(strategy, srfr_model, srfr_optimizer,
                          sr_discriminator_model, discriminator_optimizer,
                          train_summary_writer, test_summary_writer,
                          checkpoint, manager)
            srfr_loss, discriminator_loss = train.train_srfr_model(
                BATCH_SIZE,
                train_loss,
                synthetic_dataset,
                synthetic_num_classes,
                left_pairs,
                left_aug_pairs,
                right_pairs,
                right_aug_pairs,
                is_same_list,
                sr_weight=train_settings['super_resolution_weight'],
                scale=train_settings['scale'],
                margin=train_settings['angular_margin'],
                # natural_ds,
                # num_classes_natural,
            )
            elapsed_time = timing.end(Train.__name__, True)
            with train_summary_writer.as_default():
                tf.summary.scalar('srfr_loss_per_epoch', srfr_loss, step=epoch)
                tf.summary.scalar(
                    'discriminator_loss_per_epoch',
                    discriminator_loss,
                    step=epoch,
                )
                tf.summary.scalar('training_time_per_epoch',
                                  elapsed_time,
                                  step=epoch)
            LOGGER.info((f' Epoch {epoch}, SRFR Loss: {srfr_loss:.3f},'
                         f' Discriminator Loss: {discriminator_loss:.3f}'))

            train.save_model()

            checkpoint.epoch.assign_add(1)
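
Example #14 also depends on project-level helpers that are not shown here (parseConfigsFile, VggFace2, LFW, SRFR, DiscriminatorNetwork, TimingLogger, Train, generate_num_epochs). As one illustration, generate_num_epochs plausibly converts the configured number of training iterations into whole epochs; this is a hypothetical sketch, not the project's actual code:

import math

def generate_num_epochs(iterations, dataset_len, batch_size):
    # Hypothetical: how many whole epochs are needed to run `iterations` steps
    # when one epoch is ceil(dataset_len / batch_size) optimizer steps.
    steps_per_epoch = math.ceil(dataset_len / batch_size)
    return math.ceil(iterations / steps_per_epoch)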
Example #15
def test_get_config(self):
    opt = NovoGrad(lr=1e-4, weight_decay=0.0, grad_averaging=False)
    config = opt.get_config()
    self.assertEqual(config["learning_rate"], 1e-4)
    self.assertEqual(config["weight_decay"], 0.0)
    self.assertEqual(config["grad_averaging"], False)