def test_serialization():
    """A RectifiedAdam config survives a serialize/deserialize round trip."""
    original = RectifiedAdam(
        lr=1e-3,
        total_steps=10000,
        warmup_proportion=0.1,
        min_lr=1e-5,
    )
    restored = tf.keras.optimizers.deserialize(
        tf.keras.optimizers.serialize(original)
    )
    assert restored.get_config() == original.get_config()
def compile_model(self, optimizer='SGD', learning_rate=0.01, momentum=0.00,
                  loss='binary_crossentropy', warmup_proportion=0.1,
                  total_steps=10000, min_lr=1e-5,
                  measurment_metric_name='accuracy'):  # MMMM
    """Select an optimizer and metric, then compile the model exactly once.

    Parameters
    ----------
    optimizer : str or tf.keras.optimizers.Optimizer
        Name of a supported optimizer ('SGD', 'Adam', 'RMSprop', 'RAdam',
        'Lookahead', case-insensitive) or an already-built optimizer instance.
    learning_rate : float
        Learning rate for the selected optimizer.
    momentum : float
        Momentum; only used when optimizer is 'SGD'.
    loss : str or callable
        Loss forwarded to `tf.keras.Model.compile`.
    warmup_proportion : float
        Warm-up proportion for the RectifiedAdam-based optimizers.
    total_steps : int
        Stored on the instance; currently not forwarded to the optimizer
        (the original call site had it commented out).
    min_lr : float
        Forwarded to RectifiedAdam as `decay`, preserving the original
        behaviour.  NOTE(review): confirm whether `total_steps`/`min_lr`
        were intended instead of `decay` here.
    measurment_metric_name : str
        Metric to track: 'accuracy' or 'categorical_accuracy'.

    Raises
    ------
    ValueError
        If `optimizer` is a string that names no supported optimizer
        (the original printed a message and then crashed with NameError).
    """
    self.learning_rate = learning_rate
    self.momentum = momentum
    self.warmup_proportion = warmup_proportion
    self.total_steps = total_steps
    self.min_lr = min_lr
    self.measurment_metric_name = measurment_metric_name

    if isinstance(optimizer, str):  # idiom fix: was `type(optimizer) == str`
        # Assign Optimizers based on selection
        optimizerType = optimizer.upper()
        if optimizerType == 'SGD':
            optimizerClass = SGD(learning_rate=learning_rate, momentum=momentum)
        elif optimizerType == 'ADAM':
            optimizerClass = Adam(learning_rate=learning_rate)
        elif optimizerType == 'RMSPROP':
            optimizerClass = RMSprop(learning_rate=learning_rate)
        elif optimizerType == 'RADAM':
            optimizerClass = RectifiedAdam(learning_rate=learning_rate,
                                           warmup_proportion=warmup_proportion,
                                           decay=min_lr)  # total_steps=total_steps,min_lr=min_lr)
        elif optimizerType == 'LOOKAHEAD':
            optimizerClass = RectifiedAdam(learning_rate=learning_rate,
                                           warmup_proportion=warmup_proportion,
                                           decay=min_lr)  # total_steps=total_steps,min_lr=min_lr)
            optimizerClass = Lookahead(optimizerClass)
        else:
            raise ValueError("Optimizer is NOT in string list: %r" % optimizer)
    else:
        optimizerClass = optimizer
        optimizerType = "ManualFunc"

    # Normalize the metric name; Keras resolves the string itself, so the
    # original's unused `measurment_metric` locals have been removed.
    if measurment_metric_name.lower() == 'accuracy':
        self.measurment_metric_name = 'accuracy'
    elif measurment_metric_name.lower() == 'categorical_accuracy':
        self.measurment_metric_name = 'categorical_accuracy'
    # elif measurment_metric_name.lower() == 'iou': ...  (kept disabled as before)

    # Bug fix: the original invoked super().compile() twice; the second call
    # passed the raw `optimizer` argument and hard-coded metrics=['accuracy'],
    # silently discarding the optimizer configured above.  Compile once.
    super().compile(optimizer=optimizerClass, loss=loss,
                    metrics=[self.measurment_metric_name])
    self.optimizerType = optimizerType
    print("Model was compiled. optimizer: %s, learning_rate: %s, momentum: %s" % (
        self.optimizerType, self.learning_rate, self.momentum))
def test_dense_sample_with_weight_decay(self):
    """Dense updates with weight decay match the previous implementation."""
    opt = RectifiedAdam(lr=1e-3, weight_decay=0.01)
    self.run_dense_sample(
        optimizer=opt,
        iterations=100,
        expected=[[0.984775, 1.983276], [2.983125, 3.982076]],
    )
def test_sparse_sample_with_weight_decay(self):
    """Sparse updates with weight decay match the previous implementation."""
    opt = RectifiedAdam(lr=1e-3, weight_decay=0.01)
    self.run_sparse_sample(
        optimizer=opt,
        iterations=200,
        expected=[[0.957368, 2.0], [3.0, 3.951673]],
    )
def test_dense_sample_with_weight_decay(self):
    """Dense updates with weight decay match the official implementation."""
    expected_weights = [[0.5472, 1.5368], [2.5276, 3.5176]]
    self.run_dense_sample(
        iterations=1000,
        expected=expected_weights,
        optimizer=RectifiedAdam(lr=1e-3, weight_decay=0.01),
    )
def test_dense_sample(self):
    """Plain dense updates match the previous implementation."""
    opt = RectifiedAdam(lr=1e-3)
    self.run_dense_sample(
        optimizer=opt,
        iterations=100,
        expected=[[0.985769, 1.985269], [2.986119, 3.986068]],
    )
def test_sparse_sample(self):
    """Plain sparse updates match the previous implementation."""
    opt = RectifiedAdam(lr=1e-3)
    self.run_sparse_sample(
        optimizer=opt,
        iterations=200,
        expected=[[0.959333, 2.0], [3.0, 3.959632]],
    )
def test_dense_sample(self):
    """Plain dense updates match the official implementation."""
    expected_weights = [[0.5554, 1.5549], [2.5557, 3.5557]]
    self.run_dense_sample(
        iterations=1000,
        expected=expected_weights,
        optimizer=RectifiedAdam(lr=1e-3),
    )
def test_sparse_sample_with_weight_decay(self):
    """Sparse updates with weight decay match the official implementation.

    The equivalent dense result would be [-0.2029, 0.7768], [1.7578, 2.7380];
    the untouched sparse slots keep their initial values (2.0 and 3.0).
    """
    opt = RectifiedAdam(lr=1e-3, weight_decay=0.01)
    self.run_sparse_sample(
        optimizer=opt,
        iterations=2000,
        expected=[[-0.2029, 2.0], [3.0, 2.7380]],
    )
def test_dense_sample_with_warmup():
    """Dense updates with a warmup schedule produce the reference weights."""
    opt = RectifiedAdam(
        lr=1e-3,
        total_steps=100,
        warmup_proportion=0.1,
        min_lr=1e-5,
    )
    run_dense_sample(
        optimizer=opt,
        iterations=100,
        expected=[[0.994062, 1.993912], [2.994167, 3.994152]],
    )
def build_ranger_optimizer(radam_options=None, lookahead_options=None):
    """Build a Ranger optimizer: RectifiedAdam wrapped in Lookahead.

    Bug fix: both defaults were mutable dicts evaluated once at definition
    time, so callers mutating them (or code mutating the kwargs) would leak
    state across calls.  `None` sentinels restore the same defaults safely.

    Parameters
    ----------
    radam_options : dict, optional
        Keyword arguments for `RectifiedAdam` (default: none).
    lookahead_options : dict, optional
        Keyword arguments for `Lookahead`
        (default: sync_period=6, slow_step_size=0.5, as before).

    Returns
    -------
    Lookahead
        The wrapped optimizer.
    """
    if radam_options is None:
        radam_options = {}
    if lookahead_options is None:
        lookahead_options = {"sync_period": 6, "slow_step_size": 0.5}
    radam = RectifiedAdam(**radam_options)
    return Lookahead(radam, **lookahead_options)
def test_sparse_sample(self):
    """Plain sparse updates match the official implementation.

    The equivalent dense result would be [-0.1929, 0.8066], [1.8075, 2.8074];
    the untouched sparse slots keep their initial values (2.0 and 3.0).
    """
    opt = RectifiedAdam(lr=1e-3)
    self.run_sparse_sample(
        optimizer=opt,
        iterations=2000,
        expected=[[-0.1929, 2.0], [3.0, 2.8074]],
    )
def test_sparse_sample_with_warmup():
    """Sparse updates with a warmup schedule produce the reference weights."""
    opt = RectifiedAdam(
        lr=1e-3,
        total_steps=200,
        warmup_proportion=0.1,
        min_lr=1e-5,
    )
    run_sparse_sample(
        optimizer=opt,
        iterations=200,
        expected=[[0.982629, 2.0], [3.0, 3.982674]],
    )
def test_dense_sample_with_amsgrad(self):
    """Dense updates with amsgrad=True match the official implementation.

    Because the gradient in this sample is constant, `amsgrad` changes
    nothing and the expected values equal the plain dense case.
    """
    opt = RectifiedAdam(lr=1e-3, amsgrad=True)
    self.run_dense_sample(
        optimizer=opt,
        iterations=100,
        expected=[[0.985769, 1.985269], [2.986119, 3.986068]],
    )
def test_sparse_sample_with_amsgrad(self):
    """Sparse updates with amsgrad=True match the official implementation.

    Because the gradient in this sample is constant, `amsgrad` changes
    nothing and the expected values equal the plain sparse case.
    """
    opt = RectifiedAdam(lr=1e-3, amsgrad=True)
    self.run_sparse_sample(
        optimizer=opt,
        iterations=200,
        expected=[[0.959333, 2.0], [3.0, 3.959632]],
    )
def test_sparse_sample_with_lookahead():
    """Sparse Ranger (RAdam + Lookahead) matches the previous implementation."""
    inner = RectifiedAdam(lr=1e-3, beta_1=0.95,)
    ranger = Lookahead(inner, sync_period=6, slow_step_size=0.45,)
    run_sparse_sample(
        optimizer=ranger,
        iterations=150,
        expected=[[0.988156, 2.0], [3.0, 3.988291]],
    )
def test_dense_sample_with_lookahead():
    """Dense Ranger (RAdam + Lookahead) matches the original implementation."""
    inner = RectifiedAdam(lr=1e-3, beta_1=0.95,)
    ranger = Lookahead(inner, sync_period=6, slow_step_size=0.45,)
    run_dense_sample(
        optimizer=ranger,
        iterations=100,
        expected=[[0.993126, 1.992901], [2.993283, 3.993261]],
    )
def test_sparse_sample_with_warmup(self):
    """Sparse updates with a warmup schedule produce the reference weights."""
    opt = RectifiedAdam(
        lr=1e-3,
        total_steps=2000,
        warmup_proportion=0.1,
        min_lr=1e-5,
    )
    self.run_sparse_sample(
        optimizer=opt,
        iterations=2000,
        expected=[[0.4653, 2.0], [3.0, 3.4653]],
    )
def test_dense_sample_with_warmup(self):
    """Dense updates with a warmup schedule produce the reference weights."""
    opt = RectifiedAdam(
        lr=1e-3,
        total_steps=1000,
        warmup_proportion=0.1,
        min_lr=1e-5,
    )
    self.run_dense_sample(
        optimizer=opt,
        iterations=1000,
        expected=[[0.8041, 1.8041], [2.8041, 3.8041]],
    )
def test_scheduler_serialization():
    """lr/weight-decay schedules survive a serialize/deserialize round trip."""
    lr_scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
        1e-3, 50, 0.5)
    wd_scheduler = tf.keras.optimizers.schedules.InverseTimeDecay(
        2e-3, 25, 0.25)
    optimizer = RectifiedAdam(learning_rate=lr_scheduler,
                              weight_decay=wd_scheduler)

    restored = tf.keras.optimizers.deserialize(
        tf.keras.optimizers.serialize(optimizer)
    )
    restored_config = restored.get_config()

    assert restored_config == optimizer.get_config()
    # Each schedule serializes to a {"class_name", "config"} mapping.
    assert restored_config["learning_rate"] == {
        "class_name": "ExponentialDecay",
        "config": lr_scheduler.get_config(),
    }
    assert restored_config["weight_decay"] == {
        "class_name": "InverseTimeDecay",
        "config": wd_scheduler.get_config(),
    }
def test_schedulers():
    """Training with lr/weight-decay schedules yields the reference weights."""
    lr_scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
        1e-3, 50, 0.5)
    wd_scheduler = tf.keras.optimizers.schedules.InverseTimeDecay(
        2e-3, 25, 0.25)
    opt = RectifiedAdam(learning_rate=lr_scheduler, weight_decay=wd_scheduler)
    run_dense_sample(
        optimizer=opt,
        iterations=100,
        expected=[[0.993192, 1.992625], [2.993369, 3.993239]],
    )
def test_dense_sample_with_lookahead(self):
    """Dense Ranger (RAdam + Lookahead) matches the original implementation."""
    inner = RectifiedAdam(
        lr=1e-3,
        beta_1=0.95,
    )
    ranger = Lookahead(inner, sync_period=6, slow_step_size=0.45,)
    self.run_dense_sample(
        optimizer=ranger,
        iterations=1000,
        expected=[[0.7985, 1.7983], [2.7987, 3.7986]],
    )
def test_sparse_sample_with_lookahead(self):
    """Sparse Ranger (RAdam + Lookahead) matches the original implementation.

    The equivalent dense result would be [0.6417, 1.6415], [2.6419, 3.6418];
    the untouched sparse slots keep their initial values (2.0 and 3.0).
    """
    inner = RectifiedAdam(
        lr=1e-3,
        beta_1=0.95,
    )
    ranger = Lookahead(inner, sync_period=6, slow_step_size=0.45,)
    self.run_sparse_sample(
        optimizer=ranger,
        iterations=1500,
        expected=[[0.6417, 2.0], [3.0, 3.6418]],
    )
def build(self) -> None:
    """Construct and compile the Siamese network.

    Both arms of the network share a single embedder model, i.e. their
    weights are tied.  Construction happens inside a MirroredStrategy
    scope so the model is parallelized over all available GPUs, and the
    model is compiled to minimize the contrastive loss with RectifiedAdam.
    """
    distribution = tf.distribute.MirroredStrategy()
    with distribution.scope():
        # One underlying model serves both arms (tied weights).
        self.siamese_model = self._build_siamese_model()
        # Optimize the contrastive loss with rectified Adam.
        self.siamese_model.compile(RectifiedAdam(self.lr), contrastive_loss)
def compile(self, optimizer=None, loss=None, **kwargs):
    """Compile the autoencoder with merged default/user losses and metrics.

    Bug fixes vs. the original:
    - the default was `optimizer=RectifiedAdam()`, evaluated once at
      definition time, so every model compiled without an explicit
      optimizer shared the SAME optimizer instance (and its slot
      variables); a fresh RectifiedAdam is now created per call;
    - `print(self.summary())` printed the summary and then `None`
      (`summary()` returns None); `summary()` is now called directly.

    Parameters
    ----------
    optimizer : tf.keras.optimizers.Optimizer, optional
        Defaults to a new `RectifiedAdam()` per call.
    loss : dict, optional
        Per-output losses; entries override the defaults from
        `create_losses()` for the same keys.
    **kwargs
        Forwarded to `tf.keras.Model.compile`; a `metrics` entry, if
        present, replaces the defaults from `create_metrics()`.
    """
    if optimizer is None:
        optimizer = RectifiedAdam()
    ae_losses = create_losses()
    loss = loss or {}
    # User-supplied losses replace the default of the same name.
    for k in loss:
        ae_losses.pop(k)
    self.ae_losses = {**ae_losses, **loss}
    if 'metrics' in kwargs:  # idiom fix: was `in kwargs.keys()`
        self.ae_metrics = kwargs.pop('metrics', None)
    else:
        self.ae_metrics = create_metrics(self.get_flat_shape())
    tf.keras.Model.compile(self, optimizer=optimizer, loss=self.ae_losses,
                           metrics=self.ae_metrics, **kwargs)
    self.summary()
def test_checkpoint_serialization(tmpdir):
    """Optimizer state written to a checkpoint can be restored and reused."""
    saved_opt = RectifiedAdam()
    restored_opt = RectifiedAdam()
    variables = [
        tf.Variable([1.0, 2.0], dtype=tf.dtypes.float32),
        tf.Variable([3.0, 4.0], dtype=tf.dtypes.float32),
    ]
    gradients = [
        tf.constant([0.1, 0.2], dtype=tf.dtypes.float32),
        tf.constant([0.03, 0.04], dtype=tf.dtypes.float32),
    ]
    grads_and_vars = list(zip(gradients, variables))

    # Applying gradients creates the slot variables that get checkpointed.
    saved_opt.apply_gradients(grads_and_vars)
    model_path = str(tmpdir / "rectified_adam_chkpt")
    tf.train.Checkpoint(optimizer=saved_opt).write(model_path)

    # Restore into a fresh optimizer and verify it can keep training.
    tf.train.Checkpoint(optimizer=restored_opt).read(model_path)
    restored_opt.apply_gradients(grads_and_vars)
def get_optimizer(optimizer_param: dict):
    """Build an optimizer (optionally Lookahead-wrapped) from a config dict.

    Bug/perf fix: the original built ALL ten optimizer instances eagerly in
    a dict just to select one, and in doing so read
    `optimizer_param['momentum']` unconditionally — so requesting e.g.
    'adam' from a config without a 'momentum' key raised KeyError.  The
    dict now holds lazy factories; only the requested optimizer is built
    and only its keys are read.

    Parameters
    ----------
    optimizer_param : dict
        Must contain 'name', 'lr', 'clipnorm', 'clipvalue' and 'lookahead';
        'momentum' is required only for the SGD variants, 'sync_period'
        only when 'lookahead' is truthy.

    Returns
    -------
    The configured optimizer; wrapped in `Lookahead` when requested.

    Raises
    ------
    KeyError
        If 'name' is not a supported optimizer (unchanged behaviour).
    """
    optimizer_name = optimizer_param['name'].lower()
    lr = optimizer_param['lr']
    kwargs = {}
    # Zero means "disabled" for both clipping options.
    if optimizer_param['clipnorm'] != 0:
        kwargs['clipnorm'] = optimizer_param['clipnorm']
    if optimizer_param['clipvalue'] != 0:
        kwargs['clipvalue'] = optimizer_param['clipvalue']
    optimizer_factories = {
        'adadelta': lambda: Adadelta(lr, **kwargs),
        'adagrad': lambda: Adagrad(lr, **kwargs),
        'adam': lambda: Adam(lr, **kwargs),
        'adam_amsgrad': lambda: Adam(lr, amsgrad=True, **kwargs),
        'sgd': lambda: SGD(lr, **kwargs),
        'sgd_momentum': lambda: SGD(lr, momentum=optimizer_param['momentum'], **kwargs),
        'sgd_nesterov': lambda: SGD(lr, momentum=optimizer_param['momentum'],
                                    nesterov=True, **kwargs),
        'nadam': lambda: Nadam(lr, **kwargs),
        'rmsprop': lambda: RMSprop(lr, **kwargs),
        'radam': lambda: RectifiedAdam(lr, **kwargs),
    }
    # Unknown names still raise KeyError, exactly as before.
    optimizer = optimizer_factories[optimizer_name]()
    if optimizer_param['lookahead']:
        optimizer = Lookahead(optimizer=optimizer,
                              sync_period=optimizer_param['sync_period'])
    return optimizer
def main():
    """Run training process.

    End-to-end MultiBand MelGAN training driver: parses CLI arguments,
    merges them into the YAML config, builds train/valid datasets,
    constructs the generator/discriminator/PQMF under a distribution
    strategy, and hands everything to MultiBandMelganTrainer.
    """
    parser = argparse.ArgumentParser(
        description="Train MultiBand MelGAN (See detail in examples/multiband_melgan/train_multiband_melgan.py)"
    )
    parser.add_argument(
        "--train-dir",
        default=None,
        type=str,
        help="directory including training data. ",
    )
    parser.add_argument(
        "--dev-dir",
        default=None,
        type=str,
        help="directory including development data. ",
    )
    parser.add_argument(
        "--use-norm", default=1, type=int, help="use norm mels for training or raw."
    )
    parser.add_argument(
        "--outdir", type=str, required=True, help="directory to save checkpoints."
    )
    parser.add_argument(
        "--config", type=str, required=True, help="yaml format configuration file."
    )
    parser.add_argument(
        "--resume",
        default="",
        type=str,
        nargs="?",
        help='checkpoint file path to resume training. (default="")',
    )
    parser.add_argument(
        "--verbose",
        type=int,
        default=1,
        help="logging level. higher is more logging. (default=1)",
    )
    parser.add_argument(
        "--generator_mixed_precision",
        default=0,
        type=int,
        help="using mixed precision for generator or not.",
    )
    parser.add_argument(
        "--discriminator_mixed_precision",
        default=0,
        type=int,
        help="using mixed precision for discriminator or not.",
    )
    parser.add_argument(
        "--pretrained",
        default="",
        type=str,
        nargs="?",
        help='path of .h5 mb-melgan generator to load weights from',
    )
    args = parser.parse_args()

    # return strategy
    STRATEGY = return_strategy()

    # set mixed precision config: enabling it for either network turns on
    # graph-level auto mixed precision globally.
    if args.generator_mixed_precision == 1 or args.discriminator_mixed_precision == 1:
        tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})

    # Normalize the 0/1 int flags to booleans for downstream code.
    args.generator_mixed_precision = bool(args.generator_mixed_precision)
    args.discriminator_mixed_precision = bool(args.discriminator_mixed_precision)

    args.use_norm = bool(args.use_norm)

    # set logger verbosity from --verbose (2+: DEBUG, 1: INFO, 0: WARN)
    if args.verbose > 1:
        logging.basicConfig(
            level=logging.DEBUG,
            stream=sys.stdout,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
        )
    elif args.verbose > 0:
        logging.basicConfig(
            level=logging.INFO,
            stream=sys.stdout,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
        )
    else:
        logging.basicConfig(
            level=logging.WARN,
            stream=sys.stdout,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
        )
        logging.warning("Skip DEBUG/INFO messages")

    # check directory existence
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    # check arguments
    if args.train_dir is None:
        raise ValueError("Please specify --train-dir")
    if args.dev_dir is None:
        raise ValueError("Please specify either --valid-dir")

    # load and save config; CLI arguments override YAML entries of the
    # same name, and the merged config is archived next to the checkpoints.
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    config.update(vars(args))
    config["version"] = tensorflow_tts.__version__
    with open(os.path.join(args.outdir, "config.yml"), "w") as f:
        yaml.dump(config, f, Dumper=yaml.Dumper)
    for key, value in config.items():
        logging.info(f"{key} = {value}")

    # get dataset: optionally drop clips shorter than one training window
    # (plus the generator's auxiliary context on both sides).
    if config["remove_short_samples"]:
        mel_length_threshold = config["batch_max_steps"] // config[
            "hop_size"
        ] + 2 * config["multiband_melgan_generator_params"].get("aux_context_window", 0)
    else:
        mel_length_threshold = None

    if config["format"] == "npy":
        audio_query = "*-wave.npy"
        # normalized vs raw mel features, per --use-norm
        mel_query = "*-raw-feats.npy" if args.use_norm is False else "*-norm-feats.npy"
        audio_load_fn = np.load
        mel_load_fn = np.load
    else:
        raise ValueError("Only npy are supported.")

    # define train/valid dataset
    train_dataset = AudioMelDataset(
        root_dir=args.train_dir,
        audio_query=audio_query,
        mel_query=mel_query,
        audio_load_fn=audio_load_fn,
        mel_load_fn=mel_load_fn,
        mel_length_threshold=mel_length_threshold,
    ).create(
        is_shuffle=config["is_shuffle"],
        map_fn=lambda items: collater(
            items,
            batch_max_steps=tf.constant(config["batch_max_steps"], dtype=tf.int32),
            hop_size=tf.constant(config["hop_size"], dtype=tf.int32),
        ),
        allow_cache=config["allow_cache"],
        # global batch size scales with the number of replicas
        batch_size=config["batch_size"] * STRATEGY.num_replicas_in_sync,
    )

    valid_dataset = AudioMelDataset(
        root_dir=args.dev_dir,
        audio_query=audio_query,
        mel_query=mel_query,
        audio_load_fn=audio_load_fn,
        mel_load_fn=mel_load_fn,
        mel_length_threshold=mel_length_threshold,
    ).create(
        is_shuffle=config["is_shuffle"],
        map_fn=lambda items: collater(
            items,
            batch_max_steps=tf.constant(
                config["batch_max_steps_valid"], dtype=tf.int32
            ),
            hop_size=tf.constant(config["hop_size"], dtype=tf.int32),
        ),
        allow_cache=config["allow_cache"],
        batch_size=config["batch_size"] * STRATEGY.num_replicas_in_sync,
    )

    # define trainer
    trainer = MultiBandMelganTrainer(
        steps=0,
        epochs=0,
        config=config,
        strategy=STRATEGY,
        is_generator_mixed_precision=args.generator_mixed_precision,
        is_discriminator_mixed_precision=args.discriminator_mixed_precision,
    )

    # NOTE(review): model/optimizer construction happens inside the strategy
    # scope so variables are mirrored across replicas.
    with STRATEGY.scope():
        # define generator and discriminator
        generator = TFMelGANGenerator(
            MultiBandMelGANGeneratorConfig(**config["multiband_melgan_generator_params"]),
            name="multi_band_melgan_generator",
        )

        discriminator = TFParallelWaveGANDiscriminator(
            ParallelWaveGANDiscriminatorConfig(
                **config["parallel_wavegan_discriminator_params"]
            ),
            name="parallel_wavegan_discriminator",
        )

        pqmf = TFPQMF(
            MultiBandMelGANGeneratorConfig(**config["multiband_melgan_generator_params"]),
            name="pqmf"
        )

        # dummy input to build model.
        fake_mels = tf.random.uniform(shape=[1, 100, 80], dtype=tf.float32)
        y_mb_hat = generator(fake_mels)
        y_hat = pqmf.synthesis(y_mb_hat)
        discriminator(y_hat)

        # length check distinguishes a real path from the "" default
        if len(args.pretrained) > 2:
            print("Loading pretrained weights...")
            generator.load_weights(args.pretrained)

        generator.summary()
        discriminator.summary()

        # define optimizer: LR schedules are looked up by name from
        # tf.keras.optimizers.schedules and built from config params.
        generator_lr_fn = getattr(
            tf.keras.optimizers.schedules, config["generator_optimizer_params"]["lr_fn"]
        )(**config["generator_optimizer_params"]["lr_params"])
        discriminator_lr_fn = getattr(
            tf.keras.optimizers.schedules,
            config["discriminator_optimizer_params"]["lr_fn"],
        )(**config["discriminator_optimizer_params"]["lr_params"])

        gen_optimizer = tf.keras.optimizers.Adam(
            learning_rate=generator_lr_fn,
            amsgrad=config["generator_optimizer_params"]["amsgrad"],
        )
        # discriminator uses RectifiedAdam (amsgrad disabled)
        dis_optimizer = RectifiedAdam(
            learning_rate=discriminator_lr_fn, amsgrad=False
        )

        trainer.compile(
            gen_model=generator,
            dis_model=discriminator,
            gen_optimizer=gen_optimizer,
            dis_optimizer=dis_optimizer,
            pqmf=pqmf,
        )

    # start training; Ctrl-C saves a checkpoint before exiting
    try:
        trainer.fit(
            train_dataset,
            valid_dataset,
            saved_path=os.path.join(config["outdir"], "checkpoints/"),
            resume=args.resume,
        )
    except KeyboardInterrupt:
        trainer.save_checkpoint()
        logging.info(f"Successfully saved checkpoint @ {trainer.steps}steps.")
def test_get_config():
    """get_config reports the given lr and the zero total_steps default."""
    config = RectifiedAdam(lr=1e-4).get_config()
    assert config["learning_rate"] == 1e-4
    assert config["total_steps"] == 0
def test_get_config(self):
    """get_config reports the given lr and the zero total_steps default."""
    config = RectifiedAdam(lr=1e-4).get_config()
    self.assertEqual(config["learning_rate"], 1e-4)
    self.assertEqual(config["total_steps"], 0)