# Tail of a batching call that begins before this chunk (presumably a
# tf.data padded_batch — TODO confirm against the full file). Ragged
# 'audio'/'mel' examples are padded with 0.0 so batch elements align.
}, padding_values={
    'audio': tf.constant(0, dtype=tf.float32),
    'mel': tf.constant(0, dtype=tf.float32),
},
)
# TF1-style input pipeline: a one-shot iterator yields the next feature dict.
features = dataset.make_one_shot_iterator().get_next()

# MelGAN model construction from the project's stock hyperparameter config.
melgan_config = malaya_speech.config.melgan_config
generator = melgan.Generator(
    melgan.GeneratorConfig(**melgan_config['melgan_generator_params']),
    name='melgan-generator',
)
# NOTE(review): `name=` is passed to the discriminator layer itself (not its
# config) — the parallel HiFi-GAN script should follow the same pattern.
discriminator = melgan.MultiScaleDiscriminator(
    melgan.DiscriminatorConfig(**melgan_config['melgan_discriminator_params']),
    name='melgan-discriminator',
)

# Loss terms used by the (truncated) training objective below.
mels_loss = melgan.loss.TFMelSpectrogram()
mse_loss = tf.keras.losses.MeanSquaredError()
mae_loss = tf.keras.losses.MeanAbsoluteError()

def compute_per_example_generator_losses(audios, outputs):
    """Per-example generator losses for MelGAN training.

    Only the head is visible in this chunk: the generated waveform and the
    real audio are both run through the discriminator, and an adversarial
    loss is accumulated over the discriminator's output list. The remainder
    of the definition lies beyond this chunk.
    """
    y_hat = outputs
    p_hat = discriminator(y_hat)
    # Real audio gets a trailing channel axis before the discriminator;
    # presumably it arrives as (batch, samples) — TODO confirm.
    p = discriminator(tf.expand_dims(audios, 2))
    adv_loss = 0.0
    for i in range(len(p_hat)):
        # NOTE: source truncated here — the loop body continues beyond
        # this chunk.
import malaya_speech.config from malaya_speech.train.loss import calculate_2d_loss, calculate_3d_loss hifigan_config = malaya_speech.config.hifigan_config generator = hifigan.Generator( hifigan.GeneratorConfig(**hifigan_config['hifigan_generator_params']), name='hifigan_generator', ) multiperiod_discriminator = hifigan.MultiPeriodDiscriminator( hifigan.DiscriminatorConfig( **hifigan_config['hifigan_discriminator_params']), name='hifigan_multiperiod_discriminator', ) multiscale_discriminator = melgan.MultiScaleDiscriminator( melgan.DiscriminatorConfig( **hifigan_config['melgan_discriminator_params'], name='melgan_multiscale_discriminator', )) discriminator = hifigan.Discriminator(multiperiod_discriminator, multiscale_discriminator) stft_loss = stft.loss.MultiResolutionSTFT(**hifigan_config['stft_loss_params']) mels_loss = melgan.loss.TFMelSpectrogram() mse_loss = tf.keras.losses.MeanSquaredError() mae_loss = tf.keras.losses.MeanAbsoluteError() def compute_per_example_generator_losses(features): y_hat = generator(features['mel'], training=True) audios = features['audio'] sc_loss, mag_loss = calculate_2d_loss(audios, tf.squeeze(y_hat, -1),