Esempio n. 1
0
    },
    padding_values={
        'audio': tf.constant(0, dtype=tf.float32),
        'mel': tf.constant(0, dtype=tf.float32),
    },
)

# Next element of the input pipeline. The module-level helper replaces the
# deprecated `Dataset.make_one_shot_iterator()` method, which TF 2.x dataset
# objects no longer expose; on TF 1.13+ both spellings build the same iterator.
features = tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()

# MelGAN generator and multi-scale discriminator, each configured from its
# parameter dictionary in `malaya_speech.config.melgan_config`.
melgan_config = malaya_speech.config.melgan_config
generator = melgan.Generator(
    melgan.GeneratorConfig(**melgan_config['melgan_generator_params']),
    name='melgan-generator',
)
discriminator = melgan.MultiScaleDiscriminator(
    melgan.DiscriminatorConfig(**melgan_config['melgan_discriminator_params']),
    name='melgan-discriminator',
)

# Loss objects instantiated once at module level.
mels_loss = melgan.loss.TFMelSpectrogram()
mse_loss = tf.keras.losses.MeanSquaredError()
mae_loss = tf.keras.losses.MeanAbsoluteError()


def compute_per_example_generator_losses(audios, outputs):
    y_hat = outputs
    p_hat = discriminator(y_hat)
    p = discriminator(tf.expand_dims(audios, 2))

    adv_loss = 0.0
    for i in range(len(p_hat)):
Esempio n. 2
0
import malaya_speech.config
from malaya_speech.train.loss import calculate_2d_loss, calculate_3d_loss

# Parameter dictionaries for every HiFi-GAN component built below.
hifigan_config = malaya_speech.config.hifigan_config

# Generator.
_generator_config = hifigan.GeneratorConfig(
    **hifigan_config["hifigan_generator_params"]
)
generator = hifigan.Generator(_generator_config, name="hifigan_generator")

# Multi-period discriminator.
_period_config = hifigan.DiscriminatorConfig(
    **hifigan_config["hifigan_discriminator_params"]
)
multiperiod_discriminator = hifigan.MultiPeriodDiscriminator(
    _period_config,
    name="hifigan_multiperiod_discriminator",
)

# Multi-scale discriminator (MelGAN architecture).
# NOTE(review): `name` is passed to the config object here, not to
# `MultiScaleDiscriminator` as is done for the other models in this file.
# Looks unintended, but moving it may rename the model's variables and break
# restoring existing checkpoints -- confirm before changing.
_scale_config = melgan.DiscriminatorConfig(
    **hifigan_config["melgan_discriminator_params"],
    name="melgan_multiscale_discriminator",
)
multiscale_discriminator = melgan.MultiScaleDiscriminator(_scale_config)

# Combined discriminator wrapping the two discriminators above.
discriminator = hifigan.Discriminator(
    multiperiod_discriminator,
    multiscale_discriminator,
)

# Loss objects instantiated once at module level.
_stft_loss_params = hifigan_config["stft_loss_params"]
stft_loss = stft.loss.MultiResolutionSTFT(**_stft_loss_params)
mels_loss = melgan.loss.TFMelSpectrogram()
mse_loss = tf.keras.losses.MeanSquaredError()
mae_loss = tf.keras.losses.MeanAbsoluteError()


def compute_per_example_generator_losses(features):
    y_hat = generator(features['mel'], training=True)
    audios = features['audio']

    sc_loss, mag_loss = calculate_2d_loss(audios, tf.squeeze(y_hat, -1),