Code example #1
0
def evaluate(feature, dataset, dev_size, model):
    """Greedy-decode a dev set through the model and report its error rate.

    Args:
        feature: iterable of (uttids, x) batches; x is the batch of acoustic
            features fed to ``model`` (assumes model(x) yields per-frame
            logits — TODO confirm against the model definition).
        dataset: object exposing ``get_attrs('trans', uttids)`` to fetch the
            reference transcripts for a batch of utterance ids.
        dev_size: total utterance count, used only for the progress display.
        model: callable mapping a feature batch to logits.

    Returns:
        float: corpus-level error rate (total edit distance divided by total
        reference length), 0.0 when the dev set is empty.
    """
    num_processed = 0
    total_cer_dist = 0
    total_cer_len = 0
    for batch in feature:
        uttids, x = batch
        logits = model(x)
        # Collapse the frame-level logits (CTC shrink) before the argmax
        # decode so repeated/blank frames do not inflate the hypothesis.
        logits_shrunk = ctc_shrink(logits)
        preds = tf.argmax(logits_shrunk, -1)
        trans = dataset.get_attrs('trans', uttids.numpy())
        batch_cer_dist, batch_cer_len, _ = batch_cer(preds.numpy(), trans)
        total_cer_dist += batch_cer_dist
        total_cer_len += batch_cer_len
        num_processed += len(x)
        sys.stdout.write('\rinfering {} / {} ...'.format(
            num_processed, dev_size))
        sys.stdout.flush()

    # Guard against an empty dev set / empty references: the original
    # division raised ZeroDivisionError when total_cer_len == 0.
    cer = total_cer_dist / total_cer_len if total_cer_len else 0.0
    print('dev PER: {:.3f}\t{} / {}'.format(cer, num_processed, dev_size))

    return cer
Code example #2
0
File: main_GAN.py  Project: eastonYi/Unsupervised-ASR
def train_G(x, _x, _y, G, D, optimizer_G, lambda_supervise, len_D):
    """One generator step: supervised CTC loss plus an adversarial term.

    Args:
        x: unpaired input batch for the adversarial branch.
        _x, _y: paired (input, label) batch for the supervised CTC branch.
        G: generator model (produces logits).
        D: discriminator; frozen here (training=False), higher score = real.
        optimizer_G: optimizer applied to G's trainable variables.
        lambda_supervise: weight on the supervised loss component.
        len_D: fixed time length the discriminator expects.

    Returns:
        (total generator loss, supervised CTC loss) as scalar tensors.
    """
    params_G = G.trainable_variables
    with tf.GradientTape(watch_accessed_variables=False) as tape_G:
        tape_G.watch(params_G)

        # Supervised branch: CTC loss on the paired batch; label lengths are
        # counted as the number of positive (non-padding) ids per row.
        sup_logits = G(_x, training=True)
        label_lens = tf.reduce_sum(tf.cast(_y > 0, tf.int32), -1)
        loss_G_supervise = tf.reduce_mean(
            ctc_loss(sup_logits, get_tensor_len(sup_logits), _y, label_lens))

        # Adversarial branch: shrink + pad/crop to len_D, then try to push
        # the (frozen) discriminator's score on fakes upward.
        unsup_logits = G(x, training=True)
        shrunk = pad_to(ctc_shrink(unsup_logits), len_D)[:, :len_D, :]
        disc_fake = D(tf.nn.softmax(shrunk), training=False)

        loss_G = lambda_supervise * loss_G_supervise - tf.reduce_mean(disc_fake)

    grads = tape_G.gradient(loss_G, params_G)
    optimizer_G.apply_gradients(zip(grads, params_G))

    return loss_G, loss_G_supervise
Code example #3
0
def monitor(sample, model):
    """Print the model's greedy CTC decode for one sample beside its reference.

    Args:
        sample: dict with at least 'feature' (frame features) and 'trans'
            (reference transcript).
        model: callable mapping a feature batch to logits.
    """
    feat = np.array([sample['feature']], dtype=np.float32)
    shrunk = ctc_shrink(model(feat))
    decoded = tf.argmax(shrunk, -1)
    print('predicts: \n', decoded.numpy()[0])
    print('trans: \n', sample['trans'])
Code example #4
0
def train_G(x, encoder, decoder, D, optimizer, len_D):
    """Adversarial-only generator step: update the decoder to fool D.

    Args:
        x: input batch fed through encoder then decoder.
        encoder, decoder: generator halves; only the decoder's variables
            receive gradients here.
        D: discriminator, frozen for this step (training=False).
        optimizer: optimizer applied to the decoder's variables.
        len_D: target length passed to ctc_shrink for the fake posteriors.

    Returns:
        Scalar generator loss (negative mean discriminator score on fakes).
    """
    # Only the decoder is optimized in this step.
    vars_G = decoder.trainable_variables
    with tf.GradientTape() as tape:
        hidden = encoder(x, training=True)
        logits = decoder(hidden, training=True)
        # Shrink the softmax posteriors down to the length D expects.
        P_Fake = ctc_shrink(tf.nn.softmax(logits), tf.shape(logits)[-1], len_D)
        loss_G = -tf.reduce_mean(D(P_Fake, training=False))

    grads = tape.gradient(loss_G, vars_G)
    optimizer.apply_gradients(zip(grads, vars_G))

    return loss_G
Code example #5
0
File: main_GAN.py  Project: eastonYi/Unsupervised-ASR
def train_D(x, P_Real, mask_real, G, D, optimizer_D, lambda_gp, len_D):
    """One WGAN-GP discriminator step: real vs. generator posteriors.

    Args:
        x: input batch for the (frozen) generator G.
        P_Real: real one-hot/posterior sequences for the discriminator.
        mask_real: unused here; kept for interface compatibility.
        G: generator, run with training=False.
        D: discriminator being trained.
        optimizer_D: optimizer applied to D's variables.
        lambda_gp: gradient-penalty weight.
        len_D: fixed time length the discriminator expects.

    Returns:
        (total D cost, mean fake score, mean real score, gradient penalty).
    """
    params_D = D.trainable_variables
    with tf.GradientTape(watch_accessed_variables=False) as tape_D:
        tape_D.watch(params_D)

        fake_logits = G(x, training=False)
        fake_shrunk = pad_to(ctc_shrink(fake_logits), len_D)[:, :len_D, :]
        P_G = tf.nn.softmax(fake_shrunk)

        score_real = D(P_Real, training=True)  # driven towards +inf
        score_fake = D(P_G, training=True)     # driven towards -inf

        gp = gradient_penalty(D, P_Real, P_G)
        disc_cost = (tf.reduce_mean(score_fake) - tf.reduce_mean(score_real)
                     + lambda_gp * gp)

    grads = tape_D.gradient(disc_cost, params_D)
    optimizer_D.apply_gradients(zip(grads, params_D))

    return disc_cost, tf.reduce_mean(score_fake), tf.reduce_mean(score_real), gp
Code example #6
0
def train_D(x, text, encoder, decoder, D, optimizer, lambda_gp, len_D):
    """One WGAN-GP discriminator step against encoder/decoder outputs.

    Args:
        x: input batch run through the (frozen) encoder and decoder.
        text: integer label ids, one-hot encoded as the "real" distribution.
        encoder, decoder: generator halves, run with training=False.
        D: discriminator being trained.
        optimizer: optimizer applied to D's variables.
        lambda_gp: gradient-penalty weight.
        len_D: target length passed to ctc_shrink for the fake posteriors.

    Returns:
        Scalar discriminator loss (Wasserstein estimate plus penalty).

    NOTE(review): reads the module-global ``args.dim_output`` — confirm it is
    defined wherever this function is imported.
    """
    P_Real = tf.one_hot(text, args.dim_output)
    with tf.GradientTape() as tape:
        encoded = encoder(x, training=False)
        logits = decoder(encoded, training=False)
        P_Fake = ctc_shrink(tf.nn.softmax(logits), tf.shape(logits)[-1], len_D)
        disc_fake = D(P_Fake, training=True)
        disc_real = D(P_Real, training=True)
        loss_D = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
        # Random offset into the real batch so the gradient-penalty slice has
        # the same batch size as P_Fake. Clamp maxval to >= 1: for integer
        # dtypes tf.random.uniform requires maxval > minval, so the original
        # code raised whenever the real and fake batches were equal-sized;
        # with maxval=1 the sampled offset is 0, the only valid choice.
        offset_range = tf.maximum(tf.shape(P_Real)[0] - tf.shape(P_Fake)[0], 1)
        idx = tf.random.uniform((), maxval=offset_range, dtype=tf.int32)
        gp = gradient_penalty(D, P_Real[idx:idx + tf.shape(P_Fake)[0]], P_Fake)
        loss_D += lambda_gp * gp

    gradients = tape.gradient(loss_D, D.trainable_variables)
    optimizer.apply_gradients(zip(gradients, D.trainable_variables))

    return loss_D