# imports used throughout this section
import sys
import numpy as np
import tensorflow as tf


def evaluate(feature, dataset, dev_size, model):
    """Greedy-decode the dev set with CTC shrinking and report the error rate."""
    num_processed = 0
    total_cer_dist = 0
    total_cer_len = 0
    total_res_len = 0
    for batch in feature:
        uttids, x = batch
        logits = model(x)
        logits_shrunk = ctc_shrink(logits)
        preds = tf.argmax(logits_shrunk, -1)
        trans = dataset.get_attrs('trans', uttids.numpy())
        batch_cer_dist, batch_cer_len, batch_res_len = batch_cer(
            preds.numpy(), trans)
        total_cer_dist += batch_cer_dist
        total_cer_len += batch_cer_len
        total_res_len += batch_res_len
        num_processed += len(x)
        sys.stdout.write('\rinferring {} / {} ...'.format(
            num_processed, dev_size))
        sys.stdout.flush()
    cer = total_cer_dist / total_cer_len
    print('dev PER: {:.3f}\t{} / {}'.format(cer, num_processed, dev_size))
    return cer
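# ctc_shrink and batch_cer are not defined in this section. Below are minimal
# sketches of what evaluate() (and the training steps further down) appear to
# assume; treating id 0 as the blank/padding symbol and using the third-party
# `editdistance` package are assumptions, not part of the original code.
import editdistance  # any Levenshtein implementation would do


def ctc_shrink(logits, blank_id=0):
    """Sketch: drop blank frames and collapse frame-level repeats (CTC-style),
    left-aligning the surviving frames and zero-padding each utterance."""
    preds = tf.argmax(logits, -1, output_type=tf.int32)
    # a frame survives if its argmax is non-blank and differs from the
    # previous frame's argmax
    prev = tf.pad(preds, [[0, 0], [1, 0]], constant_values=blank_id)[:, :-1]
    keep = tf.logical_and(tf.not_equal(preds, blank_id),
                          tf.not_equal(preds, prev))
    return tf.ragged.boolean_mask(logits, keep).to_tensor()


def batch_cer(preds, trans):
    """Sketch: summed edit distance, reference length and hypothesis length
    over a batch; id 0 is treated as padding/blank and dropped."""
    dist = ref_len = res_len = 0
    for pred, ref in zip(preds, trans):
        pred = [p for p in pred if p > 0]
        dist += editdistance.eval(pred, list(ref))
        ref_len += len(ref)
        res_len += len(pred)
    return dist, ref_len, res_len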
def train_G(x, _x, _y, G, D, optimizer_G, lambda_supervise, len_D):
    params_G = G.trainable_variables
    with tf.GradientTape(watch_accessed_variables=False) as tape_G:
        tape_G.watch(params_G)
        # supervised branch: CTC loss on the labeled pair (_x, _y)
        _logits = G(_x, training=True)
        loss_G_supervise = ctc_loss(
            _logits, get_tensor_len(_logits),
            _y, tf.reduce_sum(tf.cast(_y > 0, tf.int32), -1))
        loss_G_supervise = tf.reduce_mean(loss_G_supervise)
        # unsupervised branch: make the shrunk outputs on unlabeled x look
        # like real text to the (frozen) discriminator
        logits = G(x, training=True)
        logits_shrunk = pad_to(ctc_shrink(logits), len_D)[:, :len_D, :]
        P_G = tf.nn.softmax(logits_shrunk)
        disc_fake = D(P_G, training=False)
        loss_G = lambda_supervise * loss_G_supervise - tf.reduce_mean(
            disc_fake)
    gradients_G = tape_G.gradient(loss_G, params_G)
    optimizer_G.apply_gradients(zip(gradients_G, params_G))
    return loss_G, loss_G_supervise
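# get_tensor_len, pad_to and ctc_loss are likewise undefined here; these are
# minimal sketches of what train_G appears to assume. Treating every row as
# full-length in get_tensor_len and passing blank_index=0 to tf.nn.ctc_loss
# are assumptions.
def get_tensor_len(tensor):
    """Sketch: per-utterance time length; here simply the padded length."""
    return tf.fill([tf.shape(tensor)[0]], tf.shape(tensor)[1])


def pad_to(tensor, length):
    """Sketch: zero-pad the time axis of [batch, time, dim] up to `length`."""
    pad_len = tf.maximum(0, length - tf.shape(tensor)[1])
    return tf.pad(tensor, [[0, 0], [0, pad_len], [0, 0]])


def ctc_loss(logits, len_logits, labels, len_labels):
    """Sketch: thin wrapper over tf.nn.ctc_loss with batch-major logits."""
    return tf.nn.ctc_loss(labels, logits, len_labels, len_logits,
                          logits_time_major=False, blank_index=0)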
def monitor(sample, model):
    """Decode one sample and print the prediction next to its reference."""
    x = np.array([sample['feature']], dtype=np.float32)
    logits = model(x)
    logits_shrunk = ctc_shrink(logits)
    preds = tf.argmax(logits_shrunk, -1)
    print('predicts: \n', preds.numpy()[0])
    print('trans: \n', sample['trans'])
def train_G(x, encoder, decoder, D, optimizer, len_D):
    # encoder/decoder variant of the generator step: only the decoder is
    # updated; the encoder's variables are left out of the gradient
    vars_G = decoder.trainable_variables
    with tf.GradientTape() as tape:
        encoded = encoder(x, training=True)
        logits = decoder(encoded, training=True)
        # this variant of ctc_shrink also takes the vocabulary size and the
        # target length expected by the discriminator
        P_Fake = ctc_shrink(tf.nn.softmax(logits), tf.shape(logits)[-1],
                            len_D)
        disc_fake = D(P_Fake, training=False)
        loss_G = -tf.reduce_mean(disc_fake)
    gradients = tape.gradient(loss_G, vars_G)
    optimizer.apply_gradients(zip(gradients, vars_G))
    return loss_G
def train_D(x, P_Real, mask_real, G, D, optimizer_D, lambda_gp, len_D):
    # mask_real is accepted but not used in this variant
    params_D = D.trainable_variables
    with tf.GradientTape(watch_accessed_variables=False) as tape_D:
        tape_D.watch(params_D)
        logits = G(x, training=False)
        logits_shrunk = pad_to(ctc_shrink(logits), len_D)[:, :len_D, :]
        P_G = tf.nn.softmax(logits_shrunk)
        disc_real = D(P_Real, training=True)  # pushed towards +inf
        disc_fake = D(P_G, training=True)     # pushed towards -inf
        disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
        # WGAN-GP penalty on interpolates between real and generated text
        gp = gradient_penalty(D, P_Real, P_G)
        disc_cost += lambda_gp * gp
    gradients_D = tape_D.gradient(disc_cost, params_D)
    optimizer_D.apply_gradients(zip(gradients_D, params_D))
    return disc_cost, tf.reduce_mean(disc_fake), tf.reduce_mean(disc_real), gp
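# gradient_penalty is not defined in this section. A minimal sketch of the
# standard WGAN-GP penalty (Gulrajani et al., 2017) with the signature used
# above; the unit target norm and the stabilizing epsilon are the usual
# choices, assumed here rather than taken from the original code.
def gradient_penalty(D, real, fake):
    """Sketch: penalize the discriminator's gradient norm on random
    interpolates between real and fake [batch, time, dim] inputs."""
    alpha = tf.random.uniform([tf.shape(real)[0], 1, 1], 0.0, 1.0)
    interpolates = real + alpha * (fake - real)
    with tf.GradientTape() as tape:
        tape.watch(interpolates)
        disc_interp = D(interpolates, training=True)
    grads = tape.gradient(disc_interp, interpolates)
    norms = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2]) + 1e-12)
    return tf.reduce_mean(tf.square(norms - 1.0))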
def train_D(x, text, encoder, decoder, D, optimizer, lambda_gp, len_D):
    # encoder/decoder variant of the discriminator step: real samples are
    # one-hot text distributions, fake samples are the frozen generator's
    # shrunk output distributions
    P_Real = tf.one_hot(text, args.dim_output)
    with tf.GradientTape() as tape:
        encoded = encoder(x, training=False)
        logits = decoder(encoded, training=False)
        P_Fake = ctc_shrink(tf.nn.softmax(logits), tf.shape(logits)[-1],
                            len_D)
        disc_fake = D(P_Fake, training=True)
        disc_real = D(P_Real, training=True)
        loss_D = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
        # the text batch may be larger than the speech batch; slice a random
        # window of P_Real so the gradient penalty sees equal batch sizes
        idx = tf.random.uniform(
            (), maxval=(tf.shape(P_Real)[0] - tf.shape(P_Fake)[0]),
            dtype=tf.int32)
        gp = gradient_penalty(D, P_Real[idx:idx + tf.shape(P_Fake)[0]],
                              P_Fake)
        loss_D += lambda_gp * gp
    gradients = tape.gradient(loss_D, D.trainable_variables)
    optimizer.apply_gradients(zip(gradients, D.trainable_variables))
    return loss_D
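# A sketch of how these steps might be driven: the usual WGAN schedule of
# several discriminator updates per generator update. Everything here
# (iterator names, n_critic, num_steps, the optimizers) is an assumption for
# illustration, not part of the original code.
n_critic = 5
for step in range(num_steps):
    for _ in range(n_critic):
        x = next(iter_speech)   # unlabeled speech features
        text = next(iter_text)  # unpaired text ids
        loss_D = train_D(x, text, encoder, decoder, D,
                         optimizer_D, lambda_gp, len_D)
    x = next(iter_speech)
    loss_G = train_G(x, encoder, decoder, D, optimizer_G, len_D)
    if step % 100 == 0:
        print('step {}: loss_D {:.3f}, loss_G {:.3f}'.format(
            step, loss_D, loss_G))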