Example #1
import re
import time

import numpy as np
import tensorflow as tf
from loguru import logger                  # provides logger.add / logger.info below
from progress.spinner import MoonSpinner   # console progress indicator used during testing

# hp, UTransformer, Loss, train_input_fn, nino_seq and score are
# project-specific and assumed to be defined/imported elsewhere.


def main():
    train_dataset, test_dataset = train_input_fn()
    optimizer = tf.keras.optimizers.Adam(learning_rate=hp.lr)
    model = UTransformer(hp)
    model_loss = Loss(model)

    best_score = float('-inf')
    not_improved_count = 0

    checkpoint_file = hp.ckpt
    if checkpoint_file == '':
        checkpoint_file = 'ckp_0'
    else:
        model.load_weights(f'{hp.single_gpu_model_dir}/{checkpoint_file}')

    logger.add(f"{hp.logdir}/cmip_train.log", enqueue=True)

    for epoch in range(hp.num_epochs):
        for step, (x_batch_train, ys_batch_train) in enumerate(train_dataset):
            start = time.perf_counter()  # time.clock() was removed in Python 3.8
            with tf.GradientTape() as tape:
                y_predict = model([x_batch_train, ys_batch_train], training=True)
                loss_ssim, loss_l2, loss_l1, loss = model_loss([y_predict, ys_batch_train[1]])
            grads = tape.gradient(loss, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
            elapsed = time.perf_counter() - start
            template = ("step {} loss is {:1.5f}, "
                        "loss ssim is {:1.5f}, "
                        "loss l2 is {:1.5f}, "
                        "loss l1 is {:1.5f}."
                        "({:1.2f}s/step)")
            logger.info(template.format(step, loss.numpy(), loss_ssim.numpy(), loss_l2.numpy(), loss_l1.numpy(), elapsed))

        if epoch % hp.num_epoch_record == 0:
            loss_test = 0
            loss_ssim_test = 0
            loss_l2_test = 0
            loss_l1_test = 0
            count = 0
            y_true, y_pred = [], []
            spinner = MoonSpinner('Testing ')
            for step, (x_batch_test, ys_batch_test) in enumerate(test_dataset):
                y_predict = model([x_batch_test, ys_batch_test], training=False)
                loss_ssim, loss_l2, loss_l1, loss = model_loss([y_predict, ys_batch_test[1]])
                loss_ssim_test += loss_ssim.numpy()
                loss_l2_test += loss_l2.numpy()
                loss_l1_test += loss_l1.numpy()
                loss_test += loss.numpy()
                count += 1

                y_true.append(np.array(nino_seq(ys_batch_test[1][:, :, :, :, 0])))
                y_pred.append(np.array(nino_seq(y_predict[:, :, :, :, 0])))

                spinner.next()

            y_true = tf.concat(y_true, axis=0)
            y_pred = tf.concat(y_pred, axis=0)
            sco = score(y_true, y_pred)
            if sco > best_score:
                best_score = sco
                not_improved_count = 0
                best_state = True
            else:
                not_improved_count += 1
                best_state = False

            spinner.finish()
            logger.info("TEST COMPLETE!")
            template = ("TEST DATASET STATISTICS: "
                        "loss is {:1.5f}, "
                        "loss ssim is {:1.5f}, "
                        "loss l2 is {:1.5f}, "
                        "loss l1 is {:1.5f},"
                        "acc skill score is {:1.5f}.")
            logger.info(template.format(loss_test/count, loss_ssim_test/count, loss_l2_test/count, loss_l1_test/count, sco))

            # bump the epoch index embedded in the checkpoint file name
            total_epoch = int(re.findall(r"\d+", checkpoint_file)[0])
            checkpoint_file = checkpoint_file.replace(f'_{total_epoch}', f'_{total_epoch + 1}')
            # if not_improved_count == hp.early_stop_patience:
            #     print("Validation performance didn\'t improve for {} epochs. "  "Training stops.".format(
            #         hp.early_stop_patience))
            #     break
            # if best_state:
            model.save_weights(f'{hp.single_gpu_model_dir}/{checkpoint_file}', save_format='tf')
            # model.save("my_model")
            logger.info("Saved checkpoint_file {}".format(checkpoint_file))
Example #2
def evaluateTestingPairs(encoder, decoder, pairs, input_lang, output_lang, args):
    """Decode every test pair, split the results by reference length
    (<= 15 vs. > 15 tokens) and report per-bucket and overall scores."""
    score_short = 0
    score_long = 0
    list_cand_short = []
    list_ref_short = []
    list_cand_long = []
    list_ref_long = []

    print("Evaluating {} testing sentences...".format(len(pairs)))
    
    for pair in pairs:
        output_words = evaluate(encoder, decoder, pair[0], input_lang, output_lang, args)
        output_sentence = ' '.join(output_words)
        sent_length = len(pair[1].split(' '))
        if sent_length > 15:
            list_cand_long.append(output_sentence)
            list_ref_long.append(pair[1])
        else:
            list_cand_short.append(output_sentence)
            list_ref_short.append(pair[1])

    print("Num of short sentences (length <= 15):", len(list_cand_short))
    if len(list_cand_short) > 0:
        if args.metric == "MULTI":
            score_short_rouge1, score_short_rouge2, score_short_bleu, score_short_bleu_clip = \
                multi_score(list_cand_short, list_ref_short)
            print("score for short sentnces (length <= 15):")
            print("ROUGE1:", score_short_rouge1)
            print("ROUGE2:", score_short_rouge2)
            print("BLEU:", score_short_bleu)
            print("BLEU_CLIP:", score_short_bleu_clip)
            print()
        else:
            score_short = score(list_cand_short, list_ref_short, args.metric)
            print("{} score for short sentnces (length <= 15): {}".format(args.metric, score_short))

    print("Num of long sentences (length > 15):", len(list_cand_long))
    if len(list_cand_long) > 0:
        if args.metric == "MULTI":
            score_long_rouge1, score_long_rouge2, score_long_bleu, score_long_bleu_clip = \
                multi_score(list_cand_long, list_ref_long)
            print("score for long sentnces (length > 15):")
            print("ROUGE1:", score_long_rouge1)
            print("ROUGE2:", score_long_rouge2)
            print("BLEU:", score_long_bleu)
            print("BLEU_CLIP:", score_long_bleu_clip)
            print()
        else:
            score_long = score(list_cand_long, list_ref_long, args.metric)
            print("{} score for long sentnces (length > 15): {}".format(args.metric, score_long))

    get_score_overall = lambda score_short, score_long: \
        (score_short * len(list_cand_short) + score_long * len(list_cand_long)) \
        / (len(list_cand_short) + len(list_cand_long))
    if args.metric == "MULTI":
            score_overall_rouge1 = get_score_overall(score_short_rouge1, score_long_rouge1)
            score_overall_rouge2 = get_score_overall(score_short_rouge2, score_long_rouge2)
            score_overall_bleu = get_score_overall(score_short_bleu, score_long_bleu)
            score_overall_bleu_clip = get_score_overall(score_short_bleu_clip, score_long_bleu_clip)
            print("Overall:")
            print("ROUGE1:", score_overall_rouge1)
            print("ROUGE2:", score_overall_rouge2)
            print("BLEU:", score_overall_bleu)
            print("BLEU_CLIP:", score_overall_bleu_clip)
            print()
    else:
        score_overall = get_score_overall(score_short, score_long)
        print("Overall {} score: {}".format(args.metric, score_overall))