Example no. 1

import time

import tensorflow as tf
import tensorflow_datasets as tfds

# Helpers and hyperparameters used below (load_conversations,
# tokenize_and_filter, Transformer, CustomSchedule, train_step, predict,
# vocab_filename, checkpoint_path, save_weight_path, BUFFER_SIZE,
# BATCH_SIZE, NUM_LAYERS, UNITS, D_MODEL, NUM_HEADS, DROPOUT, EPOCHS)
# are defined elsewhere in this script.


def main():
    questions, answers = load_conversations()
    # Build a subword tokenizer over both questions and answers.
    # (In tensorflow_datasets >= 4 this class lives at
    # tfds.deprecated.text.SubwordTextEncoder.)
    tokenizer = tfds.features.text.SubwordTextEncoder.build_from_corpus(
        questions + answers, target_vocab_size=2**13)

    tokenizer.save_to_file(vocab_filename)

    # Vocabulary size plus the START and END tokens
    VOCAB_SIZE = tokenizer.vocab_size + 2

    questions, answers = tokenize_and_filter(questions, answers, tokenizer)
    print('Vocab size: {}'.format(VOCAB_SIZE))
    print('Number of samples: {}'.format(len(questions)))
    # train_step is expected to shift the targets itself: decoder input =
    # tar[:, :-1], labels = tar[:, 1:] (i.e. START_TOKEN dropped from labels)
    dataset = tf.data.Dataset.from_tensor_slices((
        {
            'inputs': questions
        },
        {
            'outputs': answers
        },
    ))

    # Cache the tokenized pairs, then shuffle, batch, and prefetch so the
    # input pipeline overlaps with training.
    dataset = dataset.cache()
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    print(dataset)

    model = Transformer(num_layers=NUM_LAYERS,
                        units=UNITS,
                        d_model=D_MODEL,
                        num_heads=NUM_HEADS,
                        vocab_size=VOCAB_SIZE,
                        dropout=DROPOUT,
                        name='transformer')

    learning_rate = CustomSchedule(D_MODEL)

    # Adam with the hyperparameters from "Attention Is All You Need".
    optimizer = tf.keras.optimizers.Adam(learning_rate,
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)

    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              checkpoint_path,
                                              max_to_keep=5)

    # if a checkpoint exists, restore the latest checkpoint.
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    for epoch in range(EPOCHS):
        start = time.time()

        train_loss.reset_states()
        train_accuracy.reset_states()

        for (batch, (inp, tar)) in enumerate(dataset):

            train_step(inp, tar, model, optimizer, train_loss, train_accuracy)

            if batch % 500 == 0:
                print('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(
                    epoch + 1, batch, train_loss.result(),
                    train_accuracy.result()))

        if (epoch + 1) % 5 == 0:
            ckpt_save_path = ckpt_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(
                epoch + 1, ckpt_save_path))

        print('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(
            epoch + 1, train_loss.result(), train_accuracy.result()))

        print('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

    model.save_weights(save_weight_path)
    # model.summary()

    # Quick sanity check: generate a reply to a single prompt.
    input_sentence = 'Where have you been?'
    predict(input_sentence, tokenizer, model)

    # Then feed the model's own replies back in for five turns.
    sentence = 'I am not crazy, my mother had me tested.'
    for _ in range(5):
        sentence = predict(sentence, tokenizer, model)
        print('')
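
main() leans on two helpers that are defined elsewhere in the script. The sketches below are assumptions based on the common TensorFlow Transformer recipe, not this project's actual code: a tokenize_and_filter that wraps sentences in START/END tokens and pads to a fixed length (the max_length=40 default is a guess), and the warmup learning-rate schedule from "Attention Is All You Need" (lrate = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)), which the beta_2=0.98, epsilon=1e-9 Adam settings above usually accompany.

def tokenize_and_filter(inputs, outputs, tokenizer, max_length=40):
    # Hypothetical sketch: add START/END tokens, drop pairs longer than
    # max_length, and pad the survivors to a fixed length.
    START_TOKEN = [tokenizer.vocab_size]
    END_TOKEN = [tokenizer.vocab_size + 1]
    tokenized_inputs, tokenized_outputs = [], []
    for sentence1, sentence2 in zip(inputs, outputs):
        s1 = START_TOKEN + tokenizer.encode(sentence1) + END_TOKEN
        s2 = START_TOKEN + tokenizer.encode(sentence2) + END_TOKEN
        if len(s1) <= max_length and len(s2) <= max_length:
            tokenized_inputs.append(s1)
            tokenized_outputs.append(s2)
    tokenized_inputs = tf.keras.preprocessing.sequence.pad_sequences(
        tokenized_inputs, maxlen=max_length, padding='post')
    tokenized_outputs = tf.keras.preprocessing.sequence.pad_sequences(
        tokenized_outputs, maxlen=max_length, padding='post')
    return tokenized_inputs, tokenized_outputs


class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    # Hypothetical sketch: linear warmup for warmup_steps, then
    # inverse-square-root decay, both scaled by 1/sqrt(d_model).
    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)
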
Example no. 2

import json

# subword_batches, params, the raw text splits (in_texts, tar_texts,
# in_valid, tar_valid), and the names below (num_layers, d_model, num_heads,
# dff, dropout_rate, maxlen, optimizer, loss_function, accuracy_function,
# bleu_score, weights_dir, history_dir) are defined elsewhere in this project.
args = subword_batches(zip(in_texts, tar_texts), maxlen)
args = subword_batches(zip(in_valid, tar_valid), maxlen, args)
dg = args[params.train_generator]
vdg = args[params.valid_generator]
input_vocab_size = args[params.input_vocab_size]
target_vocab_size = args[params.target_vocab_size]

# pe_input / pe_target bound the positional encodings; this example reuses
# the vocabulary sizes for those bounds.
transformer = Transformer(num_layers,
                          d_model,
                          num_heads,
                          dff,
                          input_vocab_size,
                          target_vocab_size,
                          pe_input=input_vocab_size,
                          pe_target=target_vocab_size,
                          target_len=args[params.valid_seq_len],
                          rate=dropout_rate)

# Compile with the project's custom loss and metrics, then train.
transformer.compile(optimizer=optimizer,
                    loss=loss_function,
                    metrics=[accuracy_function, bleu_score])
history = transformer.fit(dg, epochs=params.epochs, validation_data=vdg)

transformer.save_weights(weights_dir + '/w' + str(maxlen) + '_ex' +
                         str(params.train_size))

# Persist the training history so runs can be compared later.
with open(history_dir + '/h' + str(maxlen) + '_ex' + str(params.train_size),
          'w') as f:
    json.dump(history.history, f)
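
The compile call above takes a custom loss_function and accuracy_function from elsewhere in the project. A minimal sketch, assuming they follow the usual padding-masked pattern for seq2seq Transformers; the padding id 0, the from_logits=True assumption about the model's outputs, and every name here are guesses, not this project's actual code:

import tensorflow as tf

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')


def loss_function(real, pred):
    # Hypothetical sketch: zero out padding positions (assumed id 0) so
    # they contribute nothing to the mean loss.
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)


def accuracy_function(real, pred):
    # Hypothetical sketch: token-level accuracy over non-padding positions.
    real = tf.cast(real, tf.int64)
    accuracies = tf.equal(real, tf.argmax(pred, axis=2))
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    accuracies = tf.cast(tf.math.logical_and(mask, accuracies), tf.float32)
    mask = tf.cast(mask, tf.float32)
    return tf.reduce_sum(accuracies) / tf.reduce_sum(mask)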