def main():
    questions, answers = load_conversations()

    # Build tokenizer using tfds for both questions and answers
    # (note: SubwordTextEncoder lives under tfds.deprecated.text in newer TFDS releases)
    tokenizer = tfds.features.text.SubwordTextEncoder.build_from_corpus(
        questions + answers, target_vocab_size=2**13)
    tokenizer.save_to_file(vocab_filename)

    # Vocabulary size plus start and end token
    VOCAB_SIZE = tokenizer.vocab_size + 2

    questions, answers = tokenize_and_filter(questions, answers, tokenizer)

    print('Vocab size: {}'.format(VOCAB_SIZE))
    print('Number of samples: {}'.format(len(questions)))

    # decoder inputs use the previous target as input
    # remove START_TOKEN from targets
    dataset = tf.data.Dataset.from_tensor_slices((
        {'inputs': questions},
        {'outputs': answers},
    ))
    dataset = dataset.cache()
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    print(dataset)

    model = Transformer(
        num_layers=NUM_LAYERS,
        units=UNITS,
        d_model=D_MODEL,
        num_heads=NUM_HEADS,
        vocab_size=VOCAB_SIZE,
        dropout=DROPOUT,
        name='transformer')

    learning_rate = CustomSchedule(D_MODEL)
    optimizer = tf.keras.optimizers.Adam(
        learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(
        ckpt, checkpoint_path, max_to_keep=5)

    # If a checkpoint exists, restore the latest one.
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!')

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    # Custom training loop: one optimizer update per batch via train_step,
    # with a checkpoint saved every 5 epochs.
    for epoch in range(EPOCHS):
        start = time.time()

        train_loss.reset_states()
        train_accuracy.reset_states()

        for batch, (inp, tar) in enumerate(dataset):
            train_step(inp, tar, model, optimizer, train_loss, train_accuracy)

            if batch % 500 == 0:
                print('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(
                    epoch + 1, batch, train_loss.result(), train_accuracy.result()))

        if (epoch + 1) % 5 == 0:
            ckpt_save_path = ckpt_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(
                epoch + 1, ckpt_save_path))

        print('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(
            epoch + 1, train_loss.result(), train_accuracy.result()))
        print('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

    model.save_weights(save_weight_path)
    # model.summary()

    input_sentence = 'Where have you been?'
    predict(input_sentence, tokenizer, model)

    # Feed each reply back in as the next input for a short self-conversation.
    sentence = 'I am not crazy, my mother had me tested.'
    for _ in range(5):
        sentence = predict(sentence, tokenizer, model)
        print('')
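CustomSchedule is used above but not defined in this section. A minimal sketch, assuming it follows the warmup schedule from "Attention Is All You Need" (lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)), as in the standard TensorFlow Transformer tutorial; the warmup_steps default of 4000 is an assumption:

import tensorflow as tf

class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Sketch: linear warmup followed by inverse-square-root decay."""

    def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)                 # decay after warmup
        arg2 = step * (self.warmup_steps ** -1.5)  # linear warmup ramp
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

Passing this schedule to Adam (as above) makes the learning rate a function of the global step, so no manual decay callbacks are needed.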
# Build subword batch generators; the validation call receives the args
# dict produced by the training call, so both splits share one setup.
args = subword_batches(zip(in_texts, tar_texts), maxlen)
args = subword_batches(zip(in_valid, tar_valid), maxlen, args)

dg = args[params.train_generator]
vdg = args[params.valid_generator]
input_vocab_size = args[params.input_vocab_size]
target_vocab_size = args[params.target_vocab_size]

transformer = Transformer(
    num_layers, d_model, num_heads, dff,
    input_vocab_size, target_vocab_size,
    pe_input=input_vocab_size,
    pe_target=target_vocab_size,
    target_len=args[params.valid_seq_len],
    rate=dropout_rate)

transformer.compile(
    optimizer=optimizer,
    loss=loss_function,
    metrics=[accuracy_function, bleu_score])

history = transformer.fit(dg, epochs=params.epochs, validation_data=vdg)

# Persist weights and training history, keyed by sequence length and
# training-set size.
transformer.save_weights(
    weights_dir + '/w' + str(maxlen) + '_ex' + str(params.train_size))
with open(history_dir + '/h' + str(maxlen) + '_ex' + str(params.train_size), 'w') as f:
    json.dump(history.history, f)
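loss_function and accuracy_function are referenced in compile() but not defined in this section (they may live elsewhere in the file). A minimal sketch of padding-masked versions, assuming pad token id 0, along the lines of the standard TensorFlow Transformer tutorial; bleu_score is omitted:

import tensorflow as tf

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    # Average cross-entropy over non-padding positions only.
    mask = tf.cast(tf.math.not_equal(real, 0), tf.float32)
    loss_ = loss_object(real, pred) * mask
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)

def accuracy_function(real, pred):
    # Token-level accuracy, ignoring padding positions.
    pred_ids = tf.cast(tf.argmax(pred, axis=-1), real.dtype)
    matches = tf.cast(tf.equal(real, pred_ids), tf.float32)
    mask = tf.cast(tf.math.not_equal(real, 0), tf.float32)
    return tf.reduce_sum(matches * mask) / tf.reduce_sum(mask)

Masking matters here because batches are padded to maxlen: without it, long runs of pad tokens would dominate both the loss and the reported accuracy.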