    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        log.info(ckpt_manager.latest_checkpoint + ' restored')
        # Parse the step out of the checkpoint name (e.g. "ckpt-12"); splitting
        # on '-' works for any number of digits, unlike a fixed [-2:] slice.
        latest_ckpt = int(ckpt_manager.latest_checkpoint.split('-')[-1])
    else:
        latest_ckpt = 0
        log.info('Training from scratch')
    return (ckpt_manager, latest_ckpt)

@tf.function
def distributed_train_step(dist_inputs):
    # Run train_step on each replica and sum the per-replica losses.
    per_replica_losses, target_ids_, refine_predictions = strategy.run(
        train_step, args=(dist_inputs,))
    return (strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None),
            target_ids_, refine_predictions)

from numpy.random import randint  # used below to pick a random start index

train_dataset, val_dataset, num_of_train_examples, _ = create_train_data()
val_dataset = strategy.experimental_distribute_dataset(val_dataset)
train_dataset = strategy.experimental_distribute_dataset(train_dataset)
train_loss, train_accuracy = get_loss_and_accuracy()
validation_loss, validation_accuracy = get_loss_and_accuracy()
accumulators = []
# If a checkpoint exists, restore the latest checkpoint.
ck_pt_mgr, latest_ckpt = check_ckpt(file_path.checkpoint_path)
total_steps = int(h_parms.epochs * h_parms.accumulation_steps)
#train_dataset = train_dataset.repeat(total_steps)
count = 0
ds_train_size = 287113
length = 1
start = randint(ds_train_size - length, size=1)[0]
train_buffer_size = 287113

for (step, (input_ids, input_mask, input_segment_ids,
            target_ids_, target_mask, target_segment_ids)) in enumerate(train_dataset):
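    # A minimal sketch of the loop body, since the excerpt ends at the `for`
    # header. This is hypothetical: it assumes train_step consumes the batch as
    # a single tuple and returns (loss, target_ids, refine_predictions).
    batch_loss, step_target_ids, step_refine_predictions = distributed_train_step(
        (input_ids, input_mask, input_segment_ids,
         target_ids_, target_mask, target_segment_ids))
    train_loss(batch_loss)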
import time
import os
import shutil
import tensorflow as tf
import tensorflow_datasets as tfds
from preprocess import create_train_data
from transformer import Transformer, Pointer_Generator, create_masks
from hyper_parameters import h_parms
from configuration import config
from metrics import optimizer, loss_function, get_loss_and_accuracy, tf_write_summary, monitor_run
from input_path import file_path
from creates import log, train_summary_writer, valid_summary_writer
from create_tokenizer import tokenizer_en
from local_tf_ops_v2 import *

tf.random.set_seed(100)

train_dataset, val_dataset, num_of_train_examples, num_of_valid_examples = create_train_data()
train_loss, train_accuracy = get_loss_and_accuracy()
validation_loss, validation_accuracy = get_loss_and_accuracy()
transformer = Transformer(
    num_layers=config.num_layers,
    d_model=config.d_model,
    num_heads=config.num_heads,
    dff=config.dff,
    input_vocab_size=config.input_vocab_size,
    target_vocab_size=config.target_vocab_size,
    rate=h_parms.dropout_rate)
pointer_generator = Pointer_Generator()

@tf.function(input_signature=train_step_signature)
def train_step(inp, tar, inp_shape, tar_shape, batch):
import preprocess
from rnn_numpy import RNNNumpy
from rnn_theano import RNNTheano
import numpy as np
import cProfile

UNKNOWN_TOKEN = "UNKNOWN_TOKEN"
SENTENCE_START_TOKEN = "SENTENCE_START"
SENTENCE_END_TOKEN = "SENTENCE_END"

X_train, y_train, vocabulary_size, index_to_word, word_to_index = preprocess.create_train_data()

np.random.seed(10)
model = RNNNumpy(vocabulary_size)

def generate_sentence(model, index_to_word, word_to_index, min_length=5):
    # We start the sentence with the start token
    new_sentence = [word_to_index[SENTENCE_START_TOKEN]]
    # Repeat until we get an end token
    while new_sentence[-1] != word_to_index[SENTENCE_END_TOKEN]:
        next_word_probs = model.predict(new_sentence)[-1]
        # multinomial expects a 1-D probability vector, not a nested list
        samples = np.random.multinomial(1, next_word_probs)
        sampled_word = np.argmax(samples)
        new_sentence.append(sampled_word)
        # Sometimes we get stuck if the sentence becomes too long, e.g. "........" :(
        # And: we don't want sentences with UNKNOWN_TOKENs
        if len(new_sentence) > 100 or sampled_word == word_to_index[UNKNOWN_TOKEN]:
            return None
    # Reject sentences that are too short; otherwise map indices back to words,
    # dropping the start and end tokens.
    if len(new_sentence) < min_length:
        return None
    return [index_to_word[x] for x in new_sentence[1:-1]]
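# Usage sketch for generate_sentence (num_sentences is an assumed value, not
# from the original file): the function returns None for rejected samples, so
# retry until a sentence survives the length and UNKNOWN_TOKEN filters.
num_sentences = 10
for _ in range(num_sentences):
    sent = None
    while sent is None:
        sent = generate_sentence(model, index_to_word, word_to_index)
    print(" ".join(sent))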
import preprocess
from rnn_numpy import RNNNumpy
from rnn_theano import RNNTheano
import numpy as np
import cProfile

X_train, y_train, vocabulary_size = preprocess.create_train_data()

np.random.seed(10)
model = RNNNumpy(vocabulary_size)
#cProfile.run("model.numpy_sdg_step(X_train[10], y_train[10], 0.005)")
#print("----------------------------------------------------------------")

np.random.seed(10)
model_theano = RNNTheano(vocabulary_size)
#cProfile.run("model_theano.train_with_sgd(X_train[10], y_train[10], 0.005)")
print("----------------------------------------------------------------")

# Train both implementations on the same 100 examples for 5 epochs
losses_numpy = model.train_with_sgd(X_train[:100], y_train[:100], nepoch=5, evaluate_loss_after=1)
losses_theano = model_theano.train_with_sgd(X_train[:100], y_train[:100], nepoch=5, evaluate_loss_after=1)
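# Sanity-check sketch: seeded identically, the two implementations should end
# at (nearly) the same loss. This assumes train_with_sgd returns a list of
# (num_examples_seen, loss) pairs, as in the reference numpy implementation.
print("final numpy loss:  %f" % losses_numpy[-1][1])
print("final theano loss: %f" % losses_theano[-1][1])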
import preprocess
from rnn_numpy import RNNNumpy
import numpy as np

X_train, y_train, vocabulary_size = preprocess.create_train_data()

np.random.seed(10)
model = RNNNumpy(vocabulary_size)
output, hidden_states = model.forward_propagation(X_train[10])
print(output.shape)
print(output)

predictions = model.predict(X_train[10])
print(predictions.shape)
print(predictions)

# Limit to 1000 examples to save time.
# With random weights each word is predicted with probability ~1/vocabulary_size,
# so the expected cross-entropy loss is log(vocabulary_size).
print("Expected loss for random predictions: %f" % np.log(vocabulary_size))
print("Actual loss: %f" % model.calculate_loss(X_train[:1000], y_train[:1000]))
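# Standalone check of the expected-loss claim above: uniform predictions give
# every word probability 1/C, so per-word cross-entropy is -log(1/C) = log(C).
# C = 8000 is an assumed vocabulary size chosen for illustration only.
C = 8000
uniform_loss = -np.log(1.0 / C)
assert np.isclose(uniform_loss, np.log(C))
print("uniform-prediction loss for C=%d: %f" % (C, uniform_loss))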
#import tensorflow_datasets as tfds
from preprocess import create_train_data
from transformer import Transformer, Generator, create_masks
from hyper_parameters import config
from metrics import optimizer, loss_function, get_loss_and_accuracy, write_summary
from input_path import file_path

if config.run_tensorboard:
    from input_path import train_summary_writer, valid_summary_writer
else:
    train_summary_writer = None
    valid_summary_writer = None

#tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(file_path.subword_vocab_path)
train_dataset, val_dataset = create_train_data()
train_loss, train_accuracy = get_loss_and_accuracy()
validation_loss, validation_accuracy = get_loss_and_accuracy()
transformer = Transformer(
    num_layers=config.num_layers,
    d_model=config.d_model,
    num_heads=config.num_heads,
    dff=config.dff,
    input_vocab_size=config.input_vocab_size,
    target_vocab_size=config.target_vocab_size,
    rate=config.dropout_rate)
generator = Generator()

# The @tf.function trace-compiles train_step into a TF graph for faster
# execution. The function specializes to the precise shape of the argument
# tensors. To avoid re-tracing due to the variable sequence lengths or variable
# batch sizes (the last batch is smaller), use input_signature to specify
# more generic shapes.
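# A minimal sketch of the input_signature pattern the comment above describes
# (hypothetical; this repo's actual train_step_signature may differ): leaving
# both batch and sequence dimensions as None keeps one trace for all shapes.
import tensorflow as tf
train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),  # inp
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),  # tar
]

@tf.function(input_signature=train_step_signature)
def train_step(inp, tar):
    ...  # placeholder body; the real step computes loss and applies gradients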