def load_model(args):
    """
    Loads the model from a stored checkpoint.

    :param args: All arguments that were given to the train file
    :type args: Argparse object
    :return: model
    :rtype: tf.keras.Model
    """
    # set up dirs
    if args.use_colab is None:
        output_file = 'results.txt'
        OUTPUT_DIR = 'ckpts'
        if not os.path.isdir(OUTPUT_DIR):
            os.mkdir(OUTPUT_DIR)
    else:
        from google.colab import drive
        drive.mount('/content/gdrive')
        OUTPUT_DIR = '/content/gdrive/My Drive/ckpts'
        output_file = OUTPUT_DIR + '/results.txt'
        if not os.path.isdir(OUTPUT_DIR):
            os.mkdir(OUTPUT_DIR)

    node_vocab, target_vocab = load_vocabs()
    vocab_nodes_size = len(node_vocab.word_index) + 1
    vocab_tgt_size = len(target_vocab.word_index) + 1

    OUTPUT_DIR += '/' + args.enc_type + '_' + args.dec_type
    model = graph_attention_model.TransGAT(args, vocab_nodes_size,
                                           vocab_tgt_size, target_vocab)

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim,
                                       warmup_steps=args.decay_steps)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9, beta2=0.98,
                                           epsilon=1e-9)
    else:
        optimizer = tf.train.AdamOptimizer(beta1=0.9, beta2=0.98,
                                           epsilon=1e-9)

    # Restore the latest checkpoint, if one exists
    ckpt = tf.train.Checkpoint(model=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    return model
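# A minimal sketch of the inverse-square-root warmup schedule that
# CustomSchedule(emb_dim, warmup_steps) above is assumed to implement
# (the schedule from "Attention Is All You Need"); the project's actual
# CustomSchedule may differ. Defined under a hypothetical name so it
# does not shadow the real class, and matching the callable usage
# `learning_rate(tf.cast(step, tf.float32))` seen in the trainers below.
class _ReferenceWarmupSchedule(object):
    def __init__(self, emb_dim, warmup_steps=4000):
        self.d_model = tf.cast(emb_dim, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d_model) * tf.minimum(arg1, arg2)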
def _train_gat_trans(args):
    # set up dirs
    (OUTPUT_DIR, EvalResultsFile,
     TestResults, log_file, log_dir) = _set_up_dirs(args)

    # Load the eval src and tgt files for evaluation
    reference = open(args.eval_ref, 'r')
    eval_file = open(args.eval, 'r')

    OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

    (dataset, eval_set, test_set, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
     src_vocab_size, src_vocab, tgt_vocab_size, tgt_vocab,
     max_length_targ, dataset_size) = GetGATDataset(args)

    model = TransGAT(args, src_vocab_size, src_vocab,
                     tgt_vocab_size, max_length_targ, tgt_vocab)
    loss_layer = LossLayer(tgt_vocab_size, 0.1)

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim,
                                       warmup_steps=args.decay_steps)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9, beta2=0.98,
                                           epsilon=1e-9)
    else:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate,
                                           beta1=0.9, beta2=0.98,
                                           epsilon=1e-9)

    # Save model parameters for future use
    if os.path.isfile('{}/{}_{}_params'.format(log_dir, args.lang, args.model)):
        with open('{}/{}_{}_params'.format(log_dir, args.lang, args.model),
                  'rb') as fp:
            PARAMS = pickle.load(fp)
            print('Loaded Parameters..')
    else:
        if not os.path.isdir(log_dir):
            os.makedirs(log_dir)
        PARAMS = {
            "args": args,
            "src_vocab_size": src_vocab_size,
            "tgt_vocab_size": tgt_vocab_size,
            "max_tgt_length": max_length_targ,
            "dataset_size": dataset_size,
            "step": 0
        }

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    ckpt = tf.train.Checkpoint(
        model=model,
        optimizer=optimizer
    )
    ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint).expect_partial()
        print('Latest checkpoint restored!!')

    if args.epochs is not None:
        steps = args.epochs * steps_per_epoch
    else:
        steps = args.steps

    def train_step(nodes, labels, node1, node2, targ):
        with tf.GradientTape() as tape:
            predictions = model(nodes, labels, node1, node2, targ, None)
            predictions = model.metric_layer([predictions, targ])
            batch_loss = loss_layer([predictions, targ])

        gradients = tape.gradient(batch_loss, model.trainable_weights)
        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
        acc = model.metrics[0].result()
        ppl = model.metrics[-1].result()
        # update the running-mean loss metric that the progress prints read
        batch_loss = train_loss(batch_loss)

        return batch_loss, acc, ppl

    # Eval function
    def eval_step(steps=None):
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(EvalResultsFile, 'w+')
        if steps is None:
            dev_set = eval_set
        else:
            dev_set = eval_set.take(steps)

        for (batch, (nodes, labels,
                     node1, node2, targets)) in tqdm(enumerate(dev_set)):
            predictions = model(nodes, labels, node1, node2,
                                targ=None, mask=None)
            pred = [(predictions['outputs'].numpy().tolist())]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = (tgt_vocab.DecodeIds(list(pred[0][i])))
                    sentence = sentence.partition("<start>")[2].partition("<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = tgt_vocab.sequences_to_texts(i)
                    sentence = [j.partition("<start>")[2].partition("<end>")[0]
                                for j in sentences]
                    for w in sentence:
                        eval_results.write((w + '\n'))
                        ref_target.append(reference.readline())
                        results.append(w)

        rouge = (rouge_n(results, ref_target))
        eval_results.close()
        model.trainable = True

        return rouge

    # Test function
    def test_step():
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(TestResults, 'w+')

        for (batch, (nodes, labels,
                     node1, node2)) in tqdm(enumerate(test_set)):
            predictions = model(nodes, labels, node1, node2,
                                targ=None, mask=None)
            pred = [(predictions['outputs'].numpy().tolist())]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = (tgt_vocab.DecodeIds(list(pred[0][i])))
                    sentence = sentence.partition("<start>")[2].partition("<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = tgt_vocab.sequences_to_texts(i)
                    sentence = [j.partition("<start>")[2].partition("<end>")[0]
                                for j in sentences]
                    for w in sentence:
                        eval_results.write((w + '\n'))
                        ref_target.append(reference.readline())
                        results.append(w)

        rouge = (rouge_n(results, ref_target))
        score = 0
        eval_results.close()
        model.trainable = True
        process_results(TestResults)

        return rouge, score

    if args.mode == 'train':
        train_loss.reset_states()
        train_accuracy.reset_states()

        for (batch, (nodes, labels, node1,
                     node2, targ)) in tqdm(enumerate(dataset.repeat(-1))):
            if PARAMS['step'] < steps:
                start = time.time()
                PARAMS['step'] += 1
                if args.decay is not None:
                    optimizer._lr = learning_rate(
                        tf.cast(PARAMS['step'], dtype=tf.float32))

                batch_loss, acc, ppl = train_step(nodes, labels,
                                                  node1, node2, targ)
                if batch % 100 == 0:
                    print('Step {} Learning Rate {:.4f} Train Loss {:.4f} '
                          'Accuracy {:.4f} Perplex {:.4f}'.format(
                              PARAMS['step'], optimizer._lr,
                              train_loss.result(), acc.numpy(), ppl.numpy()))
                    print('Time {} \n'.format(time.time() - start))
                    # log the training results
                    tf.io.write_file(
                        log_file,
                        f"Step {PARAMS['step']} Train Accuracy: {acc.numpy()}"
                        f" Loss: {train_loss.result()} Perplexity: {ppl.numpy()} \n")

                if batch % args.eval_steps == 0:
                    rouge = eval_step(5)
                    print('\n' + '---------------------------------------------------------------------' + '\n')
                    print('ROUGE {:.4f}'.format(rouge))
                    print('\n' + '---------------------------------------------------------------------' + '\n')

                if batch % args.checkpoint == 0:
                    print("Saving checkpoint \n")
                    ckpt_save_path = ckpt_manager.save()
                    with open(log_dir + '/' + args.lang + '_'
                              + args.model + '_params', 'wb+') as fp:
                        pickle.dump(PARAMS, fp)
            else:
                break

        rouge, score = test_step()
        print('\n' + '---------------------------------------------------------------------' + '\n')
        print('ROUGE {:.4f}'.format(rouge))
        print('\n' + '---------------------------------------------------------------------' + '\n')

    elif args.mode == 'test':
        rouge, score = test_step()
        print('\n' + '---------------------------------------------------------------------' + '\n')
        print('ROUGE {:.4f}'.format(rouge))
        print('\n' + '---------------------------------------------------------------------' + '\n')

    else:
        raise ValueError("Mode must be either 'train' or 'test'")
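# A minimal sketch of the corpus-level n-gram recall that rouge_n(hypotheses,
# references) is assumed to compute in the eval/test steps above (ROUGE-N
# with clipped counts, n=1 by default); the project's actual implementation
# may differ. Shown under a hypothetical name for reference only.
from collections import Counter


def _reference_rouge_n(hypotheses, references, n=1):
    def ngrams(tokens, n):
        return [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]

    matched, total = 0, 0
    for hyp, ref in zip(hypotheses, references):
        hyp_counts = Counter(ngrams(hyp.split(), n))
        ref_counts = Counter(ngrams(ref.split(), n))
        # clipped overlap: each reference n-gram is matched at most as many
        # times as it occurs in the hypothesis
        matched += sum((hyp_counts & ref_counts).values())
        total += sum(ref_counts.values())
    return matched / total if total > 0 else 0.0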
def _train_rnn(args):
    # set up dirs
    (OUTPUT_DIR, EvalResultsFile,
     TestResults, log_file, log_dir) = _set_up_dirs(args)

    OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

    (dataset, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
     vocab_inp_size, vocab_tgt_size, target_lang) = GetDataset(args)

    step = 0

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim,
                                       warmup_steps=args.decay_steps)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9, beta2=0.98,
                                           epsilon=1e-9)
    else:
        optimizer = tf.train.AdamOptimizer(beta1=0.9, beta2=0.98,
                                           epsilon=1e-9)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    model = RNNModel.RNNModel(vocab_inp_size, vocab_tgt_size, target_lang, args)
    enc_hidden = model.encoder.initialize_hidden_state()

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    def loss_function(real, pred):
        # mask out padding positions (id 0) before averaging the loss
        mask = tf.math.logical_not(tf.math.equal(real, 0))
        loss_ = loss_object(real, pred)
        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask

        return tf.reduce_mean(loss_)

    def train_step(inp, targ, enc_hidden):
        with tf.GradientTape() as tape:
            predictions, dec_hidden, loss = model(inp, targ, enc_hidden)
            reg_loss = tf.losses.get_regularization_loss()
            loss += reg_loss

        batch_loss = (loss / int(targ.shape[1]))
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        return batch_loss

    def eval_step(inp, trg, enc_hidden):
        model.trainable = False
        predictions, dec_hidden, eval_loss = model(inp, trg, enc_hidden)
        eval_loss = (eval_loss / int(trg.shape[1]))
        model.trainable = True

        return eval_loss

    for epoch in range(args.epochs):
        print('Learning Rate ' + str(optimizer._lr) + ' Step ' + str(step))
        # hard-coded training-set size, used only to size the progress bar
        with tqdm(total=(38668 // args.batch_size)) as pbar:
            for (batch, (inp, targ)) in tqdm(enumerate(dataset)):
                start = time.time()
                step += 1
                if args.decay is not None:
                    optimizer._lr = learning_rate(
                        tf.cast(step, dtype=tf.float32))

                if batch % args.eval_steps == 0:
                    eval_loss = eval_step(inp, targ, enc_hidden)
                    print('\n' + '---------------------------------------------------------------------' + '\n')
                    print('Epoch {} Batch {} Eval Loss {:.4f} '.format(
                        epoch, batch, eval_loss.numpy()))
                    print('\n' + '---------------------------------------------------------------------' + '\n')
                else:
                    batch_loss = train_step(inp, targ, enc_hidden)
                    print('Epoch {} Batch {} Batch Loss {:.4f} '.format(
                        epoch, batch, batch_loss.numpy()))
                    # log the training results
                    tf.io.write_file(log_file, "Epoch {}".format(epoch))
                    tf.io.write_file(log_file, "Train Loss: {}".format(batch_loss))
                    if batch % args.checkpoint == 0:
                        ckpt_save_path = ckpt_manager.save()
                        print("Saving checkpoint \n")

                print('Time {} '.format(time.time() - start))
                pbar.update(1)

        if args.decay is not None:
            optimizer._lr = optimizer._lr * args.decay_rate ** (batch // 1)
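# Hypothetical invocation of the RNN trainer above. The flag names mirror
# the `args` attributes read in this file (enc_type, dec_type, emb_dim,
# batch_size, epochs, eval_steps, checkpoint, decay, decay_steps,
# decay_rate); the actual CLI is defined by the argument parser elsewhere
# in the repo, so treat this as a sketch, not the exact interface:
#
#   python main.py --enc_type rnn --dec_type rnn --emb_dim 512 \
#       --batch_size 32 --epochs 20 --eval_steps 500 --checkpoint 1000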
if not os.path.isdir(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)

if args.enc_type == 'gat' and args.dec_type == 'rnn':
    OUTPUT_DIR += '/' + args.enc_type + '_' + args.dec_type

    (dataset, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
     vocab_tgt_size, vocab_nodes_size, vocab_edge_size, vocab_role_size,
     target_lang, max_length_targ) = get_gat_dataset(args)

    embedding = tf.keras.layers.Embedding(vocab_nodes_size, args.emb_dim)
    model = graph_attention_model.GATModel(args, vocab_nodes_size,
                                           vocab_role_size,
                                           vocab_tgt_size, target_lang)
    step = 0

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim,
                                       warmup_steps=args.decay_steps)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9, beta2=0.98,
                                           epsilon=1e-9)
    else:
        optimizer = tf.train.AdamOptimizer(beta1=0.9, beta2=0.98,
                                           epsilon=1e-9)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
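# A minimal sketch of the label-smoothed cross-entropy that
# LossLayer(vocab_size, 0.1) in _train_gat_trans above and
# _train_transformer below is assumed to implement; the project's actual
# layer may differ (e.g. in how it handles padding). Hypothetical helper,
# shown for reference only.
def _reference_smoothed_loss(logits, targets, vocab_size, smoothing=0.1):
    # distribute `smoothing` probability mass uniformly over the
    # off-target classes and keep 1 - smoothing on the gold class
    confidence = 1.0 - smoothing
    low_confidence = smoothing / tf.cast(vocab_size - 1, tf.float32)
    soft_targets = tf.one_hot(tf.cast(targets, tf.int32), depth=vocab_size,
                              on_value=confidence, off_value=low_confidence)
    xent = tf.nn.softmax_cross_entropy_with_logits(labels=soft_targets,
                                                   logits=logits)
    # mask padding positions (id 0), as elsewhere in this file
    mask = tf.cast(tf.not_equal(targets, 0), xent.dtype)
    return tf.reduce_sum(xent * mask) / tf.maximum(tf.reduce_sum(mask), 1.0)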
def _train_transformer(args):
    # set up dirs
    (OUTPUT_DIR, EvalResultsFile,
     TestResults, log_file, log_dir) = _set_up_dirs(args)

    OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

    (dataset, eval_set, test_set, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
     src_vocab_size, vocab, dataset_size, max_seq_len) = GetDataset(args)

    reference = open(args.eval_ref, 'r')

    if args.epochs is not None:
        steps = args.epochs * steps_per_epoch
    else:
        steps = args.steps

    # Save model parameters for future use
    if os.path.isfile('{}/{}_{}_params'.format(log_dir, args.lang, args.model)):
        with open('{}/{}_{}_params'.format(log_dir, args.lang, args.model),
                  'rb') as fp:
            PARAMS = pickle.load(fp)
            print('Loaded Parameters..')
    else:
        if not os.path.isdir(log_dir):
            os.makedirs(log_dir)
        PARAMS = {
            "args": args,
            "vocab_size": src_vocab_size,
            "dataset_size": dataset_size,
            "max_tgt_length": max_seq_len,
            "step": 0
        }

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim,
                                       warmup_steps=args.decay_steps)
        optimizer = LazyAdam(learning_rate=learning_rate,
                             beta_1=0.9, beta_2=0.98, epsilon=1e-9)
    else:
        optimizer = LazyAdam(learning_rate=args.learning_rate,
                             beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    model = Transformer(args, src_vocab_size)
    loss_layer = LossLayer(src_vocab_size, 0.1)

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')
    if args.learning_rate is not None:
        optimizer._lr = args.learning_rate

    def train_step(inp, tar):
        with tf.GradientTape() as tape:
            predictions = model(inp, tar, training=model.trainable)
            predictions = model.metric_layer([predictions, tar])
            loss = loss_layer([predictions, tar])
            reg_loss = tf.losses.get_regularization_loss()
            loss += reg_loss

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        acc = model.metrics[0].result()
        ppl = model.metrics[-1].result()
        # update the running-mean loss metric that the progress prints read
        loss = train_loss(loss)

        return loss, acc, ppl

    def eval_step(steps=None):
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(EvalResultsFile, 'w+')
        if steps is None:
            dev_set = eval_set
        else:
            dev_set = eval_set.take(steps)

        for (batch, (inp, tar)) in tqdm(enumerate(dev_set)):
            predictions = model(inp, targets=None, training=model.trainable)
            pred = [(predictions['outputs'].numpy().tolist())]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = (vocab.DecodeIds(list(pred[0][i])))
                    sentence = sentence.partition("<start>")[2].partition(
                        "<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = vocab.sequences_to_texts(i)
                    sentence = [
                        j.partition("start")[2].partition("end")[0]
                        for j in sentences
                    ]
                    for w in sentence:
                        eval_results.write((w + '\n'))
                        ref_target.append(reference.readline())
                        results.append(w)

        rouge = (rouge_n(results, ref_target))
        score = 0
        eval_results.close()
        model.trainable = True

        return rouge, score

    def test_step():
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(TestResults, 'w+')

        for (batch, inp) in tqdm(enumerate(test_set)):
            predictions = model(inp, targets=None, training=model.trainable)
            pred = [(predictions['outputs'].numpy().tolist())]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = (vocab.DecodeIds(list(pred[0][i])))
                    sentence = sentence.partition("<start>")[2].partition(
                        "<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = vocab.sequences_to_texts(i)
                    sentence = [
                        j.partition("start")[2].partition("end")[0]
                        for j in sentences
                    ]
                    for w in sentence:
                        eval_results.write((w + '\n'))
                        ref_target.append(reference.readline())
                        results.append(w)

        rouge = (rouge_n(results, ref_target))
        score = 0
        eval_results.close()
        model.trainable = True

        return rouge, score

    train_loss.reset_states()
    train_accuracy.reset_states()

    for (batch, (inp, tgt)) in tqdm(enumerate(dataset.repeat(-1))):
        if PARAMS['step'] < steps:
            start = time.time()
            PARAMS['step'] += 1
            if args.decay is not None:
                optimizer._lr = learning_rate(
                    tf.cast(PARAMS['step'], dtype=tf.float32))

            batch_loss, acc, ppl = train_step(inp, tgt)
            if batch % 100 == 0:
                print('Step {} Learning Rate {:.4f} Train Loss {:.4f} '
                      'Accuracy {:.4f} Perplex {:.4f}'.format(
                          PARAMS['step'], optimizer._lr,
                          train_loss.result(), acc.numpy(), ppl.numpy()))
                print('Time {} \n'.format(time.time() - start))
                # log the training results
                tf.io.write_file(
                    log_file,
                    f"Step {PARAMS['step']} Train Accuracy: {acc.numpy()}"
                    f" Loss: {train_loss.result()} Perplexity: {ppl.numpy()} \n")

            if batch % args.eval_steps == 0:
                rouge, score = eval_step(5)
                print('\n' + '---------------------------------------------------------------------' + '\n')
                print('ROUGE {:.4f} BLEU {:.4f}'.format(rouge, score))
                print('\n' + '---------------------------------------------------------------------' + '\n')

            if batch % args.checkpoint == 0:
                print("Saving checkpoint \n")
                ckpt_save_path = ckpt_manager.save()
                with open(log_dir + '/' + args.lang + '_'
                          + args.model + '_params', 'wb+') as fp:
                    pickle.dump(PARAMS, fp)
        else:
            break

    rouge, score = test_step()
    print('\n' + '---------------------------------------------------------------------' + '\n')
    print('ROUGE {:.4f} BLEU {:.4f}'.format(rouge, score))
    print('\n' + '---------------------------------------------------------------------' + '\n')
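# Illustration of the "<start>"/"<end>" span extraction used by the eval and
# test steps above: str.partition splits around the first occurrence of its
# separator, so chaining two calls keeps only the text between the markers
# (and yields '' when "<start>" never occurs). A hypothetical helper, not
# part of the original code:
def _between_markers(decoded, start="<start>", end="<end>"):
    return decoded.partition(start)[2].partition(end)[0]

# e.g. _between_markers("<start> the cat sat <end><pad>") == " the cat sat "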