Example 1
def _train_gat_trans(args):
  # set up dirs
  (OUTPUT_DIR, EvalResultsFile,
   TestResults, log_file, log_dir) = _set_up_dirs(args)

  # Load the eval src and tgt files for evaluation
  reference = open(args.eval_ref, 'r')
  eval_file = open(args.eval, 'r')

  OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

  (dataset, eval_set, test_set, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
   src_vocab_size, src_vocab, tgt_vocab_size, tgt_vocab, max_length_targ, dataset_size) = GetGATDataset(args)

  model = TransGAT(args, src_vocab_size, src_vocab,
                   tgt_vocab_size, max_length_targ, tgt_vocab)
  loss_layer = LossLayer(tgt_vocab_size, 0.1)
  if args.decay is not None:
    learning_rate = CustomSchedule(args.emb_dim, warmup_steps=args.decay_steps)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.98,
                                       epsilon=1e-9)
  else:
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate, beta1=0.9, beta2=0.98,
                                       epsilon=1e-9)

  # Save model parameters for future use
  if os.path.isfile('{}/{}_{}_params'.format(log_dir, args.lang, args.model)):
    with open('{}/{}_{}_params'.format(log_dir, args.lang, args.model), 'rb') as fp:
      PARAMS = pickle.load(fp)
      print('Loaded Parameters..')
  else:
    if not os.path.isdir(log_dir):
      os.makedirs(log_dir)
    PARAMS = {
      "args": args,
      "src_vocab_size": src_vocab_size,
      "tgt_vocab_size": tgt_vocab_size,
      "max_tgt_length": max_length_targ,
      "dataset_size": dataset_size,
      "step": 0
    }

  train_loss = tf.keras.metrics.Mean(name='train_loss')
  train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

  ckpt = tf.train.Checkpoint(
    model=model,
    optimizer=optimizer
  )

  ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
  if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint).expect_partial()
    print('Latest checkpoint restored!!')

  if args.epochs is not None:
    steps = args.epochs * steps_per_epoch
  else:
    steps = args.steps

  def train_step(nodes, labels, node1, node2, targ):
    with tf.GradientTape() as tape:
      predictions = model(nodes, labels, node1, node2, targ, None)
      predictions = model.metric_layer([predictions, targ])
      batch_loss = loss_layer([predictions, targ])

    gradients = tape.gradient(batch_loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
    acc = model.metrics[0].result()
    ppl = model.metrics[-1].result()
    batch_loss = train_loss(batch_loss)

    return batch_loss, acc, ppl

  # Eval function
  def eval_step(steps=None):
    model.trainable = False
    results = []
    ref_target = []
    eval_results = open(EvalResultsFile, 'w+')
    if steps is None:
      dev_set = eval_set
    else:
      dev_set = eval_set.take(steps)

    for (batch, (nodes, labels, node1, node2, targets)) in tqdm(enumerate(dev_set)):
      predictions = model(nodes, labels, node1,
                          node2, targ=None, mask=None)
      pred = [(predictions['outputs'].numpy().tolist())]

      if args.sentencepiece == 'True':
        for i in range(len(pred[0])):
          sentence = (tgt_vocab.DecodeIds(list(pred[0][i])))
          sentence = sentence.partition("<start>")[2].partition("<end>")[0]
          eval_results.write(sentence + '\n')
          ref_target.append(reference.readline())
          results.append(sentence)
      else:
        for i in pred:
          sentences = tgt_vocab.sequences_to_texts(i)
          sentence = [j.partition("<start>")[2].partition("<end>")[0] for j in sentences]
          for w in sentence:
            eval_results.write((w + '\n'))
            ref_target.append(reference.readline())
            results.append(w)

    rogue = (rouge_n(results, ref_target))
    eval_results.close()
    model.trainable = True

    return rogue

  # Test function
  def test_step():
    model.trainable = False
    results = []
    ref_target = []
    eval_results = open(TestResults, 'w+')

    for (batch, (nodes, labels, node1, node2)) in tqdm(enumerate(test_set)):
      predictions = model(nodes, labels, node1,
                          node2, targ=None, mask=None)
      pred = [(predictions['outputs'].numpy().tolist())]
      if args.sentencepiece == 'True':
        for i in range(len(pred[0])):
          sentence = (tgt_vocab.DecodeIds(list(pred[0][i])))
          sentence = sentence.partition("<start>")[2].partition("<end>")[0]
          eval_results.write(sentence + '\n')
          ref_target.append(reference.readline())
          results.append(sentence)
      else:
        for i in pred:
          sentences = tgt_vocab.sequences_to_texts(i)
          sentence = [j.partition("<start>")[2].partition("<end>")[0] for j in sentences]
          for w in sentence:
            eval_results.write((w + '\n'))
            ref_target.append(reference.readline())
            results.append(w)
    rogue = (rouge_n(results, ref_target))
    score = 0
    eval_results.close()
    model.trainable = True
    process_results(TestResults)

    return rogue, score

  if args.mode == 'train':
    train_loss.reset_states()
    train_accuracy.reset_states()

    for (batch, (nodes, labels,
                 node1, node2, targ)) in tqdm(enumerate(dataset.repeat(-1))):
      if PARAMS['step'] < steps:
        start = time.time()
        PARAMS['step'] += 1

        if args.decay is not None:
          optimizer._lr = learning_rate(tf.cast(PARAMS['step'], dtype=tf.float32))

        batch_loss, acc, ppl = train_step(nodes, labels, node1, node2, targ)
        if batch % 100 == 0:
          print('Step {} Learning Rate {:.4f} Train Loss {:.4f} '
                'Accuracy {:.4f} Perplex {:.4f}'.format(PARAMS['step'],
                                                        optimizer._lr,
                                                        train_loss.result(),
                                                        acc.numpy(),
                                                        ppl.numpy()))
          print('Time {} \n'.format(time.time() - start))
        # log the training results
        tf.io.write_file(log_file,
                         f"Step {PARAMS['step']} Train Accuracy: {acc.numpy()}"
                         f" Loss: {train_loss.result()} Perplexity: {ppl.numpy()} \n")

        if batch % args.eval_steps == 0:
          metric_dict = eval_step(5)
          print('\n' + '---------------------------------------------------------------------' + '\n')
          print('ROUGE {:.4f}'.format(metric_dict))
          print('\n' + '---------------------------------------------------------------------' + '\n')

        if batch % args.checkpoint == 0:
          print("Saving checkpoint \n")
          ckpt_save_path = ckpt_manager.save()
          with open(log_dir + '/' + args.lang + '_' + args.model + '_params', 'wb+') as fp:
            pickle.dump(PARAMS, fp)
      else:
        break

    rogue, score = test_step()
    print('\n' + '---------------------------------------------------------------------' + '\n')
    print('ROUGE {:.4f}'.format(rogue))
    print('\n' + '---------------------------------------------------------------------' + '\n')

  elif args.mode == 'test':
    rogue, score = test_step()
    print('\n' + '---------------------------------------------------------------------' + '\n')
    print('ROUGE {:.4f}'.format(rogue))
    print('\n' + '---------------------------------------------------------------------' + '\n')

  else:
    raise ValueError("Mode must be either 'train' or 'test'")
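For context, a minimal sketch of how this trainer could be wired to a command-line entry point, assuming get_args() from src.arguments (imported in the later examples) returns the argparse namespace that _train_gat_trans reads:

from src.arguments import get_args

if __name__ == "__main__":
  # Hypothetical entry point; get_args() must expose the flags used above
  # (enc_type, dec_type, mode, decay, eval_steps, checkpoint, ...).
  args = get_args()
  _train_gat_trans(args)  # trains then tests when args.mode == 'train', only tests when 'test'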
Example 2
from src.MultilingualDataLoader import ProcessMultilingualDataset
from src.arguments import get_args
from src.models.GraphAttentionModel import TransGAT
from src.utils.metrics import LossLayer
from src.utils.model_utils import CustomSchedule, _set_up_dirs
from src.utils.rogue import rouge_n

# model parameters

if __name__ == "__main__":
    args = get_args()
    global step

    # set up dirs
    (OUTPUT_DIR, EvalResultsFile, TestResults, log_file,
     log_dir) = _set_up_dirs(args)

    if args.enc_type == 'gat' and args.dec_type == 'transformer':
        OUTPUT_DIR += '/' + args.enc_type + '_' + args.dec_type
        (dataset, src_vocab, src_vocab_size, tgt_vocab, tgt_vocab_size,
         MULTI_BUFFER_SIZE, steps_per_epoch,
         MaxSeqSize) = ProcessMultilingualDataset(args)

        # Load the eval src and tgt files for evaluation
        reference = open(args.eval_ref, 'r')
        eval_file = open(args.eval, 'r')

        model = TransGAT(args, src_vocab_size, src_vocab, tgt_vocab_size,
                         MaxSeqSize, tgt_vocab)
        loss_layer = LossLayer(tgt_vocab_size, 0.1)
        if args.decay is not None:
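Like the other examples, this excerpt switches to CustomSchedule(args.emb_dim, warmup_steps=args.decay_steps) when --decay is set. A minimal sketch of the Transformer-style warmup schedule such a class presumably implements (an assumption; the real class lives in src.utils.model_utils):

import tensorflow as tf

# Sketch of a warmup learning-rate schedule consistent with the calls above,
# where the schedule object is invoked as learning_rate(step).
class WarmupSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
        step = tf.cast(step, tf.float32)
        return tf.math.rsqrt(self.d_model) * tf.minimum(
            tf.math.rsqrt(step), step * (self.warmup_steps ** -1.5))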
Example 3
def _train_transformer(args):
    # set up dirs
    (OUTPUT_DIR, EvalResultsFile, TestResults, log_file,
     log_dir) = _set_up_dirs(args)

    OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

    dataset, eval_set, test_set, BUFFER_SIZE, BATCH_SIZE, \
    steps_per_epoch, src_vocab_size, vocab, dataset_size, max_seq_len = GetDataset(args)
    reference = open(args.eval_ref, 'r')

    if args.epochs is not None:
        steps = args.epochs * steps_per_epoch
    else:
        steps = args.steps

    # Save model parameters for future use
    if os.path.isfile('{}/{}_{}_params'.format(log_dir, args.lang,
                                               args.model)):
        with open('{}/{}_{}_params'.format(log_dir, args.lang, args.model),
                  'rb') as fp:
            PARAMS = pickle.load(fp)
            print('Loaded Parameters..')
    else:
        if not os.path.isdir(log_dir):
            os.makedirs(log_dir)
        PARAMS = {
            "args": args,
            "vocab_size": src_vocab_size,
            "dataset_size": dataset_size,
            "max_tgt_length": max_seq_len,
            "step": 0
        }

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim,
                                       warmup_steps=args.decay_steps)
        optimizer = LazyAdam(learning_rate=learning_rate,
                             beta_1=0.9,
                             beta_2=0.98,
                             epsilon=1e-9)
    else:
        optimizer = LazyAdam(learning_rate=args.learning_rate,
                             beta_1=0.9,
                             beta_2=0.98,
                             epsilon=1e-9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    model = Transformer(args, src_vocab_size)
    loss_layer = LossLayer(src_vocab_size, 0.1)

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    if args.learning_rate is not None:
        optimizer._lr = args.learning_rate

    def train_step(inp, tar):
        with tf.GradientTape() as tape:
            predictions = model(inp, tar, training=model.trainable)
            predictions = model.metric_layer([predictions, tar])
            loss = loss_layer([predictions, tar])
            reg_loss = tf.losses.get_regularization_loss()
            loss += reg_loss

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        acc = model.metrics[0].result()
        ppl = model.metrics[-1].result()

        return loss, acc, ppl

    def eval_step(steps=None):
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(EvalResultsFile, 'w+')
        if steps is None:
            dev_set = eval_set
        else:
            dev_set = eval_set.take(steps)
        for (batch, (inp, tar)) in tqdm(enumerate(dev_set)):
            predictions = model(inp, targets=None, training=model.trainable)
            pred = [(predictions['outputs'].numpy().tolist())]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = (vocab.DecodeIds(list(pred[0][i])))
                    sentence = sentence.partition("<start>")[2].partition(
                        "<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = vocab.sequences_to_texts(i)
                    sentence = [
                        j.partition("start")[2].partition("end")[0]
                        for j in sentences
                    ]
                    for w in sentence:
                        eval_results.write((w + '\n'))
                        ref_target.append(reference.readline())
                        results.append(w)

        rogue = (rouge_n(results, ref_target))
        score = 0
        eval_results.close()
        model.trainable = True

        return rogue, score

    def test_step():
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(TestResults, 'w+')
        for (batch, (inp)) in tqdm(enumerate(test_set)):
            predictions = model(inp, targets=None, training=model.trainable)
            pred = [(predictions['outputs'].numpy().tolist())]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = (vocab.DecodeIds(list(pred[0][i])))
                    sentence = sentence.partition("<start>")[2].partition(
                        "<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = vocab.sequences_to_texts(i)
                    sentence = [
                        j.partition("start")[2].partition("end")[0]
                        for j in sentences
                    ]
                    for w in sentence:
                        eval_results.write((w + '\n'))
                        ref_target.append(reference.readline())
                        results.append(w)

        rogue = (rouge_n(results, ref_target))
        score = 0
        eval_results.close()
        model.trainable = True

        return rogue, score

    train_loss.reset_states()
    train_accuracy.reset_states()

    for (batch, (inp, tgt)) in tqdm(enumerate(dataset.repeat(-1))):
        if PARAMS['step'] < steps:
            start = time.time()
            PARAMS['step'] += 1

            if args.decay is not None:
                optimizer._lr = learning_rate(
                    tf.cast(PARAMS['step'], dtype=tf.float32))

            batch_loss, acc, ppl = train_step(inp, tgt)
            if batch % 100 == 0:
                print('Step {} Learning Rate {:.4f} Train Loss {:.4f} '
                      'Accuracy {:.4f} Perplex {:.4f}'.format(
                          PARAMS['step'], optimizer._lr, train_loss.result(),
                          acc.numpy(), ppl.numpy()))
                print('Time {} \n'.format(time.time() - start))
            # log the training results
            tf.io.write_file(
                log_file,
                f"Step {PARAMS['step']} Train Accuracy: {acc.numpy()}"
                f" Loss: {train_loss.result()} Perplexity: {ppl.numpy()} \n")

            if batch % args.eval_steps == 0:
                rogue, score = eval_step(5)
                print(
                    '\n' +
                    '---------------------------------------------------------------------'
                    + '\n')
                print('ROUGE {:.4f} BLEU {:.4f}'.format(rogue, score))
                print(
                    '\n' +
                    '---------------------------------------------------------------------'
                    + '\n')

            if batch % args.checkpoint == 0:
                print("Saving checkpoint \n")
                ckpt_save_path = ckpt_manager.save()
                with open(
                        log_dir + '/' + args.lang + '_' + args.model +
                        '_params', 'wb+') as fp:
                    pickle.dump(PARAMS, fp)

        else:
            break
    rogue, score = test_step()
    print(
        '\n' +
        '---------------------------------------------------------------------'
        + '\n')
    print('ROUGE {:.4f} BLEU {:.4f}'.format(rogue, score))
    print(
        '\n' +
        '---------------------------------------------------------------------'
        + '\n')
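The LossLayer(vocab_size, 0.1) calls in these examples point to a label-smoothed cross-entropy with a smoothing factor of 0.1. A minimal sketch of that idea, under that assumption (the actual src.utils.metrics layer may differ in details such as padding handling):

import tensorflow as tf

# Hypothetical label-smoothed cross-entropy: 0.1 of the probability mass is
# spread over the non-target classes, and padding positions (id 0) are masked out.
def label_smoothed_loss(logits, targets, vocab_size, smoothing=0.1):
    confidence = 1.0 - smoothing
    low_confidence = smoothing / (vocab_size - 1)
    soft_targets = tf.one_hot(targets, depth=vocab_size,
                              on_value=confidence, off_value=low_confidence)
    xent = tf.nn.softmax_cross_entropy_with_logits(labels=soft_targets, logits=logits)
    mask = tf.cast(tf.not_equal(targets, 0), xent.dtype)
    return tf.reduce_sum(xent * mask) / tf.maximum(tf.reduce_sum(mask), 1.0)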
Example 4
def _train_rnn(args):
    # set up dirs
    (OUTPUT_DIR, EvalResultsFile, TestResults, log_file,
     log_dir) = _set_up_dirs(args)

    OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

    dataset, BUFFER_SIZE, BATCH_SIZE, \
    steps_per_epoch, vocab_inp_size, vocab_tgt_size, target_lang = GetDataset(args)

    step = 0

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim,
                                       warmup_steps=args.decay_steps)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9,
                                           beta2=0.98,
                                           epsilon=1e-9)
    else:
        optimizer = tf.train.AdamOptimizer(beta1=0.9, beta2=0.98, epsilon=1e-9)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    model = RNNModel.RNNModel(vocab_inp_size, vocab_tgt_size, target_lang,
                              args)
    enc_hidden = model.encoder.initialize_hidden_state()

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    def loss_function(real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, 0))
        loss_ = loss_object(real, pred)

        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask
        return tf.reduce_mean(loss_)

    def train_step(inp, targ, enc_hidden):

        with tf.GradientTape() as tape:
            predictions, dec_hidden, loss = model(inp, targ, enc_hidden)
            reg_loss = tf.losses.get_regularization_loss()
            loss += reg_loss

        batch_loss = (loss / int(targ.shape[1]))
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        return batch_loss

    def eval_step(inp, trg, enc_hidden):
        model.trainable = False

        predictions, dec_hidden, eval_loss = model(inp, trg, enc_hidden)
        eval_loss = (eval_loss / int(trg.shape[1]))
        model.trainable = True

        return eval_loss

    for epoch in range(args.epochs):
        print('Learning Rate ' + str(optimizer._lr) + ' Step ' + str(step))

        with tqdm(total=(38668 // args.batch_size)) as pbar:
            for (batch, (inp, targ)) in tqdm(enumerate(dataset)):
                start = time.time()
                step += 1
                if args.decay is not None:
                    optimizer._lr = learning_rate(
                        tf.cast(step, dtype=tf.float32))

                if batch % args.eval_steps == 0:
                    eval_loss = eval_step(inp, targ, enc_hidden)
                    print(
                        '\n' +
                        '---------------------------------------------------------------------'
                        + '\n')
                    print('Epoch {} Batch {} Eval Loss {:.4f} '.format(
                        epoch, batch, eval_loss.numpy()))
                    print(
                        '\n' +
                        '---------------------------------------------------------------------'
                        + '\n')
                else:
                    batch_loss = train_step(inp, targ, enc_hidden)
                    print('Epoch {} Batch {} Batch Loss {:.4f} '.format(
                        epoch, batch, batch_loss.numpy()))
                    # log the training results
                    tf.io.write_file(log_file, "Epoch {}".format(epoch))
                    tf.io.write_file(log_file,
                                     "Train Loss: {}".format(batch_loss))

                if batch % args.checkpoint == 0:
                    ckpt_save_path = ckpt_manager.save()
                    print("Saving checkpoint \n")
                print('Time {} '.format(time.time() - start))
                pbar.update(1)
        if args.decay is not None:
            optimizer._lr = optimizer._lr * args.decay_rate**(batch // 1)
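For reference, a hypothetical minimal argument namespace covering only the flags _train_rnn itself reads; _set_up_dirs, GetDataset, and RNNModel consume additional fields that are not listed here:

from argparse import Namespace

# Hypothetical subset of flags used directly by _train_rnn above.
args = Namespace(
    enc_type='rnn', dec_type='rnn',    # appended to OUTPUT_DIR
    decay=None, decay_steps=4000,      # enable/configure the warmup schedule
    decay_rate=0.9, emb_dim=256,       # schedule parameters
    epochs=10, batch_size=32,          # outer loop and progress-bar total
    eval_steps=500, checkpoint=1000,   # periodic evaluation / checkpoint frequency
)
# _train_rnn(args)  # would also require the fields expected by GetDataset et al.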
Example 5
from src.arguments import get_args
from src.MultilingualDataLoader import ProcessMultilingualDataset
from src.models.GraphAttentionModel import TransGAT
from src.models.Transformer import Transformer
from src.utils.Optimizers import LazyAdam
from src.utils.metrics import LossLayer
from src.utils.model_utils import CustomSchedule, _set_up_dirs, process_results
from src.utils.rogue import rouge_n

# model parameters

if __name__ == "__main__":
  args = get_args()
  global step

  # set up dirs
  (OUTPUT_DIR, EvalResultsFile,
   TestResults, log_file, log_dir) = _set_up_dirs(args)

  if args.model == 'gat':
    OUTPUT_DIR += '/' + args.enc_type + '_' + args.dec_type
    (dataset, src_vocab, src_vocab_size, tgt_vocab,
     tgt_vocab_size, MULTI_BUFFER_SIZE, steps_per_epoch, MaxSeqSize) = ProcessMultilingualDataset(args)

    # Load the eval src and tgt files for evaluation
    reference = open(args.eval_ref, 'r')
    eval_file = open(args.eval, 'r')

    model = TransGAT(args, src_vocab_size, src_vocab,
                     tgt_vocab_size, MaxSeqSize, tgt_vocab)
    loss_layer = LossLayer(tgt_vocab_size, 0.1)
    if args.decay is not None:
      learning_rate = CustomSchedule(args.emb_dim, warmup_steps=args.decay_steps)