Code example #1
# Standard-library and TensorFlow imports used below; project helpers such as
# _set_up_dirs, GetGATDataset, TransGAT, LossLayer, CustomSchedule, rouge_n and
# process_results come from the repository's own modules.
import os
import pickle
import time

import tensorflow as tf
from tqdm import tqdm


def _train_gat_trans(args):
  # set up dirs
  (OUTPUT_DIR, EvalResultsFile,
   TestResults, log_file, log_dir) = _set_up_dirs(args)

  # Load the eval src and tgt files for evaluation
  reference = open(args.eval_ref, 'r')
  eval_file = open(args.eval, 'r')  # opened but not read in this function

  OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

  (dataset, eval_set, test_set, BUFFER_SIZE, BATCH_SIZE, steps_per_epoch,
   src_vocab_size, src_vocab, tgt_vocab_size, tgt_vocab, max_length_targ, dataset_size) = GetGATDataset(args)

  model = TransGAT(args, src_vocab_size, src_vocab,
                   tgt_vocab_size, max_length_targ, tgt_vocab)
  loss_layer = LossLayer(tgt_vocab_size, 0.1)
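  # TF 1.x eager-style Adam with the Transformer paper's beta/epsilon settings;
  # the Keras equivalent is tf.keras.optimizers.Adam(beta_1=0.9, beta_2=0.98, epsilon=1e-9)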
  if args.decay is not None:
    learning_rate = CustomSchedule(args.emb_dim, warmup_steps=args.decay_steps)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.98,
                                       epsilon=1e-9)
  else:
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate, beta1=0.9, beta2=0.98,
                                       epsilon=1e-9)

  # Restore saved training parameters if present; otherwise initialize them
  if os.path.isfile('{}/{}_{}_params'.format(log_dir, args.lang, args.model)):
    with open('{}/{}_{}_params'.format(log_dir, args.lang, args.model), 'rb') as fp:
      PARAMS = pickle.load(fp)
      print('Loaded Parameters..')
  else:
    if not os.path.isdir(log_dir):
      os.makedirs(log_dir)
    PARAMS = {
      "args": args,
      "src_vocab_size": src_vocab_size,
      "tgt_vocab_size": tgt_vocab_size,
      "max_tgt_length": max_length_targ,
      "dataset_size": dataset_size,
      "step": 0
    }

  train_loss = tf.keras.metrics.Mean(name='train_loss')
  train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

  ckpt = tf.train.Checkpoint(
    model=model,
    optimizer=optimizer
  )

  ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
  if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint).expect_partial()
    print('Latest checkpoint restored!!')

  if args.epochs is not None:
    steps = args.epochs * steps_per_epoch
  else:
    steps = args.steps

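  # One optimization step: forward pass, label-smoothed loss, backprop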
  def train_step(nodes, labels, node1, node2, targ):
    with tf.GradientTape() as tape:
      predictions = model(nodes, labels, node1, node2, targ, None)
      predictions = model.metric_layer([predictions, targ])
      batch_loss = loss_layer([predictions, targ])

    gradients = tape.gradient(batch_loss, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
    acc = model.metrics[0].result()
    ppl = model.metrics[-1].result()
    batch_loss = train_loss(batch_loss)  # update the running mean; returns the averaged loss

    return batch_loss, acc, ppl

  # Evaluation: greedy-decode the dev set and score it with ROUGE

  def eval_step(steps=None):
    model.trainable = False
    results = []
    ref_target = []
    eval_results = open(EvalResultsFile, 'w+')
    if steps is None:
      dev_set = eval_set
    else:
      dev_set = eval_set.take(steps)

    for (batch, (nodes, labels, node1, node2, targets)) in tqdm(enumerate(dev_set)):
      predictions = model(nodes, labels, node1,
                          node2, targ=None, mask=None)
      pred = [(predictions['outputs'].numpy().tolist())]

      if args.sentencepiece == 'True':
        for i in range(len(pred[0])):
          sentence = (tgt_vocab.DecodeIds(list(pred[0][i])))
          sentence = sentence.partition("<start>")[2].partition("<end>")[0]
          eval_results.write(sentence + '\n')
          ref_target.append(reference.readline())
          results.append(sentence)
      else:
        for i in pred:
          sentences = tgt_vocab.sequences_to_texts(i)
          sentence = [j.partition("<start>")[2].partition("<end>")[0] for j in sentences]
          for w in sentence:
            eval_results.write((w + '\n'))
            ref_target.append(reference.readline())
            results.append(w)

    rouge = rouge_n(results, ref_target)
    eval_results.close()
    model.trainable = True

    return rouge

  # Test: decode the test set, write the results file, and post-process it
  def test_step():
    model.trainable = False
    results = []
    ref_target = []
    eval_results = open(TestResults, 'w+')

    for (batch, (nodes, labels, node1, node2)) in tqdm(enumerate(test_set)):
      predictions = model(nodes, labels, node1,
                          node2, targ=None, mask=None)
      pred = [(predictions['outputs'].numpy().tolist())]
      if args.sentencepiece == 'True':
        for i in range(len(pred[0])):
          sentence = (tgt_vocab.DecodeIds(list(pred[0][i])))
          sentence = sentence.partition("<start>")[2].partition("<end>")[0]
          eval_results.write(sentence + '\n')
          ref_target.append(reference.readline())
          results.append(sentence)
      else:
        for i in pred:
          sentences = tgt_vocab.sequences_to_texts(i)
          sentence = [j.partition("<start>")[2].partition("<end>")[0] for j in sentences]
          for w in sentence:
            eval_results.write((w + '\n'))
            ref_target.append(reference.readline())
            results.append(w)
    rouge = rouge_n(results, ref_target)
    score = 0  # BLEU placeholder; not computed in this code path
    eval_results.close()
    model.trainable = True
    process_results(TestResults)

    return rouge, score

  if args.mode == 'train':
    train_loss.reset_states()
    train_accuracy.reset_states()

    for (batch, (nodes, labels,
                 node1, node2, targ)) in tqdm(enumerate(dataset.repeat(-1))):
      if PARAMS['step'] < steps:
        start = time.time()
        PARAMS['step'] += 1

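        # follow the warmup schedule manually; TF1's AdamOptimizer stores its
        # rate in the private _lr attribute and has no schedule hook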
        if args.decay is not None:
          optimizer._lr = learning_rate(tf.cast(PARAMS['step'], dtype=tf.float32))

        batch_loss, acc, ppl = train_step(nodes, labels, node1, node2, targ)
        if batch % 100 == 0:
          print('Step {} Learning Rate {:.4f} Train Loss {:.4f} '
                'Accuracy {:.4f} Perplex {:.4f}'.format(PARAMS['step'],
                                                        optimizer._lr,
                                                        train_loss.result(),
                                                        acc.numpy(),
                                                        ppl.numpy()))
          print('Time {} \n'.format(time.time() - start))
        # log the training results (note: tf.io.write_file overwrites the file each step)
        tf.io.write_file(log_file,
                         f"Step {PARAMS['step']} Train Accuracy: {acc.numpy()}"
                         f" Loss: {train_loss.result()} Perplexity: {ppl.numpy()} \n")

        if batch % args.eval_steps == 0:
          rouge = eval_step(5)
          print('\n' + '---------------------------------------------------------------------' + '\n')
          print('ROUGE {:.4f}'.format(rouge))
          print('\n' + '---------------------------------------------------------------------' + '\n')

        if batch % args.checkpoint == 0:
          print("Saving checkpoint \n")
          ckpt_save_path = ckpt_manager.save()
          with open(log_dir + '/' + args.lang + '_' + args.model + '_params', 'wb+') as fp:
            pickle.dump(PARAMS, fp)
      else:
        break

    rouge, score = test_step()
    print('\n' + '---------------------------------------------------------------------' + '\n')
    print('ROUGE {:.4f}'.format(rouge))
    print('\n' + '---------------------------------------------------------------------' + '\n')

  elif args.mode == 'test':
    rouge, score = test_step()
    print('\n' + '---------------------------------------------------------------------' + '\n')
    print('ROUGE {:.4f}'.format(rouge))
    print('\n' + '---------------------------------------------------------------------' + '\n')

  else:
    raise ValueError("Mode must be either 'train' or 'test'")
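
CustomSchedule is defined elsewhere in the repository. The usual choice for this kind of warmup schedule is the one from the Transformer paper, lrate = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5); a minimal sketch, assuming a Keras LearningRateSchedule subclass (the project's actual class may differ):

import tensorflow as tf

class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Linear warmup followed by inverse-square-root decay (Vaswani et al., 2017)."""

  def __init__(self, d_model, warmup_steps=4000):
    super().__init__()
    self.d_model = tf.cast(d_model, tf.float32)
    self.warmup_steps = warmup_steps

  def __call__(self, step):
    step = tf.cast(step, tf.float32)
    decay = tf.math.rsqrt(step)                  # inverse sqrt after warmup
    warmup = step * (self.warmup_steps ** -1.5)  # linear ramp during warmup
    return tf.math.rsqrt(self.d_model) * tf.minimum(decay, warmup)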
Code example #2
    (OUTPUT_DIR, EvalResultsFile, TestResults, log_file,
     log_dir) = _set_up_dirs(args)

    if args.enc_type == 'gat' and args.dec_type == 'transformer':
        OUTPUT_DIR += '/' + args.enc_type + '_' + args.dec_type
        (dataset, src_vocab, src_vocab_size, tgt_vocab, tgt_vocab_size,
         MULTI_BUFFER_SIZE, steps_per_epoch,
         MaxSeqSize) = ProcessMultilingualDataset(args)

        # Load the eval src and tgt files for evaluation
        reference = open(args.eval_ref, 'r')
        eval_file = open(args.eval, 'r')

        model = TransGAT(args, src_vocab_size, src_vocab, tgt_vocab_size,
                         MaxSeqSize, tgt_vocab)
        loss_layer = LossLayer(tgt_vocab_size, 0.1)
        if args.decay is not None:
            learning_rate = CustomSchedule(args.emb_dim,
                                           warmup_steps=args.decay_steps)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                               beta1=0.9,
                                               beta2=0.98,
                                               epsilon=1e-9)
        else:
            optimizer = tf.train.AdamOptimizer(
                learning_rate=args.learning_rate,
                beta1=0.9,
                beta2=0.98,
                epsilon=1e-9)
        step = 0
Code example #3
File: train_single.py  Project: PtolemyRE/GSoC-19
        # Leading lines reconstructed from the identical branch in code example #4
        if args.decay is not None:
            learning_rate = CustomSchedule(args.emb_dim,
                                           warmup_steps=args.decay_steps)
            optimizer = LazyAdam(learning_rate=learning_rate,
                                 beta_1=0.9,
                                 beta_2=0.98,
                                 epsilon=1e-9)
        else:
            optimizer = LazyAdam(learning_rate=args.learning_rate,
                                 beta_1=0.9,
                                 beta_2=0.98,
                                 epsilon=1e-9)

        loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction='none')
        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
            name='train_accuracy')

        model = Transformer(args, vocab_size)
        loss_layer = LossLayer(vocab_size, 0.1)

        ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
        ckpt_manager = tf.train.CheckpointManager(ckpt,
                                                  OUTPUT_DIR,
                                                  max_to_keep=5)
        if ckpt_manager.latest_checkpoint:
            ckpt.restore(ckpt_manager.latest_checkpoint)
            print('Latest checkpoint restored!!')

        if args.learning_rate is not None:
            # Keras-style optimizers expose `learning_rate`; there is no private _lr attribute
            optimizer.learning_rate = args.learning_rate

        def train_step(inp, tar):
            with tf.GradientTape() as tape:
                predictions = model(inp, tar, training=model.trainable)
                predictions = model.metric_layer([predictions, tar])
                loss = loss_layer([predictions, tar])

            # Continuation reconstructed from the matching step in code example #4
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables))
            acc = model.metrics[0].result()
            ppl = model.metrics[-1].result()
            loss = train_loss(loss)

            return loss, acc, ppl
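
LazyAdam, used here and in the next example, is not part of core TensorFlow 2; it ships with TensorFlow Addons (it lived in tf.contrib.opt in TF 1.x). A minimal usage sketch, assuming the Addons package is installed:

# LazyAdam only updates the Adam moment slots of the embedding rows that occur
# in the current batch, which speeds up models with large sparse embeddings.
from tensorflow_addons.optimizers import LazyAdam  # pip install tensorflow-addons

optimizer = LazyAdam(learning_rate=1e-3, beta_1=0.9, beta_2=0.98, epsilon=1e-9)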
Code example #4
# Same imports as in code example #1, plus the LazyAdam optimizer
# (see the note after code example #3).
def _train_transformer(args):
    # set up dirs
    (OUTPUT_DIR, EvalResultsFile, TestResults, log_file,
     log_dir) = _set_up_dirs(args)

    OUTPUT_DIR += '/{}_{}'.format(args.enc_type, args.dec_type)

    (dataset, eval_set, test_set, BUFFER_SIZE, BATCH_SIZE,
     steps_per_epoch, src_vocab_size, vocab, dataset_size,
     max_seq_len) = GetDataset(args)
    reference = open(args.eval_ref, 'r')

    if args.epochs is not None:
        steps = args.epochs * steps_per_epoch
    else:
        steps = args.steps

    # Restore saved training parameters if present; otherwise initialize them
    if os.path.isfile('{}/{}_{}_params'.format(log_dir, args.lang,
                                               args.model)):
        with open('{}/{}_{}_params'.format(log_dir, args.lang, args.model),
                  'rb') as fp:
            PARAMS = pickle.load(fp)
            print('Loaded Parameters..')
    else:
        if not os.path.isdir(log_dir):
            os.makedirs(log_dir)
        PARAMS = {
            "args": args,
            "vocab_size": src_vocab_size,
            "dataset_size": dataset_size,
            "max_tgt_length": max_seq_len,
            "step": 0
        }

    if args.decay is not None:
        learning_rate = CustomSchedule(args.emb_dim,
                                       warmup_steps=args.decay_steps)
        optimizer = LazyAdam(learning_rate=learning_rate,
                             beta_1=0.9,
                             beta_2=0.98,
                             epsilon=1e-9)
    else:
        optimizer = LazyAdam(learning_rate=args.learning_rate,
                             beta_1=0.9,
                             beta_2=0.98,
                             epsilon=1e-9)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    model = Transformer(args, src_vocab_size)
    loss_layer = LossLayer(src_vocab_size, 0.1)

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, OUTPUT_DIR, max_to_keep=5)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    if args.learning_rate is not None:
        # Keras-style optimizers expose `learning_rate`; there is no private _lr attribute
        optimizer.learning_rate = args.learning_rate

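    # One optimization step: label-smoothed cross-entropy plus regularization loss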
    def train_step(inp, tar):
        with tf.GradientTape() as tape:
            predictions = model(inp, tar, training=model.trainable)
            predictions = model.metric_layer([predictions, tar])
            loss = loss_layer([predictions, tar])
            # collect Keras-style regularization losses
            # (tf.losses.get_regularization_loss is TF1-only)
            reg_loss = tf.add_n(model.losses) if model.losses else 0.0
            loss += reg_loss

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        acc = model.metrics[0].result()
        ppl = model.metrics[-1].result()
        loss = train_loss(loss)  # update the running mean used by the progress prints

        return loss, acc, ppl

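    # Greedy-decode the dev set, write the hypotheses, and score them with ROUGE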
    def eval_step(steps=None):
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(EvalResultsFile, 'w+')
        if steps is None:
            dev_set = eval_set
        else:
            dev_set = eval_set.take(steps)
        for (batch, (inp, tar)) in tqdm(enumerate(dev_set)):
            predictions = model(inp, targets=None, training=model.trainable)
            pred = [(predictions['outputs'].numpy().tolist())]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = (vocab.DecodeIds(list(pred[0][i])))
                    sentence = sentence.partition("<start>")[2].partition(
                        "<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = vocab.sequences_to_texts(i)
                    sentence = [
                        j.partition("start")[2].partition("end")[0]
                        for j in sentences
                    ]
                    for w in sentence:
                        eval_results.write((w + '\n'))
                        ref_target.append(reference.readline())
                        results.append(w)

        rouge = rouge_n(results, ref_target)
        score = 0  # BLEU placeholder; not computed
        eval_results.close()
        model.trainable = True

        return rouge, score

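    # Decode the held-out test set and write the results file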
    def test_step():
        model.trainable = False
        results = []
        ref_target = []
        eval_results = open(TestResults, 'w+')
        for (batch, inp) in tqdm(enumerate(test_set)):
            predictions = model(inp, targets=None, training=model.trainable)
            pred = [(predictions['outputs'].numpy().tolist())]

            if args.sentencepiece == 'True':
                for i in range(len(pred[0])):
                    sentence = (vocab.DecodeIds(list(pred[0][i])))
                    sentence = sentence.partition("<start>")[2].partition(
                        "<end>")[0]
                    eval_results.write(sentence + '\n')
                    ref_target.append(reference.readline())
                    results.append(sentence)
            else:
                for i in pred:
                    sentences = vocab.sequences_to_texts(i)
                    sentence = [
                        j.partition("start")[2].partition("end")[0]
                        for j in sentences
                    ]
                    for w in sentence:
                        eval_results.write((w + '\n'))
                        ref_target.append(reference.readline())
                        results.append(w)

        rouge = rouge_n(results, ref_target)
        score = 0  # BLEU placeholder; not computed
        eval_results.close()
        model.trainable = True

        return rouge, score

    train_loss.reset_states()
    train_accuracy.reset_states()

    for (batch, (inp, tgt)) in tqdm(enumerate(dataset.repeat(-1))):
        if PARAMS['step'] < steps:
            start = time.time()
            PARAMS['step'] += 1

            if args.decay is not None:
                # the optimizer already applies CustomSchedule internally;
                # evaluate it here only so the printed learning rate is current
                current_lr = learning_rate(
                    tf.cast(PARAMS['step'], dtype=tf.float32))
            else:
                current_lr = args.learning_rate

            batch_loss, acc, ppl = train_step(inp, tgt)
            if batch % 100 == 0:
                print('Step {} Learning Rate {:.4f} Train Loss {:.4f} '
                      'Accuracy {:.4f} Perplex {:.4f}'.format(
                          PARAMS['step'], float(current_lr),
                          train_loss.result(), acc.numpy(), ppl.numpy()))
                print('Time {} \n'.format(time.time() - start))
            # log the training results (note: tf.io.write_file overwrites the file each step)
            tf.io.write_file(
                log_file,
                f"Step {PARAMS['step']} Train Accuracy: {acc.numpy()}"
                f" Loss: {train_loss.result()} Perplexity: {ppl.numpy()} \n")

            if batch % args.eval_steps == 0:
                rouge, score = eval_step(5)
                print(
                    '\n' +
                    '---------------------------------------------------------------------'
                    + '\n')
                print('ROUGE {:.4f} BLEU {:.4f}'.format(rouge, score))
                print(
                    '\n' +
                    '---------------------------------------------------------------------'
                    + '\n')

            if batch % args.checkpoint == 0:
                print("Saving checkpoint \n")
                ckpt_save_path = ckpt_manager.save()
                with open(
                        log_dir + '/' + args.lang + '_' + args.model +
                        '_params', 'wb+') as fp:
                    pickle.dump(PARAMS, fp)

        else:
            break
    rouge, score = test_step()
    print(
        '\n' +
        '---------------------------------------------------------------------'
        + '\n')
    print('ROUGE {:.4f} BLEU {:.4f}'.format(rouge, score))
    print(
        '\n' +
        '---------------------------------------------------------------------'
        + '\n')
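
The rouge_n scorer called in the eval and test steps is defined elsewhere in the repository; a minimal corpus-level ROUGE-N recall sketch over whitespace tokens (unigrams by default; the project's scorer may differ):

from collections import Counter

def rouge_n(hypotheses, references, n=1):
    """ROUGE-N recall: clipped overlapping n-grams / total reference n-grams."""
    def ngrams(text, n):
        tokens = text.split()
        return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))

    overlap, total = 0, 0
    for hyp, ref in zip(hypotheses, references):
        hyp_counts = ngrams(hyp, n)
        ref_counts = ngrams(ref, n)
        overlap += sum(min(count, hyp_counts[gram]) for gram, count in ref_counts.items())
        total += sum(ref_counts.values())
    return overlap / total if total else 0.0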