Example #1
import os

# parse_data, ToyDataFeeder, and ner are assumed to come from this project's
# own modules; their import lines are not shown in the source.


def main():
  result_path = "../result_toy/"
  if not os.path.exists(result_path):
    os.makedirs(result_path)

  batch_size = 32

  train = '../dataset/toy_reverse/train/'

  X_train, y_train = parse_data(train)

  train_ner_data = ToyDataFeeder(X_train, y_train)
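  # naive_batch_buckets presumably groups sentences of similar length into
  # buckets and returns index-encoded minibatches of size batch_size.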
  train_X, train_Y, _ = train_ner_data.naive_batch_buckets(batch_size)

  index2word = train_ner_data._idx_to_word
  index2label = train_ner_data._idx_to_label

  vocab_size = len(index2word)
  label_size = len(index2label)

  # With a word2vec pre-trained embedding this would be 300:
  # word_embedding_dim = 300
  word_embedding_dim = 8

  hidden_dim = 64
  label_embedding_dim = 8

  max_epoch = 50

  # 0.001 is a good value
  learning_rate = 0.001

  #attention = "fixed"
  attention = None

  #pretrained = 'de64'
  pretrained = None

  if pretrained == 'de64':
    word_embedding_dim = 64

  gpu = True

  load_model_filename = None

  machine = ner(word_embedding_dim,
                hidden_dim,
                label_embedding_dim,
                vocab_size,
                label_size,
                learning_rate=learning_rate,
                minibatch_size=batch_size,
                max_epoch=max_epoch,
                train_X=train_X,
                train_Y=train_Y,
                val_X=None,
                val_Y=None,
                test_X=None,
                test_Y=None,
                attention=attention,
                gpu=gpu,
                pretrained=pretrained,
                load_model_filename=load_model_filename)
  if gpu:
    machine = machine.cuda()

  shuffle = True

  # Pure training, no evaluation
  train_loss_list = machine.train(shuffle, result_path, False, None)
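
The listing defines main() but does not show how it is invoked; assuming the example is run directly as a script, a standard entry-point guard would be:

if __name__ == "__main__":
  main()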

Example #2

import os
import time

# As in Example #1, parse_data, ToyDataFeeder, and ner are assumed to come
# from the project's own modules; their import lines are not shown.


def main():
    result_path = "../result_toy/"

    train = "../dataset/toy_reverse/train/"
    X_train, y_train = parse_data(train)
    train_ner_data = ToyDataFeeder(X_train, y_train)

    batch_size = 1
    val = "../dataset/toy_reverse/valid/"
    test = "../dataset/toy_reverse/test/"
    X_val, y_val = parse_data(val)
    X_test, y_test = parse_data(test)

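    # Passing the training feeder's vocab/label mappings into the validation
    # and test feeders below keeps word and label indices consistent across
    # splits.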
    val_ner_data = ToyDataFeeder(X_val,
                                 y_val,
                                 word_to_idx=train_ner_data._word_to_idx,
                                 idx_to_word=train_ner_data._idx_to_word,
                                 label_to_idx=train_ner_data._label_to_idx,
                                 idx_to_label=train_ner_data._idx_to_label)
    val_X, val_Y, _ = val_ner_data.naive_batch_buckets(batch_size)

    test_ner_data = ToyDataFeeder(X_test,
                                  y_test,
                                  word_to_idx=train_ner_data._word_to_idx,
                                  idx_to_word=train_ner_data._idx_to_word,
                                  label_to_idx=train_ner_data._label_to_idx,
                                  idx_to_label=train_ner_data._idx_to_label)
    test_X, test_Y, _ = test_ner_data.naive_batch_buckets(batch_size)

    index2word = train_ner_data._idx_to_word
    index2label = train_ner_data._idx_to_label
    vocab_size = len(index2word)
    label_size = len(index2label)

    # With a word2vec pre-trained embedding this would be 300:
    # word_embedding_dim = 300
    word_embedding_dim = 8

    hidden_dim = 64
    label_embedding_dim = 8

    max_epoch = 50

    # 0.001 is a good value
    learning_rate = 0.001

    #attention = "fixed"
    attention = None

    #pretrained = 'de64'
    pretrained = None

    if pretrained == 'de64':
        word_embedding_dim = 64

    gpu = False

    ##################

    eval_output_file = open(
        os.path.join(result_path, "eval_beam_1_ckpt_24.txt"), "w+")

    epoch = 24

    load_model_filename = os.path.join(result_path,
                                       "ckpt_" + str(epoch) + ".pth")

    machine = ner(word_embedding_dim,
                  hidden_dim,
                  label_embedding_dim,
                  vocab_size,
                  label_size,
                  learning_rate=learning_rate,
                  minibatch_size=batch_size,
                  max_epoch=max_epoch,
                  train_X=None,
                  train_Y=None,
                  val_X=val_X,
                  val_Y=val_Y,
                  test_X=test_X,
                  test_Y=test_Y,
                  attention=attention,
                  gpu=gpu,
                  pretrained=pretrained,
                  load_model_filename=load_model_filename,
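                  # The lambda below is presumably forwarded to torch.load's
                  # map_location, remapping GPU-saved tensors onto the CPU.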
                  load_map_location=lambda storage, loc: storage)
    if gpu:
        machine = machine.cuda()

    decode_method = "beam"

    beam_size = 1
    #max_beam_size = label_size
    max_beam_size = None

    #accum_logP_ratio_low = 0.1
    #logP_ratio_low = 0.1

    #agent = det_agent(max_beam_size, accum_logP_ratio_low, logP_ratio_low)
    agent = None

    # For German dataset, f_score_index_begin = 5 (because O_INDEX = 4)
    # For toy dataset, f_score_index_begin = 4 (because {0: '<s>', 1: '<e>', 2: '<p>', 3: '<u>', ...})
    f_score_index_begin = 4

    # We don't evaluate on the training set: it is simply too slow, because
    # adaptive beam search cannot use mini-batches.
    val_fscore = machine.evaluate(val_X, val_Y, index2word, index2label, "val",
                                  None, decode_method, beam_size,
                                  max_beam_size, agent, f_score_index_begin)

    time_begin = time.time()
    test_fscore = machine.evaluate(test_X, test_Y, index2word, index2label,
                                   "test", None, decode_method, beam_size,
                                   max_beam_size, agent, f_score_index_begin)
    time_end = time.time()

    print_msg = "epoch %d, val F = %.6f, test F = %.6f, test time = %.6f" % (
        epoch, val_fscore, test_fscore, time_end - time_begin)
    log_msg = "%d\t%f\t%f\t%f" % (epoch, val_fscore, test_fscore,
                                  time_end - time_begin)
    print(print_msg)
    print(log_msg, file=eval_output_file, flush=True)

    eval_output_file.close()
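
As in Example #1, the entry point is assumed to be a standard guard. Note that load_model_filename points at ckpt_24.pth, which this script expects a prior training run (such as Example #1) to have saved under ../result_toy/.

if __name__ == "__main__":
    main()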