Example #1
def main():
    # build a timestamped directory for saved models and results
    config.mulu = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    config.save_dir = os.path.join(config.save_direction, config.mulu)
    if not os.path.isdir(config.save_dir):
        os.makedirs(config.save_dir)

    # build the data iterators and the word/label alphabets in one call
    train_iter, dev_iter, test_iter, create_alphabet = load_Data(config)

    config.embed_num = create_alphabet.word_alphabet.vocab_size
    config.class_num = create_alphabet.label_alphabet.vocab_size
    config.paddingId = create_alphabet.word_paddingId
    config.label_paddingId = create_alphabet.label_paddingId
    config.create_alphabet = create_alphabet
    print("embed_num : {}, class_num : {}".format(config.embed_num, config.class_num))
    print("PaddingID {}".format(config.paddingId))

    if config.pretrained_embed:
        print("Using Pre_Trained Embedding.")
        pretrain_embed = load_pretrained_emb_zeros(path=config.pretrained_embed_file,
                                                   text_field_words_dict=create_alphabet.word_alphabet.id2words,
                                                   pad=paddingkey)
        config.pretrained_weight = pretrain_embed

    model = None
    if config.model_BiLstm:
        print("loading model.....")
        model = BiLSTM(config)
        print(model)
        if config.use_cuda:
            model = model.cuda()
        print("Training Start......")

    train.train(train_iter=train_iter, dev_iter=dev_iter, test_iter=test_iter, model=model, config=config)
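None of the snippets above shows the BiLSTM class itself, so here is a minimal sketch of what such a token classifier typically looks like. The field names config.embed_dim, config.lstm_hiddens, and config.lstm_layers are hypothetical, and the pre-trained-weight copy assumes the loaded matrix matches the embedding table's shape.

import torch
import torch.nn as nn

class BiLSTM(nn.Module):
    # Sketch only: a padded embedding lookup, a bidirectional LSTM encoder,
    # and a linear projection from the concatenated directions to the labels.
    def __init__(self, config):
        super(BiLSTM, self).__init__()
        self.embed = nn.Embedding(config.embed_num, config.embed_dim,
                                  padding_idx=config.paddingId)
        if getattr(config, "pretrained_weight", None) is not None:
            # load the vectors prepared in main() into the embedding table
            self.embed.weight.data.copy_(torch.tensor(config.pretrained_weight))
        self.lstm = nn.LSTM(config.embed_dim, config.lstm_hiddens,
                            num_layers=config.lstm_layers,
                            batch_first=True, bidirectional=True)
        self.linear = nn.Linear(config.lstm_hiddens * 2, config.class_num)

    def forward(self, word_ids):
        x = self.embed(word_ids)   # (batch, seq_len, embed_dim)
        x, _ = self.lstm(x)        # (batch, seq_len, 2 * lstm_hiddens)
        return self.linear(x)      # (batch, seq_len, class_num)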
Example #2
def main():
    # build a timestamped directory for saved models and results
    mulu = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    args.mulu = mulu
    args.save_dir = os.path.join(args.save_dir, mulu)
    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    # build the data iterators and alphabets for CoNLL-2003 NER
    train_iter, dev_iter, test_iter, create_alphabet = load_Conll2003_NER(args)

    args.embed_num = create_alphabet.word_alphabet.vocab_size
    args.class_num = create_alphabet.label_alphabet.vocab_size
    args.paddingId = create_alphabet.word_paddingId
    args.create_alphabet = create_alphabet
    print("embed_num : {}, class_num : {}".format(args.embed_num,
                                                  args.class_num))
    print("PaddingID {}".format(args.paddingId))

    if args.word_Embedding:
        print("Using Pre_Trained Embedding.")
        pretrain_embed = load_pretrained_emb_avg(
            path=args.word_Embedding_Path,
            text_field_words_dict=create_alphabet.word_alphabet.id2words,
            pad=paddingkey)
        args.pretrained_weight = pretrain_embed

    # print params
    show_params()

    model = None
    if args.BiLSTM:
        print("loading BiLSTM model.....")
        model = BiLSTM(args)
        shutil.copy("./models/BiLSTM.py", args.save_dir)
        print(model)
    elif args.BiLSTM_CRF:
        print("loading BiLSTM_CRF model......")
        bilstm = BiLSTM(args)
        crf = CRF(args)
        model = BiLSTM_CRF(BiLSTM=bilstm, CRF=crf, args=args)
        print(model)
    if args.use_cuda:
        print("Using Cuda To Speed Up......")
        model = model.cuda()

    if os.path.exists("./Test_Result.txt"):
        os.remove("./Test_Result.txt")
    print("Training Start......")
    if args.BiLSTM:
        train_conll2003.train(train_iter=train_iter,
                              dev_iter=dev_iter,
                              test_iter=test_iter,
                              model=model,
                              args=args)
    elif args.BiLSTM_CRF:
        train_conll2003_CRF.train(train_iter=train_iter,
                                  dev_iter=dev_iter,
                                  test_iter=test_iter,
                                  model=model,
                                  args=args)
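Example #2 builds a BiLSTM and a CRF separately and hands both to a BiLSTM_CRF wrapper. A minimal sketch of one way to wire that composition is below; the CRF method names neg_log_likelihood and decode are assumptions, not the repository's actual interface.

import torch.nn as nn

class BiLSTM_CRF(nn.Module):
    # Hypothetical wrapper: the BiLSTM emits per-token label scores and the
    # CRF scores/decodes whole tag sequences over those emissions.
    def __init__(self, BiLSTM, CRF, args):
        super(BiLSTM_CRF, self).__init__()
        self.bilstm = BiLSTM
        self.crf = CRF
        self.args = args

    def loss(self, word_ids, tags):
        emissions = self.bilstm(word_ids)                    # (batch, seq, tags)
        return self.crf.neg_log_likelihood(emissions, tags)  # training objective

    def forward(self, word_ids):
        emissions = self.bilstm(word_ids)
        return self.crf.decode(emissions)                    # best-scoring tag paths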
Example #3
dropout = args.dropout

# output dimension: tag set size plus 2 (presumably extra <start>/<stop> tags)
out_dim = len(tag2id) + 2

model = BiLSTM(train_vocab_size, embedding_dim, num_layers, hidden_dim,
               dropout, out_dim)

optimizer = optim.Adam(model.parameters(),
                       lr=args.lr,
                       weight_decay=args.weight_decay)

batch_size = args.batch_size

if args.cuda:
    model.cuda()
    train_word_id_lists = train_word_id_lists.cuda()
    train_tag_id_lists = train_tag_id_lists.cuda()
    dev_word_id_lists = dev_word_id_lists.cuda()
    dev_tag_id_lists = dev_tag_id_lists.cuda()
    test_word_id_lists = test_word_id_lists.cuda()
    test_tag_id_lists = test_tag_id_lists.cuda()

train_total_step = train_vocab_size // batch_size + 1
dev_total_step = dev_vocab_size // batch_size + 1
test_total_step = test_vocab_size // batch_size + 1

best_model = None
best_val_loss = float('inf')
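The trailing best_model / best_val_loss pair suggests validation-based checkpointing. A minimal sketch of such a loop follows; args.epochs, train_one_epoch, and evaluate are hypothetical placeholders for the script's real training and evaluation steps.

import copy

for epoch in range(args.epochs):
    train_one_epoch(model, optimizer)        # one pass over the training batches
    val_loss = evaluate(model, dev_word_id_lists, dev_tag_id_lists)
    if val_loss < best_val_loss:             # new best score on the dev set
        best_val_loss = val_loss
        best_model = copy.deepcopy(model)    # snapshot the best weights so far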