Code Example #1
def Train(trainfile):
    word2id, embeddings = getDicEmbed()
    traindata = getTrainData(trainfile)

    model = BiLSTM_CRF(batch_size=args.batch_size,
                       epoch_num=args.epoch,
                       hidden_dim=args.hidden_dim,
                       embeddings=embeddings,
                       dropout_keep=args.dropout,
                       optimizer=args.optimizer,
                       lr=args.lr,
                       clip_grad=args.clip,
                       tag2label=tag2label,
                       vocab=word2id,
                       shuffle=args.shuffle,
                       model_path=ckpt_prefix,
                       summary_path=summary_path,
                       log_path=log_path,
                       result_path=result_path,
                       CRF=args.CRF,
                       update_embedding=args.update_embedding)
    model.build_graph()

    dev_data = traindata[:5000]
    dev_size = len(dev_data)
    train_data = traindata[5000:]
    train_size = len(train_data)
    print("train data: {0}\n dev data: {1}".format(train_size, dev_size))
    model.train(traindata, dev_data)
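The getDicEmbed helper isn't shown in this example. A minimal sketch of what it might look like, assuming the vocabulary is a pickled word2id dict and the embeddings are initialized uniformly in [-0.25, 0.25], as the random_embedding comment in Code Example #23 describes (the file name and dimension below are illustrative):

import pickle
import numpy as np

def getDicEmbed(vocab_path='word2id.pkl', embedding_dim=300):
    # Load the word-to-id dictionary (hypothetical file name).
    with open(vocab_path, 'rb') as f:
        word2id = pickle.load(f)
    # Random uniform initialization, one row per vocabulary entry.
    embeddings = np.float32(np.random.uniform(-0.25, 0.25,
                                              (len(word2id), embedding_dim)))
    return word2id, embeddings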
Code Example #2
File: main.py Project: ryan147k/NER-pytorch
def train():
    """
    模型训练
    """
    train_writer = SummaryWriter(log_dir='./log/train')
    test_writer = SummaryWriter(log_dir='./log/test')

    # step 1: model
    bilstm_crf = BiLSTM_CRF(opt.vocab_size, opt.emb_dim, opt.emb_dim//2, opt.tag_num, dropout=opt.dropout)
    if opt.load_model_path:     # load a checkpoint if one is given
        bilstm_crf.load(opt.load_model_path)

    # step 2: data
    rmrb_train_dataset = RmrbDataset(train=True)
    rmrb_test_dataset = RmrbDataset(train=False)
    rmrb_train_dataloader = DataLoader(rmrb_train_dataset, batch_size=64, shuffle=True)
    rmrb_test_dataloader = DataLoader(rmrb_test_dataset, batch_size=len(rmrb_test_dataset), shuffle=True)

    # step 3: loss function and optimizer
    # loss_fn = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=bilstm_crf.parameters(), lr=lr, weight_decay=opt.weight_decay)

    previous_loss = 1e9
    iteration = 0
    for epoch in range(opt.max_epoch):
        print('epoch {}'.format(epoch))
        for ii, (x_batch, y_batch) in enumerate(rmrb_train_dataloader):
            # compute the loss
            loss = bilstm_crf.log_likelihood(x_batch, y_batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            if ii % 20 == 0:
                # print('loss:{}'.format(loss.item()))
                train_writer.add_scalar('Loss', loss.item(), iteration)
                iteration += 1
                if loss.item() > previous_loss:
                    # decay the learning rate and write it back into the
                    # optimizer; without this the decay has no effect
                    lr = lr * opt.lr_decay
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr
                else:
                    previous_loss = loss.item()
        # save a model checkpoint
        bilstm_crf.save()

        # evaluation metrics
        with t.no_grad():
            bilstm_crf.eval()   # switch the model to evaluation mode
            for x_test, y_test in rmrb_test_dataloader:
                test_loss = bilstm_crf.log_likelihood(x_test, y_test)
                test_writer.add_scalar('Loss', test_loss.item(), iteration)
                y_pre = bilstm_crf(x_test)
                print(classification_report(t.flatten(y_test), t.flatten(y_pre)))
            bilstm_crf.train()  # switch the model back to training mode
Code Example #3
 def train_model(self):
     '''
     Start training.
     :return:
     '''
     model = BiLSTM_CRF(args,
                        self.embedding,
                        self.tag2id,
                        self.word2id,
                        self.paths,
                        config=config)
     model.build_graph()
     print("train data: {}".format(len(self.train_data)))
     print("dev data: {}".format(len(self.dev_data)))
     model.train(self.train_data, self.dev_data, args)
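The paths argument expected by this constructor mirrors the dictionary assembled in Code Examples #6 and #23. A sketch of its shape, with the keys taken from those examples and an illustrative output directory:

import os

output_path = './data_path_save/1559398699'  # illustrative timestamped root
paths = {
    'summary_path': os.path.join(output_path, 'summaries'),
    'model_path': os.path.join(output_path, 'checkpoints', 'model'),
    'result_path': os.path.join(output_path, 'results'),
    'log_path': os.path.join(output_path, 'results', 'log.txt'),
}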
Code Example #4
File: main.py Project: lyzKF/180-china
if args.mode == 'train':
    model = BiLSTM_CRF(args, embeddings, tag2label,
                       word2id, paths, config=config)
    print(model)
    model.build_graph()

    # hyperparameters-tuning, split train/dev
    # dev_data = train_data[:5000]; dev_size = len(dev_data)
    # train_data = train_data[5000:]; train_size = len(train_data)
    # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    # model.train(train=train_data, dev=dev_data)

    # train model on the whole training data
    print("train data: {}".format(len(train_data)))
    # use test_data as the dev_data to see overfitting phenomena
    model.train(train=train_data, dev=test_data)

# testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, embeddings, tag2label,
                       word2id, paths, config=config)
    model.build_graph()
    print("test data: {}".format(test_size))
    model.test(test_data)

# demo
elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
Code Example #5
File: train.py Project: ymsBFSU/cws_biLSTM_crf
            for i in range(len(sent)):
                sent_res.append([sent[i], tag[i], tag_pred[i]])
            model_pred.append(sent_res)
    # label_path = os.path.join(hp.result_path, 'label_' + epoch_num)
    # metric_path = os.path.join(hp.result_path, 'result_metric_' + epoch_num)
    result = conlleval(model_pred)
    print(result)
    # print(len(label_pred))


if __name__ == '__main__':
    logging.info("Loading data")
    train_data = load_data(hp.prepro_dir + 'train.utf8', name="train")
    test_data = load_data(hp.prepro_dir + 'test.utf8', name='test')
    model = BiLSTM_CRF()
    loss, train_op, global_step = model.train()
    logits, transition_params = model.eval()
    logging.info("Graph loaded")
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    config_proto.gpu_options.allow_growth = True
    with tf.Session(config=config_proto) as sess:
        saver = tf.train.Saver(max_to_keep=hp.max_to_keep)
        ckpt = tf.train.latest_checkpoint(hp.log_dir)
        if ckpt is None:
            logging.info("Initializing from scratch")
            sess.run(tf.global_variables_initializer())
        else:
            logging.info("Restore model from {}".format(ckpt))
            saver.restore(sess, ckpt)
        last_save_steps = 0
        for i in range(hp.epoch):
Code Example #6
ckpt_prefix = os.path.join(model_path, "model") # 預設 './data_path_save/timestamp/checkpoints/model'
paths['model_path'] = ckpt_prefix
result_path = os.path.join(output_path, "results") # 預設 './data_path_save/timestamp/results'
paths['result_path'] = result_path
if not os.path.exists(result_path): os.makedirs(result_path)
log_path = os.path.join(result_path, "log.txt") # 預設 './data_path_save/timestamp/results/log.txt'
paths['log_path'] = log_path
get_logger(log_path).info(str(args))


## training model
if args.mode == 'train':
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config) # build the model
    model.build_graph() # build the graph
    print("train data: {}".format(len(train_data)))
    model.train(train=train_data, dev=test_data)  # train the model

## testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config) # build the model
    model.build_graph() # build the graph
    print("test data: {}".format(test_size))
    model.test(test_data) # test the model

## demo
elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
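get_logger is assumed to return a logging.Logger that writes to log_path. A minimal stand-in built on the standard library (the projects' actual helper may format lines differently):

import logging

def get_logger(filename):
    # A minimal file logger; guards against adding duplicate handlers
    # when called more than once.
    logger = logging.getLogger('train')
    logger.setLevel(logging.INFO)
    if not logger.handlers:
        handler = logging.FileHandler(filename, encoding='utf-8')
        handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
        logger.addHandler(handler)
    return logger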
Code Example #7
File: train.py Project: msps9341012/NER-pytorch
best_idx = 0

if parameters['reload']:
    print('loading model:', parameters['reload'])
    checkpoint = torch.load(models_path + parameters['reload'])
    #model.load_state_dict(checkpoint)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    adjust_learning_rate(optimizer, lr=learning_rate)

sys.stdout.flush()

from conlleval import evaluate

model.train(True)
ratio = 0.0
if parameters['adv']:
    ratio = 0.5


def evaluating_batch(model, datas, best_F, display_confusion_matrix=False):

    true_tags = []
    pred_tags = []

    save = False
    new_F = 0.0
Code Example #8
    os.makedirs(result_path)
log_path = os.path.join(result_path, "log.txt")
get_logger(log_path).info(str(args))

# training model
if args.mode == 'train':
    model = BiLSTM_CRF(batch_size=args.batch_size, epoch_num=args.epoch, hidden_dim=args.hidden_dim, embeddings=embeddings,
                       dropout_keep=args.dropout, optimizer=args.optimizer, lr=args.lr, clip_grad=args.clip,
                       tag2label=tag2label, vocab=word2id, shuffle=args.shuffle,
                       model_path=ckpt_prefix, summary_path=summary_path, log_path=log_path, result_path=result_path,
                       CRF=args.CRF, update_embedding=args.update_embedding)
    model.build_graph()
    # hyperparameters-tuning, split train/dev
    # train model on the whole training data
    print("train data: {}".format(len(train_data)))
    model.train(train_data, test_data)  # we could use test_data as the dev_data to see the overfitting phenomena

# testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    model = BiLSTM_CRF(batch_size=args.batch_size, epoch_num=args.epoch, hidden_dim=args.hidden_dim, embeddings=embeddings,
                       dropout_keep=args.dropout, optimizer=args.optimizer, lr=args.lr, clip_grad=args.clip,
                       tag2label=tag2label, vocab=word2id, shuffle=args.shuffle,
                       model_path=ckpt_file, summary_path=summary_path, log_path=log_path, result_path=result_path,
                       CRF=args.CRF, update_embedding=args.update_embedding)
    model.build_graph()
    print("test raw_data: {}".format(test_size))
    model.test(test_data)
elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
Code Example #9
                                momentum=learning_momentum)

elif optimizer_choice == OptimizationMethod.Adam:
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()))
elif optimizer_choice == OptimizationMethod.AdaDelta:
    optimizer = torch.optim.Adadelta(
        filter(lambda p: p.requires_grad, model.parameters()))

best_dev_results = [-1.0, -1.0, -1.0, -1.0]
best_test_results = [-1.0, -1.0, -1.0, -1.0]
best_train_results = [-1.0, -1.0, -1.0, -1.0]

batch_count = math.ceil(len(train_set) / mini_batch_size)

model.train(True)
for epoch in range(max_epoch):
    train_indices = np.random.permutation(len(train_set))
    full_logs = []
    if epoch == 0:
        # print(opts_str)
        full_logs.append(opts_str)

    train_time = 0
    for batch_i in range(batch_count):
        start_idx = batch_i * mini_batch_size
        end_idx = min((batch_i + 1) * mini_batch_size, len(train_set))

        mini_batch_idx = train_indices[start_idx:end_idx]

        sentence_masks, words, chars, tags, \
Code Example #10
    vocab = load_vocabulary('./dataset/vocabulary.pkl')
else:
    build_vocabulary('./dataset/vocabulary.pkl', train_path, 10)
    vocab = load_vocabulary('./dataset/vocabulary.pkl')

# read_dataset & training
if args.mode == 'train':
    model = BiLSTM_CRF(args, tag2label, vocab, log_path, logger, config)
    model.build_graph()
    print('Start training ...')
    print('training data contains : {} lines'.format(len(train_data)))

    saver = tf.train.Saver(tf.global_variables())
    with tf.Session(config=model.config) as sess:
        sess.run(tf.global_variables_initializer())
        model.train(sess=sess, train=train_data, dev=train_data, saver=saver)

elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    log_path['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, tag2label, vocab, log_path, logger, config=config)
    model.build_graph()
    saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
        print('Start demo ...')
        saver.restore(sess, ckpt_file)
        while True:
            print('Please input a sentence (press enter or space to exit):')
            demo_sent = input()
            if demo_sent == '' or demo_sent.isspace():
Code Example #11
File: main.py Project: liuxinghui01/wolf-ai
def main(_):
    print('start app')

    data_path = os.path.join(FLAGS.train_data, 'word2id.pkl')
    word2id = read_dictionary(data_path)
    if FLAGS.pretrain_embedding == 'random':
        data_embeddings = random_embedding(word2id, FLAGS.embedding_dim)
    else:
        embedding_path = 'pretrain_embedding.npy'
        data_embeddings = np.array(np.load(embedding_path), dtype='float32')

    if FLAGS.mode != 'demo':
        train_file = os.path.join(FLAGS.train_data, 'train_data')
        test_file = os.path.join(FLAGS.test_data, 'test_data')
        train_data = read_corpus(train_file)
        test_data = read_corpus(test_file)
        test_size = len(test_data)

    time_stamp = str(int(
        time.time())) if FLAGS.mode == 'train' else FLAGS.demo_model

    def generator_dir(file_path):
        if not os.path.exists(file_path):
            os.makedirs(file_path)
        return file_path

    output_path = generator_dir(
        os.path.join(FLAGS.train_data + '_save', time_stamp))
    summary_path = generator_dir(os.path.join(output_path, 'summary'))
    model_path = generator_dir(os.path.join(output_path, 'checkpoints'))
    ckpt_prefix = generator_dir(os.path.join(model_path, 'model'))
    result_path = generator_dir(os.path.join(output_path, 'results'))

    if FLAGS.mode == 'train':
        print('train ==================')
        """
        def __init__(self, batch_size, epoch, hidden_size, embeddings, crf, update_embedding, dropout_keepprob, optimizer,
         learning_rate, clip, tag2label, vocab, shuffle, model_p, summary_p, results_p, config):
        """
        model = BiLSTM_CRF(FLAGS.batch_size, FLAGS.epoch, FLAGS.hidden_size,
                           data_embeddings, FLAGS.CRF, FLAGS.update_embedding,
                           FLAGS.dropout, FLAGS.optimizer, FLAGS.learning_rate,
                           FLAGS.clipping, tag2label, word2id, FLAGS.shuffle,
                           model_path, summary_path, result_path, config)
        model.build_graph()
        model.train(train_data, test_data)
    elif FLAGS.mode == 'test':
        print('test ===============')
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print('ckpt file {}'.format(ckpt_file))
        model = BiLSTM_CRF(FLAGS.batch_size, FLAGS.epoch, FLAGS.hidden_size,
                           data_embeddings, FLAGS.CRF, FLAGS.update_embedding,
                           FLAGS.dropout, FLAGS.optimizer, FLAGS.learning_rate,
                           FLAGS.clipping, tag2label, word2id, FLAGS.shuffle,
                           ckpt_file, summary_path, result_path, config)
        model.build_graph()
        print('test data {}'.format(test_size))
        model.test(test_data)
    elif FLAGS.mode == 'demo':
        print('demo ===========')
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print('ckpt file {}'.format(ckpt_file))
        model = BiLSTM_CRF(FLAGS.batch_size, FLAGS.epoch, FLAGS.hidden_size,
                           data_embeddings, FLAGS.CRF, FLAGS.update_embedding,
                           FLAGS.dropout, FLAGS.optimizer, FLAGS.learning_rate,
                           FLAGS.clipping, tag2label, word2id, FLAGS.shuffle,
                           ckpt_file, summary_path, result_path, config)
        model.build_graph()
        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            saver.restore(sess, ckpt_file)
            while True:
                print("please input you sentence:")
                demo_sentence = input()
                if not demo_sentence or demo_sentence.isspace():
                    print('bye')
                    break
                else:
                    demo_sent = list(demo_sentence.strip())
                    demo_data = [(demo_sent, [0] * len(demo_sent))]
                    tag = model.demo_one(sess, demo_data)
                    per, loc, org = get_entity(tag, demo_sent)
                    print('per {0} loc {1} org {2}'.format(per, loc, org))
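Both this example and Code Example #23 initialize character embeddings with random_embedding, which (per the comment in #23) returns a len(vocab) x embedding_dim matrix with entries in [-0.25, 0.25]. A minimal sketch consistent with that description:

import numpy as np

def random_embedding(vocab, embedding_dim):
    # One uniformly initialized row per vocabulary entry.
    embedding_mat = np.random.uniform(-0.25, 0.25, (len(vocab), embedding_dim))
    return np.float32(embedding_mat)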
Code Example #12

## training model
if args.mode == 'train':
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config, on_train=True)
    model.build_graph()

    # hyperparameters-tuning, split train/dev
    dev_data = test_data; dev_size = len(dev_data)
    train_size = len(train_data)
    print("train data: {0}\ndev data: {1}".format(train_size, dev_size))

    # ckpt_file = r'.\data_path_save\1527663228\checkpoints\model-17136'
    # paths['model_path'] = ckpt_file
    # print(ckpt_file)
    model.train(train=train_data, dev=dev_data)

    # ## train model on the whole training data
    # print("train data: {}".format(len(train_data)))
    # model.train(train=train_data, dev=test_data)  # use test_data as the dev_data to see overfitting phenomena

## testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    # ckpt_file = r'.\data_path_save\1527697768\checkpoints\model-19992'
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    model.build_graph()
    print("test data: {}".format(test_size))
    model.test(test_data)
Code Example #13
def my_train():
    os.makedirs(f"model_result", exist_ok=True)
    torch.manual_seed(1)
    device = torch.device('cuda')

    data_dir = f"data/{DATASET}/processed"

    # load the datasets
    train_data = NERDataset(os.path.join(data_dir, "train.pkl"))
    test_data = NERDataset(os.path.join(data_dir, "test.pkl"))
    dev_data = NERDataset(os.path.join(data_dir, "dev.pkl"))

    word_to_idx = load_obj(os.path.join(data_dir, "word_to_idx.pkl"))
    tag_to_idx = load_obj(os.path.join(data_dir, "tag_to_idx.pkl"))

    idx_to_tag = {n: m for m, n in tag_to_idx.items()}

    train_loader = DataLoader(
        train_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    dev_loader = DataLoader(
        dev_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    test_loader = DataLoader(
        test_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )

    # build the model
    model = BiLSTM_CRF(len(word_to_idx), len(tag_to_idx), EMBEDDING_DIM,
                       HIDDEN_DIM, DROPOUT).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=LEARN_RATE)

    print("\n开始训练")
    f1_max = 0
    cur_patience = 0  # 用于避免过拟合
    for epoch in range(EPOCHS):
        model.train()
        for i, (seqs, tags, masks) in enumerate(train_loader, 1):
            optimizer.zero_grad()
            loss = model.loss(seqs.to(device), tags.to(device),
                              masks.to(device))
            loss.backward()
            optimizer.step()
            if i % LOG_INTERVAL == 0:
                print("epoch {}: {:.0f}%\t\tLoss: {:.6f}".format(
                    epoch, 100.0 * i / len(train_loader), loss.item()))
        dev_precision, dev_recall, dev_f1 = evaluate(model, dev_loader,
                                                     idx_to_tag)
        test_precision, test_recall, test_f1 = evaluate(
            model, test_loader, idx_to_tag)
        print(
            f"\ndev\tprecision: {dev_precision}, recall: {dev_recall}, f1: {dev_f1}"
        )
        print(
            f"test\tprecision: {test_precision}, recall: {test_recall}, f1: {test_f1}\n"
        )

        torch.save(model.state_dict(), f"model_result/{epoch}.pt")

        if dev_f1 > f1_max:  # track the best dev F1 to detect overfitting
            f1_max = dev_f1
            cur_patience = 0
            if dev_f1 > 0.9 and test_f1 > 0.9:
                break
        else:
            cur_patience += 1
            if cur_patience >= PATIENCE:  # stop after too many epochs without improvement
                break
    print("Best dev F1: ", f1_max)
Code Example #14
File: ner.py Project: Ma-Dan/NER
    tgt_vocab_size = len(tgt_vocab)
    tgt_unknown = tgt_vocab_size
    tgt_padding = tgt_vocab_size + 1
    #print(tgt_vocab)

    embedding = load_word2vec_embedding(args.word_embedding_file,
                                        args.embedding_dim, src_vocab_size)

    if args.mode == 'train':
        model = BiLSTM_CRF(args, embedding, src_vocab, tgt_vocab, src_padding,
                           tgt_padding, paths)
        model.build_graph()
        train_data = read_corpus(args.src_file, args.tgt_file, src_vocab,
                                 tgt_vocab, src_unknown, tgt_unknown)
        print("train data: {}".format(len(train_data)))
        model.train(train_data=train_data, test_data=None)
    elif args.mode == 'demo':
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print(ckpt_file)
        paths['model_path'] = ckpt_file
        model = BiLSTM_CRF(args, embedding, src_vocab, tgt_vocab, src_padding,
                           tgt_padding, paths)
        model.build_graph()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            print('============= demo =============')
            saver.restore(sess, ckpt_file)
            while True:
                print('Please input your sentence:')
                demo_sent = input()
                if demo_sent == '' or demo_sent.isspace():
Code Example #15
def trainAll(args):

    if args.mode == 'train':
        model = BiLSTM_CRF(args,
                           embeddings,
                           tag2label,
                           word2id,
                           paths,
                           config=config)
        model.build_graph()

        ## hyperparameters-tuning, split train/dev
        # dev_data = train_data[:5000]; dev_size = len(dev_data)
        # train_data = train_data[5000:]; train_size = len(train_data)
        # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
        # model.train(train=train_data, dev=dev_data)

        ## train model on the whole training data
        print("train data: {}".format(len(train_data)))
        model.train(
            train=train_data, dev=test_data
        )  # use test_data as the dev_data to see overfitting phenomena

    ## testing model
    elif args.mode == 'test':
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print(ckpt_file)
        paths['model_path'] = ckpt_file
        model = BiLSTM_CRF(args,
                           embeddings,
                           tag2label,
                           word2id,
                           paths,
                           config=config)
        model.build_graph()
        print("test data: {}".format(test_size))
        model.test(test_data)

    ## demo
    elif args.mode == 'demo':
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print(ckpt_file)
        paths['model_path'] = ckpt_file
        model = BiLSTM_CRF(args,
                           embeddings,
                           tag2label,
                           word2id,
                           paths,
                           config=config)
        model.build_graph()
        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            print('============= demo =============')
            saver.restore(sess, ckpt_file)
            while True:
                print('Please input your sentence:')
                demo_sent = input()
                if demo_sent == '' or demo_sent.isspace():
                    print('See you next time!')
                    break
                else:
                    demo_sent = list(demo_sent.strip())
                    demo_data = [(demo_sent, ['O'] * len(demo_sent))]
                    tag = model.demo_one(sess, demo_data)
                    PER, LOC, ORG = get_entity(tag, demo_sent)

                    print('PER: {}\nLOC: {}\nORG: {}'.format(PER, LOC, ORG))

    elif args.mode == 'savemodel':

        ckpt_file = tf.train.latest_checkpoint(model_path)
        print(ckpt_file)
        paths['model_path'] = ckpt_file
        model = BiLSTM_CRF(args,
                           embeddings,
                           tag2label,
                           word2id,
                           paths,
                           config=config)
        model.build_graph()
        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            saver.restore(sess, ckpt_file)
            demo_sent = tf.placeholder(tf.string, name='input')
            demo_sent = list(str(demo_sent).strip())
            demo_data = [(demo_sent, ['O'] * len(demo_sent))]
            tag = model.demo_one(sess, demo_data)
            PER, LOC, ORG = get_entity(tag, demo_sent)
            result = {'PER': PER, 'LOC': LOC, 'ORG': ORG}
            print('PER: {}\nLOC: {}\nORG: {}'.format(PER, LOC, ORG))
            # save as a SavedModel
            builder = tf.saved_model.builder.SavedModelBuilder('./savemodels')
            signature = predict_signature_def(inputs={'input': demo_sent},
                                              outputs={'output': result})
            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={'predict': signature})
            builder.save()
            print('SavedModel saved')
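Note that the savemodel branch above cannot work as written: demo_sent is first a tf.placeholder and is then overwritten with a Python list, and predict_signature_def requires graph tensors, not Python objects, in its inputs and outputs dicts. A corrected sketch of the export, assuming the model exposes its input placeholder as model.word_ids and its score tensor as model.logits (hypothetical attribute names; substitute the real tensors from the BiLSTM_CRF implementation):

builder = tf.saved_model.builder.SavedModelBuilder('./savemodels')
# inputs/outputs must map names to tensors in the graph
signature = predict_signature_def(inputs={'input': model.word_ids},
                                  outputs={'output': model.logits})
builder.add_meta_graph_and_variables(
    sess, [tf.saved_model.tag_constants.SERVING],
    signature_def_map={'predict': signature})
builder.save()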
Code Example #16
File: main.py Project: avBuffer/DL-NLP
    ckpt_prefix = os.path.join(model_path, 'model')

    summary_path = args.log_path
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)

    ## training model
    if args.mode == 'train':
        print('==========lr====', args.lr)
        model = BiLSTM_CRF(batch_size=args.batch_size, epoch_num=args.epoch, hidden_dim=args.hidden_dim, embeddings=embeddings,
                           dropout_keep=args.dropout, optimizer=args.optimizer, lr=args.lr, clip_grad=args.clip, tag2label=tag2label,
                           vocab=word2id, shuffle=args.shuffle, model_path=ckpt_prefix, summary_path=summary_path, CRF=args.CRF,
                           update_embedding=args.update_embedding)
        model.build_graph()
        print('train data len=', len(train_data))
        model.train(train_data, test_data)

    elif args.mode == 'test':
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print(ckpt_file)
        model = BiLSTM_CRF(batch_size=args.batch_size, epoch_num=args.epoch, hidden_dim=args.hidden_dim, embeddings=embeddings,
                           dropout_keep=args.dropout, optimizer=args.optimizer, lr=args.lr, clip_grad=args.clip, tag2label=tag2label,
                           vocab=word2id, shuffle=args.shuffle, model_path=ckpt_file, summary_path=summary_path, CRF=args.CRF,
                           update_embedding=args.update_embedding)
        model.build_graph()
        print('test data: {}'.format(test_size))
        model.test(test_data)

    elif args.mode == 'demo':
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print(ckpt_file)
Code Example #17
File: main.py Project: hanxuew/NER_bi-lstm
label2id = ner_cfg.generate_tag_to_label()

logger = logging.getLogger(__name__)
current_dir = os.path.dirname(os.path.abspath(__file__))

## get char embeddings
word2id_pos2id = read_dictionary('word2id_pos2id_new.pkl')
word2id = word2id_pos2id['word2id']
pos2id = word2id_pos2id['pos2id']
word_embedding = np.array(np.load('word2vec.npy'), dtype=np.float32)
pos_embedding = np.array(np.load('pos2vec.npy'), dtype=np.float32)

config = Config(word2id,
                pos2id,
                label2id,
                batch_size=128,
                n_epochs=200,
                n_neurons=60)
config.word_embedding = word_embedding
config.pos_embedding = pos_embedding

## read corpus and get training data
train_data, test_data = read_corpus('train_data')
# test_data = read_corpus('test_data')
# test_size = len(test_data)

model = BiLSTM_CRF(is_training=True, config=config)
model.build_graph()
model.train(train_data=train_data, valid_data=test_data)
# model.test(test_data)
Code Example #18
 def train(self):
     config = self.conf()
     model = BiLSTM_CRF(self.embeddings, self.dicts["labels2idx"], self.dicts["words2idx"],
                        self.dicts["intents2idx"], config=config)
     model.build_graph()
     model.train(self.train_set)
Code Example #19
File: main.py Project: GhibliField/zh-NER-TF

## training model
if args.mode == 'train':
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    model.build_graph()

    ## hyperparameters-tuning, split train/dev
    # dev_data = train_data[:5000]; dev_size = len(dev_data)
    # train_data = train_data[5000:]; train_size = len(train_data)
    # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    # model.train(train=train_data, dev=dev_data)

    ## train model on the whole training data
    print("train data: {}".format(len(train_data)))
    model.train(train=train_data, dev=test_data)  # use test_data as the dev_data to see overfitting phenomena

## testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    model.build_graph()
    print("test data: {}".format(test_size))
    model.test(test_data)

## demo
elif args.mode == 'demo':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    print(ckpt_file)
Code Example #20
File: main.py Project: LeiChen9/zh-NER-TF
                       tag2label,
                       word2id,
                       paths,
                       config=config)
    model.num_tags = len(word2id)
    model.build_graph()

    ## hyperparameters-tuning, split train/dev
    # dev_data = train_data[:5000]; dev_size = len(dev_data)
    # train_data = train_data[5000:]; train_size = len(train_data)
    # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    # model.train(train=train_data, dev=dev_data)

    ## train model on the whole training data
    print("train data: {}".format(len(pre_train_data)))
    model.train(train=pre_train_data, dev=test_data
                )  # use test_data as the dev_data to see overfitting phenomena

## training model
if args.mode == 'train':
    model = BiLSTM_CRF(args,
                       embeddings,
                       tag2label,
                       word2id,
                       paths,
                       config=config)
    model.build_graph()

    ## hyperparameters-tuning, split train/dev
    # dev_data = train_data[:5000]; dev_size = len(dev_data)
    # train_data = train_data[5000:]; train_size = len(train_data)
    # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
Code Example #21
    model.build_graph()

    # hyperparameters-tuning, split train/dev
    # dev_data = train_data[:5000]; dev_size = len(dev_data)
    # train_data = train_data[5000:]; train_size = len(train_data)
    # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    # model.train(train=train_data, dev=dev_data)

    # train model on the whole training data

    ckpt_file = tf.train.latest_checkpoint(model_path)  # resume training from the latest checkpoint
    # ckpt_file=r'model_path\DaGuang\1566526104\checkpoints/model.ckpt-8'  # or load a specific checkpoint
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    model.train(
        train=train_data, dev=test_data, model_path=ckpt_file
    )  # use test_data.txt as the dev_data to see overfitting phenomena

# testing model
elif args.mode == 'test':
    ckpt_file = tf.train.latest_checkpoint(model_path)
    # ckpt_file = r'model_path\DaGuang\1566611347\checkpoints/model.ckpt-9'
    print(ckpt_file)
    paths['model_path'] = ckpt_file
    model = BiLSTM_CRF(args,
                       embeddings,
                       tag2label,
                       word2id,
                       paths,
                       config=config)
    model.build_graph()
Code Example #22
import torch
import torch.optim as optim
from dataset import Dataset
from model import BiLSTM_CRF

# torch.set_default_tensor_type('torch.cuda.FloatTensor')

epochs = 100
dataset = Dataset()
train_loader = dataset.get_train_loader(1)
model = BiLSTM_CRF(dataset.get_vocab_size(), dataset.get_label_index_dict(),
                   128, 128)

optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

model.train()
for epoch in range(epochs):
    for step, batch in enumerate(train_loader):
        sentence_in, targets = batch.line, batch.label

        sentence_in = sentence_in.permute([1, 0]).reshape(-1).contiguous()
        targets = targets.permute([1, 0]).reshape(-1).contiguous()

        model.zero_grad()
        loss = model.neg_log_likelihood(sentence_in.squeeze(-1),
                                        targets.squeeze(-1)) / len(sentence_in)

        loss.backward()
        optimizer.step()

        print("{}-{}: {:.5f}".format(epoch, iter, loss.item()))
Code Example #23
def run(sentences):
    # configure session parameters
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # use GPU 0
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # set the log level
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.2  # need ~700MB GPU memory

    # hyperparameter settings
    # build an argument parser and declare the expected arguments,
    # so the parser can process the command line when the program runs
    parser = argparse.ArgumentParser(
        description='BiLSTM-CRF for Chinese NER task')
    parser.add_argument('--train_data',
                        type=str,
                        default='data_path',
                        help='train data source')
    parser.add_argument('--test_data',
                        type=str,
                        default='data_path',
                        help='test data source')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='#sample of each minibatch')
    # batch: batch size; deep models are usually trained with SGD, drawing batch_size samples from the training set per step
    # iteration: one iteration trains on batch_size samples once
    # (one iteration = one forward pass + one backward pass)
    parser.add_argument('--epoch',
                        type=int,
                        default=40,
                        help='#epoch of training')
    # epoch: one epoch trains once on every sample in the training set
    # (one epoch = one forward and one backward pass over all training samples; e.g. with 1000 samples and batch_size=10, one pass over the whole set takes 100 iterations, i.e. 1 epoch)
    parser.add_argument('--hidden_dim',
                        type=int,
                        default=300,
                        help='#dim of hidden state')
    # dimension of the hidden state: 300
    parser.add_argument('--optimizer',
                        type=str,
                        default='Adam',
                        help='Adam/Adadelta/Adagrad/RMSProp/Momentum/SGD')
    # the optimizer defaults to Adam
    parser.add_argument('--CRF',
                        type=str2bool,
                        default=True,
                        help='use CRF at the top layer. if False, use Softmax')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate')
    parser.add_argument('--clip',
                        type=float,
                        default=5.0,
                        help='gradient clipping')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.5,
                        help='dropout keep_prob')
    # dropout temporarily removes network units with a given probability during training
    parser.add_argument('--update_embedding',
                        type=str2bool,
                        default=True,
                        help='update embedding during training')
    parser.add_argument(
        '--pretrain_embedding',
        type=str,
        default='random',
        help='use pretrained char embedding or init it randomly')
    parser.add_argument('--embedding_dim',
                        type=int,
                        default=300,
                        help='random init char embedding_dim')
    parser.add_argument('--shuffle',
                        type=str2bool,
                        default=True,
                        help='shuffle training data before each epoch')
    parser.add_argument('--mode',
                        type=str,
                        default='demo',
                        help='train/test/demo')
    parser.add_argument('--demo_model',
                        type=str,
                        default='1559398699',
                        help='model for test and demo')
    # parse the command-line arguments
    args = parser.parse_args()

    # initialize the embedding matrix and load the dictionary
    word2id = read_dictionary(os.path.join('.', args.train_data,
                                           'word2id.pkl'))
    # random_embedding returns a len(vocab) * embedding_dim = 3905 * 300 matrix (entries in [-0.25, 0.25]) as the initial value
    if args.pretrain_embedding == 'random':
        embeddings = random_embedding(word2id, args.embedding_dim)
    else:
        embedding_path = 'pretrain_embedding.npy'
        embeddings = np.array(np.load(embedding_path), dtype='float32')

    # load the training and test sets
    if args.mode != 'demo':
        train_path = os.path.join('.', args.train_data, 'train_data')
        test_path = os.path.join('.', args.test_data, 'test_data')
        train_data = read_corpus(train_path)
        test_data = read_corpus(test_path)
        test_size = len(test_data)

    # set up the output paths
    paths = {}
    timestamp = str(int(
        time.time())) if args.mode == 'train' else args.demo_model
    output_path = os.path.join('.', args.train_data + "_save", timestamp)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    summary_path = os.path.join(output_path, "summaries")
    paths['summary_path'] = summary_path
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)
    model_path = os.path.join(output_path, "checkpoints/")
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    ckpt_prefix = os.path.join(model_path, "model")
    paths['model_path'] = ckpt_prefix
    result_path = os.path.join(output_path, "results")
    paths['result_path'] = result_path
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    log_path = os.path.join(result_path, "log.txt")
    paths['log_path'] = log_path
    get_logger(log_path).info(str(args))  # write the arguments to the log file

    if args.mode == 'train':  # train the model
        model = BiLSTM_CRF(args,
                           embeddings,
                           tag2label,
                           word2id,
                           paths,
                           config=config)
        model.build_graph()
        model.train(train=train_data, dev=test_data)

    elif args.mode == 'test':  # test the model
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print(ckpt_file)
        paths['model_path'] = ckpt_file
        model = BiLSTM_CRF(args,
                           embeddings,
                           tag2label,
                           word2id,
                           paths,
                           config=config)
        model.build_graph()
        print("test data: {}".format(test_size))
        model.test(test_data)

    # demo
    elif args.mode == 'demo':
        location = []
        ckpt_file = tf.train.latest_checkpoint(model_path)
        print("model path: ", ckpt_file)
        paths['model_path'] = ckpt_file  # set the model path
        model = BiLSTM_CRF(args,
                           embeddings,
                           tag2label,
                           word2id,
                           paths,
                           config=config)
        model.build_graph()
        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            saver.restore(sess, ckpt_file)
            for sentence in sentences:
                demo_sent = sentence
                demo_sent = list(demo_sent.strip())  # strip whitespace and split into characters
                demo_data = [(demo_sent, ['O'] * len(demo_sent))]
                tag = model.demo_one(sess, demo_data)
                PER, LOC, ORG = get_entity(tag, demo_sent)  # recover entity strings from the tag sequence
                new_LOC = list(set(LOC))  # deduplicate
                loc = ' '.join(new_LOC)
                location.append(loc)
            return location
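get_entity, used in several examples to pull PER/LOC/ORG strings out of the predicted tags, is not shown. A minimal sketch for BIO-style tags (B-PER/I-PER etc., the scheme these zh-NER projects typically use; the real helper may differ):

def get_entity(tags, chars):
    # Collect the characters of each entity from a BIO tag sequence.
    entities = {'PER': [], 'LOC': [], 'ORG': []}
    current, current_type = [], None
    for ch, tag in zip(chars, tags):
        if tag.startswith('B-') and tag[2:] in entities:
            if current:
                entities[current_type].append(''.join(current))
            current, current_type = [ch], tag[2:]
        elif tag.startswith('I-') and current_type == tag[2:]:
            current.append(ch)
        else:
            if current:
                entities[current_type].append(''.join(current))
            current, current_type = [], None
    if current:
        entities[current_type].append(''.join(current))
    return entities['PER'], entities['LOC'], entities['ORG']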