Example #1
0
def start():
    # Prefer cached data when available
    if not os.path.exists(args.TRAIN) or not os.path.exists(args.VALID):
        produce_data(user_define=USER_DEFINE)

    if os.path.exists(args.TRAIN_CACHE):
        train_iter, num_train_steps = torch.load(args.TRAIN_CACHE)
    else:
        train_iter, num_train_steps = create_batch_iter("train")

    if os.path.exists(args.VALID_CACHE):
        eval_iter = torch.load(args.VALID_CACHE)
    else:
        eval_iter = create_batch_iter("dev")

    epoch_size = num_train_steps * args.train_batch_size * args.gradient_accumulation_steps / args.num_train_epochs

    pbar = ProgressBar(epoch_size=epoch_size, batch_size=args.train_batch_size)

    model = Bert_CRF.from_pretrained(args.bert_model, num_tag=len(args.labels))

    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name)

    fit(model=model,
        training_iter=train_iter,
        eval_iter=eval_iter,
        num_epoch=args.num_train_epochs,
        pbar=pbar,
        num_train_steps=num_train_steps,
        verbose=1)
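
The two cache branches above load (train_iter, num_train_steps) and eval_iter with torch.load, which presumes an earlier run saved them. A minimal sketch of that counterpart, assuming create_batch_iter and args from this snippet are importable (the helper name warm_cache is hypothetical):

import torch

def warm_cache():
    # Persist the batch iterators so later runs can skip create_batch_iter
    train_iter, num_train_steps = create_batch_iter("train")
    torch.save((train_iter, num_train_steps), args.TRAIN_CACHE)
    torch.save(create_batch_iter("dev"), args.VALID_CACHE)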
Example #2
0
def hybrid_3():
    # Data preprocessing
    word2id, epoch_size = data_helper(vocab_size=config.vocab_size,
                                      max_len=config.max_len,
                                      min_freq=1,
                                      stop_list=None,
                                      valid_size=0.2,
                                      random_state=2018,
                                      shuffle=True,
                                      is_debug=config.is_debug)

    vocab_size = len(word2id)

    # Initialize the progress bar
    pbar = ProgressBar(epoch_size=epoch_size, batch_size=config.batch_size)

    # Load batches
    bi = BatchIterator(config.TRAIN_FILE,
                       config.VALID_FILE,
                       config.batch_size,
                       fix_length=config.max_len,
                       x_var="text",
                       y_var=["label"],
                       format='tsv')
    train, valid = bi.create_dataset()
    train_iter, val_iter = bi.get_iterator(train, valid)

    # Initialize the model
    model = Hybrid(vocab_size=vocab_size,
                   word_embedding_dimension=config.word_embedding_dimension,
                   word2id=word2id,
                   dropout=config.dropout,
                   attention_size=config.attention_size,
                   filters=config.filters,
                   kernel_size=config.kernel_size,
                   hidden_size=config.hidden_size,
                   bi_flag=config.bi_flag,
                   num_layer=config.num_layer,
                   checkpoint_dir=config.CHECKPOINT_DIR)

    # Train
    fit(model,
        train_iter,
        val_iter,
        config.num_epoch,
        pbar,
        config.lr_decay_mode,
        config.initial_lr,
        verbose=1)
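
The BatchIterator wrapper used here (and in Example #3) is not shown. A minimal sketch of what it might wrap, assuming the legacy torchtext data API (torchtext < 0.9) and a two-column "text<TAB>label" file layout; the class body, field choices, and column order are all assumptions:

from torchtext.data import Field, TabularDataset, BucketIterator

class BatchIterator:
    def __init__(self, train_file, valid_file, batch_size,
                 fix_length=None, x_var="text", y_var=("label",), format="tsv"):
        self.train_file = train_file
        self.valid_file = valid_file
        self.batch_size = batch_size
        self.format = format
        # Pad or truncate every example to fix_length tokens
        self.text_field = Field(sequential=True, fix_length=fix_length, batch_first=True)
        label_field = Field(sequential=False, use_vocab=False)
        self.fields = [(x_var, self.text_field)] + [(y, label_field) for y in y_var]

    def create_dataset(self):
        train = TabularDataset(self.train_file, format=self.format, fields=self.fields)
        valid = TabularDataset(self.valid_file, format=self.format, fields=self.fields)
        self.text_field.build_vocab(train)  # vocabulary from the training split only
        return train, valid

    def get_iterator(self, train, valid):
        # Bucket examples of similar length together to reduce padding
        return BucketIterator.splits((train, valid),
                                     batch_size=self.batch_size,
                                     sort_key=lambda ex: len(ex.text))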
Example #3
0
def bilstm_crf():
    # Data preprocessing
    word2id, epoch_size = data_helper(vocab_size=config.vocab_size,
                                      max_len=config.max_len,
                                      min_freq=1,
                                      valid_size=0.2,
                                      random_state=2018,
                                      shuffle=True,
                                      is_debug=config.is_debug)

    vocab_size = len(word2id)

    # Initialize the progress bar
    pbar = ProgressBar(epoch_size=epoch_size, batch_size=config.batch_size)

    # Load batches
    bi = BatchIterator(config.TRAIN_FILE,
                       config.VALID_FILE,
                       config.batch_size,
                       fix_length=config.max_len,
                       x_var="text",
                       y_var="label")
    train, valid = bi.create_dataset()
    train_iter, val_iter = bi.get_iterator(train, valid)

    model = BISLTM_CRF(vocab_size=config.vocab_size,
                       word_embedding_dim=config.word_embedding_dim,
                       word2id=word2id,
                       hidden_size=128,
                       bi_flag=True,
                       num_layer=1,
                       input_size=config.word_embedding_dim,
                       cell_type=config.cell_type,
                       dropout=config.dropout,
                       num_tag=len(config.labels),
                       tag2ix=config.tag_to_ix,
                       checkpoint_dir=config.checkpoint_dir)

    # Train
    fit(model,
        train_iter,
        val_iter,
        config.num_epoch,
        pbar,
        config.lr_decay_mode,
        config.initial_lr,
        verbose=1)
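
The CRF head above is sized with num_tag=len(config.labels) and indexed through config.tag_to_ix, neither of which is shown. A hypothetical config fragment, assuming a standard BIO tag scheme for NER (the real tag set is an assumption):

labels = ["O", "B-PER", "I-PER", "B-LOC", "I-LOC", "B-ORG", "I-ORG"]
tag_to_ix = {tag: ix for ix, tag in enumerate(labels)}  # e.g. {"O": 0, "B-PER": 1, ...}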
Example #4
0
def start():

    train_iter, num_train_steps = create_batch_iter("train", args.TRAIN_PATH)
    eval_iter = create_batch_iter("dev", args.VALID_PATH)

    epoch_size = num_train_steps * args.train_batch_size * args.gradient_accumulation_steps / args.num_train_epochs
    print(f'epoch_size = {epoch_size}')
    pbar = ProgressBar(epoch_size=epoch_size, batch_size=args.train_batch_size)
    model = Bert_CRF.from_pretrained(args.bert_model, num_tag=len(args.labels))
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name)

    fit(model=model,
        training_iter=train_iter,
        eval_iter=eval_iter,
        num_epoch=args.num_train_epochs,
        pbar=pbar,
        num_train_steps=num_train_steps,
        verbose=1)
Example #5
0
def start():
    train_iter, num_train_steps = create_batch_iter("train")
    eval_iter = create_batch_iter("dev")

    epoch_size = num_train_steps * args.train_batch_size * args.gradient_accumulation_steps / args.num_train_epochs

    pbar = ProgressBar(epoch_size=epoch_size, batch_size=args.train_batch_size)

    model = QaExtract.from_pretrained(args.bert_model)

    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name)

    fit(model=model,
        training_iter=train_iter,
        eval_iter=eval_iter,
        num_epoch=args.num_train_epochs,
        pbar=pbar,
        num_train_steps=num_train_steps,
        verbose=1)
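
The epoch_size expression repeated across these drivers recovers the number of training examples per epoch from the total optimizer-step count. A worked example with illustrative numbers, assuming num_train_steps was originally computed as examples // (batch * accumulation) * epochs:

num_examples = 10000
train_batch_size = 32
gradient_accumulation_steps = 2
num_train_epochs = 3

# Total optimizer steps across all epochs: 10000 // 64 * 3 = 468
num_train_steps = num_examples // (train_batch_size * gradient_accumulation_steps) * num_train_epochs

# Inverting that gives 468 * 64 / 3 = 9984.0, i.e. roughly the dataset size
epoch_size = num_train_steps * train_batch_size * gradient_accumulation_steps / num_train_epochs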
Example #6
0
def start():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--do_not_train_ernie",
        default=False,
        action='store_true',
    )
    parser.add_argument(
        "--do_CRF",
        default=False,
        action='store_true',
    )
    arg = parser.parse_args()
    args.do_not_train_ernie = arg.do_not_train_ernie
    args.do_CRF = arg.do_CRF

    produce_data()
    train_iter, num_train_steps = create_batch_iter("train")
    eval_iter = create_batch_iter("dev")

    epoch_size = num_train_steps * args.train_batch_size * args.gradient_accumulation_steps / args.num_train_epochs

    pbar = ProgressBar(epoch_size=epoch_size, batch_size=args.train_batch_size)
    if args.load_weight:
        model = load_model(args.output_dir)
    else:
        model = Bert_CRF.from_pretrained(args.bert_model,
                                         num_tag=len(args.labels))

    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name)

    fit(model=model,
        training_iter=train_iter,
        eval_iter=eval_iter,
        num_epoch=args.num_train_epochs,
        pbar=pbar,
        num_train_steps=num_train_steps,
        verbose=1)
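
The load_weight branch calls a load_model helper that is not shown. A minimal sketch, assuming the checkpoint was pickled with torch.save into output_dir (the file name is hypothetical):

import os
import torch

def load_model(output_dir, filename="pytorch_model.bin"):
    # Restore the previously saved model object; map to CPU so loading
    # does not require the GPU the checkpoint was written from
    return torch.load(os.path.join(output_dir, filename), map_location="cpu")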
Example #7
0
from utils.create_batch_iter import create_batch_iter
from utils.progress_util import ProgressBar
import config as args
from model.bert_ner import Bert_CRF
from train.train import fit
if __name__ == '__main__':
    # Create the data iterators
    training_iter, num_train_step = create_batch_iter('train')
    eval_iter = create_batch_iter('dev')
    test_iter = create_batch_iter('test')
    # Samples per epoch
    epoch_size = num_train_step * args.train_batch_size * args.gradient_accumulation_steps / args.num_train_epochs
    # Progress display
    pbar = ProgressBar(epoch_size=epoch_size, batch_size=args.train_batch_size)
    # Model
    model = Bert_CRF.from_pretrained('bert-base-chinese', num_tag=len(args.label_dict))
    fit(model=model,
        training_iter=training_iter,
        eval_iter=eval_iter,
        test_iter=test_iter,
        num_epoch=args.num_train_epochs,
        pbar=pbar,
        num_train_steps=num_train_step,
        verbose=1)
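
All seven drivers delegate to a fit() function that is not shown in these examples. A rough skeleton of the loop they imply, assuming the model's forward pass returns the loss; the optimizer choice (the BERT variants likely use BertAdam with warmup over num_train_steps) and metric reporting are assumptions:

import torch

def fit(model, training_iter, eval_iter, num_epoch, pbar,
        num_train_steps, test_iter=None, verbose=1):
    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)  # assumed optimizer
    for epoch in range(num_epoch):
        model.train()
        for step, batch in enumerate(training_iter):
            loss = model(*batch)   # assumes forward() returns the training loss
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        model.eval()
        with torch.no_grad():
            for batch in eval_iter:
                model(*batch)      # evaluation pass; metric computation omitted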