Example #1
    all_data = nli_reader.get_examples('train.gz')  # optionally pass max_examples=5000 to cap the data size
    random.shuffle(all_data)
    train_data = all_data[:int(train_rate * len(all_data))]
    dev_data = all_data[int(train_rate * len(all_data)):]

    logging.info('train data size {}'.format(len(train_data)))
    logging.info('dev data size {}'.format(len(dev_data)))
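    # one optimizer update per batch: total_steps = ceil(epoch_num * len(train_data) / batch_size)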
    total_steps = math.ceil(epoch_num * len(train_data) * 1. / batch_size)
    warmup_steps = int(total_steps * warmup_percent)

    model = BertNLIModel(gpu=gpu,
                         batch_size=batch_size,
                         bert_type=bert_type,
                         model_path=trained_model)
    optimizer = AdamW(model.parameters(),
                      lr=2e-5,
                      eps=1e-6,
                      correct_bias=False)
    scheduler = get_scheduler(optimizer,
                              scheduler_setting,
                              warmup_steps=warmup_steps,
                              t_total=total_steps)
    if fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
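
Both examples call a get_scheduler helper that is not shown on this page. The sketch below is a plausible reconstruction, assuming scheduler_setting is a string naming one of the standard HuggingFace warmup schedules; the setting names are illustrative, not confirmed by the source.

from transformers import (
    get_constant_schedule_with_warmup,
    get_cosine_schedule_with_warmup,
    get_linear_schedule_with_warmup,
)

def get_scheduler(optimizer, scheduler_setting, warmup_steps, t_total):
    # map a setting name to a warmup learning-rate schedule
    if scheduler_setting == 'WarmupLinear':
        return get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=warmup_steps, num_training_steps=t_total)
    if scheduler_setting == 'WarmupCosine':
        return get_cosine_schedule_with_warmup(
            optimizer, num_warmup_steps=warmup_steps, num_training_steps=t_total)
    if scheduler_setting == 'ConstantLR':
        return get_constant_schedule_with_warmup(
            optimizer, num_warmup_steps=warmup_steps)
    raise ValueError('unknown scheduler setting: {}'.format(scheduler_setting))

All of these schedules warm the learning rate up linearly over warmup_steps and then decay (or hold) it over the remaining t_total steps, which matches how warmup_steps and total_steps are computed above.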
Example #2
    # msnli_data = nli_reader.get_examples('train.gz', max_examples=1000)

    # combine MultiNLI with HANS (adversarial NLI) examples so training
    # also sees heuristic-breaking cases
    all_data = msnli_data + hans_data
    random.shuffle(all_data)
    train_num = int(len(all_data) * 0.9)
    train_data = all_data[:train_num]
    dev_data = all_data[train_num:]

    logging.info('train data size {}'.format(len(train_data)))
    logging.info('dev data size {}'.format(len(dev_data)))

    total_steps = math.ceil(epoch_num * len(train_data) * 1. / batch_size)
    warmup_steps = int(total_steps * warmup_percent)

    model = BertNLIModel(gpu=gpu,
                         batch_size=batch_size,
                         bert_type=bert_type,
                         model_path=trained_model,
                         reinit_num=reinit_layers,
                         freeze_layers=freeze_layers)
    optimizer = AdamW(model.parameters(),
                      lr=2e-5,
                      eps=1e-6,
                      correct_bias=False)
    scheduler = get_scheduler(optimizer,
                              scheduler_setting,
                              warmup_steps=warmup_steps,
                              t_total=total_steps)
    if fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    best_acc = -1.
    best_model_dic = None
    for ep in range(epoch_num):
        logging.info('\n=====epoch {}/{}====='.format(ep + 1, epoch_num))
        model_dic = train(model, optimizer, scheduler, train_data, dev_data,
                          batch_size, fp16, checkpoint, gpu, max_grad_norm,
                          best_acc)
        # print(model_dic)
        if model_dic is not None:
            # assumption: train() returns a state dict only when dev accuracy
            # improves on best_acc; keep the latest improvement as the best model
            best_model_dic = model_dic
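
On recent PyTorch versions the apex dependency used in both examples can be dropped: torch.cuda.amp provides the same mixed-precision behaviour natively. The following is a minimal sketch of the equivalent training-step pattern; the amp_training_step name and the loss computation are illustrative, not the train() function used above.

import torch

scaler = torch.cuda.amp.GradScaler()

def amp_training_step(model, optimizer, batch):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():       # forward pass runs in mixed precision
        loss = model(batch)               # assumes the model returns a scalar loss
    scaler.scale(loss).backward()         # scale the loss to avoid fp16 gradient underflow
    scaler.step(optimizer)                # unscales gradients, then calls optimizer.step()
    scaler.update()                       # adjust the loss scale for the next iteration

If gradient clipping is needed (cf. the max_grad_norm argument passed to train() above), call scaler.unscale_(optimizer) before torch.nn.utils.clip_grad_norm_ and before scaler.step.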