Example #1
     model = BertClassifical(config, BertModel, args.pretrained_model_path,
                             args.layer_output_counts)
     model.cuda()
     train_model(model, train_iter, valid_iter, args)
 if args.do_valid:
     start_time = time.time()
     valid_set = Mydataset(valid, tokenizer, args.maxlen)
     valid_iter = DataLoader(dataset=valid_set,
                             batch_size=args.batch_size_per_gpu,
                             shuffle=False)
     model = BertClassifical(config, BertModel, args.pretrained_model_path,
                             args.layer_output_counts)
     model.cuda()
     model.load_state_dict(torch.load(args.save_path))
     print("模型加载完成的时间为{}".format(timeSince(start_time)))
     p, r, f1 = evaluate_valid(model, valid_iter)
     print("验证集的模型精确率:{},召回率:{},f1值:{}".format(p, r, f1))
 if args.do_test:
     # Load the entire model
     #model = torch.load(save_path)
     # Load only the model weights
     start_time = time.time()
     #print("Prediction start time: {}".format(start_time))
     #test_set = testset(test_data, tokenizer, args.maxlen)
     test_set = standard_class_test_dataset(test_data, tokenizer,
                                            args.maxlen)
     test_iter = DataLoader(dataset=test_set,
                            batch_size=args.batch_size_per_gpu,
                            shuffle=False)
     model = BertClassifical(config, BertModel, args.pretrained_model_path,
                             args.layer_output_counts)
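
# The timeSince helper used above is not part of this snippet. A minimal sketch,
# assuming it simply formats the elapsed wall-clock time since a time.time()
# timestamp (the exact output format is an assumption, not taken from the original):
import math
import time

def timeSince(since):
    # Elapsed seconds since `since`, rendered as "Xm Ys".
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return "{:d}m {:d}s".format(int(m), int(s))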
Example #2
def train_model(model, train_iter, valid_iter, args):

    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(
        optimizer_grouped_parameters,
        lr=args.learning_rate,
    )
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=len(train_iter) * args.num_epochs)
    #optimizer = Adam(model.parameters(), lr=args.learning_rate)
    """
    optimizer = Adam([
    {'params': model.bert.parameters(), 'lr': 1e-5},
    {'params': model.classifier.parameters(), 'lr': 3e-4}])
    """
    # Learning-rate scheduler alternatives (commented out)
    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, verbose=1, min_lr=0.0001)
    #scheduler = get_constant_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps,num_training_steps=len(train_iter) // gradient_accumulation_steps * num_epochs)
    #scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[3, 5], gamma=0.5)
    #scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)
    #scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
    """
    warm_up_with_cosine_lr = lambda epoch: epoch / args.warmup_epochs if epoch <= args.warmup_epochs else 0.5 * (
                math.cos((epoch - args.warmup_epochs) / (args.num_epochs - args.warmup_epochs) * math.pi) + 1)
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=warm_up_with_cosine_lr)
    """
    total_steps = 0
    best_f1 = 0
    stop_count = 0
    start_time = time.time()
    # Initialize FGM adversarial training
    fgm = FGM(model)
    # Print model parameter names
    print("Model parameter names:")
    for name, param in model.named_parameters():
        print(name)
    for epoch in range(args.num_epochs):
        model.train()
        epoch_loss = 0

        for step, input in enumerate(train_iter):
            inputs = {
                "input_ids": input[0].cuda(non_blocking=True),
                "token_type_ids": input[1].cuda(non_blocking=True),
                "attention_mask": input[2].cuda(non_blocking=True)
            }
            total_steps += 1
            labels = input[3].cuda(non_blocking=True)
            logits = model(**inputs)
            # Compute the loss
            if not args.use_labelsmooth:
                loss = F.cross_entropy(logits, labels)
            else:
                loss = LabelSmoothCELoss()(logits, labels, 0.1)
            loss.backward()
            # Adversarial training: perturb the embedding layer, run a second
            # forward/backward pass, then restore the original embeddings
            fgm.attack()
            logits_ad = model(**inputs)
            loss_ad = F.cross_entropy(logits_ad, labels)
            loss_ad.backward()
            fgm.restore()  # restore the embedding parameters
            epoch_loss += loss_ad.item()
            # Gradient accumulation
            if total_steps % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()
                model.zero_grad()

            #if total_steps % args.eval_steps ==0:
        end_time = timeSince(start_time)
        print("epoch: {},eval_steps: {},time: {}".format(
            epoch + 1, total_steps, end_time))
        p, r, f1 = evaluate_valid(model, valid_iter)

        print("valid_p:{:.4f},valid_r:{:.4f},valid_f1:{:.4f}".format(p, r, f1))

        if f1 > best_f1:
            best_f1 = f1
            # Save the entire model
            #torch.save(model, 'resnet.pth')
            # Save only the weights
            torch.save(model.state_dict(), args.save_path)
            # Free cached GPU memory
            torch.cuda.empty_cache()
            #model.train()
        # Print the average loss for this epoch
        print('Epoch {} - Loss {:.4f}'.format(epoch + 1,
                                              epoch_loss / len(train_iter)))
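
# Neither FGM nor LabelSmoothCELoss is defined in this snippet. A minimal sketch of
# both, assuming the usual FGM (Fast Gradient Method) embedding perturbation and a
# standard label-smoothing cross entropy; the epsilon value and the
# "word_embeddings" name filter are illustrative assumptions, not taken from the
# original code.
import torch
import torch.nn.functional as F


class FGM:
    """Adds an L2-normalized gradient perturbation to the embedding weights."""

    def __init__(self, model, epsilon=1.0, emb_name="word_embeddings"):
        self.model = model
        self.epsilon = epsilon
        self.emb_name = emb_name
        self.backup = {}

    def attack(self):
        # Back up and perturb embedding weights along the gradient direction.
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name and param.grad is not None:
                self.backup[name] = param.data.clone()
                norm = torch.norm(param.grad)
                if norm != 0 and not torch.isnan(norm):
                    param.data.add_(self.epsilon * param.grad / norm)

    def restore(self):
        # Restore the original embedding weights after the adversarial pass.
        for name, param in self.model.named_parameters():
            if name in self.backup:
                param.data = self.backup[name]
        self.backup = {}


class LabelSmoothCELoss(torch.nn.Module):
    """Label-smoothed cross entropy, matching the call LabelSmoothCELoss()(logits, labels, 0.1)."""

    def forward(self, logits, labels, smoothing=0.1):
        num_classes = logits.size(-1)
        log_probs = F.log_softmax(logits, dim=-1)
        with torch.no_grad():
            # (1 - smoothing) on the gold class, smoothing spread over the rest.
            true_dist = torch.full_like(log_probs, smoothing / (num_classes - 1))
            true_dist.scatter_(1, labels.unsqueeze(1), 1.0 - smoothing)
        return torch.mean(torch.sum(-true_dist * log_probs, dim=-1))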

# A second variant of train_model that uses apex mixed precision (amp) and
# DistributedDataParallel instead of FGM adversarial training.
def train_model(model, train_iter, valid_iter, args):

    optimizer = Adam(model.parameters(), lr=args.learning_rate)
    model, optimizer = amp.initialize(model, optimizer)
    #model = DistributedDataParallel(model, device_ids=[args.local_rank])
    model = DistributedDataParallel(model)
    """
    optimizer = Adam([
    {'params': model.bert.parameters(), 'lr': 1e-5},
    {'params': model.classifier.parameters(), 'lr': 3e-4}])
    """
    # Learning-rate scheduler alternatives (commented out)
    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, verbose=1, min_lr=0.0001)
    #scheduler = get_constant_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps,num_training_steps=len(train_iter) // gradient_accumulation_steps * num_epochs)
    scheduler = get_constant_schedule_with_warmup(
        optimizer, num_warmup_steps=args.warmup_steps)
    total_steps = 0
    best_f1 = 0
    stop_count = 0
    start_time = time.time()
    for epoch in range(args.num_epochs):
        model.train()
        epoch_loss = 0

        for step, input in enumerate(train_iter):
            inputs = {
                "input_ids": input[0].cuda(non_blocking=True),
                "token_type_ids": input[1].cuda(non_blocking=True),
                "attention_mask": input[2].cuda(non_blocking=True)
            }
            total_steps += 1
            labels = input[3].cuda(non_blocking=True)
            logits = model(**inputs)
            # Compute the loss
            loss = F.cross_entropy(logits, labels)
            # Wrap the loss for mixed-precision scaling
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            # Log the unscaled loss; scaled_loss is multiplied by amp's loss scale
            epoch_loss += loss.item()
            # Gradient accumulation
            if total_steps % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()
                model.zero_grad()
            if total_steps % args.eval_steps == 0:
                end_time = timeSince(start_time)
                print("epoch: {},eval_steps: {},time: {}".format(epoch + 1, total_steps, end_time))
                p, r, f1 = evaluate_valid(model, valid_iter)

                print("valid_p:{:.4f},valid_r:{:.4f},valid_f1:{:.4f}".format(p, r, f1))

                if f1 > best_f1:
                    best_f1 = f1
                    # Save the entire model
                    #torch.save(model, 'resnet.pth')
                    # Save only the weights
                    torch.save(model.state_dict(), args.save_path)
                    # Free cached GPU memory
                    torch.cuda.empty_cache()
                    #model.train()
        # Print the average loss for this epoch
        print('Epoch {} - Loss {:.4f}'.format(epoch + 1, epoch_loss / len(train_iter)))
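
# evaluate_valid is used by both training loops above but is not defined in this
# snippet. A minimal sketch, assuming validation batches have the same layout as
# the training batches and that macro-averaged precision/recall/F1 from
# scikit-learn is the intended metric (an assumption about the original code).
import torch
from sklearn.metrics import precision_recall_fscore_support


def evaluate_valid(model, valid_iter):
    model.eval()
    preds, golds = [], []
    with torch.no_grad():
        for batch in valid_iter:
            inputs = {
                "input_ids": batch[0].cuda(non_blocking=True),
                "token_type_ids": batch[1].cuda(non_blocking=True),
                "attention_mask": batch[2].cuda(non_blocking=True),
            }
            logits = model(**inputs)
            preds.extend(logits.argmax(dim=-1).cpu().tolist())
            golds.extend(batch[3].tolist())
    model.train()
    p, r, f1, _ = precision_recall_fscore_support(golds, preds, average="macro")
    return p, r, f1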