def train(): vis = Visualizer("Kesci" + time.strftime('%m%d%H%M')) train_data = AppData("../kesci/data/data_v3_23d/train_ab.json", iflabel=True) val_data = AppData("../kesci/data/data_v3_23d/val_ab.json", iflabel=True) train_dataloader = DataLoader(train_data, 256, shuffle=True, num_workers=4) val_dataloader = DataLoader(val_data, 512, shuffle=False, num_workers=2) test_data = AppData("../kesci/data/data_v3_23d/test_ab.json", iflabel=True) test_dataloader = DataLoader(test_data, 512, shuffle=False, num_workers=2) criterion = t.nn.BCEWithLogitsLoss().cuda() learning_rate = 0.002 weight_decay = 0.0003 model = DoubleSequence(31, 128, 1).cuda() optimizer = t.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay) loss_meter = meter.AverageValueMeter() confusion_matrix = meter.ConfusionMeter(2) previous_loss = 1e100 for epoch in range(400): loss_meter.reset() confusion_matrix.reset() for ii, (data, property, target) in tqdm(enumerate(train_dataloader)): input = Variable(data).cuda() input2 = Variable(property).cuda() target = Variable(target).cuda() output = model(input, input2) optimizer.zero_grad() loss = criterion(output, target) loss.backward() optimizer.step() loss_meter.add(loss.data[0]) if ii % 100 == 99: vis.plot('loss', loss_meter.value()[0]) if epoch % 3 == 2: train_cm, train_f1 = val(model, train_dataloader) vis.plot('train_f1', train_f1) val_cm, val_f1 = val(model, val_dataloader) vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate}) if loss_meter.value()[0] > previous_loss: learning_rate = learning_rate * 0.9 # 第二种降低学习率的方法:不会有moment等信息的丢失 for param_group in optimizer.param_groups: param_group['lr'] = learning_rate previous_loss = loss_meter.value()[0] if epoch % 3 == 2: model.save() test_cm, test_f1 = val(model, test_dataloader) vis.plot('test_f1', test_f1) vis.log( "训练集:{train_f1:%}, {train_pre:%}, {train_rec:%} | 验证集:{val_f1:%}, {val_pre:%}, {val_rec:%} | \ 测试集:{test_f1:%}, {test_pre:%}, {test_rec:%} | {train_true_num:%}, {val_true_num:%}, {test_true_num:%}" .format( train_f1=train_f1, val_f1=val_f1, test_f1=test_f1, train_true_num=train_cm.value()[:, 0].sum() / len(train_data), val_true_num=val_cm.value()[:, 0].sum() / len(val_data), test_true_num=test_cm.value()[:, 0].sum() / len(test_data), train_pre=train_cm.value()[0][0] / train_cm.value()[0].sum(), train_rec=train_cm.value()[0][0] / train_cm.value()[:, 0].sum(), val_pre=val_cm.value()[0][0] / val_cm.value()[0].sum(), val_rec=val_cm.value()[0][0] / val_cm.value()[:, 0].sum(), test_pre=test_cm.value()[0][0] / test_cm.value()[0].sum(), test_rec=test_cm.value()[0][0] / test_cm.value()[:, 0].sum()))
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# project-local helpers assumed importable: opt (config), models, Visualizer,
# standard_data, IcvDataset, init_extract_model, AverageMeter, accuracy,
# get_learning_rate, save_checkpoint, val


def train():
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: load a pretrained model for fine-tuning or feature extraction
    model = getattr(models, opt.model)(pretrained=True)
    model = init_extract_model(model, 10)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # step2: data
    train_data_list = standard_data(opt.train_data_dir, 'train')
    val_data_list = standard_data(opt.train_data_dir, 'val')
    train_dataloader = DataLoader(IcvDataset(train_data_list),
                                  batch_size=opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(IcvDataset(val_data_list, train=False),
                                batch_size=opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion, optimizer, and scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                 weight_decay=opt.weight_decay)
    # every 300 epochs: lr = lr * gamma
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=300, gamma=0.1)

    # step4: metrics
    train_losses = AverageMeter()
    train_top1 = AverageMeter()

    # step5.1: bookkeeping for K-fold runs and for resuming
    start_epoch = 0
    best_top1 = 50  # initial best top-1 accuracy (percent)

    # step5.2: resume a training run from a checkpoint
    # (PyTorch checkpoints are conventionally saved with the .tar extension)
    if opt.resum_model_dir is not None:
        checkpoint = torch.load(opt.resum_model_dir)
        start_epoch = checkpoint["epoch"]
        best_top1 = checkpoint["best_top1"]
        optimizer.load_state_dict(checkpoint["optimizer"])
        model.load_state_dict(checkpoint["state_dict"])
        # call model.train() when resuming so every layer is in training mode
        model.train()

    # step6: train
    for epoch in range(start_epoch, opt.max_epoch):
        lr = get_learning_rate(optimizer)
        train_losses.reset()
        train_top1.reset()

        for step, (input, target) in enumerate(train_dataloader):
            input = input.to(device)
            target = target.to(device)
            optimizer.zero_grad()

            # forward
            output = model(input)
            loss = criterion(output, target)
            prec1 = accuracy(output, target, topk=(1,))

            # record loss and accuracy
            train_losses.update(loss.item(), input.size(0))
            train_top1.update(prec1[0].item(), input.size(0))

            # backward
            loss.backward()
            optimizer.step()

        # step the scheduler once per epoch, after the optimizer updates
        # (passing the epoch to step() is deprecated)
        scheduler.step()

        val_loss, val_top1 = val(model, val_dataloader, criterion, device)
        is_best = val_top1.avg > best_top1
        best_top1 = max(val_top1.avg, best_top1)

        print("epoch: {}/{}".format(epoch, opt.max_epoch))
        print("train --> loss: {}, acc: {}".format(train_losses.avg, train_top1.avg))
        print("val --> loss: {}, acc: {}".format(val_loss.avg, val_top1.avg))
        vis.plot_many({
            'train_loss': train_losses.avg,
            'val_loss': val_loss.avg
        })
        vis.log(
            "epoch:{epoch},lr:{lr},train_loss:{train_loss},val_loss:{val_loss},"
            "train_acc:{train_acc},val_acc:{val_acc}".format(
                epoch=epoch, lr=lr,
                train_loss=train_losses.avg, val_loss=val_loss.avg,
                train_acc=train_top1.avg, val_acc=val_top1.avg))

        if epoch % 10 == 0:
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "model": opt.model,
                    "state_dict": model.state_dict(),
                    "best_top1": best_top1,
                    "optimizer": optimizer.state_dict(),
                    "val_loss": val_loss.avg,
                },
                opt.save_model_dir, is_best, epoch)
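# --- Sketch (not part of the original source): `save_checkpoint` is defined
# elsewhere in the project; below is a minimal version consistent with how it
# is called above. The file names are hypothetical.
import os
import shutil

import torch


def save_checkpoint(state, save_dir, is_best, epoch):
    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, "checkpoint_{}.pth.tar".format(epoch))
    torch.save(state, path)  # checkpoints conventionally use the .tar extension
    if is_best:
        # keep a separate copy of the best-performing weights
        shutil.copyfile(path, os.path.join(save_dir, "model_best.pth.tar"))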
def train(): vis = Visualizer("Kesci") train_data = AppData("data/data_16d_target/train.json", iflabel=True) val_data = AppData("data/data_16d_target/val.json", iflabel=True) train_dataloader = DataLoader(train_data, 32, shuffle=True, num_workers=4) val_dataloader = DataLoader(val_data, 256, shuffle=False, num_workers=2) test_data = AppData("data/data_16d_target/test.json", iflabel=True) test_dataloader = DataLoader(test_data, 256, shuffle=False, num_workers=2) criterion = t.nn.CrossEntropyLoss().cuda() learning_rate = 0.003 weight_decay = 0.0002 model = Sequence(15, 128, 1).cuda() optimizer = t.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay) loss_meter = meter.AverageValueMeter() confusion_matrix = meter.ConfusionMeter(2) previous_loss = 1e100 for epoch in range(500): loss_meter.reset() confusion_matrix.reset() for ii, (data, property, label) in tqdm(enumerate(train_dataloader)): input = Variable(data).cuda() input2 = Variable(property).cuda() target = Variable(label).cuda().view(-1) output = model(input, input2) optimizer.zero_grad() loss = criterion(output, target) loss.backward() optimizer.step() loss_meter.add(loss.data[0]) confusion_matrix.add(output.data, target.data) if ii % 100 == 99: vis.plot('loss', loss_meter.value()[0]) if epoch % 3 == 2: train_cm, train_f1 = val(model, train_dataloader) vis.plot('train_f1', train_f1) val_cm, val_f1 = val(model, val_dataloader) vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate}) # vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format( # epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), # train_cm=str(confusion_matrix.value()), lr=learning_rate)) if loss_meter.value()[0] > previous_loss: learning_rate = learning_rate * 0.95 # 第二种降低学习率的方法:不会有moment等信息的丢失 for param_group in optimizer.param_groups: param_group['lr'] = learning_rate previous_loss = loss_meter.value()[0] if epoch % 10 == 9: model.save() test_cm, test_f1 = val(model, test_dataloader) vis.plot('test_f1', test_f1) vis.log( "model:{model} | {train_f1}, {train_pre}, {train_rec} | {val_f1}, {val_pre}, {val_rec} | {test_f1}, {test_pre}, {test_rec}" .format(train_f1=train_f1, val_f1=val_f1, test_f1=test_f1, model=time.strftime('%m%d %H:%M:%S'), train_pre=str(train_cm.value()[0][0] / train_cm.value()[:, 0].sum()), train_rec=str(train_cm.value()[0][0] / train_cm.value()[0].sum()), val_pre=str(val_cm.value()[0][0] / val_cm.value()[:, 0].sum()), val_rec=str(val_cm.value()[0][0] / val_cm.value()[0].sum()), test_pre=str(test_cm.value()[0][0] / test_cm.value()[:, 0].sum()), test_rec=str(test_cm.value()[0][0] / test_cm.value()[0].sum())))
import torch as t
from torch.autograd import Variable  # no-op wrapper on PyTorch >= 0.4
from torch.utils.data import DataLoader
from torchnet import meter
from tqdm import tqdm

# project-local helpers assumed importable: opt (config), models, Visualizer,
# Ocean, val


def train(**kwargs):
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = Ocean(opt.train_data_root, mode='train')
    val_data = Ocean(opt.train_data_root, mode='val')
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    # class weights compensate for the rarer positive class
    criterion = t.nn.CrossEntropyLoss(weight=t.Tensor([1, 5]).cuda())
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # update meters and visualize
            loss_meter.add(loss.item())  # was loss.data[0] (pre-0.4 API)
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

        # validate and visualize
        train_cm, train_accuracy = val(model, train_dataloader)
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot_many({
            'val_accuracy': val_accuracy,
            'train_accuracy': train_accuracy,
            'learning_rate': lr
        })
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch, lr=lr, loss=loss_meter.value()[0],
                    train_cm=str(confusion_matrix.value()),
                    val_cm=str(val_cm.value())))

        # Decay the learning rate when the epoch loss stops improving.
        # Updating param_groups in place preserves optimizer state such as momentum.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

        if epoch % 10 == 9:
            model.save()
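# --- Quick illustration (not part of the original source): the weight=[1, 5]
# passed to CrossEntropyLoss above makes errors on class 1, the rarer class,
# count five times as much; the logits and targets below are made up.
import torch as t

logits = t.tensor([[2.0, 0.0], [2.0, 0.0]])  # both samples predicted as class 0
targets = t.tensor([0, 1])                   # but the second sample is class 1
plain = t.nn.CrossEntropyLoss()
weighted = t.nn.CrossEntropyLoss(weight=t.tensor([1.0, 5.0]))
print(plain(logits, targets).item())     # ~1.13, plain mean of both losses
print(weighted(logits, targets).item())  # ~1.79, the class-1 error dominates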