def main():
    """Debug/visualisation entry point: load the final audio/video/fusion
    checkpoints, run validation on a single hard-coded video, and render a
    class-activation map (CAM) over its frames via findcam().

    NOTE(review): this variant deliberately overrides the dataset split with a
    single clip ('video_001.mp4') and feeds the SAME one-element list to both
    loaders — it is an overfit/inspection script, not a real train/val run.
    """
    # initialise the models
    vmodel = VideoNet().to(device)
    amodel = AudioNet().to(device)
    avmodel = AVNet().to(device)
    # Restore the fully-trained weights for all three networks.
    vmodel.load_state_dict(torch.load('vmodel_final.pt'))
    amodel.load_state_dict(torch.load('amodel_final.pt'))
    avmodel.load_state_dict(torch.load('avmodel_final.pt'))
    print('loaded model')
    # Optimiser is built but unused here (no train() call below); kept,
    # presumably, for parity with the training variant of main().
    params = list(vmodel.parameters()) + list(amodel.parameters()) + list(avmodel.parameters())
    # optimiser = optim.Adam(params, lr=LR)
    optimiser = optim.SGD(params, lr=LR, momentum=0.9)
    list_vid = os.listdir('data/train/full_vid')  # ensure no extra files like .DS_Store are present
    train_list, val_list = utils.split_data(list_vid, 0.8, 0.2)
    # log the list for reference
    utils.log_list(train_list, 'data/train_list.txt')
    utils.log_list(val_list, 'data/val_list.txt')
    # uncomment following to read previous list
    # train_list = utils.read_list('data/train_list.txt')
    # val_list = utils.read_list('data/val_list.txt')
    # Debug override: pin the run to one known clip (see docstring).
    train_list = ['video_001.mp4']
    composed = transforms.Compose([Resize(256), RandomCrop(224)])
    # composed = transforms.Compose([Resize(256)])
    # NOTE(review): both loaders intentionally use train_list[:1] (not
    # val_list) — confirm this is still wanted before reusing as a template.
    train_loader = torch.utils.data.DataLoader(AVDataset(train_list[:1], transform=composed),
                                               batch_size=batch_size, shuffle=False, num_workers=4)
    val_loader = torch.utils.data.DataLoader(AVDataset(train_list[:1], transform=composed),
                                             batch_size=batch_size, shuffle=False, num_workers=4)
    # val() returns loss, prediction(s) and the CAM tensor for the clip.
    l, p, cam = val(vmodel, amodel, avmodel, val_loader)
    print(p, cam.shape)
    import skvideo.io  # local import: only needed for this visualisation path
    vids = skvideo.io.vread('data/train/' + 'snippet/video_001.mp4')
    # print('vids',vids)
    # Overlay |CAM| on the raw frames; expand_dims adds the batch axis.
    findcam(np.expand_dims(vids, 0), np.abs(cam.cpu().numpy()))
def main():
    """Training entry point: build the audio, video and fusion networks,
    split the clip list 80/20, persist the split, and hand everything to
    train().
    """
    # Instantiate the three sub-networks on the configured device.
    video_net = VideoNet().to(device)
    audio_net = AudioNet().to(device)
    fusion_net = AVNet().to(device)
    # (Optionally warm-start from ./pretrained/tfvmodel.pt, tfamodel.pt and
    # tfavmodel.pt by loading their state dicts here.)

    # One optimiser drives the parameters of all three networks jointly.
    trainable_params = (list(video_net.parameters())
                        + list(audio_net.parameters())
                        + list(fusion_net.parameters()))
    optimiser = optim.Adam(trainable_params, lr=LR)

    # ensure no extra files like .DS_Store are present
    all_clips = os.listdir('data/train/full_vid')
    train_list, val_list = utils.split_data(all_clips, 0.8, 0.2)
    # Persist the split for reproducibility; utils.read_list() can reload it
    # on a later run instead of re-splitting.
    utils.log_list(train_list, 'data/train_list.txt')
    utils.log_list(val_list, 'data/val_list.txt')

    augmentation = transforms.Compose([Resize(256), RandomCrop(224)])
    train_loader = torch.utils.data.DataLoader(
        AVDataset(train_list, transform=augmentation),
        batch_size=batch_size, shuffle=True, num_workers=6)
    val_loader = torch.utils.data.DataLoader(
        AVDataset(val_list, transform=augmentation),
        batch_size=test_batch_size, shuffle=True, num_workers=6)

    train(video_net, audio_net, fusion_net, optimiser, nepochs,
          train_loader, val_loader)
# NOTE(review): orphan fragment — the first statement below is the tail of a
# filename-derivation loop whose header (and the surrounding function) is not
# visible in this chunk. Kept byte-identical; comments only.
tra_lbl_name_list.append(data_dir + tra_label_dir + imidx + label_ext)
print("---")
print("train images: ", len(tra_img_name_list))
print("train labels: ", len(tra_lbl_name_list))
print("---")
train_num = len(tra_img_name_list)
# Training dataset: rescale to 320, random-crop to 288, tensorise (flag=0).
salobj_dataset = SalObjDataset(
    img_name_list=tra_img_name_list,
    lbl_name_list=tra_lbl_name_list,
    transform=transforms.Compose([
        RescaleT(320),
        RandomCrop(288),
        ToTensorLab(flag=0)]))
salobj_dataloader = DataLoader(salobj_dataset,
                               batch_size=batch_size_train,
                               shuffle=True  # shuffle=False
                               , num_workers=0)

# ------- 3. define model --------
# define the net
# Select the model variant (full U2NET vs the lightweight U2NETP).
if (model_name == 'u2net'):
    net = U2NET(3, 1)
elif (model_name == 'u2netp'):
    net = U2NETP(3, 1)
if torch.cuda.is_available():
    net.cuda()
def train():
    """Train BASNet (with auxiliary structure-branch losses) on DUTS-TR and
    periodically evaluate on DUTS-TE, logging everything to a Visdom env and
    checkpointing the best model under several metrics (MAE, max-F, average
    max-F, relaxed-F, own relaxed-F).

    Relies on module-level names: SalObjDataset, BASNet, AverageMeter,
    VisdomLinePlotter, muti_bce_loss_fusion, normPRED, getMAE, getPRCurve,
    getMaxFMeasure, getRelaxedFMeasure, own_RelaxedFMeasure, copyfile.
    """
    # ------- configuration --------
    # Dataset root differs between the author's Windows ('nt') and Linux boxes.
    if os.name == 'nt':
        data_dir = 'C:/Users/marky/Documents/Courses/saliency/datasets/DUTS/'
    else:
        data_dir = os.getenv("HOME") + '/Documents/Courses/EE298-CV/finalproj/datasets/DUTS/'
    tra_image_dir = 'DUTS-TR/DUTS-TR-Image/'
    tra_label_dir = 'DUTS-TR/DUTS-TR-Mask/'
    test_image_dir = 'DUTS-TE/DUTS-TE-Image/'
    test_label_dir = 'DUTS-TE/DUTS-TE-Mask/'
    image_ext = '.jpg'
    label_ext = '.png'
    model_dir = "./saved_models/basnet_bsi_aug/"
    resume_train = False
    resume_model_path = model_dir + "basnet_bsi_epoch_81_itr_106839_train_1.511335_tar_0.098392.pth"
    last_epoch = 1          # 1-based epoch to resume from
    epoch_num = 100000
    batch_size_train = 8
    batch_size_val = 1
    train_num = 0
    val_num = 0             # NOTE(review): assigned but never used below
    enableInpaintAug = False
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

    # ------- 5. training process --------
    print("---start training...")
    test_increments = 6250  # iterations between evaluation/checkpoint passes
    ite_num = 0
    running_loss = 0.0
    running_tar_loss = 0.0
    ite_num4val = 1
    next_test = ite_num + 0  # first evaluation fires at the end of epoch 1
    visdom_tab_title = "StructArchWithoutStructImgs(WithHFlip)"

    ############ ############ ############ ############
    # Collect image paths; derive each label path by swapping directory and
    # extension, preserving dots inside the base filename.
    tra_img_name_list = glob.glob(data_dir + tra_image_dir + '*' + image_ext)
    print("data_dir + tra_image_dir + '*' + image_ext: ", data_dir + tra_image_dir + '*' + image_ext)
    test_img_name_list = glob.glob(data_dir + test_image_dir + '*' + image_ext)
    print("data_dir + test_image_dir + '*' + image_ext: ", data_dir + test_image_dir + '*' + image_ext)
    tra_lbl_name_list = []
    for img_path in tra_img_name_list:
        img_name = img_path.split("/")[-1]
        aaa = img_name.split(".")
        bbb = aaa[0:-1]      # filename components without the extension
        imidx = bbb[0]
        for i in range(1, len(bbb)):
            imidx = imidx + "." + bbb[i]
        tra_lbl_name_list.append(data_dir + tra_label_dir + imidx + label_ext)
    test_lbl_name_list = []
    for img_path in test_img_name_list:
        img_name = img_path.split("/")[-1]
        aaa = img_name.split(".")
        bbb = aaa[0:-1]
        imidx = bbb[0]
        for i in range(1, len(bbb)):
            imidx = imidx + "." + bbb[i]
        test_lbl_name_list.append(data_dir + test_label_dir + imidx + label_ext)

    print("---")
    print("train images: ", len(tra_img_name_list))
    print("train labels: ", len(tra_lbl_name_list))
    print("---")
    print("---")
    print("test images: ", len(test_img_name_list))
    print("test labels: ", len(test_lbl_name_list))
    print("---")
    train_num = len(tra_img_name_list)
    test_num = len(test_img_name_list)

    # ------- datasets / loaders --------
    salobj_dataset = SalObjDataset(img_name_list=tra_img_name_list,
                                   lbl_name_list=tra_lbl_name_list,
                                   transform=transforms.Compose([
                                       RescaleT(256),
                                       RandomCrop(224),
                                       ToTensorLab(flag=0)
                                   ]),
                                   category="train",
                                   enableInpaintAug=enableInpaintAug)
    salobj_dataset_test = SalObjDataset(img_name_list=test_img_name_list,
                                        lbl_name_list=test_lbl_name_list,
                                        transform=transforms.Compose([
                                            RescaleT(256),
                                            RandomCrop(224),
                                            ToTensorLab(flag=0)
                                        ]),
                                        category="test",
                                        enableInpaintAug=enableInpaintAug)
    salobj_dataloader = DataLoader(salobj_dataset, batch_size=batch_size_train,
                                   shuffle=True, num_workers=1)
    salobj_dataloader_test = DataLoader(salobj_dataset_test, batch_size=batch_size_val,
                                        shuffle=True, num_workers=1)

    # ------- 3. define model --------
    # define the net
    net = BASNet(3, 1)
    if resume_train:
        # print("resume_model_path:", resume_model_path)
        checkpoint = torch.load(resume_model_path)
        net.load_state_dict(checkpoint)
    if torch.cuda.is_available():
        net.to(device)

    # ------- 4. define optimizer --------
    print("---define optimizer...")
    optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0)

    plotter = VisdomLinePlotter(env_name=visdom_tab_title)
    best_ave_mae = 100000
    best_max_fmeasure = 0
    best_relaxed_fmeasure = 0
    best_ave_maxf = 0
    best_own_RelaxedFmeasure = 0
    for epoch in range(last_epoch - 1, epoch_num):
        ### Train network
        # Per-epoch meters: one per supervision level (0..7) for the main
        # branch and (1..7) for the structure branch, for train and test.
        train_loss0 = AverageMeter()
        train_loss1 = AverageMeter()
        train_loss2 = AverageMeter()
        train_loss3 = AverageMeter()
        train_loss4 = AverageMeter()
        train_loss5 = AverageMeter()
        train_loss6 = AverageMeter()
        train_loss7 = AverageMeter()
        train_struct_loss1 = AverageMeter()
        train_struct_loss2 = AverageMeter()
        train_struct_loss3 = AverageMeter()
        train_struct_loss4 = AverageMeter()
        train_struct_loss5 = AverageMeter()
        train_struct_loss6 = AverageMeter()
        train_struct_loss7 = AverageMeter()
        test_loss0 = AverageMeter()
        test_loss1 = AverageMeter()
        test_loss2 = AverageMeter()
        test_loss3 = AverageMeter()
        test_loss4 = AverageMeter()
        test_loss5 = AverageMeter()
        test_loss6 = AverageMeter()
        test_loss7 = AverageMeter()
        test_struct_loss1 = AverageMeter()
        test_struct_loss2 = AverageMeter()
        test_struct_loss3 = AverageMeter()
        test_struct_loss4 = AverageMeter()
        test_struct_loss5 = AverageMeter()
        test_struct_loss6 = AverageMeter()
        test_struct_loss7 = AverageMeter()
        average_mae = AverageMeter()
        average_maxf = AverageMeter()
        average_relaxedf = AverageMeter()
        average_own_RelaxedFMeasure = AverageMeter()
        net.train()
        for i, data in enumerate(salobj_dataloader):
            ite_num = ite_num + 1
            ite_num4val = ite_num4val + 1

            inputs, labels, labels_struct = data['image'], data['label'], data['label2']
            inputs = inputs.type(torch.FloatTensor)
            labels = labels.type(torch.FloatTensor)
            labels_struct = labels_struct.type(torch.FloatTensor)

            # wrap them in Variable
            if torch.cuda.is_available():
                inputs_v, labels_v, labels_struct_v = Variable(
                    inputs.to(device), requires_grad=False), Variable(
                    labels.to(device), requires_grad=False), Variable(
                    labels_struct.to(device), requires_grad=False)
            else:
                inputs_v, labels_v, labels_struct_v = Variable(
                    inputs, requires_grad=False), Variable(
                    labels, requires_grad=False), Variable(
                    labels_struct, requires_grad=False)

            # y zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            # The net returns 8 main-branch maps (d0..d7) plus 7 structure maps.
            d0, d1, d2, d3, d4, d5, d6, d7, d1_struct, d2_struct, d3_struct, d4_struct, d5_struct, d6_struct, d7_struct = net(inputs_v)
            # muti_bce_loss_fusion also feeds the per-level meters passed in;
            # loss2 is the d0 ("target") loss, loss is the fused total.
            loss2, loss = muti_bce_loss_fusion(
                d0, d1, d2, d3, d4, d5, d6, d7, d1_struct, d2_struct,
                d3_struct, d4_struct, d5_struct, d6_struct, d7_struct, labels_v,
                train_loss0, train_loss1, train_loss2, train_loss3,
                train_loss4, train_loss5, train_loss6, train_loss7,
                train_struct_loss1, train_struct_loss2, train_struct_loss3,
                train_struct_loss4, train_struct_loss5, train_struct_loss6,
                train_struct_loss7)
            loss.backward()
            optimizer.step()

            # # print statistics
            running_loss += loss.data
            running_tar_loss += loss2.data

            # del temporary outputs and loss
            del d0, d1, d2, d3, d4, d5, d6, d7, d1_struct, d2_struct, d3_struct, d4_struct, d5_struct, d6_struct, d7_struct, loss2, loss

            print(
                "[train epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train loss: %3f, tar: %3f "
                % (epoch + 1, epoch_num, (i + 1) * batch_size_train, train_num,
                   ite_num, running_loss / ite_num4val,
                   running_tar_loss / ite_num4val))

        # Push this epoch's averaged train losses to Visdom.
        plotter.plot('loss0', 'train', 'Main Loss 0', epoch + 1, float(train_loss0.avg))
        plotter.plot('loss1', 'train', 'Main Loss 1', epoch + 1, float(train_loss1.avg))
        plotter.plot('loss2', 'train', 'Main Loss 2', epoch + 1, float(train_loss2.avg))
        plotter.plot('loss3', 'train', 'Main Loss 3', epoch + 1, float(train_loss3.avg))
        plotter.plot('loss4', 'train', 'Main Loss 4', epoch + 1, float(train_loss4.avg))
        plotter.plot('loss5', 'train', 'Main Loss 5', epoch + 1, float(train_loss5.avg))
        plotter.plot('loss6', 'train', 'Main Loss 6', epoch + 1, float(train_loss6.avg))
        plotter.plot('loss7', 'train', 'Main Loss 7', epoch + 1, float(train_loss7.avg))
        plotter.plot('structloss1', 'train', 'Struct Loss 1', epoch + 1, float(train_struct_loss1.avg))
        plotter.plot('structloss2', 'train', 'Struct Loss 2', epoch + 1, float(train_struct_loss2.avg))
        plotter.plot('structloss3', 'train', 'Struct Loss 3', epoch + 1, float(train_struct_loss3.avg))
        plotter.plot('structloss4', 'train', 'Struct Loss 4', epoch + 1, float(train_struct_loss4.avg))
        plotter.plot('structloss5', 'train', 'Struct Loss 5', epoch + 1, float(train_struct_loss5.avg))
        plotter.plot('structloss6', 'train', 'Struct Loss 6', epoch + 1, float(train_struct_loss6.avg))
        plotter.plot('structloss7', 'train', 'Struct Loss 7', epoch + 1, float(train_struct_loss7.avg))

        ### Validate model
        print("---Evaluate model---")
        if ite_num >= next_test:
            # test and save model 10000 iterations, due to very large DUTS-TE dataset
            next_test = ite_num + test_increments
            net.eval()
            max_epoch_fmeasure = 0
            for i, data in enumerate(salobj_dataloader_test):
                inputs, labels = data['image'], data['label']
                inputs = inputs.type(torch.FloatTensor)
                labels = labels.type(torch.FloatTensor)
                if torch.cuda.is_available():
                    inputs_v, labels_v = Variable(
                        inputs.to(device), requires_grad=False), Variable(
                        labels.to(device), requires_grad=False)
                else:
                    inputs_v, labels_v = Variable(
                        inputs, requires_grad=False), Variable(
                        labels, requires_grad=False)
                # NOTE(review): no torch.no_grad() here — eval pass still
                # builds graphs; confirm memory headroom is acceptable.
                d0, d1, d2, d3, d4, d5, d6, d7, d1_struct, d2_struct, d3_struct, d4_struct, d5_struct, d6_struct, d7_struct = net(inputs_v)
                # d0 is the finest prediction; normalise and resize it back to
                # the original image resolution for metric computation.
                pred = d0[:, 0, :, :]
                pred = normPRED(pred)
                pred = pred.squeeze()
                predict_np = pred.cpu().data.numpy()
                im = Image.fromarray(predict_np * 255).convert('RGB')
                img_name = test_img_name_list[i]  # assumes batch_size_val == 1
                image = cv2.imread(img_name)
                imo = im.resize((image.shape[1], image.shape[0]),
                                resample=Image.BILINEAR)
                imo = imo.convert("L")  ### Convert to grayscale 1-channel
                resizedImg_np = np.array(
                    imo)  ### Result is 2D numpy array predicted salient map
                img__lbl_name = test_lbl_name_list[i]
                gt_img = np.array(Image.open(img__lbl_name).convert(
                    "L"))  ### Ground truth salient map
                ### Compute metrics
                result_mae = getMAE(gt_img, resizedImg_np)
                average_mae.update(result_mae, 1)
                precision, recall = getPRCurve(gt_img, resizedImg_np)
                result_maxfmeasure = getMaxFMeasure(precision, recall)
                result_maxfmeasure = result_maxfmeasure.mean()
                average_maxf.update(result_maxfmeasure, 1)
                if (result_maxfmeasure > max_epoch_fmeasure):
                    max_epoch_fmeasure = result_maxfmeasure
                result_relaxedfmeasure = getRelaxedFMeasure(
                    gt_img, resizedImg_np)
                result_ownrelaxedfmeasure = own_RelaxedFMeasure(
                    gt_img, resizedImg_np)
                average_relaxedf.update(result_relaxedfmeasure, 1)
                average_own_RelaxedFMeasure.update(result_ownrelaxedfmeasure, 1)
                # Run the loss only to populate the test_* meters.
                loss2, loss = muti_bce_loss_fusion(
                    d0, d1, d2, d3, d4, d5, d6, d7, d1_struct, d2_struct,
                    d3_struct, d4_struct, d5_struct, d6_struct, d7_struct,
                    labels_v, test_loss0, test_loss1, test_loss2, test_loss3,
                    test_loss4, test_loss5, test_loss6, test_loss7,
                    test_struct_loss1, test_struct_loss2, test_struct_loss3,
                    test_struct_loss4, test_struct_loss5, test_struct_loss6,
                    test_struct_loss7)
                del d0, d1, d2, d3, d4, d5, d6, d7, d1_struct, d2_struct, d3_struct, d4_struct, d5_struct, d6_struct, d7_struct, loss2, loss
                print(
                    "[test epoch: %3d/%3d, batch: %5d/%5d, ite: %d] test loss: %3f, tar: %3f "
                    % (epoch + 1, epoch_num, (i + 1) * batch_size_val, test_num,
                       ite_num, running_loss / ite_num4val,
                       running_tar_loss / ite_num4val))

            # Save the current weights, then copy them into per-metric "best"
            # folders when a record is beaten.
            model_name = model_dir + "basnet_bsi_epoch_%d_itr_%d_train_%3f_tar_%3f.pth" % (
                epoch + 1, ite_num, running_loss / ite_num4val,
                running_tar_loss / ite_num4val)
            torch.save(net.state_dict(), model_name)
            running_loss = 0.0
            running_tar_loss = 0.0
            net.train()  # resume train
            ite_num4val = 1
            # NOTE(review): running_loss/running_tar_loss were just reset, so
            # the "best" filenames below always embed 0.000000 — confirm
            # whether that is intended.
            if (average_mae.avg < best_ave_mae):
                best_ave_mae = average_mae.avg
                newname = model_dir + "bestMAE/basnet_bsi_epoch_%d_itr_%d_train_%3f_tar_%3f_mae_%3f.pth" % (
                    epoch + 1, ite_num, running_loss / ite_num4val,
                    running_tar_loss / ite_num4val, best_ave_mae)
                fold_dir = newname.rsplit("/", 1)
                if not os.path.isdir(fold_dir[0]):
                    os.mkdir(fold_dir[0])
                copyfile(model_name, newname)
            if (max_epoch_fmeasure > best_max_fmeasure):
                best_max_fmeasure = max_epoch_fmeasure
                newname = model_dir + "bestEpochMaxF/basnet_bsi_epoch_%d_itr_%d_train_%3f_tar_%3f_maxfmeas_%3f.pth" % (
                    epoch + 1, ite_num, running_loss / ite_num4val,
                    running_tar_loss / ite_num4val, best_max_fmeasure)
                fold_dir = newname.rsplit("/", 1)
                if not os.path.isdir(fold_dir[0]):
                    os.mkdir(fold_dir[0])
                copyfile(model_name, newname)
            if (average_maxf.avg > best_ave_maxf):
                best_ave_maxf = average_maxf.avg
                newname = model_dir + "bestAveMaxF/basnet_bsi_epoch_%d_itr_%d_train_%3f_tar_%3f_avemfmeas_%3f.pth" % (
                    epoch + 1, ite_num, running_loss / ite_num4val,
                    running_tar_loss / ite_num4val, best_ave_maxf)
                fold_dir = newname.rsplit("/", 1)
                if not os.path.isdir(fold_dir[0]):
                    os.mkdir(fold_dir[0])
                copyfile(model_name, newname)
            if (average_relaxedf.avg > best_relaxed_fmeasure):
                best_relaxed_fmeasure = average_relaxedf.avg
                newname = model_dir + "bestAveRelaxF/basnet_bsi_epoch_%d_itr_%d_train_%3f_tar_%3f_averelaxfmeas_%3f.pth" % (
                    epoch + 1, ite_num, running_loss / ite_num4val,
                    running_tar_loss / ite_num4val, best_relaxed_fmeasure)
                fold_dir = newname.rsplit("/", 1)
                if not os.path.isdir(fold_dir[0]):
                    os.mkdir(fold_dir[0])
                copyfile(model_name, newname)
            if (average_own_RelaxedFMeasure.avg > best_own_RelaxedFmeasure):
                best_own_RelaxedFmeasure = average_own_RelaxedFMeasure.avg
                newname = model_dir + "bestOwnRelaxedF/basnet_bsi_epoch_%d_itr_%d_train_%3f_tar_%3f_averelaxfmeas_%3f.pth" % (
                    epoch + 1, ite_num, running_loss / ite_num4val,
                    running_tar_loss / ite_num4val, best_own_RelaxedFmeasure)
                fold_dir = newname.rsplit("/", 1)
                if not os.path.isdir(fold_dir[0]):
                    os.mkdir(fold_dir[0])
                copyfile(model_name, newname)

            # Push this evaluation pass's metrics to Visdom.
            plotter.plot('loss0', 'test', 'Main Loss 0', epoch + 1, float(test_loss0.avg))
            plotter.plot('loss1', 'test', 'Main Loss 1', epoch + 1, float(test_loss1.avg))
            plotter.plot('loss2', 'test', 'Main Loss 2', epoch + 1, float(test_loss2.avg))
            plotter.plot('loss3', 'test', 'Main Loss 3', epoch + 1, float(test_loss3.avg))
            plotter.plot('loss4', 'test', 'Main Loss 4', epoch + 1, float(test_loss4.avg))
            plotter.plot('loss5', 'test', 'Main Loss 5', epoch + 1, float(test_loss5.avg))
            plotter.plot('loss6', 'test', 'Main Loss 6', epoch + 1, float(test_loss6.avg))
            plotter.plot('loss7', 'test', 'Main Loss 7', epoch + 1, float(test_loss7.avg))
            plotter.plot('structloss1', 'test', 'Struct Loss 1', epoch + 1, float(test_struct_loss1.avg))
            plotter.plot('structloss2', 'test', 'Struct Loss 2', epoch + 1, float(test_struct_loss2.avg))
            plotter.plot('structloss3', 'test', 'Struct Loss 3', epoch + 1, float(test_struct_loss3.avg))
            plotter.plot('structloss4', 'test', 'Struct Loss 4', epoch + 1, float(test_struct_loss4.avg))
            plotter.plot('structloss5', 'test', 'Struct Loss 5', epoch + 1, float(test_struct_loss5.avg))
            plotter.plot('structloss6', 'test', 'Struct Loss 6', epoch + 1, float(test_struct_loss6.avg))
            plotter.plot('structloss7', 'test', 'Struct Loss 7', epoch + 1, float(test_struct_loss7.avg))
            plotter.plot('mae', 'test', 'Average Epoch MAE', epoch + 1, float(average_mae.avg))
            plotter.plot('max_maxf', 'test', 'Max Max Epoch F-Measure', epoch + 1, float(max_epoch_fmeasure))
            plotter.plot('ave_maxf', 'test', 'Average Max F-Measure', epoch + 1, float(average_maxf.avg))
            plotter.plot('ave_relaxedf', 'test', 'Average Relaxed F-Measure', epoch + 1, float(average_relaxedf.avg))
            plotter.plot('own_RelaxedFMeasure', 'test', 'Own Average Relaxed F-Measure', epoch + 1, float(average_own_RelaxedFMeasure.avg))

    print('-------------Congratulations! Training Done!!!-------------')
def train():
    """Train BASNet on DUTS-TR with the plain 8-output multi-BCE loss,
    checkpointing every 2000 iterations into ``model_dir``.

    Fixes over the previous revision:
    - ``glob.glob`` returns paths in arbitrary, filesystem-dependent order,
      and images and masks were globbed independently — the i-th image was
      not guaranteed to pair with the i-th mask. Both lists are now sorted
      so corresponding basenames line up deterministically.
    - ``model_dir`` is created up front so ``torch.save`` cannot fail with
      FileNotFoundError on a fresh checkout.

    Requires module-level names: SalObjDataset, BASNet, muti_bce_loss_fusion,
    RescaleT, RandomCrop, ToTensorLab, DataLoader, transforms, Variable.
    Note: the CUDA guard was deliberately disabled upstream — a GPU is
    required (``net.cuda()`` / ``.cuda()`` calls are unconditional).
    """
    # ------- 1. configuration --------
    data_dir = './train_data/'
    tra_image_dir = 'DUTS-TR/DUTS-TR-Image/'
    tra_label_dir = 'DUTS-TR/DUTS-TR-Mask/'
    image_ext = '.jpg'
    label_ext = '.png'
    model_dir = "./saved_models/basnet_bsi/"
    epoch_num = 100
    batch_size_train = 1
    batch_size_val = 1
    train_num = 0
    val_num = 0  # kept for parity with the other trainers; unused here

    # Ensure the checkpoint directory exists before the first torch.save().
    os.makedirs(model_dir, exist_ok=True)

    # ------- 2. data --------
    # sorted(): glob order is arbitrary; sorting both lists keeps the i-th
    # image aligned with the i-th mask (names differ only by dir/extension).
    tra_img_name_list = sorted(glob.glob(data_dir + tra_image_dir + '*' + image_ext))
    tra_lbl_name_list = sorted(glob.glob(data_dir + tra_label_dir + '*' + label_ext))

    print("---")
    print("train images: ", len(tra_img_name_list))
    print("train labels: ", len(tra_lbl_name_list))
    print("---")

    train_num = len(tra_img_name_list)

    salobj_dataset = SalObjDataset(img_name_list=tra_img_name_list,
                                   lbl_name_list=tra_lbl_name_list,
                                   transform=transforms.Compose([
                                       RescaleT(256),
                                       RandomCrop(224),
                                       ToTensorLab(flag=0)
                                   ]))
    salobj_dataloader = DataLoader(salobj_dataset,
                                   batch_size=batch_size_train,
                                   shuffle=True,
                                   num_workers=1)

    # ------- 3. define model --------
    # define the net
    net = BASNet(3, 1)
    net.cuda()

    # ------- 4. define optimizer --------
    print("---define optimizer...")
    optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0)

    # ------- 5. training process --------
    print("---start training...")
    ite_num = 0
    running_loss = 0.0       # fused total loss, accumulated between saves
    running_tar_loss = 0.0   # d0 ("target") loss, accumulated between saves
    ite_num4val = 0          # iterations since the last checkpoint/reset

    for epoch in range(0, epoch_num):
        net.train()

        for i, data in enumerate(salobj_dataloader):
            ite_num = ite_num + 1
            ite_num4val = ite_num4val + 1

            inputs, labels = data['image'], data['label']
            inputs = inputs.type(torch.FloatTensor)
            labels = labels.type(torch.FloatTensor)

            # wrap them in Variable (GPU required, see docstring)
            inputs_v, labels_v = Variable(inputs.cuda(), requires_grad=False), Variable(
                labels.cuda(), requires_grad=False)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            d0, d1, d2, d3, d4, d5, d6, d7 = net(inputs_v)
            loss2, loss = muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6, d7,
                                               labels_v)
            loss.backward()
            optimizer.step()

            # accumulate statistics
            running_loss += loss.data
            running_tar_loss += loss2.data

            # del temporary outputs and loss
            del d0, d1, d2, d3, d4, d5, d6, d7, loss2, loss

            print(
                "[epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train loss: %3f, tar: %3f "
                % (epoch + 1, epoch_num, (i + 1) * batch_size_train, train_num,
                   ite_num, running_loss / ite_num4val,
                   running_tar_loss / ite_num4val))

            if ite_num % 2000 == 0:  # save model every 2000 iterations
                torch.save(
                    net.state_dict(),
                    model_dir + "basnet_bsi_itr_%d_train_%3f_tar_%3f.pth" %
                    (ite_num, running_loss / ite_num4val,
                     running_tar_loss / ite_num4val))
                running_loss = 0.0
                running_tar_loss = 0.0
                net.train()  # resume train
                ite_num4val = 0

    print('-------------Congratulations! Training Done!!!-------------')
#valid_size = round(len(tra_img_name_list) * 0.2) valid_size = len(valid_img_name_list) #train_size = len(tra_img_name_list) - valid_size train_size = len(tra_img_name_list) #print([train_size, valid_size]) #train_num = len(tra_img_name_list) train_data = SalObjDataset( img_name_list=tra_img_name_list, lbl_name_list=tra_lbl_name_list, transform=transforms.Compose([ RescaleT(256), RandomCrop(224), ToTensorLab(flag=0)])) val_data = SalObjDataset( img_name_list=valid_img_name_list, lbl_name_list=valid_lbl_name_list, transform=transforms.Compose([ RescaleT(256), RandomCrop(224), ToTensorLab(flag=0)])) #train_data, val_data = torch.utils.data.random_split(salobj_dataset, [train_size, valid_size])
def train():
    """Evaluate a (resumed) BASNet checkpoint on DUTS-TE and print MAE and
    F-measure statistics. Despite the name, this variant performs no weight
    updates — it is an evaluation pass only.

    Fixes over the previous revision:
    - The final summary labels were swapped: "Max Max Epoch F-Measure" printed
      ``average_maxf.avg`` while "Average Max F-Measure" printed
      ``max_epoch_fmeasure`` (the Visdom plots in the sibling trainer pair
      'max_maxf' with ``max_epoch_fmeasure`` and 'ave_maxf' with
      ``average_maxf.avg``). The labels now match the values.
    - Removed a dangling leftover assignment (``img_name_png =``) that was
      chained onto the MAE computation.

    Requires module-level names: SalObjDataset, BASNet, AverageMeter,
    muti_bce_loss_fusion, normPRED, getMAE, getPRCurve, getMaxFMeasure,
    getRelaxedFMeasure, DataLoader, transforms, Variable, Image, cv2, np.
    """
    # ------- 1. configuration --------
    data_dir = '/media/markytools/New Volume/Courses/EE298CompVis/finalproject/datasets/'
    test_image_dir = 'DUTS/DUTS-TE/DUTS-TE-Image/'
    test_label_dir = 'DUTS/DUTS-TE/DUTS-TE-Mask/'
    image_ext = '.jpg'
    label_ext = '.png'
    model_dir = "../saved_models/"
    resume_train = True
    resume_model_path = model_dir + "basnet-original.pth"
    last_epoch = 1
    epoch_num = 100000
    batch_size_train = 8
    batch_size_val = 1
    train_num = 0
    val_num = 0
    enableInpaintAug = False
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")  # set CPU to 0

    # ------- 5. training process --------
    print("---start training...")
    test_increments = 15000
    ite_num = 0
    running_loss = 0.0
    running_tar_loss = 0.0
    ite_num4val = 1
    next_test = ite_num + 0

    ############ ############ ############ ############
    # Collect test image paths and derive each mask path from the image
    # filename (swap directory and extension, keep interior dots).
    test_img_name_list = glob.glob(data_dir + test_image_dir + '*' + image_ext)
    print("data_dir + test_image_dir + '*' + image_ext: ",
          data_dir + test_image_dir + '*' + image_ext)
    test_lbl_name_list = []
    for img_path in test_img_name_list:
        img_name = img_path.split("/")[-1]
        aaa = img_name.split(".")
        bbb = aaa[0:-1]
        imidx = bbb[0]
        for i in range(1, len(bbb)):
            imidx = imidx + "." + bbb[i]
        test_lbl_name_list.append(data_dir + test_label_dir + imidx + label_ext)

    print("---")
    print("test images: ", len(test_img_name_list))
    print("test labels: ", len(test_lbl_name_list))
    print("---")
    test_num = len(test_img_name_list)

    # Sanity check: every derived mask path must map back to a known image.
    for test_lbl in test_lbl_name_list:
        test_jpg = test_lbl.replace("png", "jpg")
        test_jpg = test_jpg.replace("Mask", "Image")
        if test_jpg not in test_img_name_list:
            print("test_lbl not in label: ", test_lbl)

    salobj_dataset_test = SalObjDataset(
        img_name_list=test_img_name_list,
        lbl_name_list=test_lbl_name_list,
        transform=transforms.Compose([
            RescaleT(256),
            RandomCrop(224),
            ToTensorLab(flag=0)]),
        category="test",
        enableInpaintAug=enableInpaintAug)
    salobj_dataloader_test = DataLoader(salobj_dataset_test,
                                        batch_size=batch_size_val,
                                        shuffle=True,
                                        num_workers=1)

    # ------- 3. define model --------
    # define the net
    net = BASNet(3, 1)
    if resume_train:
        # print("resume_model_path:", resume_model_path)
        checkpoint = torch.load(resume_model_path)
        net.load_state_dict(checkpoint)
    if torch.cuda.is_available():
        net.to(device)

    # ------- 4. define optimizer --------
    print("---define optimizer...")
    optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0)

    plotter = VisdomLinePlotter(env_name='NewlyAddedRelaxedMeasureEnv1')
    best_ave_mae = 100000
    best_max_fmeasure = 0
    best_relaxed_fmeasure = 0
    best_ave_maxf = 0

    ### Train network
    # Meters (train_* are created for interface parity; only test_* and the
    # metric meters are actually fed below).
    train_loss0 = AverageMeter()
    train_loss1 = AverageMeter()
    train_loss2 = AverageMeter()
    train_loss3 = AverageMeter()
    train_loss4 = AverageMeter()
    train_loss5 = AverageMeter()
    train_loss6 = AverageMeter()
    train_loss7 = AverageMeter()
    test_loss0 = AverageMeter()
    test_loss1 = AverageMeter()
    test_loss2 = AverageMeter()
    test_loss3 = AverageMeter()
    test_loss4 = AverageMeter()
    test_loss5 = AverageMeter()
    test_loss6 = AverageMeter()
    test_loss7 = AverageMeter()
    average_mae = AverageMeter()
    average_maxf = AverageMeter()
    average_relaxedf = AverageMeter()

    ### Validate model
    print("---Evaluate model---")
    next_test = ite_num + test_increments
    net.eval()
    max_epoch_fmeasure = 0
    for i, data in enumerate(salobj_dataloader_test):
        inputs, labels = data['image'], data['label']
        inputs = inputs.type(torch.FloatTensor)
        labels = labels.type(torch.FloatTensor)
        if torch.cuda.is_available():
            inputs_v, labels_v = Variable(inputs.to(device), requires_grad=False), Variable(
                labels.to(device), requires_grad=False)
        else:
            inputs_v, labels_v = Variable(inputs, requires_grad=False), Variable(
                labels, requires_grad=False)
        d0, d1, d2, d3, d4, d5, d6, d7 = net(inputs_v)
        # d0 is the finest prediction; normalise and resize it back to the
        # source resolution before computing metrics.
        pred = d0[:, 0, :, :]
        pred = normPRED(pred)
        pred = pred.squeeze()
        predict_np = pred.cpu().data.numpy()
        im = Image.fromarray(predict_np * 255).convert('RGB')
        img_name = test_img_name_list[i]  # valid because batch_size_val == 1
        image = cv2.imread(img_name)
        imo = im.resize((image.shape[1], image.shape[0]), resample=Image.BILINEAR)
        imo = imo.convert("L")  ### Convert to grayscale 1-channel
        resizedImg_np = np.array(imo)  ### Result is 2D numpy array predicted salient map
        img__lbl_name = test_lbl_name_list[i]
        gt_img = np.array(Image.open(img__lbl_name).convert("L"))  ### Ground truth salient map

        ### Compute metrics
        result_mae = getMAE(gt_img, resizedImg_np)
        average_mae.update(result_mae, 1)
        precision, recall = getPRCurve(gt_img, resizedImg_np)
        result_maxfmeasure = getMaxFMeasure(precision, recall)
        result_maxfmeasure = result_maxfmeasure.mean()
        average_maxf.update(result_maxfmeasure, 1)
        if (result_maxfmeasure > max_epoch_fmeasure):
            max_epoch_fmeasure = result_maxfmeasure
        result_relaxedfmeasure = getRelaxedFMeasure(gt_img, resizedImg_np)
        average_relaxedf.update(result_relaxedfmeasure, 1)

        # Run the loss only to populate the test_* meters.
        loss2, loss = muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6, d7,
                                           labels_v, test_loss0, test_loss1,
                                           test_loss2, test_loss3, test_loss4,
                                           test_loss5, test_loss6, test_loss7)
        del d0, d1, d2, d3, d4, d5, d6, d7, loss2, loss

    print("Average Epoch MAE: ", average_mae.avg)
    # Labels corrected: max-of-epoch pairs with max_epoch_fmeasure, the
    # average pairs with average_maxf.avg (matches the Visdom plot pairing).
    print("Max Max Epoch F-Measure: ", max_epoch_fmeasure)
    print("Average Max F-Measure: ", average_maxf.avg)
    print("Average Relaxed F-Measure: ", average_relaxedf.avg)
    print('-------------Congratulations! Training Done!!!-------------')
def main():
    """Train MYNet on DUTS-TR with flip/scale/crop augmentation, optionally
    resuming a full checkpoint (model + optimizer + scheduler + counters),
    saving every ``save_every`` epochs and validating after each save.

    Requires module-level names: DatasetLoader, MYNet, muti_bce_loss_fusion,
    normPRED, save_output, Rescale, RandomCrop, RandomHorizontalFlip,
    RandomVerticalFlip, ToTensor, DataLoader, transforms, optim, torch, glob,
    time. CUDA is required (unconditional ``.cuda()`` calls).
    """
    # ------- configuration --------
    # data_dir = './train_data/'
    train_image_dir = './train_data/DUTS/DUTS-TR-Image/'
    train_label_dir = './train_data/DUTS/DUTS-TR-Mask/'
    model_dir = './saved_models/'
    resume_train = True
    saved_model_path = model_dir + 'model.pth'
    validation = True
    save_every = 1          # checkpoint (and validate) every N epochs
    epoch_num = 100000
    batch_size_train = 16
    batch_size_val = 1
    train_num = 0
    val_num = 0             # NOTE(review): assigned but never used below

    # ------- validation data (built only when enabled) --------
    if validation:
        val_image_dir = 'test_data/val/images/'
        val_label_dir = 'test_data/val/gts/'
        prediction_dir = './val_results/'
        val_img_name_list = glob.glob(val_image_dir + '*.jpg')
        val_lbl_name_list = glob.glob(val_label_dir + '*.png')
        val_dataset = DatasetLoader(img_name_list=val_img_name_list,
                                    lbl_name_list=val_lbl_name_list,
                                    transform=transforms.Compose(
                                        [Rescale(256), ToTensor()]))
        val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False,
                                    num_workers=4)

    # ------- training data --------
    # Label path is derived from each image path (swap extension + directory).
    train_img_name_list = glob.glob(train_image_dir + '*.jpg')
    train_lbl_name_list = []
    for img_path in train_img_name_list:
        img_path = img_path.replace('.jpg', '.png')
        img_path = img_path.replace('DUTS-TR-Image', 'DUTS-TR-Mask')
        train_lbl_name_list.append(img_path)

    # Fail fast on an empty dataset (bad paths are the common cause).
    # NOTE(review): assert is stripped under -O; an explicit raise would be safer.
    if len(train_img_name_list) == 0 or len(val_img_name_list) == 0:
        print('0 images found.')
        assert False

    print('Train images: ', len(train_img_name_list))
    print('Train labels: ', len(train_lbl_name_list))
    train_num = len(train_img_name_list)

    dataset = DatasetLoader(img_name_list=train_img_name_list,
                            lbl_name_list=train_lbl_name_list,
                            transform=transforms.Compose([
                                RandomHorizontalFlip(0.5),
                                RandomVerticalFlip(0.5),
                                Rescale(300),
                                RandomCrop(256),
                                ToTensor()
                            ]))
    dataloader = DataLoader(dataset, batch_size=batch_size_train, shuffle=True,
                            num_workers=4)

    # ------- model / optimizer / scheduler --------
    model = MYNet(3, 1)
    model.cuda()
    from torchsummary import summary  # local import: only used for this printout
    summary(model, input_size=(3, 256, 256))
    # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.00001, nesterov=False)
    optimizer = optim.Adam(model.parameters(), lr=0.01, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0)
    # Milestones are in iterations, not epochs: scheduler.step() is called
    # once per batch below.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[200000, 350000],
                                               gamma=0.1, last_epoch=-1)
    # scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0001,
    #                                               max_lr=0.01, step_size_up=8000, mode='triangular2')

    i_num_tot = 0       # total iterations across the whole run
    loss_output = 0.0   # loss accumulated since the last checkpoint
    loss_pre_ref = 0.0
    i_num_epoch = 0     # iterations since the last checkpoint
    epoch_init = 0
    if resume_train:
        # Full resume: weights, optimizer, scheduler and the counters above.
        print('Loading checkpoint: ', saved_model_path)
        checkpoint = torch.load(saved_model_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # NOTE(review): stray trailing comma — this statement evaluates to a
        # 1-tuple; harmless, but it should be removed.
        scheduler.load_state_dict(checkpoint['scheduler_state_dict']),
        epoch_init = checkpoint['epoch'] + 1
        i_num_tot = checkpoint['i_num_tot'] + 1
        i_num_epoch = checkpoint['i_num_epoch']
        loss_output = checkpoint['loss_output']
        # loss_pre_ref = checkpoint['loss_pre_ref']

    # Record the model architecture once at startup.
    log_file = open('logs/log.txt', 'a+')
    log_file.write(str(model) + '\n')
    log_file.close()

    print('Training...')
    _s = time.time()
    for epoch in range(epoch_init, epoch_num):
        model.train()
        print('Epoch {}...'.format(epoch))
        _time_epoch = time.time()
        for i, data in enumerate(dataloader):
            i_num_tot += 1
            i_num_epoch += 1
            inputs, labels = data
            inputs = inputs.cuda()
            labels = labels.cuda()
            optimizer.zero_grad()
            out = model(inputs)
            # muti_bce_loss_fusion returns a tuple; element 0 is the total loss.
            loss = muti_bce_loss_fusion(out, labels)
            loss[0].backward()
            optimizer.step()
            scheduler.step()  # per-iteration LR schedule (see milestones)
            loss_output += loss[0].item()
            # loss_pre_ref += loss[1].item()
            del out, inputs, labels
        print('Epoch time: {}'.format(time.time() - _time_epoch))

        if epoch % save_every == 0:  # save the model every X epochs
            state_dic = {
                'epoch': epoch,
                'i_num_tot': i_num_tot,
                'i_num_epoch': i_num_epoch,
                'loss_output': loss_output,
                # 'loss_pre_ref': loss_pre_ref,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
            }
            torch.save(state_dic, model_dir + 'model.pth')
            # NOTE(review): `loss` here is the last batch's loss from the loop
            # above — NameError if the dataloader were empty.
            log = '[epoch: {:d}/{:d}, ite: {:d}] loss_output: {:.6f}, l: {:.6f}\n'.format(
                epoch, epoch_num, i_num_tot, loss_output / i_num_epoch,
                loss[0].item())
            del loss
            loss_output = 0
            loss_pre_ref = 0
            i_num_epoch = 0
            log_file = open('logs/log.txt', 'a+')
            log_file.write(log + '\n')
            log_file.close()
            print(log)

            if validation:
                # Evaluate on the held-out set and dump predictions to disk.
                model.eval()
                # val_i_num_tot = 0
                val_i_num_epoch = 0
                val_loss_output = 0
                # val_loss_pre_ref = 0
                val_log_file = open('logs/log_val.txt', 'a+')
                print('Evaluating...')
                with torch.no_grad():
                    for val_i, val_data in enumerate(val_dataloader):
                        # val_i_num_tot += 1
                        val_i_num_epoch += 1
                        val_inputs, val_labels = val_data
                        val_inputs = val_inputs.cuda()
                        val_labels = val_labels.cuda()
                        val_out = model(val_inputs)
                        val_loss = muti_bce_loss_fusion(val_out, val_labels)
                        val_loss_output += val_loss[0].item()
                        # val_loss_pre_ref += val_loss0.item()
                        # Save the normalised finest-scale prediction map.
                        pred = val_out[0][:, 0, :, :]
                        pred = normPRED(pred)
                        save_output(val_img_name_list[val_i], pred,
                                    prediction_dir)
                        del val_out, val_inputs, val_labels, val_loss
                log_val = '[val: epoch: {:d}, ite: {:d}] loss_output: {:.6f}\n'.format(
                    epoch, i_num_tot, val_loss_output / val_i_num_epoch)
                val_log_file.write(log_val + '\n')
                val_log_file.close()

    _t = 'Training time: ' + str(time.time() - _s) + '\n'
    print(_t)
    log_file = open('logs/log.txt', 'a+')
    log_file.write(_t)
    log_file.close()
def main():
    """Train a U2NETRefiner mask-refinement model on DUTS-TR.

    All settings are in-function constants below. Builds the augmented
    dataset/dataloader, optionally restores weights from `checkpoint`,
    then runs the multi-output BCE training loop, writing a checkpoint
    every `save_frq` iterations.
    """
    # ---------------------------------------------------------
    # Configs
    # ---------------------------------------------------------
    checkpoint = "./saved_models/u2net/u2net_maskrefine_conv_bn_relu_csse_heavy_aug_bce_itr_172000_train_0.278174_tar_0.031154.pth"
    # checkpoint = None
    mixup_augmentation = False
    heavy_augmentation = True
    multiscale_training = False
    multi_gpu = False
    model_name = 'u2net'  # 'u2netp'
    block_type = "conv_bn_relu"
    se_type = "csse"
    data_dir = '../datasets/'
    tra_image_dir = 'DUTS-TR/DUTS-TR-Image/'
    tra_label_dir = 'DUTS-TR/DUTS-TR-Mask/'
    image_ext = '.jpg'
    label_ext = '.png'
    model_dir = './saved_models/' + model_name + '/'
    os.makedirs(model_dir, exist_ok=True)
    lr = 0.0001
    epoch_num = 500
    batch_size_train = 2
    batch_size_val = 1
    workers = 16
    save_frq = 2000  # save the model every 2000 iterations
    # ---------------------------------------------------------

    # Pair every training image with its label via the shared basename
    # (extension stripped; inner dots in the name are preserved).
    tra_img_name_list = glob.glob(data_dir + tra_image_dir + '*' + image_ext)
    tra_lbl_name_list = []
    for img_path in tra_img_name_list:
        imidx = os.path.splitext(os.path.basename(img_path))[0]
        tra_lbl_name_list.append(data_dir + tra_label_dir + imidx + label_ext)

    print("---")
    print("train images: ", len(tra_img_name_list))
    print("train labels: ", len(tra_lbl_name_list))
    print("---")

    train_num = len(tra_img_name_list)
    val_num = 0

    if heavy_augmentation:
        transform = AlbuSampleTransformer(
            get_heavy_transform(
                transform_size=False if multiscale_training else True))
    else:
        transform = transforms.Compose([
            RescaleT(320),
            RandomCrop(288),
        ])

    # ToTensorLab is skipped for multiscale training (the collater handles it).
    dataset_kwargs = dict(img_name_list=tra_img_name_list,
                          lbl_name_list=tra_lbl_name_list,
                          transform=transforms.Compose([
                              transform,
                          ] + ([
                              ToTensorLab(flag=0),
                          ] if not multiscale_training else [])))
    if mixup_augmentation:
        _dataset_cls = MixupAugSalObjDataset
    elif multiscale_training:
        _dataset_cls = MultiScaleSalObjDataset
    else:
        _dataset_cls = SalObjMaskRefineDataset
    salobj_dataset = _dataset_cls(**dataset_kwargs)
    salobj_dataloader = DataLoader(salobj_dataset,
                                   batch_size=batch_size_train,
                                   shuffle=True,
                                   num_workers=workers)

    # ------- 3. define model --------
    # define the net
    if (model_name == 'u2net'):
        net = U2NETRefiner(block_type=block_type, se_type=se_type)

    if checkpoint:
        if not os.path.exists(checkpoint):
            raise FileNotFoundError(f"Checkpoint file not found: {checkpoint}")
        print(f"Restoring from checkpoint: {checkpoint}")
        try:
            net.load_state_dict(torch.load(checkpoint, map_location="cpu"))
            print("-- success")
        except Exception:
            # was a bare `except:` — keep best-effort semantics but never
            # swallow KeyboardInterrupt/SystemExit
            print("-- error")

    if torch.cuda.is_available():
        net.cuda()

    # ------- 4. define optimizer --------
    print("---define optimizer...")
    optimizer = optim.Adam(net.parameters(),
                           lr=lr,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0)

    if torch.cuda.device_count() > 1 and multi_gpu:
        print(f"Multi-GPU training using {torch.cuda.device_count()} GPUs.")
        net = nn.DataParallel(net)
    else:
        print(f"Training using {torch.cuda.device_count()} GPUs.")

    # ------- 5. training process --------
    print("---start training...")
    ite_num = 0
    ite_num4val = 0      # iterations since the last checkpoint (stats window)
    running_loss = 0.0
    running_tar_loss = 0.0

    for epoch in range(0, epoch_num):
        net.train()
        for i, data in enumerate(salobj_dataloader):
            ite_num = ite_num + 1
            ite_num4val = ite_num4val + 1

            image_key = "image"
            mask_key = "label_aug"
            label_key = "label"
            if multiscale_training:
                # pick a random training scale for this batch
                size = np.random.choice(salobj_dataloader.dataset.sizes)
                # print(f"size: {size}")
                image_key = f"image_{size}"
                mask_key = f"label_aug_{size}"
                label_key = f"label_{size}"

            inputs, labels = data[image_key], data[label_key]
            masks = data[mask_key]
            # print(f"inputs shape : {inputs.shape}")
            # print(f"masks shape : {masks.shape}")
            inputs = inputs.type(torch.FloatTensor)
            masks = masks.type(torch.FloatTensor)
            labels = labels.type(torch.FloatTensor)

            # wrap them in Variable
            if torch.cuda.is_available():
                inputs_v, labels_v, masks_v = \
                    Variable(inputs.cuda(), requires_grad=False), \
                    Variable(labels.cuda(), requires_grad=False), \
                    Variable(masks.cuda(), requires_grad=False)
            else:
                inputs_v, labels_v, masks_v = \
                    Variable(inputs, requires_grad=False), \
                    Variable(labels, requires_grad=False), \
                    Variable(masks, requires_grad=False)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            d6 = 0  # placeholder so the `del` below is valid in both branches
            if model_name == "custom":
                d0, d1, d2, d3, d4, d5 = net(inputs_v, masks_v)
                loss2, loss = multi_bce_loss_fusion5(d0, d1, d2, d3, d4, d5,
                                                     labels_v)
            else:
                d0, d1, d2, d3, d4, d5, d6 = net(inputs_v, masks_v)
                loss2, loss = multi_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6,
                                                    labels_v)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            running_tar_loss += loss2.item()

            # del temporary outputs and loss
            del d0, d1, d2, d3, d4, d5, d6, loss2, loss

            print(
                "[epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train loss: %3f, tar: %3f "
                % (epoch + 1, epoch_num, (i + 1) * batch_size_train, train_num,
                   ite_num, running_loss / ite_num4val,
                   running_tar_loss / ite_num4val))

            if ite_num % save_frq == 0:
                # Checkpoint filename encodes configuration and running stats.
                torch.save(
                    net.module.state_dict()
                    if hasattr(net, "module") else net.state_dict(),
                    model_dir + model_name + "_maskrefine" +
                    ("_" + block_type) +
                    ("_" + se_type if se_type else "") +
                    ("_mixup_aug" if mixup_augmentation else "") +
                    ("_heavy_aug" if heavy_augmentation else "") +
                    ("_multiscale" if multiscale_training else "") +
                    "_bce_itr_%d_train_%3f_tar_%3f.pth" %
                    (ite_num, running_loss / ite_num4val,
                     running_tar_loss / ite_num4val))
                running_loss = 0.0
                running_tar_loss = 0.0
                net.train()  # resume train
                ite_num4val = 0
def main(model_name, img_dir, retrain, weight, model_dir):
    """Train U2NET on a folder of paired images (`origin/`) and masks (`mask/`).

    Args:
        model_name: ignored — immediately overwritten to 'u2net' below
            (NOTE(review): preserved from the original; confirm intent).
        img_dir: root directory containing 'origin' and 'mask' subfolders.
        retrain: if truthy, resume from the checkpoint at `weight`.
        weight: checkpoint path used when `retrain` is set.
        model_dir: ignored — overwritten to ./saved_models/<model_name>/
            (NOTE(review): preserved from the original; confirm intent).

    Saves a full resume-able checkpoint (model + optimizer + epoch) after
    every epoch.
    """
    model_name = 'u2net'  # 'u2netp'
    tra_image_dir = os.path.join(img_dir, 'origin')
    tra_label_dir = os.path.join(img_dir, 'mask')
    image_ext = '.jpg'
    label_ext = '.png'
    model_dir = os.path.join(os.getcwd(), 'saved_models', model_name + os.sep)
    epoch_start = 0
    epoch_num = 500
    batch_size_train = 20
    batch_size_val = 1
    train_num = 4000
    val_num = 500

    # FIX: os.listdir returns entries in arbitrary order, so the image list
    # and the label list were not guaranteed to be aligned; sort both so
    # image i and mask i belong to the same sample (assumes matching names).
    tra_img_name_list = [
        os.path.join(tra_image_dir, item)
        for item in sorted(os.listdir(tra_image_dir))
    ]
    tra_lbl_name_list = [
        os.path.join(tra_label_dir, item)
        for item in sorted(os.listdir(tra_label_dir))
    ]
    print(tra_img_name_list)

    print("---")
    print("train images: ", len(tra_img_name_list))
    print("train labels: ", len(tra_lbl_name_list))
    print("---")

    train_num = len(tra_img_name_list)

    salobj_dataset = SalObjDataset(
        img_name_list=tra_img_name_list,
        lbl_name_list=tra_lbl_name_list,
        transform=transforms.Compose([
            RescaleT(320),
            RandomCrop(288),
            ToTensorLab(flag=0)]))
    salobj_dataloader = DataLoader(salobj_dataset,
                                   batch_size=batch_size_train,
                                   shuffle=True,
                                   num_workers=4)

    # ------- 3. define model --------
    # define the net
    if (model_name == 'u2net'):
        net = U2NET(3, 1)
    elif (model_name == 'u2netp'):
        net = U2NETP(3, 1)
    if torch.cuda.is_available():
        net.cuda()

    # ------- 4. define optimizer --------
    print("---define optimizer...")
    optimizer = optim.Adam(net.parameters(),
                           lr=0.001,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0)

    # ------- 5. training process --------
    print("---start training...")
    if retrain:  # was `retrain == True`
        checkpoint = torch.load(weight)
        net.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        # NOTE(review): restored epoch is overwritten by the range() below, so
        # training always restarts at epoch 0 — kept as in the original.
        epoch = checkpoint['epoch']
        # loss = checkpoint['loss']

    ite_num = 0
    running_loss = 0.0
    running_tar_loss = 0.0
    ite_num4val = 0
    save_frq = 2000  # save the model every 2000 iterations

    for epoch in range(0, epoch_num):
        net.train()
        for i, data in enumerate(salobj_dataloader):
            ite_num = ite_num + 1
            ite_num4val = ite_num4val + 1

            inputs, labels = data['image'], data['label']
            inputs = inputs.type(torch.FloatTensor)
            labels = labels.type(torch.FloatTensor)

            # wrap them in Variable
            if torch.cuda.is_available():
                inputs_v, labels_v = Variable(
                    inputs.cuda(), requires_grad=False), Variable(
                        labels.cuda(), requires_grad=False)
            else:
                inputs_v, labels_v = Variable(
                    inputs, requires_grad=False), Variable(
                        labels, requires_grad=False)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            d0, d1, d2, d3, d4, d5, d6 = net(inputs_v)
            loss2, loss = muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6,
                                               labels_v)

            loss.backward()
            optimizer.step()

            # print statistics (`.data.item()` modernised to `.item()`)
            running_loss += loss.item()
            running_tar_loss += loss2.item()

            # del temporary outputs and loss
            del d0, d1, d2, d3, d4, d5, d6, loss2, loss

            print("[epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train loss: %3f, tar: %3f " % (
                epoch + 1, epoch_num, (i + 1) * batch_size_train, train_num,
                ite_num, running_loss / ite_num4val,
                running_tar_loss / ite_num4val))

        # Save a full resume-able checkpoint at the end of every epoch.
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            },
            os.path.join(model_dir, model_name + str(epoch))
        )
def main():
    """Train U²-Net (optionally with a GAN discriminator + perceptual loss)
    for sky segmentation, with optional Apex AMP and multi-GPU support.

    All settings are the in-function constants below. Logs scalars/images to
    TensorBoard and checkpoints every `save_frq` iterations.
    """
    # ---------------------------------------------------------
    # Configurations
    # ---------------------------------------------------------
    heavy_augmentation = True  # False to use author's default implementation
    gan_training = False
    mixup_augmentation = False
    fullsize_training = False
    multiscale_training = False
    multi_gpu = True
    mixed_precision_training = True
    model_name = "u2net"  # "u2net", "u2netp", "u2net_heavy"
    se_type = None  # "csse", "sse", "cse", None; None to use author's default implementation
    # checkpoint = "saved_models/u2net/u2net.pth"
    checkpoint = None
    checkpoint_netD = None
    w_adv = 0.2  # adversarial loss weight
    w_vgg = 0.2  # VGG perceptual loss weight
    train_dirs = [
        "../datasets/sky_segmentation_dataset/datasets/cvprw2020_sky_seg/train/"
    ]
    train_dirs_file_limit = [
        None,
    ]
    image_ext = '.jpg'
    label_ext = '.png'
    dataset_name = "cvprw2020_sky_seg"
    lr = 0.0003
    epoch_num = 500
    batch_size_train = 48
    # batch_size_val = 1
    workers = 16
    save_frq = 1000  # save the model every 2000 iterations
    save_debug_samples = False
    debug_samples_dir = "./debug/"
    # ---------------------------------------------------------

    model_dir = './saved_models/' + model_name + '/'
    os.makedirs(model_dir, exist_ok=True)
    writer = SummaryWriter()

    if fullsize_training:
        batch_size_train = 1
        multiscale_training = False

    # ---------------------------------------------------------
    # 1. Construct data input pipeline
    # ---------------------------------------------------------
    # Get dataset name
    dataset_name = dataset_name.replace(" ", "_")

    # Get training data
    assert len(train_dirs) == len(train_dirs_file_limit), \
        "Different train dirs and train dirs file limit length!"
    tra_img_name_list = []
    tra_lbl_name_list = []
    for d, flimit in zip(train_dirs, train_dirs_file_limit):
        img_files = glob.glob(d + '**/*' + image_ext, recursive=True)
        if flimit:
            # cap the number of files taken from this directory
            img_files = np.random.choice(img_files, size=flimit, replace=False)
        print(f"directory: {d}, files: {len(img_files)}")
        for img_path in img_files:
            # labels live in a parallel /alpha/ tree with a different extension
            lbl_path = img_path.replace("/image/", "/alpha/") \
                .replace(image_ext, label_ext)
            if os.path.exists(img_path) and os.path.exists(lbl_path):
                assert os.path.splitext(
                    os.path.basename(img_path))[0] == os.path.splitext(
                        os.path.basename(lbl_path))[0], "Wrong filename."
                tra_img_name_list.append(img_path)
                tra_lbl_name_list.append(lbl_path)
            else:
                print(
                    f"Warning, dropping sample {img_path} because label file {lbl_path} not found!"
                )
    tra_img_name_list, tra_lbl_name_list = shuffle(tra_img_name_list,
                                                   tra_lbl_name_list)
    train_num = len(tra_img_name_list)
    # val_num = 0 # unused
    print(f"dataset name : {dataset_name}")
    print(f"training samples : {train_num}")

    # Construct data input pipeline
    if heavy_augmentation:
        transform = AlbuSampleTransformer(
            get_heavy_transform(
                fullsize_training=fullsize_training,
                transform_size=False if
                (fullsize_training or multiscale_training) else True))
    else:
        transform = transforms.Compose([
            RescaleT(320),
            RandomCrop(288),
        ])

    # Create dataset and dataloader
    dataset_kwargs = dict(img_name_list=tra_img_name_list,
                          lbl_name_list=tra_lbl_name_list,
                          transform=transforms.Compose([
                              transform,
                          ] + ([
                              SaveDebugSamples(out_dir=debug_samples_dir),
                          ] if save_debug_samples else []) + ([
                              ToTensorLab(flag=0),
                          ] if not multiscale_training else [])))
    if mixup_augmentation:
        _dataset_cls = MixupAugSalObjDataset
    else:
        _dataset_cls = SalObjDataset
    salobj_dataset = _dataset_cls(**dataset_kwargs)
    salobj_dataloader = DataLoader(
        salobj_dataset,
        batch_size=batch_size_train,
        collate_fn=multi_scale_collater if multiscale_training else None,
        shuffle=True,
        pin_memory=True,
        num_workers=workers)

    # ---------------------------------------------------------
    # 2. Load model
    # ---------------------------------------------------------
    # Instantiate model
    if model_name == "u2net":
        net = U2NET(3, 1, se_type=se_type)
    elif model_name == "u2netp":
        net = U2NETP(3, 1, se_type=se_type)
    elif model_name == "u2net_heavy":
        net = u2net_heavy()
    elif model_name == "custom":
        net = CustomNet()
    else:
        raise ValueError(f"Unknown model_name: {model_name}")

    # Restore model weights from checkpoint
    if checkpoint:
        if not os.path.exists(checkpoint):
            raise FileNotFoundError(f"Checkpoint file not found: {checkpoint}")
        try:
            print(f"Restoring from checkpoint: {checkpoint}")
            net.load_state_dict(torch.load(checkpoint, map_location="cpu"))
            print(" - [x] success")
        except Exception:
            # was a bare `except:` — keep best-effort load but never swallow
            # KeyboardInterrupt/SystemExit
            print(" - [!] error")
    if torch.cuda.is_available():
        net.cuda()

    if gan_training:
        netD = MultiScaleNLayerDiscriminator()
        if checkpoint_netD:
            if not os.path.exists(checkpoint_netD):
                raise FileNotFoundError(
                    f"Discriminator checkpoint file not found: {checkpoint_netD}"
                )
            try:
                print(
                    f"Restoring discriminator from checkpoint: {checkpoint_netD}"
                )
                netD.load_state_dict(
                    torch.load(checkpoint_netD, map_location="cpu"))
                print(" - [x] success")
            except Exception:
                # narrowed from a bare `except:` (see above)
                print(" - [!] error")
        if torch.cuda.is_available():
            netD.cuda()

        # Frozen VGG19 feature extractor for the perceptual loss.
        vgg19 = VGG19Features()
        vgg19.eval()
        if torch.cuda.is_available():
            vgg19 = vgg19.cuda()

    # ---------------------------------------------------------
    # 3. Define optimizer
    # ---------------------------------------------------------
    optimizer = optim.Adam(net.parameters(),
                           lr=lr,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0)
    # optimizer = optim.SGD(net.parameters(), lr=lr)
    # scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=lr/4, max_lr=lr,
    #                                         mode="triangular2",
    #                                         step_size_up=2 * len(salobj_dataloader))
    if gan_training:
        optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(0.5, 0.9))

    # ---------------------------------------------------------
    # 4. Initialize AMP and data parallel stuffs
    # ---------------------------------------------------------
    GOT_AMP = False
    if mixed_precision_training:
        try:
            print("Checking for Apex AMP support...")
            from apex import amp
            GOT_AMP = True
            print(" - [x] yes")
        except ImportError:
            print(" - [!] no")
    if GOT_AMP:
        amp.register_float_function(torch, 'sigmoid')
        net, optimizer = amp.initialize(net, optimizer, opt_level="O1")
        if gan_training:
            netD, optimizerD = amp.initialize(netD,
                                              optimizerD,
                                              opt_level="O1")
            vgg19 = amp.initialize(vgg19, opt_level="O1")

    if torch.cuda.device_count() > 1 and multi_gpu:
        print(f"Multi-GPU training using {torch.cuda.device_count()} GPUs.")
        net = nn.DataParallel(net)
        if gan_training:
            netD = nn.DataParallel(netD)
            vgg19 = nn.DataParallel(vgg19)
    else:
        print(f"Training using {torch.cuda.device_count()} GPUs.")

    # ---------------------------------------------------------
    # 5. Training
    # ---------------------------------------------------------
    print("Start training...")
    ite_num = 0
    ite_num4val = 0      # iterations since the last checkpoint (stats window)
    running_loss = 0.0
    running_bce_loss = 0.0
    running_tar_loss = 0.0
    running_adv_loss = 0.0
    running_per_loss = 0.0
    running_fake_loss = 0.0
    running_real_loss = 0.0
    running_lossD = 0.0

    for epoch in tqdm(range(0, epoch_num), desc="All epochs"):
        net.train()
        if gan_training:
            netD.train()
        for i, data in enumerate(
                tqdm(salobj_dataloader, desc=f"Epoch #{epoch}")):
            ite_num = ite_num + 1
            ite_num4val = ite_num4val + 1

            image_key = "image"
            label_key = "label"
            inputs, labels = data[image_key], data[label_key]
            # tqdm.write(f"input tensor shape: {inputs.shape}")
            inputs = inputs.type(torch.FloatTensor)
            labels = labels.type(torch.FloatTensor)

            # Wrap them in Variable
            if torch.cuda.is_available():
                inputs_v, labels_v = \
                    Variable(inputs.cuda(), requires_grad=False), \
                    Variable(labels.cuda(), requires_grad=False)
            else:
                inputs_v, labels_v = \
                    Variable(inputs, requires_grad=False), \
                    Variable(labels, requires_grad=False)

            # Forward + backward + optimize
            d6 = 0  # placeholder so the `del` below is valid in both branches
            if model_name == "custom":
                d0, d1, d2, d3, d4, d5 = net(inputs_v)
            else:
                d0, d1, d2, d3, d4, d5, d6 = net(inputs_v)

            if gan_training:
                # Discriminator step: real pairs vs. detached fake predictions.
                optimizerD.zero_grad()
                dis_fake = netD(inputs_v, d0.detach())
                dis_real = netD(inputs_v, labels_v)
                loss_fake = bce_with_logits_loss(dis_fake,
                                                 torch.zeros_like(dis_fake))
                loss_real = bce_with_logits_loss(dis_real,
                                                 torch.ones_like(dis_real))
                lossD = loss_fake + loss_real
                if GOT_AMP:
                    with amp.scale_loss(lossD, optimizerD) as scaled_loss:
                        scaled_loss.backward()
                else:
                    lossD.backward()
                optimizerD.step()
                writer.add_scalar("lossD/fake", loss_fake.item(), ite_num)
                writer.add_scalar("lossD/real", loss_real.item(), ite_num)
                writer.add_scalar("lossD/sum", lossD.item(), ite_num)
                running_fake_loss += loss_fake.item()
                running_real_loss += loss_real.item()
                running_lossD += lossD.item()

            # Zero the parameter gradients
            optimizer.zero_grad()

            if model_name == "custom":
                loss2, loss = multi_bce_loss_fusion5(d0, d1, d2, d3, d4, d5,
                                                     labels_v)
            else:
                loss2, loss = multi_bce_loss_fusion(d0, d1, d2, d3, d4, d5,
                                                    d6, labels_v)
            writer.add_scalar("lossG/bce", loss.item(), ite_num)
            running_bce_loss += loss.item()

            if gan_training:
                # Adversarial loss
                loss_adv = 0.0
                if w_adv:
                    dis_fake = netD(inputs_v, d0)
                    loss_adv = bce_with_logits_loss(dis_fake,
                                                    torch.ones_like(dis_fake))
                # Perceptual loss
                loss_per = 0.0
                if w_vgg:
                    vgg19_fm_pred = vgg19(inputs_v * d0)
                    vgg19_fm_label = vgg19(inputs_v * labels_v)
                    loss_per = mae_loss(vgg19_fm_pred, vgg19_fm_label)
                loss = loss + w_adv * loss_adv + w_vgg * loss_per

            if GOT_AMP:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
            # scheduler.step()

            writer.add_scalar("lossG/sum", loss.item(), ite_num)
            writer.add_scalar("lossG/loss2", loss2.item(), ite_num)
            running_loss += loss.item()
            running_tar_loss += loss2.item()
            if gan_training:
                writer.add_scalar("lossG/adv", loss_adv.item(), ite_num)
                writer.add_scalar("lossG/perceptual", loss_per.item(),
                                  ite_num)
                running_adv_loss += loss_adv.item()
                running_per_loss += loss_per.item()

            if ite_num % 200 == 0:
                # periodic qualitative samples
                writer.add_images("inputs", inv_normalize(inputs_v), ite_num)
                writer.add_images("labels", labels_v, ite_num)
                writer.add_images("preds", d0, ite_num)

            # Delete temporary outputs and loss
            del d0, d1, d2, d3, d4, d5, d6, loss2, loss
            if gan_training:
                del dis_fake, dis_real, loss_fake, loss_real, lossD, loss_adv, vgg19_fm_pred, vgg19_fm_label, loss_per

            # Print stats
            tqdm.write(
                "[epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train G/sum: %3f, G/bce: %3f, G/bce_tar: %3f, G/adv: %3f, G/percept: %3f, D/fake: %3f, D/real: %3f, D/sum: %3f"
                % (epoch + 1, epoch_num, (i + 1) * batch_size_train, train_num,
                   ite_num, running_loss / ite_num4val,
                   running_bce_loss / ite_num4val,
                   running_tar_loss / ite_num4val,
                   running_adv_loss / ite_num4val,
                   running_per_loss / ite_num4val,
                   running_fake_loss / ite_num4val,
                   running_real_loss / ite_num4val,
                   running_lossD / ite_num4val))

            if ite_num % save_frq == 0:
                # Save checkpoint
                torch.save(
                    net.module.state_dict() if hasattr(
                        net, "module") else net.state_dict(),
                    model_dir + model_name +
                    (("_" + se_type) if se_type else "") +
                    ("_" + dataset_name) +
                    ("_mixup_aug" if mixup_augmentation else "") +
                    ("_heavy_aug" if heavy_augmentation else "") +
                    ("_fullsize" if fullsize_training else "") +
                    ("_multiscale" if multiscale_training else "") +
                    "_bce_itr_%d_train_%3f_tar_%3f.pth" %
                    (ite_num, running_loss / ite_num4val,
                     running_tar_loss / ite_num4val))
                if gan_training:
                    torch.save(
                        netD.module.state_dict()
                        if hasattr(netD, "module") else netD.state_dict(),
                        model_dir + "netD_" + model_name +
                        (("_" + se_type) if se_type else "") +
                        ("_" + dataset_name) +
                        ("_mixup_aug" if mixup_augmentation else "") +
                        ("_heavy_aug" if heavy_augmentation else "") +
                        ("_fullsize" if fullsize_training else "") +
                        ("_multiscale" if multiscale_training else "") +
                        "itr_%d.pth" % (ite_num))
                # Reset stats
                running_loss = 0.0
                running_bce_loss = 0.0
                running_tar_loss = 0.0
                running_adv_loss = 0.0
                running_per_loss = 0.0
                running_fake_loss = 0.0
                running_real_loss = 0.0
                running_lossD = 0.0
                ite_num4val = 0
                net.train()  # resume train
                if gan_training:
                    netD.train()

    writer.close()
    print("Training completed successfully.")
img_list = glob.glob(img_dir + '*' + ext) mask_list = [] for img_path in img_list: img_name = img_path.split("/")[-1] aaa = img_name.split(".") bbb = aaa[0:-1] imidx = bbb[0] for i in range(1, len(bbb)): imidx = imidx + "." + bbb[i] mask_list.append(mask_dir + imidx + ext) ''' 读取数据集 ''' salobj_dataset = SalObjDataset(img_name_list=img_list, lbl_name_list=mask_list, transform=transforms.Compose([ RescaleT(args.scale), RandomCrop(args.crop), ToTensorLab(flag=0) ])) salobj_dataloader = DataLoader(salobj_dataset, batch_size=batch_size_train, shuffle=True, num_workers=1) ''' 加在网络 ''' net = U2NET(3, 1) net.cuda() # 是否多卡训练 net = nn.DataParallel(net) optimizer = optim.Adam(net.parameters(), lr=args.lr, betas=(0.9, 0.999), eps=1e-08,
def main():
    """Original U²-Net training entry point on the augmented DUTS-TR set.

    Defines the deep-supervision BCE loss, builds the dataset/dataloader,
    instantiates U2NET/U2NETP, and runs the training loop, checkpointing
    every `save_frq` iterations.
    """
    # ------- 1. define loss function --------
    # `size_average=True` is deprecated; `reduction='mean'` is the exact
    # modern equivalent.
    bce_loss = nn.BCELoss(reduction='mean')

    def muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6, labels_v):
        """Deep-supervision loss: BCE on the fused output d0 and each of the
        six side outputs. Returns (loss on d0, sum of all seven losses); the
        sum is what gets backpropagated."""
        loss0 = bce_loss(d0, labels_v)
        loss1 = bce_loss(d1, labels_v)
        loss2 = bce_loss(d2, labels_v)
        loss3 = bce_loss(d3, labels_v)
        loss4 = bce_loss(d4, labels_v)
        loss5 = bce_loss(d5, labels_v)
        loss6 = bce_loss(d6, labels_v)
        loss = loss0 + loss1 + loss2 + loss3 + loss4 + loss5 + loss6
        # `.data[0]` raises IndexError on 0-dim tensors in PyTorch >= 0.5;
        # `.item()` is the supported replacement.
        print(
            "l0: %3f, l1: %3f, l2: %3f, l3: %3f, l4: %3f, l5: %3f, l6: %3f\n"
            % (loss0.item(), loss1.item(), loss2.item(), loss3.item(),
               loss4.item(), loss5.item(), loss6.item()))
        return loss0, loss

    # ------- 2. set the directory of training dataset --------
    model_name = 'u2net'  #'u2netp'
    data_dir = './train_data/'
    tra_image_dir = 'DUTS/DUTS-TR/DUTS-TR/im_aug/'
    tra_label_dir = 'DUTS/DUTS-TR/DUTS-TR/gt_aug/'
    image_ext = '.jpg'
    label_ext = '.png'
    model_dir = './saved_models/' + model_name + '/'
    # robustness: torch.save below fails if the directory does not exist
    os.makedirs(model_dir, exist_ok=True)
    epoch_num = 100000
    batch_size_train = 12
    batch_size_val = 1
    train_num = 0
    val_num = 0

    # Pair each image with its label via the shared basename (extension
    # stripped, inner dots preserved).
    tra_img_name_list = glob.glob(data_dir + tra_image_dir + '*' + image_ext)
    tra_lbl_name_list = []
    for img_path in tra_img_name_list:
        imidx = os.path.splitext(os.path.basename(img_path))[0]
        tra_lbl_name_list.append(data_dir + tra_label_dir + imidx + label_ext)

    print("---")
    print("train images: ", len(tra_img_name_list))
    print("train labels: ", len(tra_lbl_name_list))
    print("---")

    train_num = len(tra_img_name_list)

    salobj_dataset = SalObjDataset(img_name_list=tra_img_name_list,
                                   lbl_name_list=tra_lbl_name_list,
                                   transform=transforms.Compose([
                                       RescaleT(320),
                                       RandomCrop(288),
                                       ToTensorLab(flag=0)
                                   ]))
    salobj_dataloader = DataLoader(salobj_dataset,
                                   batch_size=batch_size_train,
                                   shuffle=True,
                                   num_workers=1)

    # ------- 3. define model --------
    # define the net
    if (model_name == 'u2net'):
        net = U2NET(3, 1)
    elif (model_name == 'u2netp'):
        net = U2NETP(3, 1)
    if torch.cuda.is_available():
        net.cuda()

    # ------- 4. define optimizer --------
    print("---define optimizer...")
    optimizer = optim.Adam(net.parameters(),
                           lr=0.001,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0)

    # ------- 5. training process --------
    print("---start training...")
    ite_num = 0
    running_loss = 0.0
    running_tar_loss = 0.0
    ite_num4val = 0  # iterations since the last checkpoint (stats window)
    save_frq = 2000  # save the model every 2000 iterations

    for epoch in range(0, epoch_num):
        net.train()
        for i, data in enumerate(salobj_dataloader):
            ite_num = ite_num + 1
            ite_num4val = ite_num4val + 1

            inputs, labels = data['image'], data['label']
            inputs = inputs.type(torch.FloatTensor)
            labels = labels.type(torch.FloatTensor)

            # wrap them in Variable
            if torch.cuda.is_available():
                inputs_v, labels_v = Variable(inputs.cuda(),
                                              requires_grad=False), Variable(
                                                  labels.cuda(),
                                                  requires_grad=False)
            else:
                inputs_v, labels_v = Variable(
                    inputs, requires_grad=False), Variable(
                        labels, requires_grad=False)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            d0, d1, d2, d3, d4, d5, d6 = net(inputs_v)
            loss2, loss = muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6,
                                               labels_v)

            loss.backward()
            optimizer.step()

            # print statistics (`.data[0]` modernised to `.item()`)
            running_loss += loss.item()
            running_tar_loss += loss2.item()

            # del temporary outputs and loss
            del d0, d1, d2, d3, d4, d5, d6, loss2, loss

            print(
                "[epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train loss: %3f, tar: %3f "
                % (epoch + 1, epoch_num, (i + 1) * batch_size_train, train_num,
                   ite_num, running_loss / ite_num4val,
                   running_tar_loss / ite_num4val))

            if ite_num % save_frq == 0:
                torch.save(
                    net.state_dict(), model_dir + model_name +
                    "_bce_itr_%d_train_%3f_tar_%3f.pth" %
                    (ite_num, running_loss / ite_num4val,
                     running_tar_loss / ite_num4val))
                running_loss = 0.0
                running_tar_loss = 0.0
                net.train()  # resume train
                ite_num4val = 0
def main():
    # U²-Net-P training entry point with TensorBoard scalar logging.
    # NOTE(review): relies on module-level names defined elsewhere in this
    # file: `writer` (a SummaryWriter) and `muti_bce_loss_fusion` — confirm
    # both exist before calling.
    # ------- 2. set the directory of training dataset --------
    model_name = 'u2netp'  #'u2netp'

    data_dir = os.path.join(os.getcwd(), 'train_data' + os.sep)
    tra_image_dir = os.path.join('DUTS', 'DUTS-TR', 'im_aug' + os.sep)
    tra_label_dir = os.path.join('DUTS', 'DUTS-TR', 'gt_aug' + os.sep)

    image_ext = '.jpg'
    label_ext = '.png'

    model_dir = os.path.join(os.getcwd(), 'saved_models', model_name + os.sep)

    epoch_num = 100000
    batch_size_train = 8
    batch_size_val = 1
    train_num = 0
    val_num = 0

    tra_img_name_list = glob.glob(data_dir + tra_image_dir + '*' + image_ext)

    tra_lbl_name_list = []
    for img_path in tra_img_name_list:
        # Derive the label path from the image basename: drop only the final
        # extension while preserving any inner dots in the filename.
        img_name = img_path.split(os.sep)[-1]
        aaa = img_name.split(".")
        bbb = aaa[0:-1]
        imidx = bbb[0]
        for i in range(1, len(bbb)):
            imidx = imidx + "." + bbb[i]
        tra_lbl_name_list.append(data_dir + tra_label_dir + imidx + label_ext)

    print("---")
    print("train images: ", len(tra_img_name_list))
    print("train labels: ", len(tra_lbl_name_list))
    print("---")

    train_num = len(tra_img_name_list)

    salobj_dataset = SalObjDataset(img_name_list=tra_img_name_list,
                                   lbl_name_list=tra_lbl_name_list,
                                   transform=transforms.Compose([
                                       RescaleT(320),
                                       RandomCrop(288),
                                       ToTensorLab(flag=0)
                                   ]))
    salobj_dataloader = DataLoader(salobj_dataset,
                                   batch_size=batch_size_train,
                                   shuffle=True,
                                   num_workers=1)

    # ------- 3. define model --------
    # define the net
    if (model_name == 'u2net'):
        net = U2NET(3, 1)
    elif (model_name == 'u2netp'):
        net = U2NETP(3, 1)
    if torch.cuda.is_available():
        net.cuda()

    # ------- 4. define optimizer --------
    print("---define optimizer...")
    optimizer = optim.Adam(net.parameters(),
                           lr=0.001,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0)

    # ------- 5. training process --------
    print("---start training...")
    ite_num = 0
    running_loss = 0.0
    running_tar_loss = 0.0
    # ite_num4val counts iterations since the last checkpoint; running losses
    # are averaged over this window in the log line below.
    ite_num4val = 0
    save_frq = 2000  # save the model every 2000 iterations

    for epoch in range(0, epoch_num):
        net.train()
        for i, data in enumerate(salobj_dataloader):
            print("I:", i)
            ite_num = ite_num + 1
            ite_num4val = ite_num4val + 1

            inputs, labels = data['image'], data['label']

            # Training monitoring
            # img_grid = make_grid(inputs) # these are not random
            # matplotlib_imshow(img_grid, one_channel=True)
            # writer.add_image('four_training', img_grid)

            inputs = inputs.type(torch.FloatTensor)
            labels = labels.type(torch.FloatTensor)

            # wrap them in Variable
            if torch.cuda.is_available():
                inputs_v, labels_v = Variable(inputs.cuda(),
                                              requires_grad=False), Variable(
                                                  labels.cuda(),
                                                  requires_grad=False)
            else:
                inputs_v, labels_v = Variable(
                    inputs, requires_grad=False), Variable(
                        labels, requires_grad=False)

            # y zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            d0, d1, d2, d3, d4, d5, d6 = net(inputs_v)
            loss2, loss = muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6,
                                               labels_v)

            loss.backward()
            optimizer.step()

            # # print statistics
            running_loss += loss.item()
            running_tar_loss += loss2.item()

            if i % 50 == 0:  # every 50 mini-batches...
                print("LOggin to TeNsOrBOARDs\n\n")

                # ...log the running loss
                writer.add_scalar('training loss',
                                  running_loss / ite_num4val,
                                  epoch * len(salobj_dataloader) + i)

                writer.add_scalar('tar loss',
                                  running_tar_loss / ite_num4val,
                                  epoch * len(salobj_dataloader) + i)

            # del temporary outputs and loss
            del d0, d1, d2, d3, d4, d5, d6, loss2, loss

            print(
                "[epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train loss: %3f, tar: %3f "
                % (epoch + 1, epoch_num, (i + 1) * batch_size_train, train_num,
                   ite_num, running_loss / ite_num4val,
                   running_tar_loss / ite_num4val))

            if ite_num % save_frq == 0:
                # Checkpoint: weights only; filename embeds iteration and the
                # windowed average losses.
                torch.save(
                    net.state_dict(), model_dir + model_name +
                    "_bce_itr_%d_train_%3f_tar_%3f.pth" %
                    (ite_num, running_loss / ite_num4val,
                     running_tar_loss / ite_num4val))
                running_loss = 0.0
                running_tar_loss = 0.0
                net.train()  # resume train
                ite_num4val = 0