import pandas as pd
import torch
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split


def main():
    # ======== Define the model (network architecture, loss function, optimizer) ========
    model_params = {}
    model_params['architecture'] = arch
    model = init_network(model_params)

    # Move the network to the GPU
    model.cuda()

    # Define the loss function (criterion)
    try:
        criterion = eval(lossfnc)().cuda()
    except Exception:
        raise RuntimeError("Loss {} not available!".format(lossfnc))

    optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

    start_epoch = 0
    best_epoch = 0
    best_dice = 0

    # Define the scheduler -- adjusts the learning rate dynamically
    try:
        scheduler = eval(scheduler2)()
    except Exception:
        raise RuntimeError("Scheduler {} not available!".format(scheduler2))

    optimizer = scheduler.schedule(model, start_epoch, epochs)[0]

    # Data loading code
    train_csv = pd.read_csv(path + 'train.csv')  # 50272 rows in the full set

    # Reduce the sample count -- keep roughly one tenth of the data
    train_csv = train_csv[:5000]

    # Split 'ImageId_ClassId' into separate columns, then pivot so each row
    # holds the RLE masks of all four defect classes for one image
    train_csv['ImageId'], train_csv['ClassId'] = zip(
        *train_csv['ImageId_ClassId'].str.split('_'))
    train_csv['ClassId'] = train_csv['ClassId'].astype(int)
    train_csv = pd.pivot(train_csv, index='ImageId', columns='ClassId',
                         values='EncodedPixels')
    train_csv['defects'] = train_csv.count(axis=1)

    # Stratify on the number of defects per image so both splits share the
    # same defect distribution
    train_data, val_data = train_test_split(train_csv, test_size=0.2,
                                            stratify=train_csv['defects'],
                                            random_state=69)

    train_dataset = ImageData(train_data, path, dataAugumentation,
                              mean=(0.485, 0.456, 0.406),
                              std=(0.229, 0.224, 0.225),
                              subset="train")
    # train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
    myTrainSample = MyBalanceClassSampler(train_dataset)
    train_loader = DataLoader(dataset=train_dataset, batch_size=1,
                              sampler=myTrainSample, pin_memory=True)

    valid_dataset = ImageData(val_data, path, augmentation=dataAugumentation,
                              mean=(0.485, 0.456, 0.406),
                              std=(0.229, 0.224, 0.225),
                              subset="train")
    # valid_loader = DataLoader(dataset=valid_dataset, batch_size=8, shuffle=True)
    myValSample = MyBalanceClassSampler(valid_dataset)
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=1,
                              sampler=myValSample, pin_memory=True)

    # Per-epoch metric histories
    train_loss_list, train_dice_list = [], []
    val_loss_list, val_dice_list = [], []

    start_epoch += 1
    for epoch in range(start_epoch, epochs + 1):
        # Adjust the learning rate for this epoch
        lr_list = scheduler.step(model, epoch, epochs)
        lr = lr_list[0]

        # Train for one epoch on the training set
        n_iter, train_loss, train_dice = train(train_loader, model, criterion,
                                               optimizer, epoch, lr=lr)

        # Evaluate on the validation set without tracking gradients
        with torch.no_grad():
            valid_loss, valid_dice = validate(valid_loader, model, criterion,
                                              epoch)

        train_loss_list.append(train_loss)
        train_dice_list.append(train_dice)
        val_loss_list.append(valid_loss)
        val_dice_list.append(valid_dice)

        # Remember the best dice score and save a checkpoint
        is_best = valid_dice >= best_dice
        if epoch > 10:
            if is_best or epoch == epochs:
                best_epoch = epoch
                best_dice = valid_dice
                print('\r', end='', flush=True)
                model_name = 'epoch' + '%03d' % epoch + '_' + '%.2f' % best_dice
                save_model(model, model_out_dir, epoch, model_name,
                           optimizer=optimizer, best_epoch=best_epoch,
                           best_dice=best_dice)
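# MyBalanceClassSampler is defined elsewhere in this repo; the sketch below is
# only an illustration of what a balanced class sampler typically does
# (oversample minority classes so every defect count is drawn equally often).
# The constructor here takes a plain label array rather than the ImageData
# dataset used above -- an assumption made to keep the example self-contained.
import numpy as np
from torch.utils.data import Sampler


class BalanceClassSamplerSketch(Sampler):
    """Draw every class equally often by resampling with replacement."""

    def __init__(self, labels):
        self.labels = np.asarray(labels)
        self.classes = np.unique(self.labels)
        # match the size of the largest class so smaller classes are oversampled
        self.per_class = max(int((self.labels == c).sum()) for c in self.classes)

    def __iter__(self):
        indices = []
        for c in self.classes:
            pool = np.where(self.labels == c)[0]
            # sample with replacement so minority classes can repeat
            indices.append(np.random.choice(pool, self.per_class, replace=True))
        indices = np.concatenate(indices)
        np.random.shuffle(indices)
        return iter(indices.tolist())

    def __len__(self):
        return self.per_class * len(self.classes)


# Usage (hypothetical): sampler = BalanceClassSamplerSketch(train_data['defects'].values)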
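# lossfnc is resolved by name via eval() in main(); the class below is a
# hypothetical example of a loss that pattern could resolve to. Soft Dice is a
# common choice when dice is the tracked metric, but the repo's actual loss is
# defined elsewhere.
import torch
import torch.nn as nn


class SoftDiceLoss(nn.Module):
    """1 - mean soft Dice coefficient over the batch, from raw logits."""

    def __init__(self, smooth=1.0):
        super().__init__()
        self.smooth = smooth

    def forward(self, logits, targets):
        probs = torch.sigmoid(logits)
        # flatten each sample so the overlap is computed per image
        probs = probs.view(probs.size(0), -1)
        targets = targets.view(targets.size(0), -1)
        intersection = (probs * targets).sum(dim=1)
        dice = (2 * intersection + self.smooth) / (
            probs.sum(dim=1) + targets.sum(dim=1) + self.smooth)
        return 1 - dice.mean()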