def main(cls_label):
    """Train a single-class steel-defect segmentation model for `cls_label`.

    Relies on module-level configuration (arch, lossfnc, scheduler2, path,
    epochs, set_batch_size, model_out_dir) and module-level history lists
    (train_loss_list, train_dice_list, val_loss_list, val_dice_list), which
    are appended to each epoch and plotted at the end.
    """
    # =========== Define the model: network, loss function, optimizer ========
    model_params = {}
    model_params['architecture'] = arch
    model = init_network(model_params)

    # move network to gpu
    model.cuda()

    # define loss function (criterion) from its configured class name.
    # NOTE(review): eval() on a config string is only acceptable because the
    # value comes from trusted local configuration, never user input.
    try:
        criterion = eval(lossfnc)().cuda()
    except Exception as exc:
        raise RuntimeError("Loss {} not available!".format(lossfnc)) from exc

    start_epoch = 0
    best_epoch = 0
    best_dice = 0

    # Define the scheduler (dynamic learning-rate adjustment); it also builds
    # the optimizer actually used for training.  The previous version created
    # a throw-away Adam optimizer here that was immediately overwritten.
    try:
        scheduler = eval(scheduler2)()
    except Exception as exc:
        raise RuntimeError(
            "Scheduler {} not available!".format(scheduler2)) from exc
    optimizer = scheduler.schedule(model, start_epoch, epochs)[0]

    # Data loading: split 'ImageId_ClassId', pivot to one row per image with
    # one RLE-mask column per defect class, then keep only images that have a
    # non-null mask for `cls_label`.
    train_csv = pd.read_csv(path + 'train.csv')  # 50272 rows
    train_csv['ImageId'], train_csv['ClassId'] = zip(
        *train_csv['ImageId_ClassId'].str.split('_'))
    train_csv['ClassId'] = train_csv['ClassId'].astype(int)
    train_csv = pd.pivot(train_csv, index='ImageId', columns='ClassId',
                         values='EncodedPixels')
    train_csv = train_csv[(pd.isnull(train_csv[cls_label]).astype(
        np.int32) == 0)]
    train_csv['defects'] = train_csv.count(axis=1)

    # train_data, val_data = train_test_split(train_csv, test_size = 0.2, stratify = train_csv['defects'], random_state=69)
    train_data, val_data = train_test_split(train_csv, test_size=0.2,
                                            random_state=69)

    train_dataset = ImageDataSingleClsWithValidCrop(
        train_data, cls_label, path, dataAugumentationAfterCrop,
        mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
        subset="train")
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=set_batch_size,
                              shuffle=True, pin_memory=True)
    # myTrainSample = MyBalanceClassSampler(train_dataset)
    # train_loader = DataLoader(dataset=train_dataset, batch_size=8, sampler=myTrainSample, pin_memory=True)

    valid_dataset = ImageDataSingleClsWithValidCrop(
        val_data, cls_label, path, dataAugumentationAfterCrop,
        mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
        subset="valid")
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=set_batch_size,
                              shuffle=True, pin_memory=True)
    # myValSample = MyBalanceClassSampler(valid_dataset)
    # valid_loader = DataLoader(dataset=valid_dataset, batch_size=8, sampler=myValSample, pin_memory=True)

    start_epoch += 1
    for epoch in range(start_epoch, epochs + 1):
        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, epochs)
        lr = lr_list[0]

        # train for one epoch on the train set (`iter_count` renamed from
        # `iter`, which shadowed the builtin)
        iter_count, train_loss, train_dice = train(train_loader, model,
                                                   criterion, optimizer,
                                                   epoch, lr=lr)
        with torch.no_grad():
            valid_loss, valid_dice = validate(valid_loader, model, criterion,
                                              epoch)

        # history lists are module-level; the curves are plotted below
        train_loss_list.append(train_loss)
        train_dice_list.append(train_dice)
        val_loss_list.append(valid_loss)
        val_dice_list.append(valid_dice)

        # remember best dice and save a checkpoint; checkpoints are only
        # written after a 5-epoch warm-up, or at the very last epoch
        is_best = valid_dice >= best_dice
        if epoch > 5:
            if is_best or epoch == epochs:
                best_epoch = epoch
                best_dice = valid_dice
                print('\r', end='', flush=True)
                model_name = 'epoch' + '%03d' % epoch + '_' + '%.2f' % best_dice
                save_model(model, model_out_dir, epoch, model_name,
                           optimizer=optimizer, best_epoch=best_epoch,
                           best_dice=best_dice)

    myplot(train_loss_list, "train_loss")
    myplot(train_dice_list, "train_dice")
    myplot(val_loss_list, "val_loss")
    myplot(val_dice_list, "val_dice")
def main():
    """Train the SIIM segmentation model for one fold.

    Reads all configuration from the module-level argparse `parser`; writes
    logs under RESULT_DIR/logs and checkpoints (raw and, optionally, EMA)
    under RESULT_DIR/models, keeping the top-3 epochs by validation dice.
    """
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.train.txt'), mode='a')

    model_out_dir = opj(RESULT_DIR, 'models', args.out_dir,
                        'fold%d' % args.fold)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        model_out_dir))
    if not ope(model_out_dir):
        os.makedirs(model_out_dir)

    # set cuda visible device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True

    # set random seeds
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    # move network to gpu
    model = DataParallel(model)
    model.cuda()

    # optional exponential-moving-average copy of the model for validation
    if args.ema:
        ema_model = copy.deepcopy(model)
        ema_model.cuda()
    else:
        ema_model = None

    # define loss function (criterion); narrow except so unrelated errors in
    # the loss constructor are not silently re-labelled, and chain the cause
    try:
        criterion = eval(args.loss)().cuda()
    except Exception as exc:
        raise RuntimeError("Loss {} not available!".format(args.loss)) from exc

    start_epoch = 0
    best_epoch = 0
    best_dice = 0
    best_dice_arr = np.zeros(3)  # rolling top-3 validation dice scores

    # define scheduler; it also builds the optimizer used for training
    try:
        scheduler = eval(args.scheduler)()
    except Exception as exc:
        raise RuntimeError("Scheduler {} not available!".format(
            args.scheduler)) from exc
    optimizer = scheduler.schedule(model, start_epoch, args.epochs)[0]

    # optionally resume from a checkpoint
    if args.resume:
        model_fpath = os.path.join(model_out_dir, args.resume)
        if os.path.isfile(model_fpath):
            # load checkpoint weights and update model and optimizer
            log.write(">> Loading checkpoint:\n>> '{}'\n".format(model_fpath))
            checkpoint = torch.load(model_fpath)
            start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_dice_arr = checkpoint['best_dice_arr']
            best_dice = np.max(best_dice_arr)
            model.module.load_state_dict(checkpoint['state_dict'])

            # optimizer state is stored next to the weights with '_optim'
            optimizer_fpath = model_fpath.replace('.pth', '_optim.pth')
            if ope(optimizer_fpath):
                log.write(">> Loading checkpoint:\n>> '{}'\n".format(
                    optimizer_fpath))
                optimizer.load_state_dict(
                    torch.load(optimizer_fpath)['optimizer'])

            # EMA weights are stored with an '_ema' suffix
            if args.ema:
                ema_model_fpath = model_fpath.replace('.pth', '_ema.pth')
                if ope(ema_model_fpath):
                    log.write(">> Loading checkpoint:\n>> '{}'\n".format(
                        ema_model_fpath))
                    ema_model.module.load_state_dict(
                        torch.load(ema_model_fpath)['state_dict'])

            log.write(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})\n".format(
                model_fpath, checkpoint['epoch']))
        else:
            log.write(">> No checkpoint found at '{}'\n".format(model_fpath))

    # Data loading code
    train_transform = train_multi_augment9
    train_split_file = opj(DATA_DIR, args.split_type, args.split_name,
                           'random_train_cv%d.csv' % args.fold)
    train_dataset = SiimDataset(
        train_split_file,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=train_transform,
        return_label=True,
        crop_version=args.crop_version,
        pseudo=args.pseudo,
        pseudo_ratio=args.pseudo_ratio,
        dataset='train',
    )
    # optionally over-sample to balance classes, otherwise plain shuffling
    if args.is_balance:
        train_sampler = BalanceClassSampler(
            train_dataset, args.sample_times * len(train_dataset))
    else:
        train_sampler = RandomSampler(train_dataset)
    train_loader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True,
    )

    valid_split_file = opj(DATA_DIR, args.split_type, args.split_name,
                           'random_valid_cv%d.csv' % args.fold)
    valid_dataset = SiimDataset(
        valid_split_file,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=True,
        crop_version=args.crop_version,
        dataset='val',
    )
    # validation uses half the batch size (at least 1) and keeps file order
    valid_loader = DataLoader(valid_dataset,
                              sampler=SequentialSampler(valid_dataset),
                              batch_size=max(int(args.batch_size // 2), 1),
                              drop_last=False,
                              num_workers=args.workers,
                              pin_memory=True)

    log.write('** start training here! **\n')
    log.write('\n')
    log.write(
        'epoch iter rate | smooth_loss/dice | valid_loss/dice | best_epoch/best_score | min \n'
    )
    log.write(
        '------------------------------------------------------------------------------------------------\n'
    )

    start_epoch += 1
    for epoch in range(start_epoch, args.epochs + 1):
        end = time.time()

        # set manual seeds per epoch for reproducible shuffling/augmentation
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, args.epochs)
        lr = lr_list[0]

        # train for one epoch on train set (`iter_count` renamed from `iter`,
        # which shadowed the builtin)
        iter_count, train_loss, train_dice = train(train_loader, model,
                                                   ema_model, criterion,
                                                   optimizer, epoch, args,
                                                   lr=lr)

        # validate with the EMA weights when enabled, otherwise the raw model
        with torch.no_grad():
            if args.ema:
                valid_loss, valid_dice = validate(valid_loader, ema_model,
                                                  criterion, epoch)
            else:
                valid_loss, valid_dice = validate(valid_loader, model,
                                                  criterion, epoch)

        # remember best loss and save checkpoint
        is_best = valid_dice >= best_dice
        if is_best:
            best_epoch = epoch
            best_dice = valid_dice

        if args.ema:
            save_top_epochs(model_out_dir, ema_model, best_dice_arr,
                            valid_dice, best_epoch, epoch, best_dice,
                            ema=True)
        best_dice_arr = save_top_epochs(model_out_dir, model, best_dice_arr,
                                        valid_dice, best_epoch, epoch,
                                        best_dice, ema=False)

        print('\r', end='', flush=True)
        log.write('%5.1f %5d %0.6f | %0.4f %0.4f | %0.4f %6.4f | %6.1f %6.4f | %3.1f min \n' % \
            (epoch, iter_count + 1, lr, train_loss, train_dice, valid_loss,
             valid_dice, best_epoch, best_dice, (time.time() - end) / 60))

        model_name = '%03d' % epoch
        if args.ema:
            save_model(ema_model, model_out_dir, epoch, model_name,
                       best_dice_arr, is_best=is_best, optimizer=optimizer,
                       best_epoch=best_epoch, best_dice=best_dice, ema=True)
        save_model(model, model_out_dir, epoch, model_name, best_dice_arr,
                   is_best=is_best, optimizer=optimizer,
                   best_epoch=best_epoch, best_dice=best_dice, ema=False)
def main():
    """Run test-time prediction for the steel model and write submissions.

    Loads the checkpoint selected by --predict_epoch (raw or EMA), then
    predicts once per requested test-time augmentation, writing each result
    into its own sub-directory under the fold's submission directory.
    """
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, f'fold{args.fold}')
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    # EMA checkpoints live next to the raw ones with an '_ema' suffix
    if args.ema:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           f'fold{args.fold}',
                           f'{args.predict_epoch}_ema.pth')
    else:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           f'fold{args.fold}', f'{args.predict_epoch}.pth')

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         f'fold{args.fold}', f'epoch_{args.predict_epoch}')
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    # validate the requested test-time augmentations up front
    args.augment = args.augment.split(',')
    for augment in args.augment:
        if augment not in augment_list:
            raise ValueError(
                'Unsupported or unknown test augmentation: {}!'.format(
                    augment))

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(
        checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code: 'test' predicts the sample-submission images,
    # 'val' predicts this fold's held-out validation split
    dataset = args.dataset
    if dataset == 'test':
        steel_test_df = pd.read_csv(opj('..', 'input',
                                        'sample_submission.csv'))
    elif dataset == 'val':
        steel_test_df = pd.read_csv(
            opj(DATA_DIR, args.split_type, args.split_name,
                f'random_valid_cv{args.fold}.csv'))
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))

    steel_test_df['ImageId'], steel_test_df['ClassId'] = zip(
        *steel_test_df['ImageId_ClassId'].apply(lambda x: x.split('_')))
    imageId = pd.DataFrame(steel_test_df['ImageId'].unique(),
                           columns=['ImageId'])
    test_dataset = SteelDataset(
        imageId,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=False,
        dataset=args.dataset,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    for augment in args.augment:
        # resolve the (already validated) augment/unaugment function pair by
        # name from module globals instead of eval()
        test_loader.dataset.transform = globals()['augment_%s' % augment]
        unaugment_func = globals()['unaugment_%s' % augment]
        sub_submit_out_dir = opj(submit_out_dir, augment)
        if not ope(sub_submit_out_dir):
            os.makedirs(sub_submit_out_dir)
        with torch.no_grad():
            predict(test_loader, model, sub_submit_out_dir, dataset, args,
                    unaugment_func=unaugment_func)
subset="train")
# NOTE(review): the line above is the tail of a dataset-constructor call whose
# opening is outside this chunk — presumably `train_set = SomeDataset(...,
# subset="train")`; verify against the preceding lines of the full file.
train_set_loader = DataLoader(dataset=train_set, batch_size=16, shuffle=True)


def metric(logit, truth, threshold=0.5):
    """Return the dice score of `logit` against `truth` at `threshold`.

    Thin wrapper over the project-level `dice_score` helper.
    """
    dice = dice_score(logit, truth, threshold=threshold)
    return dice


# --- script-level training setup (flat statements, not inside a function) ---
device = torch.device("cuda:0")
from networks.imageunet import init_network
model_params = {}
model_params['architecture'] = "unet_resnet34_cbam_v0a"
net = init_network(model_params)
# net.load_state_dict(torch.load("D:/chenyiwen/steel/george/fcn/model_bceabdnormalization.pth"))
# net.eval()
net = net.to(device)
# Alternative losses kept for experimentation; BCE-with-logits is active.
# criterion = ComboLoss({'dice': 1.0, 'bce': 1.0}, per_image=True).cuda()
criterion = nn.BCEWithLogitsLoss().cuda()
# criterion = SymmetricLovaszLoss().cuda()
optimizer = torch.optim.Adam(net.parameters(), lr=0.00001)
# optimizer = torch.optim.SGD(net.parameters(), weight_decay=1e-4, lr = 0.001, momentum=0.9)
# summary(net, (1, 32, 32))
phase = "train"
losses = []  # per-iteration loss history, filled by the training loop below
def main():
    """Predict masks with the SIIM model and write per-augmentation outputs.

    Loads the checkpoint selected by --predict_epoch (raw or EMA), resolves
    the requested dataset split, and runs `predict` once per test-time
    augmentation into its own sub-directory of the submission directory.
    """
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    # EMA checkpoints live next to the raw ones with an '_ema' suffix
    if args.ema:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           'fold%d' % args.fold,
                           '%s_ema.pth' % args.predict_epoch)
    else:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           'fold%d' % args.fold,
                           '%s.pth' % args.predict_epoch)

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         'fold%d' % args.fold,
                         'epoch_%s' % args.predict_epoch)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(
        submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    # validate the requested test-time augmentations up front
    args.augment = args.augment.split(',')
    for augment in args.augment:
        if augment not in augment_list:
            raise ValueError(
                'Unsupported or unknown test augmentation: {}!'.format(
                    augment))

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(
        checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code: map each supported dataset name to its split file
    dataset = args.dataset
    split_files = {
        'train': opj(DATA_DIR, args.split_type, 'train.csv'),
        'test': opj(DATA_DIR, args.split_type, 'test.csv'),
        'val': opj(DATA_DIR, args.split_type, args.split_name,
                   'random_valid_cv%d.csv' % args.fold),
        'nih': opj(DATA_DIR, args.split_type, 'nih_112120.csv'),
        'chexpert': opj(DATA_DIR, args.split_type, 'chexpert_188521.csv'),
    }
    try:
        test_split_file = split_files[dataset]
    except KeyError:
        raise ValueError(
            'Unsupported or unknown dataset: {}!'.format(dataset)) from None

    test_dataset = SiimDataset(
        test_split_file,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=False,
        crop_version=args.crop_version,
        dataset=args.dataset,
        predict_pos=args.predict_pos,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    for augment in args.augment:
        # resolve the (already validated) augment/unaugment function pair by
        # name from module globals instead of eval()
        test_loader.dataset.transform = globals()['augment_%s' % augment]
        unaugment_func = globals()['unaugment_%s' % augment]
        sub_submit_out_dir = opj(submit_out_dir, augment)
        if not ope(sub_submit_out_dir):
            os.makedirs(sub_submit_out_dir)
        with torch.no_grad():
            predict(test_loader, model, sub_submit_out_dir, dataset, args,
                    unaugment_func=unaugment_func)