def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.train.txt'), mode='a')

    model_out_dir = opj(RESULT_DIR, 'models', args.out_dir, 'fold%d' % args.fold)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(model_out_dir))
    if not ope(model_out_dir):
        os.makedirs(model_out_dir)

    # set cuda visible device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True

    # set random seeds
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    model_params = {}
    model_params['architecture'] = args.arch
    model_params['num_classes'] = args.num_classes
    model_params['in_channels'] = args.in_channels
    model = init_network(model_params)

    # move network to gpu
    model = DataParallel(model)
    model.cuda()

    # define loss function (criterion)
    try:
        criterion = eval(args.loss)().cuda()
    except Exception:
        raise RuntimeError("Loss {} not available!".format(args.loss))

    start_epoch = 0
    best_loss = 1e5
    best_epoch = 0
    best_focal = 1e5

    # define scheduler
    try:
        scheduler = eval(args.scheduler)()
    except Exception:
        raise RuntimeError("Scheduler {} not available!".format(args.scheduler))
    optimizer = scheduler.schedule(model, start_epoch, args.epochs)[0]

    # optionally resume from a checkpoint
    if args.resume:
        args.resume = os.path.join(model_out_dir, args.resume)
        if os.path.isfile(args.resume):
            # load checkpoint weights and update model and optimizer
            log.write(">> Loading checkpoint:\n>> '{}'\n".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_focal = checkpoint['best_score']
            model.module.load_state_dict(checkpoint['state_dict'])

            optimizer_fpath = args.resume.replace('.pth', '_optim.pth')
            if ope(optimizer_fpath):
                log.write(">> Loading checkpoint:\n>> '{}'\n".format(optimizer_fpath))
                optimizer.load_state_dict(torch.load(optimizer_fpath)['optimizer'])
            log.write(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})\n".format(
                args.resume, checkpoint['epoch']))
        else:
            log.write(">> No checkpoint found at '{}'\n".format(args.resume))

    # Data loading code
    train_transform = train_multi_augment2
    train_split_file = opj(DATA_DIR, 'split', args.split_name,
                           'random_train_cv%d.csv' % args.fold)
    train_dataset = ProteinDataset(
        train_split_file,
        img_size=args.img_size,
        is_trainset=True,
        return_label=True,
        in_channels=args.in_channels,
        transform=train_transform,
        crop_size=args.crop_size,
        random_crop=True,
    )
    train_loader = DataLoader(
        train_dataset,
        sampler=RandomSampler(train_dataset),
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True,
    )
    valid_split_file = opj(DATA_DIR, 'split', args.split_name,
                           'random_valid_cv%d.csv' % args.fold)
    valid_dataset = ProteinDataset(
        valid_split_file,
        img_size=args.img_size,
        is_trainset=True,
        return_label=True,
        in_channels=args.in_channels,
        transform=None,
        crop_size=args.crop_size,
        random_crop=False,
    )
    valid_loader = DataLoader(
        valid_dataset,
        sampler=SequentialSampler(valid_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    focal_loss = FocalLoss().cuda()

    log.write('** start training here! **\n')
    log.write('\n')
    log.write('epoch iter rate | train_loss/acc | valid_loss/acc/focal/kaggle |best_epoch/best_focal| min \n')
    log.write('-----------------------------------------------------------------------------------------------------------------\n')

    start_epoch += 1
    for epoch in range(start_epoch, args.epochs + 1):
        end = time.time()

        # set manual seeds per epoch
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, args.epochs)
        lr = lr_list[0]

        # train for one epoch on train set
        iter, train_loss, train_acc = train(train_loader, model, criterion,
                                            optimizer, epoch,
                                            clipnorm=args.clipnorm, lr=lr)

        with torch.no_grad():
            valid_loss, valid_acc, valid_focal_loss, kaggle_score = validate(
                valid_loader, model, criterion, epoch, focal_loss)

        # remember best loss and save checkpoint
        is_best = valid_focal_loss < best_focal
        best_loss = min(valid_focal_loss, best_loss)
        best_epoch = epoch if is_best else best_epoch
        best_focal = valid_focal_loss if is_best else best_focal

        print('\r', end='', flush=True)
        log.write('%5.1f %5d %0.6f | %0.4f %0.4f | %0.4f %6.4f %6.4f %6.4f | %6.1f %6.4f | %3.1f min \n' % \
                  (epoch, iter + 1, lr, train_loss, train_acc,
                   valid_loss, valid_acc, valid_focal_loss, kaggle_score,
                   best_epoch, best_focal, (time.time() - end) / 60))

        save_model(model, is_best, model_out_dir, optimizer=optimizer,
                   epoch=epoch, best_epoch=best_epoch, best_focal=best_focal)
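
# For reference, a minimal sketch of a focal loss of the kind instantiated
# above via FocalLoss().cuda(), assuming multi-label logits and float
# multi-hot targets with the common gamma=2 default (Lin et al., 2017).
# The repo's actual FocalLoss may differ in weighting and reduction; the
# names below are illustrative only.
import torch
import torch.nn as nn
import torch.nn.functional as F


class FocalLossSketch(nn.Module):
    def __init__(self, gamma=2.0):
        super().__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        # unreduced binary cross-entropy, one term per class and sample
        bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
        # bce = -log(p_t), so exp(-bce) recovers p_t
        p_t = torch.exp(-bce)
        # down-weight easy examples by (1 - p_t)^gamma, then average
        return ((1.0 - p_t) ** self.gamma * bce).mean()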
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.train.txt'), mode='a')

    model_out_dir = opj(RESULT_DIR, 'models', args.out_dir, 'fold%d' % args.fold)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(model_out_dir))
    if not ope(model_out_dir):
        os.makedirs(model_out_dir)

    # set cuda visible device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True

    # set random seeds
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    # move network to gpu
    model = DataParallel(model)
    model.cuda()

    if args.ema:
        ema_model = copy.deepcopy(model)
        ema_model.cuda()
    else:
        ema_model = None

    # define loss function (criterion)
    try:
        criterion = eval(args.loss)().cuda()
    except Exception:
        raise RuntimeError("Loss {} not available!".format(args.loss))

    start_epoch = 0
    best_epoch = 0
    best_dice = 0
    best_dice_arr = np.zeros(3)

    # define scheduler
    try:
        scheduler = eval(args.scheduler)()
    except Exception:
        raise RuntimeError("Scheduler {} not available!".format(args.scheduler))
    optimizer = scheduler.schedule(model, start_epoch, args.epochs)[0]

    # optionally resume from a checkpoint
    if args.resume:
        model_fpath = os.path.join(model_out_dir, args.resume)
        if os.path.isfile(model_fpath):
            # load checkpoint weights and update model and optimizer
            log.write(">> Loading checkpoint:\n>> '{}'\n".format(model_fpath))
            checkpoint = torch.load(model_fpath)
            start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_dice_arr = checkpoint['best_dice_arr']
            best_dice = np.max(best_dice_arr)
            model.module.load_state_dict(checkpoint['state_dict'])

            optimizer_fpath = model_fpath.replace('.pth', '_optim.pth')
            if ope(optimizer_fpath):
                log.write(">> Loading checkpoint:\n>> '{}'\n".format(optimizer_fpath))
                optimizer.load_state_dict(torch.load(optimizer_fpath)['optimizer'])

            if args.ema:
                ema_model_fpath = model_fpath.replace('.pth', '_ema.pth')
                if ope(ema_model_fpath):
                    log.write(">> Loading checkpoint:\n>> '{}'\n".format(ema_model_fpath))
                    ema_model.module.load_state_dict(
                        torch.load(ema_model_fpath)['state_dict'])
            log.write(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})\n".format(
                model_fpath, checkpoint['epoch']))
        else:
            log.write(">> No checkpoint found at '{}'\n".format(model_fpath))

    # Data loading code
    train_transform = train_multi_augment9
    train_split_file = opj(DATA_DIR, args.split_type, args.split_name,
                           'random_train_cv%d.csv' % args.fold)
    train_dataset = SiimDataset(
        train_split_file,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=train_transform,
        return_label=True,
        crop_version=args.crop_version,
        pseudo=args.pseudo,
        pseudo_ratio=args.pseudo_ratio,
        dataset='train',
    )
    if args.is_balance:
        train_sampler = BalanceClassSampler(train_dataset,
                                            args.sample_times * len(train_dataset))
    else:
        train_sampler = RandomSampler(train_dataset)
    train_loader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True,
    )
    valid_split_file = opj(DATA_DIR, args.split_type, args.split_name,
                           'random_valid_cv%d.csv' % args.fold)
    valid_dataset = SiimDataset(
        valid_split_file,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=True,
        crop_version=args.crop_version,
        dataset='val',
    )
    valid_loader = DataLoader(
        valid_dataset,
        sampler=SequentialSampler(valid_dataset),
        batch_size=max(int(args.batch_size // 2), 1),
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    log.write('** start training here! **\n')
    log.write('\n')
    log.write('epoch iter rate | smooth_loss/dice | valid_loss/dice | best_epoch/best_score | min \n')
    log.write('------------------------------------------------------------------------------------------------\n')

    start_epoch += 1
    for epoch in range(start_epoch, args.epochs + 1):
        end = time.time()

        # set manual seeds per epoch
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, args.epochs)
        lr = lr_list[0]

        # train for one epoch on train set
        iter, train_loss, train_dice = train(train_loader, model, ema_model,
                                             criterion, optimizer, epoch,
                                             args, lr=lr)

        with torch.no_grad():
            if args.ema:
                valid_loss, valid_dice = validate(valid_loader, ema_model, criterion, epoch)
            else:
                valid_loss, valid_dice = validate(valid_loader, model, criterion, epoch)

        # remember best loss and save checkpoint
        is_best = valid_dice >= best_dice
        if is_best:
            best_epoch = epoch
            best_dice = valid_dice

        if args.ema:
            save_top_epochs(model_out_dir, ema_model, best_dice_arr, valid_dice,
                            best_epoch, epoch, best_dice, ema=True)
        best_dice_arr = save_top_epochs(model_out_dir, model, best_dice_arr,
                                        valid_dice, best_epoch, epoch, best_dice,
                                        ema=False)

        print('\r', end='', flush=True)
        log.write('%5.1f %5d %0.6f | %0.4f %0.4f | %0.4f %6.4f | %6.1f %6.4f | %3.1f min \n' % \
                  (epoch, iter + 1, lr, train_loss, train_dice,
                   valid_loss, valid_dice, best_epoch, best_dice,
                   (time.time() - end) / 60))

        model_name = '%03d' % epoch
        if args.ema:
            save_model(ema_model, model_out_dir, epoch, model_name, best_dice_arr,
                       is_best=is_best, optimizer=optimizer, best_epoch=best_epoch,
                       best_dice=best_dice, ema=True)
        save_model(model, model_out_dir, epoch, model_name, best_dice_arr,
                   is_best=is_best, optimizer=optimizer, best_epoch=best_epoch,
                   best_dice=best_dice, ema=False)
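
# A minimal sketch of the exponential-moving-average update implied by
# ema_model above: after each optimizer step, the EMA weights drift toward
# the online weights. The decay value and the helper name update_ema are
# assumptions; the repo's train() may apply this per step or per epoch, and
# may also copy buffers such as BatchNorm running statistics.
import torch


def update_ema(model, ema_model, decay=0.999):
    with torch.no_grad():
        for p, ema_p in zip(model.parameters(), ema_model.parameters()):
            # ema = decay * ema + (1 - decay) * online
            ema_p.mul_(decay).add_(p, alpha=1.0 - decay)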
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, f'fold{args.fold}')
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.train.txt'), mode='a')

    model_out_dir = opj(RESULT_DIR, 'models', args.out_dir, f'fold{args.fold}')
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(model_out_dir))
    if not ope(model_out_dir):
        os.makedirs(model_out_dir)

    # set cuda visible device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True

    # set random seeds
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    model = resnet34(pretrained=False, num_classes=5)

    # move network to gpu
    model = DataParallel(model)
    model.cuda()
    ema_model = None

    # define loss function (criterion)
    try:
        criterion = eval(args.loss)().cuda()
    except Exception:
        raise RuntimeError("Loss {} not available!".format(args.loss))

    start_epoch = 0
    best_epoch = 0
    best_dice = 0
    best_dice_arr = np.zeros(3)

    # define scheduler
    try:
        scheduler = eval(args.scheduler)()
    except Exception:
        raise RuntimeError("Scheduler {} not available!".format(args.scheduler))
    optimizer = scheduler.schedule(model, start_epoch, args.epochs)[0]

    # optionally resume from a checkpoint
    if args.resume:
        model_fpath = os.path.join(model_out_dir, args.resume)
        if os.path.isfile(model_fpath):
            # load checkpoint weights and update model and optimizer
            log.write(">> Loading checkpoint:\n>> '{}'\n".format(model_fpath))
            checkpoint = torch.load(model_fpath)
            start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_dice_arr = checkpoint['best_dice_arr']
            best_dice = np.max(best_dice_arr)
            model.module.load_state_dict(checkpoint['state_dict'])

            optimizer_fpath = model_fpath.replace('.pth', '_optim.pth')
            if ope(optimizer_fpath):
                log.write(">> Loading checkpoint:\n>> '{}'\n".format(optimizer_fpath))
                optimizer.load_state_dict(torch.load(optimizer_fpath)['optimizer'])
            log.write(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})\n".format(
                model_fpath, checkpoint['epoch']))
        else:
            log.write(">> No checkpoint found at '{}'\n".format(model_fpath))

    # Data loading code
    train_transform = eval(args.train_transform)
    steel_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
    steel_df['ImageId'], steel_df['ClassId'] = zip(
        *steel_df['ImageId_ClassId'].apply(lambda x: x.split('_')))
    steel_df = pd.pivot_table(steel_df,
                              index='ImageId',
                              columns='ClassId',
                              values='EncodedPixels',
                              aggfunc=lambda x: x,
                              dropna=False)
    steel_df = steel_df.reset_index()
    steel_df.columns = [str(i) for i in steel_df.columns.values]
    steel_df['class_count'] = steel_df[['1', '2', '3', '4']].count(axis=1)
    steel_df['split_label'] = steel_df[['1', '2', '3', '4', 'class_count']].apply(
        lambda x: make_split_label(x), axis=1)
    steel_df['label'] = steel_df['split_label'].apply(lambda x: make_label(x))
    train_idx, valid_idx, _, _ = train_test_split(steel_df.index,
                                                  steel_df['split_label'],
                                                  test_size=0.2,
                                                  random_state=43)

    train_dataset = SteelDataset(
        steel_df.iloc[train_idx],
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=train_transform,
        return_label=True,
        dataset='train',
    )
    train_loader = DataLoader(
        train_dataset,
        sampler=RandomSampler(train_dataset),
        # sampler=SequentialSampler(train_dataset),
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True,
    )
    valid_dataset = SteelDataset(
        steel_df.iloc[valid_idx],
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=True,
        dataset='val',
    )
    valid_loader = DataLoader(
        valid_dataset,
        sampler=SequentialSampler(valid_dataset),
        batch_size=max(int(args.batch_size // 2), 1),
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    log.write('** start training here! **\n')
    log.write('\n')
    log.write('epoch iter rate | smooth_loss/dice | valid_loss/dice | best_epoch/best_score | min \n')
    log.write('------------------------------------------------------------------------------------------------\n')

    start_epoch += 1
    for epoch in range(start_epoch, args.epochs + 1):
        end = time.time()

        # set manual seeds per epoch
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        lr_list = scheduler.step(model, epoch, args.epochs)
        lr = lr_list[0]

        # train for one epoch on train set
        iter, train_loss, train_dice = train(train_loader, model, ema_model,
                                             criterion, optimizer, epoch,
                                             args, lr=lr)

        with torch.no_grad():
            valid_loss, valid_dice = validate(valid_loader, model, criterion, epoch)

        # remember best loss and save checkpoint
        is_best = valid_dice >= best_dice
        if is_best:
            best_epoch = epoch
            best_dice = valid_dice

        best_dice_arr = save_top_epochs(model_out_dir, model, best_dice_arr,
                                        valid_dice, best_epoch, epoch, best_dice,
                                        ema=False)

        print('\r', end='', flush=True)
        log.write('%5.1f %5d %0.6f | %0.4f %0.4f | %0.4f %6.4f | %6.1f %6.4f | %3.1f min \n' % \
                  (epoch, iter + 1, lr, train_loss, train_dice,
                   valid_loss, valid_dice, best_epoch, best_dice,
                   (time.time() - end) / 60))

        model_name = '%03d' % epoch
        save_model(model, model_out_dir, epoch, model_name, best_dice_arr,
                   is_best=is_best, optimizer=optimizer, best_epoch=best_epoch,
                   best_dice=best_dice, ema=False)
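
# A minimal sketch of decoding the EncodedPixels column pivoted above,
# assuming the standard Kaggle run-length encoding: space-separated
# (start, length) pairs, 1-indexed, running down columns first. The helper
# name rle_decode is illustrative, not necessarily the repo's own.
import numpy as np


def rle_decode(rle, height, width):
    mask = np.zeros(height * width, dtype=np.uint8)
    if isinstance(rle, str) and rle.strip():
        s = np.asarray(rle.split(), dtype=int)
        starts, lengths = s[0::2] - 1, s[1::2]  # starts are 1-indexed
        for start, length in zip(starts, lengths):
            mask[start:start + length] = 1
    # Kaggle RLE runs down columns, hence the Fortran-order reshape
    return mask.reshape((height, width), order='F')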
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    args.predict_epoch = 'final' if args.predict_epoch is None else '%03d' % args.predict_epoch
    network_path = opj(RESULT_DIR, 'models', args.out_dir, 'fold%d' % args.fold,
                       '%s.pth' % args.predict_epoch)

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         'fold%d' % args.fold, 'epoch_%s' % args.predict_epoch)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    args.augment = args.augment.split(',')
    for augment in args.augment:
        if augment not in augment_list:
            raise ValueError('Unsupported or unknown test augmentation: {}!'.format(augment))

    model_params = {}
    model_params['architecture'] = args.arch
    model_params['num_classes'] = args.num_classes
    model_params['in_channels'] = args.in_channels
    model = init_network(model_params)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code
    dataset = args.dataset
    if dataset == 'test':
        test_split_file = opj(DATA_DIR, 'split', 'test_11702.csv')
    elif dataset == 'val':
        test_split_file = opj(DATA_DIR, 'split', args.split_name,
                              'random_valid_cv%d.csv' % args.fold)
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))
    test_dataset = ProteinDataset(
        test_split_file,
        img_size=args.img_size,
        is_trainset=(dataset != 'test'),
        return_label=False,
        in_channels=args.in_channels,
        transform=None,
        crop_size=args.crop_size,
        random_crop=False,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    seeds = [args.seed] if args.seeds is None else [int(i) for i in args.seeds.split(',')]
    for seed in seeds:
        test_dataset.random_crop = (seed != 0)
        for augment in args.augment:
            test_loader.dataset.transform = eval('augment_%s' % augment)
            if args.crop_size > 0:
                sub_submit_out_dir = opj(submit_out_dir, '%s_seed%d' % (augment, seed))
            else:
                sub_submit_out_dir = opj(submit_out_dir, augment)
            if not ope(sub_submit_out_dir):
                os.makedirs(sub_submit_out_dir)
            with torch.no_grad():
                predict(test_loader, model, sub_submit_out_dir, dataset)
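
# A minimal sketch of how the per-augmentation (and per-seed) outputs that
# predict() writes into each sub_submit_out_dir are typically combined:
# average the predicted probabilities before thresholding. The prob.npy
# file name and the average_tta helper are assumptions about the on-disk
# layout, not the repo's actual API.
import numpy as np
from os.path import join as opj


def average_tta(submit_out_dir, sub_dirs, fname='prob.npy'):
    probs = [np.load(opj(submit_out_dir, d, fname)) for d in sub_dirs]
    return np.mean(probs, axis=0)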
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    args.predict_epoch = 'final' if args.predict_epoch is None else '%03d' % args.predict_epoch
    network_path = opj(RESULT_DIR, 'models', args.out_dir, '%s.pth' % args.predict_epoch)

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         'epoch_%s' % args.predict_epoch)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    model_params = {}
    model_params['architecture'] = args.arch
    model_params['num_classes'] = args.num_classes
    model_params['in_channels'] = args.in_channels
    model = init_network(model_params)
    model.set_configs(extract_feature=True)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code
    dataset = args.dataset
    if dataset == 'test':
        test_split_file = opj(DATA_DIR, 'split', 'test_11702.csv')
    elif dataset == 'ext':
        test_split_file = opj(DATA_DIR, 'split', 'external_antibody_split.csv')
    elif dataset == 'train':
        test_split_file = opj(DATA_DIR, 'split', 'external_trainset_antibody_split.csv')
    elif dataset == 'val':
        test_split_file = opj(DATA_DIR, 'split', 'external_validset_antibody_split.csv')
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))
    test_dataset = ProteinMLDataset(
        test_split_file,
        img_size=args.img_size,
        is_trainset=False,
        return_label=False,
        in_channels=args.in_channels,
        transform=None,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    with torch.no_grad():
        predict(test_loader, model, submit_out_dir, dataset)
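
# A minimal sketch of one way the feature vectors produced above (via
# set_configs(extract_feature=True)) can be consumed: L2-normalize and rank
# reference antibody features by cosine similarity for each test image.
# This is illustrative only; the repo's matching logic may differ.
import numpy as np


def cosine_topk(test_feats, ref_feats, k=5):
    t = test_feats / np.linalg.norm(test_feats, axis=1, keepdims=True)
    r = ref_feats / np.linalg.norm(ref_feats, axis=1, keepdims=True)
    sims = t @ r.T  # (n_test, n_ref) cosine similarities
    topk = np.argsort(-sims, axis=1)[:, :k]
    return topk, np.take_along_axis(sims, topk, axis=1)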
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, f'fold{args.fold}')
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    if args.ema:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           f'fold{args.fold}', f'{args.predict_epoch}_ema.pth')
    else:
        network_path = opj(RESULT_DIR, 'models', args.out_dir,
                           f'fold{args.fold}', f'{args.predict_epoch}.pth')

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         f'fold{args.fold}', f'epoch_{args.predict_epoch}')
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    args.augment = args.augment.split(',')
    for augment in args.augment:
        if augment not in augment_list:
            raise ValueError('Unsupported or unknown test augmentation: {}!'.format(augment))

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code
    dataset = args.dataset
    if dataset == 'test':
        steel_test_df = pd.read_csv(opj('..', 'input', 'sample_submission.csv'))
    elif dataset == 'val':
        steel_test_df = pd.read_csv(opj(DATA_DIR, args.split_type, args.split_name,
                                        f'random_valid_cv{args.fold}.csv'))
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))
    steel_test_df['ImageId'], steel_test_df['ClassId'] = zip(
        *steel_test_df['ImageId_ClassId'].apply(lambda x: x.split('_')))
    imageId = pd.DataFrame(steel_test_df['ImageId'].unique(), columns=['ImageId'])
    test_dataset = SteelDataset(
        imageId,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=False,
        dataset=args.dataset,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    for augment in args.augment:
        test_loader.dataset.transform = eval('augment_%s' % augment)
        unaugment_func = eval('unaugment_%s' % augment)
        sub_submit_out_dir = opj(submit_out_dir, augment)
        if not ope(sub_submit_out_dir):
            os.makedirs(sub_submit_out_dir)
        with torch.no_grad():
            predict(test_loader, model, sub_submit_out_dir, dataset, args,
                    unaugment_func=unaugment_func)
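
# A minimal sketch of a matched augment_*/unaugment_* pair of the kind
# eval'd above for segmentation TTA: the forward transform flips the input
# image, and the inverse maps the predicted mask back to the original
# orientation before results are merged. The fliplr pair is an assumed
# example; the repo's augment_list may use different names and transforms.
import numpy as np


def augment_fliplr(image):
    return np.fliplr(image).copy()  # copy() avoids negative-stride arrays


def unaugment_fliplr(prob_mask):
    # undo the horizontal flip on the predicted probability mask
    return np.fliplr(prob_mask).copy()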
def main():
    args = parser.parse_args()

    log_out_dir = opj(RESULT_DIR, 'logs', args.out_dir, 'fold%d' % args.fold)
    if not ope(log_out_dir):
        os.makedirs(log_out_dir)
    log = Logger()
    log.open(opj(log_out_dir, 'log.submit.txt'), mode='a')

    if args.ema:
        network_path = opj(RESULT_DIR, 'models', args.out_dir, 'fold%d' % args.fold,
                           '%s_ema.pth' % args.predict_epoch)
    else:
        network_path = opj(RESULT_DIR, 'models', args.out_dir, 'fold%d' % args.fold,
                           '%s.pth' % args.predict_epoch)

    submit_out_dir = opj(RESULT_DIR, 'submissions', args.out_dir,
                         'fold%d' % args.fold, 'epoch_%s' % args.predict_epoch)
    log.write(">> Creating directory if it does not exist:\n>> '{}'\n".format(submit_out_dir))
    if not ope(submit_out_dir):
        os.makedirs(submit_out_dir)

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    args.augment = args.augment.split(',')
    for augment in args.augment:
        if augment not in augment_list:
            raise ValueError('Unsupported or unknown test augmentation: {}!'.format(augment))

    model_params = {}
    model_params['architecture'] = args.arch
    model = init_network(model_params)

    log.write(">> Loading network:\n>>>> '{}'\n".format(network_path))
    checkpoint = torch.load(network_path)
    model.load_state_dict(checkpoint['state_dict'])
    log.write(">>>> loaded network:\n>>>> epoch {}\n".format(checkpoint['epoch']))

    # moving network to gpu and eval mode
    model = DataParallel(model)
    model.cuda()
    model.eval()

    # Data loading code
    dataset = args.dataset
    if dataset == 'train':
        test_split_file = opj(DATA_DIR, args.split_type, 'train.csv')
    elif dataset == 'test':
        test_split_file = opj(DATA_DIR, args.split_type, 'test.csv')
    elif dataset == 'val':
        test_split_file = opj(DATA_DIR, args.split_type, args.split_name,
                              'random_valid_cv%d.csv' % args.fold)
    elif dataset == 'nih':
        test_split_file = opj(DATA_DIR, args.split_type, 'nih_112120.csv')
    elif dataset == 'chexpert':
        test_split_file = opj(DATA_DIR, args.split_type, 'chexpert_188521.csv')
    else:
        raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))
    test_dataset = SiimDataset(
        test_split_file,
        img_size=args.img_size,
        mask_size=args.img_size,
        transform=None,
        return_label=False,
        crop_version=args.crop_version,
        dataset=args.dataset,
        predict_pos=args.predict_pos,
    )
    test_loader = DataLoader(
        test_dataset,
        sampler=SequentialSampler(test_dataset),
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    for augment in args.augment:
        test_loader.dataset.transform = eval('augment_%s' % augment)
        unaugment_func = eval('unaugment_%s' % augment)
        sub_submit_out_dir = opj(submit_out_dir, augment)
        if not ope(sub_submit_out_dir):
            os.makedirs(sub_submit_out_dir)
        with torch.no_grad():
            predict(test_loader, model, sub_submit_out_dir, dataset, args,
                    unaugment_func=unaugment_func)
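
# Complementing the decoder sketched earlier, a minimal run-length encoder
# of the kind needed when predicted masks are flattened into a submission
# file (column-major pixel order, 1-indexed starts). Empty-mask handling
# ('' here) is an assumption; competitions differ on the placeholder they
# expect for images without a mask.
import numpy as np


def rle_encode(mask):
    pixels = mask.T.flatten()  # column-major pixel order
    pixels = np.concatenate([[0], pixels, [0]])  # pad so edges register as changes
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[0::2]  # convert change positions to (start, length) pairs
    return ' '.join(str(x) for x in runs)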