parser = argparse.ArgumentParser()
arg = parser.add_argument
arg('--model_path', type=str, default='data/models/unet11_binary_20', help='path to model folder')
arg('--model_type', type=str, default='UNet11', help='network architecture',
    choices=['UNet', 'UNet11', 'UNet16', 'LinkNet34'])
arg('--output_path', type=str, help='path to save images', default='.')
arg('--batch-size', type=int, default=4)
arg('--fold', type=int, default=0, choices=[0, 1, 2, 3, -1], help='-1: all folds')
arg('--problem_type', type=str, default='parts', choices=['binary', 'parts', 'instruments'])
arg('--workers', type=int, default=8)

args = parser.parse_args()

if args.fold == -1:
    # Run prediction once per fold, loading that fold's checkpoint.
    for fold in [0, 1, 2, 3]:
        _, file_names = get_split(fold)
        model = get_model(str(Path(args.model_path).joinpath('model_{fold}.pt'.format(fold=fold))),
                          model_type=args.model_type, problem_type=args.problem_type)

        print('num file_names = {}'.format(len(file_names)))

        output_path = Path(args.output_path)
        output_path.mkdir(exist_ok=True, parents=True)

        predict(model, file_names, args.batch_size, output_path, problem_type=args.problem_type)
else:
    _, file_names = get_split(args.fold)
    model = get_model(str(Path(args.model_path).joinpath('model_{fold}.pt'.format(fold=args.fold))),
                      model_type=args.model_type, problem_type=args.problem_type)

    print('num file_names = {}'.format(len(file_names)))

    output_path = Path(args.output_path)
    output_path.mkdir(exist_ok=True, parents=True)

    predict(model, file_names, args.batch_size, output_path, problem_type=args.problem_type)
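# Usage sketch for the prediction script above. The file name (predict.py) and
# the data layout are assumptions for illustration, not taken from this repo:
#
#   # predict with a single fold's checkpoint
#   python predict.py --model_path data/models/unet11_binary_20 \
#       --model_type UNet11 --problem_type binary --fold 0 --output_path preds
#
#   # --fold -1 loops over model_0.pt ... model_3.pt and predicts with each
#   python predict.py --model_path data/models/unet11_binary_20 --fold -1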
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--jaccard-weight', default=0.3, type=float)
    arg('--device-ids', type=str, default='0', help='For example 0,1 to run on two GPUs')
    arg('--fold', type=int, help='fold', default=0)
    arg('--root', default='runs/debug', help='checkpoint root')
    arg('--batch-size', type=int, default=1)
    arg('--limit', type=int, default=10000, help='number of images in epoch')
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=12)
    # Choices must match the branches below ('AlbuNet' and a missing 'LinkNet34'
    # entry previously left two branches unreachable).
    arg('--model', type=str, default='UNet',
        choices=['UNet', 'UNet11', 'UNet16', 'LinkNet34', 'AlbuNet34'])

    args = parser.parse_args()

    root = Path(args.root)
    root.mkdir(exist_ok=True, parents=True)

    num_classes = 1

    if args.model == 'UNet':
        model = UNet(num_classes=num_classes)
    elif args.model == 'UNet11':
        model = UNet11(num_classes=num_classes, pretrained=True)
    elif args.model == 'UNet16':
        model = UNet16(num_classes=num_classes, pretrained=True)
    elif args.model == 'LinkNet34':
        model = LinkNet34(num_classes=num_classes, pretrained=True)
    elif args.model == 'AlbuNet34':
        model = AlbuNet34(num_classes=num_classes, pretrained=True)
    else:
        model = UNet(num_classes=num_classes, input_channels=3)

    if torch.cuda.is_available():
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))
        else:
            device_ids = None
        model = nn.DataParallel(model, device_ids=device_ids).cuda()

    loss = LossBinary(jaccard_weight=args.jaccard_weight)

    cudnn.benchmark = True

    def make_loader(file_names, shuffle=False, transform=None, limit=None):
        return DataLoader(dataset=AngyodysplasiaDataset(file_names, transform=transform, limit=limit),
                          shuffle=shuffle,
                          num_workers=args.workers,
                          batch_size=args.batch_size,
                          pin_memory=torch.cuda.is_available())

    train_file_names, val_file_names = get_split(args.fold)

    print('num train = {}, num_val = {}'.format(len(train_file_names), len(val_file_names)))

    train_transform = DualCompose([
        SquarePaddingTraining(),
        CenterCrop([574, 574]),
        HorizontalFlip(),
        VerticalFlip(),
        Rotate(),
        ImageOnly(RandomHueSaturationValue()),
        ImageOnly(Normalize())
    ])

    val_transform = DualCompose([
        SquarePaddingTraining(),
        CenterCrop([574, 574]),
        ImageOnly(Normalize())
    ])

    train_loader = make_loader(train_file_names, shuffle=True, transform=train_transform, limit=args.limit)
    valid_loader = make_loader(val_file_names, transform=val_transform)

    root.joinpath('params.json').write_text(
        json.dumps(vars(args), indent=True, sort_keys=True))

    utils.train(init_optimizer=lambda lr: Adam(model.parameters(), lr=lr),
                args=args,
                model=model,
                criterion=loss,
                train_loader=train_loader,
                valid_loader=valid_loader,
                validation=validation_binary,
                fold=args.fold)
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--jaccard-weight', default=0.5, type=float)
    arg('--device-ids', type=str, default='0', help='For example 0,1 to run on two GPUs')
    arg('--fold', type=int, help='fold', default=0)
    arg('--root', default='runs/debug', help='checkpoint root')
    arg('--batch-size', type=int, default=1)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=12)
    arg('--train_crop_height', type=int, default=1024)
    arg('--train_crop_width', type=int, default=1280)
    arg('--val_crop_height', type=int, default=1024)
    arg('--val_crop_width', type=int, default=1280)
    arg('--type', type=str, default='binary', choices=['binary', 'parts', 'instruments'])
    arg('--model', type=str, default='UNet', choices=moddel_list.keys())

    args = parser.parse_args()

    root = Path(args.root)
    root.mkdir(exist_ok=True, parents=True)

    if not utils.check_crop_size(args.train_crop_height, args.train_crop_width):
        print('Input image sizes should be divisible by 32, but train '
              'crop sizes ({train_crop_height} and {train_crop_width}) '
              'are not.'.format(train_crop_height=args.train_crop_height,
                                train_crop_width=args.train_crop_width))
        sys.exit(0)

    if not utils.check_crop_size(args.val_crop_height, args.val_crop_width):
        print('Input image sizes should be divisible by 32, but validation '
              'crop sizes ({val_crop_height} and {val_crop_width}) '
              'are not.'.format(val_crop_height=args.val_crop_height,
                                val_crop_width=args.val_crop_width))
        sys.exit(0)

    if args.type == 'parts':
        num_classes = 4
    elif args.type == 'instruments':
        num_classes = 8
    else:
        num_classes = 1

    if args.model == 'UNet':
        model = UNet(num_classes=num_classes)
    else:
        model_name = moddel_list[args.model]
        model = model_name(num_classes=num_classes, pretrained=True)

    if torch.cuda.is_available():
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))
        else:
            device_ids = None
        model = nn.DataParallel(model, device_ids=device_ids).cuda()
    else:
        raise SystemError('GPU device not found')

    if args.type == 'binary':
        loss = LossBinary(jaccard_weight=args.jaccard_weight)
    else:
        loss = LossMulti(num_classes=num_classes, jaccard_weight=args.jaccard_weight)

    cudnn.benchmark = True

    def make_loader(file_names, shuffle=False, transform=None, problem_type='binary', batch_size=1):
        return DataLoader(dataset=RoboticsDataset(file_names, transform=transform, problem_type=problem_type),
                          shuffle=shuffle,
                          num_workers=args.workers,
                          batch_size=batch_size,
                          pin_memory=torch.cuda.is_available())

    train_file_names, val_file_names = get_split(args.fold)

    print('num train = {}, num_val = {}'.format(len(train_file_names), len(val_file_names)))

    def train_transform(p=1):
        return Compose([
            PadIfNeeded(min_height=args.train_crop_height, min_width=args.train_crop_width, p=1),
            RandomCrop(height=args.train_crop_height, width=args.train_crop_width, p=1),
            VerticalFlip(p=0.5),
            HorizontalFlip(p=0.5),
            Normalize(p=1)
        ], p=p)

    def val_transform(p=1):
        return Compose([
            PadIfNeeded(min_height=args.val_crop_height, min_width=args.val_crop_width, p=1),
            CenterCrop(height=args.val_crop_height, width=args.val_crop_width, p=1),
            Normalize(p=1)
        ], p=p)

    train_loader = make_loader(train_file_names, shuffle=True, transform=train_transform(p=1),
                               problem_type=args.type, batch_size=args.batch_size)
    # Validation uses one image per GPU so DataParallel receives an even split.
    valid_loader = make_loader(val_file_names, transform=val_transform(p=1),
                               problem_type=args.type, batch_size=len(device_ids))

    root.joinpath('params.json').write_text(
        json.dumps(vars(args), indent=True, sort_keys=True))

    if args.type == 'binary':
        valid = validation_binary
    else:
        valid = validation_multi

    utils.train(init_optimizer=lambda lr: Adam(model.parameters(), lr=lr),
                args=args,
                model=model,
                criterion=loss,
                train_loader=train_loader,
                valid_loader=valid_loader,
                validation=valid,
                fold=args.fold,
                num_classes=num_classes)
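# utils.check_crop_size is project-local and not shown in this file. A minimal
# sketch consistent with the error messages above (an assumption, not the
# verified implementation):
def check_crop_size(height, width):
    """Return True when both crop dimensions are divisible by 32 (the encoder
    downsamples by a factor of 32, so inputs must be multiples of 32)."""
    return height % 32 == 0 and width % 32 == 0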
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--jaccard-weight', default=0.3, type=float)
    arg('--device-ids', type=str, default='0', help='For example 0,1 to run on two GPUs')
    arg('--fold', type=int, help='fold', default=0)
    arg('--root', default='runs/debug', help='checkpoint root')
    arg('--batch-size', type=int, default=1)
    arg('--limit', type=int, default=10000, help='number of images in epoch')
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.001)
    arg('--workers', type=int, default=12)
    arg('--model', type=str, default='UNet',
        choices=['UNet', 'UNet11', 'LinkNet34', 'UNet16', 'AlbuNet34',
                 'MDeNet', 'EncDec', 'hourglass', 'MDeNetplus'])

    args = parser.parse_args()

    root = Path(args.root)
    root.mkdir(exist_ok=True, parents=True)

    num_classes = 1
    if args.model == 'UNet':
        model = UNet(num_classes=num_classes)
    elif args.model == 'UNet11':
        model = UNet11(num_classes=num_classes, pretrained=True)
    elif args.model == 'UNet16':
        model = UNet16(num_classes=num_classes, pretrained=True)
    elif args.model == 'MDeNet':
        print('Loading MDeNet')
        model = MDeNet(num_classes=num_classes, pretrained=True)
    elif args.model == 'MDeNetplus':
        print('Loading MDeNetplus')
        model = MDeNetplus(num_classes=num_classes, pretrained=True)
    elif args.model == 'EncDec':
        print('Loading EncDec')
        model = EncDec(num_classes=num_classes, pretrained=True)
    elif args.model == 'GAN':
        # NOTE: 'GAN' is not in the --model choices, so this branch is
        # unreachable from the command line.
        model = GAN(num_classes=num_classes, pretrained=True)
    elif args.model == 'AlbuNet34':
        model = AlbuNet34(num_classes=num_classes, pretrained=False)
    elif args.model == 'hourglass':
        model = hourglass(num_classes=num_classes, pretrained=True)
    else:
        model = UNet(num_classes=num_classes, input_channels=3)

    if torch.cuda.is_available():
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))
        else:
            device_ids = None
        model = nn.DataParallel(model, device_ids=device_ids).cuda()

    cudnn.benchmark = True

    def make_loader(file_names, shuffle=False, transform=None, limit=None):
        return DataLoader(
            dataset=Polyp(file_names, transform=transform, limit=limit),
            shuffle=shuffle,
            num_workers=args.workers,
            batch_size=args.batch_size,
            pin_memory=torch.cuda.is_available()
        )

    train_file_names, val_file_names = get_split(args.fold)
    print('num train = {}, num_val = {}'.format(len(train_file_names), len(val_file_names)))

    train_transform = DualCompose([
        CropCVC612(),
        img_resize(512),
        HorizontalFlip(),
        VerticalFlip(),
        Rotate(),
        Rescale(),
        Zoomin(),
        ImageOnly(RandomHueSaturationValue()),
        ImageOnly(Normalize())
    ])

    train_loader = make_loader(train_file_names, shuffle=True, transform=train_transform, limit=args.limit)

    root.joinpath('params.json').write_text(
        json.dumps(vars(args), indent=True, sort_keys=True))

    utils.train(
        args=args,
        model=model,
        train_loader=train_loader,
        fold=args.fold
    )
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--jaccard-weight', default=1, type=float)
    arg('--device-ids', type=str, default='0', help='For example 0,1 to run on two GPUs')
    arg('--fold', type=int, help='fold', default=0)
    arg('--root', default='runs/debug', help='checkpoint root')
    arg('--batch-size', type=int, default=1)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=8)
    arg('--type', type=str, default='binary', choices=['binary', 'parts', 'instruments'])
    # 'UNet16' is included so its branch below is reachable.
    arg('--model', type=str, default='UNet', choices=['UNet', 'UNet11', 'UNet16', 'LinkNet34'])

    args = parser.parse_args()

    root = Path(args.root)
    root.mkdir(exist_ok=True, parents=True)

    if args.type == 'parts':
        num_classes = 4
    elif args.type == 'instruments':
        num_classes = 8
    else:
        num_classes = 1

    if args.model == 'UNet':
        model = UNet(num_classes=num_classes)
    elif args.model == 'UNet11':
        model = UNet11(num_classes=num_classes, pretrained='vgg')
    elif args.model == 'UNet16':
        model = UNet16(num_classes=num_classes, pretrained='vgg')
    elif args.model == 'LinkNet34':
        model = LinkNet34(num_classes=num_classes, pretrained=True)
    else:
        model = UNet(num_classes=num_classes, input_channels=3)

    if torch.cuda.is_available():
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))
        else:
            device_ids = None
        model = nn.DataParallel(model, device_ids=device_ids).cuda()

    if args.type == 'binary':
        loss = LossBinary(jaccard_weight=args.jaccard_weight)
    else:
        loss = LossMulti(num_classes=num_classes, jaccard_weight=args.jaccard_weight)

    cudnn.benchmark = True

    def make_loader(file_names, shuffle=False, transform=None, problem_type='binary'):
        return DataLoader(dataset=RoboticsDataset(file_names, transform=transform, problem_type=problem_type),
                          shuffle=shuffle,
                          num_workers=args.workers,
                          batch_size=args.batch_size,
                          pin_memory=torch.cuda.is_available())

    train_file_names, val_file_names = get_split(args.fold)

    print('num train = {}, num_val = {}'.format(len(train_file_names), len(val_file_names)))

    train_transform = DualCompose([HorizontalFlip(), VerticalFlip(), ImageOnly(Normalize())])
    val_transform = DualCompose([ImageOnly(Normalize())])

    train_loader = make_loader(train_file_names, shuffle=True, transform=train_transform, problem_type=args.type)
    valid_loader = make_loader(val_file_names, transform=val_transform, problem_type=args.type)

    root.joinpath('params.json').write_text(
        json.dumps(vars(args), indent=True, sort_keys=True))

    if args.type == 'binary':
        valid = validation_binary
    else:
        valid = validation_multi

    utils.train(init_optimizer=lambda lr: Adam(model.parameters(), lr=lr),
                args=args,
                model=model,
                criterion=loss,
                train_loader=train_loader,
                valid_loader=valid_loader,
                validation=valid,
                fold=args.fold,
                num_classes=num_classes)
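# LossBinary is project-local and not shown in this file; it combines BCE with
# a soft-Jaccard term controlled by jaccard_weight. The sketch below follows
# the widely used TernausNet-style formulation and is an assumption, not the
# verified source:
class LossBinary:
    """BCE-with-logits minus jaccard_weight * log(soft Jaccard)."""

    def __init__(self, jaccard_weight=0):
        self.nll_loss = nn.BCEWithLogitsLoss()
        self.jaccard_weight = jaccard_weight

    def __call__(self, outputs, targets):
        loss = self.nll_loss(outputs, targets)
        if self.jaccard_weight:
            eps = 1e-15
            jaccard_target = (targets == 1).float()
            jaccard_output = torch.sigmoid(outputs)
            intersection = (jaccard_output * jaccard_target).sum()
            union = jaccard_output.sum() + jaccard_target.sum()
            # Subtract the log soft-Jaccard, so maximizing overlap lowers the loss.
            loss -= self.jaccard_weight * torch.log(
                (intersection + eps) / (union - intersection + eps))
        return loss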
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--device-ids', type=str, default='0,1,2,3', help='For example 0,1 to run on two GPUs')
    # store_true avoids the argparse type=bool pitfall (bool('False') is True).
    arg('--requires_grad', action='store_true', help='unfreeze the encoder (default: frozen)')
    arg('--start_epoch', type=str, default='0', help='start epoch, e.g. 21')
    arg('--rop_step', type=int, default=6, help='ReduceLROnPlateau patience')
    arg('--hem_sample_count', type=int, default=0, help='hard example sample count')
    arg('--jaccard-weight', default=0.5, type=float)
    arg('--fold', type=int, help='fold', default=0)
    arg('--root', default='runs/debug', help='checkpoint root')
    arg('--batch-size', type=int, default=256)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0003)
    arg('--workers', type=int, default=20)
    arg('--loss', type=str, default='bce_lava')
    arg('--optim', type=str, default='adam')
    arg('--scheduler', type=str, default='rop')
    arg('--early_stop_patience', type=int, default=1000)
    arg('--save_best_count', type=int, default=6)
    arg('--model', type=str, default='SE_ResNeXt_50', choices=moddel_list.keys())

    args = parser.parse_args()
    print(args)

    num_classes = 1

    fold_path = Path(args.root + '/' + args.model + '/fold_' + str(args.fold))
    fold_path.mkdir(exist_ok=True, parents=True)

    model_name = moddel_list[args.model]
    model = model_name(num_classes=num_classes, pretrained=True, requires_grad=args.requires_grad)

    if torch.cuda.is_available():
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))
        else:
            device_ids = None
        model = nn.DataParallel(model, device_ids=device_ids).cuda()
    else:
        raise SystemError('GPU device not found')

    loss = losses[args.loss]

    cudnn.benchmark = True

    def make_loader(file_names, shuffle=False, transform=None, batch_size=1):
        return DataLoader(
            dataset=SegDataSet(file_names, transform=transform),
            shuffle=shuffle,
            num_workers=args.workers,
            batch_size=batch_size,
            pin_memory=torch.cuda.is_available()
        )

    train_file_names, val_file_names = get_split(args.fold)
    print('num train = {}, num_val = {}'.format(len(train_file_names), len(val_file_names)))

    def train_transform(p=1):
        return Compose([
            Resize(64, 64),
            # Distortions are currently disabled (p=0.0).
            OneOf([
                GridDistortion(),
                ElasticTransform(),
            ], p=0.0),
            RandomBrightness(p=0.5),
            HorizontalFlip(p=0.5),
            ShiftScaleRotate(rotate_limit=5, p=0.5),
            Normalize(p=1)
        ], p=p)

    def val_transform(p=1):
        return Compose([
            Resize(64, 64),
            Normalize(p=1)
        ], p=p)

    train_loader = make_loader(train_file_names, shuffle=True, transform=train_transform(p=1),
                               batch_size=args.batch_size)
    valid_loader = make_loader(val_file_names, transform=val_transform(p=1),
                               batch_size=args.batch_size)

    valid = validation_binary

    # All three optimizers are constructed up front; only the selected one is used.
    optimizers = {
        'adam': optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.000001),
        'rmsprop': optim.RMSprop(model.parameters(), lr=args.lr),
        'sgd': optim.SGD(model.parameters(), lr=args.lr, nesterov=True, momentum=0.9)
    }
    optimizer = optimizers[args.optim]

    scheduler = {
        'co': CosineAnnealingLR(optimizer, T_max=6, eta_min=0.001),
        'rop': ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
                                 patience=args.rop_step, verbose=True)
    }

    utils.train(
        optimizer=optimizer,
        scheduler=scheduler[args.scheduler],
        args=args,
        model=model,
        criterion=loss,
        train_loader=train_loader,
        valid_loader=valid_loader,
        validation=valid,
        fold=args.fold
    )
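# utils.train is project-local and not shown in this file. A minimal sketch of
# the loop contract implied by the arguments above; every name and signature
# here is an assumption for illustration only:
def train_sketch(optimizer, scheduler, model, criterion,
                 train_loader, valid_loader, validation, n_epochs):
    for epoch in range(1, n_epochs + 1):
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(inputs.cuda()), targets.cuda())
            loss.backward()
            optimizer.step()
        valid_metrics = validation(model, criterion, valid_loader)
        # ReduceLROnPlateau must be stepped with the monitored metric;
        # CosineAnnealingLR is stepped once per epoch with no argument.
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(valid_metrics['valid_loss'])
        else:
            scheduler.step()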
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--jaccard-weight', type=float, default=1)
    arg('--root', type=str, default='runs/debug', help='checkpoint root')
    arg('--image-path', type=str, default='data', help='image path')
    arg('--batch-size', type=int, default=2)
    arg('--n-epochs', type=int, default=100)
    arg('--optimizer', type=str, default='Adam', help='Adam or SGD')
    arg('--lr', type=float, default=0.001)
    arg('--workers', type=int, default=10)
    arg('--model', type=str, default='UNet16', choices=[
        'UNet', 'UNet11', 'UNet16', 'LinkNet34',
        'FCDenseNet57', 'FCDenseNet67', 'FCDenseNet103'
    ])
    arg('--model-weight', type=str, default=None)
    arg('--resume-path', type=str, default=None)
    arg('--attribute', type=str, default='all', choices=[
        'pigment_network', 'negative_network', 'streaks',
        'milia_like_cyst', 'globules', 'all'
    ])
    args = parser.parse_args()

    # Folder for checkpoints.
    root = Path(args.root)
    root.mkdir(exist_ok=True, parents=True)

    image_path = args.image_path

    # One output channel per attribute when predicting all five at once.
    if args.attribute == 'all':
        num_classes = 5
    else:
        num_classes = 1
    args.num_classes = num_classes

    # Save initial parameters.
    print('--' * 10)
    print(args)
    print('--' * 10)
    root.joinpath('params.json').write_text(
        json.dumps(vars(args), indent=True, sort_keys=True))

    # Build the model.
    if args.model == 'UNet':
        model = UNet(num_classes=num_classes)
    elif args.model == 'UNet11':
        model = UNet11(num_classes=num_classes, pretrained='vgg')
    elif args.model == 'UNet16':
        model = UNet16(num_classes=num_classes, pretrained='vgg')
    elif args.model == 'LinkNet34':
        model = LinkNet34(num_classes=num_classes, pretrained=True)
    elif args.model == 'FCDenseNet57':
        model = FCDenseNet57(num_classes=num_classes)
    elif args.model == 'FCDenseNet67':
        model = FCDenseNet67(num_classes=num_classes)
    elif args.model == 'FCDenseNet103':
        model = FCDenseNet103(num_classes=num_classes)
    else:
        model = UNet(num_classes=num_classes, input_channels=3)

    # Use multiple GPUs when available.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)

    # Load pretrained weights if given. Even when the checkpoint's final layer
    # has a different size, PyTorch can still load the matching weights, so
    # replacing the last layer by hand turned out to be unnecessary.
    if args.model_weight is not None:
        state = torch.load(args.model_weight)
        model.load_state_dict(state['model'])
        print('--' * 10)
        print('Load pretrained model', args.model_weight)
        print('--' * 10)

    # Model summary.
    print_model_summay(model)

    # Define the loss.
    loss_fn = LossBinary(jaccard_weight=args.jaccard_weight)

    # cudnn.benchmark lets cudnn search for the fastest algorithms for the
    # current input sizes. This speeds things up when input sizes are fixed,
    # but it re-benchmarks (and may hurt runtime) whenever a new size appears.
    cudnn.benchmark = True

    # Get the train/test split.
    train_test_id = get_split()

    # Train vs. val counts.
    print('--' * 10)
    print('num train = {}, num_val = {}'.format(
        (train_test_id['Split'] == 'train').sum(),
        (train_test_id['Split'] != 'train').sum()))
    print('--' * 10)

    train_transform = DualCompose([HorizontalFlip(), VerticalFlip(), ImageOnly(Normalize())])
    val_transform = DualCompose([ImageOnly(Normalize())])

    # Data loaders.
    train_loader = make_loader(train_test_id, image_path, args,
                               train=True, shuffle=True, transform=train_transform)
    valid_loader = make_loader(train_test_id, image_path, args,
                               train=False, shuffle=True, transform=val_transform)

    # Sanity-check one batch.
    print('--' * 10)
    print('check data')
    train_image, train_mask, train_mask_ind = next(iter(train_loader))
    print('train_image.shape', train_image.shape)
    print('train_mask.shape', train_mask.shape)
    print('train_mask_ind.shape', train_mask_ind.shape)
    print('train_image.min', train_image.min().item())
    print('train_image.max', train_image.max().item())
    print('train_mask.min', train_mask.min().item())
    print('train_mask.max', train_mask.max().item())
    print('train_mask_ind.min', train_mask_ind.min().item())
    print('train_mask_ind.max', train_mask_ind.max().item())
    print('--' * 10)

    valid_fn = validation_binary

    # Optimizer.
    if args.optimizer == 'Adam':
        optimizer = Adam(model.parameters(), lr=args.lr)
    elif args.optimizer == 'SGD':
        optimizer = SGD(model.parameters(), lr=args.lr, momentum=0.9)

    # Loss.
    criterion = loss_fn

    # Learning-rate schedule.
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.8, patience=5, verbose=True)

    # Restore a previous model state if requested.
    previous_valid_loss = 10
    model_path = root / 'model.pt'
    if args.resume_path is not None and model_path.exists():
        state = torch.load(str(model_path))
        epoch = state['epoch']
        step = state['step']
        model.load_state_dict(state['model'])
        previous_valid_loss = state.get('valid_loss', 10)
        print('--' * 10)
        print('Restored previous model, epoch {}, step {:,}'.format(epoch, step))
        print('--' * 10)
    else:
        epoch = 1
        step = 0

    # Start training.
    log = root.joinpath('train.log').open('at', encoding='utf8')
    writer = SummaryWriter()
    meter = AllInOneMeter()

    print('Start training')
    print_model_summay(model)

    previous_valid_jaccard = 0
    for epoch in range(epoch, args.n_epochs + 1):
        model.train()
        random.seed()
        start_time = time.time()
        meter.reset()
        # Weights for the three loss terms: the mask loss and the two
        # attribute-indicator losses. (Earlier experiments froze the encoder
        # for the first epochs and re-weighted these at epoch 5; that code has
        # been removed.)
        w1 = 1.0
        w2 = 0.5
        w3 = 0.5
        try:
            for i, (train_image, train_mask, train_mask_ind) in enumerate(train_loader):
                # NHWC -> NCHW
                train_image = train_image.permute(0, 3, 1, 2)
                train_mask = train_mask.permute(0, 3, 1, 2)
                train_image = train_image.to(device)
                train_mask = train_mask.to(device).type(torch.cuda.FloatTensor)
                train_mask_ind = train_mask_ind.to(device).type(torch.cuda.FloatTensor)

                outputs, outputs_mask_ind1, outputs_mask_ind2 = model(train_image)

                train_prob = torch.sigmoid(outputs)
                train_mask_ind_prob1 = torch.sigmoid(outputs_mask_ind1)
                train_mask_ind_prob2 = torch.sigmoid(outputs_mask_ind2)

                # Segmentation loss plus two per-attribute classification losses.
                loss1 = criterion(outputs, train_mask)
                loss2 = F.binary_cross_entropy_with_logits(outputs_mask_ind1, train_mask_ind)
                loss3 = F.binary_cross_entropy_with_logits(outputs_mask_ind2, train_mask_ind)
                loss = loss1 * w1 + loss2 * w2 + loss3 * w3

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                step += 1

                meter.add(train_prob, train_mask, train_mask_ind_prob1, train_mask_ind_prob2,
                          train_mask_ind, loss1.item(), loss2.item(), loss3.item(), loss.item())

            # At the end of each epoch, evaluate the metrics.
            epoch_time = time.time() - start_time
            train_metrics = meter.value()
            train_metrics['epoch_time'] = epoch_time
            train_metrics['image'] = train_image.data
            train_metrics['mask'] = train_mask.data
            train_metrics['prob'] = train_prob.data

            valid_metrics = valid_fn(model, criterion, valid_loader, device, num_classes)

            # Write events.
            write_event(log, step, epoch=epoch,
                        train_metrics=train_metrics, valid_metrics=valid_metrics)

            # TensorBoard.
            write_tensorboard(writer, model, epoch,
                              train_metrics=train_metrics, valid_metrics=valid_metrics)

            # Save the best model, both by validation loss and by Jaccard.
            valid_loss = valid_metrics['loss1']
            valid_jaccard = valid_metrics['jaccard']
            if valid_loss < previous_valid_loss:
                save_weights(model, model_path, epoch + 1, step, train_metrics, valid_metrics)
                previous_valid_loss = valid_loss
                print('Save best model by loss')
            if valid_jaccard > previous_valid_jaccard:
                save_weights(model, model_path, epoch + 1, step, train_metrics, valid_metrics)
                previous_valid_jaccard = valid_jaccard
                print('Save best model by jaccard')

            # Adjust the learning rate.
            scheduler.step(valid_metrics['loss1'])
        except KeyboardInterrupt:
            writer.close()
            return
    writer.close()
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--jaccard-weight', default=0.3, type=float)
    arg('--device-ids', type=str, default='0', help='For example 0,1 to run on two GPUs')
    arg('--fold', type=int, help='fold', default=0)
    arg('--root', default='runs/debug', help='checkpoint root')
    arg('--batch-size', type=int, default=1)
    arg('--limit', type=int, default=10000, help='number of images in epoch')
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=12)
    arg('--b1', type=float, default=0.5, help='adam: decay of first order momentum of gradient')
    arg('--b2', type=float, default=0.999, help='adam: decay of second order momentum of gradient')

    args = parser.parse_args()

    root = Path(args.root)
    root.mkdir(exist_ok=True, parents=True)

    # Loss functions.
    criterion_GAN = GAN_loss(gan_weight=1)
    criterion_pixelwise = torch.nn.L1Loss()
    criterion_discrim = Discrim_loss(dircrim_weight=1)

    # Weight of the L1 pixel-wise loss between translated image and real image.
    lambda_pixel = 100

    # Initialize generator and discriminator.
    model = AlbuNet34(num_classes=1, pretrained=True)
    discrim_model = discriminator()

    if torch.cuda.is_available():
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))
        else:
            device_ids = None
        model = nn.DataParallel(model, device_ids=device_ids).cuda()
        discrim_model = nn.DataParallel(discrim_model, device_ids=device_ids).cuda()

    # Load a previous checkpoint if one exists.
    model_path = root / 'model_{fold}.pt'.format(fold=args.fold)
    if model_path.exists():
        state = torch.load(str(model_path))
        epoch = state['epoch']
        step = state['step']
        model.load_state_dict(state['model'])
        print('Restored model, epoch {}, step {:,}'.format(epoch, step))
    else:
        epoch = 1
        step = 0

    def save(ep):
        torch.save({
            'model': model.state_dict(),
            'epoch': ep,
            'step': step,
        }, str(model_path))

    # Optimizers.
    optimizer_G = Adam(model.parameters(), lr=args.lr, betas=(args.b1, args.b2))
    optimizer_D = Adam(discrim_model.parameters(), lr=args.lr, betas=(args.b1, args.b2))

    # Configure data loaders.
    def make_loader(file_names, shuffle=False, transform=None, limit=None):
        return DataLoader(dataset=Polyp(file_names, transform=transform, limit=limit),
                          shuffle=shuffle,
                          num_workers=args.workers,
                          batch_size=args.batch_size,
                          pin_memory=torch.cuda.is_available())

    train_file_names, val_file_names = get_split(args.fold)
    print('num train = {}, num_val = {}'.format(len(train_file_names), len(val_file_names)))

    train_transform = DualCompose([
        CropCVC612(),
        img_resize(512),
        HorizontalFlip(),
        VerticalFlip(),
        Rotate(),
        Rescale(),
        Zoomin(),
        ImageOnly(RandomHueSaturationValue()),
        ImageOnly(Normalize())
    ])

    train_loader = make_loader(train_file_names, shuffle=True, transform=train_transform, limit=args.limit)

    root.joinpath('params.json').write_text(
        json.dumps(vars(args), indent=True, sort_keys=True))

    report_each = 10
    log = root.joinpath('train_{fold}.log'.format(fold=args.fold)).open('at', encoding='utf8')

    for epoch in range(epoch, args.n_epochs + 1):
        model.train()
        discrim_model.train()
        random.seed()
        tq = tqdm.tqdm(total=(len(train_loader) * args.batch_size))
        tq.set_description('Epoch {}, lr {}'.format(epoch, args.lr))
        losses = []
        try:
            mean_loss = 0
            for i, (inputs, targets) in enumerate(train_loader):
                inputs, targets = variable(inputs), variable(targets)

                # ------------------
                #  Train generator
                # ------------------
                optimizer_G.zero_grad()

                outputs = model(inputs)

                # Adversarial prediction on the generated output.
                predict_fake = discrim_model(inputs, outputs)

                # Pixel-wise loss.
                loss_pixel = criterion_pixelwise(outputs, targets)

                # Generator adversarial loss.
                loss_GAN = criterion_GAN(predict_fake)

                # Total generator loss.
                loss_G = loss_GAN + lambda_pixel * loss_pixel
                loss_G.backward()
                optimizer_G.step()

                # ----------------------
                #  Train discriminator
                # ----------------------
                optimizer_D.zero_grad()

                # Real and fake predictions; detach the generator output so no
                # gradients flow back into the generator.
                predict_real = discrim_model(inputs, targets)
                predict_fake = discrim_model(inputs, outputs.detach())

                loss_D = criterion_discrim(predict_real, predict_fake)
                loss_D.backward()
                optimizer_D.step()

                step += 1
                batch_size = inputs.size(0)
                tq.update(batch_size)
                losses.append(loss_G.item())
                mean_loss = np.mean(losses[-report_each:])
                tq.set_postfix(loss='{:.5f}'.format(mean_loss))
                if i and i % report_each == 0:
                    write_event(log, step, loss=mean_loss)
            write_event(log, step, loss=mean_loss)
            tq.close()
            save(epoch + 1)
        except KeyboardInterrupt:
            tq.close()
            print('Ctrl+C, saving snapshot')
            save(epoch)
            print('done.')
            return
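# GAN_loss and Discrim_loss are project-local and not shown in this file. A
# minimal sketch consistent with how they are called above (logit maps in,
# scalar loss out), assuming a pix2pix-style BCE objective; an illustration,
# not the verified implementation:
class GAN_loss(torch.nn.Module):
    def __init__(self, gan_weight=1):
        super().__init__()
        self.gan_weight = gan_weight
        self.bce = torch.nn.BCEWithLogitsLoss()

    def forward(self, predict_fake):
        # The generator wants the discriminator to score fakes as real (label 1).
        return self.gan_weight * self.bce(predict_fake, torch.ones_like(predict_fake))


class Discrim_loss(torch.nn.Module):
    def __init__(self, dircrim_weight=1):
        super().__init__()
        self.weight = dircrim_weight
        self.bce = torch.nn.BCEWithLogitsLoss()

    def forward(self, predict_real, predict_fake):
        # The discriminator scores real pairs as 1 and generated pairs as 0.
        real_loss = self.bce(predict_real, torch.ones_like(predict_real))
        fake_loss = self.bce(predict_fake, torch.zeros_like(predict_fake))
        return self.weight * 0.5 * (real_loss + fake_loss)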
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--jaccard-weight', default=0.5, type=float)
    arg('--device-ids', type=str, default='0', help='For example 0,1 to run on two GPUs')
    arg('--fold', type=int, help='fold', default=0)
    arg('--root', default='runs/debug', help='checkpoint root')
    arg('--batch-size', type=int, default=1)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.001)
    arg('--workers', type=int, default=8)
    arg('--loss', type=str, default='BCE', choices=['BCE', 'StableBCE', 'Lovasz'])
    arg('--model', type=str, default='ResNext', choices=['WideResnet', 'WideResnetShort', 'ResNext'])

    args = parser.parse_args()

    root = Path(args.root)
    root.mkdir(exist_ok=True, parents=True)

    if args.model == 'ResNext':
        model = ResNext(pretrained=True)
    elif args.model == 'WideResnet':
        model = WideResnet(pretrained=True)
    elif args.model == 'WideResnetShort':
        model = WideResnetShort(pretrained=True)

    print('CUDA: {}'.format(torch.cuda.is_available()))
    if torch.cuda.is_available():
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))
        else:
            device_ids = None
        model = nn.DataParallel(model, device_ids=device_ids).cuda()

    if args.loss == 'Lovasz':
        loss = LossLovasz()
    elif args.loss == 'StableBCE':
        loss = LossStableBCE(jaccard_weight=args.jaccard_weight)
    else:
        loss = LossBinary(jaccard_weight=args.jaccard_weight)

    cudnn.benchmark = True

    def make_loader(file_names, shuffle=False, transform=None, batch_size=1):
        return DataLoader(
            dataset=SaltDataset(file_names, transform=transform),
            shuffle=shuffle,
            num_workers=args.workers,
            batch_size=batch_size,
            pin_memory=torch.cuda.is_available()
        )

    train_file_names, val_file_names = get_split(args.fold)
    print('num train = {}, num_val = {}'.format(len(train_file_names), len(val_file_names)))

    def train_transform(p=1):
        return Compose([
            # ShiftScaleRotate(p=0.5),
            HorizontalFlip(p=0.5),
            Blur(blur_limit=3, p=0.5),
            RandomContrast(p=0.3),
            RandomBrightness(p=0.3),
            ElasticTransform(p=0.3),
            Resize(202, 202, interpolation=cv2.INTER_NEAREST),
            PadIfNeeded(256, 256),
            Normalize(p=1)
        ], p=p)

    def val_transform(p=1):
        return Compose([
            Resize(202, 202, interpolation=cv2.INTER_NEAREST),
            PadIfNeeded(256, 256),
            Normalize(p=1)
        ], p=p)

    train_loader = make_loader(train_file_names, shuffle=True, transform=train_transform(p=1),
                               batch_size=args.batch_size)
    valid_loader = make_loader(val_file_names, transform=val_transform(p=1),
                               batch_size=args.batch_size)

    root.joinpath('params.json').write_text(
        json.dumps(vars(args), indent=True, sort_keys=True))

    valid = validation_binary

    utils.train(
        init_optimizer=lambda lr: Adam(model.parameters(), lr=lr),
        args=args,
        model=model,
        criterion=loss,
        train_loader=train_loader,
        valid_loader=valid_loader,
        validation=valid,
        fold=args.fold
    )