def test_b4_effunet32_s2():
    model = maybe_cuda(b4_effunet32_s2())
    x = maybe_cuda(torch.rand((2, 3, 512, 512)))
    output = model(x)
    print(count_parameters(model))
    for key, value in output.items():
        print(key, value.size(), value.mean(), value.std())

def test_models(model_name):
    model = maybe_cuda(get_model(model_name, pretrained=False).eval())
    x = maybe_cuda(torch.rand((2, 3, 512, 512)))
    output = model(x)
    print(model_name, count_parameters(model))
    for key, value in output.items():
        print(key, value.size(), value.mean(), value.std())

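# Note: `maybe_cuda` is imported from elsewhere in this repo. A minimal sketch
# consistent with how it is used here (it must accept both tensors and modules,
# since both expose `.cuda()`) would be:
#
#     def maybe_cuda(x):
#         return x.cuda() if torch.cuda.is_available() else x
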
def test_resnet18_encoder():
    encoder = maybe_cuda(E.Resnet18Encoder(layers=[0, 1, 2, 3, 4]).eval())
    x = maybe_cuda(torch.rand((4, 3, 512, 512)))
    feature_maps = encoder(x)
    # Five feature maps are requested; the first (layer 0) has stride 2, so 512 -> 256
    assert len(feature_maps) == 5
    assert feature_maps[0].size(2) == 256

def test_U2NET():
    model = U2NET()
    model = maybe_cuda(model.eval())
    x = maybe_cuda(torch.rand((2, 3, 512, 512)))
    output = model(x)
    print(count_parameters(model))
    for key, value in output.items():
        print(key, value.size(), value.mean(), value.std())

def test_b6_unet32_s2_rdtc():
    model = b6_unet32_s2_rdtc(need_supervision_masks=True)
    model = maybe_cuda(model.eval())
    x = maybe_cuda(torch.rand((2, 3, 512, 512)))
    output = model(x)
    print(count_parameters(model))
    for key, value in output.items():
        print(key, value.size(), value.mean(), value.std())

def test_jit_trace(encoder, encoder_params):
    model = encoder(**encoder_params).eval()
    print(model.__class__.__name__, count_parameters(model))
    print(model.strides)
    print(model.channels)

    dummy_input = torch.rand((1, 3, 256, 256))
    dummy_input = maybe_cuda(dummy_input)
    model = maybe_cuda(model)

    model = torch.jit.trace(model, dummy_input, check_trace=True)

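# The tests that take (encoder, encoder_params) are presumably parametrized by
# pytest. A sketch of what such a parametrization might look like (the concrete
# encoder list is an assumption, not taken from this file):
#
#     @pytest.mark.parametrize(
#         ("encoder", "encoder_params"),
#         [(E.Resnet18Encoder, {"layers": [0, 1, 2, 3, 4]})],
#     )
#     def test_jit_trace(encoder, encoder_params):
#         ...
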
def test_encoders(encoder: EncoderModule, encoder_params):
    with torch.no_grad():
        net = encoder(**encoder_params).eval()
        print(net.__class__.__name__, count_parameters(net))

        x = torch.rand((4, 3, 512, 512))
        x = maybe_cuda(x)
        net = maybe_cuda(net)
        output = net(x)

        assert len(output) == len(net.output_filters)
        for feature_map, expected_stride, expected_channels in zip(
                output, net.output_strides, net.output_filters):
            assert feature_map.size(1) == expected_channels
            assert feature_map.size(2) * expected_stride == 512
            assert feature_map.size(3) * expected_stride == 512

def test_encoders(encoder: E.EncoderModule, encoder_params):
    net = encoder(**encoder_params).eval()
    print(net.__class__.__name__, count_parameters(net))
    print(net.strides)
    print(net.channels)

    x = torch.rand((4, 3, 256, 256))
    x = maybe_cuda(x)
    net = maybe_cuda(net)
    output = net(x)

    assert len(output) == len(net.channels)
    for feature_map, expected_stride, expected_channels in zip(
            output, net.strides, net.channels):
        assert feature_map.size(1) == expected_channels
        assert feature_map.size(2) * expected_stride == 256
        assert feature_map.size(3) * expected_stride == 256

def test_hourglass_encoder(encoder, encoder_params):
    net = encoder(**encoder_params).eval()
    print(repr(net), count_parameters(net))
    print("Strides ", net.strides)
    print("Channels", net.channels)

    x = torch.rand((4, 3, 256, 256))
    x = maybe_cuda(x)
    net = maybe_cuda(net)
    output = net(x)

    assert len(output) == len(net.channels)
    for feature_map, expected_stride, expected_channels in zip(
            output, net.strides, net.channels):
        assert feature_map.size(1) == expected_channels
        assert feature_map.size(2) * expected_stride == 256
        assert feature_map.size(3) * expected_stride == 256

def test_unet_encoder():
    net = E.UnetEncoder().eval()
    print(net.__class__.__name__, count_parameters(net))
    print(net.output_strides)
    print(net.output_filters)

    x = torch.rand((4, 3, 256, 256))
    x = maybe_cuda(x)
    net = maybe_cuda(net)
    output = net(x)

    assert len(output) == len(net.output_filters)
    for feature_map, expected_stride, expected_channels in zip(
            output, net.output_strides, net.output_filters):
        print(feature_map.size(), feature_map.mean(), feature_map.std())
        assert feature_map.size(1) == expected_channels
        assert feature_map.size(2) * expected_stride == 256
        assert feature_map.size(3) * expected_stride == 256

def test_supervised_hourglass_encoder(encoder, encoder_params):
    net = encoder(**encoder_params).eval()
    print(net.__class__.__name__, count_parameters(net))
    print(net.output_strides)
    print(net.output_filters)

    x = torch.rand((4, 3, 256, 256))
    x = maybe_cuda(x)
    net = maybe_cuda(net)
    output, supervision = net(x)

    assert len(output) == len(net.output_filters)
    assert len(supervision) == len(net.output_filters) - 2

    for feature_map, expected_stride, expected_channels in zip(
            output, net.output_strides, net.output_filters):
        assert feature_map.size(1) == expected_channels
        assert feature_map.size(2) * expected_stride == 256
        assert feature_map.size(3) * expected_stride == 256

def main():
    images_dir = 'c:\\datasets\\ILSVRC2013_DET_val'

    canny_cnn = maybe_cuda(CannyModel())
    optimizer = Adam(canny_cnn.parameters(), lr=1e-4)

    images = find_images_in_dir(images_dir)
    train_images, valid_images = train_test_split(images, test_size=0.1, random_state=1234)

    num_workers = 6
    num_epochs = 100
    batch_size = 16

    # Debug switch: set to True to train on a tiny subset for a quick smoke run
    fast = False
    if fast:
        train_images = train_images[:batch_size * 4]
        valid_images = valid_images[:batch_size * 4]

    train_loader = DataLoader(EdgesDataset(train_images),
                              batch_size=batch_size,
                              num_workers=num_workers,
                              shuffle=True,
                              drop_last=True,
                              pin_memory=True)
    valid_loader = DataLoader(EdgesDataset(valid_images),
                              batch_size=batch_size,
                              num_workers=num_workers,
                              pin_memory=True)

    loaders = collections.OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 40], gamma=0.3)

    # model runner
    runner = SupervisedRunner()

    # checkpoint = UtilsFactory.load_checkpoint("logs/checkpoints//best.pth")
    # UtilsFactory.unpack_checkpoint(checkpoint, model=canny_cnn)

    # model training
    runner.train(
        model=canny_cnn,
        criterion=FocalLoss(),
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=[
            JaccardCallback(),
            ShowPolarBatchesCallback(visualize_canny_predictions, metric='jaccard', minimize=False),
            EarlyStoppingCallback(patience=5, min_delta=0.01, metric='jaccard', minimize=False),
        ],
        loaders=loaders,
        logdir='logs',
        num_epochs=num_epochs,
        verbose=True,
        main_metric='jaccard',
        minimize_metric=False
        # check=True
    )

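# For context: `EdgesDataset` is defined elsewhere. A minimal sketch of the
# target generation it presumably performs (an assumption: OpenCV's Canny
# detector used as ground truth for the edge-prediction CNN trained above):
#
#     import cv2
#     import numpy as np
#
#     def make_edge_target(image_bgr: np.ndarray) -> np.ndarray:
#         # Binary edge mask in {0, 1}, suitable as a segmentation-style target
#         gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
#         edges = cv2.Canny(gray, threshold1=100, threshold2=200)
#         return (edges > 0).astype(np.float32)
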
def test_onnx_export(encoder, encoder_params):
    import onnx

    model = encoder(**encoder_params).eval()
    print(model.__class__.__name__, count_parameters(model))
    print(model.strides)
    print(model.channels)

    dummy_input = torch.rand((1, 3, 256, 256))
    dummy_input = maybe_cuda(dummy_input)
    model = maybe_cuda(model)

    input_names = ["image"]
    output_names = [f"feature_map_{i}" for i in range(len(model.channels))]

    torch.onnx.export(model, dummy_input, "tmp.onnx",
                      verbose=True,
                      input_names=input_names,
                      output_names=output_names)

    model = onnx.load("tmp.onnx")
    onnx.checker.check_model(model)

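# Optional follow-up check (a sketch, assuming onnxruntime is installed): run
# the exported graph once to verify it executes end to end. The helper name is
# hypothetical; "image" matches the input_names used in the export above.
def check_onnx_runs(onnx_path="tmp.onnx"):
    import numpy as np
    import onnxruntime as ort

    session = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
    dummy = np.random.rand(1, 3, 256, 256).astype(np.float32)
    outputs = session.run(None, {"image": dummy})
    print([o.shape for o in outputs])
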
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=42, help='Random seed')
    parser.add_argument('--fast', action='store_true')
    parser.add_argument('--fp16', action='store_true')
    parser.add_argument('-dd', '--data-dir', type=str, required=True, help='Data directory for INRIA satellite dataset')
    parser.add_argument('-m', '--model', type=str, default='unet', help='')
    parser.add_argument('-b', '--batch-size', type=int, default=8, help='Batch Size during training, e.g. -b 64')
    parser.add_argument('-e', '--epochs', type=int, default=150, help='Epoch to run')
    parser.add_argument('-es', '--early-stopping', type=int, default=None, help='Maximum number of epochs without improvement')
    # parser.add_argument('-f', '--fold', default=None, required=True, type=int, help='Fold to train')
    # parser.add_argument('-fe', '--freeze-encoder', type=int, default=0, help='Freeze encoder parameters for N epochs')
    # parser.add_argument('-ft', '--fine-tune', action='store_true')
    parser.add_argument('-lr', '--learning-rate', type=float, default=1e-3, help='Initial learning rate')
    parser.add_argument('-l', '--criterion', type=str, default='bce', help='Criterion')
    parser.add_argument('-o', '--optimizer', default='Adam', help='Name of the optimizer')
    parser.add_argument('-c', '--checkpoint', type=str, default=None, help='Checkpoint filename to use as initial model weights')
    parser.add_argument('-w', '--workers', default=8, type=int, help='Num workers')

    args = parser.parse_args()
    set_manual_seed(args.seed)

    data_dir = args.data_dir
    num_workers = args.workers
    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    model_name = args.model
    optimizer_name = args.optimizer
    image_size = (512, 512)

    train_loader, valid_loader = get_dataloaders(data_dir=data_dir,
                                                 batch_size=batch_size,
                                                 num_workers=num_workers,
                                                 image_size=image_size,
                                                 fast=args.fast)

    model = maybe_cuda(get_model(model_name, image_size=image_size))
    criterion = get_loss(args.criterion)
    optimizer = get_optimizer(optimizer_name, model.parameters(), learning_rate)

    loaders = collections.OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 40], gamma=0.3)

    # model runner
    runner = SupervisedRunner()

    if args.checkpoint:
        checkpoint = UtilsFactory.load_checkpoint(auto_file(args.checkpoint))
        UtilsFactory.unpack_checkpoint(checkpoint, model=model)

        checkpoint_epoch = checkpoint['epoch']
        print('Loaded model weights from', args.checkpoint)
        print('Epoch  :', checkpoint_epoch)
        print('Metrics:', checkpoint['epoch_metrics'])

        # try:
        #     UtilsFactory.unpack_checkpoint(checkpoint, optimizer=optimizer)
        # except Exception as e:
        #     print('Failed to restore optimizer state', e)

        # try:
        #     UtilsFactory.unpack_checkpoint(checkpoint, scheduler=scheduler)
        # except Exception as e:
        #     print('Failed to restore scheduler state', e)

    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model}_{args.criterion}'
    log_dir = os.path.join('runs', prefix)
    os.makedirs(log_dir, exist_ok=False)

    print('Train session:', prefix)
    print('\tFast mode  :', args.fast)
    print('\tEpochs     :', num_epochs)
    print('\tWorkers    :', num_workers)
    print('\tData dir   :', data_dir)
    print('\tLog dir    :', log_dir)
    print('\tTrain size :', len(train_loader), len(train_loader.dataset))
    print('\tValid size :', len(valid_loader), len(valid_loader.dataset))
    print('Model:', model_name)
    print('\tParameters:', count_parameters(model))
    print('\tImage size:', image_size)
    print('Optimizer:', optimizer_name)
    print('\tLearning rate:', learning_rate)
    print('\tBatch size   :', batch_size)
    print('\tCriterion    :', args.criterion)

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=[
            # OneCycleLR(
            #     cycle_len=num_epochs,
            #     div_factor=10,
            #     increase_fraction=0.3,
            #     momentum_range=(0.95, 0.85)),
            PixelAccuracyMetric(),
            EpochJaccardMetric(),
            ShowPolarBatchesCallback(visualize_inria_predictions, metric='accuracy', minimize=False),
            # EarlyStoppingCallback(patience=5, min_delta=0.01, metric='jaccard', minimize=False),
        ],
        loaders=loaders,
        logdir=log_dir,
        num_epochs=num_epochs,
        verbose=True,
        main_metric='jaccard',
        minimize_metric=False,
        state_kwargs={"cmd_args": vars(args)})

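# Example invocation of this entry point (a sketch; the script name and data
# path are placeholders, the flags come from the argparse definitions above):
#
#     python train.py -dd /path/to/inria -m unet -b 8 -e 150 -lr 1e-3 -l bce -o Adam
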
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=42, help='Random seed')
    parser.add_argument('--fast', action='store_true')
    parser.add_argument('-dd', '--data-dir', type=str, default='data', help='Data directory for INRIA satellite dataset')
    parser.add_argument('-m', '--model', type=str, default='cls_resnet18', help='')
    parser.add_argument('-b', '--batch-size', type=int, default=8, help='Batch Size during training, e.g. -b 64')
    parser.add_argument('-e', '--epochs', type=int, default=100, help='Epoch to run')
    parser.add_argument('-es', '--early-stopping', type=int, default=None, help='Maximum number of epochs without improvement')
    parser.add_argument('-fe', '--freeze-encoder', action='store_true')
    parser.add_argument('-lr', '--learning-rate', type=float, default=1e-4, help='Initial learning rate')
    parser.add_argument('-l', '--criterion', type=str, default='bce', help='Criterion')
    parser.add_argument('-o', '--optimizer', default='Adam', help='Name of the optimizer')
    parser.add_argument('-c', '--checkpoint', type=str, default=None, help='Checkpoint filename to use as initial model weights')
    parser.add_argument('-w', '--workers', default=multiprocessing.cpu_count(), type=int, help='Num workers')
    parser.add_argument('-a', '--augmentations', default='hard', type=str, help='')
    parser.add_argument('-tta', '--tta', default=None, type=str, help='Type of TTA to use [fliplr, d4]')
    parser.add_argument('-tm', '--train-mode', default='random', type=str, help='')
    parser.add_argument('-rm', '--run-mode', default='fit_predict', type=str, help='')
    parser.add_argument('--transfer', default=None, type=str, help='')
    parser.add_argument('--fp16', action='store_true')

    args = parser.parse_args()
    set_manual_seed(args.seed)

    data_dir = args.data_dir
    num_workers = args.workers
    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    early_stopping = args.early_stopping
    model_name = args.model
    optimizer_name = args.optimizer
    image_size = (512, 512)
    fast = args.fast
    augmentations = args.augmentations
    train_mode = args.train_mode
    run_mode = args.run_mode
    log_dir = None
    fp16 = args.fp16
    freeze_encoder = args.freeze_encoder

    run_train = run_mode == 'fit_predict' or run_mode == 'fit'
    run_predict = run_mode == 'fit_predict' or run_mode == 'predict'

    model = maybe_cuda(get_model(model_name, num_classes=1))

    if args.transfer:
        transfer_checkpoint = fs.auto_file(args.transfer)
        print("Transferring weights from model checkpoint", transfer_checkpoint)
        checkpoint = load_checkpoint(transfer_checkpoint)
        pretrained_dict = checkpoint['model_state_dict']

        # Load weights tensor-by-tensor so incompatible entries are skipped gracefully
        for name, value in pretrained_dict.items():
            try:
                model.load_state_dict(collections.OrderedDict([(name, value)]), strict=False)
            except Exception as e:
                print(e)

    checkpoint = None
    if args.checkpoint:
        checkpoint = load_checkpoint(fs.auto_file(args.checkpoint))
        unpack_checkpoint(checkpoint, model=model)

        checkpoint_epoch = checkpoint['epoch']
        print('Loaded model weights from:', args.checkpoint)
        print('Epoch   :', checkpoint_epoch)
        print('Metrics (Train):',
              'f1  :', checkpoint['epoch_metrics']['train']['f1_score'],
              'loss:', checkpoint['epoch_metrics']['train']['loss'])
        print('Metrics (Valid):',
              'f1  :', checkpoint['epoch_metrics']['valid']['f1_score'],
              'loss:', checkpoint['epoch_metrics']['valid']['loss'])

        log_dir = os.path.dirname(os.path.dirname(fs.auto_file(args.checkpoint)))

    if run_train:
        if freeze_encoder:
            set_trainable(model.encoder, trainable=False, freeze_bn=True)

        criterion = get_loss(args.criterion)
        parameters = get_optimizable_parameters(model)
        optimizer = get_optimizer(optimizer_name, parameters, learning_rate)

        if checkpoint is not None:
            try:
                unpack_checkpoint(checkpoint, optimizer=optimizer)
                print('Restored optimizer state from checkpoint')
            except Exception as e:
                print('Failed to restore optimizer state from checkpoint', e)

        train_loader, valid_loader = get_dataloaders(data_dir=data_dir,
                                                     batch_size=batch_size,
                                                     num_workers=num_workers,
                                                     image_size=image_size,
                                                     augmentation=augmentations,
                                                     fast=fast)

        loaders = collections.OrderedDict()
        loaders["train"] = train_loader
        loaders["valid"] = valid_loader

        current_time = datetime.now().strftime('%b%d_%H_%M')
        prefix = f'adversarial/{args.model}/{current_time}_{args.criterion}'

        if fp16:
            prefix += '_fp16'

        if fast:
            prefix += '_fast'

        log_dir = os.path.join('runs', prefix)
        os.makedirs(log_dir, exist_ok=False)

        scheduler = MultiStepLR(optimizer, milestones=[10, 30, 50, 70, 90], gamma=0.5)

        print('Train session    :', prefix)
        print('\tFP16 mode      :', fp16)
        print('\tFast mode      :', args.fast)
        print('\tTrain mode     :', train_mode)
        print('\tEpochs         :', num_epochs)
        print('\tEarly stopping :', early_stopping)
        print('\tWorkers        :', num_workers)
        print('\tData dir       :', data_dir)
        print('\tLog dir        :', log_dir)
        print('\tAugmentations  :', augmentations)
        print('\tTrain size     :', len(train_loader), len(train_loader.dataset))
        print('\tValid size     :', len(valid_loader), len(valid_loader.dataset))
        print('Model            :', model_name)
        print('\tParameters     :', count_parameters(model))
        print('\tImage size     :', image_size)
        print('\tFreeze encoder :', freeze_encoder)
        print('Optimizer        :', optimizer_name)
        print('\tLearning rate  :', learning_rate)
        print('\tBatch size     :', batch_size)
        print('\tCriterion      :', args.criterion)

        # model training
        visualization_fn = partial(draw_classification_predictions, class_names=['Train', 'Test'])

        callbacks = [
            F1ScoreCallback(),
            AUCCallback(),
            ShowPolarBatchesCallback(visualization_fn, metric='f1_score', minimize=False),
        ]

        if early_stopping:
            callbacks += [EarlyStoppingCallback(early_stopping, metric='auc', minimize=False)]

        runner = SupervisedRunner(input_key='image')
        runner.train(fp16=fp16,
                     model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     callbacks=callbacks,
                     loaders=loaders,
                     logdir=log_dir,
                     num_epochs=num_epochs,
                     verbose=True,
                     main_metric='auc',
                     minimize_metric=False,
                     state_kwargs={"cmd_args": vars(args)})

    if run_predict and not fast:
        # Training is finished. Let's run predictions using best checkpoint weights
        best_checkpoint = load_checkpoint(fs.auto_file('best.pth', where=log_dir))
        unpack_checkpoint(best_checkpoint, model=model)

        model.eval()

        train_csv = pd.read_csv(os.path.join(data_dir, 'train.csv'))
        train_csv['id_code'] = train_csv['id_code'].apply(lambda x: os.path.join(data_dir, 'train_images', f'{x}.png'))

        test_ds = RetinopathyDataset(train_csv['id_code'], None, get_test_aug(image_size), target_as_array=True)
        test_dl = DataLoader(test_ds, batch_size, pin_memory=True, num_workers=num_workers)

        test_ids = []
        test_preds = []

        with torch.no_grad():  # disable autograd during inference
            for batch in tqdm(test_dl, desc='Inference'):
                image = batch['image'].cuda()
                outputs = model(image)
                predictions = to_numpy(outputs['logits'].sigmoid().squeeze(1))
                test_ids.extend(batch['image_id'])
                test_preds.extend(predictions)

        df = pd.DataFrame.from_dict({'id_code': test_ids, 'is_test': test_preds})
        df.to_csv(os.path.join(log_dir, 'test_in_train.csv'), index=False)