def train(args):
    set_random_seed(42)
    model = get_model(args.network)
    print('Loading model')
    model.encoder.conv1 = nn.Conv2d(
        count_channels(args.channels), 64, kernel_size=(7, 7),
        stride=(2, 2), padding=(3, 3), bias=False)
    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')

    ds = Dataset(args.channels, args.dataset_path, args.image_size,
                 args.batch_size, args.num_workers)
    loaders = ds.create_loaders(train_df, val_df)
    print(loaders['train'].dataset.data)  # sanity check: confirm the records made it into the train dataset

    criterion = BCE_Dice_Loss(bce_weight=0.2)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10, 20, 40], gamma=0.3
    )

    save_path = os.path.join(
        args.logdir,
        '_'.join([args.network, *args.channels])
    )

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            DiceCallback()
        ],
        logdir=save_path,
        num_epochs=args.epochs,
        verbose=True
    )

    infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
            InferCallback()
        ],
    )
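
This example leans on several project-local helpers (get_model, count_channels, Dataset, BCE_Dice_Loss) that are not Catalyst built-ins. As a hedged sketch only, a combined BCE/Dice criterion with the same constructor signature could look like this, assuming logits as input and binary masks as targets:

import torch
import torch.nn as nn

class BCE_Dice_Loss(nn.Module):
    # Weighted sum of BCE-with-logits and soft Dice; bce_weight balances the two terms.
    def __init__(self, bce_weight=0.2, eps=1e-7):
        super().__init__()
        self.bce_weight = bce_weight
        self.eps = eps
        self.bce = nn.BCEWithLogitsLoss()

    def forward(self, logits, targets):
        probs = torch.sigmoid(logits)
        intersection = (probs * targets).sum()
        dice = (2 * intersection + self.eps) / (probs.sum() + targets.sum() + self.eps)
        return self.bce_weight * self.bce(logits, targets) + (1 - self.bce_weight) * (1 - dice)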
Example #2
def main():
    images_dir = 'c:\\datasets\\ILSVRC2013_DET_val'

    canny_cnn = maybe_cuda(CannyModel())
    optimizer = Adam(canny_cnn.parameters(), lr=1e-4)

    images = find_images_in_dir(images_dir)
    train_images, valid_images = train_test_split(images, test_size=0.1, random_state=1234)

    num_workers = 6
    num_epochs = 100
    batch_size = 16

    fast_check = False  # flip to True for a quick smoke test on a few batches
    if fast_check:
        train_images = train_images[:batch_size * 4]
        valid_images = valid_images[:batch_size * 4]

    train_loader = DataLoader(EdgesDataset(train_images), batch_size=batch_size, num_workers=num_workers, shuffle=True,
                              drop_last=True, pin_memory=True)
    valid_loader = DataLoader(EdgesDataset(valid_images), batch_size=batch_size, num_workers=num_workers,
                              pin_memory=True)

    loaders = collections.OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 40], gamma=0.3)

    # model runner
    runner = SupervisedRunner()
    # checkpoint = UtilsFactory.load_checkpoint("logs/checkpoints//best.pth")
    # UtilsFactory.unpack_checkpoint(checkpoint, model=canny_cnn)

    # model training
    runner.train(
        model=canny_cnn,
        criterion=FocalLoss(),
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=[
            JaccardCallback(),
            ShowPolarBatchesCallback(visualize_canny_predictions, metric='jaccard', minimize=False),
            EarlyStoppingCallback(patience=5, min_delta=0.01, metric='jaccard', minimize=False),
        ],
        loaders=loaders,
        logdir='logs',
        num_epochs=num_epochs,
        verbose=True,
        main_metric='jaccard',
        minimize_metric=False
        # check=True
    )
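
maybe_cuda and find_images_in_dir are helpers from the surrounding project; plausible minimal versions, shown only to make the example self-contained:

import glob
import os
import torch

def maybe_cuda(module):
    # Move a module to GPU when one is available, otherwise leave it on CPU.
    return module.cuda() if torch.cuda.is_available() else module

def find_images_in_dir(images_dir, extensions=('.jpg', '.jpeg', '.png')):
    # Recursively collect image file paths under images_dir.
    return sorted(p for p in glob.glob(os.path.join(images_dir, '**', '*'), recursive=True)
                  if p.lower().endswith(extensions))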
Example #3
def train(args):
    set_random_seed(42)
    for fold in range(args.folds):
        model = get_model(args.network)

        print("Loading model")
        model, device = UtilsFactory.prepare_model(model)
        train_df = pd.read_csv(
            os.path.join(args.dataset_path,
                         f'train{fold}.csv')).to_dict('records')
        val_df = pd.read_csv(os.path.join(args.dataset_path,
                                          f'val{fold}.csv')).to_dict('records')

        ds = Dataset(args.channels, args.dataset_path, args.image_size,
                     args.batch_size, args.num_workers)
        loaders = ds.create_loaders(train_df, val_df)

        criterion = BCE_Dice_Loss(bce_weight=0.2)

        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[10, 20, 40], gamma=0.3)

        # model runner
        runner = SupervisedRunner()

        save_path = os.path.join(args.logdir, f'fold{fold}')

        # model training
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[DiceCallback()],
                     logdir=save_path,
                     num_epochs=args.epochs,
                     verbose=True)

        infer_loader = collections.OrderedDict([("infer", loaders["valid"])])
        runner.infer(
            model=model,
            loaders=infer_loader,
            callbacks=[
                CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
                InferCallback()
            ],
        )

        print(f'Fold {fold} ended')
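
The per-fold train{fold}.csv / val{fold}.csv files read above are assumed to exist already; one way to produce them (a sketch, with dataset.csv as a hypothetical index of all samples) is scikit-learn's KFold:

import pandas as pd
from sklearn.model_selection import KFold

df = pd.read_csv('dataset.csv')  # hypothetical: one row per sample
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, val_idx) in enumerate(kf.split(df)):
    df.iloc[train_idx].to_csv(f'train{fold}.csv', index=False)
    df.iloc[val_idx].to_csv(f'val{fold}.csv', index=False)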
Example #4
def train(args):
    model = Autoencoder_Unet(encoder_name='resnet50')

    print("Loading model")
    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')
    ds = AutoencoderDataset(args.channels, args.dataset_path, args.image_size,
                            args.batch_size, args.num_workers)
    loaders = ds.create_loaders(train_df, val_df)

    criterion = MSELoss()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-3,
                                 weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[10, 20, 40],
                                                     gamma=0.3)

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 callbacks=[],
                 scheduler=scheduler,
                 loaders=loaders,
                 logdir=args.logdir,
                 num_epochs=args.epochs,
                 verbose=True)

    infer_loader = collections.OrderedDict([("infer", loaders["valid"])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f"{args.logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=42, help='Random seed')
    parser.add_argument('--fast', action='store_true')
    parser.add_argument('--fp16', action='store_true')
    parser.add_argument('-dd',
                        '--data-dir',
                        type=str,
                        required=True,
                        help='Data directory for INRIA satellite dataset')
    parser.add_argument('-m', '--model', type=str, default='unet', help='Model name')
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=8,
                        help='Batch Size during training, e.g. -b 64')
    parser.add_argument('-e',
                        '--epochs',
                        type=int,
                        default=150,
                        help='Epoch to run')
    parser.add_argument('-es',
                        '--early-stopping',
                        type=int,
                        default=None,
                        help='Maximum number of epochs without improvement')
    # parser.add_argument('-f', '--fold', default=None, required=True, type=int, help='Fold to train')
    # parser.add_argument('-fe', '--freeze-encoder', type=int, default=0, help='Freeze encoder parameters for N epochs')
    # parser.add_argument('-ft', '--fine-tune', action='store_true')
    parser.add_argument('-lr',
                        '--learning-rate',
                        type=float,
                        default=1e-3,
                        help='Initial learning rate')
    parser.add_argument('-l',
                        '--criterion',
                        type=str,
                        default='bce',
                        help='Criterion')
    parser.add_argument('-o',
                        '--optimizer',
                        default='Adam',
                        help='Name of the optimizer')
    parser.add_argument(
        '-c',
        '--checkpoint',
        type=str,
        default=None,
        help='Checkpoint filename to use as initial model weights')
    parser.add_argument('-w',
                        '--workers',
                        default=8,
                        type=int,
                        help='Num workers')

    args = parser.parse_args()
    set_manual_seed(args.seed)

    data_dir = args.data_dir
    num_workers = args.workers
    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    model_name = args.model
    optimizer_name = args.optimizer
    image_size = (512, 512)

    train_loader, valid_loader = get_dataloaders(data_dir=data_dir,
                                                 batch_size=batch_size,
                                                 num_workers=num_workers,
                                                 image_size=image_size,
                                                 fast=args.fast)

    model = maybe_cuda(get_model(model_name, image_size=image_size))
    criterion = get_loss(args.criterion)
    optimizer = get_optimizer(optimizer_name, model.parameters(),
                              learning_rate)

    loaders = collections.OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[10, 20, 40],
                                                     gamma=0.3)

    # model runner
    runner = SupervisedRunner()

    if args.checkpoint:
        checkpoint = UtilsFactory.load_checkpoint(auto_file(args.checkpoint))
        UtilsFactory.unpack_checkpoint(checkpoint, model=model)

        checkpoint_epoch = checkpoint['epoch']
        print('Loaded model weights from', args.checkpoint)
        print('Epoch   :', checkpoint_epoch)
        print('Metrics:', checkpoint['epoch_metrics'])

        # try:
        #     UtilsFactory.unpack_checkpoint(checkpoint, optimizer=optimizer)
        # except Exception as e:
        #     print('Failed to restore optimizer state', e)

        # try:
        #     UtilsFactory.unpack_checkpoint(checkpoint, scheduler=scheduler)
        # except Exception as e:
        #     print('Failed to restore scheduler state', e)


    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model}_{args.criterion}'
    log_dir = os.path.join('runs', prefix)
    os.makedirs(log_dir, exist_ok=False)

    print('Train session:', prefix)
    print('\tFast mode  :', args.fast)
    print('\tEpochs     :', num_epochs)
    print('\tWorkers    :', num_workers)
    print('\tData dir   :', data_dir)
    print('\tLog dir    :', log_dir)
    print('\tTrain size :', len(train_loader), len(train_loader.dataset))
    print('\tValid size :', len(valid_loader), len(valid_loader.dataset))
    print('Model:', model_name)
    print('\tParameters:', count_parameters(model))
    print('\tImage size:', image_size)
    print('Optimizer:', optimizer_name)
    print('\tLearning rate:', learning_rate)
    print('\tBatch size   :', batch_size)
    print('\tCriterion    :', args.criterion)

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=[
            # OneCycleLR(
            #     cycle_len=num_epochs,
            #     div_factor=10,
            #     increase_fraction=0.3,
            #     momentum_range=(0.95, 0.85)),
            PixelAccuracyMetric(),
            EpochJaccardMetric(),
            ShowPolarBatchesCallback(visualize_inria_predictions,
                                     metric='accuracy',
                                     minimize=False),
            # EarlyStoppingCallback(patience=5, min_delta=0.01, metric='jaccard', minimize=False),
        ],
        loaders=loaders,
        logdir=log_dir,
        num_epochs=num_epochs,
        verbose=True,
        main_metric='jaccard',
        minimize_metric=False,
        state_kwargs={"cmd_args": vars(args)})
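
get_loss and get_optimizer are again project-local factories (and a later example calls get_optimizer with a different argument order, so treat this as one variant only). A minimal sketch that maps names onto standard PyTorch classes:

import torch
import torch.nn as nn

def get_loss(name):
    # Map a short name onto a criterion; extend the table as needed.
    return {'bce': nn.BCEWithLogitsLoss(), 'mse': nn.MSELoss()}[name]

def get_optimizer(name, parameters, lr):
    # Look the optimizer class up by name on torch.optim, e.g. 'Adam' or 'SGD'.
    return getattr(torch.optim, name)(parameters, lr=lr)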
Example #6
logdir = "./logs/cifar_simple_notebook_1"

# model, criterion, optimizer
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    check=True
)

# In[ ]:

# you can use plotly and tensorboard to plot metrics inside jupyter
# by default it only plots loss
# logs_plot = UtilsFactory.plot_metrics(logdir=logdir)

# # Setup 2 - training with scheduler

# In[ ]:
Example #7
def train(args):
    set_random_seed(42)
    if args.model == 'lstm_diff':
        model = ULSTMNet(count_channels(args.channels), 1, args.image_size)
    elif args.model == 'lstm_decoder':
        model = Unet_LstmDecoder(count_channels(args.channels), all_masks=args.allmasks)
    else:
        print('Unknown LSTM model; falling back to the default ULSTMNet.')
        model = ULSTMNet(count_channels(args.channels), 1, args.image_size)
    
    if torch.cuda.is_available():
        model.cuda()
    print('Loading model')

    model, device = UtilsFactory.prepare_model(model)
    print(device)

    optimizer = get_optimizer(args.optimizer, args.lr, model)
    criterion = get_loss(args.loss)    
    
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10, 40, 80, 150, 300], gamma=0.2
    )

    save_path = os.path.join(
        args.logdir,
        args.name
    )
    
    os.makedirs(save_path, exist_ok=True)

    train_df = pd.read_csv(args.train_df)
    val_df = pd.read_csv(args.val_df)

    train_dataset = LstmDataset(args.neighbours, train_df, 'train', args.channels,
                                args.dataset_path, args.image_size, args.batch_size, args.allmasks)
    valid_dataset = LstmDataset(args.neighbours, val_df, 'valid', args.channels,
                                args.dataset_path, args.image_size, args.batch_size, args.allmasks)

    sampler = None  # hook for a custom sampler factory; None falls back to plain shuffling
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=sampler is None, num_workers=args.num_workers,
                              sampler=sampler(train_df) if sampler is not None else None)
    valid_loader = DataLoader(valid_dataset, batch_size=1,
                              shuffle=False, num_workers=args.num_workers)

    loaders = collections.OrderedDict()

    loaders['train'] = train_loader
    loaders['valid'] = valid_loader

    runner = SupervisedRunner()
    if args.model_weights_path:
        checkpoint = torch.load(args.model_weights_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
    
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            DiceCallback()
        ],
        logdir=save_path,
        num_epochs=args.epochs,
        verbose=True
    )

    infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
            InferCallback()
        ],
    )
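
The train loader above exposes a sampler hook (set to None in this example, which keeps plain shuffling). If the dataset is imbalanced, a hypothetical factory could build a WeightedRandomSampler from a label column of the dataframe:

from torch.utils.data import WeightedRandomSampler

def make_sampler(df, label_col='label'):
    # Hypothetical: weight each row inversely to its class frequency.
    freq = df[label_col].value_counts()
    weights = df[label_col].map(lambda c: 1.0 / freq[c]).to_numpy()
    return WeightedRandomSampler(weights, num_samples=len(df))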

Example #8
def main():
    DATA_DIR = os.path.join(os.path.dirname(__file__), 'CamVid')

    x_train_dir = os.path.join(DATA_DIR, 'train')
    y_train_dir = os.path.join(DATA_DIR, 'trainannot')

    x_valid_dir = os.path.join(DATA_DIR, 'val')
    y_valid_dir = os.path.join(DATA_DIR, 'valannot')

    x_test_dir = os.path.join(DATA_DIR, 'test')
    y_test_dir = os.path.join(DATA_DIR, 'testannot')

    train_ds = CamVidDataset(x_train_dir, y_train_dir,
                             transform=get_training_augmentation())
    valid_ds = CamVidDataset(x_valid_dir, y_valid_dir,
                             transform=get_validation_augmentation())
    test_ds = CamVidDataset(x_test_dir, y_test_dir,
                            transform=get_validation_augmentation())

    data_loaders = OrderedDict()
    num_train_samples = len(train_ds)
    mul_factor = 10
    batch_size = 4 * max(1, torch.cuda.device_count())  # scale with GPU count; floor of 4 on CPU-only machines
    data_loaders['train'] = DataLoader(train_ds,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       num_workers=8,
                                       pin_memory=True,
                                       sampler=WeightedRandomSampler(np.ones(num_train_samples),
                                                                     num_train_samples * mul_factor))

    data_loaders['valid'] = DataLoader(valid_ds,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       num_workers=0,
                                       pin_memory=True)

    data_loaders['test'] = DataLoader(test_ds,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=0,
                                      pin_memory=True)

    print(len(train_ds), len(valid_ds))

    num_classes = len(CLASSES)
    model = effnetB7_fpn(num_classes).cuda()

    # model runner
    runner = SupervisedRunner()

    optimizer = Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[50, 75, 100, 125, 130, 135, 140, 145],
                                                     gamma=0.5)

    # model training
    runner.train(
        model=model,
        criterion=nn.CrossEntropyLoss(),
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=[
            MulticlassIoUCallback(prefix='iou'),
            ShowPolarBatchesCallback(visualize_predictions, metric='iou', minimize=True),
        ],
        logdir='runs/effnetB7_fpn',
        loaders=data_loaders,
        num_epochs=150,
        verbose=True,
        main_metric='iou',
        minimize_metric=False
    )
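
Note that the WeightedRandomSampler above is built with uniform weights, so it does no reweighting at all: it simply draws num_train_samples * mul_factor indices with replacement, i.e. roughly ten passes over the training set per "epoch". The same idea in isolation:

import numpy as np
from torch.utils.data import WeightedRandomSampler

n = 1000
sampler = WeightedRandomSampler(np.ones(n), num_samples=n * 10)  # ~10x oversampling per epoch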
Example #9
def train(args):
    set_random_seed(42)
    model = get_model(args.network, args.classification_head)
    print('Loading model')

    model.encoder.conv1 = nn.Conv2d(count_channels(args.channels) *
                                    args.neighbours,
                                    64,
                                    kernel_size=(7, 7),
                                    stride=(2, 2),
                                    padding=(3, 3),
                                    bias=False)

    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')

    ds = Dataset(args.channels, args.dataset_path, args.image_size,
                 args.batch_size, args.num_workers, args.neighbours,
                 args.classification_head)
    loaders = ds.create_loaders(train_df, val_df)

    save_path = os.path.join(args.logdir, args.name)

    optimizer = get_optimizer(args.optimizer, args.lr, model)

    if not args.classification_head:
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[10, 40, 80, 150, 300], gamma=0.1)

        criterion = get_loss(args.loss)

        runner = SupervisedRunner()
        if args.model_weights_path:
            checkpoint = torch.load(args.model_weights_path,
                                    map_location='cpu')
            model.load_state_dict(checkpoint['model_state_dict'])

        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[DiceCallback()],
                     logdir=save_path,
                     num_epochs=args.epochs,
                     verbose=True)

        infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
        runner.infer(
            model=model,
            loaders=infer_loader,
            callbacks=[
                CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
                InferCallback()
            ],
        )
    else:
        criterion = get_loss('multi')
        net = Model(model,
                    optimizer,
                    criterion,
                    batch_metrics=[
                        classification_head_accuracy, segmentation_head_dice
                    ])
        net = net.to(device)
        net.fit_generator(loaders['train'],
                          loaders['valid'],
                          epochs=args.epochs,
                          callbacks=[
                              ModelCheckpoint(f'{save_path}/checkpoints/best.pth'),
                              MultiStepLR(milestones=[10, 40, 80, 150, 300],
                                          gamma=0.1)
                          ])
Example #10
# model, criterion, optimizer
model = Unet(num_classes=1, in_channels=1, num_channels=32, num_blocks=2)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[10, 20, 40],
                                                 gamma=0.3)

# model runner
runner = SupervisedRunner()

# model training
runner.train(model=model,
             criterion=criterion,
             optimizer=optimizer,
             loaders=loaders,
             logdir=logdir,
             num_epochs=num_epochs,
             check=True)

# # Inference

# In[ ]:

from catalyst.dl.callbacks import InferCallback, CheckpointCallback
loaders = collections.OrderedDict([("infer", loaders["valid"])])
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=[
        CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
        InferCallback()
    ],
)
Example #11
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('-c', '--config_path', default='configs/dataset.json', type=str)
    arg('-f', '--fold_id', default=0, type=int, help='Fold id.')
    arg('-l',
        '--log_dir',
        default='./logdir',
        type=str,
        help='Path to store logs')
    arg('-n', '--num_epochs', default=42, type=int, help='Number of epochs.')
    arg('-m',
        '--model_name',
        default='resnet18',
        type=str,
        help='Name of the network',
        choices=encoders)
    arg('-j',
        '--num_workers',
        default=1,
        type=int,
        help='Number of CPU threads to use.')
    arg('-b', '--batch_size', default=1, type=int, help='Size of the batch')
    arg('--lr', default=0.0001, type=float, help='Learning Rate')
    arg('--jaccard_weight',
        default=0.3,
        type=float,
        help='Weight for soft Jaccard in loss.')
    arg('--num_folds', default=5, type=int, help='Number of folds.')
    args = parser.parse_args()

    with open(args.config_path) as f:
        config = json.load(f)

    data_path = Path(config['data_path']).absolute().expanduser()

    train_file_names, val_files_names = get_train_val_image_paths(
        data_path / 'train', args.fold_id, args.num_folds)

    train_transform = augmentations.get_train_transform()
    val_transform = augmentations.get_val_transform()

    train_loader = get_loader(train_file_names,
                              shuffle=True,
                              transform=train_transform,
                              num_workers=args.num_workers,
                              batch_size=args.batch_size)
    val_loader = get_loader(val_files_names,
                            shuffle=False,
                            transform=val_transform,
                            num_workers=args.num_workers,
                            batch_size=args.batch_size)

    # data
    loaders = OrderedDict({"train": train_loader, "valid": val_loader})

    # model, criterion, optimizer
    model = get_model(args.model_name)

    criterion = LossBinary(jaccard_weight=args.jaccard_weight)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[10, 20, 30],
                                                     gamma=0.3)

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        callbacks=[EpochJaccardMetric()],
        scheduler=scheduler,
        logdir=args.log_dir + '_' + args.model_name + '_' + str(args.fold_id),
        num_epochs=args.num_epochs,
        main_metric='jaccard',
        verbose=True,
    )
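
LossBinary is assumed here to follow the common BCE-plus-log-soft-Jaccard formulation popularized by TernausNet; a hedged sketch with the same constructor signature:

import torch
import torch.nn as nn

class LossBinary(nn.Module):
    # BCE with an optional log-soft-Jaccard term, mixed in by jaccard_weight.
    def __init__(self, jaccard_weight=0.3, eps=1e-7):
        super().__init__()
        self.jaccard_weight = jaccard_weight
        self.eps = eps
        self.bce = nn.BCEWithLogitsLoss()

    def forward(self, logits, targets):
        loss = (1 - self.jaccard_weight) * self.bce(logits, targets)
        if self.jaccard_weight:
            probs = torch.sigmoid(logits)
            intersection = (probs * targets).sum()
            union = probs.sum() + targets.sum() - intersection
            loss -= self.jaccard_weight * torch.log((intersection + self.eps) / (union + self.eps))
        return loss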