Example #1
def train(args):
    ckp = None
    if os.path.exists(args.log_dir + '/checkpoints/best.pth'):
        ckp = args.log_dir + '/checkpoints/best.pth'
    model = create_model(args.encoder_type, ckp=ckp).cuda()
    loaders = get_train_val_loaders(args.encoder_type,
                                    batch_size=args.batch_size,
                                    ifold=args.ifold)

    # model, criterion, optimizer
    if args.encoder_type.startswith('myunet'):
        optimizer = RAdam(model.parameters(), lr=args.lr)
    else:
        base_optim = RAdam([
            {
                'params': model.decoder.parameters(),
                'lr': args.lr
            },
            {
                'params': model.encoder.parameters(),
                'lr': args.lr / 10.
            },
        ])
        #base_optim = RAdam(model.parameters(),lr = 0.001)
        optimizer = Lookahead(base_optim, k=5, alpha=0.5)
    #scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=2)

    if args.lrs == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer,
                                      factor=args.factor,
                                      patience=args.patience,
                                      min_lr=args.min_lr)
    else:
        scheduler = CosineAnnealingLR(optimizer,
                                      args.t_max,
                                      eta_min=args.min_lr)

    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks = [
        DiceCallback(),
        EarlyStoppingCallback(patience=15, min_delta=0.001),
    ]
    #if os.path.exists(args.log_dir + '/checkpoints/best_full.pth'):
    #    callbacks.append(CheckpointCallback(resume=args.log_dir + '/checkpoints/best_full.pth'))

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks,
                 logdir=args.log_dir,
                 num_epochs=args.num_epochs,
                 verbose=True)
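
RAdam and Lookahead are not part of core PyTorch; the example above presumably imports them from a package such as torch_optimizer or a local module. As a rough, self-contained sketch of the Lookahead idea only (not the exact class used above): the wrapper keeps a "slow" copy of the weights and, every k steps, moves it a fraction alpha toward the "fast" weights produced by the base optimizer, then resets the fast weights to the slow ones.

import torch


class MinimalLookahead:
    """Sketch of Lookahead(base_optim, k, alpha); illustrative only."""

    def __init__(self, base_optimizer, k=5, alpha=0.5):
        self.base = base_optimizer
        self.k = k
        self.alpha = alpha
        self.step_count = 0
        # one slow copy per parameter, in param_groups order
        self.slow = [p.detach().clone()
                     for group in base_optimizer.param_groups
                     for p in group['params']]

    def zero_grad(self):
        self.base.zero_grad()

    def step(self):
        self.base.step()  # fast update by the inner optimizer
        self.step_count += 1
        if self.step_count % self.k == 0:
            fast_params = [p for group in self.base.param_groups
                           for p in group['params']]
            with torch.no_grad():
                for slow, fast in zip(self.slow, fast_params):
                    slow += self.alpha * (fast - slow)  # slow <- slow + alpha * (fast - slow)
                    fast.copy_(slow)                    # restart fast from the slow weights


# usage sketch on a toy model
model = torch.nn.Linear(4, 1)
opt = MinimalLookahead(torch.optim.Adam(model.parameters(), lr=1e-3), k=5, alpha=0.5)
x, y = torch.randn(8, 4), torch.randn(8, 1)
for _ in range(10):
    opt.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), y)
    loss.backward()
    opt.step()
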
Example #2
def train(args):
    set_random_seed(42)
    model = get_model(args.network)
    print('Loading model')
    model.encoder.conv1 = nn.Conv2d(
        count_channels(args.channels), 64, kernel_size=(7, 7),
        stride=(2, 2), padding=(3, 3), bias=False)
    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')

    ds = Dataset(args.channels, args.dataset_path, args.image_size, args.batch_size, args.num_workers)
    loaders = ds.create_loaders(train_df, val_df)
    print(loaders['train'].dataset.data)

    criterion = BCE_Dice_Loss(bce_weight=0.2)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10, 20, 40], gamma=0.3
    )

    save_path = os.path.join(
        args.logdir,
        '_'.join([args.network, *args.channels])
    )

    # model runner
    runner = SupervisedRunner()

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            DiceCallback()
        ],
        logdir=save_path,
        num_epochs=args.epochs,
        verbose=True
    )

    infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
            InferCallback()
        ],
    )
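
BCE_Dice_Loss(bce_weight=0.2) here, and the smp.utils.losses.BCEDiceLoss(eps=1.) used in several other examples, combine binary cross-entropy with a soft Dice loss. Below is a minimal sketch of that combination, assuming the model outputs raw logits; the class name and the exact weighting are illustrative, not the project's actual implementation.

import torch
import torch.nn as nn
import torch.nn.functional as F


class BCEDiceLossSketch(nn.Module):
    """Weighted sum of BCE-with-logits and (1 - soft Dice)."""

    def __init__(self, bce_weight=0.2, eps=1.0):
        super().__init__()
        self.bce_weight = bce_weight
        self.eps = eps

    def forward(self, logits, targets):
        bce = F.binary_cross_entropy_with_logits(logits, targets)

        probs = torch.sigmoid(logits)
        dims = (1, 2, 3)  # reduce over channel and spatial dims, keep batch
        intersection = (probs * targets).sum(dims)
        union = probs.sum(dims) + targets.sum(dims)
        dice = (2.0 * intersection + self.eps) / (union + self.eps)
        dice_loss = 1.0 - dice.mean()

        return self.bce_weight * bce + (1.0 - self.bce_weight) * dice_loss


# usage sketch: batch of 2 images, 4 classes, 32x32 masks
criterion = BCEDiceLossSketch(bce_weight=0.2)
logits = torch.randn(2, 4, 32, 32)
masks = torch.randint(0, 2, (2, 4, 32, 32)).float()
print(criterion(logits, masks))
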
Example #3
def train_model():

    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )


    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    num_workers = 0
    bs = 10
    train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms=get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=num_workers)

    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }

    num_epochs = 40

    # model, criterion, optimizer
    optimizer = RAdam([
        {'params': model.decoder.parameters(), 'lr': 1e-2},
        {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2, threshold=0.001)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    runner = SupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True
    )

    return True
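
get_training_augmentation, get_validation_augmentation and get_preprocessing are project helpers that these examples never define. In the public Kaggle kernels this code style comes from, they are usually thin wrappers around albumentations; below is a hedged sketch of what they might look like (the specific transforms and the 320x640 size are assumptions):

import albumentations as albu


def to_tensor(x, **kwargs):
    # HWC float image/mask -> CHW float32, as PyTorch expects
    return x.transpose(2, 0, 1).astype('float32')


def get_training_augmentation():
    return albu.Compose([
        albu.HorizontalFlip(p=0.5),
        albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0,
                              shift_limit=0.1, p=0.5, border_mode=0),
        albu.GridDistortion(p=0.5),
        albu.Resize(320, 640),
    ])


def get_validation_augmentation():
    return albu.Compose([albu.Resize(320, 640)])


def get_preprocessing(preprocessing_fn):
    # preprocessing_fn is the encoder-specific normalisation returned by
    # smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
    return albu.Compose([
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=to_tensor, mask=to_tensor),
    ])
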
Example #4
def train(args):
    set_random_seed(42)
    for fold in range(args.folds):
        model = get_model(args.network)

        print("Loading model")
        model, device = UtilsFactory.prepare_model(model)
        train_df = pd.read_csv(
            os.path.join(args.dataset_path,
                         f'train{fold}.csv')).to_dict('records')
        val_df = pd.read_csv(os.path.join(args.dataset_path,
                                          f'val{fold}.csv')).to_dict('records')

        ds = Dataset(args.channels, args.dataset_path, args.image_size,
                     args.batch_size, args.num_workers)
        loaders = ds.create_loaders(train_df, val_df)

        criterion = BCE_Dice_Loss(bce_weight=0.2)

        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[10, 20, 40], gamma=0.3)

        # model runner
        runner = SupervisedRunner()

        save_path = os.path.join(args.logdir, f'fold{fold}')

        # model training
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[DiceCallback()],
                     logdir=save_path,
                     num_epochs=args.epochs,
                     verbose=True)

        infer_loader = collections.OrderedDict([("infer", loaders["valid"])])
        runner.infer(
            model=model,
            loaders=infer_loader,
            callbacks=[
                CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
                InferCallback()
            ],
        )

        print(f'Fold {fold} ended')
Example #5
def train_model(epoch, train_loader, valid_loader, valid_dataset, log_dir):
    # create segmentation model with pretrained encoder

    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=len(CLASSES),
        activation=ACTIVATION,
    )

    loss = smp.utils.losses.BCEDiceLoss()

    optimizer = Nadam(model.parameters(), lr=1e-5)
    model = nn.DataParallel(model)
    # optimizer = torch.optim.Adam([{'params': model.module.decoder.parameters(), 'lr': 1e-4},
    #                               # decrease lr for encoder in order not to permute
    #                               # pre-trained weights with large gradients on training start
    #                               {'params': model.module.encoder.parameters(), 'lr': 1e-6}, ])

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=(epoch // 9) + 1)

    runner = SupervisedRunner()

    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }

    runner.train(
        model=model,
        criterion=loss,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback(), IouCallback(), EarlyStoppingCallback(
            patience=6, min_delta=0.001)],
        logdir=log_dir,
        num_epochs=epoch,
        verbose=True
    )

    probabilities, valid_masks = valid_model(
        runner, model, valid_loader, valid_dataset,  log_dir)

    get_optimal_thres(probabilities, valid_masks)
Example #6
def training(train_ids, valid_ids, num_split, encoder, decoder):
    """
    Model training.
    """
    train = "./data/Clouds_Classify/train.csv"

    # Data overview
    train = pd.read_csv(open(train))
    train.head()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])

    ENCODER = encoder
    ENCODER_WEIGHTS = 'imagenet'

    if decoder == 'unet':
        model = smp.Unet(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=None,
        )
    else:
        model = smp.FPN(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=None,
        )
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)

    num_workers = 4
    bs = 12
    train_dataset = CloudDataset(
        df=train,
        transforms=get_training_augmentation(),
        datatype='train',
        img_ids=train_ids,
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train,
        transforms=get_validation_augmentation(),
        datatype='valid',
        img_ids=valid_ids,
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=bs,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    num_epochs = 50
    logdir = "./logs/log_{}_{}/log_{}".format(encoder, decoder, num_split)

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {
            'params': model.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.encoder.parameters(),
            'lr': 1e-3
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.35, patience=4)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[DiceCallback()],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)

    # Exploring predictions
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
Example #7
num_epochs = EPOCHS
logdir = LOGDIR
loaders = {
    "train": train_dl,
    "valid": valid_dl
}
criterion = smp.utils.losses.BCEDiceLoss(eps=1e-7)
optimizer = torch.optim.SGD([
    {'params': model.encoder.parameters(), 'lr': LR},  
    {'params': model.decoder.parameters(), 'lr': LR},
], lr=LR)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 35])

callbacks = [
    DiceCallback(
        threshold=0.5,
        activation=ACTIVATION.capitalize(),
    ),
    IouCallback(
        threshold=0.5,
        activation=ACTIVATION.capitalize(),
    ),
]
callbacks[0].metric_fn = dice_wo_back
runner = SupervisedRunner()

## Step 1.

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
Example #8
    # elif args.loss == 'lovasz_softmax':
    #     criterion = lovasz_softmax()
    elif args.loss == 'BCEMulticlassDiceLoss':
        criterion = BCEMulticlassDiceLoss()
    elif args.loss == 'MulticlassDiceMetricCallback':
        criterion = MulticlassDiceMetricCallback()
    elif args.loss == 'BCE':
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    if args.multigpu:
        model = nn.DataParallel(model)

    if args.task == 'segmentation':
        callbacks = [DiceCallback(), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]
    elif args.task == 'classification':
        callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4),
                     EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]

    if args.gradient_accumulation:
        callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation))

    checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best.pth')
    model.cuda()
    utils.unpack_checkpoint(checkpoint, model=model)
    #
    #
    runner = SupervisedRunner()
    if args.train:
        print('Training')
Example #9
def train_model(train_parameters):

    k = train_parameters["k"]
    loaders = train_parameters["loaders"]
    num_epochs = train_parameters["num_epochs"]
    net = train_parameters["net"]
    ENCODER = train_parameters["ENCODER"]
    ENCODER_WEIGHTS = train_parameters["ENCODER_WEIGHTS"]
    ACTIVATION = train_parameters["ACTIVATION"]

    model = load_model(net, ENCODER, ENCODER_WEIGHTS, ACTIVATION)
    """ multi-gpu """
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    model.to("cuda")

    #     if k==0:
    #         summary(model.module.encoder,(3,384,576))

    logdir = "./logs/segmentation_{}_{}Fold".format(net, k)

    # model, criterion, optimizer
    optimizer = RAdam([
        {
            'params': model.module.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.module.encoder.parameters(),
            'lr': 1e-3
        },
        #         {'params': model.decoder.parameters(), 'lr': 1e-2},
        #         {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])

    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    #     criterion = FocalLoss()
    #     criterion = FocalDiceLoss()
    # criterion = smp.utils.losses.DiceLoss(eps=1.)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    runner = SupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            EarlyStoppingCallback(patience=10, min_delta=0.001),
            DiceCallback()
        ],
        #                    AUCCallback(),
        #                    IouCallback()],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True)

    del loaders, optimizer, scheduler, model, runner
    torch.cuda.empty_cache()
    gc.collect()
    print("Collect GPU cache")
Example #10
    CriterionCallback(input_key="seg_targets",
                      output_key="seg_logits",
                      prefix="loss_seg",
                      criterion_key="seg"),
    CriterionAggregatorCallback(
        prefix="loss",
        loss_keys=["loss_cls", "loss_seg"],
        loss_aggregate_fn="sum"  # or "mean"
    ),
    MultiMetricCallback(metric_fn=calc_metric,
                        prefix='rocauc',
                        input_key="cls_targets",
                        output_key="cls_logits",
                        list_args=['_']),
    DiceCallback(
        input_key="seg_targets",
        output_key="seg_logits",
    ),
    EarlyStoppingCallback(patience=10, min_delta=0.001)
]

runner.train(fp16=args.fp16,
             model=model,
             criterion=criterion,
             optimizer=optimizer,
             scheduler=scheduler,
             loaders=loaders,
             callbacks=callbacks,
             logdir=logdir,
             num_epochs=num_epochs,
             verbose=True)
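
This fragment drives a two-headed model (classification plus segmentation) through Catalyst's CriterionCallback / criterion_key mechanism, which expects criterion to be a dict of losses rather than a single module. Below is a hedged sketch of what that dict might look like, with the keys inferred from criterion_key="seg" and the "loss_cls" entry above; the concrete loss choices are assumptions.

import torch.nn as nn
import segmentation_models_pytorch as smp

# criterion_key="cls" / "seg" selects the matching entry from this dict;
# CriterionAggregatorCallback then sums "loss_cls" and "loss_seg" into "loss".
criterion = {
    "cls": nn.BCEWithLogitsLoss(),                 # classification head
    "seg": smp.utils.losses.BCEDiceLoss(eps=1.),   # segmentation head
}
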
Example #11
def main(config):
    opts = config()
    path = opts.path
    train = pd.read_csv(f'{path}/train.csv')
    pseudo_label = pd.read_csv(
        './submissions/submission_segmentation_and_classifier.csv')

    n_train = len(os.listdir(f'{path}/train_images'))
    n_test = len(os.listdir(f'{path}/test_images'))
    print(f'There are {n_train} images in train dataset')
    print(f'There are {n_test} images in test dataset')

    train.loc[train['EncodedPixels'].isnull() == False,
              'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()
    train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
        lambda x: x.split('_')[0]).value_counts().value_counts()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])
    id_mask_count = train.loc[train['EncodedPixels'].isnull() == False,
                              'Image_Label'].apply(lambda x: x.split('_')[
                                  0]).value_counts().reset_index().rename(
                                      columns={
                                          'index': 'img_id',
                                          'Image_Label': 'count'
                                      })
    print(id_mask_count.head())

    pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False,
                     'Image_Label'].apply(
                         lambda x: x.split('_')[1]).value_counts()
    pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False,
                     'Image_Label'].apply(lambda x: x.split('_')[0]
                                          ).value_counts().value_counts()

    pseudo_label['label'] = pseudo_label['Image_Label'].apply(
        lambda x: x.split('_')[1])
    pseudo_label['im_id'] = pseudo_label['Image_Label'].apply(
        lambda x: x.split('_')[0])
    pseudo_label_ids = pseudo_label.loc[
        pseudo_label['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
            lambda x: x.split('_')[0]).value_counts().reset_index().rename(
                columns={
                    'index': 'img_id',
                    'Image_Label': 'count'
                })
    print(pseudo_label_ids.head())

    if not os.path.exists("csvs/train_all.csv"):
        train_ids, valid_ids = train_test_split(
            id_mask_count,
            random_state=39,
            stratify=id_mask_count['count'],
            test_size=0.1)
        valid_ids.to_csv("csvs/valid_threshold.csv", index=False)
        train_ids.to_csv("csvs/train_all.csv", index=False)
    else:
        train_ids = pd.read_csv("csvs/train_all.csv")
        valid_ids = pd.read_csv("csvs/valid_threshold.csv")

    for fold, ((train_ids_new, valid_ids_new),
               (train_ids_pl, valid_ids_pl)) in enumerate(
                   zip(
                       stratified_groups_kfold(train_ids,
                                               target='count',
                                               n_splits=opts.fold_max,
                                               random_state=0),
                       stratified_groups_kfold(pseudo_label_ids,
                                               target='count',
                                               n_splits=opts.fold_max,
                                               random_state=0))):

        train_ids_new.to_csv(f'csvs/train_fold{fold}.csv')
        valid_ids_new.to_csv(f'csvs/valid_fold{fold}.csv')
        train_ids_new = train_ids_new['img_id'].values
        valid_ids_new = valid_ids_new['img_id'].values

        train_ids_pl = train_ids_pl['img_id'].values
        valid_ids_pl = valid_ids_pl['img_id'].values

        ENCODER = opts.backborn
        ENCODER_WEIGHTS = opts.encoder_weights
        DEVICE = 'cuda'

        ACTIVATION = None
        model = get_model(
            model_type=opts.model_type,
            encoder=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            activation=ACTIVATION,
            n_classes=opts.class_num,
            task=opts.task,
            center=opts.center,
            attention_type=opts.attention_type,
            head='simple',
            classification=opts.classification,
        )
        model = convert_model(model)
        preprocessing_fn = encoders.get_preprocessing_fn(
            ENCODER, ENCODER_WEIGHTS)

        num_workers = opts.num_workers
        bs = opts.batchsize

        train_dataset = CloudDataset(
            df=train,
            label_smoothing_eps=opts.label_smoothing_eps,
            datatype='train',
            img_ids=train_ids_new,
            transforms=get_training_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_dataset = CloudDataset(
            df=train,
            datatype='valid',
            img_ids=valid_ids_new,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))

        ################# make pseudo label dataset #######################
        train_dataset_pl = CloudPseudoLabelDataset(
            df=pseudo_label,
            datatype='train',
            img_ids=train_ids_pl,
            transforms=get_training_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_dataset_pl = CloudPseudoLabelDataset(
            df=pseudo_label,
            datatype='train',
            img_ids=valid_ids_pl,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))

        #         train_dataset = ConcatDataset([train_dataset, train_dataset_pl])
        #         valid_dataset = ConcatDataset([valid_dataset, valid_dataset_pl])
        train_dataset = ConcatDataset([train_dataset, valid_dataset_pl])
        ################# make pseudo label dataset #######################
        train_loader = DataLoader(train_dataset,
                                  batch_size=bs,
                                  shuffle=True,
                                  num_workers=num_workers,
                                  drop_last=True)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=bs,
                                  shuffle=False,
                                  num_workers=num_workers,
                                  drop_last=True)

        loaders = {"train": train_loader, "valid": valid_loader}
        num_epochs = opts.max_epoch
        logdir = f"{opts.logdir}/fold{fold}"
        optimizer = get_optimizer(optimizer=opts.optimizer,
                                  lookahead=opts.lookahead,
                                  model=model,
                                  separate_decoder=True,
                                  lr=opts.lr,
                                  lr_e=opts.lr_e)
        opt_level = 'O1'
        model.cuda()
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=opt_level)
        scheduler = opts.scheduler(optimizer)
        criterion = opts.criterion
        runner = SupervisedRunner()
        if opts.task == "segmentation":
            callbacks = [DiceCallback()]
        else:
            callbacks = []
        if opts.early_stop:
            callbacks.append(
                EarlyStoppingCallback(patience=10, min_delta=0.001))
        if opts.mixup:
            callbacks.append(MixupCallback(alpha=0.25))
        if opts.accumeration is not None:
            callbacks.append(CriterionCallback())
            callbacks.append(
                OptimizerCallback(accumulation_steps=opts.accumeration))
        print(
            f"############################## Start training of fold{fold}! ##############################"
        )
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=callbacks,
                     logdir=logdir,
                     num_epochs=num_epochs,
                     verbose=True)
        print(
            f"############################## Finish training of fold{fold}! ##############################"
        )
        del model
        del loaders
        del runner
        torch.cuda.empty_cache()
        gc.collect()
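
stratified_groups_kfold is a project helper that is not defined in this snippet; from its usage it yields one (train_df, valid_df) pair per fold, stratified on the 'count' column. A possible minimal sketch using scikit-learn (the real helper may differ, e.g. in how it handles groups):

from sklearn.model_selection import StratifiedKFold


def stratified_groups_kfold(df, target='count', n_splits=5, random_state=0):
    """Yield (train_df, valid_df) pairs, stratified on `target`."""
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True,
                          random_state=random_state)
    for train_idx, valid_idx in skf.split(df, df[target]):
        yield df.iloc[train_idx], df.iloc[valid_idx]
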
Example #12
def train(args):
    set_random_seed(42)
    model = get_model(args.network, args.classification_head)
    print('Loading model')

    model.encoder.conv1 = nn.Conv2d(count_channels(args.channels) *
                                    args.neighbours,
                                    64,
                                    kernel_size=(7, 7),
                                    stride=(2, 2),
                                    padding=(3, 3),
                                    bias=False)

    model, device = UtilsFactory.prepare_model(model)

    train_df = pd.read_csv(args.train_df).to_dict('records')
    val_df = pd.read_csv(args.val_df).to_dict('records')

    ds = Dataset(args.channels, args.dataset_path, args.image_size,
                 args.batch_size, args.num_workers, args.neighbours,
                 args.classification_head)
    loaders = ds.create_loaders(train_df, val_df)

    save_path = os.path.join(args.logdir, args.name)

    optimizer = get_optimizer(args.optimizer, args.lr, model)

    if not args.classification_head:
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[10, 40, 80, 150, 300], gamma=0.1)

        criterion = get_loss(args.loss)

        runner = SupervisedRunner()
        if args.model_weights_path:
            checkpoint = torch.load(args.model_weights_path,
                                    map_location='cpu')
            model.load_state_dict(checkpoint['model_state_dict'])

        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[DiceCallback()],
                     logdir=save_path,
                     num_epochs=args.epochs,
                     verbose=True)

        infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
        runner.infer(
            model=model,
            loaders=infer_loader,
            callbacks=[
                CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
                InferCallback()
            ],
        )
    else:
        criterion = get_loss('multi')
        net = Model(model,
                    optimizer,
                    criterion,
                    batch_metrics=[
                        classification_head_accuracy, segmentation_head_dice
                    ])
        net = net.to(device)
        net.fit_generator(loaders['train'],
                          loaders['valid'],
                          epochs=args.epochs,
                          callbacks=[
                              ModelCheckpoint(
                                  f'{save_path}/checkpoints/best.pth', ),
                              MultiStepLR(milestones=[10, 40, 80, 150, 300],
                                          gamma=0.1)
                          ])
Example #13
def main_kaggle_smp(path_dataset='/dataset/kaggle/understanding_cloud_organization',
                    ENCODER='resnet50',
                    ENCODER_WEIGHTS='imagenet',
                    num_workers=0,
                    batch_size=8,
                    epochs=19,
                    debug=False,
                    exec_catalyst=True,
                    logdir="/src/logs/segmentation",
                    pretrained=True
                    ):
    # below line is potential input args
    # (name_dataset='eurosat', lr=0.0001, wd=0, ratio=0.9, batch_size=32, workers=4, epochs=15, num_gpus=1,
    # resume=None, dir_weights='./weights'):

    torch.backends.cudnn.benchmark = True

    # Dataset
    train, sub = get_meta_info_table(path_dataset)
    train_ids, valid_ids, test_ids = prepare_dataset(train, sub)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms=get_training_augmentation(),
                                 preprocessing=get_preprocessing(preprocessing_fn), path=path_dataset)
    valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids,
                                 transforms=get_validation_augmentation(),
                                 preprocessing=get_preprocessing(preprocessing_fn), path=path_dataset)
    # DataLoader
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }

    # todo: check how to used device in this case
    DEVICE = 'cuda'
    if debug:
        device = 'cpu'
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    ACTIVATION = None
    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )
    images, labels = next(iter(train_loader))
    model.to(device)
    print(model)
    print(summary(model, input_size=tuple(images.shape[1:])))

    # use smp epoch
    # num_epochs = 19

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {'params': model.decoder.parameters(), 'lr': 1e-2},
        {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.DiceLoss(eps=1.)  # smp.utils.losses.BCEDiceLoss(eps=1.)

    if not pretrained:
        # catalyst
        if exec_catalyst:
            device = utils.get_device()
            runner = SupervisedRunner(device=device)

            # train model
            runner.train(
                model=model,
                criterion=criterion,
                optimizer=optimizer,
                scheduler=scheduler,
                loaders=loaders,
                callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
                logdir=logdir,
                num_epochs=epochs,
                verbose=True
            )

            # # prediction
            # encoded_pixels = []
            # loaders = {"infer": valid_loader}
            # runner.infer(
            #     model=model,
            #     loaders=loaders,
            #     callbacks=[
            #         CheckpointCallback(
            #             resume=f"{logdir}/checkpoints/best.pth"),
            #         InferCallback()
            #     ],
            # )
            # valid_masks = []
            #
            # # todo: where .pth?
            # # todo: from here
            # valid_num = valid_dataset.__len__()
            # probabilities = np.zeros((valid_num * 4, 350, 525))
            # for i, (batch, output) in enumerate(tqdm(zip(
            #         valid_dataset, runner.callbacks[0].predictions["logits"]))):
            #     image, mask = batch
            #     for m in mask:
            #         if m.shape != (350, 525):
            #             m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
            #         valid_masks.append(m)
            #
            #     for j, probability in enumerate(output):
            #         if probability.shape != (350, 525):
            #             probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
            #         probabilities[valid_num * 4 + j, :, :] = probability
            #
            # # todo: from here
            # class_params = {}
            # for class_id in range(4):
            #     print(class_id)
            #     attempts = []
            #     for t in range(0, 100, 5):
            #         t /= 100
            #         for ms in [0, 100, 1200, 5000, 10000]:
            #             masks = []
            #             for i in range(class_id, len(probabilities), 4):
            #                 probability = probabilities[i]
            #                 predict, num_predict = post_process(sigmoid(probability), t, ms)
            #                 masks.append(predict)
            #
            #             d = []
            #             for i, j in zip(masks, valid_masks[class_id::4]):
            #                 if (i.sum() == 0) & (j.sum() == 0):
            #                     d.append(1)
            #                 else:
            #                     d.append(dice(i, j))
            #
            #             attempts.append((t, ms, np.mean(d)))
            #
            #     attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])
            #
            #     attempts_df = attempts_df.sort_values('dice', ascending=False)
            #     print(attempts_df.head())
            #     best_threshold = attempts_df['threshold'].values[0]
            #     best_size = attempts_df['size'].values[0]
            #
            #     class_params[class_id] = (best_threshold, best_size)

        else:
            for epoch in trange(epochs, desc="Epochs"):
                metrics_train = train_epoch(model, train_loader, criterion, optimizer, device)
                metrics_eval = eval_epoch(model, valid_loader, criterion, device)

                scheduler.step(metrics_eval['valid_loss'])
                print(f'epoch: {epoch} ', metrics_train, metrics_eval)
    else:
        if exec_catalyst:
            device = utils.get_device()
            checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best_full.pth')
            utils.unpack_checkpoint(checkpoint, model=model)
            runner = SupervisedRunner(model=model)

            # prediction with infer
            encoded_pixels = []
            loaders = {"infer": valid_loader}
            runner.infer(
                model=model,
                loaders=loaders,
                callbacks=[
                    CheckpointCallback(
                        resume=f"{logdir}/checkpoints/best.pth"),
                    InferCallback()
                ],
            )

            # todo: still being checked in a Jupyter notebook
            valid_masks = []


            valid_num = valid_dataset.__len__()
            probabilities = np.zeros((valid_num * 4, 350, 525))
            for i, (batch, output) in enumerate(tqdm(zip(
                    valid_dataset, runner.callbacks[0].predictions["logits"]))):
                image, mask = batch
                for m in mask:
                    if m.shape != (350, 525):
                        m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                    valid_masks.append(m)

                for j, probability in enumerate(output):
                    if probability.shape != (350, 525):
                        probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                    probabilities[i * 4 + j, :, :] = probability

            class_params = {}
            for class_id in range(4):
                print(class_id)
                attempts = []
                for t in range(0, 100, 5):
                    t /= 100
                    for ms in [0, 100, 1200, 5000, 10000]:
                        masks = []
                        for i in range(class_id, len(probabilities), 4):
                            probability = probabilities[i]
                            predict, num_predict = post_process(sigmoid(probability), t, ms)
                            masks.append(predict)

                        d = []
                        for i, j in zip(masks, valid_masks[class_id::4]):
                            if (i.sum() == 0) & (j.sum() == 0):
                                d.append(1)
                            else:
                                d.append(dice(i, j))

                        attempts.append((t, ms, np.mean(d)))

                attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])

                attempts_df = attempts_df.sort_values('dice', ascending=False)
                print(attempts_df.head())
                best_threshold = attempts_df['threshold'].values[0]
                best_size = attempts_df['size'].values[0]

                class_params[class_id] = (best_threshold, best_size)

            # predictions
            torch.cuda.empty_cache()
            gc.collect()

            test_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids, transforms=get_validation_augmentation(),
                                        preprocessing=get_preprocessing(preprocessing_fn))
            test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=0)

            loaders = {"test": test_loader}
            encoded_pixels = []
            image_id = 0
            for i, test_batch in enumerate(tqdm(loaders['test'])):
                runner_out = runner.predict_batch({"features": test_batch[0].cuda()})['logits']
                for i, batch in enumerate(runner_out):
                    for probability in batch:

                        probability = probability.cpu().detach().numpy()
                        if probability.shape != (350, 525):
                            probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                        predict, num_predict = post_process(sigmoid(probability), class_params[image_id % 4][0],
                                                            class_params[image_id % 4][1])
                        if num_predict == 0:
                            encoded_pixels.append('')
                        else:
                            r = mask2rle(predict)
                            encoded_pixels.append(r)
                        image_id += 1

            sub['EncodedPixels'] = encoded_pixels
            sub.to_csv('data/kaggle_cloud_org/submission.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
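
sigmoid, dice, post_process and mask2rle are helpers shared by this and the next example but not shown here. The following are hedged sketches of what they commonly look like in public kernels for this competition; details such as the strict '>' against min_size may differ from the originals.

import cv2
import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def dice(im1, im2, empty_score=1.0):
    """Dice coefficient between two binary masks."""
    im1, im2 = np.asarray(im1).astype(bool), np.asarray(im2).astype(bool)
    im_sum = im1.sum() + im2.sum()
    if im_sum == 0:
        return empty_score
    return 2.0 * np.logical_and(im1, im2).sum() / im_sum


def post_process(probability, threshold, min_size):
    """Threshold a probability map, drop connected components smaller than
    min_size pixels, and return how many components were kept."""
    mask = (probability > threshold).astype(np.uint8)
    num_components, components = cv2.connectedComponents(mask)
    predictions = np.zeros(probability.shape, np.float32)
    num_predict = 0
    for c in range(1, num_components):
        p = (components == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num_predict += 1
    return predictions, num_predict


def mask2rle(img):
    """Run-length encode a binary mask (column-major, 1-indexed)."""
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)
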
Example #14
def main():
    train_data = pd.read_csv(TRAIN_PATH)
    sub_data = pd.read_csv(SAMPLE_PATH)

    train_data["img_id"] = train_data["Image_Label"].apply(
        lambda x: x.split("_")[0])
    train_data["label"] = train_data["Image_Label"].apply(
        lambda x: x.split("_")[1])
    # gen_image(train_data)

    # count how many labels each image contains
    id_mask_count = train_data.loc[
        train_data['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
            lambda x: x.split('_')[0]).value_counts().reset_index().rename(
                columns={
                    'index': 'img_id',
                    'Image_Label': 'count'
                })
    # split the training data into train/validation sets (stratify is passed to train_test_split so that the number of labels per image stays balanced across the split)
    train_ids, valid_ids = train_test_split(id_mask_count['img_id'].values,
                                            random_state=42,
                                            stratify=id_mask_count['count'],
                                            test_size=0.1)
    test_ids = sub_data['Image_Label'].apply(
        lambda x: x.split('_')[0]).drop_duplicates().values
    """
    segmentation_models_pytorchを用いて任意のモデルの事前学習を行う。
    Unetは「エンコーダ」と「デコーダ」で構成されており、今回はresnet50モデルの一部を画像特徴量抽出(エンコーダ)として用い、
    デコーダでは特徴量を元にラベリングを行う。
    """
    model = smp.Unet(
        encoder_name=ENCODER,  # resnet50のモデルを事前学習させる。
        encoder_weights=ENCODER_WEIGHTS,  # ImageNetで事前学習させたモデルを用いる。
        classes=4,  # 最終出力数
        activation=ACTIVATION,  # 多値分類なのでsoftmax関数
    )
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)  # 事前学習時に用いた前処理パラメータ、関数等を取得する

    train_dataset = CloudDataset(
        df=train_data,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train_data,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    # model, criterion, optimizer
    optimizer = op.Adam([
        {
            'params': model.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.encoder.parameters(),
            'lr': 1e-3
        },
    ])
    # ReduceLROnPlateau: reduce the learning rate when the monitored metric
    # (e.g. the loss) has not improved for `patience` consecutive epochs.
    scheduler = op.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                  factor=0.15,
                                                  patience=2)
    criterion = BCEDiceLoss(eps=1.)
    # SupervisedRunner: Catalyst runner for supervised models; it just takes the
    # model, loss, optimizer, metrics, etc.
    runner = dl.SupervisedRunner()
    # start training
    runner.train(
        model=model,  # model to train (Unet with a pretrained resnet50 encoder)
        criterion=criterion,  # loss function
        optimizer=optimizer,  # weight-update rule
        scheduler=scheduler,  # learning-rate schedule
        loaders=loaders,  # dict holding the train and validation DataLoaders
        # DiceCallback: Dice metric; EarlyStoppingCallback: stop training when
        # the metric stops improving
        callbacks=[
            DiceCallback(),
            EarlyStoppingCallback(patience=5, min_delta=0.001)
        ],
        logdir=LOG_DIR,
        num_epochs=num_epochs,
        verbose=True,
    )
    # inference on the validation set
    encoded_pixels = []
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{LOG_DIR}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))  # probability maps
    for i, (batch, output) in enumerate(
            tqdm(zip(
                valid_dataset,
                runner.callbacks[0].predictions["logits"]))):  # tqdm shows a progress bar
        image, mask = batch
        for m in mask:
            if m.shape != (350, 525):
                m = cv2.resize(m,
                               dsize=(525, 350),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability,
                                         dsize=(525, 350),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    class_params = {}
    for class_id in tqdm(range(4)):
        print("##################################")
        print(f"class_id : {class_id}")
        print("##################################")
        attempts = []
        for t in range(0, 100, 5):
            t /= 100
            for ms in [0, 100, 1200, 5000, 10000]:
                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(
                        sigmoid(probability), t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts,
                                   columns=['threshold', 'size', 'dice'])

        attempts_df = attempts_df.sort_values('dice', ascending=False)
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]

        class_params[class_id] = (best_threshold, best_size)

    # prediction on the test set
    test_dataset = CloudDataset(
        df=sub_data,
        datatype="test",
        img_ids=test_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=8,
        shuffle=False,
        num_workers=num_workers,
    )
    loaders = {"test": test_loader}
    encoded_pixels = []
    image_id = 0
    for i, test_batch in enumerate(tqdm(loaders["test"])):
        runner_out = runner.predict_batch({"features":
                                           test_batch[0].cuda()})['logits']
        for i, batch in enumerate(runner_out):
            for probability in batch:

                probability = probability.cpu().detach().numpy()
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability,
                                             dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                predict, num_predict = post_process(
                    sigmoid(probability), class_params[image_id % 4][0],
                    class_params[image_id % 4][1])
                if num_predict == 0:
                    encoded_pixels.append('')
                else:
                    r = mask2rle(predict)
                    encoded_pixels.append(r)
                image_id += 1

    sub_data['EncodedPixels'] = encoded_pixels
    sub_data.to_csv(SAVE_PATH,
                    columns=['Image_Label', 'EncodedPixels'],
                    index=False)
Example #15
def run(config_file):
    config = load_config(config_file)
    #set up the environment flags for working with the KAGGLE GPU OR COLAB_GPU
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    print('working directory:', config.work_dir)

    #save the configuration to the working dir
    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    #Enter the GPUS you have,
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    #our dataset has an explicit validation folder, use that later.
    all_transforms['valid'] = get_transforms(config.transforms.test)

    print("before rajat config", config.data.height, config.data.width)
    #fetch the dataloaders we need
    dataloaders = {
        phase: make_loader(data_folder=config.data.train_dir,
                           df_path=config.data.train_df_path,
                           phase=phase,
                           img_size=(config.data.height, config.data.width),
                           batch_size=config.train.batch_size,
                           num_workers=config.num_workers,
                           idx_fold=config.data.params.idx_fold,
                           transforms=all_transforms[phase],
                           num_classes=config.data.num_classes,
                           pseudo_label_path=config.train.pseudo_label_path,
                           debug=config.debug)
        for phase in ['train', 'valid']
    }

    #creating the segmentation model with pre-trained encoder
    '''
    dumping the parameters for smp library
    encoder_name: str = "resnet34",
    encoder_depth: int = 5,
    encoder_weights: str = "imagenet",
    decoder_use_batchnorm: bool = True,
    decoder_channels: List[int] = (256, 128, 64, 32, 16),
    decoder_attention_type: Optional[str] = None,
    in_channels: int = 3,
    classes: int = 1,
    activation: Optional[Union[str, callable]] = None,
    aux_params: Optional[dict] = None,
    '''
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    #fetch the loss
    criterion = get_loss(config)
    params = [
        {
            'params': model.decoder.parameters(),
            'lr': config.optimizer.params.decoder_lr
        },
        {
            'params': model.encoder.parameters(),
            'lr': config.optimizer.params.encoder_lr
        },
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)
    '''
    dumping the catalyst supervised runner
    https://github.com/catalyst-team/catalyst/blob/master/catalyst/dl/runner/supervised.py

    model (Model): Torch model object
    device (Device): Torch device
    input_key (str): Key in batch dict mapping for model input
    output_key (str): Key in output dict model output
        will be stored under
    input_target_key (str): Key in batch dict mapping for target
    '''

    runner = SupervisedRunner(model=model, device=get_device())

    #@pavel,srk,rajat,vladimir,pudae check the IOU and the Dice Callbacks

    callbacks = [DiceCallback(), IouCallback()]

    #adding patience
    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    #thanks for handling the distributed training
    '''
    we are gonna take zero_grad after accumulation accumulation_steps
    '''
    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend([
            CriterionCallback(),
            OptimizerCallback(accumulation_steps=accumulation_steps)
        ])

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))
    '''
    pudae, please add the callback
    https://arxiv.org/pdf/1710.09412.pdf
    **srk adding the mixup callback
    '''
    if config.train.mixup:
        callbacks.append(MixupCallback())
    if config.train.cutmix:
        callbacks.append(CutMixCallback())
    '''@rajat implemented cutmix, a weighted combination of cutout and mixup'''
    '''
    rajat introducing training loop
    https://github.com/catalyst-team/catalyst/blob/master/catalyst/dl/runner/supervised.py
    take care of the nvidias fp16 precision
    '''
    print(config.work_dir)
    print(config.train.minimize_metric)
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=False,
    )
Example #16
    },
    {
        'params': model.encoder.parameters(),
        'lr': 1e-3
    },
])
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
runner = SupervisedRunner()
runner.train(model=model,
             criterion=criterion,
             optimizer=optimizer,
             scheduler=scheduler,
             loaders=loaders,
             callbacks=[
                 DiceCallback(),
                 EarlyStoppingCallback(patience=5, min_delta=0.001)
             ],
             logdir=logdir,
             num_epochs=num_epochs,
             verbose=True)
utils.plot_metrics(
    logdir=logdir,
    # specify which metrics we want to plot
    metrics=["loss", "dice", 'lr', '_base/lr'])
encoded_pixels = []
loaders = {"infer": valid_loader}
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=[
Example #17
valid_loader = DataLoader(valid_dataset, batch_size=hyper_params['batch_size'], shuffle=False)

loaders = {"train": train_loader, "valid": valid_loader}


optimizer = torch.optim.Adam(model.parameters(), hyper_params['learning_rate'])

scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)

criterion = WeightedBCEDiceLoss(
    lambda_dice=hyper_params['lambda_dice'],
    lambda_bce=hyper_params['lambda_bceWithLogits']
)

runner = SupervisedRunner(device=device)

logdir = hyper_params['logdir']

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(), CometCallback(experiment), EarlyStoppingCallback(patience=5, min_delta=0.001)],
    logdir=logdir,
    #resume=f"{logdir}/checkpoints/last_full.pth",
    num_epochs=hyper_params['num_epochs'],
    verbose=True
)
Example #18
def run(config_file):
    config = load_config(config_file)

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(data_folder=config.data.train_dir,
                           df_path=config.data.train_df_path,
                           phase=phase,
                           batch_size=config.train.batch_size,
                           num_workers=config.num_workers,
                           idx_fold=config.data.params.idx_fold,
                           transforms=all_transforms[phase],
                           num_classes=config.data.num_classes,
                           pseudo_label_path=config.train.pseudo_label_path,
                           debug=config.debug)
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # train setting
    criterion = get_loss(config)
    params = [
        {
            'params': model.decoder.parameters(),
            'lr': config.optimizer.params.decoder_lr
        },
        {
            'params': model.encoder.parameters(),
            'lr': config.optimizer.params.encoder_lr
        },
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model)

    callbacks = [DiceCallback(), IouCallback()]

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/best_full.pth'))

    if config.train.mixup:
        callbacks.append(MixupCallback())

    if config.train.cutmix:
        callbacks.append(CutMixCallback())

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )
Example #19
        # And only then we aggregate everything into one loss.
        CriterionAggregatorCallback(
            prefix="loss",
            loss_aggregate_fn=
            "weighted_sum",  # can be "sum", "weighted_sum" or "mean"
            # because we want weighted sum, we need to add scale for each loss
            loss_keys={
                "loss_dice": 0.5,
                "loss_iou": 0.5,
                "loss_bce": 1.0
            },
        ),

        # metrics
        DiceCallback(input_key="mask"),
        IouCallback(input_key="mask"),
    ],
    # path to save logs
    logdir=logdir,
    num_epochs=num_epochs,

    # save our best checkpoint by CE metric
    main_metric="ce",
    # CE needs to be minimised.
    minimize_metric=True,

    # for FP16. It uses the variable from the very first cell
    fp16=fp16_params,

    # prints train logs
Beispiel #20
0
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
# loss function
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
# from catalyst.dl.runner import SupervisedRunner
runner = SupervisedRunner()

'''
Training section
'''
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=True
)
# plot the loss and metric curves
utils.plot_metrics(
    logdir=logdir,
    # specify which metrics we want to plot
    metrics=["loss", "dice", 'lr', '_base/lr']
)

# run inference on the validation set
encoded_pixels = []
loaders = {"infer": valid_loader}
runner.infer(
    model=model,
    loaders=loaders,
    # the rest of this call is truncated in the source; a typical completion (as in
    # the other examples in this collection) restores the best checkpoint and
    # collects the raw predictions:
    callbacks=[
        CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
        InferCallback()
    ],
)

def main():

    fold_path = args.fold_path
    fold_num = args.fold_num
    model_name = args.model_name
    train_csv = args.train_csv
    sub_csv = args.sub_csv
    encoder = args.encoder
    num_workers = args.num_workers
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learn_late = args.learn_late
    attention_type = args.attention_type

    train = pd.read_csv(train_csv)
    sub = pd.read_csv(sub_csv)

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1])
    sub['im_id'] = sub['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv')
    val_fold = pd.read_csv(f'{fold_path}/valid_file_fold_{fold_num}.csv')

    train_ids = np.array(train_fold.file_name)
    valid_ids = np.array(val_fold.file_name)

    encoder_weights = 'imagenet'
    attention_type = None if attention_type == 'None' else attention_type

    if model_name == 'Unet':
        model = smp.Unet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
            attention_type=attention_type,
        )
    elif model_name == 'Linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    elif model_name == 'FPN':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    elif model_name == 'ORG':
        model = Linknet_resnet18_ASPP()

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        encoder, encoder_weights)

    train_dataset = CloudDataset(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        drop_last=True,
        pin_memory=True,
    )
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation"

    #for batch_idx, (data, target) in enumerate(loaders['train']):
    #    print(batch_idx)

    print(logdir)

    if model_name == 'ORG':
        optimizer = NAdam([
            {
                'params': model.parameters(),
                'lr': learn_late
            },
        ])
    else:
        optimizer = NAdam([
            {
                'params': model.decoder.parameters(),
                'lr': learn_late
            },
            {
                'params': model.encoder.parameters(),
                'lr': learn_late
            },
        ])

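    # halve the LR as soon as the monitored metric fails to improve for an epoch (patience=0)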
    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
    criterion = smp.utils.losses.BCEDiceLoss()

    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=1e-7)
                 ],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)

def train(args):
    set_random_seed(42)
    if args.model == 'lstm_diff':
        model = ULSTMNet(count_channels(args.channels), 1, args.image_size)
    elif args.model == 'lstm_decoder':
        model = Unet_LstmDecoder(count_channels(args.channels), all_masks=args.allmasks)
    else:
        print('Unknown LSTM model. Falling back to the default model.')
        model = ULSTMNet(count_channels(args.channels), 1, args.image_size)
    
    if torch.cuda.is_available(): model.cuda()
    print('Loading model')

    model, device = UtilsFactory.prepare_model(model)
    print(device)

    optimizer = get_optimizer(args.optimizer, args.lr, model)
    criterion = get_loss(args.loss)    
    
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[10, 40, 80, 150, 300], gamma=0.2
    )

    save_path = os.path.join(
        args.logdir,
        args.name
    )
    
    os.makedirs(save_path, exist_ok=True)

    train_df = pd.read_csv(args.train_df)
    val_df = pd.read_csv(args.val_df)

    train_dataset = LstmDataset(args.neighbours, train_df, 'train', args.channels, args.dataset_path, args.image_size, args.batch_size, args.allmasks)
    valid_dataset = LstmDataset(args.neighbours, val_df, 'valid', args.channels, args.dataset_path, args.image_size, args.batch_size, args.allmasks)

    # `sampler` is not defined in this snippet; assume no custom sampler (plain shuffling)
    sampler = None
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=sampler is None,
        num_workers=args.num_workers, sampler=sampler(train_df) if sampler is not None else None)
    valid_loader = DataLoader(valid_dataset, batch_size=1, 
        shuffle=False, num_workers=args.num_workers)

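    # Catalyst iterates the loaders in insertion order: train first, then valid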
    loaders = collections.OrderedDict()

    loaders['train'] = train_loader
    loaders['valid'] = valid_loader

    runner = SupervisedRunner()
    if args.model_weights_path:
        checkpoint = torch.load(args.model_weights_path, map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
    
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            DiceCallback()
        ],
        logdir=save_path,
        num_epochs=args.epochs,
        verbose=True
    )

    infer_loader = collections.OrderedDict([('infer', loaders['valid'])])
    runner.infer(
        model=model,
        loaders=infer_loader,
        callbacks=[
            CheckpointCallback(resume=f'{save_path}/checkpoints/best.pth'),
            InferCallback()
        ],
    )

Beispiel #23
0
def main(args):
    """
    Main code for training a U-Net with a user-defined encoder.
    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args
    Returns:
        None
    """
    # setting up the train/val split with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    # seed everything for a reproducible split
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up model (U-Net with ImageNet Encoders)
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"

    attention_type = None if args.attention_type == "None" else args.attention_type
    model = smp.Unet(encoder_name=args.encoder,
                     encoder_weights=ENCODER_WEIGHTS,
                     classes=4,
                     activation=None,
                     attention_type=attention_type)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        args.encoder, ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)
    valid_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}
    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {
            "params": model.decoder.parameters(),
            "lr": args.decoder_lr
        },
        {
            "params": model.encoder.parameters(),
            "lr": args.encoder_lr
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
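    # eps=1. smooths the Dice term so that empty masks do not cause division by zero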
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks_list = [
        DiceCallback(),
        EarlyStoppingCallback(patience=5, min_delta=0.001),
    ]
    if args.checkpoint_path != "None":  # the string "None" means: no checkpoint callback
        ckpoint_p = Path(args.checkpoint_path)
        fname = ckpoint_p.name
        # everything in the path besides the base file name
        resume_dir = str(ckpoint_p.parents[0])
        print(
            f"Loading {fname} from {resume_dir}. Checkpoints will also be saved in {resume_dir}."
        )
        callbacks_list = callbacks_list + [
            CheckpointCallback(resume=fname, resume_dir=resume_dir),
        ]

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks_list,
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
Beispiel #24
0
# the opening of this example is truncated in the source; the dangling argument
# below belongs to the runner construction (assumed reconstruction)
runner = SupervisedRunner(input_target_key='mask')
logdir = f'./logs/{args.model}'
num_epochs = args.epochs
callbacks = [
    CriterionCallback(input_key='mask',
                      multiplier=1.,
                      prefix='loss_dice',
                      criterion_key='dice'),
    CriterionCallback(input_key='mask',
                      prefix='loss_bce',
                      multiplier=0.8,
                      criterion_key='bce'),
    CriterionAggregatorCallback(prefix='loss',
                                loss_keys=["loss_dice", "loss_bce"],
                                loss_aggregate_fn="sum"),
    DiceCallback(input_key='mask'),
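    # accumulate gradients over 32 batches before each optimizer step (larger effective batch size)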
    OptimizerCallback(accumulation_steps=32),
    EarlyStoppingCallback(patience=8, min_delta=0.001),
]
if args.checkpoint:
    callbacks.append(
        CheckpointCallback(resume=f'{logdir}/checkpoints/best_full.pth'))
runner.train(
    model=model,
    criterion=criteria,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    main_metric='dice',
    minimize_metric=False,
    # the remaining arguments are truncated in the source; a typical completion:
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=True,
)

def main(args):
    """
    Main code for training a classification model.

    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args
    Returns:
        None
    """
    # Reading in the .csvs
    train = pd.read_csv(os.path.join(args.dset_path, "train.csv"))
    sub = pd.read_csv(os.path.join(args.dset_path, "sample_submission.csv"))

    # setting up the train/val split with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    # seed everything for a reproducible split
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up the classification model
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"
    model = ResNet34(pre=ENCODER_WEIGHTS, num_classes=4, use_simple_head=True)

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        "resnet34", ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )
    valid_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}
    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=0.001)
                 ],
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
    utils.plot_metrics(
        logdir=logdir,
        # specify which metrics we want to plot
        metrics=["loss", "dice", "lr", "_base/lr"])
Beispiel #26
0
def run(config_file):
    config = load_config(config_file)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir
    print('working directory:', config.work_dir)

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)
    save_config(config, config.work_dir + '/config.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            data_folder=config.data.train_dir,
            df_path=config.data.train_df_path,
            phase=phase,
            img_size=(config.data.height, config.data.width),
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            num_classes=config.data.num_classes,
            pseudo_label_path=config.train.pseudo_label_path,
            debug=config.debug
        )
        for phase in ['train', 'valid']
    }

    # create a segmentation model with a pre-trained encoder
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # train setting
    criterion = get_loss(config)
    params = [
        {'params': model.decoder.parameters(), 'lr': config.optimizer.params.decoder_lr},
        {'params': model.encoder.parameters(), 'lr': config.optimizer.params.encoder_lr},
    ]
    optimizer = get_optimizer(params, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model, device=get_device())

    callbacks = [DiceCallback(), IouCallback()]

    if config.train.early_stop_patience > 0:
        callbacks.append(EarlyStoppingCallback(
            patience=config.train.early_stop_patience))

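    # derive the number of gradient-accumulation steps so the effective batch size equals accumulation_size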
    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [CriterionCallback(),
             OptimizerCallback(accumulation_steps=accumulation_steps)]
        )

    # resume from a previous run's checkpoints if they exist
    if os.path.exists(config.work_dir + '/checkpoints/best.pth'):
        callbacks.append(CheckpointCallback(
            resume=config.work_dir + '/checkpoints/last_full.pth'))

    if config.train.mixup:
        callbacks.append(MixupCallback())

    if config.train.cutmix:
        callbacks.append(CutMixCallback())

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=True,
    )