def main():

    train_image_list = sorted(
        glob.glob(
            pathname=
            '../input/uavid-semantic-segmentation-dataset/train/train/*/Images/*.png',
            recursive=True))
    train_mask_list = sorted(
        glob.glob(pathname='./trainlabels/*/TrainId/*.png', recursive=True))
    valid_image_list = sorted(
        glob.glob(
            pathname=
            '../input/uavid-semantic-segmentation-dataset/valid/valid/*/Images/*.png',
            recursive=True))
    valid_mask_list = sorted(
        glob.glob(pathname='./validlabels/*/TrainId/*.png', recursive=True))

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        config.ENCODER, config.ENCODER_WEIGHTS)

    train_dataset = Dataset(
        train_image_list,
        train_mask_list,
        augmentation=augmentations.get_training_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )

    valid_dataset = Dataset(
        valid_image_list,
        valid_mask_list,
        augmentation=augmentations.get_validation_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=True,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=False,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=False)

    loaders = {"train": train_loader, "valid": valid_loader}

    base_optimizer = RAdam([
        {
            'params': model.MODEL.decoder.parameters(),
            'lr': config.LEARNING_RATE
        },
        {
            'params': model.MODEL.encoder.parameters(),
            'lr': 1e-4
        },
        {
            'params': model.MODEL.segmentation_head.parameters(),
            'lr': config.LEARNING_RATE
        },
    ])
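    # the pretrained encoder gets a smaller LR (1e-4) than the decoder/segmentation head above;
    # wrapping RAdam in Lookahead gives the Ranger-style optimizer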
    optimizer = Lookahead(base_optimizer)
    criterion = BCEDiceLoss(activation=None)
    runner = SupervisedRunner()
    scheduler = OneCycleLRWithWarmup(optimizer,
                                     num_steps=config.NUM_EPOCHS,
                                     lr_range=(0.0016, 0.0000001),
                                     init_lr=config.LEARNING_RATE,
                                     warmup_steps=2)

    callbacks = [
        IouCallback(activation='none'),
        ClasswiseIouCallback(classes=config.CLASSES, activation='none'),
        EarlyStoppingCallback(patience=config.ES_PATIENCE,
                              metric='iou',
                              minimize=False),
    ]
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=config.LOGDIR,
        num_epochs=config.NUM_EPOCHS,
        # save our best checkpoint by IoU metric
        main_metric="iou",
        # IoU needs to be maximized.
        minimize_metric=False,
        # for FP16. It uses the variable from the very first cell
        fp16=config.FP16_PARAMS,
        # prints train logs
        verbose=True,
    )
Example #2
def train_model():

    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)

    num_workers = 0
    bs = 16
    train_dataset = CloudDataset(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    num_epochs = 40

    # model, criterion, optimizer
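    # the randomly initialised decoder trains with a 10x higher LR than the pretrained encoder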
    optimizer = RAdam([
        {
            'params': model.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.encoder.parameters(),
            'lr': 1e-3
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer,
                                  factor=0.15,
                                  patience=2,
                                  threshold=0.001)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=0.001)
                 ],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)

    return True
Example #3
                num_classes=config.num_classes,
                input_key="targets_one_hot",
                class_names=config.class_names
            ),
            F1ScoreCallback(
                input_key="targets_one_hot",
                activation="Softmax"
            ),
            CheckpointCallback(
                save_n_best=1,
                #             resume_dir="./models/classification",
                metrics_filename="metrics.json"
            ),
            EarlyStoppingCallback(
                patience=config.patience,
                metric="auc/_mean",
                minimize=False
            )
        ],
        # path to save logs
        logdir=config.logdir,

        num_epochs=config.num_epochs,

        # save our best checkpoint by AUC metric
        main_metric="auc/_mean",
        # AUC needs to be maximized.
        minimize_metric=False,

        # for FP16. It uses the variable from the very first cell
        fp16=fp16_params,
Example #4
    # elif args.loss == 'lovasz_softmax':
    #     criterion = lovasz_softmax()
    elif args.loss == 'BCEMulticlassDiceLoss':
        criterion = BCEMulticlassDiceLoss()
    elif args.loss == 'MulticlassDiceMetricCallback':
        criterion = MulticlassDiceMetricCallback()
    elif args.loss == 'BCE':
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    if args.multigpu:
        model = nn.DataParallel(model)

    if args.task == 'segmentation':
        callbacks = [DiceCallback(), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]
    elif args.task == 'classification':
        callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4),
                     EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]

    if args.gradient_accumulation:
        callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation))

    checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best.pth')
    model.cuda()
    utils.unpack_checkpoint(checkpoint, model=model)
    #
    #
    runner = SupervisedRunner()
    if args.train:
        print('Training')
Example #5
def main(args):
    """
    Main code for training a classification model.

    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args
    Returns:
        None
    """
    # Reading in the .csvs
    train = pd.read_csv(os.path.join(args.dset_path, "train.csv"))
    sub = pd.read_csv(os.path.join(args.dset_path, "sample_submission.csv"))

    # setting up the train/val split with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    # setting up the train/val split with filenames
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up the classification model
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"
    model = ResNet34(pre=ENCODER_WEIGHTS, num_classes=4, use_simple_head=True)

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        "resnet34", ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )
    valid_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}
    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=0.001)
                 ],
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
    utils.plot_metrics(
        logdir=logdir,
        # specify which metrics we want to plot
        metrics=["loss", "dice", "lr", "_base/lr"])
Example #6
valid_loader = DataLoader(valid_dataset, batch_size=hyper_params['batch_size'], shuffle=False)

loaders = {"train": train_loader, "valid": valid_loader}


optimizer = torch.optim.Adam(model.parameters(), hyper_params['learning_rate'])

scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)

criterion = WeightedBCEDiceLoss(
    lambda_dice=hyper_params['lambda_dice'],
    lambda_bce=hyper_params['lambda_bceWithLogits']
)

runner = SupervisedRunner(device=device)

logdir = hyper_params['logdir']

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(), CometCallback(experiment), EarlyStoppingCallback(patience=5, min_delta=0.001)],
    logdir=logdir,
    #resume=f"{logdir}/checkpoints/last_full.pth",
    num_epochs=hyper_params['num_epochs'],
    verbose=True
)
Example #7
import numpy as np
from sklearn.metrics import roc_auc_score


def calc_roc_auc(pred, gt, *args, **kwargs):
    pred = torch.sigmoid(pred).detach().cpu().numpy()
    gt = gt.detach().cpu().numpy().astype(np.uint8)

    # pad with one positive and one negative label so roc_auc_score
    # never fails on a batch that contains a single class
    pred = np.concatenate([pred.reshape(-1), np.array([0, 0])])
    gt = np.concatenate([gt.reshape(-1), np.array([1, 0])])

    return [roc_auc_score(gt.reshape(-1), pred.reshape(-1))]


runner.train(model=model,
             scheduler=scheduler,
             criterion=criterion,
             optimizer=optimizer,
             loaders=loaders,
             logdir=logdir,
             num_epochs=num_epochs,
             callbacks=[
                 MultiMetricCallback(metric_fn=calc_roc_auc,
                                     prefix='rocauc',
                                     input_key="targets",
                                     output_key="logits",
                                     list_args=['_']),
                 EarlyStoppingCallback(patience=10, min_delta=0.01)
             ],
             verbose=True)
Example #8
def run(config_file, device_id, idx_fold):
    os.environ['CUDA_VISIBLE_DEVICES'] = str(device_id)
    print('info: use gpu No.{}'.format(device_id))

    config = load_config(config_file)

    # for n-folds loop
    if config.data.params.idx_fold == -1:
        config.data.params.idx_fold = idx_fold
        config.work_dir = config.work_dir + '_fold{}'.format(idx_fold)
    elif config.data.params.idx_fold == 0:
        original_fold = int(config.work_dir.split('_fold')[1])
        if original_fold == idx_fold:
            raise Exception(
                'if you specify fold 0, you should use train.py or resume from fold 1.'
            )
        config.data.params.idx_fold = idx_fold
        config.work_dir = config.work_dir.split('_fold')[0] + '_fold{}'.format(
            idx_fold)
    else:
        raise Exception('you should use train.py if idx_fold is specified.')
    print('info: training for fold {}'.format(idx_fold))

    if not os.path.exists(config.work_dir):
        os.makedirs(config.work_dir, exist_ok=True)

    all_transforms = {}
    all_transforms['train'] = get_transforms(config.transforms.train)
    all_transforms['valid'] = get_transforms(config.transforms.test)

    dataloaders = {
        phase: make_loader(
            df_path=config.data.train_df_path,
            data_dir=config.data.train_dir,
            features=config.data.features,
            phase=phase,
            img_size=(config.data.height, config.data.width),
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx_fold,
            transforms=all_transforms[phase],
            horizontal_flip=config.train.horizontal_flip,
            model_scale=config.data.model_scale,
            debug=config.debug,
            pseudo_path=config.data.pseudo_path,
        )
        for phase in ['train', 'valid']
    }

    # create segmentation model with pre-trained encoder
    num_features = len(config.data.features)
    print('info: num_features =', num_features)
    model = CenterNetFPN(
        slug=config.model.encoder,
        num_classes=num_features,
    )

    optimizer = get_optimizer(model, config)
    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(model=model, device=get_device())

    # train setting
    criterion, callbacks = get_criterion_and_callback(config)

    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    if config.train.accumulation_size > 0:
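        # translate the desired effective batch size into optimizer accumulation steps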
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [OptimizerCallback(accumulation_steps=accumulation_steps)])

    # to resume from check points if exists
    if os.path.exists(config.work_dir + '/checkpoints/last_full.pth'):
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric=config.train.main_metric,
        minimize_metric=config.train.minimize_metric,
        callbacks=callbacks,
        verbose=True,
        fp16=config.train.fp16,
    )
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--encoder', type=str, default='efficientnet-b0')
    parser.add_argument('--model', type=str, default='unet')
    parser.add_argument('--pretrained', type=str, default='imagenet')
    parser.add_argument('--logdir', type=str, default='../logs/')
    parser.add_argument('--exp_name', type=str)
    parser.add_argument('--data_folder', type=str, default='../input/')
    parser.add_argument('--height', type=int, default=320)
    parser.add_argument('--width', type=int, default=640)
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--accumulate', type=int, default=8)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--enc_lr', type=float, default=1e-2)
    parser.add_argument('--dec_lr', type=float, default=1e-3)
    parser.add_argument('--optim', type=str, default="radam")
    parser.add_argument('--loss', type=str, default="bcedice")
    parser.add_argument('--schedule', type=str, default="rlop")
    parser.add_argument('--early_stopping', type=bool, default=True)

    args = parser.parse_args()

    encoder = args.encoder
    model = args.model
    pretrained = args.pretrained
    logdir = args.logdir
    name = args.exp_name
    data_folder = args.data_folder
    height = args.height
    width = args.width
    bs = args.batch_size
    accumulate = args.accumulate
    epochs = args.epochs
    enc_lr = args.enc_lr
    dec_lr = args.dec_lr
    optim = args.optim
    loss = args.loss
    schedule = args.schedule
    early_stopping = args.early_stopping

    if model == 'unet':
        model = smp.Unet(encoder_name=encoder,
                         encoder_weights=pretrained,
                         classes=4,
                         activation=None)
    if model == 'fpn':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'pspnet':
        model = smp.PSPNet(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=pretrained,
            classes=4,
            activation=None,
        )
    if model == 'aspp':
        print('aspp can only be used with resnet34')
        model = aspp(num_class=4)

    preprocessing_fn = smp.encoders.get_preprocessing_fn(encoder, pretrained)
    log = os.path.join(logdir, name)

    ds = get_dataset(path=data_folder)
    prepared_ds = prepare_dataset(ds)

    train_set, valid_set = get_train_test(ds)

    train_ds = CloudDataset(df=prepared_ds,
                            datatype='train',
                            img_ids=train_set,
                            transforms=training1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)
    valid_ds = CloudDataset(df=prepared_ds,
                            datatype='train',
                            img_ids=valid_set,
                            transforms=valid1(h=height, w=width),
                            preprocessing=get_preprocessing(preprocessing_fn),
                            folder=data_folder)

    train_loader = DataLoader(train_ds,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=multiprocessing.cpu_count())
    valid_loader = DataLoader(valid_ds,
                              batch_size=bs,
                              shuffle=False,
                              num_workers=multiprocessing.cpu_count())

    loaders = {
        'train': train_loader,
        'valid': valid_loader,
    }

    num_epochs = epochs

    if args.model != "aspp":
        if optim == "radam":
            optimizer = RAdam([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "adam":
            optimizer = Adam([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "adamw":
            optimizer = AdamW([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
        if optim == "sgd":
            optimizer = SGD([
                {
                    'params': model.encoder.parameters(),
                    'lr': enc_lr
                },
                {
                    'params': model.decoder.parameters(),
                    'lr': dec_lr
                },
            ])
    elif args.model == 'aspp':
        if optim == "radam":
            optimizer = RAdam([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "adam":
            optimizer = Adam([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "adamw":
            optimizer = AdamW([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])
        if optim == "sgd":
            optimizer = SGD([
                {
                    'params': model.parameters(),
                    'lr': enc_lr
                },
            ])

    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=5)
    if schedule == "rlop":
        scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=3)
    if schedule == "noam":
        scheduler = NoamLR(optimizer, 10)

    if loss == "bcedice":
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    if loss == "dice":
        criterion = smp.utils.losses.DiceLoss(eps=1.)
    if loss == "bcejaccard":
        criterion = smp.utils.losses.BCEJaccardLoss(eps=1.)
    if loss == "jaccard":
        criterion = smp.utils.losses.JaccardLoss(eps=1.)
    if loss == 'bce':
        criterion = NewBCELoss()

    callbacks = [NewDiceCallback(), CriterionCallback()]

    callbacks.append(OptimizerCallback(accumulation_steps=accumulate))
    if early_stopping:
        callbacks.append(EarlyStoppingCallback(patience=5, min_delta=0.001))

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=log,
        num_epochs=num_epochs,
        verbose=True,
    )
Example #10
def main(data_path='/data/SN6_buildings/train/AOI_11_Rotterdam/',
         config_path='/project/configs/senet154_gcc_fold1.py',
         gpu='0'):

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    config = get_config(config_path)
    model_name = config['model_name']
    fold_number = config['fold_number']
    alias = config['alias']
    log_path = osp.join(config['logs_path'],
                        alias + str(fold_number) + '_' + model_name)

    device = torch.device(config['device'])
    weights = config['weights']
    loss_name = config['loss']
    optimizer_name = config['optimizer']
    lr = config['lr']
    decay = config['decay']
    momentum = config['momentum']
    epochs = config['epochs']
    fp16 = config['fp16']
    n_classes = config['n_classes']
    input_channels = config['input_channels']
    main_metric = config['main_metric']

    best_models_count = config['best_models_count']
    minimize_metric = config['minimize_metric']
    min_delta = config['min_delta']

    train_images = data_path
    data_type = config['data_type']
    masks_data_path = config['masks_data_path']
    folds_file = config['folds_file']
    train_augs = config['train_augs']
    preprocessing_fn = config['preprocessing_fn']
    limit_files = config['limit_files']
    batch_size = config['batch_size']
    shuffle = config['shuffle']
    num_workers = config['num_workers']
    valid_augs = config['valid_augs']
    val_batch_size = config['val_batch_size']
    multiplier = config['multiplier']

    train_dataset = SemSegDataset(images_dir=train_images,
                                  data_type=data_type,
                                  masks_dir=masks_data_path,
                                  mode='train',
                                  n_classes=n_classes,
                                  folds_file=folds_file,
                                  fold_number=fold_number,
                                  augmentation=train_augs,
                                  preprocessing=preprocessing_fn,
                                  limit_files=limit_files,
                                  multiplier=multiplier)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=shuffle,
                              num_workers=num_workers)

    valid_dataset = SemSegDataset(images_dir=train_images,
                                  data_type=data_type,
                                  mode='valid',
                                  folds_file=folds_file,
                                  n_classes=n_classes,
                                  fold_number=fold_number,
                                  augmentation=valid_augs,
                                  preprocessing=preprocessing_fn,
                                  limit_files=limit_files)

    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=val_batch_size,
                              shuffle=False,
                              num_workers=num_workers)

    model = make_model(model_name=model_name,
                       weights=weights,
                       n_classes=n_classes,
                       input_channels=input_channels).to(device)

    loss = get_loss(loss_name=loss_name)
    optimizer = get_optimizer(optimizer_name=optimizer_name,
                              model=model,
                              lr=lr,
                              momentum=momentum,
                              decay=decay)

    if config['scheduler'] == 'reduce_on_plateau':
        print('reduce lr')
        alpha = config['alpha']
        patience = config['patience']
        threshold = config['thershold']
        min_lr = config['min_lr']
        mode = config['scheduler_mode']
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer=optimizer,
            factor=alpha,
            verbose=True,
            patience=patience,
            mode=mode,
            threshold=threshold,
            min_lr=min_lr)
    elif config['scheduler'] == 'steps':
        print('steps lr')
        steps = config['steps']
        step_gamma = config['step_gamma']
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                                         milestones=steps,
                                                         gamma=step_gamma)
    else:
        scheduler = None

    callbacks = []

    dice_callback = DiceCallback()
    callbacks.append(dice_callback)
    callbacks.append(CheckpointCallback(save_n_best=best_models_count))
    callbacks.append(
        EarlyStoppingCallback(patience=config['early_stopping'],
                              metric=main_metric,
                              minimize=minimize_metric,
                              min_delta=min_delta))

    runner = SupervisedRunner(device=device)
    loaders = {'train': train_loader, 'valid': valid_loader}

    runner.train(model=model,
                 criterion=loss,
                 optimizer=optimizer,
                 loaders=loaders,
                 scheduler=scheduler,
                 callbacks=callbacks,
                 logdir=log_path,
                 num_epochs=epochs,
                 verbose=True,
                 main_metric=main_metric,
                 minimize_metric=minimize_metric,
                 fp16=fp16)
Example #11
    'valid': dataloader_val
}  #collections.OrderedDict({'train': dataloader_train, 'valid': dataloader_val})

model = ReverseModel()

optimizer = Lookahead(RAdam(params=model.parameters(), lr=1e-3))

criterion = {"bce": nn.BCEWithLogitsLoss()}

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       factor=0.25,
                                                       patience=2)

callbacks = [
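    # criterion_key="bce" picks the loss out of the criterion dict above; it is logged
    # under the "loss" prefix, which is also the main metric being minimized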
    CriterionCallback(input_key='start', prefix="loss", criterion_key="bce"),
    EarlyStoppingCallback(patience=5),
]

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir="./logs",
    num_epochs=5,  #TODO 
    main_metric="loss",
    minimize_metric=True,
    verbose=True,
)
Example #12
def main(config):
    opts = config()
    path = opts.path
    train = pd.read_csv(f'{path}/train.csv')
    pseudo_label = pd.read_csv(
        './submissions/submission_segmentation_and_classifier.csv')

    n_train = len(os.listdir(f'{path}/train_images'))
    n_test = len(os.listdir(f'{path}/test_images'))
    print(f'There are {n_train} images in train dataset')
    print(f'There are {n_test} images in test dataset')

    train.loc[train['EncodedPixels'].isnull() == False,
              'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()
    train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
        lambda x: x.split('_')[0]).value_counts().value_counts()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])
    id_mask_count = train.loc[train['EncodedPixels'].isnull() == False,
                              'Image_Label'].apply(lambda x: x.split('_')[
                                  0]).value_counts().reset_index().rename(
                                      columns={
                                          'index': 'img_id',
                                          'Image_Label': 'count'
                                      })
    print(id_mask_count.head())

    pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False,
                     'Image_Label'].apply(
                         lambda x: x.split('_')[1]).value_counts()
    pseudo_label.loc[pseudo_label['EncodedPixels'].isnull() == False,
                     'Image_Label'].apply(lambda x: x.split('_')[0]
                                          ).value_counts().value_counts()

    pseudo_label['label'] = pseudo_label['Image_Label'].apply(
        lambda x: x.split('_')[1])
    pseudo_label['im_id'] = pseudo_label['Image_Label'].apply(
        lambda x: x.split('_')[0])
    pseudo_label_ids = pseudo_label.loc[
        pseudo_label['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
            lambda x: x.split('_')[0]).value_counts().reset_index().rename(
                columns={
                    'index': 'img_id',
                    'Image_Label': 'count'
                })
    print(pseudo_label_ids.head())

    if not os.path.exists("csvs/train_all.csv"):
        train_ids, valid_ids = train_test_split(
            id_mask_count,
            random_state=39,
            stratify=id_mask_count['count'],
            test_size=0.1)
        valid_ids.to_csv("csvs/valid_threshold.csv", index=False)
        train_ids.to_csv("csvs/train_all.csv", index=False)
    else:
        train_ids = pd.read_csv("csvs/train_all.csv")
        valid_ids = pd.read_csv("csvs/valid_threshold.csv")

    for fold, ((train_ids_new, valid_ids_new),
               (train_ids_pl, valid_ids_pl)) in enumerate(
                   zip(
                       stratified_groups_kfold(train_ids,
                                               target='count',
                                               n_splits=opts.fold_max,
                                               random_state=0),
                       stratified_groups_kfold(pseudo_label_ids,
                                               target='count',
                                               n_splits=opts.fold_max,
                                               random_state=0))):

        train_ids_new.to_csv(f'csvs/train_fold{fold}.csv')
        valid_ids_new.to_csv(f'csvs/valid_fold{fold}.csv')
        train_ids_new = train_ids_new['img_id'].values
        valid_ids_new = valid_ids_new['img_id'].values

        train_ids_pl = train_ids_pl['img_id'].values
        valid_ids_pl = valid_ids_pl['img_id'].values

        ENCODER = opts.backborn
        ENCODER_WEIGHTS = opts.encoder_weights
        DEVICE = 'cuda'

        ACTIVATION = None
        model = get_model(
            model_type=opts.model_type,
            encoder=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            activation=ACTIVATION,
            n_classes=opts.class_num,
            task=opts.task,
            center=opts.center,
            attention_type=opts.attention_type,
            head='simple',
            classification=opts.classification,
        )
        model = convert_model(model)
        preprocessing_fn = encoders.get_preprocessing_fn(
            ENCODER, ENCODER_WEIGHTS)

        num_workers = opts.num_workers
        bs = opts.batchsize

        train_dataset = CloudDataset(
            df=train,
            label_smoothing_eps=opts.label_smoothing_eps,
            datatype='train',
            img_ids=train_ids_new,
            transforms=get_training_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_dataset = CloudDataset(
            df=train,
            datatype='valid',
            img_ids=valid_ids_new,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))

        ################# make pseudo label dataset #######################
        train_dataset_pl = CloudPseudoLabelDataset(
            df=pseudo_label,
            datatype='train',
            img_ids=train_ids_pl,
            transforms=get_training_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_dataset_pl = CloudPseudoLabelDataset(
            df=pseudo_label,
            datatype='train',
            img_ids=valid_ids_pl,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))

        #         train_dataset = ConcatDataset([train_dataset, train_dataset_pl])
        #         valid_dataset = ConcatDataset([valid_dataset, valid_dataset_pl])
        train_dataset = ConcatDataset([train_dataset, valid_dataset_pl])
        ################# make pseudo label dataset #######################
        train_loader = DataLoader(train_dataset,
                                  batch_size=bs,
                                  shuffle=True,
                                  num_workers=num_workers,
                                  drop_last=True)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=bs,
                                  shuffle=False,
                                  num_workers=num_workers,
                                  drop_last=True)

        loaders = {"train": train_loader, "valid": valid_loader}
        num_epochs = opts.max_epoch
        logdir = f"{opts.logdir}/fold{fold}"
        optimizer = get_optimizer(optimizer=opts.optimizer,
                                  lookahead=opts.lookahead,
                                  model=model,
                                  separate_decoder=True,
                                  lr=opts.lr,
                                  lr_e=opts.lr_e)
        opt_level = 'O1'
        model.cuda()
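        # NVIDIA apex mixed precision; opt_level O1 patches ops to run in FP16 where safe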
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=opt_level)
        scheduler = opts.scheduler(optimizer)
        criterion = opts.criterion
        runner = SupervisedRunner()
        if opts.task == "segmentation":
            callbacks = [DiceCallback()]
        else:
            callbacks = []
        if opts.early_stop:
            callbacks.append(
                EarlyStoppingCallback(patience=10, min_delta=0.001))
        if opts.mixup:
            callbacks.append(MixupCallback(alpha=0.25))
        if opts.accumeration is not None:
            callbacks.append(CriterionCallback())
            callbacks.append(
                OptimizerCallback(accumulation_steps=opts.accumeration))
        print(
            f"############################## Start training of fold{fold}! ##############################"
        )
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=callbacks,
                     logdir=logdir,
                     num_epochs=num_epochs,
                     verbose=True)
        print(
            f"############################## Finish training of fold{fold}! ##############################"
        )
        del model
        del loaders
        del runner
        torch.cuda.empty_cache()
        gc.collect()
Example #13
    # elif args.loss == 'lovasz_softmax':
    #     criterion = lovasz_softmax()
    elif args.loss == 'BCEMulticlassDiceLoss':
        criterion = BCEMulticlassDiceLoss()
    elif args.loss == 'MulticlassDiceMetricCallback':
        criterion = MulticlassDiceMetricCallback()
    elif args.loss == 'BCE':
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    if args.multigpu:
        model = nn.DataParallel(model)

    if args.task == 'segmentation':
        callbacks = [DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001), CriterionCallback()]
    elif args.task == 'classification':
        callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4), EarlyStoppingCallback(patience=5, min_delta=0.001), CriterionCallback()]

    if args.gradient_accumulation:
        callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation))

    runner = SupervisedRunner()
    if args.train:
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            callbacks=callbacks,
Example #14
def train_model(
    df_train,
    df_valid,
    model_class,
    model_params,
    vectorizer,
    general_params,
):
    vectorizer = copy.deepcopy(vectorizer)
    vectorizer.fit(df_train["text"])

    df_train = make_df(df_train, vectorizer)
    train_ds = GeneralDataset(
        df_train["tokens"].values,
        labels=df_train["label"].values,
        max_sentence_len=general_params["max_sentence_len"],
    )
    trainloader = DataLoader(
        dataset=train_ds,
        batch_size=general_params["batch_size"],
        shuffle=True,
        num_workers=general_params["num_workers"],
    )

    df_valid = make_df(df_valid, vectorizer)
    valid_ds = GeneralDataset(
        df_valid["tokens"].values,
        labels=df_valid["label"].values,
        max_sentence_len=general_params["max_sentence_len"],
    )
    validloader = DataLoader(
        dataset=valid_ds,
        batch_size=general_params["batch_size"],
        shuffle=False,
        num_workers=general_params["num_workers"],
    )

    loaders = collections.OrderedDict()
    loaders["train"] = trainloader
    loaders["valid"] = validloader

    model_params = copy.deepcopy(model_params)
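    # pass the fitted vocabulary size into the model configuration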
    model_params.update({"vocab_size": len(vectorizer.vocabulary_)})
    model = model_class(**model_params).float()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), general_params["lr"])

    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        callbacks=[
            AccuracyCallback(),
            EarlyStoppingCallback(patience=general_params["patience"],
                                  metric="accuracy01",
                                  minimize=False),
        ],
        logdir=general_params["logdir"],
        num_epochs=general_params["num_epochs"],
        main_metric="accuracy01",
        minimize_metric=False,
        load_best_on_end=True,
        verbose=False,
    )

    with open(os.path.join(general_params["logdir"], "vectorizer.pickle"),
              "wb") as output_file:
        pickle.dump(vectorizer, output_file)
Example #15
num_epochs = args.epochs
callbacks = [
    CriterionCallback(input_key='mask',
                      multiplier=1.,
                      prefix='loss_dice',
                      criterion_key='dice'),
    CriterionCallback(input_key='mask',
                      prefix='loss_bce',
                      multiplier=0.8,
                      criterion_key='bce'),
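    # sum the two weighted losses into a single "loss" that is backpropagated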
    CriterionAggregatorCallback(prefix='loss',
                                loss_keys=["loss_dice", "loss_bce"],
                                loss_aggregate_fn="sum"),
    DiceCallback(input_key='mask'),
    OptimizerCallback(accumulation_steps=32),
    EarlyStoppingCallback(patience=8, min_delta=0.001),
]
if args.checkpoint:
    callbacks.append(
        CheckpointCallback(resume=f'{logdir}/checkpoints/best_full.pth'))
runner.train(
    model=model,
    criterion=criteria,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    main_metric='dice',
    minimize_metric=False,
    logdir=logdir,
    # fp16={"opt_level": "O1"},
Example #16
def main():

    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--seed', type=int, default=1234, help='Random seed')
    arg('--model-name',
        type=str,
        default=Path('seresnext101'),
        help='String model name used for saving')
    arg('--run-root',
        type=Path,
        default=Path('../results'),
        help='Directory for saving model')
    arg('--data-root', type=Path, default=Path('../data'))
    arg('--image-size', type=int, default=224, help='Image size for training')
    arg('--batch-size',
        type=int,
        default=16,
        help='Batch size during training')
    arg('--fold', type=int, default=0, help='Validation fold')
    arg('--n-epochs', type=int, default=10, help='Epoch to run')
    arg('--learning-rate',
        type=float,
        default=1e-3,
        help='Initial learning rate')
    arg('--step', type=int, default=1, help='Current training step')
    arg('--patience', type=int, default=4)
    arg('--criterion', type=str, default='bce', help='Criterion')
    arg('--optimizer', default='Adam', help='Name of the optimizer')
    arg('--continue_train', type=bool, default=False)
    arg('--checkpoint',
        type=str,
        default=Path('../results'),
        help='Checkpoint file path')
    arg('--workers', type=int, default=2)
    arg('--debug', type=bool, default=True)
    args = parser.parse_args()

    set_seed(args.seed)
    """
    
    SET PARAMS
    
    """
    args.debug = True
    ON_KAGGLE = configs.ON_KAGGLE
    N_CLASSES = configs.NUM_CLASSES
    args.image_size = configs.SIZE
    args.data_root = configs.DATA_ROOT
    use_cuda = cuda.is_available()
    fold = args.fold
    num_workers = args.workers
    num_epochs = args.n_epochs
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    """

    LOAD DATA
    
    """
    print(os.listdir(args.data_root))
    folds = pd.read_csv(args.data_root / 'folds.csv')
    train_root = args.data_root / 'train'

    if args.debug:
        folds = folds.head(50)
    train_fold = folds[folds['fold'] != fold]
    valid_fold = folds[folds['fold'] == fold]
    check_fold(train_fold, valid_fold)

    def get_dataloader(df: pd.DataFrame, image_transform) -> DataLoader:
        """
        Calls dataloader to load Imet Dataset
        """
        return DataLoader(
            ImetDataset(train_root, df, image_transform),
            shuffle=True,
            batch_size=batch_size,
            num_workers=num_workers,
        )

    train_loader = get_dataloader(train_fold, image_transform=albu_transform)
    valid_loader = get_dataloader(valid_fold, image_transform=valid_transform)
    print('{} items in train, {} in valid'.format(len(train_loader.dataset),
                                                  len(valid_loader.dataset)))
    loaders = OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader
    """
    
    MODEL
    
    """
    model = seresnext101(num_classes=N_CLASSES)
    if use_cuda:
        model = model.cuda()

    criterion = nn.BCEWithLogitsLoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               factor=0.5,
                                               patience=args.patience)
    """
    
    MODEL RUNNER
    
    """
    # call an instance of the model runner
    runner = SupervisedRunner()
    # logs folder
    current_time = datetime.now().strftime('%b%d_%H_%M')
    prefix = f'{current_time}_{args.model_name}'
    logdir = os.path.join(args.run_root, prefix)
    os.makedirs(logdir, exist_ok=False)

    print('\tTrain session    :', prefix)
    print('\tOn KAGGLE      :', ON_KAGGLE)
    print('\tDebug          :', args.debug)
    print('\tClasses number :', N_CLASSES)
    print('\tModel          :', args.model_name)
    print('\tParameters     :', model.parameters())
    print('\tImage size     :', args.image_size)
    print('\tEpochs         :', num_epochs)
    print('\tWorkers        :', num_workers)
    print('\tLog dir        :', logdir)
    print('\tLearning rate  :', learning_rate)
    print('\tBatch size     :', batch_size)
    print('\tPatience       :', args.patience)

    if args.continue_train:
        state = load_model(model, args.checkpoint)
        epoch = state['epoch']
        step = state['step']
        print('Loaded model weights from {}, epoch {}, step {}'.format(
            args.checkpoint, epoch, step))

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            F1ScoreCallback(threshold=0.5),
            #F2ScoreCallback(num_classes=N_CLASSES),
            EarlyStoppingCallback(patience=args.patience, min_delta=0.01)
        ],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True)

    # by default it only plots loss, works in IPython Notebooks
    #utils.plot_metrics(logdir=logdir, metrics=["loss", "_base/lr"])
    """
    
    INFERENCE TEST
    
    """
    loaders = OrderedDict([("infer", loaders["train"])])
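    # reload the best checkpoint and collect raw predictions with InferCallback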
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
    print(runner.callbacks[1].predictions["logits"])
Example #17
    def train(self):
        # TODO: Make it work for all modes; right now it only defaults to pcl.
        callbacks = [
            EarlyStoppingCallback(patience=15,
                                  metric="loss",
                                  minimize=True,
                                  min_delta=0),
        ]

        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                               mode="min")
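        # pair each episode tensor with its index so batches also carry episode positions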
        train_dataset = TensorDataset(self.tr_eps,
                                      torch.arange(self.tr_eps.shape[0]))
        val_dataset = TensorDataset(self.val_eps,
                                    torch.arange(self.val_eps.shape[0]))
        runner = CustomRunner()
        v_bs = self.val_eps.shape[0]
        loaders = {
            "train":
            DataLoader(
                train_dataset,
                batch_size=self.batch_size,
                num_workers=1,
                shuffle=True,
            ),
            "valid":
            DataLoader(
                val_dataset,
                batch_size=self.batch_size,
                num_workers=1,
                shuffle=True,
            ),
        }

        model = self.model
        num_features = 2
        # model training
        train_loader_param = {
            "batch_size": 64,
            "shuffle": True,
        }
        val_loader_param = {
            "batch_size": 32,
            "shuffle": True,
        }

        loaders_params = {
            "train": train_loader_param,
            "valid": val_loader_param,
        }

        # datasets = {
        #               "batch_size": 64,
        #               "num_workers": 1,
        #               "loaders_params": loaders_params,
        #               "get_datasets_fn": self.datasets_fn,
        #               "num_features": num_features,

        #          },

        runner.train(
            model=model,
            optimizer=self.optimizer,
            scheduler=scheduler,
            loaders=loaders,
            callbacks=callbacks,
            logdir="./logs",
            num_epochs=self.epochs,
            verbose=True,
            distributed=False,
            load_best_on_end=True,
            main_metric="loss",
        )
Example #18
def main():

    fold_path = args.fold_path
    fold_num = args.fold_num
    model_name = args.model_name
    train_csv = args.train_csv
    sub_csv = args.sub_csv
    encoder = args.encoder
    num_workers = args.num_workers
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learn_late = args.learn_late
    attention_type = args.attention_type

    train = pd.read_csv(train_csv)
    sub = pd.read_csv(sub_csv)

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1])
    sub['im_id'] = sub['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv')
    val_fold = pd.read_csv(f'{fold_path}/val_file_fold_{fold_num}.csv')

    train_ids = np.array(train_fold.file_name)
    valid_ids = np.array(val_fold.file_name)

    encoder_weights = 'imagenet'

    if model_name == 'ORG_Link18':
        model = Linknet_resnet18_Classifer()

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        encoder, encoder_weights)

    train_dataset = CloudDataset_Multi(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    valid_dataset = CloudDataset_Multi(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False)

    loaders = {"train": train_loader, "valid": valid_loader}

    logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation"

    print(logdir)

    if model_name == 'ORG_Link18':
        optimizer = Nadam([
            {
                'params': model.parameters(),
                'lr': learn_late
            },
        ])
    else:
        optimizer = Nadam([
            {
                'params': model.decoder.parameters(),
                'lr': learn_late
            },
            {
                'params': model.encoder.parameters(),
                'lr': learn_late
            },
        ])

    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
    criterion = Multi_Loss()

    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[EarlyStoppingCallback(patience=5, min_delta=1e-7)],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=1)
Example #19
File: train.py | Project: markson14/Kaggle
def train_model(train_parameters):

    k = train_parameters["k"]
    loaders = train_parameters["loaders"]
    num_epochs = train_parameters["num_epochs"]
    net = train_parameters["net"]
    ENCODER = train_parameters["ENCODER"]
    ENCODER_WEIGHTS = train_parameters["ENCODER_WEIGHTS"]
    ACTIVATION = train_parameters["ACTIVATION"]

    model = load_model(net, ENCODER, ENCODER_WEIGHTS, ACTIVATION)
    """ multi-gpu """
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    model.to("cuda")

    #     if k==0:
    #         summary(model.module.encoder,(3,384,576))

    logdir = "./logs/segmentation_{}_{}Fold".format(net, k)

    # model, criterion, optimizer
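    # note: .module assumes the DataParallel wrapper above was applied (more than one GPU)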
    optimizer = RAdam([
        {
            'params': model.module.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.module.encoder.parameters(),
            'lr': 1e-3
        },
        #         {'params': model.decoder.parameters(), 'lr': 1e-2},
        #         {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])

    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    #     criterion = FocalLoss()
    #     criterion = FocalDiceLoss()
    # criterion = smp.utils.losses.DiceLoss(eps=1.)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    runner = SupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            EarlyStoppingCallback(patience=10, min_delta=0.001),
            DiceCallback()
        ],
        #                    AUCCallback(),
        #                    IouCallback()],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True)

    del loaders, optimizer, scheduler, model, runner
    torch.cuda.empty_cache()
    gc.collect()
    print("Collect GPU cache")
Example #20
    #     loss.backward()
    #     optimizer.step()

    # model training
    runner = CustomRunner()
    logdir = "./logdir"
    runner.train(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        num_epochs=EPOCHS,
        loaders=loaders,
        logdir=logdir,
        verbose=True,
        timeit=True,
        callbacks=[EarlyStoppingCallback(patience=10)]
    )

    # # model training
    # runner = SupervisedRunner()
    # logdir = "./logdir"
    # runner.train(
    #     model=model,
    #     criterion=criterion,
    #     optimizer=optimizer,
    #     scheduler=scheduler,
    #     verbose=True,
    #     timeit=True,
    #     loaders=loaders,
    #     logdir=logdir,
    #     num_epochs=EPOCHS,
def main():

    fold_path = args.fold_path
    fold_num = args.fold_num
    model_name = args.model_name
    train_csv = args.train_csv
    sub_csv = args.sub_csv
    encoder = args.encoder
    num_workers = args.num_workers
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learn_late = args.learn_late
    attention_type = args.attention_type

    train = pd.read_csv(train_csv)
    sub = pd.read_csv(sub_csv)

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1])
    sub['im_id'] = sub['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv')
    val_fold = pd.read_csv(f'{fold_path}/valid_file_fold_{fold_num}.csv')

    train_ids = np.array(train_fold.file_name)
    valid_ids = np.array(val_fold.file_name)

    encoder_weights = 'imagenet'
    attention_type = None if attention_type == 'None' else attention_type

    if model_name == 'Unet':
        model = smp.Unet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
            attention_type=attention_type,
        )
    if model_name == 'Linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    if model_name == 'FPN':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    if model_name == 'ORG':
        model = Linknet_resnet18_ASPP()

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        encoder, encoder_weights)

    train_dataset = CloudDataset(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        drop_last=True,
        pin_memory=True,
    )
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation"

    #for batch_idx, (data, target) in enumerate(loaders['train']):
    #    print(batch_idx)

    print(logdir)

    if model_name == 'ORG':
        optimizer = NAdam([
            {
                'params': model.parameters(),
                'lr': learn_late
            },
        ])
    else:
        optimizer = NAdam([
            {
                'params': model.decoder.parameters(),
                'lr': learn_late
            },
            {
                'params': model.encoder.parameters(),
                'lr': learn_late
            },
        ])

    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
    criterion = smp.utils.losses.BCEDiceLoss()

    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=1e-7)
                 ],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=1)
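This main() reads a module-level args object; a plausible argparse block, with flag names taken from the attributes used above and purely illustrative defaults, could look like this:

import argparse

# Argument names mirror the attributes read in main(); defaults are guesses.
parser = argparse.ArgumentParser()
parser.add_argument('--fold_path', type=str, default='./folds')
parser.add_argument('--fold_num', type=int, default=0)
parser.add_argument('--model_name', type=str, default='Unet')
parser.add_argument('--train_csv', type=str, default='train.csv')
parser.add_argument('--sub_csv', type=str, default='sample_submission.csv')
parser.add_argument('--encoder', type=str, default='resnet34')
parser.add_argument('--num_workers', type=int, default=4)
parser.add_argument('--batch_size', type=int, default=16)
parser.add_argument('--num_epochs', type=int, default=20)
parser.add_argument('--learn_late', type=float, default=1e-3)
parser.add_argument('--attention_type', type=str, default='None')
args = parser.parse_args()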
Example #22
        ])

        model.to(device)
        scheduler = ReduceLROnPlateau(optimizer,
                                      factor=0.6,
                                      patience=s_patience)
        # criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
        # scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
        criterion = BCEDiceLoss(eps=1.)
        # criterion = DiceLoss(eps=1.) #Try this too
        runner = SupervisedRunner()

        # Train
        runner.train(model=model,
                     criterion=criterion,
                     optimizer=optimizer,
                     scheduler=scheduler,
                     loaders=loaders,
                     callbacks=[
                         DiceCallback(),
                         EarlyStoppingCallback(patience=train_patience,
                                               min_delta=0.001)
                     ],
                     logdir=logdir,
                     num_epochs=epochs,
                     verbose=True)
        secs = time.time() - start
        print(f"Done in {secs:.2f} seconds ({secs/3600:.2f} hours)")

# git fetch --all && git reset --hard origin/master
Example #23
def main(args):
    """
    Main code for training a U-Net with a user-defined encoder.
    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args
    Returns:
        None
    """
    # setting up the train and submission dataframes (plus per-image mask counts)
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    # setting up the train/val split with filenames
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up model (U-Net with ImageNet Encoders)
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"

    attention_type = None if args.attention_type == "None" else args.attention_type
    model = smp.Unet(encoder_name=args.encoder,
                     encoder_weights=ENCODER_WEIGHTS,
                     classes=4,
                     activation=None,
                     attention_type=attention_type)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        args.encoder, ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)
    valid_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}
    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {
            "params": model.decoder.parameters(),
            "lr": args.decoder_lr
        },
        {
            "params": model.encoder.parameters(),
            "lr": args.encoder_lr
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks_list = [
        DiceCallback(),
        EarlyStoppingCallback(patience=5, min_delta=0.001),
    ]
    if args.checkpoint_path != "None":  # the string "None" means: do not resume from a checkpoint
        ckpoint_p = Path(args.checkpoint_path)
        fname = ckpoint_p.name
        resume_dir = str(ckpoint_p.parents[0]
                         )  # everything in the path besides the base file name
        print(
            f"Loading {fname} from {resume_dir}. Checkpoints will also be saved in {resume_dir}."
        )
        callbacks_list = callbacks_list + [
            CheckpointCallback(resume=fname, resume_dir=resume_dir),
        ]

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks_list,
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
Example #24
    loaders = OrderedDict()
    loaders["train"] = train_dl
    loaders["valid"] = valid_dl

    # model
    model = AttentionModel(INPUT_DIM, HID_DIM, OUTPUT_DIM, RECURRENT_Layers,
                           DROPOUT).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20, 60])
    criterion = torch.nn.CrossEntropyLoss()

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        logdir=logdir,
        num_epochs=EPOCHS,
        verbose=True,
        callbacks=[
            AccuracyCallback(num_classes=5, topk_args=[1, 2]),
            EarlyStoppingCallback(metric='accuracy01',
                                  minimize=False,
                                  patience=10)
        ],
    )
def main(train, test, features, target):
    # get args
    args = parse_arguments()
    params = yaml_to_json(args.yaml_path)

    # hyper param
    num_folds = params.fold
    seed = params.seed
    base_path = params.base_path
    target_cols = params.target
    features_cols = params.features
    preprocessed_data_path = params.preprocessed_data
    batch_size = params.batch_size
    num_epochs = params.epochs
    # ex) '/hoge/logs'
    base_logdir = params.base_logdir

    # fix seed
    set_global_seed(seed)
    device = get_device()

    # set up logdir
    now = datetime.now()
    base_logdir = os.path.join(base_logdir, now.strftime("%Y%m%d%H%M%S"))
    os.makedirs(base_logdir, exist_ok=True)
    # dump yaml contents
    with open(os.path.join(base_logdir, 'params.json'), mode="w") as f:
        json.dump(params, f, indent=4)
    # copy this script alongside the logs
    my_file_path = os.path.abspath(__file__)
    shutil.copy(my_file_path, base_logdir)

    # load dataset
    if preprocessed_data_path == '':
        train, test, sample_submission = read_data(base_path)  # noqa
        # TODO: You should implement these function!!
        train, test = preprocess(train, test)  # noqa
        train, test = build_feature(train, test)  # noqa
    else:
        train = pd.read_csv(preprocessed_data_path + 'train.csv')
        test = pd.read_csv(preprocessed_data_path + 'test.csv')
        sample_submission = pd.read_csv(preprocessed_data_path +
                                        'sample_submission.csv')

    # execute CV
    # TODO: set your CV method
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    ids = kf.split(train)
    fold_scores = []
    test_preds = []
    for fold, (train_idx, valid_idx) in enumerate(ids):
        print('Fold {}'.format(fold + 1))

        logdir = os.path.join(base_logdir, 'fold_{}'.format(fold + 1))
        os.makedirs(logdir, exist_ok=True)

        # data
        X_train = train[features_cols]
        # TODO: should the target variable be normalised?
        Y_train = train[target_cols]
        X_test = test[features_cols]

        # create dataloaders
        train_dls, test_dl = create_data_loader(
            X_train.iloc[train_idx].to_numpy(),
            Y_train.iloc[train_idx].to_numpy(),
            X_train.iloc[valid_idx].to_numpy(),
            Y_train.iloc[valid_idx].to_numpy(),
            X_test.to_numpy(),
            batch_size=batch_size)

        # init models
        # TODO: set your model and learning condition
        # for more flexibility, wrap this in a factory function and select the model by keyword
        model = SampleNN(input_dim=1000, out_dim=1)
        criterion = nn.BCELoss()
        optimizer = torch.optim.AdamW(model.parameters())
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

        # init catalyst runner
        runner = SupervisedRunner(device=device)
        # model training
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=train_dls,
            logdir=logdir,
            num_epochs=num_epochs,
            callbacks=[EarlyStoppingCallback(patience=15, min_delta=0)],
            verbose=False)

        # calculate valid score
        best_model_path = logdir + '/checkpoints/best.pth'
        val_preds = runner.predict_loader(model,
                                          train_dls['valid'],
                                          resume=best_model_path,
                                          verbose=False)
        val_truth = Y_train.iloc[valid_idx].values
        # TODO: set your score function
        cv_score = mean_spearmanr_correlation_score(val_truth, val_preds)
        print('Fold {} CV score : {}'.format(fold + 1, cv_score))
        fold_scores.append(cv_score)

        # test prediction
        test_pred = runner.predict_loader(
            model, test_dl, resume=best_model_path, verbose=False)
        test_preds.append(test_pred)

    # submit
    # TODO: set your submit process
    sample_submission[target_cols] = np.mean(test_preds, axis=0)
    sample_submission.to_csv('submission.csv', index=False)
    return True
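mean_spearmanr_correlation_score is referenced but not defined in this snippet; one plausible implementation (an assumption, built on scipy.stats.spearmanr) averages the Spearman rank correlation over the target columns:

import numpy as np
from scipy.stats import spearmanr

def mean_spearmanr_correlation_score(y_true, y_pred):
    # Assumed definition: average Spearman rank correlation across target columns.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    scores = [
        spearmanr(y_true[:, col], y_pred[:, col]).correlation
        for col in range(y_true.shape[1])
    ]
    return float(np.nanmean(scores))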
Example #26
def main():
    train = pd.read_csv('./data_process/data/train_flip_aug_resize.csv')

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.replace('_' + x.split('_')[-1], ''))

    train['img_label'] = train.EncodedPixels.apply(lambda x: 0 if x is np.nan else 1)

    img_label = train.groupby('im_id')['img_label'].agg(list).reset_index()

    kf = KFold(n_splits=5, shuffle=True, random_state=777)
    fold = 0
    for train_idx, val_idx in kf.split(img_label):

        train_df = img_label.iloc[train_idx]
        image_train = np.array(train_df.im_id)
        label_train = np.array(train_df.img_label)

        val_df = img_label.iloc[val_idx]
        image_val = np.array(val_df.im_id)
        label_val = np.array(val_df.img_label)

        train_dataset = CloudClassDataset(
            datatype='train',
            img_ids=image_train,
            img_labels=label_train,
            transforms=get_training_augmentation(),
            preprocessing=ort_get_preprocessing()
        )

        valid_dataset = CloudClassDataset(
            datatype='train',
            img_ids=image_val,
            img_labels=label_val,
            transforms=get_validation_augmentation(),
            preprocessing=ort_get_preprocessing()
        )

        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=8)
        valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, num_workers=8)

        resnet_model = ResNet()

        loaders = {
            "train": train_loader,
            "valid": valid_loader
        }

        logdir = f"./class/segmentation/fold_{fold}/"

        print(logdir)

        optimizer = Nadam([
            {'params': resnet_model.parameters(), 'lr':  1e-3},
        ])

        scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
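        # multi-label target (one binary presence flag per cloud class), hence BCEWithLogitsLoss on raw logits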
        criterion = nn.BCEWithLogitsLoss()
        runner = SupervisedRunner()

        runner.train(
            model=resnet_model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            callbacks=[EarlyStoppingCallback(patience=5, min_delta=1e-7)],
            logdir=logdir,
            num_epochs=15,
            verbose=1
        )
        fold += 1
Example #27
File: fastai_model.py  Project: dodler/kgl
                          criterion_key="h1"),
        CriterionCallback(input_key="h2_targets",
                          output_key="h2_logits",
                          prefix="loss_h2",
                          criterion_key="h2"),
        CriterionCallback(input_key="h3_targets",
                          output_key="h3_logits",
                          prefix="loss_h3",
                          criterion_key="h3"),
        crit_agg,
    ])

callbacks.extend([
    score_callback,
    EarlyStoppingCallback(metric='weight_recall',
                          patience=early_stop_epochs,
                          min_delta=0.001)
])

callbacks.append(OptimizerCallback(grad_clip_params={'params': 1.0}))

runner.train(
    fp16=args.fp16,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir=logdir,
    num_epochs=num_epochs,
)
Example #28
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
# loss function (BCE + Dice)
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
# from catalyst.dl.runner import SupervisedRunner
runner = SupervisedRunner()

'''
Training section
'''
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=True
)
# plot the training curves (loss, dice, lr)
utils.plot_metrics(
    logdir=logdir,
    # specify which metrics we want to plot
    metrics=["loss", "dice", 'lr', '_base/lr']
)

# set up the validation loader for inference
encoded_pixels = []
loaders = {"infer": valid_loader}
runner.infer(
Example #29
optimizer = torch.optim.Adam(model.parameters())
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[3, 8],
                                                 gamma=0.3)

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=[EarlyStoppingCallback(patience=2, min_delta=0.01)],
    logdir=logdir,
    num_epochs=num_epochs,
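    # check=True asks Catalyst for a short sanity run over a few batches/epochs instead of full training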
    check=True,
)

# In[ ]:

# utils.plot_metrics(logdir=logdir, metrics=["loss", "_base/lr"])

# # Setup 4 - training with additional metrics

# In[ ]:

from catalyst.dl.runner import SupervisedRunner
from catalyst.dl.callbacks import EarlyStoppingCallback, AccuracyCallback
Example #30
def run(config_file):
    config = load_config(config_file)

    config.work_dir = 'result/' + config.work_dir
    print('working directory:', config.work_dir)

    all_transforms = {}
    all_transforms['train'] = Transform(size=config.data.image_size,
                                        threshold=20.,
                                        sigma=-1.,
                                        blur_ratio=0.2,
                                        noise_ratio=0.2,
                                        cutout_ratio=0.2,
                                        grid_distortion_ratio=0.2,
                                        random_brightness_ratio=0.2,
                                        piece_affine_ratio=0.2,
                                        ssr_ratio=0.2)
    all_transforms['valid'] = Transform(size=config.data.image_size)

    dataloaders = {
        phase: make_loader(
            phase=phase,
            batch_size=config.train.batch_size,
            num_workers=config.num_workers,
            idx_fold=config.data.params.idx,
            fold_csv=config.data.params.fold_csv,
            transforms=all_transforms[phase],
            # debug=config.debug
        )
        for phase in ['train', 'valid']
    }
    model = get_model(config)
    model = model.to(device)
    # we can register multiple criteria, keyed by name
    criterion = {
        "ce": nn.CrossEntropyLoss(),
        # Define your awesome losses in here. Ex: Focal, lovasz, etc
    }
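    # RAdam, optionally wrapped in Lookahead; this combination is commonly known as the Ranger optimizer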
    optimizer = RAdam(model.parameters(), lr=config.optimizer.params.lr)
    if config.optimizer.lookahead.apply:
        optimizer = Lookahead(optimizer)

    scheduler = get_scheduler(optimizer, config)

    # model runner
    runner = SupervisedRunner(
        device=device,
        input_key="images",
        output_key=("logit_grapheme_root", "logit_vowel_diacritic",
                    "logit_consonant_diacritic"),
        input_target_key=("grapheme_roots", "vowel_diacritics",
                          "consonant_diacritics"),
    )

    callbacks = []

    if config.train.early_stop_patience > 0:
        callbacks.append(
            EarlyStoppingCallback(patience=config.train.early_stop_patience))

    if config.train.accumulation_size > 0:
        accumulation_steps = config.train.accumulation_size // config.train.batch_size
        callbacks.extend(
            [OptimizerCallback(accumulation_steps=accumulation_steps)])

    # to resume from check points if exists
    if os.path.exists(config.work_dir +
                      '/checkpoints/best.pth') and config.train.resume:
        callbacks.append(
            CheckpointCallback(resume=config.work_dir +
                               '/checkpoints/last_full.pth'))
    if config.train.mixup:
        CC = MixupCallback
    else:
        CC = CriterionCallback

    callbacks.extend([
        CC(
            input_key="grapheme_roots",
            output_key="logit_grapheme_root",
            criterion_key='ce',
            prefix='loss_gr',
        ),
        CC(
            input_key="vowel_diacritics",
            output_key="logit_vowel_diacritic",
            criterion_key='ce',
            prefix='loss_wd',
        ),
        CC(
            input_key="consonant_diacritics",
            output_key="logit_consonant_diacritic",
            criterion_key='ce',
            prefix='loss_cd',
        ),
        CriterionAggregatorCallback(
            prefix="loss",
            loss_aggregate_fn="weighted_sum",
            loss_keys={
                "loss_gr": 2.0,
                "loss_wd": 1.0,
                "loss_cd": 1.0
            },
        ),

        # metrics
        HMacroAveragedRecall(),
    ])

    # model training
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=dataloaders,
        logdir=config.work_dir,
        num_epochs=config.train.num_epochs,
        main_metric="hmar",
        minimize_metric=False,
        monitoring_params=None,
        callbacks=callbacks,
        verbose=True,
        fp16=False,
    )
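HMacroAveragedRecall is project-specific and not shown here; judging from the three output heads and the weighted loss above, it presumably implements the Bengali.AI-style hierarchical macro-averaged recall, which can be reproduced offline roughly like this (a sketch, not the actual callback):

import numpy as np
from sklearn.metrics import recall_score

def hierarchical_macro_averaged_recall(true_gr, pred_gr,
                                       true_vd, pred_vd,
                                       true_cd, pred_cd):
    # Macro recall per head; the grapheme root counts twice as much as each diacritic.
    recalls = [
        recall_score(true_gr, pred_gr, average='macro'),
        recall_score(true_vd, pred_vd, average='macro'),
        recall_score(true_cd, pred_cd, average='macro'),
    ]
    return float(np.average(recalls, weights=[2, 1, 1]))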