Code Example #1
def validate(model, val_dataset, config):
    loaders = collections.OrderedDict()
    val_loader = utils.get_loader(
        val_dataset,
        open_fn=lambda x: {
            "input_audio": x[-1],
            "input_video": x[1],
            "targets": x[0]
        },
        batch_size=config.batch_size,
        num_workers=config.workers,
        shuffle=False,
    )

    loaders["valid"] = val_loader

    runner = SupervisedRunner(
        input_key=["input_audio",
                   "input_video"])  # parameters of the model in forward(...)
    runner.infer(
        model,
        loaders,
        callbacks=collections.OrderedDict({
            "snr_callback": SNRCallback(),
            "sdr_callback": SDRCallback()
        }),
        verbose=True,
    )
Code Example #2
def main(config):
    """Main code for training a classification model.

    Args:
        config (dict): dictionary read from a yaml file
            i.e. configs/train_seg1.yml

    Returns:
        None

    """
    # setting up the train/val split with filenames
    seed = config["io_params"]["split_seed"]
    seed_everything(seed)

    # Seg only for now
    exp = TrainSegExperiment(config)
    output_key = "logits"

    print(f"Seed: {seed}")

    runner = SupervisedRunner(output_key=output_key)

    runner.train(model=exp.model,
                 criterion=exp.criterion,
                 optimizer=exp.opt,
                 scheduler=exp.lr_scheduler,
                 loaders=exp.loaders,
                 callbacks=exp.cb_list,
                 logdir=config["runner_params"]["logdir"],
                 num_epochs=config["runner_params"]["num_epochs"],
                 valid_loader="val",
                 verbose=config["runner_params"]["verbose"],
                 fp16=config["runner_params"]["fp16"])
Code Example #3
File: train.py Project: yuv4r4j/ml-recipe-bone-age
def main(args: Namespace) -> None:
    input_shape = (1, int(args.crop_size[0] * args.scale), int(args.crop_size[1] * args.scale))
    print('Input shape', 'x'.join(map(str, input_shape)), '[CxHxW]')

    set_global_seed(args.seed)

    train_loader, test_loader = get_loaders(args)
    loaders = OrderedDict([('train', train_loader), ('valid', test_loader)])

    model = m46(input_shape=input_shape, model_type=args.model_type)
    criterion = model.loss_function
    optimizer = torch.optim.Adam(lr=2e-5, betas=(0.5, 0.999), params=model.parameters())

    output_key = 'probs' if args.model_type == 'gender' else 'preds'
    runner = SupervisedRunner(input_key='image', output_key=output_key,
                              input_target_key='label',
                              device=args.device if is_available() else tdevice('cpu')
                              )
    callbacks = [clb.CriterionCallback(input_key='label', output_key=output_key)]
    if args.model_type == 'gender':
        callbacks += [clb.AccuracyCallback(prefix='accuracy', input_key='label',
                                           output_key=output_key, accuracy_args=[1],
                                           threshold=.5, num_classes=1, activation='none')]
    runner.train(
        model=model, criterion=criterion, optimizer=optimizer,
        scheduler=None, loaders=loaders, logdir=str(args.logdir),
        num_epochs=args.n_epoch, verbose=True, main_metric='loss',
        valid_loader='valid', callbacks=callbacks, minimize_metric=True,
        checkpoint_data={'params': model.init_params}
    )
Code Example #4
def generate_test_preds(class_params):

    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
    dummy_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids[:1],  transforms=get_validation_augmentation(),
                                preprocessing=get_preprocessing(preprocessing_fn))
    dummy_loader = DataLoader(dummy_dataset, batch_size=1, shuffle=False, num_workers=0)

    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )
    runner = SupervisedRunner(model)

    # HACK: We are loading a few examples from our dummy loader so catalyst will properly load the weights
    # from our checkpoint
    loaders = {"test": dummy_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(
                resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )

    # Now we do real inference on the full dataset
    test_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids,  transforms=get_validation_augmentation(),
                                preprocessing=get_preprocessing(preprocessing_fn))
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

    encoded_pixels = []
    image_id = 0
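    # runner_out has one channel per cloud class per image; image_id advances once per
    # channel, so class_params[image_id % 4] picks that class's (threshold, min_size) pair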
    for i, test_batch in enumerate(tqdm.tqdm(test_loader)):
        runner_out = runner.predict_batch({"features": test_batch[0].cuda()})['logits'].cpu().detach().numpy()
        for batch in runner_out:
            for probability in batch:

                # probability = probability.cpu().detach().numpy()
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                predict, num_predict = post_process(sigmoid(probability), class_params[image_id % 4][0],
                                                    class_params[image_id % 4][1])
                if num_predict == 0:
                    encoded_pixels.append('')
                else:
                    r = mask2rle(predict)
                    encoded_pixels.append(r)
                image_id += 1

    print("Saving submission...")
    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv('submission.csv', columns=['Image_Label', 'EncodedPixels'], index=False)
    print("Saved.")
Code Example #5
def train(args):
    ckp = None
    if os.path.exists(args.log_dir + '/checkpoints/best.pth'):
        ckp = args.log_dir + '/checkpoints/best.pth'
    model = create_model(args.encoder_type, ckp=ckp).cuda()
    loaders = get_train_val_loaders(args.encoder_type,
                                    batch_size=args.batch_size,
                                    ifold=args.ifold)

    # model, criterion, optimizer
    if args.encoder_type.startswith('myunet'):
        optimizer = RAdam(model.parameters(), lr=args.lr)
    else:
        base_optim = RAdam([
            {
                'params': model.decoder.parameters(),
                'lr': args.lr
            },
            {
                'params': model.encoder.parameters(),
                'lr': args.lr / 10.
            },
        ])
        #base_optim = RAdam(model.parameters(),lr = 0.001)
        optimizer = Lookahead(base_optim, k=5, alpha=0.5)
    #scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=2)

    if args.lrs == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer,
                                      factor=args.factor,
                                      patience=args.patience,
                                      min_lr=args.min_lr)
    else:
        scheduler = CosineAnnealingLR(optimizer,
                                      args.t_max,
                                      eta_min=args.min_lr)

    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks = [
        DiceCallback(),
        EarlyStoppingCallback(patience=15, min_delta=0.001),
    ]
    #if os.path.exists(args.log_dir + '/checkpoints/best_full.pth'):
    #    callbacks.append(CheckpointCallback(resume=args.log_dir + '/checkpoints/best_full.pth'))

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks,
                 logdir=args.log_dir,
                 num_epochs=args.num_epochs,
                 verbose=True)
Code Example #6
def generate_valid_preds(args):

    train_ids, valid_ids, logdir = args
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, 'imagenet')
    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=0)

    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )

    runner = SupervisedRunner()
    # Generate validation predictions
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )

    valid_preds = np.load('data/valid_preds.npy')

    for im_id, preds in zip(valid_ids,
                            runner.callbacks[0].predictions["logits"]):

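        # CHW -> HWC so cv2.resize can rescale to the 350x525 label grid, then back to CHW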
        preds = preds.transpose((1, 2, 0))
        preds = cv2.resize(preds, (525, 350))
        preds = preds.transpose((2, 0, 1))

        indexes = train.index[train['im_id'] == im_id]
        valid_preds[indexes[0]] = preds[0]  # fish
        valid_preds[indexes[1]] = preds[1]  # flower
        valid_preds[indexes[2]] = preds[2]  # gravel
        valid_preds[indexes[3]] = preds[3]  # sugar

    np.save('data/valid_preds.npy', valid_preds)

    return True
Code Example #7
def run_validation(data,
                   valid_path,
                   image_size,
                   batch_size,
                   splits,
                   fold_idx,
                   model,
                   exp_name,
                   labels,
                   ttatype=None):
    logdir = 'logs/{}_fold{}/'.format(exp_name, fold_idx)
    valid_data = data.loc[splits['test_idx'][fold_idx], :]
    model.load_state_dict(
        torch.load(os.path.join(logdir,
                                'checkpoints/best.pth'))['model_state_dict'])
    model.eval()
    if ttatype == 'd4':
        model = tta.TTAWrapper(model, tta.d4_image2label)
    elif ttatype == 'fliplr_image2label':
        model = tta.TTAWrapper(model, tta.fliplr_image2label)
    runner = SupervisedRunner(model=model)
    val_dataset = EyeDataset(dataset_path=valid_path,
                             labels=data.loc[splits['test_idx'][fold_idx],
                                             labels].values,
                             ids=data.loc[splits['test_idx'][fold_idx],
                                          'id'].values,
                             albumentations_tr=aug_val(image_size))
    val_loader = DataLoader(val_dataset,
                            num_workers=8,
                            pin_memory=False,
                            batch_size=batch_size,
                            shuffle=False)
    loaders = collections.OrderedDict()
    loaders["valid"] = val_loader
    #predictions = runner.predict_loader(loaders["valid"], resume=f"{logdir}/checkpoints/best.pth")
    runner.infer(model=model, loaders=loaders, callbacks=[InferCallback()])
    predictions = runner.callbacks[0].predictions['logits']
    probabilities = softmax(torch.from_numpy(predictions), dim=1).numpy()
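    # if no class clears 0.5, fall back to the first label so every sample gets a prediction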
    for idx in range(probabilities.shape[0]):
        if all(probabilities[idx, :] < 0.5):
            probabilities[idx, 0] = 1.0
    predicted_labels = pd.DataFrame(probabilities, columns=labels)
    predicted_labels['id'] = data.loc[splits['test_idx'][fold_idx],
                                      'id'].values
    predicted_labels.loc[:, 'group'] = predicted_labels.id.apply(
        lambda x: x.split('_')[0])
    valid_data.loc[:, 'group'] = valid_data.id.apply(lambda x: x.split('_')[0])
    valid_data_groupped = valid_data.groupby(['group']).aggregate(
        dict(zip(labels, ['max'] * (len(labels)))))
    predicted_labels_groupped = predicted_labels.groupby(['group']).aggregate(
        dict(zip(labels, ['max'] * (len(labels)))))
    return (valid_data_groupped, predicted_labels_groupped)
Code Example #8
def train_model():

    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )


    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    num_workers = 0
    bs = 10
    train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms=get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids, transforms=get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=num_workers)

    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }

    num_epochs = 40

    # model, criterion, optimizer
    optimizer = RAdam([
        {'params': model.decoder.parameters(), 'lr': 1e-2},
        {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2, threshold=0.001)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)

    runner = SupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True
    )

    return True
def main():
    test = pd.read_csv(
        '/home/yuko/kaggle_understanding_cloud_organization/src/data_process/data/sample_submission.csv'
    )

    test['label'] = test['Image_Label'].apply(lambda x: x.split('_')[-1])
    test['im_id'] = test['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    test['img_label'] = test.EncodedPixels.apply(lambda x: 0
                                                 if x is np.nan else 1)

    img_label = test.groupby('im_id')['img_label'].agg(list).reset_index()

    test_id = np.array(img_label.im_id)

    test_dataset = CloudClassDataset(datatype='test',
                                     img_ids=test_id,
                                     transforms=get_validation_augmentation(),
                                     preprocessing=ort_get_preprocessing())

    test_loader = DataLoader(test_dataset,
                             batch_size=8,
                             shuffle=False,
                             num_workers=16)

    loaders = {"infer": test_loader}

    for fold in range(5):
        runner = SupervisedRunner()
        clf_model = ResNet()

        checkpoint = torch.load(
            f'/home/yuko/kaggle_understanding_cloud_organization/src/class/segmentation/fold_{fold}/checkpoints/best.pth'
        )
        clf_model.load_state_dict(checkpoint['model_state_dict'])
        clf_model.eval()
        runner.infer(
            model=clf_model,
            loaders=loaders,
            callbacks=[InferCallback()],
        )
        callbacks_num = 0
        pred = runner.callbacks[callbacks_num].predictions["logits"]

        df_pred = pd.DataFrame(
            [pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]]).T
        df_pred.to_csv(
            f'/home/yuko/kaggle_understanding_cloud_organization/src/class/segmentation/pred_{fold}.csv'
        )
def train_model(epoch, train_loader, valid_loader, valid_dataset, log_dir):
    # create segmentation model with pretrained encoder

    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    model = smp.FPN(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=len(CLASSES),
        activation=ACTIVATION,
    )

    loss = smp.utils.losses.BCEDiceLoss()

    optimizer = Nadam(model.parameters(), lr=1e-5)
    model = nn.DataParallel(model)
    # optimizer = torch.optim.Adam([{'params': model.module.decoder.parameters(), 'lr': 1e-4},
    #                               # decrease lr for encoder in order not to permute
    #                               # pre-trained weights with large gradients on training start
    #                               {'params': model.module.encoder.parameters(), 'lr': 1e-6}, ])

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=(epoch // 9) + 1)

    runner = SupervisedRunner()

    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }

    runner.train(
        model=model,
        criterion=loss,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback(), IouCallback(), EarlyStoppingCallback(
            patience=6, min_delta=0.001)],
        logdir=log_dir,
        num_epochs=epoch,
        verbose=True
    )

    probabilities, valid_masks = valid_model(
        runner, model, valid_loader, valid_dataset,  log_dir)

    get_optimal_thres(probabilities, valid_masks)
Code Example #11
File: train_yaml.py Project: jchen42703/kits19-cnn
def main(config):
    """
    Main code for training a classification model.

    Args:
        config (dict): dictionary read from a yaml file
            i.e. experiments/finetune_classification.yml
    Returns:
        None
    """
    # setting up the train/val split with filenames
    seed = config["io_params"]["split_seed"]
    seed_everything(seed)
    mode = config["mode"].lower()
    assert mode in ["classification", "segmentation", "both"], \
        "The `mode` must be one of ['classification', 'segmentation', 'both']."
    if mode == "classification":
        raise NotImplementedError
    elif mode == "segmentation":
        if config["dim"] == 2:
            exp = TrainSegExperiment2D(config)
        elif config["dim"] == 3:
            exp = TrainSegExperiment(config)
        output_key = "logits"
    elif mode == "both":
        if config["dim"] == 2:
            exp = TrainClfSegExperiment2D(config)
        elif config["dim"] == 3:
            exp = TrainClfSegExperiment3D(config)
        output_key = ["seg_logits", "clf_logits"]

    print(f"Seed: {seed}\nMode: {mode}")

    runner = SupervisedRunner(output_key=output_key)

    runner.train(model=exp.model,
                 criterion=exp.criterion,
                 optimizer=exp.opt,
                 scheduler=exp.lr_scheduler,
                 loaders=exp.loaders,
                 callbacks=exp.cb_list,
                 **config["runner_params"])
    # Only save plots if plot_params is specified in the config
    if config.get("plot_params"):
        figs = plot_metrics(logdir=config["runner_params"]["logdir"],
                            metrics=config["plot_params"]["metrics"])
        save_figs(figs, save_dir=config["plot_params"]["save_dir"])
Code Example #12
def main(args):
    logdir = "./logdir"
    num_epochs = 42

    # detect gpu
    device = utils.get_device()
    print(f"device: {device}")

    # dataset
    trainset = ImageNetK(
        '/run/media/mooziisp/仓库/datasets/Kaggle-ILSVRC/ILSVRC',
        split='train',
        transform=transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor()
        ]))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=64,
                                              shuffle=True,
                                              num_workers=2,
                                              pin_memory=True)

    loaders = {"train": trainloader}

    # define net
    net = models.resnet18(pretrained=False, num_classes=1000)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=1e-4)

    # trainer
    runner = SupervisedRunner(device=device)
    runner.train(model=net,
                 criterion=criterion,
                 optimizer=optimizer,
                 loaders=loaders,
                 logdir=logdir,
                 callbacks=[AccuracyCallback(num_classes=1000)],
                 num_epochs=num_epochs,
                 verbose=True)
Code Example #13
def main(config):
    """
    Main code for training a classification model.

    Args:
        config (dict): dictionary read from a yaml file
            i.e. experiments/finetune_classification.yml
    Returns:
        None
    """
    # setting up the train/val split with filenames
    seed = config["io_params"]["split_seed"]
    seed_everything(seed)
    mode = config["mode"].lower()
    assert mode in ["both", "classification", "segmentation"], \
        "The `mode` must be one of ['both', 'classification', 'segmentation']."
    if mode == "classification":
        exp = TrainClassificationExperiment(config)
        output_key = "logits"
    elif mode == "segmentation":
        exp = TrainSegExperiment(config)
        output_key = "logits"
    elif mode == "both":
        exp = TrainClfSegExperiment(config)
        output_key = ["clf_logits", "seg_logits"]

    print(f"Seed: {seed}\nMode: {mode}")

    runner = SupervisedRunner(output_key=output_key)

    runner.train(model=exp.model,
                 criterion=exp.criterion,
                 optimizer=exp.opt,
                 scheduler=exp.lr_scheduler,
                 loaders=exp.loaders,
                 callbacks=exp.cb_list,
                 logdir=config["logdir"],
                 num_epochs=config["num_epochs"],
                 verbose=True,
                 fp16=config["fp16"])
Code Example #14
def main():
    args = get_args()

    config = py2cfg(args.config_path)

    train_batch_size = config.train_parameters.train_batch_size
    val_batch_size = config.train_parameters.val_batch_size
    model = config.model

    train_samples = get_samples("train", config)
    val_samples = get_samples("val", config)

    train_aug = config.train_augmentations

    val_aug = config.val_augmentations

    if config.train_parameters.tta == "lr":
        model = TTAWrapper(model, fliplr_image2mask)
    elif config.train_parameters.tta == "d4":
        model = TTAWrapper(model, d4_image2mask)

    if config.train_parameters.sync_bn:
        model = apex.parallel.convert_syncbn_model(model)

    train_loader = DataLoader(
        SegmentationDataset(
            train_samples,
            train_aug,
            num_samples=config.num_samples,
            downsample_mask_factor=config.train_parameters.downsample_mask_factor,
        ),
        batch_size=train_batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True,
    )

    valid_loader = DataLoader(
        SegmentationDataset(val_samples, val_aug),
        batch_size=val_batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False,
    )

    data_loaders = OrderedDict()
    data_loaders["train"] = train_loader
    data_loaders["valid"] = valid_loader

    callbacks = config.callbacks
    if args.checkpoint_path is not None:
        callbacks += [CheckpointCallback(resume=args.checkpoint_path)]

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=config.loss,
        optimizer=config.optimizer,
        callbacks=callbacks,
        logdir=config.logdir,
        loaders=data_loaders,
        num_epochs=config.train_parameters.num_epochs,
        scheduler=config.scheduler,
        verbose=True,
        minimize_metric=True,
        fp16=config.train_parameters.fp16,
    )
Code Example #15
File: train.py Project: markson14/Kaggle
def train_model(train_parameters):

    k = train_parameters["k"]
    loaders = train_parameters["loaders"]
    num_epochs = train_parameters["num_epochs"]
    net = train_parameters["net"]
    ENCODER = train_parameters["ENCODER"]
    ENCODER_WEIGHTS = train_parameters["ENCODER_WEIGHTS"]
    ACTIVATION = train_parameters["ACTIVATION"]

    model = load_model(net, ENCODER, ENCODER_WEIGHTS, ACTIVATION)
    """ multi-gpu """
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    model.to("cuda")

    #     if k==0:
    #         summary(model.module.encoder,(3,384,576))

    logdir = "./logs/segmentation_{}_{}Fold".format(net, k)

    # model, criterion, optimizer
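    # the param groups below reach the network through model.module, i.e. they assume
    # the nn.DataParallel wrapper applied above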
    optimizer = RAdam([
        {
            'params': model.module.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.module.encoder.parameters(),
            'lr': 1e-3
        },
        #         {'params': model.decoder.parameters(), 'lr': 1e-2},
        #         {'params': model.encoder.parameters(), 'lr': 1e-3},
    ])

    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    #     criterion = FocalLoss()
    #     criterion = FocalDiceLoss()
    # criterion = smp.utils.losses.DiceLoss(eps=1.)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    runner = SupervisedRunner()

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[
            EarlyStoppingCallback(patience=10, min_delta=0.001),
            DiceCallback()
        ],
        #                    AUCCallback(),
        #                    IouCallback()],
        logdir=logdir,
        num_epochs=num_epochs,
        verbose=True)

    del loaders, optimizer, scheduler, model, runner
    torch.cuda.empty_cache()
    gc.collect()
    print("Collect GPU cache")
Code Example #16
                                    'classes': 4,
                                    'activation': None}),
          'fpn': (smp.FPN, {'encoder_name': args.encoder,
                            'encoder_weights': 'imagenet',
                            'classes': 4,
                            'activation': None}),
          'attn_linknet': (LinkNetGated, {'num_classes': 4,
                                          'in_channels': 3
                                          })}
model = models[args.model.lower()][0](**models[args.model.lower()][1])
encoded_pixels = []
loaders = {"infer": valid_loader}
logdir = f'./logs/{args.model}/fold_{args.fold}'
gc.collect()
runner = SupervisedRunner(model=model,
                          device='cuda',
                          input_key='image',
                          input_target_key='mask')
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=[
        CheckpointCallback(
            resume=f"{logdir}/checkpoints/best.pth"),
        InferCallback()
    ],
    # fp16={"opt_level": "O1"},
)
valid_masks = []
probabilities = np.zeros((2220, 350, 525))
for i, (batch, output) in enumerate(tqdm.tqdm(zip(
        valid_dataset, runner.callbacks[0].predictions["logits"]))):
Code Example #17
File: fastai_model.py Project: dodler/kgl
optimizer = Over9000(params=model.parameters(), lr=lr)

scheduler = OneCycleLRWithWarmup(optimizer,
                                 num_steps=num_epochs,
                                 lr_range=(0.2e-2, 1e-2),
                                 warmup_steps=2,
                                 momentum_range=(1e-3, 0.1e-1))

criterion = {
    "h1": torch.nn.CrossEntropyLoss(),
    "h2": torch.nn.CrossEntropyLoss(),
    "h3": torch.nn.CrossEntropyLoss(),
}

runner = SupervisedRunner(input_key='features',
                          output_key=["h1_logits", "h2_logits", 'h3_logits'])

early_stop_epochs = get_dict_value_or_default(dict_=config,
                                              key='early_stop_epochs',
                                              default_value=30)

loss_agg_fn = get_dict_value_or_default(config, 'loss_aggregate_fn', 'mean')
if loss_agg_fn == 'mean' or loss_agg_fn == 'sum':
    crit_agg = CriterionAggregatorCallback(
        prefix="loss",
        loss_keys=["loss_h1", "loss_h2", 'loss_h3'],
        loss_aggregate_fn=config['loss_aggregate_fn'])
elif loss_agg_fn == 'weighted_sum':
    weights = get_dict_value_or_default(config, 'weights', [0.3, 0.3, 0.3])
    crit_agg = CriterionAggregatorCallback(
        prefix="loss",
Code Example #18
def main(args):
    """
    Main code for training a classification model.

    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args
    Returns:
        None
    """
    # Reading in the .csvs
    train = pd.read_csv(os.path.join(args.dset_path, "train.csv"))
    sub = pd.read_csv(os.path.join(args.dset_path, "sample_submission.csv"))

    # setting up the train and submission dataframes
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    # setting up the train/val split with filenames
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up the classification model
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"
    model = ResNet34(pre=ENCODER_WEIGHTS, num_classes=4, use_simple_head=True)

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        "resnet34", ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )
    valid_dataset = ClassificationSteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}
    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=0.001)
                 ],
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
    utils.plot_metrics(
        logdir=logdir,
        # specify which metrics we want to plot
        metrics=["loss", "dice", "lr", "_base/lr"])
Code Example #19
def generate_class_params(i_dont_know_how_to_return_values_without_map):

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)
    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              shuffle=False,
                              num_workers=0)

    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=4,
        activation=ACTIVATION,
    )

    runner = SupervisedRunner()
    # Generate validation predictions
    encoded_pixels = []
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )

    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))
    for i, (batch, output) in enumerate(
            tqdm.tqdm(
                zip(valid_dataset,
                    runner.callbacks[0].predictions["logits"]))):
        image, mask = batch
        for m in mask:
            if m.shape != (350, 525):
                m = cv2.resize(m,
                               dsize=(525, 350),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

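        # flat layout: probabilities[i * 4 + j] holds class j of validation image i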
        for j, probability in enumerate(output):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability,
                                         dsize=(525, 350),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    class_params = {}
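    # grid-search a probability threshold and minimum mask size per class,
    # keeping the pair with the best mean Dice on the validation masks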
    for class_id in range(4):
        print(class_id)
        attempts = []
        for t in range(30, 100, 5):
            t /= 100
            for ms in [1200, 5000, 10000]:
                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(
                        sigmoid(probability), t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts,
                                   columns=['threshold', 'size', 'dice'])

        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]

        class_params[class_id] = (best_threshold, best_size)

    return class_params
Code Example #20
def main(args):
    """
    Main code for training a U-Net with a user-defined encoder.
    Args:
        args (instance of argparse.ArgumentParser): arguments must be compiled with parse_args
    Returns:
        None
    """
    # setting up the train/val split with filenames
    train, sub, id_mask_count = setup_train_and_sub_df(args.dset_path)
    # setting up the train/val split with filenames
    seed_everything(args.split_seed)
    train_ids, valid_ids = train_test_split(id_mask_count["im_id"].values,
                                            random_state=args.split_seed,
                                            stratify=id_mask_count["count"],
                                            test_size=args.test_size)
    # setting up model (U-Net with ImageNet Encoders)
    ENCODER_WEIGHTS = "imagenet"
    DEVICE = "cuda"

    attention_type = None if args.attention_type == "None" else args.attention_type
    model = smp.Unet(encoder_name=args.encoder,
                     encoder_weights=ENCODER_WEIGHTS,
                     classes=4,
                     activation=None,
                     attention_type=attention_type)
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        args.encoder, ENCODER_WEIGHTS)

    # Setting up the I/O
    train_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="train",
        im_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)
    valid_dataset = SteelDataset(
        args.dset_path,
        df=train,
        datatype="valid",
        im_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn),
        use_resized_dataset=args.use_resized_dataset)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}
    # everything is saved here (i.e. weights + stats)
    logdir = "./logs/segmentation"

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {
            "params": model.decoder.parameters(),
            "lr": args.encoder_lr
        },
        {
            "params": model.encoder.parameters(),
            "lr": args.decoder_lr
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks_list = [
        DiceCallback(),
        EarlyStoppingCallback(patience=5, min_delta=0.001),
    ]
    if args.checkpoint_path != "None":  # hacky way to say no checkpoint callback but eh what the heck
        ckpoint_p = Path(args.checkpoint_path)
        fname = ckpoint_p.name
        resume_dir = str(ckpoint_p.parents[0]
                         )  # everything in the path besides the base file name
        print(
            f"Loading {fname} from {resume_dir}. Checkpoints will also be saved in {resume_dir}."
        )
        callbacks_list = callbacks_list + [
            CheckpointCallback(resume=fname, resume_dir=resume_dir),
        ]

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks_list,
                 logdir=logdir,
                 num_epochs=args.num_epochs,
                 verbose=True)
Code Example #21
File: trainer.py Project: wyn314/asteroid
def train(
        model: torch.nn.Module,
        dataset: torch.utils.data.Dataset,
        optimizer: torch.optim.Optimizer,
        criterion: torch.nn.Module,
        config: ParamConfig,
        val_dataset: torch.utils.data.Dataset = None,
        logdir: str = "./logdir",
        resume: Union[str, None] = "logdir/checkpoints/best_full.pth") -> None:
    """
        train the model with specified paremeters
        Args:
            model: neural network model
            dataset: training dataset
            optimizer: optimizer
            criterion: loss function
            val_dataset: validation dataset
            logdir: logdir location to save checkpoints
            resume: path where the partially trained model is stored
    """

    loaders = collections.OrderedDict()
    train_loader = utils.get_loader(dataset,
                                    open_fn=lambda x: {
                                        "input_audio": x[-1],
                                        "input_video": x[1],
                                        "targets": x[0]
                                    },
                                    batch_size=config.batch_size,
                                    num_workers=config.workers,
                                    shuffle=True)
    val_loader = utils.get_loader(val_dataset,
                                  open_fn=lambda x: {
                                      "input_audio": x[-1],
                                      "input_video": x[1],
                                      "targets": x[0]
                                  },
                                  batch_size=config.batch_size,
                                  num_workers=config.workers,
                                  shuffle=True)
    loaders = {"train": train_loader, "valid": val_loader}

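    # triangular cyclic LR between the base lr and 10x the base lr, ramping up over
    # four epochs' worth of batches; SchedulerCallback(mode="batch") steps it every batch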
    scheduler = torch.optim.lr_scheduler.CyclicLR(
        optimizer,
        base_lr=config.learning_rate,
        max_lr=config.learning_rate * 10,
        step_size_up=4 * len(train_loader),
        mode="triangular",
        cycle_momentum=False)

    runner = SupervisedRunner(input_key=["input_audio", "input_video"])
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 logdir=logdir,
                 verbose=True,
                 num_epochs=config.epochs,
                 resume=resume,
                 callbacks=collections.OrderedDict({
                     "snr_callback":
                     SNRCallback(),
                     "sched_callback":
                     SchedulerCallback(mode="batch")
                 }))
Code Example #22
    if args.task == 'segmentation':
        callbacks = [DiceCallback(), EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]
    elif args.task == 'classification':
        callbacks = [AUCCallback(class_names=['Fish', 'Flower', 'Gravel', 'Sugar'], num_classes=4),
                     EarlyStoppingCallback(patience=10, min_delta=0.001), CriterionCallback()]

    if args.gradient_accumulation:
        callbacks.append(OptimizerCallback(accumulation_steps=args.gradient_accumulation))

    checkpoint = utils.load_checkpoint(f'{logdir}/checkpoints/best.pth')
    model.cuda()
    utils.unpack_checkpoint(checkpoint, model=model)
    #
    #
    runner = SupervisedRunner()
    if args.train:
        print('Training')
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            main_metric='dice',
            minimize_metric=False,
            scheduler=scheduler,
            loaders=loaders,
            callbacks=callbacks,
            logdir=logdir,
            num_epochs=args.num_epochs,
            verbose=True
        )
Code Example #23
def find_class_params(args):
    runner = SupervisedRunner()
    model = create_model(args.encoder_type)
    valid_loader = get_train_val_loaders(args.encoder_type,
                                         batch_size=args.batch_size)['valid']

    encoded_pixels = []
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[CheckpointCallback(resume=args.ckp),
                   InferCallback()],
    )
    print(runner.callbacks)
    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))
    for i, (batch, output) in enumerate(
            tqdm(
                zip(valid_loader.dataset,
                    runner.callbacks[0].predictions["logits"]))):
        image, mask = batch
        for m in mask:
            if m.shape != (350, 525):
                m = cv2.resize(m,
                               dsize=(525, 350),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability,
                                         dsize=(525, 350),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

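    # per class, sweep thresholds and minimum component sizes and keep the pair
    # that maximises mean Dice against the validation masks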
    class_params = {}
    for class_id in range(4):
        print(class_id)
        attempts = []
        for t in range(0, 100, 5):
            t /= 100
            #for ms in [0, 100, 1200, 5000, 10000]:
            for ms in [5000, 10000, 15000, 20000, 22500, 25000, 30000]:

                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(
                        sigmoid(probability), t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts,
                                   columns=['threshold', 'size', 'dice'])

        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]

        class_params[class_id] = (best_threshold, best_size)
    print(class_params)
    return class_params, runner
Code Example #24
def generate_test_preds(args):

    valid_dice, class_params = args

    test_preds = np.zeros((len(sub), 350, 525), dtype=np.float32)

    for i in range(NFOLDS):
        logdir = LOG_DIR_BASE + str(i)

        preprocessing_fn = smp.encoders.get_preprocessing_fn(
            ENCODER, ENCODER_WEIGHTS)
        dummy_dataset = CloudDataset(
            df=sub,
            datatype='test',
            img_ids=test_ids[:1],
            transforms=get_validation_augmentation(),
            preprocessing=get_preprocessing(preprocessing_fn))
        dummy_loader = DataLoader(dummy_dataset,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=0)

        model = smp.Unet(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=ACTIVATION,
        )
        runner = SupervisedRunner(model)

        # HACK: We are loading a few examples from our dummy loader so catalyst will properly load the weights
        # from our checkpoint
        loaders = {"test": dummy_loader}
        runner.infer(
            model=model,
            loaders=loaders,
            callbacks=[
                CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
                InferCallback()
            ],
        )

        # Now we do real inference on the full dataset
        test_dataset = CloudDataset(
            df=sub,
            datatype='test',
            img_ids=test_ids,
            transforms=get_validation_augmentation(),
            preprocessing=get_preprocessing(preprocessing_fn))
        test_loader = DataLoader(test_dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=0)

        image_id = 0
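        # accumulate fold-averaged sigmoid probabilities in the flat (image * 4 + class) layout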
        for batch_index, test_batch in enumerate(tqdm.tqdm(test_loader)):
            runner_out = runner.predict_batch(
                {"features":
                 test_batch[0].cuda()})['logits'].cpu().detach().numpy()
            for preds in runner_out:

                preds = preds.transpose((1, 2, 0))
                preds = cv2.resize(
                    preds,
                    (525, 350))  # height and width are backward in cv2...
                preds = preds.transpose((2, 0, 1))

                idx = batch_index * 4
                test_preds[idx + 0] += sigmoid(preds[0]) / NFOLDS  # fish
                test_preds[idx + 1] += sigmoid(preds[1]) / NFOLDS  # flower
                test_preds[idx + 2] += sigmoid(preds[2]) / NFOLDS  # gravel
                test_preds[idx + 3] += sigmoid(preds[3]) / NFOLDS  # sugar

    # Convert ensembled predictions to RLE predictions
    encoded_pixels = []
    for image_id, preds in enumerate(test_preds):

        predict, num_predict = post_process(preds,
                                            class_params[image_id % 4][0],
                                            class_params[image_id % 4][1])
        if num_predict == 0:
            encoded_pixels.append('')
        else:
            r = mask2rle(predict)
            encoded_pixels.append(r)

    print("Saving submission...")
    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv('unet_submission_{}.csv'.format(valid_dice),
               columns=['Image_Label', 'EncodedPixels'],
               index=False)
    print("Saved.")
Code Example #25
def training(train_ids, valid_ids, num_split, encoder, decoder):
    """
    Model training.
    """
    train = "./data/Clouds_Classify/train.csv"

    # Data overview
    train = pd.read_csv(open(train))
    train.head()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])

    ENCODER = encoder
    ENCODER_WEIGHTS = 'imagenet'

    if decoder == 'unet':
        model = smp.Unet(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=None,
        )
    else:
        model = smp.FPN(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            classes=4,
            activation=None,
        )
    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        ENCODER, ENCODER_WEIGHTS)

    num_workers = 4
    bs = 12
    train_dataset = CloudDataset(
        df=train,
        transforms=get_training_augmentation(),
        datatype='train',
        img_ids=train_ids,
        preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(
        df=train,
        transforms=get_validation_augmentation(),
        datatype='valid',
        img_ids=valid_ids,
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=bs,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    num_epochs = 50
    logdir = "./logs/log_{}_{}/log_{}".format(encoder, decoder, num_split)

    # model, criterion, optimizer
    optimizer = torch.optim.Adam([
        {
            'params': model.decoder.parameters(),
            'lr': 1e-2
        },
        {
            'params': model.encoder.parameters(),
            'lr': 1e-3
        },
    ])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.35, patience=4)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[DiceCallback()],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=True)

    # Exploring predictions
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[
            CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
            InferCallback()
        ],
    )
Code Example #26
def main(config):
    opts = config()
    path = opts.path
    train = pd.read_csv(f'{path}/train.csv')
    sub = pd.read_csv(f'{path}/sample_submission.csv')

    n_train = len(os.listdir(f'{path}/train_images'))
    n_test = len(os.listdir(f'{path}/test_images'))

    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[1])
    sub['im_id'] = sub['Image_Label'].apply(lambda x: x.split('_')[0])
    train.loc[train['EncodedPixels'].isnull() == False,
              'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()
    train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(
        lambda x: x.split('_')[0]).value_counts().value_counts()

    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
    train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])

    valid_ids = pd.read_csv("csvs/valid_threshold.csv")["img_id"].values
    test_ids = sub['Image_Label'].apply(
        lambda x: x.split('_')[0]).drop_duplicates().values
    #     print(valid_ids)
    ENCODER = opts.backborn
    ENCODER_WEIGHTS = opts.encoder_weights
    DEVICE = 'cuda'

    ACTIVATION = None
    model = get_model(model_type=opts.model_type,
                      encoder=ENCODER,
                      encoder_weights=ENCODER_WEIGHTS,
                      activation=ACTIVATION,
                      n_classes=opts.class_num,
                      task=opts.task,
                      attention_type=opts.attention_type,
                      head='simple',
                      center=opts.center,
                      tta=opts.tta)
    if opts.refine:
        model = get_ref_model(infer_model=model,
                              encoder=opts.ref_backborn,
                              encoder_weights=ENCODER_WEIGHTS,
                              activation=ACTIVATION,
                              n_classes=opts.class_num,
                              preprocess=opts.preprocess,
                              tta=opts.tta)
    model = convert_model(model)
    preprocessing_fn = encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

    encoded_pixels = []
    runner = SupervisedRunner()
    probabilities = np.zeros((2220, 350, 525))

    for i in range(opts.fold_max):
        if opts.refine:
            logdir = f"{opts.logdir}_refine/fold{i}"
        else:
            logdir = f"{opts.logdir}/fold{i}"
        valid_dataset = CloudDataset(
            df=train,
            datatype='valid',
            img_ids=valid_ids,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=opts.batchsize,
                                  shuffle=False,
                                  num_workers=opts.num_workers)
        loaders = {"infer": valid_loader}
        runner.infer(
            model=model,
            loaders=loaders,
            callbacks=[
                CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"),
                InferCallback()
            ],
        )
        valid_masks = []
        for i, (batch, output) in enumerate(
                tqdm.tqdm(
                    zip(valid_dataset,
                        runner.callbacks[0].predictions["logits"]))):
            image, mask = batch
            for m in mask:
                if m.shape != (350, 525):
                    m = cv2.resize(m,
                                   dsize=(525, 350),
                                   interpolation=cv2.INTER_LINEAR)
                valid_masks.append(m)

            for j, probability in enumerate(output):
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability,
                                             dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                probabilities[i * 4 + j, :, :] += sigmoid(probability)

    probabilities /= opts.fold_max
    if opts.tta:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_tta_valid.npy',
            probabilities)
    else:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_valid.npy',
            probabilities)

    torch.cuda.empty_cache()
    gc.collect()

    class_params = {}
    cv_d = []
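    # sweep top/bottom thresholds and minimum mask size per class; the best
    # per-class Dice values are averaged below into the CV score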
    for class_id in tqdm.trange(opts.class_num, desc='class_id', leave=False):
        #         print(class_id)
        attempts = []
        for tt in tqdm.trange(0, 100, 10, desc='top_threshold', leave=False):
            tt /= 100
            for bt in tqdm.trange(0,
                                  100,
                                  10,
                                  desc='bot_threshold',
                                  leave=False):
                bt /= 100
                for ms in tqdm.tqdm([
                        0, 100, 1000, 5000, 10000, 11000, 14000, 15000, 16000,
                        18000, 19000, 20000, 21000, 23000, 25000, 27000, 30000,
                        50000
                ],
                                    desc='min_size',
                                    leave=False):
                    masks = []
                    for i in range(class_id, len(probabilities), 4):
                        probability = probabilities[i]
                        predict, num_predict = post_process(
                            probability, tt, ms, bt)

                        masks.append(predict)

                    d = []
                    for i, j in zip(masks, valid_masks[class_id::4]):
                        #                     print(i.shape, j.shape)
                        if (i.sum() == 0) & (j.sum() == 0):
                            d.append(1)
                        else:
                            d.append(dice(i, j))
                    attempts.append((tt, ms, bt, np.mean(d)))

        attempts_df = pd.DataFrame(
            attempts,
            columns=['top_threshold', 'size', 'bottom_threshold', 'dice'])

        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        cv_d.append(attempts_df['dice'].values[0])
        best_top_threshold = attempts_df['top_threshold'].values[0]
        best_size = attempts_df['size'].values[0]
        best_bottom_threshold = attempts_df['bottom_threshold'].values[0]

        class_params[class_id] = (best_top_threshold, best_size,
                                  best_bottom_threshold)
    cv_d = np.array(cv_d)
    print("CV Dice:", np.mean(cv_d))
    pathlist = [
        "../input/test_images/" + i.split("_")[0] for i in sub['Image_Label']
    ]

    del masks
    del valid_masks
    del probabilities
    gc.collect()

    ############# predict ###################
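    # Test-time inference: accumulate sigmoid probabilities of shape
    # (n_test, 4 classes, 350, 525) for every fold checkpoint, then average.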
    probabilities = np.zeros((n_test, 4, 350, 525))
    for fold in tqdm.trange(opts.fold_max, desc='fold loop'):
        if opts.refine:
            logdir = f"{opts.logdir}_refine/fold{fold}"
        else:
            logdir = f"{opts.logdir}/fold{fold}"


#         loaders = {"test": test_loader}
        test_dataset = CloudDataset(
            df=sub,
            datatype='test',
            img_ids=test_ids,
            transforms=get_validation_augmentation(opts.img_size),
            preprocessing=get_preprocessing(preprocessing_fn))
        test_loader = DataLoader(test_dataset,
                                 batch_size=opts.batchsize,
                                 shuffle=False,
                                 num_workers=opts.num_workers)
        runner_out = runner.predict_loader(
            model,
            test_loader,
            resume=f"{logdir}/checkpoints/best.pth",
            verbose=True)
        for i, batch in enumerate(
                tqdm.tqdm(runner_out, desc='probability loop')):
            for j, probability in enumerate(batch):
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability,
                                             dsize=(525, 350),
                                             interpolation=cv2.INTER_LINEAR)
                probabilities[i, j, :, :] += sigmoid(probability)
        gc.collect()
    probabilities /= opts.fold_max
    if opts.tta:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_tta_test.npy',
            probabilities)
    else:
        np.save(
            f'probabilities/{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}_test.npy',
            probabilities)
    image_id = 0
    print("##################### start post_process #####################")
    for i in tqdm.trange(n_test, desc='post process loop'):
        for probability in probabilities[i]:
            predict, num_predict = post_process(probability,
                                                class_params[image_id % 4][0],
                                                class_params[image_id % 4][1],
                                                class_params[image_id % 4][2])
            if num_predict == 0:
                encoded_pixels.append('')
            else:
                black_mask = get_black_mask(pathlist[image_id])
                predict = np.multiply(predict, black_mask)
                r = mask2rle(predict)
                encoded_pixels.append(r)
            image_id += 1
        gc.collect()
    print("##################### Finish post_process #####################")
    #######################################
    sub['EncodedPixels'] = encoded_pixels
    sub.to_csv(
        f'submissions/submission_{opts.logdir.split("/")[-1]}_{opts.img_size[0]}x{opts.img_size[1]}.csv',
        columns=['Image_Label', 'EncodedPixels'],
        index=False)
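# The snippet above relies on small helpers defined elsewhere in the script
# (sigmoid, dice, post_process, mask2rle, get_black_mask).  Minimal sketches of
# the two generic ones, assuming their standard definitions:
def sigmoid(x):
    # logistic function applied to the raw logits returned by the model
    return 1 / (1 + np.exp(-x))


def dice(img1, img2):
    # Dice coefficient between two binary masks; the caller already handles
    # the empty-vs-empty case separately.
    img1 = np.asarray(img1).astype(bool)
    img2 = np.asarray(img2).astype(bool)
    intersection = np.logical_and(img1, img2)
    return 2.0 * intersection.sum() / (img1.sum() + img2.sum())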
Code example #27
0
    model = plant.get_model(config.model_name, config.num_classes)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=config.patience, verbose=True, mode="min",
                                                           factor=0.3)

    device = utils.get_device()

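    # "O1" is the Apex AMP optimization level; Catalyst forwards this dict to
    # apex.amp when fp16 training is enabled (assuming an apex-based Catalyst version).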
    if config.is_fp16_used:
        fp16_params = dict(opt_level="O1")  # params for FP16
    else:
        fp16_params = None

    runner = SupervisedRunner(device=device)

    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        # We can specify the callbacks list for the experiment;
        # for this task, we will check accuracy, AUC and F1 metrics
        callbacks=[
            AccuracyCallback(num_classes=config.num_classes),
            AUCCallback(
                num_classes=config.num_classes,
                input_key="targets_one_hot",
                class_names=config.class_names
Code example #28
0
File: cloud_main_2heads.py Project: dodler/kgl
        super().__init__()
        self.alpha = alpha
        self.focal = FocalLoss(gamma)

    def forward(self, input, target):
        loss = self.alpha * self.focal(input, target) - torch.log(
            dice_loss(input, target))
        return loss.mean()


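# Two-headed setup: the classification head is trained with the focal + log-Dice
# MixedLoss above, the segmentation head with BCE + Dice.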
criterion = {
    "cls": MixedLoss(),
    "seg": smp.utils.losses.BCEDiceLoss(eps=1.),
}

runner = SupervisedRunner(input_key=["seg_features"],
                          output_key=["cls_logits", "seg_logits"])


def calc_metric(pred, gt, *args, **kwargs):
    pred = torch.sigmoid(pred).detach().cpu().numpy()
    gt = gt.detach().cpu().numpy().astype(np.uint8)
    try:
        return [roc_auc_score(gt.reshape(-1), pred.reshape(-1))]
    except ValueError:
        # roc_auc_score raises ValueError when gt contains only one class
        return [0]


callbacks = [
    CriterionCallback(input_key="cls_targets",
                      output_key="cls_logits",
                      prefix="loss_cls",
Code example #29
0
# In[ ]:

from catalyst.dl.runner import SupervisedRunner

# experiment setup
num_epochs = NUM_EPOCHS
logdir = "./logs/cifar_simple_notebook_1"

# model, criterion, optimizer
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# model runner
runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    check=True,  # run a short sanity check (a few batches/epochs) instead of full training
)

# In[ ]:

# you can use plotly and tensorboard to plot metrics inside jupyter
Code example #30
0
def main():

    fold_path = args.fold_path
    fold_num = args.fold_num
    model_name = args.model_name
    train_csv = args.train_csv
    sub_csv = args.sub_csv
    encoder = args.encoder
    num_workers = args.num_workers
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learn_late = args.learn_late
    attention_type = args.attention_type

    train = pd.read_csv(train_csv)
    sub = pd.read_csv(sub_csv)

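    # Image_Label has the form "<image>.jpg_<class>"; split it into the class
    # name ('label') and the image id ('im_id') for both train and submission frames.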
    train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[-1])
    train['im_id'] = train['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[-1])
    sub['im_id'] = sub['Image_Label'].apply(
        lambda x: x.replace('_' + x.split('_')[-1], ''))

    train_fold = pd.read_csv(f'{fold_path}/train_file_fold_{fold_num}.csv')
    val_fold = pd.read_csv(f'{fold_path}/valid_file_fold_{fold_num}.csv')

    train_ids = np.array(train_fold.file_name)
    valid_ids = np.array(val_fold.file_name)

    encoder_weights = 'imagenet'
    attention_type = None if attention_type == 'None' else attention_type

    if model_name == 'Unet':
        model = smp.Unet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
            attention_type=attention_type,
        )
    elif model_name == 'Linknet':
        model = smp.Linknet(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    elif model_name == 'FPN':
        model = smp.FPN(
            encoder_name=encoder,
            encoder_weights=encoder_weights,
            classes=4,
            activation='softmax',
        )
    elif model_name == 'ORG':
        model = Linknet_resnet18_ASPP()
    else:
        raise ValueError(f'Unknown model_name: {model_name}')

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        encoder, encoder_weights)

    train_dataset = CloudDataset(
        df=train,
        datatype='train',
        img_ids=train_ids,
        transforms=get_training_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    valid_dataset = CloudDataset(
        df=train,
        datatype='valid',
        img_ids=valid_ids,
        transforms=get_validation_augmentation(),
        preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        drop_last=True,
        pin_memory=True,
    )
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=num_workers)

    loaders = {"train": train_loader, "valid": valid_loader}

    logdir = f"./log/logs_{model_name}_fold_{fold_num}_{encoder}/segmentation"

    #for batch_idx, (data, target) in enumerate(loaders['train']):
    #    print(batch_idx)

    print(logdir)

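    # NAdam here is assumed to be a custom/third-party implementation
    # (torch.optim.NAdam only appeared in PyTorch 1.10).  Decoder and encoder get
    # separate parameter groups, which allows differential learning rates even
    # though both use learn_late here.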
    if model_name == 'ORG':
        optimizer = NAdam([
            {
                'params': model.parameters(),
                'lr': learn_late
            },
        ])
    else:
        optimizer = NAdam([
            {
                'params': model.decoder.parameters(),
                'lr': learn_late
            },
            {
                'params': model.encoder.parameters(),
                'lr': learn_late
            },
        ])

    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=0)
    criterion = smp.utils.losses.BCEDiceLoss()

    runner = SupervisedRunner()

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=[
                     DiceCallback(),
                     EarlyStoppingCallback(patience=5, min_delta=1e-7)
                 ],
                 logdir=logdir,
                 num_epochs=num_epochs,
                 verbose=1)