Example 1
def check_score(model_name, fold, epoch, run=None):
    import sklearn.metrics

    model_str = build_model_str(model_name, fold, run)
    model_info = MODELS[model_name]

    oof_dir = f'{BaseConfig.oof_dir}/{model_str}'
    print('\n', model_str, '\n')

    pred = torch.load(f'{oof_dir}/{epoch:03}.pt')
    epoch_labels = pred['epoch_labels']
    epoch_predictions = pred['epoch_predictions']

    def double_any(d):
        # Duplicate the last ('any') column so it counts twice in the flattened loss.
        return np.column_stack([d, d[:, -1:]])

    print(
        sklearn.metrics.log_loss(
            double_any(epoch_labels).flatten(),
            double_any(epoch_predictions).flatten()))

    # Per-class loss weights; the last ('any') class counts double.
    class_weights = torch.tensor([1.0, 1.0, 1.0, 1.0, 1.0, 2.0])
    # return F.binary_cross_entropy_with_logits(y_pred, y_true, class_weights.repeat(y_pred.shape[0], 1))
    print(
        F.binary_cross_entropy(
            torch.from_numpy((double_any(epoch_predictions)).flatten()),
            torch.from_numpy((double_any(epoch_labels)).flatten())))

    print(
        F.binary_cross_entropy(
            torch.from_numpy(epoch_predictions).reshape(-1),
            torch.from_numpy(epoch_labels).reshape(-1),
            class_weights.repeat(epoch_predictions.shape[0], 1).reshape(-1)))

    print(
        F.binary_cross_entropy(
            torch.from_numpy(epoch_predictions),
            torch.from_numpy(epoch_labels),
            class_weights.repeat(epoch_predictions.shape[0], 1)))

    # Per-element losses (reduction='none'), averaged per sample below.
    loss = F.binary_cross_entropy(torch.from_numpy(epoch_predictions),
                                  torch.from_numpy(epoch_labels),
                                  class_weights.repeat(
                                      epoch_predictions.shape[0], 1),
                                  reduction='none')

    print(loss.shape)
    loss = loss.cpu().detach().numpy()
    loss = np.mean(loss, axis=1)

    # plt.hist(loss, bins=1024)
    # Plot per-sample losses in descending order.
    plt.plot(np.sort(-1 * loss) * -1)
    plt.axvline()
    plt.axhline()
    plt.show()
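
The printed losses above differ by a fixed factor: with mean reduction, the weighted BCE divides the weighted sum by 6 columns, while the duplicated-'any' variant divides the same sum by 7, so the latter is 6/7 of the former. A self-contained sketch of that check on synthetic data (all names below are local to the sketch):

# Synthetic sanity check: weighted BCE (mean reduction) and the
# duplicated-'any'-column trick differ only by a factor of 6/7.
import numpy as np
import torch
import torch.nn.functional as F

def dup(d):  # same idea as double_any above
    return np.column_stack([d, d[:, -1:]])

rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=(16, 6)).astype(np.float64)
y_pred = rng.uniform(0.01, 0.99, size=(16, 6))

w = torch.tensor([1.0, 1.0, 1.0, 1.0, 1.0, 2.0], dtype=torch.float64)
weighted = F.binary_cross_entropy(torch.from_numpy(y_pred),
                                  torch.from_numpy(y_true),
                                  w.repeat(16, 1))
duplicated = F.binary_cross_entropy(torch.from_numpy(dup(y_pred).flatten()),
                                    torch.from_numpy(dup(y_true).flatten()))

assert torch.isclose(duplicated, weighted * 6 / 7)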
Example 2
def predict(model_name,
            fold,
            epoch,
            is_test,
            df_out_path,
            mode='normal',
            run=None):
    model_str = build_model_str(model_name, fold, run)
    model_info = MODELS[model_name]

    checkpoints_dir = f'{BaseConfig.checkpoints_dir}/{model_str}'
    print('\n', model_name, '\n')

    model = model_info.factory(**model_info.args)
    model.output_segmentation = False  # classification-only inference

    # The mode string selects test-time augmentations by substring match.
    preprocess_func = []
    if 'h_flip' in mode:
        preprocess_func.append(
            albumentations.HorizontalFlip(always_apply=True))
    if 'v_flip' in mode:
        preprocess_func.append(albumentations.VerticalFlip(always_apply=True))
    if 'rot90' in mode:
        preprocess_func.append(Rotate90(always_apply=True))

    dataset_valid = dataset.IntracranialDataset(
        csv_file='test2.csv' if is_test else '5fold.csv',
        folds=[fold],
        preprocess_func=albumentations.Compose(preprocess_func),
        return_labels=not is_test,
        is_test=is_test,
        **{
            **model_info.dataset_args, "add_segmentation_masks": False,
            "segmentation_oversample": 1
        })

    model.eval()
    print(f'load {checkpoints_dir}/{epoch:03}.pt')
    checkpoint = torch.load(f'{checkpoints_dir}/{epoch:03}.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.cuda()

    data_loader = DataLoader(dataset_valid,
                             shuffle=False,
                             num_workers=8,
                             batch_size=model_info.batch_size * 2)  # no gradients, so a larger batch fits

    all_paths = []
    all_study_id = []
    all_slice_num = []
    all_gt = []
    all_pred = []

    data_iter = tqdm(enumerate(data_loader), total=len(data_loader))
    for iter_num, batch in data_iter:
        with torch.set_grad_enabled(False):
            y_hat = torch.sigmoid(model(batch['image'].float().cuda()))
            all_pred.append(y_hat.cpu().numpy())
            all_paths.extend(batch['path'])
            all_study_id.extend(batch['study_id'])
            all_slice_num.extend(batch['slice_num'].cpu().numpy())

            if not is_test:
                y = batch['labels']
                all_gt.append(y.numpy())

    pred_columns = [
        'pred_epidural', 'pred_intraparenchymal', 'pred_intraventricular',
        'pred_subarachnoid', 'pred_subdural', 'pred_any'
    ]
    gt_columns = [
        'gt_epidural', 'gt_intraparenchymal', 'gt_intraventricular',
        'gt_subarachnoid', 'gt_subdural', 'gt_any'
    ]

    if is_test:
        all_pred = np.concatenate(all_pred)
        df = pd.DataFrame(all_pred, columns=pred_columns)
    else:
        all_pred = np.concatenate(all_pred)
        all_gt = np.concatenate(all_gt)
        df = pd.DataFrame(np.hstack((all_gt, all_pred)),
                          columns=gt_columns + pred_columns)

    df = pd.concat((df,
                    pd.DataFrame({
                        'path': all_paths,
                        'study_id': all_study_id,
                        'slice_num': all_slice_num
                    })),
                   axis=1)
    df.to_csv(df_out_path, index=False)
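
A possible test-time-augmentation driver around predict(), shown as a sketch: the model key, fold, epoch, and CSV paths below are placeholders, not names from this repository.

# Hypothetical TTA run: one pass per mode, then average the predictions.
import pandas as pd

modes = ['normal', 'h_flip', 'v_flip']  # substrings matched inside predict()
out_paths = [f'pred_test_fold0_{m}.csv' for m in modes]  # placeholder paths

for m, p in zip(modes, out_paths):
    predict('some_model', fold=0, epoch=10, is_test=True,
            df_out_path=p, mode=m)

pred_columns = [
    'pred_epidural', 'pred_intraparenchymal', 'pred_intraventricular',
    'pred_subarachnoid', 'pred_subdural', 'pred_any'
]
dfs = [pd.read_csv(p) for p in out_paths]
tta = dfs[0].copy()
tta[pred_columns] = sum(df[pred_columns] for df in dfs) / len(dfs)
tta.to_csv('pred_test_fold0_tta.csv', index=False)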
Example 3
def predict(model_name,
            fold,
            epoch,
            is_test,
            df_out_path,
            mode='normal',
            run=None):
    model_str = build_model_str(model_name, fold, run)
    model_info = MODELS[model_name]

    checkpoints_dir = f'{BaseConfig.checkpoints_dir}/{model_str}'
    print('\n', model_name, '\n')

    model = model_info.factory(**model_info.args)

    preprocess_func = []
    if 'h_flip' in mode:
        preprocess_func.append(
            albumentations.HorizontalFlip(always_apply=True))
    if 'v_flip' in mode:
        preprocess_func.append(albumentations.VerticalFlip(always_apply=True))
    if 'rot90' in mode:
        preprocess_func.append(Rotate90(always_apply=True))
    preprocess_func.append(albumentations.pytorch.ToTensorV2())

    dataset_valid = dataset_3d_v2.IntracranialDataset(
        csv_file='test2.csv' if is_test else '5fold.csv',
        folds=[fold],
        preprocess_func=albumentations.Compose(preprocess_func),
        return_labels=not is_test,
        is_test=is_test,
        return_all_slices=True,
        **{**model_info.dataset_args})

    print(f'load {checkpoints_dir}/{epoch:03}.pt')
    checkpoint = torch.load(f'{checkpoints_dir}/{epoch:03}.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.cuda()
    model.eval()

    # Always use batch size 1: the number of slices per study varies and may not fit in GPU memory otherwise.
    batch_size = 1
    data_loader = DataLoader(dataset_valid,
                             shuffle=False,
                             num_workers=8,
                             batch_size=batch_size)

    all_paths = []
    all_study_id = []
    all_slice_num = []
    all_gt = []
    all_pred = []

    data_iter = tqdm(enumerate(data_loader), total=len(data_loader))
    for iter_num, batch in data_iter:
        # if iter_num > 100:
        #     break
        with torch.set_grad_enabled(False):
            all_paths += batch['path']
            nb_slices = len(batch['path'])
            study_id = batch['study_id'][0]
            all_study_id += [study_id] * nb_slices
            all_slice_num += list(batch['slice_num'][0].cpu().numpy())

            y_hat = torch.sigmoid(model(batch['image'].float().cuda()))[0]  # drop the batch dim
            all_pred.append(y_hat.detach().cpu().numpy())

            if not is_test:
                y = batch['labels'].detach().cpu().numpy()[0]
                all_gt.append(y)

    pred_columns = [
        'pred_epidural', 'pred_intraparenchymal', 'pred_intraventricular',
        'pred_subarachnoid', 'pred_subdural', 'pred_any'
    ]
    gt_columns = [
        'gt_epidural', 'gt_intraparenchymal', 'gt_intraventricular',
        'gt_subarachnoid', 'gt_subdural', 'gt_any'
    ]

    if is_test:
        all_pred = np.concatenate(all_pred)
        df = pd.DataFrame(all_pred, columns=pred_columns)
    else:
        all_pred = np.concatenate(all_pred)
        all_gt = np.concatenate(all_gt)
        df = pd.DataFrame(np.hstack((all_gt, all_pred)),
                          columns=gt_columns + pred_columns)
        print(all_pred.shape, all_gt.shape)

        class_weights = torch.tensor([1.0, 1.0, 1.0, 1.0, 1.0, 2.0])
        print(
            F.binary_cross_entropy(torch.from_numpy(all_pred),
                                   torch.from_numpy(all_gt),
                                   class_weights.repeat(all_pred.shape[0], 1)))

    df = pd.concat((df,
                    pd.DataFrame({
                        'path': all_paths,
                        'study_id': all_study_id,
                        'slice_num': all_slice_num
                    })),
                   axis=1)
    df.to_csv(df_out_path, index=False)
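
Since the validation CSV stores both gt_* and pred_* columns, the weighted loss printed inside predict() can be recomputed offline. A minimal sketch, assuming a placeholder CSV path:

# Recompute the weighted BCE from a saved validation CSV.
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

df = pd.read_csv('pred_val_fold0.csv')  # placeholder path
gt = df[[c for c in df.columns if c.startswith('gt_')]].to_numpy(np.float64)
pred = df[[c for c in df.columns if c.startswith('pred_')]].to_numpy(np.float64)

class_weights = torch.tensor([1.0, 1.0, 1.0, 1.0, 1.0, 2.0],
                             dtype=torch.float64)
print(F.binary_cross_entropy(torch.from_numpy(pred),
                             torch.from_numpy(gt),
                             class_weights.repeat(len(df), 1)))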
Example 4
def train(model_name, fold, run=None, resume_epoch=-1):
    model_str = build_model_str(model_name, fold, run)

    model_info = MODELS[model_name]

    checkpoints_dir = f'{BaseConfig.checkpoints_dir}/{model_str}'
    tensorboard_dir = f'{BaseConfig.tensorboard_dir}/{model_str}'
    oof_dir = f'{BaseConfig.oof_dir}/{model_str}'
    os.makedirs(checkpoints_dir, exist_ok=True)
    os.makedirs(tensorboard_dir, exist_ok=True)
    os.makedirs(oof_dir, exist_ok=True)
    print('\n', model_name, '\n')

    logger = SummaryWriter(log_dir=tensorboard_dir)

    model = model_info.factory(**model_info.args)
    model = model.cuda()

    # Optional: print a model summary (input shape depends on the dataset args).
    # torchsummary.summary(model, (8, 400, 400))

    model = torch.nn.DataParallel(model).cuda()

    dataset_train = dataset_3d_v2.IntracranialDataset(
        csv_file='5fold-rev3.csv',
        folds=[f for f in range(BaseConfig.nb_folds) if f != fold],
        random_slice=True,
        preprocess_func=albumentations.Compose([
            albumentations.ShiftScaleRotate(shift_limit=16. / 256,
                                            scale_limit=0.05,
                                            rotate_limit=30,
                                            interpolation=cv2.INTER_LINEAR,
                                            border_mode=cv2.BORDER_REPLICATE,
                                            p=0.75),
            albumentations.Flip(),
            albumentations.RandomRotate90(),
            albumentations.pytorch.ToTensorV2()
        ]),
        **model_info.dataset_args)

    dataset_valid = dataset_3d_v2.IntracranialDataset(
        csv_file='5fold.csv',
        folds=[fold],
        random_slice=False,
        return_all_slices=True,
        preprocess_func=albumentations.pytorch.ToTensorV2(),
        **model_info.dataset_args)

    model.train()
    if model_info.optimiser == 'radam':
        optimizer = radam.RAdam(model.parameters(), lr=model_info.initial_lr)
    elif model_info.optimiser == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=model_info.initial_lr,
                                    momentum=0.95,
                                    nesterov=True)
    elif model_info.optimiser == 'adabound':
        optimizer = adabound.AdaBound(model.parameters(),
                                      lr=model_info.initial_lr,
                                      final_lr=0.1)
    else:
        raise ValueError(f'Unknown optimiser: {model_info.optimiser}')

    milestones = [32, 48, 64]
    if model_info.optimiser_milestones:
        milestones = model_info.optimiser_milestones

    if model_info.scheduler == 'steps':
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=milestones,
                                                   gamma=0.2)
    elif model_info.scheduler == 'cos_restarts':
        scheduler = CosineAnnealingLRWithRestarts(optimizer=optimizer,
                                                  T_max=8,
                                                  T_mult=1.2)
    else:
        raise ValueError(f'Unknown scheduler: {model_info.scheduler}')

    print(
        f'Num training images: {len(dataset_train)} validation images: {len(dataset_valid)}'
    )

    if resume_epoch > -1:
        checkpoint = torch.load(f'{checkpoints_dir}/{resume_epoch:03}.pt')
        model.module.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    data_loaders = {
        'train':
        DataLoader(dataset_train,
                   num_workers=16,
                   shuffle=True,
                   drop_last=True,
                   batch_size=model_info.batch_size),
        'val':
        DataLoader(dataset_valid, shuffle=False, num_workers=4, batch_size=1)
    }

    class_weights = torch.tensor([1.0, 1.0, 1.0, 1.0, 1.0, 2.0]).cuda()

    def criterium(y_pred, y_true):
        # Weighted BCE with logits; the 'any' class contributes with weight 2.
        y_pred = y_pred.reshape(-1, 6)
        y_true = y_true.reshape(-1, 6)
        cw = class_weights.repeat(y_pred.shape[0], 1)
        return F.binary_cross_entropy_with_logits(y_pred, y_true, cw)

    # Warm up: fit the new (non-pretrained) layers first with the encoder frozen.
    if resume_epoch == -1 and model_info.is_pretrained:
        model.train()
        model.module.freeze_encoder()
        data_loader = data_loaders['train']
        pre_fit_steps = len(dataset_train) // model_info.batch_size // 8  # ~1/8 epoch
        data_iter = tqdm(enumerate(data_loader), total=pre_fit_steps)
        epoch_loss = []
        initial_optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
        # initial_optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
        for iter_num, data in data_iter:
            if iter_num > pre_fit_steps:
                break
            with torch.set_grad_enabled(True):
                img = data['image'].float().cuda()
                labels = data['labels'].float().cuda()
                pred = model(img)
                loss = criterium(pred, labels)
                # loss.backward()
                (loss / model_info.accumulation_steps).backward()
                if (iter_num + 1) % model_info.accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 100.0)
                    initial_optimizer.step()
                    initial_optimizer.zero_grad()
                epoch_loss.append(float(loss))

                data_iter.set_description(
                    f'Loss: Running {np.mean(epoch_loss[-100:]):1.4f} Avg {np.mean(epoch_loss):1.4f}'
                )
        del initial_optimizer
    model.module.unfreeze_encoder()

    phase_period = {'train': 1, 'val': 2}  # run validation every other epoch

    for epoch_num in range(resume_epoch + 1, 80):
        for phase in ['train', 'val']:
            if epoch_num % phase_period[phase] == 0:
                model.train(phase == 'train')
                epoch_loss = []
                epoch_labels = []
                epoch_predictions = []
                epoch_sample_paths = []

                if hasattr(model.module, 'on_epoch'):
                    model.module.on_epoch(epoch_num)

                data_loader = data_loaders[phase]
                data_iter = tqdm(enumerate(data_loader),
                                 total=len(data_loader))
                for iter_num, data in data_iter:
                    img = data['image'].float().cuda()
                    labels = data['labels'].float().cuda()

                    with torch.set_grad_enabled(phase == 'train'):
                        pred = model(img)
                        loss = criterium(pred, labels)

                        if phase == 'train':
                            (loss / model_info.accumulation_steps).backward()
                            if (iter_num + 1) % model_info.accumulation_steps == 0:
                                torch.nn.utils.clip_grad_norm_(
                                    model.parameters(), 32.0)
                                optimizer.step()
                                optimizer.zero_grad()

                        epoch_loss.append(float(loss))

                        epoch_labels.append(
                            np.row_stack(labels.detach().cpu().numpy()))
                        epoch_predictions.append(
                            np.row_stack(
                                torch.sigmoid(pred).detach().cpu().numpy()))

                        # print(labels.shape, epoch_labels[-1].shape, pred.shape, epoch_predictions[-1].shape)
                        epoch_sample_paths += data['path']

                    data_iter.set_description(
                        f'{epoch_num} Loss: Running {np.mean(epoch_loss[-100:]):1.4f} Avg {np.mean(epoch_loss):1.4f}'
                    )

                epoch_labels = np.row_stack(epoch_labels)
                epoch_predictions = np.row_stack(epoch_predictions)
                if phase == 'val':
                    # Recompute the loss over the whole epoch: per-batch values
                    # are not comparable because the depth dimension varies.
                    epoch_loss_mean = float(
                        F.binary_cross_entropy(
                            torch.from_numpy(epoch_predictions).cuda(),
                            torch.from_numpy(epoch_labels).cuda(),
                            class_weights.repeat(epoch_labels.shape[0], 1)))
                    print(epoch_loss_mean)
                    logger.add_scalar(f'loss_{phase}', epoch_loss_mean,
                                      epoch_num)
                else:
                    logger.add_scalar(f'loss_{phase}', np.mean(epoch_loss),
                                      epoch_num)
                logger.add_scalar('lr', optimizer.param_groups[0]['lr'],
                                  epoch_num)  # scheduler.get_lr()[0]
                try:
                    log_metrics(logger=logger,
                                phase=phase,
                                epoch_num=epoch_num,
                                y=epoch_labels,
                                y_hat=epoch_predictions)
                except Exception:
                    pass  # metrics can fail on degenerate epochs; don't abort training

                if phase == 'val':
                    torch.save(
                        {
                            'epoch': epoch_num,
                            'sample_paths': epoch_sample_paths,
                            'epoch_labels': epoch_labels,
                            'epoch_predictions': epoch_predictions,
                        }, f'{oof_dir}/{epoch_num:03}.pt')

            logger.flush()

            if phase == 'val':
                scheduler.step(epoch=epoch_num)
            else:
                # print(f'{checkpoints_dir}/{epoch_num:03}.pt')
                torch.save(
                    {
                        'epoch': epoch_num,
                        'model_state_dict': model.module.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                    }, f'{checkpoints_dir}/{epoch_num:03}.pt')
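
A minimal command-line entry point around train(), assuming the module is run as a script; the flag names below are placeholders:

# Hypothetical CLI wrapper; model keys come from this repository's MODELS registry.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', required=True, help='key into MODELS')
    parser.add_argument('--fold', type=int, default=0)
    parser.add_argument('--run', default=None)
    parser.add_argument('--resume-epoch', type=int, default=-1)
    args = parser.parse_args()

    train(args.model, args.fold, run=args.run,
          resume_epoch=args.resume_epoch)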