# Example no. 1 (score: 0)
def run_models_inference(model_checkpoints: List[str],
                         test_csv: pd.DataFrame,
                         data_dir,
                         images_dir='test_images',
                         preprocessing=None,
                         image_size=None,
                         crop_black=True,
                         **kwargs) -> List[pd.DataFrame]:
    """Run inference for several model checkpoints over one test CSV.

    When `preprocessing` or `image_size` is not given, the values are
    recovered from the first checkpoint's saved command-line arguments.
    Images are located by `id_code` inside `data_dir/images_dir`; a
    'diagnosis' column, if present, becomes the dataset targets.
    """
    first_ckpt = torch.load(model_checkpoints[0])
    saved_args = first_ckpt['checkpoint_data']['cmd_args']

    if preprocessing is None:
        preprocessing = saved_args.get('preprocessing', None)

    if image_size is None:
        side = saved_args.get('image_size', 512)
        image_size = (side, side)

    images_root = os.path.join(data_dir, images_dir)
    image_fnames = test_csv['id_code'].apply(
        lambda code: image_with_name_in_dir(images_root, code))

    targets = test_csv['diagnosis'].values if 'diagnosis' in test_csv else None

    transform = get_test_transform(image_size,
                                   preprocessing=preprocessing,
                                   crop_black=crop_black)
    test_ds = RetinopathyDataset(image_fnames, targets, transform)
    return run_models_inference_via_dataset(model_checkpoints, test_ds,
                                            **kwargs)
# Example no. 2 (score: 0)
def run_image_preprocessing(params,
                            image_df: DataFrame,
                            image_paths=None,
                            preprocessing=None,
                            image_size=None,
                            crop_black=True,
                            **kwargs) -> RetinopathyDataset:
    """Build a test-time RetinopathyDataset for the given image paths.

    Falls back to `params` for preprocessing/image-size settings when they
    are not passed explicitly. Returns None when `image_paths` is None
    (matching the original implicit behaviour).
    """
    if image_paths is None:
        return None

    if preprocessing is None:
        preprocessing = params.get('preprocessing', None)

    if image_size is None:
        side = params.get('image_size', 1024)
        image_size = (side, side)

    targets = image_df['diagnosis'].values if 'diagnosis' in image_df else None

    transform = get_test_transform(image_size,
                                   preprocessing=preprocessing,
                                   crop_black=crop_black)
    return RetinopathyDataset(image_paths, targets, transform)
# Example no. 3 (score: 0)
def run_model_inference(checkpoint,
                        params,
                        retino: pd.DataFrame,
                        image_paths=None,
                        preprocessing=None,
                        image_size=None,
                        crop_black=True,
                        **kwargs) -> pd.DataFrame:
    """Run a single checkpoint's inference over the given image paths.

    Preprocessing and image-size defaults are taken from `params` when not
    supplied. Returns None when `image_paths` is None (matching the
    original implicit behaviour).
    """
    if image_paths is None:
        return None

    if preprocessing is None:
        preprocessing = params.get('preprocessing', None)

    if image_size is None:
        side = params.get('image_size', 512)
        image_size = (side, side)

    targets = retino['diagnosis'].values if 'diagnosis' in retino else None

    transform = get_test_transform(image_size,
                                   preprocessing=preprocessing,
                                   crop_black=crop_black)
    dataset = RetinopathyDataset(image_paths, targets, transform)
    return run_model_inference_via_dataset(dataset=dataset,
                                           checkpoint=checkpoint,
                                           params=params,
                                           **kwargs)
# Example no. 4 (score: 0)
def get_datasets_universal(
        train_on: List[str],
        valid_on: List[str],
        data_dir='data',
        image_size=(512, 512),
        augmentation='medium',
        preprocessing=None,
        target_dtype=int,
        random_state=42,
        coarse_grading=False,
        folds=4) -> Tuple[RetinopathyDataset, RetinopathyDataset, List]:
    """Build train/valid datasets from arbitrary lists of source dataset names.

    `train_on`/`valid_on` are resolved via get_dataset; the per-source
    training sizes of the train split are returned as the third element.
    NOTE(review): `coarse_grading` is accepted but not used here.
    """
    train_x, train_y, sizes = get_dataset(train_on,
                                          folds=folds,
                                          data_dir=data_dir,
                                          random_state=random_state)
    valid_x, valid_y, _ = get_dataset(valid_on,
                                      folds=folds,
                                      data_dir=data_dir,
                                      random_state=random_state)

    train_ds = RetinopathyDataset(
        train_x, train_y,
        transform=get_train_transform(image_size,
                                      augmentation=augmentation,
                                      preprocessing=preprocessing,
                                      crop_black=False),
        dtype=target_dtype)

    valid_ds = RetinopathyDataset(
        valid_x, valid_y,
        transform=get_test_transform(image_size,
                                     preprocessing=preprocessing,
                                     crop_black=False),
        dtype=target_dtype)

    return train_ds, valid_ds, sizes
    def get_transforms(stage: str = None,
                       image_size=(512, 512),
                       augmentation='medium',
                       mode: str = None):
        """Return the transform pipeline for the given mode.

        NOTE(review): this def sits after the enclosing function's return
        statement, so it is never reached from here — possibly a lost
        example boundary in the scrape. `stage` is accepted but unused.
        """
        if mode != 'train':
            return get_test_transform(image_size=image_size, crop_black=False)
        return get_train_transform(image_size=image_size,
                                   augmentation=augmentation,
                                   crop_black=False)
def test_inference():
    """Smoke-test a pretrained checkpoint on a few local fundus images.

    Prints the softmaxed classification logits and the regression head
    output for each image, twice: once as-is, once with synthetic mild-DR
    microaneurysms injected.
    """
    model_checkpoint = '../pretrained/seresnext50_gap_512_medium_aptos2019_idrid_fold0_hopeful_easley.pth'
    checkpoint = torch.load(model_checkpoint)
    model_name = checkpoint['checkpoint_data']['cmd_args']['model']

    num_classes = len(get_class_names())
    model = get_model(model_name, pretrained=False, num_classes=num_classes)
    model.load_state_dict(checkpoint['model_state_dict'])

    def predict_and_print(rgb_image):
        # Transform, batchify and run the model; print both output heads.
        # eval()/cuda() are in-place on nn.Module, so `net` is `model`.
        transformed = transform(image=rgb_image)['image']
        batch = tensor_from_rgb_image(transformed).unsqueeze(0)
        with torch.no_grad():
            net = model.eval().cuda()
            predictions = net(batch.cuda())
            print(predictions['logits'].softmax(dim=1))
            print(predictions['regression'])

    image_names = [
        # '4_left.png',
        # '35_left.png',
        '44_right.png',
        '68_right.png',
        # '92_left.png'
    ]
    for image_fname in image_names:
        transform = get_test_transform(image_size=(512, 512), crop_black=True)

        bgr = cv2.imread(image_fname)
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        # Original image.
        predict_and_print(rgb)

        # Same image with synthetic mild-DR lesions injected (p=1 forces it).
        data = AddMicroaneurisms(p=1)(image=rgb, diagnosis=0)
        predict_and_print(data['image'])
# Example no. 7 (score: 0)
def get_datasets(
        data_dir='data',
        image_size=(512, 512),
        augmentation='medium',
        preprocessing=None,
        use_aptos2019=True,
        use_aptos2019_test_pl1=False,
        use_aptos2015_pl1=False,
        use_aptos2015=False,
        use_aptos2015_test_private=False,
        use_idrid=False,
        use_messidor=False,
        use_messidor2_pl1=False,
        use_unsupervised=False,
        target_dtype=int,
        random_state=42,
        coarse_grading=False,
        fold=None,
        folds=4) -> Tuple[RetinopathyDataset, RetinopathyDataset, List]:
    """Assemble train/validation RetinopathyDatasets from the selected sources.

    Each enabled source is fold-split and accumulated into a shared 4-way
    split; pseudolabel rounds filter out low-confidence (-100) labels, and
    optionally a pool of unlabeled images is appended for semi-supervised
    training.

    Args:
        data_dir: Root directory containing the per-dataset subfolders.
        image_size: (height, width) passed to the train/test transforms.
        augmentation: Augmentation level for get_train_transform.
        preprocessing: Optional preprocessing name for both transforms.
        use_*: Flags selecting datasets / pseudolabel rounds.
            NOTE(review): `use_aptos2019_test_pl1` is currently unused.
        target_dtype: dtype for dataset targets.
        random_state: Seed for fold splitting.
        coarse_grading: Collapse the 5 DR grades to 3 ({0}, {1,2,3}, {4}).
        fold, folds: Held-out fold index and total fold count.

    Returns:
        (train_ds, valid_ds, trainset_sizes).
        NOTE(review): most branches append the training-split array itself
        to `trainset_sizes` while the unsupervised branch appends a length
        — confirm what downstream samplers expect.
    """
    assert use_aptos2019 or use_aptos2015 or use_aptos2015_test_private or use_idrid or use_messidor
    assert not (use_aptos2015 and use_aptos2015_pl1)

    trainset_sizes = []
    # Accumulated (train_x, valid_x, train_y, valid_y) lists; each branch
    # merges its split in via append_train_test.
    data_split = [], [], [], []

    aptos2019_dir = os.path.join(data_dir, 'aptos-2019')
    aptos2015_dir = os.path.join(data_dir, 'aptos-2015')

    if use_aptos2019:
        x, y = get_aptos2019_train(aptos2019_dir)
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    if use_aptos2015_pl1:
        # Add pseudolabeled aptos2015 training data (confident labels only).
        aptos2015_train_pseudolabel_round_1 = pd.read_csv(
            os.path.join(aptos2015_dir,
                         'aptos2015_train_pseudolabel_round_1.csv'))
        aptos2015_train_pseudolabel_round_1 = aptos2015_train_pseudolabel_round_1[
            aptos2015_train_pseudolabel_round_1['diagnosis'] != -100]
        x = np.array(aptos2015_train_pseudolabel_round_1['id_code'].apply(
            lambda x: os.path.join(aptos2015_dir, 'train_images_768',
                                   f'{x}.jpeg')))
        y = np.array(aptos2015_train_pseudolabel_round_1['diagnosis'],
                     dtype=int)

        # For training part of aptos2015 - add it conventionally
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

        # For public test validation data add only unhealthy samples to train set
        aptos2015_test_public_pl1 = pd.read_csv(
            os.path.join(aptos2015_dir,
                         'aptos2015_test_public_pseudolabel_round_1.csv'))
        aptos2015_test_public_pl1 = aptos2015_test_public_pl1[
            aptos2015_test_public_pl1['diagnosis'] != -100]
        x = np.array(
            aptos2015_test_public_pl1['id_code'].apply(lambda x: os.path.join(
                aptos2015_dir, 'test_images_768', f'{x}.jpeg')))
        y = np.array(aptos2015_test_public_pl1['diagnosis'], dtype=int)

        # For pseudolabeled data, we add only one fold of it to clear training data
        # From test set add only unhealthy
        train_x, valid_x, train_y, valid_y = split_train_valid(
            x, y, fold=fold, folds=folds, random_state=random_state)

        train_x = train_x[train_y > 0]
        train_y = train_y[train_y > 0]

        split = train_x, valid_x, train_y, valid_y
        data_split = append_train_test(data_split, split)
        # BUGFIX: previously appended train_x[0] — a single file name —
        # whereas every other branch records the whole training split
        # (cf. the `trainset_sizes.append(split[0])` calls above/below).
        trainset_sizes.append(train_x)

        # Add Aptos2015 private test to validation set entirely
        aptos2015_test_private_pl1 = pd.read_csv(
            os.path.join(aptos2015_dir,
                         'aptos2015_test_private_pseudolabel_round_1.csv'))
        aptos2015_test_private_pl1 = aptos2015_test_private_pl1[
            aptos2015_test_private_pl1['diagnosis'] != -100]
        x = np.array(
            aptos2015_test_private_pl1['id_code'].apply(lambda x: os.path.join(
                aptos2015_dir, 'test_images_768', f'{x}.jpeg')))
        y = np.array(aptos2015_test_private_pl1['diagnosis'], dtype=int)

        # From test set add only unhealthy
        x = x[y > 0]
        y = y[y > 0]
        data_split = append_train_test(data_split, ([], x, [], y))

    if use_messidor2_pl1:
        messidor2_dir = os.path.join(data_dir, 'messidor_2')
        messidor2_pseudolabel_round_1 = pd.read_csv(
            os.path.join(messidor2_dir,
                         'train_labels_pseudolabel_round_1.csv'))
        confident_labels_mask = messidor2_pseudolabel_round_1[
            'diagnosis'] != -100
        messidor2_pseudolabel_round_1 = messidor2_pseudolabel_round_1[
            confident_labels_mask]

        x = np.array(messidor2_pseudolabel_round_1['id_code'].apply(
            lambda x: os.path.join(messidor2_dir, 'train_images_768',
                                   f'{x}.png')))
        y = np.array(messidor2_pseudolabel_round_1['diagnosis'], dtype=int)

        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    if use_aptos2015:
        x, y = get_aptos2015_train(aptos2015_dir, healthy_eye_fraction=0.2)
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    if use_aptos2015_test_private:
        x, y = get_aptos2015_test_private(aptos2015_dir,
                                          healthy_eye_fraction=0.2)
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    if use_idrid:
        x, y = get_idrid_train(os.path.join(data_dir, 'idrid'))
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    if use_messidor:
        x, y = get_messidor(os.path.join(data_dir, 'messidor'),
                            include_grade_3=False)
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    # NOTE(review): split_train_valid yields (train_x, valid_x, train_y,
    # valid_y) but this unpack expects (train_x, train_y, valid_x, valid_y)
    # — append_train_test presumably reorders; verify against its definition.
    train_x, train_y, valid_x, valid_y = data_split

    if use_idrid:
        # Regardless of used datasets let's use some data from validation (holdout)
        data_idrid_test = get_idrid_test(os.path.join(data_dir, 'idrid'))
        valid_x.extend(data_idrid_test[0])
        valid_y.extend(data_idrid_test[1])

    if use_aptos2015:
        data_aptos15_public = get_aptos2015_test_public(
            aptos2015_dir, healthy_eye_fraction=0.1)
        valid_x.extend(data_aptos15_public[0])
        valid_y.extend(data_aptos15_public[1])

    train_transform = get_train_transform(image_size,
                                          augmentation=augmentation,
                                          preprocessing=preprocessing,
                                          crop_black=False)
    valid_transform = get_test_transform(image_size,
                                         preprocessing=preprocessing,
                                         crop_black=False)

    if coarse_grading:
        assert not use_unsupervised

        # Grades 1-3 collapse to class 1; proliferative (4) becomes class 2.
        coarse_grading_map = np.array([0, 1, 1, 1, 2])

        train_y = coarse_grading_map[np.array(train_y)]
        valid_y = coarse_grading_map[np.array(valid_y)]

    print('Train', count_targets(train_y), "Valid", count_targets(valid_y))

    if use_unsupervised:
        aptos2019, _ = get_aptos2019_test(aptos2019_dir)
        print('Adding', len(aptos2019),
              'unlabeled samples from aptos2019 (test)')

        diaretdb0_v_1_1 = fs.find_images_in_dir(
            os.path.join(data_dir, 'diaretdb0_v_1_1', 'train_images_768'))
        print('Adding', len(diaretdb0_v_1_1),
              'unlabeled samples from diaretdb0_v_1_1')

        diaretdb1_v_1_1 = fs.find_images_in_dir(
            os.path.join(data_dir, 'diaretdb1_v_1_1', 'train_images_768'))
        print('Adding', len(diaretdb1_v_1_1),
              'unlabeled samples from diaretdb1_v_1_1')

        origa1 = fs.find_images_in_dir(
            os.path.join(data_dir, 'origa', 'glaucoma_768'))
        print('Adding', len(origa1), 'unlabeled samples from origa1')

        origa2 = fs.find_images_in_dir(
            os.path.join(data_dir, 'origa', 'sanas_768'))
        print('Adding', len(origa2), 'unlabeled samples from origa2')

        stare = fs.find_images_in_dir(
            os.path.join(data_dir, 'stare', 'train_images_768'))
        print('Adding', len(stare), 'unlabeled samples from stare')

        unlabeled_samples = diaretdb0_v_1_1 + diaretdb1_v_1_1 + stare + origa1 + origa2 + aptos2019.tolist(
        )

        if not use_messidor:
            messidor = fs.find_images_in_dir(
                os.path.join(data_dir, 'messidor', 'train_images_768'))
            unlabeled_samples += messidor
            print('Adding', len(messidor), 'unlabeled samples from Messidor')

        if not use_aptos2015:
            dataset_dir = os.path.join(data_dir, 'aptos-2015')
            x, y = get_aptos2015_train(dataset_dir, healthy_eye_fraction=0.1)
            unlabeled_samples += x.tolist()
            print('Adding', len(x), 'unlabeled samples from Aptos 2015')

        if not use_aptos2015_test_private:
            dataset_dir = os.path.join(data_dir, 'aptos-2015')
            x, y = get_aptos2015_test_private(dataset_dir,
                                              healthy_eye_fraction=0.1)
            unlabeled_samples += x.tolist()
            print('Adding', len(x),
                  'unlabeled samples from Aptos 2015 Test (Private)')

        unlabeled_targets = [UNLABELED_CLASS] * len(unlabeled_samples)
        print('Using', len(unlabeled_samples), 'unlabeled samples')

        train_x.extend(unlabeled_samples)
        train_y.extend(unlabeled_targets)

        train_ds = RetinopathyDatasetV2(train_x,
                                        train_y,
                                        transform=train_transform,
                                        normalize=valid_transform,
                                        dtype=target_dtype)
        trainset_sizes.append(len(unlabeled_samples))
    else:
        train_ds = RetinopathyDataset(train_x,
                                      train_y,
                                      transform=train_transform,
                                      dtype=target_dtype)

    valid_ds = RetinopathyDataset(valid_x,
                                  valid_y,
                                  transform=valid_transform,
                                  dtype=target_dtype)

    return train_ds, valid_ds, trainset_sizes