Example No. 1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-dd",
                        "--data-dir",
                        type=str,
                        default=os.environ.get("KAGGLE_2020_ALASKA2"))

    args = parser.parse_args()
    data_dir = args.data_dir

    cover = fs.find_images_in_dir(os.path.join(data_dir, "Cover"))
    jimi = fs.find_images_in_dir(os.path.join(data_dir, "JMiPOD"))
    juni = fs.find_images_in_dir(os.path.join(data_dir, "JUNIWARD"))
    uerd = fs.find_images_in_dir(os.path.join(data_dir, "UERD"))

    for cover_fname, jimi_fname, juni_fname, uerd_fname in zip(
            tqdm(cover), jimi, juni, uerd):
        cover_img = decode_bgr_from_dct(fs.change_extension(cover_fname, ".npz"))
        jimi_img = decode_bgr_from_dct(fs.change_extension(jimi_fname, ".npz"))
        juni_img = decode_bgr_from_dct(fs.change_extension(juni_fname, ".npz"))
        uerd_img = decode_bgr_from_dct(fs.change_extension(uerd_fname, ".npz"))

        # A block is "touched" if any pixel in the corresponding 8x8 JPEG
        # block differs between the cover and the stego image
        jimi_mask = block8_sum(np.abs(cover_img - jimi_img).sum(axis=2)) > 0
        juni_mask = block8_sum(np.abs(cover_img - juni_img).sum(axis=2)) > 0
        uerd_mask = block8_sum(np.abs(cover_img - uerd_img).sum(axis=2)) > 0

        cover_mask = jimi_mask | juni_mask | uerd_mask

        cv2.imwrite(fs.change_extension(cover_fname, ".png"),
                    (cover_mask * 255).astype(np.uint8))
        cv2.imwrite(fs.change_extension(jimi_fname, ".png"),
                    (jimi_mask * 255).astype(np.uint8))
        cv2.imwrite(fs.change_extension(juni_fname, ".png"),
                    (juni_mask * 255).astype(np.uint8))
        cv2.imwrite(fs.change_extension(uerd_fname, ".png"),
                    (uerd_mask * 255).astype(np.uint8))
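decode_bgr_from_dct and block8_sum are helpers from the surrounding project and are not shown here. A minimal sketch of what block8_sum might look like, assuming it reduces a per-pixel difference map to one value per 8x8 JPEG block (hypothetical implementation):

import numpy as np


def block8_sum(diff: np.ndarray) -> np.ndarray:
    # Hypothetical helper: sum an (H, W) per-pixel difference map over
    # non-overlapping 8x8 blocks, then broadcast each block's total back to
    # full resolution. H and W are assumed to be divisible by 8.
    h, w = diff.shape
    blocks = diff.reshape(h // 8, 8, w // 8, 8).sum(axis=(1, 3))
    return np.kron(blocks, np.ones((8, 8), dtype=diff.dtype))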
Example No. 2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-dd",
                        "--data-dir",
                        type=str,
                        default=os.environ.get("KAGGLE_2020_ALASKA2"))

    args = parser.parse_args()
    data_dir = args.data_dir

    cover = os.path.join(data_dir, "Cover")
    JMiPOD = os.path.join(data_dir, "JMiPOD")
    JUNIWARD = os.path.join(data_dir, "JUNIWARD")
    UERD = os.path.join(data_dir, "UERD")

    dataset = (fs.find_images_in_dir(cover) + fs.find_images_in_dir(JMiPOD) +
               fs.find_images_in_dir(JUNIWARD) + fs.find_images_in_dir(UERD))
    # dataset = dataset[:500]

    mean, std = compute_mean_std(tqdm(dataset))
    print(mean.size())
    print(std.size())
    print(
        "Mean",
        np.array2string(to_numpy(mean),
                        precision=2,
                        separator=",",
                        max_line_width=119))
    print(
        "Std ",
        np.array2string(to_numpy(std),
                        precision=2,
                        separator=",",
                        max_line_width=119))
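The printed values are typically plugged into the normalization step of the augmentation pipeline. A small usage sketch with albumentations; the mean/std numbers below are placeholders, not the actual dataset statistics:

import albumentations as A

# Placeholder values; substitute the numbers printed by the script above
ALASKA2_MEAN = [0.4, 0.4, 0.4]
ALASKA2_STD = [0.25, 0.25, 0.25]

normalize = A.Normalize(mean=ALASKA2_MEAN, std=ALASKA2_STD)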
Example No. 3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-dd",
                        "--data-dir",
                        type=str,
                        default=os.environ.get("KAGGLE_2020_ALASKA2"))

    args = parser.parse_args()

    data_dir = args.data_dir

    original_images = np.array(
        fs.find_images_in_dir(os.path.join(data_dir, "Cover")))
    image_sizes = np.array(
        [os.stat(fname).st_size for fname in original_images])
    order = np.argsort(image_sizes)
    original_images = original_images[order]
    num_folds = 4
    num_images = len(original_images)

    # Round-robin fold assignment over the size-sorted images keeps the folds
    # balanced with respect to JPEG file size
    folds_lut = (list(range(num_folds)) * num_images)[:num_images]
    folds_lut = np.array(folds_lut)

    df = defaultdict(list)
    df[INPUT_IMAGE_ID_KEY].extend(
        [os.path.basename(x) for x in original_images])
    df[INPUT_FOLD_KEY].extend(folds_lut)

    df = pd.DataFrame.from_dict(df)
    df.to_csv("folds.csv", index=False)
Example No. 4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-dd",
                        "--data-dir",
                        type=str,
                        default=os.environ.get("KAGGLE_2020_ALASKA2"))

    args = parser.parse_args()
    data_dir = args.data_dir

    test_dir = os.path.join(data_dir, "Test")
    dataset = fs.find_images_in_dir(test_dir)

    # dataset = dataset[:500]
    df = defaultdict(list)
    for image_fname in tqdm(dataset):
        dct_fname = fs.change_extension(image_fname, ".npz")
        dct_data = np.load(dct_fname)
        qm0 = dct_data["qm0"]
        qm1 = dct_data["qm1"]
        qf = quality_factror_from_qm(qm0)
        fsize = os.stat(image_fname).st_size

        df["image_id"].append(os.path.basename(image_fname))
        df["quality"].append(qf)
        df["qm0"].append(qm0.flatten().tolist())
        df["qm1"].append(qm1.flatten().tolist())
        df["file_size"].append(fsize)

    df = pd.DataFrame.from_dict(df)
    df.to_csv("test_dataset_qf_qt.csv", index=False)
Example No. 5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-dd",
                        "--data-dir",
                        type=str,
                        default=os.environ.get("KAGGLE_2020_ALASKA2"))

    args = parser.parse_args()

    data_dir = args.data_dir
    cover_images = [
        x for x in fs.find_images_in_dir(os.path.join(data_dir, "Cover"))
        if str.endswith(x, ".jpg")
    ]

    # cover_images = cover_images[:100]

    with Pool(6) as pool:
        results_df = []
        for df in tqdm(pool.imap(compute_statistics, cover_images),
                       total=len(cover_images)):
            results_df.append(df)

    results_df = pd.concat([pd.DataFrame.from_dict(x) for x in results_df])
    results_df.to_csv("analyze_embeddings.csv", index=False)
Example No. 6
def convert_dir(input_dir, output_dir, image_size=768, workers=32):
    os.makedirs(output_dir, exist_ok=True)
    images = fs.find_images_in_dir(input_dir)

    processing_fn = partial(preprocess, output_dir=output_dir, image_size=image_size)

    with Pool(workers) as wp:
        for image_id in tqdm(wp.imap_unordered(processing_fn, images), total=len(images)):
            pass
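preprocess is defined elsewhere in the project. A minimal sketch of what such a worker might do, assuming it simply resizes each image and writes it to output_dir (hypothetical implementation):

import os
import cv2


def preprocess(image_fname: str, output_dir: str, image_size: int = 768) -> str:
    # Hypothetical worker: resize the image to a square of image_size pixels
    # and save it under the same file name in output_dir
    image = cv2.imread(image_fname, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (image_size, image_size),
                       interpolation=cv2.INTER_AREA)
    dst_fname = os.path.join(output_dir, os.path.basename(image_fname))
    cv2.imwrite(dst_fname, image)
    return dst_fname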
Example No. 7
def main():
    images_dir = 'c:\\datasets\\ILSVRC2013_DET_val'

    canny_cnn = maybe_cuda(CannyModel())
    optimizer = Adam(canny_cnn.parameters(), lr=1e-4)

    images = find_images_in_dir(images_dir)
    train_images, valid_images = train_test_split(images, test_size=0.1, random_state=1234)

    num_workers = 6
    num_epochs = 100
    batch_size = 16

    # Flip to True to run a quick sanity check on just a few batches
    if False:
        train_images = train_images[:batch_size * 4]
        valid_images = valid_images[:batch_size * 4]

    train_loader = DataLoader(EdgesDataset(train_images), batch_size=batch_size, num_workers=num_workers, shuffle=True,
                              drop_last=True, pin_memory=True)
    valid_loader = DataLoader(EdgesDataset(valid_images), batch_size=batch_size, num_workers=num_workers,
                              pin_memory=True)

    loaders = collections.OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 40], gamma=0.3)

    # model runner
    runner = SupervisedRunner()
    # checkpoint = UtilsFactory.load_checkpoint("logs/checkpoints//best.pth")
    # UtilsFactory.unpack_checkpoint(checkpoint, model=canny_cnn)

    # model training
    runner.train(
        model=canny_cnn,
        criterion=FocalLoss(),
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=[
            JaccardCallback(),
            ShowPolarBatchesCallback(visualize_canny_predictions, metric='jaccard', minimize=False),
            EarlyStoppingCallback(patience=5, min_delta=0.01, metric='jaccard', minimize=False),
        ],
        loaders=loaders,
        logdir='logs',
        num_epochs=num_epochs,
        verbose=True,
        main_metric='jaccard',
        minimize_metric=False
        # check=True
    )
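The commented-out lines above show how a previous run could be resumed before calling runner.train: load the best checkpoint and unpack its weights into the model. With the older Catalyst API referenced in the snippet, that would look roughly like:

# Sketch only, based on the commented-out lines above (older Catalyst API)
checkpoint = UtilsFactory.load_checkpoint("logs/checkpoints/best.pth")
UtilsFactory.unpack_checkpoint(checkpoint, model=canny_cnn)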
Example No. 8
def cut_test_dataset_in_patches(data_dir, tile_size, tile_step, image_margin):
    train_imgs = fs.find_images_in_dir(os.path.join(data_dir, "test",
                                                    "images"))

    images_dir = os.path.join(data_dir, "test_tiles", "images")

    df = defaultdict(list)

    for train_img in tqdm(train_imgs, total=len(train_imgs), desc="test_imgs"):
        img_tiles = split_image(train_img, images_dir, tile_size, tile_step,
                                image_margin)
        df["image"].extend(img_tiles)
        df["image_id"].extend([fs.id_from_fname(train_img)] * len(img_tiles))

    return pd.DataFrame.from_dict(df)
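split_image comes from the project and is not shown here. A rough sketch of the tiling it is expected to perform, assuming it pads the image by a constant margin, cuts overlapping tile_size x tile_size crops with stride tile_step, saves them to the output directory and returns the tile file names (hypothetical implementation):

import os
import cv2
from pytorch_toolbelt.utils import fs


def split_image(image_fname, output_dir, tile_size, tile_step, image_margin=0):
    # Hypothetical tiler; tiles that would extend past the border are skipped
    os.makedirs(output_dir, exist_ok=True)
    image = cv2.imread(image_fname)
    image = cv2.copyMakeBorder(image, image_margin, image_margin,
                               image_margin, image_margin, cv2.BORDER_CONSTANT)

    image_id = fs.id_from_fname(image_fname)
    tile_fnames = []
    rows, cols = image.shape[:2]
    for y in range(0, max(rows - tile_size, 0) + 1, tile_step):
        for x in range(0, max(cols - tile_size, 0) + 1, tile_step):
            tile = image[y:y + tile_size, x:x + tile_size]
            tile_fname = os.path.join(output_dir, f"{image_id}_{y}_{x}.png")
            cv2.imwrite(tile_fname, tile)
            tile_fnames.append(tile_fname)
    return tile_fnames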
Example No. 9
def get_pseudolabeling_dataset(data_dir: str,
                               include_masks: bool,
                               image_size=(224, 224),
                               augmentation=None,
                               need_weight_mask=False):
    images = fs.find_images_in_dir(
        os.path.join(data_dir, "test_tiles", "images"))

    masks_dir = os.path.join(data_dir, "test_tiles", "masks")
    os.makedirs(masks_dir, exist_ok=True)

    masks = [
        os.path.join(masks_dir,
                     fs.id_from_fname(image_fname) + ".png")
        for image_fname in images
    ]

    if augmentation == "hard":
        transfrom = A.Compose(
            [crop_transform(image_size, input_size=768),
             hard_augmentations()])
    elif augmentation == "medium":
        transfrom = A.Compose([
            crop_transform(image_size, input_size=768),
            medium_augmentations()
        ])
    elif augmentation == "light":
        transfrom = A.Compose([
            crop_transform(image_size, input_size=768),
            light_augmentations()
        ])
    else:
        transfrom = A.Normalize()

    return InriaImageMaskDataset(
        images,
        masks if include_masks else None,
        transform=transfrom,
        image_loader=read_inria_image,
        mask_loader=read_inria_mask_with_pseudolabel,
        need_weight_mask=need_weight_mask,
    )
Example No. 10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-dd", "--data-dir", type=str, default=os.environ.get("KAGGLE_2020_ALASKA2"))
    parser.add_argument("-od", "--output-dir", type=str, default=os.environ.get("KAGGLE_2020_ALASKA2"))
    parser.add_argument("-f", "--folder", type=str, default=None)
    parser.add_argument("-p", "--part", type=int, default=None)

    args = parser.parse_args()

    data_dir = args.data_dir
    if args.folder is None:
        original_images = (
            fs.find_images_in_dir(os.path.join(data_dir, "Cover"))
            + fs.find_images_in_dir(os.path.join(data_dir, "JMiPOD"))
            + fs.find_images_in_dir(os.path.join(data_dir, "JUNIWARD"))
            + fs.find_images_in_dir(os.path.join(data_dir, "UERD"))
            + fs.find_images_in_dir(os.path.join(data_dir, "Test"))
        )
    else:
        original_images = fs.find_images_in_dir(os.path.join(data_dir, args.folder))
        if args.part is not None:
            half = len(original_images) // 2
            if args.part == 0:
                original_images = original_images[:half]
                print("First half")
            else:
                original_images = original_images[half:]
                print("Second half")

        print(original_images[0])

    os.makedirs(args.output_dir, exist_ok=True)
    process_fn = partial(extract_and_save_dct_jpegio, output_dir=args.output_dir)
    with Pool(16) as wp:
        for _ in tqdm(wp.imap_unordered(process_fn, original_images), total=len(original_images)):
            pass
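extract_and_save_dct_jpegio is the project's worker function. A sketch of what it plausibly does with the jpegio package, producing the .npz files with the qm0/qm1 quantization matrices that the earlier snippets load; the keys other than qm0/qm1 are assumptions:

import os
import numpy as np
import jpegio


def extract_and_save_dct_jpegio(image_fname: str, output_dir: str) -> None:
    # Read DCT coefficients and quantization tables straight from the JPEG
    # bitstream and cache them as a compressed .npz named after the image
    jpeg = jpegio.read(image_fname)
    dct_fname = os.path.join(
        output_dir,
        os.path.splitext(os.path.basename(image_fname))[0] + ".npz")
    np.savez_compressed(
        dct_fname,
        dct_y=jpeg.coef_arrays[0],
        dct_cb=jpeg.coef_arrays[1],
        dct_cr=jpeg.coef_arrays[2],
        qm0=jpeg.quant_tables[0],
        qm1=jpeg.quant_tables[1],
    )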
Example No. 11
def get_xview2_extra_dataset(
        data_dir: str,
        image_size=(224, 224),
        augmentation="hard",
        need_weight_mask=False,
        fast=False) -> Tuple[Dataset, WeightedRandomSampler]:
    """
    Create additional train dataset using xView2 dataset
    :param data_dir: xView2 dataset directory
    :param fast: Fast training model. Use only one image per location for training and one image per location for validation
    :param image_size: Size of image crops during training & validation
    :param need_weight_mask: If True, adds 'edge' target mask
    :param augmentation: Type of image augmentations to use
    'random' - crops tiles from source images randomly.
    'tiles' - crop image in overlapping tiles (guaranteed to process entire dataset)
    :return: (train_loader, valid_loader)
    """

    if augmentation == "hard":
        train_transform = hard_augmentations()
    elif augmentation == "medium":
        train_transform = medium_augmentations()
    elif augmentation == "light":
        train_transform = light_augmentations()
    elif augmentation == "safe":
        train_transform = safe_augmentations()
    else:
        train_transform = []

    def is_pre_image(fname):
        return "_pre_" in fname

    train1_img = list(
        filter(
            is_pre_image,
            fs.find_images_in_dir(os.path.join(data_dir, "train", "images"))))
    train1_msk = list(
        filter(is_pre_image,
               fs.find_images_in_dir(os.path.join(data_dir, "train",
                                                  "masks"))))

    train2_img = list(
        filter(
            is_pre_image,
            fs.find_images_in_dir(os.path.join(data_dir, "tier3", "images"))))
    train2_msk = list(
        filter(is_pre_image,
               fs.find_images_in_dir(os.path.join(data_dir, "tier3",
                                                  "masks"))))

    if fast:
        train1_img = train1_img[:128]
        train1_msk = train1_msk[:128]

        train2_img = train2_img[:128]
        train2_msk = train2_msk[:128]

    train_transform = A.Compose(
        [crop_transform_xview2(image_size, input_size=1024), train_transform])

    trainset = InriaImageMaskDataset(
        image_filenames=train1_img + train2_img,
        mask_filenames=train1_msk + train2_msk,
        transform=train_transform,
        mask_loader=read_xview_mask,
        need_weight_mask=need_weight_mask,
    )

    # Oversample so that, on average, each 1024x1024 source image contributes
    # enough random crops of size `image_size` to cover it once per epoch
    num_train_samples = int(
        len(trainset) * (1024 * 1024) / (image_size[0] * image_size[1]))
    crops_in_image = (1024 * 1024) / (image_size[0] * image_size[1])
    if fast:
        num_train_samples = 128

    train_sampler = WeightedRandomSampler(
        torch.ones(len(trainset)) * crops_in_image, num_train_samples)

    return trainset, (None if fast else train_sampler)
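The sampler compensates for random cropping: with equal weights of crops_in_image per image and num_train_samples draws, each 1024x1024 source image is sampled roughly crops_in_image times per epoch. A brief usage sketch of how the returned pair would typically be wired into a DataLoader (path and parameters below are placeholders):

from torch.utils.data import DataLoader

trainset, train_sampler = get_xview2_extra_dataset("/path/to/xview2",
                                                   image_size=(512, 512))

# shuffle must stay False (the default) when an explicit sampler is provided
train_loader = DataLoader(trainset,
                          batch_size=8,
                          sampler=train_sampler,
                          num_workers=4,
                          pin_memory=True,
                          drop_last=True)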
Example No. 12
def get_datasets(
        data_dir='data',
        image_size=(512, 512),
        augmentation='medium',
        preprocessing=None,
        use_aptos2019=True,
        use_aptos2019_test_pl1=False,
        use_aptos2015_pl1=False,
        use_aptos2015=False,
        use_aptos2015_test_private=False,
        use_idrid=False,
        use_messidor=False,
        use_messidor2_pl1=False,
        use_unsupervised=False,
        target_dtype=int,
        random_state=42,
        coarse_grading=False,
        fold=None,
        folds=4) -> Tuple[RetinopathyDataset, RetinopathyDataset, List]:
    assert use_aptos2019 or use_aptos2015 or use_aptos2015_test_private or use_idrid or use_messidor
    assert not (use_aptos2015 and use_aptos2015_pl1)

    trainset_sizes = []
    data_split = [], [], [], []

    aptos2019_dir = os.path.join(data_dir, 'aptos-2019')
    aptos2015_dir = os.path.join(data_dir, 'aptos-2015')

    if use_aptos2019:
        x, y = get_aptos2019_train(aptos2019_dir)
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    if use_aptos2015_pl1:
        # Add training data
        aptos2015_train_pseudolabel_round_1 = pd.read_csv(
            os.path.join(aptos2015_dir,
                         'aptos2015_train_pseudolabel_round_1.csv'))
        aptos2015_train_pseudolabel_round_1 = aptos2015_train_pseudolabel_round_1[
            aptos2015_train_pseudolabel_round_1['diagnosis'] != -100]
        # x = np.array(aptos2015_train_pseudolabel_round_1['id_code'].apply(
        #     lambda x: os.path.join(aptos2015_dir, 'train_images_768', f'{x}.png')))
        x = np.array(aptos2015_train_pseudolabel_round_1['id_code'].apply(
            lambda x: os.path.join(aptos2015_dir, 'train_images_768',
                                   f'{x}.jpeg')))
        y = np.array(aptos2015_train_pseudolabel_round_1['diagnosis'],
                     dtype=int)

        # For the training part of aptos2015 - add it conventionally
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

        # For public test validation data add only unhealthy samples to train set
        aptos2015_test_public_pl1 = pd.read_csv(
            os.path.join(aptos2015_dir,
                         'aptos2015_test_public_pseudolabel_round_1.csv'))
        aptos2015_test_public_pl1 = aptos2015_test_public_pl1[
            aptos2015_test_public_pl1['diagnosis'] != -100]
        # x = np.array(aptos2015_test_public_pl1['id_code'].apply(
        #     lambda x: os.path.join(aptos2015_dir, 'test_images_768', f'{x}.png')))
        x = np.array(
            aptos2015_test_public_pl1['id_code'].apply(lambda x: os.path.join(
                aptos2015_dir, 'test_images_768', f'{x}.jpeg')))
        y = np.array(aptos2015_test_public_pl1['diagnosis'], dtype=int)

        # For pseudolabeled data, we add only one fold of it to clear training data
        # From test set add only unhealthy
        train_x, valid_x, train_y, valid_y = split_train_valid(
            x, y, fold=fold, folds=folds, random_state=random_state)

        train_x = train_x[train_y > 0]
        train_y = train_y[train_y > 0]

        split = train_x, valid_x, train_y, valid_y
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

        # Add Aptos2015 private test to validation set entirely
        aptos2015_test_private_pl1 = pd.read_csv(
            os.path.join(aptos2015_dir,
                         'aptos2015_test_private_pseudolabel_round_1.csv'))
        aptos2015_test_private_pl1 = aptos2015_test_private_pl1[
            aptos2015_test_private_pl1['diagnosis'] != -100]
        # x = np.array(aptos2015_test_private_pl1['id_code'].apply(
        #     lambda x: os.path.join(aptos2015_dir, 'test_images_768', f'{x}.png')))
        x = np.array(
            aptos2015_test_private_pl1['id_code'].apply(lambda x: os.path.join(
                aptos2015_dir, 'test_images_768', f'{x}.jpeg')))
        y = np.array(aptos2015_test_private_pl1['diagnosis'], dtype=int)

        # From test set add only unhealthy
        x = x[y > 0]
        y = y[y > 0]
        data_split = append_train_test(data_split, ([], x, [], y))

    if use_messidor2_pl1:
        messidor2_dir = os.path.join(data_dir, 'messidor_2')
        messidor2_pseudolabel_round_1 = pd.read_csv(
            os.path.join(messidor2_dir,
                         'train_labels_pseudolabel_round_1.csv'))
        confident_labels_mask = messidor2_pseudolabel_round_1[
            'diagnosis'] != -100
        messidor2_pseudolabel_round_1 = messidor2_pseudolabel_round_1[
            confident_labels_mask]

        x = np.array(messidor2_pseudolabel_round_1['id_code'].apply(
            lambda x: os.path.join(messidor2_dir, 'train_images_768',
                                   f'{x}.png')))
        y = np.array(messidor2_pseudolabel_round_1['diagnosis'], dtype=int)

        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    if use_aptos2015:
        x, y = get_aptos2015_train(aptos2015_dir, healthy_eye_fraction=0.2)
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    if use_aptos2015_test_private:
        x, y = get_aptos2015_test_private(aptos2015_dir,
                                          healthy_eye_fraction=0.2)
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    if use_idrid:
        x, y = get_idrid_train(os.path.join(data_dir, 'idrid'))
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    if use_messidor:
        x, y = get_messidor(os.path.join(data_dir, 'messidor'),
                            include_grade_3=False)
        split = split_train_valid(x,
                                  y,
                                  fold=fold,
                                  folds=folds,
                                  random_state=random_state)
        data_split = append_train_test(data_split, split)
        trainset_sizes.append(split[0])

    train_x, train_y, valid_x, valid_y = data_split

    if use_idrid:
        # Regardless of used datasets let's use some data from validation (holdout)
        data_idrid_test = get_idrid_test(os.path.join(data_dir, 'idrid'))
        valid_x.extend(data_idrid_test[0])
        valid_y.extend(data_idrid_test[1])

    if use_aptos2015:
        data_aptos15_public = get_aptos2015_test_public(
            aptos2015_dir, healthy_eye_fraction=0.1)
        valid_x.extend(data_aptos15_public[0])
        valid_y.extend(data_aptos15_public[1])

    train_transform = get_train_transform(image_size,
                                          augmentation=augmentation,
                                          preprocessing=preprocessing,
                                          crop_black=False)
    valid_transform = get_test_transform(image_size,
                                         preprocessing=preprocessing,
                                         crop_black=False)

    if coarse_grading:
        assert not use_unsupervised

        # Collapse the 5 DR grades {0..4} into 3 coarse classes:
        # healthy / non-proliferative DR / proliferative DR
        coarse_grading_map = np.array([0, 1, 1, 1, 2])

        train_y = coarse_grading_map[np.array(train_y)]
        valid_y = coarse_grading_map[np.array(valid_y)]

    print('Train', count_targets(train_y), "Valid", count_targets(valid_y))

    if use_unsupervised:
        aptos2019, _ = get_aptos2019_test(aptos2019_dir)
        print('Adding', len(aptos2019),
              'unlabeled samples from aptos2019 (test)')

        diaretdb0_v_1_1 = fs.find_images_in_dir(
            os.path.join(data_dir, 'diaretdb0_v_1_1', 'train_images_768'))
        print('Adding', len(diaretdb0_v_1_1),
              'unlabeled samples from diaretdb0_v_1_1')

        diaretdb1_v_1_1 = fs.find_images_in_dir(
            os.path.join(data_dir, 'diaretdb1_v_1_1', 'train_images_768'))
        print('Adding', len(diaretdb1_v_1_1),
              'unlabeled samples from diaretdb1_v_1_1')

        origa1 = fs.find_images_in_dir(
            os.path.join(data_dir, 'origa', 'glaucoma_768'))
        print('Adding', len(origa1), 'unlabeled samples from origa1')

        origa2 = fs.find_images_in_dir(
            os.path.join(data_dir, 'origa', 'sanas_768'))
        print('Adding', len(origa2), 'unlabeled samples from origa2')

        stare = fs.find_images_in_dir(
            os.path.join(data_dir, 'stare', 'train_images_768'))
        print('Adding', len(stare), 'unlabeled samples from stare')

        unlabeled_samples = (diaretdb0_v_1_1 + diaretdb1_v_1_1 + stare +
                             origa1 + origa2 + aptos2019.tolist())

        if not use_messidor:
            messidor = fs.find_images_in_dir(
                os.path.join(data_dir, 'messidor', 'train_images_768'))
            unlabeled_samples += messidor
            print('Adding', len(messidor), 'unlabeled samples from Messidor')

        if not use_aptos2015:
            dataset_dir = os.path.join(data_dir, 'aptos-2015')
            x, y = get_aptos2015_train(dataset_dir, healthy_eye_fraction=0.1)
            unlabeled_samples += x.tolist()
            print('Adding', len(x), 'unlabeled samples from Aptos 2015')

        if not use_aptos2015_test_private:
            dataset_dir = os.path.join(data_dir, 'aptos-2015')
            x, y = get_aptos2015_test_private(dataset_dir,
                                              healthy_eye_fraction=0.1)
            unlabeled_samples += x.tolist()
            print('Adding', len(x),
                  'unlabeled samples from Aptos 2015 Test (Private)')

        unlabeled_targets = [UNLABELED_CLASS] * len(unlabeled_samples)
        print('Using', len(unlabeled_samples), 'unlabeled samples')

        train_x.extend(unlabeled_samples)
        train_y.extend(unlabeled_targets)

        train_ds = RetinopathyDatasetV2(train_x,
                                        train_y,
                                        transform=train_transform,
                                        normalize=valid_transform,
                                        dtype=target_dtype)
        trainset_sizes.append(len(unlabeled_samples))
    else:
        train_ds = RetinopathyDataset(train_x,
                                      train_y,
                                      transform=train_transform,
                                      dtype=target_dtype)

    valid_ds = RetinopathyDataset(valid_x,
                                  valid_y,
                                  transform=valid_transform,
                                  dtype=target_dtype)

    return train_ds, valid_ds, trainset_sizes
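A sketch of how this function might be consumed, wrapping the returned datasets into DataLoaders; the paths and parameters below are placeholders:

from torch.utils.data import DataLoader

train_ds, valid_ds, trainset_sizes = get_datasets(data_dir="data",
                                                  image_size=(512, 512),
                                                  use_aptos2019=True,
                                                  fold=0)

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True,
                          num_workers=4, pin_memory=True, drop_last=True)
valid_loader = DataLoader(valid_ds, batch_size=16, shuffle=False,
                          num_workers=4, pin_memory=True)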
Example No. 13
def main():
    dataset = fs.find_images_in_dir("/home/bloodaxe/datasets/ALASKA2/Cover")
    dataset = dataset[:500]
    print("YCbCr", compute_mean_std(tqdm(dataset)))
import numpy as np
import cv2
from pytorch_toolbelt.utils import fs
from tqdm import tqdm


def compute_mean_std(dataset, read_image=cv2.imread):
    """
    Compute per-channel mean and std over a list of images by averaging the
    per-image statistics. Note that averaging per-image variances slightly
    underestimates the global variance (it ignores the spread of per-image
    means), but it is a close approximation for large, homogeneous datasets.
    See https://stats.stackexchange.com/questions/25848/how-to-sum-a-standard-deviation
    """
    one_over_255 = 1.0 / 255.0

    global_mean = np.zeros(3, dtype=np.float64)
    global_var = np.zeros(3, dtype=np.float64)

    n_items = 0

    for image_fname in dataset:
        x = read_image(image_fname) * one_over_255
        mean, stddev = cv2.meanStdDev(x)

        global_mean += np.squeeze(mean)
        global_var += np.squeeze(stddev)**2
        n_items += 1

    return global_mean / n_items, np.sqrt(global_var / n_items)


dataset = fs.find_images_in_dir("d:\\datasets\\ALASKA2\\Cover")
print(compute_mean_std(tqdm(dataset)))