Example 1
def get_trainval_indices(dataset, fold_index=0, n_splits=5, xy_transforms=None, batch_size=32, n_workers=8, seed=None):

    trainval_split = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)

    if xy_transforms is not None:
        targets_dataset = TransformedDataset(dataset, transforms=xy_transforms)
    else:
        targets_dataset = dataset

    n_samples = len(targets_dataset)
    # Dummy feature matrix: StratifiedKFold stratifies on y alone.
    x = np.zeros((n_samples, 1))
    y = np.zeros((n_samples,), dtype=np.uint8)

    # Identity collate: keep raw (sample, target) pairs instead of stacking
    # tensors, so the targets can be read out cheaply.
    def id_collate_fn(_x):
        return _x

    data_loader = DataLoader(targets_dataset, batch_size=batch_size, num_workers=n_workers, collate_fn=id_collate_fn)

    # Collect all targets; only y is needed to drive the stratified split.
    for i, dp in enumerate(data_loader):
        y[i * batch_size: (i + 1) * batch_size] = [p[1] for p in dp]

    # Iterate the splitter until the requested fold is reached.
    train_index = None
    test_index = None
    for i, (train_index, test_index) in enumerate(trainval_split.split(x, y)):
        if i == fold_index:
            break
    return train_index, test_index
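
For reference, a minimal usage sketch (MyImageDataset is a hypothetical stand-in for any Dataset yielding (sample, integer label) pairs):

# Hypothetical usage: get train/val indices for fold 0 of a 5-fold split.
dataset = MyImageDataset("path/to/images")
train_index, val_index = get_trainval_indices(dataset, fold_index=0,
                                              n_splits=5, seed=12345)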
Example 2
def get_trainval_data_loaders(dataset, train_index, val_index,
                              train_transforms=None,
                              val_transforms=None,
                              train_batch_size=32,
                              val_batch_size=32,
                              collate_fn=default_collate,
                              num_workers=8,
                              pin_memory=True):
    assert isinstance(dataset, Dataset)
    if train_transforms is not None:
        assert isinstance(train_transforms, (list, tuple))
        train_transforms = Compose(train_transforms)

    if val_transforms is not None:
        assert isinstance(val_transforms, (list, tuple))
        val_transforms = Compose(val_transforms)

    train_sampler = SubsetRandomSampler(train_index)
    val_sampler = SubsetRandomSampler(val_index)

    # Decode image files and shift labels from 1-based to 0-based.
    dataset = TransformedDataset(dataset, transforms=read_image,
                                 target_transforms=lambda l: l - 1)

    train_aug_dataset = TransformedDataset(dataset, transforms=train_transforms)
    val_aug_dataset = TransformedDataset(dataset, transforms=val_transforms)

    train_batches = DataLoader(train_aug_dataset, batch_size=train_batch_size,
                               sampler=train_sampler,
                               num_workers=num_workers,
                               collate_fn=collate_fn,
                               pin_memory=pin_memory, drop_last=True)

    val_batches = DataLoader(val_aug_dataset, batch_size=val_batch_size,
                             sampler=val_sampler,
                             num_workers=num_workers,
                             collate_fn=collate_fn,
                             pin_memory=pin_memory, drop_last=True)

    return train_batches, val_batches
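
A short usage sketch combining the two helpers above (the transform lists are illustrative placeholders, not the project's actual pipeline):

# Illustrative: split once, then build both loaders from the same dataset.
train_index, val_index = get_trainval_indices(dataset, fold_index=0, n_splits=5, seed=12345)
train_loader, val_loader = get_trainval_data_loaders(dataset, train_index, val_index,
                                                     train_transforms=[RandomHorizontalFlip(p=0.5), ToTensor()],
                                                     val_transforms=[ToTensor()],
                                                     train_batch_size=64,
                                                     val_batch_size=64,
                                                     num_workers=4)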
Example 3
def get_data_loader(dataset_or_path,
                    data_transform=None,
                    target_transform=None,
                    sample_indices=None,
                    sampler=None,
                    collate_fn=default_collate,
                    batch_size=16,
                    num_workers=8, pin_memory=True):
    assert isinstance(dataset_or_path, Dataset) or \
        (isinstance(dataset_or_path, (str, Path)) and Path(dataset_or_path).exists()), \
        "Dataset or path should be either Dataset or path to images, but given {}".format(dataset_or_path)

    assert sample_indices is None or sampler is None, \
        "Only one of sample_indices and sampler may be given, not both"

    if data_transform is not None and isinstance(data_transform, (list, tuple)):
        data_transform = Compose(data_transform)

    if isinstance(dataset_or_path, (str, Path)) and Path(dataset_or_path).exists():
        dataset = TrainvalFilesDataset(dataset_or_path)
    else:
        dataset = dataset_or_path

    if sample_indices is None and sampler is None:
        sample_indices = np.arange(len(dataset))

    if sample_indices is not None:
        sampler = SubsetRandomSampler(sample_indices)

    # Decode image files and shift labels from 1-based to 0-based.
    dataset = TransformedDataset(dataset, transforms=read_image, target_transforms=lambda l: l - 1)
    if data_transform is not None or target_transform is not None:
        dataset = TransformedDataset(dataset, transforms=data_transform, target_transforms=target_transform)

    data_loader = DataLoader(dataset, batch_size=batch_size,
                             sampler=sampler,
                             collate_fn=collate_fn,
                             num_workers=num_workers, pin_memory=pin_memory)
    return data_loader
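
A minimal sketch of the two supported call styles (paths and transforms are illustrative):

# From an existing Dataset object:
loader = get_data_loader(my_dataset, data_transform=[ToTensor()], batch_size=16)

# From a path to images, wrapped in TrainvalFilesDataset internally:
loader = get_data_loader("data/train_400x400", data_transform=[ToTensor()], batch_size=16)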
Example 4
TRAIN_TRANSFORMS = [
    RandomResizedCrop(350, scale=(0.8, 1.0)),
    RandomHorizontalFlip(p=0.5),
]

common_transform = [
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]

BATCH_SIZE = 180
NUM_WORKERS = 15

n_classes = 128
dataset = FilesFromCsvDataset("output/filtered_train_dataset.csv")
dataset = TransformedDataset(dataset,
                             transforms=read_image,
                             target_transforms=lambda l: l - 1)
dataset = TransformedDataset(dataset, transforms=Compose(TRAIN_TRANSFORMS))
dataset = RandomMultiImageAugDataset(dataset,
                                     n_classes,
                                     aug_fn=partial(basic_random_half_blend,
                                                    alpha=0.3))
dataset = TransformedDataset(dataset, transforms=Compose(common_transform))

TRAIN_LOADER = get_data_loader(dataset,
                               data_transform=None,
                               batch_size=BATCH_SIZE,
                               num_workers=NUM_WORKERS,
                               cuda=True)

val_dataset = FilesFromCsvDataset("output/filtered_val_dataset.csv")
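
basic_random_half_blend is project-specific and not shown here; judging only from its name and the alpha argument, it presumably mixes a sample with another image of the same class. A purely hypothetical sketch of such an aug_fn (an assumption, not the project's implementation):

import numpy as np

def hypothetical_half_blend(img_a, img_b, alpha=0.3):
    # Assumed behaviour: alpha-blend two same-class images, keeping img_a dominant.
    a = np.asarray(img_a, dtype=np.float32)
    b = np.asarray(img_b, dtype=np.float32)
    return ((1.0 - alpha) * a + alpha * b).astype(np.uint8)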
Example 5
VAL_TRANSFORMS = [
    CenterCrop(size=350),
    RandomHorizontalFlip(p=0.5),
    RandomVerticalFlip(p=0.5),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]

BATCH_SIZE = 32
NUM_WORKERS = 8

dataset = TrainvalFilesDataset(DATASET_PATH / "train_400x400")
dataset = TransformedDataset(dataset,
                             transforms=lambda x: x,
                             target_transforms=lambda y: y - 1)
reduced_train_indices = get_reduced_train_indices(dataset,
                                                  max_n_samples_per_class=1250,
                                                  seed=SEED)
del dataset

TRAIN_LOADER = get_data_loader(dataset_path=DATASET_PATH / "train_400x400",
                               data_transform=TRAIN_TRANSFORMS,
                               sample_indices=reduced_train_indices,
                               batch_size=BATCH_SIZE,
                               num_workers=NUM_WORKERS,
                               cuda=True)

VAL_LOADER = get_data_loader(dataset_path=DATASET_PATH / "val_400x400",
                             data_transform=VAL_TRANSFORMS,
                             batch_size=BATCH_SIZE,
                             num_workers=NUM_WORKERS,
                             cuda=True)
Example 6
DEBUG = True
SEED = 2018

OUTPUT_PATH = "output"


meta_features_path = Path("output")
meta_features_list = [
    meta_features_path / "val_probas_inceptionresnetv2_350_resized_crop" / "20180428_1622" / "probas.csv",
    meta_features_path / "val_probas_inceptionv4_350_resized_crop" / "20180428_1633" / "probas.csv",
]
meta_features = get_metafeatures(meta_features_list)

dataset = FilesFromCsvDataset("output/filtered_val_dataset.csv")
dataset = TransformedDataset(dataset,
                             transforms=lambda x: (x, Image.open(x).size),
                             target_transforms=lambda l: l - 1)
df_imsize_targets = get_imsize_and_targets(dataset)

X = pd.concat([meta_features, df_imsize_targets[['width', 'height']]], axis=1)
X.dropna(inplace=True)
# Align targets with the rows that survived dropna before converting to arrays.
Y = df_imsize_targets.loc[X.index, 'target'].values
X = X.values

# Cross-validation parameters
CV_SPLIT = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

MODEL = cat.CatBoostClassifier

SCORINGS = ["neg_log_loss", ]
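
These definitions plug directly into scikit-learn's cross-validation utilities; a minimal sketch of how they might be consumed (the CatBoost hyperparameters are illustrative):

from sklearn.model_selection import cross_validate

model = MODEL(iterations=500, random_seed=SEED, verbose=False)
results = cross_validate(model, X, Y, cv=CV_SPLIT, scoring=SCORINGS)
print(results["test_neg_log_loss"].mean())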