Example #1
    def setup_train_dataset(self):
        """
        Build the training set so that each batch of self.batch_size examples
        follows the same class distribution.
        """
        bd = BlockDesigner(self.train_examples)
        if self.sample_class:
            # Draw examples according to the user-supplied custom distribution.
            samp = Sampler(bd.remainder(), seed=self.random_seed)
            images, labels = samp.custom_distribution(self.sample_class, self.batch_size, self.custom_distribution)
            return {"X": images, "y": labels}
        else:
            # Break the examples into batch-sized blocks, then flatten the
            # {label: [example ids]} blocks into parallel id/label lists.
            blocks = bd.break_off_multiple_blocks(self.n_train_batches, self.batch_size)
            images = []
            labels = []
            for block in blocks:
                for y, ids in block.items():
                    for image_id in ids:
                        images.append(image_id)
                        labels.append(y)
            return {"X": images, "y": labels}
Example #2
    def __init__(self,
                 train_image_dir="data/train/centered_crop/",
                 image_shape=(128, 128, 3),
                 batch_size=128,
                 cache_size_factor=8,
                 center=0,
                 normalize=0,
                 amplify=1,
                 train_flip='no_flip',
                 shuffle=1,
                 test_image_dir=None,
                 random_seed=None,
                 valid_dataset_size=4864,
                 valid_flip='no_flip',
                 test_flip='no_flip',
                 sample_class=None,
                 custom_distribution=None,
                 train_color_cast='no_cast',
                 valid_color_cast='no_cast',
                 test_color_cast='no_cast',
                 color_cast_range=20):
        self.train_image_dir = train_image_dir
        self.test_image_dir = test_image_dir
        self.image_shape = image_shape
        self.batch_size = batch_size
        self.cache_size = (self.batch_size * cache_size_factor) # size in images
        self.center = center
        self.mean = None
        self.normalize = normalize
        self.std = None
        self.amplify = amplify
        # Flip-augmentation lambdas; validation and test flips are built
        # deterministically so evaluation is reproducible.
        self.train_set_flipper = ImageFlipOracle(train_flip)
        test_set_flipper = ImageFlipOracle(test_flip)
        self.train_flip_lambda = self.train_set_flipper.get_flip_lambda(train_flip)
        self.valid_flip_lambda = self.train_set_flipper.get_flip_lambda(valid_flip, deterministic=True)
        self.test_flip_lambda = test_set_flipper.get_flip_lambda(test_flip, deterministic=True)
        self.valid_dataset_size = valid_dataset_size
        self.random_seed = random_seed
        self.sample_class = sample_class
        self.custom_distribution = custom_distribution
        # Color-cast augmentation lambdas, one per split.
        color_cast_oracle = ColorCastOracle(self.image_shape[-1], color_cast_range)
        self.train_color_cast_lambda = color_cast_oracle.get_color_cast_lambda(train_color_cast)
        self.valid_color_cast_lambda = color_cast_oracle.get_color_cast_lambda(valid_color_cast)
        self.test_color_cast_lambda = color_cast_oracle.get_color_cast_lambda(test_color_cast)

        # Split the labelled examples: break off a validation block and keep
        # the remainder for training.
        bd = BlockDesigner(TRAIN_LABELS_CSV_PATH, seed=self.random_seed)

        valid_examples = bd.break_off_block(self.valid_dataset_size)
        self.train_examples = bd.remainder()
        self.n_train_batches = int(bd.size() / self.batch_size)

        self.valid_dataset = self.setup_valid_dataset(valid_examples)
        # With shuffling enabled, the static train dataset is not prebuilt here.
        self.train_dataset = None if shuffle else self.setup_train_dataset()
        self.test_dataset = self.setup_test_dataset()
        self.n_test_examples = len(self.test_dataset["X"])

        if self.sample_class:
            self.n_train_batches = int(len(self.train_dataset["X"]) / self.batch_size) # override in case Sampler is used (TODO make this neater)
        self.train_dataset_size = self.n_train_batches * self.batch_size

        # The mean/std image is only computed when centering or normalization is requested.
        if self.center == 1 or self.normalize == 1:
            self.calc_mean_std_image()
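
A hypothetical instantiation sketch follows; the name of the enclosing class ("DataStream") is an assumption, as is the presence of the image directories and of TRAIN_LABELS_CSV_PATH on disk, since neither appears in the snippet above:

# Assumed class name and data layout; values mirror the defaults shown in __init__.
stream = DataStream(
    train_image_dir="data/train/centered_crop/",
    image_shape=(128, 128, 3),
    batch_size=128,
    valid_dataset_size=4864,
    shuffle=1,            # leaves the static train dataset unbuilt (see __init__)
    random_seed=1991,     # arbitrary seed for reproducible block splits
)
print(stream.n_train_batches, stream.train_dataset_size, stream.n_test_examples)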