def get_augmenter(train_raw, domain):
    if OPTIONS.augment:
        aug_types = OPTIONS.augment.split('+')
        augmenter = Augmenter(domain, train_raw, aug_types)
        return augmenter
    else:
        return None
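# A hedged sketch of how this helper might be invoked; OPTIONS, domains, and train_raw
# are assumed to exist elsewhere in the project (the artificial-data script further down
# constructs Augmenter instances with the same (domain, data, aug_types) signature).
OPTIONS.augment = 'entity+nesting'
augmenter = get_augmenter(train_raw, domains.new('artificial'))
aug_examples = augmenter.sample(500) if augmenter else []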
def plot_with_augmentations(data, idx):
    aug = Augmenter()
    fig, ax = plt.subplots(1, 2)
    fig1, fig2 = data.remove_image_borders(idx)
    ax[0].imshow(fig1)
    ax[1].imshow(fig2)
    plt.show()
class DatasetLoader(Dataset):
    def __init__(self, data_dir, use_transforms=False, pytorch=True):
        """
        Args:
            data_dir: Directory including both the gray image directory and the ground truth directory.
        """
        super().__init__()
        # Loop through the files in the red folder and combine the other bands into a dictionary
        self.files = self.create_dict(data_dir)
        self.pytorch = pytorch
        self.use_transforms = use_transforms
        self.augmenter = Augmenter()

    def create_dict(self, data_dir):
        """
        Args:
            data_dir: Directory including both the gray image directory and the ground truth directory.
        """
        return

    def combine_files(self, gray_file: Path, gt_dir):
        return

    def __len__(self):
        return len(self.files)

    def open_as_array(self, idx, invert=False):
        return

    def open_mask(self, idx, add_dims=False):
        return

    def __getitem__(self, idx):
        # Get the image and mask as arrays
        img_as_array = self.open_as_array(idx)
        mask_as_array = self.open_mask(idx, add_dims=False)

        if self.use_transforms:
            img_as_array, mask_as_array = self.augmenter.transform_image(
                image=img_as_array,
                mask=mask_as_array,
                transform=cfg.TRAINING.TRANSFORM)

        if self.pytorch:
            img_as_array = img_as_array.transpose((2, 0, 1))

        # squeeze makes sure we get the right shape for the mask
        x = torch.tensor(img_as_array, dtype=torch.float32)
        y = torch.tensor(np.squeeze(mask_as_array), dtype=torch.int64)

        return x, y

    def get_as_pil(self, idx):  # remove?
        # Get an image for visualization; scale the mask into the 8-bit range
        arr = 255 * self.open_mask(idx)
        return Image.fromarray(arr.astype(np.uint8), 'RGB')
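# A minimal usage sketch, assuming data_dir points at the gray-image and ground-truth
# folders and that the stubbed create_dict/open_as_array/open_mask methods have been
# filled in; the batch size and shuffling here are arbitrary choices for illustration.
from torch.utils.data import DataLoader

dataset = DatasetLoader(data_dir='data/train', use_transforms=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True)

for x, y in loader:
    # x: float32 images of shape (batch, channels, H, W); y: int64 masks
    print(x.shape, y.shape)
    break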
def main():
    random.seed(0)
    base_data = gen_nested()
    base_train, base_test = base_data[:100], base_data[-500:]
    write_data('train_base100.tsv', base_train)
    write_data('test_base500.tsv', base_test)

    domain = domains.new('artificial')
    augmenter_entity = Augmenter(domain, base_train, ['entity'])
    augmenter_nesting = Augmenter(domain, base_train, ['nesting', 'entity'])
    deeper = sample_nested(depth=4, num=500)
    entity_data = augmenter_entity.sample(500)
    nesting_data = augmenter_nesting.sample(500)

    aug_nums = (25, 50, 75, 100, 150, 200, 250, 300, 400, 500)
    for n in aug_nums:
        write_data('train_base%d.tsv' % (100 + n), base_data[:(100 + n)])
        write_data('train_base100_entity%d.tsv' % n, base_train + entity_data[:n])
        write_data('train_base100_nesting%d.tsv' % n, base_train + nesting_data[:n])
        write_data('train_base100_deeper%d.tsv' % n, base_train + deeper[:n])
    def num_classes(self):
        return 3


if __name__ == '__main__':
    from augmentation import get_augumentation, Resizer, Normalizer, Augmenter
    # from augmentation import get_augumentation

    # dataset = FLIRDataset(root_dir='/data_host/FLIR_ADAS/FLIR_ADAS_1_3', set_name='train',
    #                       transform=get_augumentation(phase='train'))
    # dataset = FLIRDataset(root_dir='/data_host/FLIR_ADAS/FLIR_ADAS', set_name='train',
    #                       transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    dataset = FLIRDataset(root_dir='/data_host/FLIR_ADAS/FLIR_ADAS', set_name='val',
                          transform=transforms.Compose(
                              [Normalizer(), Augmenter(), Resizer()]))

    # rand_id = 0
    rand_id = random.randint(0, len(dataset) - 1)
    sample = dataset[rand_id]
    # print('sample: ', sample)
    dataset.flir.info()

    img = sample['img'].numpy()
    annot = sample['annot'].numpy()
    print('img:')
    print(img)
    print(img.shape)
    print('annot:')
    print(annot)
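    # A hedged visualization sketch for the sample printed above; it assumes each annot row
    # is laid out as [x1, y1, x2, y2, class_id] and that img comes back as (H, W, C);
    # transpose first if it is channel-first. Normalized pixel values are shown as-is.
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    fig, ax = plt.subplots(1)
    ax.imshow(img)
    for x1, y1, x2, y2, cls in annot:
        ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                       fill=False, edgecolor='red', linewidth=1))
    plt.show()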
def __init__(self, data_dir="data", img_size=48, rotate_angle=360, shift=4, n_epochs=200, parallel=True, pad=False, normalize=True, rng=np.random.RandomState(123)): self.data_dir = data_dir self.class_labels = {} self._num_label = 0 self.img_size = img_size self.train_file = os.path.join("data", "tidy", "train_%d.npy" % img_size) self.trainlabels_file = os.path.join("data", "tidy", "train_labels_%d.npy" % img_size) self.test_file = os.path.join("data", "tidy", "test_%d.npy" % img_size) self.vanilla_file = os.path.join("data", "tidy", "vtrain_%d.npy" % img_size) self.vanillalabels_file = os.path.join( "data", "tidy", "vtrain_labels_%d.npy" % img_size) self.mapfile = os.path.join("data", "tidy", "train_labmapping.pkl") self.trainfile = os.path.join("data", "tidy", "train.pkl") self.n_testimages = 130400 # filenames in the testset order self.testfilenames = [] self.rng = rng self.normalize = normalize X, y = self.load_images() self.X_train, self.X_valid, self.y_train, self.y_valid = self.train_test_split( X, y) del X, y if pad: self.resize_f = functools.partial(square, output_shape=(self.img_size, self.img_size), flatten=False) else: self.resize_f = lambda x: resize(x, output_shape=(self.img_size, self. img_size), mode='constant', cval=1.) self.X_train_resized = np.vstack( tuple([ x.reshape(1, self.img_size, self.img_size) for x in map(self.resize_f, self.X_train) ])) self.X_valid_resized = np.vstack( tuple([ x.reshape(1, self.img_size * self.img_size) for x in map(self.resize_f, self.X_valid) ])) del self.X_train, self.X_valid if parallel: self.queue = Queue(min(1, n_epochs + 1)) self.augmenter = Augmenter(self.queue, self.X_train_resized, max_items=n_epochs + 1, random_seed=self.rng.randint(9999), max_angle=rotate_angle, max_shift=shift, normalize=normalize, flatten=True) self.augmenter.start()
class DataSetLoader:
    def __init__(self, data_dir="data", img_size=48, rotate_angle=360, shift=4,
                 n_epochs=200, parallel=True, pad=False, normalize=True,
                 rng=np.random.RandomState(123)):
        self.data_dir = data_dir
        self.class_labels = {}
        self._num_label = 0
        self.img_size = img_size
        self.train_file = os.path.join("data", "tidy", "train_%d.npy" % img_size)
        self.trainlabels_file = os.path.join("data", "tidy", "train_labels_%d.npy" % img_size)
        self.test_file = os.path.join("data", "tidy", "test_%d.npy" % img_size)
        self.vanilla_file = os.path.join("data", "tidy", "vtrain_%d.npy" % img_size)
        self.vanillalabels_file = os.path.join("data", "tidy", "vtrain_labels_%d.npy" % img_size)
        self.mapfile = os.path.join("data", "tidy", "train_labmapping.pkl")
        self.trainfile = os.path.join("data", "tidy", "train.pkl")
        self.n_testimages = 130400
        # filenames in the test-set order
        self.testfilenames = []
        self.rng = rng
        self.normalize = normalize

        X, y = self.load_images()
        self.X_train, self.X_valid, self.y_train, self.y_valid = self.train_test_split(X, y)
        del X, y

        if pad:
            self.resize_f = functools.partial(square,
                                              output_shape=(self.img_size, self.img_size),
                                              flatten=False)
        else:
            self.resize_f = lambda x: resize(x,
                                             output_shape=(self.img_size, self.img_size),
                                             mode='constant', cval=1.)

        self.X_train_resized = np.vstack(
            tuple([x.reshape(1, self.img_size, self.img_size)
                   for x in map(self.resize_f, self.X_train)]))
        self.X_valid_resized = np.vstack(
            tuple([x.reshape(1, self.img_size * self.img_size)
                   for x in map(self.resize_f, self.X_valid)]))
        del self.X_train, self.X_valid

        if parallel:
            # NOTE: min(1, n_epochs + 1) always evaluates to 1 for positive n_epochs,
            # so the queue holds one augmented epoch at a time; max(1, n_epochs + 1)
            # may have been intended.
            self.queue = Queue(min(1, n_epochs + 1))
            self.augmenter = Augmenter(self.queue, self.X_train_resized,
                                       max_items=n_epochs + 1,
                                       random_seed=self.rng.randint(9999),
                                       max_angle=rotate_angle, max_shift=shift,
                                       normalize=normalize, flatten=True)
            self.augmenter.start()

    def load_images(self):
        # get cached data
        if os.path.isfile(self.trainfile) and os.path.isfile(self.mapfile):
            with open(self.mapfile, 'r') as lfile:
                self.class_labels = cPickle.load(lfile)
            with open(self.trainfile, 'r') as tfile:
                images, y = cPickle.load(tfile)
            return pd.Series(images), np.array(y, dtype='int32')

        images = []
        y = []
        for directory in sorted(glob.iglob(os.path.join(self.data_dir, "train", "*"))):
            print("processing %s" % directory)
            files = os.listdir(directory)
            n_images = len(files)
            # the last directory is a class label
            self.class_labels[self._num_label] = os.path.split(directory)[-1]
            # create labels list
            y.extend([self._num_label] * n_images)
            self._num_label += 1
            for i, image in enumerate(files):
                images.append(imread(os.path.join(directory, image), as_grey=True))

        # cache images as array for future use
        with open(self.mapfile, 'w') as lfile:
            cPickle.dump(self.class_labels, lfile)
        with open(self.trainfile, 'w') as tfile:
            cPickle.dump((images, y), tfile)
        return pd.Series(images), np.array(y, dtype='int32')

    def train_gen(self, padded=False, augment=False):
        assert len(self.X_train_resized) == len(self.y_train)
        n_samples = len(self.X_train_resized)
        # xs = np.zeros((n_samples, self.img_size * self.img_size), dtype='float32')
        # yield train set permutations indefinitely
        while True:
            shuff_ind = self.rng.permutation(n_samples)
            if augment:
                # yield self.X_train_resized[shuff_ind].astype('float32'), self.y_train[shuff_ind]
                rotated = self.queue.get().astype(theano.config.floatX)
                if self.normalize:
                    rotated = (rotated - np.mean(rotated, axis=1, keepdims=True)) \
                        / (rotated.std(axis=1, keepdims=True) + 1e-5)
                yield rotated[shuff_ind], self.y_train[shuff_ind]
            else:
                reshaped = self.X_train_resized.reshape(
                    self.X_train_resized.shape[0], self.img_size * self.img_size)
                yield reshaped[shuff_ind].astype(theano.config.floatX), self.y_train[shuff_ind]
            # transform the training set
            # xs = np.vstack(tuple(
            #     map(functools.partial(transform,
            #                           rng=self.rng,
            #                           image_size=(self.img_size, self.img_size)),
            #         self.X_train)))

    def valid_gen(self, padded=False):
        # will return the same shuffled images
        while True:
            shuff_ind = self.rng.permutation(len(self.X_valid_resized))
            xs = self.X_valid_resized
            if self.normalize:
                xs = (xs - np.mean(xs, axis=1, keepdims=True)) \
                    / (xs.std(axis=1, keepdims=True) + 1e-5)
            yield xs[shuff_ind].astype(theano.config.floatX), self.y_valid[shuff_ind]

    def load_train(self):
        # check if a dataset with the given image size has already been processed
        if os.path.isfile(self.train_file) and os.path.isfile(self.trainlabels_file):
            X = np.load(self.train_file)
            y = np.load(self.trainlabels_file)
            with open(os.path.join("data", "tidy",
                                   "train_%d_labmapping.npy" % self.img_size), 'r') as lfile:
                self.class_labels = json.load(lfile)
            return X, y

        x = []
        y = []
        for directory in sorted(glob.iglob(os.path.join(self.data_dir, "train", "*"))):
            print("processing %s" % directory)
            files = os.listdir(directory)
            # set up the array to store images and labels
            n_images = len(files)
            images = np.zeros((n_images, self.img_size * self.img_size), dtype='float32')
            # the last directory is a class label
            self.class_labels[self._num_label] = os.path.split(directory)[-1]
            # create labels list
            y.extend([self._num_label] * n_images)
            self._num_label += 1
            for i, image in enumerate(files):
                img_array = imread(os.path.join(directory, image), as_grey=True)
                images[i, ...] = resize(img_array,
                                        (self.img_size, self.img_size)).reshape(1, -1)
            x.append(images)

        # concatenate the arrays from all classes and append labels
        x = np.vstack(tuple(x))
        y = np.array(y, dtype='int32')
        # save the processed files
        np.save(self.train_file, x)
        np.save(self.trainlabels_file, y)
        # also save label to index mapping
        return x, y

    def load_test(self):
        testdir = os.path.join(self.data_dir, "test")
        # if a test dataset is present load it from file
        if os.path.isfile(self.test_file):
            self.testfilenames = os.listdir(testdir)
            return np.load(self.test_file)

        # read test images
        images = np.zeros((self.n_testimages, self.img_size * self.img_size), dtype='float32')
        for i, imfile in enumerate(os.listdir(testdir)):
            img_array = imread(os.path.join(testdir, imfile), as_grey=True)
            images[i, ...] = resize(img_array,
                                    (self.img_size, self.img_size)).reshape(1, -1)
            self.testfilenames.append(imfile)

        assert len(images) == len(self.testfilenames), \
            "Number of files doesn't match number of images"
        if self.normalize:
            images = (images - np.mean(images, axis=1, keepdims=True)) \
                / (images.std(axis=1, keepdims=True) + 1e-5)
        # cache the resulting array for future use
        np.save(self.test_file, images)
        return images

    def train_test_split(self, X, y, test_size=0.1):
        sss = StratifiedShuffleSplit(y, n_iter=1, random_state=self.rng, test_size=test_size)
        # we only split once, so do not iterate; just convert to a list and take the first split
        train, test = list(sss).pop()
        return X[train], X[test], y[train], y[test]

    def save_submission(self, y_pred, file_suffix=""):
        # sanity check
        h, w = y_pred.shape
        assert w == len(self.class_labels), "Not all class labels present"
        # number of test cases
        assert h == len(self.testfilenames), "Not all test observations present"
        colnames = [self.class_labels[ind] for ind in xrange(121)]
        dfr = pd.DataFrame(y_pred, index=self.testfilenames, columns=colnames)
        dfr.to_csv(os.path.join(self.data_dir, "submissions",
                                "submission-%s.csv" % file_suffix),
                   format="%f", index_label="image")
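# A minimal usage sketch, assuming the data/train/<class>/ layout this loader expects
# is present on disk; parallel=False skips the background Augmenter thread, and each
# call to next() yields one shuffled, flattened pass over the resized training images.
loader = DataSetLoader(img_size=48, n_epochs=10, parallel=False)
gen = loader.train_gen(augment=False)
X_epoch, y_epoch = next(gen)
print(X_epoch.shape, y_epoch.shape)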
-> masks_orig
-> train
-> val
-> augs
-> img
-> mask
'''

# Augment the original training set of images
in_img = r'C:\Users\ahls_st\Documents\MasterThesis\IKONOS\With_Hedges\FourBands\Splits\imgs\train'  # change
aug_img_dir = r'D:\Steve\IKONOS\4band_geo_only\imgs'  # change
in_mask = r'C:\Users\ahls_st\Documents\MasterThesis\IKONOS\With_Hedges\ThreeBands\Splits\BGR\Masks\train'  # change
aug_mask_dir = r'D:\Steve\IKONOS\4band_geo_only\masks'  # change

# Creates augmented versions of the training data. Single or combined augmentations can be chosen.
augmenter = Augmenter(in_img, in_mask, '.png')
# augmenter.augment(aug_img_dir, aug_mask_dir, n_bands=3)  # performs all augmentations as singles
augmenter.augment_combo(aug_img_dir, aug_mask_dir, n_bands=4, times=23, n_geo=2, n_spec=0)  # performs geometric-only augmentations
augmenter.random_crop(aug_img_dir, aug_mask_dir, n_bands=4, num=8)  # random crops

# Get rid of hedge masks where only a small part is visible at the edge of the image.
# Best to inspect the removed files before continuing to make sure the erase threshold is OK.
if not os.path.exists(os.path.join(aug_mask_dir, 'Cleaned')):
    os.mkdir(os.path.join(aug_mask_dir, 'Cleaned'))
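# Hypothetical sketch of the cleaning step described above: keep only masks whose
# foreground area exceeds an erase threshold and copy them into the 'Cleaned' folder.
# The threshold value, the '.png' filter, and the use of PIL/NumPy here are assumptions,
# not part of the original pipeline.
import numpy as np
from PIL import Image

ERASE_THRESHOLD = 100  # minimum number of non-zero mask pixels required to keep a mask

cleaned_dir = os.path.join(aug_mask_dir, 'Cleaned')
for fname in os.listdir(aug_mask_dir):
    if not fname.endswith('.png'):
        continue
    mask = np.array(Image.open(os.path.join(aug_mask_dir, fname)))
    if np.count_nonzero(mask) >= ERASE_THRESHOLD:
        Image.fromarray(mask).save(os.path.join(cleaned_dir, fname))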