Example #1
    def __init__(self, data_dir, use_transforms=False, pytorch=True):
        """
        Args:
            data_dir: Directory containing both the gray image directory and the ground truth directory.
        """
        super().__init__()

        # Loop through the files in the red-band folder and collect the other bands into a dictionary
        self.files = self.create_dict(data_dir)
        self.pytorch = pytorch
        self.use_transforms = use_transforms
        self.augmenter = Augmenter()
Example #2
def get_augmenter(train_raw, domain):
    if OPTIONS.augment:
        aug_types = OPTIONS.augment.split('+')
        augmenter = Augmenter(domain, train_raw, aug_types)
        return augmenter
    else:
        return None
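get_augmenter depends on a module-level OPTIONS object plus the project's domain and train_raw arguments, and returns None when no augmentation is requested. Below is a minimal sketch, assuming an argparse-based OPTIONS (not shown in the original code), of how the --augment flag could be wired so its value matches the split('+') call above.

# Sketch only (not from the original project): one way the OPTIONS.augment
# flag consumed by get_augmenter could be defined.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--augment', default='',
                    help="'+'-separated augmentation types, e.g. 'entity+nesting'")
OPTIONS = parser.parse_args(['--augment', 'entity+nesting'])

# get_augmenter(train_raw, domain) would then build
# Augmenter(domain, train_raw, ['entity', 'nesting']).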
Example #3
def plot_with_augmentations(data, idx):
    aug = Augmenter()

    fig, ax = plt.subplots(1, 2)
    fig1, fig2 = data.remove_image_borders(idx)
    ax[0].imshow(fig1)
    ax[1].imshow(fig2)
    plt.show()
Example #4
class DatasetLoader(Dataset):
    def __init__(self, data_dir, use_transforms=False, pytorch=True):
        """
        Args:
            data_dir: Directory containing both the gray image directory and the ground truth directory.
        """
        super().__init__()

        # Loop through the files in the red-band folder and collect the other bands into a dictionary
        self.files = self.create_dict(data_dir)
        self.pytorch = pytorch
        self.use_transforms = use_transforms
        self.augmenter = Augmenter()

    def create_dict(self, data_dir):
        """
        Args:
            data_dir: Directory containing both the gray image directory and the ground truth directory.
        """
        return

    def combine_files(self, gray_file: Path, gt_dir):
        return

    def __len__(self):
        return len(self.files)

    def open_as_array(self, idx, invert=False):
        return

    def open_mask(self, idx, add_dims=False):
        return

    def __getitem__(self, idx):
        #get the image and mask as arrays
        img_as_array = self.open_as_array(idx)
        mask_as_array = self.open_mask(idx, add_dims=False)

        if self.use_transforms:
            img_as_array, mask_as_array = self.augmenter.transform_image(
                image=img_as_array,
                mask=mask_as_array,
                transform=cfg.TRAINING.TRANSFORM)

        if self.pytorch:
            img_as_array = img_as_array.transpose((2, 0, 1))

        # squeeze makes sure we get the right shape for the mask
        x = torch.tensor(img_as_array, dtype=torch.float32)
        y = torch.tensor(np.squeeze(mask_as_array), dtype=torch.int64)

        return x, y

    def get_as_pil(self, idx):  # remove?
        # get an image for visualization; scale by 255 so values stay in uint8 range
        arr = 255 * self.open_mask(idx)

        return Image.fromarray(arr.astype(np.uint8), 'RGB')
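The class above is only a skeleton: create_dict, open_as_array and open_mask are stubs. Assuming they are implemented to return real image and mask arrays, a minimal usage sketch with PyTorch's DataLoader looks like the following (the path, batch size and worker count are illustrative, not from the original code).

from torch.utils.data import DataLoader

# 'data/' is a placeholder path; use_transforms enables the Augmenter-based transforms.
dataset = DatasetLoader('data/', use_transforms=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=2)

for imgs, masks in loader:
    # imgs: (8, C, H, W) float32 tensors, masks: (8, H, W) int64 tensors
    break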
Example #5
def main():
    random.seed(0)
    base_data = gen_nested()
    base_train, base_test = base_data[:100], base_data[-500:]
    write_data('train_base100.tsv', base_train)
    write_data('test_base500.tsv', base_test)

    domain = domains.new('artificial')
    augmenter_entity = Augmenter(domain, base_train, ['entity'])
    augmenter_nesting = Augmenter(domain, base_train, ['nesting', 'entity'])
    deeper = sample_nested(depth=4, num=500)
    entity_data = augmenter_entity.sample(500)
    nesting_data = augmenter_nesting.sample(500)
    aug_nums = (25, 50, 75, 100, 150, 200, 250, 300, 400, 500)
    for n in aug_nums:
        write_data('train_base%d.tsv' % (100 + n), base_data[:(100 + n)])
        write_data('train_base100_entity%d.tsv' % n,
                   base_train + entity_data[:n])
        write_data('train_base100_nesting%d.tsv' % n,
                   base_train + nesting_data[:n])
        write_data('train_base100_deeper%d.tsv' % n, base_train + deeper[:n])
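gen_nested, sample_nested, domains and write_data come from the surrounding project and are not shown here. Purely as a sketch, assuming each example is an (input, output) pair, write_data could be a plain tab-separated writer:

def write_data(filename, examples):
    # Hypothetical helper: one example per line, input and output separated by a tab.
    with open(filename, 'w') as f:
        for x, y in examples:
            f.write('%s\t%s\n' % (x, y))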
Example #6
    def num_classes(self):
        return 3


if __name__ == '__main__':
    from augmentation import get_augumentation, Resizer, Normalizer, Augmenter
    # from augmentation import get_augumentation
    # dataset = FLIRDataset(root_dir='/data_host/FLIR_ADAS/FLIR_ADAS_1_3', set_name='train',
    #                       transform=get_augumentation(phase='train'))
    # dataset = FLIRDataset(root_dir='/data_host/FLIR_ADAS/FLIR_ADAS', set_name='train',
    #                       transform=transforms.Compose([Normalizer(),Augmenter(),Resizer()]))
    dataset = FLIRDataset(root_dir='/data_host/FLIR_ADAS/FLIR_ADAS',
                          set_name='val',
                          transform=transforms.Compose(
                              [Normalizer(),
                               Augmenter(),
                               Resizer()]))

    # rand_id = 0
    rand_id = random.randint(0, len(dataset) - 1)
    sample = dataset[rand_id]

    # print('sample: ', sample)
    dataset.flir.info()
    img = sample['img'].numpy()
    annot = sample['annot'].numpy()
    print('img:')
    print(img)
    print(img.shape)
    print('annot:')
    print(annot)
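To eyeball the sample rather than print raw arrays, the boxes in annot can be drawn over the image. The sketch below assumes annot rows are laid out as [x1, y1, x2, y2, class_id] and that img is either channel-first or channel-last; neither is confirmed by the snippet above.

import matplotlib.pyplot as plt
import matplotlib.patches as patches

fig, ax = plt.subplots()
# move channels last if the tensor came out as (C, H, W)
ax.imshow(img.transpose(1, 2, 0) if img.ndim == 3 and img.shape[0] == 3 else img)
for x1, y1, x2, y2, _cls in annot:
    ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                   fill=False, edgecolor='red'))
plt.show()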
Example #7
    def __init__(self,
                 data_dir="data",
                 img_size=48,
                 rotate_angle=360,
                 shift=4,
                 n_epochs=200,
                 parallel=True,
                 pad=False,
                 normalize=True,
                 rng=np.random.RandomState(123)):
        self.data_dir = data_dir
        self.class_labels = {}
        self._num_label = 0
        self.img_size = img_size
        self.train_file = os.path.join("data", "tidy",
                                       "train_%d.npy" % img_size)
        self.trainlabels_file = os.path.join("data", "tidy",
                                             "train_labels_%d.npy" % img_size)
        self.test_file = os.path.join("data", "tidy", "test_%d.npy" % img_size)
        self.vanilla_file = os.path.join("data", "tidy",
                                         "vtrain_%d.npy" % img_size)
        self.vanillalabels_file = os.path.join(
            "data", "tidy", "vtrain_labels_%d.npy" % img_size)
        self.mapfile = os.path.join("data", "tidy", "train_labmapping.pkl")
        self.trainfile = os.path.join("data", "tidy", "train.pkl")
        self.n_testimages = 130400
        # filenames in the testset order
        self.testfilenames = []
        self.rng = rng
        self.normalize = normalize
        X, y = self.load_images()
        self.X_train, self.X_valid, self.y_train, self.y_valid = self.train_test_split(
            X, y)
        del X, y

        if pad:
            self.resize_f = functools.partial(square,
                                              output_shape=(self.img_size,
                                                            self.img_size),
                                              flatten=False)
        else:
            self.resize_f = lambda x: resize(x,
                                             output_shape=(self.img_size, self.
                                                           img_size),
                                             mode='constant',
                                             cval=1.)

        self.X_train_resized = np.vstack(
            tuple([
                x.reshape(1, self.img_size, self.img_size)
                for x in map(self.resize_f, self.X_train)
            ]))
        self.X_valid_resized = np.vstack(
            tuple([
                x.reshape(1, self.img_size * self.img_size)
                for x in map(self.resize_f, self.X_valid)
            ]))
        del self.X_train, self.X_valid
        if parallel:
            self.queue = Queue(min(1, n_epochs + 1))
            self.augmenter = Augmenter(self.queue,
                                       self.X_train_resized,
                                       max_items=n_epochs + 1,
                                       random_seed=self.rng.randint(9999),
                                       max_angle=rotate_angle,
                                       max_shift=shift,
                                       normalize=normalize,
                                       flatten=True)
            self.augmenter.start()
Example #8
class DataSetLoader:
    def __init__(self,
                 data_dir="data",
                 img_size=48,
                 rotate_angle=360,
                 shift=4,
                 n_epochs=200,
                 parallel=True,
                 pad=False,
                 normalize=True,
                 rng=np.random.RandomState(123)):
        self.data_dir = data_dir
        self.class_labels = {}
        self._num_label = 0
        self.img_size = img_size
        self.train_file = os.path.join("data", "tidy",
                                       "train_%d.npy" % img_size)
        self.trainlabels_file = os.path.join("data", "tidy",
                                             "train_labels_%d.npy" % img_size)
        self.test_file = os.path.join("data", "tidy", "test_%d.npy" % img_size)
        self.vanilla_file = os.path.join("data", "tidy",
                                         "vtrain_%d.npy" % img_size)
        self.vanillalabels_file = os.path.join(
            "data", "tidy", "vtrain_labels_%d.npy" % img_size)
        self.mapfile = os.path.join("data", "tidy", "train_labmapping.pkl")
        self.trainfile = os.path.join("data", "tidy", "train.pkl")
        self.n_testimages = 130400
        # filenames in the testset order
        self.testfilenames = []
        self.rng = rng
        self.normalize = normalize
        X, y = self.load_images()
        self.X_train, self.X_valid, self.y_train, self.y_valid = self.train_test_split(
            X, y)
        del X, y

        if pad:
            self.resize_f = functools.partial(square,
                                              output_shape=(self.img_size,
                                                            self.img_size),
                                              flatten=False)
        else:
            self.resize_f = lambda x: resize(x,
                                             output_shape=(self.img_size, self.
                                                           img_size),
                                             mode='constant',
                                             cval=1.)

        self.X_train_resized = np.vstack(
            tuple([
                x.reshape(1, self.img_size, self.img_size)
                for x in map(self.resize_f, self.X_train)
            ]))
        self.X_valid_resized = np.vstack(
            tuple([
                x.reshape(1, self.img_size * self.img_size)
                for x in map(self.resize_f, self.X_valid)
            ]))
        del self.X_train, self.X_valid
        if parallel:
            self.queue = Queue(min(1, n_epochs + 1))
            self.augmenter = Augmenter(self.queue,
                                       self.X_train_resized,
                                       max_items=n_epochs + 1,
                                       random_seed=self.rng.randint(9999),
                                       max_angle=rotate_angle,
                                       max_shift=shift,
                                       normalize=normalize,
                                       flatten=True)
            self.augmenter.start()

    def load_images(self):
        # get cached data
        if os.path.isfile(self.trainfile) and os.path.isfile(self.mapfile):
            with open(self.mapfile, 'r') as lfile:
                self.class_labels = cPickle.load(lfile)
            with open(self.trainfile, 'r') as tfile:
                images, y = cPickle.load(tfile)
            return pd.Series(images), np.array(y, dtype='int32')
        images = []
        y = []
        for directory in sorted(
                glob.iglob(os.path.join(self.data_dir, "train", "*"))):
            print("processing %s" % directory)
            files = os.listdir(directory)
            n_images = len(files)
            # the last directory is a class label
            self.class_labels[self._num_label] = os.path.split(directory)[-1]
            # create labels list
            y.extend([self._num_label] * n_images)
            self._num_label += 1
            for i, image in enumerate(files):
                images.append(
                    imread(os.path.join(directory, image), as_grey=True))
        # cache images as array for future use
        with open(self.mapfile, 'w') as lfile:
            cPickle.dump(self.class_labels, lfile)
        with open(self.trainfile, 'w') as tfile:
            cPickle.dump((images, y), tfile)
        return pd.Series(images), np.array(y, dtype='int32')

    def train_gen(self, padded=False, augment=False):
        assert len(self.X_train_resized) == len(self.y_train)
        n_samples = len(self.X_train_resized)
        # xs = np.zeros((n_samples, self.img_size * self.img_size), dtype='float32')
        # yield train set permutations indefinitely
        while True:
            shuff_ind = self.rng.permutation(n_samples)
            if augment:
                #yield self.X_train_resized[shuff_ind].astype('float32'), self.y_train[shuff_ind]
                rotated = self.queue.get().astype(theano.config.floatX)
                if self.normalize:
                    rotated = (rotated - np.mean(rotated, axis=1, keepdims=True)) \
                              /(rotated.std(axis=1, keepdims=True) + 1e-5)
                yield rotated[shuff_ind], self.y_train[shuff_ind]
            else:
                reshaped = self.X_train_resized.reshape(
                    self.X_train_resized.shape[0],
                    self.img_size * self.img_size)
                yield reshaped[shuff_ind].astype(
                    theano.config.floatX), self.y_train[shuff_ind]
            #transform the training set
            # xs = np.vstack(tuple(
            #      map(functools.partial(transform,
            #                            rng=self.rng,
            #                            image_size=(self.img_size, self.img_size)),
            #          self.X_train)))

    def valid_gen(self, padded=False):
        # will return same shuffled images
        while True:
            shuff_ind = self.rng.permutation(len(self.X_valid_resized))
            xs = self.X_valid_resized
            if self.normalize:
                xs = (xs - np.mean(xs, axis=1, keepdims=True)) \
                     /(xs.std(axis=1, keepdims=True) + 1e-5)
            yield xs[shuff_ind].astype(
                theano.config.floatX), self.y_valid[shuff_ind]

    def load_train(self):
        # check if a dataset with the given image size has already been processed
        if os.path.isfile(self.train_file) and os.path.isfile(
                self.trainlabels_file):
            X = np.load(self.train_file)
            y = np.load(self.trainlabels_file)
            with open(
                    os.path.join("data", "tidy",
                                 "train_%d_labmapping.npy" % self.img_size),
                    'r') as lfile:
                self.class_labels = json.load(lfile)
            return X, y
        x = []
        y = []
        for directory in sorted(
                glob.iglob(os.path.join(self.data_dir, "train", "*"))):
            print("processing %s" % directory)
            files = os.listdir(directory)
            # set up the array to store images and labels
            n_images = len(files)
            images = np.zeros((n_images, self.img_size * self.img_size),
                              dtype='float32')
            # the last directory is a class label
            self.class_labels[self._num_label] = os.path.split(directory)[-1]
            # create labels list
            y.extend([self._num_label] * n_images)
            self._num_label += 1
            for i, image in enumerate(files):
                img_array = imread(os.path.join(directory, image),
                                   as_grey=True)
                images[i, ...] = resize(
                    img_array, (self.img_size, self.img_size)).reshape(1, -1)
            x.append(images)
        # concatenate the arrays from all classes and append labels
        x = np.vstack(tuple(x))
        y = np.array(y, dtype='int32')
        # save the processed files
        np.save(self.train_file, x)
        np.save(self.trainlabels_file, y)
        # also save the label-to-index mapping so load_train() can restore it
        with open(
                os.path.join("data", "tidy",
                             "train_%d_labmapping.npy" % self.img_size),
                'w') as lfile:
            json.dump(self.class_labels, lfile)

        return x, y

    def load_test(self):
        testdir = os.path.join(self.data_dir, "test")
        # if a test dataset is present load it from file
        if os.path.isfile(self.test_file):
            self.testfilenames = os.listdir(testdir)
            return np.load(self.test_file)
        # read test images
        images = np.zeros((self.n_testimages, self.img_size * self.img_size),
                          dtype='float32')
        for i, imfile in enumerate(os.listdir(testdir)):
            img_array = imread(os.path.join(testdir, imfile), as_grey=True)
            images[i, ...] = resize(img_array,
                                    (self.img_size, self.img_size)).reshape(
                                        1, -1)
            self.testfilenames.append(imfile)
        assert len(images) == len(
            self.testfilenames
        ), "Number of files doesn't match number of images"
        if self.normalize:
            images = (images - np.mean(images, axis=1, keepdims=True)) \
                 /(images.std(axis=1, keepdims=True) + 1e-5)
        # cache the resulting array for future use
        np.save(self.test_file, images)
        return images

    def train_test_split(self, X, y, test_size=0.1):
        sss = StratifiedShuffleSplit(y,
                                     n_iter=1,
                                     random_state=self.rng,
                                     test_size=test_size)
        # we only split once so do not use iter, just convert to list and get first split
        train, test = list(sss).pop()
        return X[train], X[test], y[train], y[test]

    def save_submission(self, y_pred, file_suffix=""):
        # sanity-check
        h, w = y_pred.shape
        assert w == len(self.class_labels), "Not all class labels present"
        # number of test cases
        assert h == len(
            self.testfilenames), "Not all test observations present"
        colnames = [self.class_labels[ind] for ind in xrange(121)]
        dfr = pd.DataFrame(y_pred, index=self.testfilenames, columns=colnames)
        dfr.to_csv(os.path.join(self.data_dir, "submissions",
                                "submission-%s.csv" % file_suffix),
                   format="%f",
                   index_label="image")
Example #9
  ->masks_orig
    ->train
    ->val
  ->augs
    ->img
    ->mask
'''

# Augment original training set of images
in_img = r'C:\Users\ahls_st\Documents\MasterThesis\IKONOS\With_Hedges\FourBands\Splits\imgs\train'  #change
aug_img_dir = r'D:\Steve\IKONOS\4band_geo_only\imgs'  #change
in_mask = r'C:\Users\ahls_st\Documents\MasterThesis\IKONOS\With_Hedges\ThreeBands\Splits\BGR\Masks\train'  #change
aug_mask_dir = r'D:\Steve\IKONOS\4band_geo_only\masks'  #change

# Creates augmented versions of the training data. Can choose to do single or combined augmentations
augmenter = Augmenter(in_img, in_mask, '.png')
#augmenter.augment(aug_img_dir, aug_mask_dir, n_bands=3) # performs all augmentations as singles
augmenter.augment_combo(aug_img_dir,
                        aug_mask_dir,
                        n_bands=4,
                        times=23,
                        n_geo=2,
                        n_spec=0)  #performs geo_only augmentations
augmenter.random_crop(aug_img_dir, aug_mask_dir, n_bands=4,
                      num=8)  #randomly crops

# Get rid of hedge masks where only a little bit is showing at the edge of the image
# Best to inspect the removed files before continuing to make sure the erase threshold is ok

if not os.path.exists(os.path.join(aug_mask_dir, 'Cleaned')):
    os.mkdir(os.path.join(aug_mask_dir, 'Cleaned'))
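The snippet stops after creating the 'Cleaned' directory; the actual filtering is not shown. A rough sketch of one way to finish the step, moving masks with too little foreground into 'Cleaned' for manual inspection (the threshold value and single-band .png layout are assumptions):

import os
import shutil
import numpy as np
from PIL import Image

ERASE_THRESHOLD = 100  # minimum number of foreground pixels to keep a mask (assumed value)

for fname in os.listdir(aug_mask_dir):
    if not fname.endswith('.png'):
        continue
    mask = np.array(Image.open(os.path.join(aug_mask_dir, fname)))
    if np.count_nonzero(mask) < ERASE_THRESHOLD:
        shutil.move(os.path.join(aug_mask_dir, fname),
                    os.path.join(aug_mask_dir, 'Cleaned', fname))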