Example #1
0
    def __init__(self, teacher, dataset_path, store_path, dataset_config,
                 best_trade_off):
        '''
        Prepare the store folders, the teacher predictor and the sampler.

        :param teacher: mapping whose 'model' and 'params' entries are handed
         to util.create_simple_predictor.
        :param dataset_path: dataset location forwarded to Creator.
        :param store_path: folder created here, together with its train,
         valid and test subfolders. It must not exist beforehand.
        :param dataset_config: configuration object; its attributes supply
         the rotation flag and the Creator settings.
        :param best_trade_off: kept unchanged as self.trade_off.
        :raises Exception: when store_path already exists.
        '''
        self.teacher = teacher
        self.dataset_path = dataset_path
        self.store_path = store_path
        self.dataset_config = dataset_config
        self.rotate = dataset_config.use_rotation
        self.trade_off = best_trade_off

        # Never write into a folder that is already there.
        if os.path.exists(self.store_path):
            raise Exception("Store path already exists")
        os.makedirs(self.store_path)
        for subset in ("train", "valid", "test"):
            os.makedirs(os.path.join(self.store_path, subset))

        self.evaluate = util.create_simple_predictor(teacher['model'],
                                                     teacher['params'])

        config = self.dataset_config
        creator_options = dict(
            dim=(config.input_dim, config.output_dim),
            preproccessing=config.use_preprocessing,
            std=config.dataset_std,
            reduce_training=config.reduce_training,
            reduce_testing=config.reduce_testing,
            reduce_validation=config.reduce_validation,
            only_mixed=config.only_mixed_labels,
            mix_ratio=config.mix_ratio)
        self.creator = Creator(self.dataset_path, **creator_options)
        self.creator.load_dataset()
Example #2
0
    def load(self, dataset_path, params, batch_size=1):
        '''
        Sample the aerial dataset through a Creator and register the result
        on this object.

        :param dataset_path: dataset location forwarded to Creator.
        :param params: configuration object; its attributes supply the
         Creator settings, the sampling settings and the chunk size.
        :param batch_size: passed to the dataset checks and to _chunkify.
        :return: True once all sets have been registered.
        '''
        print_section('Creating aerial image dataset')

        self.std = params.dataset_std
        chunk_size = params.chunk_size

        #TODO: ensure that the dataset is as expected.
        sampler = Creator(dataset_path,
                          dim=(params.input_dim, params.output_dim),
                          rotation=params.use_rotation,
                          preproccessing=params.use_preprocessing,
                          std=self.std,
                          only_mixed=params.only_mixed_labels,
                          reduce_testing=params.reduce_testing,
                          reduce_training=params.reduce_training,
                          reduce_validation=params.reduce_validation)
        train, valid, test = sampler.dynamically_create(
            params.samples_per_image,
            enable_label_noise=params.use_label_noise,
            label_noise=params.label_noise,
            only_mixed=params.only_mixed_labels)

        #Testing dataset size requirements
        for label, subset in (('train', train), ('valid', valid),
                              ('test', test)):
            AerialDataset.dataset_check(label, subset, batch_size)

        AerialDataset.dataset_shared_stats(train[0].shape, train[1].shape,
                                           chunk_size)

        self.set_nr_examples(train, valid, test)

        nr_of_chunks = AerialDataset.dataset_sizes(train, valid, test,
                                                   chunk_size)
        training_chunks = self._chunkify(train, nr_of_chunks, batch_size)

        first_chunk = training_chunks[0]
        last_chunk = training_chunks[-1]
        AerialDataset.dataset_chunk_stats(len(training_chunks),
                                          len(first_chunk[0]),
                                          len(last_chunk[0]))

        # Only the first training chunk becomes the active shared set; its
        # second element is cast to int32 for the 'train' entry.
        self.active = self.shared_dataset(first_chunk, cast_to_int=False)
        active_data = self.active[0]
        active_labels = T.cast(self.active[1], 'int32')
        self.set['train'] = active_data, active_labels
        self.set['validation'] = self.shared_dataset(valid, cast_to_int=True)
        self.set['test'] = self.shared_dataset(test, cast_to_int=True)

        #Not stored on the GPU, unlike the shared variables defined above.
        self.all_training = training_chunks
        return True
Example #3
0
    def __init__(self, teacher, dataset_path, store_path, dataset_config, best_trade_off):
        '''
        Create the store folder layout, build the teacher predictor and load
        the dataset through a Creator.

        :param teacher: mapping with 'model' and 'params' entries, both passed
         to util.create_simple_predictor.
        :param dataset_path: dataset location given to Creator.
        :param store_path: new folder to create (with train, valid and test
         inside it).
        :param dataset_config: configuration object read for the rotation
         flag and the Creator settings.
        :param best_trade_off: stored as self.trade_off.
        :raises Exception: when store_path already exists.
        '''
        self.dataset_path, self.store_path = dataset_path, store_path
        self.teacher = teacher
        self.dataset_config = dataset_config
        self.rotate = dataset_config.use_rotation
        self.trade_off = best_trade_off

        # Guard clause: an existing store is never overwritten.
        if os.path.exists(self.store_path):
            raise Exception("Store path already exists")
        os.makedirs(self.store_path)
        for folder in ("train", "valid", "test"):
            os.makedirs(os.path.join(self.store_path, folder))

        model, model_params = teacher['model'], teacher['params']
        self.evaluate = util.create_simple_predictor(model, model_params)

        cfg = self.dataset_config
        self.creator = Creator(
            self.dataset_path,
            dim=(cfg.input_dim, cfg.output_dim),
            preproccessing=cfg.use_preprocessing,
            std=cfg.dataset_std,
            reduce_training=cfg.reduce_training,
            reduce_testing=cfg.reduce_testing,
            reduce_validation=cfg.reduce_validation,
            only_mixed=cfg.only_mixed_labels,
            mix_ratio=cfg.mix_ratio
        )
        self.creator.load_dataset()
Example #4
0
    def load(self, dataset_path, params, batch_size=1):
        '''
        Build the train, validation and test sets with a Creator and store
        them on this object.

        :param dataset_path: dataset location given to Creator.
        :param params: configuration object providing the Creator settings,
         the sampling settings and chunk_size.
        :param batch_size: forwarded to dataset_check and _chunkify.
        :return: always True.
        '''
        print_section('Creating aerial image dataset')

        self.std = params.dataset_std
        chunk_size = params.chunk_size

        #TODO: ensure that the dataset is as expected.
        creator_options = dict(
            dim=(params.input_dim, params.output_dim),
            rotation=params.use_rotation,
            preproccessing=params.use_preprocessing,
            std=self.std,
            only_mixed=params.only_mixed_labels,
            reduce_testing=params.reduce_testing,
            reduce_training=params.reduce_training,
            reduce_validation=params.reduce_validation
        )
        dataset_creator = Creator(dataset_path, **creator_options)
        train, valid, test = dataset_creator.dynamically_create(
            params.samples_per_image,
            enable_label_noise=params.use_label_noise,
            label_noise=params.label_noise,
            only_mixed=params.only_mixed_labels
        )

        #Testing dataset size requirements
        named_sets = (('train', train), ('valid', valid), ('test', test))
        for set_label, examples in named_sets:
            AerialDataset.dataset_check(set_label, examples, batch_size)

        train_data, train_labels = train[0], train[1]
        AerialDataset.dataset_shared_stats(train_data.shape, train_labels.shape, chunk_size)

        self.set_nr_examples(train, valid, test)

        nr_of_chunks = AerialDataset.dataset_sizes(train, valid, test, chunk_size)
        training_chunks = self._chunkify(train, nr_of_chunks, batch_size)

        chunk_count = len(training_chunks)
        first_size = len(training_chunks[0][0])
        last_size = len(training_chunks[-1][0])
        AerialDataset.dataset_chunk_stats(chunk_count, first_size, last_size)

        # The first chunk is the active training set; the 'train' entry pairs
        # its first element with an int32 cast of its second element.
        self.active = self.shared_dataset(training_chunks[0], cast_to_int=False)
        self.set['train'] = (self.active[0], T.cast(self.active[1], 'int32'))
        self.set['validation'] = self.shared_dataset(valid, cast_to_int=True)
        self.set['test'] = self.shared_dataset(test, cast_to_int=True)

        #Not stored on the GPU, unlike the shared variables defined above.
        self.all_training = training_chunks
        return True
Example #5
0
 def _create_dataset(self, set_name):
     '''
     Sample the validation or test images and wrap them with
     AerialDataset.shared_dataset.

     :param set_name: "valid" picks creator.valid; every other value picks
      creator.test.
     :return: whatever shared_dataset returns for the sampled set
      (called with cast_to_int=True).
     '''
     config = self.dataset_config
     patch_dims = (config.input_dim, config.output_dim)
     use_preprocessing = config.use_preprocessing
     print("---- Using preprossing: {}".format(use_preprocessing))

     creator = Creator(self.dataset_path,
                       dim=patch_dims,
                       preproccessing=use_preprocessing,
                       std=config.dataset_std)
     creator.load_dataset()

     #Creating a shared variable of sampled test data
     raw_set = creator.valid if set_name == "valid" else creator.test
     samples_per_image = 200
     aerial = AerialDataset()
     sampled = creator.sample_data(raw_set, samples_per_image)
     return aerial.shared_dataset(sampled, cast_to_int=True)
Example #6
0
 def _create_dataset(self, set_name):
     '''
     Load the dataset, sample either its validation or its test part and
     return it as a shared dataset.

     :param set_name: "valid" selects the validation part, anything else the
      test part.
     :return: result of AerialDataset.shared_dataset with cast_to_int=True.
     '''
     cfg = self.dataset_config
     dims = (cfg.input_dim, cfg.output_dim)
     preprocess = cfg.use_preprocessing
     print("---- Using preprossing: {}".format(preprocess))

     loader = Creator(self.dataset_path, dim=dims, preproccessing=preprocess,
                      std=cfg.dataset_std)
     loader.load_dataset()

     #Creating a shared variable of sampled test data
     part_name = "valid" if set_name == "valid" else "test"
     raw_set = getattr(loader, part_name)

     aerial = AerialDataset()
     sampled_part = loader.sample_data(raw_set, 200)
     return aerial.shared_dataset(sampled_part, cast_to_int=True)
Example #7
0
class CurriculumDataset(object):
    '''
    Pre-generates a staged (curriculum) dataset on disk.

    Examples are drawn through a Creator and written below store_path as
    train/stage<i>/{data,labels}/examples.npy for every stage, plus test/ and
    valid/ folders with the same data/labels layout.
    '''

    def __init__(self, teacher, dataset_path, store_path, dataset_config,
                 best_trade_off):
        '''
        Create the store folders, build the teacher predictor and load the
        dataset.

        :param teacher: mapping with 'model' and 'params' entries, passed to
         util.create_simple_predictor.
        :param dataset_path: dataset location forwarded to Creator.
        :param store_path: folder to create for the generated data. It must
         not exist beforehand.
        :param dataset_config: configuration object supplying the rotation
         flag and the Creator settings.
        :param best_trade_off: forwarded to sample_data for training stages.
        :raises Exception: when store_path already exists.
        '''
        self.dataset_path = dataset_path
        self.store_path = store_path
        self.teacher = teacher
        self.dataset_config = dataset_config
        self.rotate = dataset_config.use_rotation
        self.trade_off = best_trade_off

        if os.path.exists(self.store_path):
            raise Exception("Store path already exists")
        else:
            os.makedirs(self.store_path)
            os.makedirs(os.path.join(self.store_path, "train"))
            os.makedirs(os.path.join(self.store_path, "valid"))
            os.makedirs(os.path.join(self.store_path, "test"))

        self.evaluate = util.create_simple_predictor(teacher['model'],
                                                     teacher['params'])
        self.creator = Creator(
            self.dataset_path,
            dim=(self.dataset_config.input_dim,
                 self.dataset_config.output_dim),
            preproccessing=self.dataset_config.use_preprocessing,
            std=self.dataset_config.dataset_std,
            reduce_training=self.dataset_config.reduce_training,
            reduce_testing=self.dataset_config.reduce_testing,
            reduce_validation=self.dataset_config.reduce_validation,
            only_mixed=self.dataset_config.only_mixed_labels,
            mix_ratio=self.dataset_config.mix_ratio)
        self.creator.load_dataset()

    def create_dataset(self,
                       is_baseline,
                       thresholds=None,
                       base_sample=100,
                       secondary_sample=100):
        '''
        Generate every training stage, then the test and validation sets.

        :param is_baseline: when true, every stage uses a threshold of 1; the
         number of stages is still taken from thresholds.
        :param thresholds: one curriculum threshold per stage, given as a
         numpy array, list, tuple or single number. Defaults to
         np.arange(0.05, 1, 0.05).
        :param base_sample: samples argument used for stage0, test and valid.
        :param secondary_sample: samples argument used for the later stages.
        '''
        print("---- Starting sampling. WARNING: this might take a while.")

        #Sampling at different thresholds.
        # "is None" is required here: comparing a numpy array with == gives
        # an element-wise result that cannot be used as a condition.
        if thresholds is None:
            thresholds = np.arange(0.05, 1, 0.05)
        else:
            # Accept plain sequences as well; .shape is needed below.
            thresholds = np.atleast_1d(thresholds)
        if is_baseline:
            thresholds = np.ones(thresholds.shape)

        print("---- Main dataset")
        self._generate_stage("stage0", thresholds[0], base_sample)
        for i in range(1, thresholds.shape[0]):
            print("---- Stage{} dataset".format(i))
            self._generate_stage("stage{}".format(i), thresholds[i],
                                 secondary_sample)

        self._generate_set("test", self.creator.test, base_sample)
        self._generate_set("valid", self.creator.valid, base_sample)

    def _save_examples(self, target_path, data, labels):
        '''
        Create the "labels" and "data" folders inside target_path and save
        each array there under the name "examples".
        '''
        os.makedirs(os.path.join(target_path, "labels"))
        os.makedirs(os.path.join(target_path, "data"))
        np.save(os.path.join(target_path, "labels", "examples"), labels)
        np.save(os.path.join(target_path, "data", "examples"), data)

    def _generate_set(self, set_name, dataset, samples):
        '''
        Validation and test data are pre-generated as well, which keeps the
        stored result self contained.

        :param set_name: existing subfolder of store_path to write into.
        :param dataset: set handed to creator.sample_data.
        :param samples: samples argument for creator.sample_data.
        '''
        data, labels = self.creator.sample_data(dataset, samples)
        self._save_examples(os.path.join(self.store_path, set_name), data,
                            labels)

    def _generate_stage(self, name, threshold, samples):
        '''
        Sample one training stage and store it in train/<name>.

        The training set is a special case: the train folder holds several
        stages, which can be introduced into the active training data over
        time, slowly transforming the simple distribution into the real
        dataset distribution.

        :param name: folder name of the stage, created below train/.
        :param threshold: passed to sample_data as curriculum_threshold.
        :param samples: samples argument for creator.sample_data.
        '''
        print("SAMPLES ", samples)
        stage_path = os.path.join(self.store_path, "train", name)
        os.makedirs(stage_path)
        data, labels = self.creator.sample_data(
            self.creator.train,
            samples,
            mixed_labels=self.dataset_config.only_mixed_labels,
            curriculum=self.evaluate,
            curriculum_threshold=threshold,
            rotation=self.rotate,
            best_trade_off=self.trade_off)
        self._save_examples(stage_path, data, labels)
Example #8
0
#Dataset path. Config used if not supplied
# get_command is unpacked into a (found, value) pair; when '-dataset' was
# supplied, its value replaces the dataset_path defined earlier in the script.
is_alt_dataset, alt_dataset = get_command('-dataset')
if is_alt_dataset:
    dataset_path = alt_dataset
#==============================================================

# Load the stored teacher and build a predictor from its 'model' and
# 'params' entries.
store = ParamStorage()
teacher = store.load_params(path=teacher_location)
evaluate = util.create_simple_predictor(teacher['model'], teacher['params'])

# (data, labels) come from one of two sources: freshly sampled training
# examples (default) or, when verify is set, a stored curriculum stage.
# NOTE(review): the sampling branch reads from pr_path, so a '-dataset'
# override only affects the verify branch -- confirm this is intended.
if not verify:
    creator = Creator(pr_path,
                      dim=(dataset_params.input_dim,
                           dataset_params.output_dim),
                      preproccessing=dataset_params.use_preprocessing,
                      std=dataset_params.dataset_std,
                      reduce_training=dataset_params.reduce_training,
                      reduce_testing=dataset_params.reduce_testing,
                      reduce_validation=dataset_params.reduce_validation)
    creator.load_dataset()

    data, labels = creator.sample_data(creator.train,
                                       samples,
                                       rotation=dataset_params.use_rotation)
else:
    aerial_data = AerialCurriculumDataset()
    data, labels = aerial_data.load_set(dataset_path, "train", stage=stage)

# Empty accumulators; presumably filled later in the script (not visible in
# this excerpt) -- TODO confirm what each list collects.
road_diff = []
non_road_diff = []
all_diff = []
Example #9
0
# is_alt_dataset and alt_dataset are set before this excerpt; when the flag is
# true the alternative value replaces dataset_path.
if is_alt_dataset:
    dataset_path = alt_dataset
#==============================================================



# Load the stored teacher and build a predictor from its 'model' and
# 'params' entries.
store = ParamStorage()
teacher = store.load_params(path=teacher_location)
evaluate = util.create_simple_predictor(teacher['model'], teacher['params'])

# (data, labels) come from one of two sources: freshly sampled training
# examples (default) or, when verify is set, a stored curriculum stage.
# NOTE(review): the sampling branch reads from pr_path while the verify
# branch reads from dataset_path -- confirm this difference is intended.
if not verify:
    creator = Creator(
        pr_path,
        dim=(dataset_params.input_dim, dataset_params.output_dim),
        preproccessing=dataset_params.use_preprocessing,
        std=dataset_params.dataset_std,
        reduce_training=dataset_params.reduce_training,
        reduce_testing=dataset_params.reduce_testing,
        reduce_validation=dataset_params.reduce_validation
    )
    creator.load_dataset()

    data, labels = creator.sample_data(
        creator.train,
        samples,
        rotation=dataset_params.use_rotation
    )
else:
    aerial_data = AerialCurriculumDataset()
    data, labels = aerial_data.load_set(dataset_path, "train", stage=stage)
Example #10
0
class CurriculumDataset(object):
    '''
    Writes a staged (curriculum) dataset to disk.

    Every stage is sampled through a Creator and saved below store_path as
    train/stage<i>/{data,labels}/examples.npy; the test and valid folders get
    the same data/labels layout.
    '''

    def __init__(self, teacher, dataset_path, store_path, dataset_config, best_trade_off):
        '''
        Create the store folders, build the teacher predictor and load the dataset.

        :param teacher: mapping with 'model' and 'params' entries, passed to util.create_simple_predictor.
        :param dataset_path: dataset location forwarded to Creator.
        :param store_path: folder to create for the generated data. It must not exist beforehand.
        :param dataset_config: configuration object supplying the rotation flag and the Creator settings.
        :param best_trade_off: forwarded to sample_data for training stages.
        :raises Exception: when store_path already exists.
        '''
        self.dataset_path = dataset_path
        self.store_path = store_path
        self.teacher = teacher
        self.dataset_config = dataset_config
        self.rotate = dataset_config.use_rotation
        self.trade_off = best_trade_off

        if os.path.exists(self.store_path):
            raise Exception("Store path already exists")
        else:
            os.makedirs(self.store_path)
            os.makedirs(os.path.join(self.store_path, "train"))
            os.makedirs(os.path.join(self.store_path, "valid"))
            os.makedirs(os.path.join(self.store_path, "test"))

        self.evaluate = util.create_simple_predictor(teacher['model'], teacher['params'])
        self.creator = Creator(
            self.dataset_path,
            dim=(self.dataset_config.input_dim, self.dataset_config.output_dim),
            preproccessing=self.dataset_config.use_preprocessing,
            std=self.dataset_config.dataset_std,
            reduce_training=self.dataset_config.reduce_training,
            reduce_testing=self.dataset_config.reduce_testing,
            reduce_validation=self.dataset_config.reduce_validation,
            only_mixed=self.dataset_config.only_mixed_labels,
            mix_ratio=self.dataset_config.mix_ratio
        )
        self.creator.load_dataset()


    def create_dataset(self, is_baseline, thresholds=None, base_sample=100, secondary_sample=100):
        '''
        Generate every training stage, then the test and validation sets.

        :param is_baseline: when true, every stage uses a threshold of 1; the number of stages still
         follows thresholds.
        :param thresholds: one curriculum threshold per stage, given as a numpy array, list, tuple or
         single number. Defaults to np.arange(0.05, 1, 0.05).
        :param base_sample: samples argument used for stage0, test and valid.
        :param secondary_sample: samples argument used for the later stages.
        '''
        print("---- Starting sampling. WARNING: this might take a while.")

        #Sampling at different thresholds.
        # Identity test on purpose: "array == None" is evaluated element-wise and cannot be used as
        # a condition.
        if thresholds is None:
            thresholds = np.arange(0.05, 1, 0.05)
        else:
            # Plain sequences are accepted too; .shape is needed below.
            thresholds = np.atleast_1d(thresholds)
        if is_baseline:
            thresholds = np.ones(thresholds.shape)

        print("---- Main dataset")
        self._generate_stage("stage0", thresholds[0], base_sample)
        for i in range(1, thresholds.shape[0]):
            print("---- Stage{} dataset".format(i))
            self._generate_stage("stage{}".format(i), thresholds[i], secondary_sample)

        self._generate_set("test", self.creator.test, base_sample)
        self._generate_set("valid", self.creator.valid, base_sample)


    def _save_examples(self, target_path, data, labels):
        '''
        Create the "labels" and "data" folders inside target_path and save each array there under the
        name "examples".
        '''
        os.makedirs(os.path.join(target_path, "labels"))
        os.makedirs(os.path.join(target_path, "data"))
        np.save(os.path.join(target_path, "labels", "examples"), labels)
        np.save(os.path.join(target_path, "data", "examples"), data)


    def _generate_set(self, set_name, dataset, samples):
        '''
        Validation and test data are pre-generated as well, which keeps the stored result self contained.

        :param set_name: existing subfolder of store_path to write into.
        :param dataset: set handed to creator.sample_data.
        :param samples: samples argument for creator.sample_data.
        '''
        data, labels = self.creator.sample_data(dataset, samples)
        self._save_examples(os.path.join(self.store_path, set_name), data, labels)


    def _generate_stage(self, name, threshold, samples):
        '''
        Sample one training stage and store it in train/<name>.

        The training set is a special case: the train folder holds several stages, which can be
        introduced into the active training data over time, slowly transforming the simple distribution
        into the real dataset distribution.

        :param name: folder name of the stage, created below train/.
        :param threshold: passed to sample_data as curriculum_threshold.
        :param samples: samples argument for creator.sample_data.
        '''
        print("SAMPLES ", samples)
        stage_path = os.path.join(self.store_path, "train", name)
        os.makedirs(stage_path)
        data, labels = self.creator.sample_data(
            self.creator.train,
            samples,
            mixed_labels=self.dataset_config.only_mixed_labels,
            curriculum=self.evaluate,
            curriculum_threshold=threshold,
            rotation=self.rotate,
            best_trade_off=self.trade_off
        )
        self._save_examples(stage_path, data, labels)
Example #11
0
width and height of dim_label.
'''
# Dataset root handed to Creator below (absolute, machine-specific path).
dataset_dir = "/home/olav/Pictures/Norwegian_roads_dataset_vbase"
# Showcase grid: x_grid cells per row and y_grid rows.
x_grid = 5
y_grid = 4
# Each grid cell is 2 * dim_data wide and dim_data high (plus padding).
# presumably the side length in pixels of an image patch -- TODO confirm
dim_data = 64
# Only used to derive l_pad from the difference to dim_data.
# presumably the side length in pixels of a label patch -- TODO confirm
dim_label = 16
# Extra space added once per cell to both the canvas width and height.
padding = 8

def to_rgb(im, w, h):
    '''
    Copy im into all three channels of a new uint8 array.

    :param im: values assigned to every channel; must be assignable to a
     (w, h) slice.
    :param w: size of the first axis of the result.
    :param h: size of the second axis of the result.
    :return: uint8 array of shape (w, h, 3).
    '''
    rgb = np.empty((w, h, 3), dtype=np.uint8)
    for channel in (2, 1, 0):
        rgb[:, :, channel] = im
    return rgb

# Half of the size difference between dim_data and dim_label. Floor division
# keeps the value an int on Python 3 as well (identical result on Python 2).
l_pad = (dim_data - dim_label) // 2

# Load the dataset and draw training samples with mixed labels only.
c = Creator(dataset_dir, preproccessing=False, only_mixed=True)
c.load_dataset()
data, labels = c.sample_data(c.train, 10, mixed_labels=True)

# Random order in which samples are taken. random.shuffle and the later
# .pop() need a real list; on Python 3 a bare range() object is immutable.
shuffled_index = list(range(len(data)))
random.shuffle(shuffled_index)

# Canvas size: x_grid cells of width 2 * dim_data and y_grid cells of height
# dim_data, with padding added once per cell in each direction.
width = x_grid * 2 * dim_data + (padding * x_grid)
height = y_grid * dim_data + (padding * y_grid)

# White canvas.
patch_showcase = np.zeros((height, width, 3), dtype=np.uint8)
patch_showcase[:, :, :] = (255, 255, 255)
#Puts the label and images in a grid pattern, which include padding inbetween .
for i in range(0,height, dim_data + padding):
    for j in range(0, width, (dim_data*2) +padding):
        idx = shuffled_index.pop()
Example #12
0
# Dataset root handed to Creator below (absolute, machine-specific path).
dataset_dir = "/home/olav/Pictures/Norwegian_roads_dataset_vbase"
# Showcase grid: x_grid cells per row and y_grid rows.
x_grid = 5
y_grid = 4
# Each grid cell is 2 * dim_data wide and dim_data high (plus padding).
# presumably the side length in pixels of an image patch -- TODO confirm
dim_data = 64
# Only used to derive l_pad from the difference to dim_data.
# presumably the side length in pixels of a label patch -- TODO confirm
dim_label = 16
# Extra space added once per cell to both the canvas width and height.
padding = 8


def to_rgb(im, w, h):
    '''
    Build a three-channel uint8 array in which every channel holds im.

    :param im: values written to each channel; must be assignable to a
     (w, h) slice.
    :param w: length of the first axis.
    :param h: length of the second axis.
    :return: uint8 array of shape (w, h, 3).
    '''
    channels = 3
    stacked = np.empty((w, h, channels), dtype=np.uint8)
    for index in reversed(range(channels)):
        stacked[:, :, index] = im
    return stacked


# Half of the size difference between dim_data and dim_label. Floor division
# keeps the value an int on Python 3 as well (identical result on Python 2).
l_pad = (dim_data - dim_label) // 2

# Load the dataset and draw training samples with mixed labels only.
c = Creator(dataset_dir, preproccessing=False, only_mixed=True)
c.load_dataset()
data, labels = c.sample_data(c.train, 10, mixed_labels=True)

# Random order in which samples are taken. random.shuffle and the later
# .pop() need a real list; on Python 3 a bare range() object is immutable.
shuffled_index = list(range(len(data)))
random.shuffle(shuffled_index)

# Canvas size: x_grid cells of width 2 * dim_data and y_grid cells of height
# dim_data, with padding added once per cell in each direction.
width = x_grid * 2 * dim_data + (padding * x_grid)
height = y_grid * dim_data + (padding * y_grid)

# White canvas.
patch_showcase = np.zeros((height, width, 3), dtype=np.uint8)
patch_showcase[:, :, :] = (255, 255, 255)

#Puts the label and images in a grid pattern, which include padding inbetween .
for i in range(0, height, dim_data + padding):
    for j in range(0, width, (dim_data * 2) + padding):
        idx = shuffled_index.pop()