def salinas_a_loader(folder):
    """Load the Salinas-A scene and its ground truth.

    File names are taken from ``CUSTOM_DATASETS_CONFIG['SalinasA']`` and the
    arrays are read from the ``salinasA_corrected`` and ``salinasA_gt``
    entries of the opened files.

    Args:
        folder: path prefix that is concatenated with the configured names.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['SalinasA']
    img = open_file(folder + config['img'])['salinasA_corrected']
    gt = open_file(folder + config['gt'])['salinasA_gt'].astype('uint8')

    # The raw labels are 0, 1 and 10..14; squeeze them into 0..6 so that a
    # label can be used directly as an index into label_values.
    raw_to_compact = ((0, 0), (1, 1), (10, 2), (11, 3), (12, 4), (13, 5),
                      (14, 6))
    for raw_label, compact_label in raw_to_compact:
        gt = np.where(gt == raw_label, compact_label, gt)

    label_values = [
        "Unclassified",
        "Brocoli_green_weeds_1",
        "Corn_senesced_green_weeds",
        "Lettuce_romaine_4wk",
        "Lettuce_romaine_5wk",
        "Lettuce_romaine_6wk",
        "Lettuce_romaine_7wk",
    ]
    rgb_bands = (47, 27, 13)
    ignored_labels = []
    palette = None
    return img, gt, rgb_bands, ignored_labels, label_values, palette
def washington_loader(folder):
    """Load the Washington scene and its ground truth.

    Both files are named by ``CUSTOM_DATASETS_CONFIG['Washington']`` and the
    objects returned by ``open_file`` are passed on unchanged.

    Args:
        folder: path prefix that is concatenated with the configured names.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['Washington']
    img = open_file(folder + config['img'])
    gt = open_file(folder + config['gt'])

    # http://sugs.u-strasbg.fr/omiv/imagemining/documents/IMAGEMINING-DallaMurra-practicals.pdf
    label_values = [
        "Roofs",
        "Street",
        "Path",
        "Grass",
        "Trees",
        "Water",
        "Shadow",
    ]
    rgb_bands = (60, 27, 17)
    ignored_labels = []
    palette = None
    return img, gt, rgb_bands, ignored_labels, label_values, palette
def urban_210_loader(folder):
    """Load the Urban (210 bands) scene and derive a hard ground truth.

    The image is the ``'Y'`` entry of the configured image file, reshaped to
    307 x 307 x 210. The ground truth is, for every column of the ``'A'``
    entry of the configured ground-truth file, the index of its largest
    value along axis 0, reshaped to 307 x 307.

    Args:
        folder: path prefix that is concatenated with the configured names.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['Urban-210']

    flat_cube = open_file(folder + config['img'])['Y']
    # NOTE(review): if 'Y' is stored bands-first (the axis-0 argmax over 'A'
    # below suggests a classes-first layout for the ground truth), a plain
    # reshape does not put each pixel's spectrum on the last axis - confirm
    # the stored layout.
    img = np.reshape(flat_cube, (307, 307, 210))

    a_values = open_file(folder + config['gt'])['A']
    winning_rows = np.matrix(a_values.argmax(0))
    gt = np.reshape(np.asarray(winning_rows), (307, 307))

    # manually calculated, assuming linear distribution of bands among
    # wavelengths
    rgb_bands = (17, 14, 3)
    label_values = ["Asphalt", "Grass", "Tree", "Roof", "Metal", "Dirt"]
    ignored_labels = []
    palette = None
    return img, gt, rgb_bands, ignored_labels, label_values, palette
def samson_loader(folder):
    """Load the Samson scene and derive a hard ground truth.

    The image is the ``'V'`` entry of the configured image file, reshaped to
    95 x 95 x 156. The ground truth is, for every column of the ``'A'`` entry
    of the configured ground-truth file, the index of its largest value along
    axis 0, reshaped to 95 x 95.

    Args:
        folder: path prefix that is concatenated with the configured names.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['Samson']

    flat_cube = open_file(folder + config['img'])['V']
    # NOTE(review): if 'V' is stored bands-first, a plain reshape does not
    # put each pixel's spectrum on the last axis - confirm the stored layout.
    img = np.reshape(flat_cube, (95, 95, 156))

    a_matrix = np.matrix(open_file(folder + config['gt'])['A'])
    winning_rows = a_matrix.argmax(0)
    gt = np.reshape(np.asarray(winning_rows), (95, 95))

    # manually calculated, assuming linear distribution of bands among
    # wavelengths
    rgb_bands = (9, 44, 54)
    label_values = ["Rock", "Tree", "Water"]
    ignored_labels = []
    palette = None
    return img, gt, rgb_bands, ignored_labels, label_values, palette
def dfc2018_loader(folder):
    """Load the DFC2018 hyperspectral scene and its ground truth.

    The last two entries along the third axis of the opened image are
    dropped; the ground truth is cast to ``uint8``. Label 0
    ("Unclassified") is reported as ignored.

    Args:
        folder: path prefix that is concatenated with the configured names.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['DFC2018_HSI']

    full_cube = open_file(folder + config['img'])
    img = full_cube[:, :, :-2]
    gt = open_file(folder + config['gt']).astype('uint8')

    label_values = [
        "Unclassified",
        "Healthy grass",
        "Stressed grass",
        "Artificial turf",
        "Evergreen trees",
        "Deciduous trees",
        "Bare earth",
        "Water",
        "Residential buildings",
        "Non-residential buildings",
        "Roads",
        "Sidewalks",
        "Crosswalks",
        "Major thoroughfares",
        "Highways",
        "Railways",
        "Paved parking lots",
        "Unpaved parking lots",
        "Cars",
        "Trains",
        "Stadium seats",
    ]
    rgb_bands = (47, 31, 15)
    ignored_labels = [0]
    palette = None
    return img, gt, rgb_bands, ignored_labels, label_values, palette
def jasper_ridge_224_loader(folder):
    """Load the Jasper Ridge (224 bands) scene and derive a hard ground truth.

    The image is the ``'Y'`` entry of the configured image file, reshaped to
    100 x 100 x 224. The ground truth is, for every column of the ``'A'``
    entry of the configured ground-truth file, the index of its largest
    value along axis 0, reshaped to 100 x 100.

    Args:
        folder: path prefix that is concatenated with the configured names.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['JasperRidge-224']

    flat_cube = open_file(folder + config['img'])['Y']
    # NOTE(review): if 'Y' is stored bands-first, a plain reshape does not
    # put each pixel's spectrum on the last axis - confirm the stored layout.
    img = np.reshape(flat_cube, (100, 100, 224))

    a_values = open_file(folder + config['gt'])['A']
    winning_rows = np.matrix(a_values.argmax(0))
    gt = np.reshape(np.asarray(winning_rows), (100, 100))

    # manually calculated, assuming linear distribution of bands among
    # wavelengths
    rgb_bands = (5, 16, 20)
    label_values = ["Road", "Soil", "Water", "Tree"]
    ignored_labels = []
    palette = None
    return img, gt, rgb_bands, ignored_labels, label_values, palette
def usa_loader(folder):
    """Load the USA scene and convert its colour-coded map to class indices.

    Both the image (``'T2'``) and the colour-coded ground truth
    (``'Multiple'``) are read from the file configured under
    ``CUSTOM_DATASETS_CONFIG['USA']['img']``; the ``'gt'`` entry of the
    configuration is not used by this loader.

    Every pixel of the colour map whose three channels equal one of the
    reference colours gets that colour's class index. Pixels matching none
    of them keep index 0.

    Args:
        folder: path prefix that is concatenated with the configured name.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``
        where ``gt`` is a 2D ``uint8`` array.

    Raises:
        ValueError: if the colour map is not an H x W x 3 array.
    """
    contents = open_file(folder + CUSTOM_DATASETS_CONFIG['USA']['img'])
    img = contents['T2']
    colour_map = np.asarray(contents['Multiple'])
    if colour_map.ndim != 3 or colour_map.shape[-1] != 3:
        raise ValueError(
            "Expected an H x W x 3 colour map, got shape {}.".format(
                colour_map.shape))

    # Position in this table == class index written to gt.
    class_colours = (
        (255, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (255, 0, 255),
        (0, 255, 255),
        # from here on, colours without an entry in label_values
        (0, 0, 0),
        (255, 255, 255),
    )

    # One vectorised mask per colour instead of a Python loop over pixels.
    # The colours are pairwise distinct, so the masks never overlap.
    gt = np.zeros(colour_map.shape[:2], dtype='uint8')
    for class_index, colour in enumerate(class_colours):
        gt[np.all(colour_map == colour, axis=-1)] = class_index

    rgb_bands = (0, 0, 0)  # not given
    label_values = [
        "soil",
        "irrigated fields",
        "river",
        "building",
        "type of cultivated land",
        "grassland",
    ]
    ignored_labels = []
    palette = None
    return img, gt, rgb_bands, ignored_labels, label_values, palette
def china_loader(folder):
    """Load the China scene and convert its colour-coded map to class indices.

    Both the image (``'T2'``) and the colour-coded ground truth
    (``'Multiple'``) are read from the file configured under
    ``CUSTOM_DATASETS_CONFIG['China']['img']``; the ``'gt'`` entry of the
    configuration is not used by this loader.

    Every pixel of the colour map whose three channels equal one of the
    reference colours gets that colour's class index. Pixels matching none
    of them keep index 0. Note that the reference colours use 254, not 255.

    Args:
        folder: path prefix that is concatenated with the configured name.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``
        where ``gt`` is a 2D ``uint8`` array.

    Raises:
        ValueError: if the colour map is not an H x W x 3 array.
    """
    contents = open_file(folder + CUSTOM_DATASETS_CONFIG['China']['img'])
    img = contents['T2']
    colour_map = np.asarray(contents['Multiple'])
    if colour_map.ndim != 3 or colour_map.shape[-1] != 3:
        raise ValueError(
            "Expected an H x W x 3 colour map, got shape {}.".format(
                colour_map.shape))

    # Position in this table == class index written to gt.
    class_colours = (
        (254, 0, 0),
        (0, 254, 0),
        (0, 0, 254),
        (254, 254, 0),
        # from here on, colours marked as unused in the original mapping
        (254, 0, 254),
        (0, 254, 254),
        (0, 0, 0),
        (254, 254, 254),
    )

    # One vectorised mask per colour instead of a Python loop over pixels.
    # The colours are pairwise distinct, so the masks never overlap.
    gt = np.zeros(colour_map.shape[:2], dtype='uint8')
    for class_index, colour in enumerate(class_colours):
        gt[np.all(colour_map == colour, axis=-1)] = class_index

    rgb_bands = (0, 0, 0)  # not given
    label_values = [
        "soil",
        "river",
        "tree",
        "building",
        "road",
        "agricultural field",
    ]
    ignored_labels = []
    palette = None
    return img, gt, rgb_bands, ignored_labels, label_values, palette
def cuprite_188_loader(folder):
    """Load the Cuprite (188 bands) scene and its endmember-based labels.

    The image is the ``'Y'`` entry of the configured image file, reshaped to
    250 x 190 x 188. The returned ``gt`` is NOT a per-pixel map: it is a 1D
    array with 188 entries holding, for each row of the ``'M'`` entry of the
    ground-truth file, the index of its largest value along axis 1.

    Args:
        folder: path prefix that is concatenated with the configured names.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['Cuprite-188']

    flat_cube = open_file(folder + config['img'])['Y']
    # NOTE(review): if 'Y' is stored bands-first, a plain reshape does not
    # put each pixel's spectrum on the last axis - confirm the stored layout.
    img = np.reshape(flat_cube, (250, 190, 188))

    # only includes GT: endmembers.
    m_values = open_file(folder + config['gt'])['M']
    winning_columns = np.asarray(np.matrix(m_values.argmax(1)))
    gt = np.reshape(np.transpose(winning_columns), (188))

    # NOTE(review): img is reshaped to 188 bands above, so the indices 193
    # and 203 lie beyond its last axis - confirm before using them to index
    # the image.
    rgb_bands = (183, 193, 203)  # not sure but does not matter
    label_values = [
        "Alunite",
        "Andradite",
        "Buddingtonite",
        "Dumortierite",
        "Kaolinite1",
        "Kaolinite2",
        "Muscovite",
        "Montmorillonite",
        "Nontronite",
        "Pyrope",
        "Sphene",
        "Chalcedony",
    ]
    ignored_labels = []
    palette = None
    return img, gt, rgb_bands, ignored_labels, label_values, palette
def salinas_loader(folder):
    """Load the full Salinas scene and its ground truth.

    File names are taken from ``CUSTOM_DATASETS_CONFIG['Salinas']`` and the
    arrays are read from the ``salinas_corrected`` and ``salinas_gt``
    entries of the opened files; the ground truth is cast to ``uint8``.

    Args:
        folder: path prefix that is concatenated with the configured names.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['Salinas']
    img = open_file(folder + config['img'])['salinas_corrected']
    gt = open_file(folder + config['gt'])['salinas_gt'].astype('uint8')

    label_values = [
        "Unclassified",
        "Brocoli_green_weeds_1",
        "Brocoli_green_weeds_2",
        "Fallow",
        "Fallow_rough_plow",
        "Fallow_smooth",
        "Stubble",
        "Celery",
        "Grapes_untrained",
        "Soil_vinyard_develop",
        "Corn_senesced_green_weeds",
        "Lettuce_romaine_4wk",
        "Lettuce_romaine_5wk",
        "Lettuce_romaine_6wk",
        "Lettuce_romaine_7wk",
        "Vinyard_untrained",
        "Vinyard_vertical_trellis",
    ]
    rgb_bands = (47, 27, 13)
    ignored_labels = []
    palette = None
    return img, gt, rgb_bands, ignored_labels, label_values, palette
def get_dataset(dataset_name, target_folder="./", datasets=DATASETS_CONFIG):
    """ Gets the dataset specified by name and return the related components.

    The dataset files are downloaded first when the configuration allows it
    and they are not present yet. After loading, pixels whose spectrum
    contains NaN are zeroed in both image and ground truth, and the image is
    min-max scaled to [0, 1] as float32.

    Args:
        dataset_name: string with the name of the dataset
        target_folder (optional): folder to store the datasets, defaults to ./
        datasets (optional): dataset configuration dictionary, defaults to
            prebuilt one. The custom dataset configuration is merged into
            this dictionary in place.
    Returns:
        img: 3D hyperspectral image (WxHxB)
        gt: 2D int array of labels
        label_values: list of class names
        ignored_labels: list of int classes to ignore (always contains 0)
        rgb_bands: int tuple that correspond to red, green and blue bands
        palette: palette returned by a custom loader, None for the
            built-in datasets
    Raises:
        ValueError: if dataset_name is not in the dataset configuration
    """
    palette = None
    from DeepHyperX.custom_datasets import CUSTOM_DATASETS_CONFIG
    # NOTE: this mutates the dictionary passed in (the module-level default
    # included); kept because callers may rely on the merged configuration.
    datasets.update(CUSTOM_DATASETS_CONFIG)

    if dataset_name not in datasets:
        raise ValueError("{} dataset is unknown.".format(dataset_name))

    dataset = datasets[dataset_name]

    folder = target_folder + dataset.get('folder', dataset_name + '/')
    if dataset.get('download', True):
        # Download the dataset if is not present. makedirs also creates
        # missing parent directories of target_folder.
        os.makedirs(folder, exist_ok=True)
        for url in dataset['urls']:
            # download the files
            filename = url.split('/')[-1]
            if not os.path.exists(folder + filename):
                with TqdmUpTo(unit='B', unit_scale=True, miniters=1,
                              desc="Downloading {}".format(filename)) as t:
                    urlretrieve(url, filename=folder + filename,
                                reporthook=t.update_to)
    elif not os.path.isdir(folder):
        print("WARNING: {} is not downloadable.".format(dataset_name))

    if dataset_name == 'PaviaC':
        # Load the image
        img = open_file(folder + 'Pavia.mat')['pavia']

        rgb_bands = (55, 41, 12)

        gt = open_file(folder + 'Pavia_gt.mat')['pavia_gt']

        label_values = ["Undefined", "Water", "Trees", "Asphalt",
                        "Self-Blocking Bricks", "Bitumen", "Tiles", "Shadows",
                        "Meadows", "Bare Soil"]

        ignored_labels = [0]

    elif dataset_name == 'PaviaU':
        # Load the image
        img = open_file(folder + 'PaviaU.mat')['paviaU']

        rgb_bands = (55, 41, 12)

        gt = open_file(folder + 'PaviaU_gt.mat')['paviaU_gt']

        label_values = ['Undefined', 'Asphalt', 'Meadows', 'Gravel', 'Trees',
                        'Painted metal sheets', 'Bare Soil', 'Bitumen',
                        'Self-Blocking Bricks', 'Shadows']

        ignored_labels = [0]

    elif dataset_name == 'IndianPines':
        # Load the image
        img = open_file(folder + 'Indian_pines_corrected.mat')
        img = img['indian_pines_corrected']

        rgb_bands = (43, 21, 11)  # AVIRIS sensor

        gt = open_file(folder + 'Indian_pines_gt.mat')['indian_pines_gt']

        label_values = ["Undefined", "Alfalfa", "Corn-notill", "Corn-mintill",
                        "Corn", "Grass-pasture", "Grass-trees",
                        "Grass-pasture-mowed", "Hay-windrowed", "Oats",
                        "Soybean-notill", "Soybean-mintill", "Soybean-clean",
                        "Wheat", "Woods", "Buildings-Grass-Trees-Drives",
                        "Stone-Steel-Towers"]

        ignored_labels = [0]

    elif dataset_name == 'Botswana':
        # Load the image
        img = open_file(folder + 'Botswana.mat')['Botswana']

        rgb_bands = (75, 33, 15)

        gt = open_file(folder + 'Botswana_gt.mat')['Botswana_gt']

        label_values = ["Undefined", "Water", "Hippo grass",
                        "Floodplain grasses 1", "Floodplain grasses 2",
                        "Reeds", "Riparian", "Firescar", "Island interior",
                        "Acacia woodlands", "Acacia shrublands",
                        "Acacia grasslands", "Short mopane", "Mixed mopane",
                        "Exposed soils"]

        ignored_labels = [0]

    elif dataset_name == 'KSC':
        # Load the image
        img = open_file(folder + 'KSC.mat')['KSC']

        rgb_bands = (43, 21, 11)  # AVIRIS sensor

        gt = open_file(folder + 'KSC_gt.mat')['KSC_gt']

        # The last label used to read "Wate" (typo).
        label_values = ["Undefined", "Scrub", "Willow swamp",
                        "Cabbage palm hammock", "Cabbage palm/oak hammock",
                        "Slash pine", "Oak/broadleaf hammock",
                        "Hardwood swamp", "Graminoid marsh", "Spartina marsh",
                        "Cattail marsh", "Salt marsh", "Mud flats", "Water"]

        ignored_labels = [0]

    else:
        # Custom dataset. For names present in CUSTOM_DATASETS_CONFIG the
        # merged entry is that very configuration; using it also lets a
        # caller-supplied `datasets` entry bring its own loader.
        img, gt, rgb_bands, ignored_labels, label_values, palette = \
            dataset['loader'](folder)

    # Filter NaN out: a pixel is masked as soon as one of its bands is NaN
    nan_mask = np.isnan(img.sum(axis=-1))
    if np.count_nonzero(nan_mask) > 0:
        print("Warning: NaN have been found in the data. It is preferable to remove them beforehand. Learning on NaN data is disabled.")
    img[nan_mask] = 0
    gt[nan_mask] = 0

    # Label 0 is always added to the ignored labels, also when no NaN was
    # found. Built without mutating the list handed over by the loader.
    # NOTE(review): this also hides class 0 of datasets whose loader does
    # not reserve 0 for an "undefined" class - confirm that this is intended.
    ignored_labels = sorted(set(ignored_labels) | {0})

    # Normalization
    img = np.asarray(img, dtype='float32')
    lowest = np.min(img)
    value_range = np.max(img) - lowest
    if value_range > 0:
        img = (img - lowest) / value_range
    else:
        # Constant image: avoid a 0/0 division that would fill it with NaN
        img = np.zeros_like(img)
    return img, gt, label_values, ignored_labels, rgb_bands, palette