def salinas_a_loader(folder):
    """Load the SalinasA scene and its ground truth.

    File names are taken from ``CUSTOM_DATASETS_CONFIG['SalinasA']`` and
    appended to ``folder`` by plain string concatenation.

    Args:
        folder: path prefix of the directory holding the dataset files.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['SalinasA']
    img = open_file(folder + config['img'])['salinasA_corrected']
    gt = open_file(folder + config['gt'])['salinasA_gt'].astype('uint8')

    # Squeeze the raw label ids (0, 1, 10..14) into the contiguous range
    # 0..6 so that they can be used to index label_values without going
    # out of bounds. No target id is a later source id, so applying the
    # pairs one after another is safe.
    raw_to_compact = (
        (0, 0), (1, 1), (10, 2), (11, 3), (12, 4), (13, 5), (14, 6),
    )
    for raw_id, compact_id in raw_to_compact:
        gt = np.where(gt == raw_id, compact_id, gt)

    label_values = [
        "Unclassified",
        "Brocoli_green_weeds_1",
        "Corn_senesced_green_weeds",
        "Lettuce_romaine_4wk",
        "Lettuce_romaine_5wk",
        "Lettuce_romaine_6wk",
        "Lettuce_romaine_7wk",
    ]
    rgb_bands = (47, 27, 13)

    # Nothing is ignored here and no custom palette is supplied.
    return img, gt, rgb_bands, [], label_values, None
def washington_loader(folder):
    """Load the Washington scene and its ground truth.

    File names are taken from ``CUSTOM_DATASETS_CONFIG['Washington']`` and
    appended to ``folder`` by plain string concatenation. Whatever
    ``open_file`` returns is passed through unchanged.

    Args:
        folder: path prefix of the directory holding the dataset files.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['Washington']
    img = open_file(folder + config['img'])
    gt = open_file(folder + config['gt'])

    # Class names taken from:
    # http://sugs.u-strasbg.fr/omiv/imagemining/documents/IMAGEMINING-DallaMurra-practicals.pdf
    label_values = [
        "Roofs",
        "Street",
        "Path",
        "Grass",
        "Trees",
        "Water",
        "Shadow",
    ]
    rgb_bands = (60, 27, 17)

    # Nothing is ignored here and no custom palette is supplied.
    return img, gt, rgb_bands, [], label_values, None
def urban_210_loader(folder):
    """Load the Urban-210 scene and derive a label map from its 'A' matrix.

    File names are taken from ``CUSTOM_DATASETS_CONFIG['Urban-210']`` and
    appended to ``folder`` by plain string concatenation.

    Args:
        folder: path prefix of the directory holding the dataset files.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``
        where ``img`` has shape (307, 307, 210) and ``gt`` shape (307, 307).
    """
    config = CUSTOM_DATASETS_CONFIG['Urban-210']
    side, band_count = 307, 210

    # NOTE(review): this is a plain C-order reshape of 'Y'. If 'Y' is stored
    # as (bands, pixels) the spectra would end up interleaved - confirm the
    # on-disk layout.
    raw_cube = open_file(folder + config['img'])['Y']
    img = np.reshape(raw_cube, (side, side, band_count))

    # The label of each position is the index of the largest entry along
    # axis 0 of 'A'.
    a_matrix = open_file(folder + config['gt'])['A']
    winners = np.asarray(np.matrix(a_matrix.argmax(0)))
    gt = np.reshape(winners, (side, side))

    # manually calculated, assuming linear distribution of bands among
    # wavelengths
    rgb_bands = (17, 14, 3)
    label_values = ["Asphalt", "Grass", "Tree", "Roof", "Metal", "Dirt"]

    # Nothing is ignored here and no custom palette is supplied.
    return img, gt, rgb_bands, [], label_values, None
def samson_loader(folder):
    """Load the Samson scene and derive a label map from its 'A' matrix.

    File names are taken from ``CUSTOM_DATASETS_CONFIG['Samson']`` and
    appended to ``folder`` by plain string concatenation.

    Args:
        folder: path prefix of the directory holding the dataset files.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``
        where ``img`` has shape (95, 95, 156) and ``gt`` shape (95, 95).
    """
    config = CUSTOM_DATASETS_CONFIG['Samson']
    side, band_count = 95, 156

    # NOTE(review): this is a plain C-order reshape of 'V'. If 'V' is stored
    # as (bands, pixels) the spectra would end up interleaved - confirm the
    # on-disk layout.
    raw_cube = open_file(folder + config['img'])['V']
    img = np.reshape(raw_cube, (side, side, band_count))

    # The label of each position is the index of the largest entry along
    # axis 0 of 'A'.
    a_matrix = np.matrix(open_file(folder + config['gt'])['A'])
    winners = np.asarray(a_matrix.argmax(0))
    gt = np.reshape(winners, (side, side))

    # manually calculated, assuming linear distribution of bands among
    # wavelengths
    rgb_bands = (9, 44, 54)
    label_values = ["Rock", "Tree", "Water"]

    # Nothing is ignored here and no custom palette is supplied.
    return img, gt, rgb_bands, [], label_values, None
def dfc2018_loader(folder):
    """Load the DFC2018 hyperspectral scene and its ground truth.

    File names are taken from ``CUSTOM_DATASETS_CONFIG['DFC2018_HSI']`` and
    appended to ``folder`` by plain string concatenation.

    Args:
        folder: path prefix of the directory holding the dataset files.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``;
        label 0 ("Unclassified") is reported as ignored.
    """
    config = CUSTOM_DATASETS_CONFIG['DFC2018_HSI']

    # The last two entries along the third axis are dropped from the image.
    full_cube = open_file(folder + config['img'])
    img = full_cube[:, :, :-2]

    gt = open_file(folder + config['gt']).astype('uint8')

    label_values = [
        "Unclassified",
        "Healthy grass",
        "Stressed grass",
        "Artificial turf",
        "Evergreen trees",
        "Deciduous trees",
        "Bare earth",
        "Water",
        "Residential buildings",
        "Non-residential buildings",
        "Roads",
        "Sidewalks",
        "Crosswalks",
        "Major thoroughfares",
        "Highways",
        "Railways",
        "Paved parking lots",
        "Unpaved parking lots",
        "Cars",
        "Trains",
        "Stadium seats",
    ]
    rgb_bands = (47, 31, 15)

    # No custom palette is supplied.
    return img, gt, rgb_bands, [0], label_values, None
def jasper_ridge_224_loader(folder):
    """Load the JasperRidge-224 scene and derive a label map from 'A'.

    File names are taken from ``CUSTOM_DATASETS_CONFIG['JasperRidge-224']``
    and appended to ``folder`` by plain string concatenation.

    Args:
        folder: path prefix of the directory holding the dataset files.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``
        where ``img`` has shape (100, 100, 224) and ``gt`` shape (100, 100).
    """
    config = CUSTOM_DATASETS_CONFIG['JasperRidge-224']
    side, band_count = 100, 224

    # NOTE(review): this is a plain C-order reshape of 'Y'. If 'Y' is stored
    # as (bands, pixels) the spectra would end up interleaved - confirm the
    # on-disk layout.
    raw_cube = open_file(folder + config['img'])['Y']
    img = np.reshape(raw_cube, (side, side, band_count))

    # The label of each position is the index of the largest entry along
    # axis 0 of 'A'.
    a_matrix = open_file(folder + config['gt'])['A']
    winners = np.asarray(np.matrix(a_matrix.argmax(0)))
    gt = np.reshape(winners, (side, side))

    # manually calculated, assuming linear distribution of bands among
    # wavelengths
    rgb_bands = (5, 16, 20)
    label_values = ["Road", "Soil", "Water", "Tree"]

    # Nothing is ignored here and no custom palette is supplied.
    return img, gt, rgb_bands, [], label_values, None
def usa_loader(folder):
    """Load the 'USA' scene and build an integer label map from its mask.

    Both arrays come from the single file named by
    ``CUSTOM_DATASETS_CONFIG['USA']['img']``: key ``'T2'`` is returned as the
    image, and key ``'Multiple'`` is a colour-coded mask that is converted
    to class ids here.

    Args:
        folder: path prefix of the directory holding the dataset file; the
            configured file name is appended by plain string concatenation.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
        ``gt`` is a uint8 array spanning the first two dimensions of the
        mask; positions whose colour matches no known colour stay 0.
    """
    # The image and the colour mask live in the same file, so read it once.
    contents = open_file(folder + CUSTOM_DATASETS_CONFIG['USA']['img'])
    img = contents['T2']
    # assumes the mask is (rows, cols, 3), as implied by comparing each
    # pixel against a 3-element colour - TODO confirm
    color_mask = np.asarray(contents['Multiple'])

    # Colour -> class id. Ids 6 and 7 have no entry in label_values and were
    # marked as unused by the original author; they are kept for parity.
    color_to_label = (
        ((255, 0, 0), 0),
        ((0, 255, 0), 1),
        ((0, 0, 255), 2),
        ((255, 255, 0), 3),
        ((255, 0, 255), 4),
        ((0, 255, 255), 5),
        ((0, 0, 0), 6),
        ((255, 255, 255), 7),
    )

    # Vectorized replacement for a per-pixel loop: the colours are mutually
    # exclusive, so each position is written at most once.
    gt = np.zeros(color_mask.shape[:2], dtype='uint8')
    for color, label in color_to_label:
        gt[(color_mask == color).all(axis=-1)] = label

    rgb_bands = (0, 0, 0)  # not given

    label_values = [
        "soil", "irrigated fields", "river", "building",
        "type of cultivated land", "grassland"
    ]

    ignored_labels = []
    palette = None

    return img, gt, rgb_bands, ignored_labels, label_values, palette
def china_loader(folder):
    """Load the 'China' scene and build an integer label map from its mask.

    Both arrays come from the single file named by
    ``CUSTOM_DATASETS_CONFIG['China']['img']``: key ``'T2'`` is returned as
    the image, and key ``'Multiple'`` is a colour-coded mask that is
    converted to class ids here.

    Args:
        folder: path prefix of the directory holding the dataset file; the
            configured file name is appended by plain string concatenation.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
        ``gt`` is a uint8 array spanning the first two dimensions of the
        mask; positions whose colour matches no known colour stay 0.
    """
    # The image and the colour mask live in the same file, so read it once.
    contents = open_file(folder + CUSTOM_DATASETS_CONFIG['China']['img'])
    img = contents['T2']
    # assumes the mask is (rows, cols, 3), as implied by comparing each
    # pixel against a 3-element colour - TODO confirm
    color_mask = np.asarray(contents['Multiple'])

    # Colour -> class id (this mask uses 254, not 255, as the full-intensity
    # value). The original author marked the entries from id 4 onwards as
    # unused; they are kept for parity.
    color_to_label = (
        ((254, 0, 0), 0),
        ((0, 254, 0), 1),
        ((0, 0, 254), 2),
        ((254, 254, 0), 3),
        ((254, 0, 254), 4),
        ((0, 254, 254), 5),
        ((0, 0, 0), 6),
        ((254, 254, 254), 7),
    )

    # Vectorized replacement for a per-pixel loop: the colours are mutually
    # exclusive, so each position is written at most once.
    gt = np.zeros(color_mask.shape[:2], dtype='uint8')
    for color, label in color_to_label:
        gt[(color_mask == color).all(axis=-1)] = label

    rgb_bands = (0, 0, 0)  # not given

    label_values = [
        "soil", "river", "tree", "building", "road", "agricultural field"
    ]

    ignored_labels = []
    palette = None

    return img, gt, rgb_bands, ignored_labels, label_values, palette
def cuprite_188_loader(folder):
    """Load the Cuprite-188 scene together with a label vector from 'M'.

    File names are taken from ``CUSTOM_DATASETS_CONFIG['Cuprite-188']`` and
    appended to ``folder`` by plain string concatenation.

    Args:
        folder: path prefix of the directory holding the dataset files.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``
        where ``img`` has shape (250, 190, 188) and ``gt`` is a flat vector
        of 188 entries.
    """
    config = CUSTOM_DATASETS_CONFIG['Cuprite-188']

    raw_cube = open_file(folder + config['img'])['Y']
    img = np.reshape(raw_cube, (250, 190, 188))

    # The ground truth only includes endmembers: for every row of 'M' keep
    # the index of its largest entry, then flatten to 188 values.
    # NOTE(review): gt therefore has 188 entries, not one per image pixel -
    # confirm downstream code can cope with that.
    m_matrix = open_file(folder + config['gt'])['M']
    row_winners = np.asarray(np.matrix(m_matrix.argmax(1)))
    gt = np.reshape(np.transpose(row_winners), 188)

    # not sure but does not matter
    # NOTE(review): 193 and 203 lie beyond the 188 bands of img - confirm
    # nothing indexes the cube with these values.
    rgb_bands = (183, 193, 203)

    label_values = [
        "Alunite",
        "Andradite",
        "Buddingtonite",
        "Dumortierite",
        "Kaolinite1",
        "Kaolinite2",
        "Muscovite",
        "Montmorillonite",
        "Nontronite",
        "Pyrope",
        "Sphene",
        "Chalcedony",
    ]

    # Nothing is ignored here and no custom palette is supplied.
    return img, gt, rgb_bands, [], label_values, None
def salinas_loader(folder):
    """Load the Salinas scene and its ground truth.

    File names are taken from ``CUSTOM_DATASETS_CONFIG['Salinas']`` and
    appended to ``folder`` by plain string concatenation.

    Args:
        folder: path prefix of the directory holding the dataset files.

    Returns:
        Tuple ``(img, gt, rgb_bands, ignored_labels, label_values, palette)``.
    """
    config = CUSTOM_DATASETS_CONFIG['Salinas']
    img = open_file(folder + config['img'])['salinas_corrected']
    gt = open_file(folder + config['gt'])['salinas_gt'].astype('uint8')

    label_values = [
        "Unclassified",
        "Brocoli_green_weeds_1",
        "Brocoli_green_weeds_2",
        "Fallow",
        "Fallow_rough_plow",
        "Fallow_smooth",
        "Stubble",
        "Celery",
        "Grapes_untrained",
        "Soil_vinyard_develop",
        "Corn_senesced_green_weeds",
        "Lettuce_romaine_4wk",
        "Lettuce_romaine_5wk",
        "Lettuce_romaine_6wk",
        "Lettuce_romaine_7wk",
        "Vinyard_untrained",
        "Vinyard_vertical_trellis",
    ]
    rgb_bands = (47, 27, 13)

    # Nothing is ignored here and no custom palette is supplied.
    return img, gt, rgb_bands, [], label_values, None
# Example #11
# 0
def get_dataset(dataset_name, target_folder="./", datasets=DATASETS_CONFIG):
    """ Gets the dataset specified by name and return the related components.

    The dataset files are downloaded first when the configuration allows it
    and they are not on disk yet. The image is then cleaned of NaN values
    and min-max normalized.

    Args:
        dataset_name: string with the name of the dataset
        target_folder (optional): folder to store the datasets, defaults to ./
        datasets (optional): dataset configuration dictionary, defaults to
            prebuilt one. It is updated in place with CUSTOM_DATASETS_CONFIG.
    Returns:
        img: 3D hyperspectral image (WxHxB), float32, min-max normalized
        gt: 2D int array of labels
        label_values: list of class names
        ignored_labels: list of int classes to ignore (always contains 0)
        rgb_bands: int tuple that correspond to red, green and blue bands
        palette: palette returned by a custom loader, None for the
            built-in datasets
    Raises:
        ValueError: if dataset_name is not part of the configuration.
    """
    palette = None

    from DeepHyperX.custom_datasets import CUSTOM_DATASETS_CONFIG
    # Deliberately an in-place update: callers may rely on the custom
    # entries being visible in the dictionary they passed (or in the
    # module-level default) afterwards.
    datasets.update(CUSTOM_DATASETS_CONFIG)
    if dataset_name not in datasets:
        raise ValueError("{} dataset is unknown.".format(dataset_name))

    dataset = datasets[dataset_name]

    folder = target_folder + dataset.get('folder', dataset_name + '/')
    if dataset.get('download', True):
        # Download the dataset if is not present. makedirs also creates a
        # missing target_folder instead of failing on it.
        os.makedirs(folder, exist_ok=True)
        for url in dataset['urls']:
            # download the files
            filename = url.split('/')[-1]
            if not os.path.exists(folder + filename):
                with TqdmUpTo(unit='B', unit_scale=True, miniters=1,
                              desc="Downloading {}".format(filename)) as t:
                    urlretrieve(url, filename=folder + filename,
                                reporthook=t.update_to)
    elif not os.path.isdir(folder):
        print("WARNING: {} is not downloadable.".format(dataset_name))

    if dataset_name == 'PaviaC':
        # Load the image
        img = open_file(folder + 'Pavia.mat')['pavia']

        rgb_bands = (55, 41, 12)

        gt = open_file(folder + 'Pavia_gt.mat')['pavia_gt']

        label_values = ["Undefined", "Water", "Trees", "Asphalt",
                        "Self-Blocking Bricks", "Bitumen", "Tiles", "Shadows",
                        "Meadows", "Bare Soil"]

        ignored_labels = [0]

    elif dataset_name == 'PaviaU':
        # Load the image
        img = open_file(folder + 'PaviaU.mat')['paviaU']

        rgb_bands = (55, 41, 12)

        gt = open_file(folder + 'PaviaU_gt.mat')['paviaU_gt']

        label_values = ['Undefined', 'Asphalt', 'Meadows', 'Gravel', 'Trees',
                        'Painted metal sheets', 'Bare Soil', 'Bitumen',
                        'Self-Blocking Bricks', 'Shadows']

        ignored_labels = [0]

    elif dataset_name == 'IndianPines':
        # Load the image
        img = open_file(folder + 'Indian_pines_corrected.mat')
        img = img['indian_pines_corrected']

        rgb_bands = (43, 21, 11)  # AVIRIS sensor

        gt = open_file(folder + 'Indian_pines_gt.mat')['indian_pines_gt']
        label_values = ["Undefined", "Alfalfa", "Corn-notill", "Corn-mintill",
                        "Corn", "Grass-pasture", "Grass-trees",
                        "Grass-pasture-mowed", "Hay-windrowed", "Oats",
                        "Soybean-notill", "Soybean-mintill", "Soybean-clean",
                        "Wheat", "Woods", "Buildings-Grass-Trees-Drives",
                        "Stone-Steel-Towers"]

        ignored_labels = [0]

    elif dataset_name == 'Botswana':
        # Load the image
        img = open_file(folder + 'Botswana.mat')['Botswana']

        rgb_bands = (75, 33, 15)

        gt = open_file(folder + 'Botswana_gt.mat')['Botswana_gt']
        label_values = ["Undefined", "Water", "Hippo grass",
                        "Floodplain grasses 1", "Floodplain grasses 2",
                        "Reeds", "Riparian", "Firescar", "Island interior",
                        "Acacia woodlands", "Acacia shrublands",
                        "Acacia grasslands", "Short mopane", "Mixed mopane",
                        "Exposed soils"]

        ignored_labels = [0]

    elif dataset_name == 'KSC':
        # Load the image
        img = open_file(folder + 'KSC.mat')['KSC']

        rgb_bands = (43, 21, 11)  # AVIRIS sensor

        gt = open_file(folder + 'KSC_gt.mat')['KSC_gt']
        label_values = ["Undefined", "Scrub", "Willow swamp",
                        "Cabbage palm hammock", "Cabbage palm/oak hammock",
                        "Slash pine", "Oak/broadleaf hammock",
                        "Hardwood swamp", "Graminoid marsh", "Spartina marsh",
                        "Cattail marsh", "Salt marsh", "Mud flats", "Water"]

        ignored_labels = [0]
    else:
        # Custom dataset: the loader returns its values in a different order
        # than this function does, hence the explicit unpacking.
        loader = CUSTOM_DATASETS_CONFIG[dataset_name]['loader']
        img, gt, rgb_bands, ignored_labels, label_values, palette = loader(folder)

    # Filter NaN out: a pixel is flagged when any of its bands is NaN
    # (summing over the last axis propagates the NaN).
    nan_mask = np.isnan(img.sum(axis=-1))
    if np.count_nonzero(nan_mask) > 0:
        print("Warning: NaN have been found in the data. It is preferable to remove them beforehand. Learning on NaN data is disabled.")
    img[nan_mask] = 0
    gt[nan_mask] = 0
    # Flagged pixels were relabelled 0 above, so class 0 must be ignored.
    ignored_labels.append(0)

    ignored_labels = list(set(ignored_labels))
    # Normalization
    img = np.asarray(img, dtype='float32')
    lowest = np.min(img)
    value_range = np.max(img) - lowest
    if value_range > 0:
        img = (img - lowest) / value_range
    else:
        # A constant image would divide by zero and turn into NaN.
        img = np.zeros_like(img)
    return img, gt, label_values, ignored_labels, rgb_bands, palette