Esempio n. 1
0
 def test_XYextraction(self):
     """
     Check what ``processing.extract_ROI`` returns for each call signature.

     Covers the sample array alone, the sample array with one field, the
     sample array with two fields, and the ``ValueError`` raised for
     unreadable paths and for the field names 'Type' and 'kodidk'
     (presumably absent from the test vector -- confirm against the dataset).
     """
     # Without any field, a single 2d array is returned.
     X = processing.extract_ROI(raster, vector)
     self.assertEqual(X.ndim, 2)
     # One column per band of the source raster.
     self.assertEqual(X.shape[1], gdal.Open(raster).RasterCount)

     # One field: as many field values as extracted samples.
     X, y = processing.extract_ROI(raster, vector, 'Class')
     self.assertEqual(X.shape[0], y.shape[0])

     # Two fields: every output has the same number of rows.
     X, y, g = processing.extract_ROI(raster, vector, 'Class', 'uniquefid')
     self.assertEqual(X.shape[0], y.shape[0])
     self.assertEqual(y.shape[0], g.shape[0])

     # Invalid inputs are expected to raise ValueError.
     self.assertRaises(ValueError, processing.extract_ROI, raster, vector, 'Type')
     self.assertRaises(ValueError, processing.extract_ROI, raster, vector, 'kodidk')
     self.assertRaises(ValueError, processing.extract_ROI, 'wrong/path', 'wrong/path/too')
Esempio n. 2
0
def zonal_stats(in_image, in_vector, unique_id,
                stats=('mean', 'median', 'std'), verbose=False):
    """
    Extract zonal stats according to a predefined id.

    Parameters
    -----------
    in_image : str.
        Path of the raster file where the vector file will be rasterize.
    in_vector : str.
        Path of the vector file to rasterize.
    unique_id : str or False.
        Name of the vector field that identifies each zone.
        If False, a field called 'unique_id' is added to `in_vector` using the
        function :func:`_add_vector_unique_fid` and is then used as the id.
    stats : str or sequence of str, optional (default=('mean','median','std')).
        Each str must be the name of a function available from numpy that
        accepts an ``axis`` argument.
        For example ['var'] will output the variance per band and per unique id.
        A single str is treated as a one-element sequence.
    verbose : bool or int, optional (default=False).
        Forwarded to :func:`_add_vector_unique_fid` when `unique_id` is False.

    Returns
    --------
    stats : list of np.ndarray
        One array per requested stat, in the same order as `stats`.
        Each array has one row per unique id and one column per band.

        For example (each line correspond to the unique_id ordered asc) :

        +-------------+---------------+-------------+
        | mean band_1 +  mean band_2  | mean band_3 |
        +-------------+---------------+-------------+
        | mean band_1 +  mean band_2  | mean band_3 |
        +-------------+---------------+-------------+

    Raises
    ------
    AttributeError
        If a name in `stats` is not an attribute of numpy.

    Examples
    ---------
    >>> raster,vector = mtb.datasets.load_historical_data()
    >>> mean,var = mtb.stats.zonal_stats(raster,vector,'uniquefid',stats=['mean','var'])
    >>> mean.shape
    (17, 3)
    >>> mean[0,:] # mean of the first unique_id
    array([117.75219446, 109.80958812,  79.64213369])
    >>> var[0,:]
    array([1302.29983482, 1250.59980003, 1015.76659747])
    """
    if isinstance(stats, str):
        stats = [stats]
    # Resolve the numpy functions first so an unknown name fails before the
    # vector file is modified or the raster is read.
    stat_functions = [getattr(np, stat) for stat in stats]

    if unique_id is False:
        _add_vector_unique_fid(in_vector, 'unique_id', verbose=verbose)
        unique_id = 'unique_id'

    X, y = extract_ROI(in_image, in_vector, unique_id)
    # np.unique returns sorted values, hence rows are ordered by ascending id.
    labels = np.unique(y)
    n_bands = X.shape[1]

    out_stats = [np.zeros([len(labels), n_bands]) for _ in stat_functions]

    for out, stat_function in zip(out_stats, stat_functions):
        for pos, label in enumerate(labels):
            # Reduce over the samples of this id, keeping one value per band.
            out[pos, :] = stat_function(X[np.where(y == label)], axis=0)

    return out_stats
Esempio n. 3
0
                            n_repeats=2,
                            random_state=12,
                            verbose=False)
# Print the train/validation pair yielded for each fold when only the labels
# are given (X=None).
for tr, vl in SKF.split(X=None, y=y):
    print(tr, vl)

###############################################################################
# .. note::
#    Split is made to generate each fold

# Show the labels selected by each train/validation pair

for tr, vl in SKF.split(X=None, y=y):
    print(y[tr], y[vl])

##############################################################################
# .. note::
#    The first one is made with polygon only.
#    When learning/predicting, all pixels will be taken into account.
#    To generate full X and y arrays, extract samples from the ROI.

X, y = processing.extract_ROI(raster, vector, field)

# Same cross-validation, this time split on the extracted X and y.
for tr, vl in SKF.split(X, y):
    print(tr, vl)
    print(tr.shape, vl.shape)

##########################
# Plot example
from __drawCVmethods import plotMethod
plotMethod('SKF-pixel')
Esempio n. 4
0
##############################################################################
# Import libraries
# -------------------------------------------

from museotoolbox.cross_validation import LeaveOneSubGroupOut
from museotoolbox.processing import extract_ROI
from museotoolbox import datasets

##############################################################################
# Load HistoricalMap dataset
# -------------------------------------------

raster, vector = datasets.load_historical_data()
field = 'Class'
group = 'uniquefid'
# Extract the samples together with the values of the `field` and `group`
# fields (presumably returned in that order as y and s -- confirm).
X, y, s = extract_ROI(raster, vector, field, group)

##############################################################################
# Create CV
# -------------------------------------------
# If n_splits is False (default), the number of splits will be the smallest
# number of subgroups of all labels.

# NOTE(review): valid_size is assigned here but is not passed to
# LeaveOneSubGroupOut below.
valid_size = 0.5  # Means 50%
LOSGO = LeaveOneSubGroupOut(verbose=False, random_state=12)

###############################################################################
# .. note::
#    Split is made to generate each fold

LOSGO.get_n_splits(X, y, s)
Esempio n. 5
0
def load_pottoks(return_X_y=True, return_target=True, return_only_path=False):
    """
    Load one or two images of pottoks.

    Parameters
    -----------
    return_X_y : bool, optional (default=True)
        If True, will return the array in 2d where there are labels (X) and the labels (y)
        If False, will return only X in 3d (the array of the image)
    return_target : bool, optional (default=True)
        If True, will also return the data of the target (a black pottok)
        If False, will return only the source (a brown pottok)
    return_only_path : bool, optional (default=False)
        If True, will return only the paths of the files (the '.tif' path then
        the '.gpkg' path) without reading them. Takes precedence over
        `return_X_y`.

    Returns
    -------
    to_return : list
        The brown pottok entries first, then the black pottok entries when
        `return_target` is True. Per pottok, the entries are:

        - the '.tif' path and the '.gpkg' path if `return_only_path`,
        - X and y if `return_X_y`,
        - the image array otherwise.

    Examples
    --------
    >>> brown_pottok_arr, brown_pottok_label = load_pottoks(return_target=False)
    >>> brown_pottok_arr.shape
    (4610, 3)
    >>> brown_pottok_label.shape
    (4610,)
    """
    # The source comes first so that it is always at the head of the list.
    pottok_names = ['brownpottok']
    if return_target:
        pottok_names.append('blackpottok')

    to_return = []

    for pottok_name in pottok_names:
        pottok_uri = os.path.join(__pathFile, pottok_name)
        raster_path = pottok_uri + '.tif'
        vector_path = pottok_uri + '.gpkg'

        if return_only_path:
            to_return.extend([raster_path, vector_path])
        elif return_X_y:
            X, y = extract_ROI(raster_path, vector_path, 'level')
            to_return.extend([X, y])
        else:
            # Any falsy return_X_y (not only the literal False) gives the image.
            pottok_arr = RasterMath(
                raster_path,
                return_3d=True,
                verbose=False).get_image_as_array()
            to_return.append(pottok_arr)

    return to_return
from museotoolbox.cross_validation import SpatialLeaveOneSubGroupOut
from museotoolbox import datasets, processing
##############################################################################
# Load HistoricalMap dataset
# -------------------------------------------

# The first call is only used for its second return value (the centroid file).
_, centroid = datasets.load_historical_data(low_res=True, centroid=True)
raster, vector = datasets.load_historical_data(low_res=True)

field = 'Class'

##############################################################################
# Extract label ('Class' field) and groups ('uniquefid' field)
# Compute the distance matrix with centroid (one point per group)

X, y, groups = processing.extract_ROI(raster, vector, field, 'uniquefid')
distance_matrix, distance_label = processing.get_distance_matrix(
    raster, centroid, 'uniquefid')

##############################################################################
# Create CV
# -------------------------------------------
# n_splits will be the number of the least populated class

# NOTE(review): the keyword is spelled `distance_thresold` here; it has to
# match the parameter name declared by SpatialLeaveOneSubGroupOut -- confirm
# before renaming.
SLOSGO = SpatialLeaveOneSubGroupOut(distance_thresold=100,
                                    distance_matrix=distance_matrix,
                                    distance_label=distance_label,
                                    random_state=12)

###############################################################################
# .. note::
Esempio n. 7
0
# -------------------------------------------

from museotoolbox.ai import SuperLearner
from museotoolbox.cross_validation import RandomStratifiedKFold
from museotoolbox.processing import extract_ROI
from museotoolbox import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

##############################################################################
# Load HistoricalMap dataset
# -------------------------------------------

raster,vector = datasets.load_historical_data(low_res=True)
field = 'Class'
# Extract the samples (X) and the values of the 'Class' field (y).
X,y = extract_ROI(raster,vector,field)
##############################################################################
# Create CV
# -------------------------------------------

SKF = RandomStratifiedKFold(n_splits=2,
                random_state=12,verbose=False)

##############################################################################
# Initialize Random-Forest and metrics
# --------------------------------------

classifier = RandomForestClassifier(random_state=12,n_jobs=1)

# Wrap Cohen's kappa score as a scikit-learn scorer
kappa = metrics.make_scorer(metrics.cohen_kappa_score)
Esempio n. 8
0
 def test_extract_position(self):
     """Check that extract_ROI returns one pixel position per extracted sample."""
     samples, positions = processing.extract_ROI(
         raster,
         vector,
         get_pixel_position=True,
         prefer_memory=False)
     n_positions = positions.shape[0]
     n_samples = samples.shape[0]
     assert n_positions == n_samples
Esempio n. 9
0
raster, vector = datasets.load_historical_data(low_res=True)
field = 'Class'
group = 'uniquefid'

##############################################################################
# Create CV
# -------------------------------------------
# NOTE(review): valid_size is assigned here, but the call below passes the
# literal 0.5 instead of this variable.
valid_size = 0.5  # Means 50%
LPSGO = LeavePSubGroupOut(valid_size=0.5, random_state=12, verbose=False)

###############################################################################
# Extract X,y and group.
# -------------------------------------------

X, y, g = processing.extract_ROI(raster, vector, field, group)

###############################################################################
# .. note::
#    Split is made to generate each fold

for tr, vl in LPSGO.split(X, y, g):
    print(tr.shape, vl.shape)

print('y label with number of samples')
# `tr` still holds the training selection of the last fold of the loop above.
print(np.unique(y[tr], return_counts=True))
##############################################################################
# Differences with scikit-learn
# -------------------------------------------
from sklearn.model_selection import LeavePGroupsOut
# You need to specify the number of groups