def test_XYextraction(self):
    """Check extract_ROI output shapes and its error handling for bad fields/paths."""
    # Pixel values only: a 2d array (one row per pixel).
    samples = processing.extract_ROI(raster, vector)
    assert samples.ndim == 2
    # 'Type' is expected to raise ValueError (presumably not a valid field).
    self.assertRaises(ValueError, processing.extract_ROI, raster, vector, 'Type')
    # With a label field, X and y stay aligned.
    samples, labels = processing.extract_ROI(raster, vector, 'Class')
    assert samples.shape[0] == labels.shape[0]
    # With a group field as well, all three outputs stay aligned.
    samples, labels, groups = processing.extract_ROI(raster, vector, 'Class', 'uniquefid')
    assert samples.shape[0] == labels.shape[0] == groups.shape[0]
    # Non-existing raster/vector paths raise ValueError.
    self.assertRaises(ValueError, processing.extract_ROI, 'wrong/path', 'wrong/path/too')
    # One extracted column per raster band.
    assert processing.extract_ROI(raster, vector).shape[1] == gdal.Open(raster).RasterCount
    # Unknown field name raises as well.
    self.assertRaises(ValueError, processing.extract_ROI, raster, vector, 'kodidk')
def zonal_stats(in_image, in_vector, unique_id,
                stats=('mean', 'median', 'std'), verbose=False):
    """
    Extract zonal stats according to a predefined id.

    Parameters
    -----------
    in_image : str.
        Path of the raster file where the vector file will be rasterized.
    in_vector : str.
        Path of the vector file to rasterize.
    unique_id : str or False.
        If False, MuseoToolBox will create a field called 'unique_id' using
        the function :func:`_add_vector_unique_fid`.
    stats : sequence of str, optional (default=('mean', 'median', 'std')).
        Each str must be the name of a function available from numpy.
        For example ['var'] will output the variance per band and per unique id.
    verbose : bool or int, optional (default=False).
        The higher is the int verbose, the more it will returns informations.

    Returns
    --------
    stats : list of np.ndarray
        Returns as many np.ndarray as number of stats asked.
        Stats ordered by bands.
        For example (each line correspond to the unique_id ordered asc) :

        +-------------+---------------+-------------+
        | mean band_1 + mean band_2   | mean band_3 |
        +-------------+---------------+-------------+
        | mean band_1 + mean band_2   | mean band_3 |
        +-------------+---------------+-------------+

    Examples
    ---------
    >>> raster,vector = mtb.datasets.load_historical_data()
    >>> mean,var = mtb.stats.zonal_stats(raster,vector,'uniquefid',stats=['mean','var'])
    >>> mean.shape
    (17, 3)
    >>> mean[0,:] # mean of the first unique_id
    array([117.75219446, 109.80958812,  79.64213369])
    >>> var[0,:]
    array([1302.29983482, 1250.59980003, 1015.76659747])
    """
    if unique_id is False:
        # No id field given: create one on the fly in the vector file.
        _add_vector_unique_fid(in_vector, 'unique_id', verbose=verbose)
        unique_id = 'unique_id'

    X, y = extract_ROI(in_image, in_vector, unique_id)

    # Hoisted: np.unique(y) was recomputed for every stat and every label.
    unique_ids = np.unique(y)
    n_bands = X.shape[1]
    # One (n_unique_id, n_bands) result array per requested stat.
    out_stats = [np.zeros([len(unique_ids), n_bands]) for _ in stats]

    for idx_stat, stat in enumerate(stats):
        # e.g. np.mean; raises AttributeError for an unknown stat name.
        stat_function = getattr(np, stat)
        for pos, label in enumerate(unique_ids):
            # Per-band stat over all pixels sharing this unique id.
            out_stats[idx_stat][pos, :] = stat_function(
                X[np.where(y == label)], axis=0)

    return out_stats
# NOTE(review): this chunk begins mid-call — the first line is the tail of a
# RandomStratifiedKFold(...) (or similar) constructor whose start is outside
# this view. Do not edit in isolation.
                              n_repeats=2, random_state=12, verbose=False)

# Split using labels only: X is not needed to compute stratified folds.
for tr, vl in SKF.split(X=None, y=y):
    print(tr, vl)

###############################################################################
# .. note::
#    Split is made to generate each fold

# Show labels: each fold prints its train/validation label arrays.
for tr, vl in SKF.split(X=None, y=y):
    print(y[tr], y[vl])

##############################################################################
# .. note::
#    The first one is made with polygon only.
#    When learning/predicting, all pixels will be taken in account
# To generate full X and y labels, extract samples from ROI

X, y = processing.extract_ROI(raster, vector, field)

for tr, vl in SKF.split(X, y):
    print(tr, vl)
    print(tr.shape, vl.shape)

##########################
# Plot example

# Gallery-local helper that draws a diagram of the CV method.
from __drawCVmethods import plotMethod
plotMethod('SKF-pixel')
############################################################################## # Import librairies # ------------------------------------------- from museotoolbox.cross_validation import LeaveOneSubGroupOut from museotoolbox.processing import extract_ROI from museotoolbox import datasets ############################################################################## # Load HistoricalMap dataset # ------------------------------------------- raster, vector = datasets.load_historical_data() field = 'Class' group = 'uniquefid' X, y, s = extract_ROI(raster, vector, field, group) ############################################################################## # Create CV # ------------------------------------------- # if n_splits is False (default), the number of splits will be the smallest # number of subgroup of all labels. valid_size = 0.5 # Means 50% LOSGO = LeaveOneSubGroupOut(verbose=False, random_state=12) # ############################################################################### # .. note:: # Split is made to generate each fold LOSGO.get_n_splits(X, y, s)
def load_pottoks(return_X_y=True, return_target=True, return_only_path=False):
    """
    Load two images of pottoks.

    Parameters
    -----------
    return_X_y : bool, optional (default=True)
        If True, will return the array in 2d where there are labels (X)
        and the labels (y).
        If False, will return only X in 3d (the array of the image).
    return_target : bool, optional (default=True)
        If True, will return two arrays in a list.
        If False, will return only the source (a brown pottok).
    return_only_path : bool, optional (default=False)
        If True, will return only the file paths ('.tif' raster and
        '.gpkg' vector) instead of arrays; takes precedence over the
        other two parameters.

    Examples
    --------
    >>> brown_pottok_arr, brown_pottok_label = load_pottoks(return_target=False)
    >>> brown_pottok_arr.shape
    (4610, 3)
    >>> brown_pottok_label.shape
    (4610,)
    """
    brown_pottok_uri = os.path.join(__pathFile, 'brownpottok')
    black_pottok_uri = os.path.join(__pathFile, 'blackpottok')

    to_return = []

    if return_only_path:
        # Paths only: brown pottok raster + vector, plus the black pottok
        # files when the target is requested.
        to_return.extend([brown_pottok_uri + '.tif',
                          brown_pottok_uri + '.gpkg'])
        if return_target:
            to_return.extend([black_pottok_uri + '.tif',
                              black_pottok_uri + '.gpkg'])
    elif return_X_y:
        # 2d samples: X (pixels x bands) and y (labels from field 'level').
        Xs, ys = extract_ROI(brown_pottok_uri + '.tif',
                             brown_pottok_uri + '.gpkg', 'level')
        to_return.extend([Xs, ys])
        if return_target:
            Xt, yt = extract_ROI(black_pottok_uri + '.tif',
                                 black_pottok_uri + '.gpkg', 'level')
            to_return.extend([Xt, yt])
    elif return_X_y is False:
        # Full 3d image arrays (kept as `is False` so a non-bool falsy value
        # keeps returning an empty list, as before).
        brown_pottok_arr = RasterMath(brown_pottok_uri + '.tif',
                                      return_3d=True,
                                      verbose=False).get_image_as_array()
        to_return.append(brown_pottok_arr)
        if return_target:
            black_pottok_arr = RasterMath(black_pottok_uri + '.tif',
                                          return_3d=True,
                                          verbose=False).get_image_as_array()
            to_return.append(black_pottok_arr)

    return to_return
from museotoolbox.cross_validation import SpatialLeaveOneSubGroupOut
from museotoolbox import datasets, processing

##############################################################################
# Load HistoricalMap dataset
# -------------------------------------------

_, centroid = datasets.load_historical_data(low_res=True, centroid=True)
raster, vector = datasets.load_historical_data(low_res=True)
field = 'Class'

##############################################################################
# Extract label ('Class' field) and groups ('uniquefid' field)
# Compute distance matrix with centroid (one point per group)

X, y, groups = processing.extract_ROI(raster, vector, field, 'uniquefid')
distance_matrix, distance_label = processing.get_distance_matrix(
    raster, centroid, 'uniquefid')

##############################################################################
# Create CV
# -------------------------------------------
# n_splits will be the number of the least populated class

# NOTE(review): 'distance_thresold' is the keyword as spelled by the
# museotoolbox API — keep the misspelling, renaming it would break the call.
SLOSGO = SpatialLeaveOneSubGroupOut(distance_thresold=100,
                                    distance_matrix=distance_matrix,
                                    distance_label=distance_label,
                                    random_state=12)

###############################################################################
# .. note::
# ------------------------------------------- from museotoolbox.ai import SuperLearner from museotoolbox.cross_validation import RandomStratifiedKFold from museotoolbox.processing import extract_ROI from museotoolbox import datasets from sklearn.ensemble import RandomForestClassifier from sklearn import metrics ############################################################################## # Load HistoricalMap dataset # ------------------------------------------- raster,vector = datasets.load_historical_data(low_res=True) field = 'Class' X,y = extract_ROI(raster,vector,field) ############################################################################## # Create CV # ------------------------------------------- SKF = RandomStratifiedKFold(n_splits=2, random_state=12,verbose=False) ############################################################################## # Initialize Random-Forest and metrics # -------------------------------------- classifier = RandomForestClassifier(random_state=12,n_jobs=1) # kappa = metrics.make_scorer(metrics.cohen_kappa_score)
def test_extract_position(self):
    """Each extracted sample must come with exactly one pixel position."""
    values, positions = processing.extract_ROI(
        raster, vector, get_pixel_position=True, prefer_memory=False)
    # One position row per extracted pixel value.
    assert values.shape[0] == positions.shape[0]
raster, vector = datasets.load_historical_data(low_res=True)
field = 'Class'
group = 'uniquefid'

##############################################################################
# Create CV
# -------------------------------------------

valid_size = 0.5  # Means 50%
# NOTE(review): the literal 0.5 is passed instead of the valid_size variable
# defined just above — consider using the variable.
LPSGO = LeavePSubGroupOut(valid_size=0.5, random_state=12,
                          verbose=False)

###############################################################################
# Extract X, y and group.
# -------------------------------------------

X, y, g = processing.extract_ROI(raster, vector, field, group)

###############################################################################
# .. note::
#    Split is made to generate each fold

for tr, vl in LPSGO.split(X, y, g):
    print(tr.shape, vl.shape)

# NOTE(review): these prints use `tr` from the last loop iteration — assumed
# intentionally outside the loop; confirm against the original formatting.
print('y label with number of samples')
print(np.unique(y[tr], return_counts=True))

##############################################################################
# Differences with scikit-learn
# -------------------------------------------

from sklearn.model_selection import LeavePGroupsOut
# You need to specify the number of groups