Load data
Filter areas where we have LiDAR estimates
Subsample if desired/required
#-------------------------------------------------------------------------------
"""
print('Loading data')

# load a template raster
#template_file = '%s/lidar/processed/%s_AGB_07-31-19_regridded.tif' % (path2data,site_id)
lidar_agb_file = '/exports/csce/datastore/geos/users/dmilodow/FOREST2020/LiDARupscaling/data/lidar_calibration/kiuic_lidar_agb_median.tif'
# Load the LiDAR-derived AGB raster; option=1 presumably selects a read mode
# of io.load_geotiff — TODO confirm against the project's io module.
lidar = io.load_geotiff(lidar_agb_file, option=1)
target = lidar.values.copy()
# negative AGB values mark nodata/invalid pixels; convert them to NaN
target[target < 0] = np.nan

# Load predictors & target
# NOTE(review): data_mask returned by the combined call below is overwritten
# two lines later by the ALOS-only call — confirm this is intentional.
data_layers_all, data_mask, labels_all = io.load_predictors(
    layers=['sentinel2', 'alos'])
data_layers_s2, temp, labels_s2 = io.load_predictors(layers=['sentinel2'])
data_layers_alos, data_mask, labels_alos = io.load_predictors(layers=['alos'])

# load forest mask
forest_mask_file = "/exports/csce/datastore/geos/groups/gcel/YucatanBiomass/data/forest_mask/%s_forest_mask_20m.tif" % site_id
forest = xr.open_rasterio(forest_mask_file).values[0]
forest_mask = forest == 1
# elementwise AND via multiplication: forest pixels with valid predictor data
forest_mask = forest_mask * data_mask

# Keep only areas for which we have biomass estimates
training_mask = np.isfinite(target)
# erode by one pixel to drop the edge pixels of the LiDAR coverage
# (image is presumably scipy.ndimage — TODO confirm the import)
training_mask = image.binary_erosion(training_mask, iterations=1)
# restrict the training sample to forested pixels with valid data
training_mask = training_mask * forest_mask

# Apply masks to the predictor dataset to be ingested into sklearn routines
Example #2
0
Load data
Filter areas where we have LiDAR estimates
Subsample if desired/required
#-------------------------------------------------------------------------------
"""
print('Loading data')

# load a template raster
# The path encodes the grid resolution (zero-padded to 3 characters, in
# metres) and a calibration version tag supplied outside this chunk.
lidar_agb_file = '../data/lidar_calibration/%sm/kiuic_lidar_agb_%s_median.tif' % (
    resolution.zfill(3), version_trials)
lidar = io.load_geotiff(lidar_agb_file, option=1)
target = lidar.values.copy()
# negative AGB values mark nodata/invalid pixels; convert them to NaN
target[target < 0] = np.nan

# Load predictors & target
data_layers, data_mask, labels = io.load_predictors(
    layers=['sentinel2', 'alos'], resolution=resolution)
# Optionally drop predictor layers whose label contains any of these
# substrings; an empty list keeps every layer. The commented lines record
# previously-tried configurations (GLCM texture metrics).
#layers_to_remove = ['ASM','homogeneity','correlation']
#layers_to_remove = ['ASM','homogeneity','correlation','contrast','dissimilarity']
layers_to_remove = []
n_predictors = data_layers.shape[0]
layer_mask = np.ones(n_predictors, dtype='bool')  # True = keep this layer
labels_update = []
for ii, lab in enumerate(labels):
    for layer in layers_to_remove:
        if layer in lab:
            print('remove', lab)
            layer_mask[ii] = False
    if layer_mask[ii]:
        labels_update.append(lab)
# retain only the unmasked layers and their matching labels, keeping the
# label list aligned with the first axis of data_layers
data_layers = data_layers[layer_mask]
labels = labels_update
"""
Project Info
"""
site_id = 'kiuic'
version = '001'
path2alg = '../saved_models/'
"""
#===============================================================================
PART A: LOAD IN DATA AND SUBSET THE TRAINING DATA
Load data
Filter areas where we have LiDAR estimates
Subsample if desired/required
#-------------------------------------------------------------------------------
"""
# Load predictors & target
# No-argument call — this fragment presumably uses a different version of the
# io module than the calls earlier in the file (different return signature);
# TODO confirm against the io module in use here.
predictors, target, landmask, labels = io.load_predictors()

# Keep only areas for which we have biomass estimates
mask = np.isfinite(target)
X = predictors[mask, :]  # predictor matrix, one row per valid pixel
y = target[mask]         # corresponding AGB values
"""
#===============================================================================
PART B: CAL-VAL
Cal-val
Cal-val figures
Importances via permutation importance
#-------------------------------------------------------------------------------
"""
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,