class TestSample(TestCase):
    """Check the output shapes of ``Raster.sample`` with and without strata."""

    def setUp(self) -> None:
        # Six predictor bands stacked into one Raster, plus the strata raster
        # used by the stratified-sampling test.
        self.stack = Raster(
            [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]
        )
        self.strata = Raster(nc.strata)

    def tearDown(self) -> None:
        self.stack.close()
        self.strata.close()

    def _check_sample_shapes(self, expected_rows, **sample_kwargs):
        """Sample in both return modes and assert the resulting shapes.

        ``expected_rows`` is the number of samples expected back;
        ``sample_kwargs`` are forwarded unchanged to ``self.stack.sample``.
        """
        # Array mode yields a (values, coordinates) pair.
        values, coords = self.stack.sample(return_array=True, **sample_kwargs)
        self.assertEqual(values.shape, (expected_rows, 6))
        self.assertEqual(coords.shape, (expected_rows, 2))

        # Table mode yields a single object with seven columns
        # (presumably the six bands plus a geometry column -- confirm).
        table = self.stack.sample(return_array=False, **sample_kwargs)
        self.assertEqual(table.shape, (expected_rows, 7))

    def test_sample_strata(self):
        # Stratified sampling: `size` samples are expected for every distinct
        # unmasked value found in the strata raster.
        size = 100
        valid_values = self.strata.read(masked=True).compressed()
        n_samples = size * np.unique(valid_values).shape[0]

        self._check_sample_shapes(n_samples, size=size, strata=self.strata)

    def test_sample_no_strata(self):
        # Plain random sampling: exactly `size` samples are expected.
        size = 100
        self._check_sample_shapes(size, size=size)
# prediction
# Class predictions are requested as int16 with 0 as the nodata value;
# class probabilities use the method's defaults.
result = stack.predict(estimator=lr, dtype='int16', nodata=0)
result_prob = stack.predict_proba(estimator=lr)

# Bare attribute lookups: the values are only echoed in an interactive session.
result.names
result_prob.names

result.plot()
plt.show()

result_prob.plot()
plt.show()

# sampling
# Extract training data using a random sample.
df_rand = stack.sample(size=1000, random_state=1)
df_rand.plot()
plt.show()

# Extract training data using a stratified random sample from a map
# containing categorical data. `size` is the number of samples requested per
# category, so 5 samples per category are taken here.
strata_path = os.path.join(basedir, 'pyspatialml', 'nc_dataset', 'strata.tif')
strata = rasterio.open(strata_path)
df_strata = stack.sample(size=5, strata=strata, random_state=1).dropna()

# Draw the strata map and overlay the sampled points in red.
fig, ax = plt.subplots()
ax.imshow(
    strata.read(1, masked=True),
    extent=rasterio.plot.plotting_extent(strata),
)
df_strata.plot(ax=ax, markersize=2, color='red')
plt.show()

from pyspatialml.transformations import one_hot_encode