def test_extract_raster(self):
    """Extract predictor values under a labelled-pixel raster and check shapes.

    Builds a ``Raster`` from ``self.predictors``, extracts the values that
    coincide with the labelled pixels in ``nc.labelled_pixels`` as arrays,
    drops every row that contains a masked value, and asserts the number
    of remaining samples for the features, labels and coordinates.
    """
    stack = Raster(self.predictors)

    # Open the labelled pixels in a context manager so the dataset handle is
    # released even when the extraction raises (it was previously left open).
    with rasterio.open(nc.labelled_pixels) as training_px:
        X, y, xy = stack.extract_raster(
            response=training_px, value_name='id', return_array=True)

    # A sample is discarded when any of its predictor values is masked.
    mask2d = X.mask.any(axis=1)
    X = X[~mask2d]
    y = y[~mask2d]
    xy = xy[~mask2d]

    # Features, labels and coordinates must describe the same 2436 samples.
    self.assertTupleEqual(X.shape, (2436, 6))
    self.assertTupleEqual(y.shape, (2436, ))
    self.assertTupleEqual(xy.shape, (2436, 2))
# Replace each training geometry by its boundary so it can be sampled as a line.
training_lines['geometry'] = training_lines.geometry.boundary

# Plot one band of the stack with the training points drawn on top.
# NOTE(review): the first two columns of ``.bounds`` are presumably the
# minimum x and y of each geometry (geopandas convention) - confirm.
band_extent = rasterio.plot.plotting_extent(stack.lsat7_2000_70.ds)
plt.imshow(stack.lsat7_2000_70.read(masked=True), extent=band_extent)
point_bounds = training_pt.bounds
plt.scatter(
    x=point_bounds.iloc[:, 0],
    y=point_bounds.iloc[:, 1],
    s=2,
    color='black',
)
plt.show()

# Build the training datasets by extracting the raster values found under
# each kind of training geometry (points, polygons, lines, labelled pixels).
stack = Raster(predictors)
df_points = stack.extract_vector(response=training_pt, field='id')
df_polygons = stack.extract_vector(response=training_py, field='id')
df_lines = stack.extract_vector(response=training_lines, field='id')
df_raster = stack.extract_raster(response=training_px, value_name='id')
df_points.head()

# Next, set up a logistic regression classifier.
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_validate

# The input features are standardized before they reach the classifier, so
# both steps are chained in a single pipeline.
lr = Pipeline([
    ('scaling', StandardScaler()),
    ('classifier', LogisticRegressionCV(n_jobs=-1)),
])

# Feature matrix for fitting: every extracted column except the label and
# the geometry.
X = df_points.drop(columns=['id', 'geometry'])
class TestExtract(TestCase):
    """Check ``Raster`` pixel extraction against reference values.

    Every test extracts values from a six-band stack built from the ``nc``
    example data and compares them with columns ``b1`` ... ``b7`` of a
    reference table (``nc.extracted_pixels``) or of the training geometries.
    """

    # Raster layer name -> column holding the same band in the reference data.
    band_columns = {
        "lsat7_2000_10": "b1",
        "lsat7_2000_20": "b2",
        "lsat7_2000_30": "b3",
        "lsat7_2000_40": "b4",
        "lsat7_2000_50": "b5",
        "lsat7_2000_70": "b7",
    }

    def setUp(self) -> None:
        """Load the reference table and open the predictor stack."""
        self.predictors = [
            nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
        ]
        self.extracted_grass = pd.read_table(nc.extracted_pixels,
                                             delimiter=" ")
        self.stack = Raster(self.predictors)

    def tearDown(self) -> None:
        """Close the predictor stack opened in ``setUp``."""
        self.stack.close()

    def _assert_band_means_match(self, df, places: int) -> None:
        """Assert that each band's mean in *df* matches the reference table.

        Each band runs in its own ``subTest`` so that a failure names the
        offending layer and the remaining bands are still checked.
        """
        for layer, column in self.band_columns.items():
            with self.subTest(layer=layer):
                self.assertAlmostEqual(
                    df[layer].mean(),
                    self.extracted_grass[column].mean(),
                    places=places,
                )

    def test_extract_points(self):
        """Values extracted at points equal the values stored with the points."""
        training_pt = geopandas.read_file(nc.points)

        # check that extracted training data as a DataFrame match known values
        df = self.stack.extract_vector(gdf=training_pt)
        df = df.dropna()
        training_pt = training_pt.dropna()

        for layer, column in self.band_columns.items():
            with self.subTest(layer=layer):
                self.assertTrue(
                    (df[layer].values == training_pt[column].values).all())

    def test_extract_polygons(self):
        """Values extracted under polygons agree with the reference table."""
        # extract training data from polygons
        training_py = geopandas.read_file(nc.polygons)
        df = self.stack.extract_vector(gdf=training_py)
        df = df.dropna()

        # Join the polygon attributes onto the extracted pixels. The right-hand
        # key is the index of ``training_py``, so only ``right_index`` is
        # passed; also naming a ``right_on`` column would contradict it.
        df = df.merge(
            right=training_py.loc[:, ("id", "label")],
            left_on="geometry_idx",
            right_index=True,
        )

        # compare to extracted data using GRASS GIS
        self.assertEqual(df.shape[0], self.extracted_grass.shape[0])
        self._assert_band_means_match(df, places=2)

    def test_extract_lines(self):
        """Extraction along polygon boundaries yields the expected row count."""
        # extract training data from lines
        training_py = geopandas.read_file(nc.polygons)
        training_lines = deepcopy(training_py)
        training_lines["geometry"] = training_lines.geometry.boundary
        df = self.stack.extract_vector(gdf=training_lines).dropna()

        # check shapes of extracted pixels
        self.assertEqual(df.shape[0], 948)

    def test_extract_raster(self):
        """Values extracted under labelled pixels agree with the reference table."""
        # extract training data from labelled pixels
        with rasterio.open(nc.labelled_pixels) as src:
            df = self.stack.extract_raster(src)

        df = df.dropna()

        self.assertEqual(df.shape[0], self.extracted_grass.shape[0])
        self._assert_band_means_match(df, places=3)