Example #1
0
    def test_extract_raster(self):
        """Extract predictor values at labelled pixels and check the shapes.

        Builds a ``Raster`` from ``self.predictors``, extracts the values under
        the labelled-pixel raster as arrays, drops every row containing a
        masked value, and checks the row/column counts of the three results.
        """
        stack = Raster(self.predictors)

        # extract training data from labelled pixels; the context manager
        # closes the rasterio dataset even if the extraction raises
        with rasterio.open(nc.labelled_pixels) as training_px:
            X, y, xy = stack.extract_raster(
                response=training_px, value_name='id', return_array=True)

        # remove masked values: a row is dropped if any of its columns is masked
        mask2d = X.mask.any(axis=1)
        X = X[~mask2d]
        y = y[~mask2d]
        xy = xy[~mask2d]

        # check shapes of extracted pixels
        self.assertTupleEqual(X.shape, (2436, 6))
        self.assertTupleEqual(y.shape, (2436, ))
        self.assertTupleEqual(xy.shape, (2436, 2))
Example #2
0
# Replace every geometry in training_lines with its boundary.
# NOTE(review): presumably training_lines started as a copy of the polygon
# layer, which would make these the polygon outlines -- confirm against the
# earlier part of the script.
training_lines['geometry'] = training_lines.geometry.boundary

# Plot some training data: the lsat7_2000_70 layer is drawn as the backdrop,
# with the image extent taken from that layer's dataset.
plt.imshow(stack.lsat7_2000_70.read(masked=True),
           extent=rasterio.plot.plotting_extent(stack.lsat7_2000_70.ds))
# The first two columns of .bounds supply x and y (minx/miny per geopandas;
# for point geometries these are the point coordinates themselves).
plt.scatter(x=training_pt.bounds.iloc[:, 0],
            y=training_pt.bounds.iloc[:, 1],
            s=2, color='black')
plt.show()

# Create a training dataset by extracting the raster values at the training point locations:
# NOTE(review): `stack` is already used by the plot above, so this line
# re-creates it from `predictors`.
stack = Raster(predictors)
# One extraction per training-data source: points, polygons, lines and a
# labelled raster. `field` / `value_name` presumably name the attribute that
# carries the class label -- confirm against the Raster API.
df_points = stack.extract_vector(response=training_pt, field='id')
df_polygons = stack.extract_vector(response=training_py, field='id')
df_lines = stack.extract_vector(response=training_lines, field='id')
df_raster = stack.extract_raster(response=training_px, value_name='id')
# Bare expression: the preview is only displayed in an interactive session.
df_points.head()

# Next we can train a logistic regression classifier:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_validate

# define the classifier with standardization of the input features in a pipeline
# (n_jobs=-1 asks scikit-learn to use all available CPU cores)
lr = Pipeline(
    [('scaling', StandardScaler()),
     ('classifier', LogisticRegressionCV(n_jobs=-1))])

# fit the classifier
# The feature matrix is every extracted column except the 'id' label and the
# geometry; the remaining fitting steps are not part of this excerpt.
X = df_points.drop(columns=['id', 'geometry'])
Example #3
0
class TestExtract(TestCase):
    """Compare values extracted from a ``Raster`` with reference data.

    Each test extracts pixel values from the six-layer stack built in
    ``setUp`` and checks them against known values: the band attributes
    stored with the training points, the table read from
    ``nc.extracted_pixels``, or a fixed row count.
    """

    # Raster layer name -> column holding the same band in the reference data.
    _BAND_COLUMNS = {
        "lsat7_2000_10": "b1",
        "lsat7_2000_20": "b2",
        "lsat7_2000_30": "b3",
        "lsat7_2000_40": "b4",
        "lsat7_2000_50": "b5",
        "lsat7_2000_70": "b7",
    }

    def setUp(self) -> None:
        """Load the reference table and open the predictor stack."""
        self.predictors = [
            nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
        ]
        # space-delimited table of reference pixel values
        self.extracted_grass = pd.read_table(nc.extracted_pixels,
                                             delimiter=" ")

        self.stack = Raster(self.predictors)

    def tearDown(self) -> None:
        """Close the stack opened in ``setUp``."""
        self.stack.close()

    def _assert_band_means_match(self, df, places: int) -> None:
        """Assert that *df* agrees with the reference table.

        Checks that the row counts are equal and that every band's mean
        matches the reference mean.

        Args:
            df: Extracted values with one column per raster layer.
            places: Decimal places forwarded to ``assertAlmostEqual``.
        """
        self.assertEqual(df.shape[0], self.extracted_grass.shape[0])
        for layer, column in self._BAND_COLUMNS.items():
            # subTest labels a failure with the band that caused it
            with self.subTest(layer=layer):
                self.assertAlmostEqual(
                    df[layer].mean(),
                    self.extracted_grass[column].mean(),
                    places=places,
                )

    def test_extract_points(self):
        """Values extracted at the points equal the points' band attributes."""
        training_pt = geopandas.read_file(nc.points)

        # check that extracted training data as a DataFrame match known values
        df = self.stack.extract_vector(gdf=training_pt)
        df = df.dropna()
        training_pt = training_pt.dropna()

        for layer, column in self._BAND_COLUMNS.items():
            with self.subTest(layer=layer):
                self.assertTrue(
                    (df[layer].values == training_pt[column].values).all())

    def test_extract_polygons(self):
        """Pixels extracted under the polygons match the reference table."""
        # extract training data from polygons
        training_py = geopandas.read_file(nc.polygons)
        df = self.stack.extract_vector(gdf=training_py)
        df = df.dropna()

        # Attach the polygon attributes by matching geometry_idx against the
        # polygon table's index. The earlier right_on="index" argument named a
        # column the selected frame does not have and conflicted with
        # right_index=True, so only the index join is kept.
        df = df.merge(
            right=training_py.loc[:, ["id", "label"]],
            left_on="geometry_idx",
            right_index=True,
        )

        # compare to extracted data using GRASS GIS
        self._assert_band_means_match(df, places=2)

    def test_extract_lines(self):
        """Extraction along the polygon boundaries yields the known row count."""
        # extract training data from lines
        training_py = geopandas.read_file(nc.polygons)
        training_lines = deepcopy(training_py)
        training_lines["geometry"] = training_lines.geometry.boundary
        df = self.stack.extract_vector(gdf=training_lines).dropna()

        # check shapes of extracted pixels
        self.assertEqual(df.shape[0], 948)

    def test_extract_raster(self):
        """Pixels extracted under the labelled raster match the reference table."""
        # extract training data from labelled pixels
        with rasterio.open(nc.labelled_pixels) as src:
            df = self.stack.extract_raster(src)

        df = df.dropna()

        self._assert_band_means_match(df, places=3)