Example 1
    def test_regression(self):

        meuse_predictors = os.listdir(meuse_dir)
        meuse_predictors = [
            os.path.join(meuse_dir, i) for i in meuse_predictors
            if i.endswith('.tif')
        ]
        stack = Raster(meuse_predictors)
        self.assertEqual(stack.count, 21)

        training_pt = gpd.read_file(os.path.join(meuse_dir, 'meuse.shp'))
        training = stack.extract_vector(response=training_pt, field='cadmium')
        training['copper'] = stack.extract_vector(response=training_pt,
                                                  field='copper')['copper']
        training['lead'] = stack.extract_vector(response=training_pt,
                                                field='lead')['lead']
        training['zinc'] = stack.extract_vector(response=training_pt,
                                                field='zinc')['zinc']

        # single target regression
        regr = RandomForestRegressor(n_estimators=50)
        X = training.loc[:, stack.names]
        y = training['zinc']
        regr.fit(X, y)

        single_regr = stack.predict(regr)
        self.assertIsInstance(single_regr, Raster)
        self.assertEqual(single_regr.count, 1)

        # multi-target regression
        y = training.loc[:, ['zinc', 'cadmium', 'copper', 'lead']]
        regr.fit(X, y)
        multi_regr = stack.predict(regr)
        self.assertIsInstance(multi_regr, Raster)
        self.assertEqual(multi_regr.count, 4)
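
The test only checks the layer counts of the predicted rasters. As a minimal sketch (not part of the original test), the fitted regressor could also be scored with plain scikit-learn cross-validation on the extracted table; the cv and scoring choices below are illustrative assumptions:

from sklearn.model_selection import cross_val_score

# Cross-validate a single-target model on the extracted table
# (X = predictor columns, training['zinc'] = response, both built above).
scores = cross_val_score(RandomForestRegressor(n_estimators=50),
                         X, training['zinc'], cv=5, scoring='r2')
print(scores.mean(), scores.std())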
Example 2
    def test_extract_points(self):

        stack = Raster(self.predictors)

        # extract training data from points
        training_pt = geopandas.read_file(
            os.path.join(nc_dir, 'landsat96_points.shp'))
        X, y, xy = stack.extract_vector(response=training_pt,
                                        field='id',
                                        return_array=True)

        # remove masked values
        mask2d = X.mask.any(axis=1)
        X = X[~mask2d]
        y = y[~mask2d]
        xy = xy[~mask2d]

        # check shapes of extracted pixels
        self.assertTupleEqual(X.shape, (562, 6))
        self.assertTupleEqual(y.shape, (562, ))
        self.assertTupleEqual(xy.shape, (562, 2))

        # check summarized values of extracted y values
        self.assertTrue(
            np.equal(np.bincount(y), np.asarray([0, 161, 3, 76, 36, 275, 8,
                                                 3])).all())

        # check extracted X values
        self.assertAlmostEqual(X[:, 0].mean(), 81.588968, places=2)
        self.assertAlmostEqual(X[:, 1].mean(), 67.619217, places=2)
        self.assertAlmostEqual(X[:, 2].mean(), 67.455516, places=2)
        self.assertAlmostEqual(X[:, 3].mean(), 69.153025, places=2)
        self.assertAlmostEqual(X[:, 4].mean(), 90.051601, places=2)
        self.assertAlmostEqual(X[:, 5].mean(), 59.558719, places=2)
Example 3
    def test_regression(self):

        stack = Raster(ms.predictors)

        training_pt = gpd.read_file(ms.meuse)
        training = stack.extract_vector(
            response=training_pt,
            columns=['cadmium', 'copper', 'lead', 'zinc'])

        # single target regression
        regr = RandomForestRegressor(n_estimators=50)
        X = training.loc[:, stack.names]
        y = training['zinc']
        regr.fit(X, y)

        single_regr = stack.predict(regr)
        self.assertIsInstance(single_regr, Raster)
        self.assertEqual(single_regr.count, 1)

        # multi-target regression
        y = training.loc[:, ['zinc', 'cadmium', 'copper', 'lead']]
        regr.fit(X, y)
        multi_regr = stack.predict(regr)
        self.assertIsInstance(multi_regr, Raster)
        self.assertEqual(multi_regr.count, 4)
Example 4
    def test_classification(self):

        stack = Raster(self.predictors)
        training_pt = gpd.read_file(nc.points)

        df_points = stack.extract_vector(response=training_pt, columns='id')

        clf = RandomForestClassifier(n_estimators=50)
        X = df_points.drop(columns=['id', 'geometry'])
        y = df_points.id
        clf.fit(X, y)

        # classification
        cla = stack.predict(estimator=clf, dtype='int16', nodata=0)
        self.assertIsInstance(cla, Raster)
        self.assertEqual(cla.count, 1)
        self.assertEqual(cla.read(masked=True).count(), 135092)

        # class probabilities
        probs = stack.predict_proba(estimator=clf)
        self.assertIsInstance(probs, Raster)
        self.assertEqual(probs.count, 7)

        for _, layer in probs:
            self.assertEqual(layer.read(masked=True).count(), 135092)
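
predict_proba returns one probability layer per class (seven here). A small sketch, assuming the layers are emitted in the order of the classifier's classes_ attribute (an assumption about pyspatialml's layer ordering, not something this test asserts), pairs each layer with its class label using the same (name, layer) iteration shown above:

# Sketch: pair probability layers with the class labels they represent.
for class_label, (name, layer) in zip(clf.classes_, probs):
    print(class_label, name, layer.read(masked=True).count())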
Example 5
    def test_extract_polygons(self):

        stack = Raster(self.predictors)

        # extract training data from polygons
        training_py = geopandas.read_file(nc.polygons)
        X, y, xy = stack.extract_vector(
            response=training_py, columns='id', return_array=True)

        # remove masked values
        mask2d = X.mask.any(axis=1)
        X = X[~mask2d]
        y = y[~mask2d]
        xy = xy[~mask2d]

        # check shapes of extracted pixels
        self.assertTupleEqual(X.shape, (2436, 6))
        self.assertTupleEqual(y.shape, (2436, ))
        self.assertTupleEqual(xy.shape, (2436, 2))
Example 6
class TestAlter(TestCase):
    def setUp(self) -> None:
        predictors = [
            nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
        ]
        self.stack = Raster(predictors)
        points = gpd.read_file(nc.points)
        data = self.stack.extract_vector(points)
        self.data = data.dropna()

    def tearDown(self) -> None:
        self.stack.close()

    def test_alter(self):
        scaler = StandardScaler()
        scaler.fit(self.data.drop(columns=["geometry"]).values)
        out = self.stack.alter(scaler)

        self.assertIsInstance(out, Raster)
        self.assertEqual(out.shape, self.stack.shape)
Example 7
    def test_extract_lines(self):

        stack = Raster(self.predictors)

        # extract training data from lines
        training_py = geopandas.read_file(nc.polygons)
        training_lines = deepcopy(training_py)
        training_lines['geometry'] = training_lines.geometry.boundary
        X, y, xy = stack.extract_vector(
            response=training_lines, columns='id', return_array=True)

        # remove masked values
        mask2d = X.mask.any(axis=1)
        X = X[~mask2d]
        y = y[~mask2d]
        xy = xy[~mask2d]

        # check shapes of extracted pixels
        self.assertTupleEqual(X.shape, (948, 6))
        self.assertTupleEqual(y.shape, (948, ))
        self.assertTupleEqual(xy.shape, (948, 2))
Example 8
training_pt = geopandas.read_file(nc.points)
training_py = geopandas.read_file(nc.polygons)
training_px = rasterio.open(nc.labelled_pixels)
training_lines = deepcopy(training_py)
training_lines['geometry'] = training_lines.geometry.boundary

# Plot some training data
plt.imshow(stack.lsat7_2000_70.read(masked=True),
           extent=rasterio.plot.plotting_extent(stack.lsat7_2000_70.ds))
plt.scatter(x=training_pt.bounds.iloc[:, 0],
            y=training_pt.bounds.iloc[:, 1],
            s=2, color='black')
plt.show()

# Create a training dataset by extracting the raster values at the training point locations:
stack = Raster(predictors)
df_points = stack.extract_vector(response=training_pt, field='id')
df_polygons = stack.extract_vector(response=training_py, field='id')
df_lines = stack.extract_vector(response=training_lines, field='id')
df_raster = stack.extract_raster(response=training_px, value_name='id')
df_points.head()

# Next we can train a logistic regression classifier:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_validate

# define the classifier with standardization of the input features in a pipeline
lr = Pipeline(
    [('scaling', StandardScaler()),
     ('classifier', LogisticRegressionCV(n_jobs=-1))])
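
The excerpt imports cross_validate but stops after defining the pipeline. A minimal sketch of how it might continue, assuming df_points carries the extracted band columns plus the 'id' response and a 'geometry' column, as in the classification example above:

# Fit and cross-validate the pipeline on the extracted point data.
df_points = df_points.dropna()
X = df_points.drop(columns=['id', 'geometry'])
y = df_points['id']

scores = cross_validate(lr, X, y, cv=3, scoring='accuracy')
print(scores['test_score'].mean())

# Predict over the full raster stack with the fitted pipeline.
lr.fit(X, y)
result = stack.predict(estimator=lr)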
Example 9
class TestExtract(TestCase):
    def setUp(self) -> None:
        self.predictors = [
            nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7
        ]
        self.extracted_grass = pd.read_table(nc.extracted_pixels,
                                             delimiter=" ")

        self.stack = Raster(self.predictors)

    def tearDown(self) -> None:
        self.stack.close()

    def test_extract_points(self):
        training_pt = geopandas.read_file(nc.points)

        # check that extracted training data as a DataFrame match known values
        df = self.stack.extract_vector(gdf=training_pt)
        df = df.dropna()
        training_pt = training_pt.dropna()

        self.assertTrue(
            (df["lsat7_2000_10"].values == training_pt["b1"].values).all())
        self.assertTrue(
            (df["lsat7_2000_20"].values == training_pt["b2"].values).all())
        self.assertTrue(
            (df["lsat7_2000_30"].values == training_pt["b3"].values).all())
        self.assertTrue(
            (df["lsat7_2000_40"].values == training_pt["b4"].values).all())
        self.assertTrue(
            (df["lsat7_2000_50"].values == training_pt["b5"].values).all())
        self.assertTrue(
            (df["lsat7_2000_70"].values == training_pt["b7"].values).all())

    def test_extract_polygons(self):
        # extract training data from polygons
        training_py = geopandas.read_file(nc.polygons)
        df = self.stack.extract_vector(gdf=training_py)
        df = df.dropna()

        df = df.merge(
            right=training_py.loc[:, ("id", "label")],
            left_on="geometry_idx",
            right_index=True,
        )

        # compare to extracted data using GRASS GIS
        self.assertEqual(df.shape[0], self.extracted_grass.shape[0])
        self.assertAlmostEqual(df["lsat7_2000_10"].mean(),
                               self.extracted_grass["b1"].mean(),
                               places=2)
        self.assertAlmostEqual(df["lsat7_2000_20"].mean(),
                               self.extracted_grass["b2"].mean(),
                               places=2)
        self.assertAlmostEqual(df["lsat7_2000_30"].mean(),
                               self.extracted_grass["b3"].mean(),
                               places=2)
        self.assertAlmostEqual(df["lsat7_2000_40"].mean(),
                               self.extracted_grass["b4"].mean(),
                               places=2)
        self.assertAlmostEqual(df["lsat7_2000_50"].mean(),
                               self.extracted_grass["b5"].mean(),
                               places=2)
        self.assertAlmostEqual(df["lsat7_2000_70"].mean(),
                               self.extracted_grass["b7"].mean(),
                               places=2)

    def test_extract_lines(self):
        # extract training data from lines
        training_py = geopandas.read_file(nc.polygons)
        training_lines = deepcopy(training_py)
        training_lines["geometry"] = training_lines.geometry.boundary
        df = self.stack.extract_vector(gdf=training_lines).dropna()

        # check shapes of extracted pixels
        self.assertEqual(df.shape[0], 948)

    def test_extract_raster(self):
        # extract training data from labelled pixels
        with rasterio.open(nc.labelled_pixels) as src:
            df = self.stack.extract_raster(src)

        df = df.dropna()

        self.assertEqual(df.shape[0], self.extracted_grass.shape[0])
        self.assertAlmostEqual(df["lsat7_2000_10"].mean(),
                               self.extracted_grass["b1"].mean(),
                               places=3)
        self.assertAlmostEqual(df["lsat7_2000_20"].mean(),
                               self.extracted_grass["b2"].mean(),
                               places=3)
        self.assertAlmostEqual(df["lsat7_2000_30"].mean(),
                               self.extracted_grass["b3"].mean(),
                               places=3)
        self.assertAlmostEqual(df["lsat7_2000_40"].mean(),
                               self.extracted_grass["b4"].mean(),
                               places=3)
        self.assertAlmostEqual(df["lsat7_2000_50"].mean(),
                               self.extracted_grass["b5"].mean(),
                               places=3)
        self.assertAlmostEqual(df["lsat7_2000_70"].mean(),
                               self.extracted_grass["b7"].mean(),
                               places=3)
Example 10
training_pt = geopandas.read_file(nc.points)
training_py = geopandas.read_file(nc.polygons)
training_px = rasterio.open(nc.labelled_pixels)
training_lines = deepcopy(training_py)
training_lines['geometry'] = training_lines.geometry.boundary

# Plot some training data
plt.imshow(stack.lsat7_2000_70.read(masked=True),
           extent=rasterio.plot.plotting_extent(stack.lsat7_2000_70.ds))
plt.scatter(x=training_pt.bounds.iloc[:, 0],
            y=training_pt.bounds.iloc[:, 1],
            s=2, color='black')
plt.show()

# Create a training dataset by extracting the raster values at the training point locations:
stack = Raster(predictors)
df_points = stack.extract_vector(response=training_pt, columns='id')
df_polygons = stack.extract_vector(response=training_py, columns='id')
df_lines = stack.extract_vector(response=training_lines, columns='id')
df_raster = stack.extract_raster(response=training_px, value_name='id')
df_points.head()

# Next we can train a logistic regression classifier:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_validate

# define the classifier with standardization of the input features in a pipeline
lr = Pipeline(
    [('scaling', StandardScaler()),
     ('classifier', LogisticRegressionCV(n_jobs=-1))])

ep.plot_bands(result_neigh.read(), extent=extent, ax=axes[1,1], cmap='RdYlGn',
              alpha=0.8, title="KNeighbors classification", cbar=False)
manzana.boundary.plot(ax=axes[1,1], color='white', alpha=0.5, linewidth=1)
# Plot ndvi
ep.plot_bands(raster_ndvi.read(), extent=extent, ax=axes[2,0], cmap='RdYlGn', 
              alpha=0.8, title="NDVI", cbar=False)
manzana.boundary.plot(ax=axes[2,0], color='white', alpha=0.5, linewidth=1)
# Plot EVI
ep.plot_bands(raster_evi.read(), extent=extent, ax=axes[2,1], cmap='RdYlGn', 
              alpha=0.8, title="EVI", cbar=False)
manzana.boundary.plot(ax=axes[2,1], color='white', alpha=0.5, linewidth=1)
plt.tight_layout()
plt.show()

# %%
df_ndvi = raster_ndvi.extract_vector(training)
df_ndvi.rename(columns={df_ndvi.columns[0]: 'id'}, inplace=True)

df_evi = raster_evi.extract_vector(training)
df_evi.rename(columns={df_evi.columns[0]: 'id'}, inplace=True)

# %%
# NDVI test-set evaluation

train_accuracy_score2, train_precision_score2, train_recall_score2 = calculate_binary_class_scores(y, df_ndvi['id'].values)

print('NDVI Test Data Accuracy (%) = ', round(train_accuracy_score2*100,2))
print('NDVI Test Data Precision (%) = ', round(train_precision_score2*100,2))
print('NDVI Test Data Recall (%) = ', round(train_recall_score2*100,2))

cm = confusion_matrix(y, df_ndvi['id'].values)
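
A short follow-up sketch for inspecting the confusion matrix computed above; ConfusionMatrixDisplay is standard scikit-learn, while the colormap and title are illustrative choices:

from sklearn.metrics import ConfusionMatrixDisplay

# Visualize the NDVI confusion matrix computed above.
ConfusionMatrixDisplay(confusion_matrix=cm).plot(cmap='Blues')
plt.title('NDVI confusion matrix')
plt.show()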