Esempio n. 1
0
    def test_regression(self):
        """Single- and multi-target regression from a Raster stack.

        Fits a random forest on the meuse heavy-metal data and checks
        that ``stack.predict`` returns a Raster with one band for a
        single target and four bands for four targets.
        """
        stack = Raster(ms.predictors)

        training_pt = gpd.read_file(ms.meuse)
        training = stack.extract_vector(
            response=training_pt,
            columns=['cadmium', 'copper', 'lead', 'zinc'])

        estimator = RandomForestRegressor(n_estimators=50)
        features = training.loc[:, stack.names]

        # single target regression
        estimator.fit(features, training['zinc'])
        single_regr = stack.predict(estimator)
        self.assertIsInstance(single_regr, Raster)
        self.assertEqual(single_regr.count, 1)

        # multi-target regression
        targets = training.loc[:, ['zinc', 'cadmium', 'copper', 'lead']]
        estimator.fit(features, targets)
        multi_regr = stack.predict(estimator)
        self.assertIsInstance(multi_regr, Raster)
        self.assertEqual(multi_regr.count, 4)
Esempio n. 2
0
    def test_regression(self):
        """Single- and multi-target regression on the meuse dataset.

        Builds a Raster from the meuse predictor GeoTIFFs, extracts four
        heavy-metal response fields at the training points, then checks
        that random forest predictions return Rasters with 1 band
        (single target) and 4 bands (multi-target).
        """
        meuse_predictors = [
            os.path.join(meuse_dir, i) for i in os.listdir(meuse_dir)
            if i.endswith('.tif')
        ]
        stack = Raster(meuse_predictors)
        self.assertEqual(stack.count, 21)

        training_pt = gpd.read_file(os.path.join(meuse_dir, 'meuse.shp'))

        # One extract_vector call per response field, merged into a single
        # frame (replaces four copy-pasted call sites).
        training = stack.extract_vector(response=training_pt, field='cadmium')
        for field in ('copper', 'lead', 'zinc'):
            training[field] = stack.extract_vector(
                response=training_pt, field=field)[field]

        # single target regression
        regr = RandomForestRegressor(n_estimators=50)
        X = training.loc[:, stack.names]
        y = training['zinc']
        regr.fit(X, y)

        single_regr = stack.predict(regr)
        self.assertIsInstance(single_regr, Raster)
        self.assertEqual(single_regr.count, 1)

        # multi-target regression
        y = training.loc[:, ['zinc', 'cadmium', 'copper', 'lead']]
        regr.fit(X, y)
        multi_regr = stack.predict(regr)
        self.assertIsInstance(multi_regr, Raster)
        self.assertEqual(multi_regr.count, 4)
Esempio n. 3
0
    def test_classification(self):
        """Classification and class-probability prediction from a stack.

        Fits a random forest classifier on values extracted at training
        points, then checks the predicted class raster (1 band, expected
        valid-pixel count) and the probability raster (7 bands, same
        valid-pixel count per band).
        """
        stack = Raster(self.predictors)
        training_pt = gpd.read_file(nc.points)

        df_points = stack.extract_vector(response=training_pt, columns='id')

        clf = RandomForestClassifier(n_estimators=50)
        X = df_points.drop(columns=['id', 'geometry'])
        y = df_points.id
        clf.fit(X, y)

        # classification
        cla = stack.predict(estimator=clf, dtype='int16', nodata=0)
        self.assertIsInstance(cla, Raster)
        self.assertEqual(cla.count, 1)
        self.assertEqual(cla.read(masked=True).count(), 135092)

        # class probabilities
        probs = stack.predict_proba(estimator=clf)
        # Fixed copy-paste bug: the original asserted isinstance on `cla`
        # (already checked above) instead of the new `probs` raster.
        self.assertIsInstance(probs, Raster)
        self.assertEqual(probs.count, 7)

        for _, layer in probs:
            self.assertEqual(layer.read(masked=True).count(), 135092)
Esempio n. 4
0
# spatial cross-validation: group samples by spatial cluster so folds are
# spatially disjoint, reducing optimistic bias from spatial autocorrelation
from sklearn.cluster import KMeans

# create 34 spatial clusters from the training polygons' bounding-box
# min-x/min-y coordinates (bounds.iloc[:, 0:2])
# NOTE(review): the original comment said "10" clusters but the code uses
# n_clusters=34 — code kept as-is, comment corrected.
# NOTE(review): KMeans(n_jobs=...) was removed in scikit-learn 1.0; this
# snippet assumes an older sklearn — confirm the pinned version.
clusters = KMeans(n_clusters=34, n_jobs=-1)
clusters.fit(df_polygons.geometry.bounds.iloc[:, 0:2])

# cross validate, using the cluster labels as CV groups
scores = cross_validate(
  lr, X, y, groups=clusters.labels_,
  scoring='accuracy',
  cv=3,  n_jobs=1)
scores['test_score'].mean()

# prediction: hard classes (int16, 0 = nodata) and per-class probabilities
result = stack.predict(estimator=lr, dtype='int16', nodata=0)
result_prob = stack.predict_proba(estimator=lr)
result.names
result_prob.names

result.plot()
plt.show()

result_prob.plot()
plt.show()

# sampling
# extract training data using a random sample (fixed seed for repeatability)
df_rand = stack.sample(size=1000, random_state=1)
df_rand.plot()
plt.show()
Esempio n. 5
0
# create 34 spatial clusters from the training polygons' bounding-box
# min-x/min-y coordinates (bounds.iloc[:, 0:2])
# NOTE(review): the original comment said "10" clusters but the code uses
# n_clusters=34 — code kept as-is, comment corrected.
# NOTE(review): KMeans(n_jobs=...) was removed in scikit-learn 1.0; this
# snippet assumes an older sklearn — confirm the pinned version.
clusters = KMeans(n_clusters=34, n_jobs=-1)
clusters.fit(df_polygons.geometry.bounds.iloc[:, 0:2])

# cross validate, using the cluster labels as CV groups
scores = cross_validate(
  lr, X, y, groups=clusters.labels_,
  scoring='accuracy',
  cv=3,  n_jobs=1)
scores['test_score'].mean()

# prediction
# read the full stack into a masked DataFrame
df = stack.read(as_df=True, masked=True)

# NOTE(review): `result` is assigned twice — the first predict() result is
# discarded and overwritten by the as_df=True call; presumably the second
# returns a DataFrame, so the later result.plot()/result.names calls may
# rely on pandas rather than Raster behavior — verify intent.
result = stack.predict(estimator=lr, dtype='int16', nodata=0)
result = stack.predict(estimator=lr, dtype='int16', nodata=0, as_df=True)
result_prob = stack.predict_proba(estimator=lr)
result.names
result_prob.names

result.plot()
plt.show()

result_prob.plot()
plt.show()

# sampling
# extract training data using a random sample (fixed seed for repeatability)
df_rand = stack.sample(size=1000, random_state=1)
df_rand.plot()