예제 #1
0
    def test_classification(self):
        """Fit a random forest on point samples and check both prediction outputs.

        Values are extracted from the predictor stack using the training
        points, a classifier is fitted on them, and the stack is then asked
        for a class prediction and for class probabilities. Both results are
        checked for type, layer count and number of non-masked values.
        """
        stack = Raster(self.predictors)
        training_pt = gpd.read_file(nc.points)

        # Extract predictor values for the training points; the 'id' column
        # is requested alongside them and serves as the response below.
        df_points = stack.extract_vector(response=training_pt, columns='id')

        clf = RandomForestClassifier(n_estimators=50)
        X = df_points.drop(columns=['id', 'geometry'])
        y = df_points.id
        clf.fit(X, y)

        # classification
        cla = stack.predict(estimator=clf, dtype='int16', nodata=0)
        self.assertIsInstance(cla, Raster)
        self.assertEqual(cla.count, 1)
        self.assertEqual(cla.read(masked=True).count(), 135092)

        # class probabilities
        probs = stack.predict_proba(estimator=clf)
        # Verify the probability result itself; the original re-checked `cla`
        # here, so the type of `probs` was never asserted.
        self.assertIsInstance(probs, Raster)
        self.assertEqual(probs.count, 7)

        # Every probability layer should expose the same number of
        # non-masked values as the class prediction.
        for _, layer in probs:
            self.assertEqual(layer.read(masked=True).count(), 135092)
예제 #2
0
from sklearn.cluster import KMeans

# Create 34 spatial clusters by running k-means on the first two columns of
# each training geometry's bounding box (minx, miny per geopandas' `bounds`).
# `n_jobs` is intentionally not passed: scikit-learn deprecated it for KMeans
# in 0.23 and removed it in 1.0, where passing it raises a TypeError.
clusters = KMeans(n_clusters=34)
clusters.fit(df_polygons.geometry.bounds.iloc[:, 0:2])

# cross validate
# NOTE(review): with an integer `cv`, scikit-learn picks (Stratified)KFold,
# which ignores `groups`. If spatially grouped folds are intended, pass a
# group-aware splitter (e.g. GroupKFold) as `cv` -- confirm that the rows of
# X line up with df_polygons before switching.
scores = cross_validate(
    lr, X, y, groups=clusters.labels_,
    scoring='accuracy',
    cv=3, n_jobs=1)
scores['test_score'].mean()

# prediction: class labels and per-class probabilities from the same estimator
result = stack.predict(estimator=lr, dtype='int16', nodata=0)
result_prob = stack.predict_proba(estimator=lr)
result.names
result_prob.names

result.plot()
plt.show()

result_prob.plot()
plt.show()

# sampling
# extract training data using a random sample
df_rand = stack.sample(size=1000, random_state=1)
df_rand.plot()
plt.show()