def test_classification(self):
    """Fit a random-forest classifier on point samples extracted from the
    predictor stack, then check both the single-band classification raster
    and the multi-band class-probability raster.

    Expected counts (135092 valid cells, 7 classes) are tied to the
    fixture data in ``self.predictors`` / ``nc.points``.
    """
    stack = Raster(self.predictors)
    training_pt = gpd.read_file(nc.points)

    # extract predictor values at the training point locations
    df_points = stack.extract_vector(response=training_pt, columns='id')

    clf = RandomForestClassifier(n_estimators=50)
    X = df_points.drop(columns=['id', 'geometry'])
    y = df_points.id
    clf.fit(X, y)

    # classification: one band, int16, nodata masked out
    cla = stack.predict(estimator=clf, dtype='int16', nodata=0)
    self.assertIsInstance(cla, Raster)
    self.assertEqual(cla.count, 1)
    self.assertEqual(cla.read(masked=True).count(), 135092)

    # class probabilities: one band per class
    probs = stack.predict_proba(estimator=clf)
    # BUG FIX: the original re-asserted isinstance on `cla` here, so the
    # type of the probability raster was never actually checked.
    self.assertIsInstance(probs, Raster)
    self.assertEqual(probs.count, 7)
    for _, layer in probs:
        self.assertEqual(layer.read(masked=True).count(), 135092)
from sklearn.cluster import KMeans

# Create spatial clusters by k-means on the training polygons' minimum
# x,y coordinates (bounds columns 0:2 are minx, miny), so that
# cross-validation folds are grouped spatially rather than randomly.
# FIX: the old comment claimed "10 spatial clusters" but the code uses
# n_clusters=34; also removed `n_jobs`, which was deprecated in
# scikit-learn 0.23 and removed in 1.0 — KMeans no longer accepts it.
clusters = KMeans(n_clusters=34)
clusters.fit(df_polygons.geometry.bounds.iloc[:, 0:2])

# cross-validate with the spatial cluster labels as CV groups
scores = cross_validate(
    lr, X, y,
    groups=clusters.labels_,
    scoring='accuracy',
    cv=3,
    n_jobs=1,
)
scores['test_score'].mean()

# prediction: hard classes and per-class probabilities from the stack
result = stack.predict(estimator=lr, dtype='int16', nodata=0)
result_prob = stack.predict_proba(estimator=lr)
result.names
result_prob.names
result.plot()
plt.show()
result_prob.plot()
plt.show()

# sampling
# extract training data using a random sample of raster cells
df_rand = stack.sample(size=1000, random_state=1)
df_rand.plot()
plt.show()