def test_regression(self):
    """Fit a random forest on the meuse data and predict single- and
    multi-target regression rasters, checking the output layer counts."""
    stack = Raster(ms.predictors)
    training_pt = gpd.read_file(ms.meuse)
    training = stack.extract_vector(
        response=training_pt,
        columns=['cadmium', 'copper', 'lead', 'zinc'])

    # single target regression
    estimator = RandomForestRegressor(n_estimators=50)
    features = training.loc[:, stack.names]
    target = training['zinc']
    estimator.fit(features, target)

    single_regr = stack.predict(estimator)
    self.assertIsInstance(single_regr, Raster)
    self.assertEqual(single_regr.count, 1)

    # multi-target regression: refit on all four response columns and
    # expect one output band per target
    target = training.loc[:, ['zinc', 'cadmium', 'copper', 'lead']]
    estimator.fit(features, target)

    multi_regr = stack.predict(estimator)
    self.assertIsInstance(multi_regr, Raster)
    self.assertEqual(multi_regr.count, 4)
def test_regression(self):
    """Fit a random forest on the meuse data and predict single- and
    multi-target regression rasters, checking the output layer counts.

    Fix: the original assembled the response columns with four separate,
    copy-pasted ``extract_vector`` calls; the repeated extractions are
    consolidated into one loop with identical results.
    """
    # collect the GeoTIFF predictor paths from the meuse directory
    meuse_predictors = [
        os.path.join(meuse_dir, i)
        for i in os.listdir(meuse_dir)
        if i.endswith('.tif')
    ]
    stack = Raster(meuse_predictors)
    self.assertEqual(stack.count, 21)

    training_pt = gpd.read_file(os.path.join(meuse_dir, 'meuse.shp'))

    # first extraction establishes the training frame; the remaining
    # response fields are appended column-by-column (old API supports
    # only a single `field` per call)
    training = stack.extract_vector(response=training_pt, field='cadmium')
    for field in ('copper', 'lead', 'zinc'):
        training[field] = stack.extract_vector(
            response=training_pt, field=field)[field]

    # single target regression
    regr = RandomForestRegressor(n_estimators=50)
    X = training.loc[:, stack.names]
    y = training['zinc']
    regr.fit(X, y)

    single_regr = stack.predict(regr)
    self.assertIsInstance(single_regr, Raster)
    self.assertEqual(single_regr.count, 1)

    # multi-target regression: one output band per response column
    y = training.loc[:, ['zinc', 'cadmium', 'copper', 'lead']]
    regr.fit(X, y)

    multi_regr = stack.predict(regr)
    self.assertIsInstance(multi_regr, Raster)
    self.assertEqual(multi_regr.count, 4)
def test_classification(self):
    """Fit a random forest classifier and check both the class prediction
    and the per-class probability rasters.

    Fix: after ``predict_proba`` the original re-asserted
    ``assertIsInstance(cla, Raster)`` — a copy-paste slip that never
    checked the type of ``probs``. Assert on ``probs`` instead.
    """
    stack = Raster(self.predictors)
    training_pt = gpd.read_file(nc.points)
    df_points = stack.extract_vector(response=training_pt, columns='id')

    clf = RandomForestClassifier(n_estimators=50)
    X = df_points.drop(columns=['id', 'geometry'])
    y = df_points.id
    clf.fit(X, y)

    # classification: single-band integer raster with the expected
    # number of valid (unmasked) pixels
    cla = stack.predict(estimator=clf, dtype='int16', nodata=0)
    self.assertIsInstance(cla, Raster)
    self.assertEqual(cla.count, 1)
    self.assertEqual(cla.read(masked=True).count(), 135092)

    # class probabilities: one band per class, same valid-pixel count
    probs = stack.predict_proba(estimator=clf)
    self.assertIsInstance(probs, Raster)
    self.assertEqual(probs.count, 7)

    for _, layer in probs:
        self.assertEqual(layer.read(masked=True).count(), 135092)
# spatial cross-validation
from sklearn.cluster import KMeans

# cluster the training points into 34 spatial groups using the x, y
# coordinates of their bounding boxes, so that cross-validation folds
# are spatially separated
clusters = KMeans(n_clusters=34, n_jobs=-1)
clusters.fit(df_polygons.geometry.bounds.iloc[:, 0:2])

# cross validate using the spatial clusters as grouping labels
scores = cross_validate(
    lr, X, y, groups=clusters.labels_,
    scoring='accuracy',
    cv=3, n_jobs=1)
scores['test_score'].mean()

# prediction: class raster plus per-class probability raster
result = stack.predict(estimator=lr, dtype='int16', nodata=0)
result_prob = stack.predict_proba(estimator=lr)

result.names
result_prob.names

result.plot()
plt.show()

result_prob.plot()
plt.show()

# sampling
# extract training data using a random sample of pixels
df_rand = stack.sample(size=1000, random_state=1)
df_rand.plot()
plt.show()
# cluster the training points into 34 spatial groups using the x, y
# coordinates of their bounding boxes, for spatially separated CV folds
clusters = KMeans(n_clusters=34, n_jobs=-1)
clusters.fit(df_polygons.geometry.bounds.iloc[:, 0:2])

# cross validate using the spatial clusters as grouping labels
scores = cross_validate(
    lr, X, y, groups=clusters.labels_,
    scoring='accuracy',
    cv=3, n_jobs=1)
scores['test_score'].mean()

# prediction
df = stack.read(as_df=True, masked=True)

# NOTE(review): the second predict call overwrites the first, so only
# the as_df=True result is kept — presumably both calls are shown for
# demonstration; confirm this is intentional
result = stack.predict(estimator=lr, dtype='int16', nodata=0)
result = stack.predict(estimator=lr, dtype='int16', nodata=0, as_df=True)
result_prob = stack.predict_proba(estimator=lr)

result.names
result_prob.names

result.plot()
plt.show()

result_prob.plot()
plt.show()

# sampling
# extract training data using a random sample of pixels
df_rand = stack.sample(size=1000, random_state=1)
df_rand.plot()