# Assumed module-level setup for this example script (not shown in the original
# snippet); the import paths and the TRAINING_DIR constant are assumptions.
import os
import sys
from collections import OrderedDict

from sklearn.ensemble import ExtraTreesClassifier
from sklearn import cross_validation  # legacy API; sklearn >= 0.20 moved this to sklearn.model_selection
from pyimpute import stratified_sample_raster, load_training_rasters, load_targets, impute

# TRAINING_DIR = "..."  # path to the training data directory; defined elsewhere in the project


def main():
    # Define the known data points or "training" data
    explanatory_fields = "tmin12c tmax8c p_ph_c pmean_wntrc pmean_sumrc irr_lands gt_demc grwsnc d2u2c".split()
    explanatory_rasters = [os.path.join(TRAINING_DIR, r, "hdr.adf") for r in explanatory_fields]
    response_raster = os.path.join(TRAINING_DIR, 'iso_zns3-27/hdr.adf')

    # Take a random stratified sample of the response raster
    selected = stratified_sample_raster(response_raster,
                                        target_sample_size=20,
                                        min_sample_proportion=0.01)

    # Load the training rasters using the sampled subset
    train_xs, train_y = load_training_rasters(response_raster, explanatory_rasters, selected)
    print(train_xs.shape, train_y.shape)

    # Train the classifier
    clf = ExtraTreesClassifier(n_estimators=10, n_jobs=1)
    clf.fit(train_xs, train_y)
    print(clf)

    # Cross validate
    k = 5
    scores = cross_validation.cross_val_score(clf, train_xs, train_y, cv=k)
    print("%d-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)"
          % (k, scores.mean() * 100, scores.std() * 200))

    # ... Other model assessment (e.g. a hold-out confusion matrix; see the sketch after this script)

    # Run the model on the current data; i.e. predict itself
    print("Imputing response rasters FOR CURRENT DATA")
    target_xs, raster_info = load_targets(explanatory_rasters)
    impute(target_xs, clf, raster_info, outdir="_aez_output_current",
           linechunk=400, class_prob=True, certainty=True)

    # NOTE: this early exit stops the script here; the future-climate scenarios
    # below will only run if it is removed.
    sys.exit()

    years = ['2070s']
    for year in years:
        print("Loading target explanatory raster data, swapping out for %s climate data" % year)
        fdir = os.path.join(TRAINING_DIR, "../RCP85/%s/" % year)

        # Swap out only the datasets that are predicted to change over time (i.e. the climate data)
        climate_rasters = "grwsnc pmean_sumrc pmean_wntrc tmax8c tmin12c".split()
        new_explanatory_rasters = OrderedDict(zip(explanatory_fields, explanatory_rasters))
        for cr in climate_rasters:
            new_explanatory_rasters[cr] = fdir + cr + "/hdr.adf"

        target_xs, raster_info = load_targets(new_explanatory_rasters.values())

        print("Imputing response rasters")
        impute(target_xs, clf, raster_info, outdir="_aez_output_%s" % year,
               linechunk=40, class_prob=True, certainty=True)
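# A sketch of the "other model assessment" step left as a placeholder above:
# hold out part of the training data and report a confusion matrix. This is not
# part of the original script and the assess() helper is hypothetical; it uses
# sklearn.model_selection, whereas older scikit-learn releases (as used above)
# expose train_test_split under sklearn.cross_validation.
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split


def assess(train_xs, train_y):
    # Hold out 25% of the samples for an independent accuracy estimate
    X_train, X_test, y_train, y_test = train_test_split(
        train_xs, train_y, test_size=0.25, random_state=0)
    clf = ExtraTreesClassifier(n_estimators=10, n_jobs=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("Hold-out accuracy: %0.2f%%" % (accuracy_score(y_test, y_pred) * 100))
    print(confusion_matrix(y_test, y_pred))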
# Assumed module-level setup for this example script (not shown in the original
# snippet); the import paths and the TRAINING_DIR constant are assumptions.
import os
import json

import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn import cross_validation  # legacy API; sklearn >= 0.20 moved this to sklearn.model_selection
from pyimpute import load_training_vector, load_targets, impute

# TRAINING_DIR = "..."  # path to the training data directory; defined elsewhere in the project


def main():
    # Define the known data points or "training" data
    explanatory_fields = ("d100 dd0 dd5 fday ffp gsdd5 gsp map mat_tenths mmax_tenths "
                          "mmindd0 mmin_tenths mtcm_tenths mtwm_tenths sday").split()
    explanatory_rasters = [os.path.join(TRAINING_DIR, "current_" + r + ".img")
                           for r in explanatory_fields]
    response_shapes = os.path.join(TRAINING_DIR, "DF.shp")

    # Load the training data from the response shapefile, using a JSON cache
    # to avoid re-extracting it on every run
    try:
        cached = json.load(open("_cached_training.json"))
        train_xs = np.array(cached['train_xs'])
        train_y = np.array(cached['train_y'])
    except IOError:
        train_xs, train_y = load_training_vector(response_shapes,
                                                 explanatory_rasters,
                                                 response_field='GRIDCODE')
        cache = {'train_xs': train_xs.tolist(), 'train_y': train_y.tolist()}
        with open("_cached_training.json", 'w') as fh:
            fh.write(json.dumps(cache))

    print(train_xs.shape, train_y.shape)

    # Train the classifier
    clf = ExtraTreesClassifier(n_estimators=120, n_jobs=3)
    clf.fit(train_xs, train_y)
    print(clf)

    # Cross validate
    k = 5
    scores = cross_validation.cross_val_score(clf, train_xs, train_y, cv=k)
    print("%d-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)"
          % (k, scores.mean() * 100, scores.std() * 200))

    # Run the model on the current data; i.e. predict current conditions
    print("Imputing response rasters FOR CURRENT DATA")
    target_xs, raster_info = load_targets(explanatory_rasters)
    impute(target_xs, clf, raster_info, outdir="_usfs_output_current",
           linechunk=400, class_prob=True, certainty=True)

    years = ['2060']
    for year in years:
        print("Loading target explanatory raster data, swapping out for %s climate data" % year)

        # Swap out for future climate rasters
        new_explanatory_rasters = [os.path.join(TRAINING_DIR,
                                                "Ensemble_rcp60_y%s_%s.img" % (year, r))
                                   for r in explanatory_fields]

        target_xs, raster_info = load_targets(new_explanatory_rasters)

        print("Imputing response rasters")
        impute(target_xs, clf, raster_info, outdir="_usfs_output_%s" % year,
               linechunk=400, class_prob=True, certainty=True)
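# A sketch (not part of the original scripts) of how the rasters written by
# impute() could be inspected with rasterio once a run finishes. The filenames
# "responses.tif" and "certainty.tif" match what the test suite below asserts;
# the directory name is the outdir used for current conditions above.
import rasterio

with rasterio.open("_usfs_output_current/responses.tif") as src:
    responses = src.read(1)  # predicted class for every pixel
    print(src.crs, src.transform, responses.shape)

with rasterio.open("_usfs_output_current/certainty.tif") as src:
    certainty = src.read(1)  # classifier certainty for every pixel
    print(certainty.min(), certainty.max())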
def test_impute():
    from pyimpute import load_training_rasters, load_targets, impute

    # response_raster, explanatory_rasters and TMPOUT (plus the os import) are
    # module-level test fixtures; see the sketch after this test for what they
    # are assumed to look like.

    # Load training data
    train_xs, train_y = load_training_rasters(response_raster, explanatory_rasters)

    # Train a classifier
    from sklearn.ensemble import RandomForestClassifier
    clf = RandomForestClassifier(n_estimators=10, n_jobs=1)
    clf.fit(train_xs, train_y)

    # Load targets
    target_xs, raster_info = load_targets(explanatory_rasters)

    # Go...
    impute(target_xs, clf, raster_info, outdir=TMPOUT,
           linechunk=400, class_prob=True, certainty=True)

    # The imputation should write a response raster, a certainty raster and
    # per-class probability rasters into the output directory.
    assert os.path.exists(os.path.join(TMPOUT, "responses.tif"))
    assert os.path.exists(os.path.join(TMPOUT, "certainty.tif"))
    assert os.path.exists(os.path.join(TMPOUT, "probability_90.tif"))
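# A sketch of the module-level fixtures the tests in this file rely on. The
# exact paths are hypothetical; in the real test suite they point at the bundled
# test data, and the seven explanatory rasters match the (38304, 7) shape
# asserted in test_load_targets below.
import os
import tempfile

TESTDATA = os.path.join(os.path.dirname(__file__), "data")  # hypothetical data dir
TMPOUT = tempfile.mkdtemp()

response_raster = os.path.join(TESTDATA, "responses.tif")
explanatory_rasters = [os.path.join(TESTDATA, "explanatory_%d.tif" % i)
                       for i in range(7)]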
def test_load_targets():
    from pyimpute import load_targets

    target_xs, raster_info = load_targets(explanatory_rasters)
    # Older variant of this test: the geotransform is reported under an 'affine' key
    assert sorted(raster_info.keys()) == ['affine', 'crs', 'shape']
    assert target_xs.shape == (38304, 7)
def test_load_targets():
    from pyimpute import load_targets

    target_xs, raster_info = load_targets(explanatory_rasters)
    # Newer variant of the test above: the geotransform key is named 'transform'
    # (matching rasterio's switch from .affine to .transform)
    assert sorted(raster_info.keys()) == ["crs", "shape", "transform"]
    assert target_xs.shape == (38304, 7)
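# A sketch (not from the test suite) of how the raster_info metadata returned by
# load_targets could be used to write a single-band GeoTIFF with rasterio, e.g.
# after reshaping a flat prediction array back onto the target grid. It uses the
# 'shape', 'crs' and 'transform' keys checked in the newer test variant above;
# 'shape' is assumed to be (height, width), and write_band() and its output path
# are hypothetical.
import numpy as np
import rasterio


def write_band(predictions, raster_info, path="predicted.tif"):
    height, width = raster_info["shape"]
    band = np.asarray(predictions).reshape(height, width).astype("int32")
    with rasterio.open(path, "w", driver="GTiff",
                       height=height, width=width, count=1, dtype="int32",
                       crs=raster_info["crs"],
                       transform=raster_info["transform"]) as dst:
        dst.write(band, 1)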