import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import hazelbean as hb

L = hb.get_logger()


def do_crop_types_ML(**kw):
    ### - - - - - - - - -
    ### Load dataset
    ### - - - - - - - - -
    L.info('Loading data')
    baseline_df = pd.read_csv(
        '../IPBES project/intermediate/baseline_regression_data.csv')
    L.info('Data loaded')

    ### - - - - - - - - -
    ### Cleaning dataset
    ### - - - - - - - - -

    ### - - - - - - - - - - - - - - - -
    ### Feature Engineering / selection
    ### - - - - - - - - - - - - - - - -
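# The cleaning and feature-engineering sections above are placeholders in the
# source. The sketch below is one hypothetical way the imported estimators
# could be wired together once those steps are filled in: the 'crop_type'
# label column and the dropna() cleaning step are assumptions for illustration,
# not part of the original pipeline.
def example_crop_type_fit(baseline_df):
    # Assumed cleaning step: drop rows with missing values before splitting.
    df = baseline_df.dropna()
    X = df.drop(columns=['crop_type'])  # hypothetical label column
    y = df['crop_type']

    # Scale features to [0, 1] so distance-based learners (e.g. KNN) behave sensibly.
    X_scaled = MinMaxScaler().fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=0)

    model = RandomForestClassifier(n_estimators=100, random_state=0)
    model.fit(X_train, y_train)

    # roc_auc_score expects probabilities; this assumes crop_type is binary.
    y_prob = model.predict_proba(X_test)[:, 1]
    return roc_auc_score(y_test, y_prob)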
import os, sys, warnings, logging, inspect
from osgeo import gdal, osr, ogr
import numpy as np
import hazelbean as hb

L = hb.get_logger('arrayframe_numpy_functions', logging_level='warning')
# hb.arrayframe.L.setLevel(logging.DEBUG)


def raster_calculator_flex(input_, op, output_path, **kwargs):
    # kwargs: datatype=None, ndv=None, gtiff_creation_options=None, compress=False

    # Normalize input_ to a list of raster paths.
    if isinstance(input_, str):
        input_ = [input_]
    elif isinstance(input_, hb.ArrayFrame):
        # Wrap the single path in a list so the loop below sees one entry
        # rather than iterating over the characters of the path string.
        input_ = [input_.path]

    final_input = [''] * len(input_)
    for c, i in enumerate(input_):
        if isinstance(i, hb.ArrayFrame):
            final_input[c] = i.path
        else:
            final_input[c] = i
    input_ = final_input

    # Determine size of inputs
    if isinstance(input_, str) or isinstance(input_, hb.ArrayFrame):
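# A minimal usage sketch for raster_calculator_flex, assuming it applies `op`
# element-wise to the input rasters and writes the result to `output_path`
# (the continuation of the function is not shown in this excerpt). The .tif
# paths are hypothetical placeholders.
def example_sum_rasters():
    def add_op(a, b):
        return a + b

    raster_calculator_flex(
        ['lulc_2015.tif', 'lulc_2050.tif'],  # hypothetical input rasters
        add_op,
        'lulc_sum.tif')                      # hypothetical output path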
import pandas as pd
import hazelbean as hb
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import RFE
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import Ridge
from scipy import stats
import xgboost as xgb

L = hb.get_logger('data_prep_v3')


# Utilities
def convert_af_to_1d_df(af):
    array = af.data.flatten()
    df = pd.DataFrame(array)
    return df


def concatenate_dfs_horizontally(df_list, column_headers=None):
    """Append horizontally, based on index."""
    df = pd.concat(df_list, axis=1)
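# Illustrative use of the two utilities above: flatten two rasters into 1-D
# DataFrames and join them column-wise. The raster paths are hypothetical, and
# this assumes hb.ArrayFrame exposes the grid via .data and that
# concatenate_dfs_horizontally applies column_headers to the combined frame.
def example_build_regression_frame():
    af_a = hb.ArrayFrame('precip.tif')       # hypothetical path
    af_b = hb.ArrayFrame('temperature.tif')  # hypothetical path

    df_a = convert_af_to_1d_df(af_a)
    df_b = convert_af_to_1d_df(af_b)

    combined = concatenate_dfs_horizontally([df_a, df_b], ['precip', 'temperature'])
    return combined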
import os, sys, shutil, warnings
import pprint
from collections import OrderedDict
import numpy as np
import hazelbean as hb
import math
from osgeo import gdal
import contextlib
import logging

L = hb.get_logger('hazelbean utils')


def hprint(*args, **kwargs):
    return hb_pprint(*args, **kwargs)


def pp(*args, **kwargs):
    return hb_pprint(*args, **kwargs)


def hb_pprint(*args, **kwargs):
    num_values = len(args)
    print_level = kwargs.get('print_level', 2)  # NO LONGER IMPLEMENTED
    return_as_string = kwargs.get('return_as_string', False)
    include_type = kwargs.get('include_type', False)
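# Minimal illustration of the aliases above: hprint and pp both forward their
# arguments to hb_pprint, so either spelling can be used for quick inspection.
# The dictionary content is a placeholder.
if __name__ == '__main__':
    example = OrderedDict([('cells', 64800), ('resolution', '5min')])
    hprint(example)
    pp(example, include_type=True)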
import os, sys, math, random, shutil
import logging
from collections import OrderedDict

import pandas as pd
import geopandas as gpd
import fiona

import hazelbean as hb
from hazelbean.ui import model, inputs
from hazelbean.ui import validation

# hb.ui.model.LOGGER.setLevel(logging.WARNING)
# hb.ui.inputs.LOGGER.setLevel(logging.WARNING)

L = hb.get_logger('seals', logging_level='debug')

dev_mode = True


class AutoUI(model.InVESTModel):
    def __init__(self, project):
        self.p = project
        model.InVESTModel.__init__(self,
                                   # label=u'seals',
                                   label=u'User Interface',
                                   target=self.p.execute,
                                   validator=self.p.validate,
                                   localdoc='../documentation')
import os, sys, warnings, logging, shutil
from osgeo import gdal, osr, ogr
import numpy as np
import hazelbean as hb

L = hb.get_logger('arrayframe', logging_level='warning')
# hb.arrayframe.L.setLevel(logging.DEBUG)


class ArrayFrame(object):
    """DESIRED Functionality to add: starting with an array, save as AF."""

    def __init__(self, path, **kwargs):
        if not os.path.exists(path):
            raise NameError('Path ' + str(path) + ' does not exist. Attempting to make an ArrayFrame out of it thus failed.')

        self.load_data_on_init = kwargs.get('load_data_on_init', False)

        self.path = path
        self.ds = gdal.Open(path, gdal.GA_Update)
        self.band = self.ds.GetRasterBand(1)

        self.num_cols = self.ds.RasterXSize
        self.n_cols = self.num_cols
        self.num_rows = self.ds.RasterYSize
        self.n_rows = self.num_rows
        self.shape = (self.num_rows, self.num_cols)
        self.size = self.num_cols * self.num_rows
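# A short usage sketch for ArrayFrame: open an existing raster and inspect its
# dimensions. The path is a hypothetical placeholder; note that the constructor
# opens the file in update mode, so the file must exist and be writable.
def example_inspect_raster():
    af = ArrayFrame('global_lulc.tif')  # hypothetical raster path
    L.info('Raster has shape %s (%s cells).', af.shape, af.size)
    return af.shape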
import math, os, sys, time, random, shutil, logging, csv, json
import netCDF4
import numpy as np
from osgeo import gdal, osr, ogr
import pandas as pd
import geopandas as gpd
from collections import OrderedDict
import scipy
import geoecon as ge
import hazelbean as hb
import multiprocessing

L = hb.get_logger('process_ssp_scenarios')


def extract_lulc(p):
    if p.tasks['extract_lulc']:
        for scenario_name in p.scenario_names:
            scenario_dir = os.path.join(p.task_dirs['extract_lulc'], scenario_name)
            hb.create_dirs(scenario_dir)

            filename = 'multiple-states_input4MIPs_landState_ScenarioMIP_UofMD-' + scenario_name + '-2-1-f_gn_2015-2100.nc'
            states_path = os.path.join(p.scenarios_data_dir, filename)

            for year in p.years:
                year_dir = os.path.join(scenario_dir, str(year))
                os.mkdir(year_dir)

                L.info('Extracting from ' + states_path)
import os, sys, shutil, random, math, atexit, time
from osgeo import gdal, ogr, osr
import numpy as np
import hazelbean as hb
import functools
from functools import reduce

L = hb.get_logger('spatial_projection')


def get_wkt_from_epsg_code(epsg_code):
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(int(epsg_code))
    wkt = srs.ExportToWkt()
    return wkt


def get_datasource_srs_uri(dataset_uri):
    driver = ogr.GetDriverByName('ESRI Shapefile')
    dataset = driver.Open(dataset_uri)
    layer = dataset.GetLayer()
    spatialRef = layer.GetSpatialRef()
    return spatialRef


def get_dataset_projection_wkt_uri(dataset_uri):
    """Get the projection of a GDAL dataset as well known text (WKT).
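# Example use of get_wkt_from_epsg_code defined earlier in this module: look up
# the WKT definition for a standard EPSG code. EPSG 4326 (WGS 84 geographic
# coordinates) is used purely as an illustration.
if __name__ == '__main__':
    wgs84_wkt = get_wkt_from_epsg_code(4326)
    L.info('WGS 84 WKT: ' + wgs84_wkt)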
import os, sys
import logging
import time

import numpy as np
import hazelbean as hb
from hazelbean.ui import model, inputs
from hazelbean.ui import validation

import qtpy
from qtpy import QtWidgets
from qtpy import QtCore
from qtpy import QtGui
import six
import qtawesome

logging.basicConfig(level=logging.WARNING)
hb.ui.model.LOGGER.setLevel(logging.WARNING)
hb.ui.inputs.LOGGER.setLevel(logging.WARNING)
L = hb.get_logger('seals')
L.setLevel(logging.INFO)
logging.getLogger('Fiona').setLevel(logging.WARNING)
logging.getLogger('fiona.collection').setLevel(logging.WARNING)

np.seterr(divide='ignore', invalid='ignore')

dev_mode = True


# TODO NOTE: This function must be here as it is automatically called by the invest model. Consider making this more flexible in the next release.
@validation.invest_validator
def validate(args, limit_to=None):
    validation_error_list = []

    if not os.path.exists(args['input_lulc_path']):
import os, sys, types, inspect, logging, collections, time, copy
# import nose
from collections import OrderedDict
from osgeo import gdal, osr, ogr
import numpy as np
import hazelbean as hb
import multiprocessing

try:
    import anytree
except ImportError:
    # anytree is probably not needed except for project flow.
    pass

L = hb.get_logger('project_flow')
L.setLevel(logging.INFO)


def op():
    pass


def run_iterator_in_parallel(p, task, iteration_counter):
    things_returned = []
    for child in task.children:
        L.info('iter ' + str(iteration_counter) + ': Running task ' + str(child.name) + ' with iterator parent ' + child.parent.name + ' in dir ' + str(p.cur_dir))
        r = p.run_task(child)
        things_returned.append(r)

    return things_returned
import logging
from collections import OrderedDict

import numpy as np
from osgeo import gdal
import scipy
import hazelbean as hb
import hazelbean.pyramids
from hazelbean.ui import model, inputs

import seals_utils

logging.basicConfig(level=logging.WARNING)
hb.ui.model.LOGGER.setLevel(logging.WARNING)
hb.ui.inputs.LOGGER.setLevel(logging.WARNING)
L = hb.get_logger('seals', logging_level='warning')
L.setLevel(logging.INFO)
logging.getLogger('Fiona').setLevel(logging.WARNING)
logging.getLogger('fiona.collection').setLevel(logging.WARNING)

np.seterr(divide='ignore', invalid='ignore')

p = hb.ProjectFlow()


# TASKS
def generate_batch_zones():
    global p
    p.layers_to_stitch = []
import warnings
import logging

import geopandas as gpd
import netCDF4
import numpy as np
import pandas as pd
import pygeoprocessing.geoprocessing as pgp
from pygeoprocessing.geoprocessing import *

import hazelbean as hb

# Conditional imports
try:
    import geoecon as ge
except ImportError:
    ge = None

# Alias kept for code that refers to numpy by its full name.
numpy = np

L = hb.get_logger('hb_rasterstats')

pgp_logger = logging.getLogger('geoprocessing')
loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]


def convert_polygons_to_id_raster(input_vector_path, output_raster_path, match_raster_path,
                                  id_column_label=None, data_type=None, ndv=None,
                                  all_touched=None, compress=True):
    if not id_column_label:
        # Get the column label of the first column
        gdf = gpd.read_file(input_vector_path)
        id_column_label = gdf.columns[0]

    if not data_type:
        data_type = 1

    if not ndv:
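# Hypothetical call to the function above: rasterize a zones shapefile onto the
# grid of an existing match raster so each polygon's ID fills its cells. The
# file paths and the 'zone_id' column are placeholders for illustration.
def example_rasterize_zones():
    convert_polygons_to_id_raster(
        'zones.shp',            # hypothetical input vector
        'zone_ids.tif',         # hypothetical output raster
        'global_lulc.tif',      # hypothetical match raster defining grid and extent
        id_column_label='zone_id',
        ndv=-9999,
        all_touched=False)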