Example #1

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import hazelbean as hb
L = hb.get_logger()


def do_crop_types_ML(**kw):

    ### - - - - - - - - -
    ### Load dataset
    ### - - - - - - - - -
    L.info('Loading data')
    baseline_df = pd.read_csv(
        '../IPBES project/intermediate/baseline_regression_data.csv')
    L.info('Data loaded')
    ### - - - - - - - - -
    ### Cleaning dataset
    ### - - - - - - - - -

    ### - - - - - - - - - - - - - - - -
    ### Feature Engineering / selection
    ### - - - - - - - - - - - - - - - -
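    # A hypothetical sketch of the steps these section headers imply
    # (column names like 'crop_type' are assumptions, not from the source):
    #
    #   df = baseline_df.dropna()
    #   X = MinMaxScaler().fit_transform(df.drop(columns=['crop_type']))
    #   y = df['crop_type']
    #   X_train, X_test, y_train, y_test = train_test_split(
    #       X, y, test_size=0.2, random_state=0)
    #   rf = RandomForestClassifier(n_estimators=100).fit(X_train, y_train)
    #   L.info('Test AUC: ' + str(roc_auc_score(
    #       y_test, rf.predict_proba(X_test), multi_class='ovr')))
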
Example #2
import os, sys, warnings, logging, inspect

from osgeo import gdal, osr, ogr
import numpy as np
import hazelbean as hb

L = hb.get_logger(
    'arrayframe_numpy_functions',
    logging_level='warning')  # hb.arrayframe.L.setLevel(logging.DEBUG)


def raster_calculator_flex(input_, op, output_path, **kwargs):
    # kwargs may include: datatype, ndv, gtiff_creation_options, compress.

    # If the input is a single path or ArrayFrame, wrap it in a list.
    if isinstance(input_, str):
        input_ = [input_]
    elif isinstance(input_, hb.ArrayFrame):
        input_ = [input_.path]

    # Swap any ArrayFrame entries in the list for their underlying file paths.
    final_input = [''] * len(input_)
    for c, i in enumerate(input_):
        if isinstance(i, hb.ArrayFrame):
            final_input[c] = i.path
        else:
            final_input[c] = i
    input_ = final_input

    # Determine size of inputs
    if isinstance(input_, str) or isinstance(input_, hb.ArrayFrame):
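
# A minimal usage sketch (hypothetical paths; assumes the truncated remainder
# of raster_calculator_flex applies `op` cell-wise over the inputs and writes
# the result to output_path):
#
#   raster_calculator_flex(['lulc.tif'], lambda x: x * 2.0, 'lulc_doubled.tif')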
Example #3

import pandas as pd
import hazelbean as hb

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

from sklearn.feature_selection import RFE
from sklearn.preprocessing import PolynomialFeatures

from sklearn.pipeline import make_pipeline
from sklearn.linear_model import Ridge

from scipy import stats

import xgboost as xgb

L = hb.get_logger('data_prep_v3')


# Utilities
def convert_af_to_1d_df(af):
    array = af.data.flatten()
    df = pd.DataFrame(array)
    return df


def concatenate_dfs_horizontally(df_list, column_headers=None):
    """
    Append horizontally, based on index.
    """
    df = pd.concat(df_list, axis=1)
    if column_headers:
        df.columns = column_headers
    return df
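
# Usage sketch (hypothetical raster paths; assumes af.data lazily loads the
# band as a numpy array):
#
#   af1 = hb.ArrayFrame('precip.tif')
#   af2 = hb.ArrayFrame('temperature.tif')
#   df = concatenate_dfs_horizontally(
#       [convert_af_to_1d_df(af1), convert_af_to_1d_df(af2)],
#       column_headers=['precip', 'temperature'])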
Example #4
import os, sys, shutil, warnings

import pprint
from collections import OrderedDict
import numpy as np

import hazelbean as hb
import math
from osgeo import gdal
import contextlib
import logging

L = hb.get_logger('hazelbean utils')


def hprint(*args, **kwargs):
    return hb_pprint(*args, **kwargs)


def pp(*args, **kwargs):
    return hb_pprint(*args, **kwargs)


def hb_pprint(*args, **kwargs):

    num_values = len(args)

    print_level = kwargs.get('print_level', 2)  # NO LONGER IMPLEMENTED
    return_as_string = kwargs.get('return_as_string', False)
    include_type = kwargs.get('include_type', False)
Example #5
import pandas as pd
import geopandas as gpd
from collections import OrderedDict
import logging
import fiona

import hazelbean as hb

from hazelbean.ui import validation
import os, sys, math, random, shutil

from hazelbean.ui import model, inputs
# hb.ui.model.LOGGER.setLevel(logging.WARNING)
# hb.ui.inputs.LOGGER.setLevel(logging.WARNING)
L = hb.get_logger('seals', logging_level='debug')
dev_mode = True




class AutoUI(model.InVESTModel):
    def __init__(self, project):
        self.p = project

        model.InVESTModel.__init__(self,
                                   # label=u'seals',
                                   label=u'User Interface',
                                   target=self.p.execute,
                                   validator=self.p.validate,
                                   localdoc='../documentation')
Example #6
import os, sys, warnings, logging, shutil

from osgeo import gdal, osr, ogr
import numpy as np
import hazelbean as hb

L = hb.get_logger('arrayframe', logging_level='warning') # hb.arrayframe.L.setLevel(logging.DEBUG)


class ArrayFrame(object):

    """DESIRED Functinality to add: starting with an array, save as AF."""
    def __init__(self, path, **kwargs):
        try:
            assert os.path.exists(path) is True
        except:
            raise NameError('Path ' + str(path) + ' does not exist. Attempting to make an ArrayFrame out of it thus failed.')

        self.load_data_on_init = kwargs.get('load_data_on_init', False)

        self.path = path
        self.ds = gdal.Open(path, gdal.GA_Update)
        self.band = self.ds.GetRasterBand(1)
        self.num_cols = self.ds.RasterXSize
        self.n_cols = self.num_cols
        self.num_rows = self.ds.RasterYSize
        self.n_rows = self.num_rows
        self.shape = (self.num_rows, self.num_cols)
        self.size = self.num_cols * self.num_rows
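
# Minimal usage sketch (hypothetical path):
#
#   af = hb.ArrayFrame('lulc.tif')
#   print(af.shape)  # (num_rows, num_cols) as read from the GDAL dataset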

Example #7

import math, os, sys, time, random, shutil, logging, csv, json

import netCDF4
import numpy as np
from osgeo import gdal, osr, ogr
import pandas as pd
import geopandas as gpd
from collections import OrderedDict
import scipy

import geoecon as ge
import hazelbean as hb
import multiprocessing

L = hb.get_logger('process_ssp_scenarios')

def extract_lulc(p):
    if p.tasks['extract_lulc']:

        for scenario_name in p.scenario_names:
            scenario_dir = os.path.join(p.task_dirs['extract_lulc'], scenario_name)
            hb.create_dirs(scenario_dir)

            filename = 'multiple-states_input4MIPs_landState_ScenarioMIP_UofMD-' + scenario_name + '-2-1-f_gn_2015-2100.nc'
            states_path = os.path.join(p.scenarios_data_dir, filename)

            for year in p.years:
                year_dir = os.path.join(scenario_dir, str(year))
                hb.create_dirs(year_dir)
                L.info('Extracting from ' + states_path)
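
                # A hypothetical sketch of reading one year of one land-use
                # state variable from the netCDF file (the variable name
                # 'primf' and the 2015 time origin are assumptions, not from
                # the source):
                #
                #   with netCDF4.Dataset(states_path) as nc:
                #       band = nc.variables['primf'][year - 2015]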
Example #8

import os, sys, shutil, random, math, atexit, time

from osgeo import gdal, ogr, osr
import numpy as np

import hazelbean as hb
import functools
from functools import reduce


L = hb.get_logger('spatial_projection')

def get_wkt_from_epsg_code(epsg_code):
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(int(epsg_code))
    wkt = srs.ExportToWkt()

    return wkt
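
# Usage example:
#
#   wkt_4326 = get_wkt_from_epsg_code(4326)  # WGS84 geographic CRS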


def get_datasource_srs_uri(dataset_uri):
    driver = ogr.GetDriverByName('ESRI Shapefile')
    dataset = driver.Open(dataset_uri)
    layer = dataset.GetLayer()
    spatialRef = layer.GetSpatialRef()
    return spatialRef
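
# Usage sketch (hypothetical shapefile path):
#
#   srs = get_datasource_srs_uri('countries.shp')
#   print(srs.ExportToWkt())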


def get_dataset_projection_wkt_uri(dataset_uri):
    """Get the projection of a GDAL dataset as well known text (WKT).
Example #9
from hazelbean.ui import model, inputs
from hazelbean.ui import validation
import os, sys, logging

import numpy as np
import hazelbean as hb
import qtpy
from qtpy import QtWidgets
from qtpy import QtCore
from qtpy import QtGui
import time
import six
import qtawesome

logging.basicConfig(level=logging.WARNING)
hb.ui.model.LOGGER.setLevel(logging.WARNING)
hb.ui.inputs.LOGGER.setLevel(logging.WARNING)

L = hb.get_logger('seals')
L.setLevel(logging.INFO)

logging.getLogger('Fiona').setLevel(logging.WARNING)
logging.getLogger('fiona.collection').setLevel(logging.WARNING)

np.seterr(divide='ignore', invalid='ignore')

dev_mode = True


# TODO NOTE: This function must be here as it is automatically called by the InVEST model. Consider making this more flexible in the next release.
@validation.invest_validator
def validate(args, limit_to=None):
    validation_error_list = []
    if not os.path.exists(args['input_lulc_path']):
Example #10
import os, sys, types, inspect, logging, collections, time, copy
# import nose
from collections import OrderedDict
from osgeo import gdal, osr, ogr
import numpy as np
import hazelbean as hb
import multiprocessing

try:
    import anytree
except ImportError:
    anytree = None  # anytree is probably not needed except for project flow.

L = hb.get_logger('project_flow')
L.setLevel(logging.INFO)


def op():
    pass


def run_iterator_in_parallel(p, task, iteration_counter):
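    # Run each child task of the iterator task for this iteration,
    # collecting whatever each task returns.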
    things_returned = []
    for child in task.children:
        L.info('iter ' + str(iteration_counter) + ': Running task ' +
               str(child.name) + ' with iterator parent ' + child.parent.name +
               ' in dir ' + str(p.cur_dir))
        r = p.run_task(child)
        things_returned.append(r)

    return things_returned
Example #11

import logging

import numpy as np
from osgeo import gdal
import scipy
import hazelbean as hb
import hazelbean.pyramids
from hazelbean.ui import model, inputs

from collections import OrderedDict

import seals_utils

logging.basicConfig(level=logging.WARNING)
hb.ui.model.LOGGER.setLevel(logging.WARNING)
hb.ui.inputs.LOGGER.setLevel(logging.WARNING)

L = hb.get_logger('seals', logging_level='warning')
L.setLevel(logging.INFO)

logging.getLogger('Fiona').setLevel(logging.WARNING)
logging.getLogger('fiona.collection').setLevel(logging.WARNING)

np.seterr(divide='ignore', invalid='ignore')

p = hb.ProjectFlow()


# TASKS
def generate_batch_zones():
    global p
    p.layers_to_stitch = []
Example #12

import numpy as np
import hazelbean as hb
import geopandas as gpd
import warnings
import netCDF4
import logging
import pandas as pd
import pygeoprocessing.geoprocessing as pgp
from pygeoprocessing.geoprocessing import *

# Conditional imports
try:
    import geoecon as ge
except ImportError:
    ge = None

numpy = np
L = hb.get_logger('hb_rasterstats')
pgp_logger = logging.getLogger('geoprocessing')

loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]

def convert_polygons_to_id_raster(input_vector_path, output_raster_path, match_raster_path,
                                  id_column_label=None, data_type=None, ndv=None, all_touched=None, compress=True):
    if not id_column_label:
        # Get the column label of the first column
        gdf = gpd.read_file(input_vector_path)
        id_column_label = gdf.columns[0]

    if not data_type:
        data_type = 1  # GDAL type code 1 = GDT_Byte

    if not ndv: