def convert_code(in_file, out_file, in_alg='taudem', out_alg='arcgis', datatype=None):
    """Convert D8 flow direction code from one algorithm to another.

    Args:
        in_file: input raster file path
        out_file: output raster file path
        in_alg: available algorithms are in FlowModelConst.d8_dirs. "taudem" is the default
        out_alg: same as in_alg. "arcgis" is the default
        datatype: default is None, which uses the datatype of the in_file
    """
    FileClass.check_file_exists(in_file)
    in_alg = in_alg.lower()
    out_alg = out_alg.lower()
    if in_alg not in FlowModelConst.d8_dirs or out_alg not in FlowModelConst.d8_dirs:
        raise RuntimeError('The input algorithm name should be one of %s'
                           % ', '.join(list(FlowModelConst.d8_dirs.keys())))
    convert_dict = dict()
    in_code = FlowModelConst.d8_dirs.get(in_alg)
    out_code = FlowModelConst.d8_dirs.get(out_alg)
    assert len(in_code) == len(out_code)
    for i, tmp_in_code in enumerate(in_code):
        convert_dict[tmp_in_code] = out_code[i]
    if datatype is not None and datatype in GDALDataType:
        RasterUtilClass.raster_reclassify(in_file, convert_dict, out_file, datatype)
    else:
        RasterUtilClass.raster_reclassify(in_file, convert_dict, out_file)
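# A minimal standalone sketch of the code-mapping step above, assuming the commonly
# used D8 code lists (TauDEM: 1-8 counter-clockwise from east; ArcGIS: powers of two
# clockwise from east). In convert_code() these lists come from FlowModelConst.d8_dirs,
# so the values here are illustrative, not the library's authoritative definition.
taudem_codes = [1, 2, 3, 4, 5, 6, 7, 8]       # E, NE, N, NW, W, SW, S, SE
arcgis_codes = [1, 128, 64, 32, 16, 8, 4, 2]  # the same directions in ArcGIS coding

convert_dict = {src: dst for src, dst in zip(taudem_codes, arcgis_codes)}
print(convert_dict[3])  # TauDEM "north" (3) -> ArcGIS "north" (64)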
def mask_origin_delineated_data(cfg):
    """Mask the original delineated data by Subbasin raster."""
    subbasin_tau_file = cfg.taudems.subbsn
    geodata2dbdir = cfg.dirs.geodata2db
    UtilClass.mkdir(geodata2dbdir)
    mask_file = cfg.spatials.mask
    RasterUtilClass.get_mask_from_raster(subbasin_tau_file, mask_file)
    # Total 12 raster files
    original_files = [cfg.taudems.subbsn, cfg.taudems.d8flow, cfg.taudems.stream_raster,
                      cfg.taudems.slp, cfg.taudems.filldem, cfg.taudems.d8acc,
                      cfg.taudems.stream_order, cfg.taudems.dinf, cfg.taudems.dinf_d8dir,
                      cfg.taudems.dinf_slp, cfg.taudems.dinf_weight,
                      cfg.taudems.dist2stream_d8]
    # output masked files
    output_files = [cfg.taudems.subbsn_m, cfg.taudems.d8flow_m, cfg.taudems.stream_m,
                    cfg.spatials.slope, cfg.spatials.filldem, cfg.spatials.d8acc,
                    cfg.spatials.stream_order, cfg.spatials.dinf, cfg.spatials.dinf_d8dir,
                    cfg.spatials.dinf_slp, cfg.spatials.dinf_weight,
                    cfg.spatials.dist2stream_d8]
    default_values = list()
    for i in range(len(original_files)):
        default_values.append(DEFAULT_NODATA)
    # other input rasters that need to be masked: soil and landuse
    FileClass.check_file_exists(cfg.soil)
    FileClass.check_file_exists(cfg.landuse)
    original_files.append(cfg.soil)
    output_files.append(cfg.spatials.soil_type)
    default_values.append(cfg.default_soil)
    original_files.append(cfg.landuse)
    output_files.append(cfg.spatials.landuse)
    default_values.append(cfg.default_landuse)
    # Additional raster files
    for k, v in cfg.additional_rs.items():
        org_v = v
        if not FileClass.is_file_exists(org_v):
            v = cfg.spatial_dir + os.path.sep + org_v
            if not FileClass.is_file_exists(v):
                print('WARNING: The additional file %s MUST be located in '
                      'SPATIAL_DATA_DIR, or provided as full file path!' % k)
                continue
        original_files.append(v)
        output_files.append(cfg.dirs.geodata2db + os.path.sep + k + '.tif')
        default_values.append(DEFAULT_NODATA)
    config_file = cfg.logs.mask_cfg
    # run mask operation
    print('Mask original delineated data by Subbasin raster...')
    SpatialDelineation.mask_raster_cpp(cfg.seims_bin, mask_file, original_files,
                                       output_files, default_values, config_file)
def raster2shp(rasterfile, vectorshp, layername=None, fieldname=None):
    """Convert raster to ESRI shapefile"""
    FileClass.remove_files(vectorshp)
    FileClass.check_file_exists(rasterfile)
    # raster to polygon vector
    exepath = FileClass.get_executable_fullpath("gdal_polygonize.py")
    str_cmd = 'python %s -f "ESRI Shapefile" %s %s' % (exepath, rasterfile, vectorshp)
    if layername is not None and fieldname is not None:
        str_cmd += ' %s %s' % (layername, fieldname)
    print(str_cmd)
    print(UtilClass.run_command(str_cmd))
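# For reference, a hypothetical call and the command line it assembles. Paths and
# names are illustrative, and gdal_polygonize.py must be resolvable by
# FileClass.get_executable_fullpath for the call to succeed.
#
#   raster2shp('subbasin.tif', 'subbasin.shp', layername='subbasin', fieldname='SUBBASINID')
#
# would run roughly:
#
#   python /path/to/gdal_polygonize.py -f "ESRI Shapefile" subbasin.tif subbasin.shp subbasin SUBBASINID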
def raster2shp(rasterfile, vectorshp, layername=None, fieldname=None,
               band_num=1, mask='default'):
    """Convert raster to ESRI shapefile"""
    FileClass.remove_files(vectorshp)
    FileClass.check_file_exists(rasterfile)
    # this allows GDAL to throw Python Exceptions
    gdal.UseExceptions()
    src_ds = gdal.Open(rasterfile)
    if src_ds is None:
        print('Unable to open %s' % rasterfile)
        sys.exit(1)
    try:
        srcband = src_ds.GetRasterBand(band_num)
    except RuntimeError as e:
        # for example, try GetRasterBand(10)
        print('Band ( %i ) not found, %s' % (band_num, e))
        sys.exit(1)
    if mask == 'default':
        maskband = srcband.GetMaskBand()
    elif mask is None or mask.upper() == 'NONE':
        maskband = None
    else:
        mask_ds = gdal.Open(mask)
        maskband = mask_ds.GetRasterBand(1)
    # create output datasource
    if layername is None:
        layername = FileClass.get_core_name_without_suffix(rasterfile)
    drv = ogr_GetDriverByName(str('ESRI Shapefile'))
    dst_ds = drv.CreateDataSource(vectorshp)
    srs = None
    if src_ds.GetProjection() != '':
        srs = osr_SpatialReference()
        srs.ImportFromWkt(src_ds.GetProjection())
    dst_layer = dst_ds.CreateLayer(str(layername), srs=srs)
    if fieldname is None:
        fieldname = layername.upper()
    fd = ogr_FieldDefn(str(fieldname), OFTInteger)
    dst_layer.CreateField(fd)
    dst_field = 0
    result = gdal.Polygonize(srcband, maskband, dst_layer, dst_field,
                             ['8CONNECTED=8'], callback=None)
    return result
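# This version relies on aliased GDAL/OGR names (ogr_GetDriverByName, ogr_FieldDefn,
# OFTInteger, osr_SpatialReference). A minimal sketch of the module-level imports such
# aliases imply, assuming they are plain re-exports of the osgeo API; the project's own
# import section is authoritative.
import sys

from osgeo import gdal
from osgeo.ogr import GetDriverByName as ogr_GetDriverByName
from osgeo.ogr import FieldDefn as ogr_FieldDefn
from osgeo.ogr import OFTInteger
from osgeo.osr import SpatialReference as osr_SpatialReference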
def read_crop_lookup_table(crop_lookup_file):
    """read crop lookup table"""
    FileClass.check_file_exists(crop_lookup_file)
    data_items = read_data_items_from_txt(crop_lookup_file)
    attr_dic = dict()
    fields = data_items[0]
    n = len(fields)
    for i in range(n):
        attr_dic[fields[i]] = dict()
    for items in data_items[1:]:
        cur_id = int(items[0])
        for i in range(n):
            dic = attr_dic[fields[i]]
            try:
                dic[cur_id] = float(items[i])
            except ValueError:
                dic[cur_id] = items[i]
    return attr_dic
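# Illustration of the returned structure, assuming a small three-column lookup file
# parsed by read_data_items_from_txt (the real crop lookup table has many more columns;
# the field names below are examples only):
#
#   ICNUM  CPNM  BIO_E
#   1      AGRL  39.0
#   2      AGRR  39.0
#
# read_crop_lookup_table(...) returns a dict keyed by field name, then by the integer
# ID taken from the first column; values are floats where possible, else raw strings:
expected = {'ICNUM': {1: 1.0, 2: 2.0},
            'CPNM': {1: 'AGRL', 2: 'AGRR'},
            'BIO_E': {1: 39.0, 2: 39.0}}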
def main(landusef, fieldf, fieldtxt, jsonout):
    """Construct hydrologically connected fields units data in JSON file format."""
    # Check the file existence
    FileClass.check_file_exists(landusef)
    FileClass.check_file_exists(fieldf)
    FileClass.check_file_exists(fieldtxt)
    # read raster data and check the extent based on landuse.
    landuser = RasterUtilClass.read_raster(landusef)
    data_landuse = landuser.data
    nrows = landuser.nRows
    ncols = landuser.nCols
    dx = landuser.dx
    nodata_landuse = landuser.noDataValue
    fieldr = RasterUtilClass.read_raster(fieldf)
    if fieldr.nRows != nrows or fieldr.nCols != ncols:
        raise ValueError('The connected_fields raster MUST have the same dimensions'
                         ' with landuse!')
    data_fields = fieldr.data
    nodata_fields = fieldr.noDataValue
    # Read the initial relationships between fields
    fields_info = read_field_relationships(fieldtxt)
    # add landuse types and areas
    for m in range(nrows):
        for n in range(ncols):
            cur_lu = int(data_landuse[m][n])
            cur_fld = int(data_fields[m][n])
            if cur_fld == nodata_fields or cur_lu == nodata_landuse or cur_lu <= 0:
                continue
            if cur_fld not in fields_info['units']:
                raise ValueError('%d is not recorded in field relationship text!' % cur_fld)
            if cur_lu not in fields_info['units'][cur_fld]['landuse']:
                fields_info['units'][cur_fld]['landuse'][cur_lu] = 1
            else:
                fields_info['units'][cur_fld]['landuse'][cur_lu] += 1
    for k, v in viewitems(fields_info['units']):
        area_field = 0.
        area_max = 0.
        area_max_lu = 0
        for luid, luarea in viewitems(v['landuse']):
            v['landuse'][luid] = luarea * dx * dx * 1.e-6  # cell count to area (km2)
            area_field += v['landuse'][luid]
            if v['landuse'][luid] > area_max:
                area_max = v['landuse'][luid]
                area_max_lu = luid
        v['area'] = area_field
        if v['primarylanduse'] != area_max_lu:
            print(k, v['primarylanduse'], area_max_lu)
        v['primarylanduse'] = area_max_lu
    # save to json
    json_updown_data = json.dumps(fields_info, indent=4)
    with open(jsonout, 'w', encoding='utf-8') as f:
        f.write('%s' % json_updown_data)
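# Illustrative shape of one unit entry in the output JSON after this function fills in
# landuse areas. The up-down relationship keys themselves come from
# read_field_relationships and are only sketched here; all values are made up.
#
# "units": {
#     "12": {
#         "downslope": 13,
#         "upslope": [8, 9],
#         "landuse": {"33": 0.0225, "81": 0.0036},   # km2 per landuse type
#         "primarylanduse": 33,
#         "area": 0.0261                             # km2
#     },
#     ...
# }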
def __init__(self, flowdirf, subbsnf, elevf, rdgsrc, flow_model=1, prop=0., ws=None):
    """Initialize file names."""
    FileClass.check_file_exists(flowdirf)
    FileClass.check_file_exists(subbsnf)
    FileClass.check_file_exists(elevf)
    if ws is None:
        # default workspace: the directory of the flow direction raster
        ws = os.path.dirname(flowdirf)
    self.ws = ws
    if flow_model == 1:
        suffix = '_dinf.tif'
    else:
        suffix = '_d8.tif'
    self.rdgorg = self.ws + os.sep + 'RdgOrgSrc' + suffix
    self.boundsrc = self.ws + os.sep + 'RdgPotSrc' + suffix
    self.boundsrcfilter = self.ws + os.sep + 'RdgPotSrcFilter' + suffix
    if rdgsrc is None:
        rdgsrc = self.ws + os.sep + 'rdgsrc' + suffix
    self.rdgsrc = rdgsrc
    self.flowmodel = flow_model
    self.prop = prop
    # read raster data
    flowdir_r = RasterUtilClass.read_raster(flowdirf)
    self.flowdir_data = flowdir_r.data
    self.nrows = flowdir_r.nRows
    self.ncols = flowdir_r.nCols
    self.nodata_flow = flowdir_r.noDataValue
    self.geotrans = flowdir_r.geotrans
    self.srs = flowdir_r.srs
    subbsn_r = RasterUtilClass.read_raster(subbsnf)
    self.subbsn_data = subbsn_r.data
    self.nodata_subbsn = subbsn_r.noDataValue
    elev_r = RasterUtilClass.read_raster(elevf)
    self.elev_data = elev_r.data
    self.nodata_elev = elev_r.noDataValue
    # initialize output arrays
    self.rdgsrc_data = numpy.ones((self.nrows, self.ncols))
    self.rdgpot = numpy.ones((self.nrows, self.ncols)) * DEFAULT_NODATA
def __init__(self,
             tag_names,  # type: List[Tuple[int, AnyStr]]
             slpposf,  # type: AnyStr
             reach_shp,  # type: AnyStr
             hillslpf,  # type: AnyStr
             landusef  # type: AnyStr
             ):
    # type: (...) -> None
    """Initialization.

    Args:
        tag_names: list of (tag, name) tuples, where tag is an integer and name is a
                   string; tags should ascend from the top (upslope) position to the
                   bottom (downslope) position.
        slpposf: Full filename of the crisp slope position classification raster.
        reach_shp: Reach shapefile used to extract the up-down relationships of subbasins.
        hillslpf: Hillslope raster delineated by sd_hillslope.py.
        landusef: Landuse raster, used to compute the area of each landuse type
                  within each slope position unit.

    Attributes:
        slppos_tags(OrderedDict): {tag: name}
        subbsin_tree: up-down stream relationships of subbasins.
                      {subbsnID: {'upstream': [], 'downstream': []}}
        units_updwon: Output json data of slope position units.
                      {"slppos_1": {id: {"downslope": [ids], "upslope": [ids],
                                         "landuse": {luID: area},
                                         "hillslope": [hillslpID],
                                         "subbasin": [subbsnID],
                                         "area": area}},
                       "slppos_2": ...}
    """
    # Check the file existence
    FileClass.check_file_exists(slpposf)
    FileClass.check_file_exists(reach_shp)
    FileClass.check_file_exists(hillslpf)
    FileClass.check_file_exists(landusef)
    # Set inputs
    self.ws = os.path.dirname(slpposf)
    tag_names = sorted(tag_names, key=lambda x: x[0])
    # initialize slope position dict with up-down relationships
    self.slppos_tags = OrderedDict()  # type: Dict[int, Dict[AnyStr, Union[int, AnyStr]]]
    for idx, tagname in enumerate(tag_names):
        tag, name = tagname
        if len(tag_names) > 1:
            if idx == 0:
                self.slppos_tags[int(tag)] = {'name': name, 'upslope': -1,
                                              'downslope': tag_names[idx + 1][0]}
            elif idx == len(tag_names) - 1:
                self.slppos_tags[int(tag)] = {'name': name,
                                              'upslope': tag_names[idx - 1][0],
                                              'downslope': -1}
            else:
                self.slppos_tags[int(tag)] = {'name': name,
                                              'upslope': tag_names[idx - 1][0],
                                              'downslope': tag_names[idx + 1][0]}
        else:
            self.slppos_tags[int(tag)] = {'name': name, 'upslope': -1, 'downslope': -1}
    self.reach = reach_shp
    # read raster data and check the extent based on hillslope.
    hillslpr = RasterUtilClass.read_raster(hillslpf)
    self.data_hillslp = hillslpr.data
    self.nrows = hillslpr.nRows
    self.ncols = hillslpr.nCols
    self.dx = hillslpr.dx
    self.nodata_hillslp = hillslpr.noDataValue
    self.geotrans = hillslpr.geotrans
    self.srs = hillslpr.srs
    self.datatype = hillslpr.dataType
    slpposr = RasterUtilClass.read_raster(slpposf)
    if slpposr.nRows != self.nrows or slpposr.nCols != self.ncols:
        raise ValueError('The slope position raster MUST have the same dimensions'
                         ' with hillslope!')
    self.data_slppos = slpposr.data
    self.nodata_slppos = slpposr.noDataValue
    landuser = RasterUtilClass.read_raster(landusef)
    if landuser.nRows != self.nrows or landuser.nCols != self.ncols:
        raise ValueError('The landuse raster MUST have the same dimensions'
                         ' with hillslope!')
    self.data_landuse = landuser.data
    self.nodata_landuse = landuser.noDataValue
    # Set intermediate data
    self.subbsin_num = -1
    self.subbsin_tree = dict()  # type: Dict[int, int]  # {subbsnID: dst_subbsnID}
    self.units_updwon = OrderedDict()  # type: Dict[AnyStr, Dict[int, Dict[AnyStr, Union[List[float], AnyStr]]]]
    for tag in self.slppos_tags:
        self.units_updwon[self.slppos_tags.get(tag).get('name')] = dict()
    self.slppos_ids = numpy.ones((self.nrows, self.ncols)) * DEFAULT_NODATA
    self.hierarchy_units = dict()  # type: Dict[int, Dict[int, Dict[AnyStr, int]]]
    # Set output file names
    self.outf_units_origin = self.ws + os.path.sep + 'slppos_units_origin_uniqueid.tif'
    self.outshp_units_origin = self.ws + os.path.sep + 'origin_uniqueid.shp'
    self.json_units_origin = self.ws + os.path.sep + 'original_updown.json'
    self.outf_units_merged = self.ws + os.path.sep + 'slppos_units.tif'
    self.outshp_units_merged = self.ws + os.path.sep + 'slppos_units_merged.shp'
    self.json_units_merged = self.ws + os.path.sep + 'updown.json'
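# A hedged example of the tag_names argument, assuming the common three-position
# ridge/backslope/valley scheme used elsewhere in this module. The tag values (1, 4, 16)
# are illustrative; the real ones come from the fuzzy slope position configuration.
tag_names = [(1, 'rdg'), (4, 'bks'), (16, 'vly')]
# After __init__, self.slppos_tags would be:
#   {1:  {'name': 'rdg', 'upslope': -1, 'downslope': 4},
#    4:  {'name': 'bks', 'upslope': 1,  'downslope': 16},
#    16: {'name': 'vly', 'upslope': 4,  'downslope': -1}}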
def __init__(self, cf):
    """Initialization."""
    SAConfig.__init__(self, cf)  # initialize base class first
    # Handling self.bmps_info for specific application
    # 1. Check the required keys and values
    requiredkeys = ['COLLECTION', 'DISTRIBUTION', 'SUBSCENARIO', 'UPDOWNJSON',
                    'ENVEVAL', 'BASE_ENV']
    for k in requiredkeys:
        if k not in self.bmps_info:
            raise ValueError('[%s]: MUST be provided!' % k)
    # 2. Slope position units information
    updownf = self.bmps_info.get('UPDOWNJSON')
    FileClass.check_file_exists(updownf)
    with open(updownf, 'r') as updownfo:
        self.units_infos = json.load(updownfo)
    self.units_infos = UtilClass.decode_strs_in_dict(self.units_infos)
    # 3. Get slope position sequence
    sptags = cf.get('BMPs', 'slppos_tag_name')
    self.slppos_tags = json.loads(sptags)
    self.slppos_tags = UtilClass.decode_strs_in_dict(self.slppos_tags)
    self.slppos_tagnames = sorted(list(self.slppos_tags.items()), key=operator.itemgetter(0))
    self.slppos_unit_num = self.units_infos['overview']['all_units']
    self.slppos_to_gene = OrderedDict()
    self.gene_to_slppos = dict()
    # method 1: (deprecated)
    #   gene index: 0, 1, 2, ..., n
    #   slppos unit: rdg1, rdg2, ..., bks1, bks2, ..., vly1, vly2, ...
    # idx = 0
    # for tag, sp in self.slppos_tagnames:
    #     for uid in self.units_infos[sp]:
    #         self.gene_to_slppos[idx] = uid
    #         self.slppos_to_gene[uid] = idx
    #         idx += 1
    # method 2:
    #   gene index: 0, 1, 2, ..., n
    #   slppos unit: rdg1, bks1, vly1, ..., rdgn, bksn, vlyn
    idx = 0
    spname = self.slppos_tagnames[0][1]
    for uid, udict in self.units_infos[spname].items():
        spidx = 0
        self.gene_to_slppos[idx] = uid
        self.slppos_to_gene[uid] = idx
        idx += 1
        next_uid = udict['downslope']
        while next_uid > 0:
            self.gene_to_slppos[idx] = next_uid
            self.slppos_to_gene[next_uid] = idx
            idx += 1
            spidx += 1
            spname = self.slppos_tagnames[spidx][1]
            next_uid = self.units_infos[spname][next_uid]['downslope']
    assert idx == self.slppos_unit_num
    # 4. SubScenario IDs and parameters read from MongoDB
    self.bmps_subids = self.bmps_info.get('SUBSCENARIO')
    self.bmps_coll = self.bmps_info.get('COLLECTION')
    self.bmps_params = dict()
    self.slppos_suit_bmps = dict()
    self.read_bmp_parameters()
    self.get_suitable_bmps_for_slppos()
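# A small self-contained sketch of the "method 2" gene ordering above, using a
# hypothetical units_infos with two hillslopes and three slope positions. It reproduces
# the rdg1, bks1, vly1, rdg2, bks2, vly2 traversal; unit IDs and tags are illustrative.
from collections import OrderedDict

units_infos = {
    'rdg': {1: {'downslope': 2}, 4: {'downslope': 5}},
    'bks': {2: {'downslope': 3}, 5: {'downslope': 6}},
    'vly': {3: {'downslope': -1}, 6: {'downslope': -1}},
}
slppos_tagnames = [(1, 'rdg'), (4, 'bks'), (16, 'vly')]

gene_to_slppos = OrderedDict()
idx = 0
spname = slppos_tagnames[0][1]
for uid, udict in units_infos[spname].items():
    spidx = 0
    gene_to_slppos[idx] = uid
    idx += 1
    next_uid = udict['downslope']
    while next_uid > 0:  # follow the downslope chain of the current hillslope
        gene_to_slppos[idx] = next_uid
        idx += 1
        spidx += 1
        spname = slppos_tagnames[spidx][1]
        next_uid = units_infos[spname][next_uid]['downslope']

print(list(gene_to_slppos.values()))  # [1, 2, 3, 4, 5, 6] -> rdg1, bks1, vly1, rdg2, bks2, vly2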
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """Interpolate irregularly observed data to regular time-interval data.

    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Field names can be PCP, FLOW, and SED; the units are mm/h, m3/s, and
                 g/L (i.e., kg/m3), respectively.
        time_interval: time interval in minutes, e.g., daily output is 1440.
        start_time: start time in the format 'YYYY-mm-dd HH:MM:SS', expressed in the
                    output time system (see time_sys_output).
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If True, time intervals without original records
                        will not be output.
        time_sys_output: time system of the output, the format must be
                         '<time_system> [<time_zone>]', e.g.,
                         'LOCALTIME'
                         'LOCALTIME 8'
                         'UTCTIME' (default)
        day_divided_hour: If time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23, e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59

    Returns:
        The output data files are located in the same directory as the input file.
        The nomenclature is <field name>_<time system>_<time interval>_<nonzero>, e.g.,
        pcp_utctime_1440_nonzero.txt, flow_localtime_60.txt
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available fields
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_available_field(cur_fld):
        """Check if the given field name is supported."""
        support_flag = False
        for fff in available_flds:
            if fff.lower() in cur_fld.lower():
                support_flag = True
                break
        return support_flag

    ord_data = OrderedDict()
    time_zone_output = int(time.timezone / -3600)
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one item that is earlier than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # Just add end time for computational convenience
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # initialize the interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                # eliminate time intervals without original records
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            if 'SED' in v_name.upper():
                # FLOW must exist
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!'
                          % item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():
                # the input is mm/h, and the output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta
    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_available_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.txt'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
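# A hedged usage sketch: aggregate a small irregular flow record to hourly values.
# The file content, path, and time range are illustrative only.
#
# observed_flow.txt:
#   #LOCALTIME 8
#   DATETIME,FLOW
#   2013-02-03 08:15:00,1.2
#   2013-02-03 08:47:00,2.0
#   2013-02-03 09:30:00,1.5
#
# The call below would write a regular 60-minute series next to the input file,
# named FLOW_LOCALTIME_60.txt, in the UTC+8 local time system.
#
# interpolate_observed_data_to_regular_interval(
#     'observed_flow.txt', 60,
#     '2013-02-03 08:00:00', '2013-02-03 12:00:00',
#     eliminate_zero=False, time_sys_output='LOCALTIME 8')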
def main(landusef, unitsf, jsonout):
    """Construct common spatial units data in JSON file format."""
    # Check the file existence
    FileClass.check_file_exists(landusef)
    FileClass.check_file_exists(unitsf)
    # read raster data and check the extent based on landuse.
    landuser = RasterUtilClass.read_raster(landusef)
    data_landuse = landuser.data
    nrows = landuser.nRows
    ncols = landuser.nCols
    dx = landuser.dx
    nodata_landuse = landuser.noDataValue
    fieldr = RasterUtilClass.read_raster(unitsf)
    if fieldr.nRows != nrows or fieldr.nCols != ncols:
        raise ValueError('The spatial units raster MUST have the same dimensions'
                         ' with landuse!')
    data_units = fieldr.data
    nodata_units = fieldr.noDataValue
    units_info = dict()  # type: Dict[AnyStr, Dict[Union[int, AnyStr], Dict[AnyStr, Union[int, float, List[Union[int, float]], AnyStr, Dict[int, float]]]]]
    units_info.setdefault('units', dict())
    units_info.setdefault('overview', dict())
    units_ids = list()  # type: List[int]
    for m in range(nrows):
        for n in range(ncols):
            cur_lu = int(data_landuse[m][n])
            cur_unit = int(data_units[m][n])
            if cur_unit == nodata_units or cur_lu == nodata_landuse or cur_lu <= 0:
                continue
            if cur_unit not in units_ids:
                units_ids.append(cur_unit)
            if cur_unit not in units_info['units']:
                units_info['units'].setdefault(cur_unit, {'landuse': dict(),
                                                          'primarylanduse': 0,
                                                          'area': 0.})
            if cur_lu not in units_info['units'][cur_unit]['landuse']:
                units_info['units'][cur_unit]['landuse'][cur_lu] = 1
            else:
                units_info['units'][cur_unit]['landuse'][cur_lu] += 1
    for k, v in viewitems(units_info['units']):
        area_field = 0.
        area_max = 0.
        area_max_lu = 0
        for luid, luarea in viewitems(v['landuse']):
            v['landuse'][luid] = luarea * dx * dx * 1.e-6  # cell count to area (km2)
            area_field += v['landuse'][luid]
            if v['landuse'][luid] > area_max:
                area_max = v['landuse'][luid]
                area_max_lu = luid
        v['area'] = area_field
        v['primarylanduse'] = area_max_lu
    units_info['overview'].setdefault('all_units', len(units_ids))
    # save to json
    json_data = json.dumps(units_info, indent=4)
    with open(jsonout, 'w', encoding='utf-8') as f:
        f.write('%s' % json_data)
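# Illustrative shape of the resulting JSON (unit IDs, landuse IDs, and areas are made
# up; json.dumps turns the integer keys into strings):
#
# {
#     "units": {
#         "1": {"landuse": {"33": 0.0225, "81": 0.0036},
#               "primarylanduse": 33,
#               "area": 0.0261},
#         "2": {"landuse": {"33": 0.0108},
#               "primarylanduse": 33,
#               "area": 0.0108}
#     },
#     "overview": {"all_units": 2}
# }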