def subbasin_boundary_cells(self, subbsn_perc):
    """Subbasin boundary cells that are potential ridge sources.

    Marks every cell that lies on a subbasin boundary (i.e., has a D8 neighbor
    that is NoData or belongs to a different subbasin) in ``self.rdgpot``,
    writes that raster to ``self.boundsrc``, then removes boundary cells whose
    elevation is below the per-subbasin rank quantile given by ``subbsn_perc``
    and writes the filtered raster to ``self.boundsrcfilter``.

    Args:
        subbsn_perc: quantile fraction in [0, 1) used as the per-subbasin
            elevation threshold (index = int(count * subbsn_perc)).
    """
    # D8 neighbor offsets (row, col); values() of the project's delta mapping.
    dir_deltas = FlowModelConst.d8delta_ag.values()
    subbsn_elevs = dict()  # subbasin ID -> list of boundary-cell elevations

    def add_elev_to_subbsn_elevs(sid, elev):
        # Accumulate boundary-cell elevations grouped by subbasin ID.
        if sid not in subbsn_elevs:
            subbsn_elevs[sid] = [elev]
        else:
            subbsn_elevs[sid].append(elev)

    for row in range(self.nrows):
        for col in range(self.ncols):
            if MathClass.floatequal(self.subbsn_data[row][col],
                                    self.nodata_subbsn):
                continue
            for r, c in dir_deltas:
                new_row = row + r
                new_col = col + c
                if 0 <= new_row < self.nrows and 0 <= new_col < self.ncols:
                    if MathClass.floatequal(self.subbsn_data[new_row][new_col],
                                            self.nodata_subbsn):
                        # Neighbor is NoData: current cell is on the outer boundary.
                        subbsnid = self.subbsn_data[row][col]
                        self.rdgpot[row][col] = subbsnid
                        add_elev_to_subbsn_elevs(subbsnid,
                                                 self.elev_data[row][col])
                    elif not MathClass.floatequal(
                            self.subbsn_data[row][col],
                            self.subbsn_data[new_row][new_col]):
                        # Neighbor is a different subbasin: both cells are boundary.
                        subbsnid = self.subbsn_data[row][col]
                        subbsnid2 = self.subbsn_data[new_row][new_col]
                        self.rdgpot[row][col] = subbsnid
                        self.rdgpot[new_row][new_col] = subbsnid2
                        add_elev_to_subbsn_elevs(subbsnid,
                                                 self.elev_data[row][col])
                        add_elev_to_subbsn_elevs(
                            subbsnid2, self.elev_data[new_row][new_col])
    RasterUtilClass.write_gtiff_file(self.boundsrc, self.nrows, self.ncols,
                                     self.rdgpot, self.geotrans, self.srs,
                                     DEFAULT_NODATA, 6)
    subbsn_elevs_thresh = dict()  # subbasin ID -> elevation threshold
    for sid, elevs in list(subbsn_elevs.items()):
        tmpelev = numpy.array(elevs)
        tmpelev.sort()
        # Rank-based quantile of the boundary elevations.
        # NOTE(review): subbsn_perc == 1.0 would index one past the end —
        # assumed to always be < 1; confirm against callers.
        subbsn_elevs_thresh[sid] = tmpelev[int(len(tmpelev) * subbsn_perc)]
    for row in range(self.nrows):
        for col in range(self.ncols):
            if MathClass.floatequal(self.rdgpot[row][col], DEFAULT_NODATA):
                continue
            # Drop boundary cells below their subbasin's elevation threshold.
            if self.elev_data[row][col] < subbsn_elevs_thresh[
                    self.subbsn_data[row][col]]:
                self.rdgpot[row][col] = DEFAULT_NODATA
    RasterUtilClass.write_gtiff_file(self.boundsrcfilter, self.nrows,
                                     self.ncols, self.rdgpot, self.geotrans,
                                     self.srs, DEFAULT_NODATA, 6)
def cal_model_performance(obsl, siml):
    """Calculate and print model performance indexes."""
    metrics = (MathClass.nashcoef(obsl, siml),
               MathClass.rsquare(obsl, siml),
               MathClass.pbias(obsl, siml),
               MathClass.rmse(obsl, siml),
               MathClass.rsr(obsl, siml))
    print('NSE: %.2f, R-square: %.2f, PBIAS: %.2f%%, RMSE: %.2f, RSR: %.2f'
          % metrics)
def cal_model_performance(obsl, siml):
    """Calculate and print model performance indexes."""
    metrics = (MathClass.nashcoef(obsl, siml),
               MathClass.rsquare(obsl, siml),
               MathClass.pbias(obsl, siml),
               MathClass.rmse(obsl, siml),
               MathClass.rsr(obsl, siml))
    print('NSE: %.2f, R$^2$: %.2f, PBIAS: %.2f%%, RMSE: %.2f, RSR: %.2f'
          % metrics)
def filter_ridge_by_subbasin_boundary(self):
    """Unmark ridge-source cells that are not potential subbasin-boundary cells,
    then write the filtered ridge sources to ``self.rdgsrc``."""
    for irow in range(self.nrows):
        for icol in range(self.ncols):
            if MathClass.floatequal(self.rdgsrc_data[irow][icol],
                                    DEFAULT_NODATA):
                continue
            # No boundary potential at this cell -> not a ridge source.
            if MathClass.floatequal(self.rdgpot[irow][icol], DEFAULT_NODATA):
                self.rdgsrc_data[irow][icol] = DEFAULT_NODATA
    RasterUtilClass.write_gtiff_file(self.rdgsrc, self.nrows, self.ncols,
                                     self.rdgsrc_data, self.geotrans,
                                     self.srs, DEFAULT_NODATA, 6)
def check_orthogonal(angle):
    """Check the given Dinf angle based on D8 flow direction encoding code by ArcGIS"""
    # (TauDEM angle constant, ArcGIS D8 code) for the eight orthogonal directions.
    candidates = ((FlowModelConst.e, 1), (FlowModelConst.ne, 128),
                  (FlowModelConst.n, 64), (FlowModelConst.nw, 32),
                  (FlowModelConst.w, 16), (FlowModelConst.sw, 8),
                  (FlowModelConst.s, 4), (FlowModelConst.se, 2))
    for taud_angle, arcgis_code in candidates:
        if MathClass.floatequal(angle, taud_angle):
            return taud_angle, arcgis_code
    # Not aligned with any of the eight cardinal/diagonal directions.
    return -1, -1
def output_runtime_to_log(title, lines, logfile):
    """Parse read/compute/write/total runtimes from TauDEM output lines
    and append them to the runtime log file. No-op when logfile is None."""
    if logfile is None:
        return
    time_dict = {'name': FileClass.get_core_name_without_suffix(title),
                 'readt': 0, 'writet': 0, 'computet': 0, 'totalt': 0}
    # Keyword -> accumulator key, tested in this priority order.
    keyword_map = (('read', 'readt'), ('compute', 'computet'),
                   ('write', 'writet'), ('total', 'totalt'))
    for raw_line in lines:
        lowered = raw_line.lower()
        candidate = lowered.split(os.linesep)[0].split(':')[-1]
        if not MathClass.isnumerical(candidate):
            continue
        seconds = float(candidate)
        if lowered.find('time') < 0:
            continue
        for keyword, accum_key in keyword_map:
            if lowered.find(keyword) >= 0:
                time_dict[accum_key] += seconds
                break
    TauDEM.write_time_log(logfile, time_dict)
def reclassify_landcover_parameters(landuse_file, landcover_file,
                                    landcover_initial_fields_file,
                                    landcover_lookup_file, attr_names,
                                    dst_dir, landuse_shp):
    """Reclassify landcover initial parameters.

    Builds one {landcover_code: value} dict per attribute in ``attr_names``
    from the crop lookup table, then calls landuse_cover_reclassify with the
    per-attribute dicts and an intermediate CSV written under ``dst_dir``.
    """
    land_cover_codes = LanduseUtilClass.initialize_landcover_parameters(
        landuse_file, landcover_initial_fields_file, dst_dir, landuse_shp)
    attr_map = LanduseUtilClass.read_crop_lookup_table(landcover_lookup_file)
    replace_dicts = []
    replace_dicts_attrn = dict()  # attribute name -> {code: value}
    dst_crop_tifs = []
    for cur_attr in attr_names:
        cur_dict = dict()
        dic = attr_map[cur_attr]
        for code in land_cover_codes:
            if MathClass.floatequal(code, DEFAULT_NODATA):
                continue
            if code not in cur_dict:
                cur_dict[code] = dic.get(code)
        replace_dicts_attrn[cur_attr] = cur_dict
        replace_dicts.append(cur_dict)
        dst_crop_tifs.append(dst_dir + os.path.sep + cur_attr + '.tif')
    # BUGFIX: the intermediate CSV path was hard-coded to a developer's local
    # Windows path (D:\SEIMS\...), breaking on any other machine. Derive it
    # from the output directory instead.
    landcover_rec_csv = dst_dir + os.path.sep + 'landcover_rec_csv.csv'
    RasterUtilClass.landuse_cover_reclassify(landcover_file, landuse_shp,
                                             replace_dicts_attrn,
                                             landcover_rec_csv)
    print(landcover_rec_csv)
def get_time_system_from_data_file(in_file):
    # type: (str) -> (str, int)
    """Get the time system from the data file. The basic format is:
       #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #LOCALTIME -2, #UTCTIME

    Returns:
        time_sys: 'UTCTIME' or 'LOCALTIME'
        time_zone(int): Positive for West time zone, and negative for East.
    """
    # Defaults: local time with the OS time zone (West-positive, per time.timezone).
    time_sys = 'LOCALTIME'
    time_zone = time.timezone // 3600
    with open(in_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    for line in lines:
        str_line = line.strip()
        # BUGFIX: `str_line[0] != '#'` raised IndexError on blank lines;
        # startswith() treats an empty line safely as "not a header line".
        if not str_line.startswith('#'):
            break
        if str_line.lower().find('utc') >= 0:
            time_sys = 'UTCTIME'
            time_zone = 0
            break
        if str_line.lower().find('local') >= 0:
            line_list = StringClass.split_string(str_line, [' ', ','])
            if len(line_list) == 2 and MathClass.isnumerical(line_list[1]):
                # Negate: header stores the zone with the opposite sign convention.
                time_zone = -1 * int(line_list[1])
            break
    return time_sys, time_zone
def raster_to_gtiff(tif, geotif, change_nodata=False, change_gdal_type=False):
    """Converting Raster format to GeoTIFF.

    Args:
        tif: source raster file path.
        geotif: output raster file path.
        change_nodata: if True, reset NoDataValue to -9999.
        change_gdal_type: if True, output the Float32 data type.
    """
    src = RasterUtilClass.read_raster(tif)
    out_nodata = src.noDataValue
    if change_nodata and not MathClass.floatequal(src.noDataValue,
                                                  DEFAULT_NODATA):
        out_nodata = DEFAULT_NODATA
        # Tolerance-based masking of the original NoData cells.
        reference = numpy.ones((src.nRows, src.nCols)) * src.noDataValue
        src.data[numpy.isclose(src.data, reference)] = DEFAULT_NODATA
    out_type = GDT_Float32 if change_gdal_type else src.dataType
    RasterUtilClass.write_gtiff_file(geotif, src.nRows, src.nCols, src.data,
                                     src.geotrans, src.srs, out_nodata,
                                     out_type)
def raster_reclassify(srcfile, v_dict, dstfile, gdaltype=GDT_Float32):
    """Reclassify raster by given classifier dict.

    Args:
        srcfile: source raster file.
        v_dict: classifier dict.
        dstfile: destination file path.
        gdaltype (:obj:`pygeoc.raster.GDALDataType`): GDT_Float32 as default.
    """
    src = RasterUtilClass.read_raster(srcfile)
    source_values = src.data
    reclassified = numpy.copy(source_values)
    # Keep the source data type unless a non-default type was requested.
    if gdaltype == GDT_Float32 and src.dataType != GDT_Float32:
        gdaltype = src.dataType
    out_nodata = src.noDataValue
    preferred_nodata = 0 if gdaltype in [GDT_Unknown, GDT_Byte, GDT_UInt16,
                                         GDT_UInt32] else DEFAULT_NODATA
    if not MathClass.floatequal(preferred_nodata, src.noDataValue):
        # Remap the old NoData value to the new one as well.
        v_dict.setdefault(src.noDataValue, preferred_nodata)
        out_nodata = preferred_nodata
    for old_value, new_value in v_dict.items():
        reclassified[source_values == old_value] = new_value
    RasterUtilClass.write_gtiff_file(dstfile, src.nRows, src.nCols,
                                     reclassified, src.geotrans, src.srs,
                                     out_nodata, gdaltype)
def ridge_without_flowin_cell(self):
    """Find the original ridge sources that have no flow-in cells.

    Any cell that receives flow from a neighbor cannot be a ridge source, so
    every downstream target cell is unmarked in ``self.rdgsrc_data``; cells
    whose flow leaves the grid are unmarked as well. The result is written
    to ``self.rdgorg``.
    """
    for row in range(self.nrows):
        for col in range(self.ncols):
            tempdir = self.flowdir_data[row][col]
            if MathClass.floatequal(tempdir, self.nodata_flow):
                self.rdgsrc_data[row][col] = DEFAULT_NODATA
                continue
            if self.flowmodel == 1:  # Dinf flow model
                # Dinf may split flow into up to two downstream cells.
                temp_coor = DinfUtil.downstream_index_dinf(tempdir, row, col)
                for temprow, tempcol in temp_coor:
                    if 0 <= temprow < self.nrows and 0 <= tempcol < self.ncols:
                        # Downstream cell receives flow -> not a ridge source.
                        self.rdgsrc_data[temprow][tempcol] = DEFAULT_NODATA
                    else:
                        # Flow leaves the grid -> current cell is not a source.
                        self.rdgsrc_data[row][col] = DEFAULT_NODATA
            else:  # D8 flow model: a single downstream cell.
                temprow, tempcol = D8Util.downstream_index(tempdir, row, col)
                if 0 <= temprow < self.nrows and 0 <= tempcol < self.ncols:
                    self.rdgsrc_data[temprow][tempcol] = DEFAULT_NODATA
                else:
                    self.rdgsrc_data[row][col] = DEFAULT_NODATA
    RasterUtilClass.write_gtiff_file(self.rdgorg, self.nrows, self.ncols,
                                     self.rdgsrc_data, self.geotrans,
                                     self.srs, DEFAULT_NODATA, 6)
def get_time_system_from_data_file(in_file):
    """Get the time system from the data file. The basic format is:
       #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME

    Returns:
        time_sys: 'UTCTIME' or 'LOCALTIME', and the time zone as int.
    """
    time_sys = 'LOCALTIME'
    time_zone = time.timezone // -3600  # default to the OS local zone
    with open(in_file, 'r') as f:
        lines = f.readlines()
    for line in lines:
        str_line = line.strip()
        # BUGFIX: indexing str_line[0] crashed with IndexError on blank
        # lines; startswith() handles the empty string safely.
        if not str_line.startswith('#'):
            break
        if str_line.lower().find('utc') >= 0:
            time_sys = 'UTCTIME'
            time_zone = 0
            break
        if str_line.lower().find('local') >= 0:
            line_list = StringClass.split_string(str_line, [','])
            if len(line_list) == 2 and MathClass.isnumerical(line_list[1]):
                time_zone = -1 * int(line_list[1])
            break
    return time_sys, time_zone
def reclassify_landcover_parameters(landuse_file, landcover_file,
                                    landcover_initial_fields_file,
                                    landcover_lookup_file, attr_names,
                                    dst_dir):
    """Reclassify landcover initial parameters and write one GTiff per attribute."""
    land_cover_codes = LanduseUtilClass.initialize_landcover_parameters(
        landuse_file, landcover_initial_fields_file, dst_dir)
    attr_map = LanduseUtilClass.read_crop_lookup_table(landcover_lookup_file)
    replace_dicts = []
    dst_crop_tifs = []
    for cur_attr in attr_names:
        dic = attr_map[cur_attr]
        cur_dict = dict()
        for code in land_cover_codes:
            if MathClass.floatequal(code, DEFAULT_NODATA):
                continue
            if code not in cur_dict:
                cur_dict[code] = dic.get(code)
        replace_dicts.append(cur_dict)
        dst_crop_tifs.append(dst_dir + os.path.sep + cur_attr + '.tif')
    # Generate one reclassified GTiff per attribute.
    for reclass_dict, tif_path in zip(replace_dicts, dst_crop_tifs):
        RasterUtilClass.raster_reclassify(landcover_file, reclass_dict,
                                          tif_path)
def reclassify_landcover_parameters(landuse_file, landcover_file,
                                    landcover_initial_fields_file,
                                    landcover_lookup_file, attr_names,
                                    dst_dir, landuse_shp):
    """Reclassify landcover initial parameters.

    Builds one {landcover_code: value} dict per attribute in ``attr_names``
    from the crop lookup table, then calls landuse_cover_reclassify with the
    per-attribute dicts and an intermediate CSV written under ``dst_dir``.
    """
    land_cover_codes = LanduseUtilClass.initialize_landcover_parameters(
        landuse_file, landcover_initial_fields_file, dst_dir, landuse_shp)
    attr_map = LanduseUtilClass.read_crop_lookup_table(landcover_lookup_file)
    replace_dicts = []
    replace_dicts_attrn = dict()  # attribute name -> {code: value}
    dst_crop_tifs = []
    for cur_attr in attr_names:
        cur_dict = dict()
        dic = attr_map[cur_attr]
        for code in land_cover_codes:
            if MathClass.floatequal(code, DEFAULT_NODATA):
                continue
            if code not in cur_dict:
                cur_dict[code] = dic.get(code)
        replace_dicts_attrn[cur_attr] = cur_dict
        replace_dicts.append(cur_dict)
        dst_crop_tifs.append(dst_dir + os.path.sep + cur_attr + '.tif')
    # BUGFIX: the intermediate CSV path was hard-coded to a developer's local
    # Windows path (D:\SEIMS\...), breaking on any other machine. Derive it
    # from the output directory instead.
    landcover_rec_csv = dst_dir + os.path.sep + 'landcover_rec_csv.csv'
    RasterUtilClass.landuse_cover_reclassify(landcover_file, landuse_shp,
                                             replace_dicts_attrn,
                                             landcover_rec_csv)
    print(landcover_rec_csv)
def raster_reclassify(srcfile, v_dict, dstfile, gdaltype=GDT_Float32):
    """Reclassify raster by given classifier dict.

    Args:
        srcfile: source raster file.
        v_dict: classifier dict.
        dstfile: destination file path.
        gdaltype (:obj:`pygeoc.raster.GDALDataType`): GDT_Float32 as default.
    """
    raster = RasterUtilClass.read_raster(srcfile)
    original = raster.data
    recoded = numpy.copy(original)
    # Retain the source data type unless a non-default type was requested.
    if gdaltype == GDT_Float32 and raster.dataType != GDT_Float32:
        gdaltype = raster.dataType
    no_data = raster.noDataValue
    new_no_data = 0 if gdaltype in [GDT_Unknown, GDT_Byte, GDT_UInt16,
                                    GDT_UInt32] else DEFAULT_NODATA
    if not MathClass.floatequal(new_no_data, raster.noDataValue):
        # Also remap the old NoData value unless the caller already did.
        if raster.noDataValue not in v_dict:
            v_dict[raster.noDataValue] = new_no_data
        no_data = new_no_data
    for old_v in v_dict:
        recoded[original == old_v] = v_dict[old_v]
    RasterUtilClass.write_gtiff_file(dstfile, raster.nRows, raster.nCols,
                                     recoded, raster.geotrans, raster.srs,
                                     no_data, gdaltype)
def reclassify_landcover_parameters(landuse_file, landcover_file,
                                    landcover_initial_fields_file,
                                    landcover_lookup_file, attr_names,
                                    dst_dir):
    """Reclassify landcover initial parameters and write one GTiff per attribute."""
    land_cover_codes = LanduseUtilClass.initialize_landcover_parameters(
        landuse_file, landcover_initial_fields_file, dst_dir)
    attr_map = LanduseUtilClass.read_crop_lookup_table(landcover_lookup_file)
    replace_dicts = list()
    dst_crop_tifs = list()
    for attr_name in attr_names:
        lookup = attr_map[attr_name]
        code_map = dict()
        for code in land_cover_codes:
            if MathClass.floatequal(code, DEFAULT_NODATA):
                continue
            if code not in code_map:
                code_map[code] = lookup.get(code)
        replace_dicts.append(code_map)
        dst_crop_tifs.append(dst_dir + os.path.sep + attr_name + '.tif')
    # Write one reclassified GTiff per requested attribute.
    for code_map, out_tif in zip(replace_dicts, dst_crop_tifs):
        RasterUtilClass.raster_reclassify(landcover_file, code_map, out_tif)
def cal_cn2(lucc_id, hg):
    """Calculate CN2 value from landuse ID and Hydro Group number.

    Returns DEFAULT_NODATA for invalid/NoData landuse IDs or IDs missing
    from the cn2 lookup table.
    """
    lucc_id = int(lucc_id)
    if lucc_id < 0 or MathClass.floatequal(lucc_id, nodata_value):
        return DEFAULT_NODATA
    hg = int(hg) - 1  # Hydro Group number is 1-based; index is 0-based.
    # BUGFIX: guard against landuse codes absent from the lookup table,
    # which previously raised KeyError (the sibling implementation already
    # has this guard).
    if lucc_id not in cn2_map:
        print("lucc %d not existed in cn2 lookup table!" % lucc_id)
        return DEFAULT_NODATA
    return cn2_map[lucc_id][hg]
def GetFuzzySlopePositionValues(i_row, i_col):
    """Collect fuzzy slope-position values at (i_row, i_col) from all rasters,
    or return None when any raster has NoData or a negative value there."""
    values = list()
    for fuzdata in fuzslppos_rs:
        curv = fuzdata.data[i_row][i_col]
        if MathClass.floatequal(curv, fuzdata.noDataValue) or curv < 0:
            return None
        values.append(curv)
    return values
def cal_cn2(lucc_id, hg):
    """Calculate CN2 value from landuse ID and Hydro Group number."""
    lucc_id = int(lucc_id)
    # Invalid or NoData landuse -> no CN2 value.
    if lucc_id < 0 or MathClass.floatequal(lucc_id, nodata_value):
        return DEFAULT_NODATA
    hg_index = int(hg) - 1  # Hydro Group number is 1-based.
    if lucc_id not in cn2_map:
        print("lucc %d not existed in cn2 lookup table!" % lucc_id)
        return DEFAULT_NODATA
    return cn2_map[lucc_id][hg_index]
def initial_params_from_txt(cfg, maindb): """ import initial calibration parameters from txt data file. Args: cfg: SEIMS config object maindb: MongoDB database object """ # delete if existed, initialize if not existed c_list = maindb.collection_names() if not StringClass.string_in_list(DBTableNames.main_parameter, c_list): maindb.create_collection(DBTableNames.main_parameter) else: maindb.drop_collection(DBTableNames.main_parameter) # initialize bulk operator bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op() # read initial parameters from txt file data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file) field_names = data_items[0][0:] # print(field_names) for i, cur_data_item in enumerate(data_items): if i == 0: continue # print(cur_data_item) # initial one default blank parameter dict. data_import = {ModelParamFields.name: '', ModelParamFields.desc: '', ModelParamFields.unit: '', ModelParamFields.module: '', ModelParamFields.value: DEFAULT_NODATA, ModelParamFields.impact: DEFAULT_NODATA, ModelParamFields.change: 'NC', ModelParamFields.max: DEFAULT_NODATA, ModelParamFields.min: DEFAULT_NODATA, ModelParamFields.type: ''} for k, v in list(data_import.items()): idx = field_names.index(k) if cur_data_item[idx] == '': if StringClass.string_match(k, ModelParamFields.change_ac): data_import[k] = 0 elif StringClass.string_match(k, ModelParamFields.change_rc): data_import[k] = 1 elif StringClass.string_match(k, ModelParamFields.change_nc): data_import[k] = 0 elif StringClass.string_match(k, ModelParamFields.change_vc): data_import[k] = DEFAULT_NODATA # Be careful to check NODATA when use! else: if MathClass.isnumerical(cur_data_item[idx]): data_import[k] = float(cur_data_item[idx]) else: data_import[k] = cur_data_item[idx] bulk.insert(data_import) # execute import operators MongoUtil.run_bulk(bulk, 'No operation during initial_params_from_txt.') # initialize index by parameter's type and name by ascending order. 
maindb[DBTableNames.main_parameter].create_index([(ModelParamFields.type, ASCENDING), (ModelParamFields.name, ASCENDING)])
def check_individual_diff(old_ind,  # type: Union[array.array, List[int], Tuple[int]]
                          new_ind  # type: Union[array.array, List[int], Tuple[int]]
                          ):
    # type: (...) -> bool
    """Check the gene values of two individuals.

    Returns True as soon as any pair of genes differs, otherwise False.
    """
    return any(not MathClass.floatequal(old_ind[i], new_ind[i])
               for i in range(len(old_ind)))
def cal_model_performance(obsl, siml):
    """Scatter-plot simulation against observation, annotated with
    NSE, R2, PBIAS, RMSE, and RSR performance indexes."""
    indexes = (MathClass.nashcoef(obsl, siml), MathClass.rsquare(obsl, siml),
               MathClass.pbias(obsl, siml), MathClass.rmse(obsl, siml),
               MathClass.rsr(obsl, siml))
    for option, setting in (('xtick.direction', 'out'),
                            ('ytick.direction', 'out'),
                            ('font.family', 'Times New Roman')):
        plt.rcParams[option] = setting
    fig, axes = plt.subplots(figsize=(4, 4))
    plt.scatter(obsl, siml, marker='.', s=50, color='black')
    plt.xlabel('Observation', fontsize=20)
    plt.ylabel('Simulation', fontsize=20)
    plt.title('\nNSE: %.2f, R$^2$: %.2f, PBIAS: %.2f%%\nRMSE: %.2f, RSR: %.2f'
              % indexes, color='red', loc='right')
    # Square limits spanning both series.
    lower = math.floor(min(min(obsl), min(siml)))
    upper = math.ceil(max(max(obsl), max(siml)))
    axes.set_xlim(left=lower, right=upper)
    axes.set_ylim(bottom=lower, top=upper)
    plt.tight_layout()
    plt.show()
def delete_model_outputs(model_workdir, hostname, port, dbname):
    """Delete model outputs and scenario in MongoDB.

    Removes every output subdirectory whose name ends in a 9-digit scenario
    ID, then deletes the corresponding scenarios from the database.
    """
    sids = list()
    for entry in os.listdir(model_workdir):
        full_path = model_workdir + os.path.sep + entry
        if not os.path.isdir(full_path):
            continue
        # Scenario output dirs end with a 9-character numeric ID.
        if len(entry) <= 9 or not MathClass.isnumerical(entry[-9:]):
            continue
        shutil.rmtree(full_path)
        sids.append(int(entry[-9:]))
    if len(sids) > 0:
        delete_scenarios_by_ids(hostname, port, dbname, sids)
def check_orthogonal(angle):
    """Check the given Dinf angle based on D8 flow direction encoding code by ArcGIS"""
    # (TauDEM angle constant, sequential D8 code 1..8); the ArcGIS codes
    # they correspond to are 1,128,64,32,16,8,4,2 respectively.
    direction_codes = ((FlowModelConst.e, 1), (FlowModelConst.ne, 2),
                       (FlowModelConst.n, 3), (FlowModelConst.nw, 4),
                       (FlowModelConst.w, 5), (FlowModelConst.sw, 6),
                       (FlowModelConst.s, 7), (FlowModelConst.se, 8))
    for taud_angle, code in direction_codes:
        if MathClass.floatequal(angle, taud_angle):
            return code
    return -1
def compress_dinf(angle, nodata, minfrac=0.01):
    """Compress dinf flow direction to D8 direction with weight
    follows ArcGIS D8 codes.

    Args:
        angle: D-inf flow direction angle
        nodata: NoData value
        minfrac: Minimum flow fraction that accounted, percent, e.g., 0.01

    Returns:
        1. Updated Dinf values
        2. Compressed flow direction follows ArcGIS D8 codes rule
        3. Weight of the first direction by counter-clockwise
    """
    if MathClass.floatequal(angle, nodata):
        return DEFAULT_NODATA, DEFAULT_NODATA, DEFAULT_NODATA
    angle, d = DinfUtil.check_orthogonal(angle, minfrac=minfrac)
    if d != -1:
        # Orthogonal direction: all flow goes one way.
        return angle, d, 1.
    # (upper bound, lower bound, compound ArcGIS code) per 45-degree band.
    bands = ((FlowModelConst.ne, 0., 129),                 # 1+128
             (FlowModelConst.n, FlowModelConst.ne, 192),   # 128+64
             (FlowModelConst.nw, FlowModelConst.n, 96),    # 64+32
             (FlowModelConst.w, FlowModelConst.nw, 48),    # 32+16
             (FlowModelConst.sw, FlowModelConst.w, 24),    # 16+8
             (FlowModelConst.s, FlowModelConst.sw, 12),    # 8+4
             (FlowModelConst.se, FlowModelConst.s, 6))     # 4+2
    for upper, lower, code in bands:
        if angle < upper:
            a1 = angle - lower
            d = code
            break
    else:
        a1 = angle - FlowModelConst.se
        d = 3  # 2+1
    return angle, d, 1. - a1 / PI * 4.0
def get_value_by_row_col(self, row, col):
    """Get raster value by (row, col).

    Args:
        row: row number.
        col: col number.

    Returns:
        raster value, None if it equals NoData.

    Raises:
        ValueError: when row or col is out of the raster extent.
    """
    if not (0 <= row < self.nRows and 0 <= col < self.nCols):
        raise ValueError("The row or col must be >=0 and less than "
                         "nRows (%d) or nCols (%d)!" % (self.nRows,
                                                        self.nCols))
    value = self.data[int(round(row))][int(round(col))]
    return None if MathClass.floatequal(value, self.noDataValue) else value
def compress_dinf(angle, nodata):
    """Compress dinf flow direction to D8 direction with weight
    follows ArcGIS D8 codes.

    Args:
        angle: D-inf flow direction angle
        nodata: NoData value

    Returns:
        1. Updated Dinf values
        2. Compressed flow direction follows ArcGIS D8 codes rule
        3. Weight of the first direction
    """
    if MathClass.floatequal(angle, nodata):
        return DEFAULT_NODATA, DEFAULT_NODATA, DEFAULT_NODATA
    taud, d = DinfUtil.check_orthogonal(angle)
    if d != -1:
        # Orthogonal direction: the whole flow follows one direction.
        return taud, d, 1
    # (upper bound, lower bound, compound ArcGIS code) per 45-degree band.
    bands = ((FlowModelConst.ne, 0., 129),                 # 1+128
             (FlowModelConst.n, FlowModelConst.ne, 192),   # 128+64
             (FlowModelConst.nw, FlowModelConst.n, 96),    # 64+32
             (FlowModelConst.w, FlowModelConst.nw, 48),    # 32+16
             (FlowModelConst.sw, FlowModelConst.w, 24),    # 16+8
             (FlowModelConst.s, FlowModelConst.sw, 12),    # 8+4
             (FlowModelConst.se, FlowModelConst.s, 6))     # 4+2
    for upper, lower, code in bands:
        if angle < upper:
            a1 = angle - lower
            d = code
            break
    else:
        a1 = angle - FlowModelConst.se
        d = 3  # 2+1
    return angle, d, a1 / PI * 4.0
def output_runtime_to_log(title, lines, logfile):
    """Accumulate read/compute/write/total times from TauDEM output lines
    into the runtime log. No-op when logfile is None."""
    if logfile is None:
        return
    record = {'name': FileClass.get_core_name_without_suffix(title),
              'readt': 0, 'writet': 0, 'computet': 0, 'totalt': 0}
    for raw in lines:
        lowline = raw.lower()
        candidate = lowline.split(os.linesep)[0].split(':')[-1]
        if not MathClass.isnumerical(candidate):
            continue
        seconds = float(candidate)
        has_time = lowline.find('time') >= 0
        if has_time and lowline.find('read') >= 0:
            record['readt'] += seconds
        elif has_time and lowline.find('compute') >= 0:
            record['computet'] += seconds
        elif has_time and lowline.find('write') >= 0:
            record['writet'] += seconds
        elif has_time and lowline.find('total') >= 0:
            record['totalt'] += seconds
    TauDEM.write_time_log(logfile, record)
def raster_to_gtiff(tif, geotif, change_nodata=False, change_gdal_type=False):
    """Converting Raster format to GeoTIFF.

    Args:
        tif: source raster file path.
        geotif: output raster file path.
        change_nodata: change NoDataValue to -9999 or not.
        change_gdal_type: If True, output the Float32 data type.
    """
    rst_file = RasterUtilClass.read_raster(tif)
    nodata = rst_file.noDataValue
    if change_nodata:
        if not MathClass.floatequal(rst_file.noDataValue, DEFAULT_NODATA):
            nodata = DEFAULT_NODATA
            # BUGFIX/consistency: exact `==` comparison can miss NoData cells
            # for floating-point rasters; use the tolerance-based masking that
            # the sibling implementation already uses.
            nodata_check = numpy.isclose(rst_file.data, rst_file.noDataValue)
            rst_file.data[nodata_check] = DEFAULT_NODATA
    gdal_type = rst_file.dataType
    if change_gdal_type:
        gdal_type = GDT_Float32
    RasterUtilClass.write_gtiff_file(geotif, rst_file.nRows, rst_file.nCols,
                                     rst_file.data, rst_file.geotrans,
                                     rst_file.srs, nodata, gdal_type)
def compress_dinf(angle, nodata):
    """Compress dinf flow direction to D8 direction with weight

    Args:
        angle: D-inf flow direction angle
        nodata: NoData value

    Returns:
        Compressed flow direction and weight of the first direction
    """
    if MathClass.floatequal(angle, nodata):
        return DEFAULT_NODATA, DEFAULT_NODATA
    orthogonal = DinfUtil.check_orthogonal(angle)
    if orthogonal is not None:
        # Orthogonal direction: the whole flow follows one direction.
        return orthogonal, 1
    # (upper bound, lower bound, compound D8 code) per 45-degree band.
    bands = ((FlowModelConst.ne, 0., 129),                 # 1+128
             (FlowModelConst.n, FlowModelConst.ne, 192),   # 128+64
             (FlowModelConst.nw, FlowModelConst.n, 96),    # 64+32
             (FlowModelConst.w, FlowModelConst.nw, 48),    # 32+16
             (FlowModelConst.sw, FlowModelConst.w, 24),    # 16+8
             (FlowModelConst.s, FlowModelConst.sw, 12),    # 8+4
             (FlowModelConst.se, FlowModelConst.s, 6))     # 4+2
    for upper, lower, code in bands:
        if angle < upper:
            a1 = angle - lower
            d = code
            break
    else:
        a1 = angle - FlowModelConst.se
        d = 3  # 2+1
    return d, a1 / PI * 4.0
def test_mathclass_isnumerical():
    """MathClass.isnumerical accepts numeric strings and rejects non-numeric input."""
    # Plain asserts instead of `== True` / `== False` comparisons (PEP 8 / E712).
    assert MathClass.isnumerical('78')
    assert MathClass.isnumerical('1.e-5')
    assert not MathClass.isnumerical(None)
    assert not MathClass.isnumerical('a1.2')
def run(function_name, in_files, wp=None, in_params=None, out_files=None,
        mpi_params=None, log_params=None):
    """
    Run TauDEM function.

     1. The command will not execute if any input file does not exist.
     2. An error will be detected after running the TauDEM command if
     any output file does not exist;

    Args:
        function_name (str): Full path of TauDEM function.
        in_files (dict, required): Dict of pairs of parameter id (string) and file path
            (string or list) for input files, e.g.::

                {'-z': '/full/path/to/dem.tif'}

        wp (str, optional): Workspace for outputs. If not specified, the directory of the
            first input file in ``in_files`` will be used.
        in_params (dict, optional): Dict of pairs of parameter id (string) and value
            (or None for a flag parameter without a value) for input parameters, e.g.::

                {'-nc': None}
                {'-thresh': threshold}
                {'-m': 'ave' 's', '-nc': None}

        out_files (dict, optional): Dict of pairs of parameter id (string) and file
            path (string or list) for output files, e.g.::

                {'-fel': 'filleddem.tif'}
                {'-maxS': ['harden.tif', 'maxsimi.tif']}

        mpi_params (dict, optional): Dict of pairs of parameter id (string) and value or
            path for MPI setting, e.g.::

                {'mpipath':'/soft/bin','hostfile':'/soft/bin/cluster.node','n':4}
                {'mpipath':'/soft/bin', 'n':4}
                {'n':4}

        log_params (dict, optional): Dict of pairs of parameter id (string) and value or
            path for runtime and log output parameters. e.g.::

                {'logfile': '/home/user/log.txt',
                 'runtimefile': '/home/user/runtime.txt'}

    Returns:
        True if TauDEM run successfully, otherwise False.
    """
    # Check input files
    if in_files is None:
        TauDEM.error('Input files parameter is required!')
    if not isinstance(in_files, dict):
        TauDEM.error('The input files parameter must be a dict!')
    for (pid, infile) in list(in_files.items()):
        if infile is None:
            continue
        if isinstance(infile, list) or isinstance(infile, tuple):
            # Multiple files for one parameter: normalize each in place.
            for idx, inf in enumerate(infile):
                if inf is None:
                    continue
                inf, wp = TauDEM.check_infile_and_wp(inf, wp)
                in_files[pid][idx] = inf
            continue
        if os.path.exists(infile):
            infile, wp = TauDEM.check_infile_and_wp(infile, wp)
            in_files[pid] = os.path.abspath(infile)
        else:
            # For more flexible input files extension.
            # e.g., -inputtags 1 <path/to/tag1.tif> 2 <path/to/tag2.tif> ...
            # in such unpredictable circumstance, we cannot check the existance of
            # input files, so the developer will check it in other place.
            if len(StringClass.split_string(infile, ' ')) > 1:
                continue
            else:  # the infile still should be a existing file, so check in workspace
                if wp is None:
                    TauDEM.error('Workspace should not be None!')
                infile = wp + os.sep + infile
                if not os.path.exists(infile):
                    TauDEM.error('Input files parameter %s: %s is not existed!' %
                                 (pid, infile))
                in_files[pid] = os.path.abspath(infile)
    # Make workspace dir if not existed
    UtilClass.mkdir(wp)
    # Check the log parameter
    log_file = None
    runtime_file = None
    if log_params is not None:
        if not isinstance(log_params, dict):
            TauDEM.error('The log parameter must be a dict!')
        if 'logfile' in log_params and log_params['logfile'] is not None:
            log_file = log_params['logfile']
            # If log_file is just a file name, then save it in the default workspace.
            if os.sep not in log_file:
                log_file = wp + os.sep + log_file
                log_file = os.path.abspath(log_file)
        if 'runtimefile' in log_params and log_params['runtimefile'] is not None:
            runtime_file = log_params['runtimefile']
            # If log_file is just a file name, then save it in the default workspace.
            if os.sep not in runtime_file:
                runtime_file = wp + os.sep + runtime_file
                runtime_file = os.path.abspath(runtime_file)
    # remove out_files to avoid any file IO related error
    new_out_files = list()
    if out_files is not None:
        if not isinstance(out_files, dict):
            TauDEM.error('The output files parameter must be a dict!')
        for (pid, out_file) in list(out_files.items()):
            if out_file is None:
                continue
            if isinstance(out_file, list) or isinstance(out_file, tuple):
                for idx, outf in enumerate(out_file):
                    if outf is None:
                        continue
                    outf = FileClass.get_file_fullpath(outf, wp)
                    FileClass.remove_files(outf)
                    out_files[pid][idx] = outf
                    new_out_files.append(outf)
            else:
                out_file = FileClass.get_file_fullpath(out_file, wp)
                FileClass.remove_files(out_file)
                out_files[pid] = out_file
                new_out_files.append(out_file)
    # concatenate command line
    commands = list()
    # MPI header
    if mpi_params is not None:
        if not isinstance(mpi_params, dict):
            TauDEM.error('The MPI settings parameter must be a dict!')
        if 'mpipath' in mpi_params and mpi_params['mpipath'] is not None:
            commands.append(mpi_params['mpipath'] + os.sep + 'mpiexec')
        else:
            commands.append('mpiexec')
        if 'hostfile' in mpi_params and mpi_params['hostfile'] is not None \
                and not StringClass.string_match(mpi_params['hostfile'], 'none') \
                and os.path.isfile(mpi_params['hostfile']):
            commands.append('-f')
            commands.append(mpi_params['hostfile'])
        if 'n' in mpi_params and mpi_params['n'] > 1:
            commands.append('-n')
            commands.append(str(mpi_params['n']))
        else:
            # If number of processor is less equal than 1, then do not call mpiexec.
            commands = []
    # append TauDEM function name, which can be full path or just one name
    commands.append(function_name)
    # append input files
    for (pid, infile) in list(in_files.items()):
        if infile is None:
            continue
        if pid[0] != '-':
            pid = '-' + pid
        commands.append(pid)
        if isinstance(infile, list) or isinstance(infile, tuple):
            commands.append(' '.join(tmpf for tmpf in infile))
        else:
            commands.append(infile)
    # append input parameters
    if in_params is not None:
        if not isinstance(in_params, dict):
            TauDEM.error('The input parameters must be a dict!')
        for (pid, v) in list(in_params.items()):
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            # allow for parameter which is an flag without value
            if v != '' and v is not None:
                if MathClass.isnumerical(v):
                    commands.append(str(v))
                else:
                    commands.append(v)
    # append output parameters
    if out_files is not None:
        for (pid, outfile) in list(out_files.items()):
            if outfile is None:
                continue
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            if isinstance(outfile, list) or isinstance(outfile, tuple):
                commands.append(' '.join(tmpf for tmpf in outfile))
            else:
                commands.append(outfile)
    # run command
    runmsg = UtilClass.run_command(commands)
    TauDEM.log(runmsg, log_file)
    TauDEM.output_runtime_to_log(function_name, runmsg, runtime_file)
    # Check out_files, raise RuntimeError if not exist.
    for of in new_out_files:
        if not os.path.exists(of):
            TauDEM.error('%s failed, and the %s was not generated!' %
                         (function_name, of))
            return False
    return True
def initial_params_from_txt(cfg, maindb): """ import initial calibration parameters from txt data file. Args: cfg: SEIMS config object maindb: MongoDB database object """ # delete if existed, initialize if not existed c_list = maindb.collection_names() if not StringClass.string_in_list(DBTableNames.main_parameter, c_list): maindb.create_collection(DBTableNames.main_parameter) else: maindb.drop_collection(DBTableNames.main_parameter) # initialize bulk operator bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op() # read initial parameters from txt file data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file) field_names = data_items[0][0:] # print(field_names) for i, cur_data_item in enumerate(data_items): if i == 0: continue # print(cur_data_item) # initial one default blank parameter dict. data_import = { ModelParamFields.name: '', ModelParamFields.desc: '', ModelParamFields.unit: '', ModelParamFields.module: '', ModelParamFields.value: DEFAULT_NODATA, ModelParamFields.impact: DEFAULT_NODATA, ModelParamFields.change: 'NC', ModelParamFields.max: DEFAULT_NODATA, ModelParamFields.min: DEFAULT_NODATA, ModelParamFields.type: '' } for k, v in list(data_import.items()): idx = field_names.index(k) if cur_data_item[idx] == '': if StringClass.string_match(k, ModelParamFields.change_ac): data_import[k] = 0 elif StringClass.string_match(k, ModelParamFields.change_rc): data_import[k] = 1 elif StringClass.string_match(k, ModelParamFields.change_nc): data_import[k] = 0 elif StringClass.string_match(k, ModelParamFields.change_vc): data_import[ k] = DEFAULT_NODATA # Be careful to check NODATA when use! else: if MathClass.isnumerical(cur_data_item[idx]): data_import[k] = float(cur_data_item[idx]) else: data_import[k] = cur_data_item[idx] bulk.insert(data_import) # execute import operators MongoUtil.run_bulk(bulk, 'No operation during initial_params_from_txt.') # initialize index by parameter's type and name by ascending order. 
maindb[DBTableNames.main_parameter].create_index([ (ModelParamFields.type, ASCENDING), (ModelParamFields.name, ASCENDING) ])
def interpolate_observed_data_to_regular_interval(in_file, time_interval,
                                                  start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME',
                                                  day_divided_hour=0):
    """Interpolate irregular observed data to a regular time interval.

    Todo: Not tested yet!

    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 Field name can be PCP, FLOW, SED; the units are mm/h, m3/s,
                 and g/L (i.e., kg/m3), respectively.
        time_interval: time interval in minutes, e.g., daily output is 1440.
        start_time: start time, format must be 'YYYY-mm-dd HH:MM:SS'.
        end_time: end time, see also start_time.
        eliminate_zero: if True, intervals without original records are not output.
        time_sys_output: time system of the output, format
                         '<time_system> [<time_zone>]', e.g., 'LOCALTIME 8',
                         'UTCTIME' (default).
        day_divided_hour: when time_interval equals N*1440, the hour at which a
                          "day" starts; must range from 0 to 23, e.g.,
                          0 ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8 ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59

    Returns:
        Output files in the same directory as the input file, named
        <field>_<time system>_<time interval>[_nonzero].csv, e.g.,
        pcp_utctime_1440_nonzero.csv, flow_localtime_60.csv.
        Note that `.txt` format is also supported.
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # Supported observation variables.
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_avaiable_field(cur_fld):
        """Check if the given field name is supported."""
        return any(fff.lower() in cur_fld.lower() for fff in available_flds)

    # 1. Read all records keyed by datetime expressed in the OUTPUT time system.
    ord_data = OrderedDict()
    time_zone_output = time.timezone // 3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = -1 * int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        # NOTE(review): this implementation ADDS the input zone and SUBTRACTS
        # the (negated) output zone, i.e., a west-positive zone convention.
        # The sibling implementation in this file uses the opposite signs —
        # confirm which convention get_time_system_from_data_file returns.
        if time_sys_input == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    # 2. Interpolate to the regular interval.
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        # Daily (or multi-day) output: align the first interval to the
        # user-specified day-dividing hour.
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # Collect the original records that fall in [sdt, edt).
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one item that is earlier than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # just add end time for computing convenience
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current interval
        # Initialize the interpolated record with zeros for every field.
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # SED requires a FLOW column for flow-weighted averaging.
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            if 'SED' in v_name.upper():
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            # Piecewise-constant integration over the sub-intervals between
            # consecutive original records.
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                # Flow-weighted mean concentration. Guard the division: the
                # original divided unconditionally and raised ZeroDivisionError
                # whenever the accumulated flow was exactly zero.
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!'
                          % item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                # Mean flow rate over the output interval (m3/s).
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():
                # The input is mm/h and the output is accumulated mm.
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta
    # 3. Write one output file per supported field.
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_avaiable_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.csv'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w', encoding='utf-8') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
def DelinateSlopePositionByThreshold(
        modelcfg,  # type: ParseSEIMSConfig
        thresholds,  # type: Dict[int, List]
        fuzzyslppos_fnames,  # type: List[Tuple[int, AnyStr, AnyStr]]
        outfname,  # type: AnyStr
        subbsn_id=0  # type: int
):
    # type: (...) -> Dict
    """Harden fuzzy slope positions into discrete classes, per hillslope thresholds.

    Args:
        modelcfg: Configuration of SEIMS-based model.
        thresholds: {HillslopeID: [rdgID, bksID, vlyID, T_bks2rdg, T_bks2vly], ...},
                    i.e., slope-position IDs followed by adaptation thresholds.
        fuzzyslppos_fnames: [(1, 'summit', 'rdgInf'), ...] — (tag, tag name,
                            GridFS name) ordered from top to bottom position.
        outfname: output GridFS name.
        subbsn_id: by default (0) use the whole watershed data.

    Returns:
        outdict: {tagname: {slppos_id: {'area': km2, 'landuse': {luID: km2}}}}
    """
    # 1. Read raster data from MongoDB
    hillslpr = ReadRasterFromMongoDB(modelcfg.host, modelcfg.port, modelcfg.db_name,
                                     DBTableNames.gridfs_spatial,
                                     '%d_HILLSLOPE_MERGED' % subbsn_id)
    landuser = ReadRasterFromMongoDB(modelcfg.host, modelcfg.port, modelcfg.db_name,
                                     DBTableNames.gridfs_spatial,
                                     '%d_LANDUSE' % subbsn_id)
    fuzslppos_rs = list()
    for tag, tagname, gfsname in fuzzyslppos_fnames:
        fuzslppos_rs.append(ReadRasterFromMongoDB(modelcfg.host, modelcfg.port,
                                                  modelcfg.db_name,
                                                  DBTableNames.gridfs_spatial,
                                                  '%d_%s' % (subbsn_id, gfsname.upper())))
    # (Debug GeoTIFF dumps of the inputs removed; re-add locally if needed.)
    # 2. Initialize output
    outgfsname = '%d_%s' % (subbsn_id, outfname.upper())
    outdict = dict()  # type: Dict[AnyStr, Dict[int, Dict[AnyStr, Union[float, Dict[int, float]]]]]
    slppos_cls = numpy.ones((hillslpr.nRows, hillslpr.nCols)) * hillslpr.noDataValue
    valid_cells = 0

    # Get the fuzzy slope position values from top to bottom position.
    def GetFuzzySlopePositionValues(i_row, i_col):
        """Return membership values per position, or None if any is invalid."""
        seqvalues = [-9999] * len(fuzslppos_rs)
        for iseq, fuzdata in enumerate(fuzslppos_rs):
            curv = fuzdata.data[i_row][i_col]
            if MathClass.floatequal(curv, fuzdata.noDataValue):
                return None
            if curv < 0:
                return None
            seqvalues[iseq] = curv
        return seqvalues

    # ACTUAL ALGORITHM
    for row in range(hillslpr.nRows):
        for col in range(hillslpr.nCols):
            # Exclude invalid situations: nodata hillslope/landuse, hillslope
            # without thresholds, or invalid fuzzy memberships.
            hillslp_id = hillslpr.data[row][col]
            if MathClass.floatequal(hillslp_id, hillslpr.noDataValue):
                continue
            if hillslp_id not in thresholds:
                continue
            landuse_id = landuser.data[row][col]
            if MathClass.floatequal(landuse_id, landuser.noDataValue):
                continue
            fuzzyvalues = GetFuzzySlopePositionValues(row, col)
            if fuzzyvalues is None:
                continue
            # THIS PART SHOULD BE REVIEWED CAREFULLY LATER! --START
            # Step 1. Get the index of slope position with maximum similarity,
            #         and the runner-up.
            max_fuz = max(fuzzyvalues)
            max_idx = fuzzyvalues.index(max_fuz)
            tmpfuzzyvalues = fuzzyvalues[:]
            tmpfuzzyvalues.remove(max_fuz)
            sec_fuz = max(tmpfuzzyvalues)
            sec_idx = fuzzyvalues.index(sec_fuz)
            sel_idx = max_idx  # Select the maximum by default
            # Trailing (len-1) entries of the threshold list are the adaptation
            # thresholds between adjacent positions.
            cur_threshs = thresholds[hillslp_id][1 - len(fuzzyvalues):]
            if max_idx == len(fuzzyvalues) - 1:  # the bottom position
                if sec_idx == len(fuzzyvalues) - 2 and \
                        0 < max_fuz - sec_fuz < cur_threshs[-1]:
                    sel_idx = sec_idx  # change valley to backslope
            elif max_idx == 0:  # the upper position
                if sec_idx == 1 and 0 < max_fuz - sec_fuz < cur_threshs[0]:
                    sel_idx = sec_idx  # change ridge to backslope
            else:  # the middle positions
                # Two thresholds could be applied,
                # i.e., cur_threshs[max_idx-1] and cur_threshs[max_idx].
                # NOTE(review): these comparisons require NEGATIVE thresholds
                # (sec_fuz - max_fuz is negative), unlike the positive ones
                # used in the top/bottom branches — confirm the sign convention
                # of T_bks2rdg/T_bks2vly.
                if sec_idx == max_idx - 1 and \
                        0. > sec_fuz - max_fuz > cur_threshs[max_idx - 1]:
                    sel_idx = sec_idx
                elif sec_idx == max_idx + 1 and \
                        0. > sec_fuz - max_fuz > cur_threshs[max_idx]:
                    sel_idx = sec_idx
            # Exception: a very weak runner-up never overrides the maximum.
            if sec_fuz < 0.1 and sel_idx == sec_idx:
                sel_idx = max_idx
            slppos_id = thresholds[hillslp_id][sel_idx]
            # THIS PART SHOULD BE REVIEWED CAREFULLY LATER! --END
            slppos_cls[row][col] = slppos_id
            sel_tagname = fuzzyslppos_fnames[sel_idx][1]
            if sel_tagname not in outdict:
                outdict[sel_tagname] = dict()
            if slppos_id not in outdict[sel_tagname]:
                outdict[sel_tagname][slppos_id] = {'area': 0, 'landuse': dict()}
            outdict[sel_tagname][slppos_id]['area'] += 1
            if landuse_id not in outdict[sel_tagname][slppos_id]['landuse']:
                outdict[sel_tagname][slppos_id]['landuse'][landuse_id] = 0.
            outdict[sel_tagname][slppos_id]['landuse'][landuse_id] += 1.
            valid_cells += 1
    # Change cell counts to area (km2).
    area_km2 = hillslpr.dx * hillslpr.dx * 1.e-6
    for tagname, slpposdict in viewitems(outdict):
        for sid, datadict in viewitems(slpposdict):
            outdict[tagname][sid]['area'] *= area_km2
            for luid in outdict[tagname][sid]['landuse']:
                outdict[tagname][sid]['landuse'][luid] *= area_km2
    # 3. Write the classified slope positions data back to MongoDB GridFS.
    metadata = dict()
    metadata[RasterMetadata.subbasin] = subbsn_id
    metadata['ID'] = outgfsname
    metadata['TYPE'] = outfname.upper()
    metadata[RasterMetadata.cellsize] = hillslpr.dx
    metadata[RasterMetadata.nodata] = hillslpr.noDataValue
    metadata[RasterMetadata.ncols] = hillslpr.nCols
    metadata[RasterMetadata.nrows] = hillslpr.nRows
    metadata[RasterMetadata.xll] = hillslpr.xMin + 0.5 * hillslpr.dx
    metadata[RasterMetadata.yll] = hillslpr.yMin + 0.5 * hillslpr.dx
    metadata['LAYERS'] = 1.
    metadata[RasterMetadata.cellnum] = valid_cells
    metadata[RasterMetadata.srs] = hillslpr.srs
    client = ConnectMongoDB(modelcfg.host, modelcfg.port)
    conn = client.get_conn()
    maindb = conn[modelcfg.db_name]
    spatial_gfs = GridFS(maindb, DBTableNames.gridfs_spatial)
    # Delete the GridFS file if one with this name already exists.
    if spatial_gfs.exists(filename=outgfsname):
        x = spatial_gfs.get_version(filename=outgfsname)
        spatial_gfs.delete(x._id)
    # Create and write the new GridFS file as packed 32-bit floats.
    new_gridfs = spatial_gfs.new_file(filename=outgfsname, metadata=metadata)
    new_gridfs_array = slppos_cls.reshape((1, hillslpr.nCols * hillslpr.nRows)).tolist()[0]
    # NOTE(review): '%df' % hillslpr.nCols * hillslpr.nRows binds as
    # ('%df' % nCols) repeated nRows times; struct accepts the concatenated
    # format and the total float count still equals nCols*nRows, but
    # '%df' % (nCols * nRows) would express the intent directly.
    fmt = '%df' % hillslpr.nCols * hillslpr.nRows
    s = pack(fmt, *new_gridfs_array)
    new_gridfs.write(s)
    new_gridfs.close()
    # (Debug readback/GeoTIFF dump removed; re-add locally if needed.)
    client.close()
    return outdict
def scenario_from_texts(cfg, main_db, scenario_db):
    """Import BMPs Scenario data to MongoDB.

    Args:
        cfg: SEIMS configuration object.
        main_db: main workflow model database (receives the scenario DB name).
        scenario_db: scenario database (receives one collection per BMP table).

    Returns:
        False if failed, otherwise True.
    """
    if not cfg.use_scernario:
        return False
    print('Import BMP Scenario Data... ')
    bmp_files = FileClass.get_filename_by_suffixes(cfg.scenario_dir, ['.txt'])
    bmp_tabs = list()       # collection names (file basenames)
    bmp_tabs_path = list()  # full paths of the corresponding text files
    for f in bmp_files:
        bmp_tabs.append(f.split('.')[0])
        bmp_tabs_path.append(cfg.scenario_dir + os.path.sep + f)
    # Create each collection if missing, otherwise drop it for a clean re-import.
    c_list = scenario_db.collection_names()
    for item in bmp_tabs:
        if not StringClass.string_in_list(item.upper(), c_list):
            scenario_db.create_collection(item.upper())
        else:
            scenario_db.drop_collection(item.upper())
    # Read subbasin.tif and dist2Stream.tif for locating BMPs given by X/Y.
    subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
    dist2stream_r = RasterUtilClass.read_raster(cfg.spatials.dist2stream_d8)
    # End reading
    for j, bmp_txt in enumerate(bmp_tabs_path):
        bmp_tab_name = bmp_tabs[j]
        data_array = read_data_items_from_txt(bmp_txt)
        field_array = data_array[0]
        data_array = data_array[1:]
        for item in data_array:
            dic = dict()
            for i, field_name in enumerate(field_array):
                if MathClass.isnumerical(item[i]):
                    v = float(item[i])
                    # Store whole numbers as int to keep documents tidy.
                    if v % 1. == 0.:
                        v = int(v)
                    dic[field_name.upper()] = v
                else:
                    dic[field_name.upper()] = str(item[i]).upper()
            if StringClass.string_in_list(ImportScenario2Mongo._LocalX, list(dic.keys())) and \
                    StringClass.string_in_list(ImportScenario2Mongo._LocalY, list(dic.keys())):
                # The record is located by coordinates: derive its subbasin ID
                # and distance to stream from the rasters before upserting.
                subbsn_id = subbasin_r.get_value_by_xy(
                    dic[ImportScenario2Mongo._LocalX.upper()],
                    dic[ImportScenario2Mongo._LocalY.upper()])
                distance = dist2stream_r.get_value_by_xy(
                    dic[ImportScenario2Mongo._LocalX.upper()],
                    dic[ImportScenario2Mongo._LocalY.upper()])
                if subbsn_id is not None and distance is not None:
                    dic[ImportScenario2Mongo._SUBBASINID] = int(subbsn_id)
                    dic[ImportScenario2Mongo._DISTDOWN] = float(distance)
                    scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic,
                                                                           upsert=True)
            else:
                scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic,
                                                                       upsert=True)
    # print('BMP tables are imported.')
    # Write the BMP database name into the model workflow database.
    c_list = main_db.collection_names()
    if not StringClass.string_in_list(DBTableNames.main_scenario, c_list):
        main_db.create_collection(DBTableNames.main_scenario)
    bmp_info_dic = dict()
    bmp_info_dic[ImportScenario2Mongo._FLD_DB] = cfg.bmp_scenario_db
    main_db[DBTableNames.main_scenario].find_one_and_replace(bmp_info_dic,
                                                             bmp_info_dic,
                                                             upsert=True)
    return True
def calculate_statistics(
        sim_obs_dict,  # type: Optional[Dict[str, Dict[str, Union[List[datetime], List[float], float]]]]
        stime=None,  # type: Optional[datetime]
        etime=None  # type: Optional[datetime]
):
    # type: (...) -> Optional[List[str]]
    """Calculate NSE, R-square, RMSE, PBIAS, RSR, lnNSE, NSE1, and NSE3.

    The statistics are computed per variable and written back into
    ``sim_obs_dict`` in place.

    Args:
        sim_obs_dict: {VarName: {'UTCDATETIME': [t1, t2, ..., tn],
                                 'Obs': [o1, o2, ..., on],
                                 'Sim': [s1, s2, ..., sn]},
                       ...
                       }
        stime: Start time for statistics calculation (inclusive). None means
               "from the first record".
        etime: End time for statistics calculation (inclusive). None means
               "to the last record".

    Returns:
        Name list of the calculated statistics, or None if ``sim_obs_dict``
        is empty. Each variable's dict gains the keys 'NSE', 'R-square',
        'RMSE', 'PBIAS', 'RSR', 'lnNSE', 'NSE1', and 'NSE3'.
    """
    if not sim_obs_dict:
        return None
    for param, values in sim_obs_dict.items():
        dts = values['UTCDATETIME']
        # Handle each bound independently so single-sided ranges work.
        # (The original only special-cased BOTH bounds being None and passed
        # None to bisect otherwise, raising TypeError for mixed arguments.)
        sidx = 0 if stime is None else bisect.bisect_left(dts, stime)
        eidx = len(dts) if etime is None else bisect.bisect_right(dts, etime)
        obsl = values['Obs'][sidx:eidx]
        siml = values['Sim'][sidx:eidx]
        nse_value = MathClass.nashcoef(obsl, siml)
        r2_value = MathClass.rsquare(obsl, siml)
        rmse_value = MathClass.rmse(obsl, siml)
        pbias_value = MathClass.pbias(obsl, siml)
        rsr_value = MathClass.rsr(obsl, siml)
        lnnse_value = MathClass.nashcoef(obsl, siml, log=True)
        nse1_value = MathClass.nashcoef(obsl, siml, expon=1)
        nse3_value = MathClass.nashcoef(obsl, siml, expon=3)
        values['NSE'] = nse_value
        values['R-square'] = r2_value
        values['RMSE'] = rmse_value
        values['PBIAS'] = pbias_value
        values['RSR'] = rsr_value
        values['lnNSE'] = lnnse_value
        values['NSE1'] = nse1_value
        values['NSE3'] = nse3_value
    return ['NSE', 'R-square', 'RMSE', 'PBIAS', 'RSR', 'lnNSE', 'NSE1', 'NSE3']
def lookup_tables_as_collection_and_gridfs(cfg, maindb):
    """Import lookup tables (from text files) as both Collections and GridFS files.

    Each table is imported twice: every row becomes a document in a collection
    named after the table, and the numeric part of the table is packed into a
    GridFS file of 32-bit floats (header: row count; per row: column count
    followed by the values).

    Args:
        cfg: SEIMS config object; ``cfg.paramcfgs.lookup_tabs_dict`` maps table
             names to text-file paths.
        maindb: workflow model database.
    """
    for tablename, txt_file in list(cfg.paramcfgs.lookup_tabs_dict.items()):
        # Import each lookup table as a collection and GridFS file.
        c_list = maindb.collection_names()
        if not StringClass.string_in_list(tablename.upper(), c_list):
            maindb.create_collection(tablename.upper())
        else:
            maindb.drop_collection(tablename.upper())
        # Initialize an ordered bulk operator for the collection import.
        bulk = maindb[tablename.upper()].initialize_ordered_bulk_op()
        # Delete the GridFS file if one with this table name already exists.
        spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
        if spatial.exists(filename=tablename.upper()):
            x = spatial.get_version(filename=tablename.upper())
            spatial.delete(x._id)
        # Read data items; row 0 holds the field names.
        data_items = read_data_items_from_txt(txt_file)
        field_names = data_items[0][0:]
        item_values = list()  # numeric rows destined for the GridFS file
        for i, cur_data_item in enumerate(data_items):
            if i == 0:  # skip the header row
                continue
            data_import = dict()  # document for the collection import
            item_value = list()   # numeric values for the GridFS import
            for idx, fld in enumerate(field_names):
                if MathClass.isnumerical(cur_data_item[idx]):
                    tmp_value = float(cur_data_item[idx])
                    data_import[fld] = tmp_value
                    item_value.append(tmp_value)
                else:
                    # Non-numeric cells go to the collection only.
                    data_import[fld] = cur_data_item[idx]
            bulk.insert(data_import)
            if len(item_value) > 0:
                item_values.append(item_value)
        MongoUtil.run_bulk(bulk, 'No operations during import %s.' % tablename)
        # Begin the GridFS import of the numeric table.
        n_row = len(item_values)
        if n_row >= 1:
            n_col = len(item_values[0])
            for i in range(n_row):
                # All rows must share the first row's numeric dimension.
                if n_col != len(item_values[i]):
                    raise ValueError('Please check %s to make sure each item has '
                                     'the same numeric dimension. The size of first '
                                     'row is: %d, and the current data item is: %d' %
                                     (tablename, n_col, len(item_values[i])))
                else:
                    # Prefix each row with its column count, as the reader expects.
                    item_values[i].insert(0, n_col)
            metadic = {ModelParamDataUtils.item_count: n_row,
                       ModelParamDataUtils.field_count: n_col}
            cur_lookup_gridfs = spatial.new_file(filename=tablename.upper(),
                                                 metadata=metadic)
            # Header: the row count as a single float.
            header = [n_row]
            fmt = '%df' % 1
            s = pack(fmt, *header)
            cur_lookup_gridfs.write(s)
            # Body: each row is (n_col, v1, ..., v_ncol) packed as floats.
            fmt = '%df' % (n_col + 1)
            for i in range(n_row):
                s = pack(fmt, *item_values[i])
                cur_lookup_gridfs.write(s)
            cur_lookup_gridfs.close()
def interpolate_observed_data_to_regular_interval(in_file, time_interval,
                                                  start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME',
                                                  day_divided_hour=0):
    """Interpolate irregular observed data to a regular time interval.

    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 Field name can be PCP, FLOW, SED; the units are mm/h, m3/s,
                 and g/L (i.e., kg/m3), respectively.
        time_interval: time interval in minutes, e.g., daily output is 1440.
        start_time: start time, format must be 'YYYY-mm-dd HH:MM:SS'.
        end_time: end time, see also start_time.
        eliminate_zero: if True, intervals without original records are not output.
        time_sys_output: time system of the output, format
                         '<time_system> [<time_zone>]', e.g., 'LOCALTIME 8',
                         'UTCTIME' (default).
        day_divided_hour: when time_interval equals N*1440, the hour at which a
                          "day" starts; must range from 0 to 23, e.g.,
                          0 ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8 ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59

    Returns:
        Output files in the same directory as the input file, named
        <field>_<time system>_<time interval>[_nonzero].txt, e.g.,
        pcp_utctime_1440_nonzero.txt, flow_localtime_60.txt.
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # Supported observation variables.
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_avaiable_field(cur_fld):
        """Check if the given field name is supported."""
        return any(fff.lower() in cur_fld.lower() for fff in available_flds)

    # 1. Read all records keyed by datetime expressed in the OUTPUT time system.
    ord_data = OrderedDict()
    time_zone_output = time.timezone / -3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        # East-positive zone convention: UTC = local - zone, local = UTC + zone.
        # NOTE(review): the sibling implementation above uses the opposite
        # signs — confirm which convention get_time_system_from_data_file uses.
        if time_sys_input == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    # 2. Interpolate to the regular interval.
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        # Daily (or multi-day) output: align the first interval to the
        # user-specified day-dividing hour.
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # Collect the original records that fall in [sdt, edt).
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one item that is earlier than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # just add end time for computing convenience
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current interval
        # Initialize the interpolated record with zeros for every field.
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # SED requires a FLOW column for flow-weighted averaging.
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            if 'SED' in v_name.upper():
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            # Piecewise-constant integration over the sub-intervals between
            # consecutive original records.
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                # Flow-weighted mean concentration. Guard the division: the
                # original divided unconditionally and raised ZeroDivisionError
                # whenever the accumulated flow was exactly zero.
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!'
                          % item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                # Mean flow rate over the output interval (m3/s).
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():
                # The input is mm/h and the output is accumulated mm.
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta
    # 3. Write one output file per supported field.
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_avaiable_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.txt'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
def scenario_from_texts(cfg, main_db, scenario_db):
    """Import BMPs Scenario data to MongoDB.

    Args:
        cfg: SEIMS configuration object.
        main_db: main workflow model database (receives the scenario DB name).
        scenario_db: scenario database (receives one collection per BMP table).

    Returns:
        False if failed, otherwise True.
    """
    if not cfg.use_scernario:
        return False
    print('Import BMP Scenario Data... ')
    bmp_files = FileClass.get_filename_by_suffixes(cfg.scenario_dir, ['.txt'])
    bmp_tabs = list()       # collection names (file basenames)
    bmp_tabs_path = list()  # full paths of the corresponding text files
    for f in bmp_files:
        bmp_tabs.append(f.split('.')[0])
        bmp_tabs_path.append(cfg.scenario_dir + os.path.sep + f)
    # Create each collection if missing, otherwise drop it for a clean re-import.
    c_list = scenario_db.collection_names()
    for item in bmp_tabs:
        if not StringClass.string_in_list(item.upper(), c_list):
            scenario_db.create_collection(item.upper())
        else:
            scenario_db.drop_collection(item.upper())
    # Read subbasin.tif and dist2Stream.tif for locating BMPs given by X/Y.
    subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
    dist2stream_r = RasterUtilClass.read_raster(cfg.spatials.dist2stream_d8)
    # End reading
    for j, bmp_txt in enumerate(bmp_tabs_path):
        bmp_tab_name = bmp_tabs[j]
        data_array = read_data_items_from_txt(bmp_txt)
        field_array = data_array[0]
        data_array = data_array[1:]
        for item in data_array:
            dic = dict()
            for i, field_name in enumerate(field_array):
                if MathClass.isnumerical(item[i]):
                    v = float(item[i])
                    # Store whole numbers as int to keep documents tidy.
                    if v % 1. == 0.:
                        v = int(v)
                    dic[field_name.upper()] = v
                else:
                    dic[field_name.upper()] = str(item[i]).upper()
            if StringClass.string_in_list(ImportScenario2Mongo._LocalX, list(dic.keys())) and \
                    StringClass.string_in_list(ImportScenario2Mongo._LocalY, list(dic.keys())):
                # The record is located by coordinates: derive its subbasin ID
                # and distance to stream from the rasters before upserting.
                subbsn_id = subbasin_r.get_value_by_xy(
                    dic[ImportScenario2Mongo._LocalX.upper()],
                    dic[ImportScenario2Mongo._LocalY.upper()])
                distance = dist2stream_r.get_value_by_xy(
                    dic[ImportScenario2Mongo._LocalX.upper()],
                    dic[ImportScenario2Mongo._LocalY.upper()])
                if subbsn_id is not None and distance is not None:
                    dic[ImportScenario2Mongo._SUBBASINID] = int(subbsn_id)
                    dic[ImportScenario2Mongo._DISTDOWN] = float(distance)
                    scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic,
                                                                           upsert=True)
            else:
                scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic,
                                                                       upsert=True)
    # print('BMP tables are imported.')
    # Write the BMP database name into the model workflow database.
    c_list = main_db.collection_names()
    if not StringClass.string_in_list(DBTableNames.main_scenario, c_list):
        main_db.create_collection(DBTableNames.main_scenario)
    bmp_info_dic = dict()
    bmp_info_dic[ImportScenario2Mongo._FLD_DB] = cfg.bmp_scenario_db
    main_db[DBTableNames.main_scenario].find_one_and_replace(bmp_info_dic,
                                                             bmp_info_dic,
                                                             upsert=True)
    return True
def run(function_name, in_files, wp=None, in_params=None, out_files=None,
        mpi_params=None, log_params=None):
    """Run TauDEM function.

       - 1. The command will not execute if any input file does not exist.
       - 2. An error will be detected after running the TauDEM command if
            any output file does not exist;

    Args:
        function_name (str): Full path of TauDEM function.
        in_files (dict, required): Dict of pairs of parameter id (string) and
            file path (string or list) for input files, e.g.::

                {'-z': '/full/path/to/dem.tif'}

        wp (str, optional): Workspace for outputs. If not specified, the
            directory of the first input file in ``in_files`` will be used.
        in_params (dict, optional): Dict of pairs of parameter id (string) and
            value (or None for a flag parameter without a value) for input
            parameters, e.g.::

                {'-nc': None}
                {'-thresh': threshold}
                {'-m': 'ave' 's', '-nc': None}

        out_files (dict, optional): Dict of pairs of parameter id (string) and
            file path (string or list) for output files, e.g.::

                {'-fel': 'filleddem.tif'}
                {'-maxS': ['harden.tif', 'maxsimi.tif']}

        mpi_params (dict, optional): Dict of pairs of parameter id (string) and
            value or path for MPI setting, e.g.::

                {'mpipath':'/soft/bin','hostfile':'/soft/bin/cluster.node','n':4}
                {'mpipath':'/soft/bin', 'n':4}
                {'n':4}

        log_params (dict, optional): Dict of pairs of parameter id (string) and
            value or path for runtime and log output parameters. e.g.::

                {'logfile': '/home/user/log.txt',
                 'runtimefile': '/home/user/runtime.txt'}

    Returns:
        True if TauDEM run successfully, otherwise False.
    """
    # Check input files. TauDEM.error is expected to abort (log and raise),
    # so execution does not continue past a failed check.
    if in_files is None:
        TauDEM.error('Input files parameter is required!')
    if not isinstance(in_files, dict):
        TauDEM.error('The input files parameter must be a dict!')
    for (pid, infile) in iteritems(in_files):
        if infile is None:
            continue
        if isinstance(infile, list) or isinstance(infile, tuple):
            # Multiple files for one parameter: validate each, and let the
            # first existing file determine the workspace if wp is unset.
            # NOTE: in_files is mutated in place with normalized paths.
            for idx, inf in enumerate(infile):
                if inf is None:
                    continue
                inf, wp = TauDEM.check_infile_and_wp(inf, wp)
                in_files[pid][idx] = inf
            continue
        if os.path.exists(infile):
            infile, wp = TauDEM.check_infile_and_wp(infile, wp)
            in_files[pid] = os.path.abspath(infile)
        else:
            # For more flexible input files extension.
            # e.g., -inputtags 1 <path/to/tag1.tif> 2 <path/to/tag2.tif> ...
            # in such unpredictable circumstance, we cannot check the existance of
            # input files, so the developer will check it in other place.
            if len(StringClass.split_string(infile, ' ')) > 1:
                continue
            else:  # the infile still should be a existing file, so check in workspace
                if wp is None:
                    TauDEM.error('Workspace should not be None!')
                infile = wp + os.sep + infile
                if not os.path.exists(infile):
                    TauDEM.error('Input files parameter %s: %s is not existed!' %
                                 (pid, infile))
                in_files[pid] = os.path.abspath(infile)
    # Make workspace dir if not existed
    UtilClass.mkdir(wp)
    # Check the log parameter
    log_file = None
    runtime_file = None
    if log_params is not None:
        if not isinstance(log_params, dict):
            TauDEM.error('The log parameter must be a dict!')
        if 'logfile' in log_params and log_params['logfile'] is not None:
            log_file = log_params['logfile']
            # If log_file is just a file name, then save it in the default workspace.
            if os.sep not in log_file:
                log_file = wp + os.sep + log_file
                log_file = os.path.abspath(log_file)
        if 'runtimefile' in log_params and log_params['runtimefile'] is not None:
            runtime_file = log_params['runtimefile']
            # If log_file is just a file name, then save it in the default workspace.
            if os.sep not in runtime_file:
                runtime_file = wp + os.sep + runtime_file
                runtime_file = os.path.abspath(runtime_file)
    # remove out_files to avoid any file IO related error
    # (stale outputs are deleted up front; existence after the run proves success)
    new_out_files = list()
    if out_files is not None:
        if not isinstance(out_files, dict):
            TauDEM.error('The output files parameter must be a dict!')
        for (pid, out_file) in iteritems(out_files):
            if out_file is None:
                continue
            if isinstance(out_file, list) or isinstance(out_file, tuple):
                for idx, outf in enumerate(out_file):
                    if outf is None:
                        continue
                    outf = FileClass.get_file_fullpath(outf, wp)
                    FileClass.remove_files(outf)
                    out_files[pid][idx] = outf
                    new_out_files.append(outf)
            else:
                out_file = FileClass.get_file_fullpath(out_file, wp)
                FileClass.remove_files(out_file)
                out_files[pid] = out_file
                new_out_files.append(out_file)
    # concatenate command line
    commands = list()
    # MPI header
    if mpi_params is not None:
        if not isinstance(mpi_params, dict):
            TauDEM.error('The MPI settings parameter must be a dict!')
        if 'mpipath' in mpi_params and mpi_params['mpipath'] is not None:
            commands.append(mpi_params['mpipath'] + os.sep + 'mpiexec')
        else:
            commands.append('mpiexec')
        if 'hostfile' in mpi_params and mpi_params['hostfile'] is not None \
                and not StringClass.string_match(mpi_params['hostfile'], 'none') \
                and os.path.isfile(mpi_params['hostfile']):
            commands.append('-f')
            commands.append(mpi_params['hostfile'])
        if 'n' in mpi_params and mpi_params['n'] > 1:
            commands.append('-n')
            commands.append(str(mpi_params['n']))
        else:  # If number of processor is less equal than 1, then do not call mpiexec.
            commands = []
    # append TauDEM function name, which can be full path or just one name
    commands.append(function_name)
    # append input files
    for (pid, infile) in iteritems(in_files):
        if infile is None:
            continue
        if pid[0] != '-':
            pid = '-' + pid
        commands.append(pid)
        if isinstance(infile, list) or isinstance(infile, tuple):
            # Multiple files are passed as one space-separated argument.
            commands.append(' '.join(tmpf for tmpf in infile))
        else:
            commands.append(infile)
    # append input parameters
    if in_params is not None:
        if not isinstance(in_params, dict):
            TauDEM.error('The input parameters must be a dict!')
        for (pid, v) in iteritems(in_params):
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            # allow for parameter which is an flag without value
            if v != '' and v is not None:
                if MathClass.isnumerical(v):
                    commands.append(str(v))
                else:
                    commands.append(v)
    # append output parameters
    if out_files is not None:
        for (pid, outfile) in iteritems(out_files):
            if outfile is None:
                continue
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            if isinstance(outfile, list) or isinstance(outfile, tuple):
                commands.append(' '.join(tmpf for tmpf in outfile))
            else:
                commands.append(outfile)
    # run command
    runmsg = UtilClass.run_command(commands)
    TauDEM.log(runmsg, log_file)
    TauDEM.output_runtime_to_log(function_name, runmsg, runtime_file)
    # Check out_files, raise RuntimeError if not exist.
    for of in new_out_files:
        if not os.path.exists(of):
            TauDEM.error('%s failed, and the %s was not generated!' %
                         (function_name, of))
            return False
    return True