def calculate_environment(self):
    if not self.modelrun:  # no evaluation has been done
        self.economy = self.worst_econ
        self.environment = self.worst_env
        return
    rfile = self.modelout_dir + os.path.sep + self.bmps_info['ENVEVAL']
    if not FileClass.is_file_exists(rfile):
        time.sleep(5)  # sleep 5 seconds to wait for the output
        if not FileClass.is_file_exists(rfile):
            print('WARNING: Although the SEIMS model ran successfully, the desired output: %s'
                  ' cannot be found!' % rfile)
            self.economy = self.worst_econ
            self.environment = self.worst_env
            return
    base_amount = self.bmps_info['BASE_ENV']
    if StringClass.string_match(rfile.split('.')[-1], 'tif'):  # Raster data
        rr = RasterUtilClass.read_raster(rfile)
        soil_erosion_amount = rr.get_sum() / self.timerange  # unit: year
        # reduction rate of soil erosion
        self.environment = (base_amount - soil_erosion_amount) / base_amount
    elif StringClass.string_match(rfile.split('.')[-1], 'txt'):  # Time series data
        sed_sum = read_simulation_from_txt(self.modelout_dir)  # TODO, fix it later, lj
        self.environment = (base_amount - sed_sum) / base_amount
    else:
        self.economy = self.worst_econ
        self.environment = self.worst_env
        return

def initial_params_from_txt(cfg, maindb):
    """Import initial calibration parameters from a txt data file.

    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object
    """
    # Drop the collection if it exists; otherwise create it.
    c_list = maindb.collection_names()
    if not StringClass.string_in_list(DBTableNames.main_parameter, c_list):
        maindb.create_collection(DBTableNames.main_parameter)
    else:
        maindb.drop_collection(DBTableNames.main_parameter)
    # initialize bulk operator
    bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
    # read initial parameters from the txt file
    data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
    field_names = data_items[0][0:]
    for i, cur_data_item in enumerate(data_items):
        if i == 0:
            continue
        # initialize one default blank parameter dict
        data_import = {ModelParamFields.name: '', ModelParamFields.desc: '',
                       ModelParamFields.unit: '', ModelParamFields.module: '',
                       ModelParamFields.value: DEFAULT_NODATA,
                       ModelParamFields.impact: DEFAULT_NODATA,
                       ModelParamFields.change: 'NC',
                       ModelParamFields.max: DEFAULT_NODATA,
                       ModelParamFields.min: DEFAULT_NODATA,
                       ModelParamFields.type: ''}
        for k, v in list(data_import.items()):
            idx = field_names.index(k)
            if cur_data_item[idx] == '':
                if StringClass.string_match(k, ModelParamFields.change_ac):
                    data_import[k] = 0
                elif StringClass.string_match(k, ModelParamFields.change_rc):
                    data_import[k] = 1
                elif StringClass.string_match(k, ModelParamFields.change_nc):
                    data_import[k] = 0
                elif StringClass.string_match(k, ModelParamFields.change_vc):
                    data_import[k] = DEFAULT_NODATA  # Be careful to check NODATA when used!
            else:
                if MathClass.isnumerical(cur_data_item[idx]):
                    data_import[k] = float(cur_data_item[idx])
                else:
                    data_import[k] = cur_data_item[idx]
        bulk.insert(data_import)
    # execute import operations
    MongoUtil.run_bulk(bulk, 'No operation during initial_params_from_txt.')
    # create an index on parameter type and name, in ascending order
    maindb[DBTableNames.main_parameter].create_index([(ModelParamFields.type, ASCENDING),
                                                      (ModelParamFields.name, ASCENDING)])

def convertstatsmethod(method_str):
    """Convert statistics method to ave, min, and max."""
    if StringClass.string_match(method_str, 'Average'):
        return 'ave'
    elif StringClass.string_match(method_str, 'Maximum'):
        return 'max'
    elif StringClass.string_match(method_str, 'Minimum'):
        return 'min'
    elif method_str.lower() in ['ave', 'max', 'min']:
        return method_str.lower()
    else:
        return 'ave'

def variable_table(db, var_file):
    """Import variables table"""
    var_data_items = read_data_items_from_txt(var_file)
    var_flds = var_data_items[0]
    for i in range(1, len(var_data_items)):
        dic = {}
        for j in range(len(var_data_items[i])):
            if StringClass.string_match(var_flds[j], VariableDesc.type):
                dic[VariableDesc.type] = var_data_items[i][j]
            elif StringClass.string_match(var_flds[j], VariableDesc.unit):
                dic[VariableDesc.unit] = var_data_items[i][j]
        # If this item already exists, update it; otherwise insert a new one.
        curfilter = {VariableDesc.type: dic[VariableDesc.type]}
        db[DBTableNames.var_desc].find_one_and_replace(curfilter, dic, upsert=True)

def calculate_environment(self):
    """Calculate environmental benefit based on the model output and the
    base values predefined in the configuration file.
    """
    if not self.modelrun:  # no evaluation has been done
        self.economy = self.worst_econ
        self.environment = self.worst_env
        return
    rfile = self.modelout_dir + os.path.sep + self.eval_info['ENVEVAL']
    if not FileClass.is_file_exists(rfile):
        time.sleep(0.1)  # Wait a moment in case of an unpredictable file system error
        if not FileClass.is_file_exists(rfile):
            print('WARNING: Although the SEIMS model has been executed, the desired output: %s'
                  ' cannot be found!' % rfile)
            self.economy = self.worst_econ
            self.environment = self.worst_env
            # clean up the model and scenario data
            self.model.SetMongoClient()
            self.model.clean(delete_scenario=True)
            self.model.UnsetMongoClient()
            return
    base_amount = self.eval_info['BASE_ENV']
    if StringClass.string_match(rfile.split('.')[-1], 'tif'):  # Raster data
        rr = RasterUtilClass.read_raster(rfile)
        sed_sum = rr.get_sum() / self.eval_timerange  # unit: year
    elif StringClass.string_match(rfile.split('.')[-1], 'txt'):  # Time series data
        sed_sum = read_simulation_from_txt(self.modelout_dir, ['SED'],
                                           self.model.OutletID,
                                           self.cfg.eval_stime, self.cfg.eval_etime)
    else:
        raise ValueError('The file format of ENVEVAL MUST be tif or txt!')
    if base_amount < 0:  # indicates a base scenario
        self.environment = sed_sum
    else:
        # reduction rate of soil erosion
        self.environment = (base_amount - sed_sum) / base_amount
    # print exceptional values
    if self.environment > 1. or self.environment < 0. or numpy.isnan(self.environment):
        print('Exception Information: Scenario ID: %d, '
              'SUM(%s): %s' % (self.ID, rfile, repr(sed_sum)))
        self.environment = self.worst_env

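
# Illustrative sketch of the environmental-effectiveness formula above (the
# numbers are hypothetical, not from the source): with a baseline soil erosion
# of 1000 t/yr and a simulated 750 t/yr under a BMP scenario,
#   base_amount = 1000.  # t/yr, baseline scenario
#   sed_sum = 750.       # t/yr, simulated under the BMP scenario
#   environment = (base_amount - sed_sum) / base_amount  # 0.25, i.e., 25% reduction
# Values outside [0, 1] (or NaN) are treated as exceptions and reset to worst_env.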
def convertdistmethod(method_str):
    """Convert distance method to h, v, p, and s."""
    if StringClass.string_match(method_str, 'Horizontal'):
        return 'h'
    elif StringClass.string_match(method_str, 'Vertical'):
        return 'v'
    elif StringClass.string_match(method_str, 'Pythagoras'):
        return 'p'
    elif StringClass.string_match(method_str, 'Surface'):
        return 's'
    elif method_str.lower() in ['h', 'v', 'p', 's']:
        return method_str.lower()
    else:
        return 's'

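
# Usage sketch (illustrative; both converters are used as static methods of
# TauDEM elsewhere in this module): full TauDEM keyword names and short forms
# are both accepted, and unrecognized input falls back to the defaults
# 'ave' and 's', respectively.
#   TauDEM.convertstatsmethod('Maximum')    # -> 'max'
#   TauDEM.convertdistmethod('Pythagoras')  # -> 'p'
#   TauDEM.convertdistmethod('unknown')     # -> 's' (default)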
def __init__(self, cf):
    """Get parameters from a ConfigParser object."""
    self.N = 100
    self.num_levels = 10
    self.grid_jump = 2
    self.optimal_t = None
    self.local_opt = True
    section_name = 'Morris_Method'
    if section_name not in cf.sections():
        raise ValueError('[%s] section MUST exist in the *.ini file.' % section_name)
    if cf.has_option(section_name, 'n'):
        self.N = cf.getint(section_name, 'n')
    if cf.has_option(section_name, 'num_levels'):
        self.num_levels = cf.getint(section_name, 'num_levels')
    if cf.has_option(section_name, 'grid_jump'):
        self.grid_jump = cf.getint(section_name, 'grid_jump')
    if cf.has_option(section_name, 'optimal_trajectories'):
        tmp_opt_t = cf.get(section_name, 'optimal_trajectories')
        if not StringClass.string_match(tmp_opt_t, 'none'):
            self.optimal_t = cf.getint(section_name, 'optimal_trajectories')
            if self.optimal_t > self.N or self.optimal_t < 2:
                self.optimal_t = None
    if cf.has_option(section_name, 'local_optimization'):
        self.local_opt = cf.getboolean(section_name, 'local_optimization')

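
# A minimal [Morris_Method] ini section sketch (the option values shown are
# hypothetical and simply mirror the defaults in the code above):
#   [Morris_Method]
#   n = 100
#   num_levels = 10
#   grid_jump = 2
#   optimal_trajectories = none
#   local_optimization = True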
def read_optional_section(self, _opt):
    """Read and check OPTIONAL inputs."""
    if _opt not in self.cf.sections():
        return
    self.mpi_dir = self.cf.get(_opt, 'mpiexedir')
    self.hostfile = self.cf.get(_opt, 'hostfile')
    self.outlet = self.cf.get(_opt, 'outlet')
    self.valley = self.cf.get(_opt, 'vlysrc')
    self.ridge = self.cf.get(_opt, 'rdgsrc')
    self.regional_attr = self.cf.get(_opt, 'regionalattr')
    if self.proc is None or self.proc <= 0:
        if self.cf.has_option(_opt, 'inputproc'):
            self.proc = self.cf.getint(_opt, 'inputproc')
        else:
            self.proc = cpu_count() // 2
    # if the mpi directory is not set, infer it from the mpiexec executable
    if self.mpi_dir is None or StringClass.string_match(self.mpi_dir, 'none') \
            or not os.path.isdir(self.mpi_dir):
        mpipath = FileClass.get_executable_fullpath('mpiexec')
        self.mpi_dir = os.path.dirname(mpipath)
        if self.mpi_dir is None:
            raise RuntimeError('Can not find mpiexec!')
    self.hostfile = AutoFuzSlpPosConfig.check_file_available(self.hostfile)
    self.outlet = AutoFuzSlpPosConfig.check_file_available(self.outlet)
    self.valley = AutoFuzSlpPosConfig.check_file_available(self.valley)
    self.ridge = AutoFuzSlpPosConfig.check_file_available(self.ridge)
    self.regional_attr = AutoFuzSlpPosConfig.check_file_available(self.regional_attr)
    if self.topoparam is None:
        self.topoparam = TopoAttrNames(self.ws)
    if self.regional_attr is not None:
        self.topoparam.add_user_defined_attribute('rpi', self.regional_attr, True)

def read_optionfuzinf_section(self, _optfuzinf):
    """Optional parameter-settings for fuzzy slope position inference."""
    if _optfuzinf not in self.cf.sections():
        return
    if self.cf.has_option(_optfuzinf, 'inferparams'):
        fuzinf_strs = self.cf.get(_optfuzinf, 'inferparams')
        if StringClass.string_match(fuzinf_strs, 'none'):
            return
        self.inferparam = dict()
        fuzinf_types = StringClass.split_string(fuzinf_strs, ';')
        if len(fuzinf_types) != len(self.slppostype):
            raise RuntimeError("InferParams (%s) MUST be consistent with slope position types"
                               " and separated by ';'!" % fuzinf_strs)
        for i, slppos in enumerate(self.slppostype):
            self.inferparam[slppos] = dict()
            infparams = StringClass.extract_numeric_values_from_string(fuzinf_types[i])
            if len(infparams) % 4 != 0:
                raise RuntimeError('Each item of InferParams MUST contain four elements,'
                                   ' i.e., Attribute No., FMF No., w1, w2! Please check item: '
                                   '%s for %s.' % (fuzinf_types[i], slppos))
            for j in range(int(len(infparams) / 4)):
                attridx = int(infparams[j * 4]) - 1
                attrname = self.selectedtopolist[attridx]
                fmf = self._FMFTYPE[int(infparams[j * 4 + 1])]
                curinfparam = self._FMFPARAM[fmf][:]
                curinfparam[0] = infparams[j * 4 + 2]  # w1
                curinfparam[3] = infparams[j * 4 + 3]  # w2
                self.inferparam[slppos][attrname] = [fmf] + curinfparam

def OutputItems(self):
    # type: (...) -> (List[AnyStr], Dict[AnyStr, Optional[List[AnyStr]]])
    """Read output ID and items from database.

    Returns:
        _output_ids (list): OUTPUTID list
        _output_items (dict): key is core file name of output,
                              value is None or list of aggregated types
    """
    if self._output_ids and self._output_items:
        return self._output_ids, self._output_items
    cursor = self.fileout_tab.find({'$or': [{ModelCfgFields.use: '1'},
                                            {ModelCfgFields.use: 1}]})
    if cursor is not None:
        for item in cursor:
            self._output_ids.append(item[ModelCfgFields.output_id])
            name = item[ModelCfgFields.filename]
            corename = StringClass.split_string(name, '.')[0]
            types = item[ModelCfgFields.type]
            if StringClass.string_match(types, 'NONE'):
                self._output_items.setdefault(corename, None)
            else:
                self._output_items.setdefault(corename,
                                              StringClass.split_string(types, '-'))
    return self._output_ids, self._output_items

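
# Usage sketch (hypothetical; assumes OutputItems is invoked as a plain method
# on an instance `model_cfg` of this class):
#   output_ids, output_items = model_cfg.OutputItems()
#   # e.g., output_items might map 'SED' -> ['SUM'] and 'QI' -> None,
#   # where None means no aggregation type is defined for that output.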
def initialize_landcover_parameters(landcover_file, landcover_initial_fields_file, dst_dir):
    """generate initial landcover_init_param parameters"""
    lc_data_items = read_data_items_from_txt(landcover_initial_fields_file)
    # print(lc_data_items)
    field_names = lc_data_items[0]
    lu_id = -1
    for i, v in enumerate(field_names):
        if StringClass.string_match(v, 'LANDUSE_ID'):
            lu_id = i
            break
    data_items = lc_data_items[1:]
    replace_dicts = dict()
    for item in data_items:
        for i, v in enumerate(item):
            if i != lu_id:
                if field_names[i].upper() not in list(replace_dicts.keys()):
                    replace_dicts[field_names[i].upper()] = {float(item[lu_id]): float(v)}
                else:
                    replace_dicts[field_names[i].upper()][float(item[lu_id])] = float(v)
    # print(replace_dicts)
    # Generate GTIFF
    for item, v in list(replace_dicts.items()):
        filename = dst_dir + os.path.sep + item + '.tif'
        print(filename)
        RasterUtilClass.raster_reclassify(landcover_file, v, filename)
    return list(replace_dicts['LANDCOVER'].values())

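
# Sketch of the expected lookup layout (the field name IGRO is hypothetical;
# LANDUSE_ID and LANDCOVER are referenced by the code above): one header row,
# then one row per landuse type, e.g.
#   LANDUSE_ID  LANDCOVER  IGRO
#   33          33         1
#   8           8          0
# Each non-ID column becomes a reclassification dict such as
# {33.0: 33.0, 8.0: 8.0} and is written out as <dst_dir>/<FIELD>.tif.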
def export_landuse_lookup_files_from_mongodb(cfg, maindb):
    """Export landuse lookup tables from MongoDB to txt files."""
    lookup_dir = cfg.dirs.lookup
    property_namelist = ModelParamDataUtils.landuse_fields
    property_map = {}
    property_namelist.append('USLE_P')
    query_result = maindb['LANDUSELOOKUP'].find()
    if query_result is None:
        raise RuntimeError('The LANDUSELOOKUP collection does not exist or is empty!')
    count = 0
    for row in query_result:
        value_map = dict()
        for i, p_name in enumerate(property_namelist):
            if StringClass.string_match(p_name, 'USLE_P'):
                # Currently, USLE_P is set to 1 for all landuse types.
                value_map[p_name] = 1
            else:
                # I do not know why manning * 10 here. Just commented out for now. lj
                # if StringClass.string_match(p_name, "Manning"):
                #     value_map[p_name] = row.get(p_name) * 10
                # else:
                value_map[p_name] = row.get(p_name)
        count += 1
        property_map[count] = value_map
    n = len(property_map)
    UtilClass.rmmkdir(lookup_dir)
    for propertyName in property_namelist:
        with open('%s/%s.txt' % (lookup_dir, propertyName), 'w') as f:
            f.write('%d\n' % n)
            for prop_id in property_map:
                s = '%d %f\n' % (prop_id, property_map[prop_id][propertyName])
                f.write(s)

def check_file_available(in_f):
    """Check whether the input file exists; return None if it is not specified."""
    if in_f is None or in_f == '' or StringClass.string_match(in_f, 'none'):
        return None
    if not FileClass.is_file_exists(in_f):
        raise ValueError('The file %s does not exist or is not accessible!' % in_f)
    return in_f

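
# Usage sketch (the file path is hypothetical; the function is used as a
# static method of AutoFuzSlpPosConfig elsewhere in this module):
#   AutoFuzSlpPosConfig.check_file_available('none')           # -> None
#   AutoFuzSlpPosConfig.check_file_available('')               # -> None
#   AutoFuzSlpPosConfig.check_file_available('/data/dem.tif')  # -> the path if the
#                                                              #    file exists, else ValueError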
def read_inf_param_from_file(conf):
    """Read fuzzy inference parameters from file."""
    params_list = list()
    with open(conf, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            eles = line.split('\n')[0].split('\t')
            params = StringClass.extract_numeric_values_from_string(line.split('\n')[0])
            if StringClass.string_match(eles[0], 'Parameters') and len(params) >= 6:
                params_list.append([eles[1]] + [eles[3]] + params[-6:])
    return params_list

def read_pareto_popsize_from_txt(txt_file, sce_name='scenario'):
    # type: (AnyStr, AnyStr) -> (List[int], List[int])
    """Read the population size of each generation."""
    with open(txt_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    pareto_popnum = OrderedDict()
    found = False
    cur_gen = -1
    iden_idx = -1
    for line in lines:
        str_line = line
        for LF in LFs:
            if LF in line:
                str_line = line.split(LF)[0]
                break
        if str_line == '':
            continue
        values = StringClass.extract_numeric_values_from_string(str_line)
        # Check generation
        if str_line[0] == '#' and 'Generation' in str_line:
            if len(values) != 1:
                continue
            gen = int(values[0])
            found = True
            cur_gen = gen
            pareto_popnum[cur_gen] = list()
            continue
        if not found:
            continue
        if values is None:  # means header line
            line_list = StringClass.split_string(str_line, ['\t'])
            for idx, v in enumerate(line_list):
                if StringClass.string_match(v, sce_name):
                    iden_idx = idx
                    break
            continue
        if iden_idx < 0:
            continue
        # now append the real Pareto front point data
        pareto_popnum[cur_gen].append(int(values[iden_idx]))
    all_sceids = list()
    acc_num = list()
    genids = sorted(list(pareto_popnum.keys()))
    for idx, genid in enumerate(genids):
        for _id in pareto_popnum[genid]:
            if _id not in all_sceids:
                all_sceids.append(_id)
        acc_num.append(len(all_sceids))
    return genids, acc_num

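
# Usage sketch (the file path and numbers are hypothetical): returns the
# generation IDs and the accumulated number of unique scenario IDs seen up to
# and including each generation.
#   genids, acc_num = read_pareto_popsize_from_txt('runtime.log', sce_name='scenario')
#   # e.g., genids = [1, 2, 3], acc_num = [24, 40, 51]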
def dinfdistdown(np, ang, fel, slp, src, statsm, distm, edgecontamination, wg, dist,
                 workingdir=None, mpiexedir=None, exedir=None, log_file=None, hostfile=None):
    """Run D-inf distance down to stream"""
    in_params = {'-m': '%s %s' % (TauDEM.convertstatsmethod(statsm),
                                  TauDEM.convertdistmethod(distm))}
    if StringClass.string_match(edgecontamination, 'false') or edgecontamination is False:
        in_params['-nc'] = None
    return TauDEM.run(FileClass.get_executable_fullpath('dinfdistdown', exedir),
                      {'-fel': fel, '-slp': slp, '-ang': ang, '-src': src, '-wg': wg},
                      workingdir,
                      in_params,
                      {'-dd': dist},
                      {'mpipath': mpiexedir, 'hostfile': hostfile, 'n': np},
                      {'logfile': log_file})

def sites_table(hydro_clim_db, site_file, site_type):
    """Import HydroClimate sites table"""
    sites_loc = dict()
    site_data_items = read_data_items_from_txt(site_file)
    site_flds = site_data_items[0]
    for i in range(1, len(site_data_items)):
        dic = dict()
        for j in range(len(site_data_items[i])):
            if StringClass.string_match(site_flds[j], StationFields.id):
                dic[StationFields.id] = int(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.name):
                dic[StationFields.name] = site_data_items[i][j]
            elif StringClass.string_match(site_flds[j], StationFields.x):
                dic[StationFields.x] = float(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.y):
                dic[StationFields.y] = float(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.lon):
                dic[StationFields.lon] = float(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.lat):
                dic[StationFields.lat] = float(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.elev):
                dic[StationFields.elev] = float(site_data_items[i][j])
            elif StringClass.string_match(site_flds[j], StationFields.outlet):
                dic[StationFields.outlet] = float(site_data_items[i][j])
        dic[StationFields.type] = site_type
        curfilter = {StationFields.id: dic[StationFields.id],
                     StationFields.type: dic[StationFields.type]}
        hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, dic, upsert=True)
        if dic[StationFields.id] not in list(sites_loc.keys()):
            sites_loc[dic[StationFields.id]] = SiteInfo(dic[StationFields.id],
                                                        dic[StationFields.name],
                                                        dic[StationFields.lat],
                                                        dic[StationFields.lon],
                                                        dic[StationFields.x],
                                                        dic[StationFields.y],
                                                        dic[StationFields.elev])
    hydro_clim_db[DBTableNames.sites].create_index([(StationFields.id, ASCENDING),
                                                    (StationFields.type, ASCENDING)])
    return sites_loc

def dinfdistdown(np, ang, fel, slp, src, statsm, distm, edgecontamination, wg, dist,
                 workingdir=None, mpiexedir=None, exedir=None, log_file=None,
                 runtime_file=None, hostfile=None):
    """Run D-inf distance down to stream"""
    in_params = {'-m': '%s %s' % (TauDEM.convertstatsmethod(statsm),
                                  TauDEM.convertdistmethod(distm))}
    if StringClass.string_match(edgecontamination, 'false') or edgecontamination is False:
        in_params['-nc'] = None
    fname = TauDEM.func_name('dinfdistdown')
    return TauDEM.run(FileClass.get_executable_fullpath(fname, exedir),
                      {'-fel': fel, '-slp': slp, '-ang': ang, '-src': src, '-wg': wg},
                      workingdir,
                      in_params,
                      {'-dd': dist},
                      {'mpipath': mpiexedir, 'hostfile': hostfile, 'n': np},
                      {'logfile': log_file, 'runtimefile': runtime_file})

def dinfdistuptoridge(np, ang, fel, slp, propthresh, dist, statsm, distm, edgecontamination,
                      rdg=None, workingdir=None, mpiexedir=None, exedir=None,
                      log_file=None, runtime_file=None, hostfile=None):
    """Run Dinf distance to ridge."""
    fname = TauDEM.func_name('dinfdistuptoridge')
    in_params = {'-thresh': str(propthresh),
                 '-m': '%s %s' % (TauDEM.convertstatsmethod(statsm),
                                  TauDEM.convertdistmethod(distm))}
    if StringClass.string_match(edgecontamination, 'false') or edgecontamination is False:
        in_params['-nc'] = None
    return TauDEM.run(FileClass.get_executable_fullpath(fname, exedir),
                      in_files={'-ang': ang, '-fel': fel, '-slp': slp, '-rdg': rdg},
                      wp=workingdir,
                      in_params=in_params,
                      out_files={'-du': dist},
                      mpi_params={'mpipath': mpiexedir, 'hostfile': hostfile, 'n': np},
                      log_params={'logfile': log_file, 'runtimefile': runtime_file})

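
# Invocation sketch (all file paths are hypothetical; assumes these functions
# are static methods of TauDEM, as their internal calls suggest): compute the
# D-inf distance down to the stream using the average horizontal distance,
# with edge contamination disabled.
#   TauDEM.dinfdistdown(4, 'flowang.tif', 'dem_fel.tif', 'slope.tif', 'stream_src.tif',
#                       'Average', 'Horizontal', 'false', 'weight.tif', 'dist2stream.tif',
#                       workingdir='/tmp/taudem', log_file='taudem.log')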
def read_field_arrays_from_csv(csvf):
    data_items = read_data_items_from_txt(csvf)
    if len(data_items) < 2:
        return
    flds = data_items[0]
    flds_array = dict()
    for idx, data_item in enumerate(data_items):
        if idx == 0:
            continue
        data_item_values = StringClass.extract_numeric_values_from_string(','.join(data_item))
        for fld_idx, fld_name in enumerate(flds):
            if fld_idx == 0 or StringClass.string_match(fld_name, 'FID'):
                continue
            if fld_name not in flds_array:
                flds_array[fld_name] = list()
            flds_array[fld_name].append(data_item_values[fld_idx])
    # for key, value in list(flds_array.items()):
    #     print('%s: %d' % (key, len(value)))
    return combine_multi_layers_array(flds_array)

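
# Usage sketch (the filename and field names are hypothetical): for a CSV with
# a header such as 'FID,SLOPE_1,SLOPE_2', the first column and any FID column
# are skipped, and the remaining columns are collected per field name before
# being combined by combine_multi_layers_array.
#   arr = read_field_arrays_from_csv('fields.csv')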
def get_utcdatetime_from_field_values(flds, values, tsys, tzone=None):
    """Get a UTC datetime from field-value lists.

    Returns:
        utctime
    """
    cur_y = 0
    cur_m = 0
    cur_d = 0
    cur_hh = 0
    cur_mm = 0
    cur_ss = 0
    dt = None
    for i, fld in enumerate(flds):
        if StringClass.string_match(fld, DataValueFields.dt):
            dt = StringClass.get_datetime(values[i])
        elif StringClass.string_match(fld, DataValueFields.y):
            cur_y = int(values[i])
        elif StringClass.string_match(fld, DataValueFields.m):
            cur_m = int(values[i])
        elif StringClass.string_match(fld, DataValueFields.d):
            cur_d = int(values[i])
        elif StringClass.string_match(fld, DataValueFields.hour):
            cur_hh = int(values[i])
        elif StringClass.string_match(fld, DataValueFields.minute):
            cur_mm = int(values[i])
        elif StringClass.string_match(fld, DataValueFields.second):
            cur_ss = int(values[i])
    # Get the datetime and do the UTC/local transformation
    if dt is None:  # 'DATETIME' is not provided
        if cur_y < 1900 or (cur_m <= 0 and cur_d <= 0):
            raise ValueError('Can not find TIME information from '
                             'fields: %s' % ' '.join(fld for fld in flds))
        dt = datetime(cur_y, cur_m, cur_d, cur_hh, cur_mm, cur_ss)
    if not StringClass.string_match(tsys, 'UTCTIME'):
        if tzone is None:
            tzone = time.timezone // -3600  # positive value for EAST
        dt -= timedelta(minutes=tzone * 60)
    return dt

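
# Conversion sketch (values are hypothetical, and the header names are assumed
# to match the DataValueFields constants): a local time of 2013-01-01 08:00 in
# time zone UTC+8 is shifted back 8 hours to the UTC datetime 2013-01-01 00:00.
#   flds = ['YEAR', 'MONTH', 'DAY', 'HOUR']
#   values = ['2013', '1', '1', '8']
#   utc_t = get_utcdatetime_from_field_values(flds, values, 'LOCALTIME', tzone=8)
#   # -> datetime(2013, 1, 1, 0, 0)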
def get_attr_file(self, attrname):
    """Get the file path of pre-prepared topographic attribute."""
    if StringClass.string_match(attrname, 'rpi'):
        return self.rpi
    elif StringClass.string_match(attrname, 'profc'):
        return self.profc
    elif StringClass.string_match(attrname, 'horizc'):
        return self.horizc
    elif StringClass.string_match(attrname, 'slp'):
        return self.slope
    elif StringClass.string_match(attrname, 'elev'):
        return self.elev
    elif StringClass.string_match(attrname, 'hand'):
        return self.hand
    else:
        return None

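
# Usage sketch (assuming `topoparam` is a TopoAttrNames instance, as used in
# read_optional_section above): unknown attribute names yield None, which the
# caller treats as a user-defined attribute requiring an explicit file path.
#   slp_file = topoparam.get_attr_file('slp')  # -> path of the slope raster
#   unknown = topoparam.get_attr_file('twi')   # -> None ('twi' is hypothetical)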
def OutputItems(self):
    # type: (...) -> Dict[AnyStr, Optional[List[AnyStr]]]
    """Read output items from database."""
    if self._output_items:
        return self._output_items
    cursor = self.fileout_tab.find({'$or': [{ModelCfgFields.use: '1'},
                                            {ModelCfgFields.use: 1}]})
    if cursor is not None:
        for item in cursor:
            name = item[ModelCfgFields.filename]
            corename = StringClass.split_string(name, '.')[0]
            types = item[ModelCfgFields.type]
            if StringClass.string_match(types, 'NONE'):
                self._output_items.setdefault(corename, None)
            else:
                self._output_items.setdefault(corename,
                                              StringClass.split_string(types, '-'))
    return self._output_items

def data_from_txt(maindb, hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file):
    """Read observed data from txt files.

    Args:
        maindb: Main spatial database
        hydro_clim_db: hydro-climate database
        obs_txts_list: txt file paths of observed data
        sites_info_txts_list: txt file paths of site information
        subbsn_file: subbasin raster file

    Returns:
        True or False
    """
    # 1. Read monitor station information, and store variable information and station IDs
    variable_lists = []
    site_ids = []
    for site_file in sites_info_txts_list:
        site_data_items = read_data_items_from_txt(site_file)
        site_flds = site_data_items[0]
        for i in range(1, len(site_data_items)):
            dic = dict()
            types = list()
            units = list()
            for j, v in enumerate(site_data_items[i]):
                if StringClass.string_match(site_flds[j], StationFields.id):
                    dic[StationFields.id] = int(v)
                    site_ids.append(dic[StationFields.id])
                elif StringClass.string_match(site_flds[j], StationFields.name):
                    dic[StationFields.name] = v.strip()
                elif StringClass.string_match(site_flds[j], StationFields.type):
                    types = StringClass.split_string(v.strip(), '-')
                elif StringClass.string_match(site_flds[j], StationFields.lat):
                    dic[StationFields.lat] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.lon):
                    dic[StationFields.lon] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.x):
                    dic[StationFields.x] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.y):
                    dic[StationFields.y] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.unit):
                    units = StringClass.split_string(v.strip(), '-')
                elif StringClass.string_match(site_flds[j], StationFields.elev):
                    dic[StationFields.elev] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.outlet):
                    dic[StationFields.outlet] = float(v)
            for j, cur_type in enumerate(types):
                site_dic = dict()
                site_dic[StationFields.id] = dic[StationFields.id]
                site_dic[StationFields.name] = dic[StationFields.name]
                site_dic[StationFields.type] = cur_type
                site_dic[StationFields.lat] = dic[StationFields.lat]
                site_dic[StationFields.lon] = dic[StationFields.lon]
                site_dic[StationFields.x] = dic[StationFields.x]
                site_dic[StationFields.y] = dic[StationFields.y]
                site_dic[StationFields.unit] = units[j]
                site_dic[StationFields.elev] = dic[StationFields.elev]
                site_dic[StationFields.outlet] = dic[StationFields.outlet]
                # Add SubbasinID field
                matched, cur_sids = ImportObservedData.match_subbasin(subbsn_file,
                                                                      site_dic, maindb)
                if not matched:
                    break
                if len(cur_sids) == 1:  # store a single subbasin ID directly
                    cur_subbsn_id_str = cur_sids[0]
                else:  # otherwise join the IDs into a comma-separated string
                    cur_subbsn_id_str = ','.join(str(cid) for cid in cur_sids)
                site_dic[StationFields.subbsn] = cur_subbsn_id_str
                curfilter = {StationFields.id: site_dic[StationFields.id],
                             StationFields.type: site_dic[StationFields.type]}
                # print(curfilter)
                hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, site_dic,
                                                                       upsert=True)
                var_dic = dict()
                var_dic[StationFields.type] = types[j]
                var_dic[StationFields.unit] = units[j]
                if var_dic not in variable_lists:
                    variable_lists.append(var_dic)
    site_ids = list(set(site_ids))
    # 2. Read measurement data and import into MongoDB
    bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
    count = 0
    for measDataFile in obs_txts_list:
        # print(measDataFile)
        obs_data_items = read_data_items_from_txt(measDataFile)
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(measDataFile)
        # If the data items are EMPTY or there is only a header row,
        # go to the next data file.
        if obs_data_items == [] or len(obs_data_items) == 1:
            continue
        obs_flds = obs_data_items[0]
        required_flds = [StationFields.id, DataValueFields.type, DataValueFields.value]
        for fld in required_flds:
            if not StringClass.string_in_list(fld, obs_flds):
                raise ValueError('The %s does not meet the required format!' % measDataFile)
        for i, cur_obs_data_item in enumerate(obs_data_items):
            if i == 0:
                continue
            dic = dict()
            for j, cur_data_value in enumerate(cur_obs_data_item):
                if StringClass.string_match(obs_flds[j], StationFields.id):
                    dic[StationFields.id] = int(cur_data_value)
                    # if the current site ID is not included, go to the next data item
                    if dic[StationFields.id] not in site_ids:
                        continue
                elif StringClass.string_match(obs_flds[j], DataValueFields.type):
                    dic[DataValueFields.type] = cur_data_value
                elif StringClass.string_match(obs_flds[j], DataValueFields.value):
                    dic[DataValueFields.value] = float(cur_data_value)
            utc_t = HydroClimateUtilClass.get_utcdatetime_from_field_values(obs_flds,
                                                                            cur_obs_data_item,
                                                                            tsysin, tzonein)
            dic[DataValueFields.local_time] = utc_t - timedelta(minutes=tzonein * 60)
            dic[DataValueFields.time_zone] = tzonein
            dic[DataValueFields.utc] = utc_t
            # curfilter = {StationFields.id: dic[StationFields.id],
            #              DataValueFields.type: dic[DataValueFields.type],
            #              DataValueFields.utc: dic[DataValueFields.utc]}
            # bulk.find(curfilter).replace_one(dic)
            bulk.insert(dic)
            count += 1
            if count % 500 == 0:
                MongoUtil.run_bulk(bulk)
                bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
            # db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
    if count % 500 != 0:
        MongoUtil.run_bulk(bulk)
    # 3. Add measurement data with units converted.
    # Loop through the variable list: if the unit is mg/L (or g/L), store a copy
    # of the type with the suffix 'Conc' and convert the corresponding data to kg,
    # provided that discharge data are available.
    added_dics = list()
    for curVar in variable_lists:
        cur_type = curVar[StationFields.type]
        cur_unit = curVar[StationFields.unit]
        # Find data by type
        for item in hydro_clim_db[DBTableNames.observes].find({StationFields.type: cur_type}):
            dic = dict()
            dic[StationFields.id] = item[StationFields.id]
            dic[DataValueFields.value] = item[DataValueFields.value]
            dic[StationFields.type] = item[StationFields.type]
            dic[DataValueFields.local_time] = item[DataValueFields.local_time]
            dic[DataValueFields.time_zone] = item[DataValueFields.time_zone]
            dic[DataValueFields.utc] = item[DataValueFields.utc]
            if cur_unit == 'mg/L' or cur_unit == 'g/L':
                # update the type name
                dic[StationFields.type] = '%sConc' % cur_type
                curfilter = {StationFields.id: dic[StationFields.id],
                             DataValueFields.type: cur_type,
                             DataValueFields.utc: dic[DataValueFields.utc]}
                hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic,
                                                                          upsert=True)
                dic[StationFields.type] = cur_type
            # find the discharge on the current day
            cur_filter = {StationFields.type: 'Q',
                          DataValueFields.utc: dic[DataValueFields.utc],
                          StationFields.id: dic[StationFields.id]}
            q_dic = hydro_clim_db[DBTableNames.observes].find_one(filter=cur_filter)
            if q_dic is not None:
                q = q_dic[DataValueFields.value]
            else:
                continue
            if cur_unit == 'mg/L':
                # convert mg/L to kg
                dic[DataValueFields.value] = round(dic[DataValueFields.value] * q
                                                   * 86400. / 1000., 2)
            elif cur_unit == 'g/L':
                # convert g/L to kg
                dic[DataValueFields.value] = round(dic[DataValueFields.value] * q * 86400., 2)
            elif cur_unit == 'kg':
                dic[StationFields.type] = '%sConc' % cur_type
                # convert kg to mg/L
                dic[DataValueFields.value] = round(dic[DataValueFields.value] / q
                                                   * 1000. / 86400., 2)
            # add the new data item
            added_dics.append(dic)
    # import to MongoDB
    for dic in added_dics:
        curfilter = {StationFields.id: dic[StationFields.id],
                     DataValueFields.type: dic[DataValueFields.type],
                     DataValueFields.utc: dic[DataValueFields.utc]}
        hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)

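
# Worked conversion sketch for the mg/L branch above (the numbers are
# hypothetical): a concentration of 200 mg/L with a mean daily discharge Q of
# 2 m3/s gives a daily load of
#   200 * 2 * 86400 / 1000 = 34560 kg/day
# since mg/L is equivalent to g/m3, value * Q * 86400 yields grams per day,
# and dividing by 1000 converts grams to kilograms.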
def lookup_soil_parameters(dstdir, soiltype_file, soil_lookup_file):
    """Reclassify soil parameters by lookup table."""
    # Read soil properties from the txt file
    soil_lookup_data = read_data_items_from_txt(soil_lookup_file)
    soil_instances = list()
    soil_prop_flds = soil_lookup_data[0][:]
    for i in range(1, len(soil_lookup_data)):
        cur_soil_data_item = soil_lookup_data[i][:]
        cur_seqn = cur_soil_data_item[0]
        cur_sname = cur_soil_data_item[1]
        cur_soil_ins = SoilProperty(cur_seqn, cur_sname)
        for j in range(2, len(soil_prop_flds)):
            cur_flds = StringClass.split_string(cur_soil_data_item[j], '-')
            # Get field values and convert them to float
            for k, tmpfld in enumerate(cur_flds):
                cur_flds[k] = float(tmpfld)
            if StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NLYRS):
                cur_soil_ins.SOILLAYERS = int(cur_flds[0])
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._Z):
                cur_soil_ins.SOILDEPTH = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._OM):
                cur_soil_ins.OM = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._CLAY):
                cur_soil_ins.CLAY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SILT):
                cur_soil_ins.SILT = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SAND):
                cur_soil_ins.SAND = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ROCK):
                cur_soil_ins.ROCK = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ZMX):
                cur_soil_ins.SOL_ZMX = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ANIONEXCL):
                cur_soil_ins.ANION_EXCL = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._CRK):
                cur_soil_ins.SOL_CRK = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._BD):
                cur_soil_ins.DENSITY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._K):
                cur_soil_ins.CONDUCTIVITY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._WP):
                cur_soil_ins.WILTINGPOINT = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._FC):
                cur_soil_ins.FIELDCAP = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._AWC):
                cur_soil_ins.AWC = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._POROSITY):
                cur_soil_ins.POROSITY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._USLE_K):
                cur_soil_ins.USLE_K = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ALB):
                cur_soil_ins.SOL_ALB = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ESCO):
                cur_soil_ins.ESCO = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NO3):
                cur_soil_ins.SOL_NO3 = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NH4):
                cur_soil_ins.SOL_NH4 = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ORGN):
                cur_soil_ins.SOL_ORGN = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SOLP):
                cur_soil_ins.SOL_SOLP = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ORGP):
                cur_soil_ins.SOL_ORGP = cur_flds
        cur_soil_ins.check_data_validation()
        soil_instances.append(cur_soil_ins)
    soil_prop_dict = dict()
    for sol in soil_instances:
        cur_sol_dict = sol.soil_dict()
        for fld in cur_sol_dict:
            if fld in soil_prop_dict:
                soil_prop_dict[fld].append(cur_sol_dict[fld])
            else:
                soil_prop_dict[fld] = [cur_sol_dict[fld]]
    # Build one reclassification dict per single-value property, and one per
    # layer for multi-layer properties.
    replace_dicts = list()
    dst_soil_tifs = list()
    seqns = soil_prop_dict[SoilUtilClass._SEQN]
    max_lyr_num = int(numpy.max(soil_prop_dict[SoilUtilClass._NLYRS]))
    for key in soil_prop_dict:
        if key != SoilUtilClass._SEQN and key != SoilUtilClass._NAME:
            key_l = 1
            for key_v in soil_prop_dict[key]:
                if isinstance(key_v, list) and len(key_v) > key_l:
                    key_l = len(key_v)
            if key_l == 1:
                cur_dict = {}
                for i, tmpseq in enumerate(seqns):
                    cur_dict[float(tmpseq)] = soil_prop_dict[key][i]
                replace_dicts.append(cur_dict)
                dst_soil_tifs.append(dstdir + os.path.sep + key + '.tif')
            else:
                for i in range(max_lyr_num):
                    cur_dict = dict()
                    for j, tmpseq in enumerate(seqns):
                        if i < soil_prop_dict[SoilUtilClass._NLYRS][j]:
                            cur_dict[float(tmpseq)] = soil_prop_dict[key][j][i]
                        else:
                            cur_dict[float(seqns[j])] = DEFAULT_NODATA
                    replace_dicts.append(cur_dict)
                    dst_soil_tifs.append(dstdir + os.path.sep + key + '_' + str(i + 1) + '.tif')
    # Generate GeoTIFFs
    for i, soil_tif in enumerate(dst_soil_tifs):
        print(soil_tif)
        RasterUtilClass.raster_reclassify(soiltype_file, replace_dicts[i], soil_tif)

def daily_data_from_txt(climdb, data_txt_file, sites_info_dict):
    """Import daily climate data table"""
    tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(data_txt_file)
    if tsysin == 'UTCTIME':
        tzonein = time.timezone // -3600
    clim_data_items = read_data_items_from_txt(data_txt_file)
    clim_flds = clim_data_items[0]
    # hydro_climate_stats stores the ClimateStats of each station, which are
    # used to calculate potential heat units (PHU) per station and per year.
    # format: {StationID1: climateStats1, ...}
    hydro_climate_stats = dict()
    required_flds = [DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws]
    output_flds = [DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp, DataType.rm,
                   DataType.pet, DataType.ws, DataType.sr]
    # remove existing records
    for fld in output_flds:
        climdb[DBTableNames.data_values].remove({'TYPE': fld})
    for fld in required_flds:
        if not StringClass.string_in_list(fld, clim_flds):
            raise ValueError('Meteorological daily data MUST contain %s!' % fld)
    # Create bulk object
    bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
    count = 0
    for i, cur_clim_data_item in enumerate(clim_data_items):
        if i == 0:
            continue
        dic = dict()
        cur_ssd = DEFAULT_NODATA
        for j, clim_data_v in enumerate(cur_clim_data_item):
            if StringClass.string_match(clim_flds[j], DataValueFields.id):
                dic[DataValueFields.id] = int(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.mean_tmp):
                dic[DataType.mean_tmp] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.min_tmp):
                dic[DataType.min_tmp] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.max_tmp):
                dic[DataType.max_tmp] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.pet):
                dic[DataType.pet] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.sr):
                dic[DataType.sr] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.ws):
                dic[DataType.ws] = float(clim_data_v)
            elif StringClass.string_match(clim_flds[j], DataType.rm):
                dic[DataType.rm] = float(clim_data_v) * 0.01
            elif StringClass.string_match(clim_flds[j], DataType.ssd):
                cur_ssd = float(clim_data_v)
        # Get datetime and do the UTC/local transformation
        utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(clim_flds,
                                                                           cur_clim_data_item,
                                                                           tsysin, tzonein)
        dic[DataValueFields.local_time] = utc_time + timedelta(minutes=tzonein * 60)
        dic[DataValueFields.time_zone] = tzonein
        dic[DataValueFields.utc] = utc_time
        dic[DataValueFields.y] = utc_time.year
        # Derive values that are not provided
        if DataType.mean_tmp not in list(dic.keys()):
            dic[DataType.mean_tmp] = (dic[DataType.max_tmp] + dic[DataType.min_tmp]) / 2.
        if DataType.sr not in list(dic.keys()):
            if cur_ssd == DEFAULT_NODATA:
                raise ValueError(DataType.sr + ' or ' + DataType.ssd + ' must be provided!')
            if dic[DataValueFields.id] in list(sites_info_dict.keys()):
                cur_lon, cur_lat = sites_info_dict[dic[DataValueFields.id]].lon_lat()
                sr = round(HydroClimateUtilClass.rs(DateClass.day_of_year(utc_time),
                                                    float(cur_ssd), cur_lat * PI / 180.), 1)
                dic[DataType.sr] = sr
        for fld in output_flds:
            cur_dic = dict()
            if fld in list(dic.keys()):
                cur_dic[DataValueFields.value] = dic[fld]
                cur_dic[DataValueFields.id] = dic[DataValueFields.id]
                cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                cur_dic[DataValueFields.time_zone] = dic[DataValueFields.time_zone]
                cur_dic[DataValueFields.local_time] = dic[DataValueFields.local_time]
                cur_dic[DataValueFields.type] = fld
                # Inserting or updating one item at a time is quite inefficient;
                # updated to use the bulk operation interface. lj
                bulk.insert(cur_dic)
                count += 1
                if count % 500 == 0:  # execute every 500 records
                    MongoUtil.run_bulk(bulk)
                    bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        if dic[DataValueFields.id] not in list(hydro_climate_stats.keys()):
            hydro_climate_stats[dic[DataValueFields.id]] = ClimateStats()
        hydro_climate_stats[dic[DataValueFields.id]].add_item(dic)
    # execute the remaining records
    if count % 500 != 0:
        MongoUtil.run_bulk(bulk)
    for item, cur_climate_stats in list(hydro_climate_stats.items()):
        cur_climate_stats.annual_stats()
    # Create index
    climdb[DBTableNames.data_values].create_index([(DataValueFields.id, ASCENDING),
                                                   (DataValueFields.type, ASCENDING),
                                                   (DataValueFields.utc, ASCENDING)])
    # prepare dicts for MongoDB
    for s_id, stats_v in list(hydro_climate_stats.items()):
        for YYYY in list(stats_v.Count.keys()):
            cur_dic = dict()
            cur_dic[DataValueFields.value] = stats_v.PHUTOT[YYYY]
            cur_dic[DataValueFields.id] = s_id
            cur_dic[DataValueFields.y] = YYYY
            cur_dic[VariableDesc.unit] = 'heat units'
            cur_dic[VariableDesc.type] = DataType.phu_tot
            curfilter = {DataValueFields.id: s_id,
                         VariableDesc.type: DataType.phu_tot,
                         DataValueFields.y: YYYY}
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                   upsert=True)
            # import annual mean temperature
            cur_dic[VariableDesc.type] = DataType.mean_tmp
            cur_dic[VariableDesc.unit] = 'deg C'
            cur_dic[DataValueFields.value] = stats_v.MeanTmp[YYYY]
            curfilter = {DataValueFields.id: s_id,
                         VariableDesc.type: DataType.mean_tmp,
                         DataValueFields.y: YYYY}
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                   upsert=True)
        cur_dic[DataValueFields.value] = stats_v.PHU0
        cur_dic[DataValueFields.id] = s_id
        cur_dic[DataValueFields.y] = DEFAULT_NODATA
        cur_dic[VariableDesc.unit] = 'heat units'
        cur_dic[VariableDesc.type] = DataType.phu0
        curfilter = {DataValueFields.id: s_id,
                     VariableDesc.type: DataType.phu0,
                     DataValueFields.y: DEFAULT_NODATA}
        climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic, upsert=True)
        # import annual mean temperature
        cur_dic[VariableDesc.type] = DataType.mean_tmp0
        cur_dic[VariableDesc.unit] = 'deg C'
        cur_dic[DataValueFields.value] = stats_v.MeanTmp0
        curfilter = {DataValueFields.id: s_id,
                     VariableDesc.type: DataType.mean_tmp0,
                     DataValueFields.y: DEFAULT_NODATA}
        climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic, upsert=True)

def read_optiontyploc_section(self, _opttyploc):
    """Optional parameter-settings for typical location selection."""
    if _opttyploc not in self.cf.sections():
        return
    # handle slope position types and tags
    if self.cf.has_option(_opttyploc, 'slopepositiontypes'):
        self.slppostype = list()
        typstrs = self.cf.get(_opttyploc, 'slopepositiontypes')
        self.slppostype = StringClass.split_string(typstrs.lower(), ',')
    else:
        # the five slope position system will be adopted by default
        pass
    if self.cf.has_option(_opttyploc, 'slopepositiontags'):
        self.slppostag = list()
        tagstrs = self.cf.get(_opttyploc, 'slopepositiontags')
        self.slppostag = StringClass.extract_numeric_values_from_string(tagstrs)
        if len(self.slppostag) != len(self.slppostype):
            raise RuntimeError('The input numbers of slope position types and '
                               'tags are not the same!')
    else:
        self.slppostag = list()
        for i in range(len(self.slppostype)):
            self.slppostag.append(pow(2, i))
    for typ in self.slppostype:
        self.singleslpposconf[typ] = SingleSlpPosFiles(self.ws, typ)
    # handle selected topographic attributes
    if self.cf.has_option(_opttyploc, 'terrainattrdict'):
        self.selectedtopolist = list()
        self.selectedtopo = dict()
        terrain_attr_dict_str = self.cf.get(_opttyploc, 'terrainattrdict')
        attrpath_strs = StringClass.split_string(terrain_attr_dict_str, ';')
        for i, singattr in enumerate(attrpath_strs):
            ap = StringClass.split_string(singattr, ',')
            attrname = ap[0].lower()
            if i == 0 and not StringClass.string_match(attrname, 'rpi'):
                attrname = 'rpi'
            self.selectedtopolist.append(attrname)
            attrpath = self.topoparam.get_attr_file(attrname)
            if attrpath is not None:
                self.selectedtopo[attrname] = attrpath
            else:
                # this should be a user-defined attribute with a valid file path
                if len(ap) != 2:
                    raise RuntimeError('A user-defined topographic attribute (%s) MUST have '
                                       'an existing file path!' % singattr)
                attrp = AutoFuzSlpPosConfig.check_file_available(ap[1])
                if attrp is None:
                    raise RuntimeError('A user-defined topographic attribute (%s) MUST have '
                                       'an existing file path!' % singattr)
                self.selectedtopo[attrname] = attrp
                is_regional = False
                if i == 0:  # the first one is the regional attribute
                    is_regional = True
                self.topoparam.add_user_defined_attribute(attrname, attrp, is_regional)
    # handle parameters used in extracting typical locations
    if self.cf.has_option(_opttyploc, 'typlocextractparam'):
        self.param4typloc = dict()
        base_param_str = self.cf.get(_opttyploc, 'typlocextractparam')
        base_param_floats = StringClass.extract_numeric_values_from_string(base_param_str)
        defnum = len(self._DEFAULT_PARAM_TYPLOC)
        if len(base_param_floats) == defnum:
            for slppos in self.slppostype:
                self.param4typloc[slppos] = base_param_floats[:]
        elif len(base_param_floats) == len(self.slppostype) * defnum:
            for i, slppos in enumerate(self.slppostype):
                self.param4typloc[slppos] = base_param_floats[i * defnum: (i + 1) * defnum]
        else:
            raise RuntimeError('TyplocExtractParam MUST have %d or %d values!'
                               % (defnum, len(self.slppostype) * defnum))
    else:
        for slppos in self.slppostype:
            self.param4typloc[slppos] = self._DEFAULT_PARAM_TYPLOC[:]
    # handle predefined fuzzy membership function (FMF) shapes of each terrain
    # attribute for each slope position
    if self.cf.has_option(_opttyploc, 'fuzinfdefault'):
        self.infshape = dict()
        fuz_inf_shp_strs = self.cf.get(_opttyploc, 'fuzinfdefault')
        # inference shapes are separated by SEMICOLON between slope positions
        fuz_inf_shp_types = StringClass.split_string(fuz_inf_shp_strs, ';')
        if len(fuz_inf_shp_types) != len(self.slppostype):
            raise RuntimeError('FuzInfDefault (%s) MUST be consistent with slope position'
                               " types and separated by ';'!" % fuz_inf_shp_strs)
        for i, slppos in enumerate(self.slppostype):
            self.infshape[slppos] = dict()
            # inference shapes are separated by COMMA between topographic attributes
            infshps = StringClass.split_string(fuz_inf_shp_types[i], ',')
            if len(infshps) != len(self.selectedtopolist):
                raise RuntimeError('FuzInfDefault (%s) for each slope position MUST have '
                                   'the same size as TerrainAttrDict!' % fuz_inf_shp_types[i])
            for j, attrn in enumerate(self.selectedtopolist):
                self.infshape[slppos][attrn] = infshps[j]
    else:
        if len(self.slppostype) != 5:
            raise RuntimeError('Only the fuzzy membership function shapes of the '
                               '5 slope position system are built-in. For other '
                               'classification systems, please set them as input!')
    # handle value ranges of terrain attributes for extracting prototypes
    if self.cf.has_option(_opttyploc, 'valueranges'):
        self.extractrange = dict()
        value_rng_strs = self.cf.get(_opttyploc, 'valueranges')
        value_rng_types = StringClass.split_string(value_rng_strs, ';')
        if len(value_rng_types) != len(self.slppostype):
            raise RuntimeError('ValueRanges (%s) MUST be consistent with slope position'
                               " types and separated by ';'!" % value_rng_strs)
        for i, slppos in enumerate(self.slppostype):
            self.extractrange[slppos] = dict()
            value_rngs = StringClass.extract_numeric_values_from_string(value_rng_types[i])
            if len(value_rngs) == 0 or len(value_rngs) % 3 != 0:
                raise RuntimeError('Each item of ValueRanges MUST contain three elements,'
                                   ' i.e., Attribute No., Min, Max! Please check item: '
                                   '%s for %s.' % (value_rng_types[i], slppos))
            for j in range(int(len(value_rngs) / 3)):
                attridx = int(value_rngs[j * 3]) - 1
                attrname = self.selectedtopolist[attridx]
                min_v = value_rngs[j * 3 + 1]
                max_v = value_rngs[j * 3 + 2]
                self.extractrange[slppos][attrname] = [min_v, max_v]
    else:
        if len(self.slppostype) != 5:
            raise RuntimeError('Only the extraction value ranges of the '
                               '5 slope position system are built-in. For other '
                               'classification systems, please set them as input!')

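
# A sketch of the corresponding ini options (the section name, type names, and
# all values are hypothetical; the option names and separator rules follow the
# code above: semicolons between slope positions, commas within one position,
# and ValueRanges items as [Attribute No., Min, Max] triplets):
#   [OPTIONAL_TYPLOC]
#   slopepositiontypes = rdg,shd,bks,fts,vly
#   slopepositiontags = 1,2,4,8,16
#   terrainattrdict = rpi;profc;slp;hand
#   valueranges = 1,0.99,1.0;1,0.9,0.95;1,0.5,0.6;1,0.15,0.2;1,0.0,0.1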
def read_output_item(output_fields, item):
    """Pair the header fields of file.out with one data row and return a dict."""
    file_out_dict = dict()
    for i, v in enumerate(output_fields):
        if StringClass.string_match(ModelCfgFields.mod_cls, v):
            file_out_dict[ModelCfgFields.mod_cls] = item[i]
        elif StringClass.string_match(ModelCfgFields.output_id, v):
            file_out_dict[ModelCfgFields.output_id] = item[i]
        elif StringClass.string_match(ModelCfgFields.desc, v):
            file_out_dict[ModelCfgFields.desc] = item[i]
        elif StringClass.string_match(ModelCfgFields.unit, v):
            file_out_dict[ModelCfgFields.unit] = item[i]
        elif StringClass.string_match(ModelCfgFields.type, v):
            file_out_dict[ModelCfgFields.type] = item[i]
        elif StringClass.string_match(ModelCfgFields.stime, v):
            file_out_dict[ModelCfgFields.stime] = item[i]
        elif StringClass.string_match(ModelCfgFields.etime, v):
            file_out_dict[ModelCfgFields.etime] = item[i]
        elif StringClass.string_match(ModelCfgFields.interval, v):
            file_out_dict[ModelCfgFields.interval] = item[i]
        elif StringClass.string_match(ModelCfgFields.interval_unit, v):
            file_out_dict[ModelCfgFields.interval_unit] = item[i]
        elif StringClass.string_match(ModelCfgFields.filename, v):
            file_out_dict[ModelCfgFields.filename] = item[i]
        elif StringClass.string_match(ModelCfgFields.use, v):
            file_out_dict[ModelCfgFields.use] = item[i]
        elif StringClass.string_match(ModelCfgFields.subbsn, v):
            file_out_dict[ModelCfgFields.subbsn] = item[i]
    if not list(file_out_dict.keys()):
        raise ValueError('No valid output items are stored in file.out!')
    return file_out_dict
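# A quick illustration of read_output_item(): pair a header row with one data
# row from file.out. The literal field spellings below are placeholders for
# whatever ModelCfgFields actually defines; only columns whose header names
# match those fields are picked up.
demo_header = ['MODULE_CLASS', 'OUTPUTID', 'DESCRIPTION', 'UNIT', 'TYPE',
               'STARTTIME', 'ENDTIME', 'INTERVAL', 'INTERVAL_UNIT', 'SUBBASIN',
               'FILENAME', 'USE']
demo_row = ['Hydrology', 'SED_OL', 'sediment yield', 'kg', 'SUM',
            '2012-01-01 00:00:00', '2012-12-31 23:59:59', '1', 'DAY', 'ALL',
            'SED_OL_SUM.tif', '1']
print(read_output_item(demo_header, demo_row))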
def run(function_name, in_files, wp=None, in_params=None, out_files=None,
        mpi_params=None, log_params=None):
    """Run a TauDEM function.

    1. The command will not execute if any input file does not exist.
    2. An error will be detected after running the TauDEM command if any
       output file does not exist.

    Args:
        function_name (str): Full path of the TauDEM function.
        in_files (dict, required): Dict of pairs of parameter id (string) and
            file path (string or list) for input files, e.g.::

                {'-z': '/full/path/to/dem.tif'}

        wp (str, optional): Workspace for outputs. If not specified, the
            directory of the first input file in ``in_files`` will be used.
        in_params (dict, optional): Dict of pairs of parameter id (string) and
            value (or None for a flag parameter without a value) for input
            parameters, e.g.::

                {'-nc': None}
                {'-thresh': threshold}
                {'-m': 'ave s', '-nc': None}

        out_files (dict, optional): Dict of pairs of parameter id (string) and
            file path (string or list) for output files, e.g.::

                {'-fel': 'filleddem.tif'}
                {'-maxS': ['harden.tif', 'maxsimi.tif']}

        mpi_params (dict, optional): Dict of pairs of parameter id (string) and
            value or path for MPI settings, e.g.::

                {'mpipath': '/soft/bin', 'hostfile': '/soft/bin/cluster.node', 'n': 4}
                {'mpipath': '/soft/bin', 'n': 4}
                {'n': 4}

        log_params (dict, optional): Dict of pairs of parameter id (string) and
            value or path for runtime and log output parameters, e.g.::

                {'logfile': '/home/user/log.txt',
                 'runtimefile': '/home/user/runtime.txt'}

    Returns:
        True if TauDEM runs successfully, otherwise False.
    """
    # Check input files
    if in_files is None:
        TauDEM.error('The input files parameter is required!')
    if not isinstance(in_files, dict):
        TauDEM.error('The input files parameter must be a dict!')
    for (pid, infile) in iteritems(in_files):
        if infile is None:
            continue
        if isinstance(infile, list) or isinstance(infile, tuple):
            for idx, inf in enumerate(infile):
                if inf is None:
                    continue
                inf, wp = TauDEM.check_infile_and_wp(inf, wp)
                in_files[pid][idx] = inf
            continue
        if os.path.exists(infile):
            infile, wp = TauDEM.check_infile_and_wp(infile, wp)
            in_files[pid] = os.path.abspath(infile)
        else:
            # For more flexible input file extensions, e.g.,
            #   -inputtags 1 <path/to/tag1.tif> 2 <path/to/tag2.tif> ...
            # In such unpredictable circumstances the existence of the input
            # files cannot be checked here, so the developer should check it
            # elsewhere.
            if len(StringClass.split_string(infile, ' ')) > 1:
                continue
            else:
                # the infile should still be an existing file, so check it in
                # the workspace
                if wp is None:
                    TauDEM.error('Workspace should not be None!')
                infile = wp + os.sep + infile
                if not os.path.exists(infile):
                    TauDEM.error('Input files parameter %s: %s does not exist!'
                                 % (pid, infile))
                in_files[pid] = os.path.abspath(infile)
    # Make the workspace directory if it does not exist
    UtilClass.mkdir(wp)
    # Check the log parameters
    log_file = None
    runtime_file = None
    if log_params is not None:
        if not isinstance(log_params, dict):
            TauDEM.error('The log parameter must be a dict!')
        if 'logfile' in log_params and log_params['logfile'] is not None:
            log_file = log_params['logfile']
            # If log_file is just a file name, save it in the default workspace.
            if os.sep not in log_file:
                log_file = wp + os.sep + log_file
                log_file = os.path.abspath(log_file)
        if 'runtimefile' in log_params and log_params['runtimefile'] is not None:
            runtime_file = log_params['runtimefile']
            # If runtime_file is just a file name, save it in the default workspace.
            if os.sep not in runtime_file:
                runtime_file = wp + os.sep + runtime_file
                runtime_file = os.path.abspath(runtime_file)
    # Remove out_files in advance to avoid file-IO-related errors
    new_out_files = list()
    if out_files is not None:
        if not isinstance(out_files, dict):
            TauDEM.error('The output files parameter must be a dict!')
        for (pid, out_file) in iteritems(out_files):
            if out_file is None:
                continue
            if isinstance(out_file, list) or isinstance(out_file, tuple):
                for idx, outf in enumerate(out_file):
                    if outf is None:
                        continue
                    outf = FileClass.get_file_fullpath(outf, wp)
                    FileClass.remove_files(outf)
                    out_files[pid][idx] = outf
                    new_out_files.append(outf)
            else:
                out_file = FileClass.get_file_fullpath(out_file, wp)
                FileClass.remove_files(out_file)
                out_files[pid] = out_file
                new_out_files.append(out_file)
    # Concatenate the command line
    commands = list()
    # MPI header
    if mpi_params is not None:
        if not isinstance(mpi_params, dict):
            TauDEM.error('The MPI settings parameter must be a dict!')
        if 'mpipath' in mpi_params and mpi_params['mpipath'] is not None:
            commands.append(mpi_params['mpipath'] + os.sep + 'mpiexec')
        else:
            commands.append('mpiexec')
        if 'hostfile' in mpi_params and mpi_params['hostfile'] is not None \
                and not StringClass.string_match(mpi_params['hostfile'], 'none') \
                and os.path.isfile(mpi_params['hostfile']):
            commands.append('-f')
            commands.append(mpi_params['hostfile'])
        if 'n' in mpi_params and mpi_params['n'] > 1:
            commands.append('-n')
            commands.append(str(mpi_params['n']))
        else:
            # If the number of processes is not greater than 1, do not call mpiexec.
            commands = []
    # Append the TauDEM function name, which can be a full path or just the name
    commands.append(function_name)
    # Append input files
    for (pid, infile) in iteritems(in_files):
        if infile is None:
            continue
        if pid[0] != '-':
            pid = '-' + pid
        commands.append(pid)
        if isinstance(infile, list) or isinstance(infile, tuple):
            commands.append(' '.join(tmpf for tmpf in infile))
        else:
            commands.append(infile)
    # Append input parameters
    if in_params is not None:
        if not isinstance(in_params, dict):
            TauDEM.error('The input parameters must be a dict!')
        for (pid, v) in iteritems(in_params):
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            # allow for a parameter that is a flag without a value
            if v != '' and v is not None:
                if MathClass.isnumerical(v):
                    commands.append(str(v))
                else:
                    commands.append(v)
    # Append output files
    if out_files is not None:
        for (pid, outfile) in iteritems(out_files):
            if outfile is None:
                continue
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            if isinstance(outfile, list) or isinstance(outfile, tuple):
                commands.append(' '.join(tmpf for tmpf in outfile))
            else:
                commands.append(outfile)
    # Run the command
    runmsg = UtilClass.run_command(commands)
    TauDEM.log(runmsg, log_file)
    TauDEM.output_runtime_to_log(function_name, runmsg, runtime_file)
    # Check out_files and report an error if any does not exist.
    for of in new_out_files:
        if not os.path.exists(of):
            TauDEM.error('%s failed, and the %s was not generated!' % (function_name, of))
            return False
    return True
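# A hypothetical call sketch following the docstring above: run TauDEM's
# pitremove with 4 MPI processes. The executable and data paths are
# placeholders, and run() is assumed to be exposed as TauDEM.run, as its
# internal TauDEM.error()/TauDEM.log() calls suggest.
TauDEM.run('/opt/taudem/pitremove',
           in_files={'-z': '/data/spatial/dem.tif'},
           wp='/data/workspace',
           out_files={'-fel': 'dem_filled.tif'},
           mpi_params={'mpipath': '/usr/bin', 'n': 4},
           log_params={'logfile': 'taudem.log', 'runtimefile': 'runtime.log'})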
def __init__(self, cf):
    """Initialization."""
    # 1. Directories
    self.base_dir = None
    self.clim_dir = None
    self.spatial_dir = None
    self.observe_dir = None
    self.scenario_dir = None
    self.model_dir = None
    self.txt_db_dir = None
    self.preproc_script_dir = None
    self.seims_bin = None
    self.mpi_bin = None
    self.workspace = None
    # 1.1. Directory determined flags
    self.use_observed = True
    self.use_scernario = True
    # 2. MongoDB configuration and database, collection, GridFS names
    self.hostname = '127.0.0.1'  # localhost by default
    self.port = 27017
    self.climate_db = ''
    self.bmp_scenario_db = ''
    self.spatial_db = ''
    # 3. Climate inputs
    self.hydro_climate_vars = None
    self.prec_sites = None
    self.prec_data = None
    self.Meteo_sites = None
    self.Meteo_data = None
    self.thiessen_field = 'ID'
    # 4. Spatial inputs
    self.prec_sites_thiessen = None
    self.meteo_sites_thiessen = None
    self.dem = None
    self.outlet_file = None
    self.landuse = None
    self.landcover_init_param = None
    self.soil = None
    self.soil_property = None
    self.fields_partition = False
    self.fields_partition_thresh = list()
    self.additional_rs = dict()
    # 5. Optional parameters
    self.d8acc_threshold = 0
    self.np = 4
    self.d8down_method = 's'
    self.dorm_hr = -1.
    self.temp_base = 0.
    self.imper_perc_in_urban = 0.
    self.default_landuse = -1
    self.default_soil = -1

    # 1. Directories
    if 'PATH' in cf.sections():
        self.base_dir = cf.get('PATH', 'base_data_dir')
        self.clim_dir = cf.get('PATH', 'climate_data_dir')
        self.spatial_dir = cf.get('PATH', 'spatial_data_dir')
        self.observe_dir = cf.get('PATH', 'measurement_data_dir')
        self.scenario_dir = cf.get('PATH', 'bmp_data_dir')
        self.model_dir = cf.get('PATH', 'model_dir')
        self.txt_db_dir = cf.get('PATH', 'txt_db_dir')
        self.preproc_script_dir = cf.get('PATH', 'preproc_script_dir')
        self.seims_bin = cf.get('PATH', 'cpp_program_dir')
        self.mpi_bin = cf.get('PATH', 'mpiexec_dir')
        self.workspace = cf.get('PATH', 'working_dir')
    else:
        raise ValueError('The [PATH] section MUST exist in the *.ini file.')
    if not (FileClass.is_dir_exists(self.base_dir)
            and FileClass.is_dir_exists(self.model_dir)
            and FileClass.is_dir_exists(self.txt_db_dir)
            and FileClass.is_dir_exists(self.preproc_script_dir)
            and FileClass.is_dir_exists(self.seims_bin)):
        raise IOError('Please check the directories defined in [PATH]. '
                      'BASE_DATA_DIR, MODEL_DIR, TXT_DB_DIR, PREPROC_SCRIPT_DIR, '
                      'and CPP_PROGRAM_DIR are required!')
    if not FileClass.is_dir_exists(self.mpi_bin):
        self.mpi_bin = None
    if not FileClass.is_dir_exists(self.workspace):
        try:  # first try to make dirs
            UtilClass.mkdir(self.workspace)
        except OSError:
            self.workspace = self.model_dir + os.path.sep + 'preprocess_output'
            print('WARNING: Making WORKING_DIR failed! Use the default: %s' % self.workspace)
        if not os.path.exists(self.workspace):
            UtilClass.mkdir(self.workspace)
    self.dirs = DirNameUtils(self.workspace)
    self.logs = LogNameUtils(self.dirs.log)
    self.vecs = VectorNameUtils(self.dirs.geoshp)
    self.taudems = TauDEMFilesUtils(self.dirs.taudem)
    self.spatials = SpatialNamesUtils(self.dirs.geodata2db)
    self.modelcfgs = ModelCfgUtils(self.model_dir)
    self.paramcfgs = ModelParamDataUtils(self.preproc_script_dir + os.path.sep + 'database')
    if not FileClass.is_dir_exists(self.clim_dir):
        print('The CLIMATE_DATA_DIR does not exist, trying the default folder name "climate".')
        self.clim_dir = self.base_dir + os.path.sep + 'climate'
        if not FileClass.is_dir_exists(self.clim_dir):
            raise IOError('A directory named "climate" MUST BE located in [base_dir]!')
    if not FileClass.is_dir_exists(self.spatial_dir):
        print('The SPATIAL_DATA_DIR does not exist, trying the default folder name "spatial".')
        self.spatial_dir = self.base_dir + os.path.sep + 'spatial'
        if not FileClass.is_dir_exists(self.spatial_dir):
            raise IOError('A directory named "spatial" MUST BE located in [base_dir]!')
    if not FileClass.is_dir_exists(self.observe_dir):
        self.observe_dir = None
        self.use_observed = False
    if not FileClass.is_dir_exists(self.scenario_dir):
        self.scenario_dir = None
        self.use_scernario = False
    # 2. MongoDB related
    if 'MONGODB' in cf.sections():
        self.hostname = cf.get('MONGODB', 'hostname')
        self.port = cf.getint('MONGODB', 'port')
        self.climate_db = cf.get('MONGODB', 'climatedbname')
        self.bmp_scenario_db = cf.get('MONGODB', 'bmpscenariodbname')
        self.spatial_db = cf.get('MONGODB', 'spatialdbname')
    else:
        raise ValueError('The [MONGODB] section MUST exist in the *.ini file.')
    if not StringClass.is_valid_ip_addr(self.hostname):
        raise ValueError('The HOSTNAME defined in [MONGODB] is invalid!')
    # 3. Climate input
    if 'CLIMATE' in cf.sections():
        self.hydro_climate_vars = self.clim_dir + os.path.sep + cf.get('CLIMATE',
                                                                       'hydroclimatevarfile')
        self.prec_sites = self.clim_dir + os.path.sep + cf.get('CLIMATE', 'precsitefile')
        self.prec_data = self.clim_dir + os.path.sep + cf.get('CLIMATE', 'precdatafile')
        self.Meteo_sites = self.clim_dir + os.path.sep + cf.get('CLIMATE', 'meteositefile')
        self.Meteo_data = self.clim_dir + os.path.sep + cf.get('CLIMATE', 'meteodatafile')
        self.thiessen_field = cf.get('CLIMATE', 'thiessenidfield')
    else:
        raise ValueError('Climate input file names MUST be provided in [CLIMATE]!')
    # 4. Spatial input
    if 'SPATIAL' in cf.sections():
        self.prec_sites_thiessen = self.spatial_dir + os.path.sep + cf.get('SPATIAL',
                                                                           'precsitesthiessen')
        self.meteo_sites_thiessen = self.spatial_dir + os.path.sep + cf.get('SPATIAL',
                                                                            'meteositesthiessen')
        self.dem = self.spatial_dir + os.path.sep + cf.get('SPATIAL', 'dem')
        self.outlet_file = self.spatial_dir + os.path.sep + cf.get('SPATIAL', 'outlet_file')
        if not os.path.exists(self.outlet_file):
            self.outlet_file = None
        self.landuse = self.spatial_dir + os.path.sep + cf.get('SPATIAL', 'landusefile')
        self.landcover_init_param = self.txt_db_dir + os.path.sep + cf.get('SPATIAL',
                                                                           'landcoverinitfile')
        self.soil = self.spatial_dir + os.path.sep + cf.get('SPATIAL', 'soilseqnfile')
        self.soil_property = self.txt_db_dir + os.path.sep + cf.get('SPATIAL', 'soilseqntext')
        if cf.has_option('SPATIAL', 'additionalfile'):
            additional_dict_str = cf.get('SPATIAL', 'additionalfile')
            tmpdict = json.loads(additional_dict_str)
            tmpdict = {str(k): (str(v) if is_string(v) else v)
                       for k, v in list(tmpdict.items())}
            for k, v in list(tmpdict.items()):
                # The existence check has been moved to mask_origin_delineated_data()
                # in sp_delineation.py
                self.additional_rs[k] = v
        # Field partition
        if cf.has_option('SPATIAL', 'field_partition_thresh'):
            ths = cf.get('SPATIAL', 'field_partition_thresh')
            thsv = StringClass.extract_numeric_values_from_string(ths)
            if thsv is not None:
                self.fields_partition_thresh = [int(v) for v in thsv]
                self.fields_partition = True
    else:
        raise ValueError('Spatial input file names MUST be provided in [SPATIAL]!')
    # 5. Optional parameters
    if 'OPTIONAL_PARAMETERS' in cf.sections():
        self.d8acc_threshold = cf.getfloat('OPTIONAL_PARAMETERS', 'd8accthreshold')
        self.np = cf.getint('OPTIONAL_PARAMETERS', 'np')
        self.d8down_method = cf.get('OPTIONAL_PARAMETERS', 'd8downmethod')
        if StringClass.string_match(self.d8down_method, 'surface'):
            self.d8down_method = 's'
        elif StringClass.string_match(self.d8down_method, 'horizontal'):
            self.d8down_method = 'h'
        elif StringClass.string_match(self.d8down_method, 'pythagoras'):
            self.d8down_method = 'p'
        elif StringClass.string_match(self.d8down_method, 'vertical'):
            self.d8down_method = 'v'
        else:
            self.d8down_method = self.d8down_method.lower()
            if self.d8down_method not in ['s', 'h', 'p', 'v']:
                self.d8down_method = 's'
        self.dorm_hr = cf.getfloat('OPTIONAL_PARAMETERS', 'dorm_hr')
        self.temp_base = cf.getfloat('OPTIONAL_PARAMETERS', 't_base')
        self.imper_perc_in_urban = cf.getfloat('OPTIONAL_PARAMETERS',
                                               'imperviouspercinurbancell')
        self.default_landuse = cf.getint('OPTIONAL_PARAMETERS', 'defaultlanduse')
        self.default_soil = cf.getint('OPTIONAL_PARAMETERS', 'defaultsoil')
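# A minimal, hypothetical *.ini sketch covering the mandatory sections parsed
# above. The section and option names come from the parsing code; every path
# and database name is a placeholder. [CLIMATE] and [SPATIAL] are likewise
# required but omitted here for brevity.
from io import StringIO

demo_ini = u"""
[PATH]
base_data_dir = /data/demo
climate_data_dir = /data/demo/climate
spatial_data_dir = /data/demo/spatial
measurement_data_dir = /data/demo/observed
bmp_data_dir = /data/demo/scenario
model_dir = /data/demo/model
txt_db_dir = /data/demo/txtdb
preproc_script_dir = /opt/seims/preprocess
cpp_program_dir = /opt/seims/bin
mpiexec_dir = /usr/bin
working_dir = /data/demo/workspace

[MONGODB]
hostname = 127.0.0.1
port = 27017
climatedbname = demo_climate
bmpscenariodbname = demo_bmp
spatialdbname = demo_spatial
"""
try:
    from configparser import ConfigParser  # Python 3
    cf = ConfigParser()
    cf.read_string(demo_ini)
except ImportError:
    from ConfigParser import ConfigParser  # Python 2
    cf = ConfigParser()
    cf.readfp(StringIO(demo_ini))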
def model_io_configuration(cfg, maindb):
    """
    Import the input and output configuration of SEIMS, i.e., file.in and file.out.
    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object
    """
    file_in_path = cfg.modelcfgs.filein
    file_out_path = cfg.paramcfgs.init_outputs_file
    # initialize if the collections do not exist
    c_list = maindb.collection_names()
    conf_tabs = [DBTableNames.main_filein, DBTableNames.main_fileout]
    for item in conf_tabs:
        if not StringClass.string_in_list(item, c_list):
            maindb.create_collection(item)
        else:
            maindb.drop_collection(item)
    file_in_items = read_data_items_from_txt(file_in_path)
    file_out_items = read_data_items_from_txt(file_out_path)
    for item in file_in_items:
        file_in_dict = dict()
        values = StringClass.split_string(item[0].strip(), ['|'])
        if len(values) != 2:
            raise ValueError('One item should only have one Tag and one value string,'
                             ' separated by "|"')
        file_in_dict[ModelCfgFields.tag] = values[0]
        file_in_dict[ModelCfgFields.value] = values[1]
        maindb[DBTableNames.main_filein].insert(file_in_dict)
    # import the initial output settings
    bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
    out_field_array = file_out_items[0]
    out_data_array = file_out_items[1:]
    for item in out_data_array:
        # reuse read_output_item() to map the header fields to one data row
        file_out_dict = read_output_item(out_field_array, item)
        bulk.insert(file_out_dict)
    MongoUtil.run_bulk(bulk, 'No operations to execute when importing the initial'
                             ' output settings.')
    # import the desired outputs
    # initialize bulk operator
    bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
    # read desired outputs from the txt file
    data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
    for i, cur_data_item in enumerate(data_items):
        data_import = dict()
        cur_filter = dict()
        if len(cur_data_item) == 7:
            data_import[ModelCfgFields.output_id] = cur_data_item[0]
            data_import[ModelCfgFields.type] = cur_data_item[1]
            data_import[ModelCfgFields.stime] = cur_data_item[2]
            data_import[ModelCfgFields.etime] = cur_data_item[3]
            data_import[ModelCfgFields.interval] = cur_data_item[4]
            data_import[ModelCfgFields.interval_unit] = cur_data_item[5]
            data_import[ModelCfgFields.subbsn] = cur_data_item[6]
            data_import[ModelCfgFields.use] = 1
            cur_filter[ModelCfgFields.output_id] = cur_data_item[0]
        else:
            raise RuntimeError('Items in file.out must have 7 columns, i.e., OUTPUTID,'
                               'TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.')
        bulk.find(cur_filter).update({'$set': data_import})
    # execute import operators
    MongoUtil.run_bulk(bulk, 'No operations to execute when importing the desired outputs.')
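# file.in is a plain-text list of "Tag|Value" pairs, one per line, which is
# exactly what the split on '|' above enforces. A tiny sketch with
# illustrative (not authoritative) tags and plain-string dict keys standing in
# for ModelCfgFields.tag/ModelCfgFields.value:
for demo_line in ['MODE|Daily', 'INTERVAL|1', 'STARTTIME|2012-01-01 00:00:00']:
    demo_tag, demo_value = demo_line.strip().split('|')
    print({'TAG': demo_tag, 'VALUE': demo_value})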
def lookup_soil_parameters(dstdir, soiltype_file, soil_lookup_file, landuse_shapefile):
    """Reclassify soil parameters by lookup table."""
    # Read soil properties from the lookup txt file
    soil_lookup_data = read_data_items_from_txt(soil_lookup_file)
    soil_instances = list()
    soil_prop_flds = soil_lookup_data[0][:]
    for i in range(1, len(soil_lookup_data)):
        cur_soil_data_item = soil_lookup_data[i][:]
        cur_seqn = cur_soil_data_item[0]
        cur_sname = cur_soil_data_item[1]
        cur_soil_ins = SoilProperty(cur_seqn, cur_sname)
        for j in range(2, len(soil_prop_flds)):
            cur_flds = StringClass.split_string(cur_soil_data_item[j], '-')
            # Get field values and convert them to float
            for k, tmpfld in enumerate(cur_flds):
                cur_flds[k] = float(tmpfld)
            if StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NLYRS):
                cur_soil_ins.SOILLAYERS = int(cur_flds[0])
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._Z):
                cur_soil_ins.SOILDEPTH = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._OM):
                cur_soil_ins.OM = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._CLAY):
                cur_soil_ins.CLAY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SILT):
                cur_soil_ins.SILT = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SAND):
                cur_soil_ins.SAND = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ROCK):
                cur_soil_ins.ROCK = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ZMX):
                cur_soil_ins.SOL_ZMX = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ANIONEXCL):
                cur_soil_ins.ANION_EXCL = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._CRK):
                cur_soil_ins.SOL_CRK = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._BD):
                cur_soil_ins.DENSITY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._K):
                cur_soil_ins.CONDUCTIVITY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._WP):
                cur_soil_ins.WILTINGPOINT = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._FC):
                cur_soil_ins.FIELDCAP = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._AWC):
                cur_soil_ins.AWC = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._POROSITY):
                cur_soil_ins.POROSITY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._USLE_K):
                cur_soil_ins.USLE_K = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ALB):
                cur_soil_ins.SOL_ALB = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ESCO):
                cur_soil_ins.ESCO = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NO3):
                cur_soil_ins.SOL_NO3 = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NH4):
                cur_soil_ins.SOL_NH4 = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ORGN):
                cur_soil_ins.SOL_ORGN = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SOLP):
                cur_soil_ins.SOL_SOLP = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ORGP):
                cur_soil_ins.SOL_ORGP = cur_flds
        cur_soil_ins.check_data_validation()
        soil_instances.append(cur_soil_ins)
    # Gather the property values of all soils into {field: [values per soil]}
    soil_prop_dict = dict()
    for sol in soil_instances:
        cur_sol_dict = sol.soil_dict()
        for fld in cur_sol_dict:
            if fld in soil_prop_dict:
                soil_prop_dict[fld].append(cur_sol_dict[fld])
            else:
                soil_prop_dict[fld] = [cur_sol_dict[fld]]
    # Build one reclassification dict (SEQN -> value) per property and layer
    replace_dicts = list()
    dst_soil_tifs = list()
    sol_fld_name = list()
    seqns = soil_prop_dict[SoilUtilClass._SEQN]
    max_lyr_num = int(numpy.max(soil_prop_dict[SoilUtilClass._NLYRS]))
    for key in soil_prop_dict:
        if key != SoilUtilClass._SEQN and key != SoilUtilClass._NAME:
            key_l = 1
            for key_v in soil_prop_dict[key]:
                if isinstance(key_v, list) and len(key_v) > key_l:
                    key_l = len(key_v)
            if key_l == 1:  # single-valued (non-layered) property
                cur_dict = dict()
                for i, tmpseq in enumerate(seqns):
                    cur_dict[float(tmpseq)] = soil_prop_dict[key][i]
                replace_dicts.append(cur_dict)
                dst_soil_tifs.append(dstdir + os.path.sep + key + '.tif')
                sol_fld_name.append(key)
            else:  # layered property: one dict per soil layer
                for i in range(max_lyr_num):
                    cur_dict = dict()
                    for j, tmpseq in enumerate(seqns):
                        if i < soil_prop_dict[SoilUtilClass._NLYRS][j]:
                            cur_dict[float(tmpseq)] = soil_prop_dict[key][j][i]
                        else:
                            cur_dict[float(seqns[j])] = DEFAULT_NODATA
                    replace_dicts.append(cur_dict)
                    dst_soil_tifs.append(dstdir + os.path.sep + key + '_' + str(i + 1) + '.tif')
                    sol_fld_name.append(key + '_' + str(i + 1))
    # Generate GTIFF: reclassify the soil type map to property rasters
    RasterUtilClass.count_by_shp(soiltype_file, landuse_shapefile, sol_fld_name, replace_dicts)
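# Illustration of the reclassification mappings built above, assuming a
# hypothetical bulk-density (BD) column and two soils: SEQN 1 with two layers
# and SEQN 2 with one. Each dict maps SEQN -> property value for one layer,
# padded with nodata where a soil has fewer layers.
demo_nodata = -9999.  # stands in for DEFAULT_NODATA
demo_replace_dicts = [
    {1.0: 1.35, 2.0: 1.40},         # BD_1.tif: first layer of each soil
    {1.0: 1.50, 2.0: demo_nodata},  # BD_2.tif: SEQN 2 has no second layer
]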
def data_from_txt(maindb, hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file):
    """
    Read observed data from txt files.
    Args:
        maindb: Main spatial database
        hydro_clim_db: hydro-climate database
        obs_txts_list: txt file paths of observed data
        sites_info_txts_list: txt file paths of site information
        subbsn_file: subbasin raster file
    Returns:
        True or False
    """
    # 1. Read monitor station information, and store variable information and station IDs
    variable_lists = []
    site_ids = []
    for site_file in sites_info_txts_list:
        site_data_items = read_data_items_from_txt(site_file)
        site_flds = site_data_items[0]
        for i in range(1, len(site_data_items)):
            dic = dict()
            types = list()
            for j, v in enumerate(site_data_items[i]):
                if StringClass.string_match(site_flds[j], StationFields.id):
                    dic[StationFields.id] = int(v)
                    site_ids.append(dic[StationFields.id])
                elif StringClass.string_match(site_flds[j], StationFields.name):
                    dic[StationFields.name] = v.strip()
                elif StringClass.string_match(site_flds[j], StationFields.type):
                    types = StringClass.split_string(v.strip(), '-')
                elif StringClass.string_match(site_flds[j], StationFields.lat):
                    dic[StationFields.lat] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.lon):
                    dic[StationFields.lon] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.x):
                    dic[StationFields.x] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.y):
                    dic[StationFields.y] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.unit):
                    dic[StationFields.unit] = v.strip()
                elif StringClass.string_match(site_flds[j], StationFields.elev):
                    dic[StationFields.elev] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.outlet):
                    dic[StationFields.outlet] = float(v)
            for j, cur_type in enumerate(types):
                site_dic = dict()
                site_dic[StationFields.id] = dic[StationFields.id]
                site_dic[StationFields.name] = dic[StationFields.name]
                site_dic[StationFields.type] = cur_type
                site_dic[StationFields.lat] = dic[StationFields.lat]
                site_dic[StationFields.lon] = dic[StationFields.lon]
                site_dic[StationFields.x] = dic[StationFields.x]
                site_dic[StationFields.y] = dic[StationFields.y]
                site_dic[StationFields.elev] = dic[StationFields.elev]
                site_dic[StationFields.outlet] = dic[StationFields.outlet]
                # Add the SubbasinID field
                matched, cur_sids = ImportObservedData.match_subbasin(subbsn_file,
                                                                      site_dic, maindb)
                if not matched:
                    break
                if len(cur_sids) == 1:
                    # if there is only one subbasin ID, store it directly
                    cur_subbsn_id_str = cur_sids[0]
                else:
                    cur_subbsn_id_str = ','.join(str(cid) for cid in cur_sids)
                site_dic[StationFields.subbsn] = cur_subbsn_id_str
                curfilter = {StationFields.id: site_dic[StationFields.id],
                             StationFields.type: site_dic[StationFields.type]}
                hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, site_dic,
                                                                       upsert=True)
                var_dic = dict()
                var_dic[StationFields.type] = cur_type
                var_dic[StationFields.unit] = dic[StationFields.unit]
                if var_dic not in variable_lists:
                    variable_lists.append(var_dic)
    site_ids = list(set(site_ids))
    # 2. Read measurement data and import into MongoDB
    bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
    count = 0
    for measDataFile in obs_txts_list:
        obs_data_items = read_data_items_from_txt(measDataFile)
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(measDataFile)
        if tsysin == 'UTCTIME':
            tzonein = time.timezone / -3600
        # If the data items are EMPTY or there is only one header row,
        # go to the next data file.
        if obs_data_items == [] or len(obs_data_items) == 1:
            continue
        obs_flds = obs_data_items[0]
        required_flds = [StationFields.id, DataValueFields.type, DataValueFields.value]
        for fld in required_flds:
            if not StringClass.string_in_list(fld, obs_flds):
                # the data cannot meet the requirement!
                raise ValueError('%s does not meet the required format!' % measDataFile)
        for i, cur_obs_data_item in enumerate(obs_data_items):
            if i == 0:
                continue
            dic = dict()
            for j, cur_data_value in enumerate(cur_obs_data_item):
                if StringClass.string_match(obs_flds[j], StationFields.id):
                    dic[StationFields.id] = int(cur_data_value)
                elif StringClass.string_match(obs_flds[j], DataValueFields.type):
                    dic[DataValueFields.type] = cur_data_value
                elif StringClass.string_match(obs_flds[j], DataValueFields.value):
                    dic[DataValueFields.value] = float(cur_data_value)
            # if the current site ID is not included, go to the next data item
            if StationFields.id not in dic or dic[StationFields.id] not in site_ids:
                continue
            utc_t = HydroClimateUtilClass.get_utcdatetime_from_field_values(
                obs_flds, cur_obs_data_item, tsysin, tzonein)
            dic[DataValueFields.local_time] = utc_t + timedelta(minutes=tzonein * 60)
            dic[DataValueFields.time_zone] = tzonein
            dic[DataValueFields.utc] = utc_t
            # # find old records and replace (deprecated because of low efficiency, lj.)
            # curfilter = {StationFields.id: dic[StationFields.id],
            #              DataValueFields.type: dic[DataValueFields.type],
            #              DataValueFields.utc: dic[DataValueFields.utc]}
            # bulk.find(curfilter).replace_one(dic)
            bulk.insert(dic)
            count += 1
            if count % 500 == 0:
                MongoUtil.run_bulk(bulk)
                bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
    if count % 500 != 0:
        MongoUtil.run_bulk(bulk)
    # 3. Add measurement data with units converted
    # loop the variable list
    added_dics = []
    for curVar in variable_lists:
        # If the unit is mg/L, change the Type name with the suffix 'Conc',
        # and convert the corresponding data to kg if discharge data is available.
        cur_type = curVar[StationFields.type]
        cur_unit = curVar[StationFields.unit]
        # Find data by Type
        for item in hydro_clim_db[DBTableNames.observes].find({StationFields.type: cur_type}):
            dic = dict()
            dic[StationFields.id] = item[StationFields.id]
            dic[DataValueFields.value] = item[DataValueFields.value]
            dic[StationFields.type] = item[StationFields.type]
            dic[DataValueFields.local_time] = item[DataValueFields.local_time]
            dic[DataValueFields.time_zone] = item[DataValueFields.time_zone]
            dic[DataValueFields.utc] = item[DataValueFields.utc]
            if cur_unit == 'mg/L' or cur_unit == 'g/L':
                # update the Type name
                dic[StationFields.type] = cur_type + 'Conc'
                curfilter = {StationFields.id: dic[StationFields.id],
                             DataValueFields.type: cur_type,
                             DataValueFields.utc: dic[DataValueFields.utc]}
                hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic,
                                                                          upsert=True)
                dic[StationFields.type] = cur_type
            # find discharge on the current day
            cur_filter = {StationFields.type: 'Q',
                          DataValueFields.utc: dic[DataValueFields.utc],
                          StationFields.id: dic[StationFields.id]}
            q_dic = hydro_clim_db[DBTableNames.observes].find_one(filter=cur_filter)
            if q_dic is not None:
                q = q_dic[DataValueFields.value]
            else:
                continue
            if cur_unit == 'mg/L':
                # convert mg/L to kg/day
                dic[DataValueFields.value] = round(
                    dic[DataValueFields.value] * q * 86400. / 1000., 2)
            elif cur_unit == 'g/L':
                # convert g/L to kg/day
                dic[DataValueFields.value] = round(
                    dic[DataValueFields.value] * q * 86400., 2)
            elif cur_unit == 'kg':
                dic[StationFields.type] = cur_type + 'Conc'
                # convert kg to mg/L
                dic[DataValueFields.value] = round(
                    dic[DataValueFields.value] / q * 1000. / 86400., 2)
            # add the new data item
            added_dics.append(dic)
    # import to MongoDB
    for dic in added_dics:
        curfilter = {StationFields.id: dic[StationFields.id],
                     DataValueFields.type: dic[DataValueFields.type],
                     DataValueFields.utc: dic[DataValueFields.utc]}
        hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
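# Worked example of the unit conversions above, with discharge Q assumed to be
# in m^3/s. Since 1 m^3 = 1000 L, a concentration C in mg/L equals C g/m^3, so
# the daily load is C * Q * 86400 / 1000 kg/day; for C in g/L (= kg/m^3) the
# division by 1000 drops out, and the kg -> mg/L direction simply inverts the
# formula. For hypothetical values C = 2.5 mg/L and Q = 3 m^3/s:
c_mg_l, q_m3_s = 2.5, 3.0
load_kg_day = round(c_mg_l * q_m3_s * 86400. / 1000., 2)
print(load_kg_day)  # 648.0 kg/day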