def OutputItems(self):
    # type: (...) -> (List[AnyStr], Dict[AnyStr, Optional[List[AnyStr]]])
    """Read output ID and items from database.

    Returns:
        _output_ids (list): OUTPUTID list
        _output_items (dict): key is core file name of output,
                              value is None or list of aggregated types
    """
    if self._output_ids and self._output_items:
        return self._output_ids, self._output_items
    cursor = self.fileout_tab.find({'$or': [{ModelCfgFields.use: '1'},
                                            {ModelCfgFields.use: 1}]})
    if cursor is not None:
        for item in cursor:
            self._output_ids.append(item[ModelCfgFields.output_id])
            name = item[ModelCfgFields.filename]
            corename = StringClass.split_string(name, '.')[0]
            types = item[ModelCfgFields.type]
            if StringClass.string_match(types, 'NONE'):
                self._output_items.setdefault(corename, None)
            else:
                self._output_items.setdefault(corename,
                                              StringClass.split_string(types, '-'))
    return self._output_ids, self._output_items
def get_time_system_from_data_file(in_file):
    # type: (str) -> (str, int)
    """Get the time system from the data file.

    The basic format is: #<time_system> [<time_zone>],
    e.g., #LOCALTIME 8, #LOCALTIME -2, #UTCTIME

    Returns:
        time_sys: 'UTCTIME' or 'LOCALTIME'
        time_zone(int): Positive for West time zone, and negative for East.
    """
    time_sys = 'LOCALTIME'
    time_zone = time.timezone // 3600
    with open(in_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    for line in lines:
        str_line = line.strip()
        # for LF in LFs:
        #     if LF in line:
        #         str_line = line.split(LF)[0]
        #         break
        if str_line[0] != '#':
            break
        if str_line.lower().find('utc') >= 0:
            time_sys = 'UTCTIME'
            time_zone = 0
            break
        if str_line.lower().find('local') >= 0:
            line_list = StringClass.split_string(str_line, [' ', ','])
            if len(line_list) == 2 and MathClass.isnumerical(line_list[1]):
                time_zone = -1 * int(line_list[1])
            break
    return time_sys, time_zone
def get_time_system_from_data_file(in_file):
    """Get the time system from the data file.

    The basic format is: #<time_system> [<time_zone>],
    e.g., #LOCALTIME 8, #UTCTIME
    """
    time_sys = 'LOCALTIME'
    time_zone = time.timezone // -3600
    with open(in_file, 'r') as f:
        lines = f.readlines()
    for line in lines:
        str_line = line.strip()
        # for LF in LFs:
        #     if LF in line:
        #         str_line = line.split(LF)[0]
        #         break
        if str_line[0] != '#':
            break
        if str_line.lower().find('utc') >= 0:
            time_sys = 'UTCTIME'
            time_zone = 0
            break
        if str_line.lower().find('local') >= 0:
            line_list = StringClass.split_string(str_line, [','])
            if len(line_list) == 2 and MathClass.isnumerical(line_list[1]):
                time_zone = -1 * int(line_list[1])
            break
    return time_sys, time_zone
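# A minimal usage sketch (hypothetical file name) of get_time_system_from_data_file.
# With the first variant above, which splits the header line on spaces and commas,
# a data file beginning with "#LOCALTIME 8" yields ('LOCALTIME', -8) and one beginning
# with "#UTCTIME" yields ('UTCTIME', 0); the second variant splits on commas only,
# so it expects a header such as "#LOCALTIME,8".
#
#     time_sys, time_zone = get_time_system_from_data_file('observed_Q_daily.txt')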
def read_optionfuzinf_section(self, _optfuzinf):
    """Optional parameter-settings for fuzzy slope position inference."""
    if _optfuzinf not in self.cf.sections():
        return
    if self.cf.has_option(_optfuzinf, 'inferparams'):
        fuzinf_strs = self.cf.get(_optfuzinf, 'inferparams')
        if StringClass.string_match(fuzinf_strs, 'none'):
            return
        self.inferparam = dict()
        fuzinf_types = StringClass.split_string(fuzinf_strs, ';')
        if len(fuzinf_types) != len(self.slppostype):
            raise RuntimeError("InferParams (%s) MUST be consistent with slope position types"
                               " and separated by ';'!" % fuzinf_strs)
        for i, slppos in enumerate(self.slppostype):
            self.inferparam[slppos] = dict()
            infparams = StringClass.extract_numeric_values_from_string(fuzinf_types[i])
            if len(infparams) % 4 != 0:
                raise RuntimeError("Each item of InferParams MUST contain four elements,"
                                   " i.e., Attribute No., FMF No., w1, w2! Please check item: "
                                   "%s for %s." % (fuzinf_types[i], slppos))
            for j in range(int(len(infparams) / 4)):
                attridx = int(infparams[j * 4]) - 1
                attrname = self.selectedtopolist[attridx]
                fmf = self._FMFTYPE[int(infparams[j * 4 + 1])]
                curinfparam = self._FMFPARAM[fmf][:]
                curinfparam[0] = infparams[j * 4 + 2]  # w1
                curinfparam[3] = infparams[j * 4 + 3]  # w2
                self.inferparam[slppos][attrname] = [fmf] + curinfparam
def read_data_items_from_txt(txt_file):
    """Read data items (including the title row) from a text file.

    Each data element is split by TAB or COMMA. Be aware, the separator of
    each line can only be TAB or COMMA, and COMMA is recommended.

    Args:
        txt_file: full path of text data file

    Returns:
        2D data array
    """
    data_items = list()
    with open(txt_file, 'r') as f:
        for line in f:
            str_line = line.strip()
            if str_line != '' and str_line.find('#') < 0:
                line_list = StringClass.split_string(str_line, ['\t'])
                if len(line_list) <= 1:
                    line_list = StringClass.split_string(str_line, [','])
                data_items.append(line_list)
    return data_items
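# A minimal usage sketch of read_data_items_from_txt (hypothetical file name and content).
# For a comma-separated file such as
#     # site information
#     ID,NAME,LAT,LON
#     1,outlet,31.5,110.2
# the call below returns [['ID', 'NAME', 'LAT', 'LON'], ['1', 'outlet', '31.5', '110.2']]:
# all values are kept as strings, and empty lines and lines containing '#' are skipped.
#
#     data_items = read_data_items_from_txt('site_info.csv')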
def __init__(self, cf, method='morris'):
    """Initialization."""
    self.method = method
    # 1. SEIMS model related
    self.model = ParseSEIMSConfig(cf)
    # 2. Common settings of parameters sensitivity analysis
    if 'PSA_Settings' not in cf.sections():
        raise ValueError("[PSA_Settings] section MUST exist in the *.ini file.")
    self.evaluate_params = list()
    if cf.has_option('PSA_Settings', 'evaluateparam'):
        eva_str = cf.get('PSA_Settings', 'evaluateparam')
        self.evaluate_params = StringClass.split_string(eva_str, ',')
    else:
        self.evaluate_params = ['Q']  # Default
    self.param_range_def = 'morris_param_rng.def'  # Default
    if cf.has_option('PSA_Settings', 'paramrngdef'):
        self.param_range_def = cf.get('PSA_Settings', 'paramrngdef')
    self.param_range_def = self.model.model_dir + os.path.sep + self.param_range_def
    if not FileClass.is_file_exists(self.param_range_def):
        raise IOError('Ranges of parameters MUST be provided!')
    if not (cf.has_option('PSA_Settings', 'psa_time_start') and
            cf.has_option('PSA_Settings', 'psa_time_end')):
        raise ValueError("Start and end time of PSA MUST be specified in [PSA_Settings].")
    try:
        # UTCTIME
        tstart = cf.get('PSA_Settings', 'psa_time_start')
        tend = cf.get('PSA_Settings', 'psa_time_end')
        self.psa_stime = StringClass.get_datetime(tstart)
        self.psa_etime = StringClass.get_datetime(tend)
    except ValueError:
        raise ValueError('The time format MUST be "YYYY-MM-DD HH:MM:SS".')
    if self.psa_stime >= self.psa_etime:
        raise ValueError("Wrong time settings in [PSA_Settings]!")
    # 3. Parameters settings for specific sensitivity analysis methods
    self.morris = None
    self.fast = None
    if self.method == 'fast':
        self.fast = FASTConfig(cf)
        self.psa_outpath = '%s/PSA_FAST_N%dM%d' % (self.model.model_dir,
                                                   self.fast.N, self.fast.M)
    elif self.method == 'morris':
        self.morris = MorrisConfig(cf)
        self.psa_outpath = '%s/PSA_Morris_N%dL%d' % (self.model.model_dir,
                                                     self.morris.N, self.morris.num_levels)
    # 4. (Optional) Plot settings for matplotlib
    self.plot_cfg = PlotConfig(cf)
    # Do not remove psa_outpath if it already exists
    UtilClass.mkdir(self.psa_outpath)
    self.outfiles = PSAOutputs(self.psa_outpath)
def read_pareto_solutions_from_txt(txt_file, sce_name='scenario', field_name='gene_values'):
    # type: (AnyStr, AnyStr, AnyStr) -> (Dict[int, List[List[float]]])
    """Read Pareto points from `runtime.log` file.

    Args:
        txt_file: Full file path of `runtime.log` output by NSGA2 algorithm.
        sce_name: Field name followed by `generation`, e.g., 'calibrationID', 'scenario', etc.
        field_name: Field name in header for gene values, 'gene_values' by default

    Returns:
        pareto_solutions: `OrderedDict`, key is generation ID, value is arrays of Pareto solutions
    """
    with open(txt_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    pareto_solutions = OrderedDict()
    found = False
    cur_gen = -1
    field_idx = -1
    for lno, line in enumerate(lines):
        str_line = line
        for LF in LFs:
            if LF in line:
                str_line = line.split(LF)[0]
                break
        if str_line == '':
            continue
        values = StringClass.extract_numeric_values_from_string(str_line)
        # Check generation
        if str_line[0] == '#' and 'Generation' in str_line:
            if len(values) != 1:
                continue
            # e.g., ###### Generation: 23 ######
            gen = int(values[0])
            found = True
            cur_gen = gen
            pareto_solutions[cur_gen] = list()
            continue
        if not found:  # If the first "###### Generation: 1 ######" has not been found.
            continue
        line_list = StringClass.split_string(str_line.upper(), ['\t'])
        if values is None:  # means header line
            if field_idx >= 0:
                continue
            for idx, v in enumerate(line_list):
                if field_name.upper() in v.upper():
                    field_idx = idx
                    break
            continue
        if field_idx < 0:
            continue
        # now append the real Pareto solutions data
        tmpvalues = StringClass.extract_numeric_values_from_string(line_list[field_idx])
        pareto_solutions[cur_gen].append(tmpvalues[:])
    return pareto_solutions
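# A minimal usage sketch of read_pareto_solutions_from_txt, assuming a `runtime.log`
# written by the NSGA2 scripts: each generation block starts with a line such as
# "###### Generation: 1 ######", followed by a TAB-separated header row that contains a
# 'gene_values' column and then one row per Pareto solution.
#
#     solutions = read_pareto_solutions_from_txt('runtime.log', sce_name='scenario')
#     first_gen = solutions[1]  # list of gene-value lists of the first generation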
def __init__(self, cf, method='morris'):
    """Initialization."""
    self.method = method
    # 1. SEIMS model related
    self.model = ParseSEIMSConfig(cf)
    # 2. Common settings of parameters sensitivity analysis
    if 'PSA_Settings' not in cf.sections():
        raise ValueError("[PSA_Settings] section MUST exist in the *.ini file.")
    self.evaluate_params = list()
    if cf.has_option('PSA_Settings', 'evaluateparam'):
        eva_str = cf.get('PSA_Settings', 'evaluateparam')
        self.evaluate_params = StringClass.split_string(eva_str, ',')
    else:
        self.evaluate_params = ['Q']  # Default
    self.param_range_def = 'morris_param_rng.def'  # Default
    if cf.has_option('PSA_Settings', 'paramrngdef'):
        self.param_range_def = cf.get('PSA_Settings', 'paramrngdef')
    self.param_range_def = self.model.model_dir + os.path.sep + self.param_range_def
    if not FileClass.is_file_exists(self.param_range_def):
        raise IOError('Ranges of parameters MUST be provided!')
    if not (cf.has_option('PSA_Settings', 'psa_time_start') and
            cf.has_option('PSA_Settings', 'psa_time_end')):
        raise ValueError("Start and end time of PSA MUST be specified in [PSA_Settings].")
    try:
        # UTCTIME
        tstart = cf.get('PSA_Settings', 'psa_time_start')
        tend = cf.get('PSA_Settings', 'psa_time_end')
        self.psa_stime = StringClass.get_datetime(tstart)
        self.psa_etime = StringClass.get_datetime(tend)
    except ValueError:
        raise ValueError('The time format MUST be "YYYY-MM-DD HH:MM:SS".')
    if self.psa_stime >= self.psa_etime:
        raise ValueError("Wrong time settings in [PSA_Settings]!")
    # 3. Parameters settings for specific sensitivity analysis methods
    self.morris = None
    self.fast = None
    if self.method == 'fast':
        self.fast = FASTConfig(cf)
        self.psa_outpath = '%s/PSA-FAST-N%dM%d' % (self.model.model_dir,
                                                   self.fast.N, self.fast.M)
    elif self.method == 'morris':
        self.morris = MorrisConfig(cf)
        self.psa_outpath = '%s/PSA-Morris-N%dL%dJ%d' % (self.model.model_dir,
                                                        self.morris.N,
                                                        self.morris.num_levels,
                                                        self.morris.grid_jump)
    # Do not remove psa_outpath if it already exists
    UtilClass.mkdir(self.psa_outpath)
    self.outfiles = PSAOutputs(self.psa_outpath)
def read_simulation_from_txt(ws,  # type: AnyStr
                             plot_vars,  # type: List[AnyStr]
                             subbsnID,  # type: int
                             stime,  # type: datetime
                             etime  # type: datetime
                             ):
    # type: (...) -> (List[AnyStr], Dict[datetime, List[float]])
    """Read simulation data from text file according to subbasin ID.

    Returns:
        1. Matched variable names, [var1, var2, ...]
        2. Simulation data dict of all plotted variables, with UTCDATETIME.
           {Datetime: [value_of_var1, value_of_var2, ...], ...}
    """
    plot_vars_existed = list()
    sim_data_dict = OrderedDict()
    for i, v in enumerate(plot_vars):
        txtfile = ws + os.path.sep + v + '.txt'
        if not FileClass.is_file_exists(txtfile):
            print('WARNING: Simulation variable file %s does not exist!' % txtfile)
            continue
        data_items = read_data_items_from_txt(txtfile)
        found = False
        data_available = False
        for item in data_items:
            item_vs = StringClass.split_string(item[0], ' ', elim_empty=True)
            if len(item_vs) == 2:
                if int(item_vs[1]) == subbsnID and not found:
                    found = True
                elif int(item_vs[1]) != subbsnID and found:
                    break
            if not found:
                continue
            if len(item_vs) != 3:
                continue
            date_str = '%s %s' % (item_vs[0], item_vs[1])
            sim_datetime = StringClass.get_datetime(date_str, "%Y-%m-%d %H:%M:%S")
            if stime <= sim_datetime <= etime:
                if sim_datetime not in sim_data_dict:
                    sim_data_dict[sim_datetime] = list()
                sim_data_dict[sim_datetime].append(float(item_vs[2]))
                data_available = True
        if data_available:
            plot_vars_existed.append(v)
    print('Read simulation from %s to %s done.' % (stime.strftime('%c'), etime.strftime('%c')))
    return plot_vars_existed, sim_data_dict
def OutputItems(self):
    # type: (...) -> Dict[AnyStr, Optional[List[AnyStr]]]
    """Read output items from database."""
    if self._output_items:
        return self._output_items
    cursor = self.fileout_tab.find({'$or': [{ModelCfgFields.use: '1'},
                                            {ModelCfgFields.use: 1}]})
    if cursor is not None:
        for item in cursor:
            name = item[ModelCfgFields.filename]
            corename = StringClass.split_string(name, '.')[0]
            types = item[ModelCfgFields.type]
            if StringClass.string_match(types, 'NONE'):
                self._output_items.setdefault(corename, None)
            else:
                self._output_items.setdefault(corename,
                                              StringClass.split_string(types, '-'))
    return self._output_items
def __init__(self, cf=None):
    # type: (Optional[ConfigParser]) -> None
    """Get parameters from ConfigParser object."""
    self.fmts = ['png']
    self.font_name = 'Times New Roman'
    self.plot_cn = False
    self.title_fsize = 18
    self.legend_fsize = 14
    self.tick_fsize = 12
    self.axislabel_fsize = 14
    self.label_fsize = 16
    self.dpi = 300
    section_name = 'OPTIONAL_MATPLOT_SETTINGS'
    if cf is None or not cf.has_section(section_name):
        return
    if cf.has_option(section_name, 'figure_formats'):
        fmts_strings = cf.get(section_name, 'figure_formats')
        fmts_strings = fmts_strings.lower()
        fmts_list = StringClass.split_string(fmts_strings, [',', ';', '-'])
        for fmt in fmts_list:
            if fmt not in ['png', 'tif', 'jpg', 'pdf', 'eps', 'svg', 'ps']:
                continue
            if fmt not in self.fmts:
                self.fmts.append(fmt)
    if cf.has_option(section_name, 'font_title'):
        font_name = cf.get(section_name, 'font_title')
        if font_manager.findfont(font_manager.FontProperties(family=font_name)):
            self.font_name = font_name
        else:
            print('Warning: The specified title font %s cannot be found! '
                  'Please copy the .ttf font file to the directory of '
                  'Lib/site-packages/matplotlib/mpl-data/fonts/ttf, '
                  'rebuild the font cache by font_manager._rebuild(), '
                  'and rerun this script.' % font_name)
    if cf.has_option(section_name, 'lang_cn'):
        self.plot_cn = cf.getboolean(section_name, 'lang_cn')
    if cf.has_option(section_name, 'title_fontsize'):
        self.title_fsize = cf.getint(section_name, 'title_fontsize')
    if cf.has_option(section_name, 'legend_fontsize'):
        self.legend_fsize = cf.getint(section_name, 'legend_fontsize')
    if cf.has_option(section_name, 'ticklabel_fontsize'):
        self.tick_fsize = cf.getint(section_name, 'ticklabel_fontsize')
    if cf.has_option(section_name, 'axislabel_fontsize'):
        self.axislabel_fsize = cf.getint(section_name, 'axislabel_fontsize')
    if cf.has_option(section_name, 'label_fontsize'):
        self.label_fsize = cf.getint(section_name, 'label_fontsize')
    if cf.has_option(section_name, 'dpi'):
        self.dpi = cf.getint(section_name, 'dpi')
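# A minimal sketch of the [OPTIONAL_MATPLOT_SETTINGS] section read by PlotConfig above.
# Every option is optional and falls back to the defaults set in __init__; the values
# shown here are illustrative only.
#
#     [OPTIONAL_MATPLOT_SETTINGS]
#     figure_formats = png,pdf
#     font_title = Times New Roman
#     lang_cn = False
#     title_fontsize = 18
#     legend_fontsize = 14
#     ticklabel_fontsize = 12
#     axislabel_fontsize = 14
#     label_fontsize = 16
#     dpi = 300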
def read_pareto_popsize_from_txt(txt_file, sce_name='scenario'):
    # type: (AnyStr, AnyStr) -> (List[int], List[int])
    """Read the population size of each generation."""
    with open(txt_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    pareto_popnum = OrderedDict()
    found = False
    cur_gen = -1
    iden_idx = -1
    for line in lines:
        str_line = line
        for LF in LFs:
            if LF in line:
                str_line = line.split(LF)[0]
                break
        if str_line == '':
            continue
        values = StringClass.extract_numeric_values_from_string(str_line)
        # Check generation
        if str_line[0] == '#' and 'Generation' in str_line:
            if len(values) != 1:
                continue
            gen = int(values[0])
            found = True
            cur_gen = gen
            pareto_popnum[cur_gen] = list()
            continue
        if not found:
            continue
        if values is None:  # means header line
            line_list = StringClass.split_string(str_line, ['\t'])
            for idx, v in enumerate(line_list):
                if StringClass.string_match(v, sce_name):
                    iden_idx = idx
                    break
            continue
        if iden_idx < 0:
            continue
        # now append the real Pareto front point data
        pareto_popnum[cur_gen].append(int(values[iden_idx]))
    all_sceids = list()
    acc_num = list()
    genids = sorted(list(pareto_popnum.keys()))
    for idx, genid in enumerate(genids):
        for _id in pareto_popnum[genid]:
            if _id not in all_sceids:
                all_sceids.append(_id)
        acc_num.append(len(all_sceids))
    return genids, acc_num
def __init__(self, cf):
    """Initialization."""
    # 1. SEIMS model related
    self.model_cfg = ParseSEIMSConfig(cf)
    # 2. Parameters
    self.plt_subbsnid = -1
    self.plt_vars = list()
    if 'PARAMETERS' in cf.sections():
        self.plt_subbsnid = cf.getint('PARAMETERS', 'plot_subbasinid')
        plt_vars_str = cf.get('PARAMETERS', 'plot_variables')
    else:
        raise ValueError("[PARAMETERS] section MUST exist in the *.ini file.")
    if self.plt_subbsnid < 0:
        raise ValueError("PLOT_SUBBASINID must be greater than or equal to 0.")
    if plt_vars_str != '':
        self.plt_vars = StringClass.split_string(plt_vars_str)
    else:
        raise ValueError("PLOT_VARIABLES illegally defined in [PARAMETERS]!")
    # 3. Optional_Parameters
    if 'OPTIONAL_PARAMETERS' not in cf.sections():
        raise ValueError("[OPTIONAL_PARAMETERS] section MUST exist in the *.ini file.")
    # UTCTIME
    self.cali_stime = parse_datetime_from_ini(cf, 'OPTIONAL_PARAMETERS', 'cali_time_start')
    self.cali_etime = parse_datetime_from_ini(cf, 'OPTIONAL_PARAMETERS', 'cali_time_end')
    self.vali_stime = parse_datetime_from_ini(cf, 'OPTIONAL_PARAMETERS', 'vali_time_start')
    self.vali_etime = parse_datetime_from_ini(cf, 'OPTIONAL_PARAMETERS', 'vali_time_end')
    if not self.cali_stime or not self.cali_etime or self.cali_stime >= self.cali_etime:
        raise ValueError("Wrong time settings of calibration in [OPTIONAL_PARAMETERS]!")
    if self.vali_stime and self.vali_etime and self.vali_stime >= self.vali_etime:
        raise ValueError("Wrong time settings of validation in [OPTIONAL_PARAMETERS]!")
    # 4. Switches
    self.lang_cn = False
    if 'SWITCH' in cf.sections():
        self.lang_cn = cf.getboolean('SWITCH', 'lang_cn')
def __init__(self, cf):
    """Initialization."""
    # 1. Directories
    self.model_dir = None
    self.scenario_id = -1
    if 'PATH' in cf.sections():
        self.model_dir = cf.get('PATH', 'model_dir')
        self.scenario_id = cf.getint('PATH', 'scenarioid')
        if self.scenario_id < 0:
            self.model_dir = self.model_dir + os.path.sep + 'OUTPUT'
        else:
            self.model_dir = self.model_dir + os.path.sep + 'OUTPUT' + str(self.scenario_id)
    else:
        raise ValueError("[PATH] section MUST exist in the *.ini file.")
    if not FileClass.is_dir_exists(self.model_dir):
        raise ValueError("Please check the directories defined in [PATH]")
    # 2. MongoDB configuration and database, collection, GridFS names
    self.hostname = '127.0.0.1'  # localhost by default
    self.port = 27017
    self.climate_db = ''
    self.bmp_scenario_db = ''
    self.spatial_db = ''
    if 'MONGODB' in cf.sections():
        self.hostname = cf.get('MONGODB', 'hostname')
        self.port = cf.getint('MONGODB', 'port')
        self.climate_db = cf.get('MONGODB', 'climatedbname')
        self.bmp_scenario_db = cf.get('MONGODB', 'bmpscenariodbname')
        self.spatial_db = cf.get('MONGODB', 'spatialdbname')
    else:
        raise ValueError('[MONGODB] section MUST exist in the *.ini file.')
    if not StringClass.is_valid_ip_addr(self.hostname):
        raise ValueError('HOSTNAME illegally defined in [MONGODB]!')
    # 3. Parameters
    self.plt_subbsnid = -1
    self.plt_vars = list()
    if 'PARAMETERS' in cf.sections():
        self.plt_subbsnid = cf.getint('PARAMETERS', 'plot_subbasinid')
        plt_vars_str = cf.get('PARAMETERS', 'plot_variables')
    else:
        raise ValueError("[PARAMETERS] section MUST exist in the *.ini file.")
    if self.plt_subbsnid < 0:
        raise ValueError("PLOT_SUBBASINID must be greater than or equal to 0.")
    if plt_vars_str != '':
        self.plt_vars = StringClass.split_string(plt_vars_str)
    else:
        raise ValueError("PLOT_VARIABLES illegally defined in [PARAMETERS]!")
    # 4. Optional_Parameters
    if 'OPTIONAL_PARAMETERS' in cf.sections():
        tstart = cf.get('OPTIONAL_PARAMETERS', 'time_start')
        tend = cf.get('OPTIONAL_PARAMETERS', 'time_end')
    else:
        raise ValueError("[OPTIONAL_PARAMETERS] section MUST exist in the *.ini file.")
    try:
        # UTCTIME
        self.time_start = StringClass.get_datetime(tstart)
        self.time_end = StringClass.get_datetime(tend)
        if cf.has_option('OPTIONAL_PARAMETERS', 'vali_time_start') and \
                cf.has_option('OPTIONAL_PARAMETERS', 'vali_time_end'):
            tstart = cf.get('OPTIONAL_PARAMETERS', 'vali_time_start')
            tend = cf.get('OPTIONAL_PARAMETERS', 'vali_time_end')
            self.vali_stime = StringClass.get_datetime(tstart)
            self.vali_etime = StringClass.get_datetime(tend)
        else:
            self.vali_stime = None
            self.vali_etime = None
    except ValueError:
        raise ValueError('The time format MUST be "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS".')
    if self.time_start >= self.time_end:
        raise ValueError("Wrong time settings in [OPTIONAL_PARAMETERS]!")
    # 5. Switches
    self.lang_cn = False
    if 'SWITCH' in cf.sections():
        self.lang_cn = cf.getboolean('SWITCH', 'lang_cn')
def model_io_configuration(cfg, maindb):
    """Import the input and output configuration of SEIMS, i.e., file.in and file.out

    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object
    """
    file_in_path = cfg.modelcfgs.filein
    file_out_path = cfg.paramcfgs.init_outputs_file
    # initialize if the collection does not exist
    c_list = maindb.collection_names()
    conf_tabs = [DBTableNames.main_filein, DBTableNames.main_fileout]
    for item in conf_tabs:
        if not StringClass.string_in_list(item, c_list):
            maindb.create_collection(item)
        else:
            maindb.drop_collection(item)
    file_in_items = read_data_items_from_txt(file_in_path)
    file_out_items = read_data_items_from_txt(file_out_path)
    for item in file_in_items:
        file_in_dict = dict()
        values = StringClass.split_string(item[0].strip(), ['|'])
        if len(values) != 2:
            raise ValueError('One item should only have one Tag and one value string,'
                             ' split by "|"')
        file_in_dict[ModelCfgFields.tag] = values[0]
        file_in_dict[ModelCfgFields.value] = values[1]
        maindb[DBTableNames.main_filein].insert(file_in_dict)
    # begin to import initial outputs settings
    bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
    out_field_array = file_out_items[0]
    out_data_array = file_out_items[1:]
    # print(out_data_array)
    for item in out_data_array:
        file_out_dict = dict()
        for i, v in enumerate(out_field_array):
            if StringClass.string_match(ModelCfgFields.mod_cls, v):
                file_out_dict[ModelCfgFields.mod_cls] = item[i]
            elif StringClass.string_match(ModelCfgFields.output_id, v):
                file_out_dict[ModelCfgFields.output_id] = item[i]
            elif StringClass.string_match(ModelCfgFields.desc, v):
                file_out_dict[ModelCfgFields.desc] = item[i]
            elif StringClass.string_match(ModelCfgFields.unit, v):
                file_out_dict[ModelCfgFields.unit] = item[i]
            elif StringClass.string_match(ModelCfgFields.type, v):
                file_out_dict[ModelCfgFields.type] = item[i]
            elif StringClass.string_match(ModelCfgFields.stime, v):
                file_out_dict[ModelCfgFields.stime] = item[i]
            elif StringClass.string_match(ModelCfgFields.etime, v):
                file_out_dict[ModelCfgFields.etime] = item[i]
            elif StringClass.string_match(ModelCfgFields.interval, v):
                file_out_dict[ModelCfgFields.interval] = item[i]
            elif StringClass.string_match(ModelCfgFields.interval_unit, v):
                file_out_dict[ModelCfgFields.interval_unit] = item[i]
            elif StringClass.string_match(ModelCfgFields.filename, v):
                file_out_dict[ModelCfgFields.filename] = item[i]
            elif StringClass.string_match(ModelCfgFields.use, v):
                file_out_dict[ModelCfgFields.use] = item[i]
            elif StringClass.string_match(ModelCfgFields.subbsn, v):
                file_out_dict[ModelCfgFields.subbsn] = item[i]
        if not list(file_out_dict.keys()):
            raise ValueError('There is not any valid output item stored in file.out!')
        bulk.insert(file_out_dict)
    MongoUtil.run_bulk(bulk, 'No operations to execute when importing initial outputs settings.')
    # begin to import the desired outputs
    # initialize bulk operator
    bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
    # read initial parameters from txt file
    data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
    # print(field_names)
    for i, cur_data_item in enumerate(data_items):
        data_import = dict()
        cur_filter = dict()
        # print(cur_data_item)
        if len(cur_data_item) == 7:
            data_import[ModelCfgFields.output_id] = cur_data_item[0]
            data_import[ModelCfgFields.type] = cur_data_item[1]
            data_import[ModelCfgFields.stime] = cur_data_item[2]
            data_import[ModelCfgFields.etime] = cur_data_item[3]
            data_import[ModelCfgFields.interval] = cur_data_item[4]
            data_import[ModelCfgFields.interval_unit] = cur_data_item[5]
            data_import[ModelCfgFields.subbsn] = cur_data_item[6]
            data_import[ModelCfgFields.use] = 1
            cur_filter[ModelCfgFields.output_id] = cur_data_item[0]
        else:
            raise RuntimeError('Items in file.out must have 7 columns, i.e., OUTPUTID,'
                               'TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.')
        bulk.find(cur_filter).update({'$set': data_import})
    # execute import operators
    MongoUtil.run_bulk(bulk, 'No operations to execute when importing the desired outputs.')
def run(function_name, in_files, wp=None, in_params=None, out_files=None,
        mpi_params=None, log_params=None):
    """Run TauDEM function.

    - 1. The command will not execute if any input file does not exist.
    - 2. An error will be detected after running the TauDEM command if
         any output file does not exist.

    Args:
        function_name (str): Full path of TauDEM function.
        in_files (dict, required): Dict of pairs of parameter id (string) and file path
            (string or list) for input files, e.g.::

                {'-z': '/full/path/to/dem.tif'}

        wp (str, optional): Workspace for outputs. If not specified, the directory of the
            first input file in ``in_files`` will be used.
        in_params (dict, optional): Dict of pairs of parameter id (string) and value
            (or None for a flag parameter without a value) for input parameters, e.g.::

                {'-nc': None}
                {'-thresh': threshold}
                {'-m': 'ave' 's', '-nc': None}

        out_files (dict, optional): Dict of pairs of parameter id (string) and file path
            (string or list) for output files, e.g.::

                {'-fel': 'filleddem.tif'}
                {'-maxS': ['harden.tif', 'maxsimi.tif']}

        mpi_params (dict, optional): Dict of pairs of parameter id (string) and value or
            path for MPI setting, e.g.::

                {'mpipath': '/soft/bin', 'hostfile': '/soft/bin/cluster.node', 'n': 4}
                {'mpipath': '/soft/bin', 'n': 4}
                {'n': 4}

        log_params (dict, optional): Dict of pairs of parameter id (string) and value or
            path for runtime and log output parameters, e.g.::

                {'logfile': '/home/user/log.txt', 'runtimefile': '/home/user/runtime.txt'}

    Returns:
        True if TauDEM runs successfully, otherwise False.
    """
    # Check input files
    if in_files is None:
        TauDEM.error('Input files parameter is required!')
    if not isinstance(in_files, dict):
        TauDEM.error('The input files parameter must be a dict!')
    for (pid, infile) in iteritems(in_files):
        if infile is None:
            continue
        if isinstance(infile, list) or isinstance(infile, tuple):
            for idx, inf in enumerate(infile):
                if inf is None:
                    continue
                inf, wp = TauDEM.check_infile_and_wp(inf, wp)
                in_files[pid][idx] = inf
            continue
        if os.path.exists(infile):
            infile, wp = TauDEM.check_infile_and_wp(infile, wp)
            in_files[pid] = os.path.abspath(infile)
        else:
            # For more flexible input file forms,
            # e.g., -inputtags 1 <path/to/tag1.tif> 2 <path/to/tag2.tif> ...
            # In such unpredictable circumstances, we cannot check the existence of
            # input files here, so the developer should check it elsewhere.
            if len(StringClass.split_string(infile, ' ')) > 1:
                continue
            else:
                # the infile should still be an existing file, so check it in the workspace
                if wp is None:
                    TauDEM.error('Workspace should not be None!')
                infile = wp + os.sep + infile
                if not os.path.exists(infile):
                    TauDEM.error('Input files parameter %s: %s does not exist!' %
                                 (pid, infile))
                in_files[pid] = os.path.abspath(infile)
    # Make workspace dir if it does not exist
    UtilClass.mkdir(wp)
    # Check the log parameter
    log_file = None
    runtime_file = None
    if log_params is not None:
        if not isinstance(log_params, dict):
            TauDEM.error('The log parameter must be a dict!')
        if 'logfile' in log_params and log_params['logfile'] is not None:
            log_file = log_params['logfile']
            # If log_file is just a file name, then save it in the default workspace.
            if os.sep not in log_file:
                log_file = wp + os.sep + log_file
                log_file = os.path.abspath(log_file)
        if 'runtimefile' in log_params and log_params['runtimefile'] is not None:
            runtime_file = log_params['runtimefile']
            # If runtime_file is just a file name, then save it in the default workspace.
            if os.sep not in runtime_file:
                runtime_file = wp + os.sep + runtime_file
                runtime_file = os.path.abspath(runtime_file)
    # remove out_files to avoid any file-IO-related error
    new_out_files = list()
    if out_files is not None:
        if not isinstance(out_files, dict):
            TauDEM.error('The output files parameter must be a dict!')
        for (pid, out_file) in iteritems(out_files):
            if out_file is None:
                continue
            if isinstance(out_file, list) or isinstance(out_file, tuple):
                for idx, outf in enumerate(out_file):
                    if outf is None:
                        continue
                    outf = FileClass.get_file_fullpath(outf, wp)
                    FileClass.remove_files(outf)
                    out_files[pid][idx] = outf
                    new_out_files.append(outf)
            else:
                out_file = FileClass.get_file_fullpath(out_file, wp)
                FileClass.remove_files(out_file)
                out_files[pid] = out_file
                new_out_files.append(out_file)
    # concatenate command line
    commands = list()
    # MPI header
    if mpi_params is not None:
        if not isinstance(mpi_params, dict):
            TauDEM.error('The MPI settings parameter must be a dict!')
        if 'mpipath' in mpi_params and mpi_params['mpipath'] is not None:
            commands.append(mpi_params['mpipath'] + os.sep + 'mpiexec')
        else:
            commands.append('mpiexec')
        if 'hostfile' in mpi_params and mpi_params['hostfile'] is not None \
                and not StringClass.string_match(mpi_params['hostfile'], 'none') \
                and os.path.isfile(mpi_params['hostfile']):
            commands.append('-f')
            commands.append(mpi_params['hostfile'])
        if 'n' in mpi_params and mpi_params['n'] > 1:
            commands.append('-n')
            commands.append(str(mpi_params['n']))
        else:
            # If the number of processes is less than or equal to 1, do not call mpiexec.
            commands = []
    # append TauDEM function name, which can be a full path or just the name
    commands.append(function_name)
    # append input files
    for (pid, infile) in iteritems(in_files):
        if infile is None:
            continue
        if pid[0] != '-':
            pid = '-' + pid
        commands.append(pid)
        if isinstance(infile, list) or isinstance(infile, tuple):
            commands.append(' '.join(tmpf for tmpf in infile))
        else:
            commands.append(infile)
    # append input parameters
    if in_params is not None:
        if not isinstance(in_params, dict):
            TauDEM.error('The input parameters must be a dict!')
        for (pid, v) in iteritems(in_params):
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            # allow for a parameter that is a flag without a value
            if v != '' and v is not None:
                if MathClass.isnumerical(v):
                    commands.append(str(v))
                else:
                    commands.append(v)
    # append output parameters
    if out_files is not None:
        for (pid, outfile) in iteritems(out_files):
            if outfile is None:
                continue
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            if isinstance(outfile, list) or isinstance(outfile, tuple):
                commands.append(' '.join(tmpf for tmpf in outfile))
            else:
                commands.append(outfile)
    # run command
    runmsg = UtilClass.run_command(commands)
    TauDEM.log(runmsg, log_file)
    TauDEM.output_runtime_to_log(function_name, runmsg, runtime_file)
    # Check out_files, raise an error if any does not exist.
    for of in new_out_files:
        if not os.path.exists(of):
            TauDEM.error('%s failed, and the %s was not generated!' % (function_name, of))
            return False
    return True
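# A minimal usage sketch of TauDEM.run following the argument patterns documented above
# (all paths are hypothetical): fill sinks of a DEM with the TauDEM 'pitremove' tool,
# using 4 MPI processes and writing the log into the workspace.
#
#     TauDEM.run('/path/to/taudem/pitremove',
#                in_files={'-z': '/full/path/to/dem.tif'},
#                wp='/full/path/to/workspace',
#                out_files={'-fel': 'filleddem.tif'},
#                mpi_params={'n': 4},
#                log_params={'logfile': 'taudem.log'})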
def extract_typical_location(cfg):
    """Prepare configuration files for extracting typical locations."""
    if not cfg.flag_selecttyploc:
        return 0
    start_t = time.time()
    for i, slppos in enumerate(cfg.slppostype):
        if cfg.flag_auto_typlocparams:
            # automatically extract typical locations:
            # write extract ranges to the initial configuration file
            cur_ext_conf = cfg.singleslpposconf[slppos].extinitial
            extconfig_info = open(cur_ext_conf, 'w')
            extconfig_info.write('ProtoTag\t%d\n' % cfg.slppostag[i])
            abandon = list()  # abandoned terrain attributes (full file path)
            for vname, inf in cfg.infshape[slppos].iteritems():
                if StringClass.string_match(inf, 'N'):
                    abandon.append(vname)
            # print abandon
            param_num = 0
            for param in cfg.selectedtopo:
                if param not in abandon:
                    param_num += 1
            extconfig_info.write('ParametersNUM\t%d\n' % param_num)
            for vname, vpath in cfg.selectedtopo.iteritems():
                if vname in abandon:
                    continue
                if vname in cfg.extractrange[slppos] and vname not in abandon:
                    vrange = cfg.extractrange[slppos][vname]
                    extconfig_info.write('Parameters\t%s\t%s\t%f\t%f\n' %
                                         (vname, vpath, vrange[0], vrange[1]))
                else:
                    extconfig_info.write('Parameters\t%s\t%s\t%f\t%f\n' %
                                         (vname, vpath, 0, 0))
            extconfig_info.write('OUTPUT\t%s\n' % cfg.singleslpposconf[slppos].typloc)
            for vname, inf in cfg.infshape[slppos].iteritems():
                if not StringClass.string_match(inf, 'N'):
                    extconfig_info.write('FuzInfShp\t%s\t%s\n' % (vname, inf))
            base_input_param = 'BaseInput\t'
            base_input_param += '\t'.join(str(p) for p in cfg.param4typloc[slppos])
            extconfig_info.write(base_input_param)
            extconfig_info.close()
        else:
            # read from an existing extconfig file
            cur_ext_conf = cfg.singleslpposconf[slppos].extconfig
            if not os.path.exists(cur_ext_conf) and len(cfg.extractrange[slppos]) <= 1:
                raise RuntimeError('The input extract config file %s MUST exist when the '
                                   'value ranges setting is absent in the *.ini file!'
                                   % cur_ext_conf)
            else:
                with open(cur_ext_conf, 'r') as extconfig_info:
                    infos = extconfig_info.readlines()
                for line in infos:
                    splitstring = StringClass.split_string(line.split('\n')[0], '\t')
                    if StringClass.string_match(splitstring[0], 'Parameters') \
                            and len(splitstring) == 5 \
                            and splitstring[2] not in cfg.extractrange[slppos]:
                        cfg.extractrange[slppos][splitstring[2]] = [float(splitstring[3]),
                                                                    float(splitstring[4])]
            # rewrite the extconfig file
            extconfig_info = open(cur_ext_conf, 'w')
            extconfig_info.write('ProtoTag\t%d\n' % cfg.slppostag[i])
            param_num = len(cfg.extractrange[slppos])
            extconfig_info.write('ParametersNUM\t%d\n' % param_num)
            for vname, vrange in cfg.extractrange[slppos].iteritems():
                extconfig_info.write('Parameters\t%s\t%s\t%f\t%f\n' %
                                     (vname, cfg.selectedtopo[vname], vrange[0], vrange[1]))
            extconfig_info.write('OUTPUT\t%s\n' % cfg.singleslpposconf[slppos].typloc)
            extconfig_info.close()
        TauDEMExtension.selecttyplocslppos(cfg.proc, cur_ext_conf,
                                           cfg.singleslpposconf[slppos].infrecommend,
                                           cfg.singleslpposconf[slppos].extlog,
                                           cfg.ws.typloc_dir, cfg.mpi_dir, cfg.bin_dir,
                                           cfg.log.all, cfg.hostfile)
    print('Extraction of typical locations done!')
    # Combine extraction parameters.
    combine_ext_conf_parameters(cfg.slppostype, cfg.singleslpposconf,
                                cfg.slpposresult.extconfig)
    end_t = time.time()
    cost = (end_t - start_t) / 60.
    logf = open(cfg.log.runtime, 'a')
    logf.write('Selection of Typical Locations Time-consuming: ' + str(cost) + ' s\n')
    logf.close()
    return cost
def lookup_soil_parameters(dstdir, soiltype_file, soil_lookup_file, landuse_shapefile):
    """Reclassify soil parameters by lookup table."""
    # Read soil properties from txt file
    soil_lookup_data = read_data_items_from_txt(soil_lookup_file)
    soil_instances = list()
    soil_prop_flds = soil_lookup_data[0][:]
    for i in range(1, len(soil_lookup_data)):
        cur_soil_data_item = soil_lookup_data[i][:]
        cur_seqn = cur_soil_data_item[0]
        cur_sname = cur_soil_data_item[1]
        cur_soil_ins = SoilProperty(cur_seqn, cur_sname)
        for j in range(2, len(soil_prop_flds)):
            cur_flds = StringClass.split_string(cur_soil_data_item[j], '-')  # Get field values
            for k, tmpfld in enumerate(cur_flds):
                cur_flds[k] = float(tmpfld)  # Convert to float
            if StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NLYRS):
                cur_soil_ins.SOILLAYERS = int(cur_flds[0])
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._Z):
                cur_soil_ins.SOILDEPTH = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._OM):
                cur_soil_ins.OM = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._CLAY):
                cur_soil_ins.CLAY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SILT):
                cur_soil_ins.SILT = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SAND):
                cur_soil_ins.SAND = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ROCK):
                cur_soil_ins.ROCK = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ZMX):
                cur_soil_ins.SOL_ZMX = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ANIONEXCL):
                cur_soil_ins.ANION_EXCL = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._CRK):
                cur_soil_ins.SOL_CRK = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._BD):
                cur_soil_ins.DENSITY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._K):
                cur_soil_ins.CONDUCTIVITY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._WP):
                cur_soil_ins.WILTINGPOINT = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._FC):
                cur_soil_ins.FIELDCAP = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._AWC):
                cur_soil_ins.AWC = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._POROSITY):
                cur_soil_ins.POROSITY = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._USLE_K):
                cur_soil_ins.USLE_K = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ALB):
                cur_soil_ins.SOL_ALB = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ESCO):
                cur_soil_ins.ESCO = cur_flds[0]
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NO3):
                cur_soil_ins.SOL_NO3 = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NH4):
                cur_soil_ins.SOL_NH4 = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ORGN):
                cur_soil_ins.SOL_ORGN = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SOLP):
                cur_soil_ins.SOL_SOLP = cur_flds
            elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ORGP):
                cur_soil_ins.SOL_ORGP = cur_flds
        cur_soil_ins.check_data_validation()
        soil_instances.append(cur_soil_ins)
    soil_prop_dict = {}
    for sol in soil_instances:
        cur_sol_dict = sol.soil_dict()
        for fld in cur_sol_dict:
            if fld in soil_prop_dict:
                soil_prop_dict[fld].append(cur_sol_dict[fld])
            else:
                soil_prop_dict[fld] = [cur_sol_dict[fld]]
    # print(list(soilPropDict.keys()))
    # print(list(soilPropDict.values()))
    replace_dicts = list()
    dst_soil_tifs = list()
    sol_fld_name = list()
    seqns = soil_prop_dict[SoilUtilClass._SEQN]
    max_lyr_num = int(numpy.max(soil_prop_dict[SoilUtilClass._NLYRS]))
    for key in soil_prop_dict:
        if key != SoilUtilClass._SEQN and key != SoilUtilClass._NAME:
            key_l = 1
            for key_v in soil_prop_dict[key]:
                if isinstance(key_v, list):
                    if len(key_v) > key_l:
                        key_l = len(key_v)
            if key_l == 1:
                cur_dict = {}
                for i, tmpseq in enumerate(seqns):
                    cur_dict[float(tmpseq)] = soil_prop_dict[key][i]
                replace_dicts.append(cur_dict)
                dst_soil_tifs.append(dstdir + os.path.sep + key + '.tif')
                sol_fld_name.append(key)
            else:
                for i in range(max_lyr_num):
                    cur_dict = dict()
                    for j, tmpseq in enumerate(seqns):
                        if i < soil_prop_dict[SoilUtilClass._NLYRS][j]:
                            cur_dict[float(tmpseq)] = soil_prop_dict[key][j][i]
                        else:
                            cur_dict[float(seqns[j])] = DEFAULT_NODATA
                    replace_dicts.append(cur_dict)
                    dst_soil_tifs.append(dstdir + os.path.sep + key + '_' + str(i + 1) + '.tif')
                    sol_fld_name.append(key + '_' + str(i + 1))
    # print(replaceDicts)
    # print(len(replaceDicts))
    # print(dstSoilTifs)
    # print(len(dstSoilTifs))
    # Generate GTIFF
    soil_shp = r'D:\SEIMS\data\zts\data_prepare\spatial\soil_SEQN_all.shp'
    # landuse_basin = r'D:\SEIMS\data\zts\data_prepare\spatial\LanduseFinal_basin.shp'
    # for i, soil_tif in enumerate(sol_fld_name):
    #     print(soil_tif)
    #     SoilProperty.count_by_shp(soil_shp, landuse_shapefile, soil_tif, replace_dicts[i])
    RasterUtilClass.count_by_shp(soil_shp, landuse_shapefile, sol_fld_name, replace_dicts)
def boundary_adjustment(self):
    """Update BMP configuration units and related data according to gene_values,
    i.e., bmps_info and units_infos
    """
    if not self.cfg.boundary_adaptive:
        return
    if self.gene_num == self.cfg.units_num:
        return
    # 1. New filename of BMP configuration unit
    dist = '%s_%d' % (self.cfg.orignal_dist, self.ID)
    self.bmps_info[self.cfg.bmpid]['DISTRIBUTION'] = dist
    spfilename = StringClass.split_string(dist, '|')[1]
    # 2. Organize the slope position IDs and thresholds by hillslope ID
    #    Format: {HillslopeID: [rdgID, bksID, vlyID, T_bks2rdg, T_bks2vly], ...}
    slppos_threshs = dict()  # type: Dict[int, List]
    upperslppos = self.cfg.slppos_tagnames[0][1]  # Most upper slope position name
    for subbsnid, subbsndict in viewitems(self.cfg.units_infos['hierarchy_units']):
        for hillslpid, hillslpdict in viewitems(subbsndict):
            slppos_threshs[hillslpid] = list()
            for slppostag, slpposname in self.cfg.slppos_tagnames:
                slppos_threshs[hillslpid].append(hillslpdict[slpposname])
            upper_geneidx = self.cfg.unit_to_gene[hillslpdict[upperslppos]]
            thresh_idx = upper_geneidx + len(hillslpdict)
            thresh_idxend = thresh_idx + self.cfg.thresh_num
            slppos_threshs[hillslpid] += self.gene_values[thresh_idx:thresh_idxend]
    # 3. Delineate slope positions and get the updated information (landuse area, etc.)
    # 3.1 Erase current data in units_infos
    for itag, iname in self.cfg.slppos_tagnames:
        if iname not in self.cfg.units_infos:
            continue
        for sid, datadict in viewitems(self.cfg.units_infos[iname]):
            self.cfg.units_infos[iname][sid]['area'] = 0.
            for luid in self.cfg.units_infos[iname][sid]['landuse']:
                self.cfg.units_infos[iname][sid]['landuse'][luid] = 0.
    # 3.2 Delineate slope positions and get data by subbasin
    #     The whole watershed will be generated for both versions
    hillslp_data = DelinateSlopePositionByThreshold(self.modelcfg, slppos_threshs,
                                                    self.cfg.slppos_tag_gfs,
                                                    spfilename, subbsn_id=0)
    # 3.3 Update units_infos
    for tagname, slpposdict in viewitems(hillslp_data):
        for sid, datadict in viewitems(slpposdict):
            self.cfg.units_infos[tagname][sid]['area'] += hillslp_data[tagname][sid]['area']
            for luid in hillslp_data[tagname][sid]['landuse']:
                if luid not in self.cfg.units_infos[tagname][sid]['landuse']:
                    self.cfg.units_infos[tagname][sid]['landuse'][luid] = 0.
                newlanduse_area = hillslp_data[tagname][sid]['landuse'][luid]
                self.cfg.units_infos[tagname][sid]['landuse'][luid] += newlanduse_area
    if self.modelcfg.version.upper() == 'MPI':
        for tmp_subbsnid in range(1, self.model.SubbasinCount + 1):
            DelinateSlopePositionByThreshold(self.modelcfg, slppos_threshs,
                                             self.cfg.slppos_tag_gfs, spfilename,
                                             subbsn_id=tmp_subbsnid)
def export_scenario_to_gtiff(self, outpath=None):
    # type: (Optional[str]) -> None
    """Export scenario to GTiff.

    TODO: Reading rasters from MongoDB should be extracted to pygeoc.
    """
    if not self.export_sce_tif:
        return
    dist = self.bmps_info[self.cfg.bmpid]['DISTRIBUTION']
    dist_list = StringClass.split_string(dist, '|')
    if len(dist_list) >= 2 and dist_list[0] == 'RASTER':
        dist_name = '0_' + dist_list[1]  # prefix 0_ means the whole basin
        # read dist_name from MongoDB
        # client = ConnectMongoDB(self.modelcfg.host, self.modelcfg.port)
        # conn = client.get_conn()
        conn = MongoDBObj.client
        maindb = conn[self.modelcfg.db_name]
        spatial_gfs = GridFS(maindb, DBTableNames.gridfs_spatial)
        # read file from mongodb
        if not spatial_gfs.exists(filename=dist_name):
            print('WARNING: %s does not exist, export scenario failed!' % dist_name)
            return
        try:
            slpposf = maindb[DBTableNames.gridfs_spatial].files.find({'filename': dist_name},
                                                                     no_cursor_timeout=True)[0]
        except (NetworkTimeout, Exception):
            # In case of an unexpected raise
            # client.close()
            return
        ysize = int(slpposf['metadata'][RasterMetadata.nrows])
        xsize = int(slpposf['metadata'][RasterMetadata.ncols])
        xll = slpposf['metadata'][RasterMetadata.xll]
        yll = slpposf['metadata'][RasterMetadata.yll]
        cellsize = slpposf['metadata'][RasterMetadata.cellsize]
        nodata_value = slpposf['metadata'][RasterMetadata.nodata]
        srs = slpposf['metadata'][RasterMetadata.srs]
        if is_string(srs):
            srs = str(srs)
        srs = osr.GetUserInputAsWKT(srs)
        geotransform = [0] * 6
        geotransform[0] = xll - 0.5 * cellsize
        geotransform[1] = cellsize
        geotransform[3] = yll + (ysize - 0.5) * cellsize  # yMax
        geotransform[5] = -cellsize
        slppos_data = spatial_gfs.get(slpposf['_id'])
        total_len = xsize * ysize
        fmt = '%df' % (total_len,)
        slppos_data = unpack(fmt, slppos_data.read())
        slppos_data = numpy.reshape(slppos_data, (ysize, xsize))
        v_dict = dict()
        for unitidx, geneidx in viewitems(self.cfg.unit_to_gene):
            v_dict[unitidx] = self.gene_values[geneidx]
        # Deprecated and replaced by using self.cfg.unit_to_gene. 03/14/2019. ljzhu.
        # for idx, gene_v in enumerate(self.gene_values):
        #     v_dict[self.cfg.gene_to_unit[idx]] = gene_v
        for k, v in v_dict.items():
            slppos_data[slppos_data == k] = v
        if outpath is None:
            outpath = self.scenario_dir + os.path.sep + 'Scenario_%d.tif' % self.ID
        RasterUtilClass.write_gtiff_file(outpath, ysize, xsize, slppos_data,
                                         geotransform, srs, nodata_value)
def model_io_configuration(cfg, maindb):
    """Import the input and output configuration of SEIMS, i.e., file.in and file.out

    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object
    """
    file_in_path = cfg.modelcfgs.filein
    file_out_path = cfg.paramcfgs.init_outputs_file
    # initialize if the collection does not exist
    c_list = maindb.collection_names()
    conf_tabs = [DBTableNames.main_filein, DBTableNames.main_fileout]
    for item in conf_tabs:
        if not StringClass.string_in_list(item, c_list):
            maindb.create_collection(item)
        else:
            maindb.drop_collection(item)
    file_in_items = read_data_items_from_txt(file_in_path)
    for item in file_in_items:
        file_in_dict = dict()
        values = StringClass.split_string(item[0].strip(), ['|'])
        if len(values) != 2:
            raise ValueError('One item should only have one Tag and one value string,'
                             ' split by "|"')
        file_in_dict[ModelCfgFields.tag] = values[0]
        file_in_dict[ModelCfgFields.value] = values[1]
        maindb[DBTableNames.main_filein].insert(file_in_dict)
    # begin to import initial outputs settings
    file_out_items = read_data_items_from_txt(file_out_path)
    bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
    out_field_array = file_out_items[0]
    # print(out_data_array)

    def read_output_item(output_fields, item):
        file_out_dict = dict()
        for i, v in enumerate(output_fields):
            if StringClass.string_match(ModelCfgFields.mod_cls, v):
                file_out_dict[ModelCfgFields.mod_cls] = item[i]
            elif StringClass.string_match(ModelCfgFields.output_id, v):
                file_out_dict[ModelCfgFields.output_id] = item[i]
            elif StringClass.string_match(ModelCfgFields.desc, v):
                file_out_dict[ModelCfgFields.desc] = item[i]
            elif StringClass.string_match(ModelCfgFields.unit, v):
                file_out_dict[ModelCfgFields.unit] = item[i]
            elif StringClass.string_match(ModelCfgFields.type, v):
                file_out_dict[ModelCfgFields.type] = item[i]
            elif StringClass.string_match(ModelCfgFields.stime, v):
                file_out_dict[ModelCfgFields.stime] = item[i]
            elif StringClass.string_match(ModelCfgFields.etime, v):
                file_out_dict[ModelCfgFields.etime] = item[i]
            elif StringClass.string_match(ModelCfgFields.interval, v):
                file_out_dict[ModelCfgFields.interval] = item[i]
            elif StringClass.string_match(ModelCfgFields.interval_unit, v):
                file_out_dict[ModelCfgFields.interval_unit] = item[i]
            elif StringClass.string_match(ModelCfgFields.filename, v):
                file_out_dict[ModelCfgFields.filename] = item[i]
            elif StringClass.string_match(ModelCfgFields.use, v):
                file_out_dict[ModelCfgFields.use] = item[i]
            elif StringClass.string_match(ModelCfgFields.subbsn, v):
                file_out_dict[ModelCfgFields.subbsn] = item[i]
        if not list(file_out_dict.keys()):
            raise ValueError('There is not any valid output item stored in file.out!')
        return file_out_dict

    for idx, iitem in enumerate(file_out_items):
        if idx == 0:
            continue
        iitem_dict = read_output_item(out_field_array, iitem)
        bulk.insert(iitem_dict)
    MongoUtil.run_bulk(bulk, 'No operations to execute when importing initial outputs settings.')
    # begin to import the desired outputs
    # initialize bulk operator
    bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
    # read initial parameters from txt file
    data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
    # print(field_names)
    user_out_field_array = data_items[0]
    if ModelCfgFields.output_id not in user_out_field_array:
        if len(data_items[0]) != 7:  # For the compatibility of old code!
            raise RuntimeError('If header information is not provided,'
                               ' items in file.out must have 7 columns, i.e., OUTPUTID,'
                               'TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.'
                               ' Otherwise, the OUTPUTID MUST exist in the header!')
        user_out_field_array = ['OUTPUTID', 'TYPE', 'STARTTIME', 'ENDTIME',
                                'INTERVAL', 'INTERVAL_UNIT', 'SUBBASIN']
        data_items.insert(0, user_out_field_array)
    for idx, iitem in enumerate(data_items):
        if idx == 0:
            continue
        data_import = read_output_item(user_out_field_array, iitem)
        data_import[ModelCfgFields.use] = 1
        cur_filter = dict()
        cur_filter[ModelCfgFields.output_id] = data_import[ModelCfgFields.output_id]
        bulk.find(cur_filter).update({'$set': data_import})
    # execute import operators
    MongoUtil.run_bulk(bulk, 'No operations to execute when importing the desired outputs.')
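# A minimal sketch of a header-less file.out entry consumed by model_io_configuration
# above, using the 7-column layout named in the error message (OUTPUTID, TYPE, STARTTIME,
# ENDTIME, INTERVAL, INTERVAL_UNIT, SUBBASIN); fields are TAB- or COMMA-separated per
# read_data_items_from_txt, and the values shown here are illustrative only:
#
#     QRECH,SUM,2012-01-01 00:00:00,2012-12-31 23:59:59,1,DAY,0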
def read_optiontyploc_section(self, _opttyploc):
    """Optional parameter-settings for typical locations selection."""
    if _opttyploc not in self.cf.sections():
        return
    # handling slope position types and tags
    if self.cf.has_option(_opttyploc, 'slopepositiontypes'):
        self.slppostype = list()
        typstrs = self.cf.get(_opttyploc, 'slopepositiontypes')
        self.slppostype = StringClass.split_string(typstrs.lower(), ',')
    else:  # the five slope position system will be adopted.
        pass
    if self.cf.has_option(_opttyploc, 'slopepositiontags'):
        self.slppostag = list()
        tagstrs = self.cf.get(_opttyploc, 'slopepositiontags')
        self.slppostag = StringClass.extract_numeric_values_from_string(tagstrs)
        if len(self.slppostag) != len(self.slppostype):
            raise RuntimeError("The input numbers of slope position types and "
                               "tags are not the same!")
    else:
        self.slppostag = list()
        for i in range(len(self.slppostype)):
            self.slppostag.append(pow(2, i))
    for typ in self.slppostype:
        self.singleslpposconf[typ] = SingleSlpPosFiles(self.ws, typ)
    # handling selected topographic attributes
    if self.cf.has_option(_opttyploc, 'terrainattrdict'):
        self.selectedtopolist = list()
        self.selectedtopo = dict()
        terrain_attr_dict_str = self.cf.get(_opttyploc, 'terrainattrdict')
        attrpath_strs = StringClass.split_string(terrain_attr_dict_str, ';')
        for i, singattr in enumerate(attrpath_strs):
            ap = StringClass.split_string(singattr, ',')
            attrname = ap[0].lower()
            if i == 0 and not StringClass.string_match(attrname, 'rpi'):
                attrname = 'rpi'
            self.selectedtopolist.append(attrname)
            attrpath = self.topoparam.get_attr_file(attrname)
            if attrpath is not None:
                self.selectedtopo[attrname] = attrpath
            else:
                # this should be a user-defined attribute, and it must have a valid file path
                if len(ap) != 2:
                    raise RuntimeError("User defined topographic attribute (%s) MUST have "
                                       "an existing file path!" % singattr)
                attrp = AutoFuzSlpPosConfig.check_file_available(ap[1])
                if attrp is None:
                    raise RuntimeError("User defined topographic attribute (%s) MUST have "
                                       "an existing file path!" % singattr)
                self.selectedtopo[attrname] = attrp
                is_regional = False
                if i == 0:  # the first one is a regional attribute
                    is_regional = True
                self.topoparam.add_user_defined_attribute(attrname, attrp, is_regional)
    # handling several parameters used in extracting typical locations
    if self.cf.has_option(_opttyploc, 'typlocextractparam'):
        self.param4typloc = dict()
        base_param_str = self.cf.get(_opttyploc, 'typlocextractparam')
        base_param_floats = StringClass.extract_numeric_values_from_string(base_param_str)
        defnum = len(self._DEFAULT_PARAM_TYPLOC)
        if len(base_param_floats) == defnum:
            for slppos in self.slppostype:
                self.param4typloc[slppos] = base_param_floats[:]
        elif len(base_param_floats) == len(self.slppostype) * defnum:
            for i, slppos in enumerate(self.slppostype):
                self.param4typloc[slppos] = base_param_floats[i * defnum:(i + 1) * defnum]
        else:
            raise RuntimeError("TyplocExtractParam MUST have a number of values equal to "
                               "%d or %d!" % (defnum, len(self.slppostype) * defnum))
    else:
        for slppos in self.slppostype:
            self.param4typloc[slppos] = self._DEFAULT_PARAM_TYPLOC[:]
    # handling pre-defined fuzzy membership function shapes of each terrain attribute
    # for each slope position
    if self.cf.has_option(_opttyploc, 'fuzinfdefault'):
        self.infshape = dict()
        fuz_inf_shp_strs = self.cf.get(_opttyploc, 'fuzinfdefault')
        # inference shapes are separated by SEMICOLON between slope positions
        fuz_inf_shp_types = StringClass.split_string(fuz_inf_shp_strs, ';')
        if len(fuz_inf_shp_types) != len(self.slppostype):
            raise RuntimeError("FuzInfDefault (%s) MUST be consistent with slope position types"
                               " and separated by ';'!" % fuz_inf_shp_strs)
        for i, slppos in enumerate(self.slppostype):
            self.infshape[slppos] = dict()
            # inference shapes are separated by COMMA between topographic attributes
            infshps = StringClass.split_string(fuz_inf_shp_types[i], ',')
            if len(infshps) != len(self.selectedtopolist):
                raise RuntimeError("FuzInfDefault (%s) for each slope position MUST have "
                                   "the same size as TerrainAttrDict!" % fuz_inf_shp_types[i])
            for j, attrn in enumerate(self.selectedtopolist):
                self.infshape[slppos][attrn] = infshps[j]
    else:
        if len(self.slppostype) != 5:
            raise RuntimeError("Only the fuzzy membership function shapes of the "
                               "5 slope position system are built-in. For other "
                               "classification systems, please set them as input!")
    # handling value ranges of terrain attributes for extracting prototypes
    if self.cf.has_option(_opttyploc, 'valueranges'):
        self.extractrange = dict()
        value_rng_strs = self.cf.get(_opttyploc, 'valueranges')
        value_rng_types = StringClass.split_string(value_rng_strs, ';')
        if len(value_rng_types) != len(self.slppostype):
            raise RuntimeError("ValueRanges (%s) MUST be consistent with slope position types"
                               " and separated by ';'!" % value_rng_strs)
        for i, slppos in enumerate(self.slppostype):
            self.extractrange[slppos] = dict()
            value_rngs = StringClass.extract_numeric_values_from_string(value_rng_types[i])
            if len(value_rngs) == 0 or len(value_rngs) % 3 != 0:
                raise RuntimeError("Each item of ValueRanges MUST contain three elements,"
                                   " i.e., Attribute No., Min, Max! Please check item: "
                                   "%s for %s." % (value_rng_types[i], slppos))
            for j in range(int(len(value_rngs) / 3)):
                attridx = int(value_rngs[j * 3]) - 1
                attrname = self.selectedtopolist[attridx]
                min_v = value_rngs[j * 3 + 1]
                max_v = value_rngs[j * 3 + 2]
                self.extractrange[slppos][attrname] = [min_v, max_v]
    else:
        if len(self.slppostype) != 5:
            raise RuntimeError("Only the extract value ranges of the "
                               "5 slope position system are built-in. For other "
                               "classification systems, please set them as input!")
def lookup_soil_parameters(dstdir, soiltype_file, soil_lookup_file): """Reclassify soil parameters by lookup table.""" # Read soil properties from txt file soil_lookup_data = read_data_items_from_txt(soil_lookup_file) soil_instances = list() soil_prop_flds = soil_lookup_data[0][:] for i in range(1, len(soil_lookup_data)): cur_soil_data_item = soil_lookup_data[i][:] cur_seqn = cur_soil_data_item[0] cur_sname = cur_soil_data_item[1] cur_soil_ins = SoilProperty(cur_seqn, cur_sname) for j in range(2, len(soil_prop_flds)): cur_flds = StringClass.split_string(cur_soil_data_item[j], '-') # Get field values for k, tmpfld in enumerate(cur_flds): cur_flds[k] = float(tmpfld) # Convert to float if StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NLYRS): cur_soil_ins.SOILLAYERS = int(cur_flds[0]) elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._Z): cur_soil_ins.SOILDEPTH = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._OM): cur_soil_ins.OM = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._CLAY): cur_soil_ins.CLAY = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SILT): cur_soil_ins.SILT = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SAND): cur_soil_ins.SAND = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ROCK): cur_soil_ins.ROCK = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ZMX): cur_soil_ins.SOL_ZMX = cur_flds[0] elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ANIONEXCL): cur_soil_ins.ANION_EXCL = cur_flds[0] elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._CRK): cur_soil_ins.SOL_CRK = cur_flds[0] elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._BD): cur_soil_ins.DENSITY = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._K): cur_soil_ins.CONDUCTIVITY = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._WP): cur_soil_ins.WILTINGPOINT = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._FC): cur_soil_ins.FIELDCAP = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._AWC): cur_soil_ins.AWC = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._POROSITY): cur_soil_ins.POROSITY = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._USLE_K): cur_soil_ins.USLE_K = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ALB): cur_soil_ins.SOL_ALB = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ESCO): cur_soil_ins.ESCO = cur_flds[0] elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NO3): cur_soil_ins.SOL_NO3 = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NH4): cur_soil_ins.SOL_NH4 = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ORGN): cur_soil_ins.SOL_ORGN = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SOLP): cur_soil_ins.SOL_SOLP = cur_flds elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ORGP): cur_soil_ins.SOL_ORGP = cur_flds cur_soil_ins.check_data_validation() soil_instances.append(cur_soil_ins) soil_prop_dict = dict() for sol in soil_instances: cur_sol_dict = sol.soil_dict() for fld in cur_sol_dict: if fld in soil_prop_dict: soil_prop_dict[fld].append(cur_sol_dict[fld]) else: soil_prop_dict[fld] = [cur_sol_dict[fld]] # print(list(soilPropDict.keys())) # print(list(soilPropDict.values())) 
replace_dicts = list() dst_soil_tifs = list() seqns = soil_prop_dict[SoilUtilClass._SEQN] max_lyr_num = int(numpy.max(soil_prop_dict[SoilUtilClass._NLYRS])) for key in soil_prop_dict: if key != SoilUtilClass._SEQN and key != SoilUtilClass._NAME: key_l = 1 for key_v in soil_prop_dict[key]: if isinstance(key_v, list): if len(key_v) > key_l: key_l = len(key_v) if key_l == 1: cur_dict = {} for i, tmpseq in enumerate(seqns): cur_dict[float(tmpseq)] = soil_prop_dict[key][i] replace_dicts.append(cur_dict) dst_soil_tifs.append(dstdir + os.path.sep + key + '.tif') else: for i in range(max_lyr_num): cur_dict = dict() for j, tmpseq in enumerate(seqns): if i < soil_prop_dict[SoilUtilClass._NLYRS][j]: cur_dict[float( tmpseq)] = soil_prop_dict[key][j][i] else: cur_dict[float(seqns[j])] = DEFAULT_NODATA replace_dicts.append(cur_dict) dst_soil_tifs.append(dstdir + os.path.sep + key + '_' + str(i + 1) + '.tif') # print(replaceDicts) # print(len(replaceDicts)) # print(dstSoilTifs) # print(len(dstSoilTifs)) # Generate GTIFF for i, soil_tif in enumerate(dst_soil_tifs): print(soil_tif) RasterUtilClass.raster_reclassify(soiltype_file, replace_dicts[i], soil_tif)
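# A hedged sketch of the soil lookup table consumed by lookup_soil_parameters() (the property field
# names beyond the first two columns are assumptions; they only need to match the SoilUtilClass
# constants checked above). Multi-layer values share one cell and are joined by '-', then split via
# StringClass.split_string(v, '-'):
#
#   SEQN  SNAM    NLAYERS  SOL_Z        SOL_OM       SOL_BD
#   201   LoamA   3        100-300-500  2.1-0.9-0.4  1.30-1.40-1.45
#   202   ClayB   2        150-400      1.8-0.7      1.35-1.50
#
# One reclassified GeoTIFF is written per property, with a '_<layer>' suffix for layered properties.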
def data_from_txt(maindb, hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file): """ Read observed data from txt file Args: maindb: Main spatial database hydro_clim_db: hydro-climate dababase obs_txts_list: txt file paths of observed data sites_info_txts_list: txt file paths of site information subbsn_file: subbasin raster file Returns: True or False """ # 1. Read monitor station information, and store variables information and station IDs variable_lists = [] site_ids = [] for site_file in sites_info_txts_list: site_data_items = read_data_items_from_txt(site_file) site_flds = site_data_items[0] for i in range(1, len(site_data_items)): dic = dict() types = list() units = list() for j, v in enumerate(site_data_items[i]): if StringClass.string_match(site_flds[j], StationFields.id): dic[StationFields.id] = int(v) site_ids.append(dic[StationFields.id]) elif StringClass.string_match(site_flds[j], StationFields.name): dic[StationFields.name] = v.strip() elif StringClass.string_match(site_flds[j], StationFields.type): types = StringClass.split_string(v.strip(), '-') elif StringClass.string_match(site_flds[j], StationFields.lat): dic[StationFields.lat] = float(v) elif StringClass.string_match(site_flds[j], StationFields.lon): dic[StationFields.lon] = float(v) elif StringClass.string_match(site_flds[j], StationFields.x): dic[StationFields.x] = float(v) elif StringClass.string_match(site_flds[j], StationFields.y): dic[StationFields.y] = float(v) elif StringClass.string_match(site_flds[j], StationFields.unit): units = StringClass.split_string(v.strip(), '-') elif StringClass.string_match(site_flds[j], StationFields.elev): dic[StationFields.elev] = float(v) elif StringClass.string_match(site_flds[j], StationFields.outlet): dic[StationFields.outlet] = float(v) for j, cur_type in enumerate(types): site_dic = dict() site_dic[StationFields.id] = dic[StationFields.id] site_dic[StationFields.name] = dic[StationFields.name] site_dic[StationFields.type] = cur_type site_dic[StationFields.lat] = dic[StationFields.lat] site_dic[StationFields.lon] = dic[StationFields.lon] site_dic[StationFields.x] = dic[StationFields.x] site_dic[StationFields.y] = dic[StationFields.y] site_dic[StationFields.unit] = units[j] site_dic[StationFields.elev] = dic[StationFields.elev] site_dic[StationFields.outlet] = dic[StationFields.outlet] # Add SubbasinID field matched, cur_sids = ImportObservedData.match_subbasin( subbsn_file, site_dic, maindb) if not matched: break if len(cur_sids ) == 1: # if only one subbasin ID, store integer cur_subbsn_id_str = cur_sids[0] else: cur_subbsn_id_str = ','.join( str(cid) for cid in cur_sids if cur_sids is None) site_dic[StationFields.subbsn] = cur_subbsn_id_str curfilter = { StationFields.id: site_dic[StationFields.id], StationFields.type: site_dic[StationFields.type] } # print(curfilter) hydro_clim_db[DBTableNames.sites].find_one_and_replace( curfilter, site_dic, upsert=True) var_dic = dict() var_dic[StationFields.type] = types[j] var_dic[StationFields.unit] = units[j] if var_dic not in variable_lists: variable_lists.append(var_dic) site_ids = list(set(site_ids)) # 2. Read measurement data and import to MongoDB bulk = hydro_clim_db[ DBTableNames.observes].initialize_ordered_bulk_op() count = 0 for measDataFile in obs_txts_list: # print(measDataFile) obs_data_items = read_data_items_from_txt(measDataFile) tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file( measDataFile) # If the data items is EMPTY or only have one header row, then goto # next data file. 
if obs_data_items == [] or len(obs_data_items) == 1: continue obs_flds = obs_data_items[0] required_flds = [ StationFields.id, DataValueFields.type, DataValueFields.value ] for fld in required_flds: if not StringClass.string_in_list( fld, obs_flds): # data can not meet the request! raise ValueError( 'The %s can not meet the required format!' % measDataFile) for i, cur_obs_data_item in enumerate(obs_data_items): dic = dict() if i == 0: continue for j, cur_data_value in enumerate(cur_obs_data_item): if StringClass.string_match(obs_flds[j], StationFields.id): dic[StationFields.id] = int(cur_data_value) # if current site ID is not included, goto next data item if dic[StationFields.id] not in site_ids: continue elif StringClass.string_match(obs_flds[j], DataValueFields.type): dic[DataValueFields.type] = cur_data_value elif StringClass.string_match(obs_flds[j], DataValueFields.value): dic[DataValueFields.value] = float(cur_data_value) utc_t = HydroClimateUtilClass.get_utcdatetime_from_field_values( obs_flds, cur_obs_data_item, tsysin, tzonein) dic[DataValueFields.local_time] = utc_t - timedelta( minutes=tzonein * 60) dic[DataValueFields.time_zone] = tzonein dic[DataValueFields.utc] = utc_t # curfilter = {StationFields.id: dic[StationFields.id], # DataValueFields.type: dic[DataValueFields.type], # DataValueFields.utc: dic[DataValueFields.utc]} # bulk.find(curfilter).replace_one(dic) bulk.insert(dic) count += 1 if count % 500 == 0: MongoUtil.run_bulk(bulk) bulk = hydro_clim_db[ DBTableNames.observes].initialize_ordered_bulk_op() # db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True) if count % 500 != 0: MongoUtil.run_bulk(bulk) # 3. Add measurement data with unit converted # loop variables list added_dics = list() for curVar in variable_lists: # print(curVar) # if the unit is mg/L, then change the Type name with the suffix 'Conc', # and convert the corresponding data to kg if the discharge data is # available. cur_type = curVar[StationFields.type] cur_unit = curVar[StationFields.unit] # Find data by Type for item in hydro_clim_db[DBTableNames.observes].find( {StationFields.type: cur_type}): # print(item) dic = dict() dic[StationFields.id] = item[StationFields.id] dic[DataValueFields.value] = item[DataValueFields.value] dic[StationFields.type] = item[StationFields.type] dic[DataValueFields.local_time] = item[ DataValueFields.local_time] dic[DataValueFields.time_zone] = item[ DataValueFields.time_zone] dic[DataValueFields.utc] = item[DataValueFields.utc] if cur_unit == 'mg/L' or cur_unit == 'g/L': # update the Type name dic[StationFields.type] = '%sConc' % cur_type curfilter = { StationFields.id: dic[StationFields.id], DataValueFields.type: cur_type, DataValueFields.utc: dic[DataValueFields.utc] } hydro_clim_db[DBTableNames.observes].find_one_and_replace( curfilter, dic, upsert=True) dic[StationFields.type] = cur_type # find discharge on current day cur_filter = { StationFields.type: 'Q', DataValueFields.utc: dic[DataValueFields.utc], StationFields.id: dic[StationFields.id] } q_dic = hydro_clim_db[DBTableNames.observes].find_one( filter=cur_filter) if q_dic is not None: q = q_dic[DataValueFields.value] else: continue if cur_unit == 'mg/L': # convert mg/L to kg dic[DataValueFields.value] = round( dic[DataValueFields.value] * q * 86400. 
/ 1000., 2) elif cur_unit == 'g/L': # convert g/L to kg dic[DataValueFields.value] = round( dic[DataValueFields.value] * q * 86400., 2) elif cur_unit == 'kg': dic[StationFields.type] = '%sConc' % cur_type # convert kg to mg/L dic[DataValueFields.value] = round( dic[DataValueFields.value] / q * 1000. / 86400., 2) # add new data item added_dics.append(dic) # import to MongoDB for dic in added_dics: curfilter = { StationFields.id: dic[StationFields.id], DataValueFields.type: dic[DataValueFields.type], DataValueFields.utc: dic[DataValueFields.utc] } hydro_clim_db[DBTableNames.observes].find_one_and_replace( curfilter, dic, upsert=True)
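# Standalone worked example of the concentration-to-load conversion applied above (illustrative
# numbers only): a concentration in mg/L times discharge Q in m3/s over one day gives kilograms,
#   load_kg = conc * Q * 86400 / 1000
# e.g., 2.5 mg/L at Q = 3.0 m3/s -> 2.5 * 3.0 * 86400 / 1000 = 648.0 kg per day.
def daily_load_kg(conc_mg_per_l, q_m3_per_s):
    """Convert concentration (mg/L) and discharge (m3/s) into a daily load (kg), rounded to 2 digits."""
    return round(conc_mg_per_l * q_m3_per_s * 86400. / 1000., 2)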
def read_pareto_points_from_txt(txt_file, sce_name, headers, labels=None): # type: (AnyStr, AnyStr, List[AnyStr], Optional[List[AnyStr]]) -> (Dict[int, Union[List, numpy.ndarray]], Dict[int, int]) """Read Pareto points from `runtime.log` file. Args: txt_file: Full file path of `runtime.log` output by NSGA2 algorithm. sce_name: Field name followed by `generation`, e.g., 'calibrationID', 'scenarioID', etc. headers: Filed names in header for each dimension of Pareto front labels: (Optional) Labels corresponding to `headers` for Pareto graphs Returns: pareto_points: `OrderedDict`, key is generation ID, value is Pareto front array pareto_popnum: `OrderedDict`, key is generation ID, value is newly model runs number """ with open(txt_file, 'r', encoding='utf-8') as f: lines = f.readlines() pareto_points = OrderedDict() pareto_popnum = OrderedDict() found = False cur_gen = -1 iden_idx = -1 new_headers = headers[:] for i, hd in enumerate(new_headers): new_headers[i] = hd.upper() if labels is None: labels = headers[:] headers_idx = list() new_labels = list() for lno, line in enumerate(lines): str_line = line for LF in LFs: if LF in line: str_line = line.split(LF)[0] break if str_line == '': continue values = StringClass.extract_numeric_values_from_string(str_line) # Check generation if str_line[0] == '#' and 'Generation' in str_line: if len(values) != 1: continue # e.g., ###### Generation: 23 ###### gen = int(values[0]) found = True cur_gen = gen pareto_popnum[cur_gen] = list() pareto_points[cur_gen] = list() continue if not found: # If the first "###### Generation: 1 ######" has not been found. continue line_list = StringClass.split_string(str_line.upper(), ['\t']) if values is None: # means header line if headers_idx and new_labels: continue for idx, v in enumerate(line_list): if sce_name.upper() in v.upper(): iden_idx = idx break for fldno, fld in enumerate(new_headers): if fld in line_list: tmpidx = line_list.index(fld) headers_idx.append(tmpidx) new_labels.append(labels[fldno]) continue if iden_idx < 0: continue # now append the real Pareto front point data tmpvalues = list() for tmpidx in headers_idx: tmpvalues.append(StringClass.extract_numeric_values_from_string(line_list[tmpidx])[0]) pareto_points[cur_gen].append(tmpvalues[:]) iden_str = line_list[iden_idx] # e.g., 1-44 iden_strs = iden_str.split('-') if len(iden_strs) == 1: pareto_popnum[cur_gen].append(int(iden_strs[0])) if len(iden_strs) == 2: pareto_popnum.setdefault(int(iden_strs[0]), list()) pareto_popnum[int(iden_strs[0])].append(int(iden_strs[1])) return pareto_points, pareto_popnum
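# A hedged illustration (assumed layout, not from the source) of a runtime.log fragment that
# read_pareto_points_from_txt() can parse. Columns are tab-separated; the identifier column named by
# `sce_name` holds '<generation>-<individual>' strings:
#
#   ###### Generation: 1 ######
#   CALIBRATIONID   NSE    RSR
#   1-1             0.52   0.68
#   1-2             0.61   0.63
#
# Called with sce_name='calibrationID' and headers=['NSE', 'RSR'], this would yield
# pareto_points[1] == [[0.52, 0.68], [0.61, 0.63]] and pareto_popnum[1] == [1, 2].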
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time, eliminate_zero=False, time_sys_output='UTCTIME', day_divided_hour=0): """ Interpolate not regular observed data to regular time interval data. Args: in_file: input data file, the basic format is as follows: line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME line 2: DATETIME,field1,field2,... line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,... line 4: ... ... Field name can be PCP, FLOW, SED the unit is mm/h, m3/s, g/L (i.e., kg/m3), respectively. time_interval: time interval, unit is minute, e.g., daily output is 1440 start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system is based on time_sys. end_time: end time, see also start_time. eliminate_zero: Boolean flag. If true, the time interval without original records will not be output. time_sys_output: time system of output time_system, the format must be '<time_system> [<time_zone>]', e.g., 'LOCALTIME' 'LOCALTIME 8' 'UTCTIME' (default) day_divided_hour: If the time_interval is equal to N*1440, this parameter should be carefully specified. The value must range from 0 to 23. e.g., day_divided_hour ==> day ranges (all expressed as 2013-02-03) 0 ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default) 8 ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59 20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59 Returns: The output data files are located in the same directory with the input file. The nomenclature is: <field name>_<time system>_<time interval>_<nonzero>, e.g., pcp_utctime_1440_nonzero.txt, flow_localtime_60.txt """ FileClass.check_file_exists(in_file) time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file) data_items = read_data_items_from_txt(in_file) flds = data_items[0][:] data_items.remove(flds) if not 0 <= day_divided_hour <= 23: raise ValueError('Day divided hour must range from 0 to 23!') try: date_idx = flds.index('DATETIME') flds.remove('DATETIME') except ValueError: raise ValueError('DATETIME must be one of the fields!') # available field available_flds = ['FLOW', 'SED', 'PCP'] def check_avaiable_field(cur_fld): """Check if the given field name is supported.""" support_flag = False for fff in available_flds: if fff.lower() in cur_fld.lower(): support_flag = True break return support_flag ord_data = OrderedDict() time_zone_output = time.timezone / -3600 if time_sys_output.lower().find('local') >= 0: tmpstrs = StringClass.split_string(time_sys_output, [' ']) if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]): time_zone_output = int(tmpstrs[1]) time_sys_output = 'LOCALTIME' else: time_sys_output = 'UTCTIME' time_zone_output = 0 for item in data_items: org_datetime = StringClass.get_datetime(item[date_idx]) if time_sys_input == 'LOCALTIME': org_datetime -= timedelta(hours=time_zone_input) # now, org_datetime is UTC time. 
if time_sys_output == 'LOCALTIME': org_datetime += timedelta(hours=time_zone_output) # now, org_datetime is consistent with the output time system ord_data[org_datetime] = list() for i, v in enumerate(item): if i == date_idx: continue if MathClass.isnumerical(v): ord_data[org_datetime].append(float(v)) else: ord_data[org_datetime].append(v) # print(ord_data) itp_data = OrderedDict() out_time_delta = timedelta(minutes=time_interval) sdatetime = StringClass.get_datetime(start_time) edatetime = StringClass.get_datetime(end_time) item_dtime = sdatetime if time_interval % 1440 == 0: item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \ timedelta(minutes=day_divided_hour * 60) while item_dtime <= edatetime: # print(item_dtime) # if item_dtime.month == 12 and item_dtime.day == 31: # print("debug") sdt = item_dtime # start datetime of records edt = item_dtime + out_time_delta # end datetime of records # get original data items org_items = list() pre_dt = list(ord_data.keys())[0] pre_added = False for i, v in list(ord_data.items()): if sdt <= i < edt: if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta: # only add one item that less than sdt. org_items.append([pre_dt] + ord_data.get(pre_dt)) pre_added = True org_items.append([i] + v) if i > edt: break pre_dt = i if len(org_items) > 0: org_items.append([edt]) # Just add end time for compute convenient if org_items[0][0] < sdt: org_items[0][0] = sdt # set the begin datetime of current time interval # if eliminate time interval without original records # initial interpolated list itp_data[item_dtime] = [0.] * len(flds) if len(org_items) == 0: if eliminate_zero: itp_data.popitem() item_dtime += out_time_delta continue # core interpolation code flow_idx = -1 for v_idx, v_name in enumerate(flds): if not check_avaiable_field(v_name): continue if 'SED' in v_name.upper(): # FLOW must be existed for v_idx2, v_name2 in enumerate(flds): if 'FLOW' in v_name2.upper(): flow_idx = v_idx2 break if flow_idx < 0: raise RuntimeError('To interpolate SED, FLOW must be provided!') for v_idx, v_name in enumerate(flds): if not check_avaiable_field(v_name): continue itp_value = 0. itp_auxiliary_value = 0. for org_item_idx, org_item_dtv in enumerate(org_items): if org_item_idx == 0: continue org_item_dt = org_item_dtv[0] pre_item_dtv = org_items[org_item_idx - 1] pre_item_dt = pre_item_dtv[0] tmp_delta_dt = org_item_dt - pre_item_dt tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds if 'SED' in v_name.upper(): itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \ tmp_delta_secs itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs else: itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs if 'SED' in v_name.upper(): if MathClass.floatequal(itp_auxiliary_value, 0.): itp_value = 0. print('WARNING: Flow is 0 for %s, please check!' % item_dtime.strftime('%Y-%m-%d %H:%M:%S')) itp_value /= itp_auxiliary_value elif 'FLOW' in v_name.upper(): itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds) elif 'PCP' in v_name.upper(): # the input is mm/h, and output is mm itp_value /= 3600. 
itp_data[item_dtime][v_idx] = round(itp_value, 4) item_dtime += out_time_delta # for i, v in itp_data.items(): # print(i, v) # output to files work_path = os.path.dirname(in_file) header_str = '#' + time_sys_output if time_sys_output == 'LOCALTIME': header_str = header_str + ' ' + str(time_zone_output) for idx, fld in enumerate(flds): if not check_avaiable_field(fld): continue file_name = fld + '_' + time_sys_output + '_' + str(time_interval) if eliminate_zero: file_name += '_nonzero' file_name += '.txt' out_file = work_path + os.path.sep + file_name with open(out_file, 'w') as f: f.write(header_str + '\n') f.write('DATETIME,' + fld + '\n') for i, v in list(itp_data.items()): cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n' f.write(cur_line)
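# Standalone sketch (illustrative values) of the piecewise-constant, time-weighted aggregation the
# interpolation loop above performs: FLOW (m3/s) is averaged over the output interval, PCP (mm/h)
# is integrated to a depth in mm, and SED uses a flow-weighted mean concentration.
from datetime import datetime

recs = [(datetime(2013, 2, 3, 0, 0), 1.2),   # (record time, FLOW in m3/s)
        (datetime(2013, 2, 3, 0, 20), 1.8),
        (datetime(2013, 2, 3, 1, 0), None)]  # appended end time of the 60-min interval
itp = 0.
for (t0, v0), (t1, _) in zip(recs[:-1], recs[1:]):
    itp += v0 * (t1 - t0).total_seconds()    # previous value held constant until the next record
print(itp / 3600.)                           # (1.2*1200 + 1.8*2400) / 3600 = 1.6 m3/s mean flow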
def __init__(self, cf):
    """Initialization."""
    # 1. Directories
    self.model_dir = None
    self.scenario_id = -1
    if 'PATH' in cf.sections():
        self.model_dir = cf.get('PATH', 'model_dir')
        self.scenario_id = cf.getint('PATH', 'scenarioid')
        if self.scenario_id < 0:
            self.model_dir = self.model_dir + os.path.sep + 'OUTPUT'
        else:
            self.model_dir = self.model_dir + os.path.sep + 'OUTPUT' + str(self.scenario_id)
    else:
        raise ValueError("[PATH] section MUST exist in the *.ini file.")
    if not FileClass.is_dir_exists(self.model_dir):
        raise ValueError("Please check the directories defined in [PATH]!")
    # 2. MongoDB configuration and database, collection, and GridFS names
    self.hostname = '127.0.0.1'  # localhost by default
    self.port = 27017
    self.climate_db = ''
    self.bmp_scenario_db = ''
    self.spatial_db = ''
    if 'MONGODB' in cf.sections():
        self.hostname = cf.get('MONGODB', 'hostname')
        self.port = cf.getint('MONGODB', 'port')
        self.climate_db = cf.get('MONGODB', 'climatedbname')
        self.bmp_scenario_db = cf.get('MONGODB', 'bmpscenariodbname')
        self.spatial_db = cf.get('MONGODB', 'spatialdbname')
    else:
        raise ValueError('[MONGODB] section MUST exist in the *.ini file.')
    if not StringClass.is_valid_ip_addr(self.hostname):
        raise ValueError('HOSTNAME illegally defined in [MONGODB]!')
    # 3. Parameters
    self.plt_subbsnid = -1
    self.plt_vars = list()
    if 'PARAMETERS' in cf.sections():
        self.plt_subbsnid = cf.getint('PARAMETERS', 'plot_subbasinid')
        plt_vars_str = cf.get('PARAMETERS', 'plot_variables')
    else:
        raise ValueError("[PARAMETERS] section MUST exist in the *.ini file.")
    if self.plt_subbsnid < 0:
        raise ValueError("PLOT_SUBBASINID must be greater than or equal to 0.")
    if plt_vars_str != '':
        self.plt_vars = StringClass.split_string(plt_vars_str)
    else:
        raise ValueError("PLOT_VARIABLES illegally defined in [PARAMETERS]!")
    # 4. Optional_Parameters
    if 'OPTIONAL_PARAMETERS' in cf.sections():
        tstart = cf.get('OPTIONAL_PARAMETERS', 'time_start')
        tend = cf.get('OPTIONAL_PARAMETERS', 'time_end')
    else:
        raise ValueError("[OPTIONAL_PARAMETERS] section MUST exist in the *.ini file.")
    try:  # UTCTIME
        self.time_start = StringClass.get_datetime(tstart)
        self.time_end = StringClass.get_datetime(tend)
        if cf.has_option('OPTIONAL_PARAMETERS', 'vali_time_start') and \
                cf.has_option('OPTIONAL_PARAMETERS', 'vali_time_end'):
            tstart = cf.get('OPTIONAL_PARAMETERS', 'vali_time_start')
            tend = cf.get('OPTIONAL_PARAMETERS', 'vali_time_end')
            self.vali_stime = StringClass.get_datetime(tstart)
            self.vali_etime = StringClass.get_datetime(tend)
        else:
            self.vali_stime = None
            self.vali_etime = None
    except ValueError:
        raise ValueError('The time format MUST be "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS".')
    if self.time_start >= self.time_end:
        raise ValueError("Wrong time settings in [OPTIONAL_PARAMETERS]!")
    # 5. Switches
    self.lang_cn = False
    if 'SWITCH' in cf.sections():
        self.lang_cn = cf.getboolean('SWITCH', 'lang_cn')
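# A hedged example of a *.ini file accepted by this initialization (the section names are required
# by the code above; option values shown are illustrative):
#
#   [PATH]
#   MODEL_DIR = /data/demo_watershed
#   SCENARIOID = 0
#
#   [MONGODB]
#   HOSTNAME = 127.0.0.1
#   PORT = 27017
#   CLIMATEDBNAME = demo_climate
#   BMPSCENARIODBNAME = demo_bmp
#   SPATIALDBNAME = demo_model
#
#   [PARAMETERS]
#   PLOT_SUBBASINID = 1
#   PLOT_VARIABLES = Q SED
#
#   [OPTIONAL_PARAMETERS]
#   TIME_START = 2013-01-01 00:00:00
#   TIME_END = 2013-12-31 23:59:59
#
#   [SWITCH]
#   LANG_CN = False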
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time, eliminate_zero=False, time_sys_output='UTCTIME', day_divided_hour=0): """ Interpolate not regular observed data to regular time interval data. Todo: Not tested yet! Args: in_file: input data file, the basic format is as follows: line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME line 2: DATETIME,field1,field2,... line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,... line 4: ... ... Field name can be PCP, FLOW, SED the unit is mm/h, m3/s, g/L (i.e., kg/m3), respectively. time_interval: time interval, unit is minute, e.g., daily output is 1440 start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system is based on time_sys. end_time: end time, see also start_time. eliminate_zero: Boolean flag. If true, the time interval without original records will not be output. time_sys_output: time system of output time_system, the format must be '<time_system> [<time_zone>]', e.g., 'LOCALTIME' 'LOCALTIME 8' 'UTCTIME' (default) day_divided_hour: If the time_interval is equal to N*1440, this parameter should be carefully specified. The value must range from 0 to 23. e.g., day_divided_hour ==> day ranges (all expressed as 2013-02-03) 0 ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default) 8 ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59 20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59 Returns: The output data files are located in the same directory with the input file. The nomenclature is: <field name>_<time system>_<time interval>_<nonzero>, e.g., pcp_utctime_1440_nonzero.csv, flow_localtime_60.csv. Note that `.txt` format is also supported. """ FileClass.check_file_exists(in_file) time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file) data_items = read_data_items_from_txt(in_file) flds = data_items[0][:] data_items.remove(flds) if not 0 <= day_divided_hour <= 23: raise ValueError('Day divided hour must range from 0 to 23!') try: date_idx = flds.index('DATETIME') flds.remove('DATETIME') except ValueError: raise ValueError('DATETIME must be one of the fields!') # available field available_flds = ['FLOW', 'SED', 'PCP'] def check_avaiable_field(cur_fld): """Check if the given field name is supported.""" support_flag = False for fff in available_flds: if fff.lower() in cur_fld.lower(): support_flag = True break return support_flag ord_data = OrderedDict() time_zone_output = time.timezone // 3600 if time_sys_output.lower().find('local') >= 0: tmpstrs = StringClass.split_string(time_sys_output, [' ']) if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]): time_zone_output = -1 * int(tmpstrs[1]) time_sys_output = 'LOCALTIME' else: time_sys_output = 'UTCTIME' time_zone_output = 0 for item in data_items: org_datetime = StringClass.get_datetime(item[date_idx]) if time_sys_input == 'LOCALTIME': org_datetime += timedelta(hours=time_zone_input) # now, org_datetime is UTC time. 
if time_sys_output == 'LOCALTIME': org_datetime -= timedelta(hours=time_zone_output) # now, org_datetime is consistent with the output time system ord_data[org_datetime] = list() for i, v in enumerate(item): if i == date_idx: continue if MathClass.isnumerical(v): ord_data[org_datetime].append(float(v)) else: ord_data[org_datetime].append(v) # print(ord_data) itp_data = OrderedDict() out_time_delta = timedelta(minutes=time_interval) sdatetime = StringClass.get_datetime(start_time) edatetime = StringClass.get_datetime(end_time) item_dtime = sdatetime if time_interval % 1440 == 0: item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \ timedelta(minutes=day_divided_hour * 60) while item_dtime <= edatetime: # print(item_dtime) # if item_dtime.month == 12 and item_dtime.day == 31: # print("debug") sdt = item_dtime # start datetime of records edt = item_dtime + out_time_delta # end datetime of records # get original data items org_items = list() pre_dt = list(ord_data.keys())[0] pre_added = False for i, v in list(ord_data.items()): if sdt <= i < edt: if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta: # only add one item that less than sdt. org_items.append([pre_dt] + ord_data.get(pre_dt)) pre_added = True org_items.append([i] + v) if i > edt: break pre_dt = i if len(org_items) > 0: org_items.append([edt]) # Just add end time for compute convenient if org_items[0][0] < sdt: org_items[0][0] = sdt # set the begin datetime of current time interval # if eliminate time interval without original records # initial interpolated list itp_data[item_dtime] = [0.] * len(flds) if len(org_items) == 0: if eliminate_zero: itp_data.popitem() item_dtime += out_time_delta continue # core interpolation code flow_idx = -1 for v_idx, v_name in enumerate(flds): if not check_avaiable_field(v_name): continue if 'SED' in v_name.upper(): # FLOW must be existed for v_idx2, v_name2 in enumerate(flds): if 'FLOW' in v_name2.upper(): flow_idx = v_idx2 break if flow_idx < 0: raise RuntimeError('To interpolate SED, FLOW must be provided!') for v_idx, v_name in enumerate(flds): if not check_avaiable_field(v_name): continue itp_value = 0. itp_auxiliary_value = 0. for org_item_idx, org_item_dtv in enumerate(org_items): if org_item_idx == 0: continue org_item_dt = org_item_dtv[0] pre_item_dtv = org_items[org_item_idx - 1] pre_item_dt = pre_item_dtv[0] tmp_delta_dt = org_item_dt - pre_item_dt tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds if 'SED' in v_name.upper(): itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \ tmp_delta_secs itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs else: itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs if 'SED' in v_name.upper(): if MathClass.floatequal(itp_auxiliary_value, 0.): itp_value = 0. print('WARNING: Flow is 0 for %s, please check!' % item_dtime.strftime('%Y-%m-%d %H:%M:%S')) itp_value /= itp_auxiliary_value elif 'FLOW' in v_name.upper(): itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds) elif 'PCP' in v_name.upper(): # the input is mm/h, and output is mm itp_value /= 3600. 
itp_data[item_dtime][v_idx] = round(itp_value, 4) item_dtime += out_time_delta # for i, v in itp_data.items(): # print(i, v) # output to files work_path = os.path.dirname(in_file) header_str = '#' + time_sys_output if time_sys_output == 'LOCALTIME': header_str = header_str + ' ' + str(time_zone_output) for idx, fld in enumerate(flds): if not check_avaiable_field(fld): continue file_name = fld + '_' + time_sys_output + '_' + str(time_interval) if eliminate_zero: file_name += '_nonzero' file_name += '.csv' out_file = work_path + os.path.sep + file_name with open(out_file, 'w', encoding='utf-8') as f: f.write(header_str + '\n') f.write('DATETIME,' + fld + '\n') for i, v in list(itp_data.items()): cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n' f.write(cur_line)
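# Standalone sketch of the day_divided_hour alignment used above for daily (N*1440 min) intervals
# (dates are illustrative): the first output timestamp snaps to day_divided_hour o'clock, so each
# "day" runs from that hour to the same hour of the following day.
from datetime import datetime, timedelta

sdatetime = datetime(2013, 2, 3, 14, 30)
day_divided_hour = 8
item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + timedelta(minutes=day_divided_hour * 60)
print(item_dtime)  # 2013-02-03 08:00:00 -> the interval covers 08:00:00 up to 2013-02-04 07:59:59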
def data_from_txt(maindb, hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file): """ Read observed data from txt file Args: maindb: Main spatial database hydro_clim_db: hydro-climate dababase obs_txts_list: txt file paths of observed data sites_info_txts_list: txt file paths of site information subbsn_file: subbasin raster file Returns: True or False """ # 1. Read monitor station information, and store variables information and station IDs variable_lists = [] site_ids = [] for site_file in sites_info_txts_list: site_data_items = read_data_items_from_txt(site_file) site_flds = site_data_items[0] for i in range(1, len(site_data_items)): dic = dict() for j, v in enumerate(site_data_items[i]): if StringClass.string_match(site_flds[j], StationFields.id): dic[StationFields.id] = int(v) site_ids.append(dic[StationFields.id]) elif StringClass.string_match(site_flds[j], StationFields.name): dic[StationFields.name] = v.strip() elif StringClass.string_match(site_flds[j], StationFields.type): types = StringClass.split_string(v.strip(), '-') elif StringClass.string_match(site_flds[j], StationFields.lat): dic[StationFields.lat] = float(v) elif StringClass.string_match(site_flds[j], StationFields.lon): dic[StationFields.lon] = float(v) elif StringClass.string_match(site_flds[j], StationFields.x): dic[StationFields.x] = float(v) elif StringClass.string_match(site_flds[j], StationFields.y): dic[StationFields.y] = float(v) elif StringClass.string_match(site_flds[j], StationFields.unit): dic[StationFields.unit] = v.strip() elif StringClass.string_match(site_flds[j], StationFields.elev): dic[StationFields.elev] = float(v) elif StringClass.string_match(site_flds[j], StationFields.outlet): dic[StationFields.outlet] = float(v) for j, cur_type in enumerate(types): site_dic = dict() site_dic[StationFields.id] = dic[StationFields.id] site_dic[StationFields.name] = dic[StationFields.name] site_dic[StationFields.type] = cur_type site_dic[StationFields.lat] = dic[StationFields.lat] site_dic[StationFields.lon] = dic[StationFields.lon] site_dic[StationFields.x] = dic[StationFields.x] site_dic[StationFields.y] = dic[StationFields.y] site_dic[StationFields.elev] = dic[StationFields.elev] site_dic[StationFields.outlet] = dic[StationFields.outlet] # Add SubbasinID field matched, cur_sids = ImportObservedData.match_subbasin(subbsn_file, site_dic, maindb) if not matched: break cur_subbsn_id_str = '' if len(cur_sids) == 1: # if only one subbasin ID, store integer cur_subbsn_id_str = cur_sids[0] else: cur_subbsn_id_str = ','.join(str(cid) for cid in cur_sids if cur_sids is None) site_dic[StationFields.subbsn] = cur_subbsn_id_str curfilter = {StationFields.id: site_dic[StationFields.id], StationFields.type: site_dic[StationFields.type]} # print(curfilter) hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, site_dic, upsert=True) var_dic = dict() var_dic[StationFields.type] = types[j] var_dic[StationFields.unit] = dic[StationFields.unit] if var_dic not in variable_lists: variable_lists.append(var_dic) site_ids = list(set(site_ids)) # 2. Read measurement data and import to MongoDB bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op() count = 0 for measDataFile in obs_txts_list: # print(measDataFile) obs_data_items = read_data_items_from_txt(measDataFile) tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(measDataFile) if tsysin == 'UTCTIME': tzonein = time.timezone / -3600 # If the data items is EMPTY or only have one header row, then goto # next data file. 
if obs_data_items == [] or len(obs_data_items) == 1: continue obs_flds = obs_data_items[0] required_flds = [StationFields.id, DataValueFields.type, DataValueFields.value] for fld in required_flds: if not StringClass.string_in_list(fld, obs_flds): # data can not meet the request! raise ValueError('The %s can not meet the required format!' % measDataFile) for i, cur_obs_data_item in enumerate(obs_data_items): dic = dict() if i == 0: continue for j, cur_data_value in enumerate(cur_obs_data_item): if StringClass.string_match(obs_flds[j], StationFields.id): dic[StationFields.id] = int(cur_data_value) # if current site ID is not included, goto next data item if dic[StationFields.id] not in site_ids: continue elif StringClass.string_match(obs_flds[j], DataValueFields.type): dic[DataValueFields.type] = cur_data_value elif StringClass.string_match(obs_flds[j], DataValueFields.value): dic[DataValueFields.value] = float(cur_data_value) utc_t = HydroClimateUtilClass.get_utcdatetime_from_field_values(obs_flds, cur_obs_data_item, tsysin, tzonein) dic[DataValueFields.local_time] = utc_t + timedelta(minutes=tzonein * 60) dic[DataValueFields.time_zone] = tzonein dic[DataValueFields.utc] = utc_t # curfilter = {StationFields.id: dic[StationFields.id], # DataValueFields.type: dic[DataValueFields.type], # DataValueFields.utc: dic[DataValueFields.utc]} # bulk.find(curfilter).replace_one(dic) bulk.insert(dic) count += 1 if count % 500 == 0: MongoUtil.run_bulk(bulk) bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op() # db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True) if count % 500 != 0: MongoUtil.run_bulk(bulk) # 3. Add measurement data with unit converted # loop variables list added_dics = [] for curVar in variable_lists: # print(curVar) # if the unit is mg/L, then change the Type name with the suffix 'Conc', # and convert the corresponding data to kg if the discharge data is # available. cur_type = curVar[StationFields.type] cur_unit = curVar[StationFields.unit] # Find data by Type for item in hydro_clim_db[DBTableNames.observes].find({StationFields.type: cur_type}): # print(item) dic = dict() dic[StationFields.id] = item[StationFields.id] dic[DataValueFields.value] = item[DataValueFields.value] dic[StationFields.type] = item[StationFields.type] dic[DataValueFields.local_time] = item[DataValueFields.local_time] dic[DataValueFields.time_zone] = item[DataValueFields.time_zone] dic[DataValueFields.utc] = item[DataValueFields.utc] if cur_unit == 'mg/L' or cur_unit == 'g/L': # update the Type name dic[StationFields.type] = cur_type + 'Conc' curfilter = {StationFields.id: dic[StationFields.id], DataValueFields.type: cur_type, DataValueFields.utc: dic[DataValueFields.utc]} hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True) dic[StationFields.type] = cur_type # find discharge on current day cur_filter = {StationFields.type: 'Q', DataValueFields.utc: dic[DataValueFields.utc], StationFields.id: dic[StationFields.id]} q_dic = hydro_clim_db[DBTableNames.observes].find_one(filter=cur_filter) q = -9999. if q_dic is not None: q = q_dic[DataValueFields.value] else: continue if cur_unit == 'mg/L': # convert mg/L to kg dic[DataValueFields.value] = round( dic[DataValueFields.value] * q * 86400. 
/ 1000., 2) elif cur_unit == 'g/L': # convert g/L to kg dic[DataValueFields.value] = round( dic[DataValueFields.value] * q * 86400., 2) elif cur_unit == 'kg': dic[StationFields.type] = cur_type + 'Conc' # convert kg to mg/L dic[DataValueFields.value] = round( dic[DataValueFields.value] / q * 1000. / 86400., 2) # add new data item added_dics.append(dic) # import to MongoDB for dic in added_dics: curfilter = {StationFields.id: dic[StationFields.id], DataValueFields.type: dic[DataValueFields.type], DataValueFields.utc: dic[DataValueFields.utc]} hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
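# Standalone worked example of the inverse conversion applied above when the stored unit is kg
# (illustrative numbers only): a daily load in kg divided by the day's discharge volume gives mg/L,
#   conc = load_kg / Q * 1000 / 86400
# e.g., 648.0 kg at Q = 3.0 m3/s -> 648.0 / 3.0 * 1000 / 86400 = 2.5 mg/L.
def mean_conc_mg_per_l(load_kg, q_m3_per_s):
    """Convert a daily load (kg) and discharge (m3/s) into a mean concentration (mg/L)."""
    return round(load_kg / q_m3_per_s * 1000. / 86400., 2)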
def run(function_name, in_files, wp=None, in_params=None, out_files=None, mpi_params=None, log_params=None): """ Run TauDEM function. 1. The command will not execute if any input file does not exist. 2. An error will be detected after running the TauDEM command if any output file does not exist; Args: function_name (str): Full path of TauDEM function. in_files (dict, required): Dict of pairs of parameter id (string) and file path (string or list) for input files, e.g.:: {'-z': '/full/path/to/dem.tif'} wp (str, optional): Workspace for outputs. If not specified, the directory of the first input file in ``in_files`` will be used. in_params (dict, optional): Dict of pairs of parameter id (string) and value (or None for a flag parameter without a value) for input parameters, e.g.:: {'-nc': None} {'-thresh': threshold} {'-m': 'ave' 's', '-nc': None} out_files (dict, optional): Dict of pairs of parameter id (string) and file path (string or list) for output files, e.g.:: {'-fel': 'filleddem.tif'} {'-maxS': ['harden.tif', 'maxsimi.tif']} mpi_params (dict, optional): Dict of pairs of parameter id (string) and value or path for MPI setting, e.g.:: {'mpipath':'/soft/bin','hostfile':'/soft/bin/cluster.node','n':4} {'mpipath':'/soft/bin', 'n':4} {'n':4} log_params (dict, optional): Dict of pairs of parameter id (string) and value or path for runtime and log output parameters. e.g.:: {'logfile': '/home/user/log.txt', 'runtimefile': '/home/user/runtime.txt'} Returns: True if TauDEM run successfully, otherwise False. """ # Check input files if in_files is None: TauDEM.error('Input files parameter is required!') if not isinstance(in_files, dict): TauDEM.error('The input files parameter must be a dict!') for (pid, infile) in list(in_files.items()): if infile is None: continue if isinstance(infile, list) or isinstance(infile, tuple): for idx, inf in enumerate(infile): if inf is None: continue inf, wp = TauDEM.check_infile_and_wp(inf, wp) in_files[pid][idx] = inf continue if os.path.exists(infile): infile, wp = TauDEM.check_infile_and_wp(infile, wp) in_files[pid] = os.path.abspath(infile) else: # For more flexible input files extension. # e.g., -inputtags 1 <path/to/tag1.tif> 2 <path/to/tag2.tif> ... # in such unpredictable circumstance, we cannot check the existance of # input files, so the developer will check it in other place. if len(StringClass.split_string(infile, ' ')) > 1: continue else: # the infile still should be a existing file, so check in workspace if wp is None: TauDEM.error('Workspace should not be None!') infile = wp + os.sep + infile if not os.path.exists(infile): TauDEM.error( 'Input files parameter %s: %s is not existed!' % (pid, infile)) in_files[pid] = os.path.abspath(infile) # Make workspace dir if not existed UtilClass.mkdir(wp) # Check the log parameter log_file = None runtime_file = None if log_params is not None: if not isinstance(log_params, dict): TauDEM.error('The log parameter must be a dict!') if 'logfile' in log_params and log_params['logfile'] is not None: log_file = log_params['logfile'] # If log_file is just a file name, then save it in the default workspace. if os.sep not in log_file: log_file = wp + os.sep + log_file log_file = os.path.abspath(log_file) if 'runtimefile' in log_params and log_params[ 'runtimefile'] is not None: runtime_file = log_params['runtimefile'] # If log_file is just a file name, then save it in the default workspace. 
if os.sep not in runtime_file: runtime_file = wp + os.sep + runtime_file runtime_file = os.path.abspath(runtime_file) # remove out_files to avoid any file IO related error new_out_files = list() if out_files is not None: if not isinstance(out_files, dict): TauDEM.error('The output files parameter must be a dict!') for (pid, out_file) in list(out_files.items()): if out_file is None: continue if isinstance(out_file, list) or isinstance(out_file, tuple): for idx, outf in enumerate(out_file): if outf is None: continue outf = FileClass.get_file_fullpath(outf, wp) FileClass.remove_files(outf) out_files[pid][idx] = outf new_out_files.append(outf) else: out_file = FileClass.get_file_fullpath(out_file, wp) FileClass.remove_files(out_file) out_files[pid] = out_file new_out_files.append(out_file) # concatenate command line commands = list() # MPI header if mpi_params is not None: if not isinstance(mpi_params, dict): TauDEM.error('The MPI settings parameter must be a dict!') if 'mpipath' in mpi_params and mpi_params['mpipath'] is not None: commands.append(mpi_params['mpipath'] + os.sep + 'mpiexec') else: commands.append('mpiexec') if 'hostfile' in mpi_params and mpi_params['hostfile'] is not None \ and not StringClass.string_match(mpi_params['hostfile'], 'none') \ and os.path.isfile(mpi_params['hostfile']): commands.append('-f') commands.append(mpi_params['hostfile']) if 'n' in mpi_params and mpi_params['n'] > 1: commands.append('-n') commands.append(str(mpi_params['n'])) else: # If number of processor is less equal than 1, then do not call mpiexec. commands = [] # append TauDEM function name, which can be full path or just one name commands.append(function_name) # append input files for (pid, infile) in list(in_files.items()): if infile is None: continue if pid[0] != '-': pid = '-' + pid commands.append(pid) if isinstance(infile, list) or isinstance(infile, tuple): commands.append(' '.join(tmpf for tmpf in infile)) else: commands.append(infile) # append input parameters if in_params is not None: if not isinstance(in_params, dict): TauDEM.error('The input parameters must be a dict!') for (pid, v) in list(in_params.items()): if pid[0] != '-': pid = '-' + pid commands.append(pid) # allow for parameter which is an flag without value if v != '' and v is not None: if MathClass.isnumerical(v): commands.append(str(v)) else: commands.append(v) # append output parameters if out_files is not None: for (pid, outfile) in list(out_files.items()): if outfile is None: continue if pid[0] != '-': pid = '-' + pid commands.append(pid) if isinstance(outfile, list) or isinstance(outfile, tuple): commands.append(' '.join(tmpf for tmpf in outfile)) else: commands.append(outfile) # run command runmsg = UtilClass.run_command(commands) TauDEM.log(runmsg, log_file) TauDEM.output_runtime_to_log(function_name, runmsg, runtime_file) # Check out_files, raise RuntimeError if not exist. for of in new_out_files: if not os.path.exists(of): TauDEM.error('%s failed, and the %s was not generated!' % (function_name, of)) return False return True
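# A hedged usage sketch of TauDEM.run() (the TauDEM install path, workspace, and file names are
# assumptions, not from the source). With these arguments the function would assemble and execute
# roughly: mpiexec -n 4 /opt/taudem/pitremove -z <abs dem.tif> -fel <abs dem_fel.tif>
#
#   TauDEM.run('/opt/taudem/pitremove',
#              in_files={'-z': 'dem.tif'},
#              wp='/data/workspace',
#              out_files={'-fel': 'dem_fel.tif'},
#              mpi_params={'n': 4},
#              log_params={'logfile': 'taudem.log', 'runtimefile': 'runtime.log'})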
def export_scenario_to_gtiff(self, outpath=None):
    """Export scenario to GTiff.

    TODO: Read Raster from MongoDB should be extracted to pygeoc.
    """
    if not self.export_sce_tif:
        return
    dist = self.bmps_info['DISTRIBUTION']
    dist_list = StringClass.split_string(dist, '|')
    if len(dist_list) >= 2 and dist_list[0] == 'RASTER':
        dist_name = '0_' + dist_list[1]  # prefix 0_ means the whole basin
        # read dist_name from MongoDB
        client = ConnectMongoDB(self.hostname, self.port)
        conn = client.get_conn()
        maindb = conn[self.main_db]
        spatial_gfs = GridFS(maindb, DBTableNames.gridfs_spatial)
        # read file from mongodb
        if not spatial_gfs.exists(filename=dist_name):
            print('WARNING: %s does not exist, scenario export failed!' % dist_name)
            return
        try:
            slpposf = maindb[DBTableNames.gridfs_spatial].files.find({'filename': dist_name},
                                                                     no_cursor_timeout=True)[0]
        except (NetworkTimeout, Exception):  # In case of network timeout or any unexpected error
            client.close()
            return
        ysize = int(slpposf['metadata'][RasterMetadata.nrows])
        xsize = int(slpposf['metadata'][RasterMetadata.ncols])
        xll = slpposf['metadata'][RasterMetadata.xll]
        yll = slpposf['metadata'][RasterMetadata.yll]
        cellsize = slpposf['metadata'][RasterMetadata.cellsize]
        nodata_value = slpposf['metadata'][RasterMetadata.nodata]
        srs = slpposf['metadata'][RasterMetadata.srs]
        if isinstance(srs, text_type):
            srs = str(srs)
        srs = osr.GetUserInputAsWKT(srs)
        geotransform = [0] * 6
        geotransform[0] = xll - 0.5 * cellsize
        geotransform[1] = cellsize
        geotransform[3] = yll + (ysize - 0.5) * cellsize  # yMax
        geotransform[5] = -cellsize
        slppos_data = spatial_gfs.get(slpposf['_id'])
        total_len = xsize * ysize
        fmt = '%df' % (total_len,)
        slppos_data = unpack(fmt, slppos_data.read())
        slppos_data = numpy.reshape(slppos_data, (ysize, xsize))
        v_dict = dict()
        for idx, gene_v in enumerate(self.gene_values):
            v_dict[self.gene_to_unit[idx]] = gene_v
        for k, v in v_dict.items():
            slppos_data[slppos_data == k] = v
        if outpath is None:
            outpath = self.scenario_dir + os.path.sep + 'Scenario_%d.tif' % self.ID
        RasterUtilClass.write_gtiff_file(outpath, ysize, xsize, slppos_data,
                                         geotransform, srs, nodata_value)
        client.close()
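# Standalone sketch (illustrative values) of the GDAL-style geotransform assembled above: the origin
# is the upper-left corner, so x0 = xll - 0.5*cellsize and y0 = yll + (nrows - 0.5)*cellsize, with a
# negative row resolution.
xll, yll, cellsize, nrows = 500000.0, 3800000.0, 30.0, 200
geotransform = [xll - 0.5 * cellsize, cellsize, 0, yll + (nrows - 0.5) * cellsize, 0, -cellsize]
print(geotransform)  # [499985.0, 30.0, 0, 3805985.0, 0, -30.0]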