def calculate_environment(self):
    """Derive ``self.environment`` from the model output of this scenario.

    The output file is ``self.bmps_info['ENVEVAL']`` inside ``self.modelout_dir``.
    The result is the relative reduction against ``self.bmps_info['BASE_ENV']``.
    When the model has not run, the output is missing, or its extension is
    neither ``tif`` nor ``txt``, both ``self.economy`` and ``self.environment``
    are set to the predefined worst values.
    """

    def _use_worst_values():
        # Shared fallback for every "cannot evaluate" situation.
        self.economy = self.worst_econ
        self.environment = self.worst_env

    if not self.modelrun:  # the model has not been evaluated
        _use_worst_values()
        return

    rfile = self.modelout_dir + os.path.sep + self.bmps_info['ENVEVAL']
    if not FileClass.is_file_exists(rfile):
        time.sleep(5)  # wait 5 seconds in case the output is still being written
    if not FileClass.is_file_exists(rfile):
        print('WARNING: Although SEIMS model runs successfully, the desired output: %s'
              ' cannot be found!' % rfile)
        _use_worst_values()
        return

    base_amount = self.bmps_info['BASE_ENV']
    file_ext = rfile.split('.')[-1]

    if StringClass.string_match(file_ext, 'tif'):  # Raster data
        raster = RasterUtilClass.read_raster(rfile)
        # Sum over the raster divided by self.timerange
        # (original comment: "unit: year").
        yearly_amount = raster.get_sum() / self.timerange
        # Reduction rate relative to the base amount.
        self.environment = (base_amount - yearly_amount) / base_amount
        return

    if StringClass.string_match(file_ext, 'txt'):  # Time series data
        # TODO, fix it later, lj
        # NOTE(review): only one argument is passed here, while the
        # read_simulation_from_txt visible in this code base takes five
        # (ws, plot_vars, subbsnID, stime, etime) -- confirm which function is
        # in scope; this call is kept exactly as in the original.
        sed_sum = read_simulation_from_txt(self.modelout_dir)
        self.environment = (base_amount - sed_sum) / base_amount
        return

    # Unsupported output file type.
    _use_worst_values()
def mask_origin_delineated_data(cfg):
    """Mask the original delineated data by Subbasin raster.

    Three parallel lists are assembled (input rasters, masked output rasters
    and the default value for each) and passed to
    ``SpatialDelineation.mask_raster_cpp`` in one call.

    Args:
        cfg: configuration object; ``cfg.taudems``, ``cfg.spatials``,
            ``cfg.dirs``, ``cfg.logs``, ``cfg.soil``, ``cfg.landuse``,
            ``cfg.default_soil``, ``cfg.default_landuse``, ``cfg.additional_rs``,
            ``cfg.spatial_dir`` and ``cfg.seims_bin`` are read.
    """
    subbasin_tau_file = cfg.taudems.subbsn
    geodata2dbdir = cfg.dirs.geodata2db
    UtilClass.mkdir(geodata2dbdir)
    mask_file = cfg.spatials.mask
    RasterUtilClass.get_mask_from_raster(subbasin_tau_file, mask_file)

    taudems = cfg.taudems
    spatials = cfg.spatials
    # Total 12 raster files
    original_files = [taudems.subbsn, taudems.d8flow, taudems.stream_raster,
                      taudems.slp, taudems.filldem, taudems.d8acc,
                      taudems.stream_order, taudems.dinf, taudems.dinf_d8dir,
                      taudems.dinf_slp, taudems.dinf_weight, taudems.dist2stream_d8]
    # Masked outputs, in the same order as the inputs above
    output_files = [taudems.subbsn_m, taudems.d8flow_m, taudems.stream_m,
                    spatials.slope, spatials.filldem, spatials.d8acc,
                    spatials.stream_order, spatials.dinf, spatials.dinf_d8dir,
                    spatials.dinf_slp, spatials.dinf_weight, spatials.dist2stream_d8]
    default_values = [DEFAULT_NODATA] * len(original_files)

    # Soil and landuse rasters carry their own default values.
    FileClass.check_file_exists(cfg.soil)
    FileClass.check_file_exists(cfg.landuse)
    original_files += [cfg.soil, cfg.landuse]
    output_files += [spatials.soil_type, spatials.landuse]
    default_values += [cfg.default_soil, cfg.default_landuse]

    # Additional raster files: given as a full path or relative to cfg.spatial_dir.
    for name, raster_path in cfg.additional_rs.items():
        if not FileClass.is_file_exists(raster_path):
            raster_path = cfg.spatial_dir + os.path.sep + raster_path
            if not FileClass.is_file_exists(raster_path):
                print('WARNING: The additional file %s MUST be located in '
                      'SPATIAL_DATA_DIR, or provided as full file path!' % name)
                continue
        original_files.append(raster_path)
        output_files.append(geodata2dbdir + os.path.sep + name + '.tif')
        default_values.append(DEFAULT_NODATA)

    config_file = cfg.logs.mask_cfg
    # run mask operation
    print('Mask original delineated data by Subbasin raster...')
    SpatialDelineation.mask_raster_cpp(cfg.seims_bin, mask_file,
                                       original_files, output_files, default_values,
                                       config_file)
def get_psa_config():
    """Parse the command line arguments of parameters sensitivity analysis.

    Returns:
        cf: ConfigParser object of the *.ini file.
        mtd: Sensitivity method name; 'morris' (default) and 'fast' are supported.

    Raises:
        ImportError: if the configuration file given by ``-ini`` does not exist.
    """
    parser = argparse.ArgumentParser(description="Execute parameters sensitivity analysis.")
    parser.add_argument('-ini', type=str, help="Full path of configuration file")
    # The two method switches are mutually exclusive.
    method_group = parser.add_mutually_exclusive_group()
    method_group.add_argument('-morris', action='store_true',
                              help='Run Morris Screening method')
    method_group.add_argument('-fast', action='store_true',
                              help='Run FAST variant-based method')
    args = parser.parse_args()

    # Morris is used unless -fast is given explicitly.
    psa_mtd = 'fast' if args.fast else 'morris'

    ini_file = args.ini
    if not FileClass.is_file_exists(ini_file):
        raise ImportError('Configuration file is not existed: %s' % ini_file)
    cf = ConfigParser()
    cf.read(ini_file)
    return cf, psa_mtd
def calculate_sensitivity(self):
    """Calculate the sensitivity indices of every output variable.

    evaluate_models() may return several output variables (one per column of
    ``self.output_values``), so each column is analysed separately with the
    method named by ``self.cfg.method`` ('morris' or 'fast'). The results are
    stored in ``self.psa_si``, dumped as JSON and passed on to
    ``self.output_psa_si()``.

    Raises:
        ValueError: if ``self.cfg.method`` is neither 'morris' nor 'fast'.
    """
    # Reuse previously saved indices when nothing is held in memory yet.
    if not self.psa_si and FileClass.is_file_exists(self.cfg.outfiles.psa_si_json):
        with open(self.cfg.outfiles.psa_si_json, 'rb') as f:
            self.psa_si = UtilClass.decode_strs_in_dict(json.load(f))
        return

    if not self.objnames:
        objnames_file = '%s/objnames.pickle' % self.cfg.psa_outpath
        if FileClass.is_file_exists(objnames_file):
            with open(objnames_file, 'rb') as f:
                self.objnames = pickle.load(f)

    # Make sure outputs, samples and parameter definitions are available.
    if self.output_values is None or len(self.output_values) == 0:
        self.evaluate_models()
    if self.param_values is None or len(self.param_values) == 0:
        self.generate_samples()
    if not self.param_defs:
        self.read_param_ranges()

    row, col = self.output_values.shape
    assert (row == self.run_count)
    for obj_idx in range(col):
        print(self.objnames[obj_idx])
        obj_outputs = self.output_values[:, obj_idx]
        if self.cfg.method == 'morris':
            tmp_Si = morris_alz(self.param_defs,
                                self.param_values,
                                obj_outputs,
                                conf_level=0.95, print_to_console=True,
                                num_levels=self.cfg.morris.num_levels)
        elif self.cfg.method == 'fast':
            tmp_Si = fast_alz(self.param_defs, obj_outputs,
                              print_to_console=True)
        else:
            raise ValueError('%s method is not supported now!' % self.cfg.method)
        self.psa_si[obj_idx] = tmp_Si

    # Save as json, which can be loaded by json.load()
    json_data = json.dumps(self.psa_si, indent=4, cls=SpecialJsonEncoder)
    with open(self.cfg.outfiles.psa_si_json, 'w', encoding='utf-8') as f:
        f.write('%s' % json_data)
    self.output_psa_si()
def check_file_available(in_f):
    """Validate an optional input file path.

    Args:
        in_f: file path, or None / '' / 'none' (matched through
            ``StringClass.string_match``) to indicate "no file".

    Returns:
        None when no file is specified, otherwise ``in_f`` unchanged.

    Raises:
        ValueError: if a file is specified but ``FileClass.is_file_exists``
            reports that it does not exist.
    """
    # Test None and '' first so that string_match is never called with None.
    if in_f is None or in_f == '' or StringClass.string_match(in_f, 'none'):
        return None
    if not FileClass.is_file_exists(in_f):
        raise ValueError("The %s is not existed or have no access permission!" % in_f)
    return in_f
def get_input_cfgs():
    """Get model configuration arguments from the command line.

    Either an existing ``-ini`` file must be given, or both an existing
    ``-dem`` file and an existing ``-bin`` directory, in which case default
    settings are used and no *.ini file is read.

    Returns:
        AutoFuzSlpPosConfig object built from the parsed arguments.

    Raises:
        RuntimeError: if ``-proc`` does not contain exactly one number, or if
            neither of the two argument combinations above is satisfied.
    """
    c = C()
    parser = argparse.ArgumentParser(description="Read AutoFuzSlpPos configurations.")
    parser.add_argument('-ini', help="Full path of configuration file.")
    parser.add_argument('-bin', help="Path of executable programs, which will override "
                                     "exeDir in *.ini file.")
    parser.add_argument('-proc', help="Number of processor for parallel computing, "
                                      "which will override inputProc in *.ini file.")
    parser.add_argument('-dem', help="DEM of study area.")
    parser.add_argument('-root', help="Workspace to store results, which will override "
                                      "rootDir in *.ini file.")
    args = parser.parse_args(namespace=c)

    ini_file = args.ini
    bin_dir = args.bin
    input_proc = args.proc
    rawdem = args.dem
    root_dir = args.root

    if input_proc is not None:
        xx = StringClass.extract_numeric_values_from_string(input_proc)
        if xx is None or len(xx) != 1:
            raise RuntimeError("-proc MUST be one integer number!")
        input_proc = int(xx[0])
    else:
        input_proc = -1  # negative means "not specified"

    if not FileClass.is_file_exists(ini_file):
        # os.path.isdir() raises TypeError on None, so guard against a missing -bin.
        has_bin_dir = bin_dir is not None and os.path.isdir(bin_dir)
        if FileClass.is_file_exists(rawdem) and has_bin_dir:
            # In this scenario, the script can be executed by default setting, i.e., the *.ini
            # file is not required.
            cf = None
            if input_proc < 0:
                # Floor division keeps the process count an integer on Python 3.
                input_proc = cpu_count() // 2
        else:
            raise RuntimeError("*.ini file MUST be provided when '-dem', '-bin', "
                               "and '-root' are not provided!")
    else:
        cf = ConfigParser()
        cf.read(ini_file)

    return AutoFuzSlpPosConfig(cf, bin_dir, input_proc, rawdem, root_dir)
def __init__(self, cf, method='morris'):
    """Initialization of the parameters sensitivity analysis configuration.

    Args:
        cf: ConfigParser-like object of the *.ini file (``sections``,
            ``has_option`` and ``get`` are used).
        method: sensitivity analysis method, 'morris' (default) or 'fast'.

    Raises:
        ValueError: if the [PSA_Settings] section or the PSA start/end time is
            missing, the time strings cannot be parsed, the start time is not
            earlier than the end time, or ``method`` is not supported.
        IOError: if the parameter ranges definition file does not exist.
    """
    self.method = method
    # 1. SEIMS model related
    self.model = ParseSEIMSConfig(cf)
    # 2. Common settings of parameters sensitivity analysis
    if 'PSA_Settings' not in cf.sections():
        raise ValueError(
            "[PSA_Settings] section MUST be existed in *.ini file.")

    self.evaluate_params = list()
    if cf.has_option('PSA_Settings', 'evaluateparam'):
        eva_str = cf.get('PSA_Settings', 'evaluateparam')
        self.evaluate_params = StringClass.split_string(eva_str, ',')
    else:
        self.evaluate_params = ['Q']  # Default

    self.param_range_def = 'morris_param_rng.def'  # Default
    if cf.has_option('PSA_Settings', 'paramrngdef'):
        self.param_range_def = cf.get('PSA_Settings', 'paramrngdef')
    # The definition file is looked up inside the model directory.
    self.param_range_def = self.model.model_dir + os.path.sep + self.param_range_def
    if not FileClass.is_file_exists(self.param_range_def):
        raise IOError('Ranges of parameters MUST be provided!')

    if not (cf.has_option('PSA_Settings', 'psa_time_start') and
            cf.has_option('PSA_Settings', 'psa_time_end')):
        raise ValueError(
            "Start and end time of PSA MUST be specified in [PSA_Settings]."
        )
    try:
        # UTCTIME
        tstart = cf.get('PSA_Settings', 'psa_time_start')
        tend = cf.get('PSA_Settings', 'psa_time_end')
        self.psa_stime = StringClass.get_datetime(tstart)
        self.psa_etime = StringClass.get_datetime(tend)
    except ValueError:
        raise ValueError('The time format MUST be "YYYY-MM-DD HH:MM:SS".')
    if self.psa_stime >= self.psa_etime:
        raise ValueError("Wrong time settings in [PSA_Settings]!")

    # 3. Parameters settings for specific sensitivity analysis methods
    self.morris = None
    self.fast = None
    if self.method == 'fast':
        self.fast = FASTConfig(cf)
        self.psa_outpath = '%s/PSA_FAST_N%dM%d' % (
            self.model.model_dir, self.fast.N, self.fast.M)
    elif self.method == 'morris':
        self.morris = MorrisConfig(cf)
        self.psa_outpath = '%s/PSA_Morris_N%dL%d' % (
            self.model.model_dir, self.morris.N, self.morris.num_levels)
    else:
        # Without this branch psa_outpath stays undefined and the mkdir below
        # fails with an unrelated AttributeError.
        raise ValueError('%s method is not supported now!' % self.method)

    # 4. (Optional) Plot settings for matplotlib
    self.plot_cfg = PlotConfig(cf)

    # Do not remove psa_outpath if already existed
    UtilClass.mkdir(self.psa_outpath)
    self.outfiles = PSAOutputs(self.psa_outpath)
def calculate_sensitivity(self):
    """Calculate the sensitivity indices of every output variable.

    evaluate_models() may return several output variables (one per column of
    ``self.output_values``), so each column is analysed separately with the
    method named by ``self.cfg.method`` ('morris' or 'fast'). The results are
    stored in ``self.psa_si``, dumped as JSON and passed on to
    ``self.output_psa_si()``.

    Raises:
        ValueError: if ``self.cfg.method`` is neither 'morris' nor 'fast'.
    """
    # Reuse previously saved indices when nothing is held in memory yet.
    if not self.psa_si:
        if FileClass.is_file_exists(self.cfg.outfiles.psa_si_json):
            with open(self.cfg.outfiles.psa_si_json, 'r') as f:
                self.psa_si = UtilClass.decode_strs_in_dict(json.load(f))
            return
    if not self.objnames:
        objnames_file = '%s/objnames.pickle' % self.cfg.psa_outpath
        if FileClass.is_file_exists(objnames_file):
            # Pickle data is binary: text mode ('r') fails on Python 3.
            # NOTE: pickle.load must only be used on files this program wrote.
            with open(objnames_file, 'rb') as f:
                self.objnames = pickle.load(f)

    # Make sure outputs, samples and parameter definitions are available.
    if self.output_values is None or len(self.output_values) == 0:
        self.evaluate_models()
    if self.param_values is None or len(self.param_values) == 0:
        self.generate_samples()
    if not self.param_defs:
        self.read_param_ranges()

    row, col = self.output_values.shape
    assert (row == self.run_count)
    for i in range(col):
        print(self.objnames[i])
        if self.cfg.method == 'morris':
            tmp_Si = morris_alz(self.param_defs,
                                self.param_values,
                                self.output_values[:, i],
                                conf_level=0.95, print_to_console=True,
                                num_levels=self.cfg.morris.num_levels,
                                grid_jump=self.cfg.morris.grid_jump)
        elif self.cfg.method == 'fast':
            tmp_Si = fast_alz(self.param_defs, self.output_values[:, i],
                              print_to_console=True)
        else:
            raise ValueError('%s method is not supported now!' % self.cfg.method)
        self.psa_si[i] = tmp_Si

    # Save as json, which can be loaded by json.load()
    json_data = json.dumps(self.psa_si, indent=4, cls=SpecialJsonEncoder)
    with open(self.cfg.outfiles.psa_si_json, 'w') as f:
        f.write(json_data)
    self.output_psa_si()
def __init__(self, bin_dir='', model_dir='', nthread=4, lyrmtd=0,
             host='127.0.0.1', port=27017, scenario_id=-1, calibration_id=-1,
             version='OMP', nprocess=1, mpi_bin='', hosts_opt='-f', hostfile='',
             **kwargs):  # Allow any other keyword arguments
    """Set up one model run: executable path, run options and result holders.

    Every named argument may also be supplied through ``kwargs['args_dict']``;
    a value found there takes precedence over the named argument.
    """
    # Derived from input arguments; 'args_dict' is preferred if existed.
    args_dict = kwargs.get('args_dict', dict())

    bin_dir = args_dict.get('bin_dir', bin_dir)
    model_dir = args_dict.get('model_dir', model_dir)
    self.version = args_dict.get('version', version)

    exe_suffix = '.exe' if sysstr == 'Windows' else ''
    exe_prefix = bin_dir + os.path.sep
    if self.version == 'MPI':
        self.seims_exec = exe_prefix + 'seims_mpi' + exe_suffix
    else:
        self.seims_exec = exe_prefix + 'seims_omp' + exe_suffix
        if not FileClass.is_file_exists(self.seims_exec):
            # If not support OpenMP, use `seims`!
            self.seims_exec = exe_prefix + 'seims' + exe_suffix
    self.seims_exec = os.path.abspath(self.seims_exec)
    self.model_dir = os.path.abspath(model_dir)

    # Remaining options: value from args_dict if present, else the argument.
    for opt_name, opt_default in (('nthread', nthread),
                                  ('lyrmtd', lyrmtd),
                                  ('host', host),
                                  ('port', port),
                                  ('scenario_id', scenario_id),
                                  ('calibration_id', calibration_id),
                                  ('nprocess', nprocess),
                                  ('mpi_bin', mpi_bin),
                                  ('hosts_opt', hosts_opt),
                                  ('hostfile', hostfile)):
        setattr(self, opt_name, args_dict.get(opt_name, opt_default))

    # Concatenate executable command
    self.cmd = self.Command
    self.run_success = False
    self.output_dir = self.OutputDirectory

    # Read model data from MongoDB; the database is named after the last
    # component of the model directory.
    self.db_name = os.path.split(self.model_dir)[1]
    self.outlet_id = self.OutletID
    self.start_time, self.end_time = self.SimulatedPeriod

    # Data maybe used after model run
    self.timespan = dict()
    self.obs_vars = list()  # Observation types at the outlet
    self.obs_value = dict()  # Observation value, key: DATETIME, value: value list of obs_vars
    self.sim_vars = list()  # Simulation types at the outlet, which is part of obs_vars
    self.sim_value = dict()  # Simulation value, same as obs_value
    # The format of sim_obs_dict:
    #         {VarName: {'UTCDATETIME': [t1, t2, ..., tn],
    #                    'Obs': [o1, o2, ..., on],
    #                    'Sim': [s1, s2, ..., sn]},
    #         ...
    #         }
    self.sim_obs_dict = dict()
def __init__(self, cf, method='morris'):
    """Initialization of the parameters sensitivity analysis configuration.

    Args:
        cf: ConfigParser-like object of the *.ini file (``sections``,
            ``has_option`` and ``get`` are used).
        method: sensitivity analysis method, 'morris' (default) or 'fast'.

    Raises:
        ValueError: if the [PSA_Settings] section or the PSA start/end time is
            missing, the time strings cannot be parsed, the start time is not
            earlier than the end time, or ``method`` is not supported.
        IOError: if the parameter ranges definition file does not exist.
    """
    self.method = method
    # 1. SEIMS model related
    self.model = ParseSEIMSConfig(cf)
    # 2. Common settings of parameters sensitivity analysis
    if 'PSA_Settings' not in cf.sections():
        raise ValueError("[PSA_Settings] section MUST be existed in *.ini file.")

    self.evaluate_params = list()
    if cf.has_option('PSA_Settings', 'evaluateparam'):
        eva_str = cf.get('PSA_Settings', 'evaluateparam')
        self.evaluate_params = StringClass.split_string(eva_str, ',')
    else:
        self.evaluate_params = ['Q']  # Default

    self.param_range_def = 'morris_param_rng.def'  # Default
    if cf.has_option('PSA_Settings', 'paramrngdef'):
        self.param_range_def = cf.get('PSA_Settings', 'paramrngdef')
    # The definition file is looked up inside the model directory.
    self.param_range_def = self.model.model_dir + os.path.sep + self.param_range_def
    if not FileClass.is_file_exists(self.param_range_def):
        raise IOError('Ranges of parameters MUST be provided!')

    if not (cf.has_option('PSA_Settings', 'psa_time_start') and
            cf.has_option('PSA_Settings', 'psa_time_end')):
        raise ValueError("Start and end time of PSA MUST be specified in [PSA_Settings].")
    try:
        # UTCTIME
        tstart = cf.get('PSA_Settings', 'psa_time_start')
        tend = cf.get('PSA_Settings', 'psa_time_end')
        self.psa_stime = StringClass.get_datetime(tstart)
        self.psa_etime = StringClass.get_datetime(tend)
    except ValueError:
        raise ValueError('The time format MUST be "YYYY-MM-DD HH:MM:SS".')
    if self.psa_stime >= self.psa_etime:
        raise ValueError("Wrong time settings in [PSA_Settings]!")

    # 3. Parameters settings for specific sensitivity analysis methods
    self.morris = None
    self.fast = None
    if self.method == 'fast':
        self.fast = FASTConfig(cf)
        self.psa_outpath = '%s/PSA-FAST-N%dM%d' % (self.model.model_dir,
                                                   self.fast.N, self.fast.M)
    elif self.method == 'morris':
        self.morris = MorrisConfig(cf)
        self.psa_outpath = '%s/PSA-Morris-N%dL%dJ%d' % (self.model.model_dir,
                                                        self.morris.N,
                                                        self.morris.num_levels,
                                                        self.morris.grid_jump)
    else:
        # Without this branch psa_outpath stays undefined and the mkdir below
        # fails with an unrelated AttributeError.
        raise ValueError('%s method is not supported now!' % self.method)

    # Do not remove psa_outpath if already existed
    UtilClass.mkdir(self.psa_outpath)
    self.outfiles = PSAOutputs(self.psa_outpath)
def read_simulation_from_txt(ws,  # type: AnyStr
                             plot_vars,  # type: List[AnyStr]
                             subbsnID,  # type: int
                             stime,  # type: datetime
                             etime  # type: datetime
                             ):
    # type: (...) -> (List[AnyStr], Dict[datetime, List[float]])
    """Read simulation data of one subbasin from ``<ws>/<variable>.txt`` files.

    Within a file, a two-field line is treated as a subbasin header whose
    second field is the subbasin ID; three-field lines following the matching
    header are read as ``date time value`` records. Reading of a file stops at
    the first header of another subbasin after the matching one.

    Args:
        ws: directory containing the text files.
        plot_vars: variable names; each is looked up as ``<ws>/<name>.txt``.
        subbsnID: subbasin ID whose records are read.
        stime: start of the accepted period (inclusive).
        etime: end of the accepted period (inclusive).

    Returns:
        1. Matched variable names, [var1, var2, ...], i.e., those with at
           least one record inside the period.
        2. Simulation data dict of all plotted variables, with UTCDATETIME.
           {Datetime: [value_of_var1, value_of_var2, ...], ...}
    """
    matched_vars = list()
    sim_data_dict = OrderedDict()
    for var_name in plot_vars:
        txtfile = ws + os.path.sep + var_name + '.txt'
        if not FileClass.is_file_exists(txtfile):
            print('WARNING: Simulation variable file: %s is not existed!' % txtfile)
            continue
        in_target_section = False
        has_data = False
        for item in read_data_items_from_txt(txtfile):
            fields = StringClass.split_string(item[0], ' ', elim_empty=True)
            if len(fields) == 2:
                # Subbasin header line.
                is_target = int(fields[1]) == subbsnID
                if is_target and not in_target_section:
                    in_target_section = True
                elif not is_target and in_target_section:
                    break  # the section of the requested subbasin has ended
            if not in_target_section or len(fields) != 3:
                continue
            date_str = '%s %s' % (fields[0], fields[1])
            sim_datetime = StringClass.get_datetime(date_str, "%Y-%m-%d %H:%M:%S")
            if stime <= sim_datetime <= etime:
                sim_data_dict.setdefault(sim_datetime, list()).append(float(fields[2]))
                has_data = True
        if has_data:
            matched_vars.append(var_name)
    print('Read simulation from %s to %s done.' % (stime.strftime('%c'),
                                                   etime.strftime('%c')))
    return matched_vars, sim_data_dict
def ParamDefs(self):
    """Read the parameter ranges definition file (``self.cfg.param_range_def``).

    Each valid record has the form ``name,lower_bound,upper_bound``, e.g.,
        Param1,0,1
        Param2,0.5,1.2
        Param3,-1.0,1.0
    Records with fewer than three fields are skipped, and so are parameters
    whose name is not found in the 'PARAMETERS' collection of the model
    database (a warning is printed for these).

    Returns:
        a dictionary containing:
        - names - the names of the parameters
        - bounds - a list of lists of lower and upper bounds
        - num_vars - a scalar indicating the number of variables
                     (the length of names)

    Raises:
        ValueError: if the parameter ranges definition file does not exist.
    """
    # Return the cached definitions if they were already read.
    if self.param_defs:
        return self.param_defs

    client = ConnectMongoDB(self.cfg.model.host, self.cfg.model.port)
    conn = client.get_conn()
    db = conn[self.cfg.model.db_name]
    collection = db['PARAMETERS']

    if not FileClass.is_file_exists(self.cfg.param_range_def):
        raise ValueError('Parameters definition file: %s is not'
                         ' existed!' % self.cfg.param_range_def)

    names = list()
    bounds = list()
    for item in read_data_items_from_txt(self.cfg.param_range_def):
        if len(item) < 3:
            continue
        # find_one() is a plain existence check: unlike find(..., no_cursor_timeout=True)
        # it leaves no never-expiring cursor open on the server, and it does not
        # depend on the deprecated Cursor.count().
        if collection.find_one({'NAME': item[0]}) is None:
            print('WARNING: parameter %s is not existed!' % item[0])
            continue
        names.append(item[0])
        bounds.append([float(item[1]), float(item[2])])

    self.param_defs = {
        'names': names,
        'bounds': bounds,
        'num_vars': len(names)
    }
    return self.param_defs
def __init__(self, cf):
    # type: (ConfigParser) -> None
    """Initialization.

    Extends ``SAConfig.__init__`` with the settings of one BMP type on one
    type of spatial units: the required keys are validated, the UNITJSON
    file is loaded, and the lookup containers are created (empty).

    Raises:
        ValueError: if a required key is missing from the BMP settings or
            from ``self.eval_info``, or if the UNITJSON content lacks
            ``overview`` or ``overview['all_units']``.
        Exception: if the UNITJSON file does not exist.
    """
    SAConfig.__init__(self, cf)  # initialize base class first

    # 1. Check the required key and values
    self.bmpid = -1
    units_json = ''
    # In current version, only one type of BMP and one type of spatial units are
    # allowed, so only the first item of bmps_info is considered.
    first_bmp = next(iter(viewitems(self.bmps_info)), None)
    if first_bmp is not None:
        cbmpid, cbmpdict = first_bmp
        for required_key in ('COLLECTION', 'DISTRIBUTION', 'SUBSCENARIO', 'UNITJSON'):
            if required_key not in cbmpdict:
                raise ValueError(
                    '%s: MUST be provided in BMPs_cfg_units or BMPs_info!' % required_key)
        self.bmpid = cbmpid
        units_json = cbmpdict.get('UNITJSON')
    for eval_key in ('ENVEVAL', 'BASE_ENV'):
        if eval_key not in self.eval_info:
            raise ValueError('%s: MUST be provided in Eval_info!' % eval_key)
    bmp_info = self.bmps_info[self.bmpid]
    self.orignal_dist = bmp_info['DISTRIBUTION']

    # 2. Spatial units information
    unitsf = self.model.model_dir + os.sep + units_json
    if not FileClass.is_file_exists(unitsf):
        raise Exception('UNITJSON file %s is not existed!' % unitsf)
    with open(unitsf, 'r', encoding='utf-8') as updownfo:
        raw_units = json.load(updownfo)
    self.units_infos = UtilClass.decode_strs_in_dict(raw_units)
    if 'overview' not in self.units_infos:
        raise ValueError('overview MUST be existed in the UNITJSON file.')
    overview = self.units_infos['overview']
    if 'all_units' not in overview:
        raise ValueError(
            'all_units MUST be existed in overview dict of UNITJSON.')
    self.units_num = overview['all_units']  # type: int
    self.genes_num = self.units_num  # one gene per spatial unit

    # 3. Collection name and subscenario IDs
    self.bmps_coll = bmp_info.get('COLLECTION')  # type: str
    self.bmps_subids = bmp_info.get('SUBSCENARIO')  # type: List[int]

    # 4. Construct the dict of gene index to unit ID, and unit ID to gene index
    #    (only the empty containers are created here)
    self.unit_to_gene = OrderedDict()  # type: OrderedDict[int, int]
    self.gene_to_unit = dict()  # type: Dict[int, int]

    # 5. Construct the upstream-downstream units of each unit if necessary
    #    (only the empty container is created here)
    self.updown_units = dict()  # type: Dict[int, Dict[AnyStr, List[int]]]
def __init__(self, cf, method='nsga2'):
    """Initialization of the auto-calibration configuration.

    Args:
        cf: ConfigParser-like object of the *.ini file (``sections``,
            ``has_option`` and ``get`` are used).
        method: optimization method; only 'nsga2' creates ``self.opt``.

    Raises:
        ValueError: if the [CALI_Settings] section or the calibration period
            is missing, a time string cannot be parsed, or a period's start
            is not earlier than its end.
        IOError: if the parameter ranges definition file does not exist.
    """
    # 1. SEIMS model related
    self.model = ParseSEIMSConfig(cf)
    # 2. Common settings of auto-calibration
    if 'CALI_Settings' not in cf.sections():
        raise ValueError(
            "[CALI_Settings] section MUST be existed in *.ini file.")
    self.param_range_def = 'cali_param_rng.def'
    if cf.has_option('CALI_Settings', 'paramrngdef'):
        self.param_range_def = cf.get('CALI_Settings', 'paramrngdef')
    # The definition file is looked up inside the model directory.
    self.param_range_def = self.model.model_dir + os.path.sep + self.param_range_def
    if not FileClass.is_file_exists(self.param_range_def):
        raise IOError('Ranges of parameters MUST be provided!')

    if not (cf.has_option('CALI_Settings', 'cali_time_start') and
            cf.has_option('CALI_Settings', 'cali_time_end')):
        raise ValueError("Start and end time of Calibration "
                         "MUST be specified in [CALI_Settings].")
    # The validation period is optional: define the attributes up front so
    # that they exist (as None) even when no validation period is configured.
    self.vali_stime = None
    self.vali_etime = None
    try:
        # UTCTIME
        tstart = cf.get('CALI_Settings', 'cali_time_start')
        tend = cf.get('CALI_Settings', 'cali_time_end')
        self.cali_stime = StringClass.get_datetime(tstart)
        self.cali_etime = StringClass.get_datetime(tend)
        self.calc_validation = False
        if cf.has_option('CALI_Settings', 'vali_time_start') and \
                cf.has_option('CALI_Settings', 'vali_time_end'):
            tstart = cf.get('CALI_Settings', 'vali_time_start')
            tend = cf.get('CALI_Settings', 'vali_time_end')
            self.vali_stime = StringClass.get_datetime(tstart)
            self.vali_etime = StringClass.get_datetime(tend)
            self.calc_validation = True
    except ValueError:
        raise ValueError(
            'The time format MUST be "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS".'
        )
    if self.cali_stime >= self.cali_etime or (
            self.calc_validation and self.vali_stime >= self.vali_etime):
        raise ValueError("Wrong time setted in [CALI_Settings]!")

    # 3. Parameters settings for specific optimization algorithm
    self.opt_mtd = method
    self.opt = None
    if self.opt_mtd == 'nsga2':
        self.opt = ParseNSGA2Config(cf, self.model.model_dir)
def ParamDefs(self):
    """Read the parameter ranges definition file (``self.cfg.param_range_def``).

    Each valid record has the form ``name,lower_bound,upper_bound``, e.g.,
        Param1,0,1
        Param2,0.5,1.2
        Param3,-1.0,1.0
    Records with fewer than three fields are skipped, and so are parameters
    whose name is not found in the 'PARAMETERS' collection of the model
    database (a warning is printed for these).

    Returns:
        a dictionary containing:
        - names - the names of the parameters
        - bounds - a list of lists of lower and upper bounds
        - num_vars - a scalar indicating the number of variables
                     (the length of names)

    Raises:
        ValueError: if the parameter ranges definition file does not exist.
    """
    # Return the cached definitions if they were already read.
    if self.param_defs:
        return self.param_defs

    client = ConnectMongoDB(self.cfg.model.host, self.cfg.model.port)
    conn = client.get_conn()
    db = conn[self.cfg.model.db_name]
    collection = db['PARAMETERS']

    if not FileClass.is_file_exists(self.cfg.param_range_def):
        raise ValueError('Parameters definition file: %s is not'
                         ' existed!' % self.cfg.param_range_def)

    names = list()
    bounds = list()
    for item in read_data_items_from_txt(self.cfg.param_range_def):
        if len(item) < 3:
            continue
        # find_one() is a plain existence check: unlike find(..., no_cursor_timeout=True)
        # it leaves no never-expiring cursor open on the server, and it does not
        # depend on the deprecated Cursor.count().
        if collection.find_one({'NAME': item[0]}) is None:
            print('WARNING: parameter %s is not existed!' % item[0])
            continue
        names.append(item[0])
        bounds.append([float(item[1]), float(item[2])])

    self.param_defs = {'names': names, 'bounds': bounds, 'num_vars': len(names)}
    return self.param_defs
def __init__(self, cf, method='nsga2'):
    # type: (ConfigParser, str) -> None
    """Initialization of the auto-calibration configuration.

    Reads the [CALI_Settings] section: the parameter ranges definition file,
    the calibration period and the optional validation period. Then creates
    the optimization and plot settings.

    Raises:
        ValueError: if the section or the calibration period is missing, or a
            period's start is not earlier than its end.
        IOError: if the parameter ranges definition file does not exist.
    """
    section = 'CALI_Settings'
    # 1. SEIMS model related
    self.model = ParseSEIMSConfig(cf)

    # 2. Common settings of auto-calibration
    if section not in cf.sections():
        raise ValueError(
            "[CALI_Settings] section MUST be existed in *.ini file.")
    range_def_name = 'cali_param_rng.def'
    if cf.has_option(section, 'paramrngdef'):
        range_def_name = cf.get(section, 'paramrngdef')
    # The definition file is looked up inside the model directory.
    self.param_range_def = self.model.model_dir + os.path.sep + range_def_name
    if not FileClass.is_file_exists(self.param_range_def):
        raise IOError('Ranges of parameters MUST be provided!')

    # UTCTIME of calibration and validation (optional) periods
    has_cali_period = (cf.has_option(section, 'cali_time_start') and
                       cf.has_option(section, 'cali_time_end'))
    if not has_cali_period:
        raise ValueError("Start and end time of Calibration "
                         "MUST be specified in [CALI_Settings].")
    self.cali_stime = parse_datetime_from_ini(cf, section, 'cali_time_start')
    self.cali_etime = parse_datetime_from_ini(cf, section, 'cali_time_end')
    self.vali_stime = parse_datetime_from_ini(cf, section, 'vali_time_start')
    self.vali_etime = parse_datetime_from_ini(cf, section, 'vali_time_end')
    # Validation is calculated only when both of its bounds are available.
    self.calc_validation = bool(self.vali_stime and self.vali_etime)

    invalid_cali = self.cali_stime >= self.cali_etime
    if invalid_cali or (self.calc_validation and self.vali_stime >= self.vali_etime):
        raise ValueError("Wrong time settings in [CALI_Settings]!")

    # 3. Parameters settings for specific optimization algorithm
    self.opt_mtd = method
    self.opt = None
    if self.opt_mtd == 'nsga2':
        self.opt = ParseNSGA2Config(cf, self.model.model_dir,
                                    'CALI_NSGA2_Gen_%d_Pop_%d')

    # 4. (Optional) Plot settings for matplotlib
    self.plot_cfg = PlotConfig(cf)
def output_wgs84_geojson(cfg):
    """Convert ESRI shapefile to GeoJson based on WGS84 coordinate.

    The source spatial reference is taken from the raster ``cfg.dem``. The
    reach, subbasin, basin and outlet shapefiles in ``cfg.vecs`` are each
    converted to their GeoJSON counterpart, replacing any existing file.

    Raises:
        ValueError: if the Proj4 string exported from the raster is empty.
    """
    proj_srs = RasterUtilClass.read_raster(cfg.dem).srs.ExportToProj4()
    if not proj_srs:
        raise ValueError('The source raster %s has not '
                         'coordinate, which is required!' % cfg.dem)
    wgs84_srs = 'EPSG:4326'

    vecs = cfg.vecs
    # name -> (source shapefile, destination GeoJSON file)
    shp_to_json = {'reach': (vecs.reach, vecs.json_reach),
                   'subbasin': (vecs.subbsn, vecs.json_subbsn),
                   'basin': (vecs.bsn, vecs.json_bsn),
                   'outlet': (vecs.outlet, vecs.json_outlet)}
    for shp_file, json_file in list(shp_to_json.values()):
        # delete if geojson file already existed
        if FileClass.is_file_exists(json_file):
            os.remove(json_file)
        VectorUtilClass.convert2geojson(json_file, proj_srs, wgs84_srs, shp_file)
def __init__(self, cf, method='nsga2'):
    """Initialization of the auto-calibration configuration.

    Args:
        cf: ConfigParser-like object of the *.ini file (``sections``,
            ``has_option`` and ``get`` are used).
        method: optimization method; only 'nsga2' creates ``self.opt``.

    Raises:
        ValueError: if the [CALI_Settings] section or the calibration period
            is missing, a time string cannot be parsed, or a period's start
            is not earlier than its end.
        IOError: if the parameter ranges definition file does not exist.
    """
    # 1. SEIMS model related
    self.model = ParseSEIMSConfig(cf)
    # 2. Common settings of auto-calibration
    if 'CALI_Settings' not in cf.sections():
        raise ValueError("[CALI_Settings] section MUST be existed in *.ini file.")
    self.param_range_def = 'cali_param_rng.def'
    if cf.has_option('CALI_Settings', 'paramrngdef'):
        self.param_range_def = cf.get('CALI_Settings', 'paramrngdef')
    # The definition file is looked up inside the model directory.
    self.param_range_def = self.model.model_dir + os.path.sep + self.param_range_def
    if not FileClass.is_file_exists(self.param_range_def):
        raise IOError('Ranges of parameters MUST be provided!')

    if not (cf.has_option('CALI_Settings', 'cali_time_start') and
            cf.has_option('CALI_Settings', 'cali_time_end')):
        raise ValueError("Start and end time of Calibration "
                         "MUST be specified in [CALI_Settings].")
    # The validation period is optional: define the attributes up front so
    # that they exist (as None) even when no validation period is configured.
    self.vali_stime = None
    self.vali_etime = None
    try:
        # UTCTIME
        tstart = cf.get('CALI_Settings', 'cali_time_start')
        tend = cf.get('CALI_Settings', 'cali_time_end')
        self.cali_stime = StringClass.get_datetime(tstart)
        self.cali_etime = StringClass.get_datetime(tend)
        self.calc_validation = False
        if cf.has_option('CALI_Settings', 'vali_time_start') and \
                cf.has_option('CALI_Settings', 'vali_time_end'):
            tstart = cf.get('CALI_Settings', 'vali_time_start')
            tend = cf.get('CALI_Settings', 'vali_time_end')
            self.vali_stime = StringClass.get_datetime(tstart)
            self.vali_etime = StringClass.get_datetime(tend)
            self.calc_validation = True
    except ValueError:
        raise ValueError('The time format MUST be "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS".')
    if self.cali_stime >= self.cali_etime or (self.calc_validation and
                                              self.vali_stime >= self.vali_etime):
        raise ValueError("Wrong time setted in [CALI_Settings]!")

    # 3. Parameters settings for specific optimization algorithm
    self.opt_mtd = method
    self.opt = None
    if self.opt_mtd == 'nsga2':
        self.opt = ParseNSGA2Config(cf, self.model.model_dir)
def check_watershed_delineation_results(cfg):
    """Check whether the outputs of watershed delineation already exist.

    Returns:
        True if every required file in ``cfg.pretaudem`` exists, False as
        soon as one of them is missing.
    """
    exists = FileClass.is_file_exists
    pre = cfg.pretaudem

    if not (exists(pre.filldem) and exists(pre.outlet_m)):
        return False
    # drptxt is only required when no positive stream threshold is configured.
    if cfg.d8_stream_thresh <= 0 and not exists(pre.drptxt):
        return False
    if not exists(pre.d8flow):
        return False
    # The Dinf outputs are only required when cfg.flow_model == 1.
    if cfg.flow_model == 1 and not (exists(pre.dinf) and exists(pre.dinf_slp)):
        return False
    # NOTE(review): the flattened original does not show whether this check
    # sat inside the flow_model branch; it is applied unconditionally here.
    return bool(exists(pre.stream_pd))
def generate_samples(self):
    """Generate the parameter samples and write them to a single text file.

    When no samples are held in memory and the samples file already exists,
    the samples are loaded from that file instead of being generated.
    ``self.run_count`` is set to the number of samples in both cases.

    Raises:
        ValueError: if ``self.cfg.method`` is neither 'morris' nor 'fast'.
    """
    no_samples_yet = self.param_values is None or len(self.param_values) == 0
    if no_samples_yet and FileClass.is_file_exists(self.cfg.outfiles.param_values_txt):
        self.param_values = numpy.loadtxt(self.cfg.outfiles.param_values_txt)
        self.run_count = len(self.param_values)
        return

    if not self.param_defs:
        self.read_param_ranges()

    method = self.cfg.method
    if method == 'morris':
        morris_cfg = self.cfg.morris
        self.param_values = morris_spl(self.param_defs, morris_cfg.N,
                                       morris_cfg.num_levels, morris_cfg.grid_jump,
                                       optimal_trajectories=morris_cfg.optimal_t,
                                       local_optimization=morris_cfg.local_opt)
    elif method == 'fast':
        fast_cfg = self.cfg.fast
        self.param_values = fast_spl(self.param_defs, fast_cfg.N, fast_cfg.M)
    else:
        raise ValueError('%s method is not supported now!' % self.cfg.method)
    self.run_count = len(self.param_values)

    # Save as txt file, which can be loaded by numpy.loadtxt()
    numpy.savetxt(self.cfg.outfiles.param_values_txt,
                  self.param_values, delimiter=str(' '), fmt=str('%.4f'))
def generate_samples(self):
    """Generate the parameter samples and write them to a single text file.

    When no samples are held in memory and the samples file already exists,
    the samples are loaded from that file instead of being generated.
    ``self.run_count`` is set to the number of samples in both cases.

    Raises:
        ValueError: if ``self.cfg.method`` is neither 'morris' nor 'fast'.
    """
    no_samples_yet = self.param_values is None or len(self.param_values) == 0
    if no_samples_yet and FileClass.is_file_exists(self.cfg.outfiles.param_values_txt):
        self.param_values = numpy.loadtxt(self.cfg.outfiles.param_values_txt)
        self.run_count = len(self.param_values)
        return

    if not self.param_defs:
        self.read_param_ranges()

    method = self.cfg.method
    if method == 'morris':
        morris_cfg = self.cfg.morris
        self.param_values = morris_spl(self.param_defs, morris_cfg.N,
                                       morris_cfg.num_levels, morris_cfg.grid_jump,
                                       optimal_trajectories=morris_cfg.optimal_t,
                                       local_optimization=morris_cfg.local_opt)
    elif method == 'fast':
        fast_cfg = self.cfg.fast
        self.param_values = fast_spl(self.param_defs, fast_cfg.N, fast_cfg.M)
    else:
        raise ValueError('%s method is not supported now!' % self.cfg.method)
    self.run_count = len(self.param_values)

    # Save as txt file, which can be loaded by numpy.loadtxt()
    numpy.savetxt(self.cfg.outfiles.param_values_txt,
                  self.param_values, delimiter=' ', fmt='%.4f')
def get_cali_config():
    """Parse the command line arguments of parameters calibration.

    Returns:
        cf: ConfigParser object of the *.ini file.
        mtd: Calibration method name, currently, 'nsga2' is supported.

    Raises:
        ImportError: if the configuration file given by ``-ini`` does not exist.
    """
    parser = argparse.ArgumentParser(description="Execute parameters calibration.")
    parser.add_argument('-ini', type=str, help="Full path of configuration file")
    # Mutually exclusive group of method switches (only one method so far).
    method_group = parser.add_mutually_exclusive_group()
    method_group.add_argument('-nsga2', action='store_true', help='Run NSGA-II method')
    args = parser.parse_args()

    # 'nsga2' is both the default and the only selectable method, so the
    # result is the same whether or not -nsga2 is given.
    psa_mtd = 'nsga2'

    ini_file = args.ini
    if not FileClass.is_file_exists(ini_file):
        raise ImportError('Configuration file is not existed: %s' % ini_file)
    cf = ConfigParser()
    cf.read(ini_file)
    return cf, psa_mtd
def evaluate_models(self):
    """Run SEIMS for every sampled parameter set and save the objective values.

    If ``self.output_values`` is empty and ``self.cfg.outfiles.output_values_txt``
    already exists, the values are loaded from that file and nothing is run.

    Otherwise the ``self.run_count`` model runs are split into batches. Each
    batch is executed through ``create_run_model`` (with SCOOP when it can be
    imported, serially otherwise) and its objective values are written to
    ``outputs_<idx>.txt`` in ``self.cfg.outfiles.output_values_dir``. A batch
    whose output file already exists is skipped, so an interrupted evaluation
    can be resumed. Finally the per-batch files are merged into
    ``self.output_values`` and saved to ``self.cfg.outfiles.output_values_txt``.

    Side effects: sets ``self.objnames`` and ``self.output_values``, writes
    the pickled models, execution times and objective names, and deletes each
    model's output directory.
    """
    if self.output_values is None or len(self.output_values) == 0:
        if FileClass.is_file_exists(self.cfg.outfiles.output_values_txt):
            self.output_values = numpy.loadtxt(self.cfg.outfiles.output_values_txt)
            return
    assert (self.run_count > 0)

    # model configurations
    model_cfg_dict = self.model.ConfigDict
    # Parameters to be evaluated
    input_eva_vars = self.cfg.evaluate_params

    # split tasks if needed
    task_num = self.run_count // 480  # In our cluster, the largest workers number is 96.
    if task_num == 0:
        split_seqs = [range(self.run_count)]
    else:
        split_seqs = numpy.array_split(numpy.arange(self.run_count), task_num + 1)
        split_seqs = [a.tolist() for a in split_seqs]

    # Loop partitioned tasks
    run_model_stime = time.time()
    exec_times = list()  # execute time of all model runs
    for idx, cali_seqs in enumerate(split_seqs):
        cur_out_file = '%s/outputs_%d.txt' % (self.cfg.outfiles.output_values_dir, idx)
        if FileClass.is_file_exists(cur_out_file):
            continue  # this batch was finished by a previous invocation
        model_cfg_dict_list = list()
        for caliid in cali_seqs:
            tmpcfg = deepcopy(model_cfg_dict)
            tmpcfg['calibration_id'] = caliid
            model_cfg_dict_list.append(tmpcfg)
        try:  # parallel on multiprocessor or clusters using SCOOP
            from scoop import futures
            output_models = list(futures.map(create_run_model, model_cfg_dict_list))
        except (ImportError, ImportWarning):  # serial
            # A tuple is required here: `ImportError or ImportWarning` is just
            # `ImportError`, so ImportWarning was never actually caught.
            output_models = list(map(create_run_model, model_cfg_dict_list))
        time.sleep(0.1)  # Wait a moment in case of unpredictable file system error

        # Read observation data from MongoDB only once
        if len(output_models) < 1:  # Although this is not gonna happen, just for insurance.
            continue
        obs_vars, obs_data_dict = output_models[0].ReadOutletObservations(input_eva_vars)
        if (len(obs_vars)) < 1:  # Make sure the observation data exists.
            continue

        # Loop the executed models
        eva_values = list()
        for imod, mod_obj in enumerate(output_models):
            # Read executable timespan of each model run
            exec_times.append(mod_obj.GetTimespan())
            # Set observation data since there is no need to read from MongoDB.
            if imod != 0:
                mod_obj.SetOutletObservations(obs_vars, obs_data_dict)
            # Read simulation
            mod_obj.ReadTimeseriesSimulations(self.cfg.psa_stime, self.cfg.psa_etime)
            # Calculate NSE, R2, RMSE, PBIAS, RSR, ln(NSE), NSE1, and NSE3
            self.objnames, obj_values = mod_obj.CalcTimeseriesStatistics(mod_obj.sim_obs_dict)
            eva_values.append(obj_values)
            # delete model output directory for saving storage
            rmtree(mod_obj.output_dir)
        if not isinstance(eva_values, numpy.ndarray):
            eva_values = numpy.array(eva_values)
        numpy.savetxt(cur_out_file, eva_values, delimiter=' ', fmt='%.4f')
        # Save as pickle data for further usage. DO not save all models which maybe very large!
        cur_model_out_file = '%s/models_%d.pickle' % (self.cfg.outfiles.output_values_dir, idx)
        with open(cur_model_out_file, 'wb') as f:
            pickle.dump(output_models, f)

    exec_times = numpy.array(exec_times)
    # When every batch was skipped no model ran in this invocation: the
    # reductions below fail on a zero-size array, and an earlier timing file
    # should not be overwritten with an empty one.
    if exec_times.size > 0:
        numpy.savetxt('%s/exec_time_allmodelruns.txt' % self.cfg.psa_outpath,
                      exec_times, delimiter=' ', fmt='%.4f')
        print('Running time of all SEIMS models:\n'
              '\tIO\tCOMP\tSIMU\tRUNTIME\n'
              'MAX\t%s\n'
              'MIN\t%s\n'
              'AVG\t%s\n'
              'SUM\t%s\n' % ('\t'.join('%.3f' % v for v in exec_times.max(0)),
                             '\t'.join('%.3f' % v for v in exec_times.min(0)),
                             '\t'.join('%.3f' % v for v in exec_times.mean(0)),
                             '\t'.join('%.3f' % v for v in exec_times.sum(0))))
    print('Running time of executing SEIMS models: %.2fs' % (time.time() - run_model_stime))

    # Save objective names as pickle data for further usage
    # NOTE(review): self.objnames is only assigned inside the batch loop above;
    # if no batch ran, whatever value it held before is pickled - confirm.
    with open('%s/objnames.pickle' % self.cfg.psa_outpath, 'wb') as f:
        pickle.dump(self.objnames, f)

    # load the first part of output values
    self.output_values = numpy.loadtxt('%s/outputs_0.txt' % self.cfg.outfiles.output_values_dir)
    if task_num == 0:
        # Single batch: its file is the final result, the batch directory can go.
        import shutil
        shutil.move('%s/outputs_0.txt' % self.cfg.outfiles.output_values_dir,
                    self.cfg.outfiles.output_values_txt)
        shutil.rmtree(self.cfg.outfiles.output_values_dir)
        return
    for idx in range(1, task_num + 1):
        tmp_outputs = numpy.loadtxt('%s/outputs_%d.txt' %
                                    (self.cfg.outfiles.output_values_dir, idx))
        self.output_values = numpy.concatenate((self.output_values, tmp_outputs))
    numpy.savetxt(self.cfg.outfiles.output_values_txt, self.output_values,
                  delimiter=' ', fmt='%.4f')
def evaluate_models(self):
    """Run SEIMS for every sampled parameter set and save the objective values.

    If ``self.output_values`` is empty and ``self.cfg.outfiles.output_values_txt``
    already exists, the values are loaded from that file and nothing is run.

    Otherwise the ``self.run_count`` model runs are split into batches. Each
    batch is executed through ``create_run_model`` (with SCOOP when it can be
    imported, serially otherwise) and its objective values are written to
    ``outputs_<idx>.txt`` in ``self.cfg.outfiles.output_values_dir``. A batch
    whose output file already exists is skipped, so an interrupted evaluation
    can be resumed. Finally the per-batch files are merged into
    ``self.output_values`` and saved to ``self.cfg.outfiles.output_values_txt``.

    Side effects: sets ``self.objnames`` and ``self.output_values``, writes
    the pickled models, execution times and objective names, and calls
    ``clean()`` on every executed model.
    """
    if self.output_values is None or len(self.output_values) == 0:
        if FileClass.is_file_exists(self.cfg.outfiles.output_values_txt):
            self.output_values = numpy.loadtxt(self.cfg.outfiles.output_values_txt)
            return
    assert (self.run_count > 0)

    # model configurations
    model_cfg_dict = self.model.ConfigDict
    # Parameters to be evaluated
    input_eva_vars = self.cfg.evaluate_params

    # split tasks if needed
    task_num = self.run_count // 480  # In our cluster, the largest workers number is 96.
    if task_num == 0:
        split_seqs = [range(self.run_count)]
    else:
        split_seqs = numpy.array_split(numpy.arange(self.run_count), task_num + 1)
        split_seqs = [a.tolist() for a in split_seqs]

    # Loop partitioned tasks
    run_model_stime = time.time()
    exec_times = list()  # execute time of all model runs
    for idx, cali_seqs in enumerate(split_seqs):
        cur_out_file = '%s/outputs_%d.txt' % (self.cfg.outfiles.output_values_dir, idx)
        if FileClass.is_file_exists(cur_out_file):
            continue  # this batch was finished by a previous invocation
        model_cfg_dict_list = list()
        for caliid in cali_seqs:
            tmpcfg = deepcopy(model_cfg_dict)
            tmpcfg['calibration_id'] = caliid
            model_cfg_dict_list.append(tmpcfg)
        try:  # parallel on multiprocessor or clusters using SCOOP
            from scoop import futures
            output_models = list(futures.map(create_run_model, model_cfg_dict_list))
        except (ImportError, ImportWarning):  # serial
            # A tuple is required here: `ImportError or ImportWarning` is just
            # `ImportError`, so ImportWarning was never actually caught.
            output_models = list(map(create_run_model, model_cfg_dict_list))
        time.sleep(0.1)  # Wait a moment in case of unpredictable file system error

        # Read observation data from MongoDB only once
        if len(output_models) < 1:  # Although this is not gonna happen, just for insurance.
            continue
        obs_vars, obs_data_dict = output_models[0].ReadOutletObservations(input_eva_vars)
        if (len(obs_vars)) < 1:  # Make sure the observation data exists.
            continue

        # Loop the executed models
        eva_values = list()
        for imod, mod_obj in enumerate(output_models):
            # Read executable timespan of each model run
            exec_times.append(mod_obj.GetTimespan())
            # Set observation data since there is no need to read from MongoDB.
            if imod != 0:
                mod_obj.SetOutletObservations(obs_vars, obs_data_dict)
            # Read simulation
            mod_obj.ReadTimeseriesSimulations(self.cfg.psa_stime, self.cfg.psa_etime)
            # Calculate NSE, R2, RMSE, PBIAS, RSR, ln(NSE), NSE1, and NSE3
            self.objnames, obj_values = mod_obj.CalcTimeseriesStatistics(mod_obj.sim_obs_dict)
            eva_values.append(obj_values)
            # delete model output directory and GridFS files for saving storage
            mod_obj.clean()
        if not isinstance(eva_values, numpy.ndarray):
            eva_values = numpy.array(eva_values)
        numpy.savetxt(cur_out_file, eva_values, delimiter=str(' '), fmt=str('%.4f'))
        # Save as pickle data for further usage. DO not save all models which maybe very large!
        cur_model_out_file = '%s/models_%d.pickle' % (self.cfg.outfiles.output_values_dir, idx)
        with open(cur_model_out_file, 'wb') as f:
            pickle.dump(output_models, f)

    exec_times = numpy.array(exec_times)
    # When every batch was skipped no model ran in this invocation: the
    # reductions below fail on a zero-size array, and an earlier timing file
    # should not be overwritten with an empty one.
    if exec_times.size > 0:
        numpy.savetxt('%s/exec_time_allmodelruns.txt' % self.cfg.psa_outpath,
                      exec_times, delimiter=str(' '), fmt=str('%.4f'))
        print('Running time of all SEIMS models:\n'
              '\tIO\tCOMP\tSIMU\tRUNTIME\n'
              'MAX\t%s\n'
              'MIN\t%s\n'
              'AVG\t%s\n'
              'SUM\t%s\n' % ('\t'.join('%.3f' % v for v in exec_times.max(0)),
                             '\t'.join('%.3f' % v for v in exec_times.min(0)),
                             '\t'.join('%.3f' % v for v in exec_times.mean(0)),
                             '\t'.join('%.3f' % v for v in exec_times.sum(0))))
    print('Running time of executing SEIMS models: %.2fs' % (time.time() - run_model_stime))

    # Save objective names as pickle data for further usage
    # NOTE(review): self.objnames is only assigned inside the batch loop above;
    # if no batch ran, whatever value it held before is pickled - confirm.
    with open('%s/objnames.pickle' % self.cfg.psa_outpath, 'wb') as f:
        pickle.dump(self.objnames, f)

    # load the first part of output values
    self.output_values = numpy.loadtxt('%s/outputs_0.txt' % self.cfg.outfiles.output_values_dir)
    if task_num == 0:
        # Single batch: its file is the final result, the batch directory can go.
        import shutil
        shutil.move('%s/outputs_0.txt' % self.cfg.outfiles.output_values_dir,
                    self.cfg.outfiles.output_values_txt)
        shutil.rmtree(self.cfg.outfiles.output_values_dir)
        return
    for idx in range(1, task_num + 1):
        tmp_outputs = numpy.loadtxt('%s/outputs_%d.txt' %
                                    (self.cfg.outfiles.output_values_dir, idx))
        self.output_values = numpy.concatenate((self.output_values, tmp_outputs))
    numpy.savetxt(self.cfg.outfiles.output_values_txt, self.output_values,
                  delimiter=str(' '), fmt=str('%.4f'))
def read_param_ranges(self):
    """Read the parameter range definition file into ``self.param_defs``.

    Each record of the definition file (``self.cfg.param_range_def``) is
        name,lower_bound,upper_bound,group,dist
    where group and dist are optional, e.g.,
        Param1,0,1[,Group1][,dist1]
        Param2,0,1[,Group2][,dist2]
        Param3,0,1[,Group3][,dist3]

    If ``self.param_defs`` is empty and ``self.cfg.outfiles.param_defs_json``
    already exists, the definitions are loaded from that JSON file instead.
    Records with fewer than three fields, and parameters whose name is not
    found in the 'PARAMETERS' collection of the model database, are skipped
    (the latter with a warning).

    The method returns nothing; it sets ``self.param_defs`` to a dictionary
    containing:
    - names - the names of the parameters
    - bounds - a list of lists of lower and upper bounds
    - num_vars - a scalar indicating the number of variables
                 (the length of names)
    - groups - a list of group names (strings) for each variable, None if
               every parameter forms its own group
    - dists - a list of distributions for the problem,
              None if not specified or all uniform
    and saves that dictionary to ``self.cfg.outfiles.param_defs_json``.

    Raises:
        ValueError: if all parameters share one single group.
    """
    # read param_defs.json if already existed
    if not self.param_defs:
        if FileClass.is_file_exists(self.cfg.outfiles.param_defs_json):
            with open(self.cfg.outfiles.param_defs_json, 'r', encoding='utf-8') as f:
                self.param_defs = UtilClass.decode_strs_in_dict(json.load(f))
            return

    # read param_range_def file and output to json file
    client = ConnectMongoDB(self.model.host, self.model.port)
    conn = client.get_conn()
    db = conn[self.model.db_name]
    collection = db['PARAMETERS']

    names = list()
    bounds = list()
    groups = list()
    dists = list()
    items = read_data_items_from_txt(self.cfg.param_range_def)
    for item in items:
        if len(item) < 3:
            continue
        # Check that the parameter exists; print a warning if it does not.
        # find_one() is used because the former find(..., no_cursor_timeout=True)
        # left one never-closed cursor per record and relied on the deprecated
        # Cursor.count().
        if collection.find_one({'NAME': item[0]}) is None:
            print('WARNING: parameter %s is not existed!' % item[0])
            continue
        names.append(item[0])
        bounds.append([float(item[1]), float(item[2])])
        # If the fourth column does not contain a group name, use
        # the parameter name
        groups.append(item[3] if len(item) >= 4 else item[0])
        dists.append(item[4] if len(item) >= 5 else 'unif')

    if groups == names:
        groups = None
    elif len(set(groups)) == 1:
        raise ValueError('Only one group defined, results will not bemeaningful')

    # setting dists to none if all are uniform
    # because non-uniform scaling is not needed
    if all(d == 'unif' for d in dists):
        dists = None

    self.param_defs = {'names': names, 'bounds': bounds,
                       'num_vars': len(names), 'groups': groups, 'dists': dists}

    # Save as json, which can be loaded by json.load()
    json_data = json.dumps(self.param_defs, indent=4, cls=SpecialJsonEncoder)
    with open(self.cfg.outfiles.param_defs_json, 'w', encoding='utf-8') as f:
        f.write('%s' % json_data)
def watershed_delineation(np, dem, outlet_file=None, thresh=0, singlebasin=False,
                          workingdir=None, mpi_bin=None, bin_dir=None,
                          logfile=None, runtime_file=None, hostfile=None):
    """Watershed Delineation.

    Runs the TauDEM functions in sequence: pit removal, D8 and Dinf flow
    direction, D8 flow accumulation, an initial stream raster, outlet
    adjustment, stream skeleton, weighted flow accumulation, optional drop
    analysis, the final stream raster, the stream network and the D8 distance
    to stream. Intermediate file names are taken from ``TauDEMFilesUtils``.

    Args:
        np: passed as first argument to every TauDEM call
        dem: DEM path
        outlet_file: outlet file; when None the default outlet name is used
            and ``TauDEM.connectdown`` is invoked with it
        thresh: stream threshold; when <= 0 it is read from the result file
            of ``TauDEM.dropanalysis``
        singlebasin: when True the adjusted outlet is passed to the weighted
            flow accumulation
        workingdir: working directory, defaults to the directory of ``dem``
        mpi_bin, bin_dir: forwarded to every TauDEM call
        logfile: log file path; an existing file is removed first
        runtime_file, hostfile: forwarded to every TauDEM call

    Raises:
        RuntimeError: if drop analysis did not create its result file.
    """
    # 1. Check directories
    if not os.path.exists(dem):
        TauDEM.error('DEM: %s is not existed!' % dem)
    dem = os.path.abspath(dem)
    if workingdir is None:
        workingdir = os.path.dirname(dem)
    namecfg = TauDEMFilesUtils(workingdir)
    workingdir = namecfg.workspace
    UtilClass.mkdir(workingdir)
    # 2. Check log file
    if logfile is not None and FileClass.is_file_exists(logfile):
        os.remove(logfile)
    # 3. Get predefined intermediate file names
    filled_dem = namecfg.filldem
    flow_dir = namecfg.d8flow
    slope = namecfg.slp
    flow_dir_dinf = namecfg.dinf
    slope_dinf = namecfg.dinf_slp
    dir_code_dinf = namecfg.dinf_d8dir
    weight_dinf = namecfg.dinf_weight
    acc = namecfg.d8acc
    stream_raster = namecfg.stream_raster
    default_outlet = namecfg.outlet_pre
    modified_outlet = namecfg.outlet_m
    stream_skeleton = namecfg.stream_pd
    acc_with_weight = namecfg.d8acc_weight
    stream_order = namecfg.stream_order
    ch_network = namecfg.channel_net
    ch_coord = namecfg.channel_coord
    stream_net = namecfg.streamnet_shp
    subbasin = namecfg.subbsn
    dist2_stream_d8 = namecfg.dist2stream_d8

    # 4. perform calculation
    UtilClass.writelog(logfile, "[Output] %d..., %s" % (10, "pitremove DEM..."), 'a')
    TauDEM.pitremove(np, dem, filled_dem, workingdir, mpi_bin, bin_dir,
                     log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, "[Output] %d..., %s" %
                       (20, "Calculating D8 and Dinf flow direction..."), 'a')
    TauDEM.d8flowdir(np, filled_dem, flow_dir, slope, workingdir, mpi_bin, bin_dir,
                     log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    TauDEM.dinfflowdir(np, filled_dem, flow_dir_dinf, slope_dinf, workingdir,
                       mpi_bin, bin_dir, log_file=logfile,
                       runtime_file=runtime_file, hostfile=hostfile)
    DinfUtil.output_compressed_dinf(flow_dir_dinf, dir_code_dinf, weight_dinf)
    UtilClass.writelog(logfile, "[Output] %d..., %s" % (30, "D8 flow accumulation..."), 'a')
    TauDEM.aread8(np, flow_dir, acc, None, None, False, workingdir, mpi_bin, bin_dir,
                  log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, "[Output] %d..., %s" %
                       (40, "Generating stream raster initially..."), 'a')
    # only the mean accumulation is needed as the initial threshold
    _, _, mean_accum, std_accum = RasterUtilClass.raster_statistics(acc)
    TauDEM.threshold(np, acc, stream_raster, mean_accum, workingdir, mpi_bin, bin_dir,
                     log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, "[Output] %d..., %s" % (50, "Moving outlet to stream..."), 'a')
    if outlet_file is None:
        outlet_file = default_outlet
        TauDEM.connectdown(np, flow_dir, acc, outlet_file, wtsd=None,
                           workingdir=workingdir, mpiexedir=mpi_bin, exedir=bin_dir,
                           log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    TauDEM.moveoutletstostrm(np, flow_dir, stream_raster, outlet_file, modified_outlet,
                             workingdir, mpi_bin, bin_dir, log_file=logfile,
                             runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, "[Output] %d..., %s" %
                       (60, "Generating stream skeleton..."), 'a')
    TauDEM.peukerdouglas(np, filled_dem, stream_skeleton, workingdir, mpi_bin, bin_dir,
                         log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, "[Output] %d..., %s" %
                       (70, "Flow accumulation with outlet..."), 'a')
    tmp_outlet = None
    if singlebasin:
        tmp_outlet = modified_outlet
    TauDEM.aread8(np, flow_dir, acc_with_weight, tmp_outlet, stream_skeleton, False,
                  workingdir, mpi_bin, bin_dir, log_file=logfile,
                  runtime_file=runtime_file, hostfile=hostfile)

    if thresh <= 0:  # find the optimal threshold using dropanalysis function
        UtilClass.writelog(logfile, "[Output] %d..., %s" %
                           (75, "Drop analysis to select optimal threshold..."), 'a')
        _, _, mean_accum, std_accum = RasterUtilClass.raster_statistics(acc_with_weight)
        if mean_accum - std_accum < 0:
            minthresh = mean_accum
        else:
            minthresh = mean_accum - std_accum
        maxthresh = mean_accum + std_accum
        numthresh = 20
        logspace = 'true'
        drp_file = namecfg.drptxt
        TauDEM.dropanalysis(np, filled_dem, flow_dir, acc_with_weight, acc_with_weight,
                            modified_outlet, minthresh, maxthresh, numthresh, logspace,
                            drp_file, workingdir, mpi_bin, bin_dir, log_file=logfile,
                            runtime_file=runtime_file, hostfile=hostfile)
        if not FileClass.is_file_exists(drp_file):
            raise RuntimeError("Dropanalysis failed and drp.txt was not created!")
        # `with` closes the file even if reading fails
        with open(drp_file, "r") as drpf:
            temp_contents = drpf.read()
        # the threshold is the text after the last blank of the result file
        (beg, thresh) = temp_contents.rsplit(' ', 1)
        print(thresh)

    UtilClass.writelog(logfile, "[Output] %d..., %s" % (80, "Generating stream raster..."), 'a')
    TauDEM.threshold(np, acc_with_weight, stream_raster, float(thresh), workingdir,
                     mpi_bin, bin_dir, log_file=logfile,
                     runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, "[Output] %d..., %s" % (90, "Generating stream net..."), 'a')
    TauDEM.streamnet(np, filled_dem, flow_dir, acc_with_weight, stream_raster,
                     modified_outlet, stream_order, ch_network, ch_coord, stream_net,
                     subbasin, workingdir, mpi_bin, bin_dir, log_file=logfile,
                     runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, "[Output] %d..., %s" %
                       (95, "Calculating distance to stream (D8)..."), 'a')
    TauDEM.d8hdisttostrm(np, flow_dir, stream_raster, dist2_stream_d8, 1, workingdir,
                         mpi_bin, bin_dir, log_file=logfile,
                         runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, "[Output] %d.., %s" %
                       (100, "Original subbasin delineation is finished!"), 'a')
def pre_processing(cfg):
    """Prepare the topographic attributes (RPI, curvatures, slope, HAND, elevation).

    Nothing is done when ``cfg.flag_preprocess`` is false. Otherwise the
    function runs the watershed delineation when it is needed, derives the
    valley (and, for Skidmore's method, the ridge) sources, computes the
    Relative Position Index with the method selected by ``cfg.rpi_method``,
    the curvatures and the slope, and copies the results to the
    ``cfg.topoparam`` paths. ``cfg.valley``, ``cfg.ridge`` and
    ``cfg.d8_stream_thresh`` may be updated on the way.

    Args:
        cfg: configuration object holding the paths and options used here.

    Returns:
        0 when preprocessing is disabled, else the elapsed time in minutes.
    """
    start_t = time.time()
    if not cfg.flag_preprocess:
        return 0
    single_basin = cfg.outlet is not None
    pretaudem_done = check_watershed_delineation_results(cfg)
    if cfg.valley is None or not FileClass.is_file_exists(cfg.valley) or not pretaudem_done:
        cfg.valley = cfg.pretaudem.stream_raster
        # Watershed delineation based on D8 flow model.
        TauDEMWorkflow.watershed_delineation(cfg.proc, cfg.dem, cfg.outlet,
                                             cfg.d8_stream_thresh, single_basin,
                                             cfg.ws.pre_dir, cfg.mpi_dir, cfg.bin_dir,
                                             logfile=cfg.log.preproc,
                                             hostfile=cfg.hostfile)
    # use outlet_m or not
    outlet_use = cfg.pretaudem.outlet_m if single_basin else None
    # Trailing arguments shared by every TauDEMExtension call below.
    run_args = (cfg.ws.pre_dir, cfg.mpi_dir, cfg.bin_dir, cfg.log.preproc, cfg.hostfile)

    # The status log is held in a `with` block so that it is closed even if
    # one of the processing steps raises.
    with open(cfg.log.preproc, 'a') as log_status:
        log_status.write("Calculating RPI(Relative Position Index)...\n")
        log_status.flush()
        if cfg.flow_model == 1:  # Dinf model, extract stream using the D8 threshold
            if cfg.valley is None or not FileClass.is_file_exists(cfg.valley):
                if cfg.d8_stream_thresh <= 0:
                    with open(cfg.pretaudem.drptxt, "r") as drpf:
                        temp_contents = drpf.read()
                    # the threshold is the text after the last blank
                    (beg, cfg.d8_stream_thresh) = temp_contents.rsplit(' ', 1)
                print(cfg.d8_stream_thresh)
                TauDEMExtension.areadinf(cfg.proc, cfg.pretaudem.dinf,
                                         cfg.pretaudem.dinfacc_weight, outlet_use,
                                         cfg.pretaudem.stream_pd, 'false', *run_args)
                TauDEMExtension.threshold(cfg.proc, cfg.pretaudem.dinfacc_weight,
                                          cfg.pretaudem.stream_dinf,
                                          float(cfg.d8_stream_thresh), *run_args)
                cfg.valley = cfg.pretaudem.stream_dinf
            # calculate Height Above the Nearest Drainage (HAND)
            TauDEMExtension.dinfdistdown(cfg.proc, cfg.pretaudem.dinf, cfg.pretaudem.filldem,
                                         cfg.pretaudem.dinf_slp, cfg.valley,
                                         cfg.dinf_down_stat, 'v', 'false',
                                         cfg.dinf_dist_down_wg,
                                         cfg.pretaudem.dist2stream_v, *run_args)
        else:
            # calculate Height Above the Nearest Drainage (HAND)
            TauDEMExtension.d8distdowntostream(cfg.proc, cfg.pretaudem.d8flow,
                                               cfg.pretaudem.filldem, cfg.valley,
                                               cfg.pretaudem.dist2stream_v, 'v', 1,
                                               *run_args)

        if cfg.rpi_method == 1:
            # calculate RPI based on hydrological proximity measures (Default).
            if cfg.flow_model == 0:  # D8 model
                TauDEMExtension.d8distdowntostream(cfg.proc, cfg.pretaudem.d8flow,
                                                   cfg.pretaudem.filldem, cfg.valley,
                                                   cfg.pretaudem.dist2stream,
                                                   cfg.d8_down_method, 1, *run_args)
                TauDEMExtension.d8distuptoridge(cfg.proc, cfg.pretaudem.d8flow,
                                                cfg.pretaudem.filldem, cfg.ridge,
                                                cfg.pretaudem.distup2rdg,
                                                cfg.d8_up_method, *run_args)
            elif cfg.flow_model == 1:  # Dinf model
                # Dinf distance down
                TauDEMExtension.dinfdistdown(cfg.proc, cfg.pretaudem.dinf,
                                             cfg.pretaudem.filldem, cfg.pretaudem.dinf_slp,
                                             cfg.valley, cfg.dinf_down_stat,
                                             cfg.dinf_down_method, 'false',
                                             cfg.dinf_dist_down_wg,
                                             cfg.pretaudem.dist2stream, *run_args)
                TauDEMExtension.dinfdistuptoridge(cfg.proc, cfg.pretaudem.dinf,
                                                  cfg.pretaudem.filldem,
                                                  cfg.pretaudem.dinf_slp, cfg.propthresh,
                                                  cfg.pretaudem.distup2rdg,
                                                  cfg.dinf_up_stat, cfg.dinf_up_method,
                                                  'false', cfg.ridge, *run_args)
            TauDEMExtension.simplecalculator(cfg.proc, cfg.pretaudem.dist2stream,
                                             cfg.pretaudem.distup2rdg,
                                             cfg.pretaudem.rpi_hydro, 4, *run_args)
        if cfg.rpi_method == 0:
            # calculate RPI based on Skidmore's method
            if cfg.ridge is None or not FileClass.is_file_exists(cfg.ridge):
                cfg.ridge = cfg.pretaudem.rdgsrc
                angfile = cfg.pretaudem.d8flow
                elevfile = cfg.pretaudem.dist2stream_v
                if cfg.flow_model == 1:  # D-inf model
                    angfile = cfg.pretaudem.dinf
                    elevfile = cfg.pretaudem.dist2stream_v
                TauDEMExtension.extractridge(cfg.proc, angfile, elevfile, cfg.ridge,
                                             *run_args)
            TauDEMExtension.rpiskidmore(cfg.proc, cfg.valley, cfg.ridge,
                                        cfg.pretaudem.rpi_skidmore, 1, 1,
                                        cfg.pretaudem.dist2stream_ed,
                                        cfg.pretaudem.dist2rdg_ed, *run_args)

        log_status.write("Calculating Horizontal Curvature and Profile Curvature...\n")
        TauDEMExtension.curvature(cfg.proc, cfg.pretaudem.filldem, cfg.topoparam.profc,
                                  cfg.topoparam.horizc, None, None, None, None, None,
                                  *run_args)
        if cfg.flow_model == 0:
            slope_rad_to_deg(cfg.pretaudem.slp, cfg.topoparam.slope)
        elif cfg.flow_model == 1:
            slope_rad_to_deg(cfg.pretaudem.dinf_slp, cfg.topoparam.slope)
        if cfg.rpi_method == 1:
            copy2(cfg.pretaudem.rpi_hydro, cfg.topoparam.rpi)
        else:
            copy2(cfg.pretaudem.rpi_skidmore, cfg.topoparam.rpi)
        copy2(cfg.pretaudem.dist2stream_v, cfg.topoparam.hand)
        copy2(cfg.pretaudem.filldem, cfg.topoparam.elev)
        if single_basin:  # clip RPI
            RasterUtilClass.mask_raster(cfg.topoparam.rpi, cfg.pretaudem.subbsn,
                                        cfg.topoparam.rpi)
        log_status.write("Preprocessing succeed!\n")
        end_t = time.time()
        elapsed_s = end_t - start_t
        cost = elapsed_s / 60.
        log_status.write("Time consuming: %.2f min.\n" % cost)

    with open(cfg.log.runtime, 'a') as logf:
        # This record is labelled in seconds, so the value written is in
        # seconds (it used to be the minutes value with an ' s' suffix).
        logf.write("Preprocessing Time-consuming: " + str(elapsed_s) + ' s\n')
    return cost
def watershed_delineation(np, dem, outlet_file=None, thresh=0, singlebasin=False,
                          workingdir=None, mpi_bin=None, bin_dir=None,
                          logfile=None, runtime_file=None, hostfile=None):
    """Delineate the watershed of a DEM by chaining the TauDEM functions.

    The chain is: pit removal, D8 and Dinf flow direction, D8 flow
    accumulation, initial stream raster (mean accumulation as threshold),
    outlet adjustment, stream skeleton, weighted flow accumulation, drop
    analysis (only when ``thresh <= 0``), final stream raster, stream network
    and D8 distance to stream. File names of all intermediate results come
    from ``TauDEMFilesUtils``; a progress record is appended to ``logfile``
    before each stage.

    Raises:
        RuntimeError: if drop analysis did not create its result file.
    """

    def report(percent, message):
        """Append one progress record to the log file."""
        UtilClass.writelog(logfile, '[Output] %d..., %s' % (percent, message), 'a')

    # 1. Check directories
    if not os.path.exists(dem):
        TauDEM.error('DEM: %s is not existed!' % dem)
    dem = os.path.abspath(dem)
    if workingdir is None:
        workingdir = os.path.dirname(dem)
    files = TauDEMFilesUtils(workingdir)
    workingdir = files.workspace
    UtilClass.mkdir(workingdir)

    # 2. Start from a fresh log file
    if logfile is not None and FileClass.is_file_exists(logfile):
        os.remove(logfile)

    # 3. Predefined names of the intermediate files
    dem_filled = files.filldem
    d8_dir = files.d8flow
    d8_slope = files.slp
    dinf_dir = files.dinf
    dinf_slope = files.dinf_slp
    dinf_dir_code = files.dinf_d8dir
    dinf_weight = files.dinf_weight
    d8_acc = files.d8acc
    streams = files.stream_raster
    outlet_default = files.outlet_pre
    outlet_moved = files.outlet_m
    skeleton = files.stream_pd
    d8_acc_weighted = files.d8acc_weight
    order_raster = files.stream_order
    channel_net = files.channel_net
    channel_coord = files.channel_coord
    streamnet_shp = files.streamnet_shp
    subbasins = files.subbsn
    dist_to_stream = files.dist2stream_d8

    # Arguments repeated in every TauDEM invocation
    exe = (workingdir, mpi_bin, bin_dir)
    opts = {'log_file': logfile, 'runtime_file': runtime_file, 'hostfile': hostfile}

    # 4. perform calculation
    report(10, 'pitremove DEM...')
    TauDEM.pitremove(np, dem, dem_filled, *exe, **opts)

    report(20, 'Calculating D8 and Dinf flow direction...')
    TauDEM.d8flowdir(np, dem_filled, d8_dir, d8_slope, *exe, **opts)
    TauDEM.dinfflowdir(np, dem_filled, dinf_dir, dinf_slope, *exe, **opts)
    DinfUtil.output_compressed_dinf(dinf_dir, dinf_dir_code, dinf_weight)

    report(30, 'D8 flow accumulation...')
    TauDEM.aread8(np, d8_dir, d8_acc, None, None, False, *exe, **opts)

    report(40, 'Generating stream raster initially...')
    _, _, accum_mean, accum_std = RasterUtilClass.raster_statistics(d8_acc)
    TauDEM.threshold(np, d8_acc, streams, accum_mean, *exe, **opts)

    report(50, 'Moving outlet to stream...')
    if outlet_file is None:
        outlet_file = outlet_default
        TauDEM.connectdown(np, d8_dir, d8_acc, outlet_file, wtsd=None,
                           workingdir=workingdir, mpiexedir=mpi_bin, exedir=bin_dir,
                           **opts)
    TauDEM.moveoutletstostrm(np, d8_dir, streams, outlet_file, outlet_moved,
                             *exe, **opts)

    report(60, 'Generating stream skeleton...')
    TauDEM.peukerdouglas(np, dem_filled, skeleton, *exe, **opts)

    report(70, 'Flow accumulation with outlet...')
    outlet_for_acc = outlet_moved if singlebasin else None
    TauDEM.aread8(np, d8_dir, d8_acc_weighted, outlet_for_acc, skeleton, False,
                  *exe, **opts)

    if thresh <= 0:  # find the optimal threshold using dropanalysis function
        report(75, 'Drop analysis to select optimal threshold...')
        _, _, accum_mean, accum_std = RasterUtilClass.raster_statistics(d8_acc_weighted)
        lower = accum_mean if accum_mean - accum_std < 0 else accum_mean - accum_std
        upper = accum_mean + accum_std
        drop_result = files.drptxt
        TauDEM.dropanalysis(np, dem_filled, d8_dir, d8_acc_weighted, d8_acc_weighted,
                            outlet_moved, lower, upper, 20, 'true', drop_result,
                            *exe, **opts)
        if not FileClass.is_file_exists(drop_result):
            raise RuntimeError('Dropanalysis failed and drp.txt was not created!')
        with open(drop_result, 'r', encoding='utf-8') as drop_stream:
            drop_text = drop_stream.read()
        # the threshold is the text after the last blank
        _, thresh = drop_text.rsplit(' ', 1)
        print(thresh)

    report(80, 'Generating stream raster...')
    TauDEM.threshold(np, d8_acc_weighted, streams, float(thresh), *exe, **opts)

    report(90, 'Generating stream net...')
    TauDEM.streamnet(np, dem_filled, d8_dir, d8_acc_weighted, streams, outlet_moved,
                     order_raster, channel_net, channel_coord, streamnet_shp,
                     subbasins, *exe, **opts)

    report(95, 'Calculating distance to stream (D8)...')
    TauDEM.d8hdisttostrm(np, d8_dir, streams, dist_to_stream, 1, *exe, **opts)

    # The closing record uses a slightly different layout (two dots).
    UtilClass.writelog(logfile, '[Output] %d.., %s' %
                       (100, 'Original subbasin delineation is finished!'), 'a')
def watershed_delineation(np, dem, outlet_file=None, thresh=0, singlebasin=False,
                          workingdir=None, mpi_bin=None, bin_dir=None,
                          logfile=None, runtime_file=None, hostfile=None,
                          avoid_redo=False):
    """Watershed Delineation based on D8 flow direction.

    Args:
        np: process number for MPI
        dem: DEM path
        outlet_file: predefined outlet shapefile path
        thresh: predefined threshold for extracting stream from accumulated flow direction
        singlebasin: when set True, only extract subbasins that drains into predefined outlets
        workingdir: directory that store outputs; None or an empty string
            means the directory of ``dem``
        mpi_bin: directory of MPI executable binary, e.g., mpiexec, mpirun
        bin_dir: directory of TauDEM and other executable binaries
        logfile: log file path
        runtime_file: runtime file path
        hostfile: host list file path for MPI
        avoid_redo: avoid executing some functions that do not depend on input arguments
                    when repeatedly invoke this function
    """
    # 1. Check directories
    if not os.path.exists(dem):
        TauDEM.error('DEM: %s is not existed!' % dem)
    dem = os.path.abspath(dem)
    # Compare by value: `workingdir is ''` tested object identity, which is
    # not guaranteed to hold for equal strings.
    if workingdir is None or workingdir == '':
        workingdir = os.path.dirname(dem)
    nc = TauDEMFilesUtils(workingdir)  # predefined names
    workingdir = nc.workspace
    UtilClass.mkdir(workingdir)
    # 2. Check log file
    if logfile is not None and FileClass.is_file_exists(logfile):
        os.remove(logfile)

    # 3. perform calculation
    # Filling DEM
    if not (avoid_redo and FileClass.is_file_exists(nc.filldem)):
        UtilClass.writelog(logfile, '[Output] %s' % 'remove pit...', 'a')
        TauDEM.pitremove(np, dem, nc.filldem, workingdir, mpi_bin, bin_dir,
                         log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    # Flow direction based on D8 algorithm
    if not (avoid_redo and FileClass.is_file_exists(nc.d8flow)):
        UtilClass.writelog(logfile, '[Output] %s' % 'D8 flow direction...', 'a')
        TauDEM.d8flowdir(np, nc.filldem, nc.d8flow, nc.slp, workingdir,
                         mpi_bin, bin_dir, log_file=logfile,
                         runtime_file=runtime_file, hostfile=hostfile)
    # Flow accumulation without stream skeleton as weight
    if not (avoid_redo and FileClass.is_file_exists(nc.d8acc)):
        UtilClass.writelog(logfile, '[Output] %s' % 'D8 flow accumulation...', 'a')
        TauDEM.aread8(np, nc.d8flow, nc.d8acc, None, None, False, workingdir,
                      mpi_bin, bin_dir, log_file=logfile,
                      runtime_file=runtime_file, hostfile=hostfile)
    # Initial stream network using mean accumulation as threshold
    UtilClass.writelog(logfile, '[Output] %s' % 'Generating stream raster initially...', 'a')
    _, _, mean_accum, std_accum = RasterUtilClass.raster_statistics(nc.d8acc)
    TauDEM.threshold(np, nc.d8acc, nc.stream_raster, mean_accum, workingdir,
                     mpi_bin, bin_dir, log_file=logfile,
                     runtime_file=runtime_file, hostfile=hostfile)
    # Outlets position initialization and adjustment
    UtilClass.writelog(logfile, '[Output] %s' % 'Moving outlet to stream...', 'a')
    if outlet_file is None:  # if not given, take cell with maximum accumulation as outlet
        outlet_file = nc.outlet_pre
        TauDEM.connectdown(np, nc.d8flow, nc.d8acc, outlet_file, nc.outlet_m, wtsd=None,
                           workingdir=workingdir, mpiexedir=mpi_bin, exedir=bin_dir,
                           log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    TauDEM.moveoutletstostrm(np, nc.d8flow, nc.stream_raster, outlet_file, nc.outlet_m,
                             workingdir=workingdir, mpiexedir=mpi_bin, exedir=bin_dir,
                             log_file=logfile, runtime_file=runtime_file,
                             hostfile=hostfile)
    # Stream skeleton by peuker-douglas algorithm
    UtilClass.writelog(logfile, '[Output] %s' % 'Generating stream skeleton ...', 'a')
    TauDEM.peukerdouglas(np, nc.filldem, nc.stream_pd, workingdir, mpi_bin, bin_dir,
                         log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    # Weighted flow accumulation with outlet
    UtilClass.writelog(logfile, '[Output] %s' % 'Flow accumulation with outlet...', 'a')
    tmp_outlet = None
    if singlebasin:
        tmp_outlet = nc.outlet_m
    TauDEM.aread8(np, nc.d8flow, nc.d8acc_weight, tmp_outlet, nc.stream_pd, False,
                  workingdir, mpi_bin, bin_dir, log_file=logfile,
                  runtime_file=runtime_file, hostfile=hostfile)
    # Determine threshold by input argument or dropanalysis function
    if thresh <= 0:  # find the optimal threshold using dropanalysis function
        UtilClass.writelog(logfile, '[Output] %s' %
                           'Drop analysis to select optimal threshold...', 'a')
        _, _, mean_accum, std_accum = RasterUtilClass.raster_statistics(nc.d8acc_weight)
        if mean_accum - std_accum < 0:
            minthresh = mean_accum
        else:
            minthresh = mean_accum - std_accum
        maxthresh = mean_accum + std_accum
        TauDEM.dropanalysis(np, nc.filldem, nc.d8flow, nc.d8acc_weight, nc.d8acc_weight,
                            nc.outlet_m, minthresh, maxthresh, 20, 'true', nc.drptxt,
                            workingdir, mpi_bin, bin_dir, log_file=logfile,
                            runtime_file=runtime_file, hostfile=hostfile)
        if not FileClass.is_file_exists(nc.drptxt):
            # A failed drop analysis is not fatal: log it and fall back to the
            # middle of the searched threshold range.
            UtilClass.writelog(logfile, '[Output] %s' % 'dropanalysis failed!', 'a')
            thresh = 0.5 * (maxthresh - minthresh) + minthresh
        else:
            with open(nc.drptxt, 'r', encoding='utf-8') as drpf:
                temp_contents = drpf.read()
            # the threshold is the text after the last blank of the result file
            (beg, thresh) = temp_contents.rsplit(' ', 1)
            thresh = float(thresh)
        UtilClass.writelog(logfile, '[Output] %s: %f' %
                           ('Selected optimal threshold: ', thresh), 'a')
    # Final stream network
    UtilClass.writelog(logfile, '[Output] %s' % 'Generating stream raster...', 'a')
    TauDEM.threshold(np, nc.d8acc_weight, nc.stream_raster, thresh, workingdir,
                     mpi_bin, bin_dir, log_file=logfile,
                     runtime_file=runtime_file, hostfile=hostfile)
    UtilClass.writelog(logfile, '[Output] %s' % 'Generating stream net...', 'a')
    TauDEM.streamnet(np, nc.filldem, nc.d8flow, nc.d8acc_weight, nc.stream_raster,
                     nc.outlet_m, nc.stream_order, nc.channel_net, nc.channel_coord,
                     nc.streamnet_shp, nc.subbsn, workingdir, mpi_bin, bin_dir,
                     log_file=logfile, runtime_file=runtime_file, hostfile=hostfile)
    # Serialize IDs of subbasins and the corresponding streams
    UtilClass.writelog(logfile, '[Output] %s' % 'Serialize subbasin&stream IDs...', 'a')
    id_map = StreamnetUtil.serialize_streamnet(nc.streamnet_shp, nc.streamnet_m)
    RasterUtilClass.raster_reclassify(nc.subbsn, id_map, nc.subbsn_m, GDT_Int32)
    StreamnetUtil.assign_stream_id_raster(nc.stream_raster, nc.subbsn_m, nc.stream_m)
    # convert raster to shapefile (for subbasin and basin)
    UtilClass.writelog(logfile, '[Output] %s' % 'Generating subbasin vector...', 'a')
    VectorUtilClass.raster2shp(nc.subbsn_m, nc.subbsn_shp, 'subbasin', 'SUBBASINID')
    # Finish the workflow
    UtilClass.writelog(logfile, '[Output] %s' %
                       'Original subbasin delineation is finished!', 'a')
def read_param_ranges(self):
    """Read the parameter range definition file (param_rng.def).

    Each valid record has the form
        name,lower_bound,upper_bound[,group][,dist]
    where group and dist are optional, e.g.,
        Param1,0,1[,Group1][,dist1]
        Param2,0,1[,Group2][,dist2]
        Param3,0,1[,Group3][,dist3]

    Nothing is returned; the result is stored in ``self.param_defs`` and
    dumped to ``self.cfg.outfiles.param_defs_json``. The dictionary contains:
        - names - the names of the parameters
        - bounds - a list of [lower, upper] bound pairs
        - num_vars - the number of accepted parameters (the length of names)
        - groups - a list of group names (strings) for each variable,
                   None if every parameter is its own group
        - dists - a list of distributions for the problem,
                  None if not specified or all uniform

    Records with fewer than three fields are skipped, as are parameters whose
    name cannot be found in the ``PARAMETERS`` collection (a warning is printed).

    Raises:
        ValueError: if all accepted parameters share one single group.
    """
    # Reuse the previously generated json file when nothing is loaded yet.
    if not self.param_defs:
        if FileClass.is_file_exists(self.cfg.outfiles.param_defs_json):
            with open(self.cfg.outfiles.param_defs_json, 'r') as f:
                self.param_defs = UtilClass.decode_strs_in_dict(json.load(f))
            return
    # Read the param_range_def file and output to json file
    client = ConnectMongoDB(self.model.host, self.model.port)
    conn = client.get_conn()
    db = conn[self.model.db_name]
    collection = db['PARAMETERS']
    names = list()
    bounds = list()
    groups = list()
    dists = list()
    items = read_data_items_from_txt(self.cfg.param_range_def)
    for item in items:
        if len(item) < 3:
            continue
        # Check the parameter name, print warning message if not existed.
        # `find_one` is used instead of `find(..., no_cursor_timeout=True).count()`
        # so that no never-closed server cursor is left behind for each parameter
        # and the deprecated `Cursor.count()` is avoided.
        if collection.find_one({'NAME': item[0]}) is None:
            print('WARNING: parameter %s is not existed!' % item[0])
            continue
        names.append(item[0])
        bounds.append([float(item[1]), float(item[2])])
        # If the fourth column does not contain a group name, use
        # the parameter name
        groups.append(item[3] if len(item) >= 4 else item[0])
        # The distribution defaults to uniform
        dists.append(item[4] if len(item) >= 5 else 'unif')
    num_vars = len(names)
    if groups == names:
        groups = None
    elif len(set(groups)) == 1:
        raise ValueError('Only one group defined, results will not be meaningful')

    # setting dists to none if all are uniform
    # because non-uniform scaling is not needed
    if all(d == 'unif' for d in dists):
        dists = None

    self.param_defs = {'names': names, 'bounds': bounds,
                       'num_vars': num_vars, 'groups': groups, 'dists': dists}

    # Save as json, which can be loaded by json.load()
    json_data = json.dumps(self.param_defs, indent=4, cls=SpecialJsonEncoder)
    with open(self.cfg.outfiles.param_defs_json, 'w') as f:
        f.write(json_data)
def __init__(
        self,
        bin_dir='',  # type: AnyStr # The directory of SEIMS binary
        model_dir='',  # type: AnyStr # The directory of SEIMS model
        nthread=4,  # type: int # Thread number for OpenMP
        lyrmtd=0,  # type: int # Layering method, can be 0 (UP_DOWN) or 1 (DOWN_UP)
        host='127.0.0.1',  # type: AnyStr # MongoDB host address, default is `localhost`
        port=27017,  # type: int # MongoDB port, default is 27017
        db_name='',  # type: AnyStr # Main spatial dbname which can diff from dirname
        scenario_id=-1,  # type: int # Scenario ID defined in `<model>_Scenario` database
        calibration_id=-1,  # type: int # Calibration ID used for model auto-calibration
        subbasin_id=0,  # type: int # Subbasin ID, 0 for whole watershed, 9999 for field version
        version='OMP',  # type: AnyStr # SEIMS version, can be `MPI` or `OMP` (default)
        nprocess=1,  # type: int # Process number for MPI
        mpi_bin='',  # type: AnyStr # Full path of MPI executable file, e.g., `./mpirun`
        hosts_opt='-f',  # type: AnyStr # Option for assigning hosts,
        # e.g., `-f`, `-hostfile`, `-machine`, `-machinefile`
        hostfile='',  # type: AnyStr # File containing host names,
        # or file mapping process numbers to machines
        simu_stime=None,  # type: Optional[datetime, AnyStr] # Start time of simulation
        simu_etime=None,  # type: Optional[datetime, AnyStr] # End time of simulation
        out_stime=None,  # type: Optional[datetime, AnyStr] # Start time of outputs
        out_etime=None,  # type: Optional[datetime, AnyStr] # End time of outputs
        args_dict=None  # type: Dict[AnyStr, Optional[AnyStr, datetime, int]]
):
    # type: (...) -> None
    """Initialize the model settings from keyword arguments and/or `args_dict`.

    Every setting is looked up in `args_dict` first; the keyword argument of the
    same name is only used when the key is absent from `args_dict`.

    The executable is `seims_mpi` for the `MPI` version, otherwise `seims_omp`,
    falling back to `seims` when `seims_omp` does not exist in `bin_dir`
    (`.exe` is appended on Windows).

    The database name is resolved as: `args_dict['db_name']`, then a non-empty
    `db_name` argument, then the last component of the model directory.

    Time arguments given as strings are converted by `StringClass.get_datetime`.
    """
    # Derived from input arguments
    if args_dict is None:  # Preferred to use 'args_dict' if existed.
        args_dict = dict()
    bin_dir = args_dict.get('bin_dir', bin_dir)
    model_dir = args_dict.get('model_dir', model_dir)
    self.version = args_dict.get('version', version)
    suffix = '.exe' if sysstr == 'Windows' else ''
    if self.version == 'MPI':
        self.seims_exec = '%s/seims_mpi%s' % (bin_dir, suffix)
    else:
        self.seims_exec = '%s/seims_omp%s' % (bin_dir, suffix)
        if not FileClass.is_file_exists(self.seims_exec):
            # If not support OpenMP, use `seims`!
            self.seims_exec = '%s/seims%s' % (bin_dir, suffix)
    self.seims_exec = os.path.abspath(self.seims_exec)
    self.model_dir = os.path.abspath(model_dir)

    self.nthread = args_dict.get('nthread', nthread)
    self.lyrmtd = args_dict.get('lyrmtd', lyrmtd)
    self.host = args_dict.get('host', host)
    self.port = args_dict.get('port', port)
    # Honour the `db_name` argument (it used to be silently ignored); the
    # directory name of the model is only the last resort.
    if 'db_name' in args_dict:
        self.db_name = args_dict['db_name']
    elif db_name:
        self.db_name = db_name
    else:
        self.db_name = os.path.split(self.model_dir)[1]
    self.scenario_id = args_dict.get('scenario_id', scenario_id)
    self.calibration_id = args_dict.get('calibration_id', calibration_id)
    self.subbasin_id = args_dict.get('subbasin_id', subbasin_id)
    self.nprocess = args_dict.get('nprocess', nprocess)
    self.mpi_bin = args_dict.get('mpi_bin', mpi_bin)
    self.hosts_opt = args_dict.get('hosts_opt', hosts_opt)
    self.hostfile = args_dict.get('hostfile', hostfile)
    self.simu_stime = args_dict.get('simu_stime', simu_stime)
    self.simu_etime = args_dict.get('simu_etime', simu_etime)
    self.out_stime = args_dict.get('out_stime', out_stime)
    self.out_etime = args_dict.get('out_etime', out_etime)
    # Time settings given as strings are converted to datetime objects
    for time_attr in ('simu_stime', 'simu_etime', 'out_stime', 'out_etime'):
        time_value = getattr(self, time_attr)
        if is_string(time_value) and not isinstance(time_value, datetime):
            setattr(self, time_attr, StringClass.get_datetime(time_value))
    # Concatenate executable command
    # (`Command` and `OutputDirectory` are read after all settings above are assigned)
    self.cmd = self.Command
    self.run_success = False
    self.output_dir = self.OutputDirectory
    # Model data read from MongoDB
    self.outlet_id = -1
    self.subbasin_count = -1
    self.scenario_dbname = ''
    self.start_time = None
    self.end_time = None
    self.output_ids = list()  # type: List[AnyStr]
    self.output_items = dict()  # type: Dict[AnyStr, Union[List[AnyStr]]]
    # Data maybe used after model run
    self.timespan = dict()  # type: Dict[AnyStr, Dict[AnyStr, Union[float, Dict[AnyStr, float]]]]
    self.obs_vars = list()  # type: List[AnyStr] # Observation types at the outlet
    self.obs_value = dict()  # type: Dict[datetime, List[float]] # Observation value
    self.sim_vars = list()  # type: List[AnyStr] # Simulation types, part of `obs_vars`
    self.sim_value = dict()  # type: Dict[datetime, List[float]] # Simulation value
    # The format of sim_obs_dict:
    #     {VarName: {'UTCDATETIME': [t1, t2, ..., tn],
    #                'Obs': [o1, o2, ..., on],
    #                'Sim': [s1, s2, ..., sn]},
    #      ...
    #     }
    self.sim_obs_dict = dict()  # type: Dict[AnyStr, Dict[AnyStr, Union[float, List[Union[datetime, float]]]]]
    self.runtime = 0.
    self.runlogs = list()  # type: List[AnyStr]
    self.mongoclient = None  # type: Union[MongoClient, None] # Set to None after use
def __init__(self, bin_dir='', model_dir='', nthread=4, lyrmtd=0,
             host='127.0.0.1', port=27017, scenario_id=-1, calibration_id=-1,
             version='OMP', nprocess=1, mpi_bin='', hosts_opt='-f', hostfile='',
             **kwargs):  # Allow any other keyword arguments
    """Initialize the model settings from keyword arguments and/or `args_dict`.

    If `args_dict` is passed as a keyword argument, each setting is looked up
    there first; the keyword argument of the same name is only used when the key
    is absent. `args_dict=None` is treated the same as not passing it.

    The executable is `seims_mpi` for the `MPI` version, otherwise `seims_omp`,
    falling back to `seims` when `seims_omp` does not exist in `bin_dir`
    (`.exe` is appended on Windows). The database name is the last component
    of the model directory.
    """
    # Derived from input arguments
    # Preferred to use 'args_dict' if existed. An explicit `args_dict=None`
    # falls back to an empty dict instead of failing on the membership tests.
    args_dict = kwargs.get('args_dict') or dict()
    bin_dir = args_dict.get('bin_dir', bin_dir)
    model_dir = args_dict.get('model_dir', model_dir)
    self.version = args_dict.get('version', version)
    suffix = '.exe' if sysstr == 'Windows' else ''
    if self.version == 'MPI':
        self.seims_exec = bin_dir + os.path.sep + 'seims_mpi' + suffix
    else:
        self.seims_exec = bin_dir + os.path.sep + 'seims_omp' + suffix
        if not FileClass.is_file_exists(self.seims_exec):
            # If not support OpenMP, use `seims`!
            self.seims_exec = bin_dir + os.path.sep + 'seims' + suffix
    self.seims_exec = os.path.abspath(self.seims_exec)
    self.model_dir = os.path.abspath(model_dir)
    self.nthread = args_dict.get('nthread', nthread)
    self.lyrmtd = args_dict.get('lyrmtd', lyrmtd)
    self.host = args_dict.get('host', host)
    self.port = args_dict.get('port', port)
    self.scenario_id = args_dict.get('scenario_id', scenario_id)
    self.calibration_id = args_dict.get('calibration_id', calibration_id)
    self.nprocess = args_dict.get('nprocess', nprocess)
    self.mpi_bin = args_dict.get('mpi_bin', mpi_bin)
    self.hosts_opt = args_dict.get('hosts_opt', hosts_opt)
    self.hostfile = args_dict.get('hostfile', hostfile)
    # Concatenate executable command
    # (`Command` and `OutputDirectory` are read after all settings above are assigned)
    self.cmd = self.Command
    self.run_success = False
    self.output_dir = self.OutputDirectory
    # Read model data from MongoDB
    self.db_name = os.path.split(self.model_dir)[1]
    self.outlet_id = self.OutletID
    self.start_time, self.end_time = self.SimulatedPeriod
    # Data maybe used after model run
    self.timespan = dict()
    self.obs_vars = list()  # Observation types at the outlet
    self.obs_value = dict()  # Observation value, key: DATETIME, value: value list of obs_vars
    self.sim_vars = list()  # Simulation types at the outlet, which is part of obs_vars
    self.sim_value = dict()  # Simulation value, same as obs_value
    # The format of sim_obs_dict:
    #     {VarName: {'UTCDATETIME': [t1, t2, ..., tn],
    #                'Obs': [o1, o2, ..., on],
    #                'Sim': [s1, s2, ..., sn]},
    #      ...
    #     }
    self.sim_obs_dict = dict()