def calibrated_params_from_txt(cfg, maindb):
    """Reset calibration impacts, then apply the values listed in the calibration file.

    Every parameter whose change type is VC, RC or AC first gets its neutral
    impact (-9999., 1. and 0. respectively). Afterwards each row of the
    calibration text file updates the parameter with the matching name.

    Args:
        cfg: configuration object; ``cfg.modelcfgs.filecali`` is the path of the
            calibration text file.
        maindb: database object holding the ``DBTableNames.main_parameter``
            collection.

    Raises:
        RuntimeError: if a row of the calibration file has fewer than two fields.
    """
    param_coll = maindb[DBTableNames.main_parameter]
    bulk = param_coll.initialize_ordered_bulk_op()
    rows = read_data_items_from_txt(cfg.modelcfgs.filecali)

    # Clean up the existing calibration settings before applying the new ones.
    neutral_impacts = ((ModelParamFields.change_vc, -9999.),
                       (ModelParamFields.change_rc, 1.),
                       (ModelParamFields.change_ac, 0.))
    for change_type, neutral in neutral_impacts:
        param_coll.update_many({ModelParamFields.change: change_type},
                               {'$set': {ModelParamFields.impact: neutral}})

    valid_changes = [ModelParamFields.change_vc, ModelParamFields.change_ac,
                     ModelParamFields.change_rc, ModelParamFields.change_nc]
    for row in rows:
        if len(row) < 2:
            raise RuntimeError('param.cali at least contain NAME and IMPACT fields!')
        param_name = row[0]
        update_doc = {ModelParamFields.name: param_name,
                      ModelParamFields.impact: float(row[1])}
        # The optional third column overrides the change type when it is a known code.
        if len(row) >= 3 and row[2] in valid_changes:
            update_doc[ModelParamFields.change] = row[2]
        bulk.find({ModelParamFields.name: param_name}).update({'$set': update_doc})

    # Execute the queued updates.
    MongoUtil.run_bulk(bulk, 'No operations during calibrated_params_from_txt.')
def initialize_landcover_parameters(landcover_file, landcover_initial_fields_file, dst_dir):
    """Generate one reclassified raster per initial land-cover parameter.

    The lookup text file starts with a header row. The column matching
    ``LANDUSE_ID`` holds the land-use ID; every other column is a parameter whose
    value is given per land-use ID. For each parameter a raster named
    ``<PARAMETER>.tif`` (upper-cased column name) is written to ``dst_dir`` by
    reclassifying ``landcover_file`` with the ID -> value mapping.

    Args:
        landcover_file: path of the land-use raster to reclassify.
        landcover_initial_fields_file: path of the lookup text file.
        dst_dir: directory receiving the generated rasters.

    Returns:
        list: the values of the ``LANDCOVER`` column.

    Raises:
        ValueError: if the header has no ``LANDUSE_ID`` column.
        KeyError: if the header has no ``LANDCOVER`` column.
    """
    lc_data_items = read_data_items_from_txt(landcover_initial_fields_file)
    field_names = lc_data_items[0]
    lu_id = -1
    for i, v in enumerate(field_names):
        if StringClass.string_match(v, 'LANDUSE_ID'):
            lu_id = i
            break
    if lu_id < 0:
        # Without this guard, -1 would silently pick the LAST column as the ID
        # and produce wrong reclassification tables.
        raise ValueError('LANDUSE_ID field is not found in %s!'
                         % landcover_initial_fields_file)

    replace_dicts = dict()
    for item in lc_data_items[1:]:
        for i, v in enumerate(item):
            if i == lu_id:
                continue
            # One {land-use ID: value} mapping per parameter column.
            replace_dicts.setdefault(field_names[i].upper(),
                                     dict())[float(item[lu_id])] = float(v)

    # Generate GTIFF
    for param_name, mapping in replace_dicts.items():
        filename = dst_dir + os.path.sep + param_name + '.tif'
        print(filename)
        RasterUtilClass.raster_reclassify(landcover_file, mapping, filename)
    return list(replace_dicts['LANDCOVER'].values())
def initialize_landcover_parameters(landcover_file, landcover_initial_fields_file, dst_dir):
    """Write a reclassified raster for every initial land-cover parameter.

    The first row of the lookup text file is the header. The column matching
    ``LANDUSE_ID`` is the land-use ID; each remaining column is a parameter
    with one value per land-use ID. ``landcover_file`` is reclassified once per
    parameter and saved as ``<dst_dir>/<PARAMETER>.tif``, where ``PARAMETER`` is
    the upper-cased column name.

    Args:
        landcover_file: path of the land-use raster to reclassify.
        landcover_initial_fields_file: path of the lookup text file.
        dst_dir: output directory for the generated rasters.

    Returns:
        list: the values of the ``LANDCOVER`` column.

    Raises:
        ValueError: if the header has no ``LANDUSE_ID`` column.
        KeyError: if the header has no ``LANDCOVER`` column.
    """
    lc_data_items = read_data_items_from_txt(landcover_initial_fields_file)
    field_names = lc_data_items[0]

    lu_id = -1
    for col, col_name in enumerate(field_names):
        if StringClass.string_match(col_name, 'LANDUSE_ID'):
            lu_id = col
            break
    if lu_id < 0:
        # A negative index would silently address the last column as the ID,
        # so fail loudly instead of generating wrong rasters.
        raise ValueError('LANDUSE_ID field is not found in %s!'
                         % landcover_initial_fields_file)

    replace_dicts = dict()
    for item in lc_data_items[1:]:
        for col, raw in enumerate(item):
            if col == lu_id:
                continue
            param_key = field_names[col].upper()
            # {land-use ID: parameter value}, created on first use.
            replace_dicts.setdefault(param_key, dict())[float(item[lu_id])] = float(raw)

    # Generate GTIFF
    for param_key, id_to_value in replace_dicts.items():
        filename = dst_dir + os.path.sep + param_key + '.tif'
        print(filename)
        RasterUtilClass.raster_reclassify(landcover_file, id_to_value, filename)
    return list(replace_dicts['LANDCOVER'].values())
def regular_data_from_txt(climdb, data_file):
    """Import regular-interval precipitation records from a text file.

    Existing records of type ``DataType.p`` are removed first. Every header
    field that is not a date/time field is treated as a station ID, and one
    document per station and time step is inserted in batches of 500.

    Args:
        climdb: database object holding the ``DBTableNames.data_values``
            collection.
        data_file: path of the precipitation text file.
    """
    values_coll = climdb[DBTableNames.data_values]
    # Delete existing precipitation data.
    values_coll.remove({DataValueFields.type: DataType.p})

    tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(data_file)
    if tsysin == 'UTCTIME':
        # Use the offset of the machine's local time zone, in hours.
        tzonein = time.timezone / -3600

    clim_data_items = read_data_items_from_txt(data_file)
    clim_flds = clim_data_items[0]
    time_flds = [DataValueFields.dt, DataValueFields.y, DataValueFields.m,
                 DataValueFields.d, DataValueFields.hour, DataValueFields.minute,
                 DataValueFields.second]
    station_id = [fld for fld in clim_flds
                  if not StringClass.string_in_list(fld, time_flds)]

    bulk = values_coll.initialize_ordered_bulk_op()
    count = 0
    for clim_data_item in clim_data_items[1:]:
        # Values of the station columns, in header order.
        precipitation = [float(cell) for col, cell in enumerate(clim_data_item)
                         if StringClass.string_in_list(clim_flds[col], station_id)]
        utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(
            clim_flds, clim_data_item, tsysin, tzonein)
        local_time = utc_time + timedelta(minutes=tzonein * 60)
        for j, cur_id in enumerate(station_id):
            bulk.insert({DataValueFields.value: precipitation[j],
                         DataValueFields.id: int(cur_id),
                         DataValueFields.type: DataType.p,
                         DataValueFields.time_zone: tzonein,
                         DataValueFields.local_time: local_time,
                         DataValueFields.utc: utc_time})
            count += 1
            if count % 500 == 0:
                # Flush every 500 records and start a new batch.
                MongoUtil.run_bulk(bulk)
                bulk = values_coll.initialize_ordered_bulk_op()
    if count % 500 != 0:
        # Flush the last, partially filled batch.
        MongoUtil.run_bulk(bulk)

    # Create index
    values_coll.create_index([(DataValueFields.id, ASCENDING),
                              (DataValueFields.type, ASCENDING),
                              (DataValueFields.utc, ASCENDING)])
def initial_params_from_txt(cfg, maindb):
    """Import the initial calibration parameters from a text file.

    The ``DBTableNames.main_parameter`` collection is created when missing and
    dropped when present, then one document per data row is inserted and an
    index on (type, name) is created.

    Args:
        cfg: SEIMS config object; ``cfg.paramcfgs.init_params_file`` is the
            path of the parameter text file.
        maindb: MongoDB database object.

    Raises:
        ValueError: from ``list.index`` if the header lacks one of the
            expected parameter fields.
    """
    coll_name = DBTableNames.main_parameter
    # Delete if existed, initialize if not existed.
    if StringClass.string_in_list(coll_name, maindb.collection_names()):
        maindb.drop_collection(coll_name)
    else:
        maindb.create_collection(coll_name)

    bulk = maindb[coll_name].initialize_ordered_bulk_op()
    data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
    field_names = data_items[0][0:]
    for cur_data_item in data_items[1:]:
        # Start from a blank parameter document with default values.
        data_import = {ModelParamFields.name: '',
                       ModelParamFields.desc: '',
                       ModelParamFields.unit: '',
                       ModelParamFields.module: '',
                       ModelParamFields.value: DEFAULT_NODATA,
                       ModelParamFields.impact: DEFAULT_NODATA,
                       ModelParamFields.change: 'NC',
                       ModelParamFields.max: DEFAULT_NODATA,
                       ModelParamFields.min: DEFAULT_NODATA,
                       ModelParamFields.type: ''}
        for k in list(data_import):
            raw = cur_data_item[field_names.index(k)]
            if raw != '':
                # Numeric cells are stored as float, everything else verbatim.
                data_import[k] = float(raw) if MathClass.isnumerical(raw) else raw
                continue
            # Empty cell.
            # NOTE(review): this compares the FIELD NAME with the change-type
            # codes, not the row's change type - looks unintended, confirm.
            if StringClass.string_match(k, ModelParamFields.change_ac):
                data_import[k] = 0
            elif StringClass.string_match(k, ModelParamFields.change_rc):
                data_import[k] = 1
            elif StringClass.string_match(k, ModelParamFields.change_nc):
                data_import[k] = 0
            elif StringClass.string_match(k, ModelParamFields.change_vc):
                # Be careful to check NODATA when use!
                data_import[k] = DEFAULT_NODATA
        bulk.insert(data_import)

    # Execute the queued inserts.
    MongoUtil.run_bulk(bulk, 'No operation during initial_params_from_txt.')
    # Index by parameter type and name, both ascending.
    maindb[coll_name].create_index([(ModelParamFields.type, ASCENDING),
                                    (ModelParamFields.name, ASCENDING)])
def regular_data_from_txt(climdb, data_file):
    """Load regular precipitation data from a text file into the database.

    Previously stored ``DataType.p`` records are removed. Header fields other
    than the date/time fields are taken as station IDs; each data row yields
    one document per station. Inserts are executed in batches of 500.

    Args:
        climdb: database object holding the ``DBTableNames.data_values``
            collection.
        data_file: path of the precipitation text file.
    """
    table = DBTableNames.data_values
    # Remove the existing precipitation data.
    climdb[table].remove({DataValueFields.type: DataType.p})

    tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(data_file)
    if tsysin == 'UTCTIME':
        # Offset (hours) of the machine's local time zone.
        tzonein = time.timezone / -3600

    clim_data_items = read_data_items_from_txt(data_file)
    clim_flds = clim_data_items[0]
    datetime_flds = [DataValueFields.dt, DataValueFields.y, DataValueFields.m,
                     DataValueFields.d, DataValueFields.hour,
                     DataValueFields.minute, DataValueFields.second]
    station_id = list()
    for fld in clim_flds:
        if StringClass.string_in_list(fld, datetime_flds):
            continue
        station_id.append(fld)

    batch_size = 500
    bulk = climdb[table].initialize_ordered_bulk_op()
    count = 0
    for row_idx, clim_data_item in enumerate(clim_data_items):
        if row_idx == 0:  # header row
            continue
        precipitation = list()
        for col, cell in enumerate(clim_data_item):
            if StringClass.string_in_list(clim_flds[col], station_id):
                precipitation.append(float(cell))
        utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(
            clim_flds, clim_data_item, tsysin, tzonein)
        # Fields shared by all stations of this time step.
        shared = {DataValueFields.local_time: utc_time + timedelta(minutes=tzonein * 60),
                  DataValueFields.time_zone: tzonein,
                  DataValueFields.utc: utc_time}
        for st_idx, cur_id in enumerate(station_id):
            record = dict()
            record[DataValueFields.value] = precipitation[st_idx]
            record[DataValueFields.id] = int(cur_id)
            record[DataValueFields.type] = DataType.p
            record[DataValueFields.time_zone] = shared[DataValueFields.time_zone]
            record[DataValueFields.local_time] = shared[DataValueFields.local_time]
            record[DataValueFields.utc] = shared[DataValueFields.utc]
            bulk.insert(record)
            count += 1
            if count % batch_size == 0:
                # Execute each 500 records, then open a fresh batch.
                MongoUtil.run_bulk(bulk)
                bulk = climdb[table].initialize_ordered_bulk_op()
    if count % batch_size != 0:
        # Records left over from the last, partial batch.
        MongoUtil.run_bulk(bulk)

    # Create index
    climdb[table].create_index([(DataValueFields.id, ASCENDING),
                                (DataValueFields.type, ASCENDING),
                                (DataValueFields.utc, ASCENDING)])
def ParamDefs(self):
    """Read the parameter range definition file (cali_param_rng.def).

    Each usable row has the form ``name,lower_bound,upper_bound``, e.g.::

        Param1,0,1
        Param2,0.5,1.2
        Param3,-1.0,1.0

    Rows with fewer than three fields are skipped. Rows whose name has no
    document in the ``PARAMETERS`` collection are skipped with a warning.
    The result is cached in ``self.param_defs`` and returned directly on
    later calls.

    Returns:
        dict: with the keys

        - ``names``: the names of the parameters
        - ``bounds``: a list of ``[lower, upper]`` bounds
        - ``num_vars``: the number of variables (the length of ``names``)

    Raises:
        ValueError: if ``self.cfg.param_range_def`` does not exist.
    """
    # Return the cached definitions if already read.
    if self.param_defs:
        return self.param_defs

    # Validate the input file before opening a database connection.
    if not FileClass.is_file_exists(self.cfg.param_range_def):
        raise ValueError('Parameters definition file: %s is not'
                         ' existed!' % self.cfg.param_range_def)

    client = ConnectMongoDB(self.cfg.model.host, self.cfg.model.port)
    conn = client.get_conn()
    db = conn[self.cfg.model.db_name]
    collection = db['PARAMETERS']

    names = list()
    bounds = list()
    items = read_data_items_from_txt(self.cfg.param_range_def)
    for item in items:
        if len(item) < 3:
            continue
        # find_one() is used for the existence check because Cursor.count()
        # is deprecated and no longer available in PyMongo 4.
        if collection.find_one({'NAME': item[0]}) is None:
            print('WARNING: parameter %s is not existed!' % item[0])
            continue
        names.append(item[0])
        bounds.append([float(item[1]), float(item[2])])

    self.param_defs = {'names': names, 'bounds': bounds, 'num_vars': len(names)}
    return self.param_defs
def variable_table(db, var_file):
    """Import the variables table.

    The first row of ``var_file`` is the header. From each data row the
    columns matching ``VariableDesc.type`` and ``VariableDesc.unit`` are kept
    and upserted into ``DBTableNames.var_desc``, keyed by the type.

    Args:
        db: database object.
        var_file: path of the variables text file.

    Raises:
        KeyError: if a data row provides no ``VariableDesc.type`` column.
    """
    var_data_items = read_data_items_from_txt(var_file)
    var_flds = var_data_items[0]
    for row in var_data_items[1:]:
        dic = {}
        for col, cell in enumerate(row):
            fld = var_flds[col]
            if StringClass.string_match(fld, VariableDesc.type):
                dic[VariableDesc.type] = cell
            elif StringClass.string_match(fld, VariableDesc.unit):
                dic[VariableDesc.unit] = cell
        # Replace the document with the same type, or insert a new one.
        curfilter = {VariableDesc.type: dic[VariableDesc.type]}
        db[DBTableNames.var_desc].find_one_and_replace(curfilter, dic, upsert=True)
def sites_table(hydro_clim_db, site_file, site_type):
    """Import the HydroClimate sites table.

    Each data row of ``site_file`` is upserted into ``DBTableNames.sites``,
    keyed by station ID and ``site_type``. An index on (id, type) is created
    after all rows have been processed.

    Args:
        hydro_clim_db: database object.
        site_file: path of the sites text file (first row is the header).
        site_type: type value stored with every site of this file.

    Returns:
        dict: station ID -> ``SiteInfo``; the first row seen for an ID wins.
    """
    # Recognized columns with their converters, checked in this order;
    # ``None`` keeps the raw text.
    column_parsers = [(StationFields.id, int),
                      (StationFields.name, None),
                      (StationFields.x, float),
                      (StationFields.y, float),
                      (StationFields.lon, float),
                      (StationFields.lat, float),
                      (StationFields.elev, float),
                      (StationFields.outlet, float)]
    sites_loc = dict()
    site_data_items = read_data_items_from_txt(site_file)
    site_flds = site_data_items[0]
    for row in site_data_items[1:]:
        dic = dict()
        for col, raw in enumerate(row):
            for fld_key, convert in column_parsers:
                if StringClass.string_match(site_flds[col], fld_key):
                    dic[fld_key] = raw if convert is None else convert(raw)
                    break
        dic[StationFields.type] = site_type
        site_id = dic[StationFields.id]
        curfilter = {StationFields.id: site_id,
                     StationFields.type: dic[StationFields.type]}
        hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, dic, upsert=True)
        if site_id not in sites_loc:
            sites_loc[site_id] = SiteInfo(site_id,
                                          dic[StationFields.name],
                                          dic[StationFields.lat],
                                          dic[StationFields.lon],
                                          dic[StationFields.x],
                                          dic[StationFields.y],
                                          dic[StationFields.elev])
    hydro_clim_db[DBTableNames.sites].create_index([(StationFields.id, ASCENDING),
                                                    (StationFields.type, ASCENDING)])
    return sites_loc
def ParamDefs(self):
    """Read the parameter range definition file (cali_param_rng.def).

    Each usable row has the form ``name,lower_bound,upper_bound``, e.g.::

        Param1,0,1
        Param2,0.5,1.2
        Param3,-1.0,1.0

    Rows with fewer than three fields are skipped. Rows whose name has no
    document in the ``PARAMETERS`` collection are skipped with a warning.
    The result is cached in ``self.param_defs`` and returned directly on
    later calls.

    Returns:
        dict: with the keys

        - ``names``: the names of the parameters
        - ``bounds``: a list of ``[lower, upper]`` bounds
        - ``num_vars``: the number of variables (the length of ``names``)

    Raises:
        ValueError: if ``self.cfg.param_range_def`` does not exist.
    """
    # Return the cached definitions if already read.
    if self.param_defs:
        return self.param_defs

    # Validate the input file before opening a database connection.
    range_file = self.cfg.param_range_def
    if not FileClass.is_file_exists(range_file):
        raise ValueError('Parameters definition file: %s is not'
                         ' existed!' % range_file)

    client = ConnectMongoDB(self.cfg.model.host, self.cfg.model.port)
    conn = client.get_conn()
    db = conn[self.cfg.model.db_name]
    collection = db['PARAMETERS']

    names = list()
    bounds = list()
    for item in read_data_items_from_txt(range_file):
        if len(item) < 3:
            continue
        # Existence check via find_one(): Cursor.count() is deprecated and
        # no longer available in PyMongo 4.
        if collection.find_one({'NAME': item[0]}) is None:
            print('WARNING: parameter %s is not existed!' % item[0])
            continue
        names.append(item[0])
        bounds.append([float(item[1]), float(item[2])])

    self.param_defs = {'names': names, 'bounds': bounds, 'num_vars': len(names)}
    return self.param_defs
def read_crop_lookup_table(crop_lookup_file):
    """Read the crop lookup table into per-field dictionaries.

    Args:
        crop_lookup_file: path of the lookup text file; the first row is the
            header and the first column of every data row is the integer ID.

    Returns:
        dict: field name -> {ID: value}. Values are stored as float when they
        can be converted and as the raw text otherwise.
    """
    FileClass.check_file_exists(crop_lookup_file)
    data_items = read_data_items_from_txt(crop_lookup_file)
    fields = data_items[0]
    attr_dic = {fld: dict() for fld in fields}
    for items in data_items[1:]:
        cur_id = int(items[0])
        for col, fld in enumerate(fields):
            raw = items[col]
            try:
                parsed = float(raw)
            except ValueError:
                # Non-numeric cell: keep the original text.
                parsed = raw
            attr_dic[fld][cur_id] = parsed
    return attr_dic
def read_field_arrays_from_csv(csvf):
    """Collect the numeric values of each field of a CSV file.

    The first column and any column matching ``FID`` are ignored.

    Args:
        csvf: path of the CSV file; the first row is the header.

    Returns:
        The result of ``combine_multi_layers_array`` applied to a
        ``{field name: [values]}`` dictionary, or ``None`` when the file has
        fewer than two rows.
    """
    data_items = read_data_items_from_txt(csvf)
    if len(data_items) < 2:
        return
    flds = data_items[0]
    flds_array = dict()
    for data_item in data_items[1:]:
        # Re-join the row so all numeric values are extracted in one pass.
        row_values = StringClass.extract_numeric_values_from_string(','.join(data_item))
        for fld_idx, fld_name in enumerate(flds):
            if fld_idx == 0 or StringClass.string_match(fld_name, 'FID'):
                continue
            flds_array.setdefault(fld_name, list()).append(row_values[fld_idx])
    return combine_multi_layers_array(flds_array)
def read_simulation_from_txt(ws, plot_vars, subbsnID, stime, etime):
    """Read simulated values of one subbasin from ``<ws>/<variable>.txt`` files.

    Args:
        ws: directory containing the simulation text files.
        plot_vars: names of the variables to read.
        subbsnID: ID of the subbasin whose records are wanted.
        stime: start datetime (inclusive).
        etime: end datetime (inclusive).

    Returns:
        tuple:
            1. Matched variable names, [var1, var2, ...]
            2. Simulation data dict of all plotted variables,
               {datetime: [value_of_var1, value_of_var2, ...], ...}
    """
    plot_vars_existed = list()
    sim_data_dict = OrderedDict()
    for var_name in plot_vars:
        txtfile = ws + os.path.sep + var_name + '.txt'
        if not FileClass.is_file_exists(txtfile):
            print('WARNING: Simulation variable file: %s is not existed!' % txtfile)
            continue
        in_subbasin = False
        has_data = False
        for item in read_data_items_from_txt(txtfile):
            parts = StringClass.split_string(item[0], ' ', elim_empty=True)
            n_parts = len(parts)
            if n_parts == 2:
                # Two tokens: presumably a section header whose second token
                # is the subbasin ID - verify against the file format.
                is_target = int(parts[1]) == subbsnID
                if is_target and not in_subbasin:
                    in_subbasin = True
                elif not is_target and in_subbasin:
                    # The section of the wanted subbasin has ended.
                    break
            if not in_subbasin or n_parts != 3:
                continue
            # Three tokens: date, time and value.
            sim_datetime = StringClass.get_datetime('%s %s' % (parts[0], parts[1]),
                                                    "%Y-%m-%d %H:%M:%S")
            if not stime <= sim_datetime <= etime:
                continue
            sim_data_dict.setdefault(sim_datetime, list()).append(float(parts[2]))
            has_data = True
        if has_data:
            plot_vars_existed.append(var_name)

    print('Read simulation from %s to %s done.' % (stime.strftime('%c'),
                                                   etime.strftime('%c')))
    return plot_vars_existed, sim_data_dict
def read_field_arrays_from_csv(csvf):
    """Gather per-field value lists from a CSV file and combine them.

    Args:
        csvf: path of the CSV file; the first row holds the field names.

    Returns:
        ``None`` when the file has fewer than two rows. Otherwise the result
        of ``combine_multi_layers_array`` for a ``{field name: [values]}``
        dictionary that excludes the first column and columns matching
        ``FID``.
    """
    data_items = read_data_items_from_txt(csvf)
    if len(data_items) < 2:
        return None
    header = data_items[0]
    flds_array = dict()
    for row_idx, data_item in enumerate(data_items):
        if row_idx == 0:  # header row
            continue
        joined = ','.join(data_item)
        numeric_values = StringClass.extract_numeric_values_from_string(joined)
        for col, name in enumerate(header):
            skip = col == 0 or StringClass.string_match(name, 'FID')
            if skip:
                continue
            if name not in flds_array:
                flds_array[name] = list()
            flds_array[name].append(numeric_values[col])
    return combine_multi_layers_array(flds_array)
def calibrated_params_from_txt(cfg, maindb):
    """Read the calibration file and update the calibrated parameters.

    Impacts of parameters with change type VC, RC and AC are first reset to
    -9999., 1. and 0. respectively. Then every row of the calibration file
    sets the impact (and optionally the change type) of the parameter with
    the matching name.

    Args:
        cfg: configuration object; ``cfg.modelcfgs.filecali`` is the path of
            the calibration text file.
        maindb: database object holding the ``DBTableNames.main_parameter``
            collection.

    Raises:
        RuntimeError: if a row has fewer than two fields.
    """
    coll = maindb[DBTableNames.main_parameter]
    bulk = coll.initialize_ordered_bulk_op()
    data_items = read_data_items_from_txt(cfg.modelcfgs.filecali)

    def _reset_impact(change_type, default_impact):
        """Set the impact of all parameters of ``change_type`` to its default."""
        coll.update_many({ModelParamFields.change: change_type},
                         {'$set': {ModelParamFields.impact: default_impact}})

    # Clean up the existing calibration settings.
    _reset_impact(ModelParamFields.change_vc, -9999.)
    _reset_impact(ModelParamFields.change_rc, 1.)
    _reset_impact(ModelParamFields.change_ac, 0.)

    known_changes = [ModelParamFields.change_vc, ModelParamFields.change_ac,
                     ModelParamFields.change_rc, ModelParamFields.change_nc]
    for cur_data_item in data_items:
        if len(cur_data_item) < 2:
            raise RuntimeError('param.cali at least contain NAME and IMPACT fields!')
        name = cur_data_item[0]
        data_import = dict()
        data_import[ModelParamFields.name] = name
        data_import[ModelParamFields.impact] = float(cur_data_item[1])
        has_change = len(cur_data_item) >= 3
        if has_change and cur_data_item[2] in known_changes:
            # Only recognized change-type codes are written back.
            data_import[ModelParamFields.change] = cur_data_item[2]
        cur_filter = {ModelParamFields.name: name}
        bulk.find(cur_filter).update({'$set': data_import})

    # Execute the queued update operations.
    MongoUtil.run_bulk(bulk, 'No operations during calibrated_params_from_txt.')
def sites_table(hydro_clim_db, site_file, site_type):
    """Import HydroClimate sites into the database and return their locations.

    Every data row of ``site_file`` is stored in ``DBTableNames.sites``
    (replace-or-insert, keyed by station ID and ``site_type``). After the
    import an index on (id, type) is created.

    Args:
        hydro_clim_db: database object.
        site_file: path of the sites text file (first row is the header).
        site_type: type value stored with every site of this file.

    Returns:
        dict: station ID -> ``SiteInfo``; only the first row of an ID is used.
    """
    def _keep(text):
        """Return the cell text unchanged."""
        return text

    # (field, converter) pairs in matching priority order.
    known_columns = ((StationFields.id, int),
                     (StationFields.name, _keep),
                     (StationFields.x, float),
                     (StationFields.y, float),
                     (StationFields.lon, float),
                     (StationFields.lat, float),
                     (StationFields.elev, float),
                     (StationFields.outlet, float))

    def _parse_row(header, row):
        """Convert one data row into a site document."""
        doc = dict()
        for col, cell in enumerate(row):
            for fld, converter in known_columns:
                if StringClass.string_match(header[col], fld):
                    doc[fld] = converter(cell)
                    break
        return doc

    sites_loc = dict()
    site_data_items = read_data_items_from_txt(site_file)
    site_flds = site_data_items[0]
    sites_coll_name = DBTableNames.sites
    for row_idx in range(1, len(site_data_items)):
        dic = _parse_row(site_flds, site_data_items[row_idx])
        dic[StationFields.type] = site_type
        curfilter = {StationFields.id: dic[StationFields.id],
                     StationFields.type: dic[StationFields.type]}
        hydro_clim_db[sites_coll_name].find_one_and_replace(curfilter, dic, upsert=True)
        if dic[StationFields.id] in sites_loc:
            continue
        sites_loc[dic[StationFields.id]] = SiteInfo(dic[StationFields.id],
                                                    dic[StationFields.name],
                                                    dic[StationFields.lat],
                                                    dic[StationFields.lon],
                                                    dic[StationFields.x],
                                                    dic[StationFields.y],
                                                    dic[StationFields.elev])
    hydro_clim_db[sites_coll_name].create_index([(StationFields.id, ASCENDING),
                                                 (StationFields.type, ASCENDING)])
    return sites_loc
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """Interpolate irregular observed data to a regular time interval.

    Args:
        in_file: input data file, the basic format is as follows:
            line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
            line 2: DATETIME,field1,field2,...
            line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
            line 4: ...
            Field names can contain PCP, FLOW or SED; the units are mm/h,
            m3/s and g/L (i.e., kg/m3), respectively. Other fields are ignored.
        time_interval: time interval in minutes, e.g., daily output is 1440.
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS'.
        end_time: end time, see also start_time.
        eliminate_zero: if True, intervals without original records are not
            output.
        time_sys_output: time system of the output, formatted as
            '<time_system> [<time_zone>]', e.g., 'LOCALTIME', 'LOCALTIME 8',
            'UTCTIME' (default). 'LOCALTIME' without a zone uses the time
            zone of the running machine.
        day_divided_hour: only used when time_interval is a multiple of 1440.
            The value must range from 0 to 23, e.g.,
            day_divided_hour ==> day ranges (all expressed as 2013-02-03)
            0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
            8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
            20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59

    Returns:
        None. One file per supported field is written next to ``in_file``,
        named ``<field name>_<time system>_<time interval>[_nonzero].txt``
        with the field name as given in the header and the time system in
        upper case, e.g., PCP_UTCTIME_1440_nonzero.txt, FLOW_LOCALTIME_60.txt

    Raises:
        ValueError: if day_divided_hour is out of range or DATETIME is not a
            field of the input file.
        RuntimeError: if a SED field is present without a FLOW field.
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available field
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_available_field(cur_fld):
        """Check if the given field name is supported."""
        return any(fff.lower() in cur_fld.lower() for fff in available_flds)

    # Resolve the output time system; default zone is that of this machine.
    time_zone_output = time.timezone / -3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0

    # Original records keyed by datetime expressed in the output time system.
    ord_data = OrderedDict()
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)

    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        # Whole-day intervals start at the configured hour of the start day.
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
            timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in ord_data.items():
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one item that less than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # Just add end time for compute convenient
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # initial interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            # No original records in this interval: drop it if requested.
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            if 'SED' in v_name.upper():  # FLOW must be existed
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            # Integrate over time, holding each record's value until the next one.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    # Sediment is weighted by flow.
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                        tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!' %
                          item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    # Divide only when there is flow; dividing by an exact
                    # zero would raise ZeroDivisionError.
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():  # the input is mm/h, and output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta

    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_available_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.txt'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in itp_data.items():
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
def read_param_ranges(self):
    """Read the parameter range definition file (param_rng.def).

    Each usable row has the form
    ``name,lower_bound,upper_bound[,group][,dist]``, e.g.::

        Param1,0,1[,Group1][,dist1]
        Param2,0,1[,Group2][,dist2]
        Param3,0,1[,Group3][,dist3]

    Rows with fewer than three fields are skipped. Rows whose name has no
    document in the ``PARAMETERS`` collection are skipped with a warning.

    The result is stored in ``self.param_defs`` (nothing is returned) as a
    dictionary containing:

    - ``names``: the names of the parameters
    - ``bounds``: a list of ``[lower, upper]`` bounds
    - ``num_vars``: the number of variables (the length of ``names``)
    - ``groups``: a list of group names for each variable, ``None`` if every
      parameter forms its own group
    - ``dists``: a list of distributions, ``None`` if all are uniform

    It is also saved as JSON to ``self.cfg.outfiles.param_defs_json``. When
    ``self.param_defs`` is empty and that JSON file already exists, the
    definitions are loaded from it instead.

    Raises:
        ValueError: if all parameters share one single group.
    """
    # Read param_defs.json if already existed.
    # NOTE(review): when self.param_defs is already populated the definitions
    # are rebuilt from the database and the JSON file is rewritten; confirm
    # whether an early return was intended here.
    if not self.param_defs:
        if FileClass.is_file_exists(self.cfg.outfiles.param_defs_json):
            with open(self.cfg.outfiles.param_defs_json, 'r') as f:
                self.param_defs = UtilClass.decode_strs_in_dict(json.load(f))
            return

    # Read param_range_def file and output to json file.
    client = ConnectMongoDB(self.model.host, self.model.port)
    conn = client.get_conn()
    db = conn[self.model.db_name]
    collection = db['PARAMETERS']

    names = list()
    bounds = list()
    groups = list()
    dists = list()
    items = read_data_items_from_txt(self.cfg.param_range_def)
    for item in items:
        if len(item) < 3:
            continue
        # find_one() is used for the existence check because Cursor.count()
        # is deprecated and no longer available in PyMongo 4.
        if collection.find_one({'NAME': item[0]}) is None:
            print('WARNING: parameter %s is not existed!' % item[0])
            continue
        names.append(item[0])
        bounds.append([float(item[1]), float(item[2])])
        # If the fourth column does not contain a group name, use
        # the parameter name.
        groups.append(item[3] if len(item) >= 4 else item[0])
        dists.append(item[4] if len(item) >= 5 else 'unif')

    if groups == names:
        groups = None
    elif len(set(groups)) == 1:
        raise ValueError('Only one group defined, results will not be meaningful')

    # Setting dists to None if all are uniform,
    # because non-uniform scaling is not needed.
    if all(d == 'unif' for d in dists):
        dists = None

    self.param_defs = {'names': names, 'bounds': bounds, 'num_vars': len(names),
                       'groups': groups, 'dists': dists}

    # Save as json, which can be loaded by json.load()
    json_data = json.dumps(self.param_defs, indent=4, cls=SpecialJsonEncoder)
    with open(self.cfg.outfiles.param_defs_json, 'w') as f:
        f.write(json_data)
def scenario_from_texts(cfg, main_db, scenario_db):
    """Import BMPs scenario data from text files to MongoDB.

    Every ``.txt`` file in ``cfg.scenario_dir`` becomes one collection of
    ``scenario_db`` (named after the upper-cased file name before the first
    dot). Rows carrying local X/Y fields additionally get the subbasin ID and
    the distance to stream read from the rasters; such rows are skipped when
    either raster has no value at that location.

    Args:
        cfg: SEIMS configuration object.
        main_db: database receiving the name of the scenario database
            (stored in ``DBTableNames.main_scenario``).
        scenario_db: scenario database.

    Returns:
        False if scenarios are disabled in ``cfg``, otherwise True.
    """
    if not cfg.use_scernario:
        return False
    print('Import BMP Scenario Data... ')
    bmp_files = FileClass.get_filename_by_suffixes(cfg.scenario_dir, ['.txt'])
    bmp_tabs = [fname.split('.')[0] for fname in bmp_files]
    bmp_tabs_path = [cfg.scenario_dir + os.path.sep + fname for fname in bmp_files]

    # Create missing collections, drop existing ones.
    c_list = scenario_db.collection_names()
    for tab in bmp_tabs:
        coll_name = tab.upper()
        if StringClass.string_in_list(coll_name, c_list):
            scenario_db.drop_collection(coll_name)
        else:
            scenario_db.create_collection(coll_name)

    # Read subbasin.tif and dist2Stream.tif
    subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
    dist2stream_r = RasterUtilClass.read_raster(cfg.spatials.dist2stream_d8)

    x_fld = ImportScenario2Mongo._LocalX
    y_fld = ImportScenario2Mongo._LocalY
    for bmp_tab_name, bmp_txt in zip(bmp_tabs, bmp_tabs_path):
        target_coll = scenario_db[bmp_tab_name.upper()]
        data_array = read_data_items_from_txt(bmp_txt)
        field_array = data_array[0]
        for item in data_array[1:]:
            dic = dict()
            for i, field_name in enumerate(field_array):
                raw = item[i]
                if MathClass.isnumerical(raw):
                    v = float(raw)
                    # Store whole numbers as int.
                    dic[field_name.upper()] = int(v) if v % 1. == 0. else v
                else:
                    dic[field_name.upper()] = str(raw).upper()
            has_location = (StringClass.string_in_list(x_fld, list(dic.keys())) and
                            StringClass.string_in_list(y_fld, list(dic.keys())))
            if has_location:
                loc_x = dic[x_fld.upper()]
                loc_y = dic[y_fld.upper()]
                subbsn_id = subbasin_r.get_value_by_xy(loc_x, loc_y)
                distance = dist2stream_r.get_value_by_xy(loc_x, loc_y)
                if subbsn_id is None or distance is None:
                    # No raster value at this location: the row is not imported.
                    continue
                dic[ImportScenario2Mongo._SUBBASINID] = int(subbsn_id)
                dic[ImportScenario2Mongo._DISTDOWN] = float(distance)
            target_coll.find_one_and_replace(dic, dic, upsert=True)

    # Write BMP database name into Model workflow database
    c_list = main_db.collection_names()
    if not StringClass.string_in_list(DBTableNames.main_scenario, c_list):
        main_db.create_collection(DBTableNames.main_scenario)
    bmp_info_dic = {ImportScenario2Mongo._FLD_DB: cfg.bmp_scenario_db}
    main_db[DBTableNames.main_scenario].find_one_and_replace(bmp_info_dic, bmp_info_dic,
                                                             upsert=True)
    return True
def model_io_configuration(cfg, maindb):
    """Import the input and output configuration of SEIMS (file.in, file.out).

    Three text files are processed in turn:

    1. ``cfg.modelcfgs.filein``: the first cell of every item must split on
       ``|`` into exactly a tag and a value; one document per item is inserted.
    2. ``cfg.paramcfgs.init_outputs_file``: the first item is the header, every
       following item becomes one document of the initial output settings.
    3. ``cfg.modelcfgs.fileout``: every item must hold 7 columns and updates
       the document with the same output ID, also setting its USE field to 1.

    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object

    Raises:
        ValueError: if a file.in item does not split into two parts, or if an
            initial-output item matches none of the known fields.
        RuntimeError: if a file.out item does not have exactly 7 columns.
    """
    file_in_path = cfg.modelcfgs.filein
    file_out_path = cfg.paramcfgs.init_outputs_file

    # Create the collections that are missing and drop the ones that exist.
    existing = maindb.collection_names()
    for tab in (DBTableNames.main_filein, DBTableNames.main_fileout):
        if StringClass.string_in_list(tab, existing):
            maindb.drop_collection(tab)
        else:
            maindb.create_collection(tab)

    file_in_items = read_data_items_from_txt(file_in_path)
    file_out_items = read_data_items_from_txt(file_out_path)

    # --- file.in: TAG|VALUE pairs ---
    for record in file_in_items:
        parts = StringClass.split_string(record[0].strip(), ['|'])
        if len(parts) != 2:
            raise ValueError('One item should only have one Tag and one value string,'
                             ' split by "|"')
        maindb[DBTableNames.main_filein].insert({ModelCfgFields.tag: parts[0],
                                                 ModelCfgFields.value: parts[1]})

    # --- initial output settings ---
    # Fields are tested in this order; the first one matching a header wins.
    known_fields = (ModelCfgFields.mod_cls, ModelCfgFields.output_id, ModelCfgFields.desc,
                    ModelCfgFields.unit, ModelCfgFields.type, ModelCfgFields.stime,
                    ModelCfgFields.etime, ModelCfgFields.interval,
                    ModelCfgFields.interval_unit, ModelCfgFields.filename,
                    ModelCfgFields.use, ModelCfgFields.subbsn)
    bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
    out_header = file_out_items[0]
    for row in file_out_items[1:]:
        out_doc = {}
        for col, head in enumerate(out_header):
            for fld in known_fields:
                if StringClass.string_match(fld, head):
                    out_doc[fld] = row[col]
                    break
        if not out_doc:
            raise ValueError('There are not any valid output item stored in file.out!')
        bulk.insert(out_doc)
    MongoUtil.run_bulk(bulk, 'No operations to excute when import initial outputs settings.')

    # --- desired outputs: update the initial settings by output ID ---
    bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
    for desired in read_data_items_from_txt(cfg.modelcfgs.fileout):
        if len(desired) != 7:
            raise RuntimeError('Items in file.out must have 7 columns, i.e., OUTPUTID,'
                               'TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.')
        changes = {
            ModelCfgFields.output_id: desired[0],
            ModelCfgFields.type: desired[1],
            ModelCfgFields.stime: desired[2],
            ModelCfgFields.etime: desired[3],
            ModelCfgFields.interval: desired[4],
            ModelCfgFields.interval_unit: desired[5],
            ModelCfgFields.subbsn: desired[6],
            ModelCfgFields.use: 1,
        }
        bulk.find({ModelCfgFields.output_id: desired[0]}).update({'$set': changes})
    MongoUtil.run_bulk(bulk, 'No operations to excute when import the desired outputs.')
def daily_data_from_txt(climdb, data_txt_file, sites_info_dict):
    """Import daily meteorological records and the derived annual statistics.

    The first data item of ``data_txt_file`` is the header. Every following
    item is split into one document per available output variable (mean, max
    and min temperature, RM, PET, WS, SR) and inserted into the data-values
    collection in batches of 500. Per-station ``ClimateStats`` objects collect
    the records; their yearly and multi-year values are then upserted into the
    annual-statistics collection.

    The header is validated BEFORE the stored records are removed, so an input
    file lacking a required field no longer wipes the existing data.

    Args:
        climdb: climate database (MongoDB database object)
        data_txt_file: path of the daily meteorological data text file
        sites_info_dict: dict keyed by station ID; the values must provide
            ``lon_lat()``, used when SR has to be derived from SSD

    Raises:
        ValueError: if a required field is missing from the header, or if an
            item provides neither SR nor SSD.
    """
    tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(data_txt_file)
    if tsysin == 'UTCTIME':
        tzonein = time.timezone / -3600
    clim_data_items = read_data_items_from_txt(data_txt_file)
    clim_flds = clim_data_items[0]
    # format: {StationID1: climateStats1, ...}
    hydro_climate_stats = dict()
    required_flds = [DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws]
    output_flds = [DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp,
                   DataType.rm, DataType.pet, DataType.ws, DataType.sr]
    # Columns stored as plain floats, in the order they are tested against a header
    plain_float_flds = (DataType.mean_tmp, DataType.min_tmp, DataType.max_tmp,
                        DataType.pet, DataType.sr, DataType.ws)

    # Validate first: the removal below is destructive.
    for fld in required_flds:
        if not StringClass.string_in_list(fld, clim_flds):
            raise ValueError('Meteorological Daily data MUST contain %s!' % fld)
    # remove existed records
    for fld in output_flds:
        climdb[DBTableNames.data_values].remove({'TYPE': fld})

    def _upsert_annual_stat(station_id, year, stat_type, unit, value):
        """Upsert one annual-statistics document built from scratch."""
        doc = {DataValueFields.value: value,
               DataValueFields.id: station_id,
               DataValueFields.y: year,
               VariableDesc.unit: unit,
               VariableDesc.type: stat_type}
        flt = {DataValueFields.id: station_id,
               VariableDesc.type: stat_type,
               DataValueFields.y: year}
        climdb[DBTableNames.annual_stats].find_one_and_replace(flt, doc, upsert=True)

    # Create bulk object
    bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
    count = 0
    for cur_clim_data_item in clim_data_items[1:]:
        dic = dict()
        cur_ssd = DEFAULT_NODATA

        for j, clim_data_v in enumerate(cur_clim_data_item):
            fld_name = clim_flds[j]
            if StringClass.string_match(fld_name, DataValueFields.id):
                dic[DataValueFields.id] = int(clim_data_v)
                continue
            matched = next((f for f in plain_float_flds
                            if StringClass.string_match(fld_name, f)), None)
            if matched is not None:
                dic[matched] = float(clim_data_v)
            elif StringClass.string_match(fld_name, DataType.rm):
                # NOTE(review): scaling by 0.01 presumably turns percent into a
                # fraction -- confirm against the data specification.
                dic[DataType.rm] = float(clim_data_v) * 0.01
            elif StringClass.string_match(fld_name, DataType.ssd):
                cur_ssd = float(clim_data_v)

        # Get datetime and utc/local transformation
        utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(
            clim_flds, cur_clim_data_item, tsysin, tzonein)
        dic[DataValueFields.local_time] = utc_time + timedelta(minutes=tzonein * 60)
        dic[DataValueFields.time_zone] = tzonein
        dic[DataValueFields.utc] = utc_time
        dic[DataValueFields.y] = utc_time.year

        # Derive the values that were not provided
        if DataType.mean_tmp not in dic:
            dic[DataType.mean_tmp] = (dic[DataType.max_tmp] + dic[DataType.min_tmp]) / 2.
        if DataType.sr not in dic:
            if cur_ssd == DEFAULT_NODATA:
                raise ValueError(DataType.sr + ' or ' + DataType.ssd + ' must be provided!')
            station_id = dic[DataValueFields.id]
            # SR stays absent when the station is unknown to sites_info_dict.
            if station_id in sites_info_dict:
                _, cur_lat = sites_info_dict[station_id].lon_lat()
                dic[DataType.sr] = round(
                    HydroClimateUtilClass.rs(DateClass.day_of_year(utc_time),
                                             float(cur_ssd), cur_lat * PI / 180.), 1)

        for fld in output_flds:
            if fld not in dic:
                continue
            cur_dic = {DataValueFields.value: dic[fld],
                       DataValueFields.id: dic[DataValueFields.id],
                       DataValueFields.utc: dic[DataValueFields.utc],
                       DataValueFields.time_zone: dic[DataValueFields.time_zone],
                       DataValueFields.local_time: dic[DataValueFields.local_time],
                       DataValueFields.type: fld}
            bulk.insert(cur_dic)
            count += 1
            if count % 500 == 0:  # execute each 500 records
                MongoUtil.run_bulk(bulk)
                bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()

        if dic[DataValueFields.id] not in hydro_climate_stats:
            hydro_climate_stats[dic[DataValueFields.id]] = ClimateStats()
        hydro_climate_stats[dic[DataValueFields.id]].add_item(dic)

    # execute the remained records
    if count % 500 != 0:
        MongoUtil.run_bulk(bulk)
    for cur_climate_stats in hydro_climate_stats.values():
        cur_climate_stats.annual_stats()
    # Create index
    climdb[DBTableNames.data_values].create_index([(DataValueFields.id, ASCENDING),
                                                   (DataValueFields.type, ASCENDING),
                                                   (DataValueFields.utc, ASCENDING)])
    # Annual statistics: one PHU and one mean-temperature document per station
    # and year, followed by the two multi-year documents (year = DEFAULT_NODATA).
    # Every document is built from scratch instead of reusing a dict left over
    # from an earlier loop.
    for s_id, stats_v in hydro_climate_stats.items():
        for year in list(stats_v.Count.keys()):
            _upsert_annual_stat(s_id, year, DataType.phu_tot, 'heat units',
                                stats_v.PHUTOT[year])
            _upsert_annual_stat(s_id, year, DataType.mean_tmp, 'deg C',
                                stats_v.MeanTmp[year])
        _upsert_annual_stat(s_id, DEFAULT_NODATA, DataType.phu0, 'heat units',
                            stats_v.PHU0)
        _upsert_annual_stat(s_id, DEFAULT_NODATA, DataType.mean_tmp0, 'deg C',
                            stats_v.MeanTmp0)
def lookup_soil_parameters(dstdir, soiltype_file, soil_lookup_file, landuse_shapefile):
    """Reclassify soil parameters by lookup table.

    The first item of ``soil_lookup_file`` is the header; the first two columns
    of every following item are the soil sequence number and name, the
    remaining columns hold ``-`` separated numbers. One replacement dictionary
    ``{float(seqn): value}`` is built per single-valued property and one per
    layer for layered properties, then all of them are handed to
    ``RasterUtilClass.count_by_shp``.

    Args:
        dstdir: directory used to compose the destination tif names
        soiltype_file: not used by this function
        soil_lookup_file: soil lookup table (text file)
        landuse_shapefile: landuse shapefile passed to ``count_by_shp``
    """
    def as_is(vals):
        return vals

    def first(vals):
        return vals[0]

    def first_as_int(vals):
        return int(vals[0])

    # (lookup column, SoilProperty attribute, converter); tested in this order,
    # the first column name that matches a header wins.
    column_map = (
        (SoilUtilClass._NLYRS, 'SOILLAYERS', first_as_int),
        (SoilUtilClass._Z, 'SOILDEPTH', as_is),
        (SoilUtilClass._OM, 'OM', as_is),
        (SoilUtilClass._CLAY, 'CLAY', as_is),
        (SoilUtilClass._SILT, 'SILT', as_is),
        (SoilUtilClass._SAND, 'SAND', as_is),
        (SoilUtilClass._ROCK, 'ROCK', as_is),
        (SoilUtilClass._ZMX, 'SOL_ZMX', first),
        (SoilUtilClass._ANIONEXCL, 'ANION_EXCL', first),
        (SoilUtilClass._CRK, 'SOL_CRK', first),
        (SoilUtilClass._BD, 'DENSITY', as_is),
        (SoilUtilClass._K, 'CONDUCTIVITY', as_is),
        (SoilUtilClass._WP, 'WILTINGPOINT', as_is),
        (SoilUtilClass._FC, 'FIELDCAP', as_is),
        (SoilUtilClass._AWC, 'AWC', as_is),
        (SoilUtilClass._POROSITY, 'POROSITY', as_is),
        (SoilUtilClass._USLE_K, 'USLE_K', as_is),
        (SoilUtilClass._ALB, 'SOL_ALB', as_is),
        (SoilUtilClass._ESCO, 'ESCO', first),
        (SoilUtilClass._NO3, 'SOL_NO3', as_is),
        (SoilUtilClass._NH4, 'SOL_NH4', as_is),
        (SoilUtilClass._ORGN, 'SOL_ORGN', as_is),
        (SoilUtilClass._SOLP, 'SOL_SOLP', as_is),
        (SoilUtilClass._ORGP, 'SOL_ORGP', as_is),
    )

    # Read soil properties from txt file
    lookup_rows = read_data_items_from_txt(soil_lookup_file)
    header = lookup_rows[0][:]
    soils = []
    for raw_row in lookup_rows[1:]:
        row = raw_row[:]
        soil = SoilProperty(row[0], row[1])
        for col in range(2, len(header)):
            numbers = [float(part) for part in StringClass.split_string(row[col], '-')]
            for col_name, attr, convert in column_map:
                if StringClass.string_match(header[col], col_name):
                    setattr(soil, attr, convert(numbers))
                    break
        soil.check_data_validation()
        soils.append(soil)

    # Regroup: {property: [value of soil 1, value of soil 2, ...]}
    soil_prop_dict = {}
    for soil in soils:
        props = soil.soil_dict()
        for name in props:
            soil_prop_dict.setdefault(name, []).append(props[name])

    replace_dicts = []
    dst_soil_tifs = []
    sol_fld_name = []
    seqns = soil_prop_dict[SoilUtilClass._SEQN]
    max_lyr_num = int(numpy.max(soil_prop_dict[SoilUtilClass._NLYRS]))
    for prop in soil_prop_dict:
        if prop in (SoilUtilClass._SEQN, SoilUtilClass._NAME):
            continue
        per_soil = soil_prop_dict[prop]
        # Longest list among the values; 1 when no value is a longer list
        depth = max([1] + [len(val) for val in per_soil if isinstance(val, list)])
        if depth == 1:
            replace_dicts.append({float(seq): per_soil[idx]
                                  for idx, seq in enumerate(seqns)})
            dst_soil_tifs.append(dstdir + os.path.sep + prop + '.tif')
            sol_fld_name.append(prop)
            continue
        for lyr in range(max_lyr_num):
            layer_dict = {}
            for idx, seq in enumerate(seqns):
                # Soils with fewer layers than `lyr + 1` get NODATA.
                if lyr < soil_prop_dict[SoilUtilClass._NLYRS][idx]:
                    layer_dict[float(seq)] = per_soil[idx][lyr]
                else:
                    layer_dict[float(seq)] = DEFAULT_NODATA
            replace_dicts.append(layer_dict)
            dst_soil_tifs.append(dstdir + os.path.sep + prop + '_' + str(lyr + 1) + '.tif')
            sol_fld_name.append(prop + '_' + str(lyr + 1))

    # Generate GTIFF
    soil_shp = r'D:\SEIMS\data\zts\data_prepare\spatial\soil_SEQN_all.shp'
    RasterUtilClass.count_by_shp(soil_shp, landuse_shapefile, sol_fld_name, replace_dicts)
def initial_params_from_txt(cfg, maindb):
    """Import the initial calibration parameters from a text file.

    The parameter collection is created when missing and dropped when present.
    The first item of ``cfg.paramcfgs.init_params_file`` is the header; every
    following item becomes one document inserted through an ordered bulk
    operation. An index on (type, name), both ascending, is created last.

    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object

    Raises:
        ValueError: from ``list.index`` when the header lacks one of the
            parameter fields while at least one data item exists.
    """
    # delete if existed, initialize if not existed
    existing = maindb.collection_names()
    if StringClass.string_in_list(DBTableNames.main_parameter, existing):
        maindb.drop_collection(DBTableNames.main_parameter)
    else:
        maindb.create_collection(DBTableNames.main_parameter)

    bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
    data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
    field_names = data_items[0][0:]

    # Defaults for empty cells, tested in this order (first match wins).
    # NOTE(review): the key compared below is a FIELD name, yet it is matched
    # against the change-type codes -- confirm this is intended.
    empty_defaults = (
        (ModelParamFields.change_ac, 0),
        (ModelParamFields.change_rc, 1),
        (ModelParamFields.change_nc, 0),
        (ModelParamFields.change_vc, DEFAULT_NODATA),  # Be careful to check NODATA when use!
    )

    for row in data_items[1:]:
        # one default blank parameter document
        param_doc = {
            ModelParamFields.name: '',
            ModelParamFields.desc: '',
            ModelParamFields.unit: '',
            ModelParamFields.module: '',
            ModelParamFields.value: DEFAULT_NODATA,
            ModelParamFields.impact: DEFAULT_NODATA,
            ModelParamFields.change: 'NC',
            ModelParamFields.max: DEFAULT_NODATA,
            ModelParamFields.min: DEFAULT_NODATA,
            ModelParamFields.type: ''
        }
        for key in list(param_doc):
            cell = row[field_names.index(key)]
            if cell != '':
                param_doc[key] = float(cell) if MathClass.isnumerical(cell) else cell
                continue
            for change_type, default in empty_defaults:
                if StringClass.string_match(key, change_type):
                    param_doc[key] = default
                    break
        bulk.insert(param_doc)

    # execute import operators
    MongoUtil.run_bulk(bulk, 'No operation during initial_params_from_txt.')
    # index by parameter's type and name in ascending order
    index_spec = [(ModelParamFields.type, ASCENDING), (ModelParamFields.name, ASCENDING)]
    maindb[DBTableNames.main_parameter].create_index(index_spec)
def lookup_tables_as_collection_and_gridfs(cfg, maindb):
    """Import the lookup tables (text files) as collections and GridFS files.

    For every ``(table name, text file)`` pair of
    ``cfg.paramcfgs.lookup_tabs_dict`` the items are inserted into the
    collection named after the upper-cased table name. The numerical cells of
    each item are additionally written to a GridFS file of the same name:
    first the number of rows packed with struct format ``1f``, then per row
    the column count followed by the row values, all packed as ``f``.

    Args:
        cfg: SEIMS config object
        maindb: workflow model database

    Raises:
        ValueError: if the items of one table do not all contain the same
            number of numerical cells.
    """
    for tablename, txt_file in list(cfg.paramcfgs.lookup_tabs_dict.items()):
        upper_name = tablename.upper()
        # Create the collection when missing, drop it when present.
        if StringClass.string_in_list(upper_name, maindb.collection_names()):
            maindb.drop_collection(upper_name)
        else:
            maindb.create_collection(upper_name)
        bulk = maindb[upper_name].initialize_ordered_bulk_op()

        # delete the GridFS file of this table if it exists
        spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
        if spatial.exists(filename=upper_name):
            old_file = spatial.get_version(filename=upper_name)
            spatial.delete(old_file._id)

        # read data items
        data_items = read_data_items_from_txt(txt_file)
        field_names = data_items[0][0:]
        numeric_rows = []  # rows for the GridFS file
        for row in data_items[1:]:
            document = {}  # item for the collection
            numbers = []
            for idx, fld in enumerate(field_names):
                cell = row[idx]
                if not MathClass.isnumerical(cell):
                    document[fld] = cell
                    continue
                as_float = float(cell)
                document[fld] = as_float
                numbers.append(as_float)
            bulk.insert(document)
            if numbers:
                numeric_rows.append(numbers)
        MongoUtil.run_bulk(bulk, 'No operations during import %s.' % tablename)

        # begin import gridfs file
        n_row = len(numeric_rows)
        if n_row < 1:
            continue
        n_col = len(numeric_rows[0])
        for numbers in numeric_rows:
            if len(numbers) != n_col:
                raise ValueError('Please check %s to make sure each item has '
                                 'the same numeric dimension. The size of first '
                                 'row is: %d, and the current data item is: %d' %
                                 (tablename, n_col, len(numbers)))
            # every row is prefixed with the column count
            numbers.insert(0, n_col)

        metadic = {ModelParamDataUtils.item_count: n_row,
                   ModelParamDataUtils.field_count: n_col}
        gridfs_file = spatial.new_file(filename=upper_name, metadata=metadic)
        gridfs_file.write(pack('%df' % 1, n_row))
        row_fmt = '%df' % (n_col + 1)
        for numbers in numeric_rows:
            gridfs_file.write(pack(row_fmt, *numbers))
        gridfs_file.close()
def read_param_ranges(self):
    """Read the param_rng.def file and store the result in ``self.param_defs``.

    Each item of the file has the form::

        name,lower_bound,upper_bound[,group][,dist]

    e.g.,
        Param1,0,1[,Group1][,dist1]
        Param2,0,1[,Group2][,dist2]
        Param3,0,1[,Group3][,dist3]

    When ``self.param_defs`` is empty and the JSON file
    ``self.cfg.outfiles.param_defs_json`` exists, the definitions are loaded
    from it and nothing else is done. Otherwise the range file is parsed,
    parameters missing from the ``PARAMETERS`` collection are skipped with a
    warning, and the result is also written to that JSON file.

    ``self.param_defs`` is a dictionary containing:
        - names - the names of the parameters
        - bounds - a list of lists of lower and upper bounds
        - num_vars - the number of variables (the length of names)
        - groups - a list of group names (strings) for each variable,
          None if every group equals its parameter name
        - dists - a list of distributions for the problem,
          None if not specified or all uniform

    Returns:
        None. The definitions are stored in ``self.param_defs``.

    Raises:
        ValueError: if all parameters share one single group.
    """
    # read param_defs.json if already existed
    if not self.param_defs:
        if FileClass.is_file_exists(self.cfg.outfiles.param_defs_json):
            with open(self.cfg.outfiles.param_defs_json, 'r') as f:
                self.param_defs = UtilClass.decode_strs_in_dict(json.load(f))
            return
    # read param_range_def file and output to json file
    client = ConnectMongoDB(self.model.host, self.model.port)
    conn = client.get_conn()
    db = conn[self.model.db_name]
    collection = db['PARAMETERS']

    names = list()
    bounds = list()
    groups = list()
    dists = list()
    num_vars = 0
    items = read_data_items_from_txt(self.cfg.param_range_def)
    for item in items:
        # name, lower bound and upper bound are mandatory
        if len(item) < 3:
            continue
        # find parameter name, print warning message if not existed
        cursor = collection.find({'NAME': item[0]}, no_cursor_timeout=True)
        if not cursor.count():
            print('WARNING: parameter %s is not existed!' % item[0])
            continue
        num_vars += 1
        names.append(item[0])
        bounds.append([float(item[1]), float(item[2])])
        # If the fourth column does not contain a group name, use
        # the parameter name
        groups.append(item[3] if len(item) >= 4 else item[0])
        dists.append(item[4] if len(item) >= 5 else 'unif')
    if groups == names:
        groups = None
    elif len(set(groups)) == 1:
        raise ValueError('Only one group defined, results will not be meaningful')

    # setting dists to none if all are uniform
    # because non-uniform scaling is not needed
    if all(d == 'unif' for d in dists):
        dists = None

    self.param_defs = {'names': names, 'bounds': bounds,
                       'num_vars': num_vars, 'groups': groups, 'dists': dists}

    # Save as json, which can be loaded by json.load()
    json_data = json.dumps(self.param_defs, indent=4, cls=SpecialJsonEncoder)
    with open(self.cfg.outfiles.param_defs_json, 'w') as f:
        f.write(json_data)
def scenario_from_texts(cfg, main_db, scenario_db):
    """Import BMPs Scenario data to MongoDB.

    One collection of ``scenario_db`` is filled per ``.txt`` file found for
    ``cfg.scenario_dir``; the collection is named after the upper-cased part
    of the file name before its first dot. Items carrying both local
    coordinate fields are stored only when the subbasin raster and the
    distance raster both return a value at that position; these two values
    are added to the stored document. Items without coordinates are stored
    unchanged.

    Args:
        cfg: SEIMS configuration object
        main_db: database in which the scenario database name is recorded
        scenario_db: scenario database

    Returns:
        False if ``cfg.use_scernario`` is falsy, otherwise True.
    """
    if not cfg.use_scernario:
        return False
    print('Import BMP Scenario Data... ')
    bmp_files = FileClass.get_filename_by_suffixes(cfg.scenario_dir, ['.txt'])
    tab_names = []
    tab_paths = []
    for fname in bmp_files:
        tab_names.append(fname.split('.')[0])
        tab_paths.append(cfg.scenario_dir + os.path.sep + fname)

    # initialize if collection not existed, otherwise drop it
    known_collections = scenario_db.collection_names()
    for tab in tab_names:
        upper_tab = tab.upper()
        if not StringClass.string_in_list(upper_tab, known_collections):
            scenario_db.create_collection(upper_tab)
        else:
            scenario_db.drop_collection(upper_tab)

    # Read subbasin.tif and dist2Stream.tif
    subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
    dist2stream_r = RasterUtilClass.read_raster(cfg.spatials.dist2stream_d8)

    def parse_cell(raw):
        """Return int/float for numerical text, else the upper-cased string."""
        if MathClass.isnumerical(raw):
            num = float(raw)
            if num % 1. == 0.:
                return int(num)
            return num
        return str(raw).upper()

    def locate(record):
        """Add subbasin ID and distance to record; False if a raster gives None."""
        x_coord = record[ImportScenario2Mongo._LocalX.upper()]
        y_coord = record[ImportScenario2Mongo._LocalY.upper()]
        subbsn_id = subbasin_r.get_value_by_xy(x_coord, y_coord)
        distance = dist2stream_r.get_value_by_xy(x_coord, y_coord)
        if subbsn_id is None or distance is None:
            return False
        record[ImportScenario2Mongo._SUBBASINID] = int(subbsn_id)
        record[ImportScenario2Mongo._DISTDOWN] = float(distance)
        return True

    for tab, path in zip(tab_names, tab_paths):
        items = read_data_items_from_txt(path)
        fields = items[0]
        for values in items[1:]:
            record = dict()
            for pos, fld in enumerate(fields):
                record[fld.upper()] = parse_cell(values[pos])
            keys = list(record.keys())
            has_coords = StringClass.string_in_list(ImportScenario2Mongo._LocalX, keys) and \
                StringClass.string_in_list(ImportScenario2Mongo._LocalY, keys)
            # Store when there is nothing to locate, or when locating succeeded.
            if not has_coords or locate(record):
                scenario_db[tab.upper()].find_one_and_replace(record, record, upsert=True)

    # Write BMP database name into Model workflow database
    c_list = main_db.collection_names()
    if not StringClass.string_in_list(DBTableNames.main_scenario, c_list):
        main_db.create_collection(DBTableNames.main_scenario)
    bmp_info = dict()
    bmp_info[ImportScenario2Mongo._FLD_DB] = cfg.bmp_scenario_db
    main_db[DBTableNames.main_scenario].find_one_and_replace(bmp_info, bmp_info, upsert=True)
    return True
def lookup_soil_parameters(dstdir, soiltype_file, soil_lookup_file, landuse_shapefile,
                           soil_shp=None):
    """Reclassify soil parameters by lookup table.

    The first item of ``soil_lookup_file`` is the header; the first two columns
    of every following item are the soil sequence number and name, the
    remaining columns hold ``-`` separated numbers. One replacement dictionary
    ``{float(seqn): value}`` is built per single-valued property and one per
    layer (suffix ``_1``, ``_2``, ...) for layered properties. The field names
    and dictionaries are then passed to ``RasterUtilClass.count_by_shp``.

    Args:
        dstdir: currently unused, kept for interface compatibility
        soiltype_file: currently unused, kept for interface compatibility
        soil_lookup_file: soil lookup table (text file)
        landuse_shapefile: landuse shapefile passed to ``count_by_shp``
        soil_shp: soil shapefile passed to ``count_by_shp``. Defaults to the
            path that used to be hard-coded in this function, so existing
            callers behave as before.
    """
    if soil_shp is None:
        # Legacy default; pass ``soil_shp`` explicitly on any other machine.
        soil_shp = r'D:\SEIMS\data\zts\data_prepare\spatial\soil_SEQN_all.shp'

    # (lookup column, SoilProperty attribute, kind); tested in this order and
    # the first column name matching a header wins. Kinds: 'int' -> int of the
    # first number, 'scalar' -> first number, 'list' -> all numbers.
    column_map = (
        (SoilUtilClass._NLYRS, 'SOILLAYERS', 'int'),
        (SoilUtilClass._Z, 'SOILDEPTH', 'list'),
        (SoilUtilClass._OM, 'OM', 'list'),
        (SoilUtilClass._CLAY, 'CLAY', 'list'),
        (SoilUtilClass._SILT, 'SILT', 'list'),
        (SoilUtilClass._SAND, 'SAND', 'list'),
        (SoilUtilClass._ROCK, 'ROCK', 'list'),
        (SoilUtilClass._ZMX, 'SOL_ZMX', 'scalar'),
        (SoilUtilClass._ANIONEXCL, 'ANION_EXCL', 'scalar'),
        (SoilUtilClass._CRK, 'SOL_CRK', 'scalar'),
        (SoilUtilClass._BD, 'DENSITY', 'list'),
        (SoilUtilClass._K, 'CONDUCTIVITY', 'list'),
        (SoilUtilClass._WP, 'WILTINGPOINT', 'list'),
        (SoilUtilClass._FC, 'FIELDCAP', 'list'),
        (SoilUtilClass._AWC, 'AWC', 'list'),
        (SoilUtilClass._POROSITY, 'POROSITY', 'list'),
        (SoilUtilClass._USLE_K, 'USLE_K', 'list'),
        (SoilUtilClass._ALB, 'SOL_ALB', 'list'),
        (SoilUtilClass._ESCO, 'ESCO', 'scalar'),
        (SoilUtilClass._NO3, 'SOL_NO3', 'list'),
        (SoilUtilClass._NH4, 'SOL_NH4', 'list'),
        (SoilUtilClass._ORGN, 'SOL_ORGN', 'list'),
        (SoilUtilClass._SOLP, 'SOL_SOLP', 'list'),
        (SoilUtilClass._ORGP, 'SOL_ORGP', 'list'),
    )

    # Read soil properties from txt file
    soil_lookup_data = read_data_items_from_txt(soil_lookup_file)
    soil_prop_flds = soil_lookup_data[0][:]
    soil_instances = list()
    for cur_soil_data_item in soil_lookup_data[1:]:
        cur_soil_ins = SoilProperty(cur_soil_data_item[0], cur_soil_data_item[1])
        for j in range(2, len(soil_prop_flds)):
            # Get field values and convert to float
            cur_flds = [float(tmpfld) for tmpfld in
                        StringClass.split_string(cur_soil_data_item[j], '-')]
            for col_name, attr, kind in column_map:
                if not StringClass.string_match(soil_prop_flds[j], col_name):
                    continue
                if kind == 'int':
                    setattr(cur_soil_ins, attr, int(cur_flds[0]))
                elif kind == 'scalar':
                    setattr(cur_soil_ins, attr, cur_flds[0])
                else:
                    setattr(cur_soil_ins, attr, cur_flds)
                break
        cur_soil_ins.check_data_validation()
        soil_instances.append(cur_soil_ins)

    # Regroup: {property: [value of soil 1, value of soil 2, ...]}
    soil_prop_dict = {}
    for sol in soil_instances:
        for fld, fld_value in sol.soil_dict().items():
            soil_prop_dict.setdefault(fld, []).append(fld_value)

    replace_dicts = list()
    sol_fld_name = list()
    seqns = soil_prop_dict[SoilUtilClass._SEQN]
    layer_counts = soil_prop_dict[SoilUtilClass._NLYRS]
    max_lyr_num = int(numpy.max(layer_counts))
    for key, key_values in soil_prop_dict.items():
        if key in (SoilUtilClass._SEQN, SoilUtilClass._NAME):
            continue
        # Longest list among the values; 1 when no value is a longer list
        key_l = max([1] + [len(v) for v in key_values if isinstance(v, list)])
        if key_l == 1:
            replace_dicts.append({float(tmpseq): key_values[i]
                                  for i, tmpseq in enumerate(seqns)})
            sol_fld_name.append(key)
            continue
        for i in range(max_lyr_num):
            # Soils with fewer than i + 1 layers get NODATA for this layer.
            replace_dicts.append({
                float(tmpseq): key_values[j][i] if i < layer_counts[j] else DEFAULT_NODATA
                for j, tmpseq in enumerate(seqns)
            })
            sol_fld_name.append(key + '_' + str(i + 1))

    # Generate GTIFF
    RasterUtilClass.count_by_shp(soil_shp, landuse_shapefile, sol_fld_name, replace_dicts)
def daily_data_from_txt(climdb, data_txt_file, sites_info_dict):
    """Import daily meteorological records from a text file into MongoDB.

    The header is validated first. Then the existing documents of every
    output type are removed from the data-values collection and one document
    per (record, output field) is bulk-inserted in batches of 500. Finally,
    per-station statistics accumulated with ``ClimateStats`` are upserted
    into the annual-stats collection: ``PHUTOT`` and ``MeanTmp`` per year,
    ``PHU0`` and ``MeanTmp0`` once per station (stored with year
    ``DEFAULT_NODATA``).

    Args:
        climdb: MongoDB database object holding the climate collections.
        data_txt_file: path of the daily meteorological data file; the first
            item returned by ``read_data_items_from_txt`` is used as the
            field-name header.
        sites_info_dict: mapping of station ID to an object providing
            ``lon_lat()``. Only consulted when solar radiation is not given
            and has to be derived from sunshine duration.

    Raises:
        ValueError: if a required field (max/min temperature, relative
            moisture, wind speed) is missing from the header, or if a record
            provides neither solar radiation nor sunshine duration.
    """
    tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(data_txt_file)
    if tsysin == 'UTCTIME':
        # For UTC input the UTC offset (in hours) of this machine is used as
        # the time zone when deriving the local time of each record.
        tzonein = time.timezone / -3600
    clim_data_items = read_data_items_from_txt(data_txt_file)
    clim_flds = clim_data_items[0]
    # format: {StationID1: climateStats1, ...}
    hydro_climate_stats = dict()
    required_flds = [DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws]
    output_flds = [DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp,
                   DataType.rm, DataType.pet, DataType.ws, DataType.sr]
    # Fields that are stored as plain floats without any conversion.
    float_flds = (DataType.mean_tmp, DataType.min_tmp, DataType.max_tmp,
                  DataType.pet, DataType.sr, DataType.ws)

    # Validate BEFORE deleting anything, so that a malformed input file does
    # not wipe the previously imported records.
    for fld in required_flds:
        if not StringClass.string_in_list(fld, clim_flds):
            raise ValueError('Meteorological Daily data MUST contain %s!' % fld)
    # remove existed records
    data_values = climdb[DBTableNames.data_values]
    for fld in output_flds:
        data_values.remove({'TYPE': fld})

    # Create bulk object
    bulk = data_values.initialize_ordered_bulk_op()
    count = 0
    for cur_clim_data_item in clim_data_items[1:]:
        dic = dict()
        cur_ssd = DEFAULT_NODATA
        for j, clim_data_v in enumerate(cur_clim_data_item):
            cur_fld = clim_flds[j]
            if StringClass.string_match(cur_fld, DataValueFields.id):
                dic[DataValueFields.id] = int(clim_data_v)
                continue
            matched = next((f for f in float_flds
                            if StringClass.string_match(cur_fld, f)), None)
            if matched is not None:
                dic[matched] = float(clim_data_v)
            elif StringClass.string_match(cur_fld, DataType.rm):
                # NOTE(review): scaled by 0.01, presumably percent -> fraction.
                dic[DataType.rm] = float(clim_data_v) * 0.01
            elif StringClass.string_match(cur_fld, DataType.ssd):
                cur_ssd = float(clim_data_v)
        # Get datetime and utc/local transformation
        utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(
            clim_flds, cur_clim_data_item, tsysin, tzonein)
        dic[DataValueFields.local_time] = utc_time + timedelta(minutes=tzonein * 60)
        dic[DataValueFields.time_zone] = tzonein
        dic[DataValueFields.utc] = utc_time
        dic[DataValueFields.y] = utc_time.year

        # Derive the values that were not provided directly.
        if DataType.mean_tmp not in dic:
            dic[DataType.mean_tmp] = (dic[DataType.max_tmp] + dic[DataType.min_tmp]) / 2.
        if DataType.sr not in dic:
            if cur_ssd == DEFAULT_NODATA:
                raise ValueError(DataType.sr + ' or ' + DataType.ssd + ' must be provided!')
            # Solar radiation is computed from sunshine duration and latitude;
            # stations missing from sites_info_dict get no SR value at all.
            if dic[DataValueFields.id] in sites_info_dict:
                _, cur_lat = sites_info_dict[dic[DataValueFields.id]].lon_lat()
                dic[DataType.sr] = round(
                    HydroClimateUtilClass.rs(DateClass.day_of_year(utc_time),
                                             float(cur_ssd), cur_lat * PI / 180.), 1)

        for fld in output_flds:
            if fld not in dic:
                continue
            cur_dic = dict()
            cur_dic[DataValueFields.value] = dic[fld]
            cur_dic[DataValueFields.id] = dic[DataValueFields.id]
            cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
            cur_dic[DataValueFields.time_zone] = dic[DataValueFields.time_zone]
            cur_dic[DataValueFields.local_time] = dic[DataValueFields.local_time]
            cur_dic[DataValueFields.type] = fld
            # Bulk insertion replaces the former one-by-one upsert, which was
            # too slow.
            bulk.insert(cur_dic)
            count += 1
            if count % 500 == 0:  # execute each 500 records
                MongoUtil.run_bulk(bulk)
                bulk = data_values.initialize_ordered_bulk_op()

        if dic[DataValueFields.id] not in hydro_climate_stats:
            hydro_climate_stats[dic[DataValueFields.id]] = ClimateStats()
        hydro_climate_stats[dic[DataValueFields.id]].add_item(dic)
    # execute the remained records
    if count % 500 != 0:
        MongoUtil.run_bulk(bulk)
    for cur_climate_stats in list(hydro_climate_stats.values()):
        cur_climate_stats.annual_stats()
    # Create index
    data_values.create_index([(DataValueFields.id, ASCENDING),
                              (DataValueFields.type, ASCENDING),
                              (DataValueFields.utc, ASCENDING)])

    annual_stats = climdb[DBTableNames.annual_stats]

    def upsert_annual(site_id, year, var_type, unit, value):
        """Insert or replace one annual statistic document."""
        # A fresh document per call: nothing leaks from a previous upsert and
        # nothing depends on the per-year loop having run at least once.
        doc = dict()
        doc[DataValueFields.value] = value
        doc[DataValueFields.id] = site_id
        doc[DataValueFields.y] = year
        doc[VariableDesc.unit] = unit
        doc[VariableDesc.type] = var_type
        curfilter = {DataValueFields.id: site_id,
                     VariableDesc.type: var_type,
                     DataValueFields.y: year}
        annual_stats.find_one_and_replace(curfilter, doc, upsert=True)

    # prepare dic for MongoDB
    for s_id, stats_v in list(hydro_climate_stats.items()):
        for YYYY in list(stats_v.Count.keys()):
            upsert_annual(s_id, YYYY, DataType.phu_tot, 'heat units', stats_v.PHUTOT[YYYY])
            # import annual mean temperature
            upsert_annual(s_id, YYYY, DataType.mean_tmp, 'deg C', stats_v.MeanTmp[YYYY])
        # Station-wide values are stored once, with the year set to DEFAULT_NODATA.
        upsert_annual(s_id, DEFAULT_NODATA, DataType.phu0, 'heat units', stats_v.PHU0)
        upsert_annual(s_id, DEFAULT_NODATA, DataType.mean_tmp0, 'deg C', stats_v.MeanTmp0)
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME',
                                                  day_divided_hour=0):
    """Interpolate irregularly observed data to a regular time interval.

    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Only fields whose name contains PCP, FLOW or SED
                 (case-insensitive) are processed;
                 the unit is mm/h, m3/s, g/L (i.e., kg/m3), respectively.
        time_interval: time interval, unit is minute, e.g., daily output is 1440
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS'. It is
                    compared with the records after they have been converted
                    to the output time system.
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If true, the time intervals without
                        original records will not be output.
        time_sys_output: time system of the output, the format must be
                  '<time_system> [<time_zone>]', e.g.,
                  'LOCALTIME'
                  'LOCALTIME 8'
                  'UTCTIME' (default)
                  For 'LOCALTIME' without a numeric zone the UTC offset of
                  this machine is used.
        day_divided_hour: If the time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23. e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59

    Returns:
        None. One file per supported field is written to the directory of the
        input file, named
        <field name>_<time system>_<time interval>[_nonzero].txt, where the
        field name is taken as written in the header and the time system is
        'UTCTIME' or 'LOCALTIME', e.g.,
        PCP_UTCTIME_1440_nonzero.txt, FLOW_LOCALTIME_60.txt

    Raises:
        ValueError: if day_divided_hour is out of range or DATETIME is not
            one of the fields.
        RuntimeError: if a SED field has to be interpolated but no FLOW
            field is available.
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available field
    available_flds = ['FLOW', 'SED', 'PCP']

    def is_supported_field(cur_fld):
        """Check if the given field name is supported."""
        return any(fff.lower() in cur_fld.lower() for fff in available_flds)

    ord_data = OrderedDict()
    time_zone_output = time.timezone / -3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)

    # ord_data is not modified below, so snapshot it once instead of
    # rebuilding the lists for every output interval.
    ord_items = list(ord_data.items())
    first_dt = ord_items[0][0] if ord_items else None

    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    out_delta_secs = out_time_delta.days * 86400 + out_time_delta.seconds
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        # Whole-day intervals start at the requested hour of the start day.
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = first_dt
        pre_added = False
        for i, v in ord_items:
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one item that less than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # Just add end time for compute convenient
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # initial interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            # eliminate the time interval without original records if required
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not is_supported_field(v_name):
                continue
            if 'SED' in v_name.upper():  # FLOW must be existed
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not is_supported_field(v_name):
                continue
            upper_name = v_name.upper()
            itp_value = 0.
            itp_auxiliary_value = 0.
            # Each record is weighted by the seconds until the next record
            # (or the end of the interval). Items are [datetime, v1, v2, ...],
            # hence the "+ 1" when indexing a field value.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in upper_name:
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in upper_name:
                # Flow-weighted mean concentration. With zero accumulated flow
                # the result is reported as 0 instead of dividing by zero.
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!' %
                          item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in upper_name:
                # time-weighted mean over the whole output interval
                itp_value /= out_delta_secs
            elif 'PCP' in upper_name:  # the input is mm/h, and output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta

    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not is_supported_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.txt'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
def data_from_txt(maindb, hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file):
    """Read observed data from txt files and import them into MongoDB.

    The import runs in three steps:
      1. Monitoring sites are read and upserted (one document per site and
         observed type) together with the subbasin ID(s) they belong to.
      2. Observed values are bulk-inserted in batches of 500; records of
         sites that were not read in step 1 are skipped.
      3. For every observed type, values are additionally stored with the
         unit converted using the discharge ('Q') of the same site and time.

    Args:
        maindb: Main spatial database
        hydro_clim_db: hydro-climate database
        obs_txts_list: txt file paths of observed data
        sites_info_txts_list: txt file paths of site information
        subbsn_file: subbasin raster file

    Raises:
        ValueError: if an observed data file lacks one of the required
            fields (station ID, type, value).
    """
    # 1. Read monitor station information, and store variables information and station IDs
    variable_lists = []
    site_ids = []
    for site_file in sites_info_txts_list:
        site_data_items = read_data_items_from_txt(site_file)
        site_flds = site_data_items[0]
        for i in range(1, len(site_data_items)):
            dic = dict()
            for j, v in enumerate(site_data_items[i]):
                if StringClass.string_match(site_flds[j], StationFields.id):
                    dic[StationFields.id] = int(v)
                    site_ids.append(dic[StationFields.id])
                elif StringClass.string_match(site_flds[j], StationFields.name):
                    dic[StationFields.name] = v.strip()
                elif StringClass.string_match(site_flds[j], StationFields.type):
                    # one site may observe several types, separated by '-'
                    types = StringClass.split_string(v.strip(), '-')
                elif StringClass.string_match(site_flds[j], StationFields.lat):
                    dic[StationFields.lat] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.lon):
                    dic[StationFields.lon] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.x):
                    dic[StationFields.x] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.y):
                    dic[StationFields.y] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.unit):
                    dic[StationFields.unit] = v.strip()
                elif StringClass.string_match(site_flds[j], StationFields.elev):
                    dic[StationFields.elev] = float(v)
                elif StringClass.string_match(site_flds[j], StationFields.outlet):
                    dic[StationFields.outlet] = float(v)

            for j, cur_type in enumerate(types):
                site_dic = dict()
                site_dic[StationFields.id] = dic[StationFields.id]
                site_dic[StationFields.name] = dic[StationFields.name]
                site_dic[StationFields.type] = cur_type
                site_dic[StationFields.lat] = dic[StationFields.lat]
                site_dic[StationFields.lon] = dic[StationFields.lon]
                site_dic[StationFields.x] = dic[StationFields.x]
                site_dic[StationFields.y] = dic[StationFields.y]
                site_dic[StationFields.elev] = dic[StationFields.elev]
                site_dic[StationFields.outlet] = dic[StationFields.outlet]
                # Add SubbasinID field
                matched, cur_sids = ImportObservedData.match_subbasin(subbsn_file, site_dic,
                                                                      maindb)
                if not matched:
                    break
                if len(cur_sids) == 1:  # if only one subbasin ID, store integer
                    cur_subbsn_id_str = cur_sids[0]
                else:
                    # Several subbasins: store them as a comma separated
                    # string, ignoring missing IDs.
                    cur_subbsn_id_str = ','.join(str(cid) for cid in cur_sids
                                                 if cid is not None)
                site_dic[StationFields.subbsn] = cur_subbsn_id_str
                curfilter = {StationFields.id: site_dic[StationFields.id],
                             StationFields.type: site_dic[StationFields.type]}
                hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, site_dic,
                                                                       upsert=True)

                var_dic = dict()
                var_dic[StationFields.type] = types[j]
                var_dic[StationFields.unit] = dic[StationFields.unit]
                if var_dic not in variable_lists:
                    variable_lists.append(var_dic)
    site_ids = set(site_ids)

    # 2. Read measurement data and import to MongoDB
    bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
    count = 0
    for measDataFile in obs_txts_list:
        obs_data_items = read_data_items_from_txt(measDataFile)
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(measDataFile)
        if tsysin == 'UTCTIME':
            # For UTC input the UTC offset (in hours) of this machine is used
            # as the time zone when deriving the local time.
            tzonein = time.timezone / -3600
        # If the data items is EMPTY or only have one header row, then goto
        # next data file.
        if obs_data_items == [] or len(obs_data_items) == 1:
            continue
        obs_flds = obs_data_items[0]
        required_flds = [StationFields.id, DataValueFields.type, DataValueFields.value]

        for fld in required_flds:
            if not StringClass.string_in_list(fld, obs_flds):  # data can not meet the request!
                raise ValueError('The %s can not meet the required format!' % measDataFile)
        for cur_obs_data_item in obs_data_items[1:]:
            dic = dict()
            site_known = True
            for j, cur_data_value in enumerate(cur_obs_data_item):
                if StringClass.string_match(obs_flds[j], StationFields.id):
                    dic[StationFields.id] = int(cur_data_value)
                    # if current site ID is not included, goto next data item
                    if dic[StationFields.id] not in site_ids:
                        site_known = False
                        break
                elif StringClass.string_match(obs_flds[j], DataValueFields.type):
                    dic[DataValueFields.type] = cur_data_value
                elif StringClass.string_match(obs_flds[j], DataValueFields.value):
                    dic[DataValueFields.value] = float(cur_data_value)
            if not site_known:
                continue
            utc_t = HydroClimateUtilClass.get_utcdatetime_from_field_values(obs_flds,
                                                                            cur_obs_data_item,
                                                                            tsysin, tzonein)
            dic[DataValueFields.local_time] = utc_t + timedelta(minutes=tzonein * 60)
            dic[DataValueFields.time_zone] = tzonein
            dic[DataValueFields.utc] = utc_t
            bulk.insert(dic)
            count += 1
            if count % 500 == 0:
                MongoUtil.run_bulk(bulk)
                bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
    if count % 500 != 0:
        MongoUtil.run_bulk(bulk)

    # 3. Add measurement data with unit converted
    # loop variables list
    added_dics = []
    for curVar in variable_lists:
        # if the unit is mg/L, then change the Type name with the suffix 'Conc',
        # and convert the corresponding data to kg if the discharge data is
        # available.
        cur_type = curVar[StationFields.type]
        cur_unit = curVar[StationFields.unit]
        # Find data by Type
        for item in hydro_clim_db[DBTableNames.observes].find({StationFields.type: cur_type}):
            dic = dict()
            dic[StationFields.id] = item[StationFields.id]
            dic[DataValueFields.value] = item[DataValueFields.value]
            dic[StationFields.type] = item[StationFields.type]
            dic[DataValueFields.local_time] = item[DataValueFields.local_time]
            dic[DataValueFields.time_zone] = item[DataValueFields.time_zone]
            dic[DataValueFields.utc] = item[DataValueFields.utc]

            if cur_unit == 'mg/L' or cur_unit == 'g/L':
                # The stored record is a concentration: rename its Type with
                # the 'Conc' suffix, then restore the plain Type name for the
                # converted value that is added below.
                dic[StationFields.type] = cur_type + 'Conc'
                curfilter = {StationFields.id: dic[StationFields.id],
                             DataValueFields.type: cur_type,
                             DataValueFields.utc: dic[DataValueFields.utc]}
                hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic,
                                                                          upsert=True)
                dic[StationFields.type] = cur_type

            # find discharge on current day
            cur_filter = {StationFields.type: 'Q',
                          DataValueFields.utc: dic[DataValueFields.utc],
                          StationFields.id: dic[StationFields.id]}
            q_dic = hydro_clim_db[DBTableNames.observes].find_one(filter=cur_filter)
            if q_dic is None:
                # no discharge available, nothing can be converted
                continue
            q = q_dic[DataValueFields.value]
            if cur_unit == 'mg/L':
                # convert mg/L to kg
                dic[DataValueFields.value] = round(
                    dic[DataValueFields.value] * q * 86400. / 1000., 2)
            elif cur_unit == 'g/L':
                # convert g/L to kg
                dic[DataValueFields.value] = round(
                    dic[DataValueFields.value] * q * 86400., 2)
            elif cur_unit == 'kg':
                dic[StationFields.type] = cur_type + 'Conc'
                # convert kg to mg/L
                # NOTE(review): raises ZeroDivisionError when the discharge
                # is 0 -- confirm how zero-flow records should be handled.
                dic[DataValueFields.value] = round(
                    dic[DataValueFields.value] / q * 1000. / 86400., 2)
            # add new data item
            added_dics.append(dic)
    # import to MongoDB
    for dic in added_dics:
        curfilter = {StationFields.id: dic[StationFields.id],
                     DataValueFields.type: dic[DataValueFields.type],
                     DataValueFields.utc: dic[DataValueFields.utc]}
        hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
def lookup_tables_as_collection_and_gridfs(cfg, maindb):
    """Import every lookup table (txt file) as a Collection and as a GridFS file.

    Each table is stored twice under its upper-cased name: all fields go into
    a MongoDB collection, and the numeric fields only are packed as floats
    into a GridFS file (the row count first, then every row prefixed with
    its column count).

    Args:
        cfg: SEIMS config object
        maindb: workflow model database

    Raises:
        ValueError: if the rows of a table do not all have the same number
            of numeric values.
    """
    for tablename, txt_file in list(cfg.paramcfgs.lookup_tabs_dict.items()):
        # Start from an empty collection.
        existing_names = maindb.collection_names()
        if StringClass.string_in_list(tablename.upper(), existing_names):
            maindb.drop_collection(tablename.upper())
        else:
            maindb.create_collection(tablename.upper())
        bulk = maindb[tablename.upper()].initialize_ordered_bulk_op()
        # Remove the GridFS file of a previous import, if any.
        spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
        if spatial.exists(filename=tablename.upper()):
            old_file = spatial.get_version(filename=tablename.upper())
            spatial.delete(old_file._id)
        # read data items, the first one holds the field names
        data_items = read_data_items_from_txt(txt_file)
        field_names = data_items[0][0:]
        numeric_rows = list()  # rows for the GridFS file
        for row_values in data_items[1:]:
            record = dict()  # document for the Collection
            numeric_row = list()
            for idx, fld in enumerate(field_names):
                raw_value = row_values[idx]
                if not MathClass.isnumerical(raw_value):
                    record[fld] = raw_value
                    continue
                number = float(raw_value)
                record[fld] = number
                numeric_row.append(number)
            bulk.insert(record)
            if numeric_row:
                numeric_rows.append(numeric_row)
        MongoUtil.run_bulk(bulk, 'No operations during import %s.' % tablename)

        # begin import gridfs file
        n_row = len(numeric_rows)
        if n_row < 1:
            continue
        n_col = len(numeric_rows[0])
        for numeric_row in numeric_rows:
            if len(numeric_row) != n_col:
                raise ValueError('Please check %s to make sure each item has '
                                 'the same numeric dimension. The size of first '
                                 'row is: %d, and the current data item is: %d' %
                                 (tablename, n_col, len(numeric_row)))
            # every row is stored with its column count in front
            numeric_row.insert(0, n_col)
        metadic = {ModelParamDataUtils.item_count: n_row,
                   ModelParamDataUtils.field_count: n_col}
        cur_lookup_gridfs = spatial.new_file(filename=tablename.upper(), metadata=metadic)
        cur_lookup_gridfs.write(pack('%df' % 1, n_row))
        row_fmt = '%df' % (n_col + 1)
        for numeric_row in numeric_rows:
            cur_lookup_gridfs.write(pack(row_fmt, *numeric_row))
        cur_lookup_gridfs.close()
def model_io_configuration(cfg, maindb):
    """Import the input and output configuration of SEIMS (file.in, file.out).

    The file.in and file.out collections are emptied first. Then every
    'Tag|value' item of file.in is inserted, the initial output settings are
    bulk-inserted into the file.out collection, and finally the desired
    outputs listed in the model's file.out update the matching documents
    (matched by output ID) and flag them as used.

    Args:
        cfg: SEIMS config object
        maindb: MongoDB database object

    Raises:
        ValueError: if a file.in item is not exactly one tag and one value
            separated by '|', or if a row of the initial outputs file
            contains no recognised field.
        RuntimeError: if an item of the desired outputs does not have
            exactly 7 columns.
    """
    file_in_path = cfg.modelcfgs.filein
    file_out_path = cfg.paramcfgs.init_outputs_file
    # Start from empty collections: drop the existing ones, create the others.
    existing_names = maindb.collection_names()
    for tab_name in [DBTableNames.main_filein, DBTableNames.main_fileout]:
        if StringClass.string_in_list(tab_name, existing_names):
            maindb.drop_collection(tab_name)
        else:
            maindb.create_collection(tab_name)
    file_in_items = read_data_items_from_txt(file_in_path)
    file_out_items = read_data_items_from_txt(file_out_path)

    for in_item in file_in_items:
        tag_and_value = StringClass.split_string(in_item[0].strip(), ['|'])
        if len(tag_and_value) != 2:
            raise ValueError('One item should only have one Tag and one value string,'
                             ' split by "|"')
        maindb[DBTableNames.main_filein].insert({ModelCfgFields.tag: tag_and_value[0],
                                                 ModelCfgFields.value: tag_and_value[1]})

    # begin to import initial outputs settings
    bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
    out_field_array = file_out_items[0]
    out_data_array = file_out_items[1:]
    # Recognised columns; a header name is assigned to the first one it matches.
    known_flds = (ModelCfgFields.mod_cls, ModelCfgFields.output_id, ModelCfgFields.desc,
                  ModelCfgFields.unit, ModelCfgFields.type, ModelCfgFields.stime,
                  ModelCfgFields.etime, ModelCfgFields.interval,
                  ModelCfgFields.interval_unit, ModelCfgFields.filename,
                  ModelCfgFields.use, ModelCfgFields.subbsn)
    for out_item in out_data_array:
        file_out_dict = dict()
        for col_idx, col_name in enumerate(out_field_array):
            for known_fld in known_flds:
                if StringClass.string_match(known_fld, col_name):
                    file_out_dict[known_fld] = out_item[col_idx]
                    break
        if not file_out_dict:
            raise ValueError('There are not any valid output item stored in file.out!')
        bulk.insert(file_out_dict)
    MongoUtil.run_bulk(bulk, 'No operations to excute when import initial outputs settings.')

    # begin to import the desired outputs
    bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
    data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
    for cur_data_item in data_items:
        if len(cur_data_item) != 7:
            raise RuntimeError('Items in file.out must have 7 columns, i.e., OUTPUTID,'
                               'TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.')
        data_import = {ModelCfgFields.output_id: cur_data_item[0],
                       ModelCfgFields.type: cur_data_item[1],
                       ModelCfgFields.stime: cur_data_item[2],
                       ModelCfgFields.etime: cur_data_item[3],
                       ModelCfgFields.interval: cur_data_item[4],
                       ModelCfgFields.interval_unit: cur_data_item[5],
                       ModelCfgFields.subbsn: cur_data_item[6],
                       ModelCfgFields.use: 1}
        cur_filter = {ModelCfgFields.output_id: cur_data_item[0]}
        bulk.find(cur_filter).update({'$set': data_import})
    # execute import operators
    MongoUtil.run_bulk(bulk, 'No operations to excute when import the desired outputs.')