def workflow(cfg, db):
    """Import measurement (observed) data to MongoDB.

    Data types may include Q (discharge, m3/s), TN, TP, etc.
    The required parameters are defined in the configuration file (*.ini).
    """
    if not cfg.use_observed:
        return False
    c_list = db.collection_names()
    if not StringClass.string_in_list(DBTableNames.observes, c_list):
        db.create_collection(DBTableNames.observes)
    else:
        db.drop_collection(DBTableNames.observes)
    if not StringClass.string_in_list(DBTableNames.sites, c_list):
        db.create_collection(DBTableNames.sites)
    if not StringClass.string_in_list(DBTableNames.var_desc, c_list):
        db.create_collection(DBTableNames.var_desc)
    file_list = FileClass.get_full_filename_by_suffixes(cfg.observe_dir, ['.txt'])
    meas_file_list = []  # measurement data files, named 'observed_*.txt'
    site_loc = []        # site information files
    for fl in file_list:
        if StringClass.is_substring('observed_', fl):
            meas_file_list.append(fl)
        else:
            site_loc.append(fl)
    ImportObservedData.data_from_txt(db, meas_file_list, site_loc, cfg.spatials.subbsn)
    return True
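# Illustrative sketch (file names are hypothetical): the directory scan above
# classifies text files purely by the 'observed_' naming convention, so a tiny
# example of that split may help when preparing MEASUREMENT_DATA_DIR.
def _example_split_observed_files():
    """Classify text files by the 'observed_' naming convention used above."""
    file_list = ['observed_Q_2013.txt', 'observed_TN_2013.txt', 'sites_WQ.txt']
    meas_files = [f for f in file_list if 'observed_' in f]
    site_files = [f for f in file_list if 'observed_' not in f]
    # meas_files -> ['observed_Q_2013.txt', 'observed_TN_2013.txt']
    # site_files -> ['sites_WQ.txt']
    return meas_files, site_files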
def initial_params_from_txt(cfg, maindb): """ import initial calibration parameters from txt data file. Args: cfg: SEIMS config object maindb: MongoDB database object """ # delete if existed, create if not existed c_list = maindb.collection_names() if not StringClass.string_in_list(DBTableNames.main_parameter, c_list): maindb.create_collection(DBTableNames.main_parameter) else: maindb.drop_collection(DBTableNames.main_parameter) # create bulk operator bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op() # read initial parameters from txt file data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file) field_names = data_items[0][0:] # print (field_names) for i, cur_data_item in enumerate(data_items): if i == 0: continue # print cur_data_item # initial one default blank parameter dict. data_import = {ModelParamFields.name: '', ModelParamFields.desc: '', ModelParamFields.unit: '', ModelParamFields.module: '', ModelParamFields.value: DEFAULT_NODATA, ModelParamFields.impact: DEFAULT_NODATA, ModelParamFields.change: 'NC', ModelParamFields.max: DEFAULT_NODATA, ModelParamFields.min: DEFAULT_NODATA, ModelParamFields.type: ''} for k, v in data_import.items(): idx = field_names.index(k) if cur_data_item[idx] == '': if StringClass.string_match(k, ModelParamFields.change_ac): data_import[k] = 0 elif StringClass.string_match(k, ModelParamFields.change_rc): data_import[k] = 1 elif StringClass.string_match(k, ModelParamFields.change_nc): data_import[k] = 0 elif StringClass.string_match(k, ModelParamFields.change_vc): data_import[k] = DEFAULT_NODATA # Be careful to check NODATA when use! else: if MathClass.isnumerical(cur_data_item[idx]): data_import[k] = float(cur_data_item[idx]) else: data_import[k] = cur_data_item[idx] bulk.insert(data_import) # execute import operators bulk.execute() # create index by parameter's type and name by ascending order. maindb[DBTableNames.main_parameter].create_index([(ModelParamFields.type, ASCENDING), (ModelParamFields.name, ASCENDING)])
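# The bulk API used above (initialize_ordered_bulk_op / insert / execute) was
# removed in pymongo 4. Below is a minimal sketch of the same import pattern
# with the newer bulk_write interface; the collection name, documents, and
# index fields are placeholders, not the SEIMS field constants.
def _example_bulk_insert_parameters(maindb):
    """Hypothetical port of the ordered bulk insert above to bulk_write."""
    from pymongo import InsertOne, ASCENDING
    docs = [{'NAME': 'demo_param_1', 'VALUE': 1.0, 'CHANGE': 'NC'},
            {'NAME': 'demo_param_2', 'VALUE': 0.5, 'CHANGE': 'RC'}]
    coll = maindb['parameters']
    coll.bulk_write([InsertOne(d) for d in docs], ordered=True)
    coll.create_index([('TYPE', ASCENDING), ('NAME', ASCENDING)])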
def variable_table(db, var_file): """Import variables table""" var_data_items = read_data_items_from_txt(var_file) var_flds = var_data_items[0] for i in range(1, len(var_data_items)): dic = {} for j in range(len(var_data_items[i])): if StringClass.string_match(var_flds[j], VariableDesc.type): dic[VariableDesc.type] = var_data_items[i][j] elif StringClass.string_match(var_flds[j], VariableDesc.unit): dic[VariableDesc.unit] = var_data_items[i][j] # If this item existed already, then update it, otherwise insert one. curfilter = {VariableDesc.type: dic[VariableDesc.type]} db[DBTableNames.var_desc].find_one_and_replace(curfilter, dic, upsert=True)
def workflow(cfg, main_db, clim_db): """Workflow""" # 1. Find meteorology and precipitation sites in study area thiessen_file_list = [cfg.meteo_sites_thiessen, cfg.prec_sites_thiessen] type_list = [DataType.m, DataType.p] if not cfg.cluster: # the entire basin ImportHydroClimateSites.find_sites(main_db, cfg.climate_db, cfg.vecs.bsn, FieldNames.basin, thiessen_file_list, cfg.thiessen_field, type_list, cfg.storm_mode) ImportHydroClimateSites.find_sites(main_db, cfg.climate_db, cfg.vecs.subbsn, FieldNames.subbasin_id, thiessen_file_list, cfg.thiessen_field, type_list, cfg.storm_mode, cfg.cluster) # 2. Import geographic information of each sites to Hydro-Climate database c_list = clim_db.collection_names() tables = [DBTableNames.sites, DBTableNames.var_desc] for tb in tables: if not StringClass.string_in_list(tb, c_list): clim_db.create_collection(tb) ImportHydroClimateSites.variable_table(clim_db, cfg.hydro_climate_vars) site_m_loc = ImportHydroClimateSites.sites_table(clim_db, cfg.Meteo_sites, DataType.m) site_p_loc = ImportHydroClimateSites.sites_table(clim_db, cfg.prec_sites, DataType.p) # print (site_m_loc, site_p_loc) return site_m_loc, site_p_loc
def get_time_system_from_data_file(in_file): """Get the time system from the data file. The basic format is: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME """ time_sys = 'LOCALTIME' time_zone = time.timezone / -3600 f = open(in_file) for line in f: str_line = line for LF in LFs: if LF in line: str_line = line.split(LF)[0] break if str_line[0] != '#': break if str_line.lower().find('utc') >= 0: time_sys = 'UTCTIME' time_zone = 0 break if str_line.lower().find('local') >= 0: line_list = StringClass.split_string(str_line, [',']) if len(line_list) == 2 and MathClass.isnumerical(line_list[1]): time_zone = -1 * int(line_list[1]) break f.close() return time_sys, time_zone
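# A condensed, self-contained restatement of the header convention parsed
# above (not the project's parser): '#UTCTIME' yields ('UTCTIME', 0), while a
# local-time header such as '#LOCALTIME,8' yields ('LOCALTIME', -8), because
# the zone is stored with the same sign convention as time.timezone (negative
# east of UTC). The header strings are examples only.
def _example_parse_time_header(header_line):
    """Return (time_system, time_zone) from a data-file header line."""
    content = header_line.lstrip('#').strip()
    if 'utc' in content.lower():
        return 'UTCTIME', 0
    fields = [s.strip() for s in content.replace(',', ' ').split()]
    if len(fields) == 2 and fields[1].lstrip('-').isdigit():
        return 'LOCALTIME', -1 * int(fields[1])
    return 'LOCALTIME', 0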
def initialize_landcover_parameters(landcover_file, landcover_initial_fields_file, dst_dir): """generate initial landcover_init_param parameters""" lc_data_items = read_data_items_from_txt(landcover_initial_fields_file) # print lc_data_items field_names = lc_data_items[0] lu_id = -1 for i, v in enumerate(field_names): if StringClass.string_match(v, 'LANDUSE_ID'): lu_id = i break data_items = lc_data_items[1:] replace_dicts = dict() for item in data_items: for i, v in enumerate(item): if i != lu_id: if field_names[i].upper() not in replace_dicts.keys(): replace_dicts[field_names[i].upper()] = { float(item[lu_id]): float(v) } else: replace_dicts[field_names[i].upper()][float( item[lu_id])] = float(v) # print replace_dicts # Generate GTIFF for item, v in replace_dicts.items(): filename = dst_dir + SEP + item + '.tif' print(filename) RasterUtilClass.raster_reclassify(landcover_file, v, filename) return replace_dicts['LANDCOVER'].values()
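# Sketch of the data structure built above from a hypothetical lookup table:
# {field_name: {landuse_id: value}}, which raster_reclassify() then uses as a
# landuse-ID -> parameter-value map for each output GeoTIFF.
def _example_landcover_reclass_dict():
    """Build the replace_dicts mapping from a tiny, made-up lookup table."""
    field_names = ['LANDUSE_ID', 'LANDCOVER', 'CN2']          # header row (example)
    data_items = [['1', '101', '67.'], ['6', '106', '78.']]   # data rows (example)
    lu_idx = field_names.index('LANDUSE_ID')
    replace_dicts = {}
    for item in data_items:
        for i, v in enumerate(item):
            if i == lu_idx:
                continue
            replace_dicts.setdefault(field_names[i].upper(), {})[float(item[lu_idx])] = float(v)
    # {'LANDCOVER': {1.0: 101.0, 6.0: 106.0}, 'CN2': {1.0: 67.0, 6.0: 78.0}}
    return replace_dicts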
def export_landuse_lookup_files_from_mongodb(cfg, maindb):
    """Export landuse lookup tables from MongoDB to text files."""
    lookup_dir = cfg.dirs.lookup
    # Copy the field list so the shared class attribute is not modified by
    # repeated calls.
    property_namelist = list(ModelParamDataUtils.landuse_fields)
    property_namelist.append('USLE_P')
    property_map = {}
    query_result = maindb['LANDUSELOOKUP'].find()
    count = 0
    for row in query_result:
        value_map = dict()
        for i, p_name in enumerate(property_namelist):
            if StringClass.string_match(p_name, "USLE_P"):
                # Currently, USLE_P is set to 1 for all landuse types.
                value_map[p_name] = 1
            elif StringClass.string_match(p_name, "Manning"):
                value_map[p_name] = row.get(p_name) * 10
            else:
                value_map[p_name] = row.get(p_name)
        count += 1
        property_map[count] = value_map
    if count == 0:
        # Cursor.find() never returns None, so emptiness is checked after iteration.
        raise RuntimeError("The LANDUSELOOKUP collection does not exist or is empty!")
    n = len(property_map)
    UtilClass.rmmkdir(lookup_dir)
    for property_name in property_namelist:
        f = open("%s/%s.txt" % (lookup_dir, property_name), 'w')
        f.write("%d\n" % n)
        for prop_id in property_map:
            f.write("%d %f\n" % (prop_id, property_map[prop_id][property_name]))
        f.close()
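# The lookup text files written above have a simple layout: the first line is
# the record count, and each following line is '<sequential record ID> <value>'
# (the ID is the export counter, not the landuse code). A hypothetical CN2.txt
# with three records would therefore look like:
#
#     3
#     1 67.000000
#     2 78.000000
#     3 85.000000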
def read_data_items_from_txt(txt_file):
    """Read data items (including the header row) from a text file.

    Fields in each line must be separated by TAB or COMMA (COMMA is
    recommended); any line containing '#' is treated as a comment and skipped.

    Args:
        txt_file: full path of the text data file

    Returns:
        2D data array (list of lists of strings)
    """
    f = open(txt_file)
    data_items = []
    for line in f:
        str_line = line
        for LF in LFs:
            if LF in line:
                str_line = line.split(LF)[0]
                break
        if str_line != '' and str_line.find('#') < 0:
            line_list = StringClass.split_string(str_line, ['\t'])
            if len(line_list) <= 1:
                line_list = StringClass.split_string(str_line, [','])
            data_items.append(line_list)
    f.close()
    return data_items
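# A short, self-contained demonstration of the layout expected by
# read_data_items_from_txt(): any line containing '#' is skipped, and fields
# are split by TAB first, then by COMMA. The file content and column names are
# hypothetical.
def _example_read_data_items():
    import os
    import tempfile
    text = ('# station file (this comment line is ignored)\n'
            'StationID,Name,LocalX,LocalY\n'
            '1,Outlet,432601.5,3542025.0\n')
    tmp = tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False)
    tmp.write(text)
    tmp.close()
    items = read_data_items_from_txt(tmp.name)
    os.remove(tmp.name)
    # [['StationID', 'Name', 'LocalX', 'LocalY'],
    #  ['1', 'Outlet', '432601.5', '3542025.0']]
    return items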
def sites_table(hydro_clim_db, site_file, site_type): """Import HydroClimate sites table""" sites_loc = dict() site_data_items = read_data_items_from_txt(site_file) site_flds = site_data_items[0] for i in range(1, len(site_data_items)): dic = dict() for j in range(len(site_data_items[i])): if StringClass.string_match(site_flds[j], StationFields.id): dic[StationFields.id] = int(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.name): # unicode(site_data_items[i][j], 'gb2312') dic[StationFields.name] = site_data_items[i][j] elif StringClass.string_match(site_flds[j], StationFields.x): dic[StationFields.x] = float(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.y): dic[StationFields.y] = float(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.lon): dic[StationFields.lon] = float(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.lat): dic[StationFields.lat] = float(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.elev): dic[StationFields.elev] = float(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.outlet): dic[StationFields.outlet] = float(site_data_items[i][j]) dic[StationFields.type] = site_type curfilter = {StationFields.id: dic[StationFields.id], StationFields.type: dic[StationFields.type]} hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, dic, upsert=True) if dic[StationFields.id] not in sites_loc.keys(): sites_loc[dic[StationFields.id]] = SiteInfo(dic[StationFields.id], dic[StationFields.name], dic[StationFields.lat], dic[StationFields.lon], dic[StationFields.x], dic[StationFields.y], dic[StationFields.elev]) hydro_clim_db[DBTableNames.sites].create_index([(StationFields.id, ASCENDING), (StationFields.type, ASCENDING)]) return sites_loc
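# The sites import above relies on an upsert per (ID, Type) pair so repeated
# preprocessing runs do not duplicate stations. A minimal pymongo sketch of
# the same pattern; the collection and field names here are illustrative only.
def _example_upsert_site(db):
    """Upsert one hypothetical site document and build the compound index."""
    from pymongo import ASCENDING
    site = {'StationID': 1, 'Type': 'M', 'Name': 'demo', 'Lat': 31.5, 'Lon': 113.2}
    flt = {'StationID': site['StationID'], 'Type': site['Type']}
    db['sites'].find_one_and_replace(flt, site, upsert=True)
    db['sites'].create_index([('StationID', ASCENDING), ('Type', ASCENDING)])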
def regular_data_from_txt(climdb, data_file): """Regular precipitation data from text file.""" # delete existed precipitation data climdb[DBTableNames.data_values].remove( {DataValueFields.type: DataType.p}) clim_data_items = read_data_items_from_txt(data_file) clim_flds = clim_data_items[0] station_id = [] bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op() count = 0 for i in range(3, len(clim_flds)): station_id.append(clim_flds[i]) for i, clim_data_item in enumerate(clim_data_items): if i == 0: continue dic = dict() precipitation = [] cur_y = 0 cur_m = 0 cur_d = 0 for j, clim_data_v in enumerate(clim_data_item): if StringClass.string_match(clim_flds[j], DataValueFields.y): cur_y = int(clim_data_v) elif StringClass.string_match(clim_flds[j], DataValueFields.m): cur_m = int(clim_data_v) elif StringClass.string_match(clim_flds[j], DataValueFields.d): cur_d = int(clim_data_v) else: for k, cur_id in enumerate(station_id): if StringClass.string_match(clim_flds[j], cur_id): precipitation.append(float(clim_data_v)) dt = datetime(cur_y, cur_m, cur_d, 0, 0) sec = time.mktime(dt.timetuple()) utc_time = time.gmtime(sec) dic[DataValueFields.local_time] = dt dic[DataValueFields.time_zone] = time.timezone / 3600. dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1], utc_time[2], utc_time[3]) for j, cur_id in enumerate(station_id): cur_dic = dict() cur_dic[DataValueFields.value] = precipitation[j] cur_dic[DataValueFields.id] = int(cur_id) cur_dic[DataValueFields.type] = DataType.p cur_dic[DataValueFields.time_zone] = dic[ DataValueFields.time_zone] cur_dic[DataValueFields.local_time] = dic[ DataValueFields.local_time] cur_dic[DataValueFields.utc] = dic[DataValueFields.utc] bulk.insert(cur_dic) count += 1 if count % 500 == 0: # execute each 500 records bulk.execute() bulk = climdb[ DBTableNames.data_values].initialize_ordered_bulk_op() if count % 500 != 0: bulk.execute() # Create index climdb[DBTableNames.data_values].create_index([ (DataValueFields.id, ASCENDING), (DataValueFields.type, ASCENDING), (DataValueFields.utc, ASCENDING) ])
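# The timestamp handling above interprets the Y/M/D columns in the machine's
# local time zone (time.mktime) and derives the matching UTC time via
# time.gmtime. A condensed, runnable sketch of that conversion:
def _example_local_to_utc(year, month, day):
    import time
    from datetime import datetime
    local_dt = datetime(year, month, day, 0, 0)
    utc_struct = time.gmtime(time.mktime(local_dt.timetuple()))
    utc_dt = datetime(*utc_struct[:4])          # keep year, month, day, hour
    tz_offset_hours = time.timezone / 3600.     # e.g. -8.0 on a UTC+8 machine
    return local_dt, utc_dt, tz_offset_hours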
def __init__(self, cf): # 1. Directories self.base_dir = None self.clim_dir = None self.spatial_dir = None self.observe_dir = None self.scenario_dir = None self.model_dir = None self.txt_db_dir = None self.preproc_script_dir = None self.seims_bin = None self.mpi_bin = None self.workspace = None # 1.1. Directory determined flags self.use_observed = True self.use_scernario = True # 2. MongoDB configuration and database, collation, GridFS names self.hostname = '127.0.0.1' # localhost by default self.port = 27017 self.climate_db = '' self.bmp_scenario_db = '' self.spatial_db = '' # 3. Switch for building SEIMS self.cluster = False self.storm_mode = False self.gen_cn = True self.gen_runoff_coef = True self.gen_crop = True self.gen_iuh = True # 4. Climate inputs self.hydro_climate_vars = None self.prec_sites = None self.prec_data = None self.Meteo_sites = None self.Meteo_data = None self.thiessen_field = 'ID' # 5. Spatial inputs self.prec_sites_thiessen = None self.meteo_sites_thiessen = None self.dem = None self.outlet_file = None self.landuse = None self.landcover_init_param = None self.soil = None self.soil_property = None self.mgt_field = None # 6. Option parameters self.is_TauDEM = True self.d8acc_threshold = 0 self.np = 4 self.d8down_method = 's' self.dorm_hr = -1. self.temp_base = 0. self.imper_perc_in_urban = 0.3 self.default_reach_depth = 5. self.default_landuse = -1 self.default_soil = -1 # 1. Directories if 'PATH' in cf.sections(): self.base_dir = cf.get('PATH', 'base_data_dir') self.clim_dir = cf.get('PATH', 'climate_data_dir') self.spatial_dir = cf.get('PATH', 'spatial_data_dir') self.observe_dir = cf.get('PATH', 'measurement_data_dir') self.scenario_dir = cf.get('PATH', 'bmp_data_dir') self.model_dir = cf.get('PATH', 'model_dir') self.txt_db_dir = cf.get('PATH', 'txt_db_dir') self.preproc_script_dir = cf.get('PATH', 'preproc_script_dir') self.seims_bin = cf.get('PATH', 'cpp_program_dir') self.mpi_bin = cf.get('PATH', 'mpiexec_dir') self.workspace = cf.get('PATH', 'working_dir') else: raise ValueError("[PATH] section MUST be existed in *.ini file.") if not (FileClass.is_file_exists(self.base_dir) and FileClass.is_file_exists(self.model_dir) and FileClass.is_file_exists(self.txt_db_dir) and FileClass.is_file_exists(self.preproc_script_dir) and FileClass.is_file_exists(self.seims_bin)): raise IOError( "Please Check Directories defined in [PATH]. " "BASE_DIR, MODEL_DIR, TXT_DB_DIR, PREPROC_SCRIPT_DIR, " "and CPP_PROGRAM_DIR are required!") if not FileClass.is_file_exists(self.mpi_bin): self.mpi_bin = None if not os.path.isdir(self.workspace): try: # first try to make dirs os.mkdir(self.workspace) except OSError as exc: self.workspace = self.model_dir + os.sep + 'preprocess_output' print( "WARNING: Make WORKING_DIR failed: %s. Use the default: %s" % (exc.message, self.workspace)) if not os.path.exists(self.workspace): os.mkdir(self.workspace) self.dirs = DirNameUtils(self.workspace) self.logs = LogNameUtils(self.dirs.log) self.vecs = VectorNameUtils(self.dirs.geoshp) self.taudems = TauDEMFilesUtils(self.dirs.taudem) self.spatials = SpatialNamesUtils(self.dirs.geodata2db) self.modelcfgs = ModelCfgUtils(self.model_dir) self.paramcfgs = ModelParamDataUtils(self.preproc_script_dir + os.sep + 'database') if not FileClass.is_file_exists(self.clim_dir): print( "The CLIMATE_DATA_DIR is not existed, try the default folder name 'climate'." 
) self.clim_dir = self.base_dir + os.sep + 'climate' if not FileClass.is_file_exists(self.clim_dir): raise IOError( "Directories named 'climate' MUST BE located in [base_dir]!" ) if not FileClass.is_file_exists(self.spatial_dir): print( "The SPATIAL_DATA_DIR is not existed, try the default folder name 'spatial'." ) self.spatial_dir = self.base_dir + os.sep + 'spatial' raise IOError( "Directories named 'spatial' MUST BE located in [base_dir]!") if not FileClass.is_file_exists(self.observe_dir): self.observe_dir = None self.use_observed = False if not FileClass.is_file_exists(self.scenario_dir): self.scenario_dir = None self.use_scernario = False # 2. MongoDB related if 'MONGODB' in cf.sections(): self.hostname = cf.get('MONGODB', 'hostname') self.port = cf.getint('MONGODB', 'port') self.climate_db = cf.get('MONGODB', 'climatedbname') self.bmp_scenario_db = cf.get('MONGODB', 'BMPScenarioDBName') self.spatial_db = cf.get('MONGODB', 'SpatialDBName') else: raise ValueError( "[MONGODB] section MUST be existed in *.ini file.") if not StringClass.is_valid_ip_addr(self.hostname): raise ValueError("HOSTNAME illegal defined in [MONGODB]!") # 3. Model related switch # by default, OpenMP version and daily (longterm) mode will be built if 'SWITCH' in cf.sections(): self.cluster = cf.getboolean('SWITCH', 'forCluster') self.storm_mode = cf.getboolean('SWITCH', 'stormMode') self.gen_cn = cf.getboolean('SWITCH', 'genCN') self.gen_runoff_coef = cf.getboolean('SWITCH', 'genRunoffCoef') self.gen_crop = cf.getboolean('SWITCH', 'genCrop') if self.storm_mode: self.gen_iuh = False self.climate_db = ModelNameUtils.standardize_climate_dbname( self.climate_db) self.spatial_db = ModelNameUtils.standardize_spatial_dbname( self.cluster, self.storm_mode, self.spatial_db) # 4. Climate Input if 'CLIMATE' in cf.sections(): self.hydro_climate_vars = self.clim_dir + os.sep + cf.get( 'CLIMATE', 'hydroclimatevarfile') self.prec_sites = self.clim_dir + os.sep + cf.get( 'CLIMATE', 'precsitefile') self.prec_data = self.clim_dir + os.sep + cf.get( 'CLIMATE', 'precdatafile') self.Meteo_sites = self.clim_dir + os.sep + cf.get( 'CLIMATE', 'meteositefile') self.Meteo_data = self.clim_dir + os.sep + cf.get( 'CLIMATE', 'meteodatafile') self.thiessen_field = cf.get('CLIMATE', 'thiessenidfield') else: raise ValueError( "Climate input file names MUST be provided in [CLIMATE]!") # 5. Spatial Input if 'SPATIAL' in cf.sections(): self.prec_sites_thiessen = self.spatial_dir + os.sep + cf.get( 'SPATIAL', 'precsitesthiessen') self.meteo_sites_thiessen = self.spatial_dir + os.sep + cf.get( 'SPATIAL', 'meteositesthiessen') self.dem = self.spatial_dir + os.sep + cf.get('SPATIAL', 'dem') self.outlet_file = self.spatial_dir + os.sep + cf.get( 'SPATIAL', 'outlet_file') if not os.path.exists(self.outlet_file): self.outlet_file = None self.landuse = self.spatial_dir + os.sep + cf.get( 'SPATIAL', 'landusefile') self.landcover_init_param = self.txt_db_dir + os.sep \ + cf.get('SPATIAL', 'landcoverinitfile') self.soil = self.spatial_dir + os.sep + cf.get( 'SPATIAL', 'soilseqnfile') self.soil_property = self.txt_db_dir + os.sep + cf.get( 'SPATIAL', 'soilseqntext') self.mgt_field = self.spatial_dir + os.sep + cf.get( 'SPATIAL', 'mgtfieldfile') if not os.path.exists(self.mgt_field) or \ StringClass.string_match(self.mgt_field, 'none'): self.mgt_field = None else: raise ValueError( "Spatial input file names MUST be provided in [SPATIAL]!") # 6. 
Option parameters if 'OPTIONAL_PARAMETERS' in cf.sections(): self.is_TauDEM = cf.getboolean('OPTIONAL_PARAMETERS', 'istaudemd8') self.d8acc_threshold = cf.getfloat('OPTIONAL_PARAMETERS', 'd8accthreshold') self.np = cf.getint('OPTIONAL_PARAMETERS', 'np') self.d8down_method = cf.get('OPTIONAL_PARAMETERS', 'd8downmethod') if StringClass.string_match(self.d8down_method, 'surface'): self.d8down_method = 's' elif StringClass.string_match(self.d8down_method, 'horizontal'): self.d8down_method = 'h' elif StringClass.string_match(self.d8down_method, 'pythagoras'): self.d8down_method = 'p' elif StringClass.string_match(self.d8down_method, 'vertical'): self.d8down_method = 'v' else: self.d8down_method = self.d8down_method.lower() if self.d8down_method not in ['s', 'h', 'p', 'v']: self.d8down_method = 'h' self.dorm_hr = cf.getfloat('OPTIONAL_PARAMETERS', 'dorm_hr') self.temp_base = cf.getfloat('OPTIONAL_PARAMETERS', 't_base') self.imper_perc_in_urban = cf.getfloat( 'OPTIONAL_PARAMETERS', 'imperviouspercinurbancell') self.default_reach_depth = cf.getfloat('OPTIONAL_PARAMETERS', 'default_reach_depth') self.default_landuse = cf.getint('OPTIONAL_PARAMETERS', 'defaultlanduse') self.default_soil = cf.getint('OPTIONAL_PARAMETERS', 'defaultsoil')
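# A minimal sketch of the *.ini layout consumed by this constructor. Only two
# of the required sections are shown and every value is a placeholder; the
# option names are exactly those queried above (ConfigParser lower-cases
# option names, so the capitalization in the file does not matter).
def _example_minimal_ini():
    import configparser
    ini_text = (
        '[PATH]\n'
        'BASE_DATA_DIR = /data/demo_watershed\n'
        'CLIMATE_DATA_DIR = /data/demo_watershed/climate\n'
        'SPATIAL_DATA_DIR = /data/demo_watershed/spatial\n'
        'MEASUREMENT_DATA_DIR = /data/demo_watershed/observed\n'
        'BMP_DATA_DIR = /data/demo_watershed/scenario\n'
        'MODEL_DIR = /data/demo_watershed/model\n'
        'TXT_DB_DIR = /data/demo_watershed/lookup\n'
        'PREPROC_SCRIPT_DIR = /seims/preprocess\n'
        'CPP_PROGRAM_DIR = /seims/bin\n'
        'MPIEXEC_DIR = /usr/bin\n'
        'WORKING_DIR = /data/demo_watershed/workspace\n'
        '[MONGODB]\n'
        'HOSTNAME = 127.0.0.1\n'
        'PORT = 27017\n'
        'ClimateDBName = demo_climate\n'
        'BMPScenarioDBName = demo_bmp\n'
        'SpatialDBName = demo_model\n')
    cf = configparser.ConfigParser()
    cf.read_string(ini_text)
    return cf.get('PATH', 'base_data_dir'), cf.getint('MONGODB', 'port')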
def scenario_from_texts(cfg, main_db, scenario_db):
    """Import BMP scenario data to MongoDB.

    Args:
        cfg: SEIMS configuration object
        main_db: main spatial (workflow model) database
        scenario_db: BMP scenario database

    Returns:
        False if BMP scenarios are not used, otherwise True.
    """
    if not cfg.use_scernario:
        return False
    print("Import BMP Scenario Data... ")
    bmp_files = FileClass.get_filename_by_suffixes(cfg.scenario_dir, ['.txt'])
    bmp_tabs = []
    bmp_tabs_path = []
    for f in bmp_files:
        bmp_tabs.append(f.split('.')[0])
        bmp_tabs_path.append(cfg.scenario_dir + SEP + f)
    # Create each collection if it does not exist; otherwise drop and recreate it.
    c_list = scenario_db.collection_names()
    for item in bmp_tabs:
        if not StringClass.string_in_list(item.upper(), c_list):
            scenario_db.create_collection(item.upper())
        else:
            scenario_db.drop_collection(item.upper())
    # Read subbasin and distance-to-stream rasters.
    subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
    dist2stream_r = RasterUtilClass.read_raster(cfg.spatials.dist2stream_d8)
    for j, bmp_txt in enumerate(bmp_tabs_path):
        bmp_tab_name = bmp_tabs[j]
        data_array = read_data_items_from_txt(bmp_txt)
        field_array = data_array[0]
        data_array = data_array[1:]
        for item in data_array:
            dic = {}
            for i, field_name in enumerate(field_array):
                if MathClass.isnumerical(item[i]):
                    dic[field_name.upper()] = float(item[i])
                else:
                    dic[field_name.upper()] = str(item[i]).upper()
            if StringClass.string_in_list(ImportScenario2Mongo._LocalX, dic.keys()) and \
                    StringClass.string_in_list(ImportScenario2Mongo._LocalY, dic.keys()):
                # Derive subbasin ID and downstream distance from the coordinates.
                subbsn_id = subbasin_r.get_value_by_xy(
                    dic[ImportScenario2Mongo._LocalX.upper()],
                    dic[ImportScenario2Mongo._LocalY.upper()])
                distance = dist2stream_r.get_value_by_xy(
                    dic[ImportScenario2Mongo._LocalX.upper()],
                    dic[ImportScenario2Mongo._LocalY.upper()])
                if subbsn_id is not None and distance is not None:
                    dic[ImportScenario2Mongo._SUBBASINID] = float(subbsn_id)
                    dic[ImportScenario2Mongo._DISTDOWN] = float(distance)
                    scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic,
                                                                           upsert=True)
            else:
                scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic,
                                                                       upsert=True)
    # Write the BMP scenario database name into the model workflow database.
    c_list = main_db.collection_names()
    if not StringClass.string_in_list(DBTableNames.main_scenario, c_list):
        main_db.create_collection(DBTableNames.main_scenario)
    bmp_info_dic = dict()
    bmp_info_dic[ImportScenario2Mongo._FLD_DB] = cfg.bmp_scenario_db
    main_db[DBTableNames.main_scenario].find_one_and_replace(bmp_info_dic,
                                                             bmp_info_dic,
                                                             upsert=True)
    return True
def lookup_tables_as_collection_and_gridfs(cfg, maindb): """Import lookup tables (from txt file) as Collection and GridFS Args: cfg: SEIMS config object maindb: workflow model database """ for tablename, txt_file in cfg.paramcfgs.lookup_tabs_dict.items(): # import each lookup table as a collection and GridFS file. c_list = maindb.collection_names() if not StringClass.string_in_list(tablename.upper(), c_list): maindb.create_collection(tablename.upper()) else: maindb.drop_collection(tablename.upper()) # initial bulk operator bulk = maindb[tablename.upper()].initialize_ordered_bulk_op() # delete if the tablename gridfs file existed spatial = GridFS(maindb, DBTableNames.gridfs_spatial) if spatial.exists(filename=tablename.upper()): x = spatial.get_version(filename=tablename.upper()) spatial.delete(x._id) # read data items data_items = read_data_items_from_txt(txt_file) field_names = data_items[0][0:] item_values = [] # import as gridfs file for i, cur_data_item in enumerate(data_items): if i == 0: continue data_import = dict() # import as Collection item_value = [] # import as gridfs file for idx, fld in enumerate(field_names): if MathClass.isnumerical(cur_data_item[idx]): tmp_value = float(cur_data_item[idx]) data_import[fld] = tmp_value item_value.append(tmp_value) else: data_import[fld] = cur_data_item[idx] bulk.insert(data_import) if len(item_value) > 0: item_values.append(item_value) bulk.execute() # begin import gridfs file n_row = len(item_values) # print (item_values) if n_row >= 1: n_col = len(item_values[0]) for i in range(n_row): if n_col != len(item_values[i]): raise ValueError("Please check %s to make sure each item has " "the same numeric dimension. The size of first " "row is: %d, and the current data item is: %d" % (tablename, n_col, len(item_values[i]))) else: item_values[i].insert(0, n_col) metadic = {ModelParamDataUtils.item_count: n_row, ModelParamDataUtils.field_count: n_col} cur_lookup_gridfs = spatial.new_file(filename=tablename.upper(), metadata=metadic) header = [n_row] fmt = '%df' % 1 s = pack(fmt, *header) cur_lookup_gridfs.write(s) fmt = '%df' % (n_col + 1) for i in range(n_row): s = pack(fmt, *item_values[i]) cur_lookup_gridfs.write(s) cur_lookup_gridfs.close()
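# The GridFS file written above is a flat stream of 32-bit floats: one float
# holding the row count, then for every row one float holding the column count
# followed by the row's values. A self-contained pack/unpack sketch with
# arbitrary example values:
def _example_lookup_gridfs_layout():
    from struct import pack, unpack
    rows = [[1.0, 67.0, 0.25], [2.0, 78.0, 0.5]]
    n_col = len(rows[0])
    blob = pack('1f', float(len(rows)))
    for r in rows:
        blob += pack('%df' % (n_col + 1), float(n_col), *r)
    n_row = int(unpack('1f', blob[:4])[0])
    first_row = unpack('%df' % (n_col + 1), blob[4:4 + 4 * (n_col + 1)])
    return n_row, first_row   # 2, (3.0, 1.0, 67.0, 0.25)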
def daily_data_from_txt(climdb, data_txt_file, sites_info_dict): """Import climate data table""" # delete existed precipitation data climdb[DBTableNames.data_values].remove( {DataValueFields.type: DataType.m}) clim_data_items = read_data_items_from_txt(data_txt_file) clim_flds = clim_data_items[0] # PHUCalDic is used for Calculating potential heat units (PHU) # for each climate station and each year. # format is {StationID:{Year1:[values],Year2:[Values]...}, ...} # PHUCalDic = {} # format: {StationID1: climateStats1, ...} hydro_climate_stats = {} required_flds = [ DataValueFields.y, DataValueFields.m, DataValueFields.d, DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws ] for fld in required_flds: if not StringClass.string_in_list(fld, clim_flds): raise ValueError( "Meteorological Daily data is invalid, please Check!") # Create bulk object bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op() count = 0 for i, cur_clim_data_item in enumerate(clim_data_items): if i == 0: continue dic = dict() cur_ssd = DEFAULT_NODATA cur_y = 0 cur_m = 0 cur_d = 0 for j, clim_data_v in enumerate(cur_clim_data_item): if StringClass.string_match(clim_flds[j], DataValueFields.id): dic[DataValueFields.id] = int(clim_data_v) elif StringClass.string_match(clim_flds[j], DataValueFields.y): cur_y = int(clim_data_v) dic[DataValueFields.y] = cur_y elif StringClass.string_match(clim_flds[j], DataValueFields.m): cur_m = int(clim_data_v) elif StringClass.string_match(clim_flds[j], DataValueFields.d): cur_d = int(clim_data_v) elif StringClass.string_match(clim_flds[j], DataType.mean_tmp): dic[DataType.mean_tmp] = float(clim_data_v) elif StringClass.string_match(clim_flds[j], DataType.min_tmp): dic[DataType.min_tmp] = float(clim_data_v) elif StringClass.string_match(clim_flds[j], DataType.max_tmp): dic[DataType.max_tmp] = float(clim_data_v) elif StringClass.string_match(clim_flds[j], DataType.pet): dic[DataType.pet] = float(clim_data_v) elif StringClass.string_match(clim_flds[j], DataType.sr): dic[DataType.sr] = float(clim_data_v) elif StringClass.string_match(clim_flds[j], DataType.ws): dic[DataType.ws] = float(clim_data_v) elif StringClass.string_match(clim_flds[j], DataType.rm): dic[DataType.rm] = float(clim_data_v) * 0.01 elif StringClass.string_match(clim_flds[j], DataType.ssd): cur_ssd = float(clim_data_v) # Date transformation dt = datetime(cur_y, cur_m, cur_d, 0, 0) sec = time.mktime(dt.timetuple()) utc_time = time.gmtime(sec) dic[DataValueFields.local_time] = dt dic[DataValueFields.time_zone] = time.timezone / 3600 dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1], utc_time[2], utc_time[3]) # Do if some of these data are not provided if DataType.mean_tmp not in dic.keys(): dic[DataType.mean_tmp] = (dic[DataType.max_tmp] + dic[DataType.min_tmp]) / 2. 
if DataType.sr not in dic.keys(): if cur_ssd == DEFAULT_NODATA: raise ValueError(DataType.sr + " or " + DataType.ssd + " must be provided!") else: if dic[DataValueFields.id] in sites_info_dict.keys(): cur_lon, cur_lat = sites_info_dict[dic[ DataValueFields.id]].lon_lat() dic[DataType.sr] = round( HydroClimateUtilClass.rs(DateClass.day_of_year(dt), float(cur_ssd), cur_lat * PI / 180.), 1) output_flds = [ DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.pet, DataType.ws, DataType.sr ] for fld in output_flds: cur_dic = dict() if fld in dic.keys(): cur_dic[DataValueFields.value] = dic[fld] cur_dic[DataValueFields.id] = dic[DataValueFields.id] cur_dic[DataValueFields.utc] = dic[DataValueFields.utc] cur_dic[DataValueFields.time_zone] = dic[ DataValueFields.time_zone] cur_dic[DataValueFields.local_time] = dic[ DataValueFields.local_time] cur_dic[DataValueFields.type] = fld # Old code, insert or update one item a time, which is quite inefficiency # Update by using bulk operation interface. lj bulk.insert(cur_dic) count += 1 if count % 500 == 0: # execute each 500 records bulk.execute() bulk = climdb[ DBTableNames. data_values].initialize_ordered_bulk_op() if dic[DataValueFields.id] not in hydro_climate_stats.keys(): hydro_climate_stats[dic[DataValueFields.id]] = ClimateStats() hydro_climate_stats[dic[DataValueFields.id]].add_item(dic) # execute the remained records if count % 500 != 0: bulk.execute() for item, cur_climate_stats in hydro_climate_stats.items(): cur_climate_stats.annual_stats() # Create index climdb[DBTableNames.data_values].create_index([ (DataValueFields.id, ASCENDING), (DataValueFields.type, ASCENDING), (DataValueFields.utc, ASCENDING) ]) # prepare dic for MongoDB for s_id, stats_v in hydro_climate_stats.items(): for YYYY in stats_v.Count.keys(): cur_dic = dict() cur_dic[DataValueFields.value] = stats_v.PHUTOT[YYYY] cur_dic[DataValueFields.id] = s_id cur_dic[DataValueFields.y] = YYYY cur_dic[VariableDesc.unit] = "heat units" cur_dic[VariableDesc.type] = DataType.phu_tot curfilter = { DataValueFields.id: s_id, VariableDesc.type: DataType.phu_tot, DataValueFields.y: YYYY } climdb[DBTableNames.annual_stats].find_one_and_replace( curfilter, cur_dic, upsert=True) # import annual mean temperature cur_dic[VariableDesc.type] = DataType.mean_tmp cur_dic[VariableDesc.unit] = "deg C" cur_dic[DataValueFields.value] = stats_v.MeanTmp[YYYY] curfilter = { DataValueFields.id: s_id, VariableDesc.type: DataType.mean_tmp, DataValueFields.y: YYYY } climdb[DBTableNames.annual_stats].find_one_and_replace( curfilter, cur_dic, upsert=True) cur_dic[DataValueFields.value] = stats_v.PHU0 cur_dic[DataValueFields.id] = s_id cur_dic[DataValueFields.y] = DEFAULT_NODATA cur_dic[VariableDesc.unit] = "heat units" cur_dic[VariableDesc.type] = DataType.phu0 curfilter = { DataValueFields.id: s_id, VariableDesc.type: DataType.phu0, DataValueFields.y: DEFAULT_NODATA } climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic, upsert=True) # import annual mean temperature cur_dic[VariableDesc.type] = DataType.mean_tmp0 cur_dic[VariableDesc.unit] = "deg C" cur_dic[DataValueFields.value] = stats_v.MeanTmp0 curfilter = { DataValueFields.id: s_id, VariableDesc.type: DataType.mean_tmp0, DataValueFields.y: DEFAULT_NODATA } climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic, upsert=True)
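# The import above flushes the bulk operator every 500 documents to keep the
# pending batch bounded. A sketch of the same batching pattern with the newer
# bulk_write API (pymongo >= 3); the collection name and documents are
# placeholders.
def _example_batched_insert(climdb, docs, batch_size=500):
    """Insert documents in ordered batches of batch_size."""
    from pymongo import InsertOne
    requests = []
    for doc in docs:
        requests.append(InsertOne(doc))
        if len(requests) == batch_size:
            climdb['DATA_VALUES'].bulk_write(requests, ordered=True)
            requests = []
    if requests:  # flush the remainder
        climdb['DATA_VALUES'].bulk_write(requests, ordered=True)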
def model_io_configuration(cfg, maindb): """ Import Input and Output Configuration of SEIMS, i.e., file.in and file.out Args: cfg: SEIMS config object maindb: MongoDB database object """ file_in_path = cfg.modelcfgs.filein file_out_path = cfg.paramcfgs.init_outputs_file # create if collection not existed c_list = maindb.collection_names() conf_tabs = [DBTableNames.main_filein, DBTableNames.main_fileout] for item in conf_tabs: if not StringClass.string_in_list(item, c_list): maindb.create_collection(item) else: maindb.drop_collection(item) file_in_items = read_data_items_from_txt(file_in_path) file_out_items = read_data_items_from_txt(file_out_path) for item in file_in_items: file_in_dict = dict() values = StringClass.split_string(StringClass.strip_string(item[0]), ['|']) if len(values) != 2: raise ValueError("One item should only have one Tag and one value string," " split by '|'") file_in_dict[ModelCfgFields.tag] = values[0] file_in_dict[ModelCfgFields.value] = values[1] maindb[DBTableNames.main_filein].insert(file_in_dict) # begin to import initial outputs settings bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op() out_field_array = file_out_items[0] out_data_array = file_out_items[1:] # print out_data_array for item in out_data_array: file_out_dict = dict() for i, v in enumerate(out_field_array): if StringClass.string_match(ModelCfgFields.mod_cls, v): file_out_dict[ModelCfgFields.mod_cls] = item[i] elif StringClass.string_match(ModelCfgFields.output_id, v): file_out_dict[ModelCfgFields.output_id] = item[i] elif StringClass.string_match(ModelCfgFields.desc, v): file_out_dict[ModelCfgFields.desc] = item[i] elif StringClass.string_match(ModelCfgFields.unit, v): file_out_dict[ModelCfgFields.unit] = item[i] elif StringClass.string_match(ModelCfgFields.type, v): file_out_dict[ModelCfgFields.type] = item[i] elif StringClass.string_match(ModelCfgFields.stime, v): file_out_dict[ModelCfgFields.stime] = item[i] elif StringClass.string_match(ModelCfgFields.etime, v): file_out_dict[ModelCfgFields.etime] = item[i] elif StringClass.string_match(ModelCfgFields.interval, v): file_out_dict[ModelCfgFields.interval] = item[i] elif StringClass.string_match(ModelCfgFields.interval_unit, v): file_out_dict[ModelCfgFields.interval_unit] = item[i] elif StringClass.string_match(ModelCfgFields.filename, v): file_out_dict[ModelCfgFields.filename] = item[i] elif StringClass.string_match(ModelCfgFields.use, v): file_out_dict[ModelCfgFields.use] = item[i] elif StringClass.string_match(ModelCfgFields.subbsn, v): file_out_dict[ModelCfgFields.subbsn] = item[i] if file_out_dict.keys() is []: raise ValueError("There are not any valid output item stored in file.out!") bulk.insert(file_out_dict) bulk.execute() # begin to import the desired outputs # create bulk operator bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op() # read initial parameters from txt file data_items = read_data_items_from_txt(cfg.modelcfgs.fileout) # print (field_names) for i, cur_data_item in enumerate(data_items): data_import = dict() cur_filter = dict() # print (cur_data_item) if len(cur_data_item) == 7: data_import[ModelCfgFields.output_id] = cur_data_item[0] data_import[ModelCfgFields.type] = cur_data_item[1] data_import[ModelCfgFields.stime] = cur_data_item[2] data_import[ModelCfgFields.etime] = cur_data_item[3] data_import[ModelCfgFields.interval] = cur_data_item[4] data_import[ModelCfgFields.interval_unit] = cur_data_item[5] data_import[ModelCfgFields.subbsn] = cur_data_item[6] 
data_import[ModelCfgFields.use] = 1 cur_filter[ModelCfgFields.output_id] = cur_data_item[0] else: raise RuntimeError("Items in file.out must have 7 columns, i.e., OUTPUTID," "TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.") bulk.find(cur_filter).update({'$set': data_import}) # execute import operators bulk.execute()
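# For reference, sketches of the two plain-text files parsed above (all tags,
# IDs, and values are illustrative, not actual SEIMS settings). Each file.in
# line is a single 'TAG|value' pair, and each data line of the user's file.out
# must carry the seven columns named in the error message above:
#
#   file.in
#       MODE|Daily
#       INTERVAL|1
#
#   file.out   (TAB- or COMMA-separated; the '#' header line is skipped)
#       #OUTPUTID,TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN
#       QRECH,SUM,2013-01-01 00:00:00,2013-12-31 23:59:59,1,DAY,ALL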
def data_from_txt(hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file): """ Read observed data from txt file Args: hydro_clim_db: hydro-climate dababase obs_txts_list: txt file paths of observed data sites_info_txts_list: txt file paths of site information subbsn_file: subbasin raster file Returns: True or False """ # 1. Read monitor station information, and store variables information and station IDs variable_lists = [] site_ids = [] for site_file in sites_info_txts_list: site_data_items = read_data_items_from_txt(site_file) site_flds = site_data_items[0] for i in range(1, len(site_data_items)): dic = {} for j in range(len(site_data_items[i])): if StringClass.string_match(site_flds[j], StationFields.id): dic[StationFields.id] = int(site_data_items[i][j]) site_ids.append(dic[StationFields.id]) elif StringClass.string_match(site_flds[j], StationFields.name): dic[StationFields.name] = StringClass.strip_string( site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.type): types = StringClass.split_string( StringClass.strip_string(site_data_items[i][j]), ',') elif StringClass.string_match(site_flds[j], StationFields.lat): dic[StationFields.lat] = float(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.lon): dic[StationFields.lon] = float(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.x): dic[StationFields.x] = float(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.y): dic[StationFields.y] = float(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.unit): dic[StationFields.unit] = StringClass.strip_string( site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.elev): dic[StationFields.elev] = float(site_data_items[i][j]) elif StringClass.string_match(site_flds[j], StationFields.outlet): dic[StationFields.outlet] = float( site_data_items[i][j]) for j, cur_type in enumerate(types): site_dic = dict() site_dic[StationFields.id] = dic[StationFields.id] site_dic[StationFields.name] = dic[StationFields.name] site_dic[StationFields.type] = cur_type site_dic[StationFields.lat] = dic[StationFields.lat] site_dic[StationFields.lon] = dic[StationFields.lon] site_dic[StationFields.x] = dic[StationFields.x] site_dic[StationFields.y] = dic[StationFields.y] site_dic[StationFields.elev] = dic[StationFields.elev] site_dic[StationFields.outlet] = dic[StationFields.outlet] # Add SubbasinID field matched, cur_subbsn_id = ImportObservedData.match_subbasin( subbsn_file, site_dic) if not matched: break cur_subbsn_id_str = '' for tmp_id in cur_subbsn_id: if tmp_id is not None: cur_subbsn_id_str += str(tmp_id) + ',' cur_subbsn_id_str = cur_subbsn_id_str[:-1] site_dic[StationFields.id] = cur_subbsn_id_str curfilter = { StationFields.id: site_dic[StationFields.id], StationFields.type: site_dic[StationFields.type] } # print (curfilter) hydro_clim_db[DBTableNames.sites].find_one_and_replace( curfilter, site_dic, upsert=True) var_dic = dict() var_dic[StationFields.type] = types[j] var_dic[StationFields.unit] = dic[StationFields.unit] if var_dic not in variable_lists: variable_lists.append(var_dic) site_ids = list(set(site_ids)) # 2. 
Read measurement data and import to MongoDB bulk = hydro_clim_db[ DBTableNames.observes].initialize_ordered_bulk_op() count = 0 for measDataFile in obs_txts_list: # print measDataFile obs_data_items = read_data_items_from_txt(measDataFile) # If the data items is EMPTY or only have one header row, then goto # next data file. if obs_data_items == [] or len(obs_data_items) == 1: continue obs_flds = obs_data_items[0] required_flds = [ StationFields.id, DataValueFields.y, DataValueFields.m, DataValueFields.d, DataValueFields.type, DataValueFields.value ] for fld in required_flds: if not StringClass.string_in_list( fld, obs_flds): # data can not meet the request! raise ValueError( "The %s can not meet the required format!" % measDataFile) for i in range(1, len(obs_data_items)): dic = dict() cur_y = 0 cur_m = 0 cur_d = 0 for j in range(len(obs_data_items[i])): if StringClass.string_match(obs_flds[j], StationFields.id): dic[StationFields.id] = int(obs_data_items[i][j]) # if current site ID is not included, goto next data item if dic[StationFields.id] not in site_ids: continue elif StringClass.string_match(obs_flds[j], DataValueFields.y): cur_y = int(obs_data_items[i][j]) elif StringClass.string_match(obs_flds[j], DataValueFields.m): cur_m = int(obs_data_items[i][j]) elif StringClass.string_match(obs_flds[j], DataValueFields.d): cur_d = int(obs_data_items[i][j]) elif StringClass.string_match(obs_flds[j], DataValueFields.type): dic[DataValueFields.type] = obs_data_items[i][j] elif StringClass.string_match(obs_flds[j], DataValueFields.value): dic[DataValueFields.value] = float( obs_data_items[i][j]) dt = datetime(cur_y, cur_m, cur_d, 0, 0) sec = time.mktime(dt.timetuple()) utc_time = time.gmtime(sec) dic[DataValueFields.local_time] = dt dic[DataValueFields.time_zone] = time.timezone / 3600 dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1], utc_time[2], utc_time[3]) curfilter = { StationFields.id: dic[StationFields.id], DataValueFields.type: dic[DataValueFields.type], DataValueFields.utc: dic[DataValueFields.utc] } bulk.find(curfilter).replace_one(dic) count += 1 if count % 500 == 0: bulk.execute() bulk = hydro_clim_db[ DBTableNames.observes].initialize_ordered_bulk_op() # db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True) if count % 500 != 0: bulk.execute() # 3. Add measurement data with unit converted # loop variables list added_dics = [] for curVar in variable_lists: # print curVar # if the unit is mg/L, then change the Type name with the suffix "Conc", # and convert the corresponding data to kg if the discharge data is # available. 
cur_type = curVar[StationFields.type] cur_unit = curVar[StationFields.unit] # Find data by Type for item in hydro_clim_db[DBTableNames.observes].find( {StationFields.type: cur_type}): # print item dic = dict() dic[StationFields.id] = item[StationFields.id] dic[DataValueFields.value] = item[DataValueFields.value] dic[StationFields.type] = item[StationFields.type] dic[DataValueFields.local_time] = item[ DataValueFields.local_time] dic[DataValueFields.time_zone] = item[ DataValueFields.time_zone] dic[DataValueFields.utc] = item[DataValueFields.utc] if cur_unit == "mg/L": # update the Type name dic[StationFields.type] = cur_type + "Conc" curfilter = { StationFields.id: dic[StationFields.id], DataValueFields.type: cur_type, DataValueFields.utc: dic[DataValueFields.utc] } hydro_clim_db[DBTableNames.observes].find_one_and_replace( curfilter, dic, upsert=True) dic[StationFields.type] = cur_type # find discharge on current day cur_filter = { StationFields.type: "Q", DataValueFields.utc: dic[DataValueFields.utc], StationFields.id: dic[StationFields.id] } q_dic = hydro_clim_db[DBTableNames.observes].find_one( filter=cur_filter) q = -9999. if q_dic is not None: # and q_dic.has_key(DataValueFields.value): q = q_dic[DataValueFields.value] else: continue if cur_unit == "mg/L": # convert mg/L to kg dic[DataValueFields.value] = round( dic[DataValueFields.value] * q * 86400. / 1000., 2) elif cur_unit == "kg": dic[StationFields.type] = cur_type + "Conc" # convert kg to mg/L dic[DataValueFields.value] = round( dic[DataValueFields.value] / q * 1000. / 86400., 2) # add new data item added_dics.append(dic) # import to MongoDB for dic in added_dics: curfilter = { StationFields.id: dic[StationFields.id], DataValueFields.type: dic[DataValueFields.type], DataValueFields.utc: dic[DataValueFields.utc] } hydro_clim_db[DBTableNames.observes].find_one_and_replace( curfilter, dic, upsert=True)
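# The unit handling above converts a concentration series (mg/L) into a daily
# load (kg) using the observed discharge, and vice versa:
#     load_kg = conc_mg_per_L * Q_m3_per_s * 86400 / 1000
# A tiny worked example with made-up numbers:
def _example_concentration_to_load(conc_mg_per_l=2.5, q_m3_per_s=3.0):
    load_kg = round(conc_mg_per_l * q_m3_per_s * 86400. / 1000., 2)   # 648.0 kg/day
    conc_back = round(load_kg / q_m3_per_s * 1000. / 86400., 2)       # 2.5 mg/L
    return load_kg, conc_back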
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time, eliminate_zero=False, time_sys_output='UTCTIME', day_divided_hour=0): """ Interpolate not regular observed data to regular time interval data. Args: in_file: input data file, the basic format is as follows: line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME line 2: DATETIME,field1,field2,... line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,... line 4: ... ... Field name can be PCP, FLOW, SED the unit is mm/h, m3/s, g/L (i.e., kg/m3), respectively. time_interval: time interval, unit is minute, e.g., daily output is 1440 start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system is based on time_sys. end_time: end time, see also start_time. eliminate_zero: Boolean flag. If true, the time interval without original records will not be output. time_sys_output: time system of output time_system, the format must be '<time_system> [<time_zone>]', e.g., 'LOCALTIME' 'LOCALTIME 8' 'UTCTIME' (default) day_divided_hour: If the time_interval is equal to N*1440, this parameter should be carefully specified. The value must range from 0 to 23. e.g., day_divided_hour ==> day ranges (all expressed as 2013-02-03) 0 ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default) 8 ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59 20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59 Returns: The output data files are located in the same directory with the input file. The nomenclature is: <field name>_<time system>_<time interval>_<nonzero>, e.g., pcp_utctime_1440_nonzero.txt, flow_localtime_60.txt """ FileClass.check_file_exists(in_file) time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file) data_items = read_data_items_from_txt(in_file) flds = data_items[0][:] data_items.remove(flds) if not 0 <= day_divided_hour <= 23: raise ValueError("Day divided hour must range from 0 to 23!") try: date_idx = flds.index('DATETIME') flds.remove('DATETIME') except ValueError: raise ValueError("DATETIME must be one of the fields!") # available field available_flds = ['FLOW', 'SED', 'PCP'] def check_avaiable_field(cur_fld): """Check if the given field name is supported.""" support_flag = False for fff in available_flds: if fff.lower() in cur_fld.lower(): support_flag = True break return support_flag ord_data = OrderedDict() time_zone_output = time.timezone / -3600 if time_sys_output.lower().find('local') >= 0: tmpstrs = StringClass.split_string(time_sys_output, [' ']) if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]): time_zone_output = int(tmpstrs[1]) time_sys_output = 'LOCALTIME' else: time_sys_output = 'UTCTIME' time_zone_output = 0 for item in data_items: org_datetime = HydroClimateUtilClass.get_datetime_from_string(item[date_idx]) if time_sys_input == 'LOCALTIME': org_datetime -= timedelta(hours=time_zone_input) # now, org_datetime is UTC time. 
if time_sys_output == 'LOCALTIME': org_datetime += timedelta(hours=time_zone_output) # now, org_datetime is consistent with the output time system ord_data[org_datetime] = [] for i, v in enumerate(item): if i == date_idx: continue if MathClass.isnumerical(v): ord_data[org_datetime].append(float(v)) else: ord_data[org_datetime].append(v) # print (ord_data) itp_data = OrderedDict() out_time_delta = timedelta(minutes=time_interval) sdatetime = HydroClimateUtilClass.get_datetime_from_string(start_time) edatetime = HydroClimateUtilClass.get_datetime_from_string(end_time) item_dtime = sdatetime if time_interval % 1440 == 0: item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \ timedelta(minutes=day_divided_hour * 60) while item_dtime <= edatetime: # print (item_dtime) # if item_dtime.month == 12 and item_dtime.day == 31: # print ("debug") sdt = item_dtime # start datetime of records edt = item_dtime + out_time_delta # end datetime of records # get original data items org_items = [] pre_dt = list(ord_data.keys())[0] pre_added = False for i, v in ord_data.items(): if sdt <= i < edt: if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta: # only add one item that less than sdt. org_items.append([pre_dt] + ord_data.get(pre_dt)) pre_added = True org_items.append([i] + v) if i > edt: break pre_dt = i if len(org_items) > 0: org_items.append([edt]) # Just add end time for compute convenient if org_items[0][0] < sdt: org_items[0][0] = sdt # set the begin datetime of current time interval # if eliminate time interval without original records # initial interpolated list itp_data[item_dtime] = [0.] * len(flds) if len(org_items) == 0: if eliminate_zero: itp_data.popitem() item_dtime += out_time_delta continue # core interpolation code flow_idx = -1 for v_idx, v_name in enumerate(flds): if not check_avaiable_field(v_name): continue if 'SED' in v_name.upper(): # FLOW must be existed for v_idx2, v_name2 in enumerate(flds): if 'FLOW' in v_name2.upper(): flow_idx = v_idx2 break if flow_idx < 0: raise RuntimeError("To interpolate SED, FLOW must be provided!") for v_idx, v_name in enumerate(flds): if not check_avaiable_field(v_name): continue itp_value = 0. itp_auxiliary_value = 0. for org_item_idx, org_item_dtv in enumerate(org_items): if org_item_idx == 0: continue org_item_dt = org_item_dtv[0] pre_item_dtv = org_items[org_item_idx - 1] pre_item_dt = pre_item_dtv[0] tmp_delta_dt = org_item_dt - pre_item_dt tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds if 'SED' in v_name.upper(): itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \ tmp_delta_secs itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs else: itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs if 'SED' in v_name.upper(): if MathClass.floatequal(itp_auxiliary_value, 0.): itp_value = 0. print ("WARNING: Flow is 0 for %s, please check!" % item_dtime.strftime('%Y-%m-%d %H:%M:%S')) itp_value /= itp_auxiliary_value elif 'FLOW' in v_name.upper(): itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds) elif 'PCP' in v_name.upper(): # the input is mm/h, and output is mm itp_value /= 3600. 
itp_data[item_dtime][v_idx] = round(itp_value, 4) item_dtime += out_time_delta # for i, v in itp_data.items(): # print (i, v) # output to files work_path = os.path.dirname(in_file) header_str = '#' + time_sys_output if time_sys_output == 'LOCALTIME': header_str = header_str + ' ' + str(time_zone_output) for idx, fld in enumerate(flds): if not check_avaiable_field(fld): continue file_name = fld + '_' + time_sys_output + '_' + str(time_interval) if eliminate_zero: file_name += '_nonzero' file_name += '.txt' out_file = work_path + os.sep + file_name f = open(out_file, 'w') f.write(header_str + '\n') f.write('DATETIME,' + fld + '\n') for i, v in itp_data.items(): cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n' f.write(cur_line) f.close()
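# The aggregation above is a time-weighted sum over the original records:
# FLOW is averaged over the output interval, PCP (mm/h) is integrated to a
# depth in mm, and SED is flow-weighted. A compact, self-contained sketch for
# the FLOW and PCP cases, using hypothetical sub-records (duration in seconds,
# value):
def _example_interval_aggregation():
    records = [(1800, 2.0), (1800, 4.0)]          # two half-hour records
    interval_seconds = 3600                        # one-hour output step
    weighted_sum = sum(sec * val for sec, val in records)
    flow_mean = weighted_sum / interval_seconds    # 3.0 m3/s average flow
    pcp_depth = weighted_sum / 3600.               # 3.0 mm if the values were mm/h
    return flow_mean, pcp_depth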