Example 1
    def workflow(cfg, db):
        """
        This function mainly to import measurement data to MongoDB
        data type may include Q (discharge, m3/s), tn, tp, etc.
        the required parameters that defined in configuration file (*.ini)
        """
        if not cfg.use_observed:
            return False
        c_list = db.collection_names()
        if not StringClass.string_in_list(DBTableNames.observes, c_list):
            db.create_collection(DBTableNames.observes)
        else:
            db.drop_collection(DBTableNames.observes)
        if not StringClass.string_in_list(DBTableNames.sites, c_list):
            db.create_collection(DBTableNames.sites)
        if not StringClass.string_in_list(DBTableNames.var_desc, c_list):
            db.create_collection(DBTableNames.var_desc)

        file_list = FileClass.get_full_filename_by_suffixes(
            cfg.observe_dir, ['.txt'])
        meas_file_list = []
        site_loc = []
        for fl in file_list:
            if StringClass.is_substring('observed_', fl):
                meas_file_list.append(fl)
            else:
                site_loc.append(fl)
        ImportObservedData.data_from_txt(db, meas_file_list, site_loc,
                                         cfg.spatials.subbsn)
        return True
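A minimal standalone sketch of the file-split step above, which relies only on the 'observed_' filename prefix. It uses the standard library alone, and the directory path and prefix arguments are illustrative assumptions rather than part of the SEIMS API.

import os

def split_observed_files(observe_dir, prefix='observed_'):
    """Split *.txt files into observed-data files and site-location files,
    based on whether the filename contains the given prefix."""
    meas_files, site_files = [], []
    for name in sorted(os.listdir(observe_dir)):
        if not name.lower().endswith('.txt'):
            continue
        full_path = os.path.join(observe_dir, name)
        if prefix in name:
            meas_files.append(full_path)
        else:
            site_files.append(full_path)
    return meas_files, site_files
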
 def initial_params_from_txt(cfg, maindb):
     """
     Import initial calibration parameters from a txt data file.
     Args:
         cfg: SEIMS config object
         maindb: MongoDB database object
     """
     # Drop the collection if it exists; create it if it does not.
     c_list = maindb.collection_names()
     if not StringClass.string_in_list(DBTableNames.main_parameter, c_list):
         maindb.create_collection(DBTableNames.main_parameter)
     else:
         maindb.drop_collection(DBTableNames.main_parameter)
     # create bulk operator
     bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
     # read initial parameters from txt file
     data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
     field_names = data_items[0][0:]
     # print (field_names)
     for i, cur_data_item in enumerate(data_items):
         if i == 0:
             continue
         # print cur_data_item
         # Initialize a default blank parameter dict.
         data_import = {ModelParamFields.name: '', ModelParamFields.desc: '',
                        ModelParamFields.unit: '', ModelParamFields.module: '',
                        ModelParamFields.value: DEFAULT_NODATA,
                        ModelParamFields.impact: DEFAULT_NODATA,
                        ModelParamFields.change: 'NC',
                        ModelParamFields.max: DEFAULT_NODATA,
                        ModelParamFields.min: DEFAULT_NODATA,
                        ModelParamFields.type: ''}
         for k, v in data_import.items():
             idx = field_names.index(k)
             if cur_data_item[idx] == '':
                 if StringClass.string_match(k, ModelParamFields.change_ac):
                     data_import[k] = 0
                 elif StringClass.string_match(k, ModelParamFields.change_rc):
                     data_import[k] = 1
                 elif StringClass.string_match(k, ModelParamFields.change_nc):
                     data_import[k] = 0
                 elif StringClass.string_match(k, ModelParamFields.change_vc):
                     data_import[k] = DEFAULT_NODATA  # Be careful to check NODATA when it is used!
             else:
                 if MathClass.isnumerical(cur_data_item[idx]):
                     data_import[k] = float(cur_data_item[idx])
                 else:
                     data_import[k] = cur_data_item[idx]
         bulk.insert(data_import)
     # execute import operators
     bulk.execute()
     # Create an index on parameter type and name in ascending order.
     maindb[DBTableNames.main_parameter].create_index([(ModelParamFields.type, ASCENDING),
                                                       (ModelParamFields.name, ASCENDING)])
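The default-filling logic above can be tried without MongoDB. Below is a simplified sketch: the uppercase field names stand in for ModelParamFields and are assumptions, and empty cells simply keep the defaults here instead of applying the CHANGE-dependent rules used above.

DEFAULT_NODATA = -9999.

def build_param_doc(field_names, data_item):
    """Build one parameter document, filling defaults for empty cells."""
    doc = {'NAME': '', 'DESCRIPTION': '', 'UNIT': '', 'MODULE': '',
           'VALUE': DEFAULT_NODATA, 'IMPACT': DEFAULT_NODATA, 'CHANGE': 'NC',
           'MAX': DEFAULT_NODATA, 'MIN': DEFAULT_NODATA, 'TYPE': ''}
    for key in doc:
        cell = data_item[field_names.index(key)]
        if cell == '':
            continue  # keep the default defined above
        try:
            doc[key] = float(cell)
        except ValueError:
            doc[key] = cell
    return doc

fields = ['NAME', 'DESCRIPTION', 'UNIT', 'MODULE', 'VALUE', 'IMPACT',
          'CHANGE', 'MAX', 'MIN', 'TYPE']
row = ['K_pet', 'PET adjustment factor', '-', 'PET_H', '1.0', '', 'RC',
       '2', '0.5', 'Climate']
print(build_param_doc(fields, row))
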
Example 3
 def variable_table(db, var_file):
     """Import variables table"""
     var_data_items = read_data_items_from_txt(var_file)
     var_flds = var_data_items[0]
     for i in range(1, len(var_data_items)):
         dic = {}
         for j in range(len(var_data_items[i])):
             if StringClass.string_match(var_flds[j], VariableDesc.type):
                 dic[VariableDesc.type] = var_data_items[i][j]
             elif StringClass.string_match(var_flds[j], VariableDesc.unit):
                 dic[VariableDesc.unit] = var_data_items[i][j]
         # If this item already exists, update it; otherwise insert a new one.
         curfilter = {VariableDesc.type: dic[VariableDesc.type]}
         db[DBTableNames.var_desc].find_one_and_replace(curfilter, dic, upsert=True)
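The find_one_and_replace(..., upsert=True) call above is an 'insert or update'. A sketch of the same behavior with the current PyMongo API via replace_one; the collection name 'VariablesDesc' and the field names are assumptions standing in for DBTableNames.var_desc and VariableDesc.

from pymongo import MongoClient

def upsert_variable(db, var_type, var_unit):
    """Insert a variable description, or replace it if the type already exists."""
    doc = {'TYPE': var_type, 'UNIT': var_unit}
    db['VariablesDesc'].replace_one({'TYPE': var_type}, doc, upsert=True)

# Usage sketch (requires a running MongoDB instance):
# client = MongoClient('127.0.0.1', 27017)
# upsert_variable(client['demo_climate'], 'P', 'mm')
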
Example 4
    def workflow(cfg, main_db, clim_db):
        """Workflow"""
        # 1. Find meteorology and precipitation sites in study area
        thiessen_file_list = [cfg.meteo_sites_thiessen, cfg.prec_sites_thiessen]
        type_list = [DataType.m, DataType.p]

        if not cfg.cluster:  # the entire basin
            ImportHydroClimateSites.find_sites(main_db, cfg.climate_db, cfg.vecs.bsn,
                                               FieldNames.basin, thiessen_file_list,
                                               cfg.thiessen_field, type_list, cfg.storm_mode)
        ImportHydroClimateSites.find_sites(main_db, cfg.climate_db, cfg.vecs.subbsn,
                                           FieldNames.subbasin_id, thiessen_file_list,
                                           cfg.thiessen_field, type_list,
                                           cfg.storm_mode, cfg.cluster)

        # 2. Import geographic information of each site into the Hydro-Climate database
        c_list = clim_db.collection_names()
        tables = [DBTableNames.sites, DBTableNames.var_desc]
        for tb in tables:
            if not StringClass.string_in_list(tb, c_list):
                clim_db.create_collection(tb)
        ImportHydroClimateSites.variable_table(clim_db, cfg.hydro_climate_vars)
        site_m_loc = ImportHydroClimateSites.sites_table(clim_db, cfg.Meteo_sites, DataType.m)
        site_p_loc = ImportHydroClimateSites.sites_table(clim_db, cfg.prec_sites, DataType.p)
        # print (site_m_loc, site_p_loc)
        return site_m_loc, site_p_loc
Example 5
 def get_time_system_from_data_file(in_file):
     """Get the time system from the data file. The basic format is:
        #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
     """
     time_sys = 'LOCALTIME'
     time_zone = time.timezone / -3600
     f = open(in_file)
     for line in f:
         str_line = line
         for LF in LFs:
             if LF in line:
                 str_line = line.split(LF)[0]
                 break
         if str_line[0] != '#':
             break
         if str_line.lower().find('utc') >= 0:
             time_sys = 'UTCTIME'
             time_zone = 0
             break
         if str_line.lower().find('local') >= 0:
             line_list = StringClass.split_string(str_line, [','])
             if len(line_list) == 2 and MathClass.isnumerical(line_list[1]):
                 time_zone = -1 * int(line_list[1])
             break
     f.close()
     return time_sys, time_zone
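The header convention handled above ('#LOCALTIME 8' or '#UTCTIME' on a leading comment line) can be tested on a plain string without opening a file. A minimal sketch; accepting a space as well as a comma between the keyword and the time zone is an assumption beyond the comma-only split used above.

import time

def parse_time_system(header_line):
    """Parse a '#LOCALTIME 8' or '#UTCTIME' style header line.

    Returns (time_system, time_zone); the time zone follows the sign
    convention used above (UTC+8 is stored as -8).
    """
    time_sys = 'LOCALTIME'
    time_zone = time.timezone / -3600.  # default: the machine's local time zone
    line = header_line.strip()
    if not line.startswith('#'):
        return time_sys, time_zone
    body = line.lstrip('#').strip()
    if 'utc' in body.lower():
        return 'UTCTIME', 0
    if 'local' in body.lower():
        parts = body.replace(',', ' ').split()
        if len(parts) == 2 and parts[1].lstrip('+-').isdigit():
            time_zone = -1 * int(parts[1])
    return time_sys, time_zone

print(parse_time_system('#LOCALTIME 8'))  # ('LOCALTIME', -8)
print(parse_time_system('#UTCTIME'))      # ('UTCTIME', 0)
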
Example 6
    def initialize_landcover_parameters(landcover_file,
                                        landcover_initial_fields_file,
                                        dst_dir):
        """generate initial landcover_init_param parameters"""
        lc_data_items = read_data_items_from_txt(landcover_initial_fields_file)
        # print lc_data_items
        field_names = lc_data_items[0]
        lu_id = -1
        for i, v in enumerate(field_names):
            if StringClass.string_match(v, 'LANDUSE_ID'):
                lu_id = i
                break
        data_items = lc_data_items[1:]
        replace_dicts = dict()
        for item in data_items:
            for i, v in enumerate(item):
                if i != lu_id:
                    if field_names[i].upper() not in replace_dicts.keys():
                        replace_dicts[field_names[i].upper()] = {
                            float(item[lu_id]): float(v)
                        }
                    else:
                        replace_dicts[field_names[i].upper()][float(
                            item[lu_id])] = float(v)
        # print replace_dicts

        # Generate GTIFF
        for item, v in replace_dicts.items():
            filename = dst_dir + SEP + item + '.tif'
            print(filename)
            RasterUtilClass.raster_reclassify(landcover_file, v, filename)
        return replace_dicts['LANDCOVER'].values()
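The replace_dicts built above map each attribute to a {landuse_id: value} lookup that is later passed to RasterUtilClass.raster_reclassify. A small in-memory sketch of that construction; the field names and numbers below are illustrative only.

def build_reclassify_maps(field_names, data_items, id_field='LANDUSE_ID'):
    """Build {ATTRIBUTE: {landuse_id: value}} lookups from a lookup table."""
    lu_idx = [f.upper() for f in field_names].index(id_field)
    replace_dicts = {}
    for item in data_items:
        lu_id = float(item[lu_idx])
        for i, v in enumerate(item):
            if i == lu_idx:
                continue
            replace_dicts.setdefault(field_names[i].upper(), {})[lu_id] = float(v)
    return replace_dicts

fields = ['LANDUSE_ID', 'LANDCOVER', 'CN2']
rows = [['1', '1', '67'], ['6', '6', '82']]
print(build_reclassify_maps(fields, rows))
# {'LANDCOVER': {1.0: 1.0, 6.0: 6.0}, 'CN2': {1.0: 67.0, 6.0: 82.0}}
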
Example 7
    def export_landuse_lookup_files_from_mongodb(cfg, maindb):
        """export landuse lookup tables to txt file from MongoDB."""
        lookup_dir = cfg.dirs.lookup
        property_namelist = ModelParamDataUtils.landuse_fields
        property_map = {}
        property_namelist.append('USLE_P')
        query_result = list(maindb['LANDUSELOOKUP'].find())
        if len(query_result) == 0:
            raise RuntimeError(
                "The LANDUSELOOKUP collection does not exist or is empty!")
        count = 0
        for row in query_result:
            # print row
            value_map = dict()
            for i, p_name in enumerate(property_namelist):
                if StringClass.string_match(p_name, "USLE_P"):
                    # Currently, USLE_P is set as 1 for all landuse.
                    value_map[p_name] = 1
                else:
                    if StringClass.string_match(p_name, "Manning"):
                        value_map[p_name] = row.get(p_name) * 10
                    else:
                        value_map[p_name] = row.get(p_name)
            count += 1
            property_map[count] = value_map

        n = len(property_map)
        UtilClass.rmmkdir(lookup_dir)
        for propertyName in property_namelist:
            f = open("%s/%s.txt" % (
                lookup_dir,
                propertyName,
            ), 'w')
            f.write("%d\n" % n)
            for prop_id in property_map:
                s = "%d %f\n" % (prop_id, property_map[prop_id][propertyName])
                f.write(s)
            f.close()
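Each exported lookup file above starts with the record count, followed by one 'id value' pair per line. A standalone sketch of that layout written to a temporary directory; the property name and numbers are made up.

import os
import tempfile

def write_lookup_files(lookup_dir, property_map, property_names):
    """Write one '<property>.txt' per property: the first line is the record
    count, then one 'id value' pair per line."""
    for prop in property_names:
        path = os.path.join(lookup_dir, '%s.txt' % prop)
        with open(path, 'w') as f:
            f.write('%d\n' % len(property_map))
            for rec_id in sorted(property_map):
                f.write('%d %f\n' % (rec_id, property_map[rec_id][prop]))

out_dir = tempfile.mkdtemp()
write_lookup_files(out_dir, {1: {'MANNING': 1.2}, 2: {'MANNING': 0.9}}, ['MANNING'])
print(open(os.path.join(out_dir, 'MANNING.txt')).read())
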
Example 8
def read_data_items_from_txt(txt_file):
    """Read data items include title from text file, each data element are split by TAB or COMMA.
       Be aware, the separator for each line can only be TAB or COMMA, and COMMA is the recommended.
    Args:
        txt_file: full path of text data file
    Returns:
        2D data array
    """
    f = open(txt_file)
    data_items = []
    for line in f:
        str_line = line
        for LF in LFs:
            if LF in line:
                str_line = line.split(LF)[0]
                break

        if str_line != '' and str_line.find('#') < 0:
            line_list = StringClass.split_string(str_line, ['\t'])
            if len(line_list) <= 1:
                line_list = StringClass.split_string(str_line, [','])
            data_items.append(line_list)
    f.close()
    return data_items
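A standalone sketch of the same parsing rules (skip blank lines and any line containing '#', prefer TAB and fall back to COMMA), working on lines already in memory so it can be run without a data file.

def parse_data_lines(lines):
    """Parse TAB- or COMMA-separated lines, skipping blanks and '#' comment lines."""
    data_items = []
    for raw in lines:
        line = raw.strip('\r\n')
        if line == '' or '#' in line:
            continue
        fields = line.split('\t')
        if len(fields) <= 1:  # not TAB-separated, fall back to COMMA
            fields = [fld.strip() for fld in line.split(',')]
        data_items.append(fields)
    return data_items

sample = ['#LOCALTIME 8',
          'STATIONID,YEAR,MONTH,DAY,P',
          '1001,2014,7,1,12.5']
print(parse_data_lines(sample))
# [['STATIONID', 'YEAR', 'MONTH', 'DAY', 'P'], ['1001', '2014', '7', '1', '12.5']]
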
Example 9
    def sites_table(hydro_clim_db, site_file, site_type):
        """Import HydroClimate sites table"""
        sites_loc = dict()
        site_data_items = read_data_items_from_txt(site_file)
        site_flds = site_data_items[0]
        for i in range(1, len(site_data_items)):
            dic = dict()
            for j in range(len(site_data_items[i])):
                if StringClass.string_match(site_flds[j], StationFields.id):
                    dic[StationFields.id] = int(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.name):
                    # unicode(site_data_items[i][j], 'gb2312')
                    dic[StationFields.name] = site_data_items[i][j]
                elif StringClass.string_match(site_flds[j], StationFields.x):
                    dic[StationFields.x] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.y):
                    dic[StationFields.y] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.lon):
                    dic[StationFields.lon] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.lat):
                    dic[StationFields.lat] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.elev):
                    dic[StationFields.elev] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.outlet):
                    dic[StationFields.outlet] = float(site_data_items[i][j])
            dic[StationFields.type] = site_type
            curfilter = {StationFields.id: dic[StationFields.id],
                         StationFields.type: dic[StationFields.type]}
            hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, dic, upsert=True)

            if dic[StationFields.id] not in sites_loc.keys():
                sites_loc[dic[StationFields.id]] = SiteInfo(dic[StationFields.id],
                                                            dic[StationFields.name],
                                                            dic[StationFields.lat],
                                                            dic[StationFields.lon],
                                                            dic[StationFields.x],
                                                            dic[StationFields.y],
                                                            dic[StationFields.elev])
        hydro_clim_db[DBTableNames.sites].create_index([(StationFields.id, ASCENDING),
                                                        (StationFields.type, ASCENDING)])
        return sites_loc
Example 10
    def regular_data_from_txt(climdb, data_file):
        """Regular precipitation data from text file."""
        # delete existed precipitation data
        climdb[DBTableNames.data_values].remove(
            {DataValueFields.type: DataType.p})

        clim_data_items = read_data_items_from_txt(data_file)
        clim_flds = clim_data_items[0]
        station_id = []
        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        count = 0
        for i in range(3, len(clim_flds)):
            station_id.append(clim_flds[i])
        for i, clim_data_item in enumerate(clim_data_items):
            if i == 0:
                continue
            dic = dict()
            precipitation = []
            cur_y = 0
            cur_m = 0
            cur_d = 0
            for j, clim_data_v in enumerate(clim_data_item):
                if StringClass.string_match(clim_flds[j], DataValueFields.y):
                    cur_y = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataValueFields.m):
                    cur_m = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataValueFields.d):
                    cur_d = int(clim_data_v)
                else:
                    for k, cur_id in enumerate(station_id):
                        if StringClass.string_match(clim_flds[j], cur_id):
                            precipitation.append(float(clim_data_v))

            dt = datetime(cur_y, cur_m, cur_d, 0, 0)
            sec = time.mktime(dt.timetuple())
            utc_time = time.gmtime(sec)
            dic[DataValueFields.local_time] = dt
            dic[DataValueFields.time_zone] = time.timezone / 3600.
            dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1],
                                                utc_time[2], utc_time[3])

            for j, cur_id in enumerate(station_id):
                cur_dic = dict()
                cur_dic[DataValueFields.value] = precipitation[j]
                cur_dic[DataValueFields.id] = int(cur_id)
                cur_dic[DataValueFields.type] = DataType.p
                cur_dic[DataValueFields.time_zone] = dic[
                    DataValueFields.time_zone]
                cur_dic[DataValueFields.local_time] = dic[
                    DataValueFields.local_time]
                cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                bulk.insert(cur_dic)
                count += 1
                if count % 500 == 0:  # execute every 500 records
                    bulk.execute()
                    bulk = climdb[
                        DBTableNames.data_values].initialize_ordered_bulk_op()
        if count % 500 != 0:
            bulk.execute()
        # Create index
        climdb[DBTableNames.data_values].create_index([
            (DataValueFields.id, ASCENDING), (DataValueFields.type, ASCENDING),
            (DataValueFields.utc, ASCENDING)
        ])
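The timestamp handling above assumes the text file is recorded in the machine's local time zone: time.mktime converts the local datetime to epoch seconds, and time.gmtime turns those seconds into UTC. A short sketch of just that conversion.

import time
from datetime import datetime

def local_to_utc(year, month, day):
    """Convert a local calendar date to the corresponding UTC datetime,
    the same way the import above does."""
    local_dt = datetime(year, month, day, 0, 0)
    seconds = time.mktime(local_dt.timetuple())  # local time -> epoch seconds
    utc = time.gmtime(seconds)                   # epoch seconds -> UTC struct_time
    utc_dt = datetime(utc[0], utc[1], utc[2], utc[3])
    return local_dt, time.timezone / 3600., utc_dt

print(local_to_utc(2014, 7, 1))
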
Example 11
    def __init__(self, cf):
        # 1. Directories
        self.base_dir = None
        self.clim_dir = None
        self.spatial_dir = None
        self.observe_dir = None
        self.scenario_dir = None
        self.model_dir = None
        self.txt_db_dir = None
        self.preproc_script_dir = None
        self.seims_bin = None
        self.mpi_bin = None
        self.workspace = None
        # 1.1. Directory determined flags
        self.use_observed = True
        self.use_scernario = True
        # 2. MongoDB configuration and database, collation, GridFS names
        self.hostname = '127.0.0.1'  # localhost by default
        self.port = 27017
        self.climate_db = ''
        self.bmp_scenario_db = ''
        self.spatial_db = ''
        # 3. Switch for building SEIMS
        self.cluster = False
        self.storm_mode = False
        self.gen_cn = True
        self.gen_runoff_coef = True
        self.gen_crop = True
        self.gen_iuh = True
        # 4. Climate inputs
        self.hydro_climate_vars = None
        self.prec_sites = None
        self.prec_data = None
        self.Meteo_sites = None
        self.Meteo_data = None
        self.thiessen_field = 'ID'
        # 5. Spatial inputs
        self.prec_sites_thiessen = None
        self.meteo_sites_thiessen = None
        self.dem = None
        self.outlet_file = None
        self.landuse = None
        self.landcover_init_param = None
        self.soil = None
        self.soil_property = None
        self.mgt_field = None
        # 6. Option parameters
        self.is_TauDEM = True
        self.d8acc_threshold = 0
        self.np = 4
        self.d8down_method = 's'
        self.dorm_hr = -1.
        self.temp_base = 0.
        self.imper_perc_in_urban = 0.3
        self.default_reach_depth = 5.
        self.default_landuse = -1
        self.default_soil = -1
        # 1. Directories
        if 'PATH' in cf.sections():
            self.base_dir = cf.get('PATH', 'base_data_dir')
            self.clim_dir = cf.get('PATH', 'climate_data_dir')
            self.spatial_dir = cf.get('PATH', 'spatial_data_dir')
            self.observe_dir = cf.get('PATH', 'measurement_data_dir')
            self.scenario_dir = cf.get('PATH', 'bmp_data_dir')
            self.model_dir = cf.get('PATH', 'model_dir')
            self.txt_db_dir = cf.get('PATH', 'txt_db_dir')
            self.preproc_script_dir = cf.get('PATH', 'preproc_script_dir')
            self.seims_bin = cf.get('PATH', 'cpp_program_dir')
            self.mpi_bin = cf.get('PATH', 'mpiexec_dir')
            self.workspace = cf.get('PATH', 'working_dir')
        else:
            raise ValueError("[PATH] section MUST be existed in *.ini file.")
        if not (FileClass.is_file_exists(self.base_dir)
                and FileClass.is_file_exists(self.model_dir)
                and FileClass.is_file_exists(self.txt_db_dir)
                and FileClass.is_file_exists(self.preproc_script_dir)
                and FileClass.is_file_exists(self.seims_bin)):
            raise IOError(
                "Please Check Directories defined in [PATH]. "
                "BASE_DIR, MODEL_DIR, TXT_DB_DIR, PREPROC_SCRIPT_DIR, "
                "and CPP_PROGRAM_DIR are required!")
        if not FileClass.is_file_exists(self.mpi_bin):
            self.mpi_bin = None
        if not os.path.isdir(self.workspace):
            try:  # first try to make dirs
                os.mkdir(self.workspace)
            except OSError as exc:
                self.workspace = self.model_dir + os.sep + 'preprocess_output'
                print(
                    "WARNING: Failed to create WORKING_DIR: %s. Using the default: %s"
                    % (str(exc), self.workspace))
                if not os.path.exists(self.workspace):
                    os.mkdir(self.workspace)

        self.dirs = DirNameUtils(self.workspace)
        self.logs = LogNameUtils(self.dirs.log)
        self.vecs = VectorNameUtils(self.dirs.geoshp)
        self.taudems = TauDEMFilesUtils(self.dirs.taudem)
        self.spatials = SpatialNamesUtils(self.dirs.geodata2db)
        self.modelcfgs = ModelCfgUtils(self.model_dir)
        self.paramcfgs = ModelParamDataUtils(self.preproc_script_dir + os.sep +
                                             'database')

        if not FileClass.is_file_exists(self.clim_dir):
            print(
                "CLIMATE_DATA_DIR does not exist, trying the default folder name 'climate'."
            )
            self.clim_dir = self.base_dir + os.sep + 'climate'
            if not FileClass.is_file_exists(self.clim_dir):
                raise IOError(
                    "A directory named 'climate' MUST be located in [base_dir]!"
                )

        if not FileClass.is_file_exists(self.spatial_dir):
            print(
                "SPATIAL_DATA_DIR does not exist, trying the default folder name 'spatial'."
            )
            self.spatial_dir = self.base_dir + os.sep + 'spatial'
            if not FileClass.is_file_exists(self.spatial_dir):
                raise IOError(
                    "A directory named 'spatial' MUST be located in [base_dir]!")

        if not FileClass.is_file_exists(self.observe_dir):
            self.observe_dir = None
            self.use_observed = False

        if not FileClass.is_file_exists(self.scenario_dir):
            self.scenario_dir = None
            self.use_scernario = False

        # 2. MongoDB related
        if 'MONGODB' in cf.sections():
            self.hostname = cf.get('MONGODB', 'hostname')
            self.port = cf.getint('MONGODB', 'port')
            self.climate_db = cf.get('MONGODB', 'climatedbname')
            self.bmp_scenario_db = cf.get('MONGODB', 'BMPScenarioDBName')
            self.spatial_db = cf.get('MONGODB', 'SpatialDBName')
        else:
            raise ValueError(
                "[MONGODB] section MUST be existed in *.ini file.")
        if not StringClass.is_valid_ip_addr(self.hostname):
            raise ValueError("HOSTNAME illegal defined in [MONGODB]!")

        # 3. Model related switch
        # by default, OpenMP version and daily (longterm) mode will be built
        if 'SWITCH' in cf.sections():
            self.cluster = cf.getboolean('SWITCH', 'forCluster')
            self.storm_mode = cf.getboolean('SWITCH', 'stormMode')
            self.gen_cn = cf.getboolean('SWITCH', 'genCN')
            self.gen_runoff_coef = cf.getboolean('SWITCH', 'genRunoffCoef')
            self.gen_crop = cf.getboolean('SWITCH', 'genCrop')

        if self.storm_mode:
            self.gen_iuh = False
            self.climate_db = ModelNameUtils.standardize_climate_dbname(
                self.climate_db)

        self.spatial_db = ModelNameUtils.standardize_spatial_dbname(
            self.cluster, self.storm_mode, self.spatial_db)

        # 4. Climate Input
        if 'CLIMATE' in cf.sections():
            self.hydro_climate_vars = self.clim_dir + os.sep + cf.get(
                'CLIMATE', 'hydroclimatevarfile')
            self.prec_sites = self.clim_dir + os.sep + cf.get(
                'CLIMATE', 'precsitefile')
            self.prec_data = self.clim_dir + os.sep + cf.get(
                'CLIMATE', 'precdatafile')
            self.Meteo_sites = self.clim_dir + os.sep + cf.get(
                'CLIMATE', 'meteositefile')
            self.Meteo_data = self.clim_dir + os.sep + cf.get(
                'CLIMATE', 'meteodatafile')
            self.thiessen_field = cf.get('CLIMATE', 'thiessenidfield')
        else:
            raise ValueError(
                "Climate input file names MUST be provided in [CLIMATE]!")

        # 5. Spatial Input
        if 'SPATIAL' in cf.sections():
            self.prec_sites_thiessen = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'precsitesthiessen')
            self.meteo_sites_thiessen = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'meteositesthiessen')
            self.dem = self.spatial_dir + os.sep + cf.get('SPATIAL', 'dem')
            self.outlet_file = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'outlet_file')
            if not os.path.exists(self.outlet_file):
                self.outlet_file = None
            self.landuse = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'landusefile')
            self.landcover_init_param = self.txt_db_dir + os.sep \
                                        + cf.get('SPATIAL', 'landcoverinitfile')
            self.soil = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'soilseqnfile')
            self.soil_property = self.txt_db_dir + os.sep + cf.get(
                'SPATIAL', 'soilseqntext')
            self.mgt_field = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'mgtfieldfile')
            if not os.path.exists(self.mgt_field) or \
                    StringClass.string_match(self.mgt_field, 'none'):
                self.mgt_field = None
        else:
            raise ValueError(
                "Spatial input file names MUST be provided in [SPATIAL]!")

        # 6. Option parameters
        if 'OPTIONAL_PARAMETERS' in cf.sections():
            self.is_TauDEM = cf.getboolean('OPTIONAL_PARAMETERS', 'istaudemd8')
            self.d8acc_threshold = cf.getfloat('OPTIONAL_PARAMETERS',
                                               'd8accthreshold')
            self.np = cf.getint('OPTIONAL_PARAMETERS', 'np')
            self.d8down_method = cf.get('OPTIONAL_PARAMETERS', 'd8downmethod')
            if StringClass.string_match(self.d8down_method, 'surface'):
                self.d8down_method = 's'
            elif StringClass.string_match(self.d8down_method, 'horizontal'):
                self.d8down_method = 'h'
            elif StringClass.string_match(self.d8down_method, 'pythagoras'):
                self.d8down_method = 'p'
            elif StringClass.string_match(self.d8down_method, 'vertical'):
                self.d8down_method = 'v'
            else:
                self.d8down_method = self.d8down_method.lower()
                if self.d8down_method not in ['s', 'h', 'p', 'v']:
                    self.d8down_method = 'h'
            self.dorm_hr = cf.getfloat('OPTIONAL_PARAMETERS', 'dorm_hr')
            self.temp_base = cf.getfloat('OPTIONAL_PARAMETERS', 't_base')
            self.imper_perc_in_urban = cf.getfloat(
                'OPTIONAL_PARAMETERS', 'imperviouspercinurbancell')
            self.default_reach_depth = cf.getfloat('OPTIONAL_PARAMETERS',
                                                   'default_reach_depth')
            self.default_landuse = cf.getint('OPTIONAL_PARAMETERS',
                                             'defaultlanduse')
            self.default_soil = cf.getint('OPTIONAL_PARAMETERS', 'defaultsoil')
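For reference, a minimal sketch of the kind of *.ini content the constructor above reads through ConfigParser. All paths and names are placeholders, and the [SWITCH], [SPATIAL] and [OPTIONAL_PARAMETERS] sections required by the full constructor follow the same key = value pattern.

import configparser  # Python 3; the original code also runs on Python 2's ConfigParser

SAMPLE_INI = """
[PATH]
base_data_dir = C:/SEIMS_demo/data
climate_data_dir = C:/SEIMS_demo/data/climate
spatial_data_dir = C:/SEIMS_demo/data/spatial
measurement_data_dir = C:/SEIMS_demo/data/observed
bmp_data_dir = C:/SEIMS_demo/data/scenario
model_dir = C:/SEIMS_demo/model
txt_db_dir = C:/SEIMS_demo/data/lookup
preproc_script_dir = C:/SEIMS_demo/seims/preprocess
cpp_program_dir = C:/SEIMS_demo/bin
mpiexec_dir = C:/mpi/bin
working_dir = C:/SEIMS_demo/workspace

[MONGODB]
hostname = 127.0.0.1
port = 27017
climatedbname = demo_climate
BMPScenarioDBName = demo_bmp
SpatialDBName = demo_model

[CLIMATE]
hydroclimatevarfile = Variables.csv
precsitefile = Sites_P.csv
precdatafile = pcp_daily.csv
meteositefile = Sites_M.csv
meteodatafile = meteo_daily.csv
thiessenidfield = ID
"""

cf = configparser.ConfigParser()
cf.read_string(SAMPLE_INI)
print(cf.get('MONGODB', 'hostname'), cf.getint('MONGODB', 'port'))
print(cf.get('PATH', 'working_dir'))
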
Example 12
    def scenario_from_texts(cfg, main_db, scenario_db):
        """Import BMPs Scenario data to MongoDB
        Args:
            cfg: SEIMS configuration object
            main_db: workflow model database
            scenario_db: scenario database
        Returns:
            False if failed, otherwise True.
        """
        if not cfg.use_scernario:
            return False
        print ("Import BMP Scenario Data... ")
        bmp_files = FileClass.get_filename_by_suffixes(cfg.scenario_dir, ['.txt'])
        bmp_tabs = []
        bmp_tabs_path = []
        for f in bmp_files:
            bmp_tabs.append(f.split('.')[0])
            bmp_tabs_path.append(cfg.scenario_dir + SEP + f)

        # create the collection if it does not exist; otherwise drop it
        c_list = scenario_db.collection_names()
        for item in bmp_tabs:
            if not StringClass.string_in_list(item.upper(), c_list):
                scenario_db.create_collection(item.upper())
            else:
                scenario_db.drop_collection(item.upper())
        # Read subbasin.tif and dist2Stream.tif
        subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
        dist2stream_r = RasterUtilClass.read_raster(cfg.spatials.dist2stream_d8)
        # End reading
        for j, bmp_txt in enumerate(bmp_tabs_path):
            bmp_tab_name = bmp_tabs[j]
            data_array = read_data_items_from_txt(bmp_txt)
            field_array = data_array[0]
            data_array = data_array[1:]
            for item in data_array:
                dic = {}
                for i, field_name in enumerate(field_array):
                    if MathClass.isnumerical(item[i]):
                        dic[field_name.upper()] = float(item[i])
                    else:
                        dic[field_name.upper()] = str(item[i]).upper()
                if StringClass.string_in_list(ImportScenario2Mongo._LocalX, dic.keys()) and \
                        StringClass.string_in_list(ImportScenario2Mongo._LocalY, dic.keys()):
                    subbsn_id = subbasin_r.get_value_by_xy(
                            dic[ImportScenario2Mongo._LocalX.upper()],
                            dic[ImportScenario2Mongo._LocalY.upper()])
                    distance = dist2stream_r.get_value_by_xy(
                            dic[ImportScenario2Mongo._LocalX.upper()],
                            dic[ImportScenario2Mongo._LocalY.upper()])
                    if subbsn_id is not None and distance is not None:
                        dic[ImportScenario2Mongo._SUBBASINID] = float(subbsn_id)
                        dic[ImportScenario2Mongo._DISTDOWN] = float(distance)
                        scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic,
                                                                               upsert=True)
                else:
                    scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic,
                                                                           upsert=True)
        # print 'BMP tables are imported.'
        # Write BMP database name into Model workflow database
        c_list = main_db.collection_names()
        if not StringClass.string_in_list(DBTableNames.main_scenario, c_list):
            main_db.create_collection(DBTableNames.main_scenario)

        bmp_info_dic = dict()
        bmp_info_dic[ImportScenario2Mongo._FLD_DB] = cfg.bmp_scenario_db
        main_db[DBTableNames.main_scenario].find_one_and_replace(bmp_info_dic, bmp_info_dic,
                                                                 upsert=True)
        return True
    def lookup_tables_as_collection_and_gridfs(cfg, maindb):
        """Import lookup tables (from txt file) as Collection and GridFS
        Args:
            cfg: SEIMS config object
            maindb: workflow model database
        """
        for tablename, txt_file in cfg.paramcfgs.lookup_tabs_dict.items():
            # import each lookup table as a collection and GridFS file.
            c_list = maindb.collection_names()
            if not StringClass.string_in_list(tablename.upper(), c_list):
                maindb.create_collection(tablename.upper())
            else:
                maindb.drop_collection(tablename.upper())
            # initialize the bulk operator
            bulk = maindb[tablename.upper()].initialize_ordered_bulk_op()
            # delete the GridFS file named after this table if it already exists
            spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
            if spatial.exists(filename=tablename.upper()):
                x = spatial.get_version(filename=tablename.upper())
                spatial.delete(x._id)

            # read data items
            data_items = read_data_items_from_txt(txt_file)
            field_names = data_items[0][0:]
            item_values = []  # import as gridfs file
            for i, cur_data_item in enumerate(data_items):
                if i == 0:
                    continue
                data_import = dict()  # import as Collection
                item_value = []  # import as gridfs file
                for idx, fld in enumerate(field_names):
                    if MathClass.isnumerical(cur_data_item[idx]):
                        tmp_value = float(cur_data_item[idx])
                        data_import[fld] = tmp_value
                        item_value.append(tmp_value)
                    else:
                        data_import[fld] = cur_data_item[idx]
                bulk.insert(data_import)
                if len(item_value) > 0:
                    item_values.append(item_value)
            bulk.execute()
            # begin import gridfs file
            n_row = len(item_values)
            # print (item_values)
            if n_row >= 1:
                n_col = len(item_values[0])
                for i in range(n_row):
                    if n_col != len(item_values[i]):
                        raise ValueError("Please check %s to make sure each item has "
                                         "the same numeric dimension. The size of first "
                                         "row is: %d, and the current data item is: %d" %
                                         (tablename, n_col, len(item_values[i])))
                    else:
                        item_values[i].insert(0, n_col)

                metadic = {ModelParamDataUtils.item_count: n_row,
                           ModelParamDataUtils.field_count: n_col}
                cur_lookup_gridfs = spatial.new_file(filename=tablename.upper(), metadata=metadic)
                header = [n_row]
                fmt = '%df' % 1
                s = pack(fmt, *header)
                cur_lookup_gridfs.write(s)
                fmt = '%df' % (n_col + 1)
                for i in range(n_row):
                    s = pack(fmt, *item_values[i])
                    cur_lookup_gridfs.write(s)
                cur_lookup_gridfs.close()
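The GridFS payload written above is packed binary: one float holding the row count, then each row stored as (column_count, v1, ..., vn) floats. A small sketch of that packing and the matching unpack, using only struct; the sample numbers are arbitrary.

from struct import pack, unpack

def pack_lookup(item_values):
    """Pack rows as [n_row], then per row [n_col, v1, ..., vn], all 4-byte floats."""
    n_row = len(item_values)
    n_col = len(item_values[0])
    payload = pack('1f', n_row)
    for row in item_values:
        payload += pack('%df' % (n_col + 1), n_col, *row)
    return payload

data = [[1., 67., 0.25], [6., 82., 0.5]]
blob = pack_lookup(data)
print(unpack('1f', blob[:4]))    # (2.0,): number of rows
print(unpack('4f', blob[4:20]))  # (3.0, 1.0, 67.0, 0.25): first row with its column count
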
    def daily_data_from_txt(climdb, data_txt_file, sites_info_dict):
        """Import climate data table"""
        # delete existing meteorological data
        climdb[DBTableNames.data_values].remove(
            {DataValueFields.type: DataType.m})

        clim_data_items = read_data_items_from_txt(data_txt_file)
        clim_flds = clim_data_items[0]
        # PHUCalDic is used for Calculating potential heat units (PHU)
        # for each climate station and each year.
        # format is {StationID:{Year1:[values],Year2:[Values]...}, ...}
        # PHUCalDic = {}
        # format: {StationID1: climateStats1, ...}
        hydro_climate_stats = {}
        required_flds = [
            DataValueFields.y, DataValueFields.m, DataValueFields.d,
            DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws
        ]
        for fld in required_flds:
            if not StringClass.string_in_list(fld, clim_flds):
                raise ValueError(
                    "Meteorological Daily data is invalid, please Check!")
        # Create bulk object
        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        count = 0
        for i, cur_clim_data_item in enumerate(clim_data_items):
            if i == 0:
                continue
            dic = dict()
            cur_ssd = DEFAULT_NODATA
            cur_y = 0
            cur_m = 0
            cur_d = 0
            for j, clim_data_v in enumerate(cur_clim_data_item):
                if StringClass.string_match(clim_flds[j], DataValueFields.id):
                    dic[DataValueFields.id] = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataValueFields.y):
                    cur_y = int(clim_data_v)
                    dic[DataValueFields.y] = cur_y
                elif StringClass.string_match(clim_flds[j], DataValueFields.m):
                    cur_m = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataValueFields.d):
                    cur_d = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.mean_tmp):
                    dic[DataType.mean_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.min_tmp):
                    dic[DataType.min_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.max_tmp):
                    dic[DataType.max_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.pet):
                    dic[DataType.pet] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.sr):
                    dic[DataType.sr] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.ws):
                    dic[DataType.ws] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.rm):
                    dic[DataType.rm] = float(clim_data_v) * 0.01
                elif StringClass.string_match(clim_flds[j], DataType.ssd):
                    cur_ssd = float(clim_data_v)
            # Date transformation
            dt = datetime(cur_y, cur_m, cur_d, 0, 0)
            sec = time.mktime(dt.timetuple())
            utc_time = time.gmtime(sec)
            dic[DataValueFields.local_time] = dt
            dic[DataValueFields.time_zone] = time.timezone / 3600
            dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1],
                                                utc_time[2], utc_time[3])

            # Derive values when some of these data are not provided
            if DataType.mean_tmp not in dic.keys():
                dic[DataType.mean_tmp] = (dic[DataType.max_tmp] +
                                          dic[DataType.min_tmp]) / 2.
            if DataType.sr not in dic.keys():
                if cur_ssd == DEFAULT_NODATA:
                    raise ValueError(DataType.sr + " or " + DataType.ssd +
                                     " must be provided!")
                else:
                    if dic[DataValueFields.id] in sites_info_dict.keys():
                        cur_lon, cur_lat = sites_info_dict[dic[
                            DataValueFields.id]].lon_lat()
                        dic[DataType.sr] = round(
                            HydroClimateUtilClass.rs(DateClass.day_of_year(dt),
                                                     float(cur_ssd),
                                                     cur_lat * PI / 180.), 1)
            output_flds = [
                DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp,
                DataType.rm, DataType.pet, DataType.ws, DataType.sr
            ]
            for fld in output_flds:
                cur_dic = dict()
                if fld in dic.keys():
                    cur_dic[DataValueFields.value] = dic[fld]
                    cur_dic[DataValueFields.id] = dic[DataValueFields.id]
                    cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                    cur_dic[DataValueFields.time_zone] = dic[
                        DataValueFields.time_zone]
                    cur_dic[DataValueFields.local_time] = dic[
                        DataValueFields.local_time]
                    cur_dic[DataValueFields.type] = fld
                    # The old code inserted or updated one item at a time, which was
                    # quite inefficient; it now uses the bulk operation interface.
                    bulk.insert(cur_dic)
                    count += 1
                    if count % 500 == 0:  # execute every 500 records
                        bulk.execute()
                        bulk = climdb[
                            DBTableNames.
                            data_values].initialize_ordered_bulk_op()

            if dic[DataValueFields.id] not in hydro_climate_stats.keys():
                hydro_climate_stats[dic[DataValueFields.id]] = ClimateStats()
            hydro_climate_stats[dic[DataValueFields.id]].add_item(dic)
        # execute the remaining records
        if count % 500 != 0:
            bulk.execute()
        for item, cur_climate_stats in hydro_climate_stats.items():
            cur_climate_stats.annual_stats()
        # Create index
        climdb[DBTableNames.data_values].create_index([
            (DataValueFields.id, ASCENDING), (DataValueFields.type, ASCENDING),
            (DataValueFields.utc, ASCENDING)
        ])
        # prepare dic for MongoDB
        for s_id, stats_v in hydro_climate_stats.items():
            for YYYY in stats_v.Count.keys():
                cur_dic = dict()
                cur_dic[DataValueFields.value] = stats_v.PHUTOT[YYYY]
                cur_dic[DataValueFields.id] = s_id
                cur_dic[DataValueFields.y] = YYYY
                cur_dic[VariableDesc.unit] = "heat units"
                cur_dic[VariableDesc.type] = DataType.phu_tot
                curfilter = {
                    DataValueFields.id: s_id,
                    VariableDesc.type: DataType.phu_tot,
                    DataValueFields.y: YYYY
                }
                climdb[DBTableNames.annual_stats].find_one_and_replace(
                    curfilter, cur_dic, upsert=True)
                # import annual mean temperature
                cur_dic[VariableDesc.type] = DataType.mean_tmp
                cur_dic[VariableDesc.unit] = "deg C"
                cur_dic[DataValueFields.value] = stats_v.MeanTmp[YYYY]
                curfilter = {
                    DataValueFields.id: s_id,
                    VariableDesc.type: DataType.mean_tmp,
                    DataValueFields.y: YYYY
                }
                climdb[DBTableNames.annual_stats].find_one_and_replace(
                    curfilter, cur_dic, upsert=True)
            cur_dic[DataValueFields.value] = stats_v.PHU0
            cur_dic[DataValueFields.id] = s_id
            cur_dic[DataValueFields.y] = DEFAULT_NODATA
            cur_dic[VariableDesc.unit] = "heat units"
            cur_dic[VariableDesc.type] = DataType.phu0
            curfilter = {
                DataValueFields.id: s_id,
                VariableDesc.type: DataType.phu0,
                DataValueFields.y: DEFAULT_NODATA
            }
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter,
                                                                   cur_dic,
                                                                   upsert=True)
            # import annual mean temperature
            cur_dic[VariableDesc.type] = DataType.mean_tmp0
            cur_dic[VariableDesc.unit] = "deg C"
            cur_dic[DataValueFields.value] = stats_v.MeanTmp0
            curfilter = {
                DataValueFields.id: s_id,
                VariableDesc.type: DataType.mean_tmp0,
                DataValueFields.y: DEFAULT_NODATA
            }
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter,
                                                                   cur_dic,
                                                                   upsert=True)
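Both climate importers above flush the bulk operator every 500 records, plus once more for the final partial batch. A stripped-down sketch of that batching pattern with a plain callback standing in for bulk.execute(); flushing only when the last batch is non-empty is a slightly safer variant of the count % 500 check used above.

def batched_insert(records, flush, batch_size=500):
    """Collect records and flush them in batches of batch_size,
    with one final flush for any remaining partial batch."""
    buf = []
    count = 0
    for rec in records:
        buf.append(rec)
        count += 1
        if count % batch_size == 0:
            flush(buf)
            buf = []
    if buf:  # remaining records
        flush(buf)
    return count

total = batched_insert(({'value': i} for i in range(1250)),
                       lambda batch: print('flushing %d docs' % len(batch)))
print(total)  # 1250, flushed as 500 + 500 + 250
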
    def model_io_configuration(cfg, maindb):
        """
        Import Input and Output Configuration of SEIMS, i.e., file.in and file.out
        Args:
            cfg: SEIMS config object
            maindb: MongoDB database object
        """
        file_in_path = cfg.modelcfgs.filein
        file_out_path = cfg.paramcfgs.init_outputs_file
        # create the collection if it does not exist; otherwise drop it
        c_list = maindb.collection_names()
        conf_tabs = [DBTableNames.main_filein, DBTableNames.main_fileout]
        for item in conf_tabs:
            if not StringClass.string_in_list(item, c_list):
                maindb.create_collection(item)
            else:
                maindb.drop_collection(item)
        file_in_items = read_data_items_from_txt(file_in_path)
        file_out_items = read_data_items_from_txt(file_out_path)

        for item in file_in_items:
            file_in_dict = dict()
            values = StringClass.split_string(StringClass.strip_string(item[0]), ['|'])
            if len(values) != 2:
                raise ValueError("One item should only have one Tag and one value string,"
                                 " split by '|'")
            file_in_dict[ModelCfgFields.tag] = values[0]
            file_in_dict[ModelCfgFields.value] = values[1]
            maindb[DBTableNames.main_filein].insert(file_in_dict)

        # begin to import initial outputs settings
        bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
        out_field_array = file_out_items[0]
        out_data_array = file_out_items[1:]
        # print out_data_array
        for item in out_data_array:
            file_out_dict = dict()
            for i, v in enumerate(out_field_array):
                if StringClass.string_match(ModelCfgFields.mod_cls, v):
                    file_out_dict[ModelCfgFields.mod_cls] = item[i]
                elif StringClass.string_match(ModelCfgFields.output_id, v):
                    file_out_dict[ModelCfgFields.output_id] = item[i]
                elif StringClass.string_match(ModelCfgFields.desc, v):
                    file_out_dict[ModelCfgFields.desc] = item[i]
                elif StringClass.string_match(ModelCfgFields.unit, v):
                    file_out_dict[ModelCfgFields.unit] = item[i]
                elif StringClass.string_match(ModelCfgFields.type, v):
                    file_out_dict[ModelCfgFields.type] = item[i]
                elif StringClass.string_match(ModelCfgFields.stime, v):
                    file_out_dict[ModelCfgFields.stime] = item[i]
                elif StringClass.string_match(ModelCfgFields.etime, v):
                    file_out_dict[ModelCfgFields.etime] = item[i]
                elif StringClass.string_match(ModelCfgFields.interval, v):
                    file_out_dict[ModelCfgFields.interval] = item[i]
                elif StringClass.string_match(ModelCfgFields.interval_unit, v):
                    file_out_dict[ModelCfgFields.interval_unit] = item[i]
                elif StringClass.string_match(ModelCfgFields.filename, v):
                    file_out_dict[ModelCfgFields.filename] = item[i]
                elif StringClass.string_match(ModelCfgFields.use, v):
                    file_out_dict[ModelCfgFields.use] = item[i]
                elif StringClass.string_match(ModelCfgFields.subbsn, v):
                    file_out_dict[ModelCfgFields.subbsn] = item[i]
            if not file_out_dict:
                raise ValueError("There is no valid output item stored in file.out!")
            bulk.insert(file_out_dict)
        bulk.execute()

        # begin to import the desired outputs
        # create bulk operator
        bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
        # read initial parameters from txt file
        data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
        # print (field_names)
        for i, cur_data_item in enumerate(data_items):
            data_import = dict()
            cur_filter = dict()
            # print (cur_data_item)
            if len(cur_data_item) == 7:
                data_import[ModelCfgFields.output_id] = cur_data_item[0]
                data_import[ModelCfgFields.type] = cur_data_item[1]
                data_import[ModelCfgFields.stime] = cur_data_item[2]
                data_import[ModelCfgFields.etime] = cur_data_item[3]
                data_import[ModelCfgFields.interval] = cur_data_item[4]
                data_import[ModelCfgFields.interval_unit] = cur_data_item[5]
                data_import[ModelCfgFields.subbsn] = cur_data_item[6]
                data_import[ModelCfgFields.use] = 1
                cur_filter[ModelCfgFields.output_id] = cur_data_item[0]
            else:
                raise RuntimeError("Items in file.out must have 7 columns, i.e., OUTPUTID,"
                                   "TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.")

            bulk.find(cur_filter).update({'$set': data_import})
        # execute import operators
        bulk.execute()
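Each item in file.in handled above is a single 'TAG|value' pair. A tiny standalone sketch of that parsing; the 'TAG' and 'VALUE' keys are assumptions standing in for ModelCfgFields.tag and ModelCfgFields.value.

def parse_file_in_line(line):
    """Split a 'TAG|value' line from file.in into a small dict."""
    values = [v.strip() for v in line.strip().split('|')]
    if len(values) != 2:
        raise ValueError("Each item should have exactly one tag and one value string,"
                         " separated by '|': %r" % line)
    return {'TAG': values[0], 'VALUE': values[1]}

print(parse_file_in_line('MODE|Daily'))
print(parse_file_in_line('INTERVAL|1'))
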
Example 16
    def data_from_txt(hydro_clim_db, obs_txts_list, sites_info_txts_list,
                      subbsn_file):
        """
        Read observed data from txt file
        Args:
            hydro_clim_db: hydro-climate database
            obs_txts_list: txt file paths of observed data
            sites_info_txts_list: txt file paths of site information
            subbsn_file: subbasin raster file

        Returns:
            True or False
        """
        # 1. Read monitor station information, and store variables information and station IDs
        variable_lists = []
        site_ids = []
        for site_file in sites_info_txts_list:
            site_data_items = read_data_items_from_txt(site_file)
            site_flds = site_data_items[0]
            for i in range(1, len(site_data_items)):
                dic = {}
                for j in range(len(site_data_items[i])):
                    if StringClass.string_match(site_flds[j],
                                                StationFields.id):
                        dic[StationFields.id] = int(site_data_items[i][j])
                        site_ids.append(dic[StationFields.id])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.name):
                        dic[StationFields.name] = StringClass.strip_string(
                            site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.type):
                        types = StringClass.split_string(
                            StringClass.strip_string(site_data_items[i][j]),
                            ',')
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.lat):
                        dic[StationFields.lat] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.lon):
                        dic[StationFields.lon] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.x):
                        dic[StationFields.x] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.y):
                        dic[StationFields.y] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.unit):
                        dic[StationFields.unit] = StringClass.strip_string(
                            site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.elev):
                        dic[StationFields.elev] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.outlet):
                        dic[StationFields.outlet] = float(
                            site_data_items[i][j])

                for j, cur_type in enumerate(types):
                    site_dic = dict()
                    site_dic[StationFields.id] = dic[StationFields.id]
                    site_dic[StationFields.name] = dic[StationFields.name]
                    site_dic[StationFields.type] = cur_type
                    site_dic[StationFields.lat] = dic[StationFields.lat]
                    site_dic[StationFields.lon] = dic[StationFields.lon]
                    site_dic[StationFields.x] = dic[StationFields.x]
                    site_dic[StationFields.y] = dic[StationFields.y]
                    site_dic[StationFields.elev] = dic[StationFields.elev]
                    site_dic[StationFields.outlet] = dic[StationFields.outlet]
                    # Add SubbasinID field
                    matched, cur_subbsn_id = ImportObservedData.match_subbasin(
                        subbsn_file, site_dic)
                    if not matched:
                        break
                    cur_subbsn_id_str = ''
                    for tmp_id in cur_subbsn_id:
                        if tmp_id is not None:
                            cur_subbsn_id_str += str(tmp_id) + ','
                    cur_subbsn_id_str = cur_subbsn_id_str[:-1]
                    site_dic[StationFields.id] = cur_subbsn_id_str
                    curfilter = {
                        StationFields.id: site_dic[StationFields.id],
                        StationFields.type: site_dic[StationFields.type]
                    }
                    # print (curfilter)
                    hydro_clim_db[DBTableNames.sites].find_one_and_replace(
                        curfilter, site_dic, upsert=True)

                    var_dic = dict()
                    var_dic[StationFields.type] = types[j]
                    var_dic[StationFields.unit] = dic[StationFields.unit]
                    if var_dic not in variable_lists:
                        variable_lists.append(var_dic)
        site_ids = list(set(site_ids))
        # 2. Read measurement data and import to MongoDB
        bulk = hydro_clim_db[
            DBTableNames.observes].initialize_ordered_bulk_op()
        count = 0
        for measDataFile in obs_txts_list:
            # print measDataFile
            obs_data_items = read_data_items_from_txt(measDataFile)
            # If the data items are EMPTY or there is only a header row, skip to
            # the next data file.
            if obs_data_items == [] or len(obs_data_items) == 1:
                continue
            obs_flds = obs_data_items[0]
            required_flds = [
                StationFields.id, DataValueFields.y, DataValueFields.m,
                DataValueFields.d, DataValueFields.type, DataValueFields.value
            ]
            for fld in required_flds:
                if not StringClass.string_in_list(
                        fld, obs_flds):  # the data do not meet the requirement!
                    raise ValueError(
                        "The file %s does not meet the required format!" %
                        measDataFile)
            for i in range(1, len(obs_data_items)):
                dic = dict()
                cur_y = 0
                cur_m = 0
                cur_d = 0
                for j in range(len(obs_data_items[i])):
                    if StringClass.string_match(obs_flds[j], StationFields.id):
                        dic[StationFields.id] = int(obs_data_items[i][j])
                        # if current site ID is not included, goto next data item
                        if dic[StationFields.id] not in site_ids:
                            continue
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.y):
                        cur_y = int(obs_data_items[i][j])
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.m):
                        cur_m = int(obs_data_items[i][j])
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.d):
                        cur_d = int(obs_data_items[i][j])
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.type):
                        dic[DataValueFields.type] = obs_data_items[i][j]
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.value):
                        dic[DataValueFields.value] = float(
                            obs_data_items[i][j])
                dt = datetime(cur_y, cur_m, cur_d, 0, 0)
                sec = time.mktime(dt.timetuple())
                utc_time = time.gmtime(sec)
                dic[DataValueFields.local_time] = dt
                dic[DataValueFields.time_zone] = time.timezone / 3600
                dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1],
                                                    utc_time[2], utc_time[3])
                curfilter = {
                    StationFields.id: dic[StationFields.id],
                    DataValueFields.type: dic[DataValueFields.type],
                    DataValueFields.utc: dic[DataValueFields.utc]
                }
                bulk.find(curfilter).replace_one(dic)
                count += 1
                if count % 500 == 0:
                    bulk.execute()
                    bulk = hydro_clim_db[
                        DBTableNames.observes].initialize_ordered_bulk_op()
                    # db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
        if count % 500 != 0:
            bulk.execute()
        # 3. Add measurement data with unit converted
        # loop variables list
        added_dics = []
        for curVar in variable_lists:
            # print curVar
            # If the unit is mg/L, append the suffix "Conc" to the type name
            # and convert the corresponding value to kg when discharge data
            # are available.
            cur_type = curVar[StationFields.type]
            cur_unit = curVar[StationFields.unit]
            # Find data by Type
            for item in hydro_clim_db[DBTableNames.observes].find(
                {StationFields.type: cur_type}):
                # print item
                dic = dict()
                dic[StationFields.id] = item[StationFields.id]
                dic[DataValueFields.value] = item[DataValueFields.value]
                dic[StationFields.type] = item[StationFields.type]
                dic[DataValueFields.local_time] = item[
                    DataValueFields.local_time]
                dic[DataValueFields.time_zone] = item[
                    DataValueFields.time_zone]
                dic[DataValueFields.utc] = item[DataValueFields.utc]

                if cur_unit == "mg/L":
                    # update the Type name
                    dic[StationFields.type] = cur_type + "Conc"
                    curfilter = {
                        StationFields.id: dic[StationFields.id],
                        DataValueFields.type: cur_type,
                        DataValueFields.utc: dic[DataValueFields.utc]
                    }
                    hydro_clim_db[DBTableNames.observes].find_one_and_replace(
                        curfilter, dic, upsert=True)
                    dic[StationFields.type] = cur_type

                # find discharge on current day
                cur_filter = {
                    StationFields.type: "Q",
                    DataValueFields.utc: dic[DataValueFields.utc],
                    StationFields.id: dic[StationFields.id]
                }
                q_dic = hydro_clim_db[DBTableNames.observes].find_one(
                    filter=cur_filter)

                q = -9999.
                if q_dic is not None:  # and q_dic.has_key(DataValueFields.value):
                    q = q_dic[DataValueFields.value]
                else:
                    continue
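                # With concentration c in mg/L (equivalent to g/m3) and discharge
                # q in m3/s, the daily load is c * q * 86400 / 1000 (kg); the
                # inverse expression converts a daily load in kg back to mg/L.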
                if cur_unit == "mg/L":
                    # convert mg/L to kg
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] * q * 86400. / 1000., 2)
                elif cur_unit == "kg":
                    dic[StationFields.type] = cur_type + "Conc"
                    # convert kg to mg/L
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] / q * 1000. / 86400., 2)
                # add new data item
                added_dics.append(dic)
        # import to MongoDB
        for dic in added_dics:
            curfilter = {
                StationFields.id: dic[StationFields.id],
                DataValueFields.type: dic[DataValueFields.type],
                DataValueFields.utc: dic[DataValueFields.utc]
            }
            hydro_clim_db[DBTableNames.observes].find_one_and_replace(
                curfilter, dic, upsert=True)
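
The ordered bulk operator used above (initialize_ordered_bulk_op) was deprecated in PyMongo 3.x and removed in PyMongo 4.0. A minimal sketch of the same batched-upsert pattern with the current bulk_write API follows; the collection handle and the field keys shown here are placeholders, not SEIMS identifiers.

from pymongo import MongoClient, ReplaceOne


def bulk_upsert_observations(collection, records, batch_size=500):
    """Upsert observation dicts in ordered batches of ReplaceOne requests."""
    requests = []
    for rec in records:
        # Key each record by station ID, variable type, and UTC time,
        # mirroring the upsert filter used in the import code above.
        flt = {'StationID': rec['StationID'],
               'Type': rec['Type'],
               'UTCDateTime': rec['UTCDateTime']}
        requests.append(ReplaceOne(flt, rec, upsert=True))
        if len(requests) == batch_size:
            collection.bulk_write(requests, ordered=True)
            requests = []
    if requests:  # flush the final partial batch
        collection.bulk_write(requests, ordered=True)

# Hypothetical usage:
# client = MongoClient('localhost', 27017)
# bulk_upsert_observations(client['demo_db']['OBSERVES'], parsed_records)
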
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """
    Interpolate irregularly observed data to regular time-interval data.
    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Field names can be PCP, FLOW, or SED;
                 the units are mm/h, m3/s, and g/L (i.e., kg/m3), respectively.
        time_interval: time interval in minutes, e.g., 1440 for daily output
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system
                    is given by time_sys_output.
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If True, time intervals without original records
                        are not written to the output.
        time_sys_output: time system of the output; the format must be
                  '<time_system> [<time_zone>]', e.g.,
                  'LOCALTIME'
                  'LOCALTIME 8'
                  'UTCTIME' (default)
        day_divided_hour: If the time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23. e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59
    Returns:
        The output data files are located in the same directory as the input file.
        The nomenclature is: <field name>_<time system>_<time interval>_<nonzero>, e.g.,
        pcp_utctime_1440_nonzero.txt, flow_localtime_60.txt
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError("Day divided hour must range from 0 to 23!")
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError("DATETIME must be one of the fields!")
    # available field
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_avaiable_field(cur_fld):
        """Check if the given field name is supported."""
        support_flag = False
        for fff in available_flds:
            if fff.lower() in cur_fld.lower():
                support_flag = True
                break
        return support_flag

    ord_data = OrderedDict()
    time_zone_output = time.timezone / -3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = HydroClimateUtilClass.get_datetime_from_string(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = []
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    # print (ord_data)
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = HydroClimateUtilClass.get_datetime_from_string(start_time)
    edatetime = HydroClimateUtilClass.get_datetime_from_string(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        # print (item_dtime)
        # if item_dtime.month == 12 and item_dtime.day == 31:
        #     print ("debug")
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = []
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in ord_data.items():
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # add only the single record immediately preceding sdt
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # append the end time for convenience of computation
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # if eliminate time interval without original records
        # initial interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            if 'SED' in v_name.upper():  # FLOW must exist to interpolate SED
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError("To interpolate SED, FLOW must be provided!")
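        # Each original record is treated as constant from its own timestamp to
        # the next one: FLOW becomes the time-weighted mean discharge (m3/s),
        # SED the flow-weighted mean concentration (g/L), and PCP is accumulated
        # from mm/h to a depth in mm over the interval.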
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print ("WARNING: Flow is 0 for %s, please check!" %
                           item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():  # the input is mm/h, and output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta

    # for i, v in itp_data.items():
    #     print (i, v)
    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_avaiable_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.txt'
        out_file = work_path + os.sep + file_name
        with open(out_file, 'w') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in itp_data.items():
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
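
A minimal usage sketch, assuming an input file observed_flow.txt laid out as described in the docstring (the path, values, and date range below are hypothetical):

# observed_flow.txt (hypothetical contents):
#   #UTCTIME
#   DATETIME,FLOW
#   2013-02-03 08:30:00,1.25
#   2013-02-03 14:10:00,2.40
#   ...
interpolate_observed_data_to_regular_interval(
    'C:/data/observed_flow.txt',    # hypothetical path
    time_interval=1440,             # daily output
    start_time='2013-02-03 00:00:00',
    end_time='2013-12-31 00:00:00',
    eliminate_zero=False,
    time_sys_output='UTCTIME',
    day_divided_hour=0)
# Expected result: FLOW_UTCTIME_1440.txt written next to the input file.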