def regular_data_from_txt(climdb, data_file):
        """Regular precipitation data from text file."""
        # delete existed precipitation data
        climdb[DBTableNames.data_values].remove(
            {DataValueFields.type: DataType.p})
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(
            data_file)
        if tsysin == 'UTCTIME':
            tzonein = time.timezone / -3600
        clim_data_items = read_data_items_from_txt(data_file)
        clim_flds = clim_data_items[0]
        station_id = list()
        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        count = 0
        for fld in clim_flds:
            if not StringClass.string_in_list(fld, [
                    DataValueFields.dt, DataValueFields.y, DataValueFields.m,
                    DataValueFields.d, DataValueFields.hour,
                    DataValueFields.minute, DataValueFields.second
            ]):
                station_id.append(fld)
        for i, clim_data_item in enumerate(clim_data_items):
            if i == 0:
                continue
            dic = dict()
            precipitation = list()

            for j, clim_data_v in enumerate(clim_data_item):
                if StringClass.string_in_list(clim_flds[j], station_id):
                    precipitation.append(float(clim_data_v))
            utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(
                clim_flds, clim_data_item, tsysin, tzonein)
            dic[DataValueFields.local_time] = utc_time + timedelta(
                minutes=tzonein * 60)
            dic[DataValueFields.time_zone] = tzonein
            dic[DataValueFields.utc] = utc_time

            for j, cur_id in enumerate(station_id):
                cur_dic = dict()
                cur_dic[DataValueFields.value] = precipitation[j]
                cur_dic[DataValueFields.id] = int(cur_id)
                cur_dic[DataValueFields.type] = DataType.p
                cur_dic[DataValueFields.time_zone] = dic[
                    DataValueFields.time_zone]
                cur_dic[DataValueFields.local_time] = dic[
                    DataValueFields.local_time]
                cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                bulk.insert(cur_dic)
                count += 1
                if count % 500 == 0:  # execute every 500 records
                    MongoUtil.run_bulk(bulk)
                    bulk = climdb[
                        DBTableNames.data_values].initialize_ordered_bulk_op()
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)
        # Create index
        climdb[DBTableNames.data_values].create_index([
            (DataValueFields.id, ASCENDING), (DataValueFields.type, ASCENDING),
            (DataValueFields.utc, ASCENDING)
        ])
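For orientation, a minimal driver might look like the sketch below. Only the regular_data_from_txt signature comes from the example above; the ImportPrecipitation host class, database name, and file path are assumptions for illustration.

from pymongo import MongoClient

# Hypothetical usage; 'demo_climate' and the file path are placeholders, and
# ImportPrecipitation is the assumed class holding the static method above.
client = MongoClient('localhost', 27017)
climate_db = client['demo_climate']
ImportPrecipitation.regular_data_from_txt(climate_db, r'/data/precipitation_daily.txt')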
Example #3
    def data_from_txt(maindb, hydro_clim_db, obs_txts_list,
                      sites_info_txts_list, subbsn_file):
        """
        Read observed data from txt file
        Args:
            maindb: Main spatial database
            hydro_clim_db: hydro-climate database
            obs_txts_list: txt file paths of observed data
            sites_info_txts_list: txt file paths of site information
            subbsn_file: subbasin raster file

        Returns:
            True or False
        """
        # 1. Read monitor station information, and store variables information and station IDs
        variable_lists = []
        site_ids = []
        for site_file in sites_info_txts_list:
            site_data_items = read_data_items_from_txt(site_file)
            site_flds = site_data_items[0]
            for i in range(1, len(site_data_items)):
                dic = dict()
                types = list()
                units = list()
                for j, v in enumerate(site_data_items[i]):
                    if StringClass.string_match(site_flds[j],
                                                StationFields.id):
                        dic[StationFields.id] = int(v)
                        site_ids.append(dic[StationFields.id])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.name):
                        dic[StationFields.name] = v.strip()
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.type):
                        types = StringClass.split_string(v.strip(), '-')
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.lat):
                        dic[StationFields.lat] = float(v)
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.lon):
                        dic[StationFields.lon] = float(v)
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.x):
                        dic[StationFields.x] = float(v)
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.y):
                        dic[StationFields.y] = float(v)
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.unit):
                        units = StringClass.split_string(v.strip(), '-')
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.elev):
                        dic[StationFields.elev] = float(v)
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.outlet):
                        dic[StationFields.outlet] = float(v)

                for j, cur_type in enumerate(types):
                    site_dic = dict()
                    site_dic[StationFields.id] = dic[StationFields.id]
                    site_dic[StationFields.name] = dic[StationFields.name]
                    site_dic[StationFields.type] = cur_type
                    site_dic[StationFields.lat] = dic[StationFields.lat]
                    site_dic[StationFields.lon] = dic[StationFields.lon]
                    site_dic[StationFields.x] = dic[StationFields.x]
                    site_dic[StationFields.y] = dic[StationFields.y]
                    site_dic[StationFields.unit] = units[j]
                    site_dic[StationFields.elev] = dic[StationFields.elev]
                    site_dic[StationFields.outlet] = dic[StationFields.outlet]
                    # Add SubbasinID field
                    matched, cur_sids = ImportObservedData.match_subbasin(
                        subbsn_file, site_dic, maindb)
                    if not matched:
                        break
                    if len(cur_sids) == 1:  # if only one subbasin ID, store integer
                        cur_subbsn_id_str = cur_sids[0]
                    else:
                        cur_subbsn_id_str = ','.join(str(cid) for cid in cur_sids)
                    site_dic[StationFields.subbsn] = cur_subbsn_id_str
                    curfilter = {
                        StationFields.id: site_dic[StationFields.id],
                        StationFields.type: site_dic[StationFields.type]
                    }
                    # print(curfilter)
                    hydro_clim_db[DBTableNames.sites].find_one_and_replace(
                        curfilter, site_dic, upsert=True)

                    var_dic = dict()
                    var_dic[StationFields.type] = types[j]
                    var_dic[StationFields.unit] = units[j]
                    if var_dic not in variable_lists:
                        variable_lists.append(var_dic)
        site_ids = list(set(site_ids))
        # 2. Read measurement data and import to MongoDB
        bulk = hydro_clim_db[
            DBTableNames.observes].initialize_ordered_bulk_op()
        count = 0
        for measDataFile in obs_txts_list:
            # print(measDataFile)
            obs_data_items = read_data_items_from_txt(measDataFile)
            tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(
                measDataFile)
            if tsysin == 'UTCTIME':
                tzonein = time.timezone / -3600
            # If the data items are empty or contain only the header row,
            # go to the next data file.
            if obs_data_items == [] or len(obs_data_items) == 1:
                continue
            obs_flds = obs_data_items[0]
            required_flds = [
                StationFields.id, DataValueFields.type, DataValueFields.value
            ]

            for fld in required_flds:
                if not StringClass.string_in_list(
                        fld, obs_flds):  # data does not meet the requirement!
                    raise ValueError(
                        'The file %s does not meet the required format!' %
                        measDataFile)
            for i, cur_obs_data_item in enumerate(obs_data_items):
                dic = dict()
                if i == 0:
                    continue
                for j, cur_data_value in enumerate(cur_obs_data_item):
                    if StringClass.string_match(obs_flds[j], StationFields.id):
                        dic[StationFields.id] = int(cur_data_value)
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.type):
                        dic[DataValueFields.type] = cur_data_value
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.value):
                        dic[DataValueFields.value] = float(cur_data_value)
                # if the current site ID is not included, go to the next data
                # item (a `continue` inside the field loop only skips a field)
                if dic.get(StationFields.id) not in site_ids:
                    continue
                utc_t = HydroClimateUtilClass.get_utcdatetime_from_field_values(
                    obs_flds, cur_obs_data_item, tsysin, tzonein)
                dic[DataValueFields.local_time] = utc_t + timedelta(
                    minutes=tzonein * 60)
                dic[DataValueFields.time_zone] = tzonein
                dic[DataValueFields.utc] = utc_t
                # curfilter = {StationFields.id: dic[StationFields.id],
                #              DataValueFields.type: dic[DataValueFields.type],
                #              DataValueFields.utc: dic[DataValueFields.utc]}
                # bulk.find(curfilter).replace_one(dic)
                bulk.insert(dic)
                count += 1
                if count % 500 == 0:
                    MongoUtil.run_bulk(bulk)
                    bulk = hydro_clim_db[
                        DBTableNames.observes].initialize_ordered_bulk_op()
                    # db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)
        # 3. Add measurement data with unit converted
        # loop variables list
        added_dics = list()
        for curVar in variable_lists:
            # print(curVar)
            # If the unit is mg/L or g/L, append the suffix 'Conc' to the Type
            # name and, when discharge data are available, convert the
            # corresponding value to kg.
            cur_type = curVar[StationFields.type]
            cur_unit = curVar[StationFields.unit]
            # Find data by Type
            for item in hydro_clim_db[DBTableNames.observes].find(
                {StationFields.type: cur_type}):
                # print(item)
                dic = dict()
                dic[StationFields.id] = item[StationFields.id]
                dic[DataValueFields.value] = item[DataValueFields.value]
                dic[StationFields.type] = item[StationFields.type]
                dic[DataValueFields.local_time] = item[
                    DataValueFields.local_time]
                dic[DataValueFields.time_zone] = item[
                    DataValueFields.time_zone]
                dic[DataValueFields.utc] = item[DataValueFields.utc]

                if cur_unit == 'mg/L' or cur_unit == 'g/L':
                    # update the Type name
                    dic[StationFields.type] = '%sConc' % cur_type
                    curfilter = {
                        StationFields.id: dic[StationFields.id],
                        DataValueFields.type: cur_type,
                        DataValueFields.utc: dic[DataValueFields.utc]
                    }
                    hydro_clim_db[DBTableNames.observes].find_one_and_replace(
                        curfilter, dic, upsert=True)
                    dic[StationFields.type] = cur_type

                # find discharge on current day
                cur_filter = {
                    StationFields.type: 'Q',
                    DataValueFields.utc: dic[DataValueFields.utc],
                    StationFields.id: dic[StationFields.id]
                }
                q_dic = hydro_clim_db[DBTableNames.observes].find_one(
                    filter=cur_filter)

                if q_dic is not None:
                    q = q_dic[DataValueFields.value]
                else:
                    continue
                if cur_unit == 'mg/L':
                    # convert mg/L to kg
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] * q * 86400. / 1000., 2)
                elif cur_unit == 'g/L':
                    # convert g/L to kg
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] * q * 86400., 2)
                elif cur_unit == 'kg':
                    dic[StationFields.type] = '%sConc' % cur_type
                    # convert kg to mg/L
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] / q * 1000. / 86400., 2)
                # add new data item
                added_dics.append(dic)
        # import to MongoDB
        for dic in added_dics:
            curfilter = {
                StationFields.id: dic[StationFields.id],
                DataValueFields.type: dic[DataValueFields.type],
                DataValueFields.utc: dic[DataValueFields.utc]
            }
            hydro_clim_db[DBTableNames.observes].find_one_and_replace(
                curfilter, dic, upsert=True)
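The text layouts this function consumes are only loosely constrained here, since the header spellings come from the StationFields/DataValueFields constants that are not shown. A hypothetical pair of inputs, assuming comma-delimited files with a #<time_system> first line for the observed data, might look like:

# Hypothetical input files for data_from_txt(); the exact header names depend
# on the StationFields/DataValueFields constants and may differ in practice.
sites_info_txt = """StationID,Name,Type,Lat,Lon,LocalX,LocalY,Unit,Elevation,isOutlet
1,outlet_gauge,Q-SED,31.45,110.52,530000.,3480000.,m3/s-g/L,845.,1
"""

observed_data_txt = """#UTCTIME
StationID,DATETIME,Type,VALUE
1,2013-02-03 00:00:00,Q,1.53
1,2013-02-03 00:00:00,SED,0.28
"""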
Example #4
    def daily_data_from_txt(climdb, data_txt_file, sites_info_dict):
        """Import climate data table"""
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(data_txt_file)
        if tsysin == 'UTCTIME':
            tzonein = time.timezone / -3600
        clim_data_items = read_data_items_from_txt(data_txt_file)
        clim_flds = clim_data_items[0]
        # PHUCalDic is used for calculating potential heat units (PHU)
        # for each climate station and each year.
        # format is {StationID:{Year1:[values],Year2:[Values]...}, ...}
        # PHUCalDic = {}
        # format: {StationID1: climateStats1, ...}
        hydro_climate_stats = dict()
        required_flds = [DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws]
        output_flds = [DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp,
                       DataType.rm, DataType.pet, DataType.ws, DataType.sr]
        # remove existing records
        for fld in output_flds:
            climdb[DBTableNames.data_values].remove({'TYPE': fld})
        for fld in required_flds:
            if not StringClass.string_in_list(fld, clim_flds):
                raise ValueError('Meteorological Daily data MUST contain %s!' % fld)
        # Create bulk object
        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        count = 0
        for i, cur_clim_data_item in enumerate(clim_data_items):
            if i == 0:
                continue
            dic = dict()
            cur_ssd = DEFAULT_NODATA

            for j, clim_data_v in enumerate(cur_clim_data_item):
                if StringClass.string_match(clim_flds[j], DataValueFields.id):
                    dic[DataValueFields.id] = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.mean_tmp):
                    dic[DataType.mean_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.min_tmp):
                    dic[DataType.min_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.max_tmp):
                    dic[DataType.max_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.pet):
                    dic[DataType.pet] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.sr):
                    dic[DataType.sr] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.ws):
                    dic[DataType.ws] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.rm):
                    dic[DataType.rm] = float(clim_data_v) * 0.01
                elif StringClass.string_match(clim_flds[j], DataType.ssd):
                    cur_ssd = float(clim_data_v)
            # Get datetime and utc/local transformation
            utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(clim_flds,
                                                                               cur_clim_data_item,
                                                                               tsysin, tzonein)
            dic[DataValueFields.local_time] = utc_time + timedelta(minutes=tzonein * 60)
            dic[DataValueFields.time_zone] = tzonein
            dic[DataValueFields.utc] = utc_time
            dic[DataValueFields.y] = utc_time.year

            # Derive values when some of these data are not provided
            if DataType.mean_tmp not in list(dic.keys()):
                dic[DataType.mean_tmp] = (dic[DataType.max_tmp] + dic[DataType.min_tmp]) / 2.
            if DataType.sr not in list(dic.keys()):
                if cur_ssd == DEFAULT_NODATA:
                    raise ValueError(DataType.sr + ' or ' + DataType.ssd + ' must be provided!')
                else:
                    if dic[DataValueFields.id] in list(sites_info_dict.keys()):
                        cur_lon, cur_lat = sites_info_dict[dic[DataValueFields.id]].lon_lat()
                        sr = round(HydroClimateUtilClass.rs(DateClass.day_of_year(utc_time),
                                                            float(cur_ssd), cur_lat * PI / 180.), 1)
                        dic[DataType.sr] = sr

            for fld in output_flds:
                cur_dic = dict()
                if fld in list(dic.keys()):
                    cur_dic[DataValueFields.value] = dic[fld]
                    cur_dic[DataValueFields.id] = dic[
                        DataValueFields.id]
                    cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                    cur_dic[DataValueFields.time_zone] = dic[DataValueFields.time_zone]
                    cur_dic[DataValueFields.local_time] = dic[DataValueFields.local_time]
                    cur_dic[DataValueFields.type] = fld
                    # Old code inserted or updated one item at a time, which
                    # was quite inefficient; updated to use the bulk operation
                    # interface. lj
                    # # find old records and remove (deprecated because of low efficiency, lj.)
                    # curfilter = {DataValueFields.type: fld,
                    #              DataValueFields.utc: dic[DataValueFields.utc]}
                    # bulk.find(curfilter).upsert().replace_one(cur_dic)
                    bulk.insert(cur_dic)
                    count += 1
                    if count % 500 == 0:  # execute every 500 records
                        MongoUtil.run_bulk(bulk)
                        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()

            if dic[DataValueFields.id] not in list(hydro_climate_stats.keys()):
                hydro_climate_stats[dic[DataValueFields.id]] = ClimateStats()
            hydro_climate_stats[dic[DataValueFields.id]].add_item(dic)
        # execute the remaining records
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)
        for item, cur_climate_stats in list(hydro_climate_stats.items()):
            cur_climate_stats.annual_stats()
        # Create index
        climdb[DBTableNames.data_values].create_index([(DataValueFields.id, ASCENDING),
                                                       (DataValueFields.type, ASCENDING),
                                                       (DataValueFields.utc, ASCENDING)])
        # prepare dic for MongoDB
        for s_id, stats_v in list(hydro_climate_stats.items()):
            for YYYY in list(stats_v.Count.keys()):
                cur_dic = dict()
                cur_dic[DataValueFields.value] = stats_v.PHUTOT[YYYY]
                cur_dic[DataValueFields.id] = s_id
                cur_dic[DataValueFields.y] = YYYY
                cur_dic[VariableDesc.unit] = 'heat units'
                cur_dic[VariableDesc.type] = DataType.phu_tot
                curfilter = {DataValueFields.id: s_id,
                             VariableDesc.type: DataType.phu_tot,
                             DataValueFields.y: YYYY}
                climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                       upsert=True)
                # import annual mean temperature
                cur_dic[VariableDesc.type] = DataType.mean_tmp
                cur_dic[VariableDesc.unit] = 'deg C'
                cur_dic[DataValueFields.value] = stats_v.MeanTmp[YYYY]
                curfilter = {DataValueFields.id: s_id,
                             VariableDesc.type: DataType.mean_tmp,
                             DataValueFields.y: YYYY}
                climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                       upsert=True)
            cur_dic[DataValueFields.value] = stats_v.PHU0
            cur_dic[DataValueFields.id] = s_id
            cur_dic[DataValueFields.y] = DEFAULT_NODATA
            cur_dic[VariableDesc.unit] = 'heat units'
            cur_dic[VariableDesc.type] = DataType.phu0
            curfilter = {DataValueFields.id: s_id,
                         VariableDesc.type: DataType.phu0,
                         DataValueFields.y: DEFAULT_NODATA}
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                   upsert=True)
            # import annual mean temperature
            cur_dic[VariableDesc.type] = DataType.mean_tmp0
            cur_dic[VariableDesc.unit] = 'deg C'
            cur_dic[DataValueFields.value] = stats_v.MeanTmp0
            curfilter = {DataValueFields.id: s_id,
                         VariableDesc.type: DataType.mean_tmp0,
                         DataValueFields.y: DEFAULT_NODATA}
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                   upsert=True)
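initialize_ordered_bulk_op belongs to the legacy pymongo bulk API, which was deprecated in pymongo 3.x and removed in 4.x. A minimal sketch of the same every-500-records batching with the modern bulk_write interface, assuming a plain pymongo collection (the helper name is illustrative):

from pymongo import InsertOne

def batched_insert(collection, documents, batch_size=500):
    """Insert documents in fixed-size ordered batches via Collection.bulk_write()."""
    requests = []
    for doc in documents:
        requests.append(InsertOne(doc))
        if len(requests) == batch_size:
            collection.bulk_write(requests, ordered=True)
            requests = []
    if requests:  # flush the remaining partial batch
        collection.bulk_write(requests, ordered=True)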
Example #5
 def workflow(cfg, clim_db):
     """Workflow"""
     print('Import Daily Meteorological Data... ')
     site_m_loc = HydroClimateUtilClass.query_climate_sites(clim_db, 'M')
     ImportMeteoData.daily_data_from_txt(clim_db, cfg.Meteo_data, site_m_loc)
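A hedged driver for this workflow; the config class below is a stand-in that only supplies the Meteo_data attribute read above, and the connection details are placeholders.

from pymongo import MongoClient

class DemoConfig(object):
    """Minimal stand-in for the real configuration object (assumed)."""
    Meteo_data = r'/data/meteo_daily.txt'  # placeholder path

client = MongoClient('localhost', 27017)
workflow(DemoConfig(), client['demo_climate'])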
Example #6
    def data_from_txt(maindb, hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file):
        """
        Read observed data from txt file
        Args:
            maindb: Main spatial database
            hydro_clim_db: hydro-climate database
            obs_txts_list: txt file paths of observed data
            sites_info_txts_list: txt file paths of site information
            subbsn_file: subbasin raster file

        Returns:
            True or False
        """
        # 1. Read monitor station information, and store variables information and station IDs
        variable_lists = []
        site_ids = []
        for site_file in sites_info_txts_list:
            site_data_items = read_data_items_from_txt(site_file)
            site_flds = site_data_items[0]
            for i in range(1, len(site_data_items)):
                dic = dict()
                types = list()
                for j, v in enumerate(site_data_items[i]):
                    if StringClass.string_match(site_flds[j], StationFields.id):
                        dic[StationFields.id] = int(v)
                        site_ids.append(dic[StationFields.id])
                    elif StringClass.string_match(site_flds[j], StationFields.name):
                        dic[StationFields.name] = v.strip()
                    elif StringClass.string_match(site_flds[j], StationFields.type):
                        types = StringClass.split_string(v.strip(), '-')
                    elif StringClass.string_match(site_flds[j], StationFields.lat):
                        dic[StationFields.lat] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.lon):
                        dic[StationFields.lon] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.x):
                        dic[StationFields.x] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.y):
                        dic[StationFields.y] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.unit):
                        dic[StationFields.unit] = v.strip()
                    elif StringClass.string_match(site_flds[j], StationFields.elev):
                        dic[StationFields.elev] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.outlet):
                        dic[StationFields.outlet] = float(v)

                for j, cur_type in enumerate(types):
                    site_dic = dict()
                    site_dic[StationFields.id] = dic[StationFields.id]
                    site_dic[StationFields.name] = dic[StationFields.name]
                    site_dic[StationFields.type] = cur_type
                    site_dic[StationFields.lat] = dic[StationFields.lat]
                    site_dic[StationFields.lon] = dic[StationFields.lon]
                    site_dic[StationFields.x] = dic[StationFields.x]
                    site_dic[StationFields.y] = dic[StationFields.y]
                    site_dic[StationFields.elev] = dic[StationFields.elev]
                    site_dic[StationFields.outlet] = dic[StationFields.outlet]
                    # Add SubbasinID field
                    matched, cur_sids = ImportObservedData.match_subbasin(subbsn_file, site_dic,
                                                                          maindb)
                    if not matched:
                        break
                    if len(cur_sids) == 1:  # if only one subbasin ID, store integer
                        cur_subbsn_id_str = cur_sids[0]
                    else:
                        cur_subbsn_id_str = ','.join(str(cid) for cid in cur_sids)
                    site_dic[StationFields.subbsn] = cur_subbsn_id_str
                    curfilter = {StationFields.id: site_dic[StationFields.id],
                                 StationFields.type: site_dic[StationFields.type]}
                    # print(curfilter)
                    hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, site_dic,
                                                                           upsert=True)

                    var_dic = dict()
                    var_dic[StationFields.type] = types[j]
                    var_dic[StationFields.unit] = dic[StationFields.unit]
                    if var_dic not in variable_lists:
                        variable_lists.append(var_dic)
        site_ids = list(set(site_ids))
        # 2. Read measurement data and import to MongoDB
        bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
        count = 0
        for measDataFile in obs_txts_list:
            # print(measDataFile)
            obs_data_items = read_data_items_from_txt(measDataFile)
            tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(measDataFile)
            if tsysin == 'UTCTIME':
                tzonein = time.timezone / -3600
            # If the data items are empty or contain only the header row,
            # go to the next data file.
            if obs_data_items == [] or len(obs_data_items) == 1:
                continue
            obs_flds = obs_data_items[0]
            required_flds = [StationFields.id, DataValueFields.type, DataValueFields.value]

            for fld in required_flds:
                if not StringClass.string_in_list(fld, obs_flds):  # data does not meet the requirement!
                    raise ValueError('The file %s does not meet the required format!' % measDataFile)
            for i, cur_obs_data_item in enumerate(obs_data_items):
                dic = dict()
                if i == 0:
                    continue
                for j, cur_data_value in enumerate(cur_obs_data_item):
                    if StringClass.string_match(obs_flds[j], StationFields.id):
                        dic[StationFields.id] = int(cur_data_value)
                    elif StringClass.string_match(obs_flds[j], DataValueFields.type):
                        dic[DataValueFields.type] = cur_data_value
                    elif StringClass.string_match(obs_flds[j], DataValueFields.value):
                        dic[DataValueFields.value] = float(cur_data_value)
                # if the current site ID is not included, go to the next data item
                # (a `continue` inside the field loop only skips a field)
                if dic.get(StationFields.id) not in site_ids:
                    continue
                utc_t = HydroClimateUtilClass.get_utcdatetime_from_field_values(obs_flds,
                                                                                cur_obs_data_item,
                                                                                tsysin, tzonein)
                dic[DataValueFields.local_time] = utc_t + timedelta(minutes=tzonein * 60)
                dic[DataValueFields.time_zone] = tzonein
                dic[DataValueFields.utc] = utc_t
                # curfilter = {StationFields.id: dic[StationFields.id],
                #              DataValueFields.type: dic[DataValueFields.type],
                #              DataValueFields.utc: dic[DataValueFields.utc]}
                # bulk.find(curfilter).replace_one(dic)
                bulk.insert(dic)
                count += 1
                if count % 500 == 0:
                    MongoUtil.run_bulk(bulk)
                    bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
                    # db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)
        # 3. Add measurement data with unit converted
        # loop variables list
        added_dics = []
        for curVar in variable_lists:
            # print(curVar)
            # If the unit is mg/L or g/L, append the suffix 'Conc' to the Type
            # name and, when discharge data are available, convert the
            # corresponding value to kg.
            cur_type = curVar[StationFields.type]
            cur_unit = curVar[StationFields.unit]
            # Find data by Type
            for item in hydro_clim_db[DBTableNames.observes].find({StationFields.type: cur_type}):
                # print(item)
                dic = dict()
                dic[StationFields.id] = item[StationFields.id]
                dic[DataValueFields.value] = item[DataValueFields.value]
                dic[StationFields.type] = item[StationFields.type]
                dic[DataValueFields.local_time] = item[DataValueFields.local_time]
                dic[DataValueFields.time_zone] = item[DataValueFields.time_zone]
                dic[DataValueFields.utc] = item[DataValueFields.utc]

                if cur_unit == 'mg/L' or cur_unit == 'g/L':
                    # update the Type name
                    dic[StationFields.type] = cur_type + 'Conc'
                    curfilter = {StationFields.id: dic[StationFields.id],
                                 DataValueFields.type: cur_type,
                                 DataValueFields.utc: dic[DataValueFields.utc]}
                    hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic,
                                                                              upsert=True)
                    dic[StationFields.type] = cur_type

                # find discharge on current day
                cur_filter = {StationFields.type: 'Q',
                              DataValueFields.utc: dic[DataValueFields.utc],
                              StationFields.id: dic[StationFields.id]}
                q_dic = hydro_clim_db[DBTableNames.observes].find_one(filter=cur_filter)

                if q_dic is not None:
                    q = q_dic[DataValueFields.value]
                else:
                    continue
                if cur_unit == 'mg/L':
                    # convert mg/L to kg
                    dic[DataValueFields.value] = round(
                            dic[DataValueFields.value] * q * 86400. / 1000., 2)
                elif cur_unit == 'g/L':
                    # convert g/L to kg
                    dic[DataValueFields.value] = round(
                            dic[DataValueFields.value] * q * 86400., 2)
                elif cur_unit == 'kg':
                    dic[StationFields.type] = cur_type + 'Conc'
                    # convert kg to mg/L
                    dic[DataValueFields.value] = round(
                            dic[DataValueFields.value] / q * 1000. / 86400., 2)
                # add new data item
                added_dics.append(dic)
        # import to MongoDB
        for dic in added_dics:
            curfilter = {StationFields.id: dic[StationFields.id],
                         DataValueFields.type: dic[DataValueFields.type],
                         DataValueFields.utc: dic[DataValueFields.utc]}
            hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
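The unit conversions in step 3 reduce to load = concentration x discharge x 86400 s/day, with mg/L read as g/m3. A standalone sketch of that arithmetic (the function names are illustrative, not from the source):

def mg_per_l_to_kg_per_day(conc_mg_l, q_m3_s):
    """mg/L (== g/m3) times m3/s times 86400 s/day, divided by 1000 g/kg -> kg/day."""
    return round(conc_mg_l * q_m3_s * 86400. / 1000., 2)

def kg_per_day_to_mg_per_l(load_kg_day, q_m3_s):
    """Inverse conversion: kg/day back to a concentration in mg/L."""
    return round(load_kg_day / q_m3_s * 1000. / 86400., 2)

# 0.5 mg/L at 2 m3/s over one day amounts to 86.4 kg of load.
assert mg_per_l_to_kg_per_day(0.5, 2.0) == 86.4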
Example #7
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """
    Interpolate irregularly observed data to regular-interval data.

    Todo: Not tested yet!

    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Field names can be PCP, FLOW, or SED;
                 the units are mm/h, m3/s, and g/L (i.e., kg/m3), respectively.
        time_interval: time interval, unit is minute, e.g., daily output is 1440
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system
                    is based on time_sys.
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If true, the time interval without original records will
                        not be output.
        time_sys_output: time system of the output, the format must be
                  '<time_system> [<time_zone>]', e.g.,
                  'LOCALTIME'
                  'LOCALTIME 8'
                  'UTCTIME' (default)
        day_divided_hour: If the time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23. e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59
    Returns:
        The output data files are located in the same directory with the input file.
        The nomenclature is: <field name>_<time system>_<time interval>_<nonzero>, e.g.,
        pcp_utctime_1440_nonzero.csv, flow_localtime_60.csv.
        Note that `.txt` format is also supported.
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available field
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_available_field(cur_fld):
        """Check if the given field name is supported."""
        support_flag = False
        for fff in available_flds:
            if fff.lower() in cur_fld.lower():
                support_flag = True
                break
        return support_flag

    ord_data = OrderedDict()
    time_zone_output = time.timezone / -3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_input)  # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    # print(ord_data)
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        # print(item_dtime)
        # if item_dtime.month == 12 and item_dtime.day == 31:
        #     print("debug")
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add the one item immediately earlier than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # append the end time for convenience of computation
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # if eliminate time interval without original records
        # initial interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            if 'SED' in v_name.upper():  # FLOW must exist
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!' %
                          item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():  # the input is mm/h, and output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta

    # for i, v in itp_data.items():
    #     print(i, v)
    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_available_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.csv'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w', encoding='utf-8') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
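A hedged usage sketch for the interpolation routine above; the input path and period are placeholders. Under these arguments it would resample an irregular PCP record to daily (1440-minute) values in UTC and write a PCP_UTCTIME_1440.csv-style file next to the input file.

# Hypothetical call: resample an irregular precipitation record to daily
# values in UTC; the file path and dates are made up for illustration.
interpolate_observed_data_to_regular_interval(
    r'/data/pcp_observed.txt', 1440,
    '2013-01-01 00:00:00', '2013-12-31 23:59:59',
    eliminate_zero=False, time_sys_output='UTCTIME', day_divided_hour=0)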
Example #10
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """
    Interpolate irregularly observed data to regular-interval data.
    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Field names can be PCP, FLOW, or SED;
                 the units are mm/h, m3/s, and g/L (i.e., kg/m3), respectively.
        time_interval: time interval in minutes, e.g., 1440 for daily output.
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time
                    system is that of time_sys_output.
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If True, time intervals without original records
                        are not output.
        time_sys_output: time system of the output. The format must be
                  '<time_system> [<time_zone>]', e.g.,
                  'LOCALTIME'
                  'LOCALTIME 8'
                  'UTCTIME' (default)
        day_divided_hour: If the time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23. e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59
    Returns:
        The output data files are located in the same directory with the input file.
        The nomenclature is: <field name>_<time system>_<time interval>[_nonzero].txt, e.g.,
        pcp_utctime_1440_nonzero.txt, flow_localtime_60.txt
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available field
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_available_field(cur_fld):
        """Check if the given field name is supported."""
        support_flag = False
        for fff in available_flds:
            if fff.lower() in cur_fld.lower():
                support_flag = True
                break
        return support_flag

    ord_data = OrderedDict()
    time_zone_output = time.timezone // -3600  # integer hours, avoids a float under Python 3
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # add at most one record that precedes sdt, to anchor the interval start
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # append the interval end time to simplify accumulation
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # initialize the interpolated values of this time interval
        itp_data[item_dtime] = [0.] * len(flds)
        # optionally skip time intervals without any original records
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        # locate the FLOW column first, since SED interpolation is flow-weighted
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            if 'SED' in v_name.upper():  # FLOW must exist to interpolate SED
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_available_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    # accumulate flow-weighted sediment load: sum(conc * flow * dt)
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    # and the corresponding weight: sum(flow * dt)
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!' %
                          item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    # flow-weighted mean concentration: sum(c*q*dt) / sum(q*dt)
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                # time-weighted mean flow over the interval (m3/s)
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():
                # the input is a rate in mm/h; dividing the accumulated
                # mm*s/h by 3600 yields the total depth in mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta

    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_available_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.txt'
        out_file = os.path.join(work_path, file_name)
        with open(out_file, 'w') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
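A quick usage sketch (the file path and the dates are hypothetical). Given an observed-data
file formatted as described in the docstring, e.g.

#LOCALTIME 8
DATETIME,FLOW,SED
2013-02-03 08:30:00,1.5,0.2
2013-02-03 10:05:00,2.0,0.3

the call below aggregates the records to daily values in UTC and writes
FLOW_UTCTIME_1440.txt and SED_UTCTIME_1440.txt next to the input file:

interpolate_observed_data_to_regular_interval('/path/to/observed.txt', 1440,
                                              '2013-02-03 00:00:00',
                                              '2013-12-31 23:59:59',
                                              time_sys_output='UTCTIME',
                                              day_divided_hour=0)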