Example #1
0
    def workflow(cfg, maindb, climdb):
        """
        Import measurement (observed) data into MongoDB.

        Data types may include Q (discharge, m3/s), SED (mg/L), TN (mg/L),
        TP (mg/L), etc. Nothing is imported when ``cfg.use_observed`` is falsy.
        Otherwise the observes collection is dropped if it already exists
        (created if it does not), the sites and variable-description
        collections are created when missing, and the ``.txt`` files found in
        ``cfg.observe_dir`` are passed to ``ImportObservedData.data_from_txt``.

        Args:
            cfg: configuration object; ``use_observed``, ``observe_dir`` and
                ``spatials.subbsn`` are read here.
            maindb: main database object, forwarded to ``data_from_txt``.
            climdb: hydro-climate database object receiving the data.

        Returns:
            False when observed data are disabled, True after the import call.
        """
        if not cfg.use_observed:
            return False

        existing = climdb.collection_names()
        # An existing observes collection is discarded so each run starts clean.
        if StringClass.string_in_list(DBTableNames.observes, existing):
            climdb.drop_collection(DBTableNames.observes)
        else:
            climdb.create_collection(DBTableNames.observes)
        for tb_name in (DBTableNames.sites, DBTableNames.var_desc):
            if not StringClass.string_in_list(tb_name, existing):
                climdb.create_collection(tb_name)

        # Paths containing 'observed_' are measurement files; every other text
        # file is treated as site information.
        meas_file_list = []
        site_loc = []
        for txt_path in FileClass.get_full_filename_by_suffixes(cfg.observe_dir, ['.txt']):
            if StringClass.is_substring('observed_', txt_path):
                target = meas_file_list
            else:
                target = site_loc
            target.append(txt_path)
        ImportObservedData.data_from_txt(maindb, climdb, meas_file_list, site_loc,
                                         cfg.spatials.subbsn)
        return True
    def regular_data_from_txt(climdb, data_file):
        """Import regular precipitation data from a text file into MongoDB.

        Existing precipitation records are removed from the data-values
        collection first. The header row of ``data_file`` is split into time
        fields and station ID columns; each following row produces one record
        per station, inserted through ordered bulk operations that are run
        every 500 records. An ascending index on station ID, data type and UTC
        time is created at the end.

        Args:
            climdb: hydro-climate database object.
            data_file: path of the precipitation text file.
        """
        values_tab = climdb[DBTableNames.data_values]
        # delete existing precipitation data
        values_tab.remove({DataValueFields.type: DataType.p})
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(
            data_file)
        if tsysin == 'UTCTIME':
            # the zone offset (hours) is taken from the local machine settings
            tzonein = time.timezone / -3600
        rows = read_data_items_from_txt(data_file)
        header = rows[0]
        bulk = values_tab.initialize_ordered_bulk_op()
        time_flds = [
            DataValueFields.dt, DataValueFields.y, DataValueFields.m,
            DataValueFields.d, DataValueFields.hour,
            DataValueFields.minute, DataValueFields.second
        ]
        # every header field that is not a time field names a station column
        station_id = [fld for fld in header
                      if not StringClass.string_in_list(fld, time_flds)]
        count = 0
        for row in rows[1:]:
            precipitation = [float(cell) for col, cell in enumerate(row)
                             if StringClass.string_in_list(header[col], station_id)]
            utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(
                header, row, tsysin, tzonein)
            local_time = utc_time + timedelta(minutes=tzonein * 60)

            for pos, cur_id in enumerate(station_id):
                bulk.insert({
                    DataValueFields.value: precipitation[pos],
                    DataValueFields.id: int(cur_id),
                    DataValueFields.type: DataType.p,
                    DataValueFields.time_zone: tzonein,
                    DataValueFields.local_time: local_time,
                    DataValueFields.utc: utc_time,
                })
                count += 1
                if not count % 500:  # execute each 500 records
                    MongoUtil.run_bulk(bulk)
                    bulk = values_tab.initialize_ordered_bulk_op()
        if count % 500:
            # flush the records left over after the last full batch
            MongoUtil.run_bulk(bulk)
        # Create index
        index_keys = [
            (DataValueFields.id, ASCENDING), (DataValueFields.type, ASCENDING),
            (DataValueFields.utc, ASCENDING)
        ]
        values_tab.create_index(index_keys)
Example #3
0
    def workflow(cfg, maindb, climdb):
        """
        Import measurement (observed) data into MongoDB.

        Data types may include Q (discharge, m3/s), SED (mg/L), TN (mg/L),
        TP (mg/L), etc. Nothing is imported when ``cfg.use_observed`` is falsy.
        Otherwise the observes collection is dropped if it already exists
        (created if it does not), the sites and variable-description
        collections are created when missing, and the ``.txt``/``.csv`` files
        found in ``cfg.observe_dir`` are passed to
        ``ImportObservedData.data_from_txt``.

        Args:
            cfg: configuration object; ``use_observed``, ``observe_dir`` and
                ``spatials.subbsn`` are read here.
            maindb: main database object, forwarded to ``data_from_txt``.
            climdb: hydro-climate database object receiving the data.

        Returns:
            False when observed data are disabled, True after the import call.
        """
        if not cfg.use_observed:
            return False

        existing = climdb.collection_names()
        # An existing observes collection is discarded so each run starts clean.
        if StringClass.string_in_list(DBTableNames.observes, existing):
            climdb.drop_collection(DBTableNames.observes)
        else:
            climdb.create_collection(DBTableNames.observes)
        for tb_name in (DBTableNames.sites, DBTableNames.var_desc):
            if not StringClass.string_in_list(tb_name, existing):
                climdb.create_collection(tb_name)

        # Paths containing 'observed_' are measurement files; every other
        # matching file is treated as site information.
        meas_file_list = list()
        site_loc = list()
        data_files = FileClass.get_full_filename_by_suffixes(cfg.observe_dir,
                                                             ['.txt', '.csv'])
        for data_path in data_files:
            if StringClass.is_substring('observed_', data_path):
                target = meas_file_list
            else:
                target = site_loc
            target.append(data_path)
        ImportObservedData.data_from_txt(maindb, climdb, meas_file_list,
                                         site_loc, cfg.spatials.subbsn)
        return True
    def regular_data_from_txt(climdb, data_file):
        """Import regular precipitation data from a text file into MongoDB.

        Existing precipitation records are removed from the data-values
        collection first. The header row of ``data_file`` is split into time
        fields and station ID columns; each following row produces one record
        per station, inserted through ordered bulk operations that are run
        every 500 records. An ascending index on station ID, data type and UTC
        time is created at the end.

        Args:
            climdb: hydro-climate database object.
            data_file: path of the precipitation text file.
        """
        values_tab = climdb[DBTableNames.data_values]
        # delete existing precipitation data
        values_tab.remove({DataValueFields.type: DataType.p})
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(data_file)
        if tsysin == 'UTCTIME':
            # the zone offset (hours) is taken from the local machine settings
            tzonein = time.timezone / -3600
        rows = read_data_items_from_txt(data_file)
        header = rows[0]
        bulk = values_tab.initialize_ordered_bulk_op()
        time_flds = [DataValueFields.dt, DataValueFields.y, DataValueFields.m,
                     DataValueFields.d, DataValueFields.hour,
                     DataValueFields.minute, DataValueFields.second]
        # every header field that is not a time field names a station column
        station_id = [fld for fld in header
                      if not StringClass.string_in_list(fld, time_flds)]
        count = 0
        for row in rows[1:]:
            precipitation = [float(cell) for col, cell in enumerate(row)
                             if StringClass.string_in_list(header[col], station_id)]
            utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(header, row,
                                                                               tsysin, tzonein)
            local_time = utc_time + timedelta(minutes=tzonein * 60)

            for pos, cur_id in enumerate(station_id):
                bulk.insert({DataValueFields.value: precipitation[pos],
                             DataValueFields.id: int(cur_id),
                             DataValueFields.type: DataType.p,
                             DataValueFields.time_zone: tzonein,
                             DataValueFields.local_time: local_time,
                             DataValueFields.utc: utc_time})
                count += 1
                if not count % 500:  # execute each 500 records
                    MongoUtil.run_bulk(bulk)
                    bulk = values_tab.initialize_ordered_bulk_op()
        if count % 500:
            # flush the records left over after the last full batch
            MongoUtil.run_bulk(bulk)
        # Create index
        index_keys = [(DataValueFields.id, ASCENDING),
                      (DataValueFields.type, ASCENDING),
                      (DataValueFields.utc, ASCENDING)]
        values_tab.create_index(index_keys)
    def workflow(cfg, main_db, clim_db):
        """Locate the hydro-climate sites and import their site information.

        Sites are searched twice with the Thiessen files of the meteorology
        and precipitation sites: once for the entire basin and once per
        subbasin. Afterwards the sites and variable-description collections
        are created when missing, the variable table is imported, and both
        site tables are imported.

        Args:
            cfg: configuration object supplying file paths and field names.
            main_db: main database object, forwarded to ``find_sites``.
            clim_db: hydro-climate database object.

        Returns:
            tuple: the results of ``sites_table`` for the meteorology sites
            and for the precipitation sites, in that order.
        """
        # 1. Find meteorology and precipitation sites in study area
        thiessen_file_list = [cfg.meteo_sites_thiessen, cfg.prec_sites_thiessen]
        type_list = [DataType.m, DataType.p]
        search_units = (
            # The entire basin, used for OpenMP version
            (cfg.vecs.bsn, FieldNames.basin),
            # The subbasins, used for MPI&OpenMP version
            (cfg.vecs.subbsn, FieldNames.subbasin_id),
        )
        for vec_file, id_field in search_units:
            ImportHydroClimateSites.find_sites(main_db, cfg.climate_db,
                                               vec_file, id_field,
                                               thiessen_file_list,
                                               cfg.thiessen_field, type_list)

        # 2. Import geographic information of each sites to Hydro-Climate database
        existing = clim_db.collection_names()
        for tb_name in [DBTableNames.sites, DBTableNames.var_desc]:
            if StringClass.string_in_list(tb_name, existing):
                continue
            clim_db.create_collection(tb_name)
        ImportHydroClimateSites.variable_table(clim_db, cfg.hydro_climate_vars)
        meteo_loc = ImportHydroClimateSites.sites_table(
            clim_db, cfg.Meteo_sites, DataType.m)
        prec_loc = ImportHydroClimateSites.sites_table(
            clim_db, cfg.prec_sites, DataType.p)
        return meteo_loc, prec_loc
Example #6
0
    def workflow(cfg, main_db, clim_db):
        """Locate the hydro-climate sites and import their site information.

        Sites are searched twice with the Thiessen files of the meteorology
        and precipitation sites: once for the entire basin and once per
        subbasin. Afterwards the sites and variable-description collections
        are created when missing, the variable table is imported, and both
        site tables are imported.

        Args:
            cfg: configuration object supplying file paths and field names.
            main_db: main database object, forwarded to ``find_sites``.
            clim_db: hydro-climate database object.

        Returns:
            tuple: the results of ``sites_table`` for the meteorology sites
            and for the precipitation sites, in that order.
        """
        # 1. Find meteorology and precipitation sites in study area
        thiessen_file_list = [cfg.meteo_sites_thiessen, cfg.prec_sites_thiessen]
        type_list = [DataType.m, DataType.p]
        search_units = (
            # The entire basin, used for OpenMP version
            (cfg.vecs.bsn, FieldNames.basin),
            # The subbasins, used for MPI&OpenMP version
            (cfg.vecs.subbsn, FieldNames.subbasin_id),
        )
        for vec_file, id_field in search_units:
            ImportHydroClimateSites.find_sites(main_db, cfg.climate_db, vec_file,
                                               id_field, thiessen_file_list,
                                               cfg.thiessen_field, type_list)

        # 2. Import geographic information of each sites to Hydro-Climate database
        existing = clim_db.collection_names()
        for tb_name in [DBTableNames.sites, DBTableNames.var_desc]:
            if StringClass.string_in_list(tb_name, existing):
                continue
            clim_db.create_collection(tb_name)
        ImportHydroClimateSites.variable_table(clim_db, cfg.hydro_climate_vars)
        meteo_loc = ImportHydroClimateSites.sites_table(clim_db, cfg.Meteo_sites, DataType.m)
        prec_loc = ImportHydroClimateSites.sites_table(clim_db, cfg.prec_sites, DataType.p)
        return meteo_loc, prec_loc
 def initial_params_from_txt(cfg, maindb):
     """
     Load the initial calibration parameters from a txt file into MongoDB.

     The main parameter collection is dropped when it already exists (created
     otherwise), one document per data row is bulk-inserted, and an ascending
     index on parameter type and name is built afterwards.

     Args:
         cfg: SEIMS config object
         maindb: MongoDB database object
     """
     # start from an empty parameter collection
     existing = maindb.collection_names()
     if StringClass.string_in_list(DBTableNames.main_parameter, existing):
         maindb.drop_collection(DBTableNames.main_parameter)
     else:
         maindb.create_collection(DBTableNames.main_parameter)
     bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
     # first row holds the field names, the remaining rows the parameters
     rows = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
     header = rows[0][0:]
     for row in rows[1:]:
         # blank parameter document, filled field by field below
         record = {ModelParamFields.name: '', ModelParamFields.desc: '',
                   ModelParamFields.unit: '', ModelParamFields.module: '',
                   ModelParamFields.value: DEFAULT_NODATA,
                   ModelParamFields.impact: DEFAULT_NODATA,
                   ModelParamFields.change: 'NC',
                   ModelParamFields.max: DEFAULT_NODATA,
                   ModelParamFields.min: DEFAULT_NODATA,
                   ModelParamFields.type: ''}
         for key in list(record):
             cell = row[header.index(key)]
             if cell != '':
                 # numeric text is stored as float, anything else verbatim
                 record[key] = float(cell) if MathClass.isnumerical(cell) else cell
                 continue
             # Empty cell: the field name is compared with the change-type
             # constants; a field matching none of them keeps its default.
             if StringClass.string_match(key, ModelParamFields.change_ac):
                 record[key] = 0
             elif StringClass.string_match(key, ModelParamFields.change_rc):
                 record[key] = 1
             elif StringClass.string_match(key, ModelParamFields.change_nc):
                 record[key] = 0
             elif StringClass.string_match(key, ModelParamFields.change_vc):
                 record[key] = DEFAULT_NODATA  # Be careful to check NODATA when use!
         bulk.insert(record)
     # execute import operators
     MongoUtil.run_bulk(bulk, 'No operation during initial_params_from_txt.')
     # index the parameters by type, then name, both ascending
     index_keys = [(ModelParamFields.type, ASCENDING),
                   (ModelParamFields.name, ASCENDING)]
     maindb[DBTableNames.main_parameter].create_index(index_keys)
Example #8
0
 def read_optionaldta_section(self, _optdta):
     """Read and sanitize the optional digital terrain analysis settings.

     Nothing is read (and no attribute is set) when the section ``_optdta``
     is absent from the configuration. Otherwise every option is loaded from
     that section, negative numbers and unknown method/statistic names are
     replaced by defaults, and ``self.pretaudem`` is created from
     ``self.ws.pre_dir`` and the resulting flow model.

     Args:
         _optdta: name of the configuration section holding the optional
             parameters for topographic attributes.
     """
     if _optdta not in self.cf.sections():
         return
     self.flow_model = self.cf.getint(_optdta, 'flowmodel')
     self.rpi_method = self.cf.getint(_optdta, 'rpimethod')
     self.dist_exp = self.cf.getint(_optdta, 'distanceexponentforidw')
     self.max_move_dist = self.cf.getfloat(_optdta, 'maxmovedist')
     self.numthresh = self.cf.getint(_optdta, 'numthresh')
     self.d8_stream_thresh = self.cf.getint(_optdta, 'd8streamthreshold')
     self.d8_down_method = self.cf.get(_optdta, 'd8downmethod')
     self.d8_stream_tag = self.cf.getint(_optdta, 'd8streamtag')
     self.d8_up_method = self.cf.get(_optdta, 'd8upmethod')
     self.dinf_stream_thresh = self.cf.getint(_optdta,
                                              'dinfstreamthreshold')
     self.dinf_down_stat = self.cf.get(_optdta, 'dinfdownstat')
     self.dinf_down_method = self.cf.get(_optdta, 'dinfdownmethod')
     self.dinf_dist_down_wg = self.cf.get(_optdta, 'dinfdistdownwg')
     self.propthresh = self.cf.getfloat(_optdta, 'propthresh')
     self.dinf_up_stat = self.cf.get(_optdta, 'dinfupstat')
     self.dinf_up_method = self.cf.get(_optdta, 'dinfupmethod')
     # Both switches are binary: any non-zero value is normalized to 1.
     if self.flow_model != 0:
         self.flow_model = 1
     if self.rpi_method != 0:
         self.rpi_method = 1
     # Negative numeric settings fall back to their defaults.
     if self.dist_exp < 0:
         self.dist_exp = 8
     if self.max_move_dist < 0:
         self.max_move_dist = 50
     if self.numthresh < 0:
         self.numthresh = 20
     if self.d8_stream_thresh < 0:
         self.d8_stream_thresh = 0
     # Accepted names; anything else is replaced by 'Surface' / 'Average'.
     distance_method = ['Horizontal', 'Vertical', 'Pythagoras', 'Surface']
     stat_method = ['Average', 'Maximum', 'Minimum']
     if not StringClass.string_in_list(self.d8_down_method,
                                       distance_method):
         self.d8_down_method = 'Surface'
     if self.d8_stream_tag < 0:
         self.d8_stream_tag = 1
     if not StringClass.string_in_list(self.d8_up_method, distance_method):
         self.d8_up_method = 'Surface'
     if self.dinf_stream_thresh < 0:
         self.dinf_stream_thresh = 0
     # Only an unknown name is replaced by the default; a valid
     # user-supplied statistic or distance method must be kept.
     if not StringClass.string_in_list(self.dinf_down_stat, stat_method):
         self.dinf_down_stat = 'Average'
     if not StringClass.string_in_list(self.dinf_down_method,
                                       distance_method):
         self.dinf_down_method = 'Surface'
     self.dinf_dist_down_wg = AutoFuzSlpPosConfig.check_file_available(
         self.dinf_dist_down_wg)
     if self.propthresh < 0:
         self.propthresh = 0.0
     if not StringClass.string_in_list(self.dinf_up_stat, stat_method):
         self.dinf_up_stat = 'Average'
     if not StringClass.string_in_list(self.dinf_up_method,
                                       distance_method):
         self.dinf_up_method = 'Surface'
     self.pretaudem = PreProcessAttrNames(self.ws.pre_dir, self.flow_model)
Example #9
0
def read_ext_conf(ext_file):
    """Read the configuration file for extracting typical locations.

    The record count is the second tab-separated field of the second line;
    that many record lines follow. For each record the second field is the
    name and the fourth and fifth fields are the minimum and maximum values.
    Values of 'profc', 'horizc' and 'planc' records are multiplied by 1000.

    Args:
        ext_file: path of the configuration file (read as UTF-8).

    Returns:
        list: the record count followed by one ``[name, min, max]`` list per
        record, where min and max are strings of the values rounded to two
        decimals.
    """
    with open(ext_file, 'r', encoding='utf-8') as conf_file:
        lines = conf_file.readlines()

    def _fields(text_line):
        # keep the text before the first newline, then split on tabs
        return text_line.partition('\n')[0].split('\t')

    # Read the number of records
    rec_num = int(_fields(lines[1])[1])
    ext_conf_data = [rec_num]
    for line_no in range(2, rec_num + 2):
        fields = _fields(lines[line_no])
        if StringClass.string_in_list(fields[1], ['profc', 'horizc', 'planc']):
            scale = 1000.
        else:
            scale = 1.
        min_s = str(round(scale * float(fields[3]), 2))
        max_s = str(round(scale * float(fields[4]), 2))
        ext_conf_data.append([fields[1], min_s, max_s])
    return ext_conf_data
Example #10
0
def read_inf_conf(ext_file):
    """Read fuzzy inference configuration file.

    The record count is the second tab-separated field of the third line;
    that many record lines follow. For each record the second field is the
    name, the fourth field is copied verbatim, and the fifth and eighth
    fields are the w1 and w2 values. Values of 'profc', 'horizc' and 'planc'
    records are multiplied by 1000.

    Args:
        ext_file: path of the fuzzy inference configuration file.

    Returns:
        list: the record count followed by one ``[name, field4, w1, w2]``
        list per record, where w1 and w2 are strings of the values rounded
        to two decimals.
    """
    # The context manager closes the file even when reading raises.
    with open(ext_file) as f:
        lines = f.readlines()
    inf_conf_data = []
    # Read the number of records
    rec_num = int(lines[2].split('\n')[0].split('\t')[1])
    inf_conf_data.append(rec_num)
    for i in range(rec_num):
        temp_conf = lines[i + 3].split('\n')[0].split('\t')
        if StringClass.string_in_list(temp_conf[1],
                                      ['profc', 'horizc', 'planc']):
            w1_v = 1000. * float(temp_conf[4])
            w2_v = 1000. * float(temp_conf[7])
        else:
            w1_v = float(temp_conf[4])
            w2_v = float(temp_conf[7])
        w1_s = str(round(w1_v, 2))
        w2_s = str(round(w2_v, 2))
        inf_conf_data.append([temp_conf[1], temp_conf[3], w1_s, w2_s])
    return inf_conf_data
Example #11
0
    def data_from_txt(maindb, hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file):
        """
        Read observed data from txt files and import them to MongoDB.

        The import runs in three steps:
          1. Read the monitoring site files, upsert one site document per
             (site ID, data type) pair and remember the type/unit pairs seen.
          2. Bulk-insert the measurement records of every observed data file;
             records whose site ID is not listed in the site files are skipped.
          3. For every remembered type/unit pair, derive unit-converted records
             using the discharge ('Q') observed at the same site and UTC time.

        Args:
            maindb: Main spatial database
            hydro_clim_db: hydro-climate database
            obs_txts_list: txt file paths of observed data
            sites_info_txts_list: txt file paths of site information
            subbsn_file: subbasin raster file

        Returns:
            None; all results are written to ``hydro_clim_db``.

        Raises:
            ValueError: if an observed data file lacks one of the required
                fields (station ID, type, value).
        """
        # 1. Read monitor station information, and store variables information and station IDs
        variable_lists = []
        site_ids = set()
        for site_file in sites_info_txts_list:
            site_data_items = read_data_items_from_txt(site_file)
            site_flds = site_data_items[0]
            for i in range(1, len(site_data_items)):
                dic = dict()
                # Reset per row: without this a row lacking the type field would
                # raise NameError or silently reuse the previous site's types.
                types = []
                for j, v in enumerate(site_data_items[i]):
                    if StringClass.string_match(site_flds[j], StationFields.id):
                        dic[StationFields.id] = int(v)
                        site_ids.add(dic[StationFields.id])
                    elif StringClass.string_match(site_flds[j], StationFields.name):
                        dic[StationFields.name] = v.strip()
                    elif StringClass.string_match(site_flds[j], StationFields.type):
                        # one site may monitor several types, joined by '-'
                        types = StringClass.split_string(v.strip(), '-')
                    elif StringClass.string_match(site_flds[j], StationFields.lat):
                        dic[StationFields.lat] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.lon):
                        dic[StationFields.lon] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.x):
                        dic[StationFields.x] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.y):
                        dic[StationFields.y] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.unit):
                        dic[StationFields.unit] = v.strip()
                    elif StringClass.string_match(site_flds[j], StationFields.elev):
                        dic[StationFields.elev] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.outlet):
                        dic[StationFields.outlet] = float(v)

                for cur_type in types:
                    site_dic = dict()
                    site_dic[StationFields.id] = dic[StationFields.id]
                    site_dic[StationFields.name] = dic[StationFields.name]
                    site_dic[StationFields.type] = cur_type
                    site_dic[StationFields.lat] = dic[StationFields.lat]
                    site_dic[StationFields.lon] = dic[StationFields.lon]
                    site_dic[StationFields.x] = dic[StationFields.x]
                    site_dic[StationFields.y] = dic[StationFields.y]
                    site_dic[StationFields.elev] = dic[StationFields.elev]
                    site_dic[StationFields.outlet] = dic[StationFields.outlet]
                    # Add SubbasinID field
                    matched, cur_sids = ImportObservedData.match_subbasin(subbsn_file, site_dic,
                                                                          maindb)
                    if not matched:
                        # no subbasin for this site: skip its remaining types too
                        break
                    if len(cur_sids) == 1:  # if only one subbasin ID, store integer
                        cur_subbsn_id_str = cur_sids[0]
                    else:
                        # Join every valid ID. The former filter tested
                        # ``cur_sids is None``, which is never true inside the
                        # loop, so the joined string was always empty.
                        cur_subbsn_id_str = ','.join(str(cid) for cid in cur_sids
                                                     if cid is not None)
                    site_dic[StationFields.subbsn] = cur_subbsn_id_str
                    curfilter = {StationFields.id: site_dic[StationFields.id],
                                 StationFields.type: site_dic[StationFields.type]}
                    hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, site_dic,
                                                                           upsert=True)

                    var_dic = dict()
                    var_dic[StationFields.type] = cur_type
                    var_dic[StationFields.unit] = dic[StationFields.unit]
                    if var_dic not in variable_lists:
                        variable_lists.append(var_dic)
        # 2. Read measurement data and import to MongoDB
        bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
        count = 0
        for measDataFile in obs_txts_list:
            obs_data_items = read_data_items_from_txt(measDataFile)
            tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(measDataFile)
            if tsysin == 'UTCTIME':
                # the zone offset (hours) is taken from the local machine settings
                tzonein = time.timezone / -3600
            # If the data items is EMPTY or only have one header row, then goto
            # next data file.
            if obs_data_items == [] or len(obs_data_items) == 1:
                continue
            obs_flds = obs_data_items[0]
            required_flds = [StationFields.id, DataValueFields.type, DataValueFields.value]

            for fld in required_flds:
                if not StringClass.string_in_list(fld, obs_flds):  # data can not meet the request!
                    raise ValueError('The %s can not meet the required format!' % measDataFile)
            for i, cur_obs_data_item in enumerate(obs_data_items):
                if i == 0:
                    continue
                dic = dict()
                unknown_site = False
                for j, cur_data_value in enumerate(cur_obs_data_item):
                    if StringClass.string_match(obs_flds[j], StationFields.id):
                        dic[StationFields.id] = int(cur_data_value)
                        if dic[StationFields.id] not in site_ids:
                            # A bare ``continue`` here only advanced the field
                            # loop; flag the record so it is really skipped.
                            unknown_site = True
                            break
                    elif StringClass.string_match(obs_flds[j], DataValueFields.type):
                        dic[DataValueFields.type] = cur_data_value
                    elif StringClass.string_match(obs_flds[j], DataValueFields.value):
                        dic[DataValueFields.value] = float(cur_data_value)
                # if current site ID is not included, goto next data item
                if unknown_site:
                    continue
                utc_t = HydroClimateUtilClass.get_utcdatetime_from_field_values(obs_flds,
                                                                                cur_obs_data_item,
                                                                                tsysin, tzonein)
                dic[DataValueFields.local_time] = utc_t + timedelta(minutes=tzonein * 60)
                dic[DataValueFields.time_zone] = tzonein
                dic[DataValueFields.utc] = utc_t
                bulk.insert(dic)
                count += 1
                if count % 500 == 0:  # execute each 500 records
                    MongoUtil.run_bulk(bulk)
                    bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
        if count % 500 != 0:
            # flush the records left over after the last full batch
            MongoUtil.run_bulk(bulk)
        # 3. Add measurement data with unit converted
        # NOTE(review): StationFields.type and DataValueFields.type are used
        # interchangeably below; presumably both resolve to the same key - verify.
        added_dics = []
        for curVar in variable_lists:
            # if the unit is mg/L, then change the Type name with the suffix 'Conc',
            # and convert the corresponding data to kg if the discharge data is
            # available.
            cur_type = curVar[StationFields.type]
            cur_unit = curVar[StationFields.unit]
            # Find data by Type
            for item in hydro_clim_db[DBTableNames.observes].find({StationFields.type: cur_type}):
                dic = dict()
                dic[StationFields.id] = item[StationFields.id]
                dic[DataValueFields.value] = item[DataValueFields.value]
                dic[StationFields.type] = item[StationFields.type]
                dic[DataValueFields.local_time] = item[DataValueFields.local_time]
                dic[DataValueFields.time_zone] = item[DataValueFields.time_zone]
                dic[DataValueFields.utc] = item[DataValueFields.utc]

                if cur_unit == 'mg/L' or cur_unit == 'g/L':
                    # store the concentration record under the '<type>Conc' name
                    dic[StationFields.type] = cur_type + 'Conc'
                    curfilter = {StationFields.id: dic[StationFields.id],
                                 DataValueFields.type: cur_type,
                                 DataValueFields.utc: dic[DataValueFields.utc]}
                    hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic,
                                                                              upsert=True)
                    # restore the plain type name for the converted record below
                    dic[StationFields.type] = cur_type

                # find discharge on current day
                cur_filter = {StationFields.type: 'Q',
                              DataValueFields.utc: dic[DataValueFields.utc],
                              StationFields.id: dic[StationFields.id]}
                q_dic = hydro_clim_db[DBTableNames.observes].find_one(filter=cur_filter)
                if q_dic is None:
                    # no discharge available: nothing to convert
                    continue
                q = q_dic[DataValueFields.value]
                if cur_unit == 'mg/L':
                    # convert mg/L to kg
                    dic[DataValueFields.value] = round(
                            dic[DataValueFields.value] * q * 86400. / 1000., 2)
                elif cur_unit == 'g/L':
                    # convert g/L to kg
                    dic[DataValueFields.value] = round(
                            dic[DataValueFields.value] * q * 86400., 2)
                elif cur_unit == 'kg':
                    dic[StationFields.type] = cur_type + 'Conc'
                    # convert kg to mg/L
                    # NOTE(review): a zero discharge raises ZeroDivisionError here.
                    dic[DataValueFields.value] = round(
                            dic[DataValueFields.value] / q * 1000. / 86400., 2)
                # add new data item
                added_dics.append(dic)
        # import to MongoDB
        for dic in added_dics:
            curfilter = {StationFields.id: dic[StationFields.id],
                         DataValueFields.type: dic[DataValueFields.type],
                         DataValueFields.utc: dic[DataValueFields.utc]}
            hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
Example #12
0
    def data_from_txt(maindb, hydro_clim_db, obs_txts_list,
                      sites_info_txts_list, subbsn_file):
        """
        Import monitoring-site information and observed data from txt files.

        The import runs in three steps:
          1. Read the site information files, upsert one document per
             (site ID, variable type) into the sites collection, and collect
             the site IDs and the distinct type/unit pairs.
          2. Read the observed data files and bulk-insert every record that
             belongs to a known site into the observes collection.
          3. For every collected variable, derive unit-converted records with
             the discharge ('Q') record of the same site and UTC time, and
             upsert them into the observes collection.

        Args:
            maindb: Main spatial database
            hydro_clim_db: hydro-climate database
            obs_txts_list: txt file paths of observed data
            sites_info_txts_list: txt file paths of site information
            subbsn_file: subbasin raster file

        Returns:
            None. All results are written to ``hydro_clim_db``.

        Raises:
            ValueError: if an observed data file lacks one of the required
                fields (station ID, type, value).
        """
        # 1. Read monitor station information, and store variables information and station IDs
        variable_lists = []
        site_ids = set()
        for site_file in sites_info_txts_list:
            site_data_items = read_data_items_from_txt(site_file)
            site_flds = site_data_items[0]
            for site_item in site_data_items[1:]:
                dic = dict()
                types = list()
                units = list()
                for j, v in enumerate(site_item):
                    if StringClass.string_match(site_flds[j],
                                                StationFields.id):
                        dic[StationFields.id] = int(v)
                        site_ids.add(dic[StationFields.id])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.name):
                        dic[StationFields.name] = v.strip()
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.type):
                        # several variable types may share one site, joined by '-'
                        types = StringClass.split_string(v.strip(), '-')
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.lat):
                        dic[StationFields.lat] = float(v)
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.lon):
                        dic[StationFields.lon] = float(v)
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.x):
                        dic[StationFields.x] = float(v)
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.y):
                        dic[StationFields.y] = float(v)
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.unit):
                        # units are given in the same order as the types
                        units = StringClass.split_string(v.strip(), '-')
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.elev):
                        dic[StationFields.elev] = float(v)
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.outlet):
                        dic[StationFields.outlet] = float(v)

                for j, cur_type in enumerate(types):
                    site_dic = dict()
                    site_dic[StationFields.id] = dic[StationFields.id]
                    site_dic[StationFields.name] = dic[StationFields.name]
                    site_dic[StationFields.type] = cur_type
                    site_dic[StationFields.lat] = dic[StationFields.lat]
                    site_dic[StationFields.lon] = dic[StationFields.lon]
                    site_dic[StationFields.x] = dic[StationFields.x]
                    site_dic[StationFields.y] = dic[StationFields.y]
                    site_dic[StationFields.unit] = units[j]
                    site_dic[StationFields.elev] = dic[StationFields.elev]
                    site_dic[StationFields.outlet] = dic[StationFields.outlet]
                    # Add SubbasinID field
                    matched, cur_sids = ImportObservedData.match_subbasin(
                        subbsn_file, site_dic, maindb)
                    if not matched:
                        # the site could not be matched; skip its remaining types too
                        break
                    if len(cur_sids) == 1:
                        # if only one subbasin ID, store it as is (not as a string)
                        cur_subbsn_id_str = cur_sids[0]
                    else:
                        # Filter on each ID, not on the list itself: the former
                        # `if cur_sids is None` test dropped every ID.
                        cur_subbsn_id_str = ','.join(
                            str(cid) for cid in cur_sids if cid is not None)
                    site_dic[StationFields.subbsn] = cur_subbsn_id_str
                    curfilter = {
                        StationFields.id: site_dic[StationFields.id],
                        StationFields.type: site_dic[StationFields.type]
                    }
                    hydro_clim_db[DBTableNames.sites].find_one_and_replace(
                        curfilter, site_dic, upsert=True)

                    var_dic = dict()
                    var_dic[StationFields.type] = types[j]
                    var_dic[StationFields.unit] = units[j]
                    if var_dic not in variable_lists:
                        variable_lists.append(var_dic)

        # 2. Read measurement data and import to MongoDB
        required_flds = [
            StationFields.id, DataValueFields.type, DataValueFields.value
        ]
        bulk = hydro_clim_db[
            DBTableNames.observes].initialize_ordered_bulk_op()
        count = 0
        for measDataFile in obs_txts_list:
            obs_data_items = read_data_items_from_txt(measDataFile)
            tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(
                measDataFile)
            # If the data items is EMPTY or only have one header row, then goto
            # next data file.
            if not obs_data_items or len(obs_data_items) == 1:
                continue
            obs_flds = obs_data_items[0]

            for fld in required_flds:
                if not StringClass.string_in_list(
                        fld, obs_flds):  # data can not meet the request!
                    raise ValueError(
                        'The %s can not meet the required format!' %
                        measDataFile)
            for cur_obs_data_item in obs_data_items[1:]:
                dic = dict()
                known_site = True
                for j, cur_data_value in enumerate(cur_obs_data_item):
                    if StringClass.string_match(obs_flds[j], StationFields.id):
                        dic[StationFields.id] = int(cur_data_value)
                        if dic[StationFields.id] not in site_ids:
                            # Site is not described in any site file. A bare
                            # `continue` here would only advance the field
                            # loop, so flag the record and leave the loop.
                            known_site = False
                            break
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.type):
                        dic[DataValueFields.type] = cur_data_value
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.value):
                        dic[DataValueFields.value] = float(cur_data_value)
                if not known_site:
                    # goto next data item
                    continue
                utc_t = HydroClimateUtilClass.get_utcdatetime_from_field_values(
                    obs_flds, cur_obs_data_item, tsysin, tzonein)
                # local time = UTC + time zone offset (same convention as the
                # daily climate data import)
                dic[DataValueFields.local_time] = utc_t + timedelta(
                    minutes=tzonein * 60)
                dic[DataValueFields.time_zone] = tzonein
                dic[DataValueFields.utc] = utc_t
                bulk.insert(dic)
                count += 1
                if count % 500 == 0:
                    # flush every 500 records and start a new bulk operator
                    MongoUtil.run_bulk(bulk)
                    bulk = hydro_clim_db[
                        DBTableNames.observes].initialize_ordered_bulk_op()
        # execute the remained records
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)

        # 3. Add measurement data with unit converted
        # loop variables list
        added_dics = list()
        for curVar in variable_lists:
            # if the unit is mg/L, then change the Type name with the suffix 'Conc',
            # and convert the corresponding data to kg if the discharge data is
            # available.
            cur_type = curVar[StationFields.type]
            cur_unit = curVar[StationFields.unit]
            # Find data by Type
            for item in hydro_clim_db[DBTableNames.observes].find(
                    {StationFields.type: cur_type}):
                dic = dict()
                dic[StationFields.id] = item[StationFields.id]
                dic[DataValueFields.value] = item[DataValueFields.value]
                dic[StationFields.type] = item[StationFields.type]
                dic[DataValueFields.local_time] = item[
                    DataValueFields.local_time]
                dic[DataValueFields.time_zone] = item[
                    DataValueFields.time_zone]
                dic[DataValueFields.utc] = item[DataValueFields.utc]

                if cur_unit == 'mg/L' or cur_unit == 'g/L':
                    # The stored record is a concentration: rename its Type
                    # to '<Type>Conc' in place ...
                    dic[StationFields.type] = '%sConc' % cur_type
                    curfilter = {
                        StationFields.id: dic[StationFields.id],
                        DataValueFields.type: cur_type,
                        DataValueFields.utc: dic[DataValueFields.utc]
                    }
                    hydro_clim_db[DBTableNames.observes].find_one_and_replace(
                        curfilter, dic, upsert=True)
                    # ... and restore the plain Type for the converted record
                    dic[StationFields.type] = cur_type

                # find discharge on current day
                cur_filter = {
                    StationFields.type: 'Q',
                    DataValueFields.utc: dic[DataValueFields.utc],
                    StationFields.id: dic[StationFields.id]
                }
                q_dic = hydro_clim_db[DBTableNames.observes].find_one(
                    filter=cur_filter)

                if q_dic is None:
                    # no discharge available, so no conversion is possible
                    continue
                q = q_dic[DataValueFields.value]
                if cur_unit == 'mg/L':
                    # convert mg/L to kg
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] * q * 86400. / 1000., 2)
                elif cur_unit == 'g/L':
                    # convert g/L to kg
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] * q * 86400., 2)
                elif cur_unit == 'kg':
                    dic[StationFields.type] = '%sConc' % cur_type
                    # convert kg to mg/L
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] / q * 1000. / 86400., 2)
                # add new data item
                added_dics.append(dic)
        # import to MongoDB
        for dic in added_dics:
            curfilter = {
                StationFields.id: dic[StationFields.id],
                DataValueFields.type: dic[DataValueFields.type],
                DataValueFields.utc: dic[DataValueFields.utc]
            }
            hydro_clim_db[DBTableNames.observes].find_one_and_replace(
                curfilter, dic, upsert=True)
    def model_io_configuration(cfg, maindb):
        """
        Load the SEIMS input/output configuration (file.in and file.out) into MongoDB.

        Three things are written to ``maindb``:
          * every 'Tag|value' line of file.in, one document per line;
          * the initial output settings, one document per data row;
          * the user-desired outputs, which update the matching initial
            output documents (by output ID) and mark them as used.

        Args:
            cfg: SEIMS config object
            maindb: MongoDB database object

        Raises:
            ValueError: if a file.in line does not split into exactly two
                parts by '|', or an output row matches no known field.
            RuntimeError: if the desired-outputs file has no output ID in its
                first row and that row does not have exactly 7 columns.
        """
        file_in_path = cfg.modelcfgs.filein
        file_out_path = cfg.paramcfgs.init_outputs_file

        # Start from empty collections: drop the existing ones, create the
        # missing ones.
        existing_collections = maindb.collection_names()
        for tab_name in [DBTableNames.main_filein, DBTableNames.main_fileout]:
            if StringClass.string_in_list(tab_name, existing_collections):
                maindb.drop_collection(tab_name)
            else:
                maindb.create_collection(tab_name)

        # file.in: each line holds a single 'Tag|value' string
        for line_item in read_data_items_from_txt(file_in_path):
            tag_and_value = StringClass.split_string(line_item[0].strip(), ['|'])
            if len(tag_and_value) != 2:
                raise ValueError(
                    'One item should only have one Tag and one value string,'
                    ' split by "|"')
            maindb[DBTableNames.main_filein].insert({
                ModelCfgFields.tag: tag_and_value[0],
                ModelCfgFields.value: tag_and_value[1],
            })

        # begin to import initial outputs settings
        file_out_items = read_data_items_from_txt(file_out_path)
        bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
        out_field_array = file_out_items[0]

        # Recognized output fields, in the order they are tried against a header.
        known_fields = (
            ModelCfgFields.mod_cls,
            ModelCfgFields.output_id,
            ModelCfgFields.desc,
            ModelCfgFields.unit,
            ModelCfgFields.type,
            ModelCfgFields.stime,
            ModelCfgFields.etime,
            ModelCfgFields.interval,
            ModelCfgFields.interval_unit,
            ModelCfgFields.filename,
            ModelCfgFields.use,
            ModelCfgFields.subbsn,
        )

        def read_output_item(output_fields, item):
            """Map one data row onto the recognized output fields of its header."""
            parsed = dict()
            for col, header in enumerate(output_fields):
                for known in known_fields:
                    if StringClass.string_match(known, header):
                        parsed[known] = item[col]
                        break  # only the first matching field is used
            if not parsed:
                raise ValueError(
                    'There are not any valid output item stored in file.out!')
            return parsed

        # the first row is the header, the rest are data rows
        for out_item in file_out_items[1:]:
            bulk.insert(read_output_item(out_field_array, out_item))
        MongoUtil.run_bulk(
            bulk,
            'No operations to execute when import initial outputs settings.')

        # begin to import the desired outputs
        bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
        data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
        user_out_field_array = data_items[0]
        if ModelCfgFields.output_id not in user_out_field_array:
            # No header row: only the old 7-column layout is accepted, and the
            # default header is put in front of the data rows.
            if len(data_items[0]) != 7:  # For the compatibility of old code!
                raise RuntimeError(
                    'If header information is not provided,'
                    'items in file.out must have 7 columns, i.e., OUTPUTID,'
                    'TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.'
                    'Otherwise, the OUTPUTID MUST existed in the header!')
            user_out_field_array = [
                'OUTPUTID', 'TYPE', 'STARTTIME', 'ENDTIME', 'INTERVAL',
                'INTERVAL_UNIT', 'SUBBASIN'
            ]
            data_items.insert(0, user_out_field_array)

        for user_item in data_items[1:]:
            data_import = read_output_item(user_out_field_array, user_item)
            data_import[ModelCfgFields.use] = 1
            cur_filter = {
                ModelCfgFields.output_id: data_import[ModelCfgFields.output_id]
            }
            bulk.find(cur_filter).update({'$set': data_import})
        # execute import operators
        MongoUtil.run_bulk(
            bulk, 'No operations to excute when import the desired outputs.')
    def lookup_tables_as_collection_and_gridfs(cfg, maindb):
        """Store every lookup table (txt file) both as a collection and as a GridFS file.

        For each table the collection named after the upper-cased table name
        is emptied and refilled with one document per data row. The numeric
        values of each row are additionally packed as floats into a GridFS
        file of the same name: first the row count, then each row prefixed
        with its column count.

        Args:
            cfg: SEIMS config object
            maindb: workflow model database

        Raises:
            ValueError: if the rows of a table do not all have the same
                number of numeric values.
        """
        for tablename, txt_file in list(
                cfg.paramcfgs.lookup_tabs_dict.items()):
            coll_name = tablename.upper()
            # drop the collection if it exists, otherwise create it
            existing_collections = maindb.collection_names()
            if StringClass.string_in_list(coll_name, existing_collections):
                maindb.drop_collection(coll_name)
            else:
                maindb.create_collection(coll_name)
            bulk = maindb[coll_name].initialize_ordered_bulk_op()
            # remove a GridFS file left over from an earlier import
            spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
            if spatial.exists(filename=coll_name):
                old_file = spatial.get_version(filename=coll_name)
                spatial.delete(old_file._id)

            # the first row holds the field names, the rest are data rows
            data_items = read_data_items_from_txt(txt_file)
            field_names = data_items[0][:]
            numeric_rows = []  # rows destined for the GridFS file
            for cur_data_item in data_items[1:]:
                record = {}  # document destined for the collection
                numeric_values = []
                for idx, fld in enumerate(field_names):
                    raw = cur_data_item[idx]
                    if not MathClass.isnumerical(raw):
                        # non-numeric values go to the collection only
                        record[fld] = raw
                        continue
                    number = float(raw)
                    record[fld] = number
                    numeric_values.append(number)
                bulk.insert(record)
                if numeric_values:
                    numeric_rows.append(numeric_values)
            MongoUtil.run_bulk(bulk,
                               'No operations during import %s.' % tablename)

            # begin import gridfs file
            n_row = len(numeric_rows)
            if n_row < 1:
                continue
            n_col = len(numeric_rows[0])
            for row in numeric_rows:
                if len(row) != n_col:
                    raise ValueError(
                        'Please check %s to make sure each item has '
                        'the same numeric dimension. The size of first '
                        'row is: %d, and the current data item is: %d' %
                        (tablename, n_col, len(row)))
                # every stored row starts with its own column count
                row.insert(0, n_col)

            metadic = {
                ModelParamDataUtils.item_count: n_row,
                ModelParamDataUtils.field_count: n_col
            }
            lookup_file = spatial.new_file(filename=coll_name,
                                           metadata=metadic)
            # header: the number of rows as a single float
            lookup_file.write(pack('1f', n_row))
            row_fmt = '%df' % (n_col + 1)
            for row in numeric_rows:
                lookup_file.write(pack(row_fmt, *row))
            lookup_file.close()
Example #15
0
    def scenario_from_texts(cfg, main_db, scenario_db):
        """Import the BMP scenario tables (txt files) into MongoDB.

        Every '.txt' file in the scenario directory becomes a collection in
        ``scenario_db`` named after the upper-cased file name (text before
        the first '.'). Rows that carry local X/Y coordinates get the
        subbasin ID and the distance-to-stream value read from the rasters
        at that location; such a row is skipped when either raster returns
        no value. Finally the name of the scenario database is recorded in
        ``main_db``.

        Args:
            cfg: SEIMS configuration object
            main_db: model workflow database that receives the scenario
                database name
            scenario_db: scenario database
        Returns:
            False if scenarios are disabled in ``cfg``, otherwise True.
        """
        if not cfg.use_scernario:
            return False
        print('Import BMP Scenario Data... ')
        bmp_files = FileClass.get_filename_by_suffixes(cfg.scenario_dir,
                                                       ['.txt'])
        # (table name, full path) of every scenario file
        bmp_tables = [(f.split('.')[0], cfg.scenario_dir + os.path.sep + f)
                      for f in bmp_files]

        # Start from empty collections: drop the existing ones, create the
        # missing ones.
        existing_collections = scenario_db.collection_names()
        for tab_name, _ in bmp_tables:
            coll_name = tab_name.upper()
            if StringClass.string_in_list(coll_name, existing_collections):
                scenario_db.drop_collection(coll_name)
            else:
                scenario_db.create_collection(coll_name)

        # Read subbasin.tif and dist2Stream.tif
        subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
        dist2stream_r = RasterUtilClass.read_raster(
            cfg.spatials.dist2stream_d8)

        for bmp_tab_name, bmp_txt in bmp_tables:
            rows = read_data_items_from_txt(bmp_txt)
            field_array = rows[0]
            for item in rows[1:]:
                dic = {}
                for i, field_name in enumerate(field_array):
                    if not MathClass.isnumerical(item[i]):
                        dic[field_name.upper()] = str(item[i]).upper()
                        continue
                    v = float(item[i])
                    # whole numbers are stored as integers
                    dic[field_name.upper()] = int(v) if v % 1. == 0. else v

                fld_names = list(dic.keys())
                has_xy = (
                    StringClass.string_in_list(ImportScenario2Mongo._LocalX,
                                               fld_names)
                    and StringClass.string_in_list(ImportScenario2Mongo._LocalY,
                                                   fld_names))
                if has_xy:
                    local_x = dic[ImportScenario2Mongo._LocalX.upper()]
                    local_y = dic[ImportScenario2Mongo._LocalY.upper()]
                    subbsn_id = subbasin_r.get_value_by_xy(local_x, local_y)
                    distance = dist2stream_r.get_value_by_xy(local_x, local_y)
                    if subbsn_id is None or distance is None:
                        # no raster value at this location: skip the row
                        continue
                    dic[ImportScenario2Mongo._SUBBASINID] = int(subbsn_id)
                    dic[ImportScenario2Mongo._DISTDOWN] = float(distance)
                scenario_db[bmp_tab_name.upper()].find_one_and_replace(
                    dic, dic, upsert=True)

        # Write BMP database name into Model workflow database
        existing_collections = main_db.collection_names()
        if not StringClass.string_in_list(DBTableNames.main_scenario,
                                          existing_collections):
            main_db.create_collection(DBTableNames.main_scenario)

        bmp_info_dic = {ImportScenario2Mongo._FLD_DB: cfg.bmp_scenario_db}
        main_db[DBTableNames.main_scenario].find_one_and_replace(
            bmp_info_dic, bmp_info_dic, upsert=True)
        return True
Example #16
0
    def daily_data_from_txt(climdb, data_txt_file, sites_info_dict):
        """Import the daily climate data table and its annual statistics.

        Each data row is split into one document per output variable and
        bulk-inserted into the data values collection, after the existing
        documents of those variables have been removed. Per-station
        statistics are accumulated with ``ClimateStats`` and upserted into
        the annual statistics collection: per year and, with the year set to
        ``DEFAULT_NODATA``, for the whole period.

        Args:
            climdb: hydro-climate database
            data_txt_file: txt file path of the daily climate data
            sites_info_dict: site objects keyed by station ID; ``lon_lat()``
                of a site is used when solar radiation has to be derived
                from sunshine duration

        Raises:
            ValueError: if a required field is missing from the file, or if
                a row provides neither solar radiation nor sunshine duration.
        """
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(data_txt_file)
        if tsysin == 'UTCTIME':
            # time.timezone is the local offset in seconds west of UTC
            tzonein = time.timezone / -3600
        clim_data_items = read_data_items_from_txt(data_txt_file)
        clim_flds = clim_data_items[0]
        # format: {StationID1: climateStats1, ...}
        hydro_climate_stats = dict()
        required_flds = [DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws]
        output_flds = [DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp,
                       DataType.rm, DataType.pet, DataType.ws, DataType.sr]
        # Validate the input BEFORE touching the database, so that an invalid
        # file cannot wipe the records that are already stored.
        for fld in required_flds:
            if not StringClass.string_in_list(fld, clim_flds):
                raise ValueError('Meteorological Daily data MUST contain %s!' % fld)
        # remove existed records
        for fld in output_flds:
            climdb[DBTableNames.data_values].remove({'TYPE': fld})
        # Create bulk object
        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        count = 0
        for cur_clim_data_item in clim_data_items[1:]:
            dic = dict()
            cur_ssd = DEFAULT_NODATA

            for j, clim_data_v in enumerate(cur_clim_data_item):
                if StringClass.string_match(clim_flds[j], DataValueFields.id):
                    dic[DataValueFields.id] = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.mean_tmp):
                    dic[DataType.mean_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.min_tmp):
                    dic[DataType.min_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.max_tmp):
                    dic[DataType.max_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.pet):
                    dic[DataType.pet] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.sr):
                    dic[DataType.sr] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.ws):
                    dic[DataType.ws] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.rm):
                    # NOTE(review): presumably converts percent to a fraction; confirm
                    dic[DataType.rm] = float(clim_data_v) * 0.01
                elif StringClass.string_match(clim_flds[j], DataType.ssd):
                    cur_ssd = float(clim_data_v)
            # Get datetime and utc/local transformation
            utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(clim_flds,
                                                                               cur_clim_data_item,
                                                                               tsysin, tzonein)
            dic[DataValueFields.local_time] = utc_time + timedelta(minutes=tzonein * 60)
            dic[DataValueFields.time_zone] = tzonein
            dic[DataValueFields.utc] = utc_time
            dic[DataValueFields.y] = utc_time.year

            # Do if some of these data are not provided
            if DataType.mean_tmp not in dic:
                dic[DataType.mean_tmp] = (dic[DataType.max_tmp] + dic[DataType.min_tmp]) / 2.
            if DataType.sr not in dic:
                if cur_ssd == DEFAULT_NODATA:
                    raise ValueError(DataType.sr + ' or ' + DataType.ssd + ' must be provided!')
                # Derive solar radiation from sunshine duration; this is only
                # possible when the site (and thus its latitude) is known.
                if dic[DataValueFields.id] in sites_info_dict:
                    cur_lon, cur_lat = sites_info_dict[dic[DataValueFields.id]].lon_lat()
                    sr = round(HydroClimateUtilClass.rs(DateClass.day_of_year(utc_time),
                                                        float(cur_ssd), cur_lat * PI / 180.), 1)
                    dic[DataType.sr] = sr

            # one document per available output variable
            for fld in output_flds:
                if fld not in dic:
                    continue
                cur_dic = dict()
                cur_dic[DataValueFields.value] = dic[fld]
                cur_dic[DataValueFields.id] = dic[DataValueFields.id]
                cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                cur_dic[DataValueFields.time_zone] = dic[DataValueFields.time_zone]
                cur_dic[DataValueFields.local_time] = dic[DataValueFields.local_time]
                cur_dic[DataValueFields.type] = fld
                bulk.insert(cur_dic)
                count += 1
                if count % 500 == 0:  # execute each 500 records
                    MongoUtil.run_bulk(bulk)
                    bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()

            if dic[DataValueFields.id] not in hydro_climate_stats:
                hydro_climate_stats[dic[DataValueFields.id]] = ClimateStats()
            hydro_climate_stats[dic[DataValueFields.id]].add_item(dic)
        # execute the remained records
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)
        for cur_climate_stats in hydro_climate_stats.values():
            cur_climate_stats.annual_stats()
        # Create index
        climdb[DBTableNames.data_values].create_index([(DataValueFields.id, ASCENDING),
                                                       (DataValueFields.type, ASCENDING),
                                                       (DataValueFields.utc, ASCENDING)])
        # prepare dic for MongoDB
        for s_id, stats_v in list(hydro_climate_stats.items()):
            for YYYY in list(stats_v.Count.keys()):
                # annual total of potential heat units
                cur_dic = dict()
                cur_dic[DataValueFields.value] = stats_v.PHUTOT[YYYY]
                cur_dic[DataValueFields.id] = s_id
                cur_dic[DataValueFields.y] = YYYY
                cur_dic[VariableDesc.unit] = 'heat units'
                cur_dic[VariableDesc.type] = DataType.phu_tot
                curfilter = {DataValueFields.id: s_id,
                             VariableDesc.type: DataType.phu_tot,
                             DataValueFields.y: YYYY}
                climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                       upsert=True)
                # import annual mean temperature
                cur_dic[VariableDesc.type] = DataType.mean_tmp
                cur_dic[VariableDesc.unit] = 'deg C'
                cur_dic[DataValueFields.value] = stats_v.MeanTmp[YYYY]
                curfilter = {DataValueFields.id: s_id,
                             VariableDesc.type: DataType.mean_tmp,
                             DataValueFields.y: YYYY}
                climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                       upsert=True)
            # Whole-period statistics, stored with the year set to
            # DEFAULT_NODATA. Build a fresh document instead of reusing the
            # one left over from the year loop, which is stale (or belongs to
            # a data record) when the station has no yearly entries.
            cur_dic = dict()
            cur_dic[DataValueFields.value] = stats_v.PHU0
            cur_dic[DataValueFields.id] = s_id
            cur_dic[DataValueFields.y] = DEFAULT_NODATA
            cur_dic[VariableDesc.unit] = 'heat units'
            cur_dic[VariableDesc.type] = DataType.phu0
            curfilter = {DataValueFields.id: s_id,
                         VariableDesc.type: DataType.phu0,
                         DataValueFields.y: DEFAULT_NODATA}
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                   upsert=True)
            # import whole-period mean temperature
            cur_dic[VariableDesc.type] = DataType.mean_tmp0
            cur_dic[VariableDesc.unit] = 'deg C'
            cur_dic[DataValueFields.value] = stats_v.MeanTmp0
            curfilter = {DataValueFields.id: s_id,
                         VariableDesc.type: DataType.mean_tmp0,
                         DataValueFields.y: DEFAULT_NODATA}
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                   upsert=True)
Example #17
0
    def scenario_from_texts(cfg, main_db, scenario_db):
        """Load every BMP scenario text table into the scenario database.

        Each ``*.txt`` file found in ``cfg.scenario_dir`` becomes one collection
        (named after the upper-cased text before the first '.' of the file name).
        Afterwards the scenario database name is recorded in the main database.

        Args:
            cfg: SEIMS configuration object
            main_db: main model database, receives the scenario database name
            scenario_db: scenario database, receives the BMP tables
        Returns:
            False if scenario import is disabled in cfg, otherwise True.
        """
        if not cfg.use_scernario:
            return False
        print('Import BMP Scenario Data... ')
        txt_names = FileClass.get_filename_by_suffixes(cfg.scenario_dir, ['.txt'])
        # (table name, full path) pairs, in the order the files were listed
        tables = [(name.split('.')[0], cfg.scenario_dir + os.path.sep + name)
                  for name in txt_names]

        # start from a clean state: drop an existing collection, create a missing one
        existing = scenario_db.collection_names()
        for tab_name, _ in tables:
            coll_name = tab_name.upper()
            if StringClass.string_in_list(coll_name, existing):
                scenario_db.drop_collection(coll_name)
            else:
                scenario_db.create_collection(coll_name)

        # rasters used to locate point-based BMP records
        subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
        dist2stream_r = RasterUtilClass.read_raster(cfg.spatials.dist2stream_d8)

        x_fld = ImportScenario2Mongo._LocalX
        y_fld = ImportScenario2Mongo._LocalY
        for tab_name, txt_path in tables:
            rows = read_data_items_from_txt(txt_path)
            header, records = rows[0], rows[1:]
            target = scenario_db[tab_name.upper()]
            for record in records:
                doc = dict()
                for col, fld in enumerate(header):
                    raw = record[col]
                    key = fld.upper()
                    if not MathClass.isnumerical(raw):
                        doc[key] = str(raw).upper()
                        continue
                    # whole numbers are stored as int, everything else as float
                    num = float(raw)
                    doc[key] = int(num) if num % 1. == 0. else num

                keys = list(doc.keys())
                has_location = (StringClass.string_in_list(x_fld, keys) and
                                StringClass.string_in_list(y_fld, keys))
                if not has_location:
                    target.find_one_and_replace(doc, doc, upsert=True)
                    continue

                x_val = doc[x_fld.upper()]
                y_val = doc[y_fld.upper()]
                subbsn_id = subbasin_r.get_value_by_xy(x_val, y_val)
                distance = dist2stream_r.get_value_by_xy(x_val, y_val)
                # records for which either raster lookup returns None are not imported
                if subbsn_id is None or distance is None:
                    continue
                doc[ImportScenario2Mongo._SUBBASINID] = int(subbsn_id)
                doc[ImportScenario2Mongo._DISTDOWN] = float(distance)
                target.find_one_and_replace(doc, doc, upsert=True)

        # Write BMP database name into Model workflow database
        if not StringClass.string_in_list(DBTableNames.main_scenario,
                                          main_db.collection_names()):
            main_db.create_collection(DBTableNames.main_scenario)

        bmp_info = {ImportScenario2Mongo._FLD_DB: cfg.bmp_scenario_db}
        main_db[DBTableNames.main_scenario].find_one_and_replace(bmp_info, bmp_info,
                                                                 upsert=True)
        return True
Example #18
0
    def daily_data_from_txt(climdb, data_txt_file, sites_info_dict):
        """Import daily meteorological records from a text file into MongoDB.

        The header is validated first; only then are the existing records of
        the output types removed. Every data row is expanded into one document
        per available variable and bulk-inserted into the data-values
        collection. Per-station statistics are finally upserted into the
        annual-statistics collection.

        Args:
            climdb: climate database object (MongoDB)
            data_txt_file: full path of the daily meteorological data file
            sites_info_dict: mapping of station ID to an object providing
                ``lon_lat()``; only consulted when solar radiation has to be
                derived from the sunshine duration column.
        Raises:
            ValueError: if a required field is missing from the header, or if a
                row provides neither solar radiation nor sunshine duration.
        """
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(
            data_txt_file)
        if tsysin == 'UTCTIME':
            tzonein = time.timezone / -3600
        clim_data_items = read_data_items_from_txt(data_txt_file)
        clim_flds = clim_data_items[0]
        required_flds = [
            DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws
        ]
        output_flds = [
            DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp, DataType.rm,
            DataType.pet, DataType.ws, DataType.sr
        ]
        # Validate the header BEFORE deleting anything, so that an invalid file
        # cannot wipe the records that are already stored.
        for fld in required_flds:
            if not StringClass.string_in_list(fld, clim_flds):
                raise ValueError('Meteorological Daily data MUST contain %s!' %
                                 fld)
        data_coll = climdb[DBTableNames.data_values]
        # remove existed records; filter on the same key the inserts below use
        for fld in output_flds:
            data_coll.remove({DataValueFields.type: fld})

        # format: {StationID1: climateStats1, ...}
        hydro_climate_stats = dict()
        # Create bulk object
        bulk = data_coll.initialize_ordered_bulk_op()
        count = 0
        for cur_clim_data_item in clim_data_items[1:]:  # skip the header row
            dic = dict()
            cur_ssd = DEFAULT_NODATA

            for j, clim_data_v in enumerate(cur_clim_data_item):
                fld_name = clim_flds[j]
                if StringClass.string_match(fld_name, DataValueFields.id):
                    dic[DataValueFields.id] = int(clim_data_v)
                elif StringClass.string_match(fld_name, DataType.mean_tmp):
                    dic[DataType.mean_tmp] = float(clim_data_v)
                elif StringClass.string_match(fld_name, DataType.min_tmp):
                    dic[DataType.min_tmp] = float(clim_data_v)
                elif StringClass.string_match(fld_name, DataType.max_tmp):
                    dic[DataType.max_tmp] = float(clim_data_v)
                elif StringClass.string_match(fld_name, DataType.pet):
                    dic[DataType.pet] = float(clim_data_v)
                elif StringClass.string_match(fld_name, DataType.sr):
                    dic[DataType.sr] = float(clim_data_v)
                elif StringClass.string_match(fld_name, DataType.ws):
                    dic[DataType.ws] = float(clim_data_v)
                elif StringClass.string_match(fld_name, DataType.rm):
                    # stored scaled by 0.01 (presumably percent -> fraction; confirm)
                    dic[DataType.rm] = float(clim_data_v) * 0.01
                elif StringClass.string_match(fld_name, DataType.ssd):
                    cur_ssd = float(clim_data_v)
            # Get datetime and utc/local transformation
            utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(
                clim_flds, cur_clim_data_item, tsysin, tzonein)
            dic[DataValueFields.local_time] = utc_time + timedelta(
                minutes=tzonein * 60)
            dic[DataValueFields.time_zone] = tzonein
            dic[DataValueFields.utc] = utc_time
            dic[DataValueFields.y] = utc_time.year
            station_id = dic[DataValueFields.id]

            # Derive the values that were not provided by the file
            if DataType.mean_tmp not in dic:
                dic[DataType.mean_tmp] = (dic[DataType.max_tmp] +
                                          dic[DataType.min_tmp]) / 2.
            if DataType.sr not in dic:
                if cur_ssd == DEFAULT_NODATA:
                    raise ValueError(DataType.sr + ' or ' + DataType.ssd +
                                     ' must be provided!')
                # Without site information the radiation cannot be derived and
                # is simply left out for this record.
                if station_id in sites_info_dict:
                    _, cur_lat = sites_info_dict[station_id].lon_lat()
                    dic[DataType.sr] = round(
                        HydroClimateUtilClass.rs(
                            DateClass.day_of_year(utc_time),
                            float(cur_ssd), cur_lat * PI / 180.), 1)

            # One document per output variable available for this record
            for fld in output_flds:
                if fld not in dic:
                    continue
                cur_dic = {
                    DataValueFields.value: dic[fld],
                    DataValueFields.id: station_id,
                    DataValueFields.utc: dic[DataValueFields.utc],
                    DataValueFields.time_zone: dic[DataValueFields.time_zone],
                    DataValueFields.local_time: dic[DataValueFields.local_time],
                    DataValueFields.type: fld
                }
                bulk.insert(cur_dic)
                count += 1
                if count % 500 == 0:  # execute each 500 records
                    MongoUtil.run_bulk(bulk)
                    bulk = data_coll.initialize_ordered_bulk_op()

            if station_id not in hydro_climate_stats:
                hydro_climate_stats[station_id] = ClimateStats()
            hydro_climate_stats[station_id].add_item(dic)
        # execute the remained records (the bulk is empty when count is a
        # multiple of 500, in which case there is nothing left to run)
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)
        for cur_climate_stats in hydro_climate_stats.values():
            cur_climate_stats.annual_stats()
        # Create index
        data_coll.create_index([
            (DataValueFields.id, ASCENDING), (DataValueFields.type, ASCENDING),
            (DataValueFields.utc, ASCENDING)
        ])

        annual_coll = climdb[DBTableNames.annual_stats]

        def upsert_stat(s_id, year, var_type, unit, value):
            """Insert or replace the statistic identified by (station, type, year).

            A fresh document is built for every call, so no state is shared
            between statistics or stations.
            """
            stat_doc = {
                DataValueFields.value: value,
                DataValueFields.id: s_id,
                DataValueFields.y: year,
                VariableDesc.unit: unit,
                VariableDesc.type: var_type
            }
            stat_filter = {
                DataValueFields.id: s_id,
                VariableDesc.type: var_type,
                DataValueFields.y: year
            }
            annual_coll.find_one_and_replace(stat_filter, stat_doc, upsert=True)

        for s_id, stats_v in hydro_climate_stats.items():
            # yearly statistics
            for year in stats_v.Count.keys():
                upsert_stat(s_id, year, DataType.phu_tot, 'heat units',
                            stats_v.PHUTOT[year])
                upsert_stat(s_id, year, DataType.mean_tmp, 'deg C',
                            stats_v.MeanTmp[year])
            # station-level statistics, stored with DEFAULT_NODATA as the year
            upsert_stat(s_id, DEFAULT_NODATA, DataType.phu0, 'heat units',
                        stats_v.PHU0)
            upsert_stat(s_id, DEFAULT_NODATA, DataType.mean_tmp0, 'deg C',
                        stats_v.MeanTmp0)
    def lookup_tables_as_collection_and_gridfs(cfg, maindb):
        """Store each lookup table twice: as a collection and as a GridFS file.

        The collection keeps every column of the text file. The GridFS file
        keeps only the numerical columns, packed as floats: first the number of
        rows, then for each row the column count followed by the row values.

        Args:
            cfg: SEIMS config object
            maindb: workflow model database
        Raises:
            ValueError: if the rows of a table do not all contain the same
                number of numerical values.
        """
        for tablename, txt_file in list(cfg.paramcfgs.lookup_tabs_dict.items()):
            coll_name = tablename.upper()
            # reset the collection: drop when present, create when absent
            if StringClass.string_in_list(coll_name, maindb.collection_names()):
                maindb.drop_collection(coll_name)
            else:
                maindb.create_collection(coll_name)
            bulk = maindb[coll_name].initialize_ordered_bulk_op()
            # remove the current version of the GridFS file, if there is one
            spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
            if spatial.exists(filename=coll_name):
                spatial.delete(spatial.get_version(filename=coll_name)._id)

            data_items = read_data_items_from_txt(txt_file)
            field_names = data_items[0][0:]
            numeric_rows = []  # numerical values of each row, for the GridFS file
            for record in data_items[1:]:
                doc = {}  # full record, for the collection
                numbers = []
                for idx, fld in enumerate(field_names):
                    cell = record[idx]
                    if MathClass.isnumerical(cell):
                        number = float(cell)
                        doc[fld] = number
                        numbers.append(number)
                    else:
                        doc[fld] = cell
                bulk.insert(doc)
                if numbers:
                    numeric_rows.append(numbers)
            MongoUtil.run_bulk(bulk, 'No operations during import %s.' % tablename)

            # begin import gridfs file
            n_row = len(numeric_rows)
            if n_row < 1:
                continue
            n_col = len(numeric_rows[0])
            for numbers in numeric_rows:
                if len(numbers) != n_col:
                    raise ValueError('Please check %s to make sure each item has '
                                     'the same numeric dimension. The size of first '
                                     'row is: %d, and the current data item is: %d' %
                                     (tablename, n_col, len(numbers)))

            metadic = {ModelParamDataUtils.item_count: n_row,
                       ModelParamDataUtils.field_count: n_col}
            cur_lookup_gridfs = spatial.new_file(filename=coll_name, metadata=metadic)
            cur_lookup_gridfs.write(pack('%df' % 1, n_row))
            row_fmt = '%df' % (n_col + 1)
            for numbers in numeric_rows:
                # every row is prefixed with its column count
                cur_lookup_gridfs.write(pack(row_fmt, n_col, *numbers))
            cur_lookup_gridfs.close()
    def model_io_configuration(cfg, maindb):
        """Import the SEIMS input/output configuration (file.in and file.out).

        Three steps are performed on freshly reset collections:
          1. every "TAG|VALUE" line of file.in is inserted as one document;
          2. the initial output settings are inserted, keeping only the columns
             that correspond to a known ModelCfgFields name;
          3. the desired outputs listed in the model's file.out update the
             documents with the same output ID and flag them as used.

        Args:
            cfg: SEIMS config object
            maindb: MongoDB database object
        Raises:
            ValueError: if a file.in line is not exactly one tag and one value,
                or an initial output row contains no recognised column.
            RuntimeError: if a desired-output row does not have 7 columns.
        """
        file_in_path = cfg.modelcfgs.filein
        file_out_path = cfg.paramcfgs.init_outputs_file
        # reset both configuration collections
        existing = maindb.collection_names()
        for tab in [DBTableNames.main_filein, DBTableNames.main_fileout]:
            if StringClass.string_in_list(tab, existing):
                maindb.drop_collection(tab)
            else:
                maindb.create_collection(tab)
        file_in_items = read_data_items_from_txt(file_in_path)
        file_out_items = read_data_items_from_txt(file_out_path)

        # step 1: file.in
        for line in file_in_items:
            parts = StringClass.split_string(line[0].strip(), ['|'])
            if len(parts) != 2:
                raise ValueError('One item should only have one Tag and one value string,'
                                 ' split by "|"')
            maindb[DBTableNames.main_filein].insert({ModelCfgFields.tag: parts[0],
                                                     ModelCfgFields.value: parts[1]})

        # step 2: initial outputs settings
        bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
        header = file_out_items[0]
        rows = file_out_items[1:]
        # recognised columns; a header name is stored under the first one it matches
        known_fields = (ModelCfgFields.mod_cls, ModelCfgFields.output_id,
                        ModelCfgFields.desc, ModelCfgFields.unit,
                        ModelCfgFields.type, ModelCfgFields.stime,
                        ModelCfgFields.etime, ModelCfgFields.interval,
                        ModelCfgFields.interval_unit, ModelCfgFields.filename,
                        ModelCfgFields.use, ModelCfgFields.subbsn)
        for row in rows:
            out_doc = dict()
            for col, col_name in enumerate(header):
                for fld in known_fields:
                    if StringClass.string_match(fld, col_name):
                        out_doc[fld] = row[col]
                        break
            if not out_doc:
                raise ValueError('There are not any valid output item stored in file.out!')
            bulk.insert(out_doc)
        MongoUtil.run_bulk(bulk, 'No operations to excute when import initial outputs settings.')

        # step 3: desired outputs
        bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
        desired_fields = (ModelCfgFields.output_id, ModelCfgFields.type,
                          ModelCfgFields.stime, ModelCfgFields.etime,
                          ModelCfgFields.interval, ModelCfgFields.interval_unit,
                          ModelCfgFields.subbsn)
        for record in read_data_items_from_txt(cfg.modelcfgs.fileout):
            if len(record) != 7:
                raise RuntimeError('Items in file.out must have 7 columns, i.e., OUTPUTID,'
                                   'TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.')
            settings = dict(zip(desired_fields, record))
            settings[ModelCfgFields.use] = 1
            # update (not upsert) the document that has the same output ID
            bulk.find({ModelCfgFields.output_id: record[0]}).update({'$set': settings})
        # execute import operators
        MongoUtil.run_bulk(bulk, 'No operations to excute when import the desired outputs.')
 def initial_params_from_txt(cfg, maindb):
     """
     Import the initial calibration parameters from the text data file.

     The parameter collection is reset, one document per data row is inserted
     (numerical cells are stored as float), and an index on parameter type and
     name is created.
     Args:
         cfg: SEIMS config object
         maindb: MongoDB database object
     Raises:
         ValueError: if one of the expected field names is missing from the
             header of a file that contains data rows.
     """
     # reset the collection: drop when present, create when absent
     if StringClass.string_in_list(DBTableNames.main_parameter,
                                   maindb.collection_names()):
         maindb.drop_collection(DBTableNames.main_parameter)
     else:
         maindb.create_collection(DBTableNames.main_parameter)
     bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
     data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
     field_names = data_items[0][0:]
     # values used for an empty cell, tried in this order
     # NOTE(review): the field NAME is matched against the change-type
     # constants below; presumably the record's change type was meant - confirm.
     empty_cell_values = (
         (ModelParamFields.change_ac, 0),
         (ModelParamFields.change_rc, 1),
         (ModelParamFields.change_nc, 0),
         (ModelParamFields.change_vc, DEFAULT_NODATA),  # Be careful to check NODATA when use!
     )
     for record in data_items[1:]:  # the first row is the header
         # start from a blank parameter and overwrite it field by field
         data_import = {
             ModelParamFields.name: '',
             ModelParamFields.desc: '',
             ModelParamFields.unit: '',
             ModelParamFields.module: '',
             ModelParamFields.value: DEFAULT_NODATA,
             ModelParamFields.impact: DEFAULT_NODATA,
             ModelParamFields.change: 'NC',
             ModelParamFields.max: DEFAULT_NODATA,
             ModelParamFields.min: DEFAULT_NODATA,
             ModelParamFields.type: ''
         }
         for key in list(data_import):
             cell = record[field_names.index(key)]
             if cell != '':
                 if MathClass.isnumerical(cell):
                     data_import[key] = float(cell)
                 else:
                     data_import[key] = cell
                 continue
             # empty cell: the blank default stays unless one of the tags matches
             for tag, fallback in empty_cell_values:
                 if StringClass.string_match(key, tag):
                     data_import[key] = fallback
                     break
         bulk.insert(data_import)
     # execute import operators
     MongoUtil.run_bulk(bulk,
                        'No operation during initial_params_from_txt.')
     # index the parameters by type, then by name, both ascending
     maindb[DBTableNames.main_parameter].create_index([
         (ModelParamFields.type, ASCENDING),
         (ModelParamFields.name, ASCENDING)
     ])