def model_io_configuration(cfg, maindb):
        """
        Import Input and Output Configuration of SEIMS, i.e., file.in and file.out
        Args:
            cfg: SEIMS config object
            maindb: MongoDB database object
        """
        file_in_path = cfg.modelcfgs.filein
        file_out_path = cfg.paramcfgs.init_outputs_file
        # create if collection not existed
        c_list = maindb.collection_names()
        conf_tabs = [DBTableNames.main_filein, DBTableNames.main_fileout]
        for item in conf_tabs:
            if not StringClass.string_in_list(item, c_list):
                maindb.create_collection(item)
            else:
                maindb.drop_collection(item)
        file_in_items = read_data_items_from_txt(file_in_path)
        file_out_items = read_data_items_from_txt(file_out_path)

        for item in file_in_items:
            file_in_dict = dict()
            values = StringClass.split_string(StringClass.strip_string(item[0]), ['|'])
            if len(values) != 2:
                raise ValueError("One item should only have one Tag and one value string,"
                                 " split by '|'")
            file_in_dict[ModelCfgFields.tag] = values[0]
            file_in_dict[ModelCfgFields.value] = values[1]
            maindb[DBTableNames.main_filein].insert(file_in_dict)

        # begin to import initial outputs settings
        bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
        out_field_array = file_out_items[0]
        out_data_array = file_out_items[1:]
        # print out_data_array
        for item in out_data_array:
            file_out_dict = dict()
            for i, v in enumerate(out_field_array):
                if StringClass.string_match(ModelCfgFields.mod_cls, v):
                    file_out_dict[ModelCfgFields.mod_cls] = item[i]
                elif StringClass.string_match(ModelCfgFields.output_id, v):
                    file_out_dict[ModelCfgFields.output_id] = item[i]
                elif StringClass.string_match(ModelCfgFields.desc, v):
                    file_out_dict[ModelCfgFields.desc] = item[i]
                elif StringClass.string_match(ModelCfgFields.unit, v):
                    file_out_dict[ModelCfgFields.unit] = item[i]
                elif StringClass.string_match(ModelCfgFields.type, v):
                    file_out_dict[ModelCfgFields.type] = item[i]
                elif StringClass.string_match(ModelCfgFields.stime, v):
                    file_out_dict[ModelCfgFields.stime] = item[i]
                elif StringClass.string_match(ModelCfgFields.etime, v):
                    file_out_dict[ModelCfgFields.etime] = item[i]
                elif StringClass.string_match(ModelCfgFields.interval, v):
                    file_out_dict[ModelCfgFields.interval] = item[i]
                elif StringClass.string_match(ModelCfgFields.interval_unit, v):
                    file_out_dict[ModelCfgFields.interval_unit] = item[i]
                elif StringClass.string_match(ModelCfgFields.filename, v):
                    file_out_dict[ModelCfgFields.filename] = item[i]
                elif StringClass.string_match(ModelCfgFields.use, v):
                    file_out_dict[ModelCfgFields.use] = item[i]
                elif StringClass.string_match(ModelCfgFields.subbsn, v):
                    file_out_dict[ModelCfgFields.subbsn] = item[i]
            if file_out_dict.keys() is []:
                raise ValueError("There are not any valid output item stored in file.out!")
            bulk.insert(file_out_dict)
        bulk.execute()

        # begin to import the desired outputs
        # create bulk operator
        bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
        # read initial parameters from txt file
        data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
        # print (field_names)
        for i, cur_data_item in enumerate(data_items):
            data_import = dict()
            cur_filter = dict()
            # print (cur_data_item)
            if len(cur_data_item) == 7:
                data_import[ModelCfgFields.output_id] = cur_data_item[0]
                data_import[ModelCfgFields.type] = cur_data_item[1]
                data_import[ModelCfgFields.stime] = cur_data_item[2]
                data_import[ModelCfgFields.etime] = cur_data_item[3]
                data_import[ModelCfgFields.interval] = cur_data_item[4]
                data_import[ModelCfgFields.interval_unit] = cur_data_item[5]
                data_import[ModelCfgFields.subbsn] = cur_data_item[6]
                data_import[ModelCfgFields.use] = 1
                cur_filter[ModelCfgFields.output_id] = cur_data_item[0]
            else:
                raise RuntimeError("Items in file.out must have 7 columns, i.e., OUTPUTID,"
                                   "TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.")

            bulk.find(cur_filter).update({'$set': data_import})
        # execute import operators
        bulk.execute()
Ejemplo n.º 2
0
    def data_from_txt(hydro_clim_db, obs_txts_list, sites_info_txts_list,
                      subbsn_file):
        """
        Read observed data from txt file
        Args:
            hydro_clim_db: hydro-climate dababase
            obs_txts_list: txt file paths of observed data
            sites_info_txts_list: txt file paths of site information
            subbsn_file: subbasin raster file

        Returns:
            True or False
        """
        # 1. Read monitor station information, and store variables information and station IDs
        variable_lists = []
        site_ids = []
        for site_file in sites_info_txts_list:
            site_data_items = read_data_items_from_txt(site_file)
            site_flds = site_data_items[0]
            for i in range(1, len(site_data_items)):
                dic = {}
                for j in range(len(site_data_items[i])):
                    if StringClass.string_match(site_flds[j],
                                                StationFields.id):
                        dic[StationFields.id] = int(site_data_items[i][j])
                        site_ids.append(dic[StationFields.id])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.name):
                        dic[StationFields.name] = StringClass.strip_string(
                            site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.type):
                        types = StringClass.split_string(
                            StringClass.strip_string(site_data_items[i][j]),
                            ',')
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.lat):
                        dic[StationFields.lat] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.lon):
                        dic[StationFields.lon] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.x):
                        dic[StationFields.x] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.y):
                        dic[StationFields.y] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.unit):
                        dic[StationFields.unit] = StringClass.strip_string(
                            site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.elev):
                        dic[StationFields.elev] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.outlet):
                        dic[StationFields.outlet] = float(
                            site_data_items[i][j])

                for j, cur_type in enumerate(types):
                    site_dic = dict()
                    site_dic[StationFields.id] = dic[StationFields.id]
                    site_dic[StationFields.name] = dic[StationFields.name]
                    site_dic[StationFields.type] = cur_type
                    site_dic[StationFields.lat] = dic[StationFields.lat]
                    site_dic[StationFields.lon] = dic[StationFields.lon]
                    site_dic[StationFields.x] = dic[StationFields.x]
                    site_dic[StationFields.y] = dic[StationFields.y]
                    site_dic[StationFields.elev] = dic[StationFields.elev]
                    site_dic[StationFields.outlet] = dic[StationFields.outlet]
                    # Add SubbasinID field
                    matched, cur_subbsn_id = ImportObservedData.match_subbasin(
                        subbsn_file, site_dic)
                    if not matched:
                        break
                    cur_subbsn_id_str = ''
                    for tmp_id in cur_subbsn_id:
                        if tmp_id is not None:
                            cur_subbsn_id_str += str(tmp_id) + ','
                    cur_subbsn_id_str = cur_subbsn_id_str[:-1]
                    site_dic[StationFields.id] = cur_subbsn_id_str
                    curfilter = {
                        StationFields.id: site_dic[StationFields.id],
                        StationFields.type: site_dic[StationFields.type]
                    }
                    # print (curfilter)
                    hydro_clim_db[DBTableNames.sites].find_one_and_replace(
                        curfilter, site_dic, upsert=True)

                    var_dic = dict()
                    var_dic[StationFields.type] = types[j]
                    var_dic[StationFields.unit] = dic[StationFields.unit]
                    if var_dic not in variable_lists:
                        variable_lists.append(var_dic)
        site_ids = list(set(site_ids))
        # 2. Read measurement data and import to MongoDB
        bulk = hydro_clim_db[
            DBTableNames.observes].initialize_ordered_bulk_op()
        count = 0
        for measDataFile in obs_txts_list:
            # print measDataFile
            obs_data_items = read_data_items_from_txt(measDataFile)
            # If the data items is EMPTY or only have one header row, then goto
            # next data file.
            if obs_data_items == [] or len(obs_data_items) == 1:
                continue
            obs_flds = obs_data_items[0]
            required_flds = [
                StationFields.id, DataValueFields.y, DataValueFields.m,
                DataValueFields.d, DataValueFields.type, DataValueFields.value
            ]
            for fld in required_flds:
                if not StringClass.string_in_list(
                        fld, obs_flds):  # data can not meet the request!
                    raise ValueError(
                        "The %s can not meet the required format!" %
                        measDataFile)
            for i in range(1, len(obs_data_items)):
                dic = dict()
                cur_y = 0
                cur_m = 0
                cur_d = 0
                for j in range(len(obs_data_items[i])):
                    if StringClass.string_match(obs_flds[j], StationFields.id):
                        dic[StationFields.id] = int(obs_data_items[i][j])
                        # if current site ID is not included, goto next data item
                        if dic[StationFields.id] not in site_ids:
                            continue
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.y):
                        cur_y = int(obs_data_items[i][j])
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.m):
                        cur_m = int(obs_data_items[i][j])
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.d):
                        cur_d = int(obs_data_items[i][j])
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.type):
                        dic[DataValueFields.type] = obs_data_items[i][j]
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.value):
                        dic[DataValueFields.value] = float(
                            obs_data_items[i][j])
                dt = datetime(cur_y, cur_m, cur_d, 0, 0)
                sec = time.mktime(dt.timetuple())
                utc_time = time.gmtime(sec)
                dic[DataValueFields.local_time] = dt
                dic[DataValueFields.time_zone] = time.timezone / 3600
                dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1],
                                                    utc_time[2], utc_time[3])
                curfilter = {
                    StationFields.id: dic[StationFields.id],
                    DataValueFields.type: dic[DataValueFields.type],
                    DataValueFields.utc: dic[DataValueFields.utc]
                }
                bulk.find(curfilter).replace_one(dic)
                count += 1
                if count % 500 == 0:
                    bulk.execute()
                    bulk = hydro_clim_db[
                        DBTableNames.observes].initialize_ordered_bulk_op()
                    # db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
        if count % 500 != 0:
            bulk.execute()
        # 3. Add measurement data with unit converted
        # loop variables list
        added_dics = []
        for curVar in variable_lists:
            # print curVar
            # if the unit is mg/L, then change the Type name with the suffix "Conc",
            # and convert the corresponding data to kg if the discharge data is
            # available.
            cur_type = curVar[StationFields.type]
            cur_unit = curVar[StationFields.unit]
            # Find data by Type
            for item in hydro_clim_db[DBTableNames.observes].find(
                {StationFields.type: cur_type}):
                # print item
                dic = dict()
                dic[StationFields.id] = item[StationFields.id]
                dic[DataValueFields.value] = item[DataValueFields.value]
                dic[StationFields.type] = item[StationFields.type]
                dic[DataValueFields.local_time] = item[
                    DataValueFields.local_time]
                dic[DataValueFields.time_zone] = item[
                    DataValueFields.time_zone]
                dic[DataValueFields.utc] = item[DataValueFields.utc]

                if cur_unit == "mg/L":
                    # update the Type name
                    dic[StationFields.type] = cur_type + "Conc"
                    curfilter = {
                        StationFields.id: dic[StationFields.id],
                        DataValueFields.type: cur_type,
                        DataValueFields.utc: dic[DataValueFields.utc]
                    }
                    hydro_clim_db[DBTableNames.observes].find_one_and_replace(
                        curfilter, dic, upsert=True)
                    dic[StationFields.type] = cur_type

                # find discharge on current day
                cur_filter = {
                    StationFields.type: "Q",
                    DataValueFields.utc: dic[DataValueFields.utc],
                    StationFields.id: dic[StationFields.id]
                }
                q_dic = hydro_clim_db[DBTableNames.observes].find_one(
                    filter=cur_filter)

                q = -9999.
                if q_dic is not None:  # and q_dic.has_key(DataValueFields.value):
                    q = q_dic[DataValueFields.value]
                else:
                    continue
                if cur_unit == "mg/L":
                    # convert mg/L to kg
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] * q * 86400. / 1000., 2)
                elif cur_unit == "kg":
                    dic[StationFields.type] = cur_type + "Conc"
                    # convert kg to mg/L
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] / q * 1000. / 86400., 2)
                # add new data item
                added_dics.append(dic)
        # import to MongoDB
        for dic in added_dics:
            curfilter = {
                StationFields.id: dic[StationFields.id],
                DataValueFields.type: dic[DataValueFields.type],
                DataValueFields.utc: dic[DataValueFields.utc]
            }
            hydro_clim_db[DBTableNames.observes].find_one_and_replace(
                curfilter, dic, upsert=True)