def initial_params_from_txt(cfg, maindb):
     """
     Import initial calibration parameters from a plain text data file.
     Args:
         cfg: SEIMS config object
         maindb: MongoDB database object
     """
     # Create the collection if it does not exist; otherwise drop it so it can be re-imported.
     c_list = maindb.collection_names()
     if not StringClass.string_in_list(DBTableNames.main_parameter, c_list):
         maindb.create_collection(DBTableNames.main_parameter)
     else:
         maindb.drop_collection(DBTableNames.main_parameter)
     # create bulk operator
     bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
     # read initial parameters from txt file
     data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
     field_names = data_items[0]
     # print (field_names)
     for i, cur_data_item in enumerate(data_items):
         if i == 0:
             continue
         # print cur_data_item
         # Initialize a default blank parameter dict.
         data_import = {ModelParamFields.name: '', ModelParamFields.desc: '',
                        ModelParamFields.unit: '', ModelParamFields.module: '',
                        ModelParamFields.value: DEFAULT_NODATA,
                        ModelParamFields.impact: DEFAULT_NODATA,
                        ModelParamFields.change: 'NC',
                        ModelParamFields.max: DEFAULT_NODATA,
                        ModelParamFields.min: DEFAULT_NODATA,
                        ModelParamFields.type: ''}
         for k, v in data_import.items():
             idx = field_names.index(k)
             if cur_data_item[idx] == '':
                 if StringClass.string_match(k, ModelParamFields.change_ac):
                     data_import[k] = 0
                 elif StringClass.string_match(k, ModelParamFields.change_rc):
                     data_import[k] = 1
                 elif StringClass.string_match(k, ModelParamFields.change_nc):
                     data_import[k] = 0
                 elif StringClass.string_match(k, ModelParamFields.change_vc):
                     data_import[k] = DEFAULT_NODATA  # Be careful to check NODATA when this value is used!
             else:
                 if MathClass.isnumerical(cur_data_item[idx]):
                     data_import[k] = float(cur_data_item[idx])
                 else:
                     data_import[k] = cur_data_item[idx]
         bulk.insert(data_import)
     # execute import operators
     bulk.execute()
     # Create an index on parameter type and name, both in ascending order.
     maindb[DBTableNames.main_parameter].create_index([(ModelParamFields.type, ASCENDING),
                                                       (ModelParamFields.name, ASCENDING)])
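
A minimal usage sketch is shown below. It assumes a local MongoDB server, the legacy pymongo bulk API that this code targets, and a hypothetical initial-parameter txt file; the stand-in config object only carries the single attribute the function reads.

from collections import namedtuple

from pymongo import MongoClient

# Hypothetical stand-in for the SEIMS config object; only paramcfgs.init_params_file is used above.
ParamCfgs = namedtuple('ParamCfgs', 'init_params_file')
Cfg = namedtuple('Cfg', 'paramcfgs')
cfg = Cfg(paramcfgs=ParamCfgs(init_params_file='database/model_param_ini.txt'))

client = MongoClient('127.0.0.1', 27017)  # local MongoDB server
initial_params_from_txt(cfg, client['demo_spatial_db'])  # hypothetical spatial database name
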
Example #2
 def variable_table(db, var_file):
     """Import variables table"""
     var_data_items = read_data_items_from_txt(var_file)
     var_flds = var_data_items[0]
     for i in range(1, len(var_data_items)):
         dic = {}
         for j in range(len(var_data_items[i])):
             if StringClass.string_match(var_flds[j], VariableDesc.type):
                 dic[VariableDesc.type] = var_data_items[i][j]
             elif StringClass.string_match(var_flds[j], VariableDesc.unit):
                 dic[VariableDesc.unit] = var_data_items[i][j]
         # If this item already exists, replace it; otherwise insert a new one.
         curfilter = {VariableDesc.type: dic[VariableDesc.type]}
         db[DBTableNames.var_desc].find_one_and_replace(curfilter, dic, upsert=True)
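
A usage sketch under the same assumptions; the database name and the variable description file are placeholders, and the file header must contain the columns matched by VariableDesc.type and VariableDesc.unit.

from pymongo import MongoClient

client = MongoClient('127.0.0.1', 27017)
variable_table(client['demo_climate_db'], 'database/Variables.txt')  # hypothetical db and file names
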
Example #3
    def initialize_landcover_parameters(landcover_file,
                                        landcover_initial_fields_file,
                                        dst_dir):
        """generate initial landcover_init_param parameters"""
        lc_data_items = read_data_items_from_txt(landcover_initial_fields_file)
        # print lc_data_items
        field_names = lc_data_items[0]
        lu_id = -1
        for i, v in enumerate(field_names):
            if StringClass.string_match(v, 'LANDUSE_ID'):
                lu_id = i
                break
        data_items = lc_data_items[1:]
        replace_dicts = dict()
        for item in data_items:
            for i, v in enumerate(item):
                if i != lu_id:
                    if field_names[i].upper() not in replace_dicts.keys():
                        replace_dicts[field_names[i].upper()] = {
                            float(item[lu_id]): float(v)
                        }
                    else:
                        replace_dicts[field_names[i].upper()][float(
                            item[lu_id])] = float(v)
        # print replace_dicts

        # Generate GTIFF
        for item, v in replace_dicts.items():
            filename = dst_dir + SEP + item + '.tif'
            print(filename)
            RasterUtilClass.raster_reclassify(landcover_file, v, filename)
        return replace_dicts['LANDCOVER'].values()
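
A sketch of driving the reclassification, assuming the function is accessible as written (it reads like a static method in the preprocessing scripts); all paths are placeholders.

# Hypothetical inputs: a landuse raster and a landcover initial-parameter table with a LANDUSE_ID column.
landcover_codes = initialize_landcover_parameters(
    landcover_file='spatial/landuse.tif',
    landcover_initial_fields_file='database/landcover_initial_parameters.txt',
    dst_dir='workspace/lc_params')
print(list(landcover_codes))  # distinct LANDCOVER values reclassified from the table
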
Example #4
    def export_landuse_lookup_files_from_mongodb(cfg, maindb):
        """export landuse lookup tables to txt file from MongoDB."""
        lookup_dir = cfg.dirs.lookup
        property_namelist = ModelParamDataUtils.landuse_fields
        property_map = {}
        property_namelist.append('USLE_P')
        query_result = maindb['LANDUSELOOKUP'].find()
        if query_result.count() == 0:
            raise RuntimeError(
                "The LANDUSELOOKUP collection does not exist or is empty!")
        count = 0
        for row in query_result:
            # print row
            value_map = dict()
            for i, p_name in enumerate(property_namelist):
                if StringClass.string_match(p_name, "USLE_P"):
                    # Currently, USLE_P is set as 1 for all landuse.
                    value_map[p_name] = 1
                else:
                    if StringClass.string_match(p_name, "Manning"):
                        value_map[p_name] = row.get(p_name) * 10
                    else:
                        value_map[p_name] = row.get(p_name)
            count += 1
            property_map[count] = value_map

        n = len(property_map)
        UtilClass.rmmkdir(lookup_dir)
        for propertyName in property_namelist:
            with open("%s/%s.txt" % (lookup_dir, propertyName), 'w') as f:
                f.write("%d\n" % n)
                for prop_id in property_map:
                    f.write("%d %f\n" % (prop_id, property_map[prop_id][propertyName]))
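
For reference, each exported lookup file starts with the record count, followed by one '<id> <value>' pair per line. A small reader sketch (a hypothetical helper, not part of SEIMS) is:

def read_lookup_txt(path):
    """Read one lookup file written above: the first line is the record count,
    the remaining lines are '<id> <value>' pairs."""
    with open(path, 'r') as f:
        n = int(f.readline())
        pairs = [f.readline().split() for _ in range(n)]
    return {int(pid): float(val) for pid, val in pairs}
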
Example #5
    def sites_table(hydro_clim_db, site_file, site_type):
        """Import HydroClimate sites table"""
        sites_loc = dict()
        site_data_items = read_data_items_from_txt(site_file)
        site_flds = site_data_items[0]
        for i in range(1, len(site_data_items)):
            dic = dict()
            for j in range(len(site_data_items[i])):
                if StringClass.string_match(site_flds[j], StationFields.id):
                    dic[StationFields.id] = int(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.name):
                    # unicode(site_data_items[i][j], 'gb2312')
                    dic[StationFields.name] = site_data_items[i][j]
                elif StringClass.string_match(site_flds[j], StationFields.x):
                    dic[StationFields.x] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.y):
                    dic[StationFields.y] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.lon):
                    dic[StationFields.lon] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.lat):
                    dic[StationFields.lat] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.elev):
                    dic[StationFields.elev] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.outlet):
                    dic[StationFields.outlet] = float(site_data_items[i][j])
            dic[StationFields.type] = site_type
            curfilter = {StationFields.id: dic[StationFields.id],
                         StationFields.type: dic[StationFields.type]}
            hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, dic, upsert=True)

            if dic[StationFields.id] not in sites_loc.keys():
                sites_loc[dic[StationFields.id]] = SiteInfo(dic[StationFields.id],
                                                            dic[StationFields.name],
                                                            dic[StationFields.lat],
                                                            dic[StationFields.lon],
                                                            dic[StationFields.x],
                                                            dic[StationFields.y],
                                                            dic[StationFields.elev])
        hydro_clim_db[DBTableNames.sites].create_index([(StationFields.id, ASCENDING),
                                                        (StationFields.type, ASCENDING)])
        return sites_loc
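
A usage sketch, assuming a hypothetical precipitation site file whose header matches the StationFields names checked above; the returned dict maps station IDs to SiteInfo objects.

from pymongo import MongoClient

hydro_clim_db = MongoClient('127.0.0.1', 27017)['demo_climate_db']
sites_loc = sites_table(hydro_clim_db, 'climate/Sites_P.txt', 'P')  # hypothetical file and site type
print(sorted(sites_loc.keys()))  # station IDs that were imported
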
Example #6
    def __init__(self, cf):
        # 1. Directories
        self.base_dir = None
        self.clim_dir = None
        self.spatial_dir = None
        self.observe_dir = None
        self.scenario_dir = None
        self.model_dir = None
        self.txt_db_dir = None
        self.preproc_script_dir = None
        self.seims_bin = None
        self.mpi_bin = None
        self.workspace = None
        # 1.1. Directory determined flags
        self.use_observed = True
        self.use_scernario = True
        # 2. MongoDB configuration and database, collation, GridFS names
        self.hostname = '127.0.0.1'  # localhost by default
        self.port = 27017
        self.climate_db = ''
        self.bmp_scenario_db = ''
        self.spatial_db = ''
        # 3. Switch for building SEIMS
        self.cluster = False
        self.storm_mode = False
        self.gen_cn = True
        self.gen_runoff_coef = True
        self.gen_crop = True
        self.gen_iuh = True
        # 4. Climate inputs
        self.hydro_climate_vars = None
        self.prec_sites = None
        self.prec_data = None
        self.Meteo_sites = None
        self.Meteo_data = None
        self.thiessen_field = 'ID'
        # 5. Spatial inputs
        self.prec_sites_thiessen = None
        self.meteo_sites_thiessen = None
        self.dem = None
        self.outlet_file = None
        self.landuse = None
        self.landcover_init_param = None
        self.soil = None
        self.soil_property = None
        self.mgt_field = None
        # 6. Option parameters
        self.is_TauDEM = True
        self.d8acc_threshold = 0
        self.np = 4
        self.d8down_method = 's'
        self.dorm_hr = -1.
        self.temp_base = 0.
        self.imper_perc_in_urban = 0.3
        self.default_reach_depth = 5.
        self.default_landuse = -1
        self.default_soil = -1
        # 1. Directories
        if 'PATH' in cf.sections():
            self.base_dir = cf.get('PATH', 'base_data_dir')
            self.clim_dir = cf.get('PATH', 'climate_data_dir')
            self.spatial_dir = cf.get('PATH', 'spatial_data_dir')
            self.observe_dir = cf.get('PATH', 'measurement_data_dir')
            self.scenario_dir = cf.get('PATH', 'bmp_data_dir')
            self.model_dir = cf.get('PATH', 'model_dir')
            self.txt_db_dir = cf.get('PATH', 'txt_db_dir')
            self.preproc_script_dir = cf.get('PATH', 'preproc_script_dir')
            self.seims_bin = cf.get('PATH', 'cpp_program_dir')
            self.mpi_bin = cf.get('PATH', 'mpiexec_dir')
            self.workspace = cf.get('PATH', 'working_dir')
        else:
            raise ValueError("[PATH] section MUST be existed in *.ini file.")
        if not (FileClass.is_file_exists(self.base_dir)
                and FileClass.is_file_exists(self.model_dir)
                and FileClass.is_file_exists(self.txt_db_dir)
                and FileClass.is_file_exists(self.preproc_script_dir)
                and FileClass.is_file_exists(self.seims_bin)):
            raise IOError(
                "Please check the directories defined in [PATH]: "
                "BASE_DATA_DIR, MODEL_DIR, TXT_DB_DIR, PREPROC_SCRIPT_DIR, "
                "and CPP_PROGRAM_DIR are required!")
        if not FileClass.is_file_exists(self.mpi_bin):
            self.mpi_bin = None
        if not os.path.isdir(self.workspace):
            try:  # first try to make dirs
                os.mkdir(self.workspace)
            except OSError as exc:
                self.workspace = self.model_dir + os.sep + 'preprocess_output'
                print(
                    "WARNING: Creating WORKING_DIR failed: %s. Use the default: %s"
                    % (str(exc), self.workspace))
                if not os.path.exists(self.workspace):
                    os.mkdir(self.workspace)

        self.dirs = DirNameUtils(self.workspace)
        self.logs = LogNameUtils(self.dirs.log)
        self.vecs = VectorNameUtils(self.dirs.geoshp)
        self.taudems = TauDEMFilesUtils(self.dirs.taudem)
        self.spatials = SpatialNamesUtils(self.dirs.geodata2db)
        self.modelcfgs = ModelCfgUtils(self.model_dir)
        self.paramcfgs = ModelParamDataUtils(self.preproc_script_dir + os.sep +
                                             'database')

        if not FileClass.is_file_exists(self.clim_dir):
            print(
                "CLIMATE_DATA_DIR does not exist; trying the default folder name 'climate'."
            )
            self.clim_dir = self.base_dir + os.sep + 'climate'
            if not FileClass.is_file_exists(self.clim_dir):
                raise IOError(
                    "A directory named 'climate' MUST exist under the base data directory!"
                )

        if not FileClass.is_file_exists(self.spatial_dir):
            print(
                "SPATIAL_DATA_DIR does not exist; trying the default folder name 'spatial'."
            )
            self.spatial_dir = self.base_dir + os.sep + 'spatial'
            if not FileClass.is_file_exists(self.spatial_dir):
                raise IOError(
                    "A directory named 'spatial' MUST exist under the base data directory!")

        if not FileClass.is_file_exists(self.observe_dir):
            self.observe_dir = None
            self.use_observed = False

        if not FileClass.is_file_exists(self.scenario_dir):
            self.scenario_dir = None
            self.use_scernario = False

        # 2. MongoDB related
        if 'MONGODB' in cf.sections():
            self.hostname = cf.get('MONGODB', 'hostname')
            self.port = cf.getint('MONGODB', 'port')
            self.climate_db = cf.get('MONGODB', 'climatedbname')
            self.bmp_scenario_db = cf.get('MONGODB', 'BMPScenarioDBName')
            self.spatial_db = cf.get('MONGODB', 'SpatialDBName')
        else:
            raise ValueError(
                "The [MONGODB] section MUST exist in the *.ini configuration file.")
        if not StringClass.is_valid_ip_addr(self.hostname):
            raise ValueError("HOSTNAME illegal defined in [MONGODB]!")

        # 3. Model related switch
        # by default, OpenMP version and daily (longterm) mode will be built
        if 'SWITCH' in cf.sections():
            self.cluster = cf.getboolean('SWITCH', 'forCluster')
            self.storm_mode = cf.getboolean('SWITCH', 'stormMode')
            self.gen_cn = cf.getboolean('SWITCH', 'genCN')
            self.gen_runoff_coef = cf.getboolean('SWITCH', 'genRunoffCoef')
            self.gen_crop = cf.getboolean('SWITCH', 'genCrop')

        if self.storm_mode:
            self.gen_iuh = False
            self.climate_db = ModelNameUtils.standardize_climate_dbname(
                self.climate_db)

        self.spatial_db = ModelNameUtils.standardize_spatial_dbname(
            self.cluster, self.storm_mode, self.spatial_db)

        # 4. Climate Input
        if 'CLIMATE' in cf.sections():
            self.hydro_climate_vars = self.clim_dir + os.sep + cf.get(
                'CLIMATE', 'hydroclimatevarfile')
            self.prec_sites = self.clim_dir + os.sep + cf.get(
                'CLIMATE', 'precsitefile')
            self.prec_data = self.clim_dir + os.sep + cf.get(
                'CLIMATE', 'precdatafile')
            self.Meteo_sites = self.clim_dir + os.sep + cf.get(
                'CLIMATE', 'meteositefile')
            self.Meteo_data = self.clim_dir + os.sep + cf.get(
                'CLIMATE', 'meteodatafile')
            self.thiessen_field = cf.get('CLIMATE', 'thiessenidfield')
        else:
            raise ValueError(
                "Climate input file names MUST be provided in [CLIMATE]!")

        # 5. Spatial Input
        if 'SPATIAL' in cf.sections():
            self.prec_sites_thiessen = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'precsitesthiessen')
            self.meteo_sites_thiessen = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'meteositesthiessen')
            self.dem = self.spatial_dir + os.sep + cf.get('SPATIAL', 'dem')
            self.outlet_file = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'outlet_file')
            if not os.path.exists(self.outlet_file):
                self.outlet_file = None
            self.landuse = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'landusefile')
            self.landcover_init_param = self.txt_db_dir + os.sep \
                                        + cf.get('SPATIAL', 'landcoverinitfile')
            self.soil = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'soilseqnfile')
            self.soil_property = self.txt_db_dir + os.sep + cf.get(
                'SPATIAL', 'soilseqntext')
            self.mgt_field = self.spatial_dir + os.sep + cf.get(
                'SPATIAL', 'mgtfieldfile')
            if not os.path.exists(self.mgt_field) or \
                    StringClass.string_match(self.mgt_field, 'none'):
                self.mgt_field = None
        else:
            raise ValueError(
                "Spatial input file names MUST be provided in [SPATIAL]!")

        # 6. Option parameters
        if 'OPTIONAL_PARAMETERS' in cf.sections():
            self.is_TauDEM = cf.getboolean('OPTIONAL_PARAMETERS', 'istaudemd8')
            self.d8acc_threshold = cf.getfloat('OPTIONAL_PARAMETERS',
                                               'd8accthreshold')
            self.np = cf.getint('OPTIONAL_PARAMETERS', 'np')
            self.d8down_method = cf.get('OPTIONAL_PARAMETERS', 'd8downmethod')
            if StringClass.string_match(self.d8down_method, 'surface'):
                self.d8down_method = 's'
            elif StringClass.string_match(self.d8down_method, 'horizontal'):
                self.d8down_method = 'h'
            elif StringClass.string_match(self.d8down_method, 'pythagoras'):
                self.d8down_method = 'p'
            elif StringClass.string_match(self.d8down_method, 'vertical'):
                self.d8down_method = 'v'
            else:
                self.d8down_method = self.d8down_method.lower()
                if self.d8down_method not in ['s', 'h', 'p', 'v']:
                    self.d8down_method = 'h'
            self.dorm_hr = cf.getfloat('OPTIONAL_PARAMETERS', 'dorm_hr')
            self.temp_base = cf.getfloat('OPTIONAL_PARAMETERS', 't_base')
            self.imper_perc_in_urban = cf.getfloat(
                'OPTIONAL_PARAMETERS', 'imperviouspercinurbancell')
            self.default_reach_depth = cf.getfloat('OPTIONAL_PARAMETERS',
                                                   'default_reach_depth')
            self.default_landuse = cf.getint('OPTIONAL_PARAMETERS',
                                             'defaultlanduse')
            self.default_soil = cf.getint('OPTIONAL_PARAMETERS', 'defaultsoil')
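
A sketch of constructing the cf argument, assuming an *.ini file that provides at least the [PATH], [MONGODB], [CLIMATE], and [SPATIAL] sections read above; the file path and the owning class name are hypothetical.

try:
    from configparser import ConfigParser  # Python 3
except ImportError:
    from ConfigParser import ConfigParser  # Python 2

cf = ConfigParser()
cf.read('seims_preprocess.ini')  # hypothetical configuration file
cfg = PreprocessConfig(cf)       # hypothetical name of the class owning this __init__
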
    def model_io_configuration(cfg, maindb):
        """
        Import Input and Output Configuration of SEIMS, i.e., file.in and file.out
        Args:
            cfg: SEIMS config object
            maindb: MongoDB database object
        """
        file_in_path = cfg.modelcfgs.filein
        file_out_path = cfg.paramcfgs.init_outputs_file
        # Create each configuration collection if it does not exist; otherwise drop it.
        c_list = maindb.collection_names()
        conf_tabs = [DBTableNames.main_filein, DBTableNames.main_fileout]
        for item in conf_tabs:
            if not StringClass.string_in_list(item, c_list):
                maindb.create_collection(item)
            else:
                maindb.drop_collection(item)
        file_in_items = read_data_items_from_txt(file_in_path)
        file_out_items = read_data_items_from_txt(file_out_path)

        for item in file_in_items:
            file_in_dict = dict()
            values = StringClass.split_string(StringClass.strip_string(item[0]), ['|'])
            if len(values) != 2:
                raise ValueError("One item should only have one Tag and one value string,"
                                 " split by '|'")
            file_in_dict[ModelCfgFields.tag] = values[0]
            file_in_dict[ModelCfgFields.value] = values[1]
            maindb[DBTableNames.main_filein].insert(file_in_dict)

        # begin to import initial outputs settings
        bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
        out_field_array = file_out_items[0]
        out_data_array = file_out_items[1:]
        # print out_data_array
        for item in out_data_array:
            file_out_dict = dict()
            for i, v in enumerate(out_field_array):
                if StringClass.string_match(ModelCfgFields.mod_cls, v):
                    file_out_dict[ModelCfgFields.mod_cls] = item[i]
                elif StringClass.string_match(ModelCfgFields.output_id, v):
                    file_out_dict[ModelCfgFields.output_id] = item[i]
                elif StringClass.string_match(ModelCfgFields.desc, v):
                    file_out_dict[ModelCfgFields.desc] = item[i]
                elif StringClass.string_match(ModelCfgFields.unit, v):
                    file_out_dict[ModelCfgFields.unit] = item[i]
                elif StringClass.string_match(ModelCfgFields.type, v):
                    file_out_dict[ModelCfgFields.type] = item[i]
                elif StringClass.string_match(ModelCfgFields.stime, v):
                    file_out_dict[ModelCfgFields.stime] = item[i]
                elif StringClass.string_match(ModelCfgFields.etime, v):
                    file_out_dict[ModelCfgFields.etime] = item[i]
                elif StringClass.string_match(ModelCfgFields.interval, v):
                    file_out_dict[ModelCfgFields.interval] = item[i]
                elif StringClass.string_match(ModelCfgFields.interval_unit, v):
                    file_out_dict[ModelCfgFields.interval_unit] = item[i]
                elif StringClass.string_match(ModelCfgFields.filename, v):
                    file_out_dict[ModelCfgFields.filename] = item[i]
                elif StringClass.string_match(ModelCfgFields.use, v):
                    file_out_dict[ModelCfgFields.use] = item[i]
                elif StringClass.string_match(ModelCfgFields.subbsn, v):
                    file_out_dict[ModelCfgFields.subbsn] = item[i]
            if not file_out_dict:
                raise ValueError("There is no valid output item stored in file.out!")
            bulk.insert(file_out_dict)
        bulk.execute()

        # begin to import the desired outputs
        # create bulk operator
        bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
        # Read the desired output settings from file.out
        data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
        for i, cur_data_item in enumerate(data_items):
            data_import = dict()
            cur_filter = dict()
            # print (cur_data_item)
            if len(cur_data_item) == 7:
                data_import[ModelCfgFields.output_id] = cur_data_item[0]
                data_import[ModelCfgFields.type] = cur_data_item[1]
                data_import[ModelCfgFields.stime] = cur_data_item[2]
                data_import[ModelCfgFields.etime] = cur_data_item[3]
                data_import[ModelCfgFields.interval] = cur_data_item[4]
                data_import[ModelCfgFields.interval_unit] = cur_data_item[5]
                data_import[ModelCfgFields.subbsn] = cur_data_item[6]
                data_import[ModelCfgFields.use] = 1
                cur_filter[ModelCfgFields.output_id] = cur_data_item[0]
            else:
                raise RuntimeError("Items in file.out must have 7 columns, i.e., OUTPUTID,"
                                   "TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.")

            bulk.find(cur_filter).update({'$set': data_import})
        # execute import operators
        bulk.execute()
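
A usage sketch, reusing a cfg object prepared as in the previous sketch and the spatial database it names:

from pymongo import MongoClient

client = MongoClient('127.0.0.1', 27017)
model_io_configuration(cfg, client[cfg.spatial_db])  # imports file.in and file.out settings
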
    def data_from_txt(hydro_clim_db, obs_txts_list, sites_info_txts_list,
                      subbsn_file):
        """
        Read observed data from txt file
        Args:
            hydro_clim_db: hydro-climate database
            obs_txts_list: txt file paths of observed data
            sites_info_txts_list: txt file paths of site information
            subbsn_file: subbasin raster file

        Returns:
            None
        """
        # 1. Read monitor station information, and store variables information and station IDs
        variable_lists = []
        site_ids = []
        for site_file in sites_info_txts_list:
            site_data_items = read_data_items_from_txt(site_file)
            site_flds = site_data_items[0]
            for i in range(1, len(site_data_items)):
                dic = {}
                for j in range(len(site_data_items[i])):
                    if StringClass.string_match(site_flds[j],
                                                StationFields.id):
                        dic[StationFields.id] = int(site_data_items[i][j])
                        site_ids.append(dic[StationFields.id])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.name):
                        dic[StationFields.name] = StringClass.strip_string(
                            site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.type):
                        types = StringClass.split_string(
                            StringClass.strip_string(site_data_items[i][j]),
                            ',')
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.lat):
                        dic[StationFields.lat] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.lon):
                        dic[StationFields.lon] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.x):
                        dic[StationFields.x] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.y):
                        dic[StationFields.y] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.unit):
                        dic[StationFields.unit] = StringClass.strip_string(
                            site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.elev):
                        dic[StationFields.elev] = float(site_data_items[i][j])
                    elif StringClass.string_match(site_flds[j],
                                                  StationFields.outlet):
                        dic[StationFields.outlet] = float(
                            site_data_items[i][j])

                for j, cur_type in enumerate(types):
                    site_dic = dict()
                    site_dic[StationFields.id] = dic[StationFields.id]
                    site_dic[StationFields.name] = dic[StationFields.name]
                    site_dic[StationFields.type] = cur_type
                    site_dic[StationFields.lat] = dic[StationFields.lat]
                    site_dic[StationFields.lon] = dic[StationFields.lon]
                    site_dic[StationFields.x] = dic[StationFields.x]
                    site_dic[StationFields.y] = dic[StationFields.y]
                    site_dic[StationFields.elev] = dic[StationFields.elev]
                    site_dic[StationFields.outlet] = dic[StationFields.outlet]
                    # Add SubbasinID field
                    matched, cur_subbsn_id = ImportObservedData.match_subbasin(
                        subbsn_file, site_dic)
                    if not matched:
                        break
                    cur_subbsn_id_str = ''
                    for tmp_id in cur_subbsn_id:
                        if tmp_id is not None:
                            cur_subbsn_id_str += str(tmp_id) + ','
                    cur_subbsn_id_str = cur_subbsn_id_str[:-1]
                    site_dic[StationFields.id] = cur_subbsn_id_str
                    curfilter = {
                        StationFields.id: site_dic[StationFields.id],
                        StationFields.type: site_dic[StationFields.type]
                    }
                    # print (curfilter)
                    hydro_clim_db[DBTableNames.sites].find_one_and_replace(
                        curfilter, site_dic, upsert=True)

                    var_dic = dict()
                    var_dic[StationFields.type] = types[j]
                    var_dic[StationFields.unit] = dic[StationFields.unit]
                    if var_dic not in variable_lists:
                        variable_lists.append(var_dic)
        site_ids = list(set(site_ids))
        # 2. Read measurement data and import to MongoDB
        bulk = hydro_clim_db[
            DBTableNames.observes].initialize_ordered_bulk_op()
        count = 0
        for measDataFile in obs_txts_list:
            # print measDataFile
            obs_data_items = read_data_items_from_txt(measDataFile)
            # If the data items are EMPTY or there is only a header row,
            # go to the next data file.
            if obs_data_items == [] or len(obs_data_items) == 1:
                continue
            obs_flds = obs_data_items[0]
            required_flds = [
                StationFields.id, DataValueFields.y, DataValueFields.m,
                DataValueFields.d, DataValueFields.type, DataValueFields.value
            ]
            for fld in required_flds:
                if not StringClass.string_in_list(
                        fld, obs_flds):  # the data do not meet the required format
                    raise ValueError(
                        "%s does not meet the required format!" %
                        measDataFile)
            for i in range(1, len(obs_data_items)):
                dic = dict()
                cur_y = 0
                cur_m = 0
                cur_d = 0
                for j in range(len(obs_data_items[i])):
                    if StringClass.string_match(obs_flds[j], StationFields.id):
                        dic[StationFields.id] = int(obs_data_items[i][j])
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.y):
                        cur_y = int(obs_data_items[i][j])
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.m):
                        cur_m = int(obs_data_items[i][j])
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.d):
                        cur_d = int(obs_data_items[i][j])
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.type):
                        dic[DataValueFields.type] = obs_data_items[i][j]
                    elif StringClass.string_match(obs_flds[j],
                                                  DataValueFields.value):
                        dic[DataValueFields.value] = float(
                            obs_data_items[i][j])
                # If the current site ID is not included, skip to the next data item
                if dic.get(StationFields.id) not in site_ids:
                    continue
                dt = datetime(cur_y, cur_m, cur_d, 0, 0)
                sec = time.mktime(dt.timetuple())
                utc_time = time.gmtime(sec)
                dic[DataValueFields.local_time] = dt
                dic[DataValueFields.time_zone] = time.timezone / 3600
                dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1],
                                                    utc_time[2], utc_time[3])
                curfilter = {
                    StationFields.id: dic[StationFields.id],
                    DataValueFields.type: dic[DataValueFields.type],
                    DataValueFields.utc: dic[DataValueFields.utc]
                }
                bulk.find(curfilter).replace_one(dic)
                count += 1
                if count % 500 == 0:
                    bulk.execute()
                    bulk = hydro_clim_db[
                        DBTableNames.observes].initialize_ordered_bulk_op()
                    # db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
        if count % 500 != 0:
            bulk.execute()
        # 3. Add measurement data with unit converted
        # loop variables list
        added_dics = []
        for curVar in variable_lists:
            # print curVar
            # if the unit is mg/L, then change the Type name with the suffix "Conc",
            # and convert the corresponding data to kg if the discharge data is
            # available.
            cur_type = curVar[StationFields.type]
            cur_unit = curVar[StationFields.unit]
            # Find data by Type
            for item in hydro_clim_db[DBTableNames.observes].find(
                {StationFields.type: cur_type}):
                # print item
                dic = dict()
                dic[StationFields.id] = item[StationFields.id]
                dic[DataValueFields.value] = item[DataValueFields.value]
                dic[StationFields.type] = item[StationFields.type]
                dic[DataValueFields.local_time] = item[
                    DataValueFields.local_time]
                dic[DataValueFields.time_zone] = item[
                    DataValueFields.time_zone]
                dic[DataValueFields.utc] = item[DataValueFields.utc]

                if cur_unit == "mg/L":
                    # update the Type name
                    dic[StationFields.type] = cur_type + "Conc"
                    curfilter = {
                        StationFields.id: dic[StationFields.id],
                        DataValueFields.type: cur_type,
                        DataValueFields.utc: dic[DataValueFields.utc]
                    }
                    hydro_clim_db[DBTableNames.observes].find_one_and_replace(
                        curfilter, dic, upsert=True)
                    dic[StationFields.type] = cur_type

                # find discharge on current day
                cur_filter = {
                    StationFields.type: "Q",
                    DataValueFields.utc: dic[DataValueFields.utc],
                    StationFields.id: dic[StationFields.id]
                }
                q_dic = hydro_clim_db[DBTableNames.observes].find_one(
                    filter=cur_filter)

                q = -9999.
                if q_dic is not None:  # and q_dic.has_key(DataValueFields.value):
                    q = q_dic[DataValueFields.value]
                else:
                    continue
                if cur_unit == "mg/L":
                    # convert mg/L to kg
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] * q * 86400. / 1000., 2)
                elif cur_unit == "kg":
                    dic[StationFields.type] = cur_type + "Conc"
                    # convert kg to mg/L
                    dic[DataValueFields.value] = round(
                        dic[DataValueFields.value] / q * 1000. / 86400., 2)
                # add new data item
                added_dics.append(dic)
        # import to MongoDB
        for dic in added_dics:
            curfilter = {
                StationFields.id: dic[StationFields.id],
                DataValueFields.type: dic[DataValueFields.type],
                DataValueFields.utc: dic[DataValueFields.utc]
            }
            hydro_clim_db[DBTableNames.observes].find_one_and_replace(
                curfilter, dic, upsert=True)
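
A usage sketch with hypothetical paths, assuming the function is callable as shown (it reads like a static method of the observed-data importer); the observed-data and site-information files must carry the header fields validated above, and the subbasin raster comes from the watershed delineation step.

from pymongo import MongoClient

hydro_clim_db = MongoClient('127.0.0.1', 27017)['demo_climate_db']
data_from_txt(hydro_clim_db,
              obs_txts_list=['observed/obs_2012.txt'],          # hypothetical observed data file
              sites_info_txts_list=['observed/obs_sites.txt'],  # hypothetical site information file
              subbsn_file='workspace/subbasin.tif')             # subbasin raster from delineation
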
    def daily_data_from_txt(climdb, data_txt_file, sites_info_dict):
        """Import climate data table"""
        # Delete existing meteorological data
        climdb[DBTableNames.data_values].remove(
            {DataValueFields.type: DataType.m})

        clim_data_items = read_data_items_from_txt(data_txt_file)
        clim_flds = clim_data_items[0]
        # PHUCalDic is used for Calculating potential heat units (PHU)
        # for each climate station and each year.
        # format is {StationID:{Year1:[values],Year2:[Values]...}, ...}
        # PHUCalDic = {}
        # format: {StationID1: climateStats1, ...}
        hydro_climate_stats = {}
        required_flds = [
            DataValueFields.y, DataValueFields.m, DataValueFields.d,
            DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws
        ]
        for fld in required_flds:
            if not StringClass.string_in_list(fld, clim_flds):
                raise ValueError(
                    "Meteorological daily data is invalid, please check!")
        # Create bulk object
        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        count = 0
        for i, cur_clim_data_item in enumerate(clim_data_items):
            if i == 0:
                continue
            dic = dict()
            cur_ssd = DEFAULT_NODATA
            cur_y = 0
            cur_m = 0
            cur_d = 0
            for j, clim_data_v in enumerate(cur_clim_data_item):
                if StringClass.string_match(clim_flds[j], DataValueFields.id):
                    dic[DataValueFields.id] = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataValueFields.y):
                    cur_y = int(clim_data_v)
                    dic[DataValueFields.y] = cur_y
                elif StringClass.string_match(clim_flds[j], DataValueFields.m):
                    cur_m = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataValueFields.d):
                    cur_d = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.mean_tmp):
                    dic[DataType.mean_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.min_tmp):
                    dic[DataType.min_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.max_tmp):
                    dic[DataType.max_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.pet):
                    dic[DataType.pet] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.sr):
                    dic[DataType.sr] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.ws):
                    dic[DataType.ws] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.rm):
                    dic[DataType.rm] = float(clim_data_v) * 0.01
                elif StringClass.string_match(clim_flds[j], DataType.ssd):
                    cur_ssd = float(clim_data_v)
            # Date transformation
            dt = datetime(cur_y, cur_m, cur_d, 0, 0)
            sec = time.mktime(dt.timetuple())
            utc_time = time.gmtime(sec)
            dic[DataValueFields.local_time] = dt
            dic[DataValueFields.time_zone] = time.timezone / 3600
            dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1],
                                                utc_time[2], utc_time[3])

            # Do if some of these data are not provided
            if DataType.mean_tmp not in dic.keys():
                dic[DataType.mean_tmp] = (dic[DataType.max_tmp] +
                                          dic[DataType.min_tmp]) / 2.
            if DataType.sr not in dic.keys():
                if cur_ssd == DEFAULT_NODATA:
                    raise ValueError(DataType.sr + " or " + DataType.ssd +
                                     " must be provided!")
                else:
                    if dic[DataValueFields.id] in sites_info_dict.keys():
                        cur_lon, cur_lat = sites_info_dict[dic[
                            DataValueFields.id]].lon_lat()
                        dic[DataType.sr] = round(
                            HydroClimateUtilClass.rs(DateClass.day_of_year(dt),
                                                     float(cur_ssd),
                                                     cur_lat * PI / 180.), 1)
            output_flds = [
                DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp,
                DataType.rm, DataType.pet, DataType.ws, DataType.sr
            ]
            for fld in output_flds:
                cur_dic = dict()
                if fld in dic.keys():
                    cur_dic[DataValueFields.value] = dic[fld]
                    cur_dic[DataValueFields.id] = dic[DataValueFields.id]
                    cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                    cur_dic[DataValueFields.time_zone] = dic[
                        DataValueFields.time_zone]
                    cur_dic[DataValueFields.local_time] = dic[
                        DataValueFields.local_time]
                    cur_dic[DataValueFields.type] = fld
                    # The old code inserted or updated one item at a time, which was quite
                    # inefficient; updated to use the bulk operation interface. lj
                    bulk.insert(cur_dic)
                    count += 1
                    if count % 500 == 0:  # execute every 500 records
                        bulk.execute()
                        bulk = climdb[
                            DBTableNames.
                            data_values].initialize_ordered_bulk_op()

            if dic[DataValueFields.id] not in hydro_climate_stats.keys():
                hydro_climate_stats[dic[DataValueFields.id]] = ClimateStats()
            hydro_climate_stats[dic[DataValueFields.id]].add_item(dic)
        # Execute the remaining records
        if count % 500 != 0:
            bulk.execute()
        for item, cur_climate_stats in hydro_climate_stats.items():
            cur_climate_stats.annual_stats()
        # Create index
        climdb[DBTableNames.data_values].create_index([
            (DataValueFields.id, ASCENDING), (DataValueFields.type, ASCENDING),
            (DataValueFields.utc, ASCENDING)
        ])
        # prepare dic for MongoDB
        for s_id, stats_v in hydro_climate_stats.items():
            for YYYY in stats_v.Count.keys():
                cur_dic = dict()
                cur_dic[DataValueFields.value] = stats_v.PHUTOT[YYYY]
                cur_dic[DataValueFields.id] = s_id
                cur_dic[DataValueFields.y] = YYYY
                cur_dic[VariableDesc.unit] = "heat units"
                cur_dic[VariableDesc.type] = DataType.phu_tot
                curfilter = {
                    DataValueFields.id: s_id,
                    VariableDesc.type: DataType.phu_tot,
                    DataValueFields.y: YYYY
                }
                climdb[DBTableNames.annual_stats].find_one_and_replace(
                    curfilter, cur_dic, upsert=True)
                # import annual mean temperature
                cur_dic[VariableDesc.type] = DataType.mean_tmp
                cur_dic[VariableDesc.unit] = "deg C"
                cur_dic[DataValueFields.value] = stats_v.MeanTmp[YYYY]
                curfilter = {
                    DataValueFields.id: s_id,
                    VariableDesc.type: DataType.mean_tmp,
                    DataValueFields.y: YYYY
                }
                climdb[DBTableNames.annual_stats].find_one_and_replace(
                    curfilter, cur_dic, upsert=True)
            cur_dic[DataValueFields.value] = stats_v.PHU0
            cur_dic[DataValueFields.id] = s_id
            cur_dic[DataValueFields.y] = DEFAULT_NODATA
            cur_dic[VariableDesc.unit] = "heat units"
            cur_dic[VariableDesc.type] = DataType.phu0
            curfilter = {
                DataValueFields.id: s_id,
                VariableDesc.type: DataType.phu0,
                DataValueFields.y: DEFAULT_NODATA
            }
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter,
                                                                   cur_dic,
                                                                   upsert=True)
            # import annual mean temperature
            cur_dic[VariableDesc.type] = DataType.mean_tmp0
            cur_dic[VariableDesc.unit] = "deg C"
            cur_dic[DataValueFields.value] = stats_v.MeanTmp0
            curfilter = {
                DataValueFields.id: s_id,
                VariableDesc.type: DataType.mean_tmp0,
                DataValueFields.y: DEFAULT_NODATA
            }
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter,
                                                                   cur_dic,
                                                                   upsert=True)
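
A usage sketch tying two of the examples together: sites_table() returns the {StationID: SiteInfo} dict that daily_data_from_txt() needs to estimate solar radiation from sunshine hours; all file names are placeholders.

from pymongo import MongoClient

climdb = MongoClient('127.0.0.1', 27017)['demo_climate_db']
site_info = sites_table(climdb, 'climate/Sites_M.txt', 'M')         # hypothetical meteorological sites
daily_data_from_txt(climdb, 'climate/meteo_daily.txt', site_info)   # hypothetical daily records
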
Example #10
    def regular_data_from_txt(climdb, data_file):
        """Regular precipitation data from text file."""
        # Delete existing precipitation data
        climdb[DBTableNames.data_values].remove(
            {DataValueFields.type: DataType.p})

        clim_data_items = read_data_items_from_txt(data_file)
        clim_flds = clim_data_items[0]
        station_id = []
        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        count = 0
        for i in range(3, len(clim_flds)):
            station_id.append(clim_flds[i])
        for i, clim_data_item in enumerate(clim_data_items):
            if i == 0:
                continue
            dic = dict()
            precipitation = []
            cur_y = 0
            cur_m = 0
            cur_d = 0
            for j, clim_data_v in enumerate(clim_data_item):
                if StringClass.string_match(clim_flds[j], DataValueFields.y):
                    cur_y = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataValueFields.m):
                    cur_m = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataValueFields.d):
                    cur_d = int(clim_data_v)
                else:
                    for k, cur_id in enumerate(station_id):
                        if StringClass.string_match(clim_flds[j], cur_id):
                            precipitation.append(float(clim_data_v))

            dt = datetime(cur_y, cur_m, cur_d, 0, 0)
            sec = time.mktime(dt.timetuple())
            utc_time = time.gmtime(sec)
            dic[DataValueFields.local_time] = dt
            dic[DataValueFields.time_zone] = time.timezone / 3600.
            dic[DataValueFields.utc] = datetime(utc_time[0], utc_time[1],
                                                utc_time[2], utc_time[3])

            for j, cur_id in enumerate(station_id):
                cur_dic = dict()
                cur_dic[DataValueFields.value] = precipitation[j]
                cur_dic[DataValueFields.id] = int(cur_id)
                cur_dic[DataValueFields.type] = DataType.p
                cur_dic[DataValueFields.time_zone] = dic[
                    DataValueFields.time_zone]
                cur_dic[DataValueFields.local_time] = dic[
                    DataValueFields.local_time]
                cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                bulk.insert(cur_dic)
                count += 1
                if count % 500 == 0:  # execute every 500 records
                    bulk.execute()
                    bulk = climdb[
                        DBTableNames.data_values].initialize_ordered_bulk_op()
        if count % 500 != 0:
            bulk.execute()
        # Create index
        climdb[DBTableNames.data_values].create_index([
            (DataValueFields.id, ASCENDING), (DataValueFields.type, ASCENDING),
            (DataValueFields.utc, ASCENDING)
        ])
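
A final usage sketch, assuming a precipitation file whose first header columns are the year, month, and day fields matched above, followed by one column per station ID; the database and file names are placeholders.

from pymongo import MongoClient

climdb = MongoClient('127.0.0.1', 27017)['demo_climate_db']
regular_data_from_txt(climdb, 'climate/precipitation_daily.txt')  # hypothetical precipitation file
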