Example #1
0
    def workflow(cfg, maindb, climdb):
        """
        This function mainly to import measurement data to MongoDB
        data type may include Q (discharge, m3/s), SED (mg/L), TN (mg/L), TP (mg/L), etc.
        the required parameters that defined in configuration file (*.ini)
        """
        if not cfg.use_observed:
            return False
        c_list = climdb.collection_names()
        if not StringClass.string_in_list(DBTableNames.observes, c_list):
            climdb.create_collection(DBTableNames.observes)
        else:
            climdb.drop_collection(DBTableNames.observes)
        if not StringClass.string_in_list(DBTableNames.sites, c_list):
            climdb.create_collection(DBTableNames.sites)
        if not StringClass.string_in_list(DBTableNames.var_desc, c_list):
            climdb.create_collection(DBTableNames.var_desc)

        file_list = FileClass.get_full_filename_by_suffixes(
            cfg.observe_dir, ['.txt', '.csv'])
        meas_file_list = list()
        site_loc = list()
        for fl in file_list:
            if StringClass.is_substring('observed_', fl):
                meas_file_list.append(fl)
            else:
                site_loc.append(fl)
        ImportObservedData.data_from_txt(maindb, climdb, meas_file_list,
                                         site_loc, cfg.spatials.subbsn)
        return True
Example #2
0
    def regular_data_from_txt(climdb, data_file):
        """Regular precipitation data from text file."""
        # delete existing precipitation data
        climdb[DBTableNames.data_values].remove(
            {DataValueFields.type: DataType.p})
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(
            data_file)
        if tsysin == 'UTCTIME':
            tzonein = time.timezone // -3600  # hours east of UTC (integer division)
        clim_data_items = read_data_items_from_txt(data_file)
        clim_flds = clim_data_items[0]
        station_id = list()
        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        count = 0
        for fld in clim_flds:
            if not StringClass.string_in_list(fld, [
                    DataValueFields.dt, DataValueFields.y, DataValueFields.m,
                    DataValueFields.d, DataValueFields.hour,
                    DataValueFields.minute, DataValueFields.second
            ]):
                station_id.append(fld)
        for i, clim_data_item in enumerate(clim_data_items):
            if i == 0:
                continue
            dic = dict()
            precipitation = list()

            for j, clim_data_v in enumerate(clim_data_item):
                if StringClass.string_in_list(clim_flds[j], station_id):
                    precipitation.append(float(clim_data_v))
            utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(
                clim_flds, clim_data_item, tsysin, tzonein)
            dic[DataValueFields.local_time] = utc_time + timedelta(
                minutes=tzonein * 60)
            dic[DataValueFields.time_zone] = tzonein
            dic[DataValueFields.utc] = utc_time

            for j, cur_id in enumerate(station_id):
                cur_dic = dict()
                cur_dic[DataValueFields.value] = precipitation[j]
                cur_dic[DataValueFields.id] = int(cur_id)
                cur_dic[DataValueFields.type] = DataType.p
                cur_dic[DataValueFields.time_zone] = dic[
                    DataValueFields.time_zone]
                cur_dic[DataValueFields.local_time] = dic[
                    DataValueFields.local_time]
                cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                bulk.insert(cur_dic)
                count += 1
                if count % 500 == 0:  # execute every 500 records
                    MongoUtil.run_bulk(bulk)
                    bulk = climdb[
                        DBTableNames.data_values].initialize_ordered_bulk_op()
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)
        # Create index
        climdb[DBTableNames.data_values].create_index([
            (DataValueFields.id, ASCENDING), (DataValueFields.type, ASCENDING),
            (DataValueFields.utc, ASCENDING)
        ])
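A note on the bulk pattern above: `initialize_ordered_bulk_op` belongs to the legacy
PyMongo bulk API (removed in PyMongo 4.x). A minimal sketch of the same batched-insert
idea with the current `bulk_write` API, assuming a running local MongoDB; the
database/collection names and `batched_insert` helper are illustrative:

    from pymongo import MongoClient, InsertOne

    def batched_insert(coll, docs, batch_size=500):
        """Insert documents in ordered batches, mirroring the 500-record flushes above."""
        ops = []
        for doc in docs:
            ops.append(InsertOne(doc))
            if len(ops) == batch_size:
                coll.bulk_write(ops, ordered=True)
                ops = []
        if ops:  # flush the remainder
            coll.bulk_write(ops, ordered=True)

    # Usage sketch (connection string and names are illustrative):
    # coll = MongoClient('localhost', 27017)['demo_climate']['DATA_VALUES']
    # batched_insert(coll, ({'VALUE': float(v)} for v in range(1234)))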
Example #3
0
    def calculate_environment(self):
        if not self.modelrun:  # no evaluation has been done
            self.economy = self.worst_econ
            self.environment = self.worst_env
            return
        rfile = self.modelout_dir + os.path.sep + self.bmps_info['ENVEVAL']

        if not FileClass.is_file_exists(rfile):
            time.sleep(5)  # sleep 5 seconds to wait for the output
        if not FileClass.is_file_exists(rfile):
            print('WARNING: Although the SEIMS model ran successfully, the desired output %s'
                  ' cannot be found!' % rfile)
            self.economy = self.worst_econ
            self.environment = self.worst_env
            return

        base_amount = self.bmps_info['BASE_ENV']
        if StringClass.string_match(rfile.split('.')[-1], 'tif'):  # Raster data
            rr = RasterUtilClass.read_raster(rfile)
            soil_erosion_amount = rr.get_sum() / self.timerange  # unit: year
            # reduction rate of soil erosion
            self.environment = (base_amount - soil_erosion_amount) / base_amount
        elif StringClass.string_match(rfile.split('.')[-1], 'txt'):  # Time series data
            sed_sum = read_simulation_from_txt(self.modelout_dir)  # TODO, fix it later, lj
            self.environment = (base_amount - sed_sum) / base_amount
        else:
            self.economy = self.worst_econ
            self.environment = self.worst_env
            return
Example #4
0
    def calculate_environment(self):
        if not self.modelrun:  # no evaluation has been done
            self.economy = self.worst_econ
            self.environment = self.worst_env
            return
        rfile = self.modelout_dir + os.path.sep + self.bmps_info['ENVEVAL']

        if not FileClass.is_file_exists(rfile):
            time.sleep(5)  # sleep 5 seconds to wait for the output
        if not FileClass.is_file_exists(rfile):
            print(
                'WARNING: Although the SEIMS model ran successfully, the desired output %s'
                ' cannot be found!' % rfile)
            self.economy = self.worst_econ
            self.environment = self.worst_env
            return

        base_amount = self.bmps_info['BASE_ENV']
        if StringClass.string_match(rfile.split('.')[-1],
                                    'tif'):  # Raster data
            rr = RasterUtilClass.read_raster(rfile)
            soil_erosion_amount = rr.get_sum() / self.timerange  # unit: year
            # reduction rate of soil erosion
            self.environment = (base_amount -
                                soil_erosion_amount) / base_amount
        elif StringClass.string_match(rfile.split('.')[-1],
                                      'txt'):  # Time series data
            sed_sum = read_simulation_from_txt(
                self.modelout_dir)  # TODO, fix it later, lj
            self.environment = (base_amount - sed_sum) / base_amount
        else:
            self.economy = self.worst_econ
            self.environment = self.worst_env
            return
Example #5
0
    def workflow(cfg, maindb, climdb):
        """
        This function mainly to import measurement data to MongoDB
        data type may include Q (discharge, m3/s), SED (mg/L), tn (mg/L), tp (mg/L), etc.
        the required parameters that defined in configuration file (*.ini)
        """
        if not cfg.use_observed:
            return False
        c_list = climdb.collection_names()
        if not StringClass.string_in_list(DBTableNames.observes, c_list):
            climdb.create_collection(DBTableNames.observes)
        else:
            climdb.drop_collection(DBTableNames.observes)
        if not StringClass.string_in_list(DBTableNames.sites, c_list):
            climdb.create_collection(DBTableNames.sites)
        if not StringClass.string_in_list(DBTableNames.var_desc, c_list):
            climdb.create_collection(DBTableNames.var_desc)

        file_list = FileClass.get_full_filename_by_suffixes(cfg.observe_dir, ['.txt'])
        meas_file_list = []
        site_loc = []
        for fl in file_list:
            if StringClass.is_substring('observed_', fl):
                meas_file_list.append(fl)
            else:
                site_loc.append(fl)
        ImportObservedData.data_from_txt(maindb, climdb, meas_file_list, site_loc,
                                         cfg.spatials.subbsn)
        return True
Example #6
0
 def read_optionfuzinf_section(self, _optfuzinf):
     """Optional parameter-settings for Fuzzy slope position inference."""
     if _optfuzinf not in self.cf.sections():
         return
     if self.cf.has_option(_optfuzinf, 'inferparams'):
         fuzinf_strs = self.cf.get(_optfuzinf, 'inferparams')
         if StringClass.string_match(fuzinf_strs, 'none'):
             return
         self.inferparam = dict()
         fuzinf_types = StringClass.split_string(fuzinf_strs, ';')
         if len(fuzinf_types) != len(self.slppostype):
             raise RuntimeError(
                 "InferParams (%s) MUST be consistent with slope position types"
                 " and separated by ';'!" % fuzinf_strs)
         for i, slppos in enumerate(self.slppostype):
             self.inferparam[slppos] = dict()
             infparams = StringClass.extract_numeric_values_from_string(
                 fuzinf_types[i])
             if len(infparams) % 4 != 0:
                 raise RuntimeError(
                     "Each item of InferParams MUST contains four elements,"
                     "i.e., Attribute No., FMF No., w1, w2! Please check item: "
                     "%s for %s." % (fuzinf_types[i], slppos))
             for j in range(len(infparams) // 4):
                 attridx = int(infparams[j * 4]) - 1
                 attrname = self.selectedtopolist[attridx]
                 fmf = self._FMFTYPE[int(infparams[j * 4 + 1])]
                 curinfparam = self._FMFPARAM[fmf][:]
                 curinfparam[0] = infparams[j * 4 + 2]  # w1
                 curinfparam[3] = infparams[j * 4 + 3]  # w2
                 self.inferparam[slppos][attrname] = [fmf] + curinfparam
Example #7
0
    def OutputItems(self):
        # type: (...) -> (List[AnyStr], Dict[AnyStr, Optional[List[AnyStr]]])
        """Read output ID and items from database.

        Returns:
            _output_ids (list): OUTPUTID list
            _output_items (dict): key is core file name of output,
                                  value is None or list of aggregated types
        """
        if self._output_ids and self._output_items:
            return self._output_ids, self._output_items
        cursor = self.fileout_tab.find(
            {'$or': [{
                ModelCfgFields.use: '1'
            }, {
                ModelCfgFields.use: 1
            }]})
        if cursor is not None:
            for item in cursor:
                self._output_ids.append(item[ModelCfgFields.output_id])
                name = item[ModelCfgFields.filename]
                corename = StringClass.split_string(name, '.')[0]
                types = item[ModelCfgFields.type]
                if StringClass.string_match(types, 'NONE'):
                    self._output_items.setdefault(corename, None)
                else:
                    self._output_items.setdefault(
                        corename, StringClass.split_string(types, '-'))
        return self._output_ids, self._output_items
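For reference, the returned structures might look like the following (an illustrative
sketch; the output IDs, core names, and aggregation types are made up):

    _output_ids = ['QOUTLET', 'SED']
    _output_items = {'Q': None,              # type string is 'NONE', no aggregation
                     'SED': ['SUM', 'AVE']}  # type string 'SUM-AVE' split by '-'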
Example #8
0
    def __init__(self, cf, method='morris'):
        """Initialization."""
        self.method = method
        # 1. SEIMS model related
        self.model = ParseSEIMSConfig(cf)
        # 2. Common settings of parameters sensitivity analysis
        if 'PSA_Settings' not in cf.sections():
            raise ValueError(
                "[PSA_Settings] section MUST exist in the *.ini file.")

        self.evaluate_params = list()
        if cf.has_option('PSA_Settings', 'evaluateparam'):
            eva_str = cf.get('PSA_Settings', 'evaluateparam')
            self.evaluate_params = StringClass.split_string(eva_str, ',')
        else:
            self.evaluate_params = ['Q']  # Default

        self.param_range_def = 'morris_param_rng.def'  # Default
        if cf.has_option('PSA_Settings', 'paramrngdef'):
            self.param_range_def = cf.get('PSA_Settings', 'paramrngdef')
        self.param_range_def = self.model.model_dir + os.path.sep + self.param_range_def
        if not FileClass.is_file_exists(self.param_range_def):
            raise IOError('Ranges of parameters MUST be provided!')

        if not (cf.has_option('PSA_Settings', 'psa_time_start')
                and cf.has_option('PSA_Settings', 'psa_time_end')):
            raise ValueError(
                "Start and end time of PSA MUST be specified in [PSA_Settings]."
            )
        try:
            # UTCTIME
            tstart = cf.get('PSA_Settings', 'psa_time_start')
            tend = cf.get('PSA_Settings', 'psa_time_end')
            self.psa_stime = StringClass.get_datetime(tstart)
            self.psa_etime = StringClass.get_datetime(tend)
        except ValueError:
            raise ValueError('The time format MUST be "YYYY-MM-DD HH:MM:SS".')
        if self.psa_stime >= self.psa_etime:
            raise ValueError("Wrong time settings in [PSA_Settings]!")

        # 3. Parameters settings for specific sensitivity analysis methods
        self.morris = None
        self.fast = None
        if self.method == 'fast':
            self.fast = FASTConfig(cf)
            self.psa_outpath = '%s/PSA_FAST_N%dM%d' % (
                self.model.model_dir, self.fast.N, self.fast.M)
        elif self.method == 'morris':
            self.morris = MorrisConfig(cf)
            self.psa_outpath = '%s/PSA_Morris_N%dL%d' % (
                self.model.model_dir, self.morris.N, self.morris.num_levels)
        # 4. (Optional) Plot settings for matplotlib
        self.plot_cfg = PlotConfig(cf)

        # Do not remove psa_outpath if already existed
        UtilClass.mkdir(self.psa_outpath)
        self.outfiles = PSAOutputs(self.psa_outpath)
Example #9
0
def read_pareto_solutions_from_txt(txt_file, sce_name='scenario', field_name='gene_values'):
    # type: (AnyStr, AnyStr, AnyStr) -> (Dict[int, List[List[float]]])
    """Read Pareto points from `runtime.log` file.

    Args:
        txt_file: Full file path of `runtime.log` output by NSGA2 algorithm.
        sce_name: Field name followed by `generation`, e.g., 'calibrationID', 'scenario', etc.
        field_name: Field name in the header for gene values, 'gene_values' by default

    Returns:
        pareto_solutions: `OrderedDict`, key is generation ID, value is arrays of Pareto solutions
    """
    with open(txt_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    pareto_solutions = OrderedDict()
    found = False
    cur_gen = -1
    field_idx = -1

    for lno, line in enumerate(lines):
        str_line = line
        for LF in LFs:
            if LF in line:
                str_line = line.split(LF)[0]
                break
        if str_line == '':
            continue
        values = StringClass.extract_numeric_values_from_string(str_line)
        # Check generation
        if str_line[0] == '#' and 'Generation' in str_line:
            if len(values) != 1:
                continue
            # e.g., ###### Generation: 23 ######
            gen = int(values[0])
            found = True
            cur_gen = gen
            pareto_solutions[cur_gen] = list()
            continue
        if not found:  # If the first "###### Generation: 1 ######" has not been found.
            continue
        line_list = StringClass.split_string(str_line.upper(), ['\t'])
        if values is None:  # means header line
            if field_idx >= 0:
                continue
            for idx, v in enumerate(line_list):
                if field_name.upper() in v.upper():
                    field_idx = idx
                    break
            continue
        if field_idx < 0:
            continue
        # now append the real Pareto solutions data
        tmpvalues = StringClass.extract_numeric_values_from_string(line_list[field_idx])
        pareto_solutions[cur_gen].append(tmpvalues[:])

    return pareto_solutions
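For reference, the layout this parser expects in `runtime.log` looks roughly like the
following (an illustrative sketch, not verbatim SEIMS output; columns are tab-separated,
and the generation banner is recognized by its single numeric value):

    ###### Generation: 1 ######
    scenario	economy	environment	gene_values
    101	1.2	0.85	(0.1, 0.3, 0.7)
    102	1.5	0.80	(0.2, 0.4, 0.6)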
Example #10
0
def read_inf_param_from_file(conf):
    """Read fuzzy inference parameters from file."""
    params_list = list()
    with open(conf, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            eles = line.split('\n')[0].split('\t')
            params = StringClass.extract_numeric_values_from_string(line.split('\n')[0])
            if StringClass.string_match(eles[0], 'Parameters') and len(params) >= 6:
                params_list.append([eles[1]] + [eles[3]] + params[-6:])
    return params_list
Example #11
0
    def __init__(self, cf, method='morris'):
        """Initialization."""
        self.method = method
        # 1. SEIMS model related
        self.model = ParseSEIMSConfig(cf)
        # 2. Common settings of parameters sensitivity analysis
        if 'PSA_Settings' not in cf.sections():
            raise ValueError("[PSA_Settings] section MUST be existed in *.ini file.")

        self.evaluate_params = list()
        if cf.has_option('PSA_Settings', 'evaluateparam'):
            eva_str = cf.get('PSA_Settings', 'evaluateparam')
            self.evaluate_params = StringClass.split_string(eva_str, ',')
        else:
            self.evaluate_params = ['Q']  # Default

        self.param_range_def = 'morris_param_rng.def'  # Default
        if cf.has_option('PSA_Settings', 'paramrngdef'):
            self.param_range_def = cf.get('PSA_Settings', 'paramrngdef')
        self.param_range_def = self.model.model_dir + os.path.sep + self.param_range_def
        if not FileClass.is_file_exists(self.param_range_def):
            raise IOError('Ranges of parameters MUST be provided!')

        if not (cf.has_option('PSA_Settings', 'psa_time_start') and
                cf.has_option('PSA_Settings', 'psa_time_end')):
            raise ValueError("Start and end time of PSA MUST be specified in [PSA_Settings].")
        try:
            # UTCTIME
            tstart = cf.get('PSA_Settings', 'psa_time_start')
            tend = cf.get('PSA_Settings', 'psa_time_end')
            self.psa_stime = StringClass.get_datetime(tstart)
            self.psa_etime = StringClass.get_datetime(tend)
        except ValueError:
            raise ValueError('The time format MUST be "YYYY-MM-DD HH:MM:SS".')
        if self.psa_stime >= self.psa_etime:
            raise ValueError("Wrong time settings in [PSA_Settings]!")

        # 3. Parameters settings for specific sensitivity analysis methods
        self.morris = None
        self.fast = None
        if self.method == 'fast':
            self.fast = FASTConfig(cf)
            self.psa_outpath = '%s/PSA-FAST-N%dM%d' % (self.model.model_dir,
                                                       self.fast.N, self.fast.M)
        elif self.method == 'morris':
            self.morris = MorrisConfig(cf)
            self.psa_outpath = '%s/PSA-Morris-N%dL%dJ%d' % (self.model.model_dir,
                                                            self.morris.N,
                                                            self.morris.num_levels,
                                                            self.morris.grid_jump)

        # Do not remove psa_outpath if already existed
        UtilClass.mkdir(self.psa_outpath)
        self.outfiles = PSAOutputs(self.psa_outpath)
Example #12
0
    def regular_data_from_txt(climdb, data_file):
        """Regular precipitation data from text file."""
        # delete existing precipitation data
        climdb[DBTableNames.data_values].remove({DataValueFields.type: DataType.p})
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(data_file)
        if tsysin == 'UTCTIME':
            tzonein = time.timezone // -3600  # hours east of UTC (integer division)
        clim_data_items = read_data_items_from_txt(data_file)
        clim_flds = clim_data_items[0]
        station_id = list()
        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        count = 0
        for fld in clim_flds:
            if not StringClass.string_in_list(fld,
                                              [DataValueFields.dt, DataValueFields.y,
                                               DataValueFields.m,
                                               DataValueFields.d, DataValueFields.hour,
                                               DataValueFields.minute, DataValueFields.second]):
                station_id.append(fld)
        for i, clim_data_item in enumerate(clim_data_items):
            if i == 0:
                continue
            dic = dict()
            precipitation = list()

            for j, clim_data_v in enumerate(clim_data_item):
                if StringClass.string_in_list(clim_flds[j], station_id):
                    precipitation.append(float(clim_data_v))
            utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(clim_flds,
                                                                               clim_data_item,
                                                                               tsysin, tzonein)
            dic[DataValueFields.local_time] = utc_time + timedelta(minutes=tzonein * 60)
            dic[DataValueFields.time_zone] = tzonein
            dic[DataValueFields.utc] = utc_time

            for j, cur_id in enumerate(station_id):
                cur_dic = dict()
                cur_dic[DataValueFields.value] = precipitation[j]
                cur_dic[DataValueFields.id] = int(cur_id)
                cur_dic[DataValueFields.type] = DataType.p
                cur_dic[DataValueFields.time_zone] = dic[DataValueFields.time_zone]
                cur_dic[DataValueFields.local_time] = dic[DataValueFields.local_time]
                cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                bulk.insert(cur_dic)
                count += 1
                if count % 500 == 0:  # execute every 500 records
                    MongoUtil.run_bulk(bulk)
                    bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)
        # Create index
        climdb[DBTableNames.data_values].create_index([(DataValueFields.id, ASCENDING),
                                                       (DataValueFields.type, ASCENDING),
                                                       (DataValueFields.utc, ASCENDING)])
Example #13
0
 def initial_params_from_txt(cfg, maindb):
     """
     Import initial calibration parameters from a txt data file.
     Args:
         cfg: SEIMS config object
         maindb: MongoDB database object
     """
     # create the collection if it does not exist; otherwise drop it first
     c_list = maindb.collection_names()
     if not StringClass.string_in_list(DBTableNames.main_parameter, c_list):
         maindb.create_collection(DBTableNames.main_parameter)
     else:
         maindb.drop_collection(DBTableNames.main_parameter)
     # initialize bulk operator
     bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
     # read initial parameters from txt file
     data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
     field_names = data_items[0][0:]
     # print(field_names)
     for i, cur_data_item in enumerate(data_items):
         if i == 0:
             continue
         # print(cur_data_item)
         # initial one default blank parameter dict.
         data_import = {ModelParamFields.name: '', ModelParamFields.desc: '',
                        ModelParamFields.unit: '', ModelParamFields.module: '',
                        ModelParamFields.value: DEFAULT_NODATA,
                        ModelParamFields.impact: DEFAULT_NODATA,
                        ModelParamFields.change: 'NC',
                        ModelParamFields.max: DEFAULT_NODATA,
                        ModelParamFields.min: DEFAULT_NODATA,
                        ModelParamFields.type: ''}
         for k, v in list(data_import.items()):
             idx = field_names.index(k)
             if cur_data_item[idx] == '':
                 if StringClass.string_match(k, ModelParamFields.change_ac):
                     data_import[k] = 0
                 elif StringClass.string_match(k, ModelParamFields.change_rc):
                     data_import[k] = 1
                 elif StringClass.string_match(k, ModelParamFields.change_nc):
                     data_import[k] = 0
                 elif StringClass.string_match(k, ModelParamFields.change_vc):
                     data_import[k] = DEFAULT_NODATA  # Be careful to check NODATA when used!
             else:
                 if MathClass.isnumerical(cur_data_item[idx]):
                     data_import[k] = float(cur_data_item[idx])
                 else:
                     data_import[k] = cur_data_item[idx]
         bulk.insert(data_import)
     # execute import operators
     MongoUtil.run_bulk(bulk, 'No operation during initial_params_from_txt.')
     # initialize index by parameter's type and name by ascending order.
     maindb[DBTableNames.main_parameter].create_index([(ModelParamFields.type, ASCENDING),
                                                       (ModelParamFields.name, ASCENDING)])
Example #14
0
 def convertstatsmethod(method_str):
     """Convert statistics method to ave, min, and max."""
     if StringClass.string_match(method_str, 'Average'):
         return 'ave'
     elif StringClass.string_match(method_str, 'Maximum'):
         return 'max'
     elif StringClass.string_match(method_str, 'Minimum'):
         return 'min'
     elif method_str.lower() in ['ave', 'max', 'min']:
         return method_str.lower()
     else:
         return 'ave'
Example #16
0
def read_simulation_from_txt(
        ws,  # type: AnyStr
        plot_vars,  # type: List[AnyStr]
        subbsnID,  # type: int
        stime,  # type: datetime
        etime  # type: datetime
):
    # type: (...) -> (List[AnyStr], Dict[datetime, List[float]])
    """
    Read simulation data from text file according to subbasin ID.
    Returns:
        1. Matched variable names, [var1, var2, ...]
        2. Simulation data dict of all plotted variables, with UTCDATETIME.
           {Datetime: [value_of_var1, value_of_var2, ...], ...}
    """
    plot_vars_existed = list()
    sim_data_dict = OrderedDict()
    for i, v in enumerate(plot_vars):
        txtfile = ws + os.path.sep + v + '.txt'
        if not FileClass.is_file_exists(txtfile):
            print('WARNING: Simulation variable file %s does not exist!' % txtfile)
            continue
        data_items = read_data_items_from_txt(txtfile)
        found = False
        data_available = False
        for item in data_items:
            item_vs = StringClass.split_string(item[0], ' ', elim_empty=True)
            if len(item_vs) == 2:
                if int(item_vs[1]) == subbsnID and not found:
                    found = True
                elif int(item_vs[1]) != subbsnID and found:
                    break
            if not found:
                continue
            if len(item_vs) != 3:
                continue
            date_str = '%s %s' % (item_vs[0], item_vs[1])
            sim_datetime = StringClass.get_datetime(date_str,
                                                    "%Y-%m-%d %H:%M:%S")

            if stime <= sim_datetime <= etime:
                if sim_datetime not in sim_data_dict:
                    sim_data_dict[sim_datetime] = list()
                sim_data_dict[sim_datetime].append(float(item_vs[2]))
                data_available = True
        if data_available:
            plot_vars_existed.append(v)

    print('Read simulation from %s to %s done.' %
          (stime.strftime('%c'), etime.strftime('%c')))
    return plot_vars_existed, sim_data_dict
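A minimal call sketch (the workspace path and variable name are illustrative; it
assumes a SEIMS-written `Q.txt` exists in the output directory):

    from datetime import datetime

    vars_found, sim_data = read_simulation_from_txt(
        ws='/path/to/OUTPUT', plot_vars=['Q'], subbsnID=0,
        stime=datetime(2012, 1, 1), etime=datetime(2012, 12, 31))
    for dt, values in sim_data.items():
        print(dt, values)  # e.g., 2012-01-01 00:00:00 [0.53]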
Example #17
0
    def calculate_environment(self):
        """Calculate environment benefit based on the output and base values predefined in
        configuration file.
        """
        if not self.modelrun:  # no evaluation has been done
            self.economy = self.worst_econ
            self.environment = self.worst_env
            return
        rfile = self.modelout_dir + os.path.sep + self.eval_info['ENVEVAL']

        if not FileClass.is_file_exists(rfile):
            time.sleep(0.1)  # wait a moment in case of unpredictable file system error
        if not FileClass.is_file_exists(rfile):
            print(
                'WARNING: Although the SEIMS model has been executed, the desired output %s'
                ' cannot be found!' % rfile)
            self.economy = self.worst_econ
            self.environment = self.worst_env
            # model clean
            self.model.SetMongoClient()
            self.model.clean(delete_scenario=True)
            self.model.UnsetMongoClient()
            return

        base_amount = self.eval_info['BASE_ENV']
        if StringClass.string_match(rfile.split('.')[-1],
                                    'tif'):  # Raster data
            rr = RasterUtilClass.read_raster(rfile)
            sed_sum = rr.get_sum() / self.eval_timerange  # unit: year
        elif StringClass.string_match(rfile.split('.')[-1],
                                      'txt'):  # Time series data
            sed_sum = read_simulation_from_txt(self.modelout_dir, ['SED'],
                                               self.model.OutletID,
                                               self.cfg.eval_stime,
                                               self.cfg.eval_etime)
        else:
            raise ValueError('The file format of ENVEVAL MUST be tif or txt!')

        if base_amount < 0:  # indicates a base scenario
            self.environment = sed_sum
        else:
            # reduction rate of soil erosion
            self.environment = (base_amount - sed_sum) / base_amount
            # print exception values
            if self.environment > 1. or self.environment < 0. or numpy.isnan(self.environment):
                print('Exception Information: Scenario ID: %d, '
                      'SUM(%s): %s' % (self.ID, rfile, repr(sed_sum)))
                self.environment = self.worst_env
Example #18
0
 def variable_table(db, var_file):
     """Import variables table"""
     var_data_items = read_data_items_from_txt(var_file)
     var_flds = var_data_items[0]
     for i in range(1, len(var_data_items)):
         dic = dict()
         for j in range(len(var_data_items[i])):
             if StringClass.string_match(var_flds[j], VariableDesc.type):
                 dic[VariableDesc.type] = var_data_items[i][j]
             elif StringClass.string_match(var_flds[j], VariableDesc.unit):
                 dic[VariableDesc.unit] = var_data_items[i][j]
         # If this item already exists, update it; otherwise insert a new one.
         curfilter = {VariableDesc.type: dic[VariableDesc.type]}
         db[DBTableNames.var_desc].find_one_and_replace(curfilter, dic, upsert=True)
Example #19
0
 def convertdistmethod(method_str):
     """Convert distance method to h, v, p, and s."""
     if StringClass.string_match(method_str, 'Horizontal'):
         return 'h'
     elif StringClass.string_match(method_str, 'Vertical'):
         return 'v'
     elif StringClass.string_match(method_str, 'Pythagoras'):
         return 'p'
     elif StringClass.string_match(method_str, 'Surface'):
         return 's'
     elif method_str.lower() in ['h', 'v', 'p', 's']:
         return method_str.lower()
     else:
         return 's'
Example #20
0
 def variable_table(db, var_file):
     """Import variables table"""
     var_data_items = read_data_items_from_txt(var_file)
     var_flds = var_data_items[0]
     for i in range(1, len(var_data_items)):
         dic = {}
         for j in range(len(var_data_items[i])):
             if StringClass.string_match(var_flds[j], VariableDesc.type):
                 dic[VariableDesc.type] = var_data_items[i][j]
             elif StringClass.string_match(var_flds[j], VariableDesc.unit):
                 dic[VariableDesc.unit] = var_data_items[i][j]
         # If this item already exists, update it; otherwise insert a new one.
         curfilter = {VariableDesc.type: dic[VariableDesc.type]}
         db[DBTableNames.var_desc].find_one_and_replace(curfilter, dic, upsert=True)
Example #22
0
def read_pareto_popsize_from_txt(txt_file, sce_name='scenario'):
    # type: (AnyStr, AnyStr) -> (List[int], List[int])
    """Read the population size of each generations."""
    with open(txt_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    pareto_popnum = OrderedDict()
    found = False
    cur_gen = -1
    iden_idx = -1
    for line in lines:
        str_line = line
        for LF in LFs:
            if LF in line:
                str_line = line.split(LF)[0]
                break
        if str_line == '':
            continue
        values = StringClass.extract_numeric_values_from_string(str_line)
        # Check generation
        if str_line[0] == '#' and 'Generation' in str_line:
            if len(values) != 1:
                continue
            gen = int(values[0])
            found = True
            cur_gen = gen
            pareto_popnum[cur_gen] = list()
            continue
        if not found:
            continue
        if values is None:  # means header line
            line_list = StringClass.split_string(str_line, ['\t'])
            for idx, v in enumerate(line_list):
                if StringClass.string_match(v, sce_name):
                    iden_idx = idx
                    break
            continue
        if iden_idx < 0:
            continue
        # now append the real Pareto front point data
        pareto_popnum[cur_gen].append(int(values[iden_idx]))

    all_sceids = list()
    acc_num = list()
    genids = sorted(list(pareto_popnum.keys()))
    for idx, genid in enumerate(genids):
        for _id in pareto_popnum[genid]:
            if _id not in all_sceids:
                all_sceids.append(_id)
        acc_num.append(len(all_sceids))
    return genids, acc_num
Example #23
0
    def __init__(self, cf):
        """Get parameters from ConfigParser object."""
        self.N = 100
        self.num_levels = 10
        self.grid_jump = 2
        self.optimal_t = None
        self.local_opt = True
        section_name = 'Morris_Method'
        if section_name not in cf.sections():
            raise ValueError('[%s] section MUST exist in the *.ini file.' % section_name)

        if cf.has_option(section_name, 'n'):
            self.N = cf.getint(section_name, 'n')
        if cf.has_option(section_name, 'num_levels'):
            self.num_levels = cf.getint(section_name, 'num_levels')
        if cf.has_option(section_name, 'grid_jump'):
            self.grid_jump = cf.getint(section_name, 'grid_jump')
        if cf.has_option(section_name, 'optimal_trajectories'):
            tmp_opt_t = cf.get(section_name, 'optimal_trajectories')
            if not StringClass.string_match(tmp_opt_t, 'none'):
                self.optimal_t = cf.getint(section_name, 'optimal_trajectories')
                if self.optimal_t > self.N or self.optimal_t < 2:
                    self.optimal_t = None
        if cf.has_option(section_name, 'local_optimization'):
            self.local_opt = cf.getboolean(section_name, 'local_optimization')
Example #24
0
    def get_time_system_from_data_file(in_file):
        # type: (str) -> (str, int)
        """Get the time system from the data file. The basic format is:
           #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #LOCALTIME -2, #UTCTIME

        Returns:
            time_sys: 'UTCTIME' or 'LOCALTIME'
            time_zone(int): Positive for West time zone, and negative for East.
        """
        time_sys = 'LOCALTIME'
        time_zone = time.timezone // 3600
        with open(in_file, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        for line in lines:
            str_line = line.strip()
            # for LF in LFs:
            #     if LF in line:
            #         str_line = line.split(LF)[0]
            #         break
            if not str_line:
                continue
            if str_line[0] != '#':
                break
            if str_line.lower().find('utc') >= 0:
                time_sys = 'UTCTIME'
                time_zone = 0
                break
            if str_line.lower().find('local') >= 0:
                line_list = StringClass.split_string(str_line, [' ', ','])
                if len(line_list) == 2 and MathClass.isnumerical(line_list[1]):
                    time_zone = -1 * int(line_list[1])
                break
        return time_sys, time_zone
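By the conventions coded above, illustrative header lines map to return values as
follows (note that this snippet stores East zones as negative and West as positive):

    #UTCTIME        ->  ('UTCTIME', 0)
    #LOCALTIME 8    ->  ('LOCALTIME', -8)   # UTC+8 (East)
    #LOCALTIME -2   ->  ('LOCALTIME', 2)    # UTC-2 (West)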
Example #25
0
    def workflow(cfg, main_db, clim_db):
        """Workflow"""
        # 1. Find meteorology and precipitation sites in study area
        thiessen_file_list = [cfg.meteo_sites_thiessen, cfg.prec_sites_thiessen]
        type_list = [DataType.m, DataType.p]

        # The entire basin, used for OpenMP version
        ImportHydroClimateSites.find_sites(main_db, cfg.climate_db, cfg.vecs.bsn,
                                           FieldNames.basin, thiessen_file_list,
                                           cfg.thiessen_field, type_list)
        # The subbasins, used for MPI&OpenMP version
        ImportHydroClimateSites.find_sites(main_db, cfg.climate_db, cfg.vecs.subbsn,
                                           FieldNames.subbasin_id, thiessen_file_list,
                                           cfg.thiessen_field, type_list)

        # 2. Import geographic information of each site to the Hydro-Climate database
        c_list = clim_db.collection_names()
        tables = [DBTableNames.sites, DBTableNames.var_desc]
        for tb in tables:
            if not StringClass.string_in_list(tb, c_list):
                clim_db.create_collection(tb)
        ImportHydroClimateSites.variable_table(clim_db, cfg.hydro_climate_vars)
        site_m_loc = ImportHydroClimateSites.sites_table(clim_db, cfg.Meteo_sites, DataType.m)
        site_p_loc = ImportHydroClimateSites.sites_table(clim_db, cfg.prec_sites, DataType.p)
        # print(site_m_loc, site_p_loc)
        return site_m_loc, site_p_loc
Example #26
0
    def initialize_landcover_parameters(landcover_file,
                                        landcover_initial_fields_file,
                                        dst_dir):
        """generate initial landcover_init_param parameters"""
        lc_data_items = read_data_items_from_txt(landcover_initial_fields_file)
        # print(lc_data_items)
        field_names = lc_data_items[0]
        lu_id = -1
        for i, v in enumerate(field_names):
            if StringClass.string_match(v, 'LANDUSE_ID'):
                lu_id = i
                break
        data_items = lc_data_items[1:]
        replace_dicts = dict()
        for item in data_items:
            for i, v in enumerate(item):
                if i != lu_id:
                    if field_names[i].upper() not in list(
                            replace_dicts.keys()):
                        replace_dicts[field_names[i].upper()] = {
                            float(item[lu_id]): float(v)
                        }
                    else:
                        replace_dicts[field_names[i].upper()][float(
                            item[lu_id])] = float(v)
        # print(replace_dicts)

        # Generate GTIFF
        for item, v in list(replace_dicts.items()):
            filename = dst_dir + os.path.sep + item + '.tif'
            print(filename)
            RasterUtilClass.raster_reclassify(landcover_file, v, filename)
        return list(replace_dicts['LANDCOVER'].values())
Example #27
0
    def export_landuse_lookup_files_from_mongodb(cfg, maindb):
        """export landuse lookup tables to txt file from MongoDB."""
        lookup_dir = cfg.dirs.lookup
        property_namelist = ModelParamDataUtils.landuse_fields
        property_map = {}
        property_namelist.append('USLE_P')
        query_result = maindb['LANDUSELOOKUP'].find()
        if query_result is None:
            raise RuntimeError("LanduseLoop Collection is not existed or empty!")
        count = 0
        for row in query_result:
            # print(row)
            value_map = dict()
            for i, p_name in enumerate(property_namelist):
                if StringClass.string_match(p_name, "USLE_P"):
                    # Currently, USLE_P is set as 1 for all landuse.
                    value_map[p_name] = 1
                else:
                # Unclear why Manning was multiplied by 10 here; keep it commented out for now. lj
                    # if StringClass.string_match(p_name, "Manning"):
                    #     value_map[p_name] = row.get(p_name) * 10
                    # else:
                    value_map[p_name] = row.get(p_name)
            count += 1
            property_map[count] = value_map

        n = len(property_map)
        UtilClass.rmmkdir(lookup_dir)
        for propertyName in property_namelist:
            with open("%s/%s.txt" % (lookup_dir, propertyName,), 'w') as f:
                f.write("%d\n" % n)
                for prop_id in property_map:
                    s = "%d %f\n" % (prop_id, property_map[prop_id][propertyName])
                    f.write(s)
Example #28
0
    def workflow(cfg, main_db, clim_db):
        """Workflow"""
        # 1. Find meteorology and precipitation sites in study area
        thiessen_file_list = [
            cfg.meteo_sites_thiessen, cfg.prec_sites_thiessen
        ]
        type_list = [DataType.m, DataType.p]

        # The entire basin, used for OpenMP version
        ImportHydroClimateSites.find_sites(main_db, cfg.climate_db,
                                           cfg.vecs.bsn, FieldNames.basin,
                                           thiessen_file_list,
                                           cfg.thiessen_field, type_list)
        # The subbasins, used for MPI&OpenMP version
        ImportHydroClimateSites.find_sites(main_db, cfg.climate_db,
                                           cfg.vecs.subbsn,
                                           FieldNames.subbasin_id,
                                           thiessen_file_list,
                                           cfg.thiessen_field, type_list)

        # 2. Import geographic information of each site to the Hydro-Climate database
        c_list = clim_db.collection_names()
        tables = [DBTableNames.sites, DBTableNames.var_desc]
        for tb in tables:
            if not StringClass.string_in_list(tb, c_list):
                clim_db.create_collection(tb)
        ImportHydroClimateSites.variable_table(clim_db, cfg.hydro_climate_vars)
        site_m_loc = ImportHydroClimateSites.sites_table(
            clim_db, cfg.Meteo_sites, DataType.m)
        site_p_loc = ImportHydroClimateSites.sites_table(
            clim_db, cfg.prec_sites, DataType.p)
        # print(site_m_loc, site_p_loc)
        return site_m_loc, site_p_loc
Example #29
0
 def get_time_system_from_data_file(in_file):
     """Get the time system from the data file. The basic format is:
        #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
     """
     time_sys = 'LOCALTIME'
     time_zone = time.timezone // -3600
     with open(in_file, 'r') as f:
         lines = f.readlines()
     for line in lines:
         str_line = line.strip()
         # for LF in LFs:
         #     if LF in line:
         #         str_line = line.split(LF)[0]
         #         break
         if not str_line:
             continue
         if str_line[0] != '#':
             break
         if str_line.lower().find('utc') >= 0:
             time_sys = 'UTCTIME'
             time_zone = 0
             break
         if str_line.lower().find('local') >= 0:
             line_list = StringClass.split_string(str_line, [','])
             if len(line_list) == 2 and MathClass.isnumerical(line_list[1]):
                 time_zone = -1 * int(line_list[1])
             break
     return time_sys, time_zone
Example #30
0
    def initialize_landcover_parameters(landcover_file, landcover_initial_fields_file, dst_dir):
        """generate initial landcover_init_param parameters"""
        lc_data_items = read_data_items_from_txt(landcover_initial_fields_file)
        # print(lc_data_items)
        field_names = lc_data_items[0]
        lu_id = -1
        for i, v in enumerate(field_names):
            if StringClass.string_match(v, 'LANDUSE_ID'):
                lu_id = i
                break
        data_items = lc_data_items[1:]
        replace_dicts = dict()
        for item in data_items:
            for i, v in enumerate(item):
                if i != lu_id:
                    if field_names[i].upper() not in list(replace_dicts.keys()):
                        replace_dicts[field_names[i].upper()] = {float(item[lu_id]): float(v)}
                    else:
                        replace_dicts[field_names[i].upper()][float(item[lu_id])] = float(v)
        # print(replace_dicts)

        # Generate GTIFF
        for item, v in list(replace_dicts.items()):
            filename = dst_dir + os.path.sep + item + '.tif'
            print(filename)
            RasterUtilClass.raster_reclassify(landcover_file, v, filename)
        return list(replace_dicts['LANDCOVER'].values())
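To illustrate the structure built above: for an input table with the fields
LANDUSE_ID, LANDCOVER, and ROOTDEPTH and two data rows (1, 33, 0.5) and
(2, 81, 1.2), `replace_dicts` becomes (values are made up):

    replace_dicts = {
        'LANDCOVER': {1.0: 33.0, 2.0: 81.0},
        'ROOTDEPTH': {1.0: 0.5, 2.0: 1.2},
    }

Each key then yields one reclassified GeoTIFF named after the field.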
Example #31
0
 def read_optional_section(self, _opt):
     """read and check OPTIONAL inputs."""
     if _opt not in self.cf.sections():
         return
     self.mpi_dir = self.cf.get(_opt, 'mpiexedir')
     self.hostfile = self.cf.get(_opt, 'hostfile')
     self.outlet = self.cf.get(_opt, 'outlet')
     self.valley = self.cf.get(_opt, 'vlysrc')
     self.ridge = self.cf.get(_opt, 'rdgsrc')
     self.regional_attr = self.cf.get(_opt, 'regionalattr')
     if self.proc is None or self.proc <= 0:
         if self.cf.has_option(_opt, 'inputproc'):
             self.proc = self.cf.getint(_opt, 'inputproc')
         else:
             self.proc = cpu_count() // 2
     # if mpi directory is not set
     if self.mpi_dir is None or StringClass.string_match(self.mpi_dir, 'none') \
         or not os.path.isdir(self.mpi_dir):
         mpipath = FileClass.get_executable_fullpath('mpiexec')
         self.mpi_dir = os.path.dirname(mpipath)
     if self.mpi_dir is None:
         raise RuntimeError('Can not find mpiexec!')
     self.hostfile = AutoFuzSlpPosConfig.check_file_available(self.hostfile)
     self.outlet = AutoFuzSlpPosConfig.check_file_available(self.outlet)
     self.valley = AutoFuzSlpPosConfig.check_file_available(self.valley)
     self.ridge = AutoFuzSlpPosConfig.check_file_available(self.ridge)
     self.regional_attr = AutoFuzSlpPosConfig.check_file_available(
         self.regional_attr)
     if self.topoparam is None:
         self.topoparam = TopoAttrNames(self.ws)
     if self.regional_attr is not None:
         self.topoparam.add_user_defined_attribute('rpi',
                                                   self.regional_attr, True)
Example #32
0
    def __init__(self, cf):
        """Get parameters from ConfigParser object."""
        self.N = 100
        self.num_levels = 10
        self.grid_jump = 2
        self.optimal_t = None
        self.local_opt = True
        section_name = 'Morris_Method'
        if section_name not in cf.sections():
            raise ValueError('[%s] section MUST exist in the *.ini file.' %
                             section_name)

        if cf.has_option(section_name, 'n'):
            self.N = cf.getint(section_name, 'n')
        if cf.has_option(section_name, 'num_levels'):
            self.num_levels = cf.getint(section_name, 'num_levels')
        if cf.has_option(section_name, 'grid_jump'):
            self.grid_jump = cf.getint(section_name, 'grid_jump')
        if cf.has_option(section_name, 'optimal_trajectories'):
            tmp_opt_t = cf.get(section_name, 'optimal_trajectories')
            if not StringClass.string_match(tmp_opt_t, 'none'):
                self.optimal_t = cf.getint(section_name,
                                           'optimal_trajectories')
                if self.optimal_t > self.N or self.optimal_t < 2:
                    self.optimal_t = None
        if cf.has_option(section_name, 'local_optimization'):
            self.local_opt = cf.getboolean(section_name, 'local_optimization')
Example #33
0
 def check_file_available(in_f):
     """Check the input file is existed or not, and return None, if not."""
     if StringClass.string_match(in_f, 'none') or in_f == '' or in_f is None:
         return None
     if not FileClass.is_file_exists(in_f):
         raise ValueError("The %s is not existed or have no access permission!" % in_f)
     else:
         return in_f
Example #34
0
 def SimulationPeriod(self):
     """Read the simulation period from the database and intersect it with any preset period."""
     if self._stime is not None and self._etime is not None:
         return self._stime, self._etime
     st = self.filein_tab.find_one(
         {ModelCfgFields.tag: ModelCfgFields.stime})[ModelCfgFields.value]
     et = self.filein_tab.find_one(
         {ModelCfgFields.tag: ModelCfgFields.etime})[ModelCfgFields.value]
     st = StringClass.get_datetime(st)
     et = StringClass.get_datetime(et)
     if self._stime is None or st > self._stime:
         self._stime = st
     if self._etime is None or et < self._etime:
         self._etime = et
     if st > self._etime > self._stime:
         self._stime = st
         self._etime = et
     return self._stime, self._etime
Example #35
0
    def __init__(self, cf, method='nsga2'):
        """Initialization."""
        # 1. SEIMS model related
        self.model = ParseSEIMSConfig(cf)

        # 2. Common settings of auto-calibration
        if 'CALI_Settings' not in cf.sections():
            raise ValueError(
                "[CALI_Settings] section MUST exist in the *.ini file.")
        self.param_range_def = 'cali_param_rng.def'
        if cf.has_option('CALI_Settings', 'paramrngdef'):
            self.param_range_def = cf.get('CALI_Settings', 'paramrngdef')
        self.param_range_def = self.model.model_dir + os.path.sep + self.param_range_def
        if not FileClass.is_file_exists(self.param_range_def):
            raise IOError('Ranges of parameters MUST be provided!')

        if not (cf.has_option('CALI_Settings', 'cali_time_start')
                and cf.has_option('CALI_Settings', 'cali_time_end')):
            raise ValueError("Start and end time of Calibration "
                             "MUST be specified in [CALI_Settings].")
        try:  # UTCTIME
            tstart = cf.get('CALI_Settings', 'cali_time_start')
            tend = cf.get('CALI_Settings', 'cali_time_end')
            self.cali_stime = StringClass.get_datetime(tstart)
            self.cali_etime = StringClass.get_datetime(tend)
            self.calc_validation = False
            if cf.has_option('CALI_Settings', 'vali_time_start') and \
                cf.has_option('CALI_Settings', 'vali_time_end'):
                tstart = cf.get('CALI_Settings', 'vali_time_start')
                tend = cf.get('CALI_Settings', 'vali_time_end')
                self.vali_stime = StringClass.get_datetime(tstart)
                self.vali_etime = StringClass.get_datetime(tend)
                self.calc_validation = True
        except ValueError:
            raise ValueError(
                'The time format MUST be "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS".'
            )
        if self.cali_stime >= self.cali_etime or (
                self.calc_validation and self.vali_stime >= self.vali_etime):
            raise ValueError("Wrong time setted in [CALI_Settings]!")

        # 3. Parameters settings for specific optimization algorithm
        self.opt_mtd = method
        self.opt = None
        if self.opt_mtd == 'nsga2':
            self.opt = ParseNSGA2Config(cf, self.model.model_dir)
Example #36
0
 def read_simulation_timerange(self):
     """Read simulation time range from MongoDB."""
     client = ConnectMongoDB(self.hostname, self.port)
     conn = client.get_conn()
     db = conn[self.main_db]
     collection = db['FILE_IN']
     try:
         stime_str = collection.find_one({'TAG': 'STARTTIME'}, no_cursor_timeout=True)['VALUE']
         etime_str = collection.find_one({'TAG': 'ENDTIME'}, no_cursor_timeout=True)['VALUE']
         stime = StringClass.get_datetime(stime_str)
         etime = StringClass.get_datetime(etime_str)
         dlt = etime - stime + timedelta(seconds=1)
         self.timerange = (dlt.days * 86400. + dlt.seconds) / 86400. / 365.
     except (NetworkTimeout, Exception):
         # in case of an unexpected exception
         self.timerange = 1.  # set default
     client.close()
Example #37
0
def read_data_items_from_txt(txt_file):
    """Read data items include title from text file, each data element are split by TAB or COMMA.
       Be aware, the separator for each line can only be TAB or COMMA, and COMMA is the recommended.
    Args:
        txt_file: full path of text data file
    Returns:
        2D data array
    """
    data_items = list()
    with open(txt_file, 'r') as f:
        for line in f:
            str_line = line.strip()
            if str_line != '' and str_line.find('#') < 0:
                line_list = StringClass.split_string(str_line, ['\t'])
                if len(line_list) <= 1:
                    line_list = StringClass.split_string(str_line, [','])
                data_items.append(line_list)
    return data_items
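A minimal usage sketch (file name and contents are illustrative; note that any line
containing '#' is skipped as a comment):

    # sites.csv contains:
    #   #site list
    #   ID,NAME,LAT
    #   1,StationA,31.5
    items = read_data_items_from_txt('sites.csv')
    # -> [['ID', 'NAME', 'LAT'], ['1', 'StationA', '31.5']]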
Example #39
0
    def read_bmp_parameters(self):
        """Read BMP configuration from MongoDB.
        Each BMP is stored in the collection as one item identified by the 'SUBSCENARIO' field,
        so `self.bmps_params` is a dict with BMP_ID ('SUBSCENARIO') as its key.
        """
        # client = ConnectMongoDB(self.modelcfg.host, self.modelcfg.port)
        # conn = client.get_conn()
        conn = MongoDBObj.client
        scenariodb = conn[self.scenario_db]

        bmpcoll = scenariodb[self.cfg.bmps_coll]
        findbmps = bmpcoll.find({}, no_cursor_timeout=True)
        for fb in findbmps:
            fb = UtilClass.decode_strs_in_dict(fb)
            if 'SUBSCENARIO' not in fb:
                continue
            curid = fb['SUBSCENARIO']
            if curid not in self.cfg.bmps_subids:
                continue
            if curid not in self.bmps_params:
                self.bmps_params[curid] = dict()
            for k, v in fb.items():
                if k == 'SUBSCENARIO':
                    continue
                elif k == 'LANDUSE':
                    if isinstance(v, int):
                        v = [v]
                    elif v == 'ALL' or v == '':
                        v = None
                    else:
                        v = StringClass.extract_numeric_values_from_string(v)
                        v = [int(abs(nv)) for nv in v]
                    # v may be None ('ALL' or empty), which cannot be sliced
                    self.bmps_params[curid][k] = v[:] if v is not None else None
                elif k == 'SLPPOS':
                    if isinstance(v, int):
                        v = [v]
                    elif v == 'ALL' or v == '':
                        v = list(self.cfg.slppos_tags.keys())
                    else:
                        v = StringClass.extract_numeric_values_from_string(v)
                        v = [int(abs(nv)) for nv in v]
                    self.bmps_params[curid][k] = v[:]
                else:
                    self.bmps_params[curid][k] = v
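For reference, a minimal sketch of how one BMP document fetched from the scenario collection is normalized by the loop above; the document and the slppos_tags mapping are made-up assumptions, not values from a real database.

# Hypothetical BMP document returned by bmpcoll.find():
fb = {'SUBSCENARIO': 2, 'LANDUSE': '1, 4, 8', 'SLPPOS': 'ALL', 'OPERATION': 'fence'}
# Assuming self.cfg.slppos_tags == {1: 'ridge', 4: 'backslope', 16: 'valley'},
# the loop above yields:
#   self.bmps_params[2]['LANDUSE'] == [1, 4, 8]    # numeric values extracted from the string
#   self.bmps_params[2]['SLPPOS'] == [1, 4, 16]    # 'ALL' expands to all slope-position tags
#   self.bmps_params[2]['OPERATION'] == 'fence'    # other fields are copied verbatim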
Example #40
    def Precipitation(self, subbsn_id, start_time, end_time):
        # type: (int, datetime, datetime) -> List[List[Union[datetime, float]]]
        """
        The precipitation is read according to the subbasin ID.
            Especially when plot a specific subbasin (such as ID 3).
            For the whole basin, the subbasin ID is 0.
        Returns:
            Precipitation data list with the first element as datetime.
            [[Datetime1, value1], [Datetime2, value2], ..., [Datetimen, valuen]]
        """
        pcp_date_value = list()
        sitelist_tab = self.maindb[DBTableNames.main_sitelist]
        findsites = sitelist_tab.find_one({
            FieldNames.subbasin_id: subbsn_id,
            FieldNames.mode: self.Mode
        })
        if findsites is not None:
            site_liststr = findsites[FieldNames.site_p]
        else:
            raise RuntimeError(
                'Cannot find precipitation site for subbasin %d.' % subbsn_id)
        site_list = StringClass.extract_numeric_values_from_string(
            site_liststr)
        site_list = [int(v) for v in site_list]
        if len(site_list) == 0:
            raise RuntimeError(
                'Cannot find precipitation site for subbasin %d.' % subbsn_id)

        pcp_dict = OrderedDict()

        for pdata in self.climatedb[DBTableNames.data_values].find({
                DataValueFields.utc: {
                    "$gte": start_time,
                    '$lte': end_time
                },
                DataValueFields.type:
                DataType.p,
                DataValueFields.id: {
                    "$in": site_list
                }
        }).sort([(DataValueFields.utc, 1)]):
            curt = pdata[DataValueFields.utc]
            curv = pdata[DataValueFields.value]
            if curt not in pcp_dict:
                pcp_dict[curt] = 0.
            pcp_dict[curt] += curv
        # average
        if len(site_list) > 1:
            for t in pcp_dict:
                pcp_dict[t] /= len(site_list)
        for t, v in pcp_dict.items():
            # print(str(t), v)
            pcp_date_value.append([t, v])
        print('Read precipitation from %s to %s done.' %
              (start_time.strftime('%c'), end_time.strftime('%c')))
        return pcp_date_value
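A minimal usage sketch, assuming `tsd` is an instance of the (unnamed) class that defines Precipitation above:

from datetime import datetime

stime = datetime(2012, 1, 1)
etime = datetime(2012, 12, 31)
# Subbasin 0 means the whole basin; values are averaged over all
# precipitation sites assigned to that subbasin.
pcp = tsd.Precipitation(0, stime, etime)
for dt, value in pcp[:5]:
    print('%s: %.2f' % (dt.strftime('%Y-%m-%d'), value))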
Example #41
def read_field_arrays_from_csv(csvf):
    """Read field arrays from a CSV file and combine them into multi-layer arrays."""
    data_items = read_data_items_from_txt(csvf)
    if len(data_items) < 2:  # no data rows besides the header
        return None
    flds = data_items[0]
    flds_array = dict()
    for idx, data_item in enumerate(data_items):
        if idx == 0:
            continue
        data_item_values = StringClass.extract_numeric_values_from_string(','.join(data_item))
        for fld_idx, fld_name in enumerate(flds):
            if fld_idx == 0 or StringClass.string_match(fld_name, 'FID'):
                continue
            if fld_name not in flds_array:
                flds_array[fld_name] = list()
            flds_array[fld_name].append(data_item_values[fld_idx])
    # for key, value in list(flds_array.items()):
    #     print('%s: %d' % (key, len(value)))
    return combine_multi_layers_array(flds_array)
Example #42
    def __init__(self, cf, method='nsga2'):
        """Initialization."""
        # 1. SEIMS model related
        self.model = ParseSEIMSConfig(cf)

        # 2. Common settings of auto-calibration
        if 'CALI_Settings' not in cf.sections():
            raise ValueError("The [CALI_Settings] section MUST exist in the *.ini file.")
        self.param_range_def = 'cali_param_rng.def'
        if cf.has_option('CALI_Settings', 'paramrngdef'):
            self.param_range_def = cf.get('CALI_Settings', 'paramrngdef')
        self.param_range_def = self.model.model_dir + os.path.sep + self.param_range_def
        if not FileClass.is_file_exists(self.param_range_def):
            raise IOError('Ranges of parameters MUST be provided!')

        if not (cf.has_option('CALI_Settings', 'cali_time_start') and
                cf.has_option('CALI_Settings', 'cali_time_end')):
            raise ValueError("Start and end time of Calibration "
                             "MUST be specified in [CALI_Settings].")
        try:  # UTCTIME
            tstart = cf.get('CALI_Settings', 'cali_time_start')
            tend = cf.get('CALI_Settings', 'cali_time_end')
            self.cali_stime = StringClass.get_datetime(tstart)
            self.cali_etime = StringClass.get_datetime(tend)
            self.calc_validation = False
            if cf.has_option('CALI_Settings', 'vali_time_start') and \
                cf.has_option('CALI_Settings', 'vali_time_end'):
                tstart = cf.get('CALI_Settings', 'vali_time_start')
                tend = cf.get('CALI_Settings', 'vali_time_end')
                self.vali_stime = StringClass.get_datetime(tstart)
                self.vali_etime = StringClass.get_datetime(tend)
                self.calc_validation = True
        except ValueError:
            raise ValueError('The time format MUST be "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS".')
        if self.cali_stime >= self.cali_etime or (self.calc_validation and
                                                  self.vali_stime >= self.vali_etime):
            raise ValueError("Wrong time settings in [CALI_Settings]!")

        # 3. Parameters settings for specific optimization algorithm
        self.opt_mtd = method
        self.opt = None
        if self.opt_mtd == 'nsga2':
            self.opt = ParseNSGA2Config(cf, self.model.model_dir)
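A minimal sketch of the corresponding [CALI_Settings] section in the *.ini file; all values are illustrative assumptions (ConfigParser option names are case-insensitive by default):

[CALI_Settings]
paramRngDef = cali_param_rng.def
Cali_Time_start = 2012-01-01 00:00:00
Cali_Time_end = 2012-12-31 23:59:59
Vali_Time_start = 2013-01-01 00:00:00
Vali_Time_end = 2013-12-31 23:59:59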
Example #43
    def read_bmp_parameters(self):
        """Read BMP configuration from MongoDB."""
        client = ConnectMongoDB(self.hostname, self.port)
        conn = client.get_conn()
        scenariodb = conn[self.bmp_scenario_db]

        bmpcoll = scenariodb[self.bmps_coll]
        findbmps = bmpcoll.find({}, no_cursor_timeout=True)
        for fb in findbmps:
            fb = UtilClass.decode_strs_in_dict(fb)
            if 'SUBSCENARIO' not in fb:
                continue
            curid = fb['SUBSCENARIO']
            if curid not in self.bmps_subids:
                continue
            if curid not in self.bmps_params:
                self.bmps_params[curid] = dict()
            for k, v in fb.items():
                if k == 'SUBSCENARIO':
                    continue
                elif k == 'LANDUSE':
                    if isinstance(v, int):
                        v = [v]
                    elif v == 'ALL' or v == '':
                        v = None
                    else:
                        v = StringClass.extract_numeric_values_from_string(v)
                        v = [int(abs(nv)) for nv in v]
                    # v may be None ('ALL' or empty), which cannot be sliced
                    self.bmps_params[curid][k] = v[:] if v is not None else None
                elif k == 'SLPPOS':
                    if isinstance(v, int):
                        v = [v]
                    elif v == 'ALL' or v == '':
                        v = list(self.slppos_tags.keys())
                    else:
                        v = StringClass.extract_numeric_values_from_string(v)
                        v = [int(abs(nv)) for nv in v]
                    self.bmps_params[curid][k] = v[:]
                else:
                    self.bmps_params[curid][k] = v

        client.close()
Example #44
 def read_simulation_timerange(self):
     """Read simulation time range from MongoDB."""
     client = ConnectMongoDB(self.hostname, self.port)
     conn = client.get_conn()
     db = conn[self.main_db]
     collection = db['FILE_IN']
     try:
         stime_str = collection.find_one({'TAG': 'STARTTIME'},
                                         no_cursor_timeout=True)['VALUE']
         etime_str = collection.find_one({'TAG': 'ENDTIME'},
                                         no_cursor_timeout=True)['VALUE']
         stime = StringClass.get_datetime(stime_str)
         etime = StringClass.get_datetime(etime_str)
         dlt = etime - stime + timedelta(seconds=1)
         self.timerange = (dlt.days * 86400. + dlt.seconds) / 86400. / 365.
     except (NetworkTimeout, Exception):
         # In case of an unexpected exception, fall back to the default.
         self.timerange = 1.  # set default
     client.close()
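A worked check of the timerange arithmetic above, with assumed FILE_IN values:

from datetime import datetime, timedelta

# Assume STARTTIME = 2012-01-01 00:00:00 and ENDTIME = 2012-12-31 23:59:59
stime = datetime(2012, 1, 1)
etime = datetime(2012, 12, 31, 23, 59, 59)
dlt = etime - stime + timedelta(seconds=1)  # exactly 366 days (2012 is a leap year)
timerange = (dlt.days * 86400. + dlt.seconds) / 86400. / 365.  # ~1.0027 years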
Example #46
def main():
    """TEST CODE"""
    lat_station = 31.45
    ssd_txt = r'C:\z_data\zhongTianShe\model_data_swat\climate\ssd_LY.txt'
    sr_txt = r'C:\z_data\zhongTianShe\model_data_swat\climate\sr_LY.txt'
    sr = list()
    with open(ssd_txt, 'r') as f:
        ssd_items = f.readlines()
    st_str = ssd_items[0].strip()
    st_time = StringClass.get_datetime(st_str)
    for i in range(1, len(ssd_items)):
        ssd_tmp = StringClass.extract_numeric_values_from_string(ssd_items[i])
        time_tmp = st_time + timedelta(days=i - 1)
        sr_tmp = ([round(rs(DateClass.day_of_year(time_tmp), v, lat_station * PI / 180.), 1)]
                  for v in ssd_tmp)
        sr.extend(sr_tmp)
    with open(sr_txt, 'w') as f:
        f.write(st_str + '\n')
        for sr_tmp in sr:
            f.write(','.join(str(v) for v in sr_tmp) + '\n')
Example #47
    def get_utcdatetime_from_field_values(flds, values, tsys, tzone=None):
        """Get datetime from field-value lists.

        Returns:
            utctime
        """
        cur_y = 0
        cur_m = 0
        cur_d = 0
        cur_hh = 0
        cur_mm = 0
        cur_ss = 0
        dt = None
        for i, fld in enumerate(flds):
            if StringClass.string_match(fld, DataValueFields.dt):
                dt = StringClass.get_datetime(values[i])
            elif StringClass.string_match(fld, DataValueFields.y):
                cur_y = int(values[i])
            elif StringClass.string_match(fld, DataValueFields.m):
                cur_m = int(values[i])
            elif StringClass.string_match(fld, DataValueFields.d):
                cur_d = int(values[i])
            elif StringClass.string_match(fld, DataValueFields.hour):
                cur_hh = int(values[i])
            elif StringClass.string_match(fld, DataValueFields.minute):
                cur_mm = int(values[i])
            elif StringClass.string_match(fld, DataValueFields.second):
                cur_ss = int(values[i])
        # Get datetime and utc/local transformation
        if dt is None:  # the 'DATETIME' field is not present
            if cur_y < 1900 or cur_m <= 0 or cur_d <= 0:
                raise ValueError("Cannot find TIME information from "
                                 "fields: %s" % ' '.join(fld for fld in flds))
            else:
                dt = datetime(cur_y, cur_m, cur_d, cur_hh, cur_mm, cur_ss)
        if not StringClass.string_match(tsys, 'UTCTIME'):
            if tzone is None:
                tzone = time.timezone // -3600  # positive value for EAST
            dt -= timedelta(minutes=tzone * 60)
        return dt
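A sketch of the local-to-UTC shift performed above, with assumed inputs (a local time in UTC+8 is moved back 8 hours):

from datetime import datetime, timedelta

dt_local = datetime(2012, 6, 1, 8, 0, 0)  # assumed local time in UTC+8
tzone = 8                                 # positive value for EAST
dt_utc = dt_local - timedelta(minutes=tzone * 60)
# dt_utc == datetime(2012, 6, 1, 0, 0, 0)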
Example #48
 def dinfdistdown(np, ang, fel, slp, src, statsm, distm, edgecontamination, wg, dist,
                  workingdir=None, mpiexedir=None, exedir=None,
                  log_file=None, runtime_file=None, hostfile=None):
     """Run D-inf distance down to stream"""
     in_params = {'-m': '%s %s' % (TauDEM.convertstatsmethod(statsm),
                                   TauDEM.convertdistmethod(distm))}
     if StringClass.string_match(edgecontamination, 'false') or edgecontamination is False:
         in_params['-nc'] = None
     fname = TauDEM.func_name('dinfdistdown')
     return TauDEM.run(FileClass.get_executable_fullpath(fname, exedir),
                       {'-fel': fel, '-slp': slp, '-ang': ang, '-src': src, '-wg': wg},
                       workingdir,
                       in_params,
                       {'-dd': dist},
                       {'mpipath': mpiexedir, 'hostfile': hostfile, 'n': np},
                       {'logfile': log_file, 'runtimefile': runtime_file})
Example #49
 def ParseTimespan(self, items):
     """The format of self.timespan is different for OpenMP version and MPI&OpenMP version.
     For OpenMP version:
        {'IO': {'Input': 0.2,
                'Output': 0.04
               }
         'COMP': {'TSD_RD_P': 0.0001,  # All modules
                  'ALL': 12.3
                 }
         'SIMU': {'ALL': 14.1}
        }
     For MPI&OpenMP version:
        {'MAX': {'IO': {'Input': 0.1,
                        'Output': 0.02,
                        'ALL': 0.12
                       }
                 'COMP': {'Slope': 5,
                          'Channel': 0.5,
                          'Barrier': 0.1,
                          'ALL': 5.6
                         }
                 'SIMU': {'ALL': 10.1}
                }
         'MIN': {...}
         'AVG': {...}
        }
     """
     for item in items:
         if 'TIMESPAN' not in item:
             continue
         item = item.split('\n')[0]
         values = StringClass.extract_numeric_values_from_string(item)
         if values is None or len(values) != 1:
             continue
          time_value = values[0]  # avoid shadowing the 'time' module
          # titles, e.g., ['TIMESPAN', 'COMP', 'ALL'] for OpenMP,
          # or ['TIMESPAN', 'MAX', 'COMP', 'ALL'] for MPI&OpenMP
          titles = item.replace('[', '').split(']')[:-1]
          if len(titles) < 3:
              continue
          titles = [title.strip() for title in titles]
          self.timespan.setdefault(titles[1], dict())
          if len(titles) > 3:
              self.timespan[titles[1]].setdefault(titles[2], dict())
              self.timespan[titles[1]][titles[2]].setdefault(titles[3], time_value)
          else:
              self.timespan[titles[1]].setdefault(titles[2], time_value)
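A sketch of how one log line flows through this parser; the line itself is a made-up example matching the title-extraction logic above:

item = '[TIMESPAN][COMP][ALL] 12.3'
# StringClass.extract_numeric_values_from_string(item) -> [12.3]
# item.replace('[', '').split(']')[:-1] -> ['TIMESPAN', 'COMP', 'ALL']
# Result: self.timespan['COMP']['ALL'] == 12.3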
Example #50
    def sites_table(hydro_clim_db, site_file, site_type):
        """Import HydroClimate sites table"""
        sites_loc = dict()
        site_data_items = read_data_items_from_txt(site_file)
        site_flds = site_data_items[0]
        for i in range(1, len(site_data_items)):
            dic = dict()
            for j in range(len(site_data_items[i])):
                if StringClass.string_match(site_flds[j], StationFields.id):
                    dic[StationFields.id] = int(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.name):
                    dic[StationFields.name] = site_data_items[i][j]
                elif StringClass.string_match(site_flds[j], StationFields.x):
                    dic[StationFields.x] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.y):
                    dic[StationFields.y] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.lon):
                    dic[StationFields.lon] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.lat):
                    dic[StationFields.lat] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.elev):
                    dic[StationFields.elev] = float(site_data_items[i][j])
                elif StringClass.string_match(site_flds[j], StationFields.outlet):
                    dic[StationFields.outlet] = float(site_data_items[i][j])
            dic[StationFields.type] = site_type
            curfilter = {StationFields.id: dic[StationFields.id],
                         StationFields.type: dic[StationFields.type]}
            hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, dic, upsert=True)

            if dic[StationFields.id] not in sites_loc:
                sites_loc[dic[StationFields.id]] = SiteInfo(dic[StationFields.id],
                                                            dic[StationFields.name],
                                                            dic[StationFields.lat],
                                                            dic[StationFields.lon],
                                                            dic[StationFields.x],
                                                            dic[StationFields.y],
                                                            dic[StationFields.elev])
        hydro_clim_db[DBTableNames.sites].create_index([(StationFields.id, ASCENDING),
                                                        (StationFields.type, ASCENDING)])
        return sites_loc
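For reference, a hypothetical site file this importer would accept; the header spellings are assumptions inferred from the StationFields matching above:

# sites_M.csv (illustrative only)
# STATIONID,NAME,LocalX,LocalY,LON,LAT,ELEVATION,ISOUTLET
# 1,GaugeA,39542178.1,3543594.5,113.35,31.45,438.5,1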
Example #51
    def run(function_name, in_files, wp=None, in_params=None, out_files=None, mpi_params=None,
            log_params=None):
        """
        Run TauDEM function.

         - 1. The command will not execute if any input file does not exist.
         - 2. An error will be detected after running the TauDEM command if
              any output file does not exist;

        Args:
            function_name (str): Full path of TauDEM function.
            in_files (dict, required): Dict of pairs of parameter id (string) and file path
                (string or list) for input files, e.g.::

                    {'-z': '/full/path/to/dem.tif'}

            wp (str, optional): Workspace for outputs. If not specified, the directory of the
                first input file in ``in_files`` will be used.
            in_params (dict, optional): Dict of pairs of parameter id (string) and value
                (or None for a flag parameter without a value) for input parameters, e.g.::

                    {'-nc': None}
                    {'-thresh': threshold}
                    {'-m': 'ave' 's', '-nc': None}

            out_files (dict, optional): Dict of pairs of parameter id (string) and file
                path (string or list) for output files, e.g.::

                    {'-fel': 'filleddem.tif'}
                    {'-maxS': ['harden.tif', 'maxsimi.tif']}

            mpi_params (dict, optional): Dict of pairs of parameter id (string) and value or
                path for MPI setting, e.g.::

                    {'mpipath':'/soft/bin','hostfile':'/soft/bin/cluster.node','n':4}
                    {'mpipath':'/soft/bin', 'n':4}
                    {'n':4}

            log_params (dict, optional): Dict of pairs of parameter id (string) and value or
                path for runtime and log output parameters. e.g.::

                    {'logfile': '/home/user/log.txt',
                     'runtimefile': '/home/user/runtime.txt'}

        Returns:
            True if TauDEM run successfully, otherwise False.
        """
        # Check input files
        if in_files is None:
            TauDEM.error('Input files parameter is required!')
        if not isinstance(in_files, dict):
            TauDEM.error('The input files parameter must be a dict!')
        for (pid, infile) in iteritems(in_files):
            if infile is None:
                continue
            if isinstance(infile, list) or isinstance(infile, tuple):
                for idx, inf in enumerate(infile):
                    if inf is None:
                        continue
                    inf, wp = TauDEM.check_infile_and_wp(inf, wp)
                    in_files[pid][idx] = inf
                continue
            if os.path.exists(infile):
                infile, wp = TauDEM.check_infile_and_wp(infile, wp)
                in_files[pid] = os.path.abspath(infile)
            else:
                # For more flexible input file extensions,
                # e.g., -inputtags 1 <path/to/tag1.tif> 2 <path/to/tag2.tif> ...
                # In such unpredictable circumstances, we cannot check the existence of
                # the input files here, so the developer should check them elsewhere.
                if len(StringClass.split_string(infile, ' ')) > 1:
                    continue
                else:  # the infile should still be an existing file, so check in the workspace
                    if wp is None:
                        TauDEM.error('Workspace should not be None!')
                    infile = wp + os.sep + infile
                    if not os.path.exists(infile):
                        TauDEM.error('Input files parameter %s: %s does not exist!' %
                                     (pid, infile))
                    in_files[pid] = os.path.abspath(infile)
        # Make the workspace dir if it does not exist
        UtilClass.mkdir(wp)
        # Check the log parameter
        log_file = None
        runtime_file = None
        if log_params is not None:
            if not isinstance(log_params, dict):
                TauDEM.error('The log parameter must be a dict!')
            if 'logfile' in log_params and log_params['logfile'] is not None:
                log_file = log_params['logfile']
                # If log_file is just a file name, then save it in the default workspace.
                if os.sep not in log_file:
                    log_file = wp + os.sep + log_file
                    log_file = os.path.abspath(log_file)
            if 'runtimefile' in log_params and log_params['runtimefile'] is not None:
                runtime_file = log_params['runtimefile']
                # If runtime_file is just a file name, then save it in the default workspace.
                if os.sep not in runtime_file:
                    runtime_file = wp + os.sep + runtime_file
                    runtime_file = os.path.abspath(runtime_file)

        # remove out_files to avoid any file IO related error
        new_out_files = list()
        if out_files is not None:
            if not isinstance(out_files, dict):
                TauDEM.error('The output files parameter must be a dict!')
            for (pid, out_file) in iteritems(out_files):
                if out_file is None:
                    continue
                if isinstance(out_file, list) or isinstance(out_file, tuple):
                    for idx, outf in enumerate(out_file):
                        if outf is None:
                            continue
                        outf = FileClass.get_file_fullpath(outf, wp)
                        FileClass.remove_files(outf)
                        out_files[pid][idx] = outf
                        new_out_files.append(outf)
                else:
                    out_file = FileClass.get_file_fullpath(out_file, wp)
                    FileClass.remove_files(out_file)
                    out_files[pid] = out_file
                    new_out_files.append(out_file)

        # concatenate command line
        commands = list()
        # MPI header
        if mpi_params is not None:
            if not isinstance(mpi_params, dict):
                TauDEM.error('The MPI settings parameter must be a dict!')
            if 'mpipath' in mpi_params and mpi_params['mpipath'] is not None:
                commands.append(mpi_params['mpipath'] + os.sep + 'mpiexec')
            else:
                commands.append('mpiexec')
            if 'hostfile' in mpi_params and mpi_params['hostfile'] is not None \
                    and not StringClass.string_match(mpi_params['hostfile'], 'none') \
                    and os.path.isfile(mpi_params['hostfile']):
                commands.append('-f')
                commands.append(mpi_params['hostfile'])
            if 'n' in mpi_params and mpi_params['n'] > 1:
                commands.append('-n')
                commands.append(str(mpi_params['n']))
            else:  # If the number of processes is no more than 1, do not call mpiexec.
                commands = []
        # append TauDEM function name, which can be full path or just one name
        commands.append(function_name)
        # append input files
        for (pid, infile) in iteritems(in_files):
            if infile is None:
                continue
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            if isinstance(infile, list) or isinstance(infile, tuple):
                commands.append(' '.join(tmpf for tmpf in infile))
            else:
                commands.append(infile)
        # append input parameters
        if in_params is not None:
            if not isinstance(in_params, dict):
                TauDEM.error('The input parameters must be a dict!')
            for (pid, v) in iteritems(in_params):
                if pid[0] != '-':
                    pid = '-' + pid
                commands.append(pid)
                # allow parameters that are flags without a value
                if v != '' and v is not None:
                    if MathClass.isnumerical(v):
                        commands.append(str(v))
                    else:
                        commands.append(v)
        # append output parameters
        if out_files is not None:
            for (pid, outfile) in iteritems(out_files):
                if outfile is None:
                    continue
                if pid[0] != '-':
                    pid = '-' + pid
                commands.append(pid)
                if isinstance(outfile, list) or isinstance(outfile, tuple):
                    commands.append(' '.join(tmpf for tmpf in outfile))
                else:
                    commands.append(outfile)
        # run command
        runmsg = UtilClass.run_command(commands)
        TauDEM.log(runmsg, log_file)
        TauDEM.output_runtime_to_log(function_name, runmsg, runtime_file)
        # Check out_files, raise RuntimeError if not exist.
        for of in new_out_files:
            if not os.path.exists(of):
                TauDEM.error('%s failed, and the %s was not generated!' % (function_name, of))
                return False
        return True
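A hedged usage sketch of TauDEM.run, mirroring the docstring's examples; the 'pitremove' function name and all paths are assumptions for illustration:

TauDEM.run('pitremove',                      # TauDEM executable name or full path
           {'-z': '/full/path/to/dem.tif'},  # input files
           wp='/full/path/to/workspace',
           out_files={'-fel': 'filleddem.tif'},
           mpi_params={'n': 4},
           log_params={'logfile': 'taudem.log'})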
Example #52
    def __init__(self, cf):
        """Initialization."""
        # 1. Directories
        self.model_dir = None
        self.scenario_id = -1
        if 'PATH' in cf.sections():
            self.model_dir = cf.get('PATH', 'model_dir')
            self.scenario_id = cf.getint('PATH', 'scenarioid')
            if self.scenario_id < 0:
                self.model_dir = self.model_dir + os.path.sep + 'OUTPUT'
            else:
                self.model_dir = self.model_dir + os.path.sep + 'OUTPUT' + str(self.scenario_id)
        else:
            raise ValueError("The [PATH] section MUST exist in the *.ini file.")
        if not FileClass.is_dir_exists(self.model_dir):
            raise ValueError("Please check the directories defined in [PATH].")

        # 2. MongoDB configuration and database, collection, and GridFS names
        self.hostname = '127.0.0.1'  # localhost by default
        self.port = 27017
        self.climate_db = ''
        self.bmp_scenario_db = ''
        self.spatial_db = ''
        if 'MONGODB' in cf.sections():
            self.hostname = cf.get('MONGODB', 'hostname')
            self.port = cf.getint('MONGODB', 'port')
            self.climate_db = cf.get('MONGODB', 'climatedbname')
            self.bmp_scenario_db = cf.get('MONGODB', 'bmpscenariodbname')
            self.spatial_db = cf.get('MONGODB', 'spatialdbname')
        else:
            raise ValueError('The [MONGODB] section MUST exist in the *.ini file.')
        if not StringClass.is_valid_ip_addr(self.hostname):
            raise ValueError('An illegal HOSTNAME is defined in [MONGODB]!')

        # 3. Parameters
        self.plt_subbsnid = -1
        self.plt_vars = list()
        if 'PARAMETERS' in cf.sections():
            self.plt_subbsnid = cf.getint('PARAMETERS', 'plot_subbasinid')
            plt_vars_str = cf.get('PARAMETERS', 'plot_variables')
        else:
            raise ValueError("The [PARAMETERS] section MUST exist in the *.ini file.")
        if self.plt_subbsnid < 0:
            raise ValueError("PLOT_SUBBASINID must be greater than or equal to 0.")
        if plt_vars_str != '':
            self.plt_vars = StringClass.split_string(plt_vars_str)
        else:
            raise ValueError("PLOT_VARIABLES is illegally defined in [PARAMETERS]!")

        # 4. Optional_Parameters
        if 'OPTIONAL_PARAMETERS' in cf.sections():
            tstart = cf.get('OPTIONAL_PARAMETERS', 'time_start')
            tend = cf.get('OPTIONAL_PARAMETERS', 'time_end')
        else:
            raise ValueError("The [OPTIONAL_PARAMETERS] section MUST exist in the *.ini file.")
        try:
            # UTCTIME
            self.time_start = StringClass.get_datetime(tstart)
            self.time_end = StringClass.get_datetime(tend)
            if cf.has_option('OPTIONAL_PARAMETERS', 'vali_time_start') and \
                    cf.has_option('OPTIONAL_PARAMETERS', 'vali_time_end'):
                tstart = cf.get('OPTIONAL_PARAMETERS', 'vali_time_start')
                tend = cf.get('OPTIONAL_PARAMETERS', 'vali_time_end')
                self.vali_stime = StringClass.get_datetime(tstart)
                self.vali_etime = StringClass.get_datetime(tend)
            else:
                self.vali_stime = None
                self.vali_etime = None
        except ValueError:
            raise ValueError('The time format MUST be "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS".')
        if self.time_start >= self.time_end:
            raise ValueError("Wrong time settings in [OPTIONAL_PARAMETERS]!")
        # 5. Switches
        self.lang_cn = False
        if 'SWITCH' in cf.sections():
            self.lang_cn = cf.getboolean('SWITCH', 'lang_cn')
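A minimal *.ini sketch matching the sections parsed above; every value is an illustrative assumption:

[PATH]
MODEL_DIR = C:\SEIMS_demo\model
SCENARIOID = 0

[MONGODB]
HOSTNAME = 127.0.0.1
PORT = 27017
ClimateDBName = demo_climate
BMPScenarioDBName = demo_bmp
SpatialDBName = demo_spatial

[PARAMETERS]
PLOT_SUBBASINID = 0
PLOT_VARIABLES = Q SED

[OPTIONAL_PARAMETERS]
Time_start = 2012-01-01 00:00:00
Time_end = 2012-12-31 23:59:59

[SWITCH]
lang_cn = False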
Example #53
    def data_from_txt(maindb, hydro_clim_db, obs_txts_list, sites_info_txts_list, subbsn_file):
        """
        Read observed data from txt file
        Args:
            maindb: Main spatial database
            hydro_clim_db: hydro-climate dababase
            obs_txts_list: txt file paths of observed data
            sites_info_txts_list: txt file paths of site information
            subbsn_file: subbasin raster file

        Returns:
            True or False
        """
        # 1. Read monitor station information, and store variables information and station IDs
        variable_lists = []
        site_ids = []
        for site_file in sites_info_txts_list:
            site_data_items = read_data_items_from_txt(site_file)
            site_flds = site_data_items[0]
            for i in range(1, len(site_data_items)):
                dic = dict()
                for j, v in enumerate(site_data_items[i]):
                    if StringClass.string_match(site_flds[j], StationFields.id):
                        dic[StationFields.id] = int(v)
                        site_ids.append(dic[StationFields.id])
                    elif StringClass.string_match(site_flds[j], StationFields.name):
                        dic[StationFields.name] = v.strip()
                    elif StringClass.string_match(site_flds[j], StationFields.type):
                        types = StringClass.split_string(v.strip(), '-')
                    elif StringClass.string_match(site_flds[j], StationFields.lat):
                        dic[StationFields.lat] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.lon):
                        dic[StationFields.lon] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.x):
                        dic[StationFields.x] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.y):
                        dic[StationFields.y] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.unit):
                        dic[StationFields.unit] = v.strip()
                    elif StringClass.string_match(site_flds[j], StationFields.elev):
                        dic[StationFields.elev] = float(v)
                    elif StringClass.string_match(site_flds[j], StationFields.outlet):
                        dic[StationFields.outlet] = float(v)

                for j, cur_type in enumerate(types):
                    site_dic = dict()
                    site_dic[StationFields.id] = dic[StationFields.id]
                    site_dic[StationFields.name] = dic[StationFields.name]
                    site_dic[StationFields.type] = cur_type
                    site_dic[StationFields.lat] = dic[StationFields.lat]
                    site_dic[StationFields.lon] = dic[StationFields.lon]
                    site_dic[StationFields.x] = dic[StationFields.x]
                    site_dic[StationFields.y] = dic[StationFields.y]
                    site_dic[StationFields.elev] = dic[StationFields.elev]
                    site_dic[StationFields.outlet] = dic[StationFields.outlet]
                    # Add SubbasinID field
                    matched, cur_sids = ImportObservedData.match_subbasin(subbsn_file, site_dic,
                                                                          maindb)
                    if not matched:
                        break
                    cur_subbsn_id_str = ''
                    if len(cur_sids) == 1:  # if only one subbasin ID, store the integer
                        cur_subbsn_id_str = cur_sids[0]
                    else:
                        cur_subbsn_id_str = ','.join(str(cid) for cid in cur_sids
                                                     if cid is not None)
                    site_dic[StationFields.subbsn] = cur_subbsn_id_str
                    curfilter = {StationFields.id: site_dic[StationFields.id],
                                 StationFields.type: site_dic[StationFields.type]}
                    # print(curfilter)
                    hydro_clim_db[DBTableNames.sites].find_one_and_replace(curfilter, site_dic,
                                                                           upsert=True)

                    var_dic = dict()
                    var_dic[StationFields.type] = types[j]
                    var_dic[StationFields.unit] = dic[StationFields.unit]
                    if var_dic not in variable_lists:
                        variable_lists.append(var_dic)
        site_ids = list(set(site_ids))
        # 2. Read measurement data and import to MongoDB
        bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
        count = 0
        for measDataFile in obs_txts_list:
            # print(measDataFile)
            obs_data_items = read_data_items_from_txt(measDataFile)
            tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(measDataFile)
            if tsysin == 'UTCTIME':
                tzonein = time.timezone / -3600
            # If the data items are empty or contain only the header row,
            # skip to the next data file.
            if len(obs_data_items) <= 1:
                continue
            obs_flds = obs_data_items[0]
            required_flds = [StationFields.id, DataValueFields.type, DataValueFields.value]

            for fld in required_flds:
                if not StringClass.string_in_list(fld, obs_flds):  # data do not meet the format
                    raise ValueError('%s does not meet the required format!' % measDataFile)
            for i, cur_obs_data_item in enumerate(obs_data_items):
                if i == 0:
                    continue
                dic = dict()
                for j, cur_data_value in enumerate(cur_obs_data_item):
                    if StringClass.string_match(obs_flds[j], StationFields.id):
                        dic[StationFields.id] = int(cur_data_value)
                    elif StringClass.string_match(obs_flds[j], DataValueFields.type):
                        dic[DataValueFields.type] = cur_data_value
                    elif StringClass.string_match(obs_flds[j], DataValueFields.value):
                        dic[DataValueFields.value] = float(cur_data_value)
                # if the current site ID is not included, skip this data item
                if dic.get(StationFields.id) not in site_ids:
                    continue
                utc_t = HydroClimateUtilClass.get_utcdatetime_from_field_values(obs_flds,
                                                                                cur_obs_data_item,
                                                                                tsysin, tzonein)
                dic[DataValueFields.local_time] = utc_t + timedelta(minutes=tzonein * 60)
                dic[DataValueFields.time_zone] = tzonein
                dic[DataValueFields.utc] = utc_t
                # curfilter = {StationFields.id: dic[StationFields.id],
                #              DataValueFields.type: dic[DataValueFields.type],
                #              DataValueFields.utc: dic[DataValueFields.utc]}
                # bulk.find(curfilter).replace_one(dic)
                bulk.insert(dic)
                count += 1
                if count % 500 == 0:
                    MongoUtil.run_bulk(bulk)
                    bulk = hydro_clim_db[DBTableNames.observes].initialize_ordered_bulk_op()
                    # db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)
        # 3. Add measurement data with unit converted
        # loop variables list
        added_dics = []
        for curVar in variable_lists:
            # print(curVar)
            # if the unit is mg/L, then change the Type name with the suffix 'Conc',
            # and convert the corresponding data to kg if the discharge data is
            # available.
            cur_type = curVar[StationFields.type]
            cur_unit = curVar[StationFields.unit]
            # Find data by Type
            for item in hydro_clim_db[DBTableNames.observes].find({StationFields.type: cur_type}):
                # print(item)
                dic = dict()
                dic[StationFields.id] = item[StationFields.id]
                dic[DataValueFields.value] = item[DataValueFields.value]
                dic[StationFields.type] = item[StationFields.type]
                dic[DataValueFields.local_time] = item[DataValueFields.local_time]
                dic[DataValueFields.time_zone] = item[DataValueFields.time_zone]
                dic[DataValueFields.utc] = item[DataValueFields.utc]

                if cur_unit == 'mg/L' or cur_unit == 'g/L':
                    # update the Type name
                    dic[StationFields.type] = cur_type + 'Conc'
                    curfilter = {StationFields.id: dic[StationFields.id],
                                 DataValueFields.type: cur_type,
                                 DataValueFields.utc: dic[DataValueFields.utc]}
                    hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic,
                                                                              upsert=True)
                    dic[StationFields.type] = cur_type

                # find discharge on current day
                cur_filter = {StationFields.type: 'Q',
                              DataValueFields.utc: dic[DataValueFields.utc],
                              StationFields.id: dic[StationFields.id]}
                q_dic = hydro_clim_db[DBTableNames.observes].find_one(filter=cur_filter)

                q = -9999.
                if q_dic is not None:
                    q = q_dic[DataValueFields.value]
                else:
                    continue
                if cur_unit == 'mg/L':
                    # convert mg/L to kg
                    dic[DataValueFields.value] = round(
                            dic[DataValueFields.value] * q * 86400. / 1000., 2)
                elif cur_unit == 'g/L':
                    # convert g/L to kg
                    dic[DataValueFields.value] = round(
                            dic[DataValueFields.value] * q * 86400., 2)
                elif cur_unit == 'kg':
                    dic[StationFields.type] = cur_type + 'Conc'
                    # convert kg to mg/L
                    dic[DataValueFields.value] = round(
                            dic[DataValueFields.value] / q * 1000. / 86400., 2)
                # add new data item
                added_dics.append(dic)
        # import to MongoDB
        for dic in added_dics:
            curfilter = {StationFields.id: dic[StationFields.id],
                         DataValueFields.type: dic[DataValueFields.type],
                         DataValueFields.utc: dic[DataValueFields.utc]}
            hydro_clim_db[DBTableNames.observes].find_one_and_replace(curfilter, dic, upsert=True)
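A worked check of the unit conversion above, with assumed numbers: a concentration in mg/L is numerically equal to g/m3, so multiplying by discharge and by seconds per day gives grams per day, and dividing by 1000 gives kilograms.

conc_mg_l = 2.   # assumed concentration, mg/L (== 2 g/m3)
q = 5.           # assumed discharge, m3/s
load_kg = round(conc_mg_l * q * 86400. / 1000., 2)  # -> 864.0 kg/day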
Example #54
    def lookup_soil_parameters(dstdir, soiltype_file, soil_lookup_file, landuse_shapefile):
        """Reclassify soil parameters by lookup table."""
        #  Read soil properties from txt file
        soil_lookup_data = read_data_items_from_txt(soil_lookup_file)
        soil_instances = list()
        soil_prop_flds = soil_lookup_data[0][:]
        for i in range(1, len(soil_lookup_data)):
            cur_soil_data_item = soil_lookup_data[i][:]
            cur_seqn = cur_soil_data_item[0]
            cur_sname = cur_soil_data_item[1]
            cur_soil_ins = SoilProperty(cur_seqn, cur_sname)
            for j in range(2, len(soil_prop_flds)):
                cur_flds = StringClass.split_string(cur_soil_data_item[j], '-')  # Get field values
                for k, tmpfld in enumerate(cur_flds):
                    cur_flds[k] = float(tmpfld)  # Convert to float
                if StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NLYRS):
                    cur_soil_ins.SOILLAYERS = int(cur_flds[0])
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._Z):
                    cur_soil_ins.SOILDEPTH = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._OM):
                    cur_soil_ins.OM = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._CLAY):
                    cur_soil_ins.CLAY = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SILT):
                    cur_soil_ins.SILT = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SAND):
                    cur_soil_ins.SAND = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ROCK):
                    cur_soil_ins.ROCK = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ZMX):
                    cur_soil_ins.SOL_ZMX = cur_flds[0]
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ANIONEXCL):
                    cur_soil_ins.ANION_EXCL = cur_flds[0]
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._CRK):
                    cur_soil_ins.SOL_CRK = cur_flds[0]
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._BD):
                    cur_soil_ins.DENSITY = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._K):
                    cur_soil_ins.CONDUCTIVITY = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._WP):
                    cur_soil_ins.WILTINGPOINT = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._FC):
                    cur_soil_ins.FIELDCAP = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._AWC):
                    cur_soil_ins.AWC = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._POROSITY):
                    cur_soil_ins.POROSITY = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._USLE_K):
                    cur_soil_ins.USLE_K = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ALB):
                    cur_soil_ins.SOL_ALB = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ESCO):
                    cur_soil_ins.ESCO = cur_flds[0]
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NO3):
                    cur_soil_ins.SOL_NO3 = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._NH4):
                    cur_soil_ins.SOL_NH4 = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ORGN):
                    cur_soil_ins.SOL_ORGN = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._SOLP):
                    cur_soil_ins.SOL_SOLP = cur_flds
                elif StringClass.string_match(soil_prop_flds[j], SoilUtilClass._ORGP):
                    cur_soil_ins.SOL_ORGP = cur_flds
            cur_soil_ins.check_data_validation()
            soil_instances.append(cur_soil_ins)
        soil_prop_dict = {}
        for sol in soil_instances:
            cur_sol_dict = sol.soil_dict()
            for fld in cur_sol_dict:
                if fld in soil_prop_dict:
                    soil_prop_dict[fld].append(cur_sol_dict[fld])
                else:
                    soil_prop_dict[fld] = [cur_sol_dict[fld]]
        # print(list(soilPropDict.keys()))
        # print(list(soilPropDict.values()))

        replace_dicts = list()
        dst_soil_tifs = list()
        sol_fld_name = list()
        seqns = soil_prop_dict[SoilUtilClass._SEQN]
        max_lyr_num = int(numpy.max(soil_prop_dict[SoilUtilClass._NLYRS]))
        for key in soil_prop_dict:
            if key != SoilUtilClass._SEQN and key != SoilUtilClass._NAME:
                key_l = 1
                for key_v in soil_prop_dict[key]:
                    if isinstance(key_v, list):
                        if len(key_v) > key_l:
                            key_l = len(key_v)
                if key_l == 1:
                    cur_dict = {}
                    for i, tmpseq in enumerate(seqns):
                        cur_dict[float(tmpseq)] = soil_prop_dict[key][i]
                    replace_dicts.append(cur_dict)
                    dst_soil_tifs.append(dstdir + os.path.sep + key + '.tif')
                    sol_fld_name.append(key)
                else:
                    for i in range(max_lyr_num):
                        cur_dict = dict()
                        for j, tmpseq in enumerate(seqns):
                            if i < soil_prop_dict[SoilUtilClass._NLYRS][j]:
                                cur_dict[float(tmpseq)] = soil_prop_dict[key][j][i]
                            else:
                                cur_dict[float(seqns[j])] = DEFAULT_NODATA
                        replace_dicts.append(cur_dict)
                        dst_soil_tifs.append(dstdir + os.path.sep + key + '_' + str(i + 1) + '.tif')
                        sol_fld_name.append(key + '_' + str(i + 1))
        # print(replaceDicts)
        # print(len(replaceDicts))
        # print(dstSoilTifs)
        # print(len(dstSoilTifs))

        # Generate GTIFF
        soil_shp = r'D:\SEIMS\data\zts\data_prepare\spatial\soil_SEQN_all.shp'
        # landuse_basin = r'D:\SEIMS\data\zts\data_prepare\spatial\LanduseFinal_basin.shp'
        # for i, soil_tif in enumerate(sol_fld_name):
        #     print(soil_tif)
        #     SoilProperty.count_by_shp(soil_shp, landuse_shapefile, soil_tif, replace_dicts[i])
        RasterUtilClass.count_by_shp(soil_shp, landuse_shapefile, sol_fld_name, replace_dicts)
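For reference, a hypothetical fragment of the soil lookup table parsed above; multi-layer values are joined with '-' per the split on that character, and all names and numbers are assumptions:

# SEQN,SNAM,NLAYERS,SOL_Z,OM,CLAY,SILT,SAND
# 1,PaddySoil,2,200-800,2.5-1.1,28-32,40-38,32-30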
Example #55
    def model_io_configuration(cfg, maindb):
        """
        Import Input and Output Configuration of SEIMS, i.e., file.in and file.out
        Args:
            cfg: SEIMS config object
            maindb: MongoDB database object
        """
        file_in_path = cfg.modelcfgs.filein
        file_out_path = cfg.paramcfgs.init_outputs_file
        # initialize if collection not existed
        c_list = maindb.collection_names()
        conf_tabs = [DBTableNames.main_filein, DBTableNames.main_fileout]
        for item in conf_tabs:
            if not StringClass.string_in_list(item, c_list):
                maindb.create_collection(item)
            else:
                maindb.drop_collection(item)
        file_in_items = read_data_items_from_txt(file_in_path)
        file_out_items = read_data_items_from_txt(file_out_path)

        for item in file_in_items:
            file_in_dict = dict()
            values = StringClass.split_string(item[0].strip(), ['|'])
            if len(values) != 2:
                raise ValueError('Each item must contain exactly one tag and one value '
                                 'string, separated by "|"!')
            file_in_dict[ModelCfgFields.tag] = values[0]
            file_in_dict[ModelCfgFields.value] = values[1]
            maindb[DBTableNames.main_filein].insert(file_in_dict)

        # begin to import initial outputs settings
        bulk = maindb[DBTableNames.main_fileout].initialize_unordered_bulk_op()
        out_field_array = file_out_items[0]
        out_data_array = file_out_items[1:]
        # print(out_data_array)
        for item in out_data_array:
            file_out_dict = dict()
            for i, v in enumerate(out_field_array):
                if StringClass.string_match(ModelCfgFields.mod_cls, v):
                    file_out_dict[ModelCfgFields.mod_cls] = item[i]
                elif StringClass.string_match(ModelCfgFields.output_id, v):
                    file_out_dict[ModelCfgFields.output_id] = item[i]
                elif StringClass.string_match(ModelCfgFields.desc, v):
                    file_out_dict[ModelCfgFields.desc] = item[i]
                elif StringClass.string_match(ModelCfgFields.unit, v):
                    file_out_dict[ModelCfgFields.unit] = item[i]
                elif StringClass.string_match(ModelCfgFields.type, v):
                    file_out_dict[ModelCfgFields.type] = item[i]
                elif StringClass.string_match(ModelCfgFields.stime, v):
                    file_out_dict[ModelCfgFields.stime] = item[i]
                elif StringClass.string_match(ModelCfgFields.etime, v):
                    file_out_dict[ModelCfgFields.etime] = item[i]
                elif StringClass.string_match(ModelCfgFields.interval, v):
                    file_out_dict[ModelCfgFields.interval] = item[i]
                elif StringClass.string_match(ModelCfgFields.interval_unit, v):
                    file_out_dict[ModelCfgFields.interval_unit] = item[i]
                elif StringClass.string_match(ModelCfgFields.filename, v):
                    file_out_dict[ModelCfgFields.filename] = item[i]
                elif StringClass.string_match(ModelCfgFields.use, v):
                    file_out_dict[ModelCfgFields.use] = item[i]
                elif StringClass.string_match(ModelCfgFields.subbsn, v):
                    file_out_dict[ModelCfgFields.subbsn] = item[i]
            if not file_out_dict:
                raise ValueError('There is no valid output item stored in file.out!')
            bulk.insert(file_out_dict)
        MongoUtil.run_bulk(bulk, 'No operations to execute when importing initial outputs settings.')

        # begin to import the desired outputs
        # initialize bulk operator
        bulk = maindb[DBTableNames.main_fileout].initialize_ordered_bulk_op()
        # read initial parameters from txt file
        data_items = read_data_items_from_txt(cfg.modelcfgs.fileout)
        # print(field_names)
        for i, cur_data_item in enumerate(data_items):
            data_import = dict()
            cur_filter = dict()
            # print(cur_data_item)
            if len(cur_data_item) == 7:
                data_import[ModelCfgFields.output_id] = cur_data_item[0]
                data_import[ModelCfgFields.type] = cur_data_item[1]
                data_import[ModelCfgFields.stime] = cur_data_item[2]
                data_import[ModelCfgFields.etime] = cur_data_item[3]
                data_import[ModelCfgFields.interval] = cur_data_item[4]
                data_import[ModelCfgFields.interval_unit] = cur_data_item[5]
                data_import[ModelCfgFields.subbsn] = cur_data_item[6]
                data_import[ModelCfgFields.use] = 1
                cur_filter[ModelCfgFields.output_id] = cur_data_item[0]
            else:
                raise RuntimeError('Items in file.out must have 7 columns, i.e., OUTPUTID,'
                                   'TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN.')

            bulk.find(cur_filter).update({'$set': data_import})
        # execute import operators
        MongoUtil.run_bulk(bulk, 'No operations to execute when importing the desired outputs.')
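A hypothetical line of the user-defined file.out consumed above, following the 7-column rule enforced by the RuntimeError (names and dates are assumptions; columns are TAB- or comma-separated):

# OUTPUTID,TYPE,STARTTIME,ENDTIME,INTERVAL,INTERVAL_UNIT,SUBBASIN
# QRECH,SUM,2012-01-01 00:00:00,2012-12-31 23:59:59,1,DAY,0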
Example #56
    def daily_data_from_txt(climdb, data_txt_file, sites_info_dict):
        """Import climate data table"""
        tsysin, tzonein = HydroClimateUtilClass.get_time_system_from_data_file(data_txt_file)
        if tsysin == 'UTCTIME':
            tzonein = time.timezone / -3600
        clim_data_items = read_data_items_from_txt(data_txt_file)
        clim_flds = clim_data_items[0]
        # PHUCalDic is used for Calculating potential heat units (PHU)
        # for each climate station and each year.
        # format is {StationID:{Year1:[values],Year2:[Values]...}, ...}
        # PHUCalDic = {}
        # format: {StationID1: climateStats1, ...}
        hydro_climate_stats = dict()
        required_flds = [DataType.max_tmp, DataType.min_tmp, DataType.rm, DataType.ws]
        output_flds = [DataType.mean_tmp, DataType.max_tmp, DataType.min_tmp,
                       DataType.rm, DataType.pet, DataType.ws, DataType.sr]
        # remove existing records
        for fld in output_flds:
            climdb[DBTableNames.data_values].remove({DataValueFields.type: fld})
        for fld in required_flds:
            if not StringClass.string_in_list(fld, clim_flds):
                raise ValueError('Meteorological Daily data MUST contain %s!' % fld)
        # Create bulk object
        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()
        count = 0
        for i, cur_clim_data_item in enumerate(clim_data_items):
            if i == 0:
                continue
            dic = dict()
            cur_ssd = DEFAULT_NODATA

            for j, clim_data_v in enumerate(cur_clim_data_item):
                if StringClass.string_match(clim_flds[j], DataValueFields.id):
                    dic[DataValueFields.id] = int(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.mean_tmp):
                    dic[DataType.mean_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.min_tmp):
                    dic[DataType.min_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.max_tmp):
                    dic[DataType.max_tmp] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.pet):
                    dic[DataType.pet] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.sr):
                    dic[DataType.sr] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.ws):
                    dic[DataType.ws] = float(clim_data_v)
                elif StringClass.string_match(clim_flds[j], DataType.rm):
                    dic[DataType.rm] = float(clim_data_v) * 0.01
                elif StringClass.string_match(clim_flds[j], DataType.ssd):
                    cur_ssd = float(clim_data_v)
            # Get datetime and utc/local transformation
            utc_time = HydroClimateUtilClass.get_utcdatetime_from_field_values(clim_flds,
                                                                               cur_clim_data_item,
                                                                               tsysin, tzonein)
            dic[DataValueFields.local_time] = utc_time + timedelta(minutes=tzonein * 60)
            dic[DataValueFields.time_zone] = tzonein
            dic[DataValueFields.utc] = utc_time
            dic[DataValueFields.y] = utc_time.year

            # Derive values when some fields are not provided
            if DataType.mean_tmp not in list(dic.keys()):
                dic[DataType.mean_tmp] = (dic[DataType.max_tmp] + dic[DataType.min_tmp]) / 2.
            if DataType.sr not in list(dic.keys()):
                if cur_ssd == DEFAULT_NODATA:
                    raise ValueError(DataType.sr + ' or ' + DataType.ssd + ' must be provided!')
                elif dic[DataValueFields.id] in list(sites_info_dict.keys()):
                    # Estimate solar radiation from sunshine duration (SSD),
                    # day of year, and station latitude (converted to radians)
                    cur_lon, cur_lat = sites_info_dict[dic[DataValueFields.id]].lon_lat()
                    sr = round(HydroClimateUtilClass.rs(DateClass.day_of_year(utc_time),
                                                        float(cur_ssd), cur_lat * PI / 180.), 1)
                    dic[DataType.sr] = sr

            for fld in output_flds:
                cur_dic = dict()
                if fld in list(dic.keys()):
                    cur_dic[DataValueFields.value] = dic[fld]
                    cur_dic[DataValueFields.id] = dic[DataValueFields.id]
                    cur_dic[DataValueFields.utc] = dic[DataValueFields.utc]
                    cur_dic[DataValueFields.time_zone] = dic[DataValueFields.time_zone]
                    cur_dic[DataValueFields.local_time] = dic[DataValueFields.local_time]
                    cur_dic[DataValueFields.type] = fld
                    # The old code inserted or updated one record at a time, which
                    # was quite inefficient; replaced by the bulk operation interface. lj
                    bulk.insert(cur_dic)
                    count += 1
                    if count % 500 == 0:  # execute every 500 records
                        MongoUtil.run_bulk(bulk)
                        bulk = climdb[DBTableNames.data_values].initialize_ordered_bulk_op()

            if dic[DataValueFields.id] not in list(hydro_climate_stats.keys()):
                hydro_climate_stats[dic[DataValueFields.id]] = ClimateStats()
            hydro_climate_stats[dic[DataValueFields.id]].add_item(dic)
        # execute the remaining records
        if count % 500 != 0:
            MongoUtil.run_bulk(bulk)
        for cur_climate_stats in list(hydro_climate_stats.values()):
            cur_climate_stats.annual_stats()
        # Create index
        climdb[DBTableNames.data_values].create_index([(DataValueFields.id, ASCENDING),
                                                       (DataValueFields.type, ASCENDING),
                                                       (DataValueFields.utc, ASCENDING)])
        # prepare dicts and upsert annual statistics to MongoDB
        for s_id, stats_v in list(hydro_climate_stats.items()):
            for YYYY in list(stats_v.Count.keys()):
                cur_dic = dict()
                cur_dic[DataValueFields.value] = stats_v.PHUTOT[YYYY]
                cur_dic[DataValueFields.id] = s_id
                cur_dic[DataValueFields.y] = YYYY
                cur_dic[VariableDesc.unit] = 'heat units'
                cur_dic[VariableDesc.type] = DataType.phu_tot
                curfilter = {DataValueFields.id: s_id,
                             VariableDesc.type: DataType.phu_tot,
                             DataValueFields.y: YYYY}
                climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                       upsert=True)
                # import annual mean temperature
                cur_dic[VariableDesc.type] = DataType.mean_tmp
                cur_dic[VariableDesc.unit] = 'deg C'
                cur_dic[DataValueFields.value] = stats_v.MeanTmp[YYYY]
                curfilter = {DataValueFields.id: s_id,
                             VariableDesc.type: DataType.mean_tmp,
                             DataValueFields.y: YYYY}
                climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                       upsert=True)
            # import multi-year average PHU, flagged with year = DEFAULT_NODATA
            cur_dic = dict()
            cur_dic[DataValueFields.value] = stats_v.PHU0
            cur_dic[DataValueFields.id] = s_id
            cur_dic[DataValueFields.y] = DEFAULT_NODATA
            cur_dic[VariableDesc.unit] = 'heat units'
            cur_dic[VariableDesc.type] = DataType.phu0
            curfilter = {DataValueFields.id: s_id,
                         VariableDesc.type: DataType.phu0,
                         DataValueFields.y: DEFAULT_NODATA}
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                   upsert=True)
            # import multi-year mean temperature
            cur_dic[VariableDesc.type] = DataType.mean_tmp0
            cur_dic[VariableDesc.unit] = 'deg C'
            cur_dic[DataValueFields.value] = stats_v.MeanTmp0
            curfilter = {DataValueFields.id: s_id,
                         VariableDesc.type: DataType.mean_tmp0,
                         DataValueFields.y: DEFAULT_NODATA}
            climdb[DBTableNames.annual_stats].find_one_and_replace(curfilter, cur_dic,
                                                                   upsert=True)
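The batching logic above flushes the bulk operator every 500 inserts and then flushes any remainder. A self-contained sketch of the same pattern, using PyMongo's legacy bulk API (removed in PyMongo 4.x, where collection.bulk_write is the replacement); the connection details and document fields are placeholders:

    from pymongo import MongoClient
    from pymongo.errors import InvalidOperation

    coll = MongoClient('localhost', 27017)['demo']['DATA_VALUES']
    bulk = coll.initialize_ordered_bulk_op()
    count = 0
    for i in range(1234):  # stand-in for the parsed climate data items
        bulk.insert({'ID': 1, 'VALUE': float(i)})
        count += 1
        if count % 500 == 0:  # flush a full batch and start a new one
            bulk.execute()
            bulk = coll.initialize_ordered_bulk_op()
    if count % 500 != 0:  # flush the remaining partial batch
        try:
            bulk.execute()
        except InvalidOperation:  # nothing left to execute
            pass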
    def lookup_tables_as_collection_and_gridfs(cfg, maindb):
        """Import lookup tables (from txt file) as Collection and GridFS
        Args:
            cfg: SEIMS config object
            maindb: workflow model database
        """
        for tablename, txt_file in list(cfg.paramcfgs.lookup_tabs_dict.items()):
            # import each lookup table as a collection and GridFS file.
            c_list = maindb.collection_names()
            if not StringClass.string_in_list(tablename.upper(), c_list):
                maindb.create_collection(tablename.upper())
            else:
                maindb.drop_collection(tablename.upper())
            # initialize bulk operator
            bulk = maindb[tablename.upper()].initialize_ordered_bulk_op()
            # delete the existing GridFS file for this table, if any
            spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
            if spatial.exists(filename=tablename.upper()):
                x = spatial.get_version(filename=tablename.upper())
                spatial.delete(x._id)

            # read data items
            data_items = read_data_items_from_txt(txt_file)
            field_names = data_items[0][:]  # the header row
            item_values = list()  # rows to be written to the GridFS file
            for i, cur_data_item in enumerate(data_items):
                if i == 0:
                    continue
                data_import = dict()  # import as Collection
                item_value = list()  # import as gridfs file
                for idx, fld in enumerate(field_names):
                    if MathClass.isnumerical(cur_data_item[idx]):
                        tmp_value = float(cur_data_item[idx])
                        data_import[fld] = tmp_value
                        item_value.append(tmp_value)
                    else:
                        data_import[fld] = cur_data_item[idx]
                bulk.insert(data_import)
                if len(item_value) > 0:
                    item_values.append(item_value)
            MongoUtil.run_bulk(bulk, 'No operations to execute when importing %s.' % tablename)
            # begin importing the GridFS file
            n_row = len(item_values)
            if n_row >= 1:
                n_col = len(item_values[0])
                for i in range(n_row):
                    if n_col != len(item_values[i]):
                        raise ValueError('Please check %s to make sure each data item has '
                                         'the same numeric dimension. The first row has '
                                         '%d fields, but the current item has %d.' %
                                         (tablename, n_col, len(item_values[i])))
                    # prefix each row with its field count
                    item_values[i].insert(0, n_col)

                metadic = {ModelParamDataUtils.item_count: n_row,
                           ModelParamDataUtils.field_count: n_col}
                cur_lookup_gridfs = spatial.new_file(filename=tablename.upper(), metadata=metadic)
                # write the row count as a single 4-byte float header
                cur_lookup_gridfs.write(pack('1f', n_row))
                # pack each row as: field count followed by the field values
                fmt = '%df' % (n_col + 1)
                for i in range(n_row):
                    cur_lookup_gridfs.write(pack(fmt, *item_values[i]))
                cur_lookup_gridfs.close()
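The binary layout written above can be read back symmetrically: one float holding the row count, then each row packed as its field count followed by the field values. A hedged reader sketch; the database name, GridFS bucket name (standing in for DBTableNames.gridfs_spatial), and table name are placeholders:

    from struct import unpack

    from gridfs import GridFS
    from pymongo import MongoClient

    maindb = MongoClient('localhost', 27017)['demo_model']
    spatial = GridFS(maindb, 'SPATIAL')  # assumed bucket name
    buf = spatial.get_version(filename='LANDUSE').read()  # hypothetical table

    n_row = int(unpack('1f', buf[:4])[0])  # header: row count stored as a float
    offset, rows = 4, []
    for _ in range(n_row):
        # each row starts with its field count, followed by that many values
        n_col = int(unpack('1f', buf[offset:offset + 4])[0])
        rows.append(list(unpack('%df' % n_col,
                                buf[offset + 4:offset + 4 + 4 * n_col])))
        offset += 4 * (n_col + 1)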