Example #1
    def subbasin_boundary_cells(self, subbsn_perc):
        """Subbasin boundary cells that are potential ridge sources."""
        dir_deltas = FlowModelConst.d8delta_ag.values()
        subbsn_elevs = dict()

        def add_elev_to_subbsn_elevs(sid, elev):
            if sid not in subbsn_elevs:
                subbsn_elevs[sid] = [elev]
            else:
                subbsn_elevs[sid].append(elev)

        for row in range(self.nrows):
            for col in range(self.ncols):
                if MathClass.floatequal(self.subbsn_data[row][col],
                                        self.nodata_subbsn):
                    continue
                for r, c in dir_deltas:
                    new_row = row + r
                    new_col = col + c
                    if 0 <= new_row < self.nrows and 0 <= new_col < self.ncols:
                        if MathClass.floatequal(
                                self.subbsn_data[new_row][new_col],
                                self.nodata_subbsn):
                            subbsnid = self.subbsn_data[row][col]
                            self.rdgpot[row][col] = subbsnid
                            add_elev_to_subbsn_elevs(subbsnid,
                                                     self.elev_data[row][col])
                        elif not MathClass.floatequal(
                                self.subbsn_data[row][col],
                                self.subbsn_data[new_row][new_col]):
                            subbsnid = self.subbsn_data[row][col]
                            subbsnid2 = self.subbsn_data[new_row][new_col]
                            self.rdgpot[row][col] = subbsnid
                            self.rdgpot[new_row][new_col] = subbsnid2
                            add_elev_to_subbsn_elevs(subbsnid,
                                                     self.elev_data[row][col])
                            add_elev_to_subbsn_elevs(
                                subbsnid2, self.elev_data[new_row][new_col])

        RasterUtilClass.write_gtiff_file(self.boundsrc, self.nrows, self.ncols,
                                         self.rdgpot, self.geotrans, self.srs,
                                         DEFAULT_NODATA, 6)
        subbsn_elevs_thresh = dict()
        for sid, elevs in list(subbsn_elevs.items()):
            tmpelev = numpy.array(elevs)
            tmpelev.sort()
            subbsn_elevs_thresh[sid] = tmpelev[int(len(tmpelev) * subbsn_perc)]
        for row in range(self.nrows):
            for col in range(self.ncols):
                if MathClass.floatequal(self.rdgpot[row][col], DEFAULT_NODATA):
                    continue
                if self.elev_data[row][col] < subbsn_elevs_thresh[
                        self.subbsn_data[row][col]]:
                    self.rdgpot[row][col] = DEFAULT_NODATA
        RasterUtilClass.write_gtiff_file(self.boundsrcfilter, self.nrows,
                                         self.ncols, self.rdgpot,
                                         self.geotrans, self.srs,
                                         DEFAULT_NODATA, 6)
Example #2
def cal_model_performance(obsl, siml):
    """Calculate model performance indexes."""
    nse = MathClass.nashcoef(obsl, siml)
    r2 = MathClass.rsquare(obsl, siml)
    rmse = MathClass.rmse(obsl, siml)
    pbias = MathClass.pbias(obsl, siml)
    rsr = MathClass.rsr(obsl, siml)
    print('NSE: %.2f, R-square: %.2f, PBIAS: %.2f%%, RMSE: %.2f, RSR: %.2f' %
          (nse, r2, pbias, rmse, rsr))
Example #3
def cal_model_performance(obsl, siml):
    """Calculate model performance indexes."""
    nse = MathClass.nashcoef(obsl, siml)
    r2 = MathClass.rsquare(obsl, siml)
    rmse = MathClass.rmse(obsl, siml)
    pbias = MathClass.pbias(obsl, siml)
    rsr = MathClass.rsr(obsl, siml)
    print('NSE: %.2f, R$^2$: %.2f, PBIAS: %.2f%%, RMSE: %.2f, RSR: %.2f' %
          (nse, r2, pbias, rmse, rsr))
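A minimal usage sketch for the function above, assuming it is defined as shown and that MathClass comes from pygeoc.utils; the observed and simulated series are made-up numbers:

from pygeoc.utils import MathClass  # dependency of cal_model_performance

obs = [12.7, 18.3, 9.6, 22.1, 15.4]   # observed values (illustrative)
sim = [11.9, 19.0, 10.2, 20.8, 16.1]  # simulated values (illustrative)
cal_model_performance(obs, sim)  # prints NSE, R-square, PBIAS, RMSE, and RSR on one line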
Example #4
 def filter_ridge_by_subbasin_boundary(self):
     for row in range(self.nrows):
         for col in range(self.ncols):
             if MathClass.floatequal(self.rdgsrc_data[row][col],
                                     DEFAULT_NODATA):
                 continue
             if MathClass.floatequal(self.rdgpot[row][col], DEFAULT_NODATA):
                 self.rdgsrc_data[row][col] = DEFAULT_NODATA
     RasterUtilClass.write_gtiff_file(self.rdgsrc, self.nrows, self.ncols,
                                      self.rdgsrc_data, self.geotrans,
                                      self.srs, DEFAULT_NODATA, 6)
Example #5
 def check_orthogonal(angle):
     """Check the given Dinf angle based on D8 flow direction encoding code by ArcGIS"""
     flow_dir_taudem = -1
     flow_dir = -1
     if MathClass.floatequal(angle, FlowModelConst.e):
         flow_dir_taudem = FlowModelConst.e
         flow_dir = 1
     elif MathClass.floatequal(angle, FlowModelConst.ne):
         flow_dir_taudem = FlowModelConst.ne
         flow_dir = 128
     elif MathClass.floatequal(angle, FlowModelConst.n):
         flow_dir_taudem = FlowModelConst.n
         flow_dir = 64
     elif MathClass.floatequal(angle, FlowModelConst.nw):
         flow_dir_taudem = FlowModelConst.nw
         flow_dir = 32
     elif MathClass.floatequal(angle, FlowModelConst.w):
         flow_dir_taudem = FlowModelConst.w
         flow_dir = 16
     elif MathClass.floatequal(angle, FlowModelConst.sw):
         flow_dir_taudem = FlowModelConst.sw
         flow_dir = 8
     elif MathClass.floatequal(angle, FlowModelConst.s):
         flow_dir_taudem = FlowModelConst.s
         flow_dir = 4
     elif MathClass.floatequal(angle, FlowModelConst.se):
         flow_dir_taudem = FlowModelConst.se
         flow_dir = 2
     return flow_dir_taudem, flow_dir
Example #7
 def output_runtime_to_log(title, lines, logfile):
     if logfile is None:
         return
     fname = FileClass.get_core_name_without_suffix(title)
     time_dict = {
         'name': fname,
         'readt': 0,
         'writet': 0,
         'computet': 0,
         'totalt': 0
     }
     for line in lines:
         # print(line)
         line = line.lower()
         time_value = line.split(os.linesep)[0].split(':')[-1]
         if not MathClass.isnumerical(time_value):
             continue
         time_value = float(time_value)
         if line.find('read') >= 0 and line.find('time') >= 0:
             time_dict['readt'] += time_value
         elif line.find('compute') >= 0 and line.find('time') >= 0:
             time_dict['computet'] += time_value
         elif line.find('write') >= 0 and line.find('time') >= 0:
             time_dict['writet'] += time_value
         elif line.find('total') >= 0 and line.find('time') >= 0:
             time_dict['totalt'] += time_value
     TauDEM.write_time_log(logfile, time_dict)
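A hedged call sketch, assuming the function is exposed as TauDEM.output_runtime_to_log (as used in Example #36) and that TauDEM.write_time_log appends the parsed dict to the log file; the lines and paths below are made up to match the '<label>: <value>' pattern the loop parses:

runtime_lines = ['Read time: 0.52',
                 'Compute time: 3.17',
                 'Write time: 0.28',
                 'Total time: 3.97']
TauDEM.output_runtime_to_log('d8flowdir', runtime_lines, '/tmp/taudem_runtime.log')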
Example #8
 def reclassify_landcover_parameters(landuse_file, landcover_file, landcover_initial_fields_file,
                                     landcover_lookup_file, attr_names, dst_dir, landuse_shp):
     """relassify landcover_init_param parameters"""
     land_cover_codes = LanduseUtilClass.initialize_landcover_parameters(
             landuse_file, landcover_initial_fields_file, dst_dir, landuse_shp)
     attr_map = LanduseUtilClass.read_crop_lookup_table(landcover_lookup_file)
     n = len(attr_names)
     replace_dicts = []
     replace_dicts_attrn = dict()
     dst_crop_tifs = []
     for i in range(n):
         cur_attr = attr_names[i]
         cur_dict = dict()
         dic = attr_map[cur_attr]
         for code in land_cover_codes:
             if MathClass.floatequal(code, DEFAULT_NODATA):
                 continue
             if code not in list(cur_dict.keys()):
                 cur_dict[code] = dic.get(code)
         replace_dicts_attrn[cur_attr] = cur_dict
         replace_dicts.append(cur_dict)
         dst_crop_tifs.append(dst_dir + os.path.sep + cur_attr + '.tif')
     # print(replace_dicts)
     # print(len(replace_dicts))
     # print(dst_crop_tifs)
     # print(len(dst_crop_tifs))
     # Generate GTIFF
     landcover_rec_csv = r'D:\SEIMS\data\zts\data_prepare\spatial\test\landcover_rec_csv.csv'
     RasterUtilClass.landuse_cover_reclassify(landcover_file, landuse_shp, replace_dicts_attrn, landcover_rec_csv)
     print(landcover_rec_csv)
Example #9
    def get_time_system_from_data_file(in_file):
        # type: (str) -> (str, int)
        """Get the time system from the data file. The basic format is:
           #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #LOCALTIME -2, #UTCTIME

        Returns:
            time_sys: 'UTCTIME' or 'LOCALTIME'
            time_zone(int): Positive for West time zone, and negative for East.
        """
        time_sys = 'LOCALTIME'
        time_zone = time.timezone // 3600
        with open(in_file, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        for line in lines:
            str_line = line.strip()
            # for LF in LFs:
            #     if LF in line:
            #         str_line = line.split(LF)[0]
            #         break
            if str_line[0] != '#':
                break
            if str_line.lower().find('utc') >= 0:
                time_sys = 'UTCTIME'
                time_zone = 0
                break
            if str_line.lower().find('local') >= 0:
                line_list = StringClass.split_string(str_line, [' ', ','])
                if len(line_list) == 2 and MathClass.isnumerical(line_list[1]):
                    time_zone = -1 * int(line_list[1])
                break
        return time_sys, time_zone
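A brief usage sketch, assuming this static method belongs to HydroClimateUtilClass (as called in Example #38) and that the data file starts with a header such as '#LOCALTIME 8'; the path is hypothetical:

time_sys, time_zone = HydroClimateUtilClass.get_time_system_from_data_file('/path/to/pcp_daily.csv')
# A '#LOCALTIME 8' header yields ('LOCALTIME', -8): East time zones are stored as negative values.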
Example #10
    def raster_to_gtiff(tif,
                        geotif,
                        change_nodata=False,
                        change_gdal_type=False):
        """Converting Raster format to GeoTIFF.

        Args:
            tif: source raster file path.
            geotif: output raster file path.
            change_nodata: change NoDataValue to -9999 or not.
            change_gdal_type: If True, output the Float32 data type.
        """
        rst_file = RasterUtilClass.read_raster(tif)
        nodata = rst_file.noDataValue
        if change_nodata:
            if not MathClass.floatequal(rst_file.noDataValue, DEFAULT_NODATA):
                nodata = DEFAULT_NODATA
                nodata_array = numpy.ones(
                    (rst_file.nRows, rst_file.nCols)) * rst_file.noDataValue
                nodata_check = numpy.isclose(rst_file.data, nodata_array)
                rst_file.data[nodata_check] = DEFAULT_NODATA
                # rst_file.data[rst_file.data == rst_file.noDataValue] = DEFAULT_NODATA
        gdal_type = rst_file.dataType
        if change_gdal_type:
            gdal_type = GDT_Float32
        RasterUtilClass.write_gtiff_file(geotif, rst_file.nRows,
                                         rst_file.nCols, rst_file.data,
                                         rst_file.geotrans, rst_file.srs,
                                         nodata, gdal_type)
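A minimal usage sketch, assuming the method is exposed as RasterUtilClass.raster_to_gtiff; the file names are hypothetical:

RasterUtilClass.raster_to_gtiff('dem.asc', 'dem.tif', change_nodata=True, change_gdal_type=True)
# dem.tif is written as Float32 with its NoData value forced to DEFAULT_NODATA (-9999).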
Example #11
    def raster_reclassify(srcfile, v_dict, dstfile, gdaltype=GDT_Float32):
        """Reclassify raster by given classifier dict.

        Args:
            srcfile: source raster file.
            v_dict: classifier dict.
            dstfile: destination file path.
            gdaltype (:obj:`pygeoc.raster.GDALDataType`): GDT_Float32 as default.
        """
        src_r = RasterUtilClass.read_raster(srcfile)
        src_data = src_r.data
        dst_data = numpy.copy(src_data)
        if gdaltype == GDT_Float32 and src_r.dataType != GDT_Float32:
            gdaltype = src_r.dataType
        no_data = src_r.noDataValue
        new_no_data = DEFAULT_NODATA
        if gdaltype in [GDT_Unknown, GDT_Byte, GDT_UInt16, GDT_UInt32]:
            new_no_data = 0
        if not MathClass.floatequal(new_no_data, src_r.noDataValue):
            if src_r.noDataValue not in v_dict:
                v_dict[src_r.noDataValue] = new_no_data
                no_data = new_no_data

        for (k, v) in iteritems(v_dict):
            dst_data[src_data == k] = v
        RasterUtilClass.write_gtiff_file(dstfile, src_r.nRows, src_r.nCols, dst_data,
                                         src_r.geotrans, src_r.srs, no_data, gdaltype)
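A short usage sketch for the reclassification above; the landuse codes, parameter values, and file names are illustrative only:

reclass_map = {1: 0.30, 2: 0.45, 3: 0.10}  # hypothetical landuse code -> parameter value
RasterUtilClass.raster_reclassify('landuse.tif', reclass_map, 'param.tif')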
Example #12
 def ridge_without_flowin_cell(self):
     """Find the original ridge sources that have no flow-in cells."""
     for row in range(self.nrows):
         for col in range(self.ncols):
             tempdir = self.flowdir_data[row][col]
             if MathClass.floatequal(tempdir, self.nodata_flow):
                 self.rdgsrc_data[row][col] = DEFAULT_NODATA
                 continue
             if self.flowmodel == 1:  # Dinf flow model
                 temp_coor = DinfUtil.downstream_index_dinf(
                     tempdir, row, col)
                 for temprow, tempcol in temp_coor:
                     if 0 <= temprow < self.nrows and 0 <= tempcol < self.ncols:
                         self.rdgsrc_data[temprow][tempcol] = DEFAULT_NODATA
                     else:
                         self.rdgsrc_data[row][col] = DEFAULT_NODATA
             else:  # D8 flow model
                 temprow, tempcol = D8Util.downstream_index(
                     tempdir, row, col)
                 if 0 <= temprow < self.nrows and 0 <= tempcol < self.ncols:
                     self.rdgsrc_data[temprow][tempcol] = DEFAULT_NODATA
                 else:
                     self.rdgsrc_data[row][col] = DEFAULT_NODATA
     RasterUtilClass.write_gtiff_file(self.rdgorg, self.nrows, self.ncols,
                                      self.rdgsrc_data, self.geotrans,
                                      self.srs, DEFAULT_NODATA, 6)
Example #13
 def get_time_system_from_data_file(in_file):
     """Get the time system from the data file. The basic format is:
        #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
     """
     time_sys = 'LOCALTIME'
     time_zone = time.timezone // -3600
     with open(in_file, 'r') as f:
         lines = f.readlines()
     for line in lines:
         str_line = line.strip()
         # for LF in LFs:
         #     if LF in line:
         #         str_line = line.split(LF)[0]
         #         break
         if str_line[0] != '#':
             break
         if str_line.lower().find('utc') >= 0:
             time_sys = 'UTCTIME'
             time_zone = 0
             break
         if str_line.lower().find('local') >= 0:
             line_list = StringClass.split_string(str_line, [','])
             if len(line_list) == 2 and MathClass.isnumerical(line_list[1]):
                 time_zone = -1 * int(line_list[1])
             break
     return time_sys, time_zone
Example #14
 def reclassify_landcover_parameters(landuse_file, landcover_file,
                                     landcover_initial_fields_file,
                                     landcover_lookup_file, attr_names,
                                     dst_dir):
     """relassify landcover_init_param parameters"""
     land_cover_codes = LanduseUtilClass.initialize_landcover_parameters(
         landuse_file, landcover_initial_fields_file, dst_dir)
     attr_map = LanduseUtilClass.read_crop_lookup_table(
         landcover_lookup_file)
     n = len(attr_names)
     replace_dicts = list()
     dst_crop_tifs = list()
     for i in range(n):
         cur_attr = attr_names[i]
         cur_dict = dict()
         dic = attr_map[cur_attr]
         for code in land_cover_codes:
             if MathClass.floatequal(code, DEFAULT_NODATA):
                 continue
             if code not in list(cur_dict.keys()):
                 cur_dict[code] = dic.get(code)
         replace_dicts.append(cur_dict)
         dst_crop_tifs.append(dst_dir + os.path.sep + cur_attr + '.tif')
     # print(replace_dicts)
     # print(len(replace_dicts))
     # print(dst_crop_tifs)
     # print(len(dst_crop_tifs))
     # Generate GTIFF
     for i, v in enumerate(dst_crop_tifs):
         # print(dst_crop_tifs[i])
         RasterUtilClass.raster_reclassify(landcover_file, replace_dicts[i],
                                           v)
Example #15
 def reclassify_landcover_parameters(landuse_file, landcover_file,
                                     landcover_initial_fields_file,
                                     landcover_lookup_file, attr_names,
                                     dst_dir, landuse_shp):
     """relassify landcover_init_param parameters"""
     land_cover_codes = LanduseUtilClass.initialize_landcover_parameters(
         landuse_file, landcover_initial_fields_file, dst_dir, landuse_shp)
     attr_map = LanduseUtilClass.read_crop_lookup_table(
         landcover_lookup_file)
     n = len(attr_names)
     replace_dicts = []
     replace_dicts_attrn = dict()
     dst_crop_tifs = []
     for i in range(n):
         cur_attr = attr_names[i]
         cur_dict = dict()
         dic = attr_map[cur_attr]
         for code in land_cover_codes:
             if MathClass.floatequal(code, DEFAULT_NODATA):
                 continue
             if code not in list(cur_dict.keys()):
                 cur_dict[code] = dic.get(code)
         replace_dicts_attrn[cur_attr] = cur_dict
         replace_dicts.append(cur_dict)
         dst_crop_tifs.append(dst_dir + os.path.sep + cur_attr + '.tif')
     # print(replace_dicts)
     # print(len(replace_dicts))
     # print(dst_crop_tifs)
     # print(len(dst_crop_tifs))
     # Generate GTIFF
     landcover_rec_csv = r'D:\SEIMS\data\zts\data_prepare\spatial\test\landcover_rec_csv.csv'
     RasterUtilClass.landuse_cover_reclassify(landcover_file, landuse_shp,
                                              replace_dicts_attrn,
                                              landcover_rec_csv)
     print(landcover_rec_csv)
Example #16
    def raster_reclassify(srcfile, v_dict, dstfile, gdaltype=GDT_Float32):
        """Reclassify raster by given classifier dict.

        Args:
            srcfile: source raster file.
            v_dict: classifier dict.
            dstfile: destination file path.
            gdaltype (:obj:`pygeoc.raster.GDALDataType`): GDT_Float32 as default.
        """
        src_r = RasterUtilClass.read_raster(srcfile)
        src_data = src_r.data
        dst_data = numpy.copy(src_data)
        if gdaltype == GDT_Float32 and src_r.dataType != GDT_Float32:
            gdaltype = src_r.dataType
        no_data = src_r.noDataValue
        new_no_data = DEFAULT_NODATA
        if gdaltype in [GDT_Unknown, GDT_Byte, GDT_UInt16, GDT_UInt32]:
            new_no_data = 0
        if not MathClass.floatequal(new_no_data, src_r.noDataValue):
            if src_r.noDataValue not in v_dict:
                v_dict[src_r.noDataValue] = new_no_data
                no_data = new_no_data

        for k, v in v_dict.items():
            dst_data[src_data == k] = v
        RasterUtilClass.write_gtiff_file(dstfile, src_r.nRows, src_r.nCols, dst_data,
                                         src_r.geotrans, src_r.srs, no_data, gdaltype)
Example #17
 def reclassify_landcover_parameters(landuse_file, landcover_file, landcover_initial_fields_file,
                                     landcover_lookup_file, attr_names, dst_dir):
     """relassify landcover_init_param parameters"""
     land_cover_codes = LanduseUtilClass.initialize_landcover_parameters(
             landuse_file, landcover_initial_fields_file, dst_dir)
     attr_map = LanduseUtilClass.read_crop_lookup_table(landcover_lookup_file)
     n = len(attr_names)
     replace_dicts = list()
     dst_crop_tifs = list()
     for i in range(n):
         cur_attr = attr_names[i]
         cur_dict = dict()
         dic = attr_map[cur_attr]
         for code in land_cover_codes:
             if MathClass.floatequal(code, DEFAULT_NODATA):
                 continue
             if code not in list(cur_dict.keys()):
                 cur_dict[code] = dic.get(code)
         replace_dicts.append(cur_dict)
         dst_crop_tifs.append(dst_dir + os.path.sep + cur_attr + '.tif')
     # print(replace_dicts)
     # print(len(replace_dicts))
     # print(dst_crop_tifs)
     # print(len(dst_crop_tifs))
     # Generate GTIFF
     for i, v in enumerate(dst_crop_tifs):
         # print(dst_crop_tifs[i])
         RasterUtilClass.raster_reclassify(landcover_file, replace_dicts[i], v)
Example #18
 def cal_cn2(lucc_id, hg):
     """Calculate CN2 value from landuse ID and Hydro Group number."""
     lucc_id = int(lucc_id)
     if lucc_id < 0 or MathClass.floatequal(lucc_id, nodata_value):
         return DEFAULT_NODATA
     else:
         hg = int(hg) - 1
         return cn2_map[lucc_id][hg]
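A hedged sketch of the lookup data this helper expects: cn2_map maps a landuse code to CN2 values for hydrologic soil groups A-D (hg 1-4), and nodata_value is the landuse raster's NoData; all numbers are illustrative:

nodata_value = -9999.
cn2_map = {1: [67., 78., 85., 89.],   # e.g., cropland (hypothetical values)
           2: [36., 60., 73., 79.]}   # e.g., forest (hypothetical values)
print(cal_cn2(1, 2))  # landuse 1, hydro group B -> 78.0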
Example #20
 def GetFuzzySlopePositionValues(i_row, i_col):
     seqvalues = [-9999] * len(fuzslppos_rs)
     for iseq, fuzdata in enumerate(fuzslppos_rs):
         curv = fuzdata.data[i_row][i_col]
         if MathClass.floatequal(curv, fuzdata.noDataValue):
             return None
         if curv < 0:
             return None
         seqvalues[iseq] = curv
     return seqvalues
Example #21
 def cal_cn2(lucc_id, hg):
     """Calculate CN2 value from landuse ID and Hydro Group number."""
     lucc_id = int(lucc_id)
     if lucc_id < 0 or MathClass.floatequal(lucc_id, nodata_value):
         return DEFAULT_NODATA
     else:
         hg = int(hg) - 1
         if lucc_id not in cn2_map:
             print("lucc %d not existed in cn2 lookup table!" % lucc_id)
             return DEFAULT_NODATA
         return cn2_map[lucc_id][hg]
Example #22
 def initial_params_from_txt(cfg, maindb):
     """
     Import initial calibration parameters from a txt data file.
     Args:
         cfg: SEIMS config object
         maindb: MongoDB database object
     """
     # delete if existed, initialize if not existed
     c_list = maindb.collection_names()
     if not StringClass.string_in_list(DBTableNames.main_parameter, c_list):
         maindb.create_collection(DBTableNames.main_parameter)
     else:
         maindb.drop_collection(DBTableNames.main_parameter)
     # initialize bulk operator
     bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
     # read initial parameters from txt file
     data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
     field_names = data_items[0][0:]
     # print(field_names)
     for i, cur_data_item in enumerate(data_items):
         if i == 0:
             continue
         # print(cur_data_item)
         # initial one default blank parameter dict.
         data_import = {ModelParamFields.name: '', ModelParamFields.desc: '',
                        ModelParamFields.unit: '', ModelParamFields.module: '',
                        ModelParamFields.value: DEFAULT_NODATA,
                        ModelParamFields.impact: DEFAULT_NODATA,
                        ModelParamFields.change: 'NC',
                        ModelParamFields.max: DEFAULT_NODATA,
                        ModelParamFields.min: DEFAULT_NODATA,
                        ModelParamFields.type: ''}
         for k, v in list(data_import.items()):
             idx = field_names.index(k)
             if cur_data_item[idx] == '':
                 if StringClass.string_match(k, ModelParamFields.change_ac):
                     data_import[k] = 0
                 elif StringClass.string_match(k, ModelParamFields.change_rc):
                     data_import[k] = 1
                 elif StringClass.string_match(k, ModelParamFields.change_nc):
                     data_import[k] = 0
                 elif StringClass.string_match(k, ModelParamFields.change_vc):
                     data_import[k] = DEFAULT_NODATA  # Be careful to check NODATA when use!
             else:
                 if MathClass.isnumerical(cur_data_item[idx]):
                     data_import[k] = float(cur_data_item[idx])
                 else:
                     data_import[k] = cur_data_item[idx]
         bulk.insert(data_import)
     # execute import operators
     MongoUtil.run_bulk(bulk, 'No operation during initial_params_from_txt.')
     # initialize index by parameter's type and name by ascending order.
     maindb[DBTableNames.main_parameter].create_index([(ModelParamFields.type, ASCENDING),
                                                       (ModelParamFields.name, ASCENDING)])
Example #23
def check_individual_diff(old_ind,  # type: Union[array.array, List[int], Tuple[int]]
                          new_ind  # type: Union[array.array, List[int], Tuple[int]]
                          ):
    # type: (...) -> bool
    """Check the gene values of two individuals."""
    diff = False
    for i in range(len(old_ind)):
        if not MathClass.floatequal(old_ind[i], new_ind[i]):
            diff = True
            break
    return diff
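A small usage sketch with made-up gene values:

old_genes = [0.12, 3.40, 7.85]
new_genes = [0.12, 3.41, 7.85]
print(check_individual_diff(old_genes, new_genes))  # True: the second gene differs
print(check_individual_diff(old_genes, old_genes))  # False: identical individuals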
Example #25
def cal_model_performance(obsl, siml):
    nse = MathClass.nashcoef(obsl, siml)
    r2 = MathClass.rsquare(obsl, siml)
    rmse = MathClass.rmse(obsl, siml)
    pbias = MathClass.pbias(obsl, siml)
    rsr = MathClass.rsr(obsl, siml)
    plt.rcParams['xtick.direction'] = 'out'
    plt.rcParams['ytick.direction'] = 'out'
    plt.rcParams['font.family'] = 'Times New Roman'
    fig, ax = plt.subplots(figsize=(4, 4))
    plt.scatter(obsl, siml, marker='.', s=50, color='black')
    plt.xlabel('Observation', fontsize=20)
    plt.ylabel('Simulation', fontsize=20)
    plt.title('\nNSE: %.2f, R$^2$: %.2f, PBIAS: %.2f%%\nRMSE: %.2f, RSR: %.2f' %
              (nse, r2, pbias, rmse, rsr), color='red', loc='right')
    minv = math.floor(min(min(obsl), min(siml)))
    maxv = math.ceil(max(max(obsl), max(siml)))
    ax.set_xlim(left=minv, right=maxv)
    ax.set_ylim(bottom=minv, top=maxv)
    plt.tight_layout()
    plt.show()
Example #26
def delete_model_outputs(model_workdir, hostname, port, dbname):
    """Delete model outputs and scenario in MongoDB."""
    f_list = os.listdir(model_workdir)
    sids = list()
    for f in f_list:
        outfilename = model_workdir + os.path.sep + f
        if os.path.isdir(outfilename):
            if len(f) > 9:
                if MathClass.isnumerical(f[-9:]):
                    shutil.rmtree(outfilename)
                    sid = int(f[-9:])
                    sids.append(sid)
    if len(sids) > 0:
        delete_scenarios_by_ids(hostname, port, dbname, sids)
Example #27
 def check_orthogonal(angle):
     """Check the given Dinf angle based on D8 flow direction encoding code by ArcGIS"""
     flow_dir = -1
     if MathClass.floatequal(angle, FlowModelConst.e):
         flow_dir = 1  # 1
     elif MathClass.floatequal(angle, FlowModelConst.ne):
         flow_dir = 2  # 128
     elif MathClass.floatequal(angle, FlowModelConst.n):
         flow_dir = 3  # 64
     elif MathClass.floatequal(angle, FlowModelConst.nw):
         flow_dir = 4  # 32
     elif MathClass.floatequal(angle, FlowModelConst.w):
         flow_dir = 5  # 16
     elif MathClass.floatequal(angle, FlowModelConst.sw):
         flow_dir = 6  # 8
     elif MathClass.floatequal(angle, FlowModelConst.s):
         flow_dir = 7  # 4
     elif MathClass.floatequal(angle, FlowModelConst.se):
         flow_dir = 8  # 2
     return flow_dir
Example #28
    def compress_dinf(angle, nodata, minfrac=0.01):
        """Compress dinf flow direction to D8 direction with weight follows ArcGIS D8 codes.

        Args:
            angle: D-inf flow direction angle
            nodata: NoData value
            minfrac: Minimum flow fraction that accounted, percent, e.g., 0.01

        Returns:
            1. Updated Dinf values
            2. Compressed flow direction follows ArcGIS D8 codes rule
            3. Weight of the first direction by counter-clockwise
        """
        if MathClass.floatequal(angle, nodata):
            return DEFAULT_NODATA, DEFAULT_NODATA, DEFAULT_NODATA
        angle, d = DinfUtil.check_orthogonal(angle, minfrac=minfrac)
        if d != -1:
            return angle, d, 1.
        if angle < FlowModelConst.ne:
            a1 = angle
            d = 129  # 1+128
        elif angle < FlowModelConst.n:
            a1 = angle - FlowModelConst.ne
            d = 192  # 128+64
        elif angle < FlowModelConst.nw:
            a1 = angle - FlowModelConst.n
            d = 96  # 64+32
        elif angle < FlowModelConst.w:
            a1 = angle - FlowModelConst.nw
            d = 48  # 32+16
        elif angle < FlowModelConst.sw:
            a1 = angle - FlowModelConst.w
            d = 24  # 16+8
        elif angle < FlowModelConst.s:
            a1 = angle - FlowModelConst.sw
            d = 12  # 8+4
        elif angle < FlowModelConst.se:
            a1 = angle - FlowModelConst.s
            d = 6  # 4+2
        else:
            a1 = angle - FlowModelConst.se
            d = 3  # 2+1
        return angle, d, 1. - a1 / PI * 4.0
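A hedged usage sketch, assuming the TauDEM D-inf convention in which FlowModelConst.e is 0 radians and angles increase counter-clockwise, and that this static method belongs to DinfUtil; the NoData value is arbitrary:

import math

new_angle, d8_code, weight = DinfUtil.compress_dinf(math.pi / 8., -9999., minfrac=0.01)
# Halfway between east and northeast: d8_code == 129 (1 + 128) and weight == 0.5,
# i.e., the flow is split evenly between the two neighboring directions.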
Example #29
    def get_value_by_row_col(self, row, col):
        """Get raster value by (row, col).

        Args:
            row: row number.
            col: col number.

        Returns:
            The raster value, or None if it equals NoDataValue; out-of-range row/col raises ValueError.
        """
        if row < 0 or row >= self.nRows or col < 0 or col >= self.nCols:
            raise ValueError("The row or col must be >=0 and less than "
                             "nRows (%d) or nCols (%d)!" %
                             (self.nRows, self.nCols))
        else:
            value = self.data[int(round(row))][int(round(col))]
            if MathClass.floatequal(value, self.noDataValue):
                return None
            else:
                return value
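A brief usage sketch, assuming the raster object is obtained via RasterUtilClass.read_raster as in the other examples; the file name is hypothetical:

rst = RasterUtilClass.read_raster('dem.tif')
elev = rst.get_value_by_row_col(10, 20)
if elev is None:
    print('Cell (10, 20) is NoData')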
Example #30
    def compress_dinf(angle, nodata):
        """Compress dinf flow direction to D8 direction with weight follows ArcGIS D8 codes.
        Args:
            angle: D-inf flow direction angle
            nodata: NoData value

        Returns:
            1. Updated Dinf values
            2. Compressed flow direction follows ArcGIS D8 codes rule
            3. Weight of the first direction
        """
        if MathClass.floatequal(angle, nodata):
            return DEFAULT_NODATA, DEFAULT_NODATA, DEFAULT_NODATA
        taud, d = DinfUtil.check_orthogonal(angle)
        if d != -1:
            return taud, d, 1
        if angle < FlowModelConst.ne:
            a1 = angle
            d = 129  # 1+128
        elif angle < FlowModelConst.n:
            a1 = angle - FlowModelConst.ne
            d = 192  # 128+64
        elif angle < FlowModelConst.nw:
            a1 = angle - FlowModelConst.n
            d = 96  # 64+32
        elif angle < FlowModelConst.w:
            a1 = angle - FlowModelConst.nw
            d = 48  # 32+16
        elif angle < FlowModelConst.sw:
            a1 = angle - FlowModelConst.w
            d = 24  # 16+8
        elif angle < FlowModelConst.s:
            a1 = angle - FlowModelConst.sw
            d = 12  # 8+4
        elif angle < FlowModelConst.se:
            a1 = angle - FlowModelConst.s
            d = 6  # 4+2
        else:
            a1 = angle - FlowModelConst.se
            d = 3  # 2+1
        return angle, d, a1 / PI * 4.0
Example #32
 def output_runtime_to_log(title, lines, logfile):
     if logfile is None:
         return
     fname = FileClass.get_core_name_without_suffix(title)
     time_dict = {'name': fname, 'readt': 0, 'writet': 0, 'computet': 0, 'totalt': 0}
     for line in lines:
         # print(line)
         line = line.lower()
         time_value = line.split(os.linesep)[0].split(':')[-1]
         if not MathClass.isnumerical(time_value):
             continue
         time_value = float(time_value)
         if line.find('read') >= 0 and line.find('time') >= 0:
             time_dict['readt'] += time_value
         elif line.find('compute') >= 0 and line.find('time') >= 0:
             time_dict['computet'] += time_value
         elif line.find('write') >= 0 and line.find('time') >= 0:
             time_dict['writet'] += time_value
         elif line.find('total') >= 0 and line.find('time') >= 0:
             time_dict['totalt'] += time_value
     TauDEM.write_time_log(logfile, time_dict)
Example #33
    def raster_to_gtiff(tif, geotif, change_nodata=False, change_gdal_type=False):
        """Converting Raster format to GeoTIFF.

        Args:
            tif: source raster file path.
            geotif: output raster file path.
            change_nodata: change NoDataValue to -9999 or not.
            change_gdal_type: If True, output the Float32 data type.
        """
        rst_file = RasterUtilClass.read_raster(tif)
        nodata = rst_file.noDataValue
        if change_nodata:
            if not MathClass.floatequal(rst_file.noDataValue, DEFAULT_NODATA):
                nodata = DEFAULT_NODATA
                rst_file.data[rst_file.data == rst_file.noDataValue] = DEFAULT_NODATA
        gdal_type = rst_file.dataType
        if change_gdal_type:
            gdal_type = GDT_Float32
        RasterUtilClass.write_gtiff_file(geotif, rst_file.nRows, rst_file.nCols, rst_file.data,
                                         rst_file.geotrans, rst_file.srs, nodata,
                                         gdal_type)
Example #34
    def compress_dinf(angle, nodata):
        """Compress dinf flow direction to D8 direction with weight
        Args:
            angle: D-inf flow direction angle
            nodata: NoData value

        Returns:
            Compressed flow direction and weight of the first direction
        """
        if MathClass.floatequal(angle, nodata):
            return DEFAULT_NODATA, DEFAULT_NODATA
        d = DinfUtil.check_orthogonal(angle)
        if d is not None:
            return d, 1
        if angle < FlowModelConst.ne:
            a1 = angle
            d = 129  # 1+128
        elif angle < FlowModelConst.n:
            a1 = angle - FlowModelConst.ne
            d = 192  # 128+64
        elif angle < FlowModelConst.nw:
            a1 = angle - FlowModelConst.n
            d = 96  # 64+32
        elif angle < FlowModelConst.w:
            a1 = angle - FlowModelConst.nw
            d = 48  # 32+16
        elif angle < FlowModelConst.sw:
            a1 = angle - FlowModelConst.w
            d = 24  # 16+8
        elif angle < FlowModelConst.s:
            a1 = angle - FlowModelConst.sw
            d = 12  # 8+4
        elif angle < FlowModelConst.se:
            a1 = angle - FlowModelConst.s
            d = 6  # 4+2
        else:
            a1 = angle - FlowModelConst.se
            d = 3  # 2+1
        return d, a1 / PI * 4.0
Example #35
def test_mathclass_isnumerical():
    assert MathClass.isnumerical('78') == True
    assert MathClass.isnumerical('1.e-5') == True
    assert MathClass.isnumerical(None) == False
    assert MathClass.isnumerical('a1.2') == False
Example #36
    def run(function_name,
            in_files,
            wp=None,
            in_params=None,
            out_files=None,
            mpi_params=None,
            log_params=None):
        """
        Run TauDEM function.

           1. The command will not execute if any input file does not exist.
           2. An error will be detected after running the TauDEM command if
           any output file does not exist;

        Args:
            function_name (str): Full path of TauDEM function.
            in_files (dict, required): Dict of pairs of parameter id (string) and file path
                (string or list) for input files, e.g.::

                    {'-z': '/full/path/to/dem.tif'}

            wp (str, optional): Workspace for outputs. If not specified, the directory of the
                first input file in ``in_files`` will be used.
            in_params (dict, optional): Dict of pairs of parameter id (string) and value
                (or None for a flag parameter without a value) for input parameters, e.g.::

                    {'-nc': None}
                    {'-thresh': threshold}
                    {'-m': 'ave' 's', '-nc': None}

            out_files (dict, optional): Dict of pairs of parameter id (string) and file
                path (string or list) for output files, e.g.::

                    {'-fel': 'filleddem.tif'}
                    {'-maxS': ['harden.tif', 'maxsimi.tif']}

            mpi_params (dict, optional): Dict of pairs of parameter id (string) and value or
                path for MPI setting, e.g.::

                    {'mpipath':'/soft/bin','hostfile':'/soft/bin/cluster.node','n':4}
                    {'mpipath':'/soft/bin', 'n':4}
                    {'n':4}

            log_params (dict, optional): Dict of pairs of parameter id (string) and value or
                path for runtime and log output parameters. e.g.::

                    {'logfile': '/home/user/log.txt',
                     'runtimefile': '/home/user/runtime.txt'}

        Returns:
            True if TauDEM run successfully, otherwise False.
        """
        # Check input files
        if in_files is None:
            TauDEM.error('Input files parameter is required!')
        if not isinstance(in_files, dict):
            TauDEM.error('The input files parameter must be a dict!')
        for (pid, infile) in list(in_files.items()):
            if infile is None:
                continue
            if isinstance(infile, list) or isinstance(infile, tuple):
                for idx, inf in enumerate(infile):
                    if inf is None:
                        continue
                    inf, wp = TauDEM.check_infile_and_wp(inf, wp)
                    in_files[pid][idx] = inf
                continue
            if os.path.exists(infile):
                infile, wp = TauDEM.check_infile_and_wp(infile, wp)
                in_files[pid] = os.path.abspath(infile)
            else:
                # For more flexible input file specifications,
                # e.g., -inputtags 1 <path/to/tag1.tif> 2 <path/to/tag2.tif> ...
                # In such unpredictable circumstances we cannot check the existence of
                # the input files here, so the caller must check them elsewhere.
                if len(StringClass.split_string(infile, ' ')) > 1:
                    continue
                else:  # the infile should still be an existing file, so check it in the workspace
                    if wp is None:
                        TauDEM.error('Workspace should not be None!')
                    infile = wp + os.sep + infile
                    if not os.path.exists(infile):
                        TauDEM.error(
                            'Input files parameter %s: %s is not existed!' %
                            (pid, infile))
                    in_files[pid] = os.path.abspath(infile)
        # Make the workspace directory if it does not exist
        UtilClass.mkdir(wp)
        # Check the log parameter
        log_file = None
        runtime_file = None
        if log_params is not None:
            if not isinstance(log_params, dict):
                TauDEM.error('The log parameter must be a dict!')
            if 'logfile' in log_params and log_params['logfile'] is not None:
                log_file = log_params['logfile']
                # If log_file is just a file name, then save it in the default workspace.
                if os.sep not in log_file:
                    log_file = wp + os.sep + log_file
                    log_file = os.path.abspath(log_file)
            if 'runtimefile' in log_params and log_params[
                    'runtimefile'] is not None:
                runtime_file = log_params['runtimefile']
                # If runtime_file is just a file name, then save it in the default workspace.
                if os.sep not in runtime_file:
                    runtime_file = wp + os.sep + runtime_file
                    runtime_file = os.path.abspath(runtime_file)

        # remove out_files to avoid any file IO related error
        new_out_files = list()
        if out_files is not None:
            if not isinstance(out_files, dict):
                TauDEM.error('The output files parameter must be a dict!')
            for (pid, out_file) in list(out_files.items()):
                if out_file is None:
                    continue
                if isinstance(out_file, list) or isinstance(out_file, tuple):
                    for idx, outf in enumerate(out_file):
                        if outf is None:
                            continue
                        outf = FileClass.get_file_fullpath(outf, wp)
                        FileClass.remove_files(outf)
                        out_files[pid][idx] = outf
                        new_out_files.append(outf)
                else:
                    out_file = FileClass.get_file_fullpath(out_file, wp)
                    FileClass.remove_files(out_file)
                    out_files[pid] = out_file
                    new_out_files.append(out_file)

        # concatenate command line
        commands = list()
        # MPI header
        if mpi_params is not None:
            if not isinstance(mpi_params, dict):
                TauDEM.error('The MPI settings parameter must be a dict!')
            if 'mpipath' in mpi_params and mpi_params['mpipath'] is not None:
                commands.append(mpi_params['mpipath'] + os.sep + 'mpiexec')
            else:
                commands.append('mpiexec')
            if 'hostfile' in mpi_params and mpi_params['hostfile'] is not None \
                    and not StringClass.string_match(mpi_params['hostfile'], 'none') \
                    and os.path.isfile(mpi_params['hostfile']):
                commands.append('-f')
                commands.append(mpi_params['hostfile'])
            if 'n' in mpi_params and mpi_params['n'] > 1:
                commands.append('-n')
                commands.append(str(mpi_params['n']))
            else:  # If the number of processes is less than or equal to 1, do not call mpiexec.
                commands = []
        # append TauDEM function name, which can be full path or just one name
        commands.append(function_name)
        # append input files
        for (pid, infile) in list(in_files.items()):
            if infile is None:
                continue
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            if isinstance(infile, list) or isinstance(infile, tuple):
                commands.append(' '.join(tmpf for tmpf in infile))
            else:
                commands.append(infile)
        # append input parameters
        if in_params is not None:
            if not isinstance(in_params, dict):
                TauDEM.error('The input parameters must be a dict!')
            for (pid, v) in list(in_params.items()):
                if pid[0] != '-':
                    pid = '-' + pid
                commands.append(pid)
                # allow for a parameter that is a flag without a value
                if v != '' and v is not None:
                    if MathClass.isnumerical(v):
                        commands.append(str(v))
                    else:
                        commands.append(v)
        # append output parameters
        if out_files is not None:
            for (pid, outfile) in list(out_files.items()):
                if outfile is None:
                    continue
                if pid[0] != '-':
                    pid = '-' + pid
                commands.append(pid)
                if isinstance(outfile, list) or isinstance(outfile, tuple):
                    commands.append(' '.join(tmpf for tmpf in outfile))
                else:
                    commands.append(outfile)
        # run command
        runmsg = UtilClass.run_command(commands)
        TauDEM.log(runmsg, log_file)
        TauDEM.output_runtime_to_log(function_name, runmsg, runtime_file)
        # Check out_files, raise RuntimeError if not exist.
        for of in new_out_files:
            if not os.path.exists(of):
                TauDEM.error('%s failed, and the %s was not generated!' %
                             (function_name, of))
                return False
        return True
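A hedged invocation sketch for TauDEM's pit-filling step, assembled from the dict formats documented in the docstring above; the executable path, workspace, and file names are hypothetical:

TauDEM.run('/soft/taudem/pitremove',
           {'-z': 'dem.tif'},
           wp='/data/workspace',
           out_files={'-fel': 'filleddem.tif'},
           mpi_params={'mpipath': '/soft/bin', 'n': 4},
           log_params={'logfile': 'taudem.log', 'runtimefile': 'runtime.log'})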
Example #37
 def initial_params_from_txt(cfg, maindb):
     """
     Import initial calibration parameters from a txt data file.
     Args:
         cfg: SEIMS config object
         maindb: MongoDB database object
     """
     # delete if existed, initialize if not existed
     c_list = maindb.collection_names()
     if not StringClass.string_in_list(DBTableNames.main_parameter, c_list):
         maindb.create_collection(DBTableNames.main_parameter)
     else:
         maindb.drop_collection(DBTableNames.main_parameter)
     # initialize bulk operator
     bulk = maindb[DBTableNames.main_parameter].initialize_ordered_bulk_op()
     # read initial parameters from txt file
     data_items = read_data_items_from_txt(cfg.paramcfgs.init_params_file)
     field_names = data_items[0][0:]
     # print(field_names)
     for i, cur_data_item in enumerate(data_items):
         if i == 0:
             continue
         # print(cur_data_item)
         # initial one default blank parameter dict.
         data_import = {
             ModelParamFields.name: '',
             ModelParamFields.desc: '',
             ModelParamFields.unit: '',
             ModelParamFields.module: '',
             ModelParamFields.value: DEFAULT_NODATA,
             ModelParamFields.impact: DEFAULT_NODATA,
             ModelParamFields.change: 'NC',
             ModelParamFields.max: DEFAULT_NODATA,
             ModelParamFields.min: DEFAULT_NODATA,
             ModelParamFields.type: ''
         }
         for k, v in list(data_import.items()):
             idx = field_names.index(k)
             if cur_data_item[idx] == '':
                 if StringClass.string_match(k, ModelParamFields.change_ac):
                     data_import[k] = 0
                 elif StringClass.string_match(k,
                                               ModelParamFields.change_rc):
                     data_import[k] = 1
                 elif StringClass.string_match(k,
                                               ModelParamFields.change_nc):
                     data_import[k] = 0
                 elif StringClass.string_match(k,
                                               ModelParamFields.change_vc):
                     data_import[
                         k] = DEFAULT_NODATA  # Be careful to check NODATA when use!
             else:
                 if MathClass.isnumerical(cur_data_item[idx]):
                     data_import[k] = float(cur_data_item[idx])
                 else:
                     data_import[k] = cur_data_item[idx]
         bulk.insert(data_import)
     # execute import operators
     MongoUtil.run_bulk(bulk,
                        'No operation during initial_params_from_txt.')
     # initialize index by parameter's type and name by ascending order.
     maindb[DBTableNames.main_parameter].create_index([
         (ModelParamFields.type, ASCENDING),
         (ModelParamFields.name, ASCENDING)
     ])
Example #38
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """
    Interpolate irregularly observed data to regular time-interval data.

    Todo: Not tested yet!

    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Field name can be PCP, FLOW, SED
                 the unit is mm/h, m3/s, g/L (i.e., kg/m3), respectively.
        time_interval: time interval, unit is minute, e.g., daily output is 1440
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system
                    is based on time_sys.
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If true, the time interval without original records will
                        not be output.
        time_sys_output: time system of output time_system, the format must be
                  '<time_system> [<time_zone>]', e.g.,
                  'LOCALTIME'
                  'LOCALTIME 8'
                  'UTCTIME' (default)
        day_divided_hour: If the time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23. e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59
    Returns:
        The output data files are located in the same directory with the input file.
        The nomenclature is: <field name>_<time system>_<time interval>_<nonzero>, e.g.,
        pcp_utctime_1440_nonzero.csv, flow_localtime_60.csv.
        Note that `.txt` format is also supported.
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available field
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_avaiable_field(cur_fld):
        """Check if the given field name is supported."""
        support_flag = False
        for fff in available_flds:
            if fff.lower() in cur_fld.lower():
                support_flag = True
                break
        return support_flag

    ord_data = OrderedDict()
    time_zone_output = time.timezone // 3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = -1 * int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_input)  # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    # print(ord_data)
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        # print(item_dtime)
        # if item_dtime.month == 12 and item_dtime.day == 31:
        #     print("debug")
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one item that less than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # Just append the end time for computational convenience
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # if eliminate time interval without original records
        # initial interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            if 'SED' in v_name.upper():  # FLOW must exist
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!' %
                          item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:  # avoid division by zero when the aggregated flow is 0
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():  # the input is mm/h, and output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta

    # for i, v in itp_data.items():
    #     print(i, v)
    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_avaiable_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.csv'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w', encoding='utf-8') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
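
The loop above is the aggregation core of this routine (a fuller version of the same function appears as Ejemplo n.º 44 below): FLOW values are averaged over the output interval weighted by the duration of each record, SED is computed as a flow-weighted mean concentration, and PCP given in mm/h is integrated to a depth in mm. A minimal, self-contained sketch of that rule with made-up numbers (not part of SEIMS) might look like this:

from datetime import datetime, timedelta

records = [  # (datetime, FLOW m3/s, SED g/L, PCP mm/h) -- hypothetical 30-min observations
    (datetime(2013, 2, 3, 8, 0), 1.0, 0.20, 2.0),
    (datetime(2013, 2, 3, 8, 30), 3.0, 0.60, 4.0),
]
interval = timedelta(minutes=60)
end = records[0][0] + interval

flow_sum = sed_flux = pcp_depth = 0.
for idx, (t, q, c, p) in enumerate(records):
    nxt = records[idx + 1][0] if idx + 1 < len(records) else end
    secs = (nxt - t).total_seconds()
    flow_sum += q * secs           # discharge weighted by duration
    sed_flux += c * q * secs       # concentration weighted by discharge and duration
    pcp_depth += p * secs / 3600.  # mm/h * hours -> mm

flow_avg = flow_sum / interval.total_seconds()          # time-weighted mean flow (m3/s)
sed_avg = sed_flux / flow_sum if flow_sum > 0. else 0.  # flow-weighted mean concentration (g/L)
print(flow_avg, sed_avg, pcp_depth)                     # 2.0 0.5 3.0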
Ejemplo n.º 39
0
def DelinateSlopePositionByThreshold(
        modelcfg,  # type: ParseSEIMSConfig
        thresholds,  # type: Dict[int, List]
        fuzzyslppos_fnames,  # type: List[Tuple[int, AnyStr, AnyStr]]
        outfname,  # type: AnyStr
        subbsn_id=0  # type: int
):
    # type: (...) -> Dict
    """

    Args:
        model_cfg: Configuration of SEIMS-based model
        thresholds: {HillslopeID: {rdgID, bksID, vlyID, T_bks2rdg, T_bks2vly}, ...}
        fuzzyslppos_fnames: [(1, 'summit', 'rdgInf'), ...]
        outfname: output GridFS name
        subbsn_id: By default use the whole watershed data
    Returns:
        hillslp_data(dict): {}
    """
    # 1. Read raster data from MongoDB
    hillslpr = ReadRasterFromMongoDB(modelcfg.host, modelcfg.port,
                                     modelcfg.db_name,
                                     DBTableNames.gridfs_spatial,
                                     '%d_HILLSLOPE_MERGED' % subbsn_id)
    landuser = ReadRasterFromMongoDB(modelcfg.host, modelcfg.port,
                                     modelcfg.db_name,
                                     DBTableNames.gridfs_spatial,
                                     '%d_LANDUSE' % subbsn_id)
    fuzslppos_rs = list()
    for tag, tagname, gfsname in fuzzyslppos_fnames:
        fuzslppos_rs.append(
            ReadRasterFromMongoDB(modelcfg.host, modelcfg.port,
                                  modelcfg.db_name,
                                  DBTableNames.gridfs_spatial,
                                  '%d_%s' % (subbsn_id, gfsname.upper())))

    # Output for test
    # out_dir = r'D:\data_m\youwuzhen\seims_models_phd\data_prepare\spatial\spatial_units\tmp'
    # out_hillslp = out_dir + os.sep + 'hillslope.tif'
    # RasterUtilClass.write_gtiff_file(out_hillslp, hillslpr.nRows, hillslpr.nCols,
    #                                  hillslpr.data, hillslpr.geotrans, hillslpr.srs,
    #                                  hillslpr.noDataValue)
    # out_landuse = out_dir + os.sep + 'landuse.tif'
    # RasterUtilClass.write_gtiff_file(out_landuse, landuser.nRows, landuser.nCols,
    #                                  landuser.data, landuser.geotrans, landuser.srs,
    #                                  landuser.noDataValue)
    # for i, (tag, tagname, gfsname) in enumerate(fuzzyslppos_fnames):
    #     curname = out_dir + os.sep + '%s.tif' % gfsname
    #     RasterUtilClass.write_gtiff_file(curname, fuzslppos_rs[i].nRows, fuzslppos_rs[i].nCols,
    #                                      fuzslppos_rs[i].data, fuzslppos_rs[i].geotrans,
    #                                      fuzslppos_rs[i].srs,
    #                                      fuzslppos_rs[i].noDataValue)

    # 2. Initialize output
    outgfsname = '%d_%s' % (subbsn_id, outfname.upper())
    outdict = dict(
    )  # type: Dict[AnyStr, Dict[int, Dict[AnyStr, Union[float, Dict[int, float]]]]]
    slppos_cls = numpy.ones(
        (hillslpr.nRows, hillslpr.nCols)) * hillslpr.noDataValue
    valid_cells = 0

    # Get the fuzzy slope position values from top (ridge) to bottom (valley)
    def GetFuzzySlopePositionValues(i_row, i_col):
        seqvalues = [-9999] * len(fuzslppos_rs)
        for iseq, fuzdata in enumerate(fuzslppos_rs):
            curv = fuzdata.data[i_row][i_col]
            if MathClass.floatequal(curv, fuzdata.noDataValue):
                return None
            if curv < 0:
                return None
            seqvalues[iseq] = curv
        return seqvalues

    # ACTUAL ALGORITHM
    for row in range(hillslpr.nRows):
        for col in range(hillslpr.nCols):
            # Exclude invalid situation
            hillslp_id = hillslpr.data[row][col]
            if MathClass.floatequal(hillslp_id, hillslpr.noDataValue):
                continue
            if hillslp_id not in thresholds:
                continue
            landuse_id = landuser.data[row][col]
            if MathClass.floatequal(landuse_id, landuser.noDataValue):
                continue
            fuzzyvalues = GetFuzzySlopePositionValues(row, col)
            if fuzzyvalues is None:
                continue

            # THIS PART SHOULD BE REVIEWED CAREFULLY LATER! --START
            # Step 1. Get the index of slope position with maximum similarity
            max_fuz = max(fuzzyvalues)
            max_idx = fuzzyvalues.index(max_fuz)
            tmpfuzzyvalues = fuzzyvalues[:]
            tmpfuzzyvalues.remove(max_fuz)
            sec_fuz = max(tmpfuzzyvalues)
            sec_idx = fuzzyvalues.index(sec_fuz)

            sel_idx = max_idx  # Select the maximum by default

            cur_threshs = thresholds[hillslp_id][1 - len(fuzzyvalues):]
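            # cur_threshs now holds the last len(fuzzyvalues)-1 entries of thresholds[hillslp_id],
            # i.e., the transition thresholds (e.g., T_bks2rdg and T_bks2vly in the docstring).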

            if max_idx == len(fuzzyvalues) - 1:  # the bottom position
                if sec_idx == len(
                        fuzzyvalues
                ) - 2 and 0 < max_fuz - sec_fuz < cur_threshs[-1]:
                    sel_idx = sec_idx  # change valley to backslope
            elif max_idx == 0:  # the upper position
                if sec_idx == 1 and 0 < max_fuz - sec_fuz < cur_threshs[0]:
                    sel_idx = sec_idx  # change ridge to backslope
            else:  # the middle positions
                # Two thresholds could be applied,
                #     i.e., cur_threshs[max_idx-1] and cur_threshs[max_idx]
                if sec_idx == max_idx - 1 and 0. > sec_fuz - max_fuz > cur_threshs[
                        max_idx - 1]:
                    sel_idx = sec_idx
                elif sec_idx == max_idx + 1 and 0. > sec_fuz - max_fuz > cur_threshs[
                        max_idx]:
                    sel_idx = sec_idx

            # Exception:
            if sec_fuz < 0.1 and sel_idx == sec_idx:
                sel_idx = max_idx

            # if sel_idx != max_idx:  # boundary has been adapted
            #     print('fuzzy values: %s, thresholds: %s, '
            #           'sel_idx: %d' % (fuzzyvalues.__str__(), cur_threshs.__str__(), sel_idx))

            slppos_id = thresholds[hillslp_id][sel_idx]
            # THIS PART SHOULD BE REVIEWED CAREFULLY LATER! --END

            slppos_cls[row][col] = slppos_id
            sel_tagname = fuzzyslppos_fnames[sel_idx][1]
            if sel_tagname not in outdict:
                outdict[sel_tagname] = dict()
            if slppos_id not in outdict[sel_tagname]:
                outdict[sel_tagname][slppos_id] = {
                    'area': 0,
                    'landuse': dict()
                }
            outdict[sel_tagname][slppos_id]['area'] += 1
            if landuse_id not in outdict[sel_tagname][slppos_id]['landuse']:
                outdict[sel_tagname][slppos_id]['landuse'][landuse_id] = 0.
            outdict[sel_tagname][slppos_id]['landuse'][landuse_id] += 1.

            valid_cells += 1
    # Change cell counts to area
    area_km2 = hillslpr.dx * hillslpr.dx * 1.e-6
    for tagname, slpposdict in viewitems(outdict):
        for sid, datadict in viewitems(slpposdict):
            outdict[tagname][sid]['area'] *= area_km2
            for luid in outdict[tagname][sid]['landuse']:
                outdict[tagname][sid]['landuse'][luid] *= area_km2

    # 3. Write the classified slope positions data back to mongodb
    metadata = dict()
    metadata[RasterMetadata.subbasin] = subbsn_id
    metadata['ID'] = outgfsname
    metadata['TYPE'] = outfname.upper()
    metadata[RasterMetadata.cellsize] = hillslpr.dx
    metadata[RasterMetadata.nodata] = hillslpr.noDataValue
    metadata[RasterMetadata.ncols] = hillslpr.nCols
    metadata[RasterMetadata.nrows] = hillslpr.nRows
    metadata[RasterMetadata.xll] = hillslpr.xMin + 0.5 * hillslpr.dx
    metadata[RasterMetadata.yll] = hillslpr.yMin + 0.5 * hillslpr.dx
    metadata['LAYERS'] = 1.
    metadata[RasterMetadata.cellnum] = valid_cells
    metadata[RasterMetadata.srs] = hillslpr.srs

    client = ConnectMongoDB(modelcfg.host, modelcfg.port)
    conn = client.get_conn()
    maindb = conn[modelcfg.db_name]
    spatial_gfs = GridFS(maindb, DBTableNames.gridfs_spatial)
    # delete the GridFS file if it already exists
    if spatial_gfs.exists(filename=outgfsname):
        x = spatial_gfs.get_version(filename=outgfsname)
        spatial_gfs.delete(x._id)
    # create and write new GridFS file
    new_gridfs = spatial_gfs.new_file(filename=outgfsname, metadata=metadata)
    new_gridfs_array = slppos_cls.reshape(
        (1, hillslpr.nCols * hillslpr.nRows)).tolist()[0]

    fmt = '%df' % (hillslpr.nCols * hillslpr.nRows)
    s = pack(fmt, *new_gridfs_array)
    new_gridfs.write(s)
    new_gridfs.close()

    # Read and output for test
    # slpposcls_r = ReadRasterFromMongoDB(modelcfg.host, modelcfg.port,
    #                                     modelcfg.db_name, DBTableNames.gridfs_spatial, outgfsname)
    # out_slpposcls = out_dir + os.sep + '%s.tif' % outgfsname
    # RasterUtilClass.write_gtiff_file(out_slpposcls, slpposcls_r.nRows, slpposcls_r.nCols,
    #                                  slpposcls_r.data, slpposcls_r.geotrans, slpposcls_r.srs,
    #                                  slpposcls_r.noDataValue)
    client.close()

    return outdict
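
The decision rule in the block marked 'THIS PART SHOULD BE REVIEWED CAREFULLY LATER!' picks the slope position with the largest fuzzy membership and only switches to the second-best position when that position is adjacent and the membership gap falls below the corresponding threshold. A simplified standalone sketch of the idea (not SEIMS code; it treats all adjacent pairs the same way, whereas the original handles the ridge, middle, and valley cases separately):

def select_slope_position(fuzzyvalues, transition_thresholds):
    """Pick a slope position index from memberships ordered ridge (0) .. valley (-1).

    transition_thresholds has len(fuzzyvalues) - 1 entries, one per adjacent pair.
    """
    max_fuz = max(fuzzyvalues)
    max_idx = fuzzyvalues.index(max_fuz)
    sec_fuz = max(v for i, v in enumerate(fuzzyvalues) if i != max_idx)
    sec_idx = fuzzyvalues.index(sec_fuz)
    sel_idx = max_idx  # select the maximum by default
    # Adapt the boundary: switch to the runner-up only if it is adjacent and close enough.
    if abs(sec_idx - max_idx) == 1 and \
            0. < max_fuz - sec_fuz < transition_thresholds[min(max_idx, sec_idx)]:
        sel_idx = sec_idx
    if sec_fuz < 0.1:  # a very weak runner-up never overrides the maximum
        sel_idx = max_idx
    return sel_idx

# ridge/backslope/valley memberships with hypothetical thresholds
print(select_slope_position([0.55, 0.45, 0.05], [0.2, 0.15]))  # -> 1 (backslope)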
Ejemplo n.º 40
0
    def scenario_from_texts(cfg, main_db, scenario_db):
        """Import BMPs Scenario data to MongoDB
        Args:
            cfg: SEIMS configuration object
            main_db: climate database
            scenario_db: scenario database
        Returns:
            False if failed, otherwise True.
        """
        if not cfg.use_scernario:
            return False
        print('Import BMP Scenario Data... ')
        bmp_files = FileClass.get_filename_by_suffixes(cfg.scenario_dir,
                                                       ['.txt'])
        bmp_tabs = list()
        bmp_tabs_path = list()
        for f in bmp_files:
            bmp_tabs.append(f.split('.')[0])
            bmp_tabs_path.append(cfg.scenario_dir + os.path.sep + f)

        # create the collection if it does not exist, otherwise drop the existing one
        c_list = scenario_db.collection_names()
        for item in bmp_tabs:
            if not StringClass.string_in_list(item.upper(), c_list):
                scenario_db.create_collection(item.upper())
            else:
                scenario_db.drop_collection(item.upper())
        # Read subbasin.tif and dist2Stream.tif
        subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
        dist2stream_r = RasterUtilClass.read_raster(
            cfg.spatials.dist2stream_d8)
        # End reading
        for j, bmp_txt in enumerate(bmp_tabs_path):
            bmp_tab_name = bmp_tabs[j]
            data_array = read_data_items_from_txt(bmp_txt)
            field_array = data_array[0]
            data_array = data_array[1:]
            for item in data_array:
                dic = dict()
                for i, field_name in enumerate(field_array):
                    if MathClass.isnumerical(item[i]):
                        v = float(item[i])
                        if v % 1. == 0.:
                            v = int(v)
                        dic[field_name.upper()] = v
                    else:
                        dic[field_name.upper()] = str(item[i]).upper()
                if StringClass.string_in_list(ImportScenario2Mongo._LocalX, list(dic.keys())) and \
                        StringClass.string_in_list(ImportScenario2Mongo._LocalY, list(dic.keys())):
                    subbsn_id = subbasin_r.get_value_by_xy(
                        dic[ImportScenario2Mongo._LocalX.upper()],
                        dic[ImportScenario2Mongo._LocalY.upper()])
                    distance = dist2stream_r.get_value_by_xy(
                        dic[ImportScenario2Mongo._LocalX.upper()],
                        dic[ImportScenario2Mongo._LocalY.upper()])
                    if subbsn_id is not None and distance is not None:
                        dic[ImportScenario2Mongo._SUBBASINID] = int(subbsn_id)
                        dic[ImportScenario2Mongo._DISTDOWN] = float(distance)
                        scenario_db[bmp_tab_name.upper()].find_one_and_replace(
                            dic, dic, upsert=True)
                else:
                    scenario_db[bmp_tab_name.upper()].find_one_and_replace(
                        dic, dic, upsert=True)
        # print('BMP tables are imported.')
        # Write BMP database name into Model workflow database
        c_list = main_db.collection_names()
        if not StringClass.string_in_list(DBTableNames.main_scenario, c_list):
            main_db.create_collection(DBTableNames.main_scenario)

        bmp_info_dic = dict()
        bmp_info_dic[ImportScenario2Mongo._FLD_DB] = cfg.bmp_scenario_db
        main_db[DBTableNames.main_scenario].find_one_and_replace(bmp_info_dic,
                                                                 bmp_info_dic,
                                                                 upsert=True)
        return True
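
Each text row is turned into a MongoDB document: numeric cells become float (or int when integral), everything else is upper-cased, and the document is upserted with find_one_and_replace. A minimal sketch of that per-row pattern, assuming a local MongoDB instance and made-up field, database, and collection names:

from pymongo import MongoClient

def to_value(text):
    """Convert a text cell: numeric -> float (int if integral), otherwise upper-case string."""
    try:
        v = float(text)
    except (TypeError, ValueError):
        return str(text).upper()
    return int(v) if v % 1. == 0. else v

row = {'BMPID': '10001', 'NAME': 'pig_farm1', 'CAPACITY': '200.5'}  # hypothetical record
doc = {k.upper(): to_value(v) for k, v in row.items()}

client = MongoClient('127.0.0.1', 27017)
coll = client['Scenario_demo']['BMP_POINT_SOURCE']  # hypothetical database and collection
coll.find_one_and_replace(doc, doc, upsert=True)    # insert if absent, replace otherwise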
Ejemplo n.º 41
0
def test_mathclass_isnumerical():
    assert MathClass.isnumerical('78') is True
    assert MathClass.isnumerical('1.e-5') is True
    assert MathClass.isnumerical(None) is False
    assert MathClass.isnumerical('a1.2') is False
Ejemplo n.º 42
0
def calculate_statistics(
        sim_obs_dict,  # type: Optional[Dict[str, Dict[str, Union[List[datetime], List[float], float]]]]
        stime=None,  # type: Optional[datetime]
        etime=None  # type: Optional[datetime]
):
    # type: (...) -> Optional[List[str]]
    """Calculate NSE, R-square, RMSE, PBIAS, and RSR.
    Args:
        sim_obs_dict: {VarName: {'UTCDATETIME': [t1, t2, ..., tn],
                                 'Obs': [o1, o2, ..., on],
                                 'Sim': [s1, s2, ..., sn]
                                 },
                       ...
                       }
        stime: Start time for statistics calculation.
        etime: End time for statistics calculation.
    Returns:
        sim_obs_dict is updated in place, so that each variable gains the keys
        'NSE', 'R-square', 'RMSE', 'PBIAS', 'RSR', 'lnNSE', 'NSE1', and 'NSE3', e.g.,
        {VarName: {'UTCDATETIME': [t1, t2, ..., tn],
                   'Obs': [o1, o2, ..., on],
                   'Sim': [s1, s2, ..., sn],
                   'NSE': nse_value,
                   'R-square': r2_value,
                   'RMSE': rmse_value,
                   'PBIAS': pbias_value,
                   'RSR': rsr_value,
                   'lnNSE': lnnse_value,
                   'NSE1': nse1_value,
                   'NSE3': nse3_value
                   },
         ...
        }
        The return value is the name list of the calculated statistics.
    """
    if not sim_obs_dict:
        return None
    for param, values in sim_obs_dict.items():
        if stime is None and etime is None:
            sidx = 0
            eidx = len(values['UTCDATETIME'])
        else:
            sidx = bisect.bisect_left(values['UTCDATETIME'], stime)
            eidx = bisect.bisect_right(values['UTCDATETIME'], etime)
        obsl = values['Obs'][sidx:eidx]
        siml = values['Sim'][sidx:eidx]

        nse_value = MathClass.nashcoef(obsl, siml)
        r2_value = MathClass.rsquare(obsl, siml)
        rmse_value = MathClass.rmse(obsl, siml)
        pbias_value = MathClass.pbias(obsl, siml)
        rsr_value = MathClass.rsr(obsl, siml)
        lnnse_value = MathClass.nashcoef(obsl, siml, log=True)
        nse1_value = MathClass.nashcoef(obsl, siml, expon=1)
        nse3_value = MathClass.nashcoef(obsl, siml, expon=3)

        values['NSE'] = nse_value
        values['R-square'] = r2_value
        values['RMSE'] = rmse_value
        values['PBIAS'] = pbias_value
        values['RSR'] = rsr_value
        values['lnNSE'] = lnnse_value
        values['NSE1'] = nse1_value
        values['NSE3'] = nse3_value

        # print('Statistics for %s, NSE: %.3f, R2: %.3f, RMSE: %.3f, PBIAS: %.3f, RSR: %.3f,'
        #       ' lnNSE: %.3f, NSE1: %.3f, NSE3: %.3f' %
        #       (param, nse_value, r2_value, rmse_value, pbias_value, rsr_value,
        #        lnnse_value, nse1_value, nse3_value))
    return ['NSE', 'R-square', 'RMSE', 'PBIAS', 'RSR', 'lnNSE', 'NSE1', 'NSE3']
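
A minimal usage sketch with made-up observed and simulated flow series; the function above updates the inner dict in place and returns the statistic names:

from datetime import datetime

sim_obs_dict = {
    'Q': {'UTCDATETIME': [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)],
          'Obs': [12.3, 45.6, 31.9],
          'Sim': [11.8, 50.2, 28.4]}
}
stat_names = calculate_statistics(sim_obs_dict)
print(stat_names)                # ['NSE', 'R-square', 'RMSE', 'PBIAS', 'RSR', 'lnNSE', 'NSE1', 'NSE3']
print(sim_obs_dict['Q']['NSE'])  # the statistics are stored back into the input dict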
Ejemplo n.º 43
0
    def lookup_tables_as_collection_and_gridfs(cfg, maindb):
        """Import lookup tables (from txt file) as Collection and GridFS
        Args:
            cfg: SEIMS config object
            maindb: workflow model database
        """
        for tablename, txt_file in list(cfg.paramcfgs.lookup_tabs_dict.items()):
            # import each lookup table as a collection and GridFS file.
            c_list = maindb.collection_names()
            if not StringClass.string_in_list(tablename.upper(), c_list):
                maindb.create_collection(tablename.upper())
            else:
                maindb.drop_collection(tablename.upper())
            # initialize the ordered bulk operator
            bulk = maindb[tablename.upper()].initialize_ordered_bulk_op()
            # delete the GridFS file if it already exists
            spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
            if spatial.exists(filename=tablename.upper()):
                x = spatial.get_version(filename=tablename.upper())
                spatial.delete(x._id)

            # read data items
            data_items = read_data_items_from_txt(txt_file)
            field_names = data_items[0][0:]
            item_values = list()  # import as gridfs file
            for i, cur_data_item in enumerate(data_items):
                if i == 0:
                    continue
                data_import = dict()  # import as Collection
                item_value = list()  # import as gridfs file
                for idx, fld in enumerate(field_names):
                    if MathClass.isnumerical(cur_data_item[idx]):
                        tmp_value = float(cur_data_item[idx])
                        data_import[fld] = tmp_value
                        item_value.append(tmp_value)
                    else:
                        data_import[fld] = cur_data_item[idx]
                bulk.insert(data_import)
                if len(item_value) > 0:
                    item_values.append(item_value)
            MongoUtil.run_bulk(bulk, 'No operations during import %s.' % tablename)
            # begin import gridfs file
            n_row = len(item_values)
            # print(item_values)
            if n_row >= 1:
                n_col = len(item_values[0])
                for i in range(n_row):
                    if n_col != len(item_values[i]):
                        raise ValueError('Please check %s to make sure each item has '
                                         'the same numeric dimension. The size of first '
                                         'row is: %d, and the current data item is: %d' %
                                         (tablename, n_col, len(item_values[i])))
                    else:
                        item_values[i].insert(0, n_col)

                metadic = {ModelParamDataUtils.item_count: n_row,
                           ModelParamDataUtils.field_count: n_col}
                cur_lookup_gridfs = spatial.new_file(filename=tablename.upper(), metadata=metadic)
                header = [n_row]
                fmt = '%df' % 1
                s = pack(fmt, *header)
                cur_lookup_gridfs.write(s)
                fmt = '%df' % (n_col + 1)
                for i in range(n_row):
                    s = pack(fmt, *item_values[i])
                    cur_lookup_gridfs.write(s)
                cur_lookup_gridfs.close()
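
The GridFS file written above stores one float holding the row count, followed by each row packed as its column count plus the column values. A sketch of how such a file could be read back (the database, bucket, and table names are hypothetical):

from struct import unpack

from gridfs import GridFS
from pymongo import MongoClient

db = MongoClient('127.0.0.1', 27017)['demo_model']    # hypothetical workflow database
gfs = GridFS(db, 'SPATIAL')                           # hypothetical GridFS bucket name
data = gfs.get_version(filename='SOILLOOKUP').read()  # hypothetical lookup table name

n_row = int(unpack('1f', data[:4])[0])
offset, table = 4, []
for _ in range(n_row):
    n_col = int(unpack('1f', data[offset:offset + 4])[0])
    row = unpack('%df' % n_col, data[offset + 4:offset + 4 + 4 * n_col])
    table.append(list(row))
    offset += 4 * (n_col + 1)
print(n_row, table[0] if table else None)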
Ejemplo n.º 44
0
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """
    Interpolate irregularly observed data to data at a regular time interval.
    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Field names can be PCP, FLOW, or SED;
                 the units are mm/h, m3/s, and g/L (i.e., kg/m3), respectively.
        time_interval: time interval, unit is minute, e.g., daily output is 1440
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system
                    is based on time_sys.
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If true, the time interval without original records will
                        not be output.
        time_sys_output: time system of output time_system, the format must be
                  '<time_system> [<time_zone>]', e.g.,
                  'LOCALTIME'
                  'LOCALTIME 8'
                  'UTCTIME' (default)
        day_divided_hour: If the time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23. e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59
    Returns:
        The output data files are located in the same directory as the input file.
        The naming convention is <field name>_<time system>_<time interval>[_nonzero].txt, e.g.,
        PCP_UTCTIME_1440_nonzero.txt, FLOW_LOCALTIME_60.txt
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available field
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_avaiable_field(cur_fld):
        """Check if the given field name is supported."""
        support_flag = False
        for fff in available_flds:
            if fff.lower() in cur_fld.lower():
                support_flag = True
                break
        return support_flag

    ord_data = OrderedDict()
    time_zone_output = time.timezone / -3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    # print(ord_data)
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        # print(item_dtime)
        # if item_dtime.month == 12 and item_dtime.day == 31:
        #     print("debug")
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one record earlier than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # Append the end time for computational convenience
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # Intervals without original records are dropped below if eliminate_zero is True
        # Initialize the interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            if 'SED' in v_name.upper():  # FLOW must exist
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!' %
                          item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                else:  # avoid division by zero when the aggregated flow is 0
                    itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():  # the input is mm/h, and output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta

    # for i, v in itp_data.items():
    #     print(i, v)
    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_avaiable_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.txt'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
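
A hypothetical call (the file path and period are made up) that aggregates irregular records to daily values in UTC with days starting at midnight; assuming the input header contains FLOW and SED columns, it writes FLOW_UTCTIME_1440.txt and SED_UTCTIME_1440.txt next to the input file:

interpolate_observed_data_to_regular_interval('/data/obs/outlet_flow_sed.txt',
                                              1440,
                                              '2013-01-01 00:00:00',
                                              '2013-12-31 23:59:59',
                                              eliminate_zero=False,
                                              time_sys_output='UTCTIME',
                                              day_divided_hour=0)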
Ejemplo n.º 45
0
    def scenario_from_texts(cfg, main_db, scenario_db):
        """Import BMPs Scenario data to MongoDB
        Args:
            cfg: SEIMS configuration object
            main_db: climate database
            scenario_db: scenario database
        Returns:
            False if failed, otherwise True.
        """
        if not cfg.use_scernario:
            return False
        print('Import BMP Scenario Data... ')
        bmp_files = FileClass.get_filename_by_suffixes(cfg.scenario_dir, ['.txt'])
        bmp_tabs = list()
        bmp_tabs_path = list()
        for f in bmp_files:
            bmp_tabs.append(f.split('.')[0])
            bmp_tabs_path.append(cfg.scenario_dir + os.path.sep + f)

        # create the collection if it does not exist, otherwise drop the existing one
        c_list = scenario_db.collection_names()
        for item in bmp_tabs:
            if not StringClass.string_in_list(item.upper(), c_list):
                scenario_db.create_collection(item.upper())
            else:
                scenario_db.drop_collection(item.upper())
        # Read subbasin.tif and dist2Stream.tif
        subbasin_r = RasterUtilClass.read_raster(cfg.spatials.subbsn)
        dist2stream_r = RasterUtilClass.read_raster(cfg.spatials.dist2stream_d8)
        # End reading
        for j, bmp_txt in enumerate(bmp_tabs_path):
            bmp_tab_name = bmp_tabs[j]
            data_array = read_data_items_from_txt(bmp_txt)
            field_array = data_array[0]
            data_array = data_array[1:]
            for item in data_array:
                dic = dict()
                for i, field_name in enumerate(field_array):
                    if MathClass.isnumerical(item[i]):
                        v = float(item[i])
                        if v % 1. == 0.:
                            v = int(v)
                        dic[field_name.upper()] = v
                    else:
                        dic[field_name.upper()] = str(item[i]).upper()
                if StringClass.string_in_list(ImportScenario2Mongo._LocalX, list(dic.keys())) and \
                        StringClass.string_in_list(ImportScenario2Mongo._LocalY, list(dic.keys())):
                    subbsn_id = subbasin_r.get_value_by_xy(
                            dic[ImportScenario2Mongo._LocalX.upper()],
                            dic[ImportScenario2Mongo._LocalY.upper()])
                    distance = dist2stream_r.get_value_by_xy(
                            dic[ImportScenario2Mongo._LocalX.upper()],
                            dic[ImportScenario2Mongo._LocalY.upper()])
                    if subbsn_id is not None and distance is not None:
                        dic[ImportScenario2Mongo._SUBBASINID] = int(subbsn_id)
                        dic[ImportScenario2Mongo._DISTDOWN] = float(distance)
                        scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic,
                                                                               upsert=True)
                else:
                    scenario_db[bmp_tab_name.upper()].find_one_and_replace(dic, dic,
                                                                           upsert=True)
        # print('BMP tables are imported.')
        # Write BMP database name into Model workflow database
        c_list = main_db.collection_names()
        if not StringClass.string_in_list(DBTableNames.main_scenario, c_list):
            main_db.create_collection(DBTableNames.main_scenario)

        bmp_info_dic = dict()
        bmp_info_dic[ImportScenario2Mongo._FLD_DB] = cfg.bmp_scenario_db
        main_db[DBTableNames.main_scenario].find_one_and_replace(bmp_info_dic, bmp_info_dic,
                                                                 upsert=True)
        return True
Ejemplo n.º 46
0
    def run(function_name, in_files, wp=None, in_params=None, out_files=None, mpi_params=None,
            log_params=None):
        """
        Run TauDEM function.

         - 1. The command will not execute if any input file does not exist.
         - 2. An error will be detected after running the TauDEM command if
              any output file does not exist.

        Args:
            function_name (str): Full path of TauDEM function.
            in_files (dict, required): Dict of pairs of parameter id (string) and file path
                (string or list) for input files, e.g.::

                    {'-z': '/full/path/to/dem.tif'}

            wp (str, optional): Workspace for outputs. If not specified, the directory of the
                first input file in ``in_files`` will be used.
            in_params (dict, optional): Dict of pairs of parameter id (string) and value
                (or None for a flag parameter without a value) for input parameters, e.g.::

                    {'-nc': None}
                    {'-thresh': threshold}
                    {'-m': 'ave s', '-nc': None}

            out_files (dict, optional): Dict of pairs of parameter id (string) and file
                path (string or list) for output files, e.g.::

                    {'-fel': 'filleddem.tif'}
                    {'-maxS': ['harden.tif', 'maxsimi.tif']}

            mpi_params (dict, optional): Dict of pairs of parameter id (string) and value or
                path for MPI setting, e.g.::

                    {'mpipath':'/soft/bin','hostfile':'/soft/bin/cluster.node','n':4}
                    {'mpipath':'/soft/bin', 'n':4}
                    {'n':4}

            log_params (dict, optional): Dict of pairs of parameter id (string) and value or
                path for runtime and log output parameters. e.g.::

                    {'logfile': '/home/user/log.txt',
                     'runtimefile': '/home/user/runtime.txt'}

        Returns:
            True if TauDEM run successfully, otherwise False.
        """
        # Check input files
        if in_files is None:
            TauDEM.error('Input files parameter is required!')
        if not isinstance(in_files, dict):
            TauDEM.error('The input files parameter must be a dict!')
        for (pid, infile) in iteritems(in_files):
            if infile is None:
                continue
            if isinstance(infile, list) or isinstance(infile, tuple):
                for idx, inf in enumerate(infile):
                    if inf is None:
                        continue
                    inf, wp = TauDEM.check_infile_and_wp(inf, wp)
                    in_files[pid][idx] = inf
                continue
            if os.path.exists(infile):
                infile, wp = TauDEM.check_infile_and_wp(infile, wp)
                in_files[pid] = os.path.abspath(infile)
            else:
                # For more flexible input file extensions,
                # e.g., -inputtags 1 <path/to/tag1.tif> 2 <path/to/tag2.tif> ...
                # In such unpredictable circumstances we cannot check the existence of
                # the input files here, so the caller is responsible for checking them.
                if len(StringClass.split_string(infile, ' ')) > 1:
                    continue
                else:  # the infile should still be an existing file, so check it in the workspace
                    if wp is None:
                        TauDEM.error('Workspace should not be None!')
                    infile = wp + os.sep + infile
                    if not os.path.exists(infile):
                        TauDEM.error('Input files parameter %s: %s does not exist!' %
                                     (pid, infile))
                    in_files[pid] = os.path.abspath(infile)
        # Make the workspace directory if it does not exist
        UtilClass.mkdir(wp)
        # Check the log parameter
        log_file = None
        runtime_file = None
        if log_params is not None:
            if not isinstance(log_params, dict):
                TauDEM.error('The log parameter must be a dict!')
            if 'logfile' in log_params and log_params['logfile'] is not None:
                log_file = log_params['logfile']
                # If log_file is just a file name, then save it in the default workspace.
                if os.sep not in log_file:
                    log_file = wp + os.sep + log_file
                    log_file = os.path.abspath(log_file)
            if 'runtimefile' in log_params and log_params['runtimefile'] is not None:
                runtime_file = log_params['runtimefile']
                # If runtime_file is just a file name, then save it in the default workspace.
                if os.sep not in runtime_file:
                    runtime_file = wp + os.sep + runtime_file
                    runtime_file = os.path.abspath(runtime_file)

        # remove out_files to avoid any file IO related error
        new_out_files = list()
        if out_files is not None:
            if not isinstance(out_files, dict):
                TauDEM.error('The output files parameter must be a dict!')
            for (pid, out_file) in iteritems(out_files):
                if out_file is None:
                    continue
                if isinstance(out_file, list) or isinstance(out_file, tuple):
                    for idx, outf in enumerate(out_file):
                        if outf is None:
                            continue
                        outf = FileClass.get_file_fullpath(outf, wp)
                        FileClass.remove_files(outf)
                        out_files[pid][idx] = outf
                        new_out_files.append(outf)
                else:
                    out_file = FileClass.get_file_fullpath(out_file, wp)
                    FileClass.remove_files(out_file)
                    out_files[pid] = out_file
                    new_out_files.append(out_file)

        # concatenate command line
        commands = list()
        # MPI header
        if mpi_params is not None:
            if not isinstance(mpi_params, dict):
                TauDEM.error('The MPI settings parameter must be a dict!')
            if 'mpipath' in mpi_params and mpi_params['mpipath'] is not None:
                commands.append(mpi_params['mpipath'] + os.sep + 'mpiexec')
            else:
                commands.append('mpiexec')
            if 'hostfile' in mpi_params and mpi_params['hostfile'] is not None \
                    and not StringClass.string_match(mpi_params['hostfile'], 'none') \
                    and os.path.isfile(mpi_params['hostfile']):
                commands.append('-f')
                commands.append(mpi_params['hostfile'])
            if 'n' in mpi_params and mpi_params['n'] > 1:
                commands.append('-n')
                commands.append(str(mpi_params['n']))
            else:  # If the number of processes is not greater than 1, do not call mpiexec.
                commands = []
        # append TauDEM function name, which can be full path or just one name
        commands.append(function_name)
        # append input files
        for (pid, infile) in iteritems(in_files):
            if infile is None:
                continue
            if pid[0] != '-':
                pid = '-' + pid
            commands.append(pid)
            if isinstance(infile, list) or isinstance(infile, tuple):
                commands.append(' '.join(tmpf for tmpf in infile))
            else:
                commands.append(infile)
        # append input parameters
        if in_params is not None:
            if not isinstance(in_params, dict):
                TauDEM.error('The input parameters must be a dict!')
            for (pid, v) in iteritems(in_params):
                if pid[0] != '-':
                    pid = '-' + pid
                commands.append(pid)
                # allow for a parameter that is a flag without a value
                if v != '' and v is not None:
                    if MathClass.isnumerical(v):
                        commands.append(str(v))
                    else:
                        commands.append(v)
        # append output parameters
        if out_files is not None:
            for (pid, outfile) in iteritems(out_files):
                if outfile is None:
                    continue
                if pid[0] != '-':
                    pid = '-' + pid
                commands.append(pid)
                if isinstance(outfile, list) or isinstance(outfile, tuple):
                    commands.append(' '.join(tmpf for tmpf in outfile))
                else:
                    commands.append(outfile)
        # run command
        runmsg = UtilClass.run_command(commands)
        TauDEM.log(runmsg, log_file)
        TauDEM.output_runtime_to_log(function_name, runmsg, runtime_file)
        # Check out_files, raise RuntimeError if not exist.
        for of in new_out_files:
            if not os.path.exists(of):
                TauDEM.error('%s failed, and the %s was not generated!' % (function_name, of))
                return False
        return True
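
A hypothetical invocation (all paths are made up) that fills depressions in a DEM with TauDEM's pitremove through four MPI processes and logs the run:

TauDEM.run('pitremove',
           {'-z': '/data/demo/dem.tif'},
           wp='/data/demo/taudem',
           out_files={'-fel': 'dem_filled.tif'},
           mpi_params={'mpipath': '/usr/bin', 'n': 4},
           log_params={'logfile': 'taudem.log'})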