Example 1
 def calculate_zonal(self, out_name=''):
     
     inP = self.in_pop.read()
     inA = self.admin_layer #gpd.read_file(self.admin_layer)        
     
     # Population statistics (SUM/MIN/MAX/MEAN) per admin unit
     res = rMisc.zonalStats(inA, self.in_pop, minVal=0)
     final = pd.DataFrame(res, columns=["TOTALPOP_%s_%s" % (os.path.basename(self.pop_layer), x) for x in ['SUM', 'MIN', 'MAX', 'MEAN']])
         
     # For each urban extent layer, mask the population raster and summarize by admin area
     for lyr in [self.urban_layer, self.urban_hd_layer]:
         name = os.path.basename(lyr)
         in_urban = rasterio.open(lyr)
         inU = in_urban.read()
         cur_pop = inP * inU
         out_file = os.path.join(self.temp_folder, "urban_pop.tif")
         
         with rasterio.open(out_file, 'w', **self.in_pop.meta) as out_urban:
             out_urban.write(cur_pop)
             
         res = rMisc.zonalStats(inA, out_file, minVal=0)
         res = pd.DataFrame(res, columns=["%s_%s_%s" % (out_name, name, x) for x in ['SUM', 'MIN', 'MAX', 'MEAN']])
         try:
             final = final.join(res)
         except Exception:
             # if the join fails (e.g. overlapping column names), fall back to the latest results
             final = res
     return final
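
The method above masks the population raster with each urban layer before summarizing. A minimal standalone sketch of the same mask-then-summarize pattern, assuming rMisc.zonalStats accepts a GeoDataFrame and a raster path as it does in these examples; the file names and the import path for rMisc are illustrative assumptions:

import rasterio
import pandas as pd
import geopandas as gpd
import GOSTRocks.rasterMisc as rMisc  # assumed import path; the examples alias this module as rMisc

admin = gpd.read_file("admin.shp")    # hypothetical admin boundaries
pop = rasterio.open("pop.tif")        # hypothetical population raster
urban = rasterio.open("urban.tif")    # hypothetical 0/1 urban mask on the same grid as pop

# Keep population only inside urban cells, write it out, then summarize by admin feature
urban_pop = pop.read() * urban.read()
with rasterio.open("urban_pop.tif", "w", **pop.meta) as dst:
    dst.write(urban_pop)

res = rMisc.zonalStats(admin, "urban_pop.tif", minVal=0)
stats = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"])
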
Example 2
def run_zonal(admin_shapes, rasters):
    ''' Calculate zonal results for submitted admin and raster
        
        INPUTS
            admin_shapes [geopandas] - features within which to calculate statistics
            rasters [dictionary] - data dictionary containing the raster and the required information
                { 'HNP_Var1':{
                        'raster_file': 'path_to_raster',
                        'vars':['SUM','MEAN'],
                        'description':'Lorem Ipsum'
                    }
                }
    '''
    for shp in admin_shapes:
        inD = gpd.read_file(shp)
        out_zonal = shp.replace(".shp", "_zonal.shp")
        if not os.path.exists(out_zonal):
            for var_name, definition in rasters.items():
                # Zonal stats
                res = rMisc.zonalStats(inD,
                                       definition['raster_file'],
                                       minVal=0,
                                       reProj=True)
                res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])
                res.columns = [f"{var_name}_{x}" for x in res.columns]
                for var in definition['vars']:
                    inD[f"{var_name}_{var}"] = res[f"{var_name}_{var}"]
            inD.to_file(out_zonal)
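
A hedged usage sketch for run_zonal, following the dictionary structure shown in the docstring; the shapefile names, the raster path, and the POP_2020 key are hypothetical:

rasters = {
    "POP_2020": {                        # hypothetical variable name, used as a column prefix
        "raster_file": "pop_2020.tif",   # hypothetical raster path
        "vars": ["SUM", "MEAN"],         # which of SUM/MIN/MAX/MEAN to keep on the output
        "description": "Gridded population, 2020",
    }
}
# Writes adm1_zonal.shp and adm2_zonal.shp next to the inputs, skipping any that already exist
run_zonal(["adm1.shp", "adm2.shp"], rasters)
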
Example 3
 def summarizeLULC(self, lulcFile, outFile):
     ''' Run categorical zonal stats against the input lulcFile (e.g. a GBDx land cover classification)
     '''
     lulcRes = rasterMisc.zonalStats(self.gridFile,
                                     lulcFile,
                                     reProj=True,
                                     verbose=True,
                                     rastType='C',
                                     unqVals=[0, 1, 2, 3, 4, 5, 6])
     lulcRes = pd.DataFrame(
         lulcRes,
         columns=["NoData", "Veg", "H2O", "Bare", "Cloud", "Shdw", "Built"])
     lulcRes.to_csv(outFile)
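
With rastType='C', the result has one column per value in unqVals, in the same order, so the rename above maps 0 to NoData through 6 to Built. A small illustrative sketch of that positional mapping, assuming the categorical output is a per-feature pixel count for each class; the numbers are made up:

import pandas as pd

class_map = {0: "NoData", 1: "Veg", 2: "H2O", 3: "Bare", 4: "Cloud", 5: "Shdw", 6: "Built"}

# Toy per-feature counts in the same order as unqVals=[0, 1, 2, 3, 4, 5, 6]
raw = [[0, 120, 5, 30, 0, 2, 43],
       [3, 300, 0, 10, 1, 0, 86]]
lulcRes = pd.DataFrame(raw, columns=[class_map[v] for v in sorted(class_map)])
lulcRes["BuiltShare"] = lulcRes["Built"] / lulcRes.drop(columns="NoData").sum(axis=1)
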
Example 4
    def run_zonal(self, file_defs):
        ''' Calculate zonal results for the class's admin layers and fishnets against the submitted rasters

            INPUTS
                file_defs [dictionary] - data dictionary containing the rasters and the required information
                    { 'HNP_Var1':{
                            'raster_file': 'path_to_raster',
                            'vars':['SUM','MEAN'],
                            'description':'Lorem Ipsum'
                        }
                    }
        '''
        admin_shapes = [self.adm0_file, self.adm1_file, self.adm2_file]
        if os.path.exists(self.urb_bounds):
            admin_shapes.append(self.urb_bounds)
        if os.path.exists(self.hd_urb_bounds):
            admin_shapes.append(self.hd_urb_bounds)
        #Add fishnet files to list
        for folder in [self.urban_fishnets, self.hd_urban_fishnets]:
            for root, dirs, files in os.walk(folder):
                for f in files:
                    if f.endswith(".shp"):
                        admin_shapes.append(os.path.join(root, f))

        for shp in admin_shapes:
            if os.path.exists(shp) and "zonal" not in shp:
                inD = gpd.read_file(shp)
                out_zonal = shp.replace(".shp", "_zonal.shp")
                if not os.path.exists(out_zonal):
                    for var_name, definition in file_defs.items():
                        if definition['raster_file'] != '_INPUT_':
                            rFile = os.path.join(self.out_folder,
                                                 definition['raster_file'])
                            if os.path.exists(rFile):
                                # Zonal stats
                                res = rMisc.zonalStats(inD,
                                                       rFile,
                                                       minVal=0,
                                                       reProj=True)
                                res = pd.DataFrame(
                                    res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])
                                res.columns = [
                                    f"{var_name}_{x}" for x in res.columns
                                ]
                                for var in definition['vars']:
                                    inD[f"{var_name}_{var}"] = res[
                                        f"{var_name}_{var}"]
                    inD.to_file(out_zonal)
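
A hedged sketch of a file_defs dictionary this method would accept; the keys and file names are hypothetical. Entries whose raster_file is the '_INPUT_' sentinel are skipped, and the other raster_file values are resolved relative to self.out_folder:

file_defs = {
    "POP": {                          # hypothetical variable name, used as a column prefix
        "raster_file": "pop.tif",     # joined with self.out_folder before the zonal run
        "vars": ["SUM"],
        "description": "Gridded population",
    },
    "RAW": {
        "raster_file": "_INPUT_",     # sentinel value: this entry is skipped
        "vars": [],
        "description": "Placeholder for the raw input layer",
    },
}
processor.run_zonal(file_defs)        # 'processor' is a hypothetical instance of the enclosing class
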
Example 5
 def summarizeLandcover(self,
                        lcFile,
                        unqVals=[1, 2, 3, 4, 5, 6, 7, 8, 10, 200]):
     ''' Run categorical zonal stats on lcFile
     ### TODO: calculate zonal values from the input lcFile
     '''
     if not os.path.exists(self.lcFile):
         logging.info("Running categorical summary of landcover file")
         xx = rasterMisc.zonalStats(self.gridFile,
                                    lcFile,
                                    rastType='C',
                                    reProj=True,
                                    verbose=True,
                                    unqVals=unqVals)
         outX = pd.DataFrame(xx, columns=unqVals)
         outX.to_csv(self.lcFile)
     else:
         logging.info("Landcover summary already exists")
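
A hedged usage sketch; 'summary' stands in for a hypothetical instance of the enclosing class with gridFile and lcFile already set, and the raster path is illustrative. The method is idempotent: it only recomputes when self.lcFile is missing.

import logging
import pandas as pd

logging.basicConfig(level=logging.INFO)

# 'summary' is a hypothetical instance of the enclosing class
summary.summarizeLandcover("landcover.tif")            # first call writes self.lcFile
summary.summarizeLandcover("landcover.tif")            # second call just logs that it already exists
lc_counts = pd.read_csv(summary.lcFile, index_col=0)   # per-feature counts, one column per class code
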
Example 6
 def summarizeGHSL(self):
     if not os.path.exists(self.ghslSummary):
         logging.info("Running GHSL Summary")
         urbanParams = misc.getUrbanParams()
         ghslVRT = urbanParams["ghslVRT"]
         ghslRes = rasterMisc.zonalStats(self.gridFile,
                                         ghslVRT,
                                         reProj=True,
                                         verbose=True,
                                         rastType='C',
                                         unqVals=[0, 1, 2, 3, 4, 5, 6])
         ghslRes = pd.DataFrame(ghslRes,
                                columns=[
                                    'NoData', 'Water', 'NotBuilt', 'b2014',
                                    'b2000', 'b1990', 'b1975'
                                ])
         #Create total built metrics
         ghslRes['TotalBuilt'] = ghslRes['b2014'] + ghslRes[
             'b2000'] + ghslRes['b1990'] + ghslRes['b1975']
         ghslRes.to_csv(self.ghslSummary)
     else:
         logging.info("GHSL Summary already exists")
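
A hedged sketch of a derived built-up share computed from the GHSL summary written above, assuming (as in the other categorical examples) that each column holds a per-feature pixel count; the CSV path is illustrative and the column names come from the example:

import pandas as pd

ghsl = pd.read_csv("ghsl_summary.csv", index_col=0)            # hypothetical path to self.ghslSummary
built_cols = ["b2014", "b2000", "b1990", "b1975"]
valid = ghsl[["Water", "NotBuilt"] + built_cols].sum(axis=1)   # all classified, non-NoData pixels
ghsl["BuiltShare"] = ghsl[built_cols].sum(axis=1) / valid
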
Example 7
#Countries to process
inCountries = ["Afghanistan","Bangladesh","Burkina Faso","Chad","Ecuador","El Salvador","Fiji","Guinea","Haiti","Honduras","Madagascar","Mali","Mongolia","Myanmar","Mozambique","Niger","Pakistan","Papua New Guinea","Peru","Philippines","Senegal","Somalia","Thailand","Ukraine"]
for c in ['Somalia']:  # inD.NAME_0.unique()
    curD = inD[inD.NAME_0 == c]
    admFile = os.path.join(outFolder, "%s_adm2.shp" % c)
    euroStatFile = os.path.join(outFolder, "%s_GADM_lowestAdmin.csv" % c)
    ghslStatFile = os.path.join(outFolder, "%s_GADM_GHSL.csv" % c)
    
    if not os.path.exists(admFile):
        curD.to_file(admFile)        
    
    if not os.path.exists(ghslStatFile):
        #Summarize GHSL
        ghslRes = rM.zonalStats(admFile, urbanParams['ghslVRT'],
                    bandNum=1, reProj=True, minVal='', verbose=False,
                    rastType='C', unqVals=[0,1,2,3,4,5,6])
        ghslFinal = pd.DataFrame(ghslRes, columns = ['NoData','Water','NotBuilt','b00_14','b90_00','b75_90','bPre75'])
        curD_ghsl = pd.concat([curD, ghslFinal], axis=1)
        curD_ghsl.to_csv(ghslStatFile)

    if not os.path.exists(euroStatFile):
        #Run Eurostat urbanization methodology
        try:
            unionedFile = os.path.join(outFolder, "%s_unioned_areas_population.csv" % c)
            tabledOutput = unionedFile.replace(".csv", "_%s_Pivotted.csv" % c)
            newAdmin = admFile.replace(".shp", "_noNull.shp")
            xx = GOSTRocks.Urban.UrbanAdminComparison.compareAreas(c, admFile, outFolder)
            unionedFile = xx.calculateVectorIntersection()
            tabulatedResults = xx.tabulateVectorResults(admCode=admCode)
        except Exception as e:
            #Keep processing the remaining countries if the comparison fails
            print("Eurostat urbanization comparison failed for %s: %s" % (c, e))
Example 8
    def summarizeSSBN(self, ssbnFolder, imageType="tif"):
        ''' Run zonal statistics on all files in the input SSBN folder
        '''
        #Get reference to the appropriate flood files
        if ssbnFolder == '':
            raise ValueError("No SSBN Folder defined")
        if not os.path.exists(self.SSBNSummary):
            #Get a reference to SSBN flood tif files
            allTifs = []
            for root, dirs, files in os.walk(ssbnFolder):
                for f in files:
                    if f.endswith(imageType):
                        allTifs.append(os.path.join(root, f))
            #Run zonal stats on all SSBN tif files
            for cTif in allTifs:
                name = os.path.basename(cTif).replace(imageType, "")
                logging.info("Processing SSBN file %s" % name)
                ##Get the flood type from the raster name
                columnNames = [
                    "%s-%s" % (name, x) for x in ["SUM", "MIN", "MAX", "MEAN"]
                ]
                curRes = rasterMisc.zonalStats(self.gridFile,
                                               cTif,
                                               rastType='N',
                                               reProj=True,
                                               verbose=False)
                curPD = pd.DataFrame(curRes, columns=columnNames)
                #Drop the Min and Sum columns
                curPD = curPD.drop(curPD.columns[[0, 1]], axis=1)
                try:
                    final = final.join(curPD)
                except Exception:
                    # first file processed: nothing to join onto yet
                    final = curPD
            final.to_csv(self.SSBNSummary)
        else:
            final = pd.read_csv(self.SSBNSummary)
        #Create flood-derived statistics
        #Which cells have both fluvial and pluvial risk
        returnRates = [1000, 500, 250, 200, 100, 75, 50, 20, 10, 5]
        curStat = "MAX"

        def calculateMinReturn(x, minVal=0, maxVal=100):
            ''' For each row, find the columns with a value between minVal and maxVal
                and return the smallest return period encoded in those column names '''
            fVals = x[(x > minVal) & (x < maxVal)]
            try:
                return min([int(col.split("-")[2]) for col in fVals.index])
            except (ValueError, IndexError):
                # no flooded columns, or a column name without a return-period token
                return 0

        #For each cell, determine the return rate at which it floods
        #Select appropriate columns for each of U and D
        allRes = []
        for cFlood in ["FD", "FU", "PD", "PU", "UD", "UU"]:
            curD = final[[col for col in final.columns if cFlood in col]]
            allRes.append(curD.apply(calculateMinReturn, axis=1))

        xx = pd.DataFrame(allRes).transpose()
        xx.columns = ["FD", "FU", "PD", "PU", "UD", "UU"]
        xx.to_csv(self.SSBNReturn)
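
calculateMinReturn assumes the SSBN file basenames carry the return period as the third dash-separated token, so int(col.split("-")[2]) recovers it from each column name. A toy illustration with made-up column names that follow that pattern:

import pandas as pd

def calculateMinReturn(x, minVal=0, maxVal=100):
    fVals = x[(x > minVal) & (x < maxVal)]
    try:
        return min([int(col.split("-")[2]) for col in fVals.index])
    except (ValueError, IndexError):
        return 0

# Hypothetical columns: <country>-<floodType>-<returnPeriod>-<band>-MAX
toy = pd.DataFrame(
    [[0.0, 0.4, 2.1],    # flood depth at the 50- and 5-year events -> min return period 5
     [0.0, 0.0, 0.0]],   # never floods -> 0
    columns=["SO-FD-100-1-MAX", "SO-FD-50-1-MAX", "SO-FD-5-1-MAX"])
print(toy.apply(calculateMinReturn, axis=1))   # [5, 0]
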