def _aggregate_results(forest_edge_distance_uri, biomass_uri, ecoregion_dataset_uri, ecoregion_lookup, biomass_stats_uri): cell_size = raster_utils.get_cell_size_from_uri(forest_edge_distance_uri) forest_edge_nodata = raster_utils.get_nodata_from_uri(forest_edge_distance_uri) biomass_nodata = raster_utils.get_nodata_from_uri(biomass_uri) outfile = open(biomass_stats_uri, 'w') ecoregion_dataset = gdal.Open(ecoregion_dataset_uri) ecoregion_band = ecoregion_dataset.GetRasterBand(1) biomass_ds = gdal.Open(biomass_uri, gdal.GA_ReadOnly) biomass_band = biomass_ds.GetRasterBand(1) forest_edge_distance_ds = gdal.Open(forest_edge_distance_uri) forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1) n_rows, n_cols = raster_utils.get_row_col_from_uri(biomass_uri) base_srs = osr.SpatialReference(biomass_ds.GetProjection()) lat_lng_srs = base_srs.CloneGeogCS() coord_transform = osr.CoordinateTransformation( base_srs, lat_lng_srs) gt = biomass_ds.GetGeoTransform() grid_resolution_list = [25, 50, 100, 150, 200, 300, 400, 500] grid_coordinates = dict((resolution, {}) for resolution in grid_resolution_list) block_col_size, block_row_size = biomass_band.GetBlockSize() n_global_block_rows = int(math.ceil(float(n_rows) / block_row_size)) n_global_block_cols = int(math.ceil(float(n_cols) / block_col_size)) last_time = time.time() for global_block_row in xrange(n_global_block_rows): current_time = time.time() if current_time - last_time > 5.0: print "aggregation %.1f%% complete" % (global_block_row / float(n_global_block_rows) * 100) last_time = current_time for global_block_col in xrange(n_global_block_cols): xoff = global_block_col * block_col_size yoff = global_block_row * block_row_size win_xsize = min(block_col_size, n_cols - xoff) win_ysize = min(block_row_size, n_rows - yoff) biomass_block = biomass_band.ReadAsArray( xoff=xoff, yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize) forest_edge_distance_block = forest_edge_distance_band.ReadAsArray( xoff=xoff, 
yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize) ecoregion_id_block = ecoregion_band.ReadAsArray( xoff=xoff, yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize) for global_row in xrange(global_block_row*block_row_size, min((global_block_row+1)*block_row_size, n_rows)): for global_col in xrange(global_block_col*block_col_size, min((global_block_col+1)*block_col_size, n_cols)): row_coord = gt[3] + global_row * gt[5] col_coord = gt[0] + global_col * gt[1] local_row = global_row - global_block_row * block_row_size local_col = global_col - global_block_col * block_col_size lng_coord, lat_coord, _ = coord_transform.TransformPoint( col_coord, row_coord) #normalize the coordinates so they don't go negative global_grid_row = row_coord - GLOBAL_UPPER_LEFT_ROW global_grid_col = col_coord - GLOBAL_UPPER_LEFT_COL ecoregion_id = ecoregion_id_block[local_row, local_col] if (forest_edge_distance_block[local_row, local_col] != forest_edge_nodata and forest_edge_distance_block[local_row, local_col] > 0.0 and biomass_block[local_row, local_col] != biomass_nodata): outfile.write("%f;%f;%f;%f;%s;%s;%s" % ( forest_edge_distance_block[local_row, local_col] * cell_size, biomass_block[local_row, local_col], lat_coord, lng_coord, ecoregion_lookup[ecoregion_id]['ECO_NAME'], ecoregion_lookup[ecoregion_id]['ECODE_NAME'], ecoregion_lookup[ecoregion_id]['WWF_MHTNAM'])) outfile.write(";%f;%f" % (global_grid_row, global_grid_col)) for global_grid_resolution in grid_resolution_list: #output a grid coordinate in the form 'grid_row-grid_col' grid_row = int(global_grid_row/(global_grid_resolution*1000)) grid_col = int(global_grid_col/(global_grid_resolution*1000)) grid_id = str(grid_row) + '-' + str(grid_col) outfile.write(";%s" % grid_id) if grid_id not in grid_coordinates[global_grid_resolution]: grid_row_center = grid_row * global_grid_resolution*1000 + GLOBAL_UPPER_LEFT_ROW grid_col_center = grid_col * global_grid_resolution*1000 + GLOBAL_UPPER_LEFT_COL grid_lng_coord, grid_lat_coord, _ = 
coord_transform.TransformPoint( grid_col_center, grid_row_center) grid_coordinates[global_grid_resolution][grid_id] = (grid_lat_coord, grid_lng_coord) print grid_lat_coord, grid_lng_coord outfile.write('/n') outfile.close() for global_grid_resolution in grid_resolution_list: output_dir, base_filename = os.path.split(biomass_stats_uri) basename = os.path.basename(base_filename) grid_output_file = open(os.path.join(output_dir, basename + '_' + str(global_grid_resolution) + '.csv'), 'w') grid_output_file.write('grid id;lat_coord;lng_coord/n') open(biomass_stats_uri, 'w') for grid_id, (lat, lng) in grid_coordinates[global_grid_resolution].iteritems(): grid_output_file.write('%s;%s;%s/n' % (grid_id, lat, lng)) grid_output_file.close()
def run(self):
    """Build one polygon shapefile per grid resolution from the grid CSVs.

    For each (resolution, grid table, shapefile path) triple, creates an
    ESRI Shapefile whose features are square grid cells.  Each cell's
    corners are computed from the 'row-col' grid id and the base raster's
    geotransform; each CSV column becomes a field (string fields for the
    names in string_args, real fields otherwise; '-9999' strings become
    'NA').

    Reads self.base_uri, self.grid_table_file_list and
    self.shapefile_output_list; writes the files in
    self.shapefile_output_list.

    Returns:
        None
    """
    _, n_cols = raster_utils.get_row_col_from_uri(self.base_uri)
    base_ds = gdal.Open(self.base_uri, gdal.GA_ReadOnly)
    geo_trans = base_ds.GetGeoTransform()
    output_sr = osr.SpatialReference(base_ds.GetProjection())

    # columns that hold text rather than numbers
    # (got this from reading the grid output)
    string_args = [
        'Confidence', 'gridID', 'forest', 'main_biome', 'main_ecoregion',
        'Continent']

    for global_grid_resolution, grid_filename, shapefile_filename in zip(
            GRID_RESOLUTION_LIST, self.grid_table_file_list,
            self.shapefile_output_list):
        # the shapefile driver refuses to overwrite an existing file
        if os.path.isfile(shapefile_filename):
            os.remove(shapefile_filename)
        driver = ogr.GetDriverByName('ESRI Shapefile')
        datasource = driver.CreateDataSource(shapefile_filename)

        # layer name is the shapefile's basename without the extension
        uri_basename = os.path.basename(shapefile_filename)
        layer_name = os.path.splitext(uri_basename)[0].encode("utf-8")
        grid_layer = datasource.CreateLayer(
            layer_name, output_sr, ogr.wkbPolygon)

        # BUG FIX: the grid table file was never closed; close it once all
        # of its rows have been turned into features
        grid_file = open(grid_filename, 'rU')
        try:
            headers = grid_file.readline().rstrip().split(',')

            # Add a single ID field
            field = ogr.FieldDefn(headers[0], ogr.OFTString)
            grid_layer.CreateField(field)
            field_names = [headers[0]]
            for arg in headers[1:]:
                # shapefile field names are limited to 10 characters, so
                # abbreviate the long header names
                if arg.startswith('anthrome_'):
                    arg = 'anth' + arg[9:]
                elif arg.startswith('prop_main'):
                    arg = 'pr_mn' + arg[9:14]
                else:
                    arg = arg[:10]
                if arg in string_args:
                    field = ogr.FieldDefn(arg, ogr.OFTString)
                else:
                    field = ogr.FieldDefn(arg, ogr.OFTReal)
                field_names.append(arg)
                grid_layer.CreateField(field)
            # NOTE(review): CommitTransaction without a matching
            # StartTransaction — likely a no-op for this driver; confirm
            grid_layer.CommitTransaction()

            for line in grid_file:
                gridid = line.split(',')[0]
                # gridid is of the form 'row-col'
                grid_row_index = int(gridid.split('-')[0])
                grid_col_index = int(gridid.split('-')[1])

                # build the square cell; rows grow downward, hence the
                # negative row offsets from the raster's top edge
                ring = ogr.Geometry(ogr.wkbLinearRing)
                ring.AddPoint(
                    grid_col_index * (global_grid_resolution * 1000) +
                    geo_trans[0],
                    -grid_row_index * (global_grid_resolution * 1000) +
                    geo_trans[3])
                ring.AddPoint(
                    grid_col_index * (global_grid_resolution * 1000) +
                    geo_trans[0],
                    -(1 + grid_row_index) * (global_grid_resolution * 1000) +
                    geo_trans[3])
                ring.AddPoint(
                    (1 + grid_col_index) * (global_grid_resolution * 1000) +
                    geo_trans[0],
                    -(1 + grid_row_index) * (global_grid_resolution * 1000) +
                    geo_trans[3])
                ring.AddPoint(
                    (1 + grid_col_index) * (global_grid_resolution * 1000) +
                    geo_trans[0],
                    -grid_row_index * (global_grid_resolution * 1000) +
                    geo_trans[3])
                # close the ring on the first corner
                ring.AddPoint(
                    grid_col_index * (global_grid_resolution * 1000) +
                    geo_trans[0],
                    -grid_row_index * (global_grid_resolution * 1000) +
                    geo_trans[3])
                poly = ogr.Geometry(ogr.wkbPolygon)
                poly.AddGeometry(ring)

                feature = ogr.Feature(grid_layer.GetLayerDefn())
                feature.SetGeometry(poly)
                for value, field_name in zip(
                        line.rstrip().split(','), field_names):
                    if field_name in string_args:
                        # normalize the numeric nodata sentinel for text
                        # columns
                        if value == '-9999':
                            value = 'NA'
                        feature.SetField(field_name, str(value))
                    else:
                        try:
                            feature.SetField(field_name, float(value))
                        except ValueError:
                            feature.SetField(field_name, -9999)
                grid_layer.CreateFeature(feature)
        finally:
            grid_file.close()
        datasource.SyncToDisk()
        datasource = None
def run(self):
    """Aggregate raster layers and forest-table values onto coarse grids.

    For each resolution in GRID_RESOLUTION_LIST, writes one CSV
    (self.grid_output_file_list) with a row per grid cell that contains at
    least one valid biomass pixel.  Each row holds the cell id, lat/lng of
    the cell center, an averaged value per layer in
    ALIGNED_LAYERS_TO_AVERAGE, the modal value per layer in
    ALIGNED_LAYERS_TO_MAX, and the matching row of the forest-only table
    keyed by grid id (-9999 where missing).

    Returns:
        None

    Raises:
        Exception: if any ALIGNED_LAYERS_TO_AVERAGE file is missing
        IndexError: re-raised from the grid loop after deleting the
            partially written output file
    """
    biomass_ds = gdal.Open(GLOBAL_BIOMASS_URI, gdal.GA_ReadOnly)
    n_rows, n_cols = raster_utils.get_row_col_from_uri(GLOBAL_BIOMASS_URI)
    # transform projected coordinates to geographic lat/lng for output
    base_srs = osr.SpatialReference(biomass_ds.GetProjection())
    lat_lng_srs = base_srs.CloneGeogCS()
    coord_transform = osr.CoordinateTransformation(
        base_srs, lat_lng_srs)
    geo_trans = biomass_ds.GetGeoTransform()
    biomass_band = biomass_ds.GetRasterBand(1)
    biomass_nodata = biomass_band.GetNoDataValue()

    # forest table keyed by 'gridID'; header order taken from the first row
    forest_table = raster_utils.get_lookup_from_csv(
        self.forest_only_table_uri, 'gridID')
    forest_headers = list(forest_table.values()[0].keys())

    # fail fast if any of the layers to average is missing
    nonexistant_files = []
    for uri in ALIGNED_LAYERS_TO_AVERAGE:
        if not os.path.isfile(uri):
            nonexistant_files.append(uri)
    if len(nonexistant_files) > 0:
        raise Exception(
            "The following files don't exist: %s" %
            (str(nonexistant_files)))

    # open every layer once, up front
    average_dataset_list = [
        gdal.Open(uri) for uri in ALIGNED_LAYERS_TO_AVERAGE]
    average_band_list = [
        ds.GetRasterBand(1) for ds in average_dataset_list]
    average_nodata_list = [
        band.GetNoDataValue() for band in average_band_list]
    max_dataset_list = [gdal.Open(uri) for uri in ALIGNED_LAYERS_TO_MAX]
    max_band_list = [ds.GetRasterBand(1) for ds in max_dataset_list]
    max_nodata_list = [band.GetNoDataValue() for band in max_band_list]

    for global_grid_resolution, grid_output_filename in \
            zip(GRID_RESOLUTION_LIST, self.grid_output_file_list):
        try:
            grid_output_file = open(grid_output_filename, 'w')
            # header row: fixed columns, then one per layer (basename with
            # the 'aligned_' prefix stripped), then the forest headers
            grid_output_file.write('grid id,lat_coord,lng_coord')
            for filename in (
                    ALIGNED_LAYERS_TO_AVERAGE + ALIGNED_LAYERS_TO_MAX):
                grid_output_file.write(
                    ',%s' % os.path.splitext(
                        os.path.basename(filename))[0][len('aligned_'):])
            for header in forest_headers:
                grid_output_file.write(',%s' % header)
            grid_output_file.write('\n')

            # number of whole grid cells that fit in the raster extent
            # (geo_trans[5] is negative, hence the sign flip)
            n_grid_rows = int(
                (-geo_trans[5] * n_rows) / (global_grid_resolution * 1000))
            n_grid_cols = int(
                (geo_trans[1] * n_cols) / (global_grid_resolution * 1000))
            grid_row_stepsize = int(n_rows / float(n_grid_rows))
            grid_col_stepsize = int(n_cols / float(n_grid_cols))

            for grid_row in xrange(n_grid_rows):
                for grid_col in xrange(n_grid_cols):
                    # first check to make sure there is biomass at all!
                    global_row = grid_row * grid_row_stepsize
                    global_col = grid_col * grid_col_stepsize
                    # clamp window at the raster edge
                    global_col_size = min(
                        grid_col_stepsize, n_cols - global_col)
                    global_row_size = min(
                        grid_row_stepsize, n_rows - global_row)
                    array = biomass_band.ReadAsArray(
                        global_col, global_row, global_col_size,
                        global_row_size)
                    if numpy.count_nonzero(array != biomass_nodata) == 0:
                        continue

                    grid_id = '%d-%d' % (grid_row, grid_col)
                    # projected coordinate of the cell center (+0.5 cell)
                    grid_row_center = (
                        -(grid_row + 0.5) * (global_grid_resolution*1000) +
                        geo_trans[3])
                    grid_col_center = (
                        (grid_col + 0.5) * (global_grid_resolution*1000) +
                        geo_trans[0])
                    grid_lng_coord, grid_lat_coord, _ = (
                        coord_transform.TransformPoint(
                            grid_col_center, grid_row_center))
                    grid_output_file.write(
                        '%s,%s,%s' %
                        (grid_id, grid_lat_coord, grid_lng_coord))

                    # take the average values
                    # NOTE(review): the third zip argument is longer than
                    # the band lists, so zip truncates it to the AVERAGE
                    # layers only — presumably intended to be just
                    # ALIGNED_LAYERS_TO_AVERAGE; confirm
                    for band, nodata, layer_uri in zip(
                            average_band_list, average_nodata_list,
                            ALIGNED_LAYERS_TO_AVERAGE +
                            ALIGNED_LAYERS_TO_MAX):
                        # NOTE(review): shadows the zipped nodata value
                        # with a fresh lookup; same value in practice
                        nodata = band.GetNoDataValue()
                        array = band.ReadAsArray(
                            global_col, global_row, global_col_size,
                            global_row_size)
                        layer_name = os.path.splitext(
                            os.path.basename(layer_uri)) \
                            [0][len('aligned_'):]
                        # layers whose nodata pixels are excluded from the
                        # mean; all other layers count nodata as 0.0
                        pure_average_layers = [
                            'global_elevation', 'global_water_capacity',
                            'fi_average', 'lighted_area_luminosity',
                            'glbctd1t0503m', 'glbgtd1t0503m',
                            'glbpgd1t0503m', 'glbshd1t0503m', 'glds00ag',
                            'glds00g']
                        if layer_name not in pure_average_layers:
                            # zero-fill nodata so it dilutes the average
                            array[array == nodata] = 0.0
                        valid_values = array[array != nodata]
                        if valid_values.size != 0:
                            value = numpy.average(valid_values)
                        else:
                            value = -9999.
                        grid_output_file.write(',%f' % value)

                    # take the mode values
                    for band, nodata in zip(max_band_list, max_nodata_list):
                        nodata = band.GetNoDataValue()
                        array = band.ReadAsArray(
                            global_col, global_row, global_col_size,
                            global_row_size)
                        # get the most common value
                        valid_values = array[array != nodata]
                        if valid_values.size != 0:
                            value = scipy.stats.mode(valid_values)[0][0]
                            grid_output_file.write(',%f' % value)
                        else:
                            grid_output_file.write(',-9999')

                    # add the forest_only values; -9999 when this grid id
                    # has no row in the forest table
                    for header in forest_headers:
                        try:
                            value = forest_table[grid_id][header]
                            if type(value) == unicode:
                                # re-encode unicode cells for the csv
                                grid_output_file.write(
                                    ',%s' % forest_table[grid_id][header].\
                                    encode('latin-1', 'replace'))
                            else:
                                grid_output_file.write(
                                    ',%s' % forest_table[grid_id][header])
                        except KeyError:
                            grid_output_file.write(',-9999')
                    grid_output_file.write('\n')
            grid_output_file.close()
        except IndexError as exception:
            # drop the partial output so a rerun starts clean
            grid_output_file.close()
            os.remove(grid_output_filename)
            raise exception
def run(self):
    """Write the per-pixel forest-edge/biomass statistics table.

    Scans the global biomass, forest-edge-distance and ecoregion rasters
    block by block and, for every pixel with a valid positive edge
    distance and valid biomass, appends one ';'-delimited record to
    BIOMASS_STATS_URI: distance in meters, biomass, lat, lng, three
    ecoregion name fields, then one 'row-col' grid id per resolution in
    GRID_RESOLUTION_LIST.

    Returns:
        None
    """
    # ecoregion attribute table keyed by id; id -1 is the nodata sentinel
    ecoregion_lookup = raster_utils.extract_datasource_table_by_key(
        ECOREGION_SHAPEFILE_URI, 'ECO_ID_U')
    ecoregion_nodata = -1
    ecoregion_lookup[ecoregion_nodata] = {
        'ECO_NAME': 'UNKNOWN',
        'ECODE_NAME': 'UNKNOWN',
        'WWF_MHTNAM': 'UNKNOWN',
    }
    # cell size converts the edge distance from pixels to meters
    cell_size = raster_utils.get_cell_size_from_uri(
        FOREST_EDGE_DISTANCE_URI)
    forest_edge_nodata = raster_utils.get_nodata_from_uri(
        FOREST_EDGE_DISTANCE_URI)
    biomass_nodata = raster_utils.get_nodata_from_uri(GLOBAL_BIOMASS_URI)
    outfile = open(BIOMASS_STATS_URI, 'w')
    ecoregion_dataset = gdal.Open(ECOREGION_DATASET_URI)
    ecoregion_band = ecoregion_dataset.GetRasterBand(1)
    biomass_ds = gdal.Open(GLOBAL_BIOMASS_URI, gdal.GA_ReadOnly)
    biomass_band = biomass_ds.GetRasterBand(1)
    forest_edge_distance_ds = gdal.Open(FOREST_EDGE_DISTANCE_URI)
    forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1)
    n_rows, n_cols = raster_utils.get_row_col_from_uri(GLOBAL_BIOMASS_URI)
    # transform from the projected SRS to geographic lat/lng for output
    base_srs = osr.SpatialReference(biomass_ds.GetProjection())
    lat_lng_srs = base_srs.CloneGeogCS()
    coord_transform = osr.CoordinateTransformation(
        base_srs, lat_lng_srs)
    geo_trans = biomass_ds.GetGeoTransform()
    # iterate block-aligned for efficient raster reads
    block_col_size, block_row_size = biomass_band.GetBlockSize()
    n_global_block_rows = int(math.ceil(float(n_rows) / block_row_size))
    n_global_block_cols = int(math.ceil(float(n_cols) / block_col_size))
    last_time = time.time()
    for global_block_row in xrange(n_global_block_rows):
        # progress report at most every 5 seconds
        current_time = time.time()
        if current_time - last_time > 5.0:
            print (
                "aggregation %.1f%% complete" %
                (global_block_row / float(n_global_block_rows) * 100))
            last_time = current_time
        for global_block_col in xrange(n_global_block_cols):
            xoff = global_block_col * block_col_size
            yoff = global_block_row * block_row_size
            # clamp the final partial blocks to the raster edge
            win_xsize = min(block_col_size, n_cols - xoff)
            win_ysize = min(block_row_size, n_rows - yoff)
            biomass_block = biomass_band.ReadAsArray(
                xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                win_ysize=win_ysize)
            forest_edge_distance_block = (
                forest_edge_distance_band.ReadAsArray(
                    xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                    win_ysize=win_ysize))
            ecoregion_id_block = ecoregion_band.ReadAsArray(
                xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                win_ysize=win_ysize)
            for global_row in xrange(
                    global_block_row*block_row_size,
                    min((global_block_row+1)*block_row_size, n_rows)):
                for global_col in xrange(
                        global_block_col*block_col_size,
                        min((global_block_col+1)*block_col_size, n_cols)):
                    # projected coordinates of this pixel
                    row_coord = (
                        geo_trans[3] + global_row * geo_trans[5])
                    col_coord = (
                        geo_trans[0] + global_col * geo_trans[1])
                    # index of the pixel within the current block
                    local_row = (
                        global_row - global_block_row * block_row_size)
                    local_col = (
                        global_col - global_block_col * block_col_size)
                    lng_coord, lat_coord, _ = (
                        coord_transform.TransformPoint(
                            col_coord, row_coord))
                    ecoregion_id = ecoregion_id_block[local_row, local_col]
                    # only emit pixels with a valid, positive edge
                    # distance and valid biomass
                    if (forest_edge_distance_block[local_row, local_col] !=
                            forest_edge_nodata and
                            forest_edge_distance_block
                            [local_row, local_col] > 0.0 and
                            biomass_block
                            [local_row, local_col] != biomass_nodata):
                        outfile.write("%f;%f;%f;%f;%s;%s;%s" % (
                            forest_edge_distance_block
                            [local_row, local_col] * cell_size,
                            biomass_block[local_row, local_col],
                            lat_coord, lng_coord,
                            ecoregion_lookup[ecoregion_id]['ECO_NAME'],
                            ecoregion_lookup[ecoregion_id]['ECODE_NAME'],
                            ecoregion_lookup[ecoregion_id]['WWF_MHTNAM']))
                        for global_grid_resolution in GRID_RESOLUTION_LIST:
                            #output a grid coordinate in the form
                            #'grid_row-grid_col'
                            # offsets from the raster origin keep both
                            # indices non-negative (geo_trans[5] < 0)
                            grid_row = (
                                int((geo_trans[3] - row_coord) /
                                    (global_grid_resolution*1000)))
                            grid_col = (
                                int((col_coord - geo_trans[0]) /
                                    (global_grid_resolution*1000)))
                            grid_id = str(grid_row) + '-' + str(grid_col)
                            outfile.write(";%s" % grid_id)
                        outfile.write('\n')
    outfile.close()