def run(self):
    """Build a global forest/non-forest mask raster from GLOBAL_LANDCOVER_URI
    and derive a forest-edge distance raster from it via
    raster_utils.distance_transform_edt."""
    lulc_nodata = raster_utils.get_nodata_from_uri(GLOBAL_LANDCOVER_URI)
    forest_lulc_codes = [1, 2, 3, 4, 5]  # landcover codes treated as forest
    mask_uri = os.path.join(OUTPUT_DIR, "forest_mask.tif")
    mask_nodata = 2  # distinct from the 0/1 mask values; fits in GDT_Byte

    def mask_nonforest(lulc):
        """Takes in a numpy array of landcover values and returns 0s where
        they match forest codes, 1s otherwise, and mask_nodata where the
        landcover value is nodata."""
        mask = numpy.empty(lulc.shape, dtype=numpy.int8)
        mask[:] = 1
        for lulc_code in forest_lulc_codes:
            mask[lulc == lulc_code] = 0
        mask[lulc == lulc_nodata] = mask_nodata
        return mask

    cell_size = raster_utils.get_cell_size_from_uri(GLOBAL_LANDCOVER_URI)
    # vectorize_op=False: mask_nonforest receives whole numpy blocks
    raster_utils.vectorize_datasets(
        [GLOBAL_LANDCOVER_URI,], mask_nonforest, mask_uri, gdal.GDT_Byte,
        mask_nodata, cell_size, 'intersection', dataset_to_align_index=0,
        dataset_to_bound_index=None, aoi_uri=None,
        assert_datasets_projected=True, process_pool=None, vectorize_op=False,
        datasets_are_pre_aligned=True)
    raster_utils.distance_transform_edt(
        mask_uri, FOREST_EDGE_DISTANCE_URI)
def run(self):
    """Write GLOBAL_LANDCOVER_URI as the union landcover raster clipped to
    the intersection of the union landcover and union biomass rasters."""
    def keep_landcover(lulc_array, _biomass_array):
        # The biomass raster only constrains the output extent; the pixel
        # values come straight from the landcover raster.
        return lulc_array

    out_nodata = raster_utils.get_nodata_from_uri(UNION_LANDCOVER_URI)
    out_cell_size = raster_utils.get_cell_size_from_uri(UNION_LANDCOVER_URI)
    raster_utils.vectorize_datasets(
        [UNION_LANDCOVER_URI, UNION_BIOMASS_URI], keep_landcover,
        GLOBAL_LANDCOVER_URI, gdal.GDT_Int16, out_nodata, out_cell_size,
        "intersection", dataset_to_align_index=0, vectorize_op=False)
def _align_raster_with_biomass(input_uri, output_uri):
    """Function to use internally to take an input and align it with the
    GLOBAL_BIOMASS_URI raster.

    Parameters:
        input_uri (string): path to the raster to align.
        output_uri (string): path for the aligned GDT_Float32 output.
    """
    # Fall back to -9999 when the input raster declares no nodata value.
    input_nodata = raster_utils.get_nodata_from_uri(input_uri)
    input_nodata = -9999 if input_nodata is None else input_nodata
    biomass_cell_size = raster_utils.get_cell_size_from_uri(GLOBAL_BIOMASS_URI)

    def take_input(input_array, _biomass_array):
        # Pass the input values through; the biomass raster fixes the grid.
        return input_array

    raster_utils.vectorize_datasets(
        [input_uri, GLOBAL_BIOMASS_URI], take_input, output_uri,
        gdal.GDT_Float32, input_nodata, biomass_cell_size, "dataset",
        dataset_to_bound_index=1, vectorize_op=False)
def run(self):
    """Merge self.dataset_uri_list into one raster at self.dataset_out_uri:
    later datasets override earlier ones wherever they are not nodata."""
    first_uri = self.dataset_uri_list[0]
    nodata = raster_utils.get_nodata_from_uri(first_uri)
    out_cell_size = raster_utils.get_cell_size_from_uri(first_uri)

    def union_op(*array_list):
        """Fold the array stack left to right, preferring defined values;
        used for overlapping nodata stacks."""
        merged = array_list[0]
        for layer in array_list[1:]:
            merged = numpy.where(layer != nodata, layer, merged)
        return merged

    raster_utils.vectorize_datasets(
        list(self.dataset_uri_list), union_op, self.dataset_out_uri,
        gdal.GDT_Int16, nodata, out_cell_size, "union",
        dataset_to_align_index=0, vectorize_op=False)
def run(self):
    """Union the three continental biomass rasters into a single raster at
    self.output_uri, creating the output directory if needed."""
    biomass_raster_list = [
        "C:/Users/rpsharp/Dropbox_stanford/Dropbox/forest_edge_carbon/af_biov2ct1.tif",
        "C:/Users/rpsharp/Dropbox_stanford/Dropbox/forest_edge_carbon/am_biov2ct1.tif",
        "C:/Users/rpsharp/Dropbox_stanford/Dropbox/forest_edge_carbon/as_biov2ct1.tif",
    ]
    first_raster = biomass_raster_list[0]
    nodata = raster_utils.get_nodata_from_uri(first_raster)
    cell_size = raster_utils.get_cell_size_from_uri(first_raster)

    def union_op(*biomass_array_list):
        # Later rasters win wherever they hold a non-nodata value.
        merged = biomass_array_list[0]
        for layer in biomass_array_list[1:]:
            merged = numpy.where(layer != nodata, layer, merged)
        return merged

    raster_utils.create_directories([os.path.dirname(self.output_uri)])
    raster_utils.vectorize_datasets(
        biomass_raster_list, union_op, self.output_uri, gdal.GDT_Int16,
        nodata, cell_size, 'union', dataset_to_align_index=0,
        vectorize_op=False)
def process_ecoregion(prefix):
    """Process one regional landcover/biomass pair identified by `prefix`:
    align the rasters, rasterize ecoregion ids, build a forest mask,
    compute forest edge distance, and aggregate biomass stats to CSV."""
    ecoregion_shapefile_uri = os.path.join(
        DATA_DIR, 'ecoregions', 'ecoregions_projected.shp')
    # ECO_ID_U -> dict of ecoregion attributes
    ecoregion_lookup = raster_utils.extract_datasource_table_by_key(
        ecoregion_shapefile_uri, 'ECO_ID_U')
    ecoregion_nodata = -1
    # sentinel entry so pixels outside any ecoregion still resolve
    ecoregion_lookup[ecoregion_nodata] = {
        'ECO_NAME': 'UNKNOWN',
        'ECODE_NAME': 'UNKNOWN',
        'WWF_MHTNAM': 'UNKNOWN',
        }
    lulc_raw_uri = os.path.join(DATA_DIR, '%s%s' % (prefix, LULC_BASE))
    biomass_raw_uri = os.path.join(DATA_DIR, '%s%s' % (prefix, BIOMASS_BASE))
    cell_size = raster_utils.get_cell_size_from_uri(lulc_raw_uri)
    lulc_uri = os.path.join(OUTPUT_DIR, "%s_lulc_aligned.tif" % (prefix))
    biomass_uri = os.path.join(
        OUTPUT_DIR, "%s_biomass_aligned.tif" % (prefix))
    # align landcover and biomass onto a common intersection grid
    raster_utils.align_dataset_list(
        [lulc_raw_uri, biomass_raw_uri], [lulc_uri, biomass_uri],
        ['nearest']*2, cell_size, 'intersection', 0,
        dataset_to_bound_index=None, aoi_uri=None,
        assert_datasets_projected=True, process_pool=None)

    #create ecoregion id
    ecoregion_dataset_uri = os.path.join(
        OUTPUT_DIR, "%s_ecoregion_id.tif" % (prefix))
    raster_utils.new_raster_from_base_uri(
        lulc_uri, ecoregion_dataset_uri, 'GTiff', ecoregion_nodata,
        gdal.GDT_Int16)
    raster_utils.rasterize_layer_uri(
        ecoregion_dataset_uri, ecoregion_shapefile_uri,
        option_list=["ATTRIBUTE=ECO_ID_U"])

    lulc_nodata = raster_utils.get_nodata_from_uri(lulc_uri)
    forest_lulc_codes = [1, 2, 3, 4, 5]  # landcover codes treated as forest
    mask_uri = os.path.join(OUTPUT_DIR, "%s_mask.tif" % prefix)
    mask_nodata = 2  # distinct from the 0/1 mask values

    def mask_nonforest(lulc):
        """Return 0 where lulc holds a forest code, 1 elsewhere, and
        mask_nodata where lulc is nodata."""
        mask = numpy.empty(lulc.shape, dtype=numpy.int8)
        mask[:] = 1
        for lulc_code in forest_lulc_codes:
            mask[lulc == lulc_code] = 0
        mask[lulc == lulc_nodata] = mask_nodata
        return mask

    raster_utils.vectorize_datasets(
        [lulc_uri,], mask_nonforest, mask_uri, gdal.GDT_Byte,
        mask_nodata, cell_size, 'intersection', dataset_to_align_index=0,
        dataset_to_bound_index=None, aoi_uri=None,
        assert_datasets_projected=True, process_pool=None, vectorize_op=False,
        datasets_are_pre_aligned=True)

    forest_edge_distance_uri = os.path.join(
        OUTPUT_DIR, "%s_forest_edge.tif" % prefix)
    raster_utils.distance_transform_edt(mask_uri, forest_edge_distance_uri)

    biomass_stats_uri = os.path.join(
        OUTPUT_DIR, "%s_biomass_stats.csv" % prefix)
    _aggregate_results(
        forest_edge_distance_uri, biomass_uri, ecoregion_dataset_uri,
        ecoregion_lookup, biomass_stats_uri)
def _aggregate_results(forest_edge_distance_uri, biomass_uri, ecoregion_dataset_uri, ecoregion_lookup, biomass_stats_uri): cell_size = raster_utils.get_cell_size_from_uri(forest_edge_distance_uri) forest_edge_nodata = raster_utils.get_nodata_from_uri(forest_edge_distance_uri) biomass_nodata = raster_utils.get_nodata_from_uri(biomass_uri) outfile = open(biomass_stats_uri, 'w') ecoregion_dataset = gdal.Open(ecoregion_dataset_uri) ecoregion_band = ecoregion_dataset.GetRasterBand(1) biomass_ds = gdal.Open(biomass_uri, gdal.GA_ReadOnly) biomass_band = biomass_ds.GetRasterBand(1) forest_edge_distance_ds = gdal.Open(forest_edge_distance_uri) forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1) n_rows, n_cols = raster_utils.get_row_col_from_uri(biomass_uri) base_srs = osr.SpatialReference(biomass_ds.GetProjection()) lat_lng_srs = base_srs.CloneGeogCS() coord_transform = osr.CoordinateTransformation( base_srs, lat_lng_srs) gt = biomass_ds.GetGeoTransform() grid_resolution_list = [25, 50, 100, 150, 200, 300, 400, 500] grid_coordinates = dict((resolution, {}) for resolution in grid_resolution_list) block_col_size, block_row_size = biomass_band.GetBlockSize() n_global_block_rows = int(math.ceil(float(n_rows) / block_row_size)) n_global_block_cols = int(math.ceil(float(n_cols) / block_col_size)) last_time = time.time() for global_block_row in xrange(n_global_block_rows): current_time = time.time() if current_time - last_time > 5.0: print "aggregation %.1f%% complete" % (global_block_row / float(n_global_block_rows) * 100) last_time = current_time for global_block_col in xrange(n_global_block_cols): xoff = global_block_col * block_col_size yoff = global_block_row * block_row_size win_xsize = min(block_col_size, n_cols - xoff) win_ysize = min(block_row_size, n_rows - yoff) biomass_block = biomass_band.ReadAsArray( xoff=xoff, yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize) forest_edge_distance_block = forest_edge_distance_band.ReadAsArray( xoff=xoff, 
yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize) ecoregion_id_block = ecoregion_band.ReadAsArray( xoff=xoff, yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize) for global_row in xrange(global_block_row*block_row_size, min((global_block_row+1)*block_row_size, n_rows)): for global_col in xrange(global_block_col*block_col_size, min((global_block_col+1)*block_col_size, n_cols)): row_coord = gt[3] + global_row * gt[5] col_coord = gt[0] + global_col * gt[1] local_row = global_row - global_block_row * block_row_size local_col = global_col - global_block_col * block_col_size lng_coord, lat_coord, _ = coord_transform.TransformPoint( col_coord, row_coord) #normalize the coordinates so they don't go negative global_grid_row = row_coord - GLOBAL_UPPER_LEFT_ROW global_grid_col = col_coord - GLOBAL_UPPER_LEFT_COL ecoregion_id = ecoregion_id_block[local_row, local_col] if (forest_edge_distance_block[local_row, local_col] != forest_edge_nodata and forest_edge_distance_block[local_row, local_col] > 0.0 and biomass_block[local_row, local_col] != biomass_nodata): outfile.write("%f;%f;%f;%f;%s;%s;%s" % ( forest_edge_distance_block[local_row, local_col] * cell_size, biomass_block[local_row, local_col], lat_coord, lng_coord, ecoregion_lookup[ecoregion_id]['ECO_NAME'], ecoregion_lookup[ecoregion_id]['ECODE_NAME'], ecoregion_lookup[ecoregion_id]['WWF_MHTNAM'])) outfile.write(";%f;%f" % (global_grid_row, global_grid_col)) for global_grid_resolution in grid_resolution_list: #output a grid coordinate in the form 'grid_row-grid_col' grid_row = int(global_grid_row/(global_grid_resolution*1000)) grid_col = int(global_grid_col/(global_grid_resolution*1000)) grid_id = str(grid_row) + '-' + str(grid_col) outfile.write(";%s" % grid_id) if grid_id not in grid_coordinates[global_grid_resolution]: grid_row_center = grid_row * global_grid_resolution*1000 + GLOBAL_UPPER_LEFT_ROW grid_col_center = grid_col * global_grid_resolution*1000 + GLOBAL_UPPER_LEFT_COL grid_lng_coord, grid_lat_coord, _ = 
coord_transform.TransformPoint( grid_col_center, grid_row_center) grid_coordinates[global_grid_resolution][grid_id] = (grid_lat_coord, grid_lng_coord) print grid_lat_coord, grid_lng_coord outfile.write('/n') outfile.close() for global_grid_resolution in grid_resolution_list: output_dir, base_filename = os.path.split(biomass_stats_uri) basename = os.path.basename(base_filename) grid_output_file = open(os.path.join(output_dir, basename + '_' + str(global_grid_resolution) + '.csv'), 'w') grid_output_file.write('grid id;lat_coord;lng_coord/n') open(biomass_stats_uri, 'w') for grid_id, (lat, lng) in grid_coordinates[global_grid_resolution].iteritems(): grid_output_file.write('%s;%s;%s/n' % (grid_id, lat, lng)) grid_output_file.close()
def _map_intensity(forest_edge_distance_uri, biomass_uri): grid_resolution_list = [25, 50, 100, 150, 200, 300, 400, 500] forest_edge_distance_ds = gdal.Open(forest_edge_distance_uri) forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1) forest_edge_distance_nodata = raster_utils.get_nodata_from_uri(forest_edge_distance_uri) biomass_ds = gdal.Open(biomass_uri) biomass_band = biomass_ds.GetRasterBand(1) biomass_nodata = raster_utils.get_nodata_from_uri(biomass_uri) n_rows = biomass_ds.RasterYSize n_cols = biomass_ds.RasterXSize projection = biomass_ds.GetProjection() geotransform = biomass_ds.GetGeoTransform() driver = gdal.GetDriverByName('GTiff') gt = biomass_ds.GetGeoTransform() for grid_resolution in grid_resolution_list: output_dir, base_filename = os.path.split(biomass_uri) basename = os.path.basename(base_filename) output_uri = os.path.join( output_dir, basename + '_intensity_' + str(grid_resolution) + '.tif') n_rows_grid = int(-gt[5] * n_rows / (grid_resolution * 1000.0)) n_cols_grid = int(gt[1] * n_cols / (grid_resolution * 1000.0)) new_geotransform = ( gt[0], grid_resolution * 1000.0, gt[2], gt[3], gt[4], -grid_resolution * 1000.0) output_ds = driver.Create( output_uri.encode('utf-8'), n_cols_grid, n_rows_grid, 1, gdal.GDT_Float32) output_ds.SetProjection(projection) output_ds.SetGeoTransform(new_geotransform) output_band = output_ds.GetRasterBand(1) output_nodata = -1 output_band.SetNoDataValue(output_nodata) output_band.Fill(output_nodata) last_time = time.time() for grid_row_index in xrange(n_rows_grid): current_time = time.time() if current_time - last_time > 5.0: print "magnitude %.1f%% complete" % (grid_row_index / float(n_rows_grid) * 100) last_time = current_time for grid_col_index in xrange(n_cols_grid): xoff = int(grid_col_index * (grid_resolution * 1000.0) / (gt[1])) yoff = int(grid_row_index * (grid_resolution * 1000.0) / (-gt[5])) win_xsize = int((grid_resolution * 1000.0) / (gt[1])) win_ysize = int((grid_resolution * 1000.0) / 
(gt[1])) biomass_block = biomass_band.ReadAsArray( xoff=xoff, yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize) forest_edge_distance_block = forest_edge_distance_band.ReadAsArray( xoff=xoff, yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize) valid_mask = numpy.where( (forest_edge_distance_block != forest_edge_distance_nodata) & (biomass_block != biomass_nodata)) flat_valid_biomass = biomass_block[valid_mask] sorted_forest_edge = numpy.argsort(flat_valid_biomass) flat_biomass = flat_valid_biomass[sorted_forest_edge] n_elements = flat_biomass.size if n_elements <= 10: continue lower_biomass = numpy.average(flat_biomass[0:int(n_elements*0.1)]) upper_biomass = numpy.average(flat_biomass[int(n_elements*0.9):n_elements]) if lower_biomass == 0: continue magnitude = upper_biomass/lower_biomass output_band.WriteArray( numpy.array([[magnitude]]), xoff=grid_col_index, yoff=grid_row_index)
def run(self):
    """Aggregate global biomass statistics: for every forest pixel with a
    positive edge distance and valid biomass, write one semicolon-separated
    record (distance; biomass; lat; lng; ecoregion fields; one grid id per
    resolution in GRID_RESOLUTION_LIST) to BIOMASS_STATS_URI."""
    # ECO_ID_U -> dict of ecoregion attributes
    ecoregion_lookup = raster_utils.extract_datasource_table_by_key(
        ECOREGION_SHAPEFILE_URI, 'ECO_ID_U')
    ecoregion_nodata = -1
    # sentinel entry so pixels outside any ecoregion still resolve
    ecoregion_lookup[ecoregion_nodata] = {
        'ECO_NAME': 'UNKNOWN',
        'ECODE_NAME': 'UNKNOWN',
        'WWF_MHTNAM': 'UNKNOWN',
        }
    cell_size = raster_utils.get_cell_size_from_uri(
        FOREST_EDGE_DISTANCE_URI)
    forest_edge_nodata = raster_utils.get_nodata_from_uri(
        FOREST_EDGE_DISTANCE_URI)
    biomass_nodata = raster_utils.get_nodata_from_uri(GLOBAL_BIOMASS_URI)
    outfile = open(BIOMASS_STATS_URI, 'w')
    ecoregion_dataset = gdal.Open(ECOREGION_DATASET_URI)
    ecoregion_band = ecoregion_dataset.GetRasterBand(1)
    biomass_ds = gdal.Open(GLOBAL_BIOMASS_URI, gdal.GA_ReadOnly)
    biomass_band = biomass_ds.GetRasterBand(1)
    forest_edge_distance_ds = gdal.Open(FOREST_EDGE_DISTANCE_URI)
    forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1)

    n_rows, n_cols = raster_utils.get_row_col_from_uri(GLOBAL_BIOMASS_URI)
    # transform from the raster's projected coordinates to lat/lng
    base_srs = osr.SpatialReference(biomass_ds.GetProjection())
    lat_lng_srs = base_srs.CloneGeogCS()
    coord_transform = osr.CoordinateTransformation(
        base_srs, lat_lng_srs)
    geo_trans = biomass_ds.GetGeoTransform()

    # iterate in raster-block-sized windows for I/O efficiency
    block_col_size, block_row_size = biomass_band.GetBlockSize()
    n_global_block_rows = int(math.ceil(float(n_rows) / block_row_size))
    n_global_block_cols = int(math.ceil(float(n_cols) / block_col_size))

    last_time = time.time()
    for global_block_row in xrange(n_global_block_rows):
        current_time = time.time()
        if current_time - last_time > 5.0:
            print (
                "aggregation %.1f%% complete" %
                (global_block_row / float(n_global_block_rows) * 100))
            last_time = current_time
        for global_block_col in xrange(n_global_block_cols):
            xoff = global_block_col * block_col_size
            yoff = global_block_row * block_row_size
            win_xsize = min(block_col_size, n_cols - xoff)
            win_ysize = min(block_row_size, n_rows - yoff)
            biomass_block = biomass_band.ReadAsArray(
                xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                win_ysize=win_ysize)
            forest_edge_distance_block = (
                forest_edge_distance_band.ReadAsArray(
                    xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                    win_ysize=win_ysize))
            ecoregion_id_block = ecoregion_band.ReadAsArray(
                xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                win_ysize=win_ysize)
            for global_row in xrange(
                    global_block_row*block_row_size,
                    min((global_block_row+1)*block_row_size, n_rows)):
                for global_col in xrange(
                        global_block_col*block_col_size,
                        min((global_block_col+1)*block_col_size, n_cols)):
                    # projected coordinates of this pixel
                    row_coord = (
                        geo_trans[3] + global_row * geo_trans[5])
                    col_coord = (
                        geo_trans[0] + global_col * geo_trans[1])
                    # indexes into the current block arrays
                    local_row = (
                        global_row - global_block_row * block_row_size)
                    local_col = (
                        global_col - global_block_col * block_col_size)
                    lng_coord, lat_coord, _ = (
                        coord_transform.TransformPoint(
                            col_coord, row_coord))
                    ecoregion_id = ecoregion_id_block[local_row, local_col]
                    # emit a record only for valid forest pixels
                    if (forest_edge_distance_block[local_row, local_col] !=
                            forest_edge_nodata and
                            forest_edge_distance_block
                            [local_row, local_col] > 0.0 and
                            biomass_block
                            [local_row, local_col] != biomass_nodata):
                        outfile.write("%f;%f;%f;%f;%s;%s;%s" % (
                            forest_edge_distance_block
                            [local_row, local_col] * cell_size,
                            biomass_block[local_row, local_col],
                            lat_coord, lng_coord,
                            ecoregion_lookup[ecoregion_id]['ECO_NAME'],
                            ecoregion_lookup[ecoregion_id]['ECODE_NAME'],
                            ecoregion_lookup[ecoregion_id]['WWF_MHTNAM']))
                        for global_grid_resolution in GRID_RESOLUTION_LIST:
                            #output a grid coordinate in the form
                            #'grid_row-grid_col'
                            grid_row = (
                                int((geo_trans[3] - row_coord) /
                                    (global_grid_resolution*1000)))
                            grid_col = (
                                int((col_coord - geo_trans[0]) /
                                    (global_grid_resolution*1000)))
                            grid_id = str(grid_row) + '-' + str(grid_col)
                            outfile.write(";%s" % grid_id)
                        outfile.write('\n')
    outfile.close()