def run(self):
        """Rasterize the ecoregion shapefile into an ecoregion-id raster.

        A new Int16 GeoTIFF is created at ECOREGION_DATASET_URI using
        GLOBAL_LANDCOVER_URI as the base raster and -1 as its nodata value;
        the 'ECO_ID_U' attribute of ECOREGION_SHAPEFILE_URI is then burned
        into it.
        """
        nodata_id = -1

        # Attribute table keyed by 'ECO_ID_U', extended with a placeholder
        # row for the nodata id.
        # NOTE(review): the table is not read again inside this method --
        # confirm whether the call is still needed here.
        attribute_table = raster_utils.extract_datasource_table_by_key(
            ECOREGION_SHAPEFILE_URI, 'ECO_ID_U')
        attribute_table[nodata_id] = dict.fromkeys(
            ('ECO_NAME', 'ECODE_NAME', 'WWF_MHTNAM'), 'UNKNOWN')

        # Create the empty id raster, then burn the shapefile ids into it.
        raster_utils.new_raster_from_base_uri(
            GLOBAL_LANDCOVER_URI,
            ECOREGION_DATASET_URI,
            'GTiff',
            nodata_id,
            gdal.GDT_Int16)
        raster_utils.rasterize_layer_uri(
            ECOREGION_DATASET_URI,
            ECOREGION_SHAPEFILE_URI,
            option_list=["ATTRIBUTE=ECO_ID_U"])
def process_ecoregion(prefix):
    """Build the forest-edge and biomass statistics for one dataset prefix.

    The steps, in order:
      1. load the ecoregion attribute table (keyed by 'ECO_ID_U') and add an
         'UNKNOWN' placeholder row for the nodata id -1;
      2. align the raw landcover and biomass rasters found in DATA_DIR
         (nearest-neighbour resampling, 'intersection' bounds, landcover
         cell size);
      3. rasterize the ecoregion ids onto the aligned landcover grid;
      4. write a mask raster that is 0 on forest landcover codes (1-5),
         1 on every other code and 2 (the mask nodata) where the landcover
         is nodata;
      5. run raster_utils.distance_transform_edt on that mask;
      6. pass the distance, biomass and ecoregion rasters, the attribute
         table and the output CSV path to _aggregate_results.

    All outputs are written to OUTPUT_DIR with `prefix` prepended to the
    file name.

    Args:
        prefix: string prepended to LULC_BASE / BIOMASS_BASE to locate the
            raw rasters, and to each output file name.
    """
    shapefile_path = os.path.join(
        DATA_DIR, 'ecoregions', 'ecoregions_projected.shp')

    # Attribute table plus a placeholder row for pixels with no ecoregion.
    unknown_id = -1
    attribute_table = raster_utils.extract_datasource_table_by_key(
        shapefile_path, 'ECO_ID_U')
    attribute_table[unknown_id] = dict.fromkeys(
        ('ECO_NAME', 'ECODE_NAME', 'WWF_MHTNAM'), 'UNKNOWN')

    # Raw inputs and their aligned counterparts.
    raw_lulc_path = os.path.join(DATA_DIR, '%s%s' % (prefix, LULC_BASE))
    raw_biomass_path = os.path.join(
        DATA_DIR, '%s%s' % (prefix, BIOMASS_BASE))
    pixel_size = raster_utils.get_cell_size_from_uri(raw_lulc_path)

    aligned_lulc_path = os.path.join(
        OUTPUT_DIR, "%s_lulc_aligned.tif" % (prefix))
    aligned_biomass_path = os.path.join(
        OUTPUT_DIR, "%s_biomass_aligned.tif" % (prefix))

    raster_utils.align_dataset_list(
        [raw_lulc_path, raw_biomass_path],
        [aligned_lulc_path, aligned_biomass_path],
        ['nearest', 'nearest'],
        pixel_size, 'intersection', 0,
        dataset_to_bound_index=None, aoi_uri=None,
        assert_datasets_projected=True, process_pool=None)

    # Ecoregion id raster on the aligned landcover grid.
    ecoregion_raster_path = os.path.join(
        OUTPUT_DIR, "%s_ecoregion_id.tif" % (prefix))
    raster_utils.new_raster_from_base_uri(
        aligned_lulc_path, ecoregion_raster_path, 'GTiff', unknown_id,
        gdal.GDT_Int16)
    raster_utils.rasterize_layer_uri(
        ecoregion_raster_path, shapefile_path,
        option_list=["ATTRIBUTE=ECO_ID_U"])

    # Forest / non-forest mask.
    landcover_nodata = raster_utils.get_nodata_from_uri(aligned_lulc_path)
    forest_codes = [1, 2, 3, 4, 5]
    mask_path = os.path.join(OUTPUT_DIR, "%s_mask.tif" % prefix)
    mask_nodata = 2

    def mask_nonforest(lulc):
        """Return 0 on forest codes, 1 elsewhere, mask_nodata on nodata."""
        result = numpy.ones(lulc.shape, dtype=numpy.int8)
        for code in forest_codes:
            result[lulc == code] = 0
        # Applied last so nodata wins over any code it might coincide with.
        result[lulc == landcover_nodata] = mask_nodata
        return result

    raster_utils.vectorize_datasets(
        [aligned_lulc_path,], mask_nonforest, mask_path, gdal.GDT_Byte,
        mask_nodata, pixel_size, 'intersection',
        dataset_to_align_index=0, dataset_to_bound_index=None,
        aoi_uri=None, assert_datasets_projected=True, process_pool=None,
        vectorize_op=False, datasets_are_pre_aligned=True)

    # Distance transform of the mask, then per-pixel aggregation.
    edge_distance_path = os.path.join(
        OUTPUT_DIR, "%s_forest_edge.tif" % prefix)
    raster_utils.distance_transform_edt(mask_path, edge_distance_path)

    stats_csv_path = os.path.join(
        OUTPUT_DIR, "%s_biomass_stats.csv" % prefix)
    _aggregate_results(
        edge_distance_path, aligned_biomass_path, ecoregion_raster_path,
        attribute_table, stats_csv_path)
    def run(self):
        """Write one ';'-separated record per valid pixel to BIOMASS_STATS_URI.

        The biomass, forest-edge-distance and ecoregion-id rasters are read
        block by block (using the biomass band's block size). A pixel is
        emitted when its edge distance is not nodata and is > 0, and its
        biomass is not nodata. Each record holds:

            edge distance * cell size; biomass; latitude; longitude;
            ECO_NAME; ECODE_NAME; WWF_MHTNAM; one 'grid_row-grid_col' id
            per entry of GRID_RESOLUTION_LIST

        Pixel coordinates are computed as geotransform origin + index *
        pixel size (no half-pixel offset) and converted to the geographic
        coordinate system cloned from the biomass raster's projection.

        Raises:
            KeyError: presumably, if a pixel's ecoregion id is missing from
                the attribute table (assumes the table is a dict -- TODO
                confirm against raster_utils).
        """
        ecoregion_lookup = raster_utils.extract_datasource_table_by_key(
            ECOREGION_SHAPEFILE_URI, 'ECO_ID_U')
        ecoregion_nodata = -1
        ecoregion_lookup[ecoregion_nodata] = {
            'ECO_NAME': 'UNKNOWN',
            'ECODE_NAME': 'UNKNOWN',
            'WWF_MHTNAM': 'UNKNOWN',
            }
        cell_size = raster_utils.get_cell_size_from_uri(
            FOREST_EDGE_DISTANCE_URI)
        forest_edge_nodata = raster_utils.get_nodata_from_uri(
            FOREST_EDGE_DISTANCE_URI)
        biomass_nodata = raster_utils.get_nodata_from_uri(GLOBAL_BIOMASS_URI)

        ecoregion_dataset = gdal.Open(ECOREGION_DATASET_URI)
        ecoregion_band = ecoregion_dataset.GetRasterBand(1)

        biomass_ds = gdal.Open(GLOBAL_BIOMASS_URI, gdal.GA_ReadOnly)
        biomass_band = biomass_ds.GetRasterBand(1)

        forest_edge_distance_ds = gdal.Open(FOREST_EDGE_DISTANCE_URI)
        forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1)

        n_rows, n_cols = raster_utils.get_row_col_from_uri(GLOBAL_BIOMASS_URI)

        base_srs = osr.SpatialReference(biomass_ds.GetProjection())
        lat_lng_srs = base_srs.CloneGeogCS()
        coord_transform = osr.CoordinateTransformation(
            base_srs, lat_lng_srs)
        geo_trans = biomass_ds.GetGeoTransform()

        block_col_size, block_row_size = biomass_band.GetBlockSize()
        n_global_block_rows = int(math.ceil(float(n_rows) / block_row_size))
        n_global_block_cols = int(math.ceil(float(n_cols) / block_col_size))

        last_time = time.time()
        # The context manager guarantees the output file is closed even if a
        # raster read, coordinate transform or table lookup raises part-way.
        with open(BIOMASS_STATS_URI, 'w') as outfile:
            for global_block_row in xrange(n_global_block_rows):
                # Progress report at most once every 5 seconds.
                current_time = time.time()
                if current_time - last_time > 5.0:
                    print (
                        "aggregation %.1f%% complete" %
                        (global_block_row / float(n_global_block_rows) * 100))
                    last_time = current_time

                yoff = global_block_row * block_row_size
                win_ysize = min(block_row_size, n_rows - yoff)
                for global_block_col in xrange(n_global_block_cols):
                    xoff = global_block_col * block_col_size
                    win_xsize = min(block_col_size, n_cols - xoff)

                    biomass_block = biomass_band.ReadAsArray(
                        xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                        win_ysize=win_ysize)
                    forest_edge_distance_block = (
                        forest_edge_distance_band.ReadAsArray(
                            xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                            win_ysize=win_ysize))
                    ecoregion_id_block = ecoregion_band.ReadAsArray(
                        xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                        win_ysize=win_ysize)

                    for local_row in xrange(win_ysize):
                        # Depends on the row only, so computed once per row.
                        row_coord = (
                            geo_trans[3] + (yoff + local_row) * geo_trans[5])
                        for local_col in xrange(win_xsize):
                            edge_distance = forest_edge_distance_block[
                                local_row, local_col]
                            biomass = biomass_block[local_row, local_col]
                            # Positive form kept on purpose: a NaN distance
                            # fails `> 0.0` and is skipped.
                            if not (edge_distance != forest_edge_nodata and
                                    edge_distance > 0.0 and
                                    biomass != biomass_nodata):
                                continue

                            # Only valid pixels pay for the (comparatively
                            # expensive) coordinate transform.
                            col_coord = (
                                geo_trans[0] +
                                (xoff + local_col) * geo_trans[1])
                            lng_coord, lat_coord, _ = (
                                coord_transform.TransformPoint(
                                    col_coord, row_coord))

                            ecoregion_row = ecoregion_lookup[
                                ecoregion_id_block[local_row, local_col]]
                            fields = ["%f;%f;%f;%f;%s;%s;%s" % (
                                edge_distance * cell_size,
                                biomass,
                                lat_coord, lng_coord,
                                ecoregion_row['ECO_NAME'],
                                ecoregion_row['ECODE_NAME'],
                                ecoregion_row['WWF_MHTNAM'])]
                            for global_grid_resolution in GRID_RESOLUTION_LIST:
                                # Grid coordinate in the form
                                # 'grid_row-grid_col'. The *1000 presumably
                                # converts a resolution in km to the raster's
                                # metre units -- TODO confirm.
                                grid_row = (
                                    int((geo_trans[3] - row_coord) /
                                        (global_grid_resolution*1000)))
                                grid_col = (
                                    int((col_coord - geo_trans[0]) /
                                        (global_grid_resolution*1000)))
                                fields.append(
                                    str(grid_row) + '-' + str(grid_col))
                            outfile.write(';'.join(fields) + '\n')