def run(self):
        """Build a forest mask from the global landcover raster and run a
        distance transform over it.

        The landcover raster at GLOBAL_LANDCOVER_URI is reclassified with
        ``mask_nonforest`` into ``forest_mask.tif`` under OUTPUT_DIR, and
        that mask is then handed to ``raster_utils.distance_transform_edt``
        together with FOREST_EDGE_DISTANCE_URI.

        Returns:
            None
        """
        lulc_nodata = raster_utils.get_nodata_from_uri(GLOBAL_LANDCOVER_URI)
        cell_size = raster_utils.get_cell_size_from_uri(GLOBAL_LANDCOVER_URI)

        # Landcover codes that are treated as forest.
        forest_lulc_codes = [1, 2, 3, 4, 5]

        mask_nodata = 2
        mask_uri = os.path.join(OUTPUT_DIR, "forest_mask.tif")

        def mask_nonforest(lulc):
            """Classify a numpy array of landcover values.

            Returns an int8 array of the same shape holding 0 where the
            landcover equals one of the forest codes, ``mask_nodata``
            where it equals the landcover nodata value, and 1 everywhere
            else (i.e. non-forest pixels are the ones flagged with 1).
            """
            result = numpy.ones(lulc.shape, dtype=numpy.int8)
            for forest_code in forest_lulc_codes:
                result[lulc == forest_code] = 0
            # Applied last so nodata wins over any forest code match.
            result[lulc == lulc_nodata] = mask_nodata
            return result

        # vectorize_op=False: mask_nonforest already works on whole arrays.
        raster_utils.vectorize_datasets(
            [GLOBAL_LANDCOVER_URI],
            mask_nonforest,
            mask_uri,
            gdal.GDT_Byte,
            mask_nodata,
            cell_size,
            'intersection',
            dataset_to_align_index=0,
            dataset_to_bound_index=None,
            aoi_uri=None,
            assert_datasets_projected=True,
            process_pool=None,
            vectorize_op=False,
            datasets_are_pre_aligned=True)

        # NOTE(review): second argument is presumably the output raster
        # path -- confirm against raster_utils.distance_transform_edt.
        raster_utils.distance_transform_edt(
            mask_uri, FOREST_EDGE_DISTANCE_URI)
def process_ecoregion(prefix):
    """Run the ecoregion / forest-edge / biomass pipeline for one prefix.

    Steps, in the order they are executed:

    1. Read the ecoregion shapefile's attribute table keyed on
       ``ECO_ID_U`` and add an all-'UNKNOWN' record under the ecoregion
       nodata id (-1).
    2. Align the prefixed landcover and biomass rasters ('nearest'
       resampling, 'intersection' bounds, landcover cell size).
    3. Create an Int16 GTiff based on the aligned landcover raster and
       rasterize the ecoregion shapefile into it with
       ``ATTRIBUTE=ECO_ID_U``.
    4. Reclassify the aligned landcover into a mask raster using
       ``mask_nonforest`` and pass it to
       ``raster_utils.distance_transform_edt``.
    5. Call ``_aggregate_results`` with the distance, biomass and
       ecoregion rasters, the lookup table and the stats CSV path.

    Args:
        prefix: string prepended to LULC_BASE and BIOMASS_BASE to locate
            the input rasters under DATA_DIR, and to every output
            filename built under OUTPUT_DIR.

    Returns:
        None
    """
    def output_path(template):
        # All outputs live in OUTPUT_DIR and carry the prefix in their name.
        return os.path.join(OUTPUT_DIR, template % prefix)

    ecoregion_shapefile_uri = os.path.join(
        DATA_DIR, 'ecoregions', 'ecoregions_projected.shp')
    ecoregion_nodata = -1

    ecoregion_lookup = raster_utils.extract_datasource_table_by_key(
        ecoregion_shapefile_uri, 'ECO_ID_U')
    # Give the nodata id a placeholder record so lookups on it succeed.
    unknown_record = {}
    for field_name in ('ECO_NAME', 'ECODE_NAME', 'WWF_MHTNAM'):
        unknown_record[field_name] = 'UNKNOWN'
    ecoregion_lookup[ecoregion_nodata] = unknown_record

    lulc_raw_uri = os.path.join(DATA_DIR, '%s%s' % (prefix, LULC_BASE))
    biomass_raw_uri = os.path.join(
        DATA_DIR, '%s%s' % (prefix, BIOMASS_BASE))

    cell_size = raster_utils.get_cell_size_from_uri(lulc_raw_uri)

    lulc_uri = output_path("%s_lulc_aligned.tif")
    biomass_uri = output_path("%s_biomass_aligned.tif")

    raw_uris = [lulc_raw_uri, biomass_raw_uri]
    aligned_uris = [lulc_uri, biomass_uri]
    resample_methods = ['nearest', 'nearest']
    raster_utils.align_dataset_list(
        raw_uris, aligned_uris, resample_methods, cell_size,
        'intersection', 0,
        dataset_to_bound_index=None,
        aoi_uri=None,
        assert_datasets_projected=True,
        process_pool=None)

    # Ecoregion id raster: new raster based on the aligned landcover,
    # then the shapefile is rasterized into it using ECO_ID_U.
    ecoregion_dataset_uri = output_path("%s_ecoregion_id.tif")
    raster_utils.new_raster_from_base_uri(
        lulc_uri, ecoregion_dataset_uri, 'GTiff', ecoregion_nodata,
        gdal.GDT_Int16)
    raster_utils.rasterize_layer_uri(
        ecoregion_dataset_uri, ecoregion_shapefile_uri,
        option_list=["ATTRIBUTE=ECO_ID_U"])

    lulc_nodata = raster_utils.get_nodata_from_uri(lulc_uri)

    # Landcover codes that are treated as forest.
    forest_lulc_codes = [1, 2, 3, 4, 5]

    mask_uri = output_path("%s_mask.tif")
    mask_nodata = 2

    def mask_nonforest(lulc):
        """Return an int8 array: 0 where `lulc` is a forest code,
        `mask_nodata` where it equals the landcover nodata value, and 1
        everywhere else."""
        result = numpy.ones(lulc.shape, dtype=numpy.int8)
        for forest_code in forest_lulc_codes:
            result[lulc == forest_code] = 0
        # Applied last so nodata wins over any forest code match.
        result[lulc == lulc_nodata] = mask_nodata
        return result

    # vectorize_op=False: mask_nonforest already works on whole arrays.
    raster_utils.vectorize_datasets(
        [lulc_uri],
        mask_nonforest,
        mask_uri,
        gdal.GDT_Byte,
        mask_nodata,
        cell_size,
        'intersection',
        dataset_to_align_index=0,
        dataset_to_bound_index=None,
        aoi_uri=None,
        assert_datasets_projected=True,
        process_pool=None,
        vectorize_op=False,
        datasets_are_pre_aligned=True)

    forest_edge_distance_uri = output_path("%s_forest_edge.tif")
    raster_utils.distance_transform_edt(mask_uri, forest_edge_distance_uri)

    biomass_stats_uri = output_path("%s_biomass_stats.csv")
    _aggregate_results(
        forest_edge_distance_uri,
        biomass_uri,
        ecoregion_dataset_uri,
        ecoregion_lookup,
        biomass_stats_uri)