def _aggregate_results(forest_edge_distance_uri, biomass_uri, ecoregion_dataset_uri, ecoregion_lookup, biomass_stats_uri):
    cell_size = raster_utils.get_cell_size_from_uri(forest_edge_distance_uri)

    forest_edge_nodata = raster_utils.get_nodata_from_uri(forest_edge_distance_uri)
    biomass_nodata = raster_utils.get_nodata_from_uri(biomass_uri)    

    outfile = open(biomass_stats_uri, 'w')

    ecoregion_dataset = gdal.Open(ecoregion_dataset_uri)
    ecoregion_band = ecoregion_dataset.GetRasterBand(1)

    biomass_ds = gdal.Open(biomass_uri, gdal.GA_ReadOnly)
    biomass_band = biomass_ds.GetRasterBand(1)

    forest_edge_distance_ds = gdal.Open(forest_edge_distance_uri)
    forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1)

    n_rows, n_cols = raster_utils.get_row_col_from_uri(biomass_uri)

    base_srs = osr.SpatialReference(biomass_ds.GetProjection())
    lat_lng_srs = base_srs.CloneGeogCS()
    coord_transform = osr.CoordinateTransformation(
        base_srs, lat_lng_srs)
    gt = biomass_ds.GetGeoTransform()

    grid_resolution_list = [25, 50, 100, 150, 200, 300, 400, 500]
    grid_coordinates = dict((resolution, {}) for resolution in grid_resolution_list)

    block_col_size, block_row_size = biomass_band.GetBlockSize()
    n_global_block_rows = int(math.ceil(float(n_rows) / block_row_size))
    n_global_block_cols = int(math.ceil(float(n_cols) / block_col_size))
    
    last_time = time.time()
    for global_block_row in xrange(n_global_block_rows):
        current_time = time.time()
        if current_time - last_time > 5.0:
            print "aggregation %.1f%% complete" % (global_block_row / float(n_global_block_rows) * 100)
            last_time = current_time
        for global_block_col in xrange(n_global_block_cols):
            xoff = global_block_col * block_col_size
            yoff = global_block_row * block_row_size
            win_xsize = min(block_col_size, n_cols - xoff)
            win_ysize = min(block_row_size, n_rows - yoff)
            biomass_block = biomass_band.ReadAsArray(
                xoff=xoff, yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize)
            forest_edge_distance_block = forest_edge_distance_band.ReadAsArray(
                xoff=xoff, yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize)
            ecoregion_id_block = ecoregion_band.ReadAsArray(
                xoff=xoff, yoff=yoff, win_xsize=win_xsize, win_ysize=win_ysize)

            for global_row in xrange(global_block_row*block_row_size, min((global_block_row+1)*block_row_size, n_rows)):
                for global_col in xrange(global_block_col*block_col_size, min((global_block_col+1)*block_col_size, n_cols)):
                    row_coord = gt[3] + global_row * gt[5]    
                    col_coord = gt[0] + global_col * gt[1]

                    local_row = global_row - global_block_row * block_row_size
                    local_col = global_col - global_block_col * block_col_size

                    lng_coord, lat_coord, _ = coord_transform.TransformPoint(
                        col_coord, row_coord)

                    #normalize the coordinates so they don't go negative
                    global_grid_row = row_coord - GLOBAL_UPPER_LEFT_ROW
                    global_grid_col = col_coord - GLOBAL_UPPER_LEFT_COL

                    ecoregion_id = ecoregion_id_block[local_row, local_col]
                    if (forest_edge_distance_block[local_row, local_col] != forest_edge_nodata and
                            forest_edge_distance_block[local_row, local_col] > 0.0 and
                            biomass_block[local_row, local_col] != biomass_nodata):

                        outfile.write("%f;%f;%f;%f;%s;%s;%s" % (
                            forest_edge_distance_block[local_row, local_col] * cell_size,
                            biomass_block[local_row, local_col], lat_coord, lng_coord,
                            ecoregion_lookup[ecoregion_id]['ECO_NAME'],
                            ecoregion_lookup[ecoregion_id]['ECODE_NAME'],
                            ecoregion_lookup[ecoregion_id]['WWF_MHTNAM']))
                        outfile.write(";%f;%f" % (global_grid_row, global_grid_col))
                        for global_grid_resolution in grid_resolution_list:
                            #output a grid coordinate in the form 'grid_row-grid_col'
                            grid_row = int(global_grid_row/(global_grid_resolution*1000))
                            grid_col = int(global_grid_col/(global_grid_resolution*1000))
                            grid_id = str(grid_row) + '-' + str(grid_col)
                            outfile.write(";%s" % grid_id)
                            if grid_id not in grid_coordinates[global_grid_resolution]:
                                grid_row_center = grid_row * global_grid_resolution*1000 + GLOBAL_UPPER_LEFT_ROW
                                grid_col_center = grid_col * global_grid_resolution*1000 + GLOBAL_UPPER_LEFT_COL
                                grid_lng_coord, grid_lat_coord, _ = coord_transform.TransformPoint(
                                    grid_col_center, grid_row_center)
                                grid_coordinates[global_grid_resolution][grid_id] = (grid_lat_coord, grid_lng_coord)
                                print grid_lat_coord, grid_lng_coord
                        outfile.write('/n')
    outfile.close()
    for global_grid_resolution in grid_resolution_list:
        output_dir, base_filename = os.path.split(biomass_stats_uri)
        basename = os.path.basename(base_filename)
        grid_output_file = open(os.path.join(output_dir, basename + '_' + str(global_grid_resolution) + '.csv'), 'w')
        grid_output_file.write('grid id;lat_coord;lng_coord/n')
        open(biomass_stats_uri, 'w')
        for grid_id, (lat, lng) in grid_coordinates[global_grid_resolution].iteritems():
            grid_output_file.write('%s;%s;%s/n' % (grid_id, lat, lng))
        grid_output_file.close()
    def run(self):
        """Convert each gridded CSV table into an ESRI Shapefile of cells.

        For every (resolution, csv table, shapefile path) triple zipped from
        GRID_RESOLUTION_LIST, self.grid_table_file_list and
        self.shapefile_output_list, builds a polygon layer in the projection
        of self.base_uri with one square feature per table row.  The first
        CSV column is the 'grid_row-grid_col' id that places the square; the
        remaining columns become shapefile fields (string fields for the
        names in string_args, real fields otherwise, with -9999 as the
        numeric missing-value sentinel and 'NA' as the string one).
        """
        #read row/col as a sanity check on the base raster (n_cols unused)
        _, n_cols = raster_utils.get_row_col_from_uri(self.base_uri)

        base_ds = gdal.Open(self.base_uri, gdal.GA_ReadOnly)
        geo_trans = base_ds.GetGeoTransform()
        output_sr = osr.SpatialReference(base_ds.GetProjection())
        #these columns carry text; everything else is numeric
        #(got this from reading the grid output)
        string_args = [
            'Confidence', 'gridID', 'forest', 'main_biome', 'main_ecoregion',
            'Continent']
        for global_grid_resolution, grid_filename, shapefile_filename in \
                zip(GRID_RESOLUTION_LIST, self.grid_table_file_list,
                    self.shapefile_output_list):

            #start from a clean shapefile each run
            if os.path.isfile(shapefile_filename):
                os.remove(shapefile_filename)

            driver = ogr.GetDriverByName('ESRI Shapefile')
            datasource = driver.CreateDataSource(shapefile_filename)

            #Create the layer name from the uri paths basename without the
            #extension
            uri_basename = os.path.basename(shapefile_filename)
            layer_name = os.path.splitext(uri_basename)[0].encode("utf-8")
            grid_layer = datasource.CreateLayer(
                layer_name, output_sr, ogr.wkbPolygon)

            #fixed: the original never closed this handle
            grid_file = open(grid_filename, 'rU')
            try:
                headers = grid_file.readline().rstrip().split(',')

                # Add a single ID field
                field = ogr.FieldDefn(headers[0], ogr.OFTString)
                grid_layer.CreateField(field)
                field_names = [headers[0]]
                for arg in headers[1:]:
                    #shapefile field names max out at 10 characters, so
                    #abbreviate the long headers deterministically
                    if arg.startswith('anthrome_'):
                        arg = 'anth' + arg[9:]
                    elif arg.startswith('prop_main'):
                        arg = 'pr_mn' + arg[9:14]
                    else:
                        arg = arg[:10]

                    if arg in string_args:
                        field = ogr.FieldDefn(arg, ogr.OFTString)
                    else:
                        field = ogr.FieldDefn(arg, ogr.OFTReal)
                    field_names.append(arg)
                    grid_layer.CreateField(field)
                #NOTE(review): there is no matching StartTransaction, so
                #this is likely a no-op -- confirm before removing
                grid_layer.CommitTransaction()

                for line in grid_file:
                    gridid = line.split(',')[0]
                    lat_coord = int(gridid.split('-')[0])
                    lng_coord = int(gridid.split('-')[1])

                    #square cell corners in projected units; grid rows count
                    #downward from the raster top, hence the negated row index
                    step = global_grid_resolution * 1000
                    x_min = lng_coord * step + geo_trans[0]
                    x_max = (1 + lng_coord) * step + geo_trans[0]
                    y_max = -lat_coord * step + geo_trans[3]
                    y_min = -(1 + lat_coord) * step + geo_trans[3]

                    #closed 5-point ring tracing the square
                    ring = ogr.Geometry(ogr.wkbLinearRing)
                    ring.AddPoint(x_min, y_max)
                    ring.AddPoint(x_min, y_min)
                    ring.AddPoint(x_max, y_min)
                    ring.AddPoint(x_max, y_max)
                    ring.AddPoint(x_min, y_max)

                    poly = ogr.Geometry(ogr.wkbPolygon)
                    poly.AddGeometry(ring)

                    feature = ogr.Feature(grid_layer.GetLayerDefn())
                    feature.SetGeometry(poly)
                    for value, field_name in zip(
                            line.rstrip().split(','), field_names):
                        if field_name in string_args:
                            #-9999 is the missing-value sentinel in the table
                            if value == '-9999':
                                value = 'NA'
                            feature.SetField(field_name, str(value))
                        else:
                            try:
                                feature.SetField(field_name, float(value))
                            except ValueError:
                                feature.SetField(field_name, -9999)
                    grid_layer.CreateFeature(feature)
            finally:
                grid_file.close()

            datasource.SyncToDisk()
            datasource = None
    def run(self):
        """Aggregate the aligned global layers into one CSV per resolution.

        For each resolution in GRID_RESOLUTION_LIST, partitions the global
        biomass raster into a coarse grid and writes one row per grid cell
        that contains any valid biomass.  Columns are: grid id, lat/lng of
        the cell center, the average of each layer in
        ALIGNED_LAYERS_TO_AVERAGE, the mode of each layer in
        ALIGNED_LAYERS_TO_MAX, then the columns of
        self.forest_only_table_uri looked up by grid id (-9999 when the
        grid id is absent from that table).  Output paths come from
        self.grid_output_file_list; on IndexError the partial output file
        is deleted and the exception re-raised.
        """
        biomass_ds = gdal.Open(GLOBAL_BIOMASS_URI, gdal.GA_ReadOnly)
        n_rows, n_cols = raster_utils.get_row_col_from_uri(GLOBAL_BIOMASS_URI)

        #transformation from the raster projection to geographic lat/lng
        base_srs = osr.SpatialReference(biomass_ds.GetProjection())
        lat_lng_srs = base_srs.CloneGeogCS()
        coord_transform = osr.CoordinateTransformation(
            base_srs, lat_lng_srs)
        geo_trans = biomass_ds.GetGeoTransform()
        biomass_band = biomass_ds.GetRasterBand(1)
        biomass_nodata = biomass_band.GetNoDataValue()

        #forest table keyed by its 'gridID' column; header order of the
        #first row drives the output columns
        forest_table = raster_utils.get_lookup_from_csv(
            self.forest_only_table_uri, 'gridID')
        forest_headers = list(forest_table.values()[0].keys())

        #fail early if any of the layers to average is missing
        nonexistant_files = []
        for uri in ALIGNED_LAYERS_TO_AVERAGE:
            if not os.path.isfile(uri):
                nonexistant_files.append(uri)
        if len(nonexistant_files) > 0:
            raise Exception(
                "The following files don't exist: %s" %
                (str(nonexistant_files)))

        average_dataset_list = [
            gdal.Open(uri) for uri in ALIGNED_LAYERS_TO_AVERAGE]

        average_band_list = [ds.GetRasterBand(1) for ds in average_dataset_list]
        average_nodata_list = [
            band.GetNoDataValue() for band in average_band_list]

        max_dataset_list = [gdal.Open(uri) for uri in ALIGNED_LAYERS_TO_MAX]
        max_band_list = [ds.GetRasterBand(1) for ds in max_dataset_list]
        max_nodata_list = [band.GetNoDataValue() for band in max_band_list]

        for global_grid_resolution, grid_output_filename in \
                zip(GRID_RESOLUTION_LIST, self.grid_output_file_list):
            try:
                grid_output_file = open(grid_output_filename, 'w')
                grid_output_file.write('grid id,lat_coord,lng_coord')
                #column names are the layer basenames with the extension and
                #the 'aligned_' prefix stripped
                for filename in (
                        ALIGNED_LAYERS_TO_AVERAGE + ALIGNED_LAYERS_TO_MAX):
                    grid_output_file.write(
                        ',%s' % os.path.splitext(
                            os.path.basename(filename))[0][len('aligned_'):])
                for header in forest_headers:
                    grid_output_file.write(',%s' % header)
                grid_output_file.write('\n')

                #number of whole grid cells that fit in the raster extent
                #(geo_trans[5] is negative for north-up rasters)
                n_grid_rows = int(
                    (-geo_trans[5] * n_rows) / (global_grid_resolution * 1000))
                n_grid_cols = int(
                    (geo_trans[1] * n_cols) / (global_grid_resolution * 1000))

                #pixels per grid cell along each axis
                grid_row_stepsize = int(n_rows / float(n_grid_rows))
                grid_col_stepsize = int(n_cols / float(n_grid_cols))

                for grid_row in xrange(n_grid_rows):
                    for grid_col in xrange(n_grid_cols):
                        #first check to make sure there is biomass at all!
                        global_row = grid_row * grid_row_stepsize
                        global_col = grid_col * grid_col_stepsize
                        #clip the read window at the raster edge
                        global_col_size = min(
                            grid_col_stepsize, n_cols - global_col)
                        global_row_size = min(
                            grid_row_stepsize, n_rows - global_row)
                        array = biomass_band.ReadAsArray(
                            global_col, global_row, global_col_size,
                            global_row_size)
                        #skip grid cells that are entirely biomass nodata
                        if numpy.count_nonzero(array != biomass_nodata) == 0:
                            continue

                        #lat/lng of the grid cell center (half a cell in
                        #from the upper-left origin)
                        grid_id = '%d-%d' % (grid_row, grid_col)
                        grid_row_center = (
                            -(grid_row + 0.5) * (global_grid_resolution*1000) +
                            geo_trans[3])
                        grid_col_center = (
                            (grid_col + 0.5) * (global_grid_resolution*1000) +
                            geo_trans[0])
                        grid_lng_coord, grid_lat_coord, _ = (
                            coord_transform.TransformPoint(
                                grid_col_center, grid_row_center))
                        grid_output_file.write(
                            '%s,%s,%s' % (grid_id, grid_lat_coord,
                                          grid_lng_coord))


                        #take the average values
                        #NOTE(review): zip truncates to the length of
                        #average_band_list, so the appended
                        #ALIGNED_LAYERS_TO_MAX uris are never paired here --
                        #confirm the concatenation is intentional
                        for band, nodata, layer_uri in zip(
                                average_band_list, average_nodata_list,
                                ALIGNED_LAYERS_TO_AVERAGE +
                                ALIGNED_LAYERS_TO_MAX):
                            #re-read nodata, shadowing the zipped value
                            nodata = band.GetNoDataValue()
                            array = band.ReadAsArray(
                                global_col, global_row, global_col_size,
                                global_row_size)
                            layer_name = os.path.splitext(
                                os.path.basename(layer_uri)) \
                            [0][len('aligned_'):]

                            #layers whose average should ignore nodata
                            #pixels entirely
                            pure_average_layers = [
                                'global_elevation', 'global_water_capacity',
                                'fi_average', 'lighted_area_luminosity',
                                'glbctd1t0503m', 'glbgtd1t0503m',
                                'glbpgd1t0503m', 'glbshd1t0503m', 'glds00ag',
                                'glds00g']
                            #for all other layers, nodata pixels are zeroed
                            #first so they count toward the average
                            #(assuming nodata != 0)
                            if layer_name not in pure_average_layers:
                                array[array == nodata] = 0.0
                            valid_values = array[array != nodata]
                            if valid_values.size != 0:
                                value = numpy.average(valid_values)
                            else:
                                value = -9999.
                            grid_output_file.write(',%f' % value)

                        #take the mode values
                        #(despite the 'max' naming these layers are
                        #summarized by their most common valid value)
                        for band, nodata in zip(max_band_list, max_nodata_list):
                            nodata = band.GetNoDataValue()
                            array = band.ReadAsArray(
                                global_col, global_row, global_col_size,
                                global_row_size)
                            #get the most common value
                            valid_values = array[array != nodata]
                            if valid_values.size != 0:
                                value = scipy.stats.mode(valid_values)[0][0]
                                grid_output_file.write(',%f' % value)
                            else:
                                grid_output_file.write(',-9999')

                        #add the forest_only values
                        for header in forest_headers:
                            try:
                                value = forest_table[grid_id][header]
                                #py2: re-encode unicode so write() can't
                                #raise on non-ascii text
                                if type(value) == unicode:
                                    grid_output_file.write(
                                        ',%s' % forest_table[grid_id][header].\
                                        encode('latin-1', 'replace'))
                                else:
                                    grid_output_file.write(
                                        ',%s' % forest_table[grid_id][header])
                            except KeyError:
                                #grid id or header missing from forest table
                                grid_output_file.write(',-9999')


                        grid_output_file.write('\n')
                grid_output_file.close()
            except IndexError as exception:
                #don't leave a partial table behind on failure
                grid_output_file.close()
                os.remove(grid_output_filename)
                raise exception
    def run(self):
        """Write the global per-pixel biomass/edge-distance sample table.

        Walks the global biomass raster block by block and appends one
        ';'-separated line to BIOMASS_STATS_URI for every pixel with a
        valid, positive forest edge distance and valid biomass:
        edge distance (scaled by cell size), biomass, lat, lng, the three
        ecoregion name fields, then one 'grid_row-grid_col' id per
        resolution in GRID_RESOLUTION_LIST derived from the geotransform
        offsets.  All inputs come from module-level *_URI constants.
        """
        #map each pixel's ecoregion id to its names; the nodata id (-1)
        #falls back to UNKNOWN entries
        ecoregion_lookup = raster_utils.extract_datasource_table_by_key(
            ECOREGION_SHAPEFILE_URI, 'ECO_ID_U')
        ecoregion_nodata = -1
        ecoregion_lookup[ecoregion_nodata] = {
            'ECO_NAME': 'UNKNOWN',
            'ECODE_NAME': 'UNKNOWN',
            'WWF_MHTNAM': 'UNKNOWN',
            }
        cell_size = raster_utils.get_cell_size_from_uri(
            FOREST_EDGE_DISTANCE_URI)
        forest_edge_nodata = raster_utils.get_nodata_from_uri(
            FOREST_EDGE_DISTANCE_URI)
        biomass_nodata = raster_utils.get_nodata_from_uri(GLOBAL_BIOMASS_URI)
        outfile = open(BIOMASS_STATS_URI, 'w')

        ecoregion_dataset = gdal.Open(ECOREGION_DATASET_URI)
        ecoregion_band = ecoregion_dataset.GetRasterBand(1)

        biomass_ds = gdal.Open(GLOBAL_BIOMASS_URI, gdal.GA_ReadOnly)
        biomass_band = biomass_ds.GetRasterBand(1)

        forest_edge_distance_ds = gdal.Open(FOREST_EDGE_DISTANCE_URI)
        forest_edge_distance_band = forest_edge_distance_ds.GetRasterBand(1)

        n_rows, n_cols = raster_utils.get_row_col_from_uri(GLOBAL_BIOMASS_URI)

        #transformation from the raster projection to geographic lat/lng
        base_srs = osr.SpatialReference(biomass_ds.GetProjection())
        lat_lng_srs = base_srs.CloneGeogCS()
        coord_transform = osr.CoordinateTransformation(
            base_srs, lat_lng_srs)
        geo_trans = biomass_ds.GetGeoTransform()

        #iterate in raster-block order so each band is read sequentially
        block_col_size, block_row_size = biomass_band.GetBlockSize()
        n_global_block_rows = int(math.ceil(float(n_rows) / block_row_size))
        n_global_block_cols = int(math.ceil(float(n_cols) / block_col_size))

        last_time = time.time()
        for global_block_row in xrange(n_global_block_rows):
            current_time = time.time()
            #progress report at most once every 5 seconds
            if current_time - last_time > 5.0:
                print (
                    "aggregation %.1f%% complete" %
                    (global_block_row / float(n_global_block_rows) * 100))
                last_time = current_time
            for global_block_col in xrange(n_global_block_cols):
                xoff = global_block_col * block_col_size
                yoff = global_block_row * block_row_size
                #clip the read window at the raster edge
                win_xsize = min(block_col_size, n_cols - xoff)
                win_ysize = min(block_row_size, n_rows - yoff)
                biomass_block = biomass_band.ReadAsArray(
                    xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                    win_ysize=win_ysize)
                forest_edge_distance_block = (
                    forest_edge_distance_band.ReadAsArray(
                        xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                        win_ysize=win_ysize))
                ecoregion_id_block = ecoregion_band.ReadAsArray(
                    xoff=xoff, yoff=yoff, win_xsize=win_xsize,
                    win_ysize=win_ysize)

                for global_row in xrange(
                        global_block_row*block_row_size,
                        min((global_block_row+1)*block_row_size, n_rows)):
                    for global_col in xrange(
                            global_block_col*block_col_size,
                            min((global_block_col+1)*block_col_size, n_cols)):
                        #projected coordinates of this pixel's origin
                        row_coord = (
                            geo_trans[3] + global_row * geo_trans[5])
                        col_coord = (
                            geo_trans[0] + global_col * geo_trans[1])

                        #indexes into the in-memory block arrays
                        local_row = (
                            global_row - global_block_row * block_row_size)
                        local_col = (
                            global_col - global_block_col * block_col_size)

                        lng_coord, lat_coord, _ = (
                            coord_transform.TransformPoint(
                                col_coord, row_coord))

                        ecoregion_id = ecoregion_id_block[local_row, local_col]
                        #only emit pixels with valid, positive edge distance
                        #and valid biomass
                        if (forest_edge_distance_block[local_row, local_col] !=
                                forest_edge_nodata and
                                forest_edge_distance_block
                                [local_row, local_col] > 0.0 and
                                biomass_block
                                [local_row, local_col] != biomass_nodata):
                            #edge distance is in pixels; scale by cell size
                            outfile.write("%f;%f;%f;%f;%s;%s;%s" % (
                                forest_edge_distance_block
                                [local_row, local_col] * cell_size,
                                biomass_block[local_row, local_col],
                                lat_coord, lng_coord,
                                ecoregion_lookup[ecoregion_id]['ECO_NAME'],
                                ecoregion_lookup[ecoregion_id]['ECODE_NAME'],
                                ecoregion_lookup[ecoregion_id]['WWF_MHTNAM']))
                            for global_grid_resolution in GRID_RESOLUTION_LIST:
                                #output a grid coordinate in the form
                                #'grid_row-grid_col'
                                grid_row = (
                                    int((geo_trans[3] - row_coord) /
                                        (global_grid_resolution*1000)))
                                grid_col = (
                                    int((col_coord - geo_trans[0]) /
                                        (global_grid_resolution*1000)))
                                grid_id = str(grid_row) + '-' + str(grid_col)
                                outfile.write(";%s" % grid_id)
                            outfile.write('\n')
        outfile.close()