Example #1
0
def convert_project_to_gdal_checkerboards(project: Project) -> List[str]:
    """Create one checkerboard VRT definition per domain of ``project``.

    Each domain is turned into a band-less VRT dataset written to a temporary
    VSI path, georeferenced from its bounding box and cell size, and extended
    with a single Byte band that is configured as a Python pixel function.
    The serialized VRT is then read back as a string.

    Approach taken from
    https://github.com/OSGeo/gdal/blob/master/autotest/gdrivers/vrtderived.py

    :param project: Provides ``data['domains']``, ``bboxes`` and ``projection.wkt``.
    :return: The VRT strings, one per domain, in domain order.
    """
    tmp_path = get_temp_vsi_path()

    domain_list = project.data['domains']
    domain_bboxes = project.bboxes

    # The band configuration does not depend on the domain.
    band_options = [
        'subClass=VRTDerivedRasterBand', 'PixelFunctionLanguage=Python',
        'PixelFunctionType=gis4wrf.core.gdal_checkerboard_pixelfunction'
    ]

    checkerboards = []
    for i, dom in enumerate(domain_list):
        box = domain_bboxes[i]
        cell_w, cell_h = dom['cell_size']
        width, height = dom['domain_size_padded']

        # North-up transform anchored at (minx, maxy) of the bounding box.
        transform = (box.minx, cell_w, 0, box.maxy, 0, -cell_h)

        vrt_driver = gdal.GetDriverByName('VRT')  # type: gdal.Driver
        # Created without bands; the derived band is added below.
        dataset = vrt_driver.Create(tmp_path, width, height, 0)  # type: gdal.Dataset
        dataset.SetProjection(project.projection.wkt)
        dataset.SetGeoTransform(transform)
        dataset.AddBand(gdal.GDT_Byte, band_options)
        dataset.FlushCache()

        # PixelFunctionLanguage is lost, see https://github.com/OSGeo/gdal/issues/501.
        # fix_pixelfunction_vrt() fixes that for older gdal versions.
        checkerboards.append(fix_pixelfunction_vrt(read_vsi_string(tmp_path)))

    return checkerboards
Example #2
0
def array_to_raster(path, array, bounds, crs):
    '''
    Write a 2D array to a single-band GeoTIFF file.

    path: str, location of the file to create
    array: 2D array, north up (array[0,0] is most north west pixel)
    bounds: Bounds2D, minimum and maximum extent of data in CRS coordinates (measured at corners of pixels)
    :param osgeo.osr.SpatialReference crs: the CRS to be used
    '''

    # TODO generalize to arbitrary number of bands
    n_rows, n_cols = array.shape
    n_bands = 1

    # Pixel (0,0) of the image is the top left corner of array[0,0],
    # which in CRS coordinates is (min x, max y) of the bounds.
    left = bounds.min.x
    top = bounds.max.y

    # Extent of a single array element in CRS units.
    cell_w = (bounds.max.x - bounds.min.x) / n_cols
    cell_h = (bounds.max.y - bounds.min.y) / n_rows

    assert cell_w > 0
    assert cell_h > 0

    # Negative y step: rows advance from north to south.
    transform = (left, cell_w, 0, top, 0, -cell_h)

    gtiff = gdal.GetDriverByName('GTiff')
    gdal_dtype = gdal_array.NumericTypeCodeToGDALTypeCode(array.dtype)
    raster = gtiff.Create(path, n_cols, n_rows, n_bands, gdal_dtype)
    raster.SetGeoTransform(transform)

    # The data goes into the first (and only) band.
    raster.GetRasterBand(1).WriteArray(array)

    raster.SetProjection(crs.ExportToWkt())

    # Write to disk
    raster.FlushCache()
Example #3
0
def convert_wps_binary_to_vrt_dataset(
        folder: str,
        use_vsi: bool = False) -> Tuple[str, str, str, Callable[[], None]]:
    """Converts a WPS Binary format dataset into a mosaic VRT dataset referencing per-tile VRT datasets.

    :param folder: Folder holding the WPS binary index metadata and the tile files.
    :param use_vsi: If True, the VRT files are created below a temporary VSI path,
        otherwise inside a temporary directory.
    :return: Tuple ``(mosaic_vrt_path, title, short_name, dispose)``. ``short_name``
        is the base name of ``folder``; ``title`` extends it with the units and
        description from the index metadata when present. ``dispose`` removes the
        generated VRT files (VSI case) or the temporary directory.
    :raises UnsupportedError: For a ``regular_ll`` projection with ``stdlon`` set,
        an unsupported word size/signedness combination, or an unknown projection.
    :raises UserError: If ``folder`` contains no tile files.
    """

    m = read_wps_binary_index_file(folder)

    if m.proj_id == 'regular_ll' and m.stdlon is not None:
        raise UnsupportedError('Rotated pole system is not supported')

    # scan folder for available tiles
    # Tile files are named <start_x>-<end_x>.<start_y>-<end_y> with a fixed
    # number of digits per index. Raw strings keep the regex escapes intact.
    digits = r'\d{' + str(m.filename_digits) + '}'
    tile_filename_re = re.compile(
        r'^({d})-({d})\.({d})-({d})$'.format(d=digits))
    tiles = []
    for filename in os.listdir(folder):
        match = tile_filename_re.match(filename)
        if match:
            tiles.append({
                'filename': filename,
                'path': os.path.join(folder, filename),
                'start_x': int(match.group(1)),
                'end_x': int(match.group(2)),
                'start_y': int(match.group(3)),
                'end_y': int(match.group(4))
            })
    if not tiles:
        raise UserError(f'No tiles found in {folder}')

    # determine raster dimensions
    xsize = max(tile['end_x'] for tile in tiles)  # type: int
    ysize = max(tile['end_y'] for tile in tiles)  # type: int
    zsize = m.tile_z_end - m.tile_z_start + 1

    # convert to GDAL metadata
    dtype_mapping = {
        (1, False): gdal.GDT_Byte,  # GDAL only supports unsigned byte
        (2, False): gdal.GDT_UInt16,
        (2, True): gdal.GDT_Int16,
        (3, False): gdal.GDT_UInt32,
        (3, True): gdal.GDT_Int32
    }
    try:
        dtype = dtype_mapping[(m.word_size, m.signed)]
    except KeyError:
        # The KeyError itself carries no extra information for the caller.
        raise UnsupportedError(
            'word_size={} signed={} is not supported'.format(
                m.word_size, m.signed)) from None

    if m.proj_id == 'regular_ll':
        crs = CRS.create_lonlat()
    elif m.proj_id == 'lambert':
        # The map distortion of a Lambert Conformal projection is fully
        # defined by the two true latitudes.
        #
        # However, the longitude of origin is important for WRF as well,
        # since we only deal with upright rectangles (the domains) on the map.
        # For that reason, WRF allows the user to define the "standard longitude"
        # which is the longitude of origin.
        #
        # The latitude of origin on the other hand does not have any significance
        # here and cannot be specified by the user. The geo transform for a given
        # grid is computed based on any arbitrary latitude of origin (see below).
        # In QGIS, the only difference are the displayed projected y coordinates,
        # but the actual grid georeferencing is unaffected.
        # This is possible as WRF's georeferencing metadata is based on geographical
        # reference coordinates for a grid cell, not projected coordinates.
        arbitrary_latitude_origin = (m.truelat1 + m.truelat2) / 2
        origin = LonLat(lon=m.stdlon, lat=arbitrary_latitude_origin)
        crs = CRS.create_lambert(m.truelat1, m.truelat2, origin)
    elif m.proj_id == 'mercator':
        # The map distortion of a Mercator projection is fully
        # defined by the true latitude.
        # The longitude of origin does not have any significance and
        # any arbitrary value is handled when computing the geo transform
        # for a given grid (see below). See also the comment above for Lambert.
        arbitrary_longitude_origin = 0
        crs = CRS.create_mercator(m.truelat1, arbitrary_longitude_origin)
    elif m.proj_id == 'albers_nad83':
        # See the comment above for Lambert. The same applies here.
        arbitrary_latitude_origin = (m.truelat1 + m.truelat2) / 2
        origin = LonLat(lon=m.stdlon, lat=arbitrary_latitude_origin)
        crs = CRS.create_albers_nad83(m.truelat1, m.truelat2, origin)
    # FIXME handle polar vs polar_wgs84 differently
    elif m.proj_id in ('polar', 'polar_wgs84'):
        # See the comment above for Lambert. The same applies here.
        crs = CRS.create_polar(m.truelat1, m.stdlon)
    else:
        raise UnsupportedError(f'Projection {m.proj_id} is not supported')

    # Shift the known index by half a cell and, for top-bottom row order,
    # count it from the opposite end of the y axis.
    known_x_idx_gdal = m.known_idx.x - 0.5
    if m.top_bottom:
        known_y_idx_gdal = ysize - m.known_idx.y - 0.5
        dy_gdal = -m.dy
    else:
        known_y_idx_gdal = m.known_idx.y - 0.5
        dy_gdal = m.dy

    known_xy = crs.to_xy(m.known_lonlat)
    upper_left_x = known_xy.x - known_x_idx_gdal * m.dx
    # NOTE(review): the offset is added for both row orders although dy_gdal
    # is positive in the bottom-up case -- confirm this is intended.
    upper_left_y = known_xy.y + known_y_idx_gdal * m.dy
    geo_transform = (upper_left_x, m.dx, 0, upper_left_y, 0, dy_gdal)

    # VRTRawRasterBand metadata
    line_width = m.word_size * (m.tile_x + m.tile_bdr * 2
                                )  # x size incl. border
    tile_size = line_width * (m.tile_y + m.tile_bdr * 2
                              )  # tile size incl. border
    line_offset = line_width
    # Skip the border rows and the left border of the first data row.
    image_offset = m.tile_bdr * line_width + m.tile_bdr * m.word_size
    pixel_offset = m.word_size
    byte_order = 'LSB' if m.little_endian else 'MSB'

    # create tile VRTs
    if use_vsi:
        out_dir = get_temp_vsi_path(ext='')
    else:
        out_dir = get_temp_dir()

    driver = gdal.GetDriverByName('VRT')  # type: gdal.Driver
    tile_vrt_paths = {}
    for tile in tiles:
        vsi_path = '{}/{}.vrt'.format(out_dir, tile['filename'])
        vrt = driver.Create(vsi_path, m.tile_x, m.tile_y,
                            0)  # type: gdal.Dataset

        # One raw band per z level; each level occupies tile_size bytes.
        for z in range(m.tile_z_start - 1, m.tile_z_end):
            options = [
                'subClass=VRTRawRasterBand',
                'SourceFilename={}'.format(tile['path']), 'relativeToVRT=0',
                'ImageOffset={}'.format(z * tile_size + image_offset),
                'PixelOffset={}'.format(pixel_offset),
                'LineOffset={}'.format(line_offset), 'ByteOrder=' + byte_order
            ]
            vrt.AddBand(dtype, options)
        vrt.FlushCache()

        tile_vrt_paths[tile['filename']] = vsi_path

    # create mosaic VRT
    mosaic_vrt_path = '{}/mosaic.vrt'.format(out_dir)
    vrt = driver.Create(mosaic_vrt_path, xsize, ysize, zsize,
                        dtype)  # type: gdal.Dataset
    vrt.SetProjection(crs.proj4)
    vrt.SetGeoTransform(geo_transform)

    if m.categorical:
        color_table, cat_names = get_gdal_categories(m.categories,
                                                     m.category_min,
                                                     m.category_max)

    for band_idx in range(1, zsize + 1):
        band = vrt.GetRasterBand(band_idx)  # type: gdal.Band
        if m.missing_value is not None:
            band.SetNoDataValue(m.missing_value)

        band.SetScale(m.scale_factor)

        if m.categorical:
            band.SetRasterColorInterpretation(gdal.GCI_PaletteIndex)
            band.SetRasterColorTable(color_table)
            band.SetRasterCategoryNames(cat_names)

        # Each tile VRT contributes one source placed at its mosaic offset.
        sources = {}
        for idx, tile in enumerate(tiles):
            tile_vrt_path = tile_vrt_paths[tile['filename']]

            if m.top_bottom:
                end_y = ysize - tile['start_y'] - 1
                start_y = end_y - m.tile_y + 1
            else:
                start_y = tile['start_y'] - 1

            sources['source_{}'.format(idx)] = ('''
                <SimpleSource>
                    <SourceFilename relativeToVRT="0">{path}</SourceFilename>
                    <SourceBand>{band}</SourceBand>
                    <SrcRect xOff="0" yOff="0" xSize="{tile_x}" ySize="{tile_y}" />
                    <DstRect xOff="{offset_x}" yOff="{offset_y}" xSize="{tile_x}" ySize="{tile_y}" />
                </SimpleSource>''').format(path=tile_vrt_path,
                                           band=band_idx,
                                           tile_x=m.tile_x,
                                           tile_y=m.tile_y,
                                           offset_x=tile['start_x'] - 1,
                                           offset_y=start_y)
        band.SetMetadata(sources, 'vrt_sources')

    vrt.FlushCache()

    vrt_paths = [mosaic_vrt_path] + list(tile_vrt_paths.values())
    if use_vsi:
        dispose = partial(remove_vsis, vrt_paths)
    else:
        dispose = partial(remove_dir, out_dir)

    short_name = os.path.basename(folder)
    title = short_name
    if m.units and m.units != 'category':
        title += ' in ' + m.units
    if m.description:
        title += ' (' + m.description + ')'

    # The title is returned as VRT does not support dataset descriptions.
    return mosaic_vrt_path, title, short_name, dispose
Example #4
0
def convert_wrf_nc_var_to_gdal_dataset(
        path: str,
        var_name: str,
        extra_dim_index: Optional[int],
        interp_level: Optional[float],
        interp_vert_name: Optional[str],
        fmt: GDALFormat = GDALFormat.GTIFF,
        use_vsi: bool = False) -> Tuple[str, Callable[[], None]]:
    """Expose a variable of a WRF/WPS netCDF file as a GDAL dataset with one band per time step.

    Diagnostic variables (``DIAG_VARS``), vertically interpolated variables and
    4D variables are always written as GeoTIFF, regardless of ``fmt``.

    :param path: Path to the netCDF file.
    :param var_name: Name of the variable, either stored in the file or one of ``DIAG_VARS``.
    :param extra_dim_index: Index into the second dimension of a 4D variable;
        must be None for variables that are not 4D.
    :param interp_level: If given, the variable is interpolated to this level
        using the vertical coordinate variable ``interp_vert_name``.
    :param interp_vert_name: Name of the vertical coordinate variable; required
        when ``interp_level`` is given.
    :param fmt: Requested output format; may be overridden as described above.
    :param use_vsi: If True, the dataset is created at a temporary VSI path,
        otherwise inside a temporary directory.
    :return: Tuple ``(out_path, dispose)`` where ``dispose`` removes the
        created VSI file or the temporary directory.

    Side effect: when the resulting format is NetCDF VRT, the GDAL config
    option ``GDAL_NETCDF_BOTTOMUP`` is set to ``'NO'`` globally.
    """

    if var_name in DIAG_VARS:
        assert wrf is not None
        fmt = GDALFormat.GTIFF

    if interp_level is not None:
        assert interp_vert_name
        fmt = GDALFormat.GTIFF

    # WPS netCDF output files have only float32 variables and there
    # seems to be a unique no-data value which is 32768.
    # TODO find out where in WPS's source code this value is defined
    no_data = 32768.0

    time_steps = get_wrf_nc_time_steps(path)

    ds = nc.Dataset(path)
    try:
        rows = ds.dimensions['south_north'].size
        cols = ds.dimensions['west_east'].size

        crs = get_crs(ds)
        geo_transform = get_geo_transform(ds, crs)

        if var_name == 'LU_INDEX':
            landuse_color_table, landuse_cat_names = get_landuse_categories(ds)

        if var_name in DIAG_VARS or interp_level is not None:
            try:
                var = wrf.getvar(ds,
                                 var_name,
                                 timeidx=wrf.ALL_TIMES,
                                 missing=no_data,
                                 squeeze=False,
                                 meta=False)
            except Exception:
                # Best-effort retry without the 'missing' argument. Only
                # regular exceptions are handled so that KeyboardInterrupt
                # and SystemExit still propagate.
                var = wrf.getvar(ds,
                                 var_name,
                                 timeidx=wrf.ALL_TIMES,
                                 squeeze=False,
                                 meta=False)
            if interp_level is not None:
                vert = wrf.getvar(ds,
                                  interp_vert_name,
                                  timeidx=wrf.ALL_TIMES,
                                  squeeze=False,
                                  meta=False)
                var = wrf.interplevel(var,
                                      vert,
                                      interp_level,
                                      missing=no_data,
                                      meta=False)
                dims = MASS
            else:
                dims = DIAG_DIMS[var_name]
            shape = var.shape
        else:
            var = ds.variables[var_name]
            dims = var.dimensions
            shape = var.shape

        assert len(dims) == len(shape), f'|{dims}| != |{shape}|'
        if len(dims) == 4:
            # TODO remove once performance issues with VRT are resolved
            #      (see below)
            fmt = GDALFormat.GTIFF

        use_vrt = fmt.is_vrt
        ext = fmt.value

        if use_vsi:
            out_path = get_temp_vsi_path(ext)
        else:
            out_dir = get_temp_dir()
            out_path = os.path.join(out_dir, 'tmp' + ext)

        if use_vrt:
            driver_name = 'VRT'
        elif fmt == GDALFormat.GTIFF:
            driver_name = 'GTIFF'

        driver = gdal.GetDriverByName(driver_name)  # type: gdal.Driver

        is_4d = len(shape) == 4
        if is_4d:
            assert extra_dim_index is not None
            extra_dim_size = shape[1]
            assert extra_dim_index < extra_dim_size
        else:
            assert extra_dim_index is None

        print('Adding {}'.format(var_name))

        if fmt == GDALFormat.GTIFF and var_name == 'LU_INDEX' and landuse_cat_names:
            # All time steps contain the same data, so use only the first here.
            # This also works around the problem that color tables in GDAL's TIFF driver
            # can only be used with single band datasets.
            times = 1
            # Required for color table support in TIFF.
            np_dtype = np.uint8
        else:
            times = shape[0]
            np_dtype = var.dtype

        type_code = gdal_array.NumericTypeCodeToGDALTypeCode(np_dtype)

        gdal_ds = driver.Create(out_path, cols, rows, times,
                                type_code)  # type: gdal.Dataset
        gdal_ds.SetProjection(crs.wkt)
        gdal_ds.SetGeoTransform(geo_transform)

        for band_idx in range(1, times + 1):
            band = gdal_ds.GetRasterBand(band_idx)  # type: gdal.Band
            band.SetNoDataValue(no_data)

            time_step = time_steps[band_idx - 1]
            band.SetDescription(time_step)

            if var_name == 'LU_INDEX' and landuse_cat_names:
                band.SetRasterColorInterpretation(gdal.GCI_PaletteIndex)
                band.SetRasterColorTable(landuse_color_table)
                band.SetRasterCategoryNames(landuse_cat_names)

            if use_vrt:
                # GDAL's HDF5 driver does not support reading 4D variables
                # whereas the NetCDF driver exposes 4D as 2D with many bands but has performance issues
                # (see https://github.com/OSGeo/gdal/issues/620). Therefore, for now, 4D is only
                # supported as GeoTIFF and not VRT.
                assert not is_4d

                # Defined per iteration as it refers to the current band.
                def set_band_metadata(fmt: GDALFormat) -> None:
                    if fmt == GDALFormat.HDF5_VRT:
                        subdataset_name = 'HDF5:"{path}"://{var_name}'.format(
                            path=path, var_name=var_name)
                    elif fmt == GDALFormat.NETCDF_VRT:
                        subdataset_name = 'NETCDF:"{path}":{var_name}'.format(
                            path=path, var_name=var_name)
                    band.SetMetadata(
                        {
                            'source_0': ('''
                        <SimpleSource>
                            <SourceFilename relativeToVRT="0">{name}</SourceFilename>
                            <SourceBand>{band}</SourceBand>
                            <SrcRect xOff="0" yOff="0" xSize="{cols}" ySize="{rows}" />
                            <DstRect xOff="0" yOff="0" xSize="{cols}" ySize="{rows}" />
                        </SimpleSource>''').format(name=subdataset_name,
                                                   band=band_idx,
                                                   rows=rows,
                                                   cols=cols)
                        }, 'vrt_sources')

                try:
                    set_band_metadata(fmt)
                except RuntimeError:
                    # Work-around gdal bug where sometimes files cannot be opened with a specific driver.
                    # E.g. "RuntimeError: `HDF5:"C:/.../geo_em.d02.nc"://LU_INDEX' does not exist in the file system,
                    #       and is not recognized as a supported dataset name."
                    if fmt == GDALFormat.HDF5_VRT:
                        fmt = GDALFormat.NETCDF_VRT
                    else:
                        fmt = GDALFormat.HDF5_VRT
                    set_band_metadata(fmt)

            elif fmt == GDALFormat.GTIFF:
                data = var[band_idx - 1]
                if is_4d:
                    data = data[extra_dim_index]
                # Write the underlying values of a (possibly masked) array.
                data = ma.getdata(data)
                band.WriteArray(data.astype(np_dtype, copy=False))

        gdal_ds.FlushCache()
    finally:
        ds.close()

    if use_vsi:
        dispose = partial(remove_vsis, [out_path])
    else:
        dispose = partial(remove_dir, out_dir)

    if fmt == GDALFormat.NETCDF_VRT:
        # NetCDF VRT datasets returned by this function require the
        # GDAL config option GDAL_NETCDF_BOTTOMUP to be set to 'NO'.
        # The default of GDAL is 'YES' which would work as well (by flipping the y axis
        # part of the geo transform) but is extremely slow as GDAL can then
        # only read one line at a time, compared to a whole block otherwise.
        # This is a performance bug which we can work around here since we construct
        # the geotransform ourselves anyway.
        # There is no reliable way to set the config option only for a specific dataset,
        # therefore it is modified globally. This is suboptimal and the ultimate goal
        # is to avoid using the NetCDF driver completely (see above why this is impossible still).
        # References:
        # http://lists.osgeo.org/pipermail/gdal-dev/2016-November/045573.html
        # https://github.com/perrygeo/ncvrt#--flip-or-invert-latitude-of-bottom-up-data
        gdal.SetConfigOption('GDAL_NETCDF_BOTTOMUP', 'NO')

    return out_path, dispose
Example #5
0
def convert_wrf_nc_var_to_gdal_dataset(
    path: str, var_name: str, extra_dim_index: Optional[int],
    interp_level: Optional[float], interp_vert_name: Optional[str],
    fmt: GDALFormat=GDALFormat.GTIFF, use_vsi: bool=False) -> Tuple[str,Callable[[],None]]:
    ''' Expose a variable of a WRF/WPS netCDF file as a GDAL dataset with one band per time step.

        IMPORTANT: The NetCDF VRT datasets returned by this function require the
        GDAL config option GDAL_NETCDF_BOTTOMUP to be set to 'NO'.
        The default of GDAL is 'YES' which would work as well (by flipping the y axis
        part of the geo transform) but is extremely slow as GDAL can then
        only read one line at a time, compared to a whole block otherwise.
        This is a performance bug which we can work around here since we construct
        the geotransform ourselves anyway.
        References:
        http://lists.osgeo.org/pipermail/gdal-dev/2016-November/045573.html
        https://github.com/perrygeo/ncvrt#--flip-or-invert-latitude-of-bottom-up-data

        :param path: Path to the netCDF file.
        :param var_name: Name of the variable, either stored in the file or one of DIAG_VARS.
        :param extra_dim_index: Index into the second dimension of a 4D variable;
            must be None for variables that are not 4D.
        :param interp_level: If given, the variable is interpolated to this level
            using the vertical coordinate variable interp_vert_name.
        :param interp_vert_name: Name of the vertical coordinate variable; required
            when interp_level is given.
        :param fmt: Requested output format; overridden for LU_INDEX, 'E'/'F',
            diagnostic, interpolated and 4D variables (see below).
        :param use_vsi: If True, the dataset is created at a temporary VSI path,
            otherwise inside a temporary directory.
        :return: Tuple (out_path, dispose) where dispose removes the created
            VSI file or the temporary directory.
    '''
    if fmt == GDALFormat.GTIFF:
        # LU_INDEX has a color table which is unsupported with TIFF, so we force HDF5_VRT instead.
        # (GDAL: "SetColorTable() not supported for multi-sample TIFF files.")
        if var_name == 'LU_INDEX':
            fmt = GDALFormat.HDF5_VRT

    if fmt == GDALFormat.HDF5_VRT:
        # TODO remove once gdal bug is fixed: https://github.com/OSGeo/gdal/issues/622
        if var_name in ['E', 'F']:
            fmt = GDALFormat.NETCDF_VRT

    if var_name in DIAG_VARS:
        assert wrf is not None
        fmt = GDALFormat.GTIFF

    if interp_level is not None:
        assert interp_vert_name
        fmt = GDALFormat.GTIFF

    # WPS netCDF output files have only float32 variables and there
    # seems to be a unique no-data value which is 32768.
    # TODO find out where in WPS's source code this value is defined
    no_data = 32768.0

    time_steps = get_wrf_nc_time_steps(path)

    ds = nc.Dataset(path)
    # The netCDF file is closed again even if the conversion fails.
    try:
        rows = ds.dimensions['south_north'].size
        cols = ds.dimensions['west_east'].size

        crs = get_crs(ds)
        geo_transform = get_geo_transform(ds, crs)

        if var_name == 'LU_INDEX':
            landuse_color_table, landuse_cat_names = get_landuse_categories(ds)

        if var_name in DIAG_VARS or interp_level is not None:
            try:
                var = wrf.getvar(ds, var_name, timeidx=wrf.ALL_TIMES, missing=no_data, meta=False)
            except Exception:
                # Best-effort retry without the 'missing' argument. Only
                # regular exceptions are handled so that KeyboardInterrupt
                # and SystemExit still propagate.
                var = wrf.getvar(ds, var_name, timeidx=wrf.ALL_TIMES, meta=False)
            if interp_level is not None:
                vert = wrf.getvar(ds, interp_vert_name, timeidx=wrf.ALL_TIMES, meta=False)
                var = wrf.interplevel(var, vert, interp_level, missing=no_data, meta=False)
                dims = MASS
            else:
                dims = DIAG_DIMS[var_name]
            shape = var.shape
        else:
            var = ds.variables[var_name]
            dims = var.dimensions
            shape = var.shape
        assert len(dims) == len(shape)
        if len(dims) == 4:
            # TODO remove once performance issues with VRT are resolved
            #      (see below)
            fmt = GDALFormat.GTIFF

        use_vrt = fmt.is_vrt
        ext = fmt.value

        if use_vsi:
            out_path = get_temp_vsi_path(ext)
        else:
            out_dir = get_temp_dir()
            out_path = os.path.join(out_dir, 'tmp' + ext)

        if use_vrt:
            driver_name = 'VRT'
        elif fmt == GDALFormat.GTIFF:
            driver_name = 'GTIFF'

        driver = gdal.GetDriverByName(driver_name) # type: gdal.Driver

        is_4d = len(shape) == 4
        if is_4d:
            assert extra_dim_index is not None
            extra_dim_size = shape[1]
            assert extra_dim_index < extra_dim_size
        else:
            assert extra_dim_index is None

        print('Adding {}'.format(var_name))
        type_code = gdal_array.NumericTypeCodeToGDALTypeCode(var.dtype)

        # The first dimension is used as the band (time step) axis.
        times = shape[0]

        gdal_ds = driver.Create(out_path, cols, rows, times, type_code) # type: gdal.Dataset
        gdal_ds.SetProjection(crs.wkt)
        gdal_ds.SetGeoTransform(geo_transform)

        for band_idx in range(1, times + 1):
            band = gdal_ds.GetRasterBand(band_idx) # type: gdal.Band
            band.SetNoDataValue(no_data)

            time_step = time_steps[band_idx-1]
            band.SetDescription(time_step)

            if var_name == 'LU_INDEX' and landuse_cat_names:
                band.SetRasterColorInterpretation(gdal.GCI_PaletteIndex)
                band.SetRasterColorTable(landuse_color_table)
                band.SetRasterCategoryNames(landuse_cat_names)

            if use_vrt:
                # GDAL's HDF5 driver does not support reading 4D variables
                # whereas the NetCDF driver exposes 4D as 2D with many bands but has performance issues
                # (see https://github.com/OSGeo/gdal/issues/620). Therefore, for now, 4D is only
                # supported as GeoTIFF and not VRT.
                assert not is_4d

                if fmt == GDALFormat.HDF5_VRT:
                    subdataset_name = 'HDF5:"{path}"://{var_name}'.format(path=path, var_name=var_name)
                elif fmt == GDALFormat.NETCDF_VRT:
                    subdataset_name = 'NETCDF:"{path}":{var_name}'.format(path=path, var_name=var_name)

                band.SetMetadata({'source_0': ('''
                <SimpleSource>
                    <SourceFilename relativeToVRT="0">{name}</SourceFilename>
                    <SourceBand>{band}</SourceBand>
                    <SrcRect xOff="0" yOff="0" xSize="{cols}" ySize="{rows}" />
                    <DstRect xOff="0" yOff="0" xSize="{cols}" ySize="{rows}" />
                </SimpleSource>''').format(name=subdataset_name, band=band_idx, rows=rows, cols=cols)}, 'vrt_sources')
            elif fmt == GDALFormat.GTIFF:
                data = var[band_idx - 1]
                if is_4d:
                    data = data[extra_dim_index]
                band.WriteArray(data)

        gdal_ds.FlushCache()
    finally:
        ds.close()

    if use_vsi:
        dispose = partial(remove_vsis, [out_path])
    else:
        dispose = partial(remove_dir, out_dir)

    return out_path, dispose
Example #6
0
def convert_to_wps_binary(input_path: str, output_folder: str, is_categorical: bool,
                          units: Optional[str]=None, description: Optional[str]=None,
                          strict_datum: bool=True) -> GeogridBinaryDataset:
    '''
    Losslessly convert common geo formats to WPS binary format.
    If the given input file has a CRS or data type unsupported by WRF then an error is raised.

    Only the first raster band of the input is converted. The output folder
    receives an 'index' metadata file and one binary file per tile.

    :param input_path: Path to GDAL-supported raster file.
    :param output_folder: Path to output folder, will be created if not existing
    :param is_categorical: Whether the data is categorical, otherwise continuous
    :param units: units for continuous data
    :param description: single-line dataset description
    :param strict_datum: if True, fail if the input datum is not supported by WRF, otherwise ignore mismatch
    :return: dataset object built from the index file path and the datum mismatch information
    :raises ValueError: if the output folder is not empty
    :raises UserError: if the dataset exceeds MAX_SIZE rows or columns, or if a
        no-data value is required (non-zero halo or imperfect tiling) but missing
    :raises UnsupportedError: if the dataset has more than one layer
    '''
    os.makedirs(output_folder, exist_ok=True)
    if os.listdir(output_folder):
        raise ValueError('Output folder must be empty')

    # FIXME if there is no nodata value, ask the user if it really has no nodata or ask for the value

    src_ds = gdal.Open(input_path) # type: gdal.Dataset
    xsize, ysize = src_ds.RasterXSize, src_ds.RasterYSize
    if xsize > MAX_SIZE or ysize > MAX_SIZE:
        raise UserError(f'Dataset has more than {MAX_SIZE} rows or columns: {ysize} x {xsize}, consider downsampling')

    filename_digits = 6 if xsize > 99999 or ysize > 99999 else 5

    # NOTE(review): GetLayerCount() is used here although only raster band 1
    # is read below -- confirm whether a band count check was intended.
    if src_ds.GetLayerCount() > 1:
        raise UnsupportedError('Dataset has more than one layer which is unsupported')

    band = src_ds.GetRasterBand(1) # type: gdal.Band
    src_no_data_value = band.GetNoDataValue()
    has_no_data_value = src_no_data_value is not None

    tilesize_x = find_tile_size(xsize, try_hard=not has_no_data_value)
    tilesize_y = find_tile_size(ysize, try_hard=not has_no_data_value)
    is_perfect_tiling = xsize % tilesize_x == 0 and ysize % tilesize_y == 0

    # No halo for categorical data or when a single tile covers the whole dataset.
    if is_categorical or (tilesize_x == xsize and tilesize_y == ysize):
        tile_bdr = 0
    else:
        # TODO write unit test that checks whether halo areas have correct values
        tile_bdr = 3

    if tile_bdr > 0 and not has_no_data_value:
        raise UserError('No-data value required as dataset is continuous and halo is non-zero')

    if not is_perfect_tiling and not has_no_data_value:
        raise UserError('No-data value required as no perfect tile size could be found')

    tilesize_bdr_x = tilesize_x + 2*tile_bdr
    tilesize_bdr_y = tilesize_y + 2*tile_bdr

    tiles_x = list(range(0, xsize, tilesize_x))
    tiles_y = list(range(0, ysize, tilesize_y))
    ysize_pad = tilesize_y * len(tiles_y) # ysize including padding caused by imperfect tiling

    # write 'index' file with metadata
    index_path = os.path.join(output_folder, 'index')
    index_dict, datum_mismatch, inv_scale_factor, dst_dtype, dst_no_data_value = create_index_dict(
        src_ds, tilesize_x, tilesize_y, ysize_pad, tile_bdr, filename_digits,
        is_categorical, units, description, strict_datum)
    write_index_file(index_path, index_dict)

    np_dst_dtype = gdal_array.GDALTypeCodeToNumericTypeCode(dst_dtype)
    needs_scaling = inv_scale_factor is not None

    driver = gdal.GetDriverByName('ENVI') # type: gdal.Driver

    dy = src_ds.GetGeoTransform()[5]

    # WPS-specific filename convention: <start_x>-<end_x>.<start_y>-<end_y>,
    # each index zero-padded to filename_digits.
    fmt_int = '{:0' + str(filename_digits) + 'd}'
    fmt_filename = '{fmt}-{fmt}.{fmt}-{fmt}'.format(fmt=fmt_int)

    # As we have no control over the auxiliarly files that are created as well during conversion
    # we do everything in a temporary folder and move the binary file out after the conversion.
    # This keeps everything clean and tidy.
    # The folder is created directly before the try block so that it is
    # always removed again, whatever fails afterwards.
    tmp_dir = tempfile.mkdtemp()
    try:
        tmp_bin_path = os.path.join(tmp_dir, 'data.bin')

        for start_x in tiles_x:
            for start_y in tiles_y:
                end_x = start_x + tilesize_x - 1
                end_y = start_y + tilesize_y - 1
                start_bdr_x = start_x - tile_bdr
                start_bdr_y = start_y - tile_bdr
                end_bdr_x = end_x + tile_bdr
                end_bdr_y = end_y + tile_bdr

                # read source data, clipped to the extent of the dataset
                offset_x = max(0, start_bdr_x)
                offset_y = max(0, start_bdr_y)
                if end_bdr_x >= xsize:
                    datasize_x = xsize - offset_x
                else:
                    datasize_x = end_bdr_x - offset_x + 1

                if end_bdr_y >= ysize:
                    datasize_y = ysize - offset_y
                else:
                    datasize_y = end_bdr_y - offset_y + 1

                src_data = band.ReadAsArray(offset_x, offset_y, datasize_x, datasize_y)
                if dy > 0:
                    # reverse the row order of the block
                    src_data = src_data[::-1]

                # scale if necessary (float data only)
                if needs_scaling:
                    # TODO test if scaling with no-data works
                    if has_no_data_value:
                        src_data = ma.masked_equal(src_data, src_no_data_value)
                    src_data *= inv_scale_factor
                    np.round(src_data, out=src_data)
                    if has_no_data_value:
                        src_data = ma.filled(src_data, dst_no_data_value)

                # pad incomplete tile with nodata value
                if datasize_x == tilesize_bdr_x and datasize_y == tilesize_bdr_y:
                    dst_data = src_data
                else:
                    assert has_no_data_value
                    dst_data = np.empty((tilesize_bdr_y, tilesize_bdr_x), np_dst_dtype)
                    data_start_x = offset_x - start_bdr_x
                    data_start_y = offset_y - start_bdr_y
                    dst_data[data_start_y:data_start_y+datasize_y,data_start_x:data_start_x+datasize_x] = src_data

                    # fill whichever sides extend beyond the dataset
                    if start_bdr_x < 0:
                        dst_data[:,:data_start_x] = dst_no_data_value
                    if start_bdr_y < 0:
                        dst_data[:data_start_y,:] = dst_no_data_value
                    if end_bdr_x >= xsize:
                        dst_data[:,data_start_x+datasize_x:] = dst_no_data_value
                    if end_bdr_y >= ysize:
                        dst_data[data_start_y+datasize_y:,:] = dst_no_data_value

                # create tile file
                dst_ds = driver.Create(tmp_bin_path, tilesize_bdr_x, tilesize_bdr_y, 1, dst_dtype) # type: gdal.Dataset
                dst_band = dst_ds.GetRasterBand(1) # type: gdal.Band
                dst_band.WriteArray(dst_data)

                # write to disk
                dst_ds.FlushCache()
                del dst_ds

                # move to final location with WPS-specific filename convention
                if dy < 0:
                    # y indices in the filename are counted from the opposite
                    # end of the padded y axis
                    end_y = ysize_pad - start_y - 1
                    start_y = end_y - tilesize_y + 1
                final_path = os.path.join(output_folder, fmt_filename.format(
                    start_x + 1, end_x + 1, start_y + 1, end_y + 1))
                shutil.move(tmp_bin_path, final_path)

        return GeogridBinaryDataset(index_path, datum_mismatch)
    finally:
        shutil.rmtree(tmp_dir)
Example #7
0
def convert_wps_binary_to_vrt_dataset(
        folder: str,
        use_vsi: bool = False) -> Tuple[str, str, str, Callable[[], None]]:
    """Converts a WPS Binary format dataset into a mosaic VRT dataset referencing per-tile VRT datasets.

    For every tile file found in `folder` a VRT with one raw band per z-level
    is written. A `mosaic.vrt` then places all tile VRTs into a single
    georeferenced raster.

    :param folder: Folder that is passed to `read_wps_binary_index_file` and
        scanned for tile files named `<start_x>-<end_x>.<start_y>-<end_y>`.
    :param use_vsi: If true, the VRTs are written below the path returned by
        `get_temp_vsi_path`, otherwise below the path returned by `get_temp_dir`.
    :returns: Tuple of (mosaic VRT path, title, short name, dispose callable).
        The dispose callable invokes `remove_vsis` on all generated VRT paths
        (`use_vsi=True`) or `remove_dir` on the output directory.
    :raises NotImplementedError: If the dataset is `regular_ll` with a `stdlon`,
        or if the projection is not one of the handled ones.
    :raises ValueError: If no tile files are found, or if the word size /
        signedness combination has no matching GDAL data type.
    """

    m = read_wps_binary_index_file(folder)

    if m.proj_id == 'regular_ll' and m.stdlon is not None:
        raise NotImplementedError('stdlon not supported for regular_ll')

    # scan folder for available tiles
    # Raw strings are used so that the regex escapes (\. and \d) are not
    # interpreted as (invalid) string escape sequences.
    digits_re = r'\d{' + str(m.filename_digits) + '}'
    tile_filename_re = re.compile(
        r'^({d})-({d})\.({d})-({d})$'.format(d=digits_re))
    tiles = []
    for filename in os.listdir(folder):
        match = tile_filename_re.match(filename)
        if match:
            tiles.append({
                'filename': filename,
                'path': os.path.join(folder, filename),
                'start_x': int(match.group(1)),
                'end_x': int(match.group(2)),
                'start_y': int(match.group(3)),
                'end_y': int(match.group(4))
            })
    if not tiles:
        raise ValueError('No tiles found')

    # determine raster dimensions
    xsize = max(tile['end_x'] for tile in tiles)  # type: int
    ysize = max(tile['end_y'] for tile in tiles)  # type: int
    zsize = m.tile_z_end - m.tile_z_start + 1

    # convert to GDAL metadata
    # Keys are (word size in bytes, signed). The 32-bit GDAL types are 4 bytes
    # wide, so they must be keyed by a word size of 4.
    dtype_mapping = {
        (1, False): gdal.GDT_Byte,  # GDAL only supports unsigned byte
        (2, False): gdal.GDT_UInt16,
        (2, True): gdal.GDT_Int16,
        (4, False): gdal.GDT_UInt32,
        (4, True): gdal.GDT_Int32
    }
    try:
        dtype = dtype_mapping[(m.word_size, m.signed)]
    except KeyError:
        raise ValueError('word_size/signed combination not supported') from None

    if m.proj_id == 'regular_ll':
        crs = CRS.create_lonlat()
    elif m.proj_id == 'lambert':
        # It doesn't matter what the origin is. This only influences the
        # projection coordinates to which the data is anchored to but the
        # georeferencing itself does not change. See down below on how
        # the geo transform is computed based on the known geographical
        # coordinates in the data.
        origin = LonLat(lon=m.stdlon, lat=(m.truelat1 + m.truelat2) / 2)
        crs = CRS.create_lambert(m.truelat1, m.truelat2, origin)
    elif m.proj_id == 'mercator':
        # See comment above about origin.
        origin_lon = m.stdlon if m.stdlon is not None else 0
        crs = CRS.create_mercator(m.truelat1, origin_lon)
    elif m.proj_id == 'albers_nad83':
        # See comment above about origin.
        origin = LonLat(lon=m.stdlon, lat=(m.truelat1 + m.truelat2) / 2)
        crs = CRS.create_albers_nad83(m.truelat1, m.truelat2, origin)
    # FIXME handle polar vs polar_wgs84 differently
    elif m.proj_id == 'polar':
        crs = CRS.create_polar(m.truelat1, m.stdlon)
    elif m.proj_id == 'polar_wgs84':
        crs = CRS.create_polar(m.truelat1, m.stdlon)
    else:
        raise NotImplementedError('Unsupported projection')

    # Convert the 1-based known index (a pixel centre) into GDAL pixel
    # coordinates, where integer values lie on pixel edges.
    known_x_idx_gdal = m.known_idx.x - 0.5
    if m.top_bottom:
        known_y_idx_gdal = ysize - m.known_idx.y - 0.5
        dy_gdal = -m.dy
    else:
        known_y_idx_gdal = m.known_idx.y - 0.5
        dy_gdal = m.dy

    # Anchor the raster origin at the known point.
    known_xy = crs.to_xy(m.known_lonlat)
    upper_left_x = known_xy.x - known_x_idx_gdal * m.dx
    # NOTE(review): the '+' is consistent with dy_gdal == -m.dy (top_bottom).
    # For bottom-up data (dy_gdal == +m.dy) one would expect
    # known_xy.y - known_y_idx_gdal * m.dy -- confirm against real datasets.
    upper_left_y = known_xy.y + known_y_idx_gdal * m.dy
    geo_transform = (upper_left_x, m.dx, 0, upper_left_y, 0, dy_gdal)

    # VRTRawRasterBand metadata (all values in bytes)
    line_width = m.word_size * (m.tile_x + m.tile_bdr * 2)  # x size incl. border
    tile_size = line_width * (m.tile_y + m.tile_bdr * 2)  # tile size incl. border
    line_offset = line_width
    # skip the leading border rows plus the border pixels of the first data row
    image_offset = m.tile_bdr * line_width + m.tile_bdr * m.word_size
    pixel_offset = m.word_size
    byte_order = 'LSB' if m.little_endian else 'MSB'

    # create tile VRTs
    if use_vsi:
        out_dir = get_temp_vsi_path(ext='')
    else:
        out_dir = get_temp_dir()

    driver = gdal.GetDriverByName('VRT')  # type: gdal.Driver
    tile_vrt_paths = {}
    for tile in tiles:
        tile_vrt_path = '{}/{}.vrt'.format(out_dir, tile['filename'])
        tile_vrt = driver.Create(tile_vrt_path, m.tile_x, m.tile_y,
                                 0)  # type: gdal.Dataset

        # one raw band per z-level, each offset by a whole (padded) tile
        for z in range(m.tile_z_start - 1, m.tile_z_end):
            options = [
                'subClass=VRTRawRasterBand',
                'SourceFilename={}'.format(tile['path']), 'relativeToVRT=0',
                'ImageOffset={}'.format(z * tile_size + image_offset),
                'PixelOffset={}'.format(pixel_offset),
                'LineOffset={}'.format(line_offset), 'ByteOrder=' + byte_order
            ]
            tile_vrt.AddBand(dtype, options)
        tile_vrt.FlushCache()
        # drop the reference to the dataset once it has been written
        del tile_vrt

        tile_vrt_paths[tile['filename']] = tile_vrt_path

    # The position of a tile within the mosaic does not depend on the band,
    # so it is computed once here instead of once per band.
    tile_placements = []
    for tile in tiles:
        if m.top_bottom:
            end_y = ysize - tile['start_y'] - 1
            offset_y = end_y - m.tile_y + 1
        else:
            offset_y = tile['start_y'] - 1
        tile_placements.append((tile_vrt_paths[tile['filename']],
                                tile['start_x'] - 1, offset_y))

    source_template = '''
                <SimpleSource>
                    <SourceFilename relativeToVRT="0">{path}</SourceFilename>
                    <SourceBand>{band}</SourceBand>
                    <SrcRect xOff="0" yOff="0" xSize="{tile_x}" ySize="{tile_y}" />
                    <DstRect xOff="{offset_x}" yOff="{offset_y}" xSize="{tile_x}" ySize="{tile_y}" />
                </SimpleSource>'''

    # create mosaic VRT
    mosaic_vrt_path = '{}/mosaic.vrt'.format(out_dir)
    vrt = driver.Create(mosaic_vrt_path, xsize, ysize, zsize,
                        dtype)  # type: gdal.Dataset
    vrt.SetProjection(crs.proj4)
    vrt.SetGeoTransform(geo_transform)

    if m.categorical:
        color_table, cat_names = get_gdal_categories(m.categories,
                                                     m.category_min,
                                                     m.category_max)

    for band_idx in range(1, zsize + 1):
        band = vrt.GetRasterBand(band_idx)  # type: gdal.Band
        if m.missing_value is not None:
            band.SetNoDataValue(m.missing_value)

        band.SetScale(m.scale_factor)

        if m.categorical:
            band.SetRasterColorInterpretation(gdal.GCI_PaletteIndex)
            band.SetRasterColorTable(color_table)
            band.SetRasterCategoryNames(cat_names)

        sources = {}
        for idx, (path, offset_x, offset_y) in enumerate(tile_placements):
            sources['source_{}'.format(idx)] = source_template.format(
                path=path,
                band=band_idx,
                tile_x=m.tile_x,
                tile_y=m.tile_y,
                offset_x=offset_x,
                offset_y=offset_y)
        band.SetMetadata(sources, 'vrt_sources')

    vrt.FlushCache()

    vrt_paths = [mosaic_vrt_path] + list(tile_vrt_paths.values())
    if use_vsi:
        dispose = partial(remove_vsis, vrt_paths)
    else:
        dispose = partial(remove_dir, out_dir)

    short_name = os.path.basename(folder)
    title = short_name
    if m.units and m.units != 'category':
        title += ' in ' + m.units
    if m.description:
        title += ' (' + m.description + ')'

    # The title is returned as VRT does not support dataset descriptions.
    return mosaic_vrt_path, title, short_name, dispose