Exemple #1
0
def read_grib_file_metadata(path: str) -> GribMetadata:
    ds = gdal.Open(path, gdal.GA_ReadOnly)

    # ds.GetMetadata() returns nothing in gdal < 2.3, but with 2.3 it contains the GRIB_IDS
    # item which contains things like the center (e.g. NCEP). See http://www.gdal.org/frmt_grib.html.

    # TODO read bbox

    variables = dict()
    times = set()

    for i in range(1, ds.RasterCount + 1):
        band = ds.GetRasterBand(i)
        meta = band.GetMetadata()
        var_unit = meta['GRIB_UNIT'] # "[m/s]"
        var_name = meta['GRIB_ELEMENT'] # "VGRD"
        var_label = meta['GRIB_COMMENT'] # "v-component of wind [m/s]"
        valid_time = meta['GRIB_VALID_TIME'] # "  1438754400 sec UTC"

        var_label_without_unit = var_label.replace(var_unit, '').strip()
        variables[var_name] = var_label_without_unit
        
        unix = int(''.join(c for c in valid_time if c.isdigit()))
        time = datetime.utcfromtimestamp(unix)
        times.add(time)

    return GribMetadata(variables, sorted(times), path)
Exemple #2
0
def convert_to_wps_binary(input_path: str, output_folder: str, is_categorical: bool,
                          units: Optional[str]=None, description: Optional[str]=None,
                          strict_datum: bool=True) -> GeogridBinaryDataset:
    '''
    Losslessly convert common geo formats to WPS binary format.
    If the given input file has a CRS or data type unsupported by WRF then an error is raised.

    :param input_path: Path to GDAL-supported raster file.
    :param output_folder: Path to output folder, will be created if not existing
    :param is_categorical: Whether the data is categorical, otherwise continuous
    :param units: units for continuous data
    :param description: single-line dataset description
    :param strict_datum: if True, fail if the input datum is not supported by WRF, otherwise ignore mismatch
    '''
    os.makedirs(output_folder, exist_ok=True)
    if os.listdir(output_folder):
        raise ValueError('Output folder must be empty')

    # FIXME if there is no nodata value, ask the user if it really has no nodata or ask for the value

    src_ds = gdal.Open(input_path) # type: gdal.Dataset
    xsize, ysize = src_ds.RasterXSize, src_ds.RasterYSize
    if xsize > MAX_SIZE or ysize > MAX_SIZE:
        raise UserError(f'Dataset has more than {MAX_SIZE} rows or columns: {ysize} x {xsize}, consider downsampling')

    filename_digits = 6 if xsize > 99999 or ysize > 99999 else 5

    if src_ds.GetLayerCount() > 1:
        raise UnsupportedError('Dataset has more than one layer which is unsupported')

    band = src_ds.GetRasterBand(1) # type: gdal.Band
    src_no_data_value = band.GetNoDataValue()
    has_no_data_value = src_no_data_value is not None

    tilesize_x = find_tile_size(xsize, try_hard=not has_no_data_value)
    tilesize_y = find_tile_size(ysize, try_hard=not has_no_data_value)
    is_perfect_tiling = xsize % tilesize_x == 0 and ysize % tilesize_y == 0

    if is_categorical or (tilesize_x == xsize and tilesize_y == ysize):
        tile_bdr = 0
    else:
        # TODO write unit test that checks whether halo areas have correct values
        tile_bdr = 3

    if tile_bdr > 0 and not has_no_data_value:
        raise UserError('No-data value required as dataset is continuous and halo is non-zero')

    if not is_perfect_tiling and not has_no_data_value:
        raise UserError('No-data value required as no perfect tile size could be found')

    tilesize_bdr_x = tilesize_x + 2*tile_bdr
    tilesize_bdr_y = tilesize_y + 2*tile_bdr

    tiles_x = list(range(0, xsize, tilesize_x))
    tiles_y = list(range(0, ysize, tilesize_y))
    ysize_pad = tilesize_y * len(tiles_y) # ysize including padding caused by imperfect tiling

    # write 'index' file with metadata
    index_path = os.path.join(output_folder, 'index')
    index_dict, datum_mismatch, inv_scale_factor, dst_dtype, dst_no_data_value = create_index_dict(
        src_ds, tilesize_x, tilesize_y, ysize_pad, tile_bdr, filename_digits,
        is_categorical, units, description, strict_datum)
    write_index_file(index_path, index_dict)

    np_dst_dtype = gdal_array.GDALTypeCodeToNumericTypeCode(dst_dtype)
    needs_scaling = inv_scale_factor is not None

    # As we have no control over the auxiliarly files that are created as well during conversion
    # we do everything in a temporary folder and move the binary file out after the conversion.
    # This keeps everything clean and tidy.
    tmp_dir = tempfile.mkdtemp()
    tmp_bin_path = os.path.join(tmp_dir, 'data.bin')

    driver = gdal.GetDriverByName('ENVI') # type: gdal.Driver#

    dy = src_ds.GetGeoTransform()[5]

    try:
        for start_x in tiles_x:
            for start_y in tiles_y:
                end_x = start_x + tilesize_x - 1
                end_y = start_y + tilesize_y - 1
                start_bdr_x = start_x - tile_bdr
                start_bdr_y = start_y - tile_bdr
                end_bdr_x = end_x + tile_bdr
                end_bdr_y = end_y + tile_bdr

                # read source data
                offset_x = max(0, start_bdr_x)
                offset_y = max(0, start_bdr_y)
                if end_bdr_x >= xsize:
                    datasize_x = xsize - offset_x
                else:
                    datasize_x = end_bdr_x - offset_x + 1

                if end_bdr_y >= ysize:
                    datasize_y = ysize - offset_y
                else:
                    datasize_y = end_bdr_y - offset_y + 1

                src_data = band.ReadAsArray(offset_x, offset_y, datasize_x, datasize_y)
                if dy > 0:
                    src_data = src_data[::-1]

                # scale if necessary (float data only)
                if needs_scaling:
                    # TODO test if scaling with no-data works
                    if has_no_data_value:
                        src_data = ma.masked_equal(src_data, src_no_data_value)
                    src_data *= inv_scale_factor
                    np.round(src_data, out=src_data)
                    if has_no_data_value:
                        src_data = ma.filled(src_data, dst_no_data_value)

                # pad incomplete tile with nodata value
                if datasize_x == tilesize_bdr_x and datasize_y == tilesize_bdr_y:
                    dst_data = src_data
                else:
                    assert has_no_data_value
                    dst_data = np.empty((tilesize_bdr_y, tilesize_bdr_x), np_dst_dtype)
                    data_start_x = offset_x - start_bdr_x
                    data_start_y = offset_y - start_bdr_y
                    dst_data[data_start_y:data_start_y+datasize_y,data_start_x:data_start_x+datasize_x] = src_data

                    if start_bdr_x < 0:
                        dst_data[:,:data_start_x] = dst_no_data_value
                    if start_bdr_y < 0:
                        dst_data[:data_start_y,:] = dst_no_data_value
                    if end_bdr_x >= xsize:
                        dst_data[:,data_start_x+datasize_x:] = dst_no_data_value
                    if end_bdr_y >= ysize:
                        dst_data[data_start_y+datasize_y:,:] = dst_no_data_value


                # create tile file
                dst_ds = driver.Create(tmp_bin_path, tilesize_bdr_x, tilesize_bdr_y, 1, dst_dtype) # type: gdal.Dataset
                dst_band = dst_ds.GetRasterBand(1) # type: gdal.Band
                dst_band.WriteArray(dst_data)

                # write to disk
                dst_ds.FlushCache()
                del dst_ds

                # move to final location with WPS-specific filename convention
                fmt_int = '{:0' + str(filename_digits) + 'd}'
                fmt_filename = '{fmt}-{fmt}.{fmt}-{fmt}'.format(fmt=fmt_int)
                if dy < 0:
                    end_y = ysize_pad - start_y - 1
                    start_y = end_y - tilesize_y + 1
                final_path = os.path.join(output_folder, fmt_filename.format(
                    start_x + 1, end_x + 1, start_y + 1, end_y + 1))
                shutil.move(tmp_bin_path, final_path)

        return GeogridBinaryDataset(index_path, datum_mismatch)
    finally:
        shutil.rmtree(tmp_dir)