import os
import shutil
import tempfile
from datetime import datetime
from typing import Optional

import numpy as np
import numpy.ma as ma
from osgeo import gdal, gdal_array

# GribMetadata, GeogridBinaryDataset, UserError, UnsupportedError, MAX_SIZE,
# find_tile_size, create_index_dict and write_index_file are assumed to be
# defined in, or imported from, the surrounding package.


def read_grib_file_metadata(path: str) -> GribMetadata:
    ds = gdal.Open(path, gdal.GA_ReadOnly)
    # ds.GetMetadata() returns nothing in GDAL < 2.3, but from 2.3 on it contains the
    # GRIB_IDS item, which holds things like the originating center (e.g. NCEP).
    # See http://www.gdal.org/frmt_grib.html.
    # TODO read bbox
    variables = dict()
    times = set()
    for i in range(1, ds.RasterCount + 1):
        band = ds.GetRasterBand(i)
        meta = band.GetMetadata()
        var_unit = meta['GRIB_UNIT']          # "[m/s]"
        var_name = meta['GRIB_ELEMENT']       # "VGRD"
        var_label = meta['GRIB_COMMENT']      # "v-component of wind [m/s]"
        valid_time = meta['GRIB_VALID_TIME']  # " 1438754400 sec UTC"
        var_label_without_unit = var_label.replace(var_unit, '').strip()
        variables[var_name] = var_label_without_unit
        # extract the unix timestamp from strings like " 1438754400 sec UTC"
        unix = int(''.join(c for c in valid_time if c.isdigit()))
        time = datetime.utcfromtimestamp(unix)
        times.add(time)
    return GribMetadata(variables, sorted(times), path)
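# Example usage (an illustrative sketch: the GRIB2 path below is a made-up name,
# and the printed fields assume GribMetadata exposes its variables and times):
#
#     meta = read_grib_file_metadata('gfs_sample.grib2')
#     print(meta.variables)  # e.g. {'VGRD': 'v-component of wind', ...}
#     print(meta.times)      # sorted datetimes parsed from GRIB_VALID_TIME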
def convert_to_wps_binary(input_path: str, output_folder: str, is_categorical: bool,
                          units: Optional[str]=None, description: Optional[str]=None,
                          strict_datum: bool=True) -> GeogridBinaryDataset:
    '''Losslessly convert common geo formats to the WPS binary format.
    If the given input file has a CRS or data type unsupported by WRF, an error is raised.

    :param input_path: Path to a GDAL-supported raster file.
    :param output_folder: Path to the output folder; created if it does not exist.
    :param is_categorical: Whether the data is categorical, otherwise continuous.
    :param units: Units for continuous data.
    :param description: Single-line dataset description.
    :param strict_datum: If True, fail if the input datum is not supported by WRF;
                         otherwise ignore the mismatch.
    '''
    os.makedirs(output_folder, exist_ok=True)
    if os.listdir(output_folder):
        raise ValueError('Output folder must be empty')

    # FIXME if there is no nodata value, ask the user whether the data really has
    #       no nodata, or ask for the value
    src_ds = gdal.Open(input_path)  # type: gdal.Dataset

    xsize, ysize = src_ds.RasterXSize, src_ds.RasterYSize
    if xsize > MAX_SIZE or ysize > MAX_SIZE:
        raise UserError(f'Dataset has more than {MAX_SIZE} rows or columns: '
                        f'{ysize} x {xsize}, consider downsampling')
    filename_digits = 6 if xsize > 99999 or ysize > 99999 else 5

    if src_ds.GetLayerCount() > 1:
        raise UnsupportedError('Dataset has more than one layer, which is unsupported')

    band = src_ds.GetRasterBand(1)  # type: gdal.Band
    src_no_data_value = band.GetNoDataValue()
    has_no_data_value = src_no_data_value is not None

    tilesize_x = find_tile_size(xsize, try_hard=not has_no_data_value)
    tilesize_y = find_tile_size(ysize, try_hard=not has_no_data_value)
    is_perfect_tiling = xsize % tilesize_x == 0 and ysize % tilesize_y == 0

    if is_categorical or (tilesize_x == xsize and tilesize_y == ysize):
        tile_bdr = 0
    else:
        # TODO write unit test that checks whether halo areas have correct values
        tile_bdr = 3

    if tile_bdr > 0 and not has_no_data_value:
        raise UserError('No-data value required as dataset is continuous and halo is non-zero')

    if not is_perfect_tiling and not has_no_data_value:
        raise UserError('No-data value required as no perfect tile size could be found')

    tilesize_bdr_x = tilesize_x + 2*tile_bdr
    tilesize_bdr_y = tilesize_y + 2*tile_bdr

    tiles_x = list(range(0, xsize, tilesize_x))
    tiles_y = list(range(0, ysize, tilesize_y))
    ysize_pad = tilesize_y * len(tiles_y)  # ysize including padding caused by imperfect tiling

    # write 'index' file with metadata
    index_path = os.path.join(output_folder, 'index')
    index_dict, datum_mismatch, inv_scale_factor, dst_dtype, dst_no_data_value = create_index_dict(
        src_ds, tilesize_x, tilesize_y, ysize_pad, tile_bdr, filename_digits,
        is_categorical, units, description, strict_datum)
    write_index_file(index_path, index_dict)

    np_dst_dtype = gdal_array.GDALTypeCodeToNumericTypeCode(dst_dtype)

    needs_scaling = inv_scale_factor is not None
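    # To make the tiling below concrete (illustrative numbers, not taken from any
    # particular dataset): with xsize=100 and tilesize_x=30, tiles_x is
    # [0, 30, 60, 90], so the last tile carries only 10 columns of real data and
    # is padded to 30 with the no-data value. With a halo of tile_bdr=3, each
    # tile is written as tilesize_bdr_x = 30 + 2*3 = 36 columns: the halo is read
    # from neighbouring data where available and no-data-filled at dataset edges.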
    # As we have no control over the auxiliary files that are also created during
    # the conversion, we do everything in a temporary folder and move the binary
    # file out after the conversion. This keeps everything clean and tidy.
    tmp_dir = tempfile.mkdtemp()
    tmp_bin_path = os.path.join(tmp_dir, 'data.bin')

    driver = gdal.GetDriverByName('ENVI')  # type: gdal.Driver

    dy = src_ds.GetGeoTransform()[5]  # row height; negative for north-up rasters

    try:
        for start_x in tiles_x:
            for start_y in tiles_y:
                end_x = start_x + tilesize_x - 1
                end_y = start_y + tilesize_y - 1
                start_bdr_x = start_x - tile_bdr
                start_bdr_y = start_y - tile_bdr
                end_bdr_x = end_x + tile_bdr
                end_bdr_y = end_y + tile_bdr

                # read source data, clamping the halo window to the dataset extent
                offset_x = max(0, start_bdr_x)
                offset_y = max(0, start_bdr_y)
                if end_bdr_x >= xsize:
                    datasize_x = xsize - offset_x
                else:
                    datasize_x = end_bdr_x - offset_x + 1
                if end_bdr_y >= ysize:
                    datasize_y = ysize - offset_y
                else:
                    datasize_y = end_bdr_y - offset_y + 1
                src_data = band.ReadAsArray(offset_x, offset_y, datasize_x, datasize_y)

                # normalize row order (dy > 0 means the source stores rows south-to-north)
                if dy > 0:
                    src_data = src_data[::-1]

                # scale if necessary (float data only)
                if needs_scaling:
                    # TODO test if scaling with no-data works
                    if has_no_data_value:
                        src_data = ma.masked_equal(src_data, src_no_data_value)
                    src_data *= inv_scale_factor
                    np.round(src_data, out=src_data)
                    if has_no_data_value:
                        src_data = ma.filled(src_data, dst_no_data_value)

                # pad incomplete tile with the no-data value
                if datasize_x == tilesize_bdr_x and datasize_y == tilesize_bdr_y:
                    dst_data = src_data
                else:
                    assert has_no_data_value
                    dst_data = np.empty((tilesize_bdr_y, tilesize_bdr_x), np_dst_dtype)
                    data_start_x = offset_x - start_bdr_x
                    data_start_y = offset_y - start_bdr_y
                    dst_data[data_start_y:data_start_y+datasize_y,
                             data_start_x:data_start_x+datasize_x] = src_data
                    if start_bdr_x < 0:
                        dst_data[:, :data_start_x] = dst_no_data_value
                    if start_bdr_y < 0:
                        dst_data[:data_start_y, :] = dst_no_data_value
                    if end_bdr_x >= xsize:
                        dst_data[:, data_start_x+datasize_x:] = dst_no_data_value
                    if end_bdr_y >= ysize:
                        dst_data[data_start_y+datasize_y:, :] = dst_no_data_value

                # create tile file
                dst_ds = driver.Create(tmp_bin_path, tilesize_bdr_x, tilesize_bdr_y,
                                       1, dst_dtype)  # type: gdal.Dataset
                dst_band = dst_ds.GetRasterBand(1)  # type: gdal.Band
                dst_band.WriteArray(dst_data)

                # write to disk
                dst_ds.FlushCache()
                del dst_ds

                # move to final location with the WPS-specific filename convention,
                # e.g. 00001-00030.00001-00030 (1-based, inclusive cell ranges)
                fmt_int = '{:0' + str(filename_digits) + 'd}'
                fmt_filename = '{fmt}-{fmt}.{fmt}-{fmt}'.format(fmt=fmt_int)

                if dy < 0:
                    # tiles are counted from the bottom row upwards
                    end_y = ysize_pad - start_y - 1
                    start_y = end_y - tilesize_y + 1

                final_path = os.path.join(output_folder, fmt_filename.format(
                    start_x + 1, end_x + 1, start_y + 1, end_y + 1))
                shutil.move(tmp_bin_path, final_path)

        return GeogridBinaryDataset(index_path, datum_mismatch)
    finally:
        shutil.rmtree(tmp_dir)
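# Example usage (an illustrative sketch: 'landcover.tif' and 'landcover_wps' are
# hypothetical names; whether GeogridBinaryDataset exposes named fields is an
# assumption, so the result is printed as a whole):
if __name__ == '__main__':
    result = convert_to_wps_binary(
        input_path='landcover.tif',     # hypothetical GDAL-readable raster
        output_folder='landcover_wps',  # created if missing; must be empty
        is_categorical=True)            # class IDs, so no units/scaling needed
    print(result)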