def test_Stream(test_file):
    """Check that FileStream's first message has the expected number of keys
    and that the stream's message count matches the message's own 'count' key."""
    dataset, request, key_count = TEST_FILES[test_file]
    path = cdscommon.ensure_data(dataset, request, name='cds-' + test_file + '-{uuid}.grib')

    stream = cfgrib.FileStream(path)
    first_message = stream.first()

    assert len(first_message) == key_count
    assert sum(1 for _ in stream) == first_message['count']
def test_reanalysis_Stream(test_file):
    """Check key count and total message count for a reanalysis FileStream."""
    dataset, request, key_count = TEST_FILES[test_file]
    path = cdscommon.ensure_data(dataset, request, name=test_file + "{ext}")

    stream = cfgrib.FileStream(path)
    first_message = stream.first()

    assert len(first_message) == key_count
    assert sum(1 for _ in stream) == first_message["count"]
def _convert_unpacked_to_netcdf4(self, download_folder):
    """Convert the many small unpacked GRIB files into one NetCDF4 file each.

    Scans *download_folder* for ``HA40_N25_*_GB`` files, merges the regular
    lat/lon fields of each file into a single dataset, and writes it out as
    NetCDF4 named after the prediction time encoded in the source file name.

    :param download_folder: folder containing the unpacked GRIB files; also
        used as the destination for the generated ``.nc`` files.
    """
    grib_files = glob.glob(
        str(Path(download_folder).joinpath("HA40_N25_*_GB")))
    # The grid is assumed identical across files; derive it once from the
    # first one. (TODO confirm: raises IndexError if no files matched.)
    lats, lons = self._build_grid_block(grib_files[0])
    for grib_file in grib_files:
        # Timestamps are encoded in the bare file name, so strip the
        # folder prefix (and its trailing separator) before parsing.
        time_prediction_made, predicted_hour = self._get_time_from_file(
            grib_file[len(download_folder) + 1:])
        cfg_file = cfgrib.FileStream(grib_file)
        file_dataset = None
        for cfg_message in cfg_file:
            # We skip any rotated grid data from the first file in each set
            # of files. Only regular ll grids will do.
            if cfg_message["gridType"] != "regular_ll":
                continue
            field_name, line_dataset = self._process_cfg_message_to_line_dataset(
                cfg_message=cfg_message,
                lats=lats,
                lons=lons,
                time_prediction_made=time_prediction_made,
                predicted_hour=predicted_hour)
            if line_dataset is None:
                continue
            if file_dataset is None:
                # First usable field seeds the per-file dataset.
                file_dataset = line_dataset
            else:
                # Subsequent fields are merged in as additional variables.
                file_dataset[field_name] = line_dataset[field_name]
        # BUGFIX: previously a file with no usable regular_ll message left
        # file_dataset as None and crashed on None.unstack() below.
        if file_dataset is None:
            self.logger.warning(
                f"No usable regular_ll data in {grib_file}, skipping")
            continue
        save_filename = Path(download_folder).joinpath(
            self.file_prefix + str(time_prediction_made.year) +
            str(time_prediction_made.month).zfill(2) +
            str(time_prediction_made.day).zfill(2) + "_" +
            str(time_prediction_made.hour).zfill(2) +
            "00_prediction_for_" +
            str(predicted_hour).zfill(2) + "00.nc")
        file_dataset = file_dataset.unstack("coord")
        # Fix the time encoding so all generated files share one epoch.
        file_dataset.time.encoding["units"] = "hours since 2018-01-01"
        file_dataset.to_netcdf(path=save_filename, format="NETCDF4")
        self.logger.info(f"Saved dataset as {save_filename}")
def _build_grid_block(file: str):
    """Derive the regular lat/lon grid axes from a GRIB file.

    Reads the first ``regular_ll`` message in *file* and builds the full
    latitude and longitude axes from its first/last grid point and step
    (step assumed to already be in milli-degrees — TODO confirm against
    the GRIB edition in use).

    :param file: path to a GRIB file containing at least one regular_ll
        message.
    :return: tuple ``(latitudes, longitudes)`` as lists of floats in degrees.
    :raises ValueError: if the file contains no regular_ll message.
    """
    cf_streamed_file = cfgrib.FileStream(file)
    line_to_use = None
    for cf_line in cf_streamed_file:
        # Make sure we don't pick up the rotated grid that exists in the
        # first hourly file (0000) for each prediction.
        if cf_line["gridType"] == "regular_ll":
            line_to_use = cf_line
            break
    # BUGFIX: fail with a clear error instead of an opaque TypeError on the
    # subscripts below when no regular_ll message exists.
    if line_to_use is None:
        raise ValueError(f"No regular_ll GRIB message found in {file}")
    lat_first = float(line_to_use["latitudeOfFirstGridPointInDegrees"])
    lat_last = float(line_to_use["latitudeOfLastGridPointInDegrees"])
    lon_first = float(line_to_use["longitudeOfFirstGridPointInDegrees"])
    lon_last = float(line_to_use["longitudeOfLastGridPointInDegrees"])
    lat_step = float(line_to_use["jDirectionIncrement"])
    lon_step = float(line_to_use["iDirectionIncrement"])
    # Work in integer milli-degrees so range() steps exactly.
    # BUGFIX: round() instead of int() — plain truncation can shift or drop a
    # grid point when degrees*1000 is not exactly representable
    # (e.g. 49.3 * 1000 == 49299.999...).
    latitudes = [
        v / 1000
        for v in range(
            round(lat_first * 1000),
            round(lat_last * 1000) + round(lat_step),
            round(lat_step),
        )
    ]
    longitudes = [
        v / 1000
        for v in range(
            round(lon_first * 1000),
            round(lon_last * 1000) + round(lon_step),
            round(lon_step),
        )
    ]
    return latitudes, longitudes