def fix_metadata(self, cubes):
    """
    Fix metadata.

    Fixes bad time bounds.

    Parameters
    ----------
    cubes: iris.cube.CubeList

    Returns
    -------
    iris.cube.CubeList

    """
    cube = self.get_cube_from_list(cubes)
    time = cube.coord('time')
    if self._fix_required(time):
        times = time.units.num2date(time.points)
        starts = [cftime.DatetimeJulian(c.year, c.month, 1) for c in times]
        ends = [
            cftime.DatetimeJulian(c.year, c.month + 1, 1)
            if c.month < 12
            else cftime.DatetimeJulian(c.year + 1, 1, 1)
            for c in times
        ]
        time.bounds = time.units.date2num(np.stack([starts, ends], -1))
    return cubes

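# Illustrative sketch (not part of the fix above): the list comprehensions set
# each time point's bounds to the first of its month and the first of the
# following month, rolling the year over in December. For example:
#
#     point  = cftime.DatetimeJulian(2016, 12, 15)
#     bounds = (cftime.DatetimeJulian(2016, 12, 1), cftime.DatetimeJulian(2017, 1, 1))
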
def _create_raw_dataset(
    dims, tile_range, time_coord_random, attrs,
):
    spatial_dims = {dim for dim in dims if dim not in ["tile", "time"]}
    coords = {
        dim: np.arange(i + 1) for i, dim in enumerate(sorted(spatial_dims))
    }
    sizes = {dim: len(coords[dim]) for dim in spatial_dims}
    coords.update({"tile": tile_range})
    sizes["tile"] = tile_range.shape[0]
    if time_coord_random:
        time_coord = [
            cftime.DatetimeJulian(2016, 1, n + 1, 0, 0, 0, np.random.randint(100))
            for n in range(TIME_DIM)
        ]
    else:
        time_coord = [
            cftime.DatetimeJulian(2016, 1, n + 1, 0, 0, 0) for n in range(TIME_DIM)
        ]
    coords.update({"time": time_coord})
    sizes["time"] = TIME_DIM
    arr = np.ones([sizes[k] for k in sorted(sizes)])
    data_array = xr.DataArray(arr, dims=sorted(dims))
    data_array.attrs.update(attrs)
    return xr.Dataset({DATA_VAR: data_array}, coords=coords)

def skip_if_3d_output_absent(arg: DiagArg) -> DiagArg:
    prognostic, verification, grid = arg.prediction, arg.verification, arg.grid
    dummy_ds = xr.Dataset().assign_coords(
        {
            "time": [
                cftime.DatetimeJulian(2020, 1, 1, 12),
                cftime.DatetimeJulian(2020, 1, 1, 15, 30),
            ]
        }
    )
    prog = prognostic if len(prognostic) > 0 else dummy_ds
    verif = verification if len(verification) > 0 else dummy_ds
    return DiagArg(prog, verif, grid)

def test_DiagnosticFileConfig(config, sinktype):
    diag_file = config.diagnostic_file(
        initial_time=cftime.DatetimeJulian(2016, 8, 1),
        partitioner=None,
        comm=Mock(),
    )
    assert isinstance(diag_file._sink, sinktype)

def get_mock_dataset(n_time):
    n_x, n_y, n_z, n_tile = (8, 8, 10, 6)
    arr = np.zeros((n_time, n_tile, n_z, n_y, n_x))
    arr_surface = np.zeros((n_time, n_tile, n_y, n_x))
    dims = ["time", "tile", "z", "y", "x"]
    dims_surface = ["time", "tile", "y", "x"]

    data = xr.Dataset(
        {
            "specific_humidity": (dims, arr),
            "air_temperature": (dims, arr),
            "downward_shortwave": (dims_surface, arr_surface),
            "net_shortwave": (dims_surface, arr_surface),
            "downward_longwave": (dims_surface, arr_surface),
            "dQ1": (dims, arr),
            "dQ2": (dims, arr),
            "dQu": (dims, arr),
            "dQv": (dims, arr),
        },
        coords={
            "time": [
                cftime.DatetimeJulian(2016, 8, day) for day in range(1, 1 + n_time)
            ]
        },
    )
    return data

def input_args():
    mask = [[[0, 1], [0, 2]]]
    area = [[[1, 2], [3, 4]]]
    latitude = [[[0, 0], [15, 15]]]
    p = [[[[10000, 10000], [10000, 10000]]], [[[20000, 20000], [20000, 20000]]]]
    ntimes = 5
    temp = [[[[0.5, 1.5], [2.5, 3.5]]]] * ntimes
    time_coord = [
        cftime.DatetimeJulian(2016, 4, 2, i + 1, 0, 0) for i in range(ntimes)
    ]
    ds = xr.Dataset(
        data_vars={
            "SLMSKsfc": (["tile", "x", "y"], mask),
            "temperature": (["time", "tile", "x", "y"], temp),
            "var_3d": (["time", "z", "tile", "x", "y"], [p] * ntimes),
        },
        coords={"time": time_coord},
    )
    grid = xr.Dataset(
        data_vars={
            "lat": (["tile", "x", "y"], latitude),
            "area": (["tile", "x", "y"], area),
            "land_sea_mask": (["tile", "x", "y"], mask),
        }
    )
    delp = xr.DataArray(
        data=[p] * ntimes,
        dims=["time", "z", "tile", "x", "y"],
        name="pressure_thickness_of_atmospheric_layer",
        coords={"time": time_coord},
    )
    return DiagArg(ds, ds.copy(), grid, delp)

def test_shift():
    initial_time = cftime.DatetimeJulian(
        year=2016, month=8, day=5, hour=12, minute=7, second=30
    )
    dt = datetime.timedelta(minutes=15)
    times = [initial_time, initial_time + dt, initial_time + 2 * dt]
    arr = [0.0, 1.0, 2.0]
    ds = xr.Dataset({"a": (["time"], arr)}, coords={"time": times})
    steps = ["begin", "middle", "end"]
    expected_times = [initial_time + dt / 2, initial_time + 3 * dt / 2]
    expected = xr.Dataset(
        {"a": (["step", "time"], [[0.0, 1.0], [0.5, 1.5], [1.0, 2.0]])},
        coords={"time": expected_times, "step": steps},
    )
    shifted = shift(ds, dt=dt / 2)
    xr.testing.assert_equal(shifted, expected)

def parse_datetime_from_str(time: str) -> cftime.DatetimeJulian:
    """Retrieve a datetime object from an FV3GFS timestamp string."""
    t = datetime.strptime(time, TIME_FMT)
    return cftime.DatetimeJulian(t.year, t.month, t.day, t.hour, t.minute, t.second)

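# Hedged usage sketch: TIME_FMT is defined elsewhere in the module; assuming it
# is the FV3GFS "%Y%m%d.%H%M%S" timestamp format (matching labels such as
# "20200101.000000" used in other snippets here), a call would look like:
#
#     parse_datetime_from_str("20160801.001500")
#     # -> cftime.DatetimeJulian(2016, 8, 1, 0, 15, 0)
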
def test_cf_time_subset(self):
    ds1 = new_cube(
        variables=dict(analysed_sst=0.6, mask=8),
        use_cftime=True,
        time_dtype=None,
        time_units='days since 1950-01-01',
        time_calendar='julian',
    )
    ds2 = select_temporal_subset(ds1, time_range=('2010-01-02', '2010-01-04'))
    self.assertIsNot(ds2, ds1)
    np.testing.assert_equal(
        np.array(
            [
                cftime.DatetimeJulian(2010, 1, 2, 12, 0, 0),
                cftime.DatetimeJulian(2010, 1, 3, 12, 0, 0),
                cftime.DatetimeJulian(2010, 1, 4, 12, 0, 0),
            ],
            dtype='object',
        ),
        ds2.time.values,
    )

def str_to_cftime(datestring):
    """Convert a date string to a cftime object."""
    dt = datetime.strptime(datestring, "%Y-%m-%d")
    # cfdt = cftime.datetime(dt.year, dt.month, dt.day, calendar=calendar)
    cfdt = cftime.DatetimeJulian(dt.year, dt.month, dt.day)
    return cfdt

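# Usage sketch (illustrative, not from the original module):
#
#     str_to_cftime("2016-08-01")
#     # -> cftime.DatetimeJulian(2016, 8, 1, 0, 0, 0)
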
def _label_to_time(time: str) -> cftime.DatetimeJulian:
    return cftime.DatetimeJulian(
        int(time[:4]),
        int(time[4:6]),
        int(time[6:8]),
        int(time[9:11]),
        int(time[11:13]),
        int(time[13:15]),
    )

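# Usage sketch, assuming the "YYYYMMDD.HHMMSS" label convention seen in the
# open_zarr test below (index 8 is the "." separator, which the slices skip):
#
#     _label_to_time("20160801.003000")
#     # -> cftime.DatetimeJulian(2016, 8, 1, 0, 30, 0)
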
def test_tensorboardsink(monkeypatch):
    state = xarray.Dataset({"a": (["y", "x"], np.ones((10, 10)))})
    time = cftime.DatetimeJulian(2000, 1, 1)
    sink = TensorBoardSink()
    image_summary = Mock()
    monkeypatch.setattr(tf.summary, "image", image_summary)
    sink.sink(time, state)
    assert image_summary.called

def test_tendency_prescriber(state, tmpdir, regtest):
    time = cftime.DatetimeJulian(2016, 8, 1)
    path = str(tmpdir.join("tendencies.zarr"))
    tendencies = _get_tendencies(time)
    tendencies.to_zarr(path, consolidated=True)
    derived_state = _get_derived_state(state, time)
    derived_state_copy = _get_derived_state(state, time)
    communicator = _get_dummy_comm()
    diagnostic_variables = [
        "tendency_of_air_temperature_due_to_override",
        "specific_humidity",
    ]
    config = {
        "mapper_config": {"function": "open_zarr", "kwargs": {"data_path": path}},
        "variables": {"air_temperature": "Q1"},
    }
    override = TendencyPrescriber(
        dacite.from_dict(TendencyPrescriberConfig, config),
        derived_state,
        communicator,
        timestep=2,
        diagnostic_variables=diagnostic_variables,
    )

    def add_one():
        derived_state["air_temperature"] = derived_state["air_temperature"] + 1
        derived_state["specific_humidity"] = derived_state["specific_humidity"] + 1
        return {"some_diag": derived_state["specific_humidity"]}

    diags = override(add_one)()
    xr.testing.assert_identical(
        derived_state["specific_humidity"],
        derived_state_copy["specific_humidity"] + 1,
    )
    xr.testing.assert_identical(
        derived_state["air_temperature"],
        (derived_state_copy["air_temperature"] + 2).assign_attrs(units="degK"),
    )
    expected_monitored_tendency = (
        tendencies.isel(time=0).drop("time").Q1.assign_coords(tile=range(6))
    )
    xr.testing.assert_allclose(
        diags["tendency_of_air_temperature_due_to_tendency_prescriber"],
        expected_monitored_tendency,
    )
    for variable in sorted(diags):
        print(variable, joblib.hash(diags[variable].values), file=regtest)

def datetime(
    year,
    month,
    day,
    hour=0,
    minute=0,
    second=0,
    microsecond=0,
    tzinfo=None,
    calendar='proleptic_gregorian',
):
    """
    Retrieves a datetime-like object with the requested calendar.

    Calendar types other than 'proleptic_gregorian' require the netcdftime
    module to be installed.

    Parameters
    ----------
    year : int
    month : int
    day : int
    hour : int, optional
    minute : int, optional
    second : int, optional
    microsecond : int, optional
    tzinfo : datetime.tzinfo, optional
        A timezone information class, such as from pytz. Can only be used
        with the 'proleptic_gregorian' calendar, as netcdftime does not
        support timezones.
    calendar : string, optional
        Should be one of 'proleptic_gregorian', 'no_leap', '365_day',
        'all_leap', '366_day', '360_day', 'julian', or 'gregorian'. Default
        is 'proleptic_gregorian', which returns a normal Python datetime.
        Other options require the netcdftime module to be installed.

    Returns
    -------
    datetime : datetime-like
        The requested datetime. May be a Python datetime, or one of the
        datetime-like types in netcdftime.
    """
    kwargs = {
        'year': year,
        'month': month,
        'day': day,
        'hour': hour,
        'minute': minute,
        'second': second,
        'microsecond': microsecond,
    }
    if calendar.lower() == 'proleptic_gregorian':
        return real_datetime(tzinfo=tzinfo, **kwargs)
    elif tzinfo is not None:
        raise ValueError('netcdftime does not support timezone-aware datetimes')
    elif ct is None:
        raise DependencyError(
            "Calendars other than 'proleptic_gregorian' require the netcdftime "
            "package, which is not installed.")
    elif calendar.lower() in ('all_leap', '366_day'):
        return ct.DatetimeAllLeap(**kwargs)
    elif calendar.lower() in ('no_leap', 'noleap', '365_day'):
        return ct.DatetimeNoLeap(**kwargs)
    elif calendar.lower() == '360_day':
        return ct.Datetime360Day(**kwargs)
    elif calendar.lower() == 'julian':
        return ct.DatetimeJulian(**kwargs)
    elif calendar.lower() == 'gregorian':
        return ct.DatetimeGregorian(**kwargs)

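# Hedged usage sketch: assumes `ct` is the optional netcdftime/cftime module
# imported at module level and `real_datetime` is the standard-library
# datetime class. Under those assumptions:
#
#     datetime(2016, 8, 1, calendar='julian')
#     # -> ct.DatetimeJulian(2016, 8, 1, 0, 0, 0, 0)
#     datetime(2016, 8, 1)
#     # -> a plain Python datetime.datetime(2016, 8, 1, 0, 0)
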
def make_year_constraint_all_calendars(start, end):
    """Utility function to create a dict of time constraints on a yearly basis.

    This creates a dict of the same time constraint, but for different
    calendars. Since comparisons between different calendar types are not
    (always) possible, a time coordinate should be compared to the constraint
    built with the same calendar. The calendar type (as a string) can be
    obtained through the coordinate's `units` attribute:
    `cube.coord('time').units.calendar`; the resulting string is the key into
    the dict, whose value is the matching constraint.

    Arguments
    ---------
    start, end: integer
        Start and end year. Month and day are 1, 1 for the starting year, and
        12, 31 (or 12, 30 for a 360-day calendar) for the end year.
    """
    dates = {
        'default': (cftime.datetime(start, 1, 1),
                    cftime.datetime(end, 12, 31)),
        '360_day': (cftime.Datetime360Day(start, 1, 1),
                    cftime.Datetime360Day(end, 12, 30)),
        '365_day': (cftime.DatetimeNoLeap(start, 1, 1),
                    cftime.DatetimeNoLeap(end, 12, 31)),
        'proleptic_gregorian': (cftime.DatetimeProlepticGregorian(start, 1, 1),
                                cftime.DatetimeProlepticGregorian(end, 12, 31)),
        'gregorian': (cftime.DatetimeGregorian(start, 1, 1),
                      cftime.DatetimeGregorian(end, 12, 31)),
        'julian': (cftime.DatetimeJulian(start, 1, 1),
                   cftime.DatetimeJulian(end, 12, 31)),
    }
    constraints = {
        key: make_date_constraint(*value) for key, value in dates.items()
    }
    return constraints

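# Hedged usage sketch, assuming an iris cube is in scope and that
# make_date_constraint (defined elsewhere) builds an iris time constraint from
# two datetimes:
#
#     constraints = make_year_constraint_all_calendars(1960, 1990)
#     calendar = cube.coord('time').units.calendar
#     constraint = constraints.get(calendar, constraints['default'])
#     selected = cube.extract(constraint)
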
def _translate_time(
    time: Tuple[int, int, int, int, int, int]
) -> cftime.DatetimeJulian:
    # list order is set by Fortran from variable Model%jdat
    year = time[0]
    month = time[1]
    day = time[2]
    hour = time[4]
    minute = time[5]
    datetime = cftime.DatetimeJulian(year, month, day, hour, minute)
    logger.debug(f"Translated input time: {datetime}")
    return datetime

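# Usage sketch (illustrative): index 3 of the jdat-style tuple is skipped by
# the code above (apparently a time-zone slot in that convention), so:
#
#     _translate_time((2016, 8, 1, 0, 12, 30))
#     # -> cftime.DatetimeJulian(2016, 8, 1, 12, 30)
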
def test_batches_from_mapper_different_indexing_conventions(tiles):
    n = 48
    ds = xr.Dataset(
        {"a": (["time", "tile", "y", "x"], np.zeros((1, 6, n, n)))},
        coords={"time": [cftime.DatetimeJulian(2016, 8, 1)], "tile": tiles},
    )
    mapper = loaders.mappers.XarrayMapper(ds)
    seq = batches_from_mapper(mapper, ["a", "lon"], res=f"c{n}")
    assert len(seq) == 1
    assert ds.a[0].size == seq[0].a.size

def test_DiagnosticFile_fails_with_non_existing_variable(times):
    data_vars = {"a": (["x"], [1.0])}
    dataset = xr.Dataset(data_vars)
    diagnostics = {key: dataset[key] for key in dataset}
    init_time = cftime.DatetimeJulian(2016, 8, 1)
    time_container = times(init_time)
    diag_file = DiagnosticFile(
        ["not in diagnostics"], times=time_container, sink=Mock()
    )
    with pytest.raises(KeyError):
        diag_file.observe(init_time, diagnostics)

def _tendency_dataset():
    temperature_tendency = np.full((6, 8, 63, 12, 12), 0.1 / 86400)
    times = [
        cftime.DatetimeJulian(2016, 8, 1) + timedelta(minutes=n)
        for n in range(0, 120, 15)
    ]
    da = xr.DataArray(
        data=temperature_tendency,
        dims=["tile", "time", "z", "y", "x"],
        coords=dict(time=times),
        attrs={"units": "K/s"},
    )
    return xr.Dataset({"Q1": da})

def test_standardize_fv3_diagnostics_time_coord():
    diag = _create_raw_dataset(DIM_NAMES, TILE_RANGE, True, {})
    standardized_ds = standardize_fv3_diagnostics(diag)
    expected_coord = [
        cftime.DatetimeJulian(2016, 1, n + 1, 0, 0, 0) for n in range(TIME_DIM)
    ]
    xr.testing.assert_equal(
        standardized_ds.time,
        xr.DataArray(expected_coord, dims=["time"], coords={"time": expected_coord}),
    )

def test_time_mean():
    ntimes = 5
    time_coord = [
        cftime.DatetimeJulian(2016, 4, 2, i + 1) for i in range(ntimes)
    ]
    ds = xr.Dataset(
        data_vars={"temperature": (["time", "x"], np.zeros((ntimes, 10)))},
        coords={"time": time_coord},
    )
    diagnostic = savediags.time_mean(ds)
    assert diagnostic.temperature.attrs["diagnostic_start_time"] == str(time_coord[0])
    assert diagnostic.temperature.attrs["diagnostic_end_time"] == str(time_coord[-1])

def test__time_interpolate_func_has_correct_value(fraction):
    initial_time = cftime.DatetimeJulian(2016, 1, 1)
    frequency = timedelta(hours=3)
    attrs = {"units": "foo"}

    def func(time):
        value = float(time - initial_time > timedelta(hours=1.5))
        return {"a": xr.DataArray(data=np.array([value]), dims=["x"], attrs=attrs)}

    myfunc = _time_interpolate_func(func, frequency, initial_time)
    ans = myfunc(initial_time + frequency * fraction)
    assert isinstance(ans["a"], xr.DataArray)
    assert float(ans["a"].values) == pytest.approx(fraction)
    assert ans["a"].attrs == attrs

def test_open_zarr(tmpdir, time_dim_name):
    time = cftime.DatetimeJulian(2020, 1, 1)
    time_str = "20200101.000000"
    ds = xr.Dataset(
        {"a": ([time_dim_name, "tile", "z", "y", "x"], np.ones((1, 2, 3, 4, 5)))},
        coords={time_dim_name: [time]},
    )
    ds.to_zarr(str(tmpdir), consolidated=True)
    mapper = open_zarr(str(tmpdir), dim=time_dim_name)
    assert isinstance(mapper, XarrayMapper)
    xr.testing.assert_equal(mapper[time_str], ds.isel({time_dim_name: 0}))

def validate_chunks(config_dict: Mapping[str, Any]) -> None:
    """Ensure that the time chunk size evenly divides the time dimension size
    for all output diagnostics. Raise ConfigValidationError if not."""
    user_config: UserConfig = dacite.from_dict(UserConfig, config_dict)
    run_duration = fv3config.get_run_duration(config_dict)
    initial_time = vcm.round_time(
        cftime.DatetimeJulian(*config_dict["namelist"]["coupler_nml"]["current_date"])
    )
    timestep = timedelta(seconds=config_dict["namelist"]["coupler_nml"]["dt_atmos"])
    for diag_file_config in user_config.diagnostics:
        _validate_time_chunks(diag_file_config, initial_time, timestep, run_duration)
    for fortran_file_config in user_config.fortran_diagnostics:
        _validate_time_chunks(
            fortran_file_config, initial_time, timestep, run_duration
        )

def test__validate_time_chunks(times, chunks, should_validate):
    initial_time = cftime.DatetimeJulian(2016, 8, 1)
    timestep = timedelta(minutes=15)
    run_duration = timedelta(hours=12)
    diag_file_config = runtime.config.DiagnosticFileConfig(
        "diags.zarr", variables=["air_temperature"], times=times, chunks=chunks,
    )
    if should_validate:
        _validate_time_chunks(diag_file_config, initial_time, timestep, run_duration)
    else:
        with pytest.raises(ConfigValidationError):
            _validate_time_chunks(
                diag_file_config, initial_time, timestep, run_duration
            )

def test_monitor_file_store_multi_rank_state(
    layout, nt, tmpdir_factory, shape, ny_rank_add, nx_rank_add, dims, numpy
):
    units = "m"
    tmpdir = tmpdir_factory.mktemp("data.zarr")
    nz, ny, nx = shape
    ny_rank = int(ny / layout[0] + ny_rank_add)
    nx_rank = int(nx / layout[1] + nx_rank_add)
    grid = fv3gfs.util.TilePartitioner(layout)
    time = cftime.DatetimeJulian(2010, 6, 20, 6, 0, 0)
    timestep = timedelta(hours=1)
    total_ranks = 6 * layout[0] * layout[1]
    partitioner = fv3gfs.util.CubedSpherePartitioner(grid)
    store = zarr.storage.DirectoryStore(tmpdir)
    shared_buffer = {}
    monitor_list = []
    for rank in range(total_ranks):
        monitor_list.append(
            fv3gfs.util.ZarrMonitor(
                store,
                partitioner,
                "w",
                mpi_comm=DummyComm(
                    rank=rank, total_ranks=total_ranks, buffer_dict=shared_buffer
                ),
            )
        )
    for i_t in range(nt):
        for rank in range(total_ranks):
            state = {
                "time": time + i_t * timestep,
                "var1": fv3gfs.util.Quantity(
                    numpy.ones([nz, ny_rank, nx_rank]), dims=dims, units=units,
                ),
            }
            monitor_list[rank].store(state)
    group = zarr.hierarchy.open_group(store=store, mode="r")
    assert "var1" in group
    assert group["var1"].shape == (nt, 6, nz, ny + ny_rank_add, nx + nx_rank_add)
    numpy.testing.assert_array_equal(group["var1"], 1.0)

def test__time_interpolate_func_only_grabs_correct_points():
    initial_time = cftime.DatetimeJulian(2016, 1, 1)
    frequency = timedelta(hours=2)
    valid_times = [
        initial_time,
        initial_time + frequency,
    ]

    def assert_passed_valid_times(time):
        assert time in valid_times
        return {}

    myfunc = _time_interpolate_func(assert_passed_valid_times, frequency, initial_time)
    # will raise error if incorrect times grabbed
    myfunc(initial_time + frequency / 3)
    with pytest.raises(AssertionError):
        myfunc(initial_time + 4 * frequency / 3)

def _get_forecast_time_index(initialization_time, duration, interval):
    """Return a list of cftime.DatetimeJulian objects for the restart output."""
    if interval == timedelta(seconds=0):
        # TODO why do we need this if-statement? It seems like interval == 0
        # shouldn't be possible
        interval = duration
    end_time = initialization_time + duration
    return [
        cftime.DatetimeJulian(
            timestamp.year,
            timestamp.month,
            timestamp.day,
            timestamp.hour,
            timestamp.minute,
            timestamp.second,
        )
        for timestamp in pd.date_range(
            start=initialization_time, end=end_time, freq=interval
        )
    ]

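# Illustrative sketch of the returned index (hypothetical values): with an
# initialization time of 2016-08-01 00Z, a duration of 6 hours, and an
# interval of 3 hours, the function yields
#
#     [cftime.DatetimeJulian(2016, 8, 1, 0, 0, 0),
#      cftime.DatetimeJulian(2016, 8, 1, 3, 0, 0),
#      cftime.DatetimeJulian(2016, 8, 1, 6, 0, 0)]
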
def _get_datasets_to_append(with_coords, lengths, chunk_sizes):
    datasets = []
    for length, chunk_size in zip(lengths, chunk_sizes):
        array = xr.DataArray(
            np.arange(5 * length).reshape((length, 5)), dims=["time", "x"]
        )
        ds = xr.Dataset({"var1": array.chunk({"time": chunk_size})})
        ds["var1"].encoding["chunks"] = (chunk_size, 5)
        datasets.append(ds)
    if with_coords:
        full_coord = [
            cftime.DatetimeJulian(2000, 1, d) for d in range(1, sum(lengths) + 1)
        ]
        for i, ds in enumerate(datasets):
            ds_coord = full_coord[sum(lengths[:i]):sum(lengths[:i + 1])]
            datasets[i] = ds.assign_coords(time=ds_coord)
            datasets[i]["time"].encoding["chunks"] = (chunk_sizes[i],)
    return datasets

def test_solar_time():
    t = xr.DataArray(
        [
            cftime.DatetimeJulian(2020, 1, 1, 0, 0),
            cftime.DatetimeJulian(2020, 1, 1, 0, 0),
            cftime.DatetimeJulian(2020, 1, 1, 0, 0),
            cftime.DatetimeJulian(2020, 1, 1, 0, 0),
            cftime.DatetimeJulian(2020, 1, 1, 6, 0),
            cftime.DatetimeJulian(2020, 1, 1, 6, 0),
        ],
        dims=["x"],
        coords={"x": range(6)},
    )
    lon = xr.DataArray([0, 180, 270, 360, 0, 270], dims=["x"], coords={"x": range(6)})
    ds_solar_test = xr.Dataset({"initialization_time": t, "lon": lon})
    assert np.allclose(local_time(ds_solar_test), [0, 12, 18, 0, 6, 0])