def fixture_dataframe():
    """
    Loads the sample ICESat-2 ATL11 data, and processes it into
    a suitable pandas.DataFrame format.
    """
    dataset: xr.Dataset = catalog.test_data.atl11_test_case.to_dask()
    dataset["utc_time"] = deltatime_to_utctime(dataarray=dataset.delta_time)

    with tempfile.TemporaryDirectory() as tmpdir:
        df: pd.DataFrame = ndarray_to_parquet(
            ndarray=dataset,
            parquetpath=os.path.join(tmpdir, "temp.parquet"),
            variables=["longitude", "latitude", "h_corr", "utc_time"],
            use_deprecated_int96_timestamps=True,
        )
    dataframe: pd.DataFrame = wide_to_long(
        df=df, stubnames=["h_corr", "utc_time"], j="cycle_number"
    )
    dataframe: pd.DataFrame = dataframe.reset_index(drop=True)
    # Mock up a dummy track1_track2 column based on the cycle_number
    dataframe["track1_track2"] = np.where(
        dataframe["cycle_number"] == 1, "0111_pt1x0222_pt2", "0333pt3x0111_pt1"
    )
    return dataframe
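# Note: fixture_dataframe above is presumably registered as a pytest fixture so
# that tests can request the prepared DataFrame by name. A minimal sketch of
# that wiring; the decorator arguments and the consuming test below are
# illustrative assumptions, not taken from the source:
import pandas as pd
import pytest


@pytest.fixture(scope="module", name="dataframe")  # assumed registration
def fixture_dataframe_sketch():
    return fixture_dataframe()


def test_dummy_crossover_labels(dataframe: pd.DataFrame):
    # Cycle 1 rows carry the first mocked track1_track2 label,
    # every other cycle carries the second one
    labels = dataframe.loc[dataframe["cycle_number"] == 1, "track1_track2"]
    assert (labels == "0111_pt1x0222_pt2").all()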
def test_deltatime_to_utctime_xarray_dataarray():
    """
    Test that converting from ICESat-2 delta_time to utc_time works on an
    xarray.DataArray, and that the dimensions are preserved in the process.
    """
    atl11_dataset: xr.Dataset = catalog.test_data.atl11_test_case.to_dask()
    utc_time: xr.DataArray = deltatime_to_utctime(
        dataarray=atl11_dataset.delta_time
    )

    assert utc_time.shape == (1404, 2)
    assert utc_time.dims == ("ref_pt", "cycle_number")
    assert dask.is_dask_collection(utc_time)

    utc_time = utc_time.compute()
    npt.assert_equal(
        actual=utc_time.data.min(),
        desired=np.datetime64("2019-05-19T20:53:51.039891534"),
    )
    npt.assert_equal(
        actual=np.datetime64(pd.DataFrame(utc_time.data)[0].mean()),
        desired=np.datetime64("2019-05-19 20:54:00.925868"),
    )
    npt.assert_equal(
        actual=np.datetime64(pd.DataFrame(utc_time.data)[1].mean()),
        desired=np.datetime64("2019-08-18 16:33:47.791226"),
    )
    npt.assert_equal(
        actual=utc_time.data.max(),
        desired=np.datetime64("2019-08-18T16:33:57.834610209"),
    )
def test_deltatime_to_utctime_pandas_series():
    """
    Test that converting from ICESat-2 delta_time to utc_time works on a
    pandas.Series.
    """
    atl11_dataset: xr.Dataset = catalog.test_data.atl11_test_case.to_dask()
    atl11_dataframe: pd.DataFrame = atl11_dataset.to_dataframe()
    utc_time: pd.Series = deltatime_to_utctime(
        dataarray=atl11_dataframe.delta_time
    )

    assert utc_time.shape == (2808,)

    npt.assert_equal(
        actual=utc_time.min(),
        desired=pd.Timestamp("2019-05-19T20:53:51.039891534"),
    )
    npt.assert_equal(
        actual=utc_time.loc[3].mean(),
        desired=pd.Timestamp("2019-05-19 20:54:00.925868800"),
    )
    npt.assert_equal(
        actual=utc_time.loc[4].mean(),
        desired=pd.Timestamp("2019-08-18 16:33:47.791226368"),
    )
    npt.assert_equal(
        actual=utc_time.max(),
        desired=pd.Timestamp("2019-08-18T16:33:57.834610209"),
    )
def test_deltatime_to_utctime_numpy_timedelta64():
    """
    Test that converting from ICESat-2 delta_time to utc_time works on a
    single numpy.timedelta64 object.
    """
    delta_time = np.timedelta64(24731275413287379, "ns")
    utc_time: np.datetime64 = deltatime_to_utctime(dataarray=delta_time)

    npt.assert_equal(
        actual=utc_time, desired=np.datetime64("2018-10-14T05:47:55.413287379")
    )
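# The three tests above pin down what deltatime_to_utctime must do: add each
# GPS-style timedelta to the ICESat-2 reference epoch of 2018 January 1st
# (see the markdown note further below). A minimal sketch of such a
# conversion, assuming no leap-second handling; the actual library
# implementation may differ:
import numpy as np


def deltatime_to_utctime_sketch(
    dataarray,
    start_epoch: np.datetime64 = np.datetime64("2018-01-01T00:00:00.000000"),
):
    """
    Convert ICESat-2 delta_time values (nanoseconds since the 2018-01-01
    epoch) to UTC datetimes. Because the addition broadcasts, this works on
    xarray.DataArray, pandas.Series and plain numpy.timedelta64 inputs
    alike. Leap seconds are not accounted for.
    """
    return start_epoch + dataarray


# Reproduces the single-value test case above
utc = deltatime_to_utctime_sketch(dataarray=np.timedelta64(24731275413287379, "ns"))
assert utc == np.datetime64("2018-10-14T05:47:55.413287379")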
# ### Retrieve some basic information for plots later
#
# Simply getting the number of cycles and date range
# to put into our plots later on

# %%
# Get number of ICESat-2 cycles used
num_cycles: int = len(ds.cycle_number)

# %%
# Get first and last dates to put into our plots
min_date, max_date = ("2018-10-14", "2020-04-04")
if min_date is None:
    min_delta_time = np.nanmin(ds.delta_time.isel(cycle_number=0).data).compute()
    min_utc_time = deepicedrain.deltatime_to_utctime(min_delta_time)
    min_date: str = np.datetime_as_string(arr=min_utc_time, unit="D")
if max_date is None:
    max_delta_time = np.nanmax(ds.delta_time.isel(cycle_number=-1).data).compute()
    max_utc_time = deepicedrain.deltatime_to_utctime(max_delta_time)
    max_date: str = np.datetime_as_string(arr=max_utc_time, unit="D")
print(f"Handling {num_cycles} ICESat-2 cycles from {min_date} to {max_date}")

# %%

# %% [markdown]
# # Calculate height range (h_range)
#
# A simple way of finding active subglacial lakes is to see where
# there has been a noticeably rapid change in elevation over
# a short period of time.
# ### Retrieve some basic information for plots later
#
# Simply getting the number of cycles and date range
# to put into our plots later on

# %%
# Get number of ICESat-2 cycles used
num_cycles: int = len(ds.cycle_number)

# %%
# Get first and last dates to put into our plots
min_date, max_date = ("2018-10-14", "2020-05-13")
if min_date is None:
    min_delta_time = np.nanmin(ds.delta_time.isel(cycle_number=0).data).compute()
    min_utc_time = deepicedrain.deltatime_to_utctime(min_delta_time)
    min_date: str = np.datetime_as_string(arr=min_utc_time, unit="D")
if max_date is None:
    max_delta_time = np.nanmax(ds.delta_time.isel(cycle_number=-1).data).compute()
    max_utc_time = deepicedrain.deltatime_to_utctime(max_delta_time)
    max_date: str = np.datetime_as_string(arr=max_utc_time, unit="D")
print(f"Handling {num_cycles} ICESat-2 cycles from {min_date} to {max_date}")

# %%

# %% [markdown]
# # Calculate height range (h_range)
#
# A simple way of finding active subglacial lakes is to see where
# there has been a noticeably rapid change in elevation over
# a short period of time.
# %% [markdown]
# ## Convert delta_time to utc_time
#
# To get more human-readable datetimes,
# we'll convert the delta_time attribute from the original GPS time format
# (nanoseconds since the beginning of the ICESat-2 starting epoch)
# to Coordinated Universal Time (UTC).
# The reference date for the ICESat-2 Epoch is 2018 January 1st according to
# https://github.com/SmithB/pointCollection/blob/master/is2_calendar.py#L11-L15
#
# TODO: Account for [leap seconds](https://en.wikipedia.org/wiki/Leap_second)
# in the future.

# %%
ds["utc_time"] = deepicedrain.deltatime_to_utctime(dataarray=ds.delta_time)

# %% [markdown]
# ## Mask out low quality height data
#
# Good quality data has value 0, not so good is > 0.
# Look at the 'fit_quality' attribute in `ds`
# for more information on what this quality flag means.
#
# We'll mask out values other than 0 with NaN using xarray's
# [where](http://xarray.pydata.org/en/v0.15.1/indexing.html#masking-with-where).

# %%
ds["h_corr"] = ds.h_corr.where(cond=ds.fit_quality == 0)

# %%
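# %% [markdown]
# As a quick aside on the `where` masking semantics used above: values that
# fail the condition are replaced with NaN rather than dropped, so the array
# shape is preserved. A self-contained toy example (the variable names merely
# mirror the real dataset):

# %%
import xarray as xr

toy_h_corr = xr.DataArray(data=[100.0, 101.0, 102.0], dims="ref_pt")
toy_fit_quality = xr.DataArray(data=[0, 2, 0], dims="ref_pt")
print(toy_h_corr.where(cond=toy_fit_quality == 0).data)  # [100.  nan 102.]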
# ### Retrieve some basic information for plots later
#
# Simply getting the number of cycles and date range
# to put into our plots later on

# %%
# Get number of ICESat-2 cycles used
num_cycles: int = len(ds.cycle_number)

# %%
# Get first and last dates to put into our plots
min_date, max_date = ("2018-10-14", "2020-07-16")
if min_date is None:
    min_delta_time = np.nanmin(ds.delta_time.isel(cycle_number=0).data).compute()
    min_utc_time = deepicedrain.deltatime_to_utctime(min_delta_time)
    min_date: str = np.datetime_as_string(arr=min_utc_time, unit="D")
if max_date is None:
    max_delta_time = np.nanmax(ds.delta_time.isel(cycle_number=-1).data).compute()
    max_utc_time = deepicedrain.deltatime_to_utctime(max_delta_time)
    max_date: str = np.datetime_as_string(arr=max_utc_time, unit="D")
print(f"Handling {num_cycles} ICESat-2 cycles from {min_date} to {max_date}")

# %%

# %% [markdown]
# # Calculate height range (h_range)
#
# A simple way of finding active subglacial lakes is to see where
# there has been a noticeably rapid change in elevation over
# a short period of time.
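# %% [markdown]
# A minimal sketch of the kind of h_range computation this implies, i.e. the
# peak-to-peak spread of h_corr across cycles at each reference point; the
# exact reduction used in the original analysis is an assumption here:

# %%
# Hypothetical height range: highest minus lowest height measured across all
# cycles, ignoring NaNs introduced by the quality masking earlier
ds["h_range"] = ds.h_corr.max(dim="cycle_number", skipna=True) - ds.h_corr.min(
    dim="cycle_number", skipna=True
)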