def test_decode_cf_datetime_non_standard_units(self):
    """Units in the non-ISO form 'hours since 1-1-1970' still decode."""
    # netCDFs from madis.noaa.gov use this format for their time units
    # they cannot be parsed by netcdftime, but pd.Timestamp works
    num_hours = np.arange(100)
    units = 'hours since 1-1-1970'
    expected = pd.date_range(periods=100, start='1970-01-01', freq='h')
    actual = conventions.decode_cf_datetime(num_hours, units)
    self.assertArrayEqual(actual, expected)
def test_decode_non_standard_calendar(self):
    """Non-standard calendars decode to datetime64[ns] within 1 s accuracy."""
    import netCDF4 as nc4
    units = 'days since 0001-01-01'
    times = pd.date_range('2001-04-01-00', end='2001-04-30-23', freq='H')
    expected = times.values
    for calendar in ('noleap', '365_day', '360_day', 'julian',
                     'all_leap', '366_day'):
        noleap_time = nc4.date2num(times.to_pydatetime(), units,
                                   calendar=calendar)
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'Unable to decode time axis')
            actual = conventions.decode_cf_datetime(noleap_time, units,
                                                    calendar=calendar)
        self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
        # once we no longer support versions of netCDF4 older than 1.1.5,
        # we could do this check with near microsecond accuracy:
        # https://github.com/Unidata/netcdf4-python/issues/355
        abs_diff = abs(actual - expected)
        self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all())
def test_decode_non_standard_calendar_multidim_time(self):
    """A 2-D array of noleap-encoded times decodes column-wise."""
    import netCDF4 as nc4
    calendar = 'noleap'
    units = 'days since 0001-01-01'
    ranges = [pd.date_range('2001-04-01', end='2001-04-05', freq='D'),
              pd.date_range('2001-05-01', end='2001-05-05', freq='D')]
    # encode each date_range separately, then stack them as two columns
    encoded = [nc4.date2num(r.to_pydatetime(), units, calendar=calendar)
               for r in ranges]
    mdim_time = np.column_stack(encoded)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'Unable to decode time axis')
        actual = conventions.decode_cf_datetime(mdim_time, units,
                                                calendar=calendar)
    self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
    self.assertArrayEqual(actual[:, 0], ranges[0].values)
    self.assertArrayEqual(actual[:, 1], ranges[1].values)
def _process_coords(self, df, reference_file_name):
    """Auto-detect time and depth columns in *df* and post-process them.

    Time columns (name contains 'time' or '_secs') are decoded to
    datetime64 using the units attribute read from *reference_file_name*;
    the raw numeric values are preserved in a '<col>_raw' column.
    If a depth column is found, the 'dives' column is re-indexed so that
    down-casts get integer indices and up-casts half-integer indices.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to process; must contain a 'dives' column when a depth
        column is present.
    reference_file_name : str
        Path to a netCDF file holding the time variable's 'units' attr.

    Returns
    -------
    pandas.DataFrame
        The modified dataframe (also mutated in place for most columns).
    """
    # TRY TO GET DEPTH AND TIME COORDS AUTOMATICALLY
    for col in df.columns:
        # DECODING TIMES IF PRESENT
        # FIX: use boolean `or` instead of bitwise `|` on bools
        if ('time' in col.lower()) or ('_secs' in col.lower()):
            time = col
            self.__data__.time_name = time
            nco = Dataset(reference_file_name)
            # FIX: ensure the netCDF handle is closed even if decoding
            # raises (previously leaked on error)
            try:
                units = nco.variables[time].getncattr('units')
                df[time + '_raw'] = df.loc[:, time].copy()
                if 'seconds since 1970' in units:
                    df[time] = df.loc[:, time].astype('datetime64[s]')
                else:
                    from xarray.conventions import decode_cf_datetime
                    df[time] = decode_cf_datetime(df.loc[:, time], units)
            finally:
                nco.close()

        # INDEXING DIVES IF DEPTH PRESENT
        if 'depth' in col.lower():
            depth = col
            self.__data__.depth_name = col

            # INDEX UP AND DOWN DIVES
            grp = df.groupby('dives')
            dmax = grp[depth].apply(np.nanargmax)
            # row label of the maximum depth within each dive
            idx = [grp.groups[i][dmax[i]] for i in grp.groups]
            # create a dummy index 'up' that marks max depth
            df['up'] = 0
            df.loc[idx, 'up'] = 1
            df['up'] = df.up.cumsum()
            # average 'dives' and 'up' for a dive index
            df['dives'] = (df['dives'] + df['up']) / 2.
            df = df.drop('up', axis=1)

    return df
def test_decode_cf_datetime_non_iso_strings(self):
    """Unit strings that are nearly-but-not-quite ISO still decode."""
    # datetime strings that are _almost_ ISO compliant but not quite,
    # but which netCDF4.num2date can still parse correctly
    expected = pd.date_range(periods=100, start='2000-01-01', freq='h')
    num_dates = np.arange(100)
    for units in ('hours since 2000-01-01 0',
                  'hours since 2000-1-1 0',
                  'hours since 2000-01-01 0:00'):
        actual = conventions.decode_cf_datetime(num_dates, units)
        self.assertArrayEqual(actual, expected)
def test_decode_non_standard_calendar_single_element(self):
    """Scalar and nested single-element inputs decode to datetime64[ns]."""
    units = 'days since 0001-01-01'
    calendars = ('noleap', '365_day', '360_day', 'julian', 'all_leap',
                 '366_day')
    # scalar, 1-element list and nested 1-element list forms
    single_element_inputs = (735368, [735368], [[735368]])
    for calendar in calendars:
        for num_time in single_element_inputs:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore',
                                        'Unable to decode time axis')
                actual = conventions.decode_cf_datetime(
                    num_time, units, calendar=calendar)
            self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
def test_cf_datetime_nan(self):
    """NaN entries decode to NaT alongside valid day offsets."""
    cases = [
        ([np.nan], 'days since 2000-01-01', ['NaT']),
        ([np.nan, 0], 'days since 2000-01-01',
         ['NaT', '2000-01-01T00:00:00Z']),
        ([np.nan, 0, 1], 'days since 2000-01-01',
         ['NaT', '2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z']),
    ]
    for num_dates, units, expected_list in cases:
        with warnings.catch_warnings():
            # numpy emits an all-NaN warning here; it is expected
            warnings.filterwarnings('ignore', 'All-NaN')
            actual = conventions.decode_cf_datetime(num_dates, units)
        expected = np.array(expected_list, dtype='datetime64[ns]')
        self.assertArrayEqual(expected, actual)
def test_decode_non_standard_calendar_single_element_fallback(self):
    """Dates with no Gregorian equivalent fall back to object dtype.

    Feb 29, 2001 exists only on 360_day/all_leap/366_day calendars, so
    decoding must warn and return netcdftime objects rather than
    datetime64.

    FIX: removed a leftover debug ``print(...)`` call.
    """
    import netCDF4 as nc4
    units = 'days since 0001-01-01'
    dt = nc4.netcdftime.datetime(2001, 2, 29)
    for calendar in ['360_day', 'all_leap', '366_day']:
        num_time = nc4.date2num(dt, units, calendar)
        with self.assertWarns('Unable to decode time axis'):
            actual = conventions.decode_cf_datetime(num_time, units,
                                                    calendar=calendar)
        expected = np.asarray(nc4.num2date(num_time, units, calendar))
        self.assertEqual(actual.dtype, np.dtype('O'))
        self.assertEqual(expected, actual)
def test_decode_cf_datetime_overflow(self):
    """Dates outside the datetime64[ns] range decode without overflowing."""
    # checks for
    # https://github.com/pydata/pandas/issues/14068
    # https://github.com/pydata/xarray/issues/975
    from datetime import datetime
    units = 'days since 2000-01-01 00:00:00'
    # date after 2262 and before 1678
    cases = ((-117608, datetime(1677, 12, 31)),
             (95795, datetime(2262, 4, 12)))
    for day, expected in cases:
        result = conventions.decode_cf_datetime(day, units)
        self.assertEqual(result, expected)
def test_decode_non_standard_calendar_fallback(self):
    """Whole arrays on 360_day/366_day/all_leap fall back to object dtype.

    BUG FIX: the original body reassigned ``calendar = '360_day'`` as the
    first statement of the inner loop, so the '366_day' and 'all_leap'
    cases were silently never exercised. The override is removed so every
    listed calendar is actually tested.
    """
    import netCDF4 as nc4
    # ensure leap year doesn't matter
    for year in [2010, 2011, 2012, 2013, 2014]:
        for calendar in ['360_day', '366_day', 'all_leap']:
            units = 'days since {0}-01-01'.format(year)
            num_times = np.arange(100)
            expected = nc4.num2date(num_times, units, calendar)

            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter('always')
                actual = conventions.decode_cf_datetime(num_times, units,
                                                        calendar=calendar)
                # exactly one fallback warning must have been issued
                self.assertEqual(len(w), 1)
                self.assertIn('Unable to decode time axis',
                              str(w[0].message))

            self.assertEqual(actual.dtype, np.dtype('O'))
            self.assertArrayEqual(actual, expected)
def test_decode_non_standard_calendar(self):
    """Non-standard calendar decoding agrees with pandas to within 1 s.

    NOTE(review): this file defines ``test_decode_non_standard_calendar``
    twice; this later definition shadows the earlier one — consider
    removing one of them.
    """
    import netCDF4 as nc4
    non_standard = ['noleap', '365_day', '360_day', 'julian', 'all_leap',
                    '366_day']
    for calendar in non_standard:
        units = 'days since 0001-01-01'
        times = pd.date_range('2001-04-01-00', end='2001-04-30-23',
                              freq='H')
        noleap_time = nc4.date2num(times.to_pydatetime(), units,
                                   calendar=calendar)
        expected = times.values
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'Unable to decode time axis')
            actual = conventions.decode_cf_datetime(noleap_time, units,
                                                    calendar=calendar)
        self.assertEqual(actual.dtype, np.dtype('M8[ns]'))
        abs_diff = abs(actual - expected)
        # once we no longer support versions of netCDF4 older than 1.1.5,
        # we could do this check with near microsecond accuracy:
        # https://github.com/Unidata/netcdf4-python/issues/355
        self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all())
def test_cf_datetime(self):
    """Round-trip decode/encode across many unit strings and calendars.

    Checks that decode_cf_datetime handles well-formed and slightly
    malformed unit strings, and that encode_cf_datetime restores the
    original numeric values (except for units pandas mis-parses).

    FIX: removed a leftover debug ``print(...)`` and a commented-out
    assertion.
    """
    import netCDF4 as nc4
    for num_dates, units in [
            (np.arange(10), 'days since 2000-01-01'),
            (np.arange(10).reshape(2, 5), 'days since 2000-01-01'),
            (12300 + np.arange(5), 'hours since 1680-01-01 00:00:00'),
            # here we add a couple minor formatting errors to test
            # the robustness of the parsing algorithm.
            (12300 + np.arange(5), 'hour since 1680-01-01 00:00:00'),
            (12300 + np.arange(5), u'Hour since 1680-01-01 00:00:00'),
            (12300 + np.arange(5), ' Hour since 1680-01-01 00:00:00 '),
            (10, 'days since 2000-01-01'),
            ([10], 'daYs since 2000-01-01'),
            ([[10]], 'days since 2000-01-01'),
            ([10, 10], 'days since 2000-01-01'),
            (np.array(10), 'days since 2000-01-01'),
            (0, 'days since 1000-01-01'),
            ([0], 'days since 1000-01-01'),
            ([[0]], 'days since 1000-01-01'),
            (np.arange(2), 'days since 1000-01-01'),
            (np.arange(0, 100000, 20000), 'days since 1900-01-01'),
            (17093352.0, 'hours since 1-1-1 00:00:0.0'),
            ([0.5, 1.5], 'hours since 1900-01-01T00:00:00'),
            (0, 'milliseconds since 2000-01-01T00:00:00'),
            (0, 'microseconds since 2000-01-01T00:00:00'),
    ]:
        for calendar in ['standard', 'gregorian', 'proleptic_gregorian']:
            expected = _ensure_naive_tz(
                nc4.num2date(num_dates, units, calendar))
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore',
                                        'Unable to decode time axis')
                actual = conventions.decode_cf_datetime(num_dates, units,
                                                        calendar)
            if (isinstance(actual, np.ndarray) and
                    np.issubdtype(actual.dtype, np.datetime64)):
                # For some reason, numpy 1.8 does not compare ns precision
                # datetime64 arrays as equal to arrays of datetime objects,
                # but it works for us precision. Thus, convert to us
                # precision for the actual array equal comparison...
                actual_cmp = actual.astype('M8[us]')
            else:
                actual_cmp = actual
            self.assertArrayEqual(expected, actual_cmp)
            encoded, _, _ = conventions.encode_cf_datetime(actual, units,
                                                           calendar)
            if '1-1-1' not in units:
                # pandas parses this date very strangely, so the original
                # units/encoding cannot be preserved in this case:
                # (Pdb) pd.to_datetime('1-1-1 00:00:0.0')
                # Timestamp('2001-01-01 00:00:00')
                self.assertArrayEqual(num_dates, np.around(encoded, 1))
                if (hasattr(num_dates, 'ndim') and num_dates.ndim == 1 and
                        '1000' not in units):
                    # verify that wrapping with a pandas.Index works
                    # note that it *does not* currently work to even put
                    # non-datetime64 compatible dates into a pandas.Index :(
                    encoded, _, _ = conventions.encode_cf_datetime(
                        pd.Index(actual), units, calendar)
                    self.assertArrayEqual(num_dates,
                                          np.around(encoded, 1))
def test_cf_datetime(self):
    """Round-trip decode/encode across many unit strings and calendars.

    NOTE(review): this is a duplicate definition of ``test_cf_datetime``;
    it shadows the earlier one — consider removing one of the two.

    FIX: removed a leftover debug ``print(...)`` and a commented-out
    assertion.
    """
    import netCDF4 as nc4
    for num_dates, units in [
            (np.arange(10), 'days since 2000-01-01'),
            (np.arange(10).reshape(2, 5), 'days since 2000-01-01'),
            (12300 + np.arange(5), 'hours since 1680-01-01 00:00:00'),
            # here we add a couple minor formatting errors to test
            # the robustness of the parsing algorithm.
            (12300 + np.arange(5), 'hour since 1680-01-01 00:00:00'),
            (12300 + np.arange(5), u'Hour since 1680-01-01 00:00:00'),
            (12300 + np.arange(5), ' Hour since 1680-01-01 00:00:00 '),
            (10, 'days since 2000-01-01'),
            ([10], 'daYs since 2000-01-01'),
            ([[10]], 'days since 2000-01-01'),
            ([10, 10], 'days since 2000-01-01'),
            (np.array(10), 'days since 2000-01-01'),
            (0, 'days since 1000-01-01'),
            ([0], 'days since 1000-01-01'),
            ([[0]], 'days since 1000-01-01'),
            (np.arange(2), 'days since 1000-01-01'),
            (np.arange(0, 100000, 20000), 'days since 1900-01-01'),
            (17093352.0, 'hours since 1-1-1 00:00:0.0'),
            ([0.5, 1.5], 'hours since 1900-01-01T00:00:00'),
            (0, 'milliseconds since 2000-01-01T00:00:00'),
            (0, 'microseconds since 2000-01-01T00:00:00'),
    ]:
        for calendar in ['standard', 'gregorian', 'proleptic_gregorian']:
            expected = _ensure_naive_tz(
                nc4.num2date(num_dates, units, calendar))
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore',
                                        'Unable to decode time axis')
                actual = conventions.decode_cf_datetime(
                    num_dates, units, calendar)
            if (isinstance(actual, np.ndarray) and
                    np.issubdtype(actual.dtype, np.datetime64)):
                # For some reason, numpy 1.8 does not compare ns precision
                # datetime64 arrays as equal to arrays of datetime objects,
                # but it works for us precision. Thus, convert to us
                # precision for the actual array equal comparison...
                actual_cmp = actual.astype('M8[us]')
            else:
                actual_cmp = actual
            self.assertArrayEqual(expected, actual_cmp)
            encoded, _, _ = conventions.encode_cf_datetime(
                actual, units, calendar)
            if '1-1-1' not in units:
                # pandas parses this date very strangely, so the original
                # units/encoding cannot be preserved in this case:
                # (Pdb) pd.to_datetime('1-1-1 00:00:0.0')
                # Timestamp('2001-01-01 00:00:00')
                self.assertArrayEqual(num_dates, np.around(encoded, 1))
                if (hasattr(num_dates, 'ndim') and num_dates.ndim == 1 and
                        '1000' not in units):
                    # verify that wrapping with a pandas.Index works
                    # note that it *does not* currently work to even put
                    # non-datetime64 compatible dates into a pandas.Index :(
                    encoded, _, _ = conventions.encode_cf_datetime(
                        pd.Index(actual), units, calendar)
                    self.assertArrayEqual(num_dates,
                                          np.around(encoded, 1))