def test_dataset_repr_with_netcdf4_datetimes(self):
    # regression test for #347
    attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'}
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'unable to decode time')
        ds = decode_cf(Dataset({'time': ('time', [0, 1], attrs)}))
        self.assertIn('(time) object', repr(ds))

    attrs = {'units': 'days since 1900-01-01'}
    ds = decode_cf(Dataset({'time': ('time', [0, 1], attrs)}))
    self.assertIn('(time) datetime64[ns]', repr(ds))
def create_data():
    ds = xray.Dataset()
    ds['time'] = ('time', np.arange(10),
                  {'units': 'hours since 2013-12-12 12:00:00'})
    ds['longitude'] = ('longitude',
                       np.mod(np.arange(235., 240.) + 180, 360) - 180,
                       {'units': 'degrees east'})
    ds['latitude'] = ('latitude', np.arange(35., 40.),
                      {'units': 'degrees north'})
    shape = tuple([ds.dims[x] for x in ['time', 'longitude', 'latitude']])
    # Beaufort scale boundaries in knots, converted to m/s
    beaufort_scale = np.array([0., 1., 3., 6., 10., 16., 21., 27., 33.,
                               40., 47., 55., 63., 75.]) / 1.94384449
    mids = 0.5 * (beaufort_scale[1:] + beaufort_scale[:-1])
    speeds = mids[np.random.randint(mids.size, size=10 * 5 * 5)]
    speeds = speeds.reshape(shape)
    dirs = np.linspace(-7 * np.pi / 8, np.pi, 16)
    dirs = dirs[np.random.randint(dirs.size, size=10 * 5 * 5)]
    dirs = dirs.reshape(shape)
    # the directions are the direction the wind blows from ('from' convention)
    uwnd = -speeds * np.sin(dirs)
    uwnd = uwnd.reshape(shape).astype(np.float32)
    vwnd = -speeds * np.cos(dirs)
    vwnd = vwnd.reshape(shape).astype(np.float32)
    ds['x_wind'] = (('time', 'longitude', 'latitude'), uwnd, {'units': 'm/s'})
    ds['y_wind'] = (('time', 'longitude', 'latitude'), vwnd, {'units': 'm/s'})
    ds = add_tiny_variable(variables.pressure, ds)
    return xray.decode_cf(ds)
def create_gfs_data():
    ds = xray.Dataset()
    ds['time'] = ('time', np.arange(0, 120, 3),
                  {'units': 'hours since 2013-12-12 12:00:00'})
    ds['longitude'] = ('longitude',
                       np.mod(np.arange(0., 360.) + 180, 360) - 180,
                       {'units': 'degrees east'})
    ds['latitude'] = ('latitude', np.arange(65, -66, -1),
                      {'units': 'degrees north'})
    shape = tuple([ds.dims[x] for x in ['time', 'longitude', 'latitude']])
    size = reduce(np.multiply, shape)
    # Beaufort scale boundaries in knots, converted to m/s
    beaufort_scale = np.array([0., 1., 3., 6., 10., 16., 21., 27., 33.,
                               40., 47., 55., 63., 75.]) / 1.94384449
    mids = 0.5 * (beaufort_scale[1:] + beaufort_scale[:-1])
    speeds = mids[np.random.randint(mids.size, size=size)]
    speeds = speeds.reshape(shape)
    dirs = np.linspace(-7 * np.pi / 8, np.pi, 16)
    dirs = dirs[np.random.randint(dirs.size, size=size)]
    dirs = dirs.reshape(shape)
    # the directions are the direction the wind blows from ('from' convention)
    uwnd = -speeds * np.sin(dirs)
    uwnd = uwnd.reshape(shape).astype(np.float32)
    vwnd = -speeds * np.cos(dirs)
    vwnd = vwnd.reshape(shape).astype(np.float32)
    ds['ugrd10m'] = (('time', 'longitude', 'latitude'), uwnd, {'units': 'm/s'})
    ds['vgrd10m'] = (('time', 'longitude', 'latitude'), vwnd, {'units': 'm/s'})
    return xray.decode_cf(ds)
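# A minimal, self-contained sketch (not part of the original code) of the
# 'from' convention used in create_data()/create_gfs_data() above and by
# angles.radial_to_vector(..., orientation="from") in test_data() below:
# with u = -speed * sin(dir) and v = -speed * cos(dir), dir is the
# direction the wind blows *from*.
import numpy as np

def _wind_components(speed, direction_from):
    # hypothetical helper, illustrative only
    u = -speed * np.sin(direction_from)
    v = -speed * np.cos(direction_from)
    return u, v

# a northerly wind (blowing from due north, dir = 0) moves air southward:
u, v = _wind_components(10.0, 0.0)
assert np.isclose(u, 0.0) and np.isclose(v, -10.0)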
def decompress_dataset(payload):
    """
    Unpacks a dataset that has been packed using compress_dataset()
    """
    payload = zlib.decompress(payload)
    # the first byte of the payload holds the format version
    version = np.frombuffer(payload[:1], dtype=np.uint8)[0]
    payload = payload[1:]
    if version > _VERSION:
        raise ValueError("The forecast was compressed using a "
                         "newer version than the version currently "
                         "installed. Consider upgrading slocum")
    elif version < _VERSION:
        # TODO: Allow queries to specify the version, so that users
        # with older versions can request forecasts they can still read.
        raise NotImplementedError("Backward compatibility is not currently "
                                  "supported. Your version of slocum is newer "
                                  "than the server, consider rolling back")
    # iterate through the payload and decompress individual variables
    output = xray.Dataset()
    while len(payload):
        var_name, packed, payload = _split_single_variable(payload)
        variable = utils.get_variable(var_name)
        output.update(variable.decompress(packed, output), inplace=True)
        logging.debug("Decoded %s" % var_name)
    return xray.decode_cf(output)
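# Hypothetical round-trip sketch for decompress_dataset(), assuming that
# compress_dataset() (referenced in the docstring above) is the matching
# packer and that create_data() builds a compressible forecast; neither
# assumption is verified here:
forecast = create_data()
payload = compress_dataset(forecast)      # pack variables, then zlib-compress
restored = decompress_dataset(payload)    # back to a CF-decoded xray.Dataset
# restored should hold the same variables as forecast, at reduced precision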
def test_write_store(self):
    expected = create_test_data()
    with self.create_store() as store:
        expected.dump_to_store(store)
        # we need to CF-decode the store because it has time and
        # non-dimension coordinates
        actual = xray.decode_cf(store)
        self.assertDatasetAllClose(expected, actual)
def test_small_time(self):
    ds = create_data()
    sm_time = tinylib.small_time(ds['time'])
    num_times, units = tinylib.expand_small_time(sm_time['packed_array'])
    actual = xray.Dataset({'time': ('time', num_times, {'units': units})})
    actual = xray.decode_cf(actual)
    self.assertTrue(np.all(actual['time'].values == ds['time'].values))
    self.assertTrue(units == ds['time'].encoding['units'])
def test_data():
    ds = xray.Dataset()
    ds['time'] = ('time', np.arange(4),
                  {'units': 'hours since 2013-12-12 12:00:00'})
    ds['longitude'] = ('longitude',
                       np.mod(np.arange(235., 240.) + 180, 360) - 180,
                       {'units': 'degrees east'})
    ds['latitude'] = ('latitude', np.arange(35., 40.),
                      {'units': 'degrees north'})
    shape = tuple([ds.dims[x] for x in ['time', 'longitude', 'latitude']])
    x, y = np.meshgrid(np.arange(-2, 3), np.arange(-2, 3))
    wind_mids = 0.5 * (variables.wind_bins[1:] + variables.wind_bins[:-1])
    wind_speed = wind_mids[x * x + y * y]
    current_mids = 0.5 * (variables.current_bins[1:] +
                          variables.current_bins[:-1])
    current_speed = current_mids[x * x + y * y]
    dir = np.arctan2(y, x)
    current_speeds = np.empty(shape)
    wind_speeds = np.empty(shape)
    dirs = np.empty(shape)
    for i in range(ds.dims['time']):
        wind_speeds[i] = wind_speed
        current_speeds[i] = current_speed
        dirs[i] = dir + i * np.pi / 2
    uwnd, vwnd = angles.radial_to_vector(wind_speeds, dirs.copy(),
                                         orientation="from")
    ds['x_wind'] = (('time', 'longitude', 'latitude'), uwnd, {'units': 'm/s'})
    ds['y_wind'] = (('time', 'longitude', 'latitude'), vwnd, {'units': 'm/s'})
    ucurr, vcurr = angles.radial_to_vector(current_speeds, dirs.copy(),
                                           orientation="from")
    ds['sea_water_x_velocity'] = (('time', 'longitude', 'latitude'), ucurr,
                                  {'units': 'm/s'})
    ds['sea_water_y_velocity'] = (('time', 'longitude', 'latitude'), vcurr,
                                  {'units': 'm/s'})
    return xray.decode_cf(ds)
def test_invalid_units_raises_eagerly(self):
    ds = Dataset({'time': ('time', [0, 1], {'units': 'foobar since 123'})})
    with self.assertRaisesRegexp(ValueError, 'unable to decode time'):
        decode_cf(ds)
    yield (lev, lev_slice), (lat, lat_slice)


if __name__ == "__main__":
    args = parser.parse_args()

    # Convenience function for prepending output path
    # _out_path = lambda s: os.path.join(args.out_path, s)
    _out_path = lambda s: s

    # Nudge times to the year 2000
    data = xray.open_dataset(args.aerosol_ds, decode_times=False)
    times = data.coords.to_dataset().time
    times += 2000. * 365
    data = xray.decode_cf(data)

    # Global troposphere slice for quick ref
    global_tropo = data.sel(lev=slice(700, 1100), lat=slice(-80, 80))
    # global_tropo = global_tropo.isel(time=-1)

    ####################################################################
    # Overview boxplots
    for subset in all_subsets():
        (lev, lev_slice), (lat, lat_slice) = subset
        print(lev, lat)

        data_subset = data.sel(lev=lev_slice, lat=lat_slice)
        # data_subset = data_subset.isel(time=-1)
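# A minimal, self-contained sketch (made-up values, not from the original
# script) of the nudge-then-decode pattern used above: open with
# decode_times=False to keep the time axis numeric, shift the raw values,
# then run decode_cf() once at the end.
import numpy as np
import xray

raw = xray.Dataset({'time': ('time', np.array([0., 31., 59.]),
                             {'units': 'days since 2000-01-01'})})
raw['time'].values = raw['time'].values + 365.   # shift the raw (numeric) axis
shifted = xray.decode_cf(raw)                    # decode CF times afterwards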
def ingest(infile, read_vars, tshift=None):
    """
    Read input and output files from the PLUMBER experiment

    Parameters
    ----------
    Required:
    infile : string
        input file name (netcdf format)
    read_vars : list or string ('all')
        list of variables to read from infile. If read_vars == 'all'
        then all variables are retained.
    Default:
    tshift : time shift in minutes (default=None)

    Returns
    -------
    df : pandas dataframe
        data frame with those elements in read_vars that are present
        in infile

    The returned dataframe is not guaranteed to have all the variables
    that are specified in read_vars. It will only include those that
    are available. It is up to the user to check for completeness.
    """
    # make a copy of read_vars since we don't want to change the list in
    # the calling scope
    if read_vars != 'all':
        try:
            read_vars = read_vars.copy()
        except AttributeError as err:
            logging.critical('%s: read_vars should be a list or \'all\'', err)
            raise

    # read infile using xray
    try:
        ds = xray.open_dataset(infile, decode_times=False)
    except RuntimeError as err:
        logging.critical('%s: failed to read: %s', err, infile)
        raise

    # find the time dimension
    time_dim = [x for x in ds.dims if re.search('time', x, re.I)][0]
    # rename the time dimension to 'time' to make life easier
    if time_dim != 'time':
        ds.rename({time_dim: 'time'}, inplace=True)

    # only keep the time dimension, drop the others
    dims = [x for x in ds.dims if x != 'time']
    # select the [0] element for all other dimensions
    dd = dict(zip(dims, [0] * len(dims)))
    ds = ds.isel(**dd)
    # drop all non-time dimensions
    ds = ds.drop(dims)

    # reconstruct Rnet if it is not provided
    if 'Rnet' in read_vars or read_vars == 'all':
        if 'Rnet' not in ds.variables:
            try:
                ds['Rnet'] = ds['SWnet'] + ds['LWnet']
            except KeyError:
                pass

    # drop all variables that are not in read_vars (but keep time)
    if read_vars != 'all':
        read_vars.append('time')
        ds = ds.drop(list(set(ds.variables) - set(read_vars)))

    # align the time according to tshift
    # The easiest way to do this would be to use
    #     ds = ds.tshift(tshift, freq='T')
    # However, the tshift() method is currently very slow, so we do the
    # shift on the raw time axis and then decode after
    if tshift:
        # tshift is given in minutes
        ds.time += tshift * 60
    # we don't want partial seconds (the dimension has already been
    # renamed to 'time' above, so address it by that name)
    ds['time'].values = ds['time'].values.round()
    ds = xray.decode_cf(ds, decode_times=True)

    # convert to dataframe
    df = ds.to_dataframe()

    # some of the time stamps in PLUMBER are messed up -- regularize
    df = df.asfreq('30Min', method='nearest')

    return df
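# Hypothetical usage sketch for ingest(); the file name and variable names
# are placeholders, not taken from the original code:
df = ingest('site_fluxes.nc', ['Rnet', 'SWnet', 'LWnet'], tshift=30)
# reads the requested variables (reconstructing Rnet from SWnet + LWnet if
# absent), shifts the raw time axis by 30 minutes before CF decoding, and
# returns a 30-minute-regular pandas DataFrame
print(df.head())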