def test_coordinates_encoding(self):
    def equals_latlon(obj):
        return obj == 'lat lon' or obj == 'lon lat'

    original = Dataset({'temp': ('x', [0, 1]),
                        'precip': ('x', [0, -1])},
                       {'lat': ('x', [2, 3]),
                        'lon': ('x', [4, 5])})
    with self.roundtrip(original) as actual:
        self.assertDatasetIdentical(actual, original)
    with create_tmp_file() as tmp_file:
        original.to_netcdf(tmp_file)
        with open_dataset(tmp_file, decode_coords=False) as ds:
            self.assertTrue(equals_latlon(ds['temp'].attrs['coordinates']))
            self.assertTrue(equals_latlon(ds['precip'].attrs['coordinates']))
            self.assertNotIn('coordinates', ds.attrs)
            self.assertNotIn('coordinates', ds['lat'].attrs)
            self.assertNotIn('coordinates', ds['lon'].attrs)

    modified = original.drop(['temp', 'precip'])
    with self.roundtrip(modified) as actual:
        self.assertDatasetIdentical(actual, modified)
    with create_tmp_file() as tmp_file:
        modified.to_netcdf(tmp_file)
        with open_dataset(tmp_file, decode_coords=False) as ds:
            self.assertTrue(equals_latlon(ds.attrs['coordinates']))
            self.assertNotIn('coordinates', ds['lat'].attrs)
            self.assertNotIn('coordinates', ds['lon'].attrs)
def test_vrtdiv():
    path = ('/archive/Spencer.Hill/am2/am2clim_reyoi/gfdl.ncrc2-default-prod/'
            'pp/atmos_level/ts/monthly/1yr/atmos_level.198301-198312.')
    # Vertically defined, sigma levels.
    u_arr = xray.open_dataset(path + 'ucomp.nc').ucomp
    v_arr = xray.open_dataset(path + 'vcomp.nc').vcomp
    vort, divg = compute_vrtdiv(u_arr, v_arr)
    assert vort.shape == u_arr.shape
    assert divg.shape == u_arr.shape
    np.testing.assert_array_equal(u_arr.lat, vort.lat)
    np.testing.assert_array_equal(u_arr.lon, vort.lon)
    np.testing.assert_array_equal(u_arr.time, vort.time)
    np.testing.assert_array_equal(u_arr.pfull, vort.pfull)
    # Not vertically defined.
    u0 = u_arr[:, 0]
    v0 = v_arr[:, 0]
    vort0, divg0 = compute_vrtdiv(u0, v0)
    assert vort0.shape == u0.shape
    assert divg0.shape == u0.shape
    # Dummy case: zeros everywhere.
    u_arr_zeros = xray.DataArray(np.zeros_like(u_arr.values), dims=u_arr.dims,
                                 coords=u_arr.coords)
    v_arr_zeros = u_arr_zeros.copy()
    vort_zeros, divg_zeros = compute_vrtdiv(u_arr_zeros, v_arr_zeros)
    assert not vort_zeros.any()
    assert not divg_zeros.any()
def load_dataset(self):
    # import pdb; pdb.set_trace()
    self.xdataset_persist = xray.open_dataset(self.xdataset_url,
                                              decode_times=False)
    self.ydataset_persist = xray.open_dataset(self.ydataset_url,
                                              decode_times=False)
    self.set_lon_lat_array_coordinates()
    self.set_time_series_coordinate()
def test_write_groups(self):
    data1 = create_test_data()
    data2 = data1 * 2
    with create_tmp_file() as tmp_file:
        data1.to_netcdf(tmp_file, group='data/1')
        data2.to_netcdf(tmp_file, group='data/2', mode='a')
        with open_dataset(tmp_file, group='data/1') as actual1:
            self.assertDatasetIdentical(data1, actual1)
        with open_dataset(tmp_file, group='data/2') as actual2:
            self.assertDatasetIdentical(data2, actual2)
def __init__(self, ww3_cur_path, ww3_no_cur_path, pnboia_path=None):
    self.ww3_cur_experiment = ww3_cur_path.split('/')[-1].split('.')[0]
    self.ww3_cur_year = ww3_cur_path.split('/')[-1].split('.')[1]
    self.ww3_cur = xray.open_dataset(ww3_cur_path)
    self.ww3_no_cur = xray.open_dataset(ww3_no_cur_path)
    if pnboia_path:
        self.pnboia_path = pnboia_path
        self.pnboia = xray.open_dataset(pnboia_path)
def test_dask_roundtrip(self):
    with create_tmp_file() as tmp:
        data = create_test_data()
        data.to_netcdf(tmp)
        chunks = {'dim1': 4, 'dim2': 4, 'dim3': 4, 'time': 10}
        with open_dataset(tmp, chunks=chunks) as dask_ds:
            self.assertDatasetIdentical(data, dask_ds)
            with create_tmp_file() as tmp2:
                dask_ds.to_netcdf(tmp2)
                with open_dataset(tmp2) as on_disk:
                    self.assertDatasetIdentical(data, on_disk)
def __init__(self, hycom_url, ww3_url, pnboia_url):
    self.hycom_url = hycom_url
    self.hycom = xray.open_dataset(hycom_url)
    self.year = hycom_url.split('/')[-1].split('.')[1]
    self.ww3_url = ww3_url
    self.ww3 = xray.open_dataset(ww3_url)
    if pnboia_url:
        self.pnboia_url = pnboia_url
        self.pnboia = xray.open_dataset(pnboia_url)
def test_open_dataset(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    with create_tmp_file() as tmp:
        original.to_netcdf(tmp)
        with open_dataset(tmp, chunks={'x': 5}) as actual:
            self.assertIsInstance(actual.foo.variable.data, da.Array)
            self.assertEqual(actual.foo.variable.data.chunks, ((5, 5),))
            self.assertDatasetAllClose(original, actual)
        with open_dataset(tmp) as actual:
            self.assertIsInstance(actual.foo.variable.data, np.ndarray)
            self.assertDatasetAllClose(original, actual)
def XrayOpen(filenamein, decodetimes=True):
    try:
        if decodetimes:
            filein = xray.open_dataset(filenamein)
        else:
            filein = xray.open_dataset(filenamein, decode_times=False)
    except RuntimeError:
        print filenamein
        exit("couldn't find file")
    return filein
def test_open_dataset(self):
    original = Dataset({'foo': ('x', np.random.randn(10))})
    with create_tmp_file() as tmp:
        original.to_netcdf(tmp)
        with open_dataset(tmp, chunks={'x': 5}) as actual:
            self.assertIsInstance(actual.foo.variable.data, da.Array)
            self.assertEqual(actual.foo.variable.data.chunks, ((5, 5),))
            self.assertDatasetIdentical(original, actual)
        with open_dataset(tmp, chunks=5) as actual:
            self.assertDatasetIdentical(original, actual)
        with open_dataset(tmp) as actual:
            self.assertIsInstance(actual.foo.variable.data, np.ndarray)
            self.assertDatasetIdentical(original, actual)
def setUp(self):
    # netcdfs we'll use for input and check output against
    nc_isnobal_input = 'test/data/isnobal_input.nc'
    nc_isnobal_output = 'test/data/isnobal_output.nc'

    # connect to the virtual watershed
    self.vwc = default_vw_client()

    # load NetCDF inputs and outputs from test data
    self.input_dataset = open_dataset(nc_isnobal_input)
    self.output_dataset = open_dataset(nc_isnobal_output)

    # insert NetCDF test input to virtual watershed
    input_mr_name = 'webapp-testing-input'

    modelruns = self.vwc.modelrun_search()
    unittest_uuids = [r['Model Run UUID'] for r in modelruns.records
                      if r['Model Run Name'] == 'webapp-testing-input']

    for u in unittest_uuids:
        s = self.vwc.delete_modelrun(u)
        print "pre-test cleanup success on %s: %s" % (u, str(s))

    self.model_run_uuid = \
        self.vwc.initialize_modelrun(
            model_run_name=input_mr_name,
            description='test in vwplatform',
            researcher_name='Matt Turner',
            keywords='test,isnobal,webapp')

    self.vwc.upload(self.model_run_uuid, nc_isnobal_input)

    self.start_datetime = '2010-10-01 00:00:00'
    self.end_datetime = '2010-10-01 16:00:00'

    md = metadata_from_file(nc_isnobal_input, self.model_run_uuid,
                            self.model_run_uuid,
                            'test input for isnobal run',
                            'Dry Creek', 'Idaho', model_name='isnobal',
                            start_datetime=self.start_datetime,
                            end_datetime=self.end_datetime,
                            model_set='inputs', taxonomy='geoimage',
                            model_set_taxonomy='grid')

    # import ipdb; ipdb.set_trace()
    self.input_uuid = self.vwc.insert_metadata(md).text

    time.sleep(1)
def test_engine(self):
    data = create_test_data()
    with self.assertRaisesRegexp(ValueError, 'unrecognized engine'):
        data.to_netcdf('foo.nc', engine='foobar')
    with self.assertRaisesRegexp(ValueError, 'invalid engine'):
        data.to_netcdf(engine='netcdf4')

    with create_tmp_file() as tmp_file:
        data.to_netcdf(tmp_file)
        with self.assertRaisesRegexp(ValueError, 'unrecognized engine'):
            open_dataset(tmp_file, engine='foobar')

    netcdf_bytes = data.to_netcdf()
    with self.assertRaisesRegexp(ValueError, 'can only read'):
        open_dataset(BytesIO(netcdf_bytes), engine='foobar')
def _calc_anomalies(self, *args, **kwargs):
    '''
    calculate absolute or relative anomalies given a NetCDF file
    of the Climatic Research Unit (CRU) Historical Time Series.
    '''
    import xray

    # handle modeled vs. historical
    if self.ar5_modeled is not None and self.ar5_historical is not None:
        # parse the input name for some file metadata HARDWIRED!
        output_naming_dict = DownscaleAR5.standardized_fn_to_vars(self.ar5_modeled)
        variable = output_naming_dict['variable']

        # read in both modeled and historical
        ds = xray.open_dataset(self.ar5_modeled)
        ds = ds[variable]
        clim_ds = xray.open_dataset(self.ar5_historical)
        # climatology
        clim_ds = clim_ds.loc[{'time': slice(self.climatology_begin,
                                             self.climatology_end)}]
        climatology = clim_ds[variable].groupby('time.month').mean('time')
        del clim_ds

    elif self.ar5_historical is not None and self.ar5_modeled is None:
        output_naming_dict = standardized_fn_to_vars(self.ar5_historical)
        variable = output_naming_dict['variable']

        # read in historical
        ds = xray.open_dataset(self.ar5_historical)
        # climatology
        climatology = ds.loc[{'time': slice(self.climatology_begin,
                                            self.climatology_end)}]
        climatology = climatology[variable].groupby('time.month').mean('time')

    else:
        raise NameError('ERROR: must have both ar5_modeled and ar5_historical, '
                        'or just ar5_historical')

    if self.plev is not None:
        plevel, = np.where(ds.plev == self.plev)
        ds = ds[:, plevel[0], ...]
        climatology = climatology[:, plevel[0], ...]

    # anomalies
    if self.absolute == True:
        anomalies = ds.groupby('time.month') - climatology
    elif self.absolute == False:
        anomalies = ds.groupby('time.month') / climatology
    else:
        raise AttributeError('_calc_anomalies (ar5): absolute can only be '
                             'True or False')

    return anomalies
def load(var_name):
    path = ('/archive/Spencer.Hill/am2/am2clim_reyoi/gfdl.ncrc2-default-prod/'
            'pp/atmos_level/ts/monthly/1yr/atmos_level.198301-198312.')
    ds = xray.open_dataset(path + var_name + '.nc',
                           drop_variables=['nv', 'time_bounds'])
    return ds[var_name]
def do_netcdf_load(self, buf):
    from cStringIO import StringIO
    import xray
    f = StringIO(buf)
    return xray.open_dataset(f)
def roundtrip(self, data, **kwargs):
    f, tmp_file = tempfile.mkstemp(suffix='.nc')
    os.close(f)
    data.dump(tmp_file)
    roundtrip_data = open_dataset(tmp_file, **kwargs)
    os.remove(tmp_file)
    return roundtrip_data
def test_read_byte_attrs_as_unicode(self):
    with create_tmp_file() as tmp_file:
        with nc4.Dataset(tmp_file, 'w') as nc:
            nc.foo = b'bar'
        actual = open_dataset(tmp_file)
        expected = Dataset(attrs={'foo': 'bar'})
        self.assertDatasetIdentical(expected, actual)
def roundtrip(self, data, **kwargs):
    with create_tmp_file() as tmp_file:
        data.to_netcdf(tmp_file, format='NETCDF3_CLASSIC',
                       engine='netcdf4')
        with open_dataset(tmp_file, engine='netcdf4', **kwargs) as ds:
            yield ds
def concat_to_nc(filelist, output_filename, dim='time', begin_time=None,
                 end_time=None, nc_format='NETCDF4', **kwargs):
    '''
    take list of consecutive netcdf files (made for CMIP5 data) and stack
    them into a single larger netcdf file.  This was necessary to overcome
    some bugginess in how MFDataset is dealing with different calendar units
    on different files.  This is technically valid CF-Compliant metadata, but
    is tricky to work with.  This hack allows us to get around some of this
    unpredictable behavior.

    PARAMETERS:
    -----------
    filelist = [list] list of string file paths to the sorted netcdf files
        to stack together
    output_filename = [str] path to and name of the output file to be
        generated (.nc extension)
    dim = [str] dimension to stack on -- default is 'time'
    begin_time = [str] PANDAS-style datetime string syntax -- used in xray
    end_time = [str] PANDAS-style datetime string syntax -- used in xray
    nc_format = [str] output NetCDF format desired. valid strings are:
        'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'
        default is 'NETCDF4'
    **kwargs -- potential future arguments or overloaded args to pass
        through (none implemented)

    RETURNS:
    --------
    output_filename as string, with the important side-effect of writing
    data to disk
    '''
    import xray
    with xray.concat([xray.open_dataset(i).load() for i in filelist], dim) as ds:
        # time slicer condition
        if begin_time is not None and end_time is not None:
            ds = ds.loc[{dim: slice(begin_time, end_time)}]
        if os.path.exists(output_filename):
            os.remove(output_filename)
        ds.to_netcdf(output_filename, mode='w', format=nc_format)
    return output_filename
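# A minimal usage sketch for concat_to_nc above. The file paths, variable,
# and date range below are hypothetical placeholders illustrating the call
# pattern only; they are not taken from the original source.
import glob

filelist = sorted(glob.glob('/data/cmip5/tas_Amon_GFDL-CM3_rcp60_*.nc'))  # hypothetical paths
concat_to_nc(filelist, '/data/cmip5/tas_Amon_GFDL-CM3_rcp60_stacked.nc',
             dim='time', begin_time='2006-01-01', end_time='2100-12-31',
             nc_format='NETCDF4')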
def test_open_encodings(self):
    # Create a netCDF file with explicit time units
    # and make sure it makes it into the encodings
    # and survives a round trip
    f, tmp_file = tempfile.mkstemp(suffix='.nc')
    os.close(f)

    ds = nc4.Dataset(tmp_file, 'w')
    ds.createDimension('time', size=10)
    ds.createVariable('time', np.int32, dimensions=('time',))
    units = 'days since 1999-01-01'
    ds.variables['time'].setncattr('units', units)
    ds.variables['time'][:] = np.arange(10) + 4
    ds.close()

    expected = Dataset()
    time = pd.date_range('1999-01-05', periods=10)
    encoding = {'units': units, 'dtype': np.dtype('int32')}
    expected['time'] = ('time', time, {}, encoding)

    actual = open_dataset(tmp_file)
    self.assertXArrayEqual(actual['time'], expected['time'])
    actual_encoding = {k: v for k, v in actual['time'].encoding.iteritems()
                       if k in expected['time'].encoding}
    self.assertDictEqual(actual_encoding, expected['time'].encoding)

    os.remove(tmp_file)
def __init__(self, imp):
    self.ROOTDIR = os.path.join(os.environ['HOME'],
                                'Studies/Masters/Myroms/Msc_idealized/')
    self.roms = RunSetup(os.path.join(self.ROOTDIR, "experiments.yaml"), imp)
    grdfile = self.roms.retrive_grid('grd')

    self.Mr, self.Lr = grdfile['lat_rho'].shape  # Number of J/I-direction INTERIOR RHO-points
    self.Mu, self.Lu = grdfile['lat_u'].shape    # Number of J/I-direction U-points
    self.Mv, self.Lv = grdfile['lat_v'].shape    # Number of J/I-direction V-points
    self.h = grdfile['h']

    ############################################################################
    # Date/Time variables
    ############################################################################
    self.dstart = 0
    tdays = (self.roms.ntimes * self.roms.dt) / (60 * 60 * 24)
    self.smstime = range(0, tdays + 2)
    self.ndays = len(self.smstime)

    ############################################################################
    # Thermohaline field variables - SISPRES TS CLIMATOLOGY FIELDS
    ############################################################################
    lims = [-43.80, -42.00, -24.25, -22.65]
    DATASETdir = '/Users/Phellipe/Studies/Masters/Datasets/'
    tsclim_file = '%sClimatology/rio_ts_climatology.nc' % DATASETdir

    self.tsclim = xray.open_dataset(tsclim_file)\
                      .sel(y=slice(lims[2], lims[3]),
                           x=slice(lims[0], lims[1]))

    self.x = self.tsclim.coords['x'].values
    self.y = self.tsclim.coords['y'].values
def __init__(self):
    self.f_xr = xr.open_dataset('coarser_grid_input/gcmplt.cdf')
    self.c = lambda var: self.f_xr[var].data  # coordinates
    self.v = lambda t, d, var: self.f_xr[var][t, d, :, :].data
    self.g_mask = lambda f, m: np.ma.masked_array(f, mask=[f == m])
    self.v_sb_smpl = lambda t, d, lon, lat, var: \
        self.f_xr[var][t, d, :, :][lat, :][:, lon].data
    # np.concatenate expects the arrays as a single sequence argument
    self.v_smpl_mrg = lambda f1, f2, ax: np.concatenate((f1, f2), axis=ax)
def main(inargs):
    """Run the program"""

    # Read data
    dset_in = xray.open_dataset(inargs.fourier_file)
    df = dset_in.to_dataframe()

    # Change the amplitude columns so the value is a ranking
    amp_df = df.loc[:, df.columns.map(lambda x: 'amp' in x)]
    rank_df = amp_df.apply(rankdata, axis=1)
    rank_df = rank_df.combine_first(df)

    # Select the ones where wave 5 and 6 are in the top 3 amplitudes
    # (worst ranking must be 8 + 9 = 17)
    included = (rank_df['wave5_amp'].values + rank_df['wave6_amp'].values) >= 17
    final = rank_df.loc[included]

    # Reject days that change sign too much
    if inargs.max_sign_change:
        final = final.loc[final['sign_count'] <= inargs.max_sign_change]

    final = event_info(final, inargs.freq)

    if inargs.full_stats:
        assert not inargs.phase_filter and not inargs.season_filter and not inargs.duration_filter, \
            "Cannot filter by phase, season or duration for full stats, because then they would not be full!"
        final.to_csv(inargs.output_file)
    else:
        # Optional filtering by duration
        if inargs.duration_filter:
            final = final.loc[final['event_duration'] > inargs.duration_filter]

        # Optional filtering by season
        if inargs.season_filter:
            season = inargs.season_filter
            months_subset = pandas.to_datetime(final.index.values).month
            bools_subset = (months_subset == season_months[season][0]) + \
                           (months_subset == season_months[season][1]) + \
                           (months_subset == season_months[season][2])
            final = final.loc[bools_subset]

        # Optional filtering by wave phase
        if inargs.phase_filter:
            phase_min, phase_max = set_phase_bounds(inargs.phase_filter, inargs.freq)
            target_phase = 'wave%i_phase' % (inargs.freq)
            min_bools = (final[target_phase] > phase_min).values
            max_bools = (final[target_phase] < phase_max).values
            if phase_min < phase_max:
                final = final.loc[numpy.logical_and(min_bools, max_bools)]
            else:
                final = final.loc[numpy.logical_or(min_bools, max_bools)]

        # Write date file
        gio.write_dates(inargs.output_file, final.index.values)

    metadata_dict = {inargs.fourier_file: dset_in.attrs['history']}
    gio.write_metadata(inargs.output_file, file_info=metadata_dict)
def test_dump_and_open_encodings(self):
    # Create a netCDF file with explicit time units
    # and make sure it makes it into the encodings
    # and survives a round trip
    f, tmp_file = tempfile.mkstemp(suffix='.nc')
    os.close(f)

    ds = nc4.Dataset(tmp_file, 'w')
    ds.createDimension('time', size=10)
    ds.createVariable('time', np.int32, dimensions=('time',))
    units = 'days since 1999-01-01'
    ds.variables['time'].setncattr('units', units)
    ds.variables['time'][:] = np.arange(10) + 4
    ds.close()

    xray_dataset = open_dataset(tmp_file)
    os.remove(tmp_file)
    xray_dataset.dump(tmp_file)

    ds = nc4.Dataset(tmp_file, 'r')
    self.assertEqual(ds.variables['time'].getncattr('units'), units)
    self.assertArrayEqual(ds.variables['time'], np.arange(10) + 4)
    ds.close()
    os.remove(tmp_file)
def load_netcdf(ncfile, group):
    """
    Load a ModVsObs object previously saved in a netcdf file
    """
    # Load the group into a ModVsObs object
    ds = xray.open_dataset(ncfile, group=group)

    # work out the varname
    varnames = ds.data_vars.keys()
    for vv in varnames:
        if 'mod' in vv:
            varname = vv.strip('_mod')

    # Load the two variables (as Pandas objects)
    TSobs = ds['%s_obs' % varname].to_pandas()
    TSmod = ds['%s_mod' % varname].to_pandas()

    # Load the attributes
    attrs = ds['%s_obs' % varname].attrs

    # Convert to a ModVsObs object
    # Put the data into a ModVsObs object (model first then observed)
    return ModVsObs(TSmod.index.to_pydatetime(),
                    TSmod.values,
                    TSobs.index.to_pydatetime(),
                    TSobs.values,
                    varname=varname,
                    long_name=attrs['long_name'],
                    units=attrs['units'],
                    stationid=group,
                    )
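# A hedged usage sketch for load_netcdf above; the file name and group are
# hypothetical placeholders, and the returned object is whatever ModVsObs
# exposes in the original source.
mvo = load_netcdf('modvsobs_results.nc', group='station_01')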
def test_mask_and_scale(self):
    f, tmp_file = tempfile.mkstemp(suffix='.nc')
    os.close(f)

    nc = nc4.Dataset(tmp_file, mode='w')
    nc.createDimension('t', 5)
    nc.createVariable('x', 'int16', ('t',), fill_value=-1)
    v = nc.variables['x']
    v.set_auto_maskandscale(False)
    v.add_offset = 10
    v.scale_factor = 0.1
    v[:] = np.array([-1, -1, 0, 1, 2])
    nc.close()

    # first make sure netCDF4 reads the masked and scaled data correctly
    nc = nc4.Dataset(tmp_file, mode='r')
    expected = np.ma.array([-1, -1, 10, 10.1, 10.2],
                           mask=[True, True, False, False, False])
    actual = nc.variables['x'][:]
    self.assertArrayEqual(expected, actual)

    # now check xray
    ds = open_dataset(tmp_file)
    expected = create_masked_and_scaled_data()
    self.assertDatasetEqual(expected, ds)
    os.remove(tmp_file)
def _calc_anomalies(self, *args, **kwargs):
    '''
    calculate absolute or relative anomalies given a NetCDF file
    of the Climatic Research Unit (CRU) Historical Time Series.
    '''
    import xray

    ds = xray.open_dataset(self.cru_ts)
    try:
        clim_ds = ds.loc[{'time': slice(self.climatology_begin,
                                        self.climatology_end)}]
        climatology = clim_ds[self.variable].groupby('time.month').mean('time')
    except:
        raise AttributeError('cannot slice netcdf based on climatology years '
                             'given. they must overlap.')

    # calculate anomalies
    if self.absolute == True:
        anomalies = ds[self.variable].groupby('time.month') - climatology
    elif self.absolute == False:
        anomalies = ds[self.variable].groupby('time.month') / climatology
    else:
        raise AttributeError('_calc_anomalies (cru): absolute can only be '
                             'True or False')

    return anomalies
def test_open_encodings(self):
    # Create a netCDF file with explicit time units
    # and make sure it makes it into the encodings
    # and survives a round trip
    with create_tmp_file() as tmp_file:
        with nc4.Dataset(tmp_file, 'w') as ds:
            ds.createDimension('time', size=10)
            ds.createVariable('time', np.int32, dimensions=('time',))
            units = 'days since 1999-01-01'
            ds.variables['time'].setncattr('units', units)
            ds.variables['time'][:] = np.arange(10) + 4

        expected = Dataset()
        time = pd.date_range('1999-01-05', periods=10)
        encoding = {'units': units, 'dtype': np.dtype('int32')}
        expected['time'] = ('time', time, {}, encoding)

        actual = open_dataset(tmp_file)
        self.assertVariableEqual(actual['time'], expected['time'])
        actual_encoding = dict((k, v) for k, v
                               in iteritems(actual['time'].encoding)
                               if k in expected['time'].encoding)
        self.assertDictEqual(actual_encoding, expected['time'].encoding)
def test_open_encodings(self):
    # Create a netCDF file with explicit time units
    # and make sure it makes it into the encodings
    # and survives a round trip
    with create_tmp_file() as tmp_file:
        with nc4.Dataset(tmp_file, 'w') as ds:
            ds.createDimension('time', size=10)
            ds.createVariable('time', np.int32, dimensions=('time',))
            units = 'days since 1999-01-01'
            ds.variables['time'].setncattr('units', units)
            ds.variables['time'][:] = np.arange(10) + 4

        expected = Dataset()
        time = pd.date_range('1999-01-05', periods=10)
        encoding = {'units': units, 'dtype': np.dtype('int32')}
        expected['time'] = ('time', time, {}, encoding)

        with open_dataset(tmp_file) as actual:
            self.assertVariableEqual(actual['time'], expected['time'])
            actual_encoding = dict((k, v) for k, v
                                   in iteritems(actual['time'].encoding)
                                   if k in expected['time'].encoding)
            self.assertDictEqual(actual_encoding,
                                 expected['time'].encoding)
def load_dailyrel(datafiles, yearnm='year', onset_varnm='D_ONSET',
                  retreat_varnm='D_RETREAT'):
    ds = atm.load_concat(datafiles, concat_dim=yearnm)
    if isinstance(ds, xray.DataArray):
        ds = ds.to_dataset()
    varnms = ds.data_vars.keys()
    if onset_varnm is not None:
        onset = ds[onset_varnm]
        varnms.remove(onset_varnm)
    else:
        onset = np.nan * ds[yearnm]
    if retreat_varnm is not None:
        retreat = ds[retreat_varnm]
        varnms.remove(retreat_varnm)
    else:
        retreat = np.nan * ds[yearnm]

    # Remaining data variable is the data field
    varnm = varnms[0]
    data = ds[varnm]

    # Copy attributes from the first file in the list
    with xray.open_dataset(datafiles[0]) as ds0:
        data.attrs = ds0[varnm].attrs

    return data, onset, retreat
def _get_varname_cru(self, *args, **kwargs):
    '''
    take as input the cru ts3* netcdf filename and return (if possible)
    the name of the variable we want to work on from that netcdf.

    Arguments:
        nc_fn = [str] filepath to the cru ts* netcdf file used in downscaling

    Returns:
        the variable name as a string if it can be deduced, and errors if
        the variable name cannot be deduced.
    '''
    import xray
    import netCDF4

    ds = xray.open_dataset(self.cru_ts)
    variables = ds.variables.keys()
    variable = [variable for variable in variables
                if variable not in [u'lon', u'lat', u'time']]
    if len(variable) == 1:
        variable = variable[0]
    else:
        raise AttributeError('cannot deduce the variable from the file. '
                             'supply nc_varname and re-run')
    return variable
def save_gpcp_years(datafile, savedir, yearmin=1997, yearmax=2015):
    """Read GPCP daily data and save to individual file for each year."""

    savestr = savedir + '/gpcp_daily_%d.nc'

    with xray.open_dataset(datafile) as ds:
        pcp = ds['PREC']
        dates = ds['yyyyddd']
        yy = dates // 1000
        ddd = dates - 1000 * yy
        pcp.coords['year'] = yy
        pcp.coords['day'] = ddd

        years = np.arange(yearmin, yearmax + 1)
        for year in years:
            print(year)
            ind = np.where(yy.values == year)[0]
            precip = pcp[ind]
            days = precip['day'].values
            precip = precip.drop(['year', 'day'])
            precip = precip.rename({'time': 'day'})
            precip['day'] = days
            savefile = savestr % year
            print('Saving to ' + savefile)
            atm.save_nc(savefile, precip)

    return None
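# A hedged usage sketch for save_gpcp_years above; the input file and output
# directory are hypothetical placeholders, not paths from the original source.
save_gpcp_years('/data/gpcp/gpcp_daily_1997-2015.nc', '/data/gpcp/yearly',
                yearmin=1997, yearmax=2015)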
def main(inargs):
    """Run the program"""

    # Read data
    dset_in = xray.open_dataset(inargs.infile)
    gio.check_xrayDataset(dset_in, inargs.metric)

    subset_dict = gio.get_subset_kwargs(inargs)
    darray = dset_in[inargs.metric].sel(**subset_dict)

    # Make selection
    metric_threshold = uconv.get_threshold(darray.values,
                                           inargs.metric_threshold)

    assert inargs.threshold_direction in ['greater', 'less']
    if inargs.threshold_direction == 'greater':
        indexes = darray >= metric_threshold
    elif inargs.threshold_direction == 'less':
        indexes = darray <= metric_threshold

    darray_selection = darray.loc[indexes]

    # Write outputs
    gio.write_dates(inargs.outfile, darray_selection['time'].values)

    metadata_dict = {inargs.infile: dset_in.attrs['history']}
    gio.write_metadata(inargs.outfile, file_info=metadata_dict)
def get_sea_mask(ds):
    raw_mask = xray.open_dataset('mask.nc')
    # extract places where the nearest latitude or longitude (before or after)
    # is in the ocean
    sea_mask = ((raw_mask.reindex_like(ds, method='pad').sftlf < 100) &
                (raw_mask.reindex_like(ds, method='backfill').sftlf < 100))
    return sea_mask
def read_data(data_dir, lat, lon, resample=None):
    files = sorted([os.path.join(data_dir, f) for f in os.listdir(data_dir)])
    dss = [xr.open_dataset(f).sel(lat=lat, lon=lon, method='nearest')
           for f in files]
    ds = xr.concat([dr.load() for dr in dss], 'time')
    if resample is not None:
        ds = ds.resample(resample, 'time')
    return ds
def _calc_anomalies(nc_fn, nc_varname, climatology_begin, climatology_end,
                    absolute=True):
    '''
    calculate absolute or relative anomalies given a NetCDF file
    of the Climatic Research Unit (CRU) Historical Time Series.
    '''
    ds = xray.open_dataset(nc_fn)

    # climatology -- slice the time dimension
    try:
        clim_ds = ds.loc[{'time': slice(climatology_begin, climatology_end)}]
        climatology = clim_ds[nc_varname].groupby('time.month').mean('time')
    except:
        raise AttributeError('cannot slice netcdf based on climatology years '
                             'given. they must overlap.')

    # calculate anomalies
    if absolute == True:
        anomalies = ds[nc_varname].groupby('time.month') - climatology
    elif absolute == False:
        anomalies = ds[nc_varname].groupby('time.month') / climatology
    else:
        raise AttributeError('calc_anomalies: absolute can only be True or False')

    return anomalies
def retrive_grid(self, file):
    '''Docstring'''
    fdict = self.fnames_dict()
    grdfile = xray.open_dataset(fdict[file], decode_times=False)
    return grdfile
def roundtrip(self, data, save_kwargs={}, open_kwargs={}):
    with create_tmp_file() as tmp_file:
        data.to_netcdf(tmp_file, format='NETCDF4_CLASSIC',
                       engine='netcdf4', **save_kwargs)
        with open_dataset(tmp_file, engine='netcdf4', **open_kwargs) as ds:
            yield ds
def __init__(self):
    itime = datetime.utcnow() + relativedelta(hours=-5) + \
        relativedelta(minute=0, second=0, microsecond=0)
    itime = itime + relativedelta(hour=itime.hour // 6 * 6)
    ncfile = ('http://nomads.ncep.noaa.gov:9090/dods/gfs_0p25/gfs{}/'
              'gfs_0p25_{}z').format(itime.strftime('%Y%m%d'),
                                     itime.strftime('%H'))
    ds = xray.open_dataset(ncfile)
    self.ds = ds
    self.itime = itime
def test_cmp_local_file(self):
    url = 'http://test.opendap.org/opendap/hyrax/data/nc/bears.nc'
    actual = open_dataset(url, engine='pydap')
    with open_example_dataset('bears.nc') as expected:
        # don't check attributes since pydap doesn't serialize them correctly
        # also skip the "bears" variable since the test DAP server incorrectly
        # concatenates it.
        self.assertDatasetEqual(actual.drop('bears'), expected.drop('bears'))
def fetch(self, url=None):
    """
    Fetches either the most recent forecast (if url is None) or the
    forecast specified by the url.
    """
    if url is None:
        url = most_recent_dataset(self.url_format, self.frequency)
    ds = xray.open_dataset(url)
    return self.normalize(ds)
def create_datasets():
    actual = open_dataset(url, engine='pydap')
    with open_example_dataset('bears.nc') as expected:
        # don't check attributes since pydap doesn't serialize them
        # correctly also skip the "bears" variable since the test DAP
        # server incorrectly concatenates it.
        actual = actual.drop('bears')
        expected = expected.drop('bears')
        yield actual, expected
def get_datasets(names, files, variables, analysis_vars, timestep):
    """
    Parse the files and variables namelists and load the files into
    xray objects.
    """
    datasets = OrderedDict()

    if not any(names):
        return datasets

    for name in names:
        print('Getting data for {0}'.format(name))
        f = files.loc[(files['NAME'] == name) &
                      (files['TIMESTEP'] == timestep)]
        if len(f) > 1:
            raise RasmLibIOError('Union of NAME: {0} and TIMESTEP: {1} '
                                 'returned too many rows ({2}).\n'
                                 '{3}'.format(name, timestep, len(f), f))
        elif len(f) < 1:
            raise RasmLibIOError('Union of NAME: {0} and TIMESTEP: {1} '
                                 'returned no rows.\n'
                                 '{2}'.format(name, timestep, f))
        file_path = f['FILE_PATH'].values[0]
        dataset_class = f['DATASET_CLASS'].values[0]

        # read the dataset
        ds = xray.open_dataset(file_path)

        # adjust units and var names
        for var in analysis_vars:
            v = variables[variables['VARIABLE'] == var]
            units = v['UNITS_STR'].values[0]
            dsvar = v['{0}-VARNAME'.format(dataset_class)].values[0]
            mult = v['{0}-MULT'.format(dataset_class)].values[0]
            offset = v['{0}-OFFSET'.format(dataset_class)].values[0]

            # rename variable
            if dsvar != var:
                ds[dsvar] = ds[dsvar].rename(var)
            # apply multiplier
            if mult != 1.:
                ds[dsvar] *= mult
            # apply offset
            if offset != 0.:
                ds[dsvar] += offset
            # set the units attribute
            ds[dsvar].attrs['units'] = units

        # add any attributes to the dataset
        ds.attrs['analysis_name'] = name
        ds.attrs['dataset_class'] = dataset_class

        datasets[name] = ds

    return datasets
def _get_grid_files(self):
    """Get the files holding grid data for an aospy object."""
    datasets = []
    for path in self.grid_file_paths:
        try:
            ds = xray.open_dataset(path, decode_times=False)
        except TypeError:
            ds = xray.open_mfdataset(path, decode_times=False)
        datasets.append(ds)
    return tuple(datasets)
def create_datasets(self, **kwargs):
    url = 'http://test.opendap.org/opendap/hyrax/data/nc/bears.nc'
    actual = open_dataset(url, engine='pydap', **kwargs)
    with open_example_dataset('bears.nc') as expected:
        # don't check attributes since pydap doesn't serialize them
        # correctly also skip the "bears" variable since the test DAP
        # server incorrectly concatenates it.
        actual = actual.drop('bears')
        expected = expected.drop('bears')
        yield actual, expected