Example No. 1
    def test_coordinates_encoding(self):
        def equals_latlon(obj):
            return obj == 'lat lon' or obj == 'lon lat'

        original = Dataset({
            'temp': ('x', [0, 1]),
            'precip': ('x', [0, -1])
        }, {
            'lat': ('x', [2, 3]),
            'lon': ('x', [4, 5])
        })
        with self.roundtrip(original) as actual:
            self.assertDatasetIdentical(actual, original)
        with create_tmp_file() as tmp_file:
            original.to_netcdf(tmp_file)
            with open_dataset(tmp_file, decode_coords=False) as ds:
                self.assertTrue(equals_latlon(ds['temp'].attrs['coordinates']))
                self.assertTrue(
                    equals_latlon(ds['precip'].attrs['coordinates']))
                self.assertNotIn('coordinates', ds.attrs)
                self.assertNotIn('coordinates', ds['lat'].attrs)
                self.assertNotIn('coordinates', ds['lon'].attrs)

        modified = original.drop(['temp', 'precip'])
        with self.roundtrip(modified) as actual:
            self.assertDatasetIdentical(actual, modified)
        with create_tmp_file() as tmp_file:
            modified.to_netcdf(tmp_file)
            with open_dataset(tmp_file, decode_coords=False) as ds:
                self.assertTrue(equals_latlon(ds.attrs['coordinates']))
                self.assertNotIn('coordinates', ds['lat'].attrs)
                self.assertNotIn('coordinates', ds['lon'].attrs)
Example No. 2
def test_vrtdiv():
    path = ('/archive/Spencer.Hill/am2/am2clim_reyoi/gfdl.ncrc2-default-prod/'
            'pp/atmos_level/ts/monthly/1yr/atmos_level.198301-198312.')

    # Vertically defined, sigma levels.
    u_arr = xray.open_dataset(path + 'ucomp.nc').ucomp
    v_arr = xray.open_dataset(path + 'vcomp.nc').vcomp
    vort, divg = compute_vrtdiv(u_arr, v_arr)
    assert vort.shape == u_arr.shape
    assert divg.shape == u_arr.shape
    np.testing.assert_array_equal(u_arr.lat, vort.lat)
    np.testing.assert_array_equal(u_arr.lon, vort.lon)
    np.testing.assert_array_equal(u_arr.time, vort.time)
    np.testing.assert_array_equal(u_arr.pfull, vort.pfull)

    # Not vertically defined.
    u0 = u_arr[:, 0]
    v0 = v_arr[:, 0]
    vort0, divg0 = compute_vrtdiv(u0, v0)
    assert vort0.shape == u0.shape
    assert divg0.shape == u0.shape

    # Dummy case: zeros everywhere
    u_arr_zeros = xray.DataArray(np.zeros_like(u_arr.values),
                                 dims=u_arr.dims,
                                 coords=u_arr.coords)
    v_arr_zeros = u_arr_zeros.copy()
    vort_zeros, divg_zeros = compute_vrtdiv(u_arr_zeros, v_arr_zeros)
    assert not vort_zeros.any()
    assert not divg_zeros.any()
Example No. 3
def test_vrtdiv():
    path = ('/archive/Spencer.Hill/am2/am2clim_reyoi/gfdl.ncrc2-default-prod/'
            'pp/atmos_level/ts/monthly/1yr/atmos_level.198301-198312.')

    # Vertically defined, sigma levels.
    u_arr = xray.open_dataset(path + 'ucomp.nc').ucomp
    v_arr = xray.open_dataset(path + 'vcomp.nc').vcomp
    vort, divg = compute_vrtdiv(u_arr, v_arr)
    assert vort.shape == u_arr.shape
    assert divg.shape == u_arr.shape
    np.testing.assert_array_equal(u_arr.lat, vort.lat)
    np.testing.assert_array_equal(u_arr.lon, vort.lon)
    np.testing.assert_array_equal(u_arr.time, vort.time)
    np.testing.assert_array_equal(u_arr.pfull, vort.pfull)

    # Not vertically defined.
    u0 = u_arr[:, 0]
    v0 = v_arr[:, 0]
    vort0, divg0 = compute_vrtdiv(u0, v0)
    assert vort0.shape == u0.shape
    assert divg0.shape == u0.shape

    # Dummy case: zeros everywhere
    u_arr_zeros = xray.DataArray(np.zeros_like(u_arr.values), dims=u_arr.dims,
                                 coords=u_arr.coords)
    v_arr_zeros = u_arr_zeros.copy()
    vort_zeros, divg_zeros = compute_vrtdiv(u_arr_zeros, v_arr_zeros)
    assert not vort_zeros.any()
    assert not divg_zeros.any()
Example No. 4
    def test_coordinates_encoding(self):
        def equals_latlon(obj):
            return obj == 'lat lon' or obj == 'lon lat'

        original = Dataset({'temp': ('x', [0, 1]), 'precip': ('x', [0, -1])},
                           {'lat': ('x', [2, 3]), 'lon': ('x', [4, 5])})
        with self.roundtrip(original) as actual:
            self.assertDatasetIdentical(actual, original)
        with create_tmp_file() as tmp_file:
            original.to_netcdf(tmp_file)
            with open_dataset(tmp_file, decode_coords=False) as ds:
                self.assertTrue(equals_latlon(ds['temp'].attrs['coordinates']))
                self.assertTrue(equals_latlon(ds['precip'].attrs['coordinates']))
                self.assertNotIn('coordinates', ds.attrs)
                self.assertNotIn('coordinates', ds['lat'].attrs)
                self.assertNotIn('coordinates', ds['lon'].attrs)

        modified = original.drop(['temp', 'precip'])
        with self.roundtrip(modified) as actual:
            self.assertDatasetIdentical(actual, modified)
        with create_tmp_file() as tmp_file:
            modified.to_netcdf(tmp_file)
            with open_dataset(tmp_file, decode_coords=False) as ds:
                self.assertTrue(equals_latlon(ds.attrs['coordinates']))
                self.assertNotIn('coordinates', ds['lat'].attrs)
                self.assertNotIn('coordinates', ds['lon'].attrs)
Example No. 5
 def load_dataset(self):
     # import pdb; pdb.set_trace()
     self.xdataset_persist = xray.open_dataset(self.xdataset_url,
                                               decode_times=False)
     self.ydataset_persist = xray.open_dataset(self.ydataset_url,
                                               decode_times=False)
     self.set_lon_lat_array_coordinates()
     self.set_time_series_coordinate()
Example No. 6
 def test_write_groups(self):
     data1 = create_test_data()
     data2 = data1 * 2
     with create_tmp_file() as tmp_file:
         data1.to_netcdf(tmp_file, group='data/1')
         data2.to_netcdf(tmp_file, group='data/2', mode='a')
         with open_dataset(tmp_file, group='data/1') as actual1:
             self.assertDatasetIdentical(data1, actual1)
         with open_dataset(tmp_file, group='data/2') as actual2:
             self.assertDatasetIdentical(data2, actual2)
Example No. 7
    def __init__(self, ww3_cur_path, ww3_no_cur_path, pnboia_path=None):
        self.ww3_cur_experiment = ww3_cur_path.split('/')[-1].split('.')[0]
        self.ww3_cur_year = ww3_cur_path.split('/')[-1].split('.')[1]
        self.ww3_cur = xray.open_dataset(ww3_cur_path)

        self.ww3_no_cur = xray.open_dataset(ww3_no_cur_path)

        if pnboia_path:
            self.pnboia_path = pnboia_path
            self.pnboia = xray.open_dataset(pnboia_path)
Example No. 8
 def test_write_groups(self):
     data1 = create_test_data()
     data2 = data1 * 2
     with create_tmp_file() as tmp_file:
         data1.to_netcdf(tmp_file, group='data/1')
         data2.to_netcdf(tmp_file, group='data/2', mode='a')
         with open_dataset(tmp_file, group='data/1') as actual1:
             self.assertDatasetIdentical(data1, actual1)
         with open_dataset(tmp_file, group='data/2') as actual2:
             self.assertDatasetIdentical(data2, actual2)
Example No. 9
 def test_dask_roundtrip(self):
     with create_tmp_file() as tmp:
         data = create_test_data()
         data.to_netcdf(tmp)
         chunks = {'dim1': 4, 'dim2': 4, 'dim3': 4, 'time': 10}
         with open_dataset(tmp, chunks=chunks) as dask_ds:
             self.assertDatasetIdentical(data, dask_ds)
             with create_tmp_file() as tmp2:
                 dask_ds.to_netcdf(tmp2)
                 with open_dataset(tmp2) as on_disk:
                     self.assertDatasetIdentical(data, on_disk)
Example No. 10
    def __init__(self, hycom_url, ww3_url, pnboia_url):
        self.hycom_url = hycom_url
        self.hycom = xray.open_dataset(hycom_url)
        self.year = hycom_url.split('/')[-1].split('.')[1]

        self.ww3_url = ww3_url
        self.ww3 = xray.open_dataset(ww3_url)

        if pnboia_url:
            self.pnboia_url = pnboia_url
            self.pnboia = xray.open_dataset(pnboia_url)
Example No. 11
 def test_dask_roundtrip(self):
     with create_tmp_file() as tmp:
         data = create_test_data()
         data.to_netcdf(tmp)
         chunks = {'dim1': 4, 'dim2': 4, 'dim3': 4, 'time': 10}
         with open_dataset(tmp, chunks=chunks) as dask_ds:
             self.assertDatasetIdentical(data, dask_ds)
             with create_tmp_file() as tmp2:
                 dask_ds.to_netcdf(tmp2)
                 with open_dataset(tmp2) as on_disk:
                     self.assertDatasetIdentical(data, on_disk)
Example No. 12
 def test_open_dataset(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         with open_dataset(tmp, chunks={'x': 5}) as actual:
             self.assertIsInstance(actual.foo.variable.data, da.Array)
             self.assertEqual(actual.foo.variable.data.chunks, ((5, 5),))
             self.assertDatasetAllClose(original, actual)
         with open_dataset(tmp) as actual:
             self.assertIsInstance(actual.foo.variable.data, np.ndarray)
             self.assertDatasetAllClose(original, actual)
Example No. 13
def XrayOpen(filenamein, decodetimes=True):
    try:
        if decodetimes:
            filein = xray.open_dataset(filenamein)
        else:
            filein = xray.open_dataset(filenamein, decode_times=False)
    except RuntimeError:
        print filenamein
        exit("couldn't find file")
    return filein
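# A minimal usage sketch (added for illustration, not from the original source);
# the filename below is hypothetical.
filein = XrayOpen('model_output.nc', decodetimes=False)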
Example No. 14
 def test_open_dataset(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         with open_dataset(tmp, chunks={'x': 5}) as actual:
             self.assertIsInstance(actual.foo.variable.data, da.Array)
             self.assertEqual(actual.foo.variable.data.chunks, ((5, 5), ))
             self.assertDatasetIdentical(original, actual)
         with open_dataset(tmp, chunks=5) as actual:
             self.assertDatasetIdentical(original, actual)
         with open_dataset(tmp) as actual:
             self.assertIsInstance(actual.foo.variable.data, np.ndarray)
             self.assertDatasetIdentical(original, actual)
Example No. 15
    def setUp(self):

        # netcdfs we'll use for input and check output against
        nc_isnobal_input = 'test/data/isnobal_input.nc'
        nc_isnobal_output = 'test/data/isnobal_output.nc'

        # connect to the virtual watershed
        self.vwc = default_vw_client()

        # load NetCDF inputs and outputs from test data
        self.input_dataset = open_dataset(nc_isnobal_input)
        self.output_dataset = open_dataset(nc_isnobal_output)

        # insert NetCDF test input to virtual watershed
        input_mr_name = 'webapp-testing-input'

        modelruns = self.vwc.modelrun_search()
        unittest_uuids = [r['Model Run UUID'] for r in modelruns.records
                          if r['Model Run Name'] == 'webapp-testing-input']

        for u in unittest_uuids:
            s = self.vwc.delete_modelrun(u)
            print "pre-test cleanup success on %s: %s" % (u, str(s))

        self.model_run_uuid = \
            self.vwc.initialize_modelrun(
                model_run_name=input_mr_name,
                description='test in vwplatform',
                researcher_name='Matt Turner',
                keywords='test,isnobal,webapp')

        self.vwc.upload(self.model_run_uuid, nc_isnobal_input)

        self.start_datetime = '2010-10-01 00:00:00'
        self.end_datetime = '2010-10-01 16:00:00'

        md = metadata_from_file(nc_isnobal_input, self.model_run_uuid,
                                self.model_run_uuid,
                                'test input for isnobal run',
                                'Dry Creek', 'Idaho', model_name='isnobal',
                                start_datetime=self.start_datetime,
                                end_datetime=self.end_datetime,
                                model_set='inputs', taxonomy='geoimage',
                                model_set_taxonomy='grid')
        # import ipdb; ipdb.set_trace()

        self.input_uuid = self.vwc.insert_metadata(md).text

        time.sleep(1)
Example No. 16
    def test_engine(self):
        data = create_test_data()
        with self.assertRaisesRegexp(ValueError, 'unrecognized engine'):
            data.to_netcdf('foo.nc', engine='foobar')
        with self.assertRaisesRegexp(ValueError, 'invalid engine'):
            data.to_netcdf(engine='netcdf4')

        with create_tmp_file() as tmp_file:
            data.to_netcdf(tmp_file)
            with self.assertRaisesRegexp(ValueError, 'unrecognized engine'):
                open_dataset(tmp_file, engine='foobar')

        netcdf_bytes = data.to_netcdf()
        with self.assertRaisesRegexp(ValueError, 'can only read'):
            open_dataset(BytesIO(netcdf_bytes), engine='foobar')
Example No. 17
    def test_engine(self):
        data = create_test_data()
        with self.assertRaisesRegexp(ValueError, 'unrecognized engine'):
            data.to_netcdf('foo.nc', engine='foobar')
        with self.assertRaisesRegexp(ValueError, 'invalid engine'):
            data.to_netcdf(engine='netcdf4')

        with create_tmp_file() as tmp_file:
            data.to_netcdf(tmp_file)
            with self.assertRaisesRegexp(ValueError, 'unrecognized engine'):
                open_dataset(tmp_file, engine='foobar')

        netcdf_bytes = data.to_netcdf()
        with self.assertRaisesRegexp(ValueError, 'can only read'):
            open_dataset(BytesIO(netcdf_bytes), engine='foobar')
Example No. 18
	def _calc_anomalies( self, *args, **kwargs ):
		'''
		calculate absolute or relative anomalies given a NetCDF file
		of the Climatic Research Unit (CRU) Historical Time Series.
		'''
		import xray

		# handle modeled vs. historical
		if self.ar5_modeled != None and self.ar5_historical != None:
			# parse the input name for some file metadata HARDWIRED!
			output_naming_dict = DownscaleAR5.standardized_fn_to_vars( self.ar5_modeled )
			variable = output_naming_dict[ 'variable' ]

			# read in both modeled and historical
			ds = xray.open_dataset( self.ar5_modeled )
			ds = ds[ variable ]
			clim_ds = xray.open_dataset( self.ar5_historical )
			# climatology
			clim_ds = clim_ds.loc[ {'time':slice(self.climatology_begin,self.climatology_end)} ]
			climatology = clim_ds[ variable ].groupby( 'time.month' ).mean( 'time' )
			del clim_ds

		elif self.ar5_historical is not None and self.ar5_modeled is None:
			output_naming_dict = standardized_fn_to_vars( self.ar5_historical )
			variable = output_naming_dict[ 'variable' ]

			# read in historical
			ds = xray.open_dataset( self.ar5_historical )
			# climatology
			climatology = ds.loc[ {'time':slice(self.climatology_begin,self.climatology_end)} ]
			climatology = climatology[ variable ].groupby( 'time.month' ).mean( 'time' )

		else:
			raise NameError( 'ERROR: must have both ar5_modeled and ar5_historical, or just ar5_historical' )

		if self.plev is not None:
			plevel, = np.where( ds.plev == self.plev )
			ds = ds[ :, plevel[0], ... ]
			climatology = climatology[ :, plevel[0], ... ]

		# anomalies
		if self.absolute == True:
			anomalies = ds.groupby( 'time.month' ) - climatology
		elif self.absolute == False:
			anomalies = ds.groupby( 'time.month' ) / climatology
		else:
			raise AttributeError( '_calc_anomalies (ar5): absolute can only be True or False' )
		return anomalies
Example No. 19
def load(var_name):
    path = ('/archive/Spencer.Hill/am2/am2clim_reyoi/gfdl.ncrc2-default-prod/'
            'pp/atmos_level/ts/monthly/1yr/atmos_level.198301-198312.')

    ds = xray.open_dataset(path + var_name + '.nc',
                           drop_variables=['nv', 'time_bounds'])
    return ds[var_name]
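# Illustrative call (added); it assumes a 'ucomp.nc' file exists at the
# hard-wired archive path above.
ucomp = load('ucomp')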
Example No. 20
    def do_netcdf_load(self, buf):
        from cStringIO import StringIO
        import xray

        f = StringIO(buf)

        return xray.open_dataset(f)
Example No. 21
 def roundtrip(self, data, **kwargs):
     f, tmp_file = tempfile.mkstemp(suffix='.nc')
     os.close(f)
     data.dump(tmp_file)
     roundtrip_data = open_dataset(tmp_file, **kwargs)
     os.remove(tmp_file)
     return roundtrip_data
Example No. 22
 def test_read_byte_attrs_as_unicode(self):
     with create_tmp_file() as tmp_file:
         with nc4.Dataset(tmp_file, 'w') as nc:
             nc.foo = b'bar'
         actual = open_dataset(tmp_file)
         expected = Dataset(attrs={'foo': 'bar'})
         self.assertDatasetIdentical(expected, actual)
Example No. 23
 def roundtrip(self, data, **kwargs):
     with create_tmp_file() as tmp_file:
         data.to_netcdf(tmp_file,
                        format='NETCDF3_CLASSIC',
                        engine='netcdf4')
         with open_dataset(tmp_file, engine='netcdf4', **kwargs) as ds:
             yield ds
Example No. 24
def concat_to_nc( filelist, output_filename, dim='time', begin_time=None, end_time=None, nc_format='NETCDF4', **kwargs ):
	'''
	take list of consecutive netcdf files (made for CMIP5 data) and stack them into a 
	single larger netcdf file.  This was necessary to overcome some bugginess in how 
	MFDataset is dealing with different calendar units on different files.  This is 
	technically valid CF-Compliant metadata, but is tricky to work with.  This hack allows
	us to get around some of this unpredictable behavior.

	PARAMETERS:
	-----------
	filelist = [list] list of string file paths to the sorted netcdf files to stack together
	output_filename = [str] path to and name of the output file to be generated (.nc extension)
	dim = [str] dimension to stack on -- default is 'time'
	begin_time = [str] PANDAS style datetime string syntax -- used in xray
	end_time = [str] PANDAS style datetime string syntax -- used in xray
	format = [str] output NetCDF format desired. valid strings are:
					'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'
					default is 'NETCDF4'
	**kwargs -- potential future arguments or overloaded args to pass through (none implemented)

	RETURNS:
	--------

	output_filename as string, with the important side-effect of writing data to disk

	'''
	import xray
	with xray.concat([ xray.open_dataset( i ).load() for i in filelist ], dim ) as ds:
		# time slicer condition
		if begin_time != None and end_time != None:
			ds = ds.loc[ { dim:slice( begin_time, end_time ) } ]
		if os.path.exists( output_filename ):
			os.remove( output_filename )
		ds.to_netcdf( output_filename, mode='w', format=nc_format )
	return output_filename
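# A hedged usage sketch (not from the original source): the file paths are
# hypothetical, and it assumes a sorted list of consecutive CMIP5 NetCDF files.
import glob
files = sorted(glob.glob('/data/cmip5/tas_Amon_GFDL-CM3_rcp85_*.nc'))
concat_to_nc(files, '/data/cmip5/tas_Amon_GFDL-CM3_rcp85_stacked.nc',
             dim='time', begin_time='2006-01', end_time='2100-12')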
Example No. 25
    def do_netcdf_load(self, buf):
        from cStringIO import StringIO
        import xray

        f = StringIO(buf)

        return xray.open_dataset(f)
Example No. 26
    def test_open_encodings(self):
        # Create a netCDF file with explicit time units
        # and make sure it makes it into the encodings
        # and survives a round trip
        f, tmp_file = tempfile.mkstemp(suffix='.nc')
        os.close(f)

        ds = nc4.Dataset(tmp_file, 'w')
        ds.createDimension('time', size=10)
        ds.createVariable('time', np.int32, dimensions=('time',))
        units = 'days since 1999-01-01'
        ds.variables['time'].setncattr('units', units)
        ds.variables['time'][:] = np.arange(10) + 4
        ds.close()

        expected = Dataset()

        time = pd.date_range('1999-01-05', periods=10)
        encoding = {'units': units, 'dtype': np.dtype('int32')}
        expected['time'] = ('time', time, {}, encoding)

        actual = open_dataset(tmp_file)

        self.assertXArrayEqual(actual['time'], expected['time'])
        actual_encoding = {k: v for k, v in actual['time'].encoding.iteritems()
                           if k in expected['time'].encoding}
        self.assertDictEqual(actual_encoding, expected['time'].encoding)

        os.remove(tmp_file)
Example No. 27
 def test_read_byte_attrs_as_unicode(self):
     with create_tmp_file() as tmp_file:
         with nc4.Dataset(tmp_file, 'w') as nc:
             nc.foo = b'bar'
         actual = open_dataset(tmp_file)
         expected = Dataset(attrs={'foo': 'bar'})
         self.assertDatasetIdentical(expected, actual)
Example No. 28
    def __init__(self, imp):
        self.ROOTDIR = os.path.join(os.environ['HOME'],'Studies/Masters/Myroms/Msc_idealized/')
        self.roms = RunSetup( os.path.join( self.ROOTDIR, "experiments.yaml"), imp )

        grdfile = self.roms.retrive_grid('grd')
        
        self.Mr, self.Lr = grdfile['lat_rho'].shape # Number of J/I-direction INTERIOR RHO-points
        self.Mu, self.Lu = grdfile['lat_u'].shape   # Number of J/I-direction U-points
        self.Mv, self.Lv = grdfile['lat_v'].shape   # Number of J/I-direction V-points

        self.h  = grdfile['h']

        ################################################################################
        # Date/Time variables
        ################################################################################
        self.dstart = 0 
        tdays = (self.roms.ntimes*self.roms.dt)/(60*60*24)
        self.smstime = range(0, tdays+2)
        self.ndays = len(self.smstime)

        ################################################################################
        # Thermohaline field variables - SISPRES TS CLIMATOLOGY FIELDS
        ################################################################################
        lims = [-43.80, -42.00, -24.25, -22.65] 
        
        DATASETdir = '/Users/Phellipe/Studies/Masters/Datasets/'
        tsclim_file = '%sClimatology/rio_ts_climatology.nc'%DATASETdir
        self.tsclim = xray.open_dataset( tsclim_file )\
                        .sel(y=slice(lims[2],lims[3]), 
                                x=slice(lims[0], lims[1])) 
        
        self.x = self.tsclim.coords['x'].values
        self.y = self.tsclim.coords['y'].values
Example No. 29
 def __init__(self):
     self.f_xr = xr.open_dataset('coarser_grid_input/gcmplt.cdf')
     self.c = lambda var: self.f_xr[var].data  # coordinates
     self.v = lambda t, d, var: self.f_xr[var][t, d, :, :].data
     self.g_mask = lambda f, m: np.ma.masked_array(f, mask=[f == m])
     self.v_sb_smpl = lambda t, d, lon, lat, var: self.f_xr[var][t, d, :, :][lat, :][:, lon].data
     self.v_smpl_mrg = lambda f1, f2, ax: np.concatenate(f1, f2, axis=ax)
Example No. 30
def main(inargs):
    """Run the program"""

    # Read data
    dset_in = xray.open_dataset(inargs.fourier_file)
    df = dset_in.to_dataframe()

    # Change the amplitude columns so the value is a ranking
    amp_df = df.loc[:, df.columns.map(lambda x: 'amp' in x)]
    rank_df = amp_df.apply(rankdata, axis=1)
    rank_df = rank_df.combine_first(df)

    # Select the ones where wave 5 and 6 are in the top 3 amplitudes
    # (worst ranking must be 8 + 9 = 17)
    included = (rank_df['wave5_amp'].values +
                rank_df['wave6_amp'].values) >= 17
    final = rank_df.loc[included]

    # Reject days that change sign too much
    if inargs.max_sign_change:
        final = final.loc[final['sign_count'] <= inargs.max_sign_change]

    final = event_info(final, inargs.freq)

    if inargs.full_stats:
        assert not inargs.phase_filter and not inargs.season_filter and not inargs.duration_filter, \
        "Cannot filter by phase, season or duration for full stats, because then they would not be full!"
        final.to_csv(inargs.output_file)

    else:
        # Optional filtering by duration
        if inargs.duration_filter:
            final = final.loc[final['event_duration'] > inargs.duration_filter]

        # Optional filtering by season
        if inargs.season_filter:
            season = inargs.season_filter
            months_subset = pandas.to_datetime(final.index.values).month
            bools_subset = (months_subset == season_months[season][0]) + (
                months_subset == season_months[season][1]) + (
                    months_subset == season_months[season][2])
            final = final.loc[bools_subset]

        # Optional filtering by wave phase
        if inargs.phase_filter:
            phase_min, phase_max = set_phase_bounds(inargs.phase_filter,
                                                    inargs.freq)
            target_phase = 'wave%i_phase' % (inargs.freq)
            min_bools = (final[target_phase] > phase_min).values
            max_bools = (final[target_phase] < phase_max).values
            if phase_min < phase_max:
                final = final.loc[numpy.logical_and(min_bools, max_bools)]
            else:
                final = final.loc[numpy.logical_or(min_bools, max_bools)]

        # Write date file
        gio.write_dates(inargs.output_file, final.index.values)

    metadata_dict = {inargs.fourier_file: dset_in.attrs['history']}
    gio.write_metadata(inargs.output_file, file_info=metadata_dict)
Example No. 31
    def test_dump_and_open_encodings(self):
        # Create a netCDF file with explicit time units
        # and make sure it makes it into the encodings
        # and survives a round trip
        f, tmp_file = tempfile.mkstemp(suffix='.nc')
        os.close(f)

        ds = nc4.Dataset(tmp_file, 'w')
        ds.createDimension('time', size=10)
        ds.createVariable('time', np.int32, dimensions=('time',))
        units = 'days since 1999-01-01'
        ds.variables['time'].setncattr('units', units)
        ds.variables['time'][:] = np.arange(10) + 4
        ds.close()

        xray_dataset = open_dataset(tmp_file)
        os.remove(tmp_file)
        xray_dataset.dump(tmp_file)

        ds = nc4.Dataset(tmp_file, 'r')

        self.assertEqual(ds.variables['time'].getncattr('units'), units)
        self.assertArrayEqual(ds.variables['time'], np.arange(10) + 4)

        ds.close()
        os.remove(tmp_file)
Example No. 32
def concat_to_nc( filelist, output_filename, dim='time', begin_time=None, end_time=None, nc_format='NETCDF4', **kwargs ):
	'''
	take list of consecutive netcdf files (made for CMIP5 data) and stack them into a 
	single larger netcdf file.  This was necessary to overcome some bugginess in how 
	MFDataset is dealing with different calendar units on different files.  This is 
	technically valid CF-Compliant metadata, but is tricky to work with.  This hack allows
	us to get around some of this unpredictable behavior.

	PARAMETERS:
	-----------
	filelist = [list] list of string file paths to the sorted netcdf files to stack together
	output_filename = [str] path to and name of the output file to be generated (.nc extension)
	dim = [str] dimension to stack on -- default is 'time'
	begin_time = [str] PANDAS style datetime string syntax -- used in xray
	end_time = [str] PANDAS style datetime string syntax -- used in xray
	format = [str] output NetCDF format desired. valid strings are:
					'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'
					default is 'NETCDF4'
	**kwargs -- potential future arguments or overloaded args to pass through (none implemented)

	RETURNS:
	--------

	output_filename as string, with the important side-effect of writing data to disk

	'''
	import xray
	with xray.concat([ xray.open_dataset( i ).load() for i in filelist ], dim ) as ds:
		# time slicer condition
		if begin_time != None and end_time != None:
			ds = ds.loc[ { dim:slice( begin_time, end_time ) } ]
		if os.path.exists( output_filename ):
			os.remove( output_filename )
		ds.to_netcdf( output_filename, mode='w', format=nc_format )
	return output_filename
Example No. 33
def load_netcdf(ncfile, group):
    """
    Load a ModVsObs object previously saved in a netcdf file
    """
    # Load the group into a ModVsObs object
    ds = xray.open_dataset(ncfile, group=group)

    # work out the varname
    varnames = ds.data_vars.keys()

    for vv in varnames:
        if 'mod' in vv:
            # str.strip('_mod') strips any of the characters '_', 'm', 'o', 'd'
            # from both ends; slice off the literal '_mod' suffix instead
            varname = vv[:-len('_mod')]

    # Load the two variables (as Pandas objects)
    TSobs = ds['%s_obs'%varname].to_pandas()
    TSmod = ds['%s_mod'%varname].to_pandas()

    # Load the attributes
    attrs = ds['%s_obs'%varname].attrs

    # Convert to a ModVsObs object
    # Put the data into a ModVsObs object (model first then observed)
    return ModVsObs(TSmod.index.to_pydatetime(),\
            TSmod.values,\
            TSobs.index.to_pydatetime(),\
            TSobs.values,\
            varname=varname,\
            long_name=attrs['long_name'], \
            units=attrs['units'], \
            stationid=group,\
        )
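# A hedged usage sketch (added); the file name and station group below are
# hypothetical.
mvo = load_netcdf('modvsobs_results.nc', 'station_001')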
Example No. 34
    def test_mask_and_scale(self):
        f, tmp_file = tempfile.mkstemp(suffix='.nc')
        os.close(f)

        nc = nc4.Dataset(tmp_file, mode='w')
        nc.createDimension('t', 5)
        nc.createVariable('x', 'int16', ('t',), fill_value=-1)
        v = nc.variables['x']
        v.set_auto_maskandscale(False)
        v.add_offset = 10
        v.scale_factor = 0.1
        v[:] = np.array([-1, -1, 0, 1, 2])
        nc.close()

        # first make sure netCDF4 reads the masked and scaled data correctly
        nc = nc4.Dataset(tmp_file, mode='r')
        expected = np.ma.array([-1, -1, 10, 10.1, 10.2],
                               mask=[True, True, False, False, False])
        actual = nc.variables['x'][:]
        self.assertArrayEqual(expected, actual)

        # now check xray
        ds = open_dataset(tmp_file)
        expected = create_masked_and_scaled_data()
        self.assertDatasetEqual(expected, ds)
        os.remove(tmp_file)
Example No. 35
    def _calc_anomalies(self, *args, **kwargs):
        '''
		calculate absolute or relative anomalies given a NetCDF file
		of the Climatic Research Unit (CRU) Historical Time Series.
		'''
        import xray
        ds = xray.open_dataset(self.cru_ts)
        try:
            clim_ds = ds.loc[{
                'time':
                slice(self.climatology_begin, self.climatology_end)
            }]
            climatology = clim_ds[self.variable].groupby('time.month').mean(
                'time')
        except Exception:
            raise AttributeError(
                'cannot slice netcdf based on climatology years given. they must overlap.'
            )
        # calculate anomalies
        if self.absolute == True:
            anomalies = ds[self.variable].groupby('time.month') - climatology
        elif self.absolute == False:
            anomalies = ds[self.variable].groupby('time.month') / climatology
        else:
            raise AttributeError(
                '_calc_anomalies (cru): absolute can only be True or False')
        return anomalies
Example No. 36
    def test_open_encodings(self):
        # Create a netCDF file with explicit time units
        # and make sure it makes it into the encodings
        # and survives a round trip
        with create_tmp_file() as tmp_file:
            with nc4.Dataset(tmp_file, 'w') as ds:
                ds.createDimension('time', size=10)
                ds.createVariable('time', np.int32, dimensions=('time',))
                units = 'days since 1999-01-01'
                ds.variables['time'].setncattr('units', units)
                ds.variables['time'][:] = np.arange(10) + 4

            expected = Dataset()

            time = pd.date_range('1999-01-05', periods=10)
            encoding = {'units': units, 'dtype': np.dtype('int32')}
            expected['time'] = ('time', time, {}, encoding)

            actual = open_dataset(tmp_file)

            self.assertVariableEqual(actual['time'], expected['time'])
            actual_encoding = dict((k, v) for k, v
                                   in iteritems(actual['time'].encoding)
                                   if k in expected['time'].encoding)
            self.assertDictEqual(actual_encoding, expected['time'].encoding)
Example No. 37
    def test_open_encodings(self):
        # Create a netCDF file with explicit time units
        # and make sure it makes it into the encodings
        # and survives a round trip
        with create_tmp_file() as tmp_file:
            with nc4.Dataset(tmp_file, 'w') as ds:
                ds.createDimension('time', size=10)
                ds.createVariable('time', np.int32, dimensions=('time', ))
                units = 'days since 1999-01-01'
                ds.variables['time'].setncattr('units', units)
                ds.variables['time'][:] = np.arange(10) + 4

            expected = Dataset()

            time = pd.date_range('1999-01-05', periods=10)
            encoding = {'units': units, 'dtype': np.dtype('int32')}
            expected['time'] = ('time', time, {}, encoding)

            with open_dataset(tmp_file) as actual:
                self.assertVariableEqual(actual['time'], expected['time'])
                actual_encoding = dict(
                    (k, v) for k, v in iteritems(actual['time'].encoding)
                    if k in expected['time'].encoding)
                self.assertDictEqual(actual_encoding,
                                     expected['time'].encoding)
Example No. 38
def load_dailyrel(datafiles, yearnm='year', onset_varnm='D_ONSET',
                  retreat_varnm='D_RETREAT'):

    ds = atm.load_concat(datafiles, concat_dim=yearnm)
    if isinstance(ds, xray.DataArray):
        ds = ds.to_dataset()
    varnms = ds.data_vars.keys()
    if onset_varnm is not None:
        onset = ds[onset_varnm]
        varnms.remove(onset_varnm)
    else:
        onset = np.nan * ds[yearnm]
    if retreat_varnm is not None:
        retreat = ds[retreat_varnm]
        varnms.remove(retreat_varnm)
    else:
        retreat = np.nan * ds[yearnm]

    # Remaining data variable is the data field
    varnm = varnms[0]
    data = ds[varnm]

    # Copy attributes from the first file in the list
    with xray.open_dataset(datafiles[0]) as ds0:
        data.attrs = ds0[varnm].attrs

    return data, onset, retreat
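# Illustrative call (added); the yearly files are hypothetical and assumed to
# contain the default 'D_ONSET'/'D_RETREAT' variables plus one data variable.
data, onset, retreat = load_dailyrel(['precip_rel_1997.nc', 'precip_rel_1998.nc'])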
Example No. 39
    def _get_varname_cru(self, *args, **kwargs):
        '''
		take as input the cru ts3* netcdf filename and return (if possible)
		the name of the variable we want to work on from that netcdf.

		Arguments:
			nc_fn = [str] filepath to the cru ts* netcdf file used in downscaling

		Returns:
			the variable name as a string if it can be deduced, and errors if
			the variable name cannot be deduced.

		'''
        import xray
        import netCDF4
        ds = xray.open_dataset(self.cru_ts)
        variables = ds.variables.keys()
        variable = [ variable for variable in variables \
            if variable not in [u'lon', u'lat', u'time'] ]
        if len(variable) == 1:
            variable = variable[0]
        else:
            raise AttributeError(
                'cannot deduce the variable from the file. supply nc_varname and re-run'
            )
        return variable
Example No. 40
def save_gpcp_years(datafile, savedir, yearmin=1997, yearmax=2015):
    """Read GPCP daily data and save to individual file for each year."""

    savestr = savedir + '/gpcp_daily_%d.nc'

    with xray.open_dataset(datafile) as ds:
        pcp = ds['PREC']
        dates = ds['yyyyddd']
        yy = dates // 1000
        ddd = dates - 1000 * yy
        pcp.coords['year'] = yy
        pcp.coords['day'] = ddd

        years = np.arange(yearmin, yearmax + 1)
        for year in years:
            print(year)
            ind = np.where(yy.values == year)[0]
            precip = pcp[ind]
            days = precip['day'].values
            precip = precip.drop(['year', 'day'])
            precip = precip.rename({'time' : 'day'})
            precip['day'] = days
            savefile = savestr % year
            print('Saving to ' + savefile)
            atm.save_nc(savefile, precip)

    return None
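# A hedged usage sketch (added); both paths are hypothetical.
save_gpcp_years('/data/gpcp/gpcp_daily_1997-2015.nc', '/data/gpcp/yearly')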
Example No. 41
def main(inargs):
    """Run the program"""

    # Read data
    dset_in = xray.open_dataset(inargs.infile)
    gio.check_xrayDataset(dset_in, inargs.metric)

    subset_dict = gio.get_subset_kwargs(inargs)
    darray = dset_in[inargs.metric].sel(**subset_dict)

    # Make selection
    metric_threshold = uconv.get_threshold(darray.values,
                                           inargs.metric_threshold)

    assert inargs.threshold_direction in ['greater', 'less']
    if inargs.threshold_direction == 'greater':
        indexes = darray >= metric_threshold
    elif inargs.threshold_direction == 'less':
        indexes = darray <= metric_threshold

    darray_selection = darray.loc[indexes]

    # Write outputs
    gio.write_dates(inargs.outfile, darray_selection['time'].values)
    metadata_dict = {inargs.infile: dset_in.attrs['history']}
    gio.write_metadata(inargs.outfile, file_info=metadata_dict)
Example No. 42
def get_sea_mask(ds):
    raw_mask = xray.open_dataset('mask.nc')
    # extract places where the nearest latitude or longitude (before or after)
    # is in the ocean
    sea_mask = ((raw_mask.reindex_like(ds, method='pad').sftlf < 100)
                & (raw_mask.reindex_like(ds, method='backfill').sftlf < 100))
    return sea_mask
Example No. 43
def read_data(data_dir, lat, lon, resample=None):
    files = sorted([os.path.join(data_dir, f) for f in os.listdir(data_dir)])
    dss = [xr.open_dataset(f).sel(lat=lat, lon=lon, method='nearest') for f in files]
    ds = xr.concat([dr.load() for dr in dss], 'time')
    if resample is not None:
        ds = ds.resample(resample, 'time')
    return ds
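# Illustrative call (added); the data directory and coordinates are hypothetical,
# and the 'D' frequency follows the old xray resample(freq, dim) call used above.
ds = read_data('/data/station_files', lat=-23.5, lon=-45.0, resample='D')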
Example No. 44
    def _calc_anomalies(nc_fn,
                        nc_varname,
                        climatology_begin,
                        climatology_end,
                        absolute=True):
        '''
		calculate absolute or relative anomalies given a NetCDF file
		of the Climatic Research Unit (CRU) Historical Time Series.
		'''
        ds = xray.open_dataset(nc_fn)
        # climatology -- slice the time dimension
        try:
            clim_ds = ds.loc[{
                'time': slice(climatology_begin, climatology_end)
            }]
            climatology = clim_ds[nc_varname].groupby('time.month').mean(
                'time')
        except Exception:
            raise AttributeError(
                'cannot slice netcdf based on climatology years given. they must overlap.'
            )
        # calculate anomalies
        if absolute == True:
            anomalies = ds[nc_varname].groupby('time.month') - climatology
        elif absolute == False:
            anomalies = ds[nc_varname].groupby('time.month') / climatology
        else:
            raise AttributeError(
                'calc_anomalies: absolute can only be True or False')
        return anomalies
Example No. 45
    def retrive_grid(self, file):
        '''Docstring'''

        fdict = self.fnames_dict()
        grdfile = xray.open_dataset( fdict[file], decode_times=False ) 

        return grdfile
Example No. 46
def load(var_name):
    path = ('/archive/Spencer.Hill/am2/am2clim_reyoi/gfdl.ncrc2-default-prod/'
            'pp/atmos_level/ts/monthly/1yr/atmos_level.198301-198312.')

    ds = xray.open_dataset(path + var_name + '.nc',
                           drop_variables=['nv', 'time_bounds'])
    return ds[var_name]
Example No. 47
 def __init__(self):
     self.f_xr = xr.open_dataset('coarser_grid_input/gcmplt.cdf')
     self.c = lambda var: self.f_xr[var].data  #coordinates
     self.v = lambda t, d, var: self.f_xr[var][t, d, :, :].data
     self.g_mask = lambda f, m: np.ma.masked_array(f, mask=[f == m])
     self.v_sb_smpl = lambda t, d, lon, lat, var: self.f_xr[var][
         t, d, :, :][lat, :][:, lon].data
     self.v_smpl_mrg = lambda f1, f2, ax: np.concatenate(f1, f2, axis=ax)
Example No. 48
 def roundtrip(self, data, save_kwargs={}, open_kwargs={}):
     with create_tmp_file() as tmp_file:
         data.to_netcdf(tmp_file,
                        format='NETCDF4_CLASSIC',
                        engine='netcdf4',
                        **save_kwargs)
         with open_dataset(tmp_file, engine='netcdf4', **open_kwargs) as ds:
             yield ds
Example No. 49
    def __init__(self):
        itime = datetime.utcnow() + relativedelta(hours=-5) + relativedelta(minute=0, second=0, microsecond=0)
        itime = itime + relativedelta(hour=itime.hour//6*6)

        ncfile = 'http://nomads.ncep.noaa.gov:9090/dods/gfs_0p25/gfs{}/gfs_0p25_{}z'.format(itime.strftime('%Y%m%d'), itime.strftime('%H'))

        ds = xray.open_dataset(ncfile)
        self.ds = ds
        self.itime = itime
Example No. 50
 def test_cmp_local_file(self):
     url = 'http://test.opendap.org/opendap/hyrax/data/nc/bears.nc'
     actual = open_dataset(url, engine='pydap')
     with open_example_dataset('bears.nc') as expected:
         # don't check attributes since pydap doesn't serialize them correctly
         # also skip the "bears" variable since the test DAP server incorrectly
         # concatenates it.
         self.assertDatasetEqual(actual.drop('bears'),
                                 expected.drop('bears'))
Example No. 51
 def fetch(self, url=None):
     """
     Fetches either the most recent forecast (if url is None) or
     the forecast specified by the url.
     """
     if url is None:
         url = most_recent_dataset(self.url_format, self.frequency)
     ds = xray.open_dataset(url)
     return self.normalize(ds)
Example No. 52
 def create_datasets():
     actual = open_dataset(url, engine='pydap')
     with open_example_dataset('bears.nc') as expected:
         # don't check attributes since pydap doesn't serialize them
         # correctly also skip the "bears" variable since the test DAP
         # server incorrectly concatenates it.
         actual = actual.drop('bears')
         expected = expected.drop('bears')
         yield actual, expected
Example No. 53
 def create_datasets():
     actual = open_dataset(url, engine='pydap')
     with open_example_dataset('bears.nc') as expected:
         # don't check attributes since pydap doesn't serialize them
         # correctly also skip the "bears" variable since the test DAP
         # server incorrectly concatenates it.
         actual = actual.drop('bears')
         expected = expected.drop('bears')
         yield actual, expected
Example No. 54
def get_datasets(names, files, variables, analysis_vars, timestep):
    """
    Parse the files and variables namelists and load the files into xray
    objects.
    """
    datasets = OrderedDict()

    if not any(names):
        return datasets

    for name in names:
        print('Getting data for {0}'.format(name))
        f = files.loc[(files['NAME'] == name)
                      & (files['TIMESTEP'] == timestep)]
        if len(f) > 1:
            raise RasmLibIOError('Union of NAME: {0} and TIMESTEP: {1} '
                                 'returned too many rows ({2}).\n'
                                 '{3}'.format(name, timestep, len(f), f))
        elif len(f) < 1:
            raise RasmLibIOError('Union of NAME: {0} and TIMESTEP: {1} '
                                 'returned no rows.\n'
                                 '{2}'.format(name, timestep, f))
        file_path = f['FILE_PATH'].values[0]
        dataset_class = f['DATASET_CLASS'].values[0]

        # read the dataset
        ds = xray.open_dataset(file_path)

        # adjust units and var names
        for var in analysis_vars:
            v = variables[variables['VARIABLE'] == var]
            units = v['UNITS_STR'].values[0]
            dsvar = v['{0}-VARNAME'.format(dataset_class)].values[0]
            mult = v['{0}-MULT'.format(dataset_class)].values[0]
            offset = v['{0}-OFFSET'.format(dataset_class)].values[0]

            # rename variable
            if dsvar != var:
                ds[dsvar] = ds[dsvar].rename(var)
            # apply multiplier
            if mult != 1.:
                ds[dsvar] *= mult
            # apply offset
            if offset != 0.:
                ds[dsvar] += offset

            # set the units attribute
            ds[dsvar].attrs['units'] = units

            # add any attributes to the dataset
            ds.attrs['analysis_name'] = name
            ds.attrs['dataset_class'] = dataset_class

        datasets[name] = ds

    return datasets
Example No. 55
 def _get_grid_files(self):
     """Get the files holding grid data for an aospy object."""
     datasets = []
     for path in self.grid_file_paths:
         try:
             ds = xray.open_dataset(path, decode_times=False)
         except TypeError:
             ds = xray.open_mfdataset(path, decode_times=False)
         datasets.append(ds)
     return tuple(datasets)
Example No. 56
def read_data(data_dir, lat, lon, resample=None):
    files = sorted([os.path.join(data_dir, f) for f in os.listdir(data_dir)])
    dss = [
        xr.open_dataset(f).sel(lat=lat, lon=lon, method='nearest')
        for f in files
    ]
    ds = xr.concat([dr.load() for dr in dss], 'time')
    if resample is not None:
        ds = ds.resample(resample, 'time')
    return ds
Example No. 57
 def _get_grid_files(self):
     """Get the files holding grid data for an aospy object."""
     datasets = []
     for path in self.grid_file_paths:
         try:
             ds = xray.open_dataset(path, decode_times=False)
         except TypeError:
             ds = xray.open_mfdataset(path, decode_times=False)
         datasets.append(ds)
     return tuple(datasets)
Example No. 58
 def create_datasets(self, **kwargs):
     url = 'http://test.opendap.org/opendap/hyrax/data/nc/bears.nc'
     actual = open_dataset(url, engine='pydap', **kwargs)
     with open_example_dataset('bears.nc') as expected:
         # don't check attributes since pydap doesn't serialize them
         # correctly also skip the "bears" variable since the test DAP
         # server incorrectly concatenates it.
         actual = actual.drop('bears')
         expected = expected.drop('bears')
         yield actual, expected