def test_concat_multiindex(self):
    """Concatenating two slices of a MultiIndex yields the full MultiIndex back."""
    full_index = pd.MultiIndex.from_product([[0, 1, 2], ['a', 'b']])
    pieces = [
        IndexVariable('x', full_index[:2]),
        IndexVariable('x', full_index[2:]),
    ]
    expected = IndexVariable('x', full_index)

    result = IndexVariable.concat(pieces, dim='x')

    # Both the variable itself and the index type must survive the round trip.
    assert result.identical(expected)
    assert isinstance(result.to_index(), pd.MultiIndex)
def test_level_names(self):
    """``level_names`` mirrors the MultiIndex names and is None for a flat index."""
    names = ['level_1', 'level_2']
    multi = pd.MultiIndex.from_product([['a', 'b'], [1, 2]], names=names)
    multi_var = IndexVariable('x', multi)
    self.assertEqual(multi_var.level_names, multi.names)

    # A variable backed by a plain (non-multi) index has no level names.
    flat_var = IndexVariable('y', [10.0])
    self.assertIsNone(flat_var.level_names)
def _decode_datetime_cf(data_array):
    """
    Decode the time-like coordinates of ``data_array`` based on CF conventions

    Every coordinate is examined in two stages:

    1. If its ``units`` attribute is one of ``times.TIME_UNITS`` the values
       are decoded with ``times.decode_cf_timedelta`` and stored as
       ``timedelta64[ns]``.
    2. If its ``units`` attribute contains ``"since"`` the values are decoded
       with ``times.decode_cf_datetime`` (``use_cftime=True``) and cast to
       the dtype reported by ``times._decode_cf_datetime_dtype``.

    Parameters
    ----------
    data_array
        Object exposing ``coords``, item access by coordinate name and
        ``assign_coords`` (presumably an ``xarray.DataArray`` -- confirm
        against the callers).

    Returns
    -------
    The object produced by the successive ``assign_coords`` calls, or
    ``data_array`` itself when no coordinate matched either stage.
    """
    for coord in data_array.coords:
        # stage 1: timedelta
        if (
            "units" in data_array[coord].attrs
            and data_array[coord].attrs["units"] in times.TIME_UNITS
        ):
            # NOTE(review): ``times.pop_to`` presumably moves "units" from
            # attrs into encoding and returns its value -- confirm.
            units = times.pop_to(
                data_array[coord].attrs, data_array[coord].encoding, "units"
            )
            new_values = times.decode_cf_timedelta(
                data_array[coord].values, units=units
            )
            data_array = data_array.assign_coords(
                {
                    coord: IndexVariable(
                        dims=data_array[coord].dims,
                        data=new_values.astype(np.dtype("timedelta64[ns]")),
                        attrs=data_array[coord].attrs,
                        encoding=data_array[coord].encoding,
                    )
                }
            )

        # stage 2: datetime
        # This check reads the attrs again, i.e. after stage 1 may already
        # have called ``pop_to`` on "units" and reassigned ``data_array``.
        if (
            "units" in data_array[coord].attrs
            and "since" in data_array[coord].attrs["units"]
        ):
            units = times.pop_to(
                data_array[coord].attrs, data_array[coord].encoding, "units"
            )
            calendar = times.pop_to(
                data_array[coord].attrs, data_array[coord].encoding, "calendar"
            )
            # The target dtype is computed first so the decoded values can be
            # cast to it below.
            dtype = times._decode_cf_datetime_dtype(
                data_array[coord].values, units, calendar, True
            )
            new_values = times.decode_cf_datetime(
                data_array[coord].values,
                units=units,
                calendar=calendar,
                use_cftime=True,
            )
            data_array = data_array.assign_coords(
                {
                    coord: IndexVariable(
                        dims=data_array[coord].dims,
                        data=new_values.astype(dtype),
                        attrs=data_array[coord].attrs,
                        encoding=data_array[coord].encoding,
                    )
                }
            )
    return data_array
def test_get_level_variable(self):
    """``get_level_variable`` extracts one level and rejects non-MultiIndex data."""
    level_names = ['level_1', 'level_2']
    multi = pd.MultiIndex.from_product([['a', 'b'], [1, 2]], names=level_names)
    multi_var = IndexVariable('x', multi)

    expected_level = IndexVariable('x', multi.get_level_values('level_1'))
    extracted = multi_var.get_level_variable('level_1')
    self.assertVariableIdentical(extracted, expected_level)

    # Asking a flat index for a level is an error.
    with self.assertRaisesRegexp(ValueError, 'has no MultiIndex'):
        IndexVariable('y', [10.0]).get_level_variable('level')
def test_concat_periods(self):
    """Concatenating PeriodIndex-backed variables keeps the PeriodIndex type.

    The check is run twice: once with the default ordering and once with
    explicit ``positions``.
    """
    all_periods = pd.period_range('2000-01-01', periods=10)
    halves = [
        IndexVariable('t', all_periods[:5]),
        IndexVariable('t', all_periods[5:]),
    ]
    expected = IndexVariable('t', all_periods)
    explicit_positions = [list(range(5)), list(range(5, 10))]

    for extra_kwargs in ({}, {'positions': explicit_positions}):
        result = IndexVariable.concat(halves, dim='t', **extra_kwargs)
        assert result.identical(expected)
        assert isinstance(result.to_index(), pd.PeriodIndex)
def _decode_datetime_cf(data_array, decode_times, decode_timedelta):
    """
    Decode datetime and timedelta coordinates based on CF conventions

    A coordinate whose ``units`` attribute contains ``"since"`` is decoded
    with ``times.CFDatetimeCoder(use_cftime=True)`` when ``decode_times`` is
    truthy.  Otherwise, a coordinate whose ``units`` attribute is listed in
    ``times.TIME_UNITS`` is decoded with ``times.CFTimedeltaCoder`` when
    ``decode_timedelta`` is truthy.  ``decode_timedelta=None`` falls back to
    the value of ``decode_times``.

    Returns the data array with every decoded coordinate replaced by an
    ``IndexVariable`` built from the decoded variable.
    """
    if decode_timedelta is None:
        decode_timedelta = decode_times

    for coord in data_array.coords:
        # Pick the coder that applies to this coordinate, if any.
        if decode_times and "since" in data_array[coord].attrs.get("units", ""):
            coder = times.CFDatetimeCoder(use_cftime=True)
        elif (
            decode_timedelta
            and data_array[coord].attrs.get("units") in times.TIME_UNITS
        ):
            coder = times.CFTimedeltaCoder()
        else:
            continue

        decoded = coder.decode(as_variable(data_array[coord]), name=coord)
        if decoded is None:
            continue

        dims, values, attrs, encoding = variables.unpack_for_decoding(decoded)
        replacement = IndexVariable(
            dims=dims,
            data=values,
            attrs=attrs,
            encoding=encoding,
        )
        data_array = data_array.assign_coords({coord: replacement})

    return data_array
def test_numpy_same_methods(self):
    """``item`` and ``searchsorted`` behave like their numpy counterparts."""
    scalar_var = Variable([], np.float32(0.0))
    # ``item`` must hand back a native Python float, not a numpy scalar.
    self.assertEqual(scalar_var.item(), 0)
    self.assertIs(type(scalar_var.item()), float)

    index_var = IndexVariable('x', np.arange(5))
    insertion_point = index_var.searchsorted(2)
    self.assertEqual(2, insertion_point)
def test_data(self):
    """IndexVariable wraps its data in a PandasIndexAdapter and is immutable."""
    index_var = IndexVariable('x', np.arange(3.0))

    # Internal storage vs. public view of the data.
    self.assertIsInstance(index_var._data, PandasIndexAdapter)
    self.assertIsInstance(index_var.data, np.ndarray)

    # dtype and contents are unchanged by the wrapping.
    self.assertEqual(float, index_var.dtype)
    self.assertArrayEqual(np.arange(3), index_var)
    self.assertEqual(float, index_var.values.dtype)

    # Item assignment is not allowed.
    with self.assertRaisesRegexp(TypeError, 'cannot be modified'):
        index_var[:] = 0
def test_data(self): x = IndexVariable('x', np.arange(3.0)) # data should be initially saved as an ndarray self.assertIs(type(x._data), np.ndarray) self.assertEqual(float, x.dtype) self.assertArrayEqual(np.arange(3), x) self.assertEqual(float, x.values.dtype) # after inspecting x.values, the IndexVariable value will be saved as an Index self.assertIsInstance(x._data, PandasIndexAdapter) with self.assertRaisesRegexp(TypeError, 'cannot be modified'): x[:] = 0
def _load_netcdf_1d_coords(tags):
    """
    Build 1-D coordinate variables from NetCDF dimension tags.

    Dimension information:
     - NETCDF_DIM_EXTRA: '{time}' (comma separated list of dim names)
     - NETCDF_DIM_time_DEF: '{2,6}' (dim size, dim dtype)
     - NETCDF_DIM_time_VALUES: '{0,872712.659688}' (comma separated list of data)

    Returns a dict mapping each dimension name that has a ``_DEF`` tag to an
    ``IndexVariable``; the dict is empty when ``NETCDF_DIM_EXTRA`` is missing
    or empty.
    """
    extra_dims = tags.get("NETCDF_DIM_EXTRA")
    if not extra_dims:
        return {}

    coords = {}
    for dim_name in extra_dims.strip("{}").split(","):
        definition = tags.get(f"NETCDF_DIM_{dim_name}_DEF")
        if not definition:
            # No definition for this dimension: nothing to build.
            continue

        # The definition is "{size,dtype_code}"; only the dtype code is used.
        _size, dtype_code = definition.strip("{}").split(",")
        dtype = NETCDF_DTYPE_MAP.get(int(dtype_code), object)

        raw_values = tags[f"NETCDF_DIM_{dim_name}_VALUES"].strip("{}")
        parsed = np.fromstring(raw_values, dtype=dtype, sep=",")
        coords[dim_name] = IndexVariable(dim_name, parsed)

    return coords
def test_name(self):
    """``name`` reports the dimension name and cannot be reassigned."""
    dim_name = 'x'
    coord = IndexVariable(dim_name, [10.0])
    self.assertEqual(coord.name, dim_name)

    # The attribute is read-only.
    with self.assertRaises(AttributeError):
        coord.name = 'y'
def test_multiindex_default_level_names(self):
    """Unnamed MultiIndex levels receive '<dim>_level_<n>' default names."""
    unnamed = pd.MultiIndex.from_product([['a', 'b'], [1, 2]])
    variable = IndexVariable(['x'], unnamed, {'foo': 'bar'})
    level_names = variable.to_index().names
    self.assertEqual(level_names, ('x_level_0', 'x_level_1'))
def test_to_index(self):
    """``to_index`` returns a pandas Index named after the dimension."""
    values = 0.5 * np.arange(10)
    variable = IndexVariable(['time'], values, {'foo': 'bar'})
    expected_index = pd.Index(values, name='time')
    self.assertTrue(expected_index.identical(variable.to_index()))
def test_init(self):
    """Constructing an IndexVariable from zero-dimensional data is rejected."""
    expected_message = 'must be 1-dimensional'
    with self.assertRaisesRegexp(ValueError, expected_message):
        IndexVariable((), 0)
def ensembles2dataset(ensdict, dsattrs=None, verbose=False, print_every=1000):
    """
    Convert a dictionary of ensembles into an xarray Dataset object.

    Parameters
    ----------
    ensdict : sequence
        Ensembles to convert. Entries that are not ``dict`` instances are
        treated as bad ensembles and dropped. Every remaining entry must
        provide the keys read below ('timestamp', 'variable_leader_janus',
        'velocity_janus', 'velocity_beam5', 'correlation_janus',
        'correlation_beam5', 'echo_intensity_janus', 'echo_intensity_beam5').
        The first valid entry additionally supplies 'fixed_leader_janus'
        and 'fixed_leader_beam5'.
    dsattrs : dict, optional
        Attributes passed to the resulting ``Dataset``. Defaults to an
        empty dict (created per call, not shared between calls).
    verbose : bool, optional
        If true, print the running ensemble count every `print_every`
        ensembles.
    print_every : int, optional
        Reporting interval used when `verbose` is true.

    Returns
    -------
    ds : Dataset
        Variables 'b1'..'b5' (velocities), 'cor1'..'cor5' (correlations),
        'int1'..'int5' (echo amplitudes) and 'phi1'..'phi3' (heading,
        pitch, roll).

    Raises
    ------
    IndexError
        If `ensdict` is a list that contains no ``dict`` entry.
    AssertionError
        If the Janus beam angle of the first valid ensemble is not 25.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    dsattrs = {} if dsattrs is None else dsattrs

    mms2ms = 1e-3
    fbadens = np.array([not isinstance(ens, dict) for ens in ensdict])
    nt = len(ensdict) - np.sum(fbadens)

    # Find the first valid ensemble; it provides the fixed leaders.
    n = 0
    ensdict0 = np.nan
    while not isinstance(ensdict0, dict):
        ensdict0 = ensdict[n]
        n += 1
    nz = ensdict0['fixed_leader_janus']['number_of_cells']

    # NaN-filled templates: (nz, nt) for profile variables, (nt,) for scalars.
    # Every variable gets its own copy.
    sk = np.ma.zeros((nz, nt))*np.nan
    b1, b2, b3, b4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    sk0 = np.ma.zeros(nt)*np.nan
    cor1, cor2, cor3, cor4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    int1, int2, int3, int4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    b5, cor5, int5 = sk.copy(), sk.copy(), sk.copy()
    heading, pitch, roll = sk0.copy(), sk0.copy(), sk0.copy()

    tjanus = []

    # Drop the bad (non-dict) ensembles.
    ensdict = np.array(ensdict)[~fbadens]
    ensdict = ensdict.tolist()
    for n, ensarr in enumerate(ensdict):
        tjanus.append(ensarr['timestamp'])
        heading[n] = ensarr['variable_leader_janus']['heading']
        pitch[n] = ensarr['variable_leader_janus']['pitch']
        roll[n] = ensarr['variable_leader_janus']['roll']
        vjanus = ensarr['velocity_janus']['data']
        b1[:, n] = vjanus[:, 0]
        b2[:, n] = vjanus[:, 1]
        b3[:, n] = vjanus[:, 2]
        b4[:, n] = vjanus[:, 3]
        b5[:, n] = ensarr['velocity_beam5']['data'].squeeze()
        corjanus = ensarr['correlation_janus']['data']
        cor1[:, n] = corjanus[:, 0]
        cor2[:, n] = corjanus[:, 1]
        cor3[:, n] = corjanus[:, 2]
        cor4[:, n] = corjanus[:, 3]
        cor5[:, n] = ensarr['correlation_beam5']['data'].squeeze()
        intjanus = ensarr['echo_intensity_janus']['data']
        int1[:, n] = intjanus[:, 0]
        int2[:, n] = intjanus[:, 1]
        int3[:, n] = intjanus[:, 2]
        int4[:, n] = intjanus[:, 3]
        int5[:, n] = ensarr['echo_intensity_beam5']['data'].squeeze()

        # Progress report uses the 1-based count of processed ensembles.
        if verbose and not (n + 1) % print_every:
            print(n + 1)

    fixj = ensdict0['fixed_leader_janus']
    fix5 = ensdict0['fixed_leader_beam5']

    # Add ping offset to get beam 5's timestamps.
    dt5 = fix5['ping_offset_time'] # In milliseconds.
    dt5 = np.array(Timedelta(dt5, unit='ms'))
    t5 = tjanus + dt5

    th = fixj['beam_angle']
    assert th==25 # Always 25 degrees.
    th = th*np.pi/180.
    Cth = np.cos(th)

    # Construct along-beam/vertical axes.
    cm2m = 1e-2
    r1janus = fixj['bin_1_distance']*cm2m
    r1b5 = fix5['bin_1_distance']*cm2m
    ncj = fixj['number_of_cells']
    nc5 = fix5['number_of_cells']
    lcj = fixj['depth_cell_length']*cm2m
    lc5 = fix5['depth_cell_length']*cm2m
    Lj = ncj*lcj # Distance from center of bin 1 to the center of last bin (Janus).
    L5 = nc5*lc5 # Distance from center of bin 1 to the center of last bin (beam 5).

    rb = r1janus + np.arange(0, Lj, lcj) # Distance from xducer head
                                         # (Janus).
    zab = Cth*rb                         # Vertical distance from xducer head
                                         # (Janus).
    zab5 = r1b5 + np.arange(0, L5, lc5)  # Distance from xducer head, also
                                         # depth for the vertical beam.

    rb = IndexVariable('z', rb, attrs={'units':'meters', 'long_name':"along-beam distance from the xducer's face to the center of the bins, for beams 1-4 (Janus)"})
    zab = IndexVariable('z', zab, attrs={'units':'meters', 'long_name':"vertical distance from the instrument's head to the center of the bins, for beams 1-4 (Janus)"})
    zab5 = IndexVariable('z', zab5, attrs={'units':'meters', 'long_name':"vertical distance from xducer face to the center of the bins, for beam 5 (vertical)"})
    time = IndexVariable('time', tjanus, attrs={'long_name':'timestamp for beams 1-4 (Janus)'})
    time5 = IndexVariable('time', t5, attrs={'long_name':'timestamp for beam 5 (vertical)'})

    coords0 = [('time', time)]
    coords = [('z', zab), ('time', time)]
    coords5 = [('z5', zab5), ('time5', time5)]
    dims = ['z', 'time']
    dims0 = ['time']

    # Convert velocities to m/s.
    b1, b2, b3, b4, b5 = b1*mms2ms, b2*mms2ms, b3*mms2ms, b4*mms2ms, b5*mms2ms

    # Scale heading, pitch and roll. Sentinel V manual, p. 259.
    phisc = 0.01
    heading *= phisc
    pitch *= phisc
    roll *= phisc

    arrs = (b1, b2, b3, b4, b5,
            cor1, cor2, cor3, cor4, cor5,
            int1, int2, int3, int4, int5,
            heading, pitch, roll)
            # pressure, temperature, salinity, soundspeed)
    long_names = ('Beam 1 velocity', 'Beam 2 velocity',
                  'Beam 3 velocity', 'Beam 4 velocity',
                  'Beam 5 velocity',
                  'Beam 1 correlation', 'Beam 2 correlation',
                  'Beam 3 correlation', 'Beam 4 correlation',
                  'Beam 5 correlation',
                  'Beam 1 echo amplitude', 'Beam 2 echo amplitude',
                  'Beam 3 echo amplitude', 'Beam 4 echo amplitude',
                  'Beam 5 echo amplitude',
                  'heading', 'pitch', 'roll')
    units = ('m/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'no units', 'no units', 'no units', 'no units',
             'no units',
             'dB', 'dB', 'dB', 'dB',
             'dB',
             'degrees', 'degrees', 'degrees')
    names = ('b1', 'b2', 'b3', 'b4', 'b5',
             'cor1', 'cor2', 'cor3', 'cor4', 'cor5',
             'int1', 'int2', 'int3', 'int4', 'int5',
             'phi1', 'phi2', 'phi3')
    data_vars = {}
    for arr,name,long_name,unit in zip(arrs,names,long_names,units):

        # NOTE(review): none of the long names above contains 'Beam5'
        # (they read 'Beam 5 ...'), so this branch is never taken and the
        # beam 5 variables end up on the (z, time) coordinates. Kept as is
        # because changing it would alter the layout of the returned
        # Dataset -- confirm the intended behavior.
        if 'Beam5' in long_name:
            coordsn = coords5
            dimsn = dims
        elif 'phi' in name:
            coordsn = coords0
            dimsn = dims0
        else:
            coordsn = coords
            dimsn = dims

        if 'int' in name:
            arr *= 0.45 # Scale factor for echo intensity, see
                        # Sentinel V manual p. 264.

        da = DataArray(arr, coords=coordsn, dims=dimsn, attrs=dict(units=unit, long_name=long_name))
        data_vars.update({name:da})

    allcoords = {'rb':rb} # Along-beam distance for slanted beams.
    allcoords.update(coords)
    allcoords.update(coords5)
    ds = Dataset(data_vars=data_vars, coords=allcoords, attrs=dsattrs)

    return ds
def ensembles2dataset_dask(ensdict, ncfpath, dsattrs={}, chunks=10, verbose=True, print_every=1000):
    """
    Convert a dictionary of ensembles into an xarray Dataset object
    using dask.delayed to keep memory usage feasible.

    The function writes to `ncfpath` in three passes: heading/pitch/roll
    (mode 'w'), then the beam 5 variables (mode 'a'), then the Janus
    variables (mode 'a').

    Parameters
    ----------
    ensdict : sequence
        Indexable collection whose items expose ``.compute()``
        (presumably dask delayed ensembles -- confirm against callers).
    ncfpath
        Target handed to ``Dataset.to_netcdf``.
    dsattrs : dict, optional
        Attributes for the final ``Dataset``.
        NOTE(review): this is a mutable default argument.
    chunks : int, optional
        Chunk size handed to ``_bag2DataArray`` and ``darr.from_array``.
    verbose : bool, optional
        If true, print progress messages.
    print_every : int, optional
        Not referenced anywhere in this function.

    NOTE(review): this function looks unfinished. Several names are used
    without being assigned inside it (`unit`/`lname` when building `kwda`,
    `varsj`, `b1`..`roll`, `data_vars`, `allcoords`) and there is a bare
    ``embed()`` call. Unless those names exist at module level, the
    function raises NameError before returning -- confirm and fix.
    """
    mms2ms = 1e-3
    n=0
    # fbadens = np.array(ensdict_aux)==None
    # nt = len(ensdict) - np.sum(fbadens)
    # embed()

    # Find the first ensemble that computes to something other than None;
    # it provides the fixed leaders.
    ensdict0 = None
    while ensdict0 is None:
        ensdict0 = ensdict[n].compute()
        n+=1
    nz = ensdict0['fixed_leader_janus']['number_of_cells']

    fixj = ensdict0['fixed_leader_janus'].compute()
    fix5 = ensdict0['fixed_leader_beam5'].compute()

    # Add ping offset to get beam 5's timestamps.
    dt5 = fix5['ping_offset_time'] # In milliseconds.
    dt5 = np.array(Timedelta(dt5, unit='ms'))

    th = fixj['beam_angle']
    assert th==25 # Always 25 degrees.
    th = th*np.pi/180.
    Cth = np.cos(th)

    # Construct along-beam/vertical axes.
    cm2m = 1e-2
    r1janus = fixj['bin_1_distance']*cm2m
    r1b5 = fix5['bin_1_distance']*cm2m
    ncj = fixj['number_of_cells']
    nc5 = fix5['number_of_cells']
    lcj = fixj['depth_cell_length']*cm2m
    lc5 = fix5['depth_cell_length']*cm2m
    Lj = ncj*lcj # Distance from center of bin 1 to the center of last bin (Janus).
    L5 = nc5*lc5 # Distance from center of bin 1 to the center of last bin (beam 5).

    rb = r1janus + np.arange(0, Lj, lcj) # Distance from xducer head
                                         # (Janus).
    zab = Cth*rb                         # Vertical distance from xducer head
                                         # (Janus).
    zab5 = r1b5 + np.arange(0, L5, lc5)  # Distance from xducer head, also
                                         # depth for the vertical beam.

    rb = IndexVariable('z', rb, attrs={'units':'meters', 'long_name':"along-beam distance from the xducer's face to the center of the bins, for beams 1-4 (Janus)"})
    zab = IndexVariable('z', zab, attrs={'units':'meters', 'long_name':"vertical distance from the instrument's head to the center of the bins, for beams 1-4 (Janus)"})
    zab5 = IndexVariable('z5', zab5, attrs={'units':'meters', 'long_name':"vertical distance from xducer face to the center of the bins, for beam 5 (vertical)"})

    # From here on `ensdict` is whatever `from_sequence` returns
    # (presumably a dask bag -- confirm).
    ensdict = from_sequence(ensdict)
    tjanus = ensdict.map_partitions(_alloc_timestamp_parts)
    t5 = _addtarr(tjanus, dt5)

    if verbose: print("Unpacking timestamps.")
    time = IndexVariable('time', tjanus.compute(), attrs={'long_name':'timestamps for beams 1-4 (Janus)'})
    time5 = IndexVariable('time5', t5.compute(), attrs={'long_name':'timestamps for beam 5 (vertical)'})
    if verbose: print("Done unpacking timestamps.")

    coords0 = dict(time=time)
    coords = dict(z=zab, time=time, rb=rb)
    coords5 = dict(z5=zab5, time5=time5)
    dims = ['z', 'time']
    dims5 = ['z5', 'time5']
    dims0 = ['time']

    # Pass 1: heading, pitch and roll on the 'time' dimension.
    coordsdict = coords0
    if verbose: print("Allocating heading, pitch, roll.")
    # NOTE(review): `unit` and `lname` are only bound by the for loop below,
    # so this line runs before they exist in this function -- confirm.
    kwda = dict(coords=coordsdict, dims=dims0, attrs=dict(units=unit, long_name=lname))
    svars = ['heading', 'pitch', 'roll']
    long_names = svars
    units = ['degrees']*3
    grp = 'variable_leader_janus'
    vars1d = dict()
    for vname,lname,unit in zip(svars,long_names,units):
        if verbose: print(vname)
        wrk = ensdict.map_partitions(_alloc_hpr, grp, vname)
        # wrk = darr.from_array(np.array(wrk.compute()), chunks=chunks)
        wrk2 = delayed(_bag2DataArray)(wrk, chunks)(**kwda)
        vars1d.update({vname:wrk2})
    del(wrk, wrk2)
    ds2hpr = Dataset(data_vars=vars1d, coords=coordsdict)
    # compute=False defers the write until .compute() is called below.
    ds2hpr = ds2hpr.to_netcdf(ncfpath, compute=False, mode='w')
    if verbose: print("Saving heading, pitch, roll.")
    ds2hpr.compute()
    if verbose: print("Done saving heading, pitch, roll.")
    del(ds2hpr)

    # Pass 2: beam 5 variables, appended to the same file.
    coordsdict = coords5
    # Load beam 5 variables into memory to
    # be able to put them in a chunked DataArray.
    if verbose: print("Allocating beam 5 variables.")
    grps = ['velocity_beam5', 'correlation_beam5', 'echo_intensity_beam5']
    long_names = ['Beam 5 velocity', 'Beam 5 correlation', 'Beam 5 echo amplitude']
    units = ['mm/s, positive toward xducer face', 'unitless', 'dB']
    vars5 = dict()
    for grp,lname,unit in zip(grps,long_names,units):
        if verbose: print(grp)
        wrk = ensdict.map_partitions(_alloc_beam5, grp)
        wrk = darr.from_array(np.array(wrk.compute()).T, chunks=(1, chunks))
        wrk = DataArray(wrk, coords=coordsdict, dims=dims5, attrs=dict(units=unit, long_name=lname))
        vars5.update({grp:wrk})
    del(wrk)
    ds5 = Dataset(data_vars=vars5, coords=coordsdict)
    ds5 = ds5.to_netcdf(ncfpath, compute=False, mode='a')
    if verbose: print("Saving beam 5 variables.")
    ds5.compute()
    if verbose: print("Done saving beam 5 variables.")
    del(ds5)
    # NOTE(review): presumably a leftover interactive debugging hook
    # (e.g. IPython's embed) -- confirm and remove.
    embed()

    # Pass 3: Janus (beams 1-4) variables, appended to the same file.
    coordsdict = coords
    # Load beams 1-4 variables into memory to
    # be able to put them in a chunked DataArray.
    if verbose: print("Allocating Janus variables.")
    grps = ['velocity_janus', 'correlation_janus', 'echo_intensity_janus']
    long_names = ['Janus velocity', 'Janus correlation', 'Janus echo amplitude']
    units = ['mm/s, positive toward xducer face', 'unitless', 'dB']
    vars5 = dict()
    for grp,lname,unit in zip(grps,long_names,units):
        if verbose: print(grp)
        wrk = ensdict.map_partitions(_alloc_janus, grp)
        wrk = darr.from_array(np.array(wrk.compute()).T, chunks=(1, chunks))
        # NOTE(review): `dims5` ('z5', 'time5') is combined with the Janus
        # coordinates (z, time, rb) here -- confirm the intended dims.
        wrk = DataArray(wrk, coords=coordsdict, dims=dims5, attrs=dict(units=unit, long_name=lname))
        vars5.update({grp:wrk})
    del(wrk)
    # NOTE(review): `varsj` is never assigned in this function; the loop
    # above fills `vars5` instead -- confirm.
    dsj = Dataset(data_vars=varsj, coords=coordsdict)
    dsj = dsj.to_netcdf(ncfpath, compute=False, mode='a')
    if verbose: print("Saving Janus variables.")
    dsj.compute()
    if verbose: print("Done saving Janus variables.")
    del(dsj)

    long_names = ('Beam 1 velocity', 'Beam 2 velocity',
                  'Beam 3 velocity', 'Beam 4 velocity',
                  'Beam 5 velocity',
                  'Beam 1 correlation', 'Beam 2 correlation',
                  'Beam 3 correlation', 'Beam 4 correlation',
                  'Beam 5 correlation',
                  'Beam 1 echo amplitude', 'Beam 2 echo amplitude',
                  'Beam 3 echo amplitude', 'Beam 4 echo amplitude',
                  'Beam 5 echo amplitude',
                  'heading', 'pitch', 'roll')
    units = ('m/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'no units', 'no units', 'no units', 'no units',
             'no units',
             'dB', 'dB', 'dB', 'dB',
             'dB',
             'degrees', 'degrees', 'degrees')
    names = ('b1', 'b2', 'b3', 'b4', 'b5',
             'cor1', 'cor2', 'cor3', 'cor4', 'cor5',
             'int1', 'int2', 'int3', 'int4', 'int5',
             'phi1', 'phi2', 'phi3')
    # data_vars = {}
    #
    # sk = darr.zeros((nz, nt), chunks=chunks)*np.nan # Beam vels stored in mm/s
    #                                   # as int64 to save memory.
    # b1, b2, b3, b4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    # # embed()
    # sk0 = darr.zeros(nt, chunks=chunks)*np.nan
    # cor1, cor2, cor3, cor4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    # int1, int2, int3, int4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    # b5, cor5, int5 = sk.copy(), sk.copy(), sk.copy()
    # heading, pitch, roll = sk0.copy(), sk0.copy(), sk0.copy()
    # tjanus = []
    # ensdict = np.array(ensdict)[~fbadens]
    # ensdict = ensdict.tolist()

    # NOTE(review): the arrays below, `data_vars` and `allcoords` are only
    # assigned in the commented-out code above, not in live code -- confirm.
    arrs = (b1, b2, b3, b4, b5,
            cor1, cor2, cor3, cor4, cor5,
            int1, int2, int3, int4, int5,
            heading, pitch, roll)
            # pressure, temperature, salinity, soundspeed)
    for arr,name,long_name,unit in zip(arrs,names,long_names,units):

        # None of the long names above contains 'Beam5' (they read
        # 'Beam 5 ...'), so the first branch is never taken as written.
        if 'Beam5' in long_name:
            coordsn = coords5
            dimsn = dims
        elif 'phi' in name:
            coordsn = coords0
            dimsn = dims0
        else:
            coordsn = coords
            dimsn = dims

        da = DataArray(arr, coords=coordsn, dims=dimsn, attrs=dict(units=unit, long_name=long_name))
        data_vars.update({name:da})

    allcoords.update(coords)
    allcoords.update(coords5)
    ds = Dataset(data_vars=data_vars, coords=allcoords, attrs=dsattrs)

    return ds