def test_imt_multi_not_string(self):
    """Round-trip a multi-trajectory file whose trajectory ids are not strings."""
    filepath = os.path.join(os.path.dirname(__file__), 'resources', 'im-multiple-nonstring.nc')
    CFDataset.load(filepath).close()

    with IncompleteMultidimensionalTrajectory(filepath) as ncd:
        tmp_fd, tmp_path = tempfile.mkstemp(suffix='.nc')
        df = ncd.to_dataframe(clean_rows=False)

        with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmp_path) as out:
            assert 'trajectory' in out.dimensions
        # Try to load it again
        test_is_mine(IncompleteMultidimensionalTrajectory, tmp_path)

        with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmp_path, reduce_dims=True) as out:
            # Could not reduce dims since there was more than one trajectory
            assert 'trajectory' not in out.dimensions
        # Try to load it again
        test_is_mine(IncompleteMultidimensionalTrajectory, tmp_path)

        with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmp_path, unlimited=True) as out:
            assert out.dimensions['obs'].isunlimited() is True
        # Try to load it again
        test_is_mine(IncompleteMultidimensionalTrajectory, tmp_path)

        with IncompleteMultidimensionalTrajectory.from_dataframe(df, tmp_path, reduce_dims=True, unlimited=True) as out:
            # Could not reduce dims since there was more than one trajectory
            assert 'trajectory' not in out.dimensions
            assert out.dimensions['obs'].isunlimited() is True
        # Try to load it again
        test_is_mine(IncompleteMultidimensionalTrajectory, tmp_path)

        os.close(tmp_fd)
        os.remove(tmp_path)
def test_imt_calculated_metadata(self):
    """Check calculated metadata for both the single and multi trajectory fixtures."""
    with IncompleteMultidimensionalTrajectory(self.single) as ncd:
        meta = ncd.calculated_metadata()
        t_start = dtparse('1990-01-01 00:00:00')
        t_end = dtparse('1990-01-05 03:00:00')
        assert meta.min_t == t_start
        assert meta.max_t == t_end

        traj1 = meta.trajectories["Trajectory1"]
        assert traj1.min_z == 0
        assert traj1.max_z == 99
        assert traj1.min_t == t_start
        assert traj1.max_t == t_end
        assert np.isclose(traj1.first_loc.x, -7.9336)
        assert np.isclose(traj1.first_loc.y, 42.00339)

    with IncompleteMultidimensionalTrajectory(self.multi) as ncd:
        meta = ncd.calculated_metadata()
        assert meta.min_t == dtparse('1990-01-01 00:00:00')
        assert meta.max_t == dtparse('1990-01-02 12:00:00')
        assert len(meta.trajectories) == 4

        traj0 = meta.trajectories["Trajectory0"]
        assert traj0.min_z == 0
        assert traj0.max_z == 35
        assert traj0.min_t == dtparse('1990-01-01 00:00:00')
        assert traj0.max_t == dtparse('1990-01-02 11:00:00')
        assert np.isclose(traj0.first_loc.x, -35.07884)
        assert np.isclose(traj0.first_loc.y, 2.15286)

        traj3 = meta.trajectories["Trajectory3"]
        assert traj3.min_z == 0
        assert traj3.max_z == 36
        assert traj3.min_t == dtparse('1990-01-01 00:00:00')
        assert traj3.max_t == dtparse('1990-01-02 12:00:00')
        assert np.isclose(traj3.first_loc.x, -73.3026)
        assert np.isclose(traj3.first_loc.y, 1.95761)
def test_imt_dataframe(self):
    """Round-trip the single and multi trajectory fixtures through a DataFrame.

    Fix vs. original: ``tempfile.mkstemp(...)[-1]`` discarded the open
    OS-level file descriptor (leaking one fd per call), and the temp files
    were only removed on the success path. The fds are now kept, and both
    close and removal happen in ``finally`` blocks.
    """
    single_fd, single_tmp = tempfile.mkstemp(suffix='.nc')
    try:
        with IncompleteMultidimensionalTrajectory(self.single) as ncd:
            single_df = ncd.to_dataframe(clean_rows=False)
            single_nc = IncompleteMultidimensionalTrajectory.from_dataframe(single_df, single_tmp)
            single_nc.close()
    finally:
        os.close(single_fd)
        os.remove(single_tmp)

    multip_fd, multip_tmp = tempfile.mkstemp(suffix='.nc')
    try:
        with IncompleteMultidimensionalTrajectory(self.multi) as ncd:
            multip_df = ncd.to_dataframe(clean_rows=False)
            multip_nc = IncompleteMultidimensionalTrajectory.from_dataframe(multip_df, multip_tmp)
            multip_nc.close()
    finally:
        os.close(multip_fd)
        os.remove(multip_tmp)
def test_imt_calculated_metadata_multi(self):
    """Verify per-trajectory calculated metadata on the multi-trajectory fixture."""
    filepath = os.path.join(os.path.dirname(__file__), 'resources', 'im-multiple.nc')
    with IncompleteMultidimensionalTrajectory(filepath) as ncd:
        meta = ncd.calculated_metadata()
        assert meta.min_t == dtparse('1990-01-01 00:00:00')
        assert meta.max_t == dtparse('1990-01-02 12:00:00')
        assert len(meta.trajectories) == 4

        traj0 = meta.trajectories["Trajectory0"]
        assert traj0.min_z == 0
        assert traj0.max_z == 35
        assert traj0.min_t.round('S') == dtparse('1990-01-01 00:00:00')
        assert traj0.max_t.round('S') == dtparse('1990-01-02 11:00:00')
        lon0, lat0 = traj0.geometry.coords[0]
        assert np.isclose(lon0, -35.07884)
        assert np.isclose(lat0, 2.15286)

        traj3 = meta.trajectories["Trajectory3"]
        assert traj3.min_z == 0
        assert traj3.max_z == 36
        assert traj3.min_t.round('S') == dtparse('1990-01-01 00:00:00')
        assert traj3.max_t.round('S') == dtparse('1990-01-02 12:00:00')
        lon3, lat3 = traj3.geometry.coords[0]
        assert np.isclose(lon3, -73.3026)
        assert np.isclose(lat3, 1.95761)
def test_imt_change_axis_names(self):
    """Round-trip with renamed axis variables (t/x/y/z -> time/lon/lat/depth).

    Fix vs. original: the ``mkstemp`` file descriptor was never closed and
    the temp file was never removed (every other test in this file cleans
    up). Cleanup now happens in a ``finally`` block.
    """
    new_axis = {'t': 'time', 'x': 'lon', 'y': 'lat', 'z': 'depth'}
    filepath = os.path.join(os.path.dirname(__file__), 'resources', 'im-multiple.nc')

    with IncompleteMultidimensionalTrajectory(filepath) as ncd:
        fid, tmpfile = tempfile.mkstemp(suffix='.nc')
        try:
            df = ncd.to_dataframe(clean_rows=False, axes=new_axis)

            with IncompleteMultidimensionalTrajectory.from_dataframe(
                    df, tmpfile, axes=new_axis) as result_ncd:
                assert 'trajectory' in result_ncd.dimensions
                # Axis variables should carry the renamed identifiers
                assert 'time' in result_ncd.variables
                assert 'lon' in result_ncd.variables
                assert 'lat' in result_ncd.variables
                assert 'depth' in result_ncd.variables
            # Try to load it again
            test_is_mine(IncompleteMultidimensionalTrajectory, tmpfile)
        finally:
            os.close(fid)
            os.remove(tmpfile)
def merge_profile_netcdf_files(folder, output):
    """Merge all per-profile netCDF files in *folder* into one file at *output*.

    Reads every ``*.nc`` member as a trajectory dataframe, concatenates them,
    and writes the result back out as a contiguous ragged trajectory-profile
    file, applying the 'ioos_ngdac' metadata template.
    """
    import pandas as pd
    from glob import glob

    merged_fd, merged_path = tempfile.mkstemp(suffix='.nc', prefix='gutils_merge_')
    try:
        # Collect the member files in a deterministic (sorted) order
        members = sorted(list(glob(os.path.join(folder, '*.nc'))))

        # Axis mapping used when reading each member file
        read_axes = {
            'trajectory': 'trajectory',
            't': 'time',
            'x': 'lon',
            'y': 'lat',
            'z': 'depth',
        }

        # One dataframe per member netCDF file
        frames = []
        for member in members:
            with IncompleteMultidimensionalTrajectory(member) as old:
                frames.append(old.to_dataframe(axes=read_axes, clean_cols=False))

        full_df = pd.concat(frames, ignore_index=True)

        # Axis mapping for writing — adds the profile dimension
        write_axes = {
            'trajectory': 'trajectory',
            'profile': 'profile_id',
            't': 'profile_time',
            'x': 'profile_lon',
            'y': 'profile_lat',
            'z': 'depth',
        }
        merged = ContiguousRaggedTrajectoryProfile.from_dataframe(
            full_df,
            output=merged_path,
            axes=write_axes,
            mode='a'
        )

        # Apply default metadata
        attrs = read_attrs(template='ioos_ngdac')
        merged.apply_meta(attrs, create_vars=False, create_dims=False)
        merged.close()

        safe_makedirs(os.path.dirname(output))
        shutil.move(merged_path, output)
    finally:
        os.close(merged_fd)
        if os.path.exists(merged_path):
            os.remove(merged_path)
def test_imt_calculated_metadata_single(self):
    """Verify calculated metadata for the single-trajectory fixture."""
    filepath = os.path.join(os.path.dirname(__file__), 'resources', 'im-single.nc')
    with IncompleteMultidimensionalTrajectory(filepath) as ncd:
        meta = ncd.calculated_metadata()
        t_start = dtparse('1990-01-01 00:00:00')
        t_end = dtparse('1990-01-05 03:00:00')
        assert meta.min_t.round('S') == t_start
        assert meta.max_t.round('S') == t_end

        traj1 = meta.trajectories["Trajectory1"]
        assert traj1.min_z == 0
        assert traj1.max_z == 99
        assert traj1.min_t.round('S') == t_start
        assert traj1.max_t.round('S') == t_end
        lon, lat = traj1.geometry.coords[0]
        assert np.isclose(lon, -7.9336)
        assert np.isclose(lat, 42.00339)
def test_im_single_row(self):
    """A fixture with a single data row yields a single-row dataframe."""
    filepath = os.path.join(os.path.dirname(__file__), 'resources', 'im-singlerow.nc')
    with IncompleteMultidimensionalTrajectory(filepath) as ncd:
        assert len(ncd.to_dataframe(clean_rows=True)) == 1
def test_json_attributes_multi(self):
    """json_attributes() runs without error on the multi-trajectory fixture."""
    filepath = os.path.join(os.path.dirname(__file__), 'resources', 'im-multiple.nc')
    with IncompleteMultidimensionalTrajectory(filepath) as ncd:
        ncd.json_attributes()
def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=ProfileIdTypes.EPOCH):
    """Write a single glider profile to its own netCDF file.

    The file is built at a temporary path, populated with data and metadata,
    and only moved into ``output_path`` once complete; the temp file and its
    descriptor are always cleaned up in ``finally``.

    Parameters:
        attrs: metadata mapping; must provide 'glider', 'trajectory_date' and
            a 'variables' mapping (presumably loaded from a deployment
            template — TODO confirm against callers).
        profile: DataFrame with a time column ``t`` and a ``profile`` column
            (assumes default pocean 't z x y' axis names before the rename
            below — TODO confirm).
        output_path: destination directory for the finished file.
        mode: mode string embedded in the output filename.
        profile_id_type: a ProfileIdTypes member selecting how the numeric
            profile index is derived (epoch seconds, file count, or the value
            already in the 'profile' column).

    Returns:
        The path of the created netCDF file.

    Raises:
        ValueError: if ``profile_id_type`` is not a recognized member.
    """
    try:
        # Path to hold file while we create it
        tmp_handle, tmp_path = tempfile.mkstemp(suffix='.nc', prefix='gutils_glider_netcdf_')

        # First non-null timestamp; used in the filename and optionally as
        # the profile index itself.
        profile_time = profile.t.dropna().iloc[0]

        # Figure out which profile index to use (epoch or integer)
        if profile_id_type == ProfileIdTypes.EPOCH:
            # We are using the epoch as the profile_index!
            profile_index = calendar.timegm(profile_time.utctimetuple())
        elif profile_id_type == ProfileIdTypes.COUNT:
            # Get all existing netCDF outputs and find out the index of this netCDF file. That
            # will be the profile_id of this file. This is effectively keeping a tally of netCDF
            # files that have been created and only works if NETCDF FILES ARE WRITTEN IN
            # ASCENDING ORDER.
            # NOTE(review): there is a race condition here if files are written in parallel;
            # be sure no more files are being written while this function runs.
            # This file being written is the last profile available.
            netcdf_files_same_mode = list(glob(
                os.path.join(
                    output_path,
                    '*_{}.nc'.format(mode)
                )
            ))
            profile_index = len(netcdf_files_same_mode)
        elif profile_id_type == ProfileIdTypes.FRAME:
            # Use the index already carried in the data frame's 'profile' column
            profile_index = profile.profile.iloc[0]
        else:
            raise ValueError('{} is not a valid profile type'.format(profile_id_type))

        # Create final filename
        filename = "{0}_{1:010d}_{2:%Y%m%dT%H%M%S}Z_{3}.nc".format(
            attrs['glider'],
            profile_index,
            profile_time,
            mode
        )
        output_file = os.path.join(output_path, filename)

        # Add in the trajectory dimension to make pocean happy
        traj_name = '{}-{}'.format(
            attrs['glider'],
            attrs['trajectory_date']
        )
        profile = profile.assign(trajectory=traj_name)

        # Drop the 'profile' column for writing; we add this back in later
        profile.drop('profile', axis=1, inplace=True)

        # Compute U/V scalar values
        uv_txy = get_uv_data(profile)
        # The *_orig columns only feed the U/V computation; drop them before writing
        if 'u_orig' in profile.columns and 'v_orig' in profile.columns:
            profile.drop(['u_orig', 'v_orig'], axis=1, inplace=True)

        # Compute profile scalar values
        profile_txy = get_profile_data(profile, method=None)

        # Calculate some geographic global attributes
        attrs = dict_update(attrs, get_geographic_attributes(profile))
        # Calculate some vertical global attributes
        attrs = dict_update(attrs, get_vertical_attributes(profile))
        # Calculate some temporal global attributes
        attrs = dict_update(attrs, get_temporal_attributes(profile))
        # Set the creation dates and history
        attrs = dict_update(attrs, get_creation_attributes(profile))

        # Changing column names here from the default 't z x y'
        axes = {
            't': 'time',
            'z': 'depth',
            'x': 'lon',
            'y': 'lat',
            'sample': 'time'
        }
        profile = profile.rename(columns=axes)

        # Use pocean to create NetCDF file
        with IncompleteMultidimensionalTrajectory.from_dataframe(
                profile,
                tmp_path,
                axes=axes,
                reduce_dims=True,
                mode='a') as ncd:

            # We only want to apply metadata from the `attrs` map if the variable is already in
            # the netCDF file or it is a scalar variable (no shape defined). This avoids
            # creating measured variables that were not measured in this profile.
            prof_attrs = attrs.copy()

            vars_to_update = OrderedDict()
            for vname, vobj in prof_attrs['variables'].items():
                if vname in ncd.variables or ('shape' not in vobj and 'type' in vobj):
                    if 'shape' in vobj:
                        # Assign coordinates to dimensioned (measured) variables
                        vobj['attributes']['coordinates'] = '{} {} {} {}'.format(
                            axes.get('t'),
                            axes.get('z'),
                            axes.get('x'),
                            axes.get('y'),
                        )
                    vars_to_update[vname] = vobj
                else:
                    # L.debug("Skipping missing variable: {}".format(vname))
                    pass

            prof_attrs['variables'] = vars_to_update
            ncd.apply_meta(prof_attrs)

            # Set trajectory value
            ncd.id = traj_name
            ncd.variables['trajectory'][0] = traj_name

            # Set profile_* data
            set_profile_data(ncd, profile_txy, profile_index)

            # Set *_uv data
            set_uv_data(ncd, uv_txy)

        # Move to final destination
        safe_makedirs(os.path.dirname(output_file))
        os.chmod(tmp_path, 0o664)
        shutil.move(tmp_path, output_file)

        L.info('Created: {}'.format(output_file))
        return output_file
    except BaseException:
        raise
    finally:
        # Always close the mkstemp descriptor and remove the temp file if it
        # was not moved into place.
        os.close(tmp_handle)
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def test_json_attributes(self):
    """json_attributes() runs without error on both fixtures."""
    for source in (self.single, self.multi):
        with IncompleteMultidimensionalTrajectory(source) as ncd:
            ncd.json_attributes()
def test_imt_load(self):
    """Both fixture files open and close cleanly."""
    for source in (self.single, self.multi):
        IncompleteMultidimensionalTrajectory(source).close()