def backfill_uv_variables(src_glider_nc, empty_uv_processed_paths):
    # Collect the depth-averaged current (UV) scalars from the source file
    uv_values = {}
    for key_name in GLIDER_UV_DATATYPE_KEYS:
        uv_values[key_name] = src_glider_nc.get_scalar(key_name)

    # Write them into every previously processed file that lacked them
    for file_path in empty_uv_processed_paths:
        with open_glider_netcdf(file_path, 'a') as dst_glider_nc:
            fill_uv_variables(dst_glider_nc, uv_values)

    return uv_values

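# For reference, a minimal sketch of the fill_uv_variables counterpart called
# above. This is an assumption based on the get_scalar() read in
# backfill_uv_variables; it presumes the writer exposes a matching
# set_scalar() method:
def fill_uv_variables(dst_glider_nc, uv_values):
    # Copy each captured time_uv/u/v scalar into the destination profile file
    for key_name, value in uv_values.items():
        dst_glider_nc.set_scalar(key_name, value)
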
def test_creation(self):
    with open_glider_netcdf(self.test_path, 'w') as glider_nc:
        # Set global attributes
        glider_nc.set_global_attributes(self.global_attributes)

        # Set Trajectory
        glider_nc.set_trajectory_id(
            self.deployment['glider'],
            self.deployment['trajectory_date']
        )
        traj_str = "{}-{}".format(
            self.deployment['glider'],
            self.deployment['trajectory_date']
        )
        assert 'trajectory' in glider_nc.nc.variables
        vfunc = np.vectorize(decoder)
        self.assertEqual(
            vfunc(nc4.chartostring(
                glider_nc.nc.variables['trajectory'][:])),
            traj_str
        )

        # Set Platform
        glider_nc.set_platform(self.deployment['platform'])
        self.assertEqual(
            glider_nc.nc.variables['platform'].getncattr('wmo_id'),
            4801516
        )

        # Set Instruments
        glider_nc.set_instruments(self.instruments)
        self.assertIn('instrument_ctd', glider_nc.nc.variables)

        # Set Segment ID
        glider_nc.set_segment_id(3)
        self.assertEqual(glider_nc.nc.variables['segment_id'].getValue(), 3)

        # Set Profile ID
        glider_nc.set_profile_id(4)
        self.assertEqual(glider_nc.nc.variables['profile_id'].getValue(), 4)

        flightReader = GliderBDReader(
            [resource('usf-bass', 'usf-bass-2014-061-1-0.sbd')]
        )
        scienceReader = GliderBDReader(
            [resource('usf-bass', 'usf-bass-2014-061-1-0.tbd')]
        )
        reader = MergedGliderBDReader(flightReader, scienceReader)

        for line in reader:
            glider_nc.stream_dict_insert(line)

        glider_nc.update_profile_vars()
        glider_nc.calculate_salinity()
        glider_nc.calculate_density()
        glider_nc.update_bounds()

def init_netcdf(file_path, attrs, segment_id, profile_id):
    with open_glider_netcdf(file_path, 'w') as glider_nc:
        # Set global attributes
        glider_nc.set_global_attributes(attrs['global'])

        # Set Trajectory
        glider_nc.set_trajectory_id(
            attrs['deployment']['glider'],
            attrs['deployment']['trajectory_date']
        )

        # Set Platform
        glider_nc.set_platform(attrs['deployment']['platform'])

        # Set Instruments
        glider_nc.set_instruments(attrs['instruments'])

        # Set Segment ID
        glider_nc.set_segment_id(segment_id)

        # Set Profile ID
        glider_nc.set_profile_id(profile_id)

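# For illustration, a hypothetical attrs dictionary showing only the keys
# init_netcdf reads above. The glider name and wmo_id echo the test fixture;
# every other value is an assumption, not taken from a real deployment:
example_attrs = {
    'global': {'Conventions': 'CF-1.6'},          # set_global_attributes
    'deployment': {
        'glider': 'usf-bass',                     # set_trajectory_id
        'trajectory_date': '20140302T0000',       # set_trajectory_id
        'platform': {'wmo_id': 4801516},          # set_platform
    },
    'instruments': {'instrument_ctd': {}},        # set_instruments
}
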
def write_netcdf(configs, sets, set_key):
    dataset = GliderDataset(sets[set_key])

    # No longer need the dataset stored by handlers
    del sets[set_key]

    global_attributes = generate_global_attributes(configs, dataset)

    _, tmp_path = tempfile.mkstemp(suffix='.nc')

    with open_glider_netcdf(tmp_path, 'w') as glider_nc:
        glider_nc.set_global_attributes(global_attributes)
        glider_nc.set_platform(
            configs[dataset.glider]['deployment']['platform'])
        glider_nc.set_trajectory_id(1)
        glider_nc.set_segment_id(dataset.segment)
        glider_nc.set_datatypes(configs['datatypes'])
        glider_nc.set_instruments(configs[dataset.glider]['instruments'])
        glider_nc.set_times(dataset.times)

        # Insert time_uv parameters
        glider_nc.set_time_uv(dataset.time_uv)

        glider_nc.set_profile_ids(dataset.calculate_profiles())

        for datatype, data in dataset.data_by_type.items():
            glider_nc.insert_data(datatype, data)

    deployment_path = os.path.join(
        configs['output_directory'],
        configs[dataset.glider]['deployment']['directory']
    )
    if not os.path.exists(deployment_path):
        os.mkdir(deployment_path)

    filename = generate_filename(configs, dataset)
    file_path = os.path.join(deployment_path, filename)
    shutil.move(tmp_path, file_path)
    logger.info("Datafile written to %s" % file_path)

def process_dataset(args):
    attrs = read_attrs(args.glider_config_path)

    timestr = 'timestamp'
    flight_path = args.flight
    science_path = args.science

    glider_name = attrs['deployment']['glider']
    deployment_name = '{}-{}'.format(
        glider_name,
        attrs['deployment']['trajectory_date']
    )

    try:
        # Find profile breaks
        profiles = find_profiles(flight_path, science_path, args.time,
                                 args.depth)

        # Interpolate GPS
        interp_gps = get_file_set_gps(flight_path, science_path, args.time,
                                      args.gps_prefix)
    except ValueError as e:
        logger.error('{} - Skipping'.format(e))
        return 1

    # Create NetCDF Files for Each Profile
    profile_id = 0
    profile_end = 0
    file_path = None
    uv_values = None
    movepairs = []
    empty_uv_processed_paths = []

    reader = create_reader(flight_path, science_path)

    # Temporary directory to hold files while they are built
    tmpdir = tempfile.mkdtemp()

    for line in reader:
        if profile_end < line[timestr]:
            # New profile! Init the NetCDF output file

            # Path to hold file while we create it
            _, tmp_path = tempfile.mkstemp(dir=tmpdir, suffix='.nc',
                                           prefix='gutils')

            # Open new NetCDF
            begin_time = datetime.utcfromtimestamp(line[timestr])
            filename = "%s_%s_%s.nc" % (
                glider_name,
                begin_time.strftime("%Y%m%dT%H%M%SZ"),
                args.mode
            )
            file_path = os.path.join(
                args.output_path,
                deployment_name,
                filename
            )

            # NOTE: Store 1-based profile id
            init_netcdf(tmp_path, attrs, args.segment_id, profile_id + 1)
            profile = profiles[profiles[:, 2] == profile_id]
            profile_end = max(profile[:, 0])

        with open_glider_netcdf(tmp_path, 'a') as glider_nc:
            while line[timestr] <= profile_end:
                line = fill_gps(line, interp_gps, args.time, args.gps_prefix)
                glider_nc.stream_dict_insert(line)
                try:
                    line = next(reader)
                except StopIteration:
                    break

            # Handle UV Variables
            if glider_nc.contains('time_uv'):
                uv_values = backfill_uv_variables(
                    glider_nc, empty_uv_processed_paths)
            elif uv_values is not None:
                fill_uv_variables(glider_nc, uv_values)
                del empty_uv_processed_paths[:]
            else:
                empty_uv_processed_paths.append(tmp_path)

            glider_nc.update_profile_vars()
            try:
                glider_nc.calculate_salinity()
                glider_nc.calculate_density()
            except BaseException as e:
                logger.error(e)
            glider_nc.update_bounds()

        movepairs.append((tmp_path, file_path))

        profile_id += 1

    for tp, fp in movepairs:
        try:
            os.makedirs(os.path.dirname(fp))
        except OSError:
            pass  # Destination folder exists
        shutil.move(tp, fp)
    shutil.rmtree(tmpdir)

    return 0

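# A hypothetical command-line driver for process_dataset. Every attribute on
# args below is read somewhere in the function; the flag names, defaults, and
# help strings are assumptions, not the project's actual CLI:
import argparse
import sys


def main():
    parser = argparse.ArgumentParser(description='Write per-profile NetCDF files')
    parser.add_argument('glider_config_path', help='deployment configuration directory')
    parser.add_argument('output_path', help='destination root for profile NetCDFs')
    parser.add_argument('flight', help='flight (e.g. sbd) data file')
    parser.add_argument('science', help='science (e.g. tbd) data file')
    parser.add_argument('--time', default='timestamp', help='time sensor name')
    parser.add_argument('--depth', default='m_depth-m', help='depth sensor name')
    parser.add_argument('--gps_prefix', default='m_gps_', help='GPS sensor prefix')
    parser.add_argument('--segment_id', type=int, default=None)
    parser.add_argument('--mode', default='rt', help='suffix used in output filenames')
    return process_dataset(parser.parse_args())


if __name__ == '__main__':
    sys.exit(main())
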
def process_ooi_dataset(args):
    glider_deployment_path = args.glider_deployment_path
    logger.debug('Deployment directory {:s}'.format(glider_deployment_path))
    if not os.path.isdir(glider_deployment_path):
        logger.error('Invalid deployment location {:s}'.format(
            args.glider_deployment_path))
        return 1

    # Create path to glider deployment configuration files
    cfg_path = os.path.join(glider_deployment_path, 'cfg')
    logger.debug('Deployment configuration directory {:s}'.format(cfg_path))
    if not os.path.isdir(cfg_path):
        logger.error('Deployment configuration path does not exist {:s}'.format(cfg_path))
        return 1

    # Make sure required config files exist
    cfg_status = True
    for f in REQUIRED_CFG_FILES:
        cfg_file = os.path.join(cfg_path, f)
        if not os.path.isfile(cfg_file):
            logger.error('Missing required config file {:s}'.format(cfg_file))
            cfg_status = False
    if not cfg_status:
        return 1

    # Create path to glider deployment status files
    status_path = os.path.join(glider_deployment_path, 'status')
    logger.debug('Deployment status directory {:s}'.format(status_path))
    if not os.path.isdir(status_path):
        logger.error('Deployment status path does not exist {:s}'.format(status_path))
        return 1

    # Search for source NetCDF files
    nc_source_dir = os.path.join(glider_deployment_path, 'nc-source')
    logger.debug('Source NetCDF location {:s}'.format(nc_source_dir))
    if not os.path.isdir(nc_source_dir):
        logger.error('Invalid source NetCDF directory {:s}'.format(nc_source_dir))
        return 1
    nc_files = glob.glob(os.path.join(nc_source_dir, '*.nc'))
    if not nc_files:
        logger.info('No deployment source NetCDF files found {:s}'.format(nc_source_dir))
        return 1

    # Read deployment configuration files
    attrs = read_attrs(cfg_path)
    glider_name = attrs['deployment']['glider']
    deployment_name = build_trajectory_name(
        glider_name,
        attrs['deployment']['trajectory_date']
    )

    # Profile id counter
    profile_status_file = os.path.join(
        status_path, '{:s}-profiles.json'.format(deployment_name))
    profile_id = 1
    existing_nc = {}
    if os.path.isfile(profile_status_file):
        try:
            with open(profile_status_file, 'r') as fid:
                profile_status = json.load(fid)
        except (OSError, ValueError) as e:
            logger.error('Status read error {:s} ({})'.format(
                profile_status_file, e))
            return 1

        # If there are entries in the profile_status array, resume numbering
        # after the highest recorded profile_id
        if profile_status:
            profile_id = max([p['profile_id'] for p in profile_status]) + 1
            # Map the basenames of previously written NetCDF files to their
            # full paths
            existing_nc = {os.path.basename(p['filename']): p['filename']
                           for p in profile_status}

    # Process each input NetCDF file
    for nc_file in nc_files:

        # Create the NC_GLOBAL:history with the name of the source UFrame
        # NetCDF file
        history = '{:s}: Data Source {:s}'.format(
            datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ'), nc_file)
        attrs['global']['history'] = '{:s}\n'.format(history)

        try:
            logger.info('Reading {:s}'.format(nc_file))
            dataset = create_reader(nc_file, args.nctype)
            logger.info('{:s} read complete'.format(nc_file))
            if not dataset:
                logger.warning('Skipping invalid NetCDF {:s}'.format(nc_file))
                continue
            stream = dataset['stream']
            # Find profile breaks
            profile_times = find_profiles(stream, depthsensor=args.depth,
                                          timesensor=args.time)
        except ValueError as e:
            logger.error('{} - Skipping'.format(e))
            return 1

        if profile_times.shape[0] == 0:
            logger.info('No profiles indexed {:s}'.format(nc_file))
            continue

        uv_values = None
        movepairs = []
        empty_uv_processed_paths = []

        # Temporary directory to hold files while they are built
        tmpdir = tempfile.mkdtemp()

        # All timestamps from stream, as an array so it can be compared
        # elementwise against the profile break times
        ts = np.array([r[args.time] for r in stream])

        # Create a new NetCDF file for each profile
        for profile in profile_times:
            # Profile start time
            p0 = profile[0]
            # Profile end time
            p1 = profile[-1]
            # Find all rows in ts that are between p0 & p1
            p_inds = np.flatnonzero(np.logical_and(ts >= p0, ts <= p1))
            # Slice inclusively of the final matching row
            profile_stream = stream[p_inds[0]:p_inds[-1] + 1]

            # Path to hold file while we create it
            _, tmp_path = tempfile.mkstemp(dir=tmpdir, suffix='.nc',
                                           prefix='gutils')

            # Open new NetCDF
            begin_time = datetime.utcfromtimestamp(np.mean(profile))
            filename = "%s-%s_%s.nc" % (
                glider_name,
                begin_time.strftime("%Y%m%dT%H%M%SZ"),
                args.mode
            )

            # Skip this write operation if args.clobber is False and the file
            # has been previously written
            if not args.clobber:
                if filename in existing_nc:
                    logger.warning(
                        'Skipping (profile NetCDF already exists): {:s}'.format(filename))
                    continue
            elif filename in existing_nc:
                # If args.clobber is True, try to delete the existing file
                # provided we can find it
                if os.path.isfile(existing_nc[filename]):
                    logger.info('Clobbering existing NetCDF: {:s}'.format(
                        existing_nc[filename]))
                    try:
                        os.remove(existing_nc[filename])
                    except OSError as e:
                        logger.warning(
                            'Failed to delete existing file: {:s} ({})'.format(
                                existing_nc[filename], e))

            # Full path to the file to be written
            file_path = os.path.join(
                args.output_path,
                deployment_name,
                filename
            )

            logger.debug('tmp_path={:s}'.format(tmp_path))

            # Init the NetCDF output file
            init_netcdf(tmp_path, cfg_path, attrs, profile_id)

            with open_glider_netcdf(tmp_path, cfg_path, mode='a') as glider_nc:
                for line in profile_stream:
                    # Append the row to the NetCDF file
                    glider_nc.stream_dict_insert(line)

                # Handle UV Variables
                if glider_nc.contains('time_uv'):
                    uv_values = backfill_uv_variables(
                        glider_nc, empty_uv_processed_paths
                    )
                elif uv_values is not None:
                    fill_uv_variables(glider_nc, uv_values)
                    del empty_uv_processed_paths[:]
                else:
                    empty_uv_processed_paths.append(tmp_path)

                # Update the scalar profile variables
                glider_nc.update_profile_vars()
                glider_nc.update_bounds()

                # Update the global title attribute with the glider name and
                # the formatted profile_time variable: glider-YYYYmmddTHHMM
                glider_nc.update_global_title(glider_name)

            movepairs.append((tmp_path, file_path))

            profile_id += 1

        # Set move_status to False if any NetCDF move fails so that the
        # temporary directory is not removed
        move_status = True
        for tp, fp in movepairs:
            dest_dir = os.path.dirname(fp)
            if not os.path.isdir(dest_dir):
                try:
                    logger.debug('Creating NetCDF destination {:s}'.format(dest_dir))
                    os.makedirs(dest_dir)
                except OSError as e:
                    logger.error('Failed to create {:s} ({})'.format(dest_dir, e))
                    continue

            # Move the file from the temporary directory to the destination
            if args.verbosity:
                sys.stdout.write('{:s}\n'.format(fp))
            try:
                shutil.move(tp, fp)
            except OSError as e:
                logger.error('Failed to move NetCDF {:s} ({})'.format(tp, e))
                move_status = False
                continue

        # Remove the temporary directory if all NetCDF moves succeeded
        if move_status:
            shutil.rmtree(tmpdir)

    return 0
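
# For reference, the assumed shape of the <deployment>-profiles.json status
# file read near the top of process_ooi_dataset. Only profile_id and filename
# are accessed in this section (profile_max_time appears only in a comment);
# all values below are hypothetical:
example_profile_status = [
    {
        'profile_id': 12,
        'filename': '/data/deployments/ru22/nc/ru22-20180912T143000Z_rt.nc',
        'profile_max_time': 1536762600.0,
    },
]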