def backfill_uv_variables(src_glider_nc, empty_uv_processed_paths):
    uv_values = {}
    for key_name in GLIDER_UV_DATATYPE_KEYS:
        uv_values[key_name] = src_glider_nc.get_scalar(key_name)

    for file_path in empty_uv_processed_paths:
        with open_glider_netcdf(file_path, 'a') as dst_glider_nc:
            fill_uv_variables(dst_glider_nc, uv_values)

    return uv_values
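
# fill_uv_variables is not shown on this page; a minimal sketch, assuming the
# NetCDF wrapper exposes a set_scalar() counterpart to the get_scalar() call
# above (an assumed helper, not confirmed against the gutils API):
def fill_uv_variables(dst_glider_nc, uv_values):
    # Write each cached depth-averaged current scalar into the target file
    for key_name, value in uv_values.items():
        dst_glider_nc.set_scalar(key_name, value)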
Example #2
    def test_creation(self):

        with open_glider_netcdf(self.test_path, 'w') as glider_nc:

            # Set global attributes
            glider_nc.set_global_attributes(self.global_attributes)

            # Set Trajectory
            glider_nc.set_trajectory_id(self.deployment['glider'],
                                        self.deployment['trajectory_date'])

            traj_str = "{}-{}".format(self.deployment['glider'],
                                      self.deployment['trajectory_date'])

            assert 'trajectory' in glider_nc.nc.variables
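            # decoder is not defined in this snippet; presumably a bytes-to-str
            # helper along the lines of: decoder = lambda x: x.decode('utf-8')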
            vfunc = np.vectorize(decoder)
            self.assertEqual(
                vfunc(nc4.chartostring(
                    glider_nc.nc.variables['trajectory'][:])), traj_str)

            # Set Platform
            glider_nc.set_platform(self.deployment['platform'])
            self.assertEqual(
                glider_nc.nc.variables['platform'].getncattr('wmo_id'),
                4801516)

            # Set Instruments
            glider_nc.set_instruments(self.instruments)
            self.assertIn('instrument_ctd', glider_nc.nc.variables)

            # Set Segment ID
            glider_nc.set_segment_id(3)
            self.assertEqual(glider_nc.nc.variables['segment_id'].getValue(),
                             3)

            # Set Profile ID
            glider_nc.set_profile_id(4)
            self.assertEqual(glider_nc.nc.variables['profile_id'].getValue(),
                             4)

            flightReader = GliderBDReader(
                [resource('usf-bass', 'usf-bass-2014-061-1-0.sbd')])
            scienceReader = GliderBDReader(
                [resource('usf-bass', 'usf-bass-2014-061-1-0.tbd')])
            reader = MergedGliderBDReader(flightReader, scienceReader)

            for line in reader:
                glider_nc.stream_dict_insert(line)

            glider_nc.update_profile_vars()
            glider_nc.calculate_salinity()
            glider_nc.calculate_density()
            glider_nc.update_bounds()
def init_netcdf(file_path, attrs, segment_id, profile_id):
    with open_glider_netcdf(file_path, 'w') as glider_nc:
        # Set global attributes
        glider_nc.set_global_attributes(attrs['global'])

        # Set Trajectory
        glider_nc.set_trajectory_id(attrs['deployment']['glider'],
                                    attrs['deployment']['trajectory_date'])

        # Set Platform
        glider_nc.set_platform(attrs['deployment']['platform'])

        # Set Instruments
        glider_nc.set_instruments(attrs['instruments'])

        # Set Segment ID
        glider_nc.set_segment_id(segment_id)

        # Set Profile ID
        glider_nc.set_profile_id(profile_id)
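
# A hypothetical call to init_netcdf; the attrs layout is inferred from the
# keys the function reads, and every value below is an illustrative placeholder:
attrs = {
    'global': {'title': 'Glider deployment'},
    'deployment': {
        'glider': 'usf-bass',
        'trajectory_date': '20140302T1500',
        'platform': {'wmo_id': 4801516},
    },
    'instruments': {},
}
init_netcdf('/tmp/profile_0001.nc', attrs, segment_id=3, profile_id=1)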
Example #4
def write_netcdf(configs, sets, set_key):
    dataset = GliderDataset(sets[set_key])

    # No longer need the dataset stored by handlers
    del sets[set_key]

    global_attributes = generate_global_attributes(configs, dataset)

    _, tmp_path = tempfile.mkstemp(suffix='.nc')
    with open_glider_netcdf(tmp_path, 'w') as glider_nc:
        glider_nc.set_global_attributes(global_attributes)
        glider_nc.set_platform(
            configs[dataset.glider]['deployment']['platform'])
        glider_nc.set_trajectory_id(1)
        glider_nc.set_segment_id(dataset.segment)
        glider_nc.set_datatypes(configs['datatypes'])
        glider_nc.set_instruments(configs[dataset.glider]['instruments'])
        glider_nc.set_times(dataset.times)

        # Insert time_uv parameters
        glider_nc.set_time_uv(dataset.time_uv)

        glider_nc.set_profile_ids(dataset.calculate_profiles())
        for datatype, data in dataset.data_by_type.items():
            glider_nc.insert_data(datatype, data)

    deployment_path = os.path.join(
        configs['output_directory'],
        configs[dataset.glider]['deployment']['directory'])

    if not os.path.exists(deployment_path):
        os.mkdir(deployment_path)

    filename = generate_filename(configs, dataset)
    file_path = os.path.join(deployment_path, filename)
    shutil.move(tmp_path, file_path)

    logger.info("Datafile written to %s" % file_path)
def process_dataset(args):

    attrs = read_attrs(args.glider_config_path)

    timestr = 'timestamp'

    flight_path = args.flight
    science_path = args.science

    glider_name = attrs['deployment']['glider']
    deployment_name = '{}-{}'.format(glider_name,
                                     attrs['deployment']['trajectory_date'])

    try:
        # Find profile breaks
        profiles = find_profiles(flight_path, science_path, args.time,
                                 args.depth)

        # Interpolate GPS
        interp_gps = get_file_set_gps(flight_path, science_path, args.time,
                                      args.gps_prefix)
    except ValueError as e:
        logger.error('{} - Skipping'.format(e))
        return 1

    # Create NetCDF Files for Each Profile
    profile_id = 0
    profile_end = 0
    file_path = None
    uv_values = None
    movepairs = []
    empty_uv_processed_paths = []
    reader = create_reader(flight_path, science_path)

    # Temporary directory
    tmpdir = tempfile.mkdtemp()

    for line in reader:
        if profile_end < line[timestr]:
            # New profile: initialize the NetCDF output file

            # Path to hold file while we create it
            _, tmp_path = tempfile.mkstemp(dir=tmpdir,
                                           suffix='.nc',
                                           prefix='gutils')

            # Open new NetCDF
            begin_time = datetime.utcfromtimestamp(line[timestr])
            filename = "%s_%s_%s.nc" % (
                glider_name, begin_time.strftime("%Y%m%dT%H%M%SZ"), args.mode)

            file_path = os.path.join(args.output_path, deployment_name,
                                     filename)

            # NOTE: Store 1 based profile id
            init_netcdf(tmp_path, attrs, args.segment_id, profile_id + 1)
            profile = profiles[profiles[:, 2] == profile_id]
            profile_end = max(profile[:, 0])

        with open_glider_netcdf(tmp_path, 'a') as glider_nc:
            while line[timestr] <= profile_end:
                line = fill_gps(line, interp_gps, args.time, args.gps_prefix)
                glider_nc.stream_dict_insert(line)
                try:
                    line = next(reader)
                except StopIteration:
                    break

            # Handle UV Variables
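            # time_uv appears in only one profile per segment: profiles written
            # before it shows up are queued in empty_uv_processed_paths and
            # backfilled once the values are read; subsequent profiles are
            # filled directly from the cached uv_values.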
            if glider_nc.contains('time_uv'):
                uv_values = backfill_uv_variables(glider_nc,
                                                  empty_uv_processed_paths)
            elif uv_values is not None:
                fill_uv_variables(glider_nc, uv_values)
                del empty_uv_processed_paths[:]
            else:
                empty_uv_processed_paths.append(tmp_path)

            glider_nc.update_profile_vars()
            try:
                glider_nc.calculate_salinity()
                glider_nc.calculate_density()
            except Exception as e:
                logger.error(e)
            glider_nc.update_bounds()

        movepairs.append((tmp_path, file_path))

        profile_id += 1

    for tp, fp in movepairs:
        try:
            os.makedirs(os.path.dirname(fp))
        except OSError:
            pass  # destination folder exists
        shutil.move(tp, fp)
    shutil.rmtree(tmpdir)

    return 0
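
# A hypothetical CLI wiring for process_dataset (not part of the original
# example); argument names are inferred from the attributes accessed above,
# and the defaults are placeholders:
import argparse
import sys

def main():
    parser = argparse.ArgumentParser(
        description='Split a glider data segment into per-profile NetCDF files')
    parser.add_argument('glider_config_path')
    parser.add_argument('output_path')
    parser.add_argument('flight', help='flight (engineering) data file')
    parser.add_argument('science', help='science data file')
    parser.add_argument('--time', default='timestamp')
    parser.add_argument('--depth', default='m_depth-m')
    parser.add_argument('--gps_prefix', default='m_gps_')
    parser.add_argument('--segment_id', type=int, default=None)
    parser.add_argument('--mode', default='rt')
    return process_dataset(parser.parse_args())

if __name__ == '__main__':
    sys.exit(main())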
Example #6
def process_ooi_dataset(args):

    glider_deployment_path = args.glider_deployment_path
    logger.debug('Deployment directory {:s}'.format(glider_deployment_path))
    if not os.path.isdir(glider_deployment_path):
        logger.error('Invalid deployment location {:s}'.format(args.glider_deployment_path))
        return 1
        
    # Create path to glider deployment configuration files
    cfg_path = os.path.join(glider_deployment_path, 'cfg')
    logger.debug('Deployment configuration directory {:s}'.format(cfg_path))
    if not os.path.isdir(cfg_path):
        logger.error('Deployment configuration path does not exist {:s}'.format(cfg_path))
        return 1
    # Make sure required config file exist
    cfg_status = True
    for f in REQUIRED_CFG_FILES:
        cfg_file = os.path.join(cfg_path, f)
        if not os.path.isfile(cfg_file):
            logger.error('Missing required config file {:s}'.format(cfg_file))
            cfg_status = False
    if not cfg_status:
        return 1
    
        
    # Create path to glider deployment status files
    status_path = os.path.join(glider_deployment_path, 'status')
    logger.debug('Deployment status directory {:s}'.format(status_path))
    if not os.path.isdir(status_path):
        logger.error('Deployment status path does not exist {:s}'.format(status_path))
        return 1
        
    # Search for source NetCDF files
    nc_source_dir = os.path.join(glider_deployment_path, 'nc-source')
    logger.debug('Source NetCDF location {:s}'.format(nc_source_dir))
    if not os.path.isdir(nc_source_dir):
        logger.error('Invalid source NetCDF directory {:s}'.format(nc_source_dir))
        return 1
    nc_files = glob.glob(os.path.join(nc_source_dir, '*.nc'))
    if not nc_files:
        logger.info('No deployment source NetCDF files found {:s}'.format(nc_source_dir))
        return 1
    
    # Read deployment configuration files
    attrs = read_attrs(cfg_path)

    glider_name = attrs['deployment']['glider']
    deployment_name = build_trajectory_name(
        glider_name,
        attrs['deployment']['trajectory_date']
    )

    # Profile id counter
    profile_status_file = os.path.join(status_path, '{:s}-profiles.json'.format(deployment_name))
    profile_id = 1
    existing_nc = {}
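    # Assumed layout of the status file, inferred from the keys read below:
    # a JSON array of objects such as
    # [{"profile_id": 1, "profile_max_time": 1393791300.0,
    #   "filename": ".../usf-bass-20140302T154500_rt.nc"}, ...]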
    if os.path.isfile(profile_status_file):
        try:
            with open(profile_status_file, 'r') as fid:
                profile_status = json.load(fid)
        except (OSError, ValueError) as e:
            logger.error('Status read error {:s} ({})'.format(profile_status_file, e))
            return 1
        # If there are entries in the profile_status array, resume numbering
        # from the highest recorded profile_id
        if profile_status:
            # Find the max profile_id and increment it by one
            profile_id = max([p['profile_id'] for p in profile_status]) + 1
            # Map previously created NetCDF file names to their full paths
            existing_nc = {os.path.basename(p['filename']): p['filename'] for p in profile_status}
    
    # Process each input NetCDF file
    for nc_file in nc_files:
        
        # Create the NC_GLOBAL:history with the name of the source UFrame NetCDF file
        history = '{:s}: Data Source {:s}'.format(datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ'), nc_file)
        attrs['global']['history'] = '{:s}\n'.format(history)
        
        try:
           
            logger.info('Reading {:s}'.format(nc_file))
            dataset = create_reader(nc_file, args.nctype)
            logger.info('{:s} read complete'.format(nc_file))
            if not dataset:
                logger.warning('Skipping invalid NetCDF {:s}'.format(nc_file))
                continue
                
            stream = dataset['stream']
                
            # Find profile breaks
            profile_times = find_profiles(stream, depthsensor=args.depth, timesensor=args.time)
    
        except ValueError as e:
            logger.error('{} - Skipping'.format(e))
            return 1
            
        if profile_times.shape[0] == 0:
            logger.info('No profiles indexed {:s}'.format(nc_file))
            continue
        
        uv_values = None
        movepairs = []
        empty_uv_processed_paths = []
    
        # Temporary directory
        tmpdir = tempfile.mkdtemp()
    
        # All timestamps from the stream, as an array for vectorized comparison
        ts = np.array([r[args.time] for r in stream])
        
        # Create a new NetCDF file for each profile
        for profile in profile_times:

            # Profile start time
            p0 = profile[0]
            # Profile end time
            p1 = profile[-1]
            # Find all rows in ts that fall between p0 and p1
            p_inds = np.flatnonzero(np.logical_and(ts >= p0, ts <= p1))
            profile_stream = stream[p_inds[0]:p_inds[-1] + 1]
        
            # Path to hold file while we create it
            _, tmp_path = tempfile.mkstemp(dir=tmpdir, suffix='.nc', prefix='gutils')

            # Open new NetCDF
            begin_time = datetime.utcfromtimestamp(np.mean(profile))
            filename = "%s-%s_%s.nc" % (
                glider_name,
                begin_time.strftime("%Y%m%dT%H%M%SZ"),
                args.mode
            )
            
            # Skip this write if clobbering is disabled and the file was
            # previously written; otherwise remove the existing copy first
            if filename in existing_nc:
                if not args.clobber:
                    logger.warning('Skipping; profile NetCDF already exists: {:s}'.format(filename))
                    continue
                if os.path.isfile(existing_nc[filename]):
                    logger.info('Clobbering existing NetCDF: {:s}'.format(existing_nc[filename]))
                    try:
                        os.remove(existing_nc[filename])
                    except OSError as e:
                        logger.warning('Failed to delete existing file: {:s} ({})'.format(existing_nc[filename], e))

            # Full path to the file to be written
            file_path = os.path.join(
                args.output_path,
                deployment_name,
                filename
            )

            logger.debug('tmp_path={:s}'.format(tmp_path))
            # Initialize the NetCDF output file
            init_netcdf(tmp_path, cfg_path, attrs, profile_id)
            
            with open_glider_netcdf(tmp_path, cfg_path, mode='a') as glider_nc:
                for line in profile_stream:
                    
                    # Append the row to the NetCDF file
                    glider_nc.stream_dict_insert(line)
    
                # Handle UV Variables
                if glider_nc.contains('time_uv'):
                    uv_values = backfill_uv_variables(
                        glider_nc, empty_uv_processed_paths
                    )
                elif uv_values is not None:
                    fill_uv_variables(glider_nc, uv_values)
                    del empty_uv_processed_paths[:]
                else:
                    empty_uv_processed_paths.append(tmp_path)
    
                # Update the scalar profile variables
                glider_nc.update_profile_vars()
                glider_nc.update_bounds()
                
                # Update the global title attribute with the glider name and
                # formatted self.nc.variables['profile_time']:
                # glider-YYYYmmddTHHMM
                glider_nc.update_global_title(glider_name)
    
            movepairs.append((tmp_path, file_path))
    
            profile_id += 1
    
        # Set move_status to False if any NetCDF move fails so that the temporary
        # directory is not removed.
        move_status = True
        for tp, fp in movepairs:
            dest_dir = os.path.dirname(fp)
            if not os.path.isdir(dest_dir):
                try:
                    logger.debug('Creating NetCDF destination {:s}'.format(dest_dir))
                    os.makedirs(dest_dir)
                except OSError as e:
                    logger.error('Failed to create {:s} ({})'.format(dest_dir, e))
                    continue
            # Move the file from the temporary directory to the destination
            if args.verbosity:
                sys.stdout.write('{:s}\n'.format(fp))
            try:
                shutil.move(tp, fp)
            except OSError as e:
                logger.error('Failed to move NetCDF {:s} ({})'.format(tp, e))
                move_status = False
                continue
        
        # Remove the temporary directory if all NetCDF moves succeeded        
        if move_status:
            shutil.rmtree(tmpdir)

    return 0