Example #1
def main(args):
    """Parse one or more Slocum glider ascii dba files and write CF-compliant Profile NetCDF files
    """

    # Set up logger
    log_level = getattr(logging, args.loglevel.upper())
    log_format = '%(module)s:%(levelname)s:%(message)s [line %(lineno)d]'
    logging.basicConfig(format=log_format, level=log_level)

    config_path = args.config_path
    output_path = args.output_path or os.path.realpath(os.curdir)
    dba_files = args.dba_files
    start_profile_id = args.start_profile_id
    clobber = args.clobber
    comp_level = args.compression
    nc_format = args.nc_format

    if not os.path.isdir(config_path):
        logging.error(
            'Invalid configuration directory: {:s}'.format(config_path))
        return 1

    if not args.output_path:
        logging.info('No NetCDF output_path specified. Using cwd: {:s}'.format(
            output_path))

    if not os.path.isdir(output_path):
        logging.error('Invalid output_path: {:s}'.format(output_path))
        return 1

    if not dba_files:
        logging.error('No Slocum dba files specified')
        return 1

    # Create the Trajectory NetCDF writer
    ncw = ProfileNetCDFWriter(config_path,
                              comp_level=comp_level,
                              nc_format=nc_format,
                              profile_id=start_profile_id,
                              clobber=clobber)
    if args.debug:
        sys.stdout.write('{}\n'.format(ncw))
        return 0

    # Create a temporary directory for creating/writing NetCDF prior to moving them to output_path
    tmp_dir = tempfile.mkdtemp()
    logging.debug('Temporary NetCDF directory: {:s}'.format(tmp_dir))

    # Write one NetCDF file for each input file
    output_nc_files = []
    processed_dbas = []
    for dba_file in dba_files:

        if not os.path.isfile(dba_file):
            logging.error('Invalid dba file specified: {:s}'.format(dba_file))
            continue

        logging.info('Processing dba file: {:s}'.format(dba_file))

        # Parse the dba file
        dba = create_llat_dba_reader(dba_file)
        if dba is None or len(dba['data']) == 0:
            logging.warning('Skipping empty dba file: {:s}'.format(dba_file))
            continue

        # Create the yo for profile indexing and find the profile minima/maxima
        yo = build_yo(dba)
        if yo is None:
            continue
        try:
            profile_times = find_profiles(yo)
        except ValueError as e:
            logging.error('{:s}: {}'.format(dba_file, e))
            continue

        if len(profile_times) == 0:
            logging.info('No profiles indexed: {:s}'.format(dba_file))
            continue

        # All timestamps from stream
        ts = yo[:, 0]

        for profile_interval in profile_times:

            # Profile start time
            p0 = profile_interval[0]
            # Profile end time
            p1 = profile_interval[-1]
            # Find all rows in ts that are between p0 & p1
            p_inds = np.flatnonzero(np.logical_and(ts >= p0, ts <= p1))
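            # e.g. for ts = [10., 20., 30., 40.] and (p0, p1) = (15., 35.),
            # p_inds is array([1, 2]): the rows timestamped 20. and 30.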

            # Calculate and convert profile mean time to a datetime
            mean_profile_epoch = np.nanmean(profile_interval)
            if np.isnan(mean_profile_epoch):
                logging.warning('Profile mean timestamp is NaN')
                continue
            # If no start profile id was specified on the command line, use the mean_profile_epoch as the profile_id
            # since it will be unique to this profile and deployment
            if args.start_profile_id < 1:
                ncw.profile_id = int(mean_profile_epoch)
            pro_mean_dt = datetime.datetime.utcfromtimestamp(
                mean_profile_epoch)

            # Create the output NetCDF path
            pro_mean_ts = pro_mean_dt.strftime('%Y%m%dT%H%M%SZ')
            profile_filename = '{:s}-{:s}-{:s}-profile'.format(
                ncw.attributes['deployment']['glider'], pro_mean_ts,
                dba['file_metadata']['filename_extension'])
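            # e.g. 'unit_123-20150115T103000Z-sbd-profile' (glider name and
            # extension here are illustrative, not taken from the source)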
            # Path to temporarily hold file while we create it
            tmp_fid, tmp_nc = tempfile.mkstemp(
                dir=tmp_dir,
                suffix='.nc',
                prefix=os.path.basename(profile_filename))
            os.close(tmp_fid)

            out_nc_file = os.path.join(output_path,
                                       '{:s}.nc'.format(profile_filename))
            if os.path.isfile(out_nc_file):
                if args.clobber:
                    logging.info(
                        'Clobbering existing NetCDF: {:s}'.format(out_nc_file))
                else:
                    logging.warning(
                        'Skipping existing NetCDF: {:s}'.format(out_nc_file))
                    continue

            # Initialize the temporary NetCDF file
            try:
                ncw.init_nc(tmp_nc)
            except (OSError, IOError) as e:
                logging.error('Error initializing {:s}: {}'.format(tmp_nc, e))
                continue

            try:
                ncw.open_nc()
                # Add command line call used to create the file
                ncw.update_history('{:s} {:s}'.format(sys.argv[0], dba_file))
            except (OSError, IOError) as e:
                logging.error('Error opening {:s}: {}'.format(tmp_nc, e))
                os.unlink(tmp_nc)
                continue

            # Create and set the trajectory
            ncw.set_trajectory_id()
            # Set the global title attribute from the glider name and profile mean time
            ncw.set_title('{:s}-{:s} Vertical Profile'.format(
                ncw.deployment_configs['glider'],
                pro_mean_dt.strftime('%Y%m%d%H%M%SZ')))

            # Create the source file scalar variable
            ncw.set_source_file_var(dba['file_metadata']['filename_label'],
                                    dba['file_metadata'])

            # Update self.nc_sensor_defs with the dba sensor definitions
            ncw.update_data_file_sensor_defs(dba['sensors'])

            # Find and set container variables
            ncw.set_container_variables()

            # Create variables and add data
            for v in range(len(dba['sensors'])):
                var_name = dba['sensors'][v]['sensor_name']
                var_data = dba['data'][p_inds, v]
                logging.debug('Inserting {:s} data array'.format(var_name))

                ncw.insert_var_data(var_name, var_data)

            # Write scalar profile variable and permanently close the NetCDF file
            nc_file = ncw.finish_nc()

            if nc_file:
                try:
                    shutil.move(tmp_nc, out_nc_file)
                    os.chmod(out_nc_file, 0o755)
                except IOError as e:
                    logging.error(
                        'Error moving temp NetCDF file {:s}: {:}'.format(
                            tmp_nc, e))
                    continue

                output_nc_files.append(out_nc_file)

        processed_dbas.append(dba_file)

    # Delete the temporary directory once files have been moved
    try:
        logging.debug('Removing temporary directory: {:s}'.format(tmp_dir))
        shutil.rmtree(tmp_dir)
    except OSError as e:
        logging.error(e)
        return 1

    # Print the list of files created
    for output_nc_file in output_nc_files:
        os.chmod(output_nc_file, 0o664)
        sys.stdout.write('{:s}\n'.format(output_nc_file))

    return 0
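
# --- Hedged usage sketch (not from the source): a minimal argparse interface
# --- that main() above appears to assume. Flag names, defaults, and help
# --- strings are inferred from the attributes main() reads and are not
# --- authoritative.
def build_arg_parser():
    import argparse
    parser = argparse.ArgumentParser(
        description='Write CF-compliant profile NetCDF files from Slocum dba files')
    parser.add_argument('config_path', help='Deployment configuration directory')
    parser.add_argument('dba_files', nargs='*', help='One or more Slocum dba files')
    parser.add_argument('-o', '--output_path', help='NetCDF destination (default: cwd)')
    parser.add_argument('-p', '--start_profile_id', type=int, default=0,
                        help='Starting profile id (< 1 uses the profile mean epoch)')
    parser.add_argument('-c', '--clobber', action='store_true',
                        help='Overwrite existing NetCDF files')
    parser.add_argument('--compression', type=int, default=1,
                        help='NetCDF compression level')
    parser.add_argument('--nc_format', default='NETCDF4_CLASSIC',
                        help='NetCDF file format')
    parser.add_argument('-x', '--debug', action='store_true',
                        help='Print the configured writer and exit')
    parser.add_argument('-l', '--loglevel', default='info',
                        help='Logging level (debug, info, warning, error)')
    return parser

Example #2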
    def navonc_to_ngdacnc(self, navo_nc_files, output_path):
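        """Convert NAVOCEANO NetCDF files to NGDAC-style profile NetCDF files.

        Each input file is split into individual profiles that are written as
        separate NetCDF files to output_path. Returns the list of files
        written.
        """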

        # Create a temporary directory for creating/writing NetCDF prior to moving them to output_path
        tmp_dir = tempfile.mkdtemp()
        self._logger.debug('Temporary NetCDF directory: {:s}'.format(tmp_dir))

        # Write one NetCDF file for each input file
        output_nc_files = []
        processed_ncs = []
        for nc_file in navo_nc_files:

            if not os.path.isfile(nc_file):
                self._logger.error(
                    'Invalid NAVO NetCDF file specified: {:s}'.format(nc_file))
                continue

            self._logger.info(
                'Processing NAVO NetCDF file: {:s}'.format(nc_file))

            # Parse the NAVO NetCDF file
            dba = create_llat_nc_reader(nc_file)
            if dba is None or len(dba['data']) == 0:
                self._logger.warning(
                    'Skipping empty NAVO NetCDF file: {:s}'.format(nc_file))
                continue

            # NAVOCEANO NetCDF files contain missing_values in the time
            # coordinate variable (dimension). Remove those rows.
            ti = [s['sensor_name'] for s in dba['sensors']].index('llat_time')
            missing_value = dba['sensors'][ti]['attrs']['missing_value']
            dba['data'] = dba['data'][dba['data'][:, ti] != missing_value, :]
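            # e.g. with missing_value = -999.0, every row whose llat_time
            # equals -999.0 is dropped before profile indexing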

            # Create the yo for profile indexing and find the profile minima/maxima
            yo = slice_sensor_data(dba)
            if yo is None:
                continue
            try:
                profile_times = find_profiles(yo)
            except ValueError as e:
                self._logger.error('{:s}: {}'.format(nc_file, e))
                continue

            if len(profile_times) == 0:
                self._logger.info('No profiles indexed: {:s}'.format(nc_file))
                continue

            # All timestamps from stream
            ts = yo[:, 0]

            # Open up the source NetCDF file
            nci = Dataset(nc_file, 'r')
            # Add the source NetCDF file's global attributes to the output
            # NetCDF file
            source_global_atts = nci.ncattrs()
            for att in source_global_atts:
                self.add_global_attribute(att,
                                          nci.getncattr(att),
                                          override=False)

            # Update self.nc_sensor_defs with the dba sensor definitions
            self.update_data_file_sensor_defs(dba['sensors'])

            # The NAVOCEANO NetCDF files have a global attribute specifying
            # the dive number, but the profile mean timestamp is used as the
            # profile id instead since it is unique to the deployment
            profile_count = 0
            for profile_interval in profile_times:

                # Profile start time
                p0 = profile_interval[0]
                # Profile end time
                p1 = profile_interval[-1]
                # Find all rows in ts that are between p0 & p1
                p_inds = np.flatnonzero(np.logical_and(ts >= p0, ts <= p1))

                # Calculate and convert profile mean time to a datetime
                mean_profile_epoch = np.nanmean(profile_interval)
                if np.isnan(mean_profile_epoch):
                    self._logger.warning('Profile mean timestamp is NaN')
                    continue
                # Use the mean_profile_epoch as the profile_id since it is
                # unique to this profile and deployment
                self.profile_id = int(mean_profile_epoch)
                pro_mean_dt = datetime.datetime.utcfromtimestamp(
                    mean_profile_epoch)

                # Create the output NetCDF path
                pro_mean_ts = pro_mean_dt.strftime('%Y%m%dT%H%M%S')
                profile_filename = '{:s}_{:s}_rt'.format(
                    self.attributes['deployment']['glider'], pro_mean_ts)
                # Path to temporarily hold file while we create it
                tmp_fid, tmp_nc = tempfile.mkstemp(
                    dir=tmp_dir,
                    suffix='.nc',
                    prefix=os.path.basename(profile_filename))
                os.close(tmp_fid)

                out_nc_file = os.path.join(output_path,
                                           '{:s}.nc'.format(profile_filename))
                if os.path.isfile(out_nc_file):
                    if self.clobber:
                        self._logger.info(
                            'Clobbering existing NetCDF: {:s}'.format(
                                out_nc_file))
                    else:
                        self._logger.warning(
                            'Skipping existing NetCDF: {:s}'.format(
                                out_nc_file))
                        continue

                # Initialize the temporary NetCDF file
                try:
                    self.init_nc(tmp_nc)
                except (OSError, IOError) as e:
                    self._logger.error('Error initializing {:s}: {}'.format(
                        tmp_nc, e))
                    continue

                try:
                    self.open_nc()
                    # Add command line call used to create the file
                    self.update_history('{:s} {:s}'.format(
                        sys.argv[0], nc_file))
                except (OSError, IOError) as e:
                    self._logger.error('Error opening {:s}: {}'.format(
                        tmp_nc, e))
                    os.unlink(tmp_nc)
                    continue

                # Create and set the trajectory
                self.set_trajectory_id()
                # Set the global title attribute from the glider name and profile mean time
                self.set_title('{:s}-{:s} Vertical Profile'.format(
                    self.deployment_configs['glider'],
                    pro_mean_dt.strftime('%Y%m%d%H%M%SZ')))

                # Create the source file scalar variable
                self.set_source_file_var(dba['file_metadata']['source_file'],
                                         dba['file_metadata'])

                # Update self.nc_sensor_defs with the dba sensor definitions
                self.update_data_file_sensor_defs(dba['sensors'])

                # Find and set container variables
                self.set_container_variables()

                # Create variables and add data
                for v in range(len(dba['sensors'])):
                    var_name = dba['sensors'][v]['sensor_name']
                    # Make sure there is a sensor definition before attempting to add the data array
                    if var_name not in self.nc_sensor_defs:
                        continue
                    var_data = dba['data'][p_inds, v]
                    self._logger.debug(
                        'Inserting {:s} data array'.format(var_name))

                    self.insert_var_data(var_name, var_data)

                # Check for u_da and v_da sensors (u/v currents).  If they exist, add them as variables
                u_sensor_def = self.sensor_def_exists('u_da')
                v_sensor_def = self.sensor_def_exists('v_da')
                uv_time_sensor_def = self.sensor_def_exists('time_uv')
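                # u_da/v_da are assumed here to be depth-averaged currents
                # computed per dive; time_uv below is stamped with the
                # profile mean time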
                if u_sensor_def and v_sensor_def and uv_time_sensor_def:
                    try:
                        self.set_scalar('u_da', nci.variables['u_da'][0])
                    except KeyError as e:
                        self._logger.error(
                            'Failed to create u current variable: {:}'.format(
                                e))
                    try:
                        self.set_scalar('v_da', nci.variables['v_da'][0])
                    except KeyError as e:
                        self._logger.error(
                            'Failed to create v current variable: {:}'.format(
                                e))
                    try:
                        self.set_scalar('time_uv', mean_profile_epoch)
                    except KeyError as e:
                        self._logger.error(
                            'Failed to create time_uv variable: {:}'.format(e))

                # Write scalar profile variable and permanently close the NetCDF file
                nc_written = self.finish_nc()

                if nc_written:
                    try:
                        shutil.move(tmp_nc, out_nc_file)
                        os.chmod(out_nc_file, 0o775)
                    except IOError as e:
                        self._logger.error(
                            'Error moving temp NetCDF file {:s}: {:}'.format(
                                tmp_nc, e))
                        continue

                    output_nc_files.append(out_nc_file)
                    profile_count += 1

            nci.close()
            processed_ncs.append(nc_file)
            self._logger.info('{:0.0f} profiles written'.format(profile_count))

        # Delete the temporary directory once files have been moved
        try:
            self._logger.debug(
                'Removing temporary directory: {:s}'.format(tmp_dir))
            shutil.rmtree(tmp_dir)
        except OSError as e:
            self._logger.error(e)
            return 1

        return output_nc_files
Example #3
def main(args):
    """Parse one or more Slocum glider ascii dba files and write CF-compliant Profile NetCDF files
    """

    # Set up logger
    log_level = getattr(logging, args.loglevel.upper())
    log_format = '%(asctime)s:%(module)s:%(levelname)s:%(message)s [line %(lineno)d]'
    logging.basicConfig(format=log_format, level=log_level)

    config_path = args.config_path
    output_path = args.output_path or os.path.realpath(os.curdir)
    dba_files = args.dba_files
    start_profile_id = args.start_profile_id
    clobber = args.clobber
    comp_level = args.compression
    nc_format = args.nc_format
    ngdac_extensions = args.ngdac

    if not os.path.isdir(config_path):
        logging.error(
            'Invalid configuration directory: {:s}'.format(config_path))
        return 1

    if not args.output_path:
        logging.info('No NetCDF output_path specified. Using cwd: {:s}'.format(
            output_path))

    if not os.path.isdir(output_path):
        logging.error('Invalid output_path: {:s}'.format(output_path))
        return 1

    if not dba_files:
        logging.error('No Slocum dba files specified')
        return 1

    # Create the Trajectory NetCDF writer
    ncw = SlocumProfileNetCDFWriter(config_path,
                                    comp_level=comp_level,
                                    nc_format=nc_format,
                                    profile_id=start_profile_id,
                                    clobber=clobber)

    if not args.clobber:
        logging.info('Keeping existing NetCDF files')

    output_nc_files = []
    if not args.science:
        logging.info('Writing unprocessed NetCDF files')
        if args.debug:
            sys.stdout.write('{}\n'.format(ncw))
            return 0
        output_nc_files = ncw.dbas_to_profile_nc(
            dba_files, output_path, ngdac_extensions=ngdac_extensions)
    else:

        logging.info('Writing science NetCDF files')
        if args.debug:
            sys.stdout.write('{}\n'.format(ncw))
            return 0
        # Create a temporary directory for creating/writing NetCDF prior to moving them to output_path
        tmp_dir = tempfile.mkdtemp()
        logging.debug('Temporary NetCDF directory: {:s}'.format(tmp_dir))

        for dba_file in dba_files:
            dba = create_llat_dba_reader(dba_file)
            if dba is None:
                continue

            # Create the yo for profile indexing and find the profile minima/maxima
            yo = build_yo(dba)
            if yo is None:
                continue
            try:
                profile_times = find_profiles(yo)
            except ValueError as e:
                logging.error('{:s}: {}'.format(dba_file, e))
                continue

            if len(profile_times) == 0:
                continue

            # Derive and add CTD parameters to the dba object
            dba = derive_ctd_parameters(dba)

            # Update the sensor definitions with the calculated parameters
            ncw.update_data_file_sensor_defs(dba['sensors'], override=False)

            nc_files = ncw.dba_obj_to_profile_nc(
                dba,
                output_path,
                tmp_dir=tmp_dir,
                ngdac_extensions=ngdac_extensions)
            if nc_files:
                output_nc_files = output_nc_files + nc_files

        logging.info('{:0.0f} NetCDF files written: {:s}'.format(
            len(output_nc_files), output_path))
        # Remove tmp_dir
        try:
            logging.debug('Removing temporary directory: {:s}'.format(tmp_dir))
            shutil.rmtree(tmp_dir)
        except OSError as e:
            logging.error(e)

    # Print the list of files created
    for output_nc_file in output_nc_files:
        os.chmod(output_nc_file, 0o664)
        sys.stdout.write('{:s}\n'.format(output_nc_file))

    return 0
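Example #4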
    def dbas_to_profile_nc(self,
                           dba_files,
                           output_path,
                           z_from_p=True,
                           ngdac_extensions=False):
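        """Write one profile NetCDF file per profile found in each dba file.

        Each dba file is parsed and indexed into profiles; every profile is
        written as a separate NetCDF file to output_path. When
        ngdac_extensions is True, filenames use the NGDAC 'rt'/'delayed'
        telemetry suffix. Returns the list of files written.
        """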

        # Create a temporary directory for creating/writing NetCDF prior to moving them to output_path
        tmp_dir = tempfile.mkdtemp()
        self._logger.debug('Temporary NetCDF directory: {:s}'.format(tmp_dir))

        # Write one NetCDF file for each input file
        output_nc_files = []
        processed_dbas = []
        non_clobbered_nc_files_count = 0
        for dba_file in dba_files:

            if not os.path.isfile(dba_file):
                self._logger.error(
                    'Invalid dba file specified: {:s}'.format(dba_file))
                continue

            self._logger.info('Processing dba file: {:s}'.format(dba_file))

            # Parse the dba file
            dba = create_llat_dba_reader(dba_file, z_from_p=z_from_p)
            if dba is None or len(dba['data']) == 0:
                self._logger.warning(
                    'Skipping empty dba file: {:s}'.format(dba_file))
                continue

            # Create the yo for profile indexing and find the profile minima/maxima
            yo = build_yo(dba)
            if yo is None:
                continue
            try:
                profile_times = find_profiles(yo)
            except ValueError as e:
                self._logger.error('{:s}: {}'.format(dba_file, e))
                continue

            self._logger.info('{:s} {:0.0f} profiles indexed'.format(
                os.path.basename(dba['file_metadata']['source_file']),
                len(profile_times)))
            if len(profile_times) == 0:
                continue

            # Clean up the dba:
            # 1. Replace NaNs with fill values
            # 2. Set llat_time 0 values to fill values
            dba = self.clean_dba(dba)

            # All timestamps from stream
            ts = yo[:, 0]

            for profile_interval in profile_times:

                # Profile start time
                p0 = profile_interval[0]
                # Profile end time
                p1 = profile_interval[-1]
                # Find all rows in ts that are between p0 & p1
                p_inds = np.flatnonzero(np.logical_and(ts >= p0, ts <= p1))

                # Calculate and convert profile mean time to a datetime
                mean_profile_epoch = np.nanmean(profile_interval)
                if np.isnan(mean_profile_epoch):
                    self._logger.warning('Profile mean timestamp is NaN')
                    continue
                # If no start profile id was specified on the command line, use the mean_profile_epoch as the profile_id
                # since it will be unique to this profile and deployment
                if self.profile_id < 1:
                    self.profile_id = int(mean_profile_epoch)
                pro_mean_dt = datetime.datetime.utcfromtimestamp(
                    mean_profile_epoch)

                # Create the output NetCDF path
                pro_mean_ts = pro_mean_dt.strftime('%Y%m%dT%H%M%SZ')
                if ngdac_extensions:
                    ext = dba['file_metadata']['filename_extension']
                    telemetry = 'rt'
                    if ext and ext != 'sbd':
                        telemetry = 'delayed'

                    profile_nc_file = '{:s}_{:s}_{:s}'.format(
                        self.attributes['deployment']['glider'], pro_mean_ts,
                        telemetry)
                else:
                    profile_nc_file = '{:s}_{:s}_{:s}'.format(
                        self.attributes['deployment']['glider'], pro_mean_ts,
                        dba['file_metadata']['filename_extension'])
                # Path to temporarily hold file while we create it
                tmp_fid, tmp_nc = tempfile.mkstemp(
                    dir=tmp_dir,
                    suffix='.nc',
                    prefix=os.path.basename(profile_nc_file))
                os.close(tmp_fid)

                out_nc_file = os.path.join(output_path,
                                           '{:s}.nc'.format(profile_nc_file))
                if os.path.isfile(out_nc_file):
                    if self.clobber:
                        self._logger.info(
                            'Clobbering existing NetCDF: {:s}'.format(
                                out_nc_file))
                    else:
                        self._logger.debug(
                            'Skipping existing NetCDF: {:s}'.format(
                                out_nc_file))
                        non_clobbered_nc_files_count += 1
                        continue

                # Initialize the temporary NetCDF file
                try:
                    self.init_nc(tmp_nc)
                except (OSError, IOError) as e:
                    self._logger.error('Error initializing {:s}: {}'.format(
                        tmp_nc, e))
                    continue

                try:
                    self.open_nc()
                    # Add command line call used to create the file
                    self.update_history('{:s} {:s}'.format(
                        sys.argv[0], dba_file))
                except (OSError, IOError) as e:
                    self._logger.error('Error opening {:s}: {}'.format(
                        tmp_nc, e))
                    os.unlink(tmp_nc)
                    continue

                # Create and set the trajectory
                self.set_trajectory_id()
                # Set the global title attribute from the glider name and profile mean time
                self.set_title('{:s}-{:s} Vertical Profile'.format(
                    self.deployment_configs['glider'],
                    pro_mean_dt.strftime('%Y%m%d%H%M%SZ')))

                # Create the source file scalar variable
                self.set_source_file_var(
                    dba['file_metadata']['filename_label'],
                    dba['file_metadata'])

                # Update self.nc_sensor_defs with the dba sensor definitions
                self.update_data_file_sensor_defs(dba['sensors'])

                # Find and set container variables
                self.set_container_variables()

                # Create variables and add data
                for v in range(len(dba['sensors'])):
                    var_name = dba['sensors'][v]['sensor_name']
                    var_data = dba['data'][p_inds, v]
                    self._logger.debug(
                        'Inserting {:s} data array'.format(var_name))

                    self.insert_var_data(var_name, var_data)

                # Write scalar profile variable and permanently close the NetCDF file
                nc_file = self.finish_nc()

                if nc_file:
                    try:
                        shutil.move(tmp_nc, out_nc_file)
                        os.chmod(out_nc_file, 0o755)
                    except IOError as e:
                        self._logger.error(
                            'Error moving temp NetCDF file {:s}: {:}'.format(
                                tmp_nc, e))
                        continue

                    output_nc_files.append(out_nc_file)

            processed_dbas.append(dba_file)

        if not self.clobber:
            self._logger.info('{:0.0f} NetCDFs not clobbered'.format(
                non_clobbered_nc_files_count))

        # Delete the temporary directory once files have been moved
        try:
            self._logger.debug(
                'Removing temporary directory: {:s}'.format(tmp_dir))
            shutil.rmtree(tmp_dir)
        except OSError as e:
            self._logger.error(e)
            return 1

        return output_nc_files
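
# --- Hedged usage sketch, assembled from the calls shown in Example #3 ---
# ncw = SlocumProfileNetCDFWriter(config_path,
#                                 comp_level=1,
#                                 nc_format='NETCDF4_CLASSIC',
#                                 profile_id=0,
#                                 clobber=False)
# nc_files = ncw.dbas_to_profile_nc(dba_files, output_path,
#                                   ngdac_extensions=True)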