Example #1
0
    def __init__(self,
                 specifier,
                 serial=False,
                 verbosity=1,
                 skip_existing=False,
                 overwrite=False,
                 once=False,
                 simplecomm=None):
        """
        Constructor

        Checks the argument types, sets up the communicator, timer and
        verbose printer, validates the Specifier, configures the PyNIO
        output options, opens every input file, and then runs the input
        file validation, input file sorting, variable sorting and output
        file validation steps before calling sync() on the communicator.

        Parameters:
            specifier (Specifier): An instance of the Specifier class,
                defining the input specification for this reshaper operation.

        Keyword Arguments:
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel
                (False).  The default is to assume parallel operation
                (but serial will be chosen if the mpi4py cannot be
                found when trying to initialize decomposition).  Only
                used when no simplecomm is supplied.
            verbosity (int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            skip_existing (bool): Flag specifying whether to skip the
                generation of time-series for variables with time-series
                files that already exist.  Default is False.
            overwrite (bool): Flag specifying whether to forcefully overwrite
                output files if they already exist.  Default is False.
                This argument is not type-checked here; it is passed on
                unchanged to the output file validation step.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm (or SimpleCommMPI) object
                to handle the parallel communication, if necessary.  When
                None, a communicator is created with create_comm().

        Raises:
            TypeError: If specifier is not a Specifier, if serial,
                skip_existing or once is not exactly a bool, if verbosity
                is not exactly an int, or if simplecomm is given but is
                neither a SimpleComm nor a SimpleCommMPI.
        """

        # Type checking (or double-checking).  The bool/int checks compare
        # the exact type, so subclasses are rejected (e.g. True is not
        # accepted as a verbosity level even though bool subclasses int).
        if not isinstance(specifier, Specifier):
            err_msg = "Input must be given in the form of a Specifier object"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(skip_existing) is not bool:
            err_msg = "Skip_existing flag must be True or False."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not (isinstance(simplecomm, SimpleComm) or
                    isinstance(simplecomm, SimpleCommMPI)):
                # Adjacent string literals (no comma between them) are
                # joined into ONE string.  A comma here would turn the
                # message into a tuple and garble the reported error.
                err_msg = ("Simple communicator object is not a SimpleComm or "
                           "SimpleCommMPI")
                raise TypeError(err_msg)

        # Whether to write a once file
        self._use_once_file = once

        # Internal timer data
        self._timer = TimeKeeper()

        # Dictionary storing read/write data amounts
        self.assumed_block_size = float(4 * 1024 * 1024)
        self._byte_counts = {}

        # Use the supplied communicator, or create one when none was given
        self._timer.start('Initializing Simple Communicator')
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)
        # Reference to the simple communicator
        self._simplecomm = simplecomm
        self._timer.stop('Initializing Simple Communicator')

        # Construct the print header: "[rank/size] "
        header = ''.join([
            '[',
            str(self._simplecomm.get_rank()), '/',
            str(self._simplecomm.get_size()), '] '
        ])

        # Reference to the verbose printer tool
        self._vprint = VPrinter(header=header, verbosity=verbosity)

        # Debug output starting (progress messages are printed only when
        # the communicator reports this rank as the manager)
        if self._simplecomm.is_manager():
            self._vprint('Initializing Reshaper', verbosity=1)

        # Validate the user input data
        self._timer.start('Specifier Validation')
        specifier.validate()
        self._timer.stop('Specifier Validation')
        if self._simplecomm.is_manager():
            self._vprint('Specifier validated', verbosity=1)

        # Setup PyNIO options (including disabling the default PreFill option)
        opt = Nio.options()
        opt.PreFill = False

        # Determine the Format and CompressionLevel options
        # from the NetCDF format string in the Specifier.  Any other format
        # string leaves Format and CompressionLevel untouched.
        if specifier.netcdf_format == 'netcdf':
            opt.Format = 'Classic'
        elif specifier.netcdf_format == 'netcdf4':
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = 0
        elif specifier.netcdf_format == 'netcdf4c':
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = specifier.netcdf_deflate
            if self._simplecomm.is_manager():
                self._vprint('PyNIO compression level: {0}'.format(
                    specifier.netcdf_deflate), verbosity=2)

        self._nio_options = opt
        if self._simplecomm.is_manager():
            self._vprint('PyNIO options set', verbosity=2)

        # Open all of the input files (read-only), in the order listed
        # by the Specifier
        self._timer.start('Open Input Files')
        self._input_files = []
        for filename in specifier.input_file_list:
            self._input_files.append(Nio.open_file(filename, "r"))
        self._timer.stop('Open Input Files')
        if self._simplecomm.is_manager():
            self._vprint('Input files opened', verbosity=2)

        # Validate the input files themselves
        self._timer.start('Input File Validation')
        self._validate_input_files(specifier)
        self._timer.stop('Input File Validation')
        if self._simplecomm.is_manager():
            self._vprint('Input files validated', verbosity=2)

        # Sort the input files by time
        self._timer.start('Sort Input Files')
        self._sort_input_files_by_time(specifier)
        self._timer.stop('Sort Input Files')
        if self._simplecomm.is_manager():
            self._vprint('Input files sorted', verbosity=2)

        # Retrieve and sort the variables in each time-slice file
        # (To determine if it is time-invariant metadata, time-variant
        # metadata, or if it is a time-series variable)
        self._timer.start('Sort Variables')
        self._sort_variables(specifier)
        self._timer.stop('Sort Variables')
        if self._simplecomm.is_manager():
            self._vprint('Variables sorted', verbosity=2)

        # Validate the output files
        self._timer.start('Output File Validation')
        self._validate_output_files(specifier, skip_existing, overwrite)
        self._timer.stop('Output File Validation')
        if self._simplecomm.is_manager():
            self._vprint('Output files validated', verbosity=2)

        # Helpful debugging message
        if self._simplecomm.is_manager():
            self._vprint('Reshaper initialized.', verbosity=1)

        # Sync before continuing..
        self._simplecomm.sync()
Example #2
0
    def __init__(self, specifier, serial=False, verbosity=1, wmode='w', once=False, simplecomm=None):
        """
        Constructor

        Checks the arguments, sets up the timer, communicator and verbose
        printer, validates the Specifier, selects the I/O backend, and
        copies the Specifier settings onto this instance before calling
        sync() on the communicator.

        Parameters:
            specifier (Specifier): An instance of the Specifier class,
                defining the input specification for this reshaper operation.
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel
                (False).  The default is to assume parallel operation
                (but serial will be chosen if the mpi4py cannot be
                found when trying to initialize decomposition).  Only
                used when no simplecomm is supplied.
            verbosity (int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            wmode (str): The mode to use for writing output.  Can be 'w' for
                normal write operation, 's' to skip the output generation for
                existing time-series files, 'o' to overwrite existing
                time-series files, 'a' to append to existing time-series files.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel
                communication, if necessary.  When None, a communicator is
                created with create_comm().

        Raises:
            TypeError: If specifier is not a Specifier, if serial or once
                is not exactly a bool, if verbosity is not exactly an int,
                if wmode is not exactly a str, or if simplecomm is given
                but is not a SimpleComm.
            ValueError: If wmode is not one of 'w', 's', 'o' or 'a'.
        """

        # Argument checks.  They run in a fixed order, so the first
        # offending argument determines which error is raised.
        if not isinstance(specifier, Specifier):
            raise TypeError(
                "Input must be given in the form of a Specifier object")

        # These compare the exact type, so subclasses are rejected (for
        # example, a bool is not accepted as a verbosity level).
        exact_type_checks = (
            (serial, bool, "Serial indicator must be True or False."),
            (verbosity, int, "Verbosity level must be an integer."),
            (wmode, str, "Write mode flag must be a str."),
            (once, bool, "Once-file indicator must be True or False."),
        )
        for value, required, complaint in exact_type_checks:
            if type(value) is not required:
                raise TypeError(complaint)

        if simplecomm is not None and not isinstance(simplecomm, SimpleComm):
            raise TypeError("Simple communicator object is not a SimpleComm")
        if wmode not in ('w', 's', 'o', 'a'):
            raise ValueError("Write mode '{0}' not recognized".format(wmode))

        # Once-file flag and output write mode
        self._use_once_file = once
        self._write_mode = wmode

        # Internal timer data
        timer = TimeKeeper()
        self._timer = timer

        # Use the supplied communicator, or create one when none was given
        timer.start('Initializing Simple Communicator')
        comm = simplecomm if simplecomm is not None else create_comm(serial=serial)
        self._simplecomm = comm
        timer.stop('Initializing Simple Communicator')

        # Bookkeeping for read/write data amounts
        self.assumed_block_size = float(4 * 1024 * 1024)
        self._byte_counts = {}

        # Every printed line is prefixed with "[rank/size] "
        rank_text = str(comm.get_rank())
        size_text = str(comm.get_size())
        header = '[' + rank_text + '/' + size_text + '] '

        # Verbose printer tool
        vprint = VPrinter(header=header, verbosity=verbosity)
        self._vprint = vprint

        # Announce start-up (manager only)
        if comm.is_manager():
            vprint('Initializing Reshaper...', verbosity=0)
            size_line = '  MPI Communicator Size: {}'.format(comm.get_size())
            vprint(size_line, verbosity=1)

        # Let the Specifier check the user input
        timer.start('Specifier Validation')
        specifier.validate()
        timer.stop('Specifier Validation')
        if comm.is_manager():
            vprint('  Specifier validated', verbosity=1)

        # Pick the I/O backend: the requested one when available,
        # otherwise whatever iobackend.get_backend() reports.  The
        # fallback notice is printed without a manager check.
        if not iobackend.is_available(specifier.io_backend):
            self._backend = iobackend.get_backend()
            fallback_line = ('  I/O Backend {0} not available.  Using {1} '
                             'instead').format(specifier.io_backend,
                                               self._backend)
            vprint(fallback_line, verbosity=1)
        else:
            self._backend = specifier.io_backend

        # Input file names
        self._input_filenames = specifier.input_file_list

        # Time-series variable names; None means no explicit selection
        selected_names = specifier.time_series
        self._time_series_names = selected_names
        if selected_names is not None:
            joined_names = ', '.join(selected_names)
            if comm.is_manager():
                warning_line = 'WARNING: Extracting only variables: {0}'.format(
                    joined_names)
                vprint(warning_line, verbosity=-1)

        # Metadata names
        self._metadata_names = specifier.time_variant_metadata

        # Whether to treat 1D time-variant variables as metadata
        self._1d_metadata = specifier.assume_1d_time_variant_metadata

        # Metadata filename
        self._metadata_filename = specifier.metadata_filename

        # Time-invariant variables to exclude from the timeseries files
        self._exclude_list = specifier.exclude_list

        # Output file prefix and suffix
        self._output_prefix = specifier.output_file_prefix
        self._output_suffix = specifier.output_file_suffix

        # NetCDF file options
        self._netcdf_format = specifier.netcdf_format
        self._netcdf_compression = specifier.compression_level
        self._netcdf_least_significant_digit = specifier.least_significant_digit

        # Summarize the NetCDF settings (manager only)
        if comm.is_manager():
            summary = (
                ('  NetCDF I/O Backend: {0}', self._backend),
                ('  NetCDF Output Format: {0}', self._netcdf_format),
                ('  NetCDF Compression: {0}', self._netcdf_compression),
            )
            for template, setting in summary:
                vprint(template.format(setting), verbosity=1)

            # NOTE(review): any falsy value (None, but also 0) is reported
            # as 'Disabled' -- confirm that 0 is not a meaningful setting.
            digits = self._netcdf_least_significant_digit
            if digits:
                trunc_str = '{} decimal places'.format(digits)
            else:
                trunc_str = 'Disabled'
            vprint('  NetCDF Truncation: {0}'.format(trunc_str), verbosity=1)

        # Announce completion (manager only)
        if comm.is_manager():
            vprint('...Reshaper initialized.', verbosity=0)

        # Sync before continuing..
        comm.sync()
Example #3
0
    def __init__(self,
                 specifier,
                 serial=False,
                 verbosity=1,
                 wmode='w',
                 once=False,
                 simplecomm=None):
        """
        Constructor

        Checks the arguments, sets up the timer, communicator and verbose
        printer, validates the Specifier, stores the Specifier's file
        naming settings, and builds the PyNIO options object before
        calling sync() on the communicator.

        Parameters:
            specifier (Specifier): An instance of the Specifier class,
                defining the input specification for this reshaper operation.
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel
                (False).  The default is to assume parallel operation
                (but serial will be chosen if the mpi4py cannot be
                found when trying to initialize decomposition).  Only
                used when no simplecomm is supplied.
            verbosity (int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            wmode (str): The mode to use for writing output.  Can be 'w' for
                normal write operation, 's' to skip the output generation for
                existing time-series files, 'o' to overwrite existing
                time-series files, 'a' to append to existing time-series files.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel
                communication, if necessary.  When None, a communicator is
                created with create_comm().

        Raises:
            TypeError: If specifier is not a Specifier, if serial or once
                is not exactly a bool, if verbosity is not exactly an int,
                if wmode is not exactly a str, or if simplecomm is given
                but is not a SimpleComm.
            ValueError: If wmode is not one of 'w', 's', 'o' or 'a'.
        """

        # Argument checks.  The bool/int/str checks compare the exact
        # type, so subclasses are rejected.
        if not isinstance(specifier, Specifier):
            raise TypeError(
                "Input must be given in the form of a Specifier object")
        if type(serial) is not bool:
            raise TypeError("Serial indicator must be True or False.")
        if type(verbosity) is not int:
            raise TypeError("Verbosity level must be an integer.")
        if type(wmode) is not str:
            raise TypeError("Write mode flag must be a str.")
        if type(once) is not bool:
            raise TypeError("Once-file indicator must be True or False.")
        if not (simplecomm is None or isinstance(simplecomm, SimpleComm)):
            raise TypeError("Simple communicator object is not a SimpleComm")
        if wmode not in ('w', 's', 'o', 'a'):
            raise ValueError("Write mode '{}' not recognized".format(wmode))

        # Once-file flag and output write mode
        self._use_once_file = once
        self._write_mode = wmode

        # Internal timer data
        stopwatch = TimeKeeper()
        self._timer = stopwatch

        # Bookkeeping for read/write data amounts
        self.assumed_block_size = float(4 * 1024 * 1024)
        self._byte_counts = {}

        # Use the supplied communicator, or create one when none was given
        stopwatch.start('Initializing Simple Communicator')
        communicator = simplecomm
        if communicator is None:
            communicator = create_comm(serial=serial)
        self._simplecomm = communicator
        stopwatch.stop('Initializing Simple Communicator')

        # Every printed line is prefixed with "[rank/size] "
        header = ''.join(('[', str(communicator.get_rank()),
                          '/', str(communicator.get_size()), '] '))

        # Verbose printer tool
        announce = VPrinter(header=header, verbosity=verbosity)
        self._vprint = announce

        # Announce start-up (manager only)
        if communicator.is_manager():
            announce('Initializing Reshaper...', verbosity=0)

        # Let the Specifier check the user input
        stopwatch.start('Specifier Validation')
        specifier.validate()
        stopwatch.stop('Specifier Validation')
        if communicator.is_manager():
            announce('  Specifier validated', verbosity=1)

        # Input file names, metadata names, output prefix and suffix
        self._input_filenames = specifier.input_file_list
        self._metadata_names = specifier.time_variant_metadata
        self._output_prefix = specifier.output_file_prefix
        self._output_suffix = specifier.output_file_suffix

        # PyNIO options, with the PreFill option switched off
        nio_opts = nio_options()
        nio_opts.PreFill = False

        # Map the Specifier's NetCDF format string onto the PyNIO Format
        # (and CompressionLevel) options.  Any other format string leaves
        # both options untouched.
        if specifier.netcdf_format == 'netcdf':
            nio_opts.Format = 'Classic'
        elif specifier.netcdf_format in ('netcdf4', 'netcdf4c'):
            nio_opts.Format = 'NetCDF4Classic'
            nio_opts.CompressionLevel = specifier.compression_level
        self._nio_options = nio_opts
        if communicator.is_manager():
            announce('  PyNIO options set', verbosity=1)

        # Announce completion (manager only)
        if communicator.is_manager():
            announce('Reshaper initialized.', verbosity=0)

        # Sync before continuing..
        communicator.sync()