def setUp(self):
        self.gcomm = simplecomm.create_comm()
        self.gsize = MPI_COMM_WORLD.Get_size()
        self.grank = MPI_COMM_WORLD.Get_rank()

        self.groups = ['a', 'b', 'c']

        self.rank = int(self.grank // len(self.groups))
        self.color = int(self.grank % len(self.groups))
        self.group = self.groups[self.color]

        self.monocomm, self.multicomm = self.gcomm.divide(self.group)

        self.all_colors = [i % len(self.groups) for i in range(self.gsize)]
        self.all_groups = [self.groups[i] for i in self.all_colors]
        self.all_ranks = [int(i // len(self.groups)) for i in range(self.gsize)]
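The fixture above targets an MPI run, but every driver in this collection also supports a serial fallback. A minimal sketch of that fallback, assuming the ASAPTools package layout (asaptools.simplecomm); with serial=True no MPI launch is required:

from asaptools import simplecomm

# A serial SimpleComm behaves like a one-rank world: get_size() is 1,
# get_rank() is 0, and is_manager() is True.
sc = simplecomm.create_comm(serial=True)
print(sc.get_rank(), sc.get_size(), sc.is_manager())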
    def setUp(self):
        # COMM_WORLD Communicator and its size and
        # this MPI process's world rank
        self.gcomm = simplecomm.create_comm()
        self.gsize = MPI_COMM_WORLD.Get_size()
        self.grank = MPI_COMM_WORLD.Get_rank()

        # The group names to assume when dividing COMM_WORLD
        self.groups = ['a', 'b', 'c']

        # This MPI process's rank, color, and group after division
        self.rank = self.grank // len(self.groups)
        self.color = self.grank % len(self.groups)
        self.group = self.groups[self.color]

        # The divided communicators (monocolor and multicolor)
        self.monocomm, self.multicomm = self.gcomm.divide(self.group)

        # Every MPI process's color, group, and grank after division
        self.all_colors = [i % len(self.groups) for i in range(self.gsize)]
        self.all_groups = [self.groups[i] for i in self.all_colors]
        self.all_ranks = [i // len(self.groups) for i in range(self.gsize)]
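A minimal sketch of the divide() pattern these fixtures exercise, assuming the ASAPTools package layout and an MPI launch (e.g. mpirun -n 6 python demo.py):

from asaptools import simplecomm

gcomm = simplecomm.create_comm()
groups = ['a', 'b', 'c']
color = gcomm.get_rank() % len(groups)

# divide() returns two communicators: 'monocomm' spans the ranks sharing
# this rank's group, 'multicomm' spans one rank from each group.
monocomm, multicomm = gcomm.divide(groups[color])
print('world rank', gcomm.get_rank(), '-> group', groups[color],
      '-> group rank', monocomm.get_rank())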
Example #5
    def __init__(self,
                 specifier,
                 serial=False,
                 verbosity=1,
                 wmode='w',
                 once=False,
                 simplecomm=None):
        """
        Constructor

        Parameters:
            specifier (Specifier): An instance of the Specifier class,
                defining the input specification for this reshaper operation.
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel
                (False).  The default is to assume parallel operation
                (but serial will be chosen if mpi4py cannot be found
                when trying to initialize the decomposition).
            verbosity (int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            wmode (str): The mode to use for writing output.  Can be 'w' for
                normal write operation, 's' to skip the output generation for
                existing time-series files, 'o' to overwrite existing
                time-series files, 'a' to append to existing time-series files.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel
                communication, if necessary
        """

        # Type checking (or double-checking)
        if not isinstance(specifier, Specifier):
            err_msg = "Input must be given in the form of a Specifier object"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(wmode) is not str:
            err_msg = "Write mode flag must be a str."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, SimpleComm):
                err_msg = "Simple communicator object is not a SimpleComm"
                raise TypeError(err_msg)
        if wmode not in ['w', 's', 'o', 'a']:
            err_msg = "Write mode '{}' not recognized".format(wmode)
            raise ValueError(err_msg)

        # Whether to write a once file
        self._use_once_file = once

        # The output write mode to use
        self._write_mode = wmode

        # Internal timer data
        self._timer = TimeKeeper()

        # Dictionary storing read/write data amounts
        self.assumed_block_size = float(4 * 1024 * 1024)
        self._byte_counts = {}

        self._timer.start('Initializing Simple Communicator')
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)
        # Reference to the simple communicator
        self._simplecomm = simplecomm
        self._timer.stop('Initializing Simple Communicator')

        # Construct the print header
        header = ''.join([
            '[',
            str(self._simplecomm.get_rank()), '/',
            str(self._simplecomm.get_size()), '] '
        ])

        # Reference to the verbose printer tool
        self._vprint = VPrinter(header=header, verbosity=verbosity)

        # Debug output starting
        if self._simplecomm.is_manager():
            self._vprint('Initializing Reshaper...', verbosity=0)

        # Validate the user input data
        self._timer.start('Specifier Validation')
        specifier.validate()
        self._timer.stop('Specifier Validation')
        if self._simplecomm.is_manager():
            self._vprint('  Specifier validated', verbosity=1)

        # Store the input file names
        self._input_filenames = specifier.input_file_list

        # Store the list of metadata names
        self._metadata_names = specifier.time_variant_metadata

        # Store the output file prefix and suffix
        self._output_prefix = specifier.output_file_prefix
        self._output_suffix = specifier.output_file_suffix

        # Setup PyNIO options (including disabling the default PreFill option)
        opt = nio_options()
        opt.PreFill = False

        # Determine the Format and CompressionLevel options
        # from the NetCDF format string in the Specifier
        if specifier.netcdf_format == 'netcdf':
            opt.Format = 'Classic'
        elif specifier.netcdf_format in ['netcdf4', 'netcdf4c']:
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = specifier.compression_level
        self._nio_options = opt
        if self._simplecomm.is_manager():
            self._vprint('  PyNIO options set', verbosity=1)

        # Helpful debugging message
        if self._simplecomm.is_manager():
            self._vprint('Reshaper initialized.', verbosity=0)

        # Sync before continuing...
        self._simplecomm.sync()
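A hedged usage sketch for the constructor above, assuming the PyReshaper package layout (pyreshaper.specification.Specifier), a class named Reshaper here for the __init__ shown, and hypothetical file names; the Specifier attributes mirror exactly those read by the constructor:

from pyreshaper.specification import Specifier

spec = Specifier()
spec.input_file_list = ['hist.0001.nc', 'hist.0002.nc']  # hypothetical inputs
spec.netcdf_format = 'netcdf4'
spec.compression_level = 1
spec.output_file_prefix = 'tseries.'
spec.output_file_suffix = '.nc'
spec.time_variant_metadata = ['time_bnds']

# Serial run with default verbosity; wmode='s' skips the output generation
# for existing time-series files, per the docstring above.
rshpr = Reshaper(spec, serial=True, verbosity=1, wmode='s')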
Example #7
def main(argv):
    print('Running pyEnsSumPop!')

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/'

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if verbose:
        print(opts_dict)

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])
    if opts_dict['jsonfile']:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        str_size = 0
        for vname in Var3d:
            str_size = max(str_size, len(vname))
        for vname in Var2d:
            str_size = max(str_size, len(vname))

    in_files = []
    if os.path.exists(input_dir):
        # Pick up 'nrand' randomly chosen input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the sorted list of files
            in_files = sorted(os.listdir(input_dir))
        # Make sure we have enough
        num_files = len(in_files)
    else:
        print('Input directory: ', input_dir, ' not found')
        sys.exit(2)

    # Partition the input file list (the simplecomm object 'me' was created above)
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        if os.path.isfile(input_dir + '/' + onefile):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            print("COULD NOT LOCATE FILE " + input_dir + '/' + onefile +
                  "! EXITING....")
            sys.exit()

    print(in_file_list)

    # Store dimensions of the input fields
    if verbose:
        print("Getting spatial dimensions")
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    # Make sure all files have the same dimensions
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (nlev != int(input_dims["z_t"]) or nlat != int(input_dims["nlat"])
                or nlon != int(input_dims["nlon"])):
            print("Dimension mismatch between ", in_file_list[0], 'and',
                  in_file_list[count], '!!!')
            sys.exit()

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose:
        print("Creating ", this_sumfile, "  ...")
    if me.get_rank() == 0:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'

        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if verbose:
            print("Setting dimensions .....")
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('time', None)
        nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
        nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
        nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.create_dimension('nvars3d', len(Var3d))
        nc_sumfile.create_dimension('nvars2d', len(Var2d))
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("Setting global attributes .....")
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if verbose:
            print("Creating variables .....")
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', ))
        v_vars = nc_sumfile.create_variable("vars", 'S1',
                                            ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1',
                                             ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1',
                                             ('nvars2d', 'str_size'))
        v_time = nc_sumfile.create_variable("time", 'd', ('time', ))
        v_ens_avg3d = nc_sumfile.create_variable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))

        v_RMSZ = nc_sumfile.create_variable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))
        if not opts_dict['zscoreonly']:
            v_gm = nc_sumfile.create_variable("global_mean", 'f',
                                              ('time', 'nvars', 'ens_size'))

        # Assign vars, var3d and var2d
        if verbose:
            print("Assigning vars, var3d, and var2d .....")

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if verbose:
            print("Assigning time invariant metadata .....")
        vars_dict = o_files[0].variables
        lev_data = vars_dict["z_t"]
        v_lev[:] = lev_data[:]  # write the values rather than rebinding the name

    # Time-variant metadata
    if verbose:
        print("Assigning time variant metadata .....")
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(), ))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    # Calculate global mean, average, standard deviation
    if verbose:
        print("Calculating global means .....")
    is_SE = False
    tslice = 0
    if not opts_dict['zscoreonly']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, Var3d, Var2d, is_SE, False, opts_dict)
    if verbose:
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if verbose:
        print("Calculating RMSZ scores .....")
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
        o_files, Var3d, Var2d, is_SE, opts_dict)

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor
        # Gather global means 3d results
        if not opts_dict['zscoreonly']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            #print "before gather, gmall.shape=",gmall.shape
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(Var3d) + len(Var2d), len(o_files)))
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me,
            (me.get_size(), len(Var3d) + len(Var2d), len(o_files), nbin))
        #print 'zmall=',zmall

        #print "after gather, gmall.shape=",gmall.shape
        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me,
                                                (me.get_size(), len(Var2d),
                                                 (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me,
                                                   (me.get_size(), len(Var2d),
                                                    (nlat), nlon))

    # Assign to file:
    if me.get_rank() == 0:
        #Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        if not opts_dict['zscoreonly']:
            v_gm[:, :, :] = gmall[:, :, :]
        print "All done"
Example #8
    if rank == 0:
        # Update system log with the dates that were just converted
        debugMsg('before chunking.write_log', header=True, verbosity=1)
        chunking.write_log('{0}/logs/ts_status.log'.format(caseroot), log)
        debugMsg('after chunking.write_log', header=True, verbosity=1)

    scomm.sync()

    return 0


#===================================

if __name__ == "__main__":
    # initialize simplecomm object
    scomm = simplecomm.create_comm(serial=False)

    # setup an overall timer
    timer = timekeeper.TimeKeeper()
    timer.start("Total Time")

    # get commandline options
    options = commandline_options()
    debug = options.debug[0]

    # initialize global vprinter object for printing debug messages
    debugMsg = vprinter.VPrinter(header='', verbosity=0)
    if options.debug:
        header = 'cesm_tseries_generator: DEBUG... '
        debugMsg = vprinter.VPrinter(header=header, verbosity=options.debug[0])
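A small sketch of the VPrinter idiom used above, assuming the ASAPTools package layout (asaptools.vprinter); each message carries its own verbosity level, and a higher printer verbosity means more output, per the Reshaper docstring earlier:

from asaptools import vprinter

# header=True prepends the printer's header string to this message.
debugMsg = vprinter.VPrinter(header='demo: DEBUG... ', verbosity=2)
debugMsg('a debug message', header=True, verbosity=1)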
Example #9
def main(argv):

    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= 
         minPCFail= minRunFail= numRunFile= printVars popens 
         jsonfile= mpi_enable nbin= minrange= maxrange= outfile= 
         casejson= npick= pepsi_gm pop_tol= web_enabled
         pop_threshold= printStdMean fIndex= lev= eet= saveResults json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVars'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['printStdMean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False
    opts_dict['saveResults'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']

    # some mods for POP-ECT
    if popens:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print(' ')
        print('--------pyCECT--------')
        print(' ')
        print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
        print(' ')
        if not opts_dict['web_enabled']:
            print('Ensemble summary file = ' + opts_dict['sumfile'])
        print(' ')
        print('Testcase file directory = ' + opts_dict['indir'])
        print(' ')
        print(' ')

    # make sure these are valid
    if not opts_dict['web_enabled'] and not os.path.isfile(opts_dict['sumfile']):
        print("ERROR: Summary file name is not valid.")
        sys.exit()
    if not os.path.exists(opts_dict['indir']):
        print("ERROR: --indir path is not valid.")
        sys.exit()

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))

    elif opts_dict['json_case']:
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'],
                                                     wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print("ERROR: " + opts_dict['json_case'] + " does not exist.")
            sys.exit()
        print("in_files=", in_files)
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print("ERROR: no matching files for wildcard=" + wildname +
                      " found in specified --indir")
                sys.exit()
            else:
                print("Found " + str(num_file) +
                      " matching files in specified --indir")
            if opts_dict['numRunFile'] > num_file:
                print("ERROR: more files needed (" +
                      str(opts_dict['numRunFile']) +
                      ") than available in the indir (" + str(num_file) + ").")
                sys.exit()

    in_files.sort()
    #print in_files

    if popens:
        #Partition the input file list
        in_files_list = me.partition(in_files,
                                     func=EqualStride(),
                                     involved=True)

    else:
        # Random pick cam files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)

    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(frun_temp)
        else:
            print("ERROR: COULD NOT LOCATE FILE " + frun_temp)
            sys.exit()

    if opts_dict['web_enabled']:
        if len(opts_dict['sumfile']) == 0:
            opts_dict['sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        #need to open ifiles

        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(
            opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print(' ')
            print('Validation file    : machineid = ' + machineid +
                  ', compiler = ' + compiler)
            print('Found summary file : ' + opts_dict['sumfile'])
            print(' ')
        else:
            print('Warning: machine and compiler are unknown')

    if popens:

        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print(
                "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile."
            )
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print(' ')
        print('Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold']))
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(
            opts_dict, ifiles, me.get_rank(), Var3d, Var2d)

        np.set_printoptions(threshold=sys.maxsize)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(
                zmall, me, (me.get_size(), len(Var3d) + len(Var2d),
                            len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                fout = open(opts_dict['outfile'], "w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout, j, fmt='%-7.2e')
    #cam
    else:
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm, std_gm_array, str_size = pyEnsLib.read_ensemble_summary(
            opts_dict['sumfile'])

        #Only doing gm

        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}

        for k, v in ens_gm.items():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print(
                'Warning: please note the ensemble summary file is different from the testing files: they use different grids'
            )

        # Compare the new run and the ensemble summary file
        results = {}
        countgm = np.zeros(len(ifiles), dtype=np.int32)

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(
            ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'],
            opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i],
                                    ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                      variables, 'gm', results,
                                                      'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(
            means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict,
            ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(
            ifiles, new_scores, sigma_scores_gm, opts_dict, me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        #        if opts_dict['printStdMean'] and decision == 'FAILED':
        if opts_dict['printStdMean']:

            import seaborn as sns
            import matplotlib
            matplotlib.use('Agg')  #don't display figures
            import matplotlib.pyplot as plt

            print(" ")
            print(
                '***************************************************************************** '
            )
            print(
                'Test run variable standardized means (for reference only - not used to determine pass/fail)'
            )
            print(
                '***************************************************************************** '
            )
            print(" ")

            category = {
                "all_outside99": [],
                "two_outside99": [],
                "one_outside99": [],
                "all_oneside_outside1QR": []
            }
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0
                    d = 0
                    p = 0
                    q = 0
                    for i in range(comp_std_gm[f + fc].size):
                        if comp_std_gm[f + fc][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[f + fc][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[f + fc][i] < dist_995
                              and comp_std_gm[f + fc][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[f + fc][i] > dist_05
                              and comp_std_gm[f + fc][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, f + fc))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, f + fc))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, f + fc))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append(
                            (avar, f + fc))
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]

                if key == "all_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 3 test run global means outside of the 99th percentile."
                    )
                elif key == "two_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 2 test run global means outside of the 99th percentile."
                    )
                elif key == "one_outside99":
                    print(
                        "*** ", len(value),
                        " variables have 1 test run global mean outside of the 99th percentile."
                    )
                elif key == "all_oneside_outside1QR":
                    print(
                        "*** ", len(value),
                        " variables have all test run global means outside of the first quartile (but not outside the 99th percentile)."
                    )

                if len(value) > 0:
                    print(" => generating plot ...")
                    if len(value) > 20:
                        print(
                            "    NOTE: truncating to only plot the first 20 variables."
                        )
                        value = value[0:20]

                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    name = each_var[0]
                    if not isinstance(name, str):
                        name = name.decode("utf-8")

                    list_var.append(name)

                if len(value) != 0:
                    ax = sns.boxplot(data=list_array,
                                     whis=[0.5, 99.5],
                                     fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    plt.xticks(list(range(len(list_array))),
                               list_var,
                               fontsize=8,
                               rotation=-45)

                    if decision == 'FAILED':
                        plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        plt.savefig(part_name + "_" + key + "_pass.png")
                    plt.close()


        ##
        # Print file with info about new test runs....to a netcdf file
        ##
        if opts_dict['saveResults']:

            num_vars = comp_std_gm.shape[0]
            tsize = comp_std_gm.shape[1]
            esize = std_gm_array.shape[1]
            this_savefile = 'savefile.nc'
            if verbose:
                print("VERBOSE: Creating ", this_savefile, "  ...")

            if os.path.exists(this_savefile):
                os.unlink(this_savefile)
            nc_savefile = nc.Dataset(this_savefile,
                                     "w",
                                     format="NETCDF4_CLASSIC")
            nc_savefile.createDimension('ens_size', esize)
            nc_savefile.createDimension('test_size', tsize)
            nc_savefile.createDimension('nvars', num_vars)
            nc_savefile.createDimension('str_size', str_size)

            # Set global attributes
            now = time.strftime("%c")
            nc_savefile.creation_date = now
            nc_savefile.title = 'PyCECT compare results file'
            nc_savefile.summaryfile = opts_dict['sumfile']
            #nc_savefile.testfiles = in_files

            #variables
            v_vars = nc_savefile.createVariable("vars", 'S1',
                                                ('nvars', 'str_size'))
            v_std_gm = nc_savefile.createVariable("std_gm", 'f8',
                                                  ('nvars', 'test_size'))
            v_scores = nc_savefile.createVariable("scores", 'f8',
                                                  ('nvars', 'test_size'))
            v_ens_sigma_scores = nc_savefile.createVariable(
                'ens_sigma_scores', 'f8', ('nvars', ))
            v_ens_std_gm = nc_savefile.createVariable("ens_std_gm", 'f8',
                                                      ('nvars', 'ens_size'))

            #hard-coded size
            str_out = nc.stringtochar(np.array(ens_var_name, 'S10'))

            v_vars[:] = str_out
            v_std_gm[:, :] = comp_std_gm[:, :]
            v_scores[:, :] = new_scores[:, :]
            v_ens_sigma_scores[:] = sigma_scores_gm[:]
            v_ens_std_gm[:, :] = std_gm_array[:, :]

            nc_savefile.close()

        # Print variables (optional)
        if opts_dict['printVars']:
            print(" ")
            print(
                '***************************************************************************** '
            )
            print(
                'Variable global mean information (for reference only - not used to determine pass/fail)'
            )
            print(
                '***************************************************************************** '
            )
            for fcount, fid in enumerate(ifiles):
                print(' ')
                print('Run ' + str(fcount + 1) + ":")
                print(' ')
                print(
                    '***' + str(countgm[fcount]),
                    " of " + str(len(ens_var_name)) +
                    ' variables are outside of ensemble global mean distribution***'
                )
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange',
                                      fcount, variables, 'global mean')
                print(' ')
                print(
                    '----------------------------------------------------------------------------'
                )

    if me.get_rank() == 0:
        print(' ')
        print("Testing complete.")
        print(' ')
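All of these drivers share the same getopt pattern: a space-separated string of long-option names is split into a list, and a trailing '=' marks an option that takes a value. A minimal self-contained sketch:

import getopt
import sys

optkeys = 'verbose indir= tslice= mpi_enable'.split()  # '=' means a value follows
try:
    opts, args = getopt.getopt(sys.argv[1:], 'h', optkeys)
except getopt.GetoptError:
    sys.exit(2)
for flag, value in opts:
    print(flag, value)  # e.g. '--indir ./data', or '--verbose ' (no value)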
Example #10
            oldfile = os.path.join(olddir, filename)
            if oldfile in oldfiles:
                item_dict['old'] = oldfile
                oldfiles.remove(oldfile)
                items_to_check.append(item_dict)
            else:
                item_dict['old'] = None
                unchecked_new_items.append(item_dict)
        for oldfile in oldfiles:
            item_dict = {'test': test_name}
            item_dict['new'] = None
            item_dict['old'] = oldfile
            unchecked_old_items.append(item_dict)

    # Get a basic MPI comm
    comm = create_comm(serial=(opts.serial or opts.list_tests))

    # Print tests that will be checked
    if comm.is_manager():
        print('Checking test results.')

        for test_name in tests_to_check:
            print('Test {0!s}:'.format(test_name))
            num_chk = sum(1 for i in items_to_check if i['test'] == test_name)
            num_new = num_chk + sum(1 for i in unchecked_new_items
                                    if i['test'] == test_name)
            num_old = num_chk + sum(1 for i in unchecked_old_items
                                    if i['test'] == test_name)
            print('   Checking {0!s} of {1!s}'.format(num_chk, num_new),
                  'new files generated against {0!s}'.format(num_old),
                  'old files found.')
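A minimal sketch of the manager-only reporting idiom above, assuming the ASAPTools package layout; only the manager rank prints, so the report is not duplicated across MPI processes:

from asaptools import simplecomm

comm = simplecomm.create_comm(serial=False)
if comm.is_manager():
    print('Checking test results on {0!s} ranks.'.format(comm.get_size()))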
Example #11
def main(argv):


    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try: 
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}
    
    # Defaults
    opts_dict['tag'] = 'cesm2_0_beta08'
    opts_dict['compset'] = 'F2000'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)


    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach']
            and opts_dict['res']):
        print('Please specify --tag, --compset, --mach and --res options')
        sys.exit()
       
    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var lists of excluded and included variables
    ex_varlist = []
    inc_varlist = []

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print('Running pyEnsSum!')

    if me.get_rank() == 0 and verbose:
        print(opts_dict)
        print('Ensemble size for summary = ', esize)

    exclude = False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            ex_varlist, exclude = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')
            if not exclude:
                inc_varlist = ex_varlist
                ex_varlist = []

    # Broadcast the excluded or included var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
        else:
            inc_varlist = me.partition(inc_varlist, func=Duplicate(), involved=True)
        
    in_files = []
    if os.path.exists(input_dir):
        # Get the sorted list of files
        in_files = sorted(os.listdir(input_dir))

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and verbose:
            print('Number of files in input directory = ', num_files)
        if num_files < esize:
            if me.get_rank() == 0 and verbose:
                print('Number of files in input directory (', num_files,
                      ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if num_files > esize:
            if me.get_rank() == 0 and verbose:
                print('NOTE: Number of files in ', input_dir,
                      'is greater than specified ensemble size of ', esize,
                      '\nwill just use the first ', esize, 'files')
    else:
        if me.get_rank() == 0:
            print('Input directory: ', input_dir, ' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'],
                                               opts_dict['regx'])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0 and verbose:
            print('in_files=', in_files)

    # Open the files in the input directory
    o_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('Input files are: ')
        print("\n".join(in_files))
    for onefile in in_files[0:esize]:
        if os.path.isfile(input_dir + '/' + onefile):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            if me.get_rank() == 0:
                print("COULD NOT LOCATE FILE " + input_dir + '/' + onefile +
                      "! EXITING....")
            sys.exit()

    # Store dimensions of the input fields
    if me.get_rank() == 0 and verbose:
        print("Getting spatial dimensions")
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key =="lon"):
            nlon = input_dims[key]
            lonkey=key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey=key
        
    if nlev == -1:
        if me.get_rank() == 0:
            print("COULD NOT LOCATE valid dimension lev => EXITING....")
        sys.exit()

    if ncol == -1 and (nlat == -1 or nlon == -1):
        if me.get_rank() == 0:
            print("Need either lat/lon or ncol => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    is_SE = (ncol != -1)

    # Make sure all files have the same dimensions
    if me.get_rank() == 0 and verbose:
        print("Checking dimensions across files....")
        print('lev = ', nlev)
        if is_SE:
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if is_SE:
            if nlev != int(input_dims["lev"]) or ncol != int(input_dims["ncol"]):
                if me.get_rank() == 0:
                    print("Dimension mismatch between ", in_files[0], 'and',
                          in_files[count], '!!!')
                sys.exit()
        else:
            if (nlev != int(input_dims["lev"]) or nlat != int(input_dims[latkey])
                    or nlon != int(input_dims[lonkey])):
                if me.get_rank() == 0:
                    print("Dimension mismatch between ", in_files[0], 'and',
                          in_files[count], '!!!')
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    if exclude:
        vars_dict = vars_dict_all
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    # Given an included var list, remove all float vars that are not on the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.items():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                del vars_dict[k]
 
    num_vars = len(vars_dict)

    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.items():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # number of dimensions
        vs = v.shape  # dimension sizes
        is_2d = False
        is_3d = False
        if is_SE:  # (time, lev, ncol) or (time, ncol)
            if (vr == 2) and (vs[1] == ncol):
                is_2d = True
                num_2d += 1
            elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlat, nlon) or (time, nlat, nlon)
            if (vr == 3) and (vs[1] == nlat and vs[2] == nlon):
                is_2d = True
                num_2d += 1
            elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon
                                and (vs[1] == nlev or vs[1] == nilev)):
                is_3d = True
                num_3d += 1

        if is_3d:
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif is_2d:
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    if me.get_rank() == 0 and verbose:
        print('Number of variables found:  ', num_3d + num_2d)
        print('3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d))

    # Now sort these and combine (this sorts caps first, then lower case - 
    # which is what we want)
    d2_var_names.sort()       
    d3_var_names.sort()

    if esize<num_2d+num_3d:
       if me.get_rank()==0:
          print "************************************************************************************************************************************"
          print "  Error: the total number of 3D and 2D variables "+str(num_2d+num_3d)+" is larger than the number of ensemble files "+str(esize)
          print "  Cannot generate ensemble summary file, please remove more variables from your included variable list,"
          print "  or add more varaibles in your excluded variable list!!!"
          print "************************************************************************************************************************************"
       sys.exit()
    # All vars is 3d vars first (sorted), then the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    #if me.get_rank() == 0 and (verbose == True):
    #    print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if me.get_rank() == 0 and (verbose == True):
        print "Creating ", this_sumfile, "  ..."
    if me.get_rank() == 0 or opts_dict["popens"]:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if me.get_rank() == 0 and (verbose == True):
            print "Setting dimensions ....."
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if me.get_rank() == 0 and (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date',now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"]) 
        setattr(nc_sumfile, 'compset', opts_dict["compset"]) 
        setattr(nc_sumfile, 'resolution', opts_dict["res"]) 
        setattr(nc_sumfile, 'machine', opts_dict["mach"]) 

        # Create variables
        if me.get_rank() == 0 and (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
        v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
        v_standardized_gm=nc_sumfile.create_variable("standardized_gm",'f',('nvars','ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm','f',('nvars','nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm','f',('nvars',))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm','f',('nvars',))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm','f',('nvars',))


        # Assign vars, var3d and var2d
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names =[]
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ')*(str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev[:] = lev_data[:]

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable (the max norm is also computed, currently in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        
        var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True)
        var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True)
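        # (EqualStride deals every n-th variable name to each of the n ranks,
        #  so the stride-based gathers further below can reassemble results in
        #  the original order.)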
    else:
        var3_list_loc=d3_var_names
        var2_list_loc=d2_var_names

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d,gm2d,var_list = pyEnsLib.generate_global_mean_for_summary(o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores  
    if (not opts_dict['gmonly']) or (opts_dict['cumul']):
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict)    

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict,var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict,var2_list_loc)

    if opts_dict['mpi_enable'] & ( not opts_dict['popens']):

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index=get_stride_list(len(d3_var_names),me)
         
            # Gather global means 3d results
            gm3d=gather_npArray(gm3d,me,slice_index,(len(d3_var_names),len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d=gather_npArray(zscore3d,me,slice_index,(len(d3_var_names),len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d=get_shape(ens_avg3d.shape,len(d3_var_names),me.get_rank())
                ens_avg3d=gather_npArray(ens_avg3d,me,slice_index,shape_tuple3d) 
                ens_stddev3d=gather_npArray(ens_stddev3d,me,slice_index,shape_tuple3d) 

            # Gather 2d variable results from all processors to the master processor
            slice_index=get_stride_list(len(d2_var_names),me)

            # Gather global means 2d results
            gm2d=gather_npArray(gm2d,me,slice_index,(len(d2_var_names),len(o_files)))

            var_list=gather_list(var_list,me)

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d=gather_npArray(zscore2d,me,slice_index,(len(d2_var_names),len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d=get_shape(ens_avg2d.shape,len(d2_var_names),me.get_rank())
                ens_avg2d=gather_npArray(ens_avg2d,me,slice_index,shape_tuple2d) 
                ens_stddev2d=gather_npArray(ens_stddev2d,me,slice_index,shape_tuple2d) 

        else:
            gmall=np.concatenate((temp1,temp2),axis=0)
            gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(d3_var_names)+len(d2_var_names)))
    # Assign to file:
    if me.get_rank() == 0 or opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall=np.concatenate((gm3d,gm2d),axis=0)
            if not opts_dict['gmonly']:
                Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
                v_RMSZ[:,:]=Zscoreall[:,:]
                if (is_SE == True):
                    v_ens_avg3d[:,:,:]=ens_avg3d[:,:,:]
                    v_ens_stddev3d[:,:,:]=ens_stddev3d[:,:,:]
                    v_ens_avg2d[:,:]=ens_avg2d[:,:]
                    v_ens_stddev2d[:,:]=ens_stddev2d[:,:]
                else:
                    v_ens_avg3d[:,:,:,:]=ens_avg3d[:,:,:,:]
                    v_ens_stddev3d[:,:,:,:]=ens_stddev3d[:,:,:,:]
                    v_ens_avg2d[:,:,:]=ens_avg2d[:,:,:]
                    v_ens_stddev2d[:,:,:]=ens_stddev2d[:,:,:]
        else:
            gmall_temp=np.transpose(gmall[:,:])
            gmall=gmall_temp
        mu_gm,sigma_gm,standardized_global_mean,loadings_gm,scores_gm=pyEnsLib.pre_PCA(gmall,all_var_names,var_list,me)
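        # (pre_PCA standardizes each variable's global means to zero mean and
        #  unit variance, then returns the PCA loadings and per-component
        #  scores that pyCECT later tests new runs against.)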
        v_gm[:,:]=gmall[:,:]
        v_standardized_gm[:,:]=standardized_global_mean[:,:]
        v_mu_gm[:]=mu_gm[:]
        v_sigma_gm[:]=sigma_gm[:].astype(np.float32)
        v_loadings_gm[:,:]=loadings_gm[:,:]
        v_sigma_scores_gm[:]=scores_gm[:]

        if me.get_rank() == 0:
           print "All Done"
Beispiel #12
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = """verbose sumfile= indir= input_globs= tslice= nPC= sigMul= 
         minPCFail= minRunFail= numRunFile= printVarTest popens 
         jsonfile= mpi_enable nbin= minrange= maxrange= outfile= 
         casejson= npick= pepsi_gm pop_tol= web_enabled
         pop_threshold= prn_std_mean fIndex= lev= eet= json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['prn_std_mean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False

    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, caller, opts_dict)
    popens = opts_dict['popens']
    #some mods for POP-ECT
    if popens == True:
        opts_dict['tslice'] = 0
        opts_dict['numRunFile'] = 1
        opts_dict['eet'] = 0
        opts_dict['mpi_enable'] = False

        #print opts_dict

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict['mpi_enable'])
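    # (create_comm(True) returns a serial stand-in with the same interface,
    #  so the rest of the driver does not need to know whether MPI is active.)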

    # Print out timestamp, input ensemble file and new run directory
    dt = datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank() == 0:
        print '--------pyCECT--------'
        print ' '
        print dt.strftime("%A, %d. %B %Y %I:%M%p")
        print ' '
        if not opts_dict['web_enabled']:
            print 'Ensemble summary file = ' + opts_dict['sumfile']
        print ' '
        print 'Testcase file directory = ' + opts_dict['indir']
        print ' '
        print ' '

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

    ifiles = []
    in_files = []
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
        with open(opts_dict['casejson']) as fin:
            result = json.load(fin)
            in_files_first = result['not_pick_files']
            in_files = random.sample(in_files_first, opts_dict['npick'])
            print 'Testcase files:'
            print '\n'.join(in_files)

    elif opts_dict['json_case']:
        json_file = opts_dict['json_case']
        if (os.path.exists(json_file)):
            fd = open(json_file)
            metainfo = json.load(fd)
            if 'CaseName' in metainfo:
                casename = metainfo['CaseName']
                if (os.path.exists(opts_dict['indir'])):
                    for name in casename:
                        wildname = '*.' + name + '.*'
                        full_glob_str = os.path.join(opts_dict['indir'],
                                                     wildname)
                        glob_file = glob.glob(full_glob_str)
                        in_files.extend(glob_file)
        else:
            print "ERROR: " + opts_dict['json_case'] + " does not exist."
            sys.exit()
        print "in_files=", in_files
    else:
        wildname = '*' + str(opts_dict['input_globs']) + '*'
        # Open all input files
        if (os.path.exists(opts_dict['indir'])):
            full_glob_str = os.path.join(opts_dict['indir'], wildname)
            glob_files = glob.glob(full_glob_str)
            in_files.extend(glob_files)
            num_file = len(in_files)
            if num_file == 0:
                print "ERROR: no matching files for wildcard=" + wildname + " found in specified --indir"
                sys.exit()
            else:
                print "Found " + str(
                    num_file) + " matching files in specified --indir"
            if opts_dict['numRunFile'] > num_file:
                print "ERROR: more files needed (" + str(
                    opts_dict['numRunFile']
                ) + ") than available in the indir (" + str(num_file) + ")."
                sys.exit()
            #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()
    #print in_files

    if popens:
        #Partition the input file list
        in_files_list = me.partition(in_files,
                                     func=EqualStride(),
                                     involved=True)
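        # (With EqualStride, rank r of n gets files r, r+n, r+2n, ... of the
        #  sorted input list.)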

    else:
        # Random pick non pop files
        in_files_list = pyEnsLib.Random_pickup(in_files, opts_dict)
        #in_files_list=in_files

    for frun_file in in_files_list:
        if frun_file.find(opts_dict['indir']) != -1:
            frun_temp = frun_file
        else:
            frun_temp = opts_dict['indir'] + '/' + frun_file
        if (os.path.isfile(frun_temp)):
            ifiles.append(Nio.open_file(frun_temp, "r"))
        else:
            print "ERROR: COULD NOT LOCATE FILE " + frun_temp
            sys.exit()

    if opts_dict['web_enabled']:
        if len(opts_dict['sumfile']) == 0:
            opts_dict[
                'sumfile'] = '/glade/p/cesmdata/cseg/inputdata/validation/'
        opts_dict['sumfile'], machineid, compiler = pyEnsLib.search_sumfile(
            opts_dict, ifiles)
        if len(machineid) != 0 and len(compiler) != 0:
            print ' '
            print 'Validation file    : machineid = ' + machineid + ', compiler = ' + compiler
            print 'Found summary file : ' + opts_dict['sumfile']
            print ' '
        else:
            print 'Warning: machine and compiler are unknown'

    if popens:

        # Read in the included var list
        if not os.path.exists(opts_dict['jsonfile']):
            print "ERROR: POP-ECT requires the specification of a valid json file via --jsonfile."
            sys.exit()
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        print ' '
        print 'Z-score tolerance = ' + '{:3.2f}'.format(opts_dict['pop_tol'])
        print 'ZPR = ' + '{:.2%}'.format(opts_dict['pop_threshold'])
        zmall, n_timeslice = pyEnsLib.pop_compare_raw_score(
            opts_dict, ifiles, me.get_rank(), Var3d, Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(
                zmall, me, (me.get_size(), len(Var3d) + len(Var2d),
                            len(ifiles), opts_dict['nbin']))
            if me.get_rank() == 0:
                fout = open(opts_dict['outfile'], "w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout, j, fmt='%-7.2e')
    #cam
    else:
        # Read all variables from the ensemble summary file
        ens_var_name, ens_avg, ens_stddev, ens_rmsz, ens_gm, num_3d, mu_gm, sigma_gm, loadings_gm, sigma_scores_gm, is_SE_sum, std_gm = pyEnsLib.read_ensemble_summary(
            opts_dict['sumfile'])

        if len(ens_rmsz) == 0:
            gmonly = True
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables = {}
        if not gmonly:
            for k, v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables, k, 'zscoreRange', v)

        for k, v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables, k, 'gmRange', v)

        # Get 3d variable name list and 2d variable name list separately
        var_name3d = []
        var_name2d = []
        for vcount, v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d, npts2d, is_SE = pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print 'Warning: the ensemble summary file and the test files appear to use different grids'

        # Compare the new run and the ensemble summary file to get rmsz score
        results = {}
        countzscore = np.zeros(len(ifiles), dtype=np.int32)
        countgm = np.zeros(len(ifiles), dtype=np.int32)
        if not gmonly:
            for fcount, fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig = otimeSeries[var_name]
                    Zscore, has_zscore = pyEnsLib.calculate_raw_score(
                        var_name, orig[opts_dict['tslice']], npts3d, npts2d,
                        ens_avg, ens_stddev, is_SE, opts_dict, 0, 0, 0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results, 'zscore', Zscore,
                                            var_name, 'f' + str(fcount))

            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount, fid in enumerate(ifiles):
                countzscore[fcount] = pyEnsLib.evaluatestatus(
                    'zscore', 'zscoreRange', variables, 'ens', results,
                    'f' + str(fcount))

        # Calculate the new run global mean
        mean3d, mean2d, varlist = pyEnsLib.generate_global_mean_for_summary(
            ifiles, var_name3d, var_name2d, is_SE, opts_dict['pepsi_gm'],
            opts_dict)
        means = np.concatenate((mean3d, mean2d), axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results, 'means', means[j][i],
                                    ens_var_name[j], 'f' + str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount, fid in enumerate(ifiles):
            countgm[fcount] = pyEnsLib.evaluatestatus('means', 'gmRange',
                                                      variables, 'gm', results,
                                                      'f' + str(fcount))

        # Calculate the PCA scores of the new run
        new_scores, var_list, comp_std_gm = pyEnsLib.standardized(
            means, mu_gm, sigma_gm, loadings_gm, ens_var_name, opts_dict,
            ens_avg, me)
        run_index, decision = pyEnsLib.comparePCAscores(
            ifiles, new_scores, sigma_scores_gm, opts_dict, me)
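        # (comparePCAscores flags a principal component whenever a run's score
        #  exceeds sigMul times the ensemble's sigma for that PC; the overall
        #  test fails when at least minPCFail of the first nPC components fail
        #  in at least minRunFail runs.)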

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        if opts_dict['prn_std_mean'] and decision == 'FAILED':
            import seaborn as sns
            category = {
                "all_outside99": [],
                "two_outside99": [],
                "one_outside99": [],
                "all_oneside_outside1QR": []
            }
            b = list(pyEnsLib.chunk(ens_var_name, 10))
            for f, alist in enumerate(b):
                for fc, avar in enumerate(alist):
                    dist_995 = np.percentile(std_gm[avar], 99.5)
                    dist_75 = np.percentile(std_gm[avar], 75)
                    dist_25 = np.percentile(std_gm[avar], 25)
                    dist_05 = np.percentile(std_gm[avar], 0.5)
                    c = 0
                    d = 0
                    p = 0
                    q = 0
                    # chunk() splits ens_var_name into blocks of 10, so the
                    # global index of avar is f * 10 + fc (not f + fc)
                    vidx = f * 10 + fc
                    for i in range(comp_std_gm[vidx].size):
                        if comp_std_gm[vidx][i] > dist_995:
                            c = c + 1
                        elif comp_std_gm[vidx][i] < dist_05:
                            d = d + 1
                        elif (comp_std_gm[vidx][i] < dist_995
                              and comp_std_gm[vidx][i] > dist_75):
                            p = p + 1
                        elif (comp_std_gm[vidx][i] > dist_05
                              and comp_std_gm[vidx][i] < dist_25):
                            q = q + 1
                    if c == 3 or d == 3:
                        category["all_outside99"].append((avar, vidx))
                    elif c == 2 or d == 2:
                        category["two_outside99"].append((avar, vidx))
                    elif c == 1 or d == 1:
                        category["one_outside99"].append((avar, vidx))
                    if p == 3 or q == 3:
                        category["all_oneside_outside1QR"].append((avar, vidx))
            part_name = opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name = opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array = []
                list_array2 = []
                list_var = []
                value = category[key]
                print "value len=", key, len(value)
                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    list_var.append(each_var[0])
                if len(value) != 0:
                    ax = sns.boxplot(data=list_array,
                                     whis=[0.5, 99.5],
                                     fliersize=0.0)
                    sns.stripplot(data=list_array2, jitter=True, color="r")
                    sns.plt.xticks(range(len(list_array)),
                                   list_var,
                                   fontsize=8,
                                   rotation=-45)
                    if decision == 'FAILED':
                        sns.plt.savefig(part_name + "_" + key + "_fail.png")
                    else:
                        sns.plt.savefig(part_name + "_" + key + "_pass.png")
                    sns.plt.clf()
            '''
            if len(run_index)>0:
               json_file=opts_dict['json_case']
               if (os.path.exists(json_file)):
                  fd=open(json_file)
                  metainfo=json.load(fd)
                  caseindex=metainfo['CaseIndex']
                  enspath=str(metainfo['EnsPath'][0])
                  #print caseindex
                  if (os.path.exists(enspath)):
                     i=0
                     comp_file=[]
                     search = '\.[0-9]{3}\.'
                     for name in in_files_list:
                        s=re.search(search,name)
                        in_files_index=s.group(0)
                        if in_files_index[1:4] in caseindex:
                           ens_index=str(caseindex[in_files_index[1:4]])
                           wildname='*.'+ens_index+'.*'
                           full_glob_str=os.path.join(enspath,wildname)
                           glob_file=glob.glob(full_glob_str)
                           comp_file.extend(glob_file)
                     print "comp_file=",comp_file                
                     pyEnsLib.plot_variable(in_files_list,comp_file,opts_dict,var_list,run_index,me)
            '''
        # Print out
        if opts_dict['printVarTest']:
            print '*********************************************** '
            print 'Variable-based testing (for reference only - not used to determine pass/fail)'
            print '*********************************************** '
            for fcount, fid in enumerate(ifiles):
                print ' '
                print 'Run ' + str(fcount + 1) + ":"
                print ' '
                if not gmonly:
                    print '***' + str(countzscore[fcount]), " of " + str(
                        len(ens_var_name)
                    ) + ' variables are outside of ensemble RMSZ distribution***'
                    pyEnsLib.printsummary(results, 'ens', 'zscore',
                                          'zscoreRange', (fcount), variables,
                                          'RMSZ')
                    print ' '
                print '***' + str(countgm[fcount]), " of " + str(
                    len(ens_var_name)
                ) + ' variables are outside of ensemble global mean distribution***'
                pyEnsLib.printsummary(results, 'gm', 'means', 'gmRange',
                                      fcount, variables, 'global mean')
                print ' '
                print '----------------------------------------------------------------------------'
    if me.get_rank() == 0:
        print ' '
        print "Testing complete."
        print ' '
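
A minimal sketch of the pointwise z-score idea behind calculate_raw_score
(names are hypothetical; the real pyEnsLib routine additionally handles area
weighting and zero-variance points):

import numpy as np

def rmsz(field, ens_avg, ens_stddev, eps=1e-12):
    # Standardize each grid point against the ensemble mean/stddev, then
    # take the root-mean-square over all points.
    safe_std = np.where(ens_stddev > eps, ens_stddev, 1.0)
    z = (field - ens_avg) / safe_std
    return np.sqrt(np.mean(z ** 2))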
Beispiel #13
0
    def __init__(self, specifier, serial=False, verbosity=1, wmode='w', once=False, simplecomm=None):
        """
        Constructor

        Parameters:
            specifier (Specifier): An instance of the Specifier class,
                defining the input specification for this reshaper operation.
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel
                (False).  The default is to assume parallel operation
                (but serial will be chosen if mpi4py cannot be found
                when trying to initialize the decomposition).
            verbosity (int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            wmode (str): The mode to use for writing output.  Can be 'w' for
                normal write operation, 's' to skip the output generation for
                existing time-series files, 'o' to overwrite existing
                time-series files, 'a' to append to existing time-series files.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel
                communication, if necessary
        """

        # Type checking (or double-checking)
        if not isinstance(specifier, Specifier):
            err_msg = "Input must be given in the form of a Specifier object"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(wmode) is not str:
            err_msg = "Write mode flag must be a str."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, SimpleComm):
                err_msg = "Simple communicator object is not a SimpleComm"
                raise TypeError(err_msg)
        if wmode not in ['w', 's', 'o', 'a']:
            err_msg = "Write mode '{0}' not recognized".format(wmode)
            raise ValueError(err_msg)

        # Whether to write a once file
        self._use_once_file = once

        # The output write mode to use
        self._write_mode = wmode

        # Internal timer data
        self._timer = TimeKeeper()

        self._timer.start('Initializing Simple Communicator')
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)

        # Reference to the simple communicator
        self._simplecomm = simplecomm
        self._timer.stop('Initializing Simple Communicator')

        # Dictionary storing read/write data amounts
        self.assumed_block_size = float(4 * 1024 * 1024)
        self._byte_counts = {}

        # Construct the print header
        header = ''.join(['[', str(self._simplecomm.get_rank()),
                          '/', str(self._simplecomm.get_size()), '] '])

        # Reference to the verbose printer tool
        self._vprint = VPrinter(header=header, verbosity=verbosity)

        # Debug output starting
        if self._simplecomm.is_manager():
            self._vprint('Initializing Reshaper...', verbosity=0)
            self._vprint('  MPI Communicator Size: {}'.format(
                self._simplecomm.get_size()), verbosity=1)

        # Validate the user input data
        self._timer.start('Specifier Validation')
        specifier.validate()
        self._timer.stop('Specifier Validation')
        if self._simplecomm.is_manager():
            self._vprint('  Specifier validated', verbosity=1)

        # The I/O backend to use
        if iobackend.is_available(specifier.io_backend):
            self._backend = specifier.io_backend
        else:
            self._backend = iobackend.get_backend()
            self._vprint(('  I/O Backend {0} not available.  Using {1} '
                          'instead').format(specifier.io_backend, self._backend), verbosity=1)

        # Store the input file names
        self._input_filenames = specifier.input_file_list

        # Store the time-series variable names
        self._time_series_names = specifier.time_series
        if self._time_series_names is not None:
            vnames = ', '.join(self._time_series_names)
            if self._simplecomm.is_manager():
                self._vprint('WARNING: Extracting only variables: {0}'.format(
                    vnames), verbosity=-1)

        # Store the list of metadata names
        self._metadata_names = specifier.time_variant_metadata

        # Store whether to treat 1D time-variant variables as metadata
        self._1d_metadata = specifier.assume_1d_time_variant_metadata

        # Store the metadata filename
        self._metadata_filename = specifier.metadata_filename

        # Store time invariant variables that should be excluded from the timeseries files
        self._exclude_list = specifier.exclude_list

        # Store the output file prefix and suffix
        self._output_prefix = specifier.output_file_prefix
        self._output_suffix = specifier.output_file_suffix

        # Setup NetCDF file options
        self._netcdf_format = specifier.netcdf_format
        self._netcdf_compression = specifier.compression_level
        self._netcdf_least_significant_digit = specifier.least_significant_digit
        if self._simplecomm.is_manager():
            self._vprint(
                '  NetCDF I/O Backend: {0}'.format(self._backend), verbosity=1)
            self._vprint('  NetCDF Output Format: {0}'.format(
                self._netcdf_format), verbosity=1)
            self._vprint('  NetCDF Compression: {0}'.format(
                self._netcdf_compression), verbosity=1)
            trunc_str = ('{} decimal places'.format(self._netcdf_least_significant_digit)
                         if self._netcdf_least_significant_digit else 'Disabled')
            self._vprint('  NetCDF Truncation: {0}'.format(
                trunc_str), verbosity=1)

        # Helpful debugging message
        if self._simplecomm.is_manager():
            self._vprint('...Reshaper initialized.', verbosity=0)

        # Sync before continuing..
        self._simplecomm.sync()
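
A hedged usage sketch for the constructor above, going through the
create_reshaper factory (module paths as in typical PyReshaper releases; the
file names are hypothetical):

from pyreshaper.specification import Specifier
from pyreshaper.reshaper import create_reshaper

spec = Specifier()
spec.input_file_list = ['slice.0001.nc', 'slice.0002.nc']  # hypothetical inputs
spec.output_file_prefix = 'tseries.'
spec.output_file_suffix = '.nc'

# wmode='s' skips existing time-series files (see the docstring above)
rshpr = create_reshaper(spec, serial=True, verbosity=1, wmode='s', once=False)
rshpr.convert()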
Beispiel #14
0
def main(argv):


    # Get command line stuff and store in a dictionary
    s="""verbose sumfile= indir= input_globs= tslice= nPC= sigMul= 
         minPCFail= minRunFail= numRunFile= printVarTest popens 
         jsonfile= mpi_enable nbin= minrange= maxrange= outfile= 
         casejson= npick= pepsi_gm test_failure pop_tol= web_enabled
         pop_threshold= prn_std_mean fIndex= lev= eet= json_case= """
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv,"h",optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)
  
    
    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    opts_dict['prn_std_mean'] = False
    opts_dict['lev'] = 0
    opts_dict['eet'] = 0
    opts_dict['json_case'] = ''
    opts_dict['sumfile'] = ''
    opts_dict['web_enabled'] = False
    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,caller,opts_dict)
    popens = opts_dict['popens']

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])

    # Print out timestamp, input ensemble file and new run directory
    dt=datetime.now()
    verbose = opts_dict['verbose']
    if me.get_rank()==0:
        print '--------pyCECT--------'
        print ' '
        print dt.strftime("%A, %d. %B %Y %I:%M%p")
        print ' '
        if not opts_dict['web_enabled']:
          print 'Ensemble summary file = '+opts_dict['sumfile']
        print ' '
        print 'Testcase file directory = '+opts_dict['indir']    
        print ' '
        print ' '

    # Ensure sensible EET value
    if opts_dict['eet'] and opts_dict['numRunFile'] > opts_dict['eet']:
        pyEnsLib.CECT_usage()
        sys.exit(2)

  
    ifiles=[]
    in_files=[]
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
       with open(opts_dict['casejson']) as fin:
            result=json.load(fin)
            in_files_first=result['not_pick_files']
            in_files=random.sample(in_files_first,opts_dict['npick'])
            print 'Testcase files:'
            print '\n'.join(in_files)
           
    elif opts_dict['json_case']: 
       json_file=opts_dict['json_case']
       if (os.path.exists(json_file)):
          fd=open(json_file)
          metainfo=json.load(fd)
          if 'CaseName' in metainfo:
              casename=metainfo['CaseName']
              if (os.path.exists(opts_dict['indir'])):
                 for name in casename: 
                     wildname='*.'+name+'.*'
                     full_glob_str=os.path.join(opts_dict['indir'],wildname)
                     glob_file=glob.glob(full_glob_str)
                     in_files.extend(glob_file)
       else:
          print "Error: "+opts_dict['json_case']+" does not exist"
          sys.exit()
       print "in_files=",in_files
    else: 
       wildname='*'+opts_dict['input_globs']+'*'
       # Open all input files
       if (os.path.exists(opts_dict['indir'])):
          full_glob_str=os.path.join(opts_dict['indir'],wildname)
          glob_files=glob.glob(full_glob_str)
          in_files.extend(glob_files)
          num_file=len(in_files)
          if opts_dict['numRunFile'] > num_file:
             print "You requested more numRunFile than it is available at the indir, please change"
             sys.exit()
          #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()

    if popens:
        #Partition the input file list 
        in_files_list=me.partition(in_files,func=EqualStride(),involved=True)

    else:
        # Random pick non pop files
        in_files_list=pyEnsLib.Random_pickup(in_files,opts_dict)
        #in_files_list=in_files

    for frun_file in in_files_list:
         if frun_file.find(opts_dict['indir']) != -1:
            frun_temp=frun_file
         else:
            frun_temp=opts_dict['indir']+'/'+frun_file
         if (os.path.isfile(frun_temp)):
             ifiles.append(Nio.open_file(frun_temp,"r"))
         else:
             print "COULD NOT LOCATE FILE " +frun_temp+" EXISTING"
             sys.exit()
   
    if opts_dict['web_enabled']:
       if len(opts_dict['sumfile'])==0:
          opts_dict['sumfile']='/glade/p/cesmdata/cseg/inputdata/validation/'
       opts_dict['sumfile'],machineid,compiler=pyEnsLib.search_sumfile(opts_dict,ifiles) 
       if len(machineid)!=0 and len(compiler)!=0:
          print ' '
          print 'Validation file    : machineid = '+machineid+', compiler = '+compiler
          print 'Found summary file : '+opts_dict['sumfile']
          print ' '
       else:
          print 'Warning: machineid and compiler are unknown'

             

    if popens:
        
        # Read in the included var list
        Var2d,Var3d=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ESP')
        print ' '
        print 'Z-score tolerance = '+'{:3.2f}'.format(opts_dict['pop_tol'])
        print 'ZPR = '+'{:.2%}'.format(opts_dict['pop_threshold'])
        zmall,n_timeslice=pyEnsLib.compare_raw_score(opts_dict,ifiles,me.get_rank(),Var3d,Var2d)  
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(zmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(ifiles),opts_dict['nbin'])) 
            if me.get_rank()==0:
                fout = open(opts_dict['outfile'],"w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout,j,fmt='%-7.2e')
    else:
        # Read all variables from the ensemble summary file
        ens_var_name,ens_avg,ens_stddev,ens_rmsz,ens_gm,num_3d,mu_gm,sigma_gm,loadings_gm,sigma_scores_gm,is_SE_sum,std_gm=pyEnsLib.read_ensemble_summary(opts_dict['sumfile']) 

        if len(ens_rmsz) == 0:
            gmonly = True
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables={}
        if not gmonly:
            for k,v in ens_rmsz.iteritems():
                pyEnsLib.addvariables(variables,k,'zscoreRange',v)

        for k,v in ens_gm.iteritems():
            pyEnsLib.addvariables(variables,k,'gmRange',v)

        # Get 3d variable name list and 2d variable name list separately
        var_name3d=[]
        var_name2d=[]
        for vcount,v in enumerate(ens_var_name):
          if vcount < num_3d:
            var_name3d.append(v)
          else:
            var_name2d.append(v)

        # Get ncol and nlev value
        npts3d,npts2d,is_SE=pyEnsLib.get_ncol_nlev(ifiles[0])
 
        if (is_SE ^ is_SE_sum):
           print 'Warning: the ensemble summary file and the test files appear to use different grids'
           
     
        # Compare the new run and the ensemble summary file to get rmsz score
        results={}
        countzscore=np.zeros(len(ifiles),dtype=np.int32)
        countgm=np.zeros(len(ifiles),dtype=np.int32)
        if not gmonly:
            for fcount,fid in enumerate(ifiles): 
                otimeSeries = fid.variables 
                for var_name in ens_var_name: 
                    orig=otimeSeries[var_name]
                    Zscore,has_zscore=pyEnsLib.calculate_raw_score(var_name,orig[opts_dict['tslice']],npts3d,npts2d,ens_avg,ens_stddev,is_SE,opts_dict,0,0,0) 
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results,'zscore',Zscore,var_name,'f'+str(fcount))


            # Evaluate the new run rmsz score if is in the range of the ensemble summary rmsz zscore range
            for fcount,fid in enumerate(ifiles):
                countzscore[fcount]=pyEnsLib.evaluatestatus('zscore','zscoreRange',variables,'ens',results,'f'+str(fcount))

        # Calculate the new run global mean
        mean3d,mean2d,varlist=pyEnsLib.generate_global_mean_for_summary(ifiles,var_name3d,var_name2d,is_SE,opts_dict['pepsi_gm'],opts_dict)
        means=np.concatenate((mean3d,mean2d),axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results,'means',means[j][i],ens_var_name[j],'f'+str(i))

        # Evaluate the new run global mean if it is in the range of the ensemble summary global mean range
        for fcount,fid in enumerate(ifiles):
            countgm[fcount]=pyEnsLib.evaluatestatus('means','gmRange',variables,'gm',results,'f'+str(fcount))
      
        # Calculate the PCA scores of the new run
        new_scores,var_list,comp_std_gm=pyEnsLib.standardized(means,mu_gm,sigma_gm,loadings_gm,ens_var_name,opts_dict,ens_avg,me)
        run_index,decision=pyEnsLib.comparePCAscores(ifiles,new_scores,sigma_scores_gm,opts_dict,me)

        # If there is failure, plot out standardized mean and compared standardized mean in box plots
        if opts_dict['prn_std_mean'] and decision == 'FAILED':
            import seaborn as sns
            category={"all_outside99":[],"two_outside99":[],"one_outside99":[],"all_oneside_outside1QR":[]}
            b=list(pyEnsLib.chunk(ens_var_name,10))
            for f,alist in enumerate(b):
                for fc,avar in enumerate(alist):
                    dist_995=np.percentile(std_gm[avar],99.5)
                    dist_75=np.percentile(std_gm[avar],75)
                    dist_25=np.percentile(std_gm[avar],25)
                    dist_05=np.percentile(std_gm[avar],0.5)
                    c=0
                    d=0
                    p=0
                    q=0
                    # chunk() splits ens_var_name into blocks of 10, so the
                    # global index of avar is f*10+fc (not f+fc)
                    vidx=f*10+fc
                    for i in range(comp_std_gm[vidx].size):
                        if comp_std_gm[vidx][i]>dist_995:
                           c=c+1
                        elif comp_std_gm[vidx][i]<dist_05:
                           d=d+1
                        elif (comp_std_gm[vidx][i]<dist_995 and comp_std_gm[vidx][i]>dist_75):
                           p=p+1
                        elif (comp_std_gm[vidx][i]>dist_05 and comp_std_gm[vidx][i]<dist_25):
                           q=q+1
                    if c == 3 or d == 3:
                       category["all_outside99"].append((avar,vidx))
                    elif c == 2 or d == 2:
                       category["two_outside99"].append((avar,vidx))
                    elif c == 1 or d == 1:
                       category["one_outside99"].append((avar,vidx))
                    if p == 3 or q == 3:
                       category["all_oneside_outside1QR"].append((avar,vidx))
            part_name=opts_dict['indir'].split('/')[-1]
            if not part_name:
                part_name=opts_dict['indir'].split('/')[-2]
            for key in sorted(category):
                list_array=[]
                list_array2=[]
                list_var=[]
                value=category[key]
                print "value len=",key,len(value)
                for each_var in value:
                    list_array.append(std_gm[each_var[0]])
                    list_array2.append(comp_std_gm[each_var[1]])
                    list_var.append(each_var[0])
                if len(value) !=0 :
                    ax=sns.boxplot(data=list_array,whis=[0.5,99.5],fliersize=0.0)
                    sns.stripplot(data=list_array2,jitter=True,color="r")
                    sns.plt.xticks(range(len(list_array)),list_var,fontsize=8,rotation=-45)
                    if decision == 'FAILED':
                       sns.plt.savefig(part_name+"_"+key+"_fail.png")
                    else:
                       sns.plt.savefig(part_name+"_"+key+"_pass.png")
                    sns.plt.clf()
                
            '''
            if len(run_index)>0:
               json_file=opts_dict['json_case']
               if (os.path.exists(json_file)):
                  fd=open(json_file)
                  metainfo=json.load(fd)
                  caseindex=metainfo['CaseIndex']
                  enspath=str(metainfo['EnsPath'][0])
                  #print caseindex
                  if (os.path.exists(enspath)):
                     i=0
                     comp_file=[]
                     search = '\.[0-9]{3}\.'
                     for name in in_files_list:
                        s=re.search(search,name)
                        in_files_index=s.group(0)
                        if in_files_index[1:4] in caseindex:
                           ens_index=str(caseindex[in_files_index[1:4]])
                           wildname='*.'+ens_index+'.*'
                           full_glob_str=os.path.join(enspath,wildname)
                           glob_file=glob.glob(full_glob_str)
                           comp_file.extend(glob_file)
                     print "comp_file=",comp_file                
                     pyEnsLib.plot_variable(in_files_list,comp_file,opts_dict,var_list,run_index,me)
            '''
        # Print out 
        if opts_dict['printVarTest']:
            print '*********************************************** '
            print 'Variable-based testing (for reference only - not used to determine pass/fail)'
            print '*********************************************** '
            for fcount,fid in enumerate(ifiles):
                print ' '
                print 'Run '+str(fcount+1)+":"
                print ' '
                if not gmonly:
                    print '***'+str(countzscore[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble RMSZ distribution***'
                    pyEnsLib.printsummary(results,'ens','zscore','zscoreRange',(fcount),variables,'RMSZ')
                    print ' '
                print '***'+str(countgm[fcount])," of "+str(len(ens_var_name))+' variables are outside of ensemble global mean distribution***'
                pyEnsLib.printsummary(results,'gm','means','gmRange',fcount,variables,'global mean')
                print ' '
                print '----------------------------------------------------------------------------'
    if me.get_rank() == 0:
        print ' '
        print "Testing complete."
        print ' '
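
Both pyCECT drivers above parse long options with the same pattern; a minimal
self-contained sketch (option keys abbreviated, parsing simplified relative to
pyEnsLib.getopt_parseconfig):

import getopt
import sys

optkeys = ['verbose', 'sumfile=', 'indir=']  # '=' marks value-taking options
opts, args = getopt.getopt(sys.argv[1:], 'h', optkeys)

opts_dict = {'verbose': False, 'sumfile': '', 'indir': ''}  # defaults
for flag, value in opts:
    key = flag.lstrip('-')
    if key in opts_dict:
        opts_dict[key] = True if value == '' else value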
def main(argv):

    print('Running pyEnsSum!')

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict={}

    # Defaults
    opts_dict['tag'] = ''
    opts_dict['compset'] = ''
    opts_dict['mach'] = ''
    opts_dict['esize'] = 151
    opts_dict['tslice'] = 0
    opts_dict['res'] = ''
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = False
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    if (verbose == True):
        print(opts_dict)
        print('Ensemble size for summary = ', esize)

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] and opts_dict['res']):
       print('Please specify --tag, --compset, --mach and --res options')
       sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist=[]

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])


    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            # Read in the excluded var list
            ex_varlist = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ES')

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)
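        # (Duplicate() is a broadcast-style partition: every rank receives
        #  the full excluded-variable list.)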

    in_files=[]
    if(os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files=sorted(in_files_temp)
        #print in_files
        # Make sure we have enough
        num_files = len(in_files)
        if (verbose == True):
            print('Number of files in input directory = ', num_files)
        if (num_files < esize):
            print('Number of files in input directory (',num_files,
                ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if (num_files > esize):
            print('NOTE: Number of files in ', input_dir,
                'is greater than specified ensemble size of ', esize,
                '\nwill just use the first ',  esize, 'files')
    else:
        print('Input directory: ',input_dir,' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
           in_files_list=get_cumul_filelist(opts_dict,opts_dict['indir'],opts_dict['regx'])
        in_files=me.partition(in_files_list,func=EqualLength(),involved=True)
        if me.get_rank()==0:
           print('in_files=',in_files)

    # Open the files in the input directory
    o_files=[]
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir+'/' + onefile)):
            o_files.append(Nio.open_file(input_dir+'/' + onefile,"r"))
        else:
            print("COULD NOT LOCATE FILE "+ input_dir + onefile + "! EXITING....")
            sys.exit()

    # Store dimensions of the input fields
    if (verbose == True):
        print("Getting spatial dimensions")
    nlev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey=''
    latkey=''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key =="lon"):
            nlon = input_dims[key]
            lonkey=key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey=key

    if (nlev == -1) :
        print("COULD NOT LOCATE valid dimension lev => EXITING....")
        sys.exit()

    if (( ncol == -1) and ((nlat == -1) or (nlon == -1))):
        print("Need either lat/lon or ncol  => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if (verbose == True):
        print("Checking dimensions across files....")
        print('lev = ', nlev)
        if (is_SE == True):
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (is_SE == True):
            if ( nlev != int(input_dims["lev"]) or ( ncol != int(input_dims["ncol"]))):
                print("Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!')
                sys.exit()
        else:
            if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\
                  or ( nlon != int(input_dims[lonkey]))):
                print("Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!')
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    if ex_varlist:
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    num_vars = len(vars_dict)
    if (verbose == True):
        print('Number of variables (including metadata) found =  ', num_vars)
    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k,v in vars_dict.iteritems():
        var = k
        vd = v.dimensions # all the variable's dimensions (names)
        vr = v.rank # num dimension
        vs = v.shape # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True): # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev )):
                is_3d = True
                num_3d += 1
        else: # (time, lev, nlon, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev )):
                is_3d = True
                num_3d += 1
        if (is_3d == True) :
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif  (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)


    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()


    # All vars is 3d vars first (sorted), then the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    if (verbose == True):
        print('num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")")

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if (verbose == True):
        print("Creating ", this_sumfile, "  ...")
    if(me.get_rank() ==0 | opts_dict["popens"]):
    if os.path.exists(this_sumfile):
        os.unlink(this_sumfile)

    opt = Nio.options()
    opt.PreFill = False
    opt.Format = 'NetCDF4Classic'
    nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

    # Set dimensions
    if (verbose == True):
        print("Setting dimensions .....")
    if (is_SE == True):
        nc_sumfile.create_dimension('ncol', ncol)
    else:
        nc_sumfile.create_dimension('nlat', nlat)
        nc_sumfile.create_dimension('nlon', nlon)
    nc_sumfile.create_dimension('nlev', nlev)
    nc_sumfile.create_dimension('ens_size', esize)
    nc_sumfile.create_dimension('nvars', num_3d + num_2d)
    nc_sumfile.create_dimension('nvars3d', num_3d)
    nc_sumfile.create_dimension('nvars2d', num_2d)
    nc_sumfile.create_dimension('str_size', str_size)

    # Set global attributes
    now = time.strftime("%c")
    if (verbose == True):
        print("Setting global attributes .....")
    setattr(nc_sumfile, 'creation_date',now)
    setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
    setattr(nc_sumfile, 'tag', opts_dict["tag"])
    setattr(nc_sumfile, 'compset', opts_dict["compset"])
    setattr(nc_sumfile, 'resolution', opts_dict["res"])
    setattr(nc_sumfile, 'machine', opts_dict["mach"])

    # Create variables
    if (verbose == True):
        print("Creating variables .....")
    v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
    v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
    v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
    v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
        if (is_SE == True):
        v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
        v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
        v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'ncol'))
        v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'ncol'))
        else:
        v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

        v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('nvars', 'ens_size'))
    v_gm = nc_sumfile.create_variable("global_mean", 'f', ('nvars', 'ens_size'))
    v_loadings_gm = nc_sumfile.create_variable('loadings_gm','f',('nvars','nvars'))
    v_mu_gm = nc_sumfile.create_variable('mu_gm','f',('nvars',))
    v_sigma_gm = nc_sumfile.create_variable('sigma_gm','f',('nvars',))
    v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm','f',('nvars',))


    # Assign vars, var3d and var2d
    if (verbose == True):
        print("Assigning vars, var3d, and var2d .....")

    eq_all_var_names =[]
    eq_d3_var_names = []
    eq_d2_var_names = []

    l_eq = len(all_var_names)
    for i in range(l_eq):
        tt = list(all_var_names[i])
        l_tt = len(tt)
        if (l_tt < str_size):
        extra = list(' ')*(str_size - l_tt)
        tt.extend(extra)
        eq_all_var_names.append(tt)

    l_eq = len(d3_var_names)
    for i in range(l_eq):
        tt = list(d3_var_names[i])
        l_tt = len(tt)
        if (l_tt < str_size):
        extra = list(' ')*(str_size - l_tt)
        tt.extend(extra)
        eq_d3_var_names.append(tt)

    l_eq = len(d2_var_names)
    for i in range(l_eq):
        tt = list(d2_var_names[i])
        l_tt = len(tt)
        if (l_tt < str_size):
        extra = list(' ')*(str_size - l_tt)
        tt.extend(extra)
        eq_d2_var_names.append(tt)

    v_vars[:] = eq_all_var_names[:]
    v_var3d[:] = eq_d3_var_names[:]
    v_var2d[:] = eq_d2_var_names[:]
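        # e.g. with str_size = 8 the name 'T' is stored as
        # ['T', ' ', ' ', ' ', ' ', ' ', ' ', ' '], so every row of the 'S1'
        # character arrays above has the same fixed width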

        # Time-invariant metadata
        if (verbose == True):
            print("Assigning time invariant metadata .....")
        lev_data = vars_dict["lev"]
        v_lev[:] = lev_data[:]

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable; the max norm is also computed (currently done in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc=me.partition(d3_var_names,func=EqualStride(),involved=True)
        var2_list_loc=me.partition(d2_var_names,func=EqualStride(),involved=True)
    else:
        var3_list_loc=d3_var_names
        var2_list_loc=d2_var_names

    # Calculate global means #
    if (verbose == True):
        print("Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d,gm2d = pyEnsLib.generate_global_mean_for_summary(o_files,var3_list_loc,var2_list_loc , is_SE, False,opts_dict)
    if (verbose == True):
        print("Finish calculating global means .....")

    # Calculate RMSZ scores
    if (verbose == True):
        print("Calculating RMSZ scores .....")
    if (not opts_dict['gmonly']) or (opts_dict['cumul']):
        zscore3d,zscore2d,ens_avg3d,ens_stddev3d,ens_avg2d,ens_stddev2d,temp1,temp2=pyEnsLib.calc_rmsz(o_files,var3_list_loc,var2_list_loc,is_SE,opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if (verbose == True):
            print("Calculating max norm of ensembles .....")
        pyEnsLib.calculate_maxnormens(opts_dict,var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict,var2_list_loc)

    if opts_dict['mpi_enable'] & ( not opts_dict['popens']):

        if not opts_dict['cumul']:
        # Gather the 3d variable results from all processors to the master processor
        slice_index=get_stride_list(len(d3_var_names),me)

        # Gather global means 3d results
        gm3d=gather_npArray(gm3d,me,slice_index,(len(d3_var_names),len(o_files)))

        if not opts_dict['gmonly']:
        # Gather zscore3d results
        zscore3d=gather_npArray(zscore3d,me,slice_index,(len(d3_var_names),len(o_files)))

        # Gather ens_avg3d and ens_stddev3d results
        shape_tuple3d=get_shape(ens_avg3d.shape,len(d3_var_names),me.get_rank())
        ens_avg3d=gather_npArray(ens_avg3d,me,slice_index,shape_tuple3d)
        ens_stddev3d=gather_npArray(ens_stddev3d,me,slice_index,shape_tuple3d)

        # Gather 2d variable results from all processors to the master processor
        slice_index=get_stride_list(len(d2_var_names),me)

        # Gather global means 2d results
        gm2d=gather_npArray(gm2d,me,slice_index,(len(d2_var_names),len(o_files)))

        if not opts_dict['gmonly']:
        # Gather zscore2d results
        zscore2d=gather_npArray(zscore2d,me,slice_index,(len(d2_var_names),len(o_files)))

        # Gather ens_avg3d and ens_stddev2d results
        shape_tuple2d=get_shape(ens_avg2d.shape,len(d2_var_names),me.get_rank())
        ens_avg2d=gather_npArray(ens_avg2d,me,slice_index,shape_tuple2d)
        ens_stddev2d=gather_npArray(ens_stddev2d,me,slice_index,shape_tuple2d)

        else:
        gmall=np.concatenate((temp1,temp2),axis=0)
            gmall=pyEnsLib.gather_npArray_pop(gmall,me,(me.get_size(),len(d3_var_names)+len(d2_var_names)))
    # Assign to file:
    if me.get_rank() == 0 or opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall=np.concatenate((gm3d,gm2d),axis=0)
            if not opts_dict['gmonly']:
                Zscoreall=np.concatenate((zscore3d,zscore2d),axis=0)
                v_RMSZ[:,:]=Zscoreall[:,:]
            if not opts_dict['gmonly']:
                if (is_SE == True):
                    v_ens_avg3d[:,:,:]=ens_avg3d[:,:,:]
                    v_ens_stddev3d[:,:,:]=ens_stddev3d[:,:,:]
                    v_ens_avg2d[:,:]=ens_avg2d[:,:]
                    v_ens_stddev2d[:,:]=ens_stddev2d[:,:]
                else:
                    v_ens_avg3d[:,:,:,:]=ens_avg3d[:,:,:,:]
                    v_ens_stddev3d[:,:,:,:]=ens_stddev3d[:,:,:,:]
                    v_ens_avg2d[:,:,:]=ens_avg2d[:,:,:]
                    v_ens_stddev2d[:,:,:]=ens_stddev2d[:,:,:]
        else:
            gmall_temp=np.transpose(gmall[:,:])
            gmall=gmall_temp
        mu_gm,sigma_gm,standardized_global_mean,loadings_gm,scores_gm=pyEnsLib.pre_PCA(gmall)
        v_gm[:,:]=gmall[:,:]
        v_mu_gm[:]=mu_gm[:]
        v_sigma_gm[:]=sigma_gm[:].astype(np.float32)
        v_loadings_gm[:,:]=loadings_gm[:,:]
        v_sigma_scores_gm[:]=scores_gm[:]

    print("All Done")

def get_cumul_filelist(opts_dict,indir,regx):
    if not opts_dict['indir']:
        print('input dir is not specified')
        sys.exit(2)
    #regx='(pgi(.)*-(01|02))'
    regx_list=["mon","gnu","pgi"]
    all_files=[]
    for prefix in regx_list:
        for i in range(opts_dict['fIndex'],opts_dict['fIndex']+opts_dict['esize']//3):
            for j in range(opts_dict['startMon'],opts_dict['endMon']+1):
                mon_str=str(j).zfill(2)
                regx='(^'+prefix+'(.)*'+str(i)+'(.)*-('+mon_str+'))'
                print('regx=',regx)
                res=[f for f in os.listdir(indir) if re.search(regx,f)]
                in_files=sorted(res)
                all_files.extend(in_files)
    print("all_files=",all_files)
    #in_files=res
    return all_files
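# e.g. with prefix 'gnu', i = 151 and mon_str = '01', the pattern becomes
# '(^gnu(.)*151(.)*-(01))': file names starting with 'gnu' that contain '151'
# followed later by '-01'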





#
# Build the gather shape tuple: the local array shape with its first
# dimension replaced by the full variable-list length
#
def get_shape(shape_tuple,shape1,rank):
    lst=list(shape_tuple)
    lst[0]=shape1
    shape_tuple=tuple(lst)
    return shape_tuple

#
# Get the MPI partition (stride) index list for each processor
#
def get_stride_list(len_of_list,me):
    slice_index=[]
    for i in range(me.get_size()):
        index_arr=np.arange(len_of_list)
        slice_index.append(index_arr[i::me.get_size()])
    return slice_index
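# e.g. with len_of_list = 10 and 4 processors, the stride slices are
#   rank 0 -> [0, 4, 8], rank 1 -> [1, 5, 9], rank 2 -> [2, 6], rank 3 -> [3, 7]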

#
# Gather the per-processor arrays (partitioned by variable list) onto the
# master processor and reassemble them into one array
#
def gather_npArray(npArray,me,slice_index,array_shape):
    the_array=np.zeros(array_shape,dtype=np.float32)
    if me.get_rank()==0:
        k=0
        for j in slice_index[me.get_rank()]:
            the_array[j,:]=npArray[k,:]
            k=k+1
    for i in range(1,me.get_size()):
        if me.get_rank() == 0:
            rank,npArray=me.collect()
            k=0
            for j in slice_index[rank]:
                the_array[j,:]=npArray[k,:]
                k=k+1
    if me.get_rank() != 0:
        message={"from_rank":me.get_rank(),"shape":npArray.shape}
        me.collect(npArray)
    me.sync()
    return the_array
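# A minimal usage sketch (shapes hypothetical, assuming the ASAPTools
# simplecomm API used throughout this file): each rank fills the rows for its
# strided subset of variables, then rank 0 reassembles the full array.
#
#   me = simplecomm.create_comm()
#   my_vars = me.partition(d3_var_names, func=EqualStride(), involved=True)
#   my_rows = np.zeros((len(my_vars), len(o_files)), dtype=np.float32)
#   slice_index = get_stride_list(len(d3_var_names), me)
#   gm_full = gather_npArray(my_rows, me, slice_index,
#                            (len(d3_var_names), len(o_files)))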

if __name__ == "__main__":
    main(sys.argv[1:])
Beispiel #16
0
    def setUp(self):
        self.gcomm = simplecomm.create_comm()
        self.size = MPI_COMM_WORLD.Get_size()
        self.rank = MPI_COMM_WORLD.Get_rank()
def main(argv):


    # Get command line stuff and store in a dictionary
    s='verbose sumfile= indir= input_globs= tslice= nPC= sigMul= minPCFail= minRunFail= numRunFile= printVarTest popens jsonfile= mpi_enable nbin= minrange= maxrange= outfile= casejson= npick= pepsi_gm test_failure pop_tol= pop_threshold='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv,"h",optkeys)
    except getopt.GetoptError:
        pyEnsLib.CECT_usage()
        sys.exit(2)


    # Set the default value for options
    opts_dict = {}
    opts_dict['input_globs'] = ''
    opts_dict['indir'] = ''
    opts_dict['tslice'] = 1
    opts_dict['nPC'] = 50
    opts_dict['sigMul'] = 2
    opts_dict['verbose'] = False
    opts_dict['minPCFail'] = 3
    opts_dict['minRunFail'] = 2
    opts_dict['numRunFile'] = 3
    opts_dict['printVarTest'] = False
    opts_dict['popens'] = False
    opts_dict['jsonfile'] = ''
    opts_dict['mpi_enable'] = False
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['outfile'] = 'testcase.result'
    opts_dict['casejson'] = ''
    opts_dict['npick'] = 10
    opts_dict['pepsi_gm'] = False
    opts_dict['test_failure'] = True
    opts_dict['pop_tol'] = 3.0
    opts_dict['pop_threshold'] = 0.90
    # Call utility library getopt_parseconfig to parse the option keys
    # and save to the dictionary
    caller = 'CECT'
    gmonly = False
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,caller,opts_dict)
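    # Example invocation (script name and paths hypothetical):
    #   python pyCECT.py --sumfile ens.summary.nc --indir /path/to/test/runs \
    #       --tslice 1 --printVarTest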
    popens = opts_dict['popens']

    # Print out timestamp, input ensemble file and new run directory
    dt=datetime.now()
    verbose = opts_dict['verbose']
    print('--------pyCECT--------')
    print(' ')
    print(dt.strftime("%A, %d. %B %Y %I:%M%p"))
    print(' ')
    print('Ensemble summary file = '+opts_dict['sumfile'])
    print(' ')
    print('Testcase file directory = '+opts_dict['indir'])
    print(' ')
    print(' ')

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])

    ifiles=[]
    in_files=[]
    # Random pick pop files from not_pick_files list
    if opts_dict['casejson']:
       with open(opts_dict['casejson']) as fin:
            result=json.load(fin)
            in_files_first=result['not_pick_files']
            in_files=random.sample(in_files_first,opts_dict['npick'])
            print('Testcase files:')
            print('\n'.join(in_files))

    else:
       wildname='*'+opts_dict['input_globs']+'*'
       # Open all input files
       if (os.path.exists(opts_dict['indir'])):
          full_glob_str=os.path.join(opts_dict['indir'],wildname)
          glob_files=glob.glob(full_glob_str)
          in_files.extend(glob_files)
          #in_files_temp=os.listdir(opts_dict['indir'])
    in_files.sort()

    if popens:
        #Partition the input file list
        in_files_list=me.partition(in_files,func=EqualStride(),involved=True)

    else:
        # Random pick non pop files
        in_files_list=pyEnsLib.Random_pickup(in_files,opts_dict)
    for frun_file in in_files_list:
         if frun_file.find(opts_dict['indir']) != -1:
            frun_temp=frun_file
         else:
            frun_temp=opts_dict['indir']+'/'+frun_file
         if (os.path.isfile(frun_temp)):
             ifiles.append(Nio.open_file(frun_temp,"r"))
         else:
             print("COULD NOT LOCATE FILE " +frun_temp+" EXISTING")
             sys.exit()

    if popens:

        # Read in the included var list
        Var2d,Var3d=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ESP')
        print(' ')
        print('Z-score tolerance = '+'{:3.2f}'.format(opts_dict['pop_tol']))
        print('ZPR = '+'{:.2%}'.format(opts_dict['pop_threshold']))
        zmall,n_timeslice=pyEnsLib.compare_raw_score(opts_dict,ifiles,me.get_rank(),Var3d,Var2d)
        #zmall = np.concatenate((Zscore3d,Zscore2d),axis=0)
        np.set_printoptions(threshold=np.nan)

        if opts_dict['mpi_enable']:
            zmall = pyEnsLib.gather_npArray_pop(zmall,me,(me.get_size(),len(Var3d)+len(Var2d),len(ifiles),opts_dict['nbin']))
            if me.get_rank()==0:
                fout = open(opts_dict['outfile'],"w")
                for i in range(me.get_size()):
                    for j in zmall[i]:
                        np.savetxt(fout,j,fmt='%-7.2e')
    else:
        # Read all variables from the ensemble summary file
        ens_var_name,ens_avg,ens_stddev,ens_rmsz,ens_gm,num_3d,mu_gm,sigma_gm,loadings_gm,sigma_scores_gm,is_SE_sum=pyEnsLib.read_ensemble_summary(opts_dict['sumfile'])

        if len(ens_rmsz) == 0:
            gmonly = True
        # Add ensemble rmsz and global mean to the dictionary "variables"
        variables={}
        if not gmonly:
            for k,v in ens_rmsz.items():
                pyEnsLib.addvariables(variables,k,'zscoreRange',v)

        for k,v in ens_gm.items():
            pyEnsLib.addvariables(variables,k,'gmRange',v)

        # Get the 3d variable name list and the 2d variable name list separately
        var_name3d=[]
        var_name2d=[]
        for vcount,v in enumerate(ens_var_name):
            if vcount < num_3d:
                var_name3d.append(v)
            else:
                var_name2d.append(v)

        # Get ncol and nlev value
        npts3d,npts2d,is_SE=pyEnsLib.get_ncol_nlev(ifiles[0])

        if (is_SE ^ is_SE_sum):
            print('Warning: the ensemble summary file and the testing files use different grids')

        # Compare the new run with the ensemble summary file to get the rmsz score
        results={}
        countzscore=np.zeros(len(ifiles),dtype=np.int32)
        countgm=np.zeros(len(ifiles),dtype=np.int32)
        if not gmonly:
            for fcount,fid in enumerate(ifiles):
                otimeSeries = fid.variables
                for var_name in ens_var_name:
                    orig=otimeSeries[var_name]
                    Zscore,has_zscore=pyEnsLib.calculate_raw_score(var_name,orig[opts_dict['tslice']],npts3d,npts2d,ens_avg,ens_stddev,is_SE,opts_dict,0,0,0)
                    if has_zscore:
                        # Add the new run rmsz zscore to the dictionary "results"
                        pyEnsLib.addresults(results,'zscore',Zscore,var_name,'f'+str(fcount))

            # Evaluate whether each new run rmsz score is within the ensemble summary rmsz range
            for fcount,fid in enumerate(ifiles):
                countzscore[fcount]=pyEnsLib.evaluatestatus('zscore','zscoreRange',variables,'ens',results,'f'+str(fcount))

        # Calculate the new run global mean
        mean3d,mean2d=pyEnsLib.generate_global_mean_for_summary(ifiles,var_name3d,var_name2d,is_SE,opts_dict['pepsi_gm'],opts_dict)
        means=np.concatenate((mean3d,mean2d),axis=0)

        # Add the new run global mean to the dictionary "results"
        for i in range(means.shape[1]):
            for j in range(means.shape[0]):
                pyEnsLib.addresults(results,'means',means[j][i],ens_var_name[j],'f'+str(i))

        # Evaluate whether each new run global mean is within the ensemble summary global mean range
        for fcount,fid in enumerate(ifiles):
            countgm[fcount]=pyEnsLib.evaluatestatus('means','gmRange',variables,'gm',results,'f'+str(fcount))

        # Calculate the PCA scores of the new run
        new_scores=pyEnsLib.standardized(means,mu_gm,sigma_gm,loadings_gm)
        pyEnsLib.comparePCAscores(ifiles,new_scores,sigma_scores_gm,opts_dict)
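        # (Conceptually: the run's global means are centered with mu_gm, scaled
        #  by sigma_gm, and projected onto loadings_gm, giving PCA scores that
        #  are compared against the ensemble's sigma_scores_gm distribution.)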

        # Print out
        if opts_dict['printVarTest']:
            print('*********************************************** ')
            print('Variable-based testing (for reference only - not used to determine pass/fail)')
            print('*********************************************** ')
            for fcount,fid in enumerate(ifiles):
                print(' ')
                print('Run '+str(fcount+1)+":")
                print(' ')
                if not gmonly:
                    print('***'+str(countzscore[fcount])+" of "+str(len(ens_var_name))+' variables are outside of ensemble RMSZ distribution***')
                    pyEnsLib.printsummary(results,'ens','zscore','zscoreRange',(fcount),variables,'RMSZ')
                    print(' ')
                print('***'+str(countgm[fcount])+" of "+str(len(ens_var_name))+' variables are outside of ensemble global mean distribution***')
                pyEnsLib.printsummary(results,'gm','means','gmRange',fcount,variables,'global mean')
                print(' ')
                print('----------------------------------------------------------------------------')

if __name__ == "__main__":
    main(sys.argv[1:])
    print(' ')
    print("Testing complete.")
        debugMsg('calling initialize_main', header=True, verbosity=1)
        envDict = initialize_main(envDict, caseroot, debugMsg, options.standalone)

        debugMsg('calling setup_config', header=True, verbosity=1)
        setup_config(envDict)

    main_comm.sync()



#===================================


if __name__ == "__main__":
    # initialize simplecomm object
    main_comm = simplecomm.create_comm(serial=True)

    # setup an overall timer
    timer = timekeeper.TimeKeeper()

    # get commandline options
    options = commandline_options()

    # initialize global vprinter object for printing debug messages
    if options.debug:
        print("debug level = {0}".format(options.debug[0]))
        header = "[" + str(main_comm.get_rank()) + "/" + str(main_comm.get_size()) + "]: DEBUG... "
        debugMsg = vprinter.VPrinter(header=header, verbosity=options.debug[0])
   
    try:
        timer.start("Total Time")
Beispiel #19
0
    def __init__(self,
                 specifiers,
                 serial=False,
                 verbosity=1,
                 skip_existing=False,
                 overwrite=False,
                 once=False,
                 simplecomm=None):
        """
        Constructor

        Parameters:
            specifiers (dict): A dict of named Specifier instances, each
                defining an input specification for this reshaper operation.

        Keyword Arguments:
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel
                (False).  The default is to assume parallel operation
                (but serial will be chosen if mpi4py cannot be
                found when trying to initialize the decomposition).
            verbosity (int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            skip_existing (bool): Flag specifying whether to skip the generation
                of time-series for variables with time-series files that already
                exist.  Default is False.
            overwrite (bool): Flag specifying whether to forcefully overwrite
                output files if they already exist.  Default is False.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel 
                communication, if necessary
        """

        # Check types
        if not isinstance(specifiers, dict):
            err_msg = "Input must be given in a dictionary of Specifiers"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(skip_existing) is not bool:
            err_msg = "Skip_existing flag must be True or False."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, SimpleComm):
                err_msg = "Simple communicator object is not a SimpleComm"
                raise TypeError(err_msg)

        # Whether to write to a once file
        self._use_once_file = once

        # Whether to skip generating time-series for variables with existing output files
        self._skip_existing = skip_existing

        # Whether to forcefully overwrite output files
        self._overwrite = overwrite

        # Store the list of specifiers
        self._specifiers = specifiers

        # Store the serial flag
        self._serial = serial

        # Check for a SimpleComm, and if none create it
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)

        # Store the communicator used for parallel operation
        self._simplecomm = simplecomm

        # Store the verbosity
        self._verbosity = verbosity

        # Set the verbose printer
        self._vprint = VPrinter(verbosity=verbosity)

        # Storage for timing data
        self._times = {}

        # Orders for printing timing data
        self._time_orders = {}

        # Storage for all byte counters
        self._byte_counts = {}
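        # A minimal construction sketch (class and Specifier names are
        # hypothetical; only this __init__ appears in the snippet):
        #   specs = {'run1': Specifier(...), 'run2': Specifier(...)}
        #   reshaper = MultiSpecReshaper(specs, serial=False, verbosity=1,
        #                                skip_existing=True, overwrite=False)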
Beispiel #20
0
            oldfile = os.path.join(olddir, filename)
            if oldfile in oldfiles:
                item_dict["old"] = oldfile
                oldfiles.remove(oldfile)
                items_to_check.append(item_dict)
            else:
                item_dict["old"] = None
                unchecked_new_items.append(item_dict)
        for oldfile in oldfiles:
            item_dict = {"test": test_name}
            item_dict["new"] = None
            item_dict["old"] = oldfile
            unchecked_old_items.append(item_dict)

    # Get a basic MPI comm
    comm = create_comm(serial=(args.serial or args.list_tests))

    # Print tests that will be checked
    if comm.is_manager():
        if args.multispec:
            print "Checking multitest results."
        else:
            print "Checking individual test results."
        print

        for test_name in tests_to_check:
            print "Test {0!s}:".format(test_name)
            num_chk = sum(1 for i in items_to_check if i["test"] == test_name)
            num_new = num_chk + sum(1 for i in unchecked_new_items if i["test"] == test_name)
            num_old = num_chk + sum(1 for i in unchecked_old_items if i["test"] == test_name)
            print "   Checking {0!s} of {1!s}".format(num_chk, num_new),
Beispiel #21
0
def main(argv=None):
    args = cli(argv)

    # Create the necessary SimpleComm
    scomm = create_comm(serial=args.serial)

    # Do setup only on manager node
    if scomm.is_manager():

        # Check that the specfile exists
        if not exists(args.stdfile):
            raise OSError(("Output specification file {!r} not "
                           "found").format(args.stdfile))

        # Read the specfile into a dictionary
        print("Reading standardization file: {}".format(args.stdfile))
        dsdict = json_load(open(args.stdfile, "r"),
                           object_pairs_hook=OrderedDict)

        # Parse the output Dataset
        print(
            "Creating output dataset descriptor from standardization file...")
        outds = OutputDatasetDesc(dsdict=dsdict)

    else:
        outds = None

    # Send the output descriptor to all nodes
    outds = scomm.partition(outds, func=Duplicate(), involved=True)

    # Sync
    scomm.sync()

    # Continue setup only on manager node
    if scomm.is_manager():

        # Gather the list of input files
        infiles = []
        for infile in args.infiles:
            infiles.extend(glob(infile))

        # If no input files, stop here
        if len(infiles) == 0:
            print("Standardization file validated.")
            return

        # Parse the input Dataset
        print(
            "Creating input dataset descriptor from {} input files...".format(
                len(infiles)))
        inpds = InputDatasetDesc(filenames=infiles)

    else:
        inpds = None

    # Send the input descriptor to all nodes
    inpds = scomm.partition(inpds, func=Duplicate(), involved=True)

    # Sync and continue process on all nodes
    scomm.sync()

    # Check for warn/error
    if args.error:
        simplefilter("error", ValidationWarning)

    # Try importing all of the necessary user-defined modules
    if args.module is not None:
        for i, modpath in enumerate(args.module):
            load_source("user{}".format(i), modpath)

    # Setup the PyConform data flow on all nodes
    if scomm.is_manager():
        print("Creating the data flow...")
    dataflow = DataFlow(inpds, outds)

    # Execute the data flow (write to files)
    history = not args.no_history
    dataflow.execute(
        chunks=dict(args.chunks),
        scomm=scomm,
        history=history,
        deflate=args.deflate,
        debug=args.debug,
    )
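    # (args.chunks is assumed to arrive as (dimension, size) pairs from the
    #  CLI, e.g. [('time', 240)], so dict(args.chunks) == {'time': 240} chunks
    #  the output along the 'time' dimension.)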
Beispiel #22
0
        debugMsg('calling setup_config', header=True, verbosity=1)
        setup_config(envDict)

        debugMsg('expanding variables in batch script',
                 header=True,
                 verbosity=1)
        expand_batch_vars(envDict, imb_name)

    main_comm.sync()


#===================================

if __name__ == "__main__":
    # initialize simplecomm object
    main_comm = simplecomm.create_comm(serial=True)

    # setup an overall timer
    timer = timekeeper.TimeKeeper()

    # get commandline options
    options = commandline_options()

    # initialize global vprinter object for printing debug messages
    if options.debug:
        print("debug level = {0}".format(options.debug[0]))
        header = "[" + str(main_comm.get_rank()) + "/" + str(
            main_comm.get_size()) + "]: DEBUG... "
        debugMsg = vprinter.VPrinter(header=header, verbosity=options.debug[0])

    try:
Beispiel #23
0
def main(argv):


    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try: 
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict={}
    
    # Defaults
    opts_dict['tag'] = 'cesm2_0_beta10'
    opts_dict['compset'] = 'F2000climo'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments 
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ES',opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)


    if opts_dict['popens']:
        print "Error: Please use pyEnsSumPop.py for a POP ensemble (not --popens)."
        sys.exit()

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach'] and opts_dict['res']):
       print 'Please specify --tag, --compset, --mach and --res options'
       sys.exit()
       
    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist=[]
    inc_varlist=[]

    # Create a mpi simplecomm object
    if opts_dict['mpi_enable']:
        me=simplecomm.create_comm()
    else:
        me=simplecomm.create_comm(not opts_dict['mpi_enable'])
    
    if me.get_rank() == 0:
       print 'Running pyEnsSum!'

    if me.get_rank() ==0 and (verbose == True):
        print opts_dict
        print 'Ensemble size for summary = ', esize

    exclude=False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist=[]
            # Read in the excluded or included var list
            ex_varlist,exclude=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')
            if exclude == False:
               inc_varlist=ex_varlist
               ex_varlist=[]
            # Read in the included var list
            #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

    # Broadcast the excluded var list to each processor
    #if opts_dict['mpi_enable']:
    #   ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude=me.partition(exclude,func=Duplicate(),involved=True)
        if exclude:
           ex_varlist=me.partition(ex_varlist,func=Duplicate(),involved=True)
        else:
           inc_varlist=me.partition(inc_varlist,func=Duplicate(),involved=True)
        
    in_files=[]
    if(os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files=sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank()==0 and (verbose == True):
            print 'Number of files in input directory = ', num_files
        if (num_files < esize):
            if me.get_rank()==0 and (verbose == True):
               print 'Number of files in input directory (',num_files,\
                ') is less than specified ensemble size of ', esize
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank()==0 and (verbose == True):
               print 'NOTE: Number of files in ', input_dir, \
                'is greater than specified ensemble size of ', esize ,\
                '\nwill just use the first ',  esize, 'files'
    else:
        if me.get_rank()==0:
           print 'Input directory: ',input_dir,' not found'
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
           in_files_list=get_cumul_filelist(opts_dict,opts_dict['indir'],opts_dict['regx'])
        in_files=me.partition(in_files_list,func=EqualLength(),involved=True)
        if me.get_rank()==0 and (verbose == True):
           print 'in_files=',in_files

    # Open the files in the input directory
    o_files=[]
    if me.get_rank() == 0 and opts_dict['verbose']:
       print 'Input files are: '
       print "\n".join(in_files)
       #for i in in_files:
       #    print "in_files =",i
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir+'/' + onefile)):
            o_files.append(Nio.open_file(input_dir+'/' + onefile,"r"))
        else:
            if me.get_rank()==0:
               print "COULD NOT LOCATE FILE ", input_dir+'/'+onefile , "! EXITING...."
            sys.exit() 

    # Store dimensions of the input fields
    if me.get_rank()==0 and (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey=''
    latkey=''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key =="lon"):
            nlon = input_dims[key]
            lonkey=key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey=key
        
    if (nlev == -1) : 
        if me.get_rank()==0: 
           print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit() 

    if (( ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank()==0: 
           print "Need either lat/lon or ncol  => EXITING...."
        sys.exit()            

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True 
    else:
        is_SE = False    

    # Make sure all files have the same dimensions
    if me.get_rank()==0 and (verbose == True):
        print "Checking dimensions across files...."
        print 'lev = ', nlev
        if (is_SE == True):
            print 'ncol = ', ncol
        else:
            print 'nlat = ', nlat
            print 'nlon = ', nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions     
        if (is_SE == True):
            if ( nlev != int(input_dims["lev"]) or ( ncol != int(input_dims["ncol"]))):
                if me.get_rank() == 0:
                   print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit() 
        else:
            if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\
                  or ( nlon != int(input_dims[lonkey]))): 
                if me.get_rank() == 0:
                   print "Dimension mismatch between ", in_files[0], 'and', in_files[0], '!!!'
                sys.exit() 

    # Get 2d vars, 3d vars and all vars (For now include all variables) 
    vars_dict_all = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    #print len(vars_dict_all)
    if exclude:
        vars_dict=vars_dict_all
        for i in ex_varlist:
          if i in vars_dict:
            del vars_dict[i]
    #Given an included var list, remove all float var that are not on the list
    else:
        vars_dict=vars_dict_all.copy()
        for k,v in vars_dict_all.iteritems():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode()=='f'):
                #print vars_dict_all[k].typecode()
                #print k
                del vars_dict[k]
 
    num_vars = len(vars_dict)
    #print num_vars
    #if me.get_rank() == 0:
    #   for k,v in vars_dict.iteritems():
    #       print 'vars_dict',k,vars_dict[k].typecode()

    str_size = 0
Beispiel #24
0
  def __init__(self, in_directory,
               out_directory,
               prefix,
               suffix,
               file_pattern='null',
               date_pattern='null',
               m_id=['-999'],
               hist_type='slice',
               avg_list=[],
               weighted=False,
               split=False,
               split_files='null',
               split_orig_size='null',
               ncformat='netcdf4c',
               varlist=[],
               serial=False,
               mean_diff_rms_obs_dir='null',
               region_nc_var='null',
               regions={},
               region_wgt_var='null',
               obs_file='null',
               reg_obs_file_suffix='null',
               obs_dir='null',
               main_comm=None,
               clobber=False,
               ice_obs_file='null',
               reg_file='null',
               ncl_location='null',
               year0=-99,
               year1=-99,
               collapse_dim='',
               vertical_levels=60):
    '''
    Initializes the internal data with optional arguments

    @param in_directory     Where the input directory resides (needs full path).

    @param out_directory    Where the output will be produced (needs full path).

    @param prefix           String specifying the full file name before the date string.

    @param suffix           String specifying the suffix of the file names

    @param file_pattern     File pattern used put the prefix, date, and suffix together for input files.

    @param date_pattern     The pattern used to decipher the date string within the file name.  

    @param m_id             Array of member identifiers.  All averages will be done on each member individually and then across all members.

    @param hist_type	    Type of file ('slice' or 'series').  Default is 'slice'.

    @param avg_list	    List of averages that need to be computed.  Elements should contain aveType:year0:year1.
                            The second year is only required for multi-year averaging.

    @param weighted         Boolean variable to selected if weights will be applied to the averaging.  
			    True = weights will be applied.  Default is False.

    @param split            Boolean variable.  True = the file is split spatially and the final average needs to be pieced together.
			    (ie. CICE times series files) Default is False. 

    @param split_files	    The strings indicating the naming difference between split files.  Expects a string with elements separated by a comma.
                            Default is 'null'.

    @param split_orig_size  A string listing the lat and lon values of the original grid size.  Needed in case some of the grid has been deleted.
		            (example: 'lon=288,lat=192').  Default is 'null'.

    @param ncformat	    Format to output the averaged file(s) in.  Default is 'netcdf4c'.  Other options: 'netcdf','netcdf4','netcdf4c'

    @param varlist	    Optional variables list, if not averaging all variables
 
    @param serial	    Boolean to run in serial mode.  True=serial (without MPI) False=run in parallel(with MPI) False requires mpi4py to be installed.
                            Default is False.

    @param regions          Dictionary that contains regions to average over.  Format is 'string region name: int region value'.  Default is an empty dictionary.

    @param region_nc_var    String that identifies the netcdf variable that contains the region mask used by a regional average.

    @param region_wgt_var   String that identifies the netcdf variable that contains the weights.

    @param obs_file         Observational file used for the creation of the mean_diff_rms file. This file must contain all of the variables within the
                            variable list (or if a variable list is not specified, must contain all hist file variables).  Dimension must be nlon and nlat. 

    @param reg_obs_file_suffix The suffix of the regional, weighted averages of the 'obs_file'.  Used for the creation of the mean_diff_rms file.  

    @param obs_dir          Full path to the observational files used for the mean_diff_rms file.

    @param main_comm        A simplecomm to be used by the PyAverager.  If not specified, one will be created by this specifier. Default None.

    @param clobber          Remove netcdf output file(s) if they exist.  Default False - will exit if an output file of the same name exists. 

    @param ice_obs_file     Full path to the observational file used to create the cice model pre_proc file

    @param reg_file         Full path to the regional file used to create the cice model pre_proc file

    @param ncl_location     Location of where the ncl scripts reside

    @param year0            The first year - only used to create the cice pre_proc file.  

    @param year1            The last year - only used to create the cice pre_proc file. 

    @param collapse_dim     Used to collapse/average over one dimension.

    @param vertical_levels  Number of ocean vertical levels
    '''

    # Where the input is located
    self.in_directory = in_directory

    # Where the output should be produced
    self.out_directory = out_directory

    # Full file name up to the date string
    self.prefix = prefix

    # The suffix of the data files
    self.suffix = suffix

    # Type of file
    self.hist_type = hist_type

    # List of averages to compute
    self.avg_list = avg_list

    # Should weights be applied?
    self.weighted = weighted

    # Are files split spatially?
    self.split = split

    # Split file name indicators
    self.split_files = split_files

    # The original grid size of the split files
    self.split_orig_size = split_orig_size

    # The netcdf output format 
    self.ncformat = ncformat

    # Varlist to average (if not all variables)
    self.varlist = varlist

    # Run in serial mode?  If True, will be ran without MPI
    self.serial = serial

    # Directory where to find the regional obs files for the mean_diff_rms climo file
    self.mean_diff_rms_obs_dir = mean_diff_rms_obs_dir

    # Regions to average over
    self.regions = regions

    # Netcdf variable name that contains a region mask
    self.region_nc_var = region_nc_var

    # Netcdf variable name that contains the weights
    self.region_wgt_var = region_wgt_var

    # String that indicates the suffix of the regional obs files used for the mean_diff_rms file
    self.reg_obs_file_suffix = reg_obs_file_suffix

    # String that indicates the name of the observational file
    self.obs_file = obs_file

    # String indicating the path to the observational files used for the mean_diff_rms file
    self.obs_dir = obs_dir

    # File pattern used to piece together a full file name
    if (file_pattern == 'null'):
        if (hist_type == 'slice'):
            self.file_pattern = ['$prefix','.','$date_pattern','.','$suffix']
        if (hist_type == 'series'):
            if split:
                self.file_pattern = ['$prefix','.','$var','_','$hem','.','$date_pattern','.','$suffix']
            else:
                self.file_pattern = ['$prefix','.','$var','.','$date_pattern','.','$suffix']
    else: 
        self.file_pattern = file_pattern

    # The date pattern to decipher the date within the file name
    self.date_pattern = date_pattern

    self.m_id = m_id

    # Get first and last years used in the averaging by parsing the avg_list
    dates = []
    for avg in avg_list:
      avg_descr = avg.split(':')
      for yr in avg_descr[1:]:
        dates.append(int(yr))
    if (year0 == -99 and year1 == -99):
        self.year0 = int(min(dates))
        self.year1 = int(max(dates)) 
    else:
        self.year0 = int(year0)
        self.year1 = int(year1)     
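    # e.g. (illustrative only) avg_list = ['ya:1850:1859'] yields
    # dates = [1850, 1859], so year0 = 1850 and year1 = 1859 when no explicit
    # year0/year1 are supplied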

    # Initialize a simple_comm object if one was not passed in by the user
    if (main_comm is None):
        from asaptools import simplecomm
        self.main_comm = simplecomm.create_comm(serial=serial)
    else:
        self.main_comm = main_comm

    # True/False, remove the average file(s) if they have already been created
    self.clobber = clobber

    # File that contains the weight/area information
    self.ice_obs_file = ice_obs_file

    # File that exists or will be created that contains a region mask for ice
    self.reg_file = reg_file

    # Location of the ncl script that will be used to create reg_file if it doesn't exist
    self.ncl_location = ncl_location

    # Used to collapse/average over one dim.
    self.collapse_dim = collapse_dim

    # Used to specify the number of ocean vertical levels
    self.vertical_levels = vertical_levels
    def setUp(self):
        self.scomm = simplecomm.create_comm(serial=True)
        self.pcomm = simplecomm.create_comm(serial=False)
        self.size = MPI_COMM_WORLD.Get_size()
        self.rank = MPI_COMM_WORLD.Get_rank()
Beispiel #26
0
def main(argv):

    print "Running pyEnsSum!"

    # Get command line stuff and store in a dictionary
    s = "tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex="
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict["tag"] = ""
    opts_dict["compset"] = ""
    opts_dict["mach"] = ""
    opts_dict["esize"] = 151
    opts_dict["tslice"] = 0
    opts_dict["res"] = ""
    opts_dict["sumfile"] = "ens.summary.nc"
    opts_dict["indir"] = "./"
    opts_dict["sumfiledir"] = "./"
    opts_dict["jsonfile"] = ""
    opts_dict["verbose"] = True
    opts_dict["mpi_enable"] = False
    opts_dict["maxnorm"] = False
    opts_dict["gmonly"] = False
    opts_dict["popens"] = False
    opts_dict["cumul"] = False
    opts_dict["regx"] = "test"
    opts_dict["startMon"] = 1
    opts_dict["endMon"] = 1
    opts_dict["fIndex"] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, "ES", opts_dict)

    verbose = opts_dict["verbose"]

    st = opts_dict["esize"]
    esize = int(st)

    if verbose == True:
        print opts_dict
        print "Ensemble size for summary = ", esize

    if not (opts_dict["tag"] and opts_dict["compset"] and opts_dict["mach"] or opts_dict["res"]):
        print "Please specify --tag, --compset, --mach and --res options"
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict["indir"]
    # The var list that will be excluded
    ex_varlist = []

    # Create a mpi simplecomm object
    if opts_dict["mpi_enable"]:
        me = simplecomm.create_comm()
    else:
        me = simplecomm.create_comm(not opts_dict["mpi_enable"])

    if me.get_rank() == 0:
        if opts_dict["jsonfile"]:
            # Read in the excluded var list
            ex_varlist = pyEnsLib.read_jsonlist(opts_dict["jsonfile"], "ES")

    # Broadcast the excluded var list to each processor
    if opts_dict["mpi_enable"]:
        ex_varlist = me.partition(ex_varlist, func=Duplicate(), involved=True)

    in_files = []
    if os.path.exists(input_dir):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
        if verbose == True:
            print "Number of files in input directory = ", num_files
        if num_files < esize:
            print "Number of files in input directory (", num_files, ") is less than specified ensemble size of ", esize
            sys.exit(2)
        if num_files > esize:
            print "NOTE: Number of files in ", input_dir, "is greater than specified ensemble size of ", esize, "\nwill just use the first ", esize, "files"
    else:
        print "Input directory: ", input_dir, " not found"
        sys.exit(2)

    if opts_dict["cumul"]:
        if opts_dict["regx"]:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict["indir"], opts_dict["regx"])
        in_files = me.partition(in_files_list, func=EqualLength(), involved=True)
        if me.get_rank() == 0:
            print "in_files=", in_files

    # Open the files in the input directory
    o_files = []
    for onefile in in_files[0:esize]:
        if os.path.isfile(input_dir + "/" + onefile):
            o_files.append(Nio.open_file(input_dir + "/" + onefile, "r"))
        else:
            print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    # Store dimensions of the input fields
    if verbose == True:
        print "Getting spatial dimensions"
    nlev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ""
    latkey = ""
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if nlev == -1:
        print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit()

    if (ncol == -1) and ((nlat == -1) or (nlon == -1)):
        print "Need either lat/lon or ncol  => EXITING...."
        sys.exit()

    # Check if this is SE or FV data
    if ncol != -1:
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if verbose == True:
        print "Checking dimensions across files...."
        print "lev = ", nlev
        if is_SE == True:
            print "ncol = ", ncol
        else:
            print "nlat = ", nlat
            print "nlon = ", nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if is_SE == True:
            if nlev != int(input_dims["lev"]) or (ncol != int(input_dims["ncol"])):
                print "Dimension mismatch between ", in_files[0], "and", in_files[0], "!!!"
                sys.exit()
        else:
            if nlev != int(input_dims["lev"]) or (nlat != int(input_dims[latkey])) or (nlon != int(input_dims[lonkey])):
                print "Dimension mismatch between ", in_files[0], "and", in_files[0], "!!!"
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    if ex_varlist:
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    num_vars = len(vars_dict)
    if verbose == True:
        print "Number of variables (including metadata) found =  ", num_vars
    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.iteritems():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if is_SE == True:  # (time, lev, ncol) or (time, ncol)
            if (vr == 2) and (vs[1] == ncol):
                is_2d = True
                num_2d += 1
            elif (vr == 3) and (vs[2] == ncol and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlat, nlon) or (time, nlat, nlon)
            if (vr == 3) and (vs[1] == nlat and vs[2] == nlon):
                is_2d = True
                num_2d += 1
            elif (vr == 4) and (vs[2] == nlat and vs[3] == nlon and vs[1] == nlev):
                is_3d = True
                num_3d += 1
        if is_3d == True:
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif is_2d == True:
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    # All vars is 3d vars first (sorted), then the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    if verbose == True:
        print "num vars = ", n_all_var_names, "(3d = ", num_3d, " and 2d = ", num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose == True:
        print "Creating ", this_sumfile, "  ..."
    if me.get_rank() == 0 | opts_dict["popens"]:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = "NetCDF4Classic"
        nc_sumfile = Nio.open_file(this_sumfile, "w", options=opt)

        # Set dimensions
        if verbose == True:
            print "Setting dimensions ....."
        if is_SE == True:
            nc_sumfile.create_dimension("ncol", ncol)
        else:
            nc_sumfile.create_dimension("nlat", nlat)
            nc_sumfile.create_dimension("nlon", nlon)
        nc_sumfile.create_dimension("nlev", nlev)
        nc_sumfile.create_dimension("ens_size", esize)
        nc_sumfile.create_dimension("nvars", num_3d + num_2d)
        nc_sumfile.create_dimension("nvars3d", num_3d)
        nc_sumfile.create_dimension("nvars2d", num_2d)
        nc_sumfile.create_dimension("str_size", str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose == True:
            print "Setting global attributes ....."
        setattr(nc_sumfile, "creation_date", now)
        setattr(nc_sumfile, "title", "CAM verification ensemble summary file")
        setattr(nc_sumfile, "tag", opts_dict["tag"])
        setattr(nc_sumfile, "compset", opts_dict["compset"])
        setattr(nc_sumfile, "resolution", opts_dict["res"])
        setattr(nc_sumfile, "machine", opts_dict["mach"])

        # Create variables
        if verbose == True:
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", "f", ("nlev",))
        v_vars = nc_sumfile.create_variable("vars", "S1", ("nvars", "str_size"))
        v_var3d = nc_sumfile.create_variable("var3d", "S1", ("nvars3d", "str_size"))
        v_var2d = nc_sumfile.create_variable("var2d", "S1", ("nvars2d", "str_size"))
        if not opts_dict["gmonly"]:
            if is_SE == True:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", "f", ("nvars3d", "nlev", "ncol"))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", "f", ("nvars3d", "nlev", "ncol"))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", "f", ("nvars2d", "ncol"))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", "f", ("nvars2d", "ncol"))
            else:
                v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", "f", ("nvars3d", "nlev", "nlat", "nlon"))
                v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", "f", ("nvars3d", "nlev", "nlat", "nlon"))
                v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", "f", ("nvars2d", "nlat", "nlon"))
                v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", "f", ("nvars2d", "nlat", "nlon"))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", "f", ("nvars", "ens_size"))
        v_gm = nc_sumfile.create_variable("global_mean", "f", ("nvars", "ens_size"))
        v_loadings_gm = nc_sumfile.create_variable("loadings_gm", "f", ("nvars", "nvars"))
        v_mu_gm = nc_sumfile.create_variable("mu_gm", "f", ("nvars",))
        v_sigma_gm = nc_sumfile.create_variable("sigma_gm", "f", ("nvars",))
        v_sigma_scores_gm = nc_sumfile.create_variable("sigma_scores_gm", "f", ("nvars",))

        # Assign vars, var3d and var2d
        if verbose:
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if l_tt < str_size:
                extra = list(" ") * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if verbose:
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev[:] = lev_data[:]  # write the data into the file variable

        # Form ensembles, each missing one member; compute RMSZs and global
        # means for each variable (the max norm is also computed, currently
        # done in pyStats)
    tslice = opts_dict["tslice"]

    if not opts_dict["cumul"]:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names, func=EqualStride(), involved=True)
        var2_list_loc = me.partition(d2_var_names, func=EqualStride(), involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names
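    # Aside (illustrative, not part of the original script): EqualStride
    # partitioning hands each rank a strided slice of the list, roughly
    # items[rank::size], so with 2 ranks ['T', 'U', 'V', 'Q'] becomes
    # ['T', 'V'] on rank 0 and ['U', 'Q'] on rank 1.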

    # Calculate global means #
    if verbose:
        print "Calculating global means ....."
    if not opts_dict["cumul"]:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(
            o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict
        )
    if verbose:
        print "Finished calculating global means ....."

    # Calculate RMSZ scores
    if verbose:
        print "Calculating RMSZ scores ....."
    if (not opts_dict["gmonly"]) or opts_dict["cumul"]:
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
            o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict
        )

    # Calculate max norm ensemble
    if opts_dict["maxnorm"]:
        if verbose:
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict["mpi_enable"] & (not opts_dict["popens"]):

        if not opts_dict["cumul"]:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index, (len(d3_var_names), len(o_files)))

            if not opts_dict["gmonly"]:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index, (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names), me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index, shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index, shape_tuple3d)

            # Gather the 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index, (len(d2_var_names), len(o_files)))

            if not opts_dict["gmonly"]:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index, (len(d2_var_names), len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names), me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index, shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index, shape_tuple2d)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(d3_var_names) + len(d2_var_names)))
    # Assign to file:
    if me.get_rank() == 0 or opts_dict["popens"]:
        if not opts_dict["cumul"]:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict["gmonly"]:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
            if not opts_dict["gmonly"]:
                if is_SE == True:
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(gmall)
        v_gm[:, :] = gmall[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        print "All Done"
Beispiel #27
0
    def __init__(self, specifiers, serial=False, verbosity=1, wmode='w',
                 once=False, simplecomm=None):
        """
        Constructor

        Parameters:
            specifiers (dict): A dict of named Specifier instances, each
                defining an input specification for this reshaper operation.
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel (False).
                The default is to assume parallel operation (but serial
                will be chosen if mpi4py cannot be found when trying to
                initialize the decomposition).
            verbosity(int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            wmode (str): The mode to use for writing output.  Can be 'w' for
                normal write operation, 's' to skip the output generation for
                existing time-series files, 'o' to overwrite existing
                time-series files, 'a' to append to existing time-series files.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel
                communication, if necessary
        """

        # Check types
        if not isinstance(specifiers, dict):
            err_msg = "Input must be given in a dictionary of Specifiers"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(wmode) is not str:
            err_msg = "Write mode flag must be a str."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, SimpleComm):
                err_msg = "Simple communicator object is not a SimpleComm"
                raise TypeError(err_msg)
        if wmode not in ['w', 's', 'o', 'a']:
            err_msg = "Write mode '{}' not recognized".format(wmode)
            raise ValueError(err_msg)

        # Whether to write to a once file
        self._use_once_file = once

        # Output file write mode
        self._write_mode = wmode

        # Store the list of specifiers
        self._specifiers = specifiers

        # Store the serial specifier
        self._serial = serial

        # Check for a SimpleComm, and if none create it
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)

        # Pointer to its own messenger
        self._simplecomm = simplecomm

        # Store the verbosity
        self._verbosity = verbosity

        # Set the verbose printer
        self._vprint = VPrinter(verbosity=verbosity)

        # Storage for timing data
        self._times = {}

        # Orders for printing timing data
        self._time_orders = {}

        # Storage for all byte counters
        self._byte_counts = {}
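A hedged usage sketch for the constructor above (the class name MultiSpecReshaper and the convert() entry point are assumptions based on PyReshaper conventions; the Specifier contents are elided):

# specs = {'run1': Specifier(...), 'run2': Specifier(...)}
# reshaper = MultiSpecReshaper(specs, serial=False, verbosity=1, wmode='s')
# reshaper.convert()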
Beispiel #28
0
    def __init__(self, specifier, serial=False, verbosity=1,
                 skip_existing=False, overwrite=False,
                 once=False, simplecomm=None):
        """
        Constructor

        Parameters:
            specifier (Specifier): An instance of the Specifier class, 
                defining the input specification for this reshaper operation.

        Keyword Arguments:
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel (False).
                The default is to assume parallel operation (but serial
                will be chosen if mpi4py cannot be found when trying to
                initialize the decomposition).
            verbosity(int): Level of printed output (stdout).  A value of 0 
                means no output, and a higher value means more output.  The
                default value is 1.
            skip_existing (bool): Flag specifying whether to skip the generation
                of time-series for variables with time-series files that already
                exist.  Default is False.
            overwrite (bool): Flag specifying whether to forcefully overwrite
                output files if they already exist.  Default is False.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel 
                communication, if necessary
        """

        # Type checking (or double-checking)
        if not isinstance(specifier, Specifier):
            err_msg = "Input must be given in the form of a Specifier object"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(skip_existing) is not bool:
            err_msg = "Skip_existing flag must be True or False."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, (SimpleComm, SimpleCommMPI)):
                err_msg = ("Simple communicator object is not a SimpleComm "
                           "or SimpleCommMPI")
                raise TypeError(err_msg)

        # Whether to write a once file
        self._use_once_file = once

        # Internal timer data
        self._timer = TimeKeeper()

        # Dictionary storing read/write data amounts
        self.assumed_block_size = float(4 * 1024 * 1024)
        self._byte_counts = {}

        self._timer.start('Initializing Simple Communicator')
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)
        # Reference to the simple communicator
        self._simplecomm = simplecomm
        self._timer.stop('Initializing Simple Communicator')

        # Construct the print header
        header = ''.join(['[', str(self._simplecomm.get_rank()),
                          '/', str(self._simplecomm.get_size()), '] '])

        # Reference to the verbose printer tool
        self._vprint = VPrinter(header=header, verbosity=verbosity)

        # Debug output starting
        if self._simplecomm.is_manager():
            self._vprint('Initializing Reshaper', verbosity=1)

        # Validate the user input data
        self._timer.start('Specifier Validation')
        specifier.validate()
        self._timer.stop('Specifier Validation')
        if self._simplecomm.is_manager():
            self._vprint('Specifier validated', verbosity=1)

        # Setup PyNIO options (including disabling the default PreFill option)
        opt = Nio.options()
        opt.PreFill = False

        # Determine the Format and CompressionLevel options
        # from the NetCDF format string in the Specifier
        if specifier.netcdf_format == 'netcdf':
            opt.Format = 'Classic'
        elif specifier.netcdf_format == 'netcdf4':
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = 0
        elif specifier.netcdf_format == 'netcdf4c':
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = specifier.netcdf_deflate
            if self._simplecomm.is_manager():
                self._vprint('PyNIO compression level: {0}'.format(\
                    specifier.netcdf_deflate), verbosity=2)

        self._nio_options = opt
        if self._simplecomm.is_manager():
            self._vprint('PyNIO options set', verbosity=2)

        # Open all of the input files
        self._timer.start('Open Input Files')
        self._input_files = []
        for filename in specifier.input_file_list:
            self._input_files.append(Nio.open_file(filename, "r"))
        self._timer.stop('Open Input Files')
        if self._simplecomm.is_manager():
            self._vprint('Input files opened', verbosity=2)

        # Validate the input files themselves
        self._timer.start('Input File Validation')
        self._validate_input_files(specifier)
        self._timer.stop('Input File Validation')
        if self._simplecomm.is_manager():
            self._vprint('Input files validated', verbosity=2)

        # Sort the input files by time
        self._timer.start('Sort Input Files')
        self._sort_input_files_by_time(specifier)
        self._timer.stop('Sort Input Files')
        if self._simplecomm.is_manager():
            self._vprint('Input files sorted', verbosity=2)

        # Retrieve and sort the variables in each time-slice file
        # (To determine if it is time-invariant metadata, time-variant
        # metadata, or if it is a time-series variable)
        self._timer.start('Sort Variables')
        self._sort_variables(specifier)
        self._timer.stop('Sort Variables')
        if self._simplecomm.is_manager():
            self._vprint('Variables sorted', verbosity=2)

        # Validate the output files
        self._timer.start('Output File Validation')
        self._validate_output_files(specifier, skip_existing, overwrite)
        self._timer.stop('Output File Validation')
        if self._simplecomm.is_manager():
            self._vprint('Output files validated', verbosity=2)

        # Helpful debugging message
        if self._simplecomm.is_manager():
            self._vprint('Reshaper initialized.', verbosity=1)

        # Sync before continuing..
        self._simplecomm.sync()
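The constructor above maps the Specifier's NetCDF format string onto PyNIO options; a compact sketch of the same mapping (the deflate level 1 below is illustrative, since the code reads it from specifier.netcdf_deflate):

# netcdf_format -> (opt.Format, opt.CompressionLevel)
nc_format_options = {
    'netcdf': ('Classic', None),        # classic NetCDF, compression not set
    'netcdf4': ('NetCDF4Classic', 0),   # NetCDF4-classic, compression disabled
    'netcdf4c': ('NetCDF4Classic', 1),  # NetCDF4-classic, deflate level from the Specifier
}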
Beispiel #29
0
    def __init__(self, specifiers, serial=False, verbosity=1,
                 skip_existing=False, overwrite=False,
                 once=False, simplecomm=None):
        """
        Constructor

        Parameters:
            specifiers (dict): A dict of named Specifier instances, each
                defining an input specification for this reshaper operation.

        Keyword Arguments:
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel (False).
                The default is to assume parallel operation (but serial
                will be chosen if mpi4py cannot be found when trying to
                initialize the decomposition).
            verbosity(int): Level of printed output (stdout).  A value of 0 
                means no output, and a higher value means more output.  The
                default value is 1.
            skip_existing (bool): Flag specifying whether to skip the generation
                of time-series for variables with time-series files that already
                exist.  Default is False.
            overwrite (bool): Flag specifying whether to forcefully overwrite
                output files if they already exist.  Default is False.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel 
                communication, if necessary
        """

        # Check types
        if not isinstance(specifiers, dict):
            err_msg = "Input must be given in a dictionary of Specifiers"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(skip_existing) is not bool:
            err_msg = "Skip_existing flag must be True or False."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, SimpleComm):
                err_msg = "Simple communicator object is not a SimpleComm"
                raise TypeError(err_msg)

        # Whether to write to a once file
        self._use_once_file = once

        # Whether to skip existing time-series files
        self._skip_existing = skip_existing

        # Whether to overwrite existing output files
        self._overwrite = overwrite

        # Store the list of specifiers
        self._specifiers = specifiers

        # Store the serial specifier
        self._serial = serial

        # Check for a SimpleComm, and if none create it
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)

        # Pointer to its own messenger
        self._simplecomm = simplecomm

        # Store the verbosity
        self._verbosity = verbosity

        # Set the verbose printer
        self._vprint = VPrinter(verbosity=verbosity)

        # Storage for timing data
        self._times = {}

        # Orders for printing timing data
        self._time_orders = {}

        # Storage for all byte counters
        self._byte_counts = {}
            oldfile = os.path.join(olddir, filename)
            if oldfile in oldfiles:
                item_dict['old'] = oldfile
                oldfiles.remove(oldfile)
                items_to_check.append(item_dict)
            else:
                item_dict['old'] = None
                unchecked_new_items.append(item_dict)
        for oldfile in oldfiles:
            item_dict = {'test': test_name}
            item_dict['new'] = None
            item_dict['old'] = oldfile
            unchecked_old_items.append(item_dict)

    # Get a basic MPI comm
    comm = create_comm(serial=(opts.serial or opts.list_tests))

    # Print tests that will be checked
    if comm.is_manager():
        print 'Checking test results.'

        for test_name in tests_to_check:
            print 'Test {0!s}:'.format(test_name)
            num_chk = sum(1 for i in items_to_check if i['test'] == test_name)
            num_new = num_chk + sum(
                1 for i in unchecked_new_items if i['test'] == test_name)
            num_old = num_chk + sum(
                1 for i in unchecked_old_items if i['test'] == test_name)
            print '   Checking {0!s} of {1!s}'.format(num_chk, num_new),
            print 'new files generated against {0!s}'.format(num_old),
            print 'old files found.'
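The counts above rely on the sum-over-a-generator idiom; a minimal self-contained example:

items = [{'test': 'a'}, {'test': 'b'}, {'test': 'a'}]
num_a = sum(1 for i in items if i['test'] == 'a')
print num_a  # prints 2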
Beispiel #31
0
    def __init__(self,
                 specifiers,
                 serial=False,
                 verbosity=1,
                 wmode='w',
                 once=False,
                 simplecomm=None):
        """
        Constructor

        Parameters:
            specifiers (dict): A dict of named Specifier instances, each
                defining an input specification for this reshaper operation.
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel (False).
                The default is to assume parallel operation (but serial
                will be chosen if mpi4py cannot be found when trying to
                initialize the decomposition).
            verbosity(int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            wmode (str): The mode to use for writing output.  Can be 'w' for
                normal write operation, 's' to skip the output generation for
                existing time-series files, 'o' to overwrite existing
                time-series files, 'a' to append to existing time-series files.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel
                communication, if necessary
        """

        # Check types
        if not isinstance(specifiers, dict):
            err_msg = "Input must be given in a dictionary of Specifiers"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(wmode) is not str:
            err_msg = "Write mode flag must be a str."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, SimpleComm):
                err_msg = "Simple communicator object is not a SimpleComm"
                raise TypeError(err_msg)
        if wmode not in ['w', 's', 'o', 'a']:
            err_msg = "Write mode '{}' not recognized".format(wmode)
            raise ValueError(err_msg)

        # Whether to write to a once file
        self._use_once_file = once

        # Output file write mode
        self._write_mode = wmode

        # Store the list of specifiers
        self._specifiers = specifiers

        # Store the serial specifier
        self._serial = serial

        # Check for a SimpleComm, and if none create it
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)

        # Pointer to its own messenger
        self._simplecomm = simplecomm

        # Store the verbosity
        self._verbosity = verbosity

        # Set the verbose printer
        self._vprint = VPrinter(verbosity=verbosity)

        # Storage for timing data
        self._times = {}

        # Orders for printing timing data
        self._time_orders = {}

        # Storage for all byte counters
        self._byte_counts = {}
Beispiel #32
0
def main(argv):
    print 'Running pyEnsSumPop!'

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable zscoreonly nrand= rand seq= jsondir='
    optkeys = s.split()
    try: 
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict={}

    # Defaults
    opts_dict['tag'] = 'cesm1_2_0'
    opts_dict['compset'] = 'FC5'
    opts_dict['mach'] = 'yellowstone'
    opts_dict['tslice'] = 0 
    opts_dict['nyear'] = 3
    opts_dict['nmonth'] = 12
    opts_dict['npert'] = 40
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'ne30_ne30'
    opts_dict['sumfile'] = 'ens.pop.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = ''
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = False
    opts_dict['zscoreonly'] = False
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40 
    opts_dict['rand'] = False
    opts_dict['seq'] = 0 
    opts_dict['jsondir'] = '/glade/scratch/haiyingx/' 
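    # Hedged example invocation (paths and option values are illustrative only;
    # the flags correspond to the optkeys string above):
    #
    #   python pyEnsSumPop.py --indir /path/to/pop/ensemble --sumfile ens.pop.summary.nc \
    #       --tslice 0 --npert 40 --jsonfile pop_ensemble.json --mpi_enable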

    # This creates the dictionary of input arguments 
    print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts,optkeys,'ESP',opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if verbose:
        print opts_dict

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create a mpi simplecomm object
    me = simplecomm.create_comm(serial=not opts_dict['mpi_enable'])
    if opts_dict['jsonfile']:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        str_size = 0
        for vname in Var3d:
            if str_size < len(vname):
                str_size = len(vname)
        for vname in Var2d:
            if str_size < len(vname):
                str_size = len(vname)


    in_files = []
    if os.path.exists(input_dir):
        # Pick up 'nrand' randomly chosen input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict, opts_dict['nrand'])
        else:
            # Get the sorted list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        # Make sure we have enough
        num_files = len(in_files)
    else:
        print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    # Partition the input file list
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)

    # Open the files in the input directory
    o_files = []
    for onefile in in_file_list:
        if os.path.isfile(input_dir + '/' + onefile):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            print "COULD NOT LOCATE FILE " + input_dir + '/' + onefile + "! EXITING...."
            sys.exit()


    print in_file_list

    # Store dimensions of the input fields
    if verbose:
        print "Getting spatial dimensions"
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    # Extract the z_t, nlat, and nlon dimensions
    for key in input_dims:
        if key == "z_t":
            nlev = input_dims["z_t"]
        elif key == "nlon":
            nlon = input_dims["nlon"]
        elif key == "nlat":
            nlat = input_dims["nlat"]

    # Make sure all files have the same dimensions
    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (nlev != int(input_dims["z_t"]) or nlat != int(input_dims["nlat"])
                or nlon != int(input_dims["nlon"])):
            print "Dimension mismatch between ", in_file_list[0], 'and', in_file_list[count], '!!!'
            sys.exit()


    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if verbose:
       print "Creating ", this_sumfile, "  ..."
    if me.get_rank() == 0:
       if os.path.exists(this_sumfile):
           os.unlink(this_sumfile)
       opt = Nio.options()
       opt.PreFill = False
       opt.Format = 'NetCDF4Classic'

       nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

       # Set dimensions
       if verbose:
           print "Setting dimensions ....."
       nc_sumfile.create_dimension('nlat', nlat)
       nc_sumfile.create_dimension('nlon', nlon)
       nc_sumfile.create_dimension('nlev', nlev)
       nc_sumfile.create_dimension('time',None)
       nc_sumfile.create_dimension('ens_size', opts_dict['npert'])
       nc_sumfile.create_dimension('nbin', opts_dict['nbin'])
       nc_sumfile.create_dimension('nvars', len(Var3d) + len(Var2d))
       nc_sumfile.create_dimension('nvars3d', len(Var3d))
       nc_sumfile.create_dimension('nvars2d', len(Var2d))
       nc_sumfile.create_dimension('str_size', str_size)

       # Set global attributes
       now = time.strftime("%c")
       if verbose:
           print "Setting global attributes ....."
       setattr(nc_sumfile, 'creation_date',now)
       setattr(nc_sumfile, 'title', 'POP verification ensemble summary file')
       setattr(nc_sumfile, 'tag', opts_dict["tag"]) 
       setattr(nc_sumfile, 'compset', opts_dict["compset"]) 
       setattr(nc_sumfile, 'resolution', opts_dict["res"]) 
       setattr(nc_sumfile, 'machine', opts_dict["mach"]) 

       # Create variables
       if verbose:
           print "Creating variables ....."
       v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev',))
       v_vars = nc_sumfile.create_variable("vars", 'S1', ('nvars', 'str_size'))
       v_var3d = nc_sumfile.create_variable("var3d", 'S1', ('nvars3d', 'str_size'))
       v_var2d = nc_sumfile.create_variable("var2d", 'S1', ('nvars2d', 'str_size'))
       v_time = nc_sumfile.create_variable("time",'d',('time',))
       v_ens_avg3d = nc_sumfile.create_variable("ens_avg3d", 'f', ('time','nvars3d', 'nlev', 'nlat', 'nlon'))
       v_ens_stddev3d = nc_sumfile.create_variable("ens_stddev3d", 'f', ('time','nvars3d', 'nlev', 'nlat', 'nlon'))
       v_ens_avg2d = nc_sumfile.create_variable("ens_avg2d", 'f', ('time','nvars2d', 'nlat', 'nlon'))
       v_ens_stddev2d = nc_sumfile.create_variable("ens_stddev2d", 'f', ('time','nvars2d', 'nlat', 'nlon'))

       v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f', ('time','nvars', 'ens_size','nbin'))
       if not opts_dict['zscoreonly']:
          v_gm = nc_sumfile.create_variable("global_mean", 'f', ('time','nvars', 'ens_size'))


       # Assign vars, var3d and var2d
       if verbose:
           print "Assigning vars, var3d, and var2d ....."

       eq_all_var_names = []
       eq_d3_var_names = []
       eq_d2_var_names = []
       all_var_names = list(Var3d)
       all_var_names += Var2d
       l_eq = len(all_var_names)
       for i in range(l_eq):
           tt = list(all_var_names[i])
           l_tt = len(tt)
           if l_tt < str_size:
               extra = list(' ') * (str_size - l_tt)
               tt.extend(extra)
           eq_all_var_names.append(tt)

       l_eq = len(Var3d)
       for i in range(l_eq):
           tt = list(Var3d[i])
           l_tt = len(tt)
           if l_tt < str_size:
               extra = list(' ') * (str_size - l_tt)
               tt.extend(extra)
           eq_d3_var_names.append(tt)

       l_eq = len(Var2d)
       for i in range(l_eq):
           tt = list(Var2d[i])
           l_tt = len(tt)
           if l_tt < str_size:
               extra = list(' ') * (str_size - l_tt)
               tt.extend(extra)
           eq_d2_var_names.append(tt)

       v_vars[:] = eq_all_var_names[:]
       v_var3d[:] = eq_d3_var_names[:]
       v_var2d[:] = eq_d2_var_names[:]

       # Time-invariant metadata
       if verbose:
           print "Assigning time invariant metadata ....."
       vars_dict = o_files[0].variables
       lev_data = vars_dict["z_t"]
       v_lev[:] = lev_data[:]  # write the data into the file variable
       
    # Time-variant metadata
    if verbose:
        print "Assigning time variant metadata ....."
    vars_dict = o_files[0].variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array,me,(me.get_size(),))
    if me.get_rank() == 0:
       v_time[:]=time_array[:]

    # Calculate global mean, average, standard deviation
    if verbose:
        print "Calculating global means ....."
    is_SE = False
    tslice = 0
    if not opts_dict['zscoreonly']:
        gm3d, gm2d = pyEnsLib.generate_global_mean_for_summary(o_files, Var3d, Var2d, is_SE, False, opts_dict)
    if verbose:
        print "Finished calculating global means ....."

    # Calculate RMSZ scores
    if verbose:
        print "Calculating RMSZ scores ....."
    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(o_files, Var3d, Var2d, is_SE, opts_dict)

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d and 2d variable results from all processors to the master processor
        if not opts_dict['zscoreonly']:
            # Gather global means results
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(gmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(o_files)))
        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(zmall, me, (me.get_size(), len(Var3d) + len(Var2d), len(o_files), nbin))

        ens_avg3d = pyEnsLib.gather_npArray_pop(ens_avg3d, me, (me.get_size(), len(Var3d), nlev, nlat, nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me, (me.get_size(), len(Var2d), nlat, nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, nlat, nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me, (me.get_size(), len(Var2d), nlat, nlon))

    # Assign to file:
    if me.get_rank() == 0:
        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]
        if not opts_dict['zscoreonly']:
            v_gm[:, :, :] = gmall[:, :, :]
        print "All done"
    def execute(self,
                chunks={},
                serial=False,
                history=False,
                scomm=None,
                deflate=None):
        """
        Execute the Data Flow
        
        Parameters:
            chunks (dict): A dictionary of output dimension names and chunk sizes for each
                dimension given.  Output dimensions not included in the dictionary will not be
                chunked.  (Use OrderedDict to preserve order of dimensions, where the first
                dimension will be assumed to correspond to the fastest-varying index and the last
                dimension will be assumed to correspond to the slowest-varying index.)
            serial (bool): Whether to run in serial (True) or parallel (False)
            history (bool): Whether to write a history attribute generated during execution
                for each variable in the file
            scomm (SimpleComm): An externally created SimpleComm object to use for managing
                parallel operation
            deflate (int): Override all output file deflate levels with given value
        """
        # Check chunks type
        if not isinstance(chunks, dict):
            raise TypeError('Chunks must be specified with a dictionary')

        # Make sure that the specified chunking dimensions are valid
        for odname, odsize in chunks.iteritems():
            if odname not in self._o2imap:
                raise ValueError(
                    'Cannot chunk over unknown output dimension {!r}'.format(
                        odname))
            if not isinstance(odsize, int):
                raise TypeError(
                    ('Chunk size invalid for output dimension {!r}: '
                     '{}').format(odname, odsize))

        # Check that we are not chunking over any "sum-like" dimensions
        sumlike_chunk_dims = sorted(d for d in chunks
                                    if d in self._sumlike_dimensions)
        if len(sumlike_chunk_dims) > 0:
            raise ValueError((
                'Cannot chunk over dimensions that are summed over (or "sum-like")'
                ': {}'.format(', '.join(sumlike_chunk_dims))))

        # Create the simple communicator, if necessary
        if scomm is None:
            scomm = create_comm(serial=bool(serial))
        elif isinstance(scomm, SimpleComm):
            if scomm.is_manager():
                print 'Inheriting SimpleComm object from parent.  (Ignoring serial argument.)'
        else:
            raise TypeError('Communication object is not a SimpleComm!')

        # Start general output
        prefix = '[{}/{}]'.format(scomm.get_rank(), scomm.get_size())
        if scomm.is_manager():
            print 'Beginning execution of data flow...'
            print 'Mapping Input Dimensions to Output Dimensions:'
            for d in sorted(self._i2omap):
                print '   {} --> {}'.format(d, self._i2omap[d])
            if len(chunks) > 0:
                print 'Chunking over Output Dimensions:'
                for d in chunks:
                    print '   {}: {}'.format(d, chunks[d])
            else:
                print 'Not chunking output.'

        # Partition the output files/variables over available parallel (MPI) ranks
        fnames = scomm.partition(self._filesizes.items(),
                                 func=WeightBalanced(),
                                 involved=True)
        if scomm.is_manager():
            print 'Writing {} files across {} MPI processes.'.format(
                len(self._filesizes), scomm.get_size())
        scomm.sync()

        # Standard output
        print '{}: Writing {} files: {}'.format(prefix, len(fnames),
                                                ', '.join(fnames))
        scomm.sync()

        # Loop over output files and write using given chunking
        for fname in fnames:
            print '{}: Writing file: {}'.format(prefix, fname)
            if history:
                self._writenodes[fname].enable_history()
            else:
                self._writenodes[fname].disable_history()
            self._writenodes[fname].execute(chunks=chunks, deflate=deflate)
            print '{}: Finished writing file: {}'.format(prefix, fname)

        scomm.sync()
        if scomm.is_manager():
            print 'All output variables written.'
            print
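    # Hedged usage sketch for execute() above (the data-flow object setup is
    # elided; the chunk sizes are illustrative assumptions):
    #
    #   from collections import OrderedDict
    #   chunks = OrderedDict([('lon', 144), ('lat', 96)])
    #   dataflow.execute(chunks=chunks, serial=False, history=True, deflate=1)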
    def __init__(self,
                 in_directory,
                 out_directory,
                 prefix,
                 suffix,
                 file_pattern='null',
                 date_pattern='null',
                 m_id=['-999'],
                 hist_type='slice',
                 avg_list=[],
                 weighted=False,
                 split=False,
                 split_files='null',
                 split_orig_size='null',
                 ncformat='netcdf4c',
                 varlist=[],
                 serial=False,
                 mean_diff_rms_obs_dir='null',
                 region_nc_var='null',
                 regions={},
                 region_wgt_var='null',
                 obs_file='null',
                 reg_obs_file_suffix='null',
                 obs_dir='null',
                 main_comm=None,
                 clobber=False,
                 ice_obs_file='null',
                 reg_file='null',
                 ncl_location='null',
                 year0=-99,
                 year1=-99,
                 collapse_dim='',
                 vertical_levels=60):
        '''
    Initializes the internal data with optional arguments

    @param in_directory     Where the input directory resides (needs full path).

    @param out_directory    Where the output will be produced (needs full path).

    @param prefix           String specifying the full file name before the date string.

    @param suffix           String specifying the suffix of the file names

    @param file_pattern     File pattern used put the prefix, date, and suffix together for input files.

    @param date_pattern     The pattern used to decipher the date string within the file name.  

    @param m_id             Array of member identifiers.  All averages will be done on each member individually and then across all members.

    @param hist_type	    Type of file ('slice' or 'series').  Default is 'slice'.

    @param avg_list	    List of averages that need to be computed.  Elements should contain aveType:year0:year1.
                            year1 is only required for multi-year averaging.

    @param weighted         Boolean variable to select whether weights will be applied to the averaging.
                            True = weights will be applied.  Default is False.

    @param split            Boolean variable.  True = the file is split spatially and the final average needs to be pieced together
                            (i.e. CICE time series files).  Default is False.

    @param split_files	    The strings indicating the naming difference between split files.  Expects a string with elements separated by a comma.
                            Default is 'null'.

    @param split_orig_size  A string listing the lat and lon values of the original grid size.  Needed in case some of the grid has been deleted.
                            (example: 'lon=288,lat=192').  Default is 'null'.

    @param ncformat	    Format to output the averaged file(s) in.  Default is 'netcdf4c'.  Other options: 'netcdf','netcdf4','netcdf4c'

    @param varlist	    Optional variables list, if not averaging all variables
 
    @param serial	    Boolean to run in serial mode.  True=serial (without MPI) False=run in parallel(with MPI) False requires mpi4py to be installed.
                            Default is False.

    @param regions          Dictionary that contains regions to average over.  Format is 'string region name: int region value'.  Default is an empty dictionary.

    @param region_nc_var    String that identifies the netcdf variable that contains the region mask used by a regional average.

    @param region_wgt_var   String that identifies the netcdf variable that contains the weights.

    @param obs_file         Observational file used for the creation of the mean_diff_rms file. This file must contain all of the variables within the
                            variable list (or if a variable list is not specified, must contain all hist file variables).  Dimension must be nlon and nlat. 

    @param reg_obs_file_suffix The suffix of the regional, weighted averages of the 'obs_file'.  Used for the creation of the mean_diff_rms file.  

    @param obs_dir          Full path to the observational files used for the mean_diff_rms file.

    @param main_comm        A simplecomm to be used by the PyAverager.  If not specified, one will be created by this specifier. Default None.

    @param clobber          Remove netcdf output file(s) if they exist.  Default False - will exit if an output file of the same name exists. 

    @param ice_obs_file     Full path to the observational file used to create the cice model pre_proc file

    @param reg_file         Full path to the regional file used to create the cice model pre_proc file

    @param ncl_location     Location of where the ncl scripts reside

    @param year0            The first year - only used to create the cice pre_proc file.  

    @param year1            The last year - only used to create the cice pre_proc file. 

    @param collapse_dim     Used to collapse/average over one dimension.

    @param vertical_levels  Number of ocean vertical levels
    '''

        # Where the input is located
        self.in_directory = in_directory

        # Where the output should be produced
        self.out_directory = out_directory

        # Full file name up to the date string
        self.prefix = prefix

        # The suffix of the data files
        self.suffix = suffix

        # Type of file
        self.hist_type = hist_type

        # List of averages to compute
        self.avg_list = avg_list

        # Should weights be applied?
        self.weighted = weighted

        # Are files split spatially?
        self.split = split

        # Split file name indicators
        self.split_files = split_files

        # The original grid size of the split files
        self.split_orig_size = split_orig_size

        # The netcdf output format
        self.ncformat = ncformat

        # Varlist to average (if not all variables)
        self.varlist = varlist

        # Run in serial mode?  If True, will be ran without MPI
        self.serial = serial

        # Directory where to find the regional obs files for the mean_diff_rms climo file
        self.mean_diff_rms_obs_dir = mean_diff_rms_obs_dir

        # Regions to average over
        self.regions = regions

        # Netcdf variable name that contains a region mask
        self.region_nc_var = region_nc_var

        # Netcdf variable name that contains the weights
        self.region_wgt_var = region_wgt_var

        # String that indicates the suffix of the regional obs files used for the mean_diff_rms file
        self.reg_obs_file_suffix = reg_obs_file_suffix

        # String that indicates the name of the observational file
        self.obs_file = obs_file

        # String indicating the path to the observational files used for the mean_diff_rms file
        self.obs_dir = obs_dir

        # File pattern used to piece together a full file name
        if (file_pattern == 'null'):
            if (hist_type == 'slice'):
                self.file_pattern = [
                    '$prefix', '.', '$date_pattern', '.', '$suffix'
                ]
            if (hist_type == 'series'):
                if split:
                    self.file_pattern = [
                        '$prefix', '.', '$var', '_', '$hem', '.',
                        '$date_pattern', '.', '$suffix'
                    ]
                else:
                    self.file_pattern = [
                        '$prefix', '.', '$var', '.', '$date_pattern', '.',
                        '$suffix'
                    ]
        else:
            self.file_pattern = file_pattern

        # The date pattern to decipher the date within the file name
        self.date_pattern = date_pattern

        self.m_id = m_id

        # Get first and last years used in the averaging by parsing the avg_list
        dates = []
        for avg in avg_list:
            avg_descr = avg.split(':')
            for yr in avg_descr[1:]:
                dates.append(int(yr))
        if (year0 == -99 and year1 == -99):
            self.year0 = int(min(dates))
            self.year1 = int(max(dates))
        else:
            self.year0 = int(year0)
            self.year1 = int(year1)

        # Initialize a simple_comm object if one was not passed in by the user
        if (main_comm is None):
            from asaptools import simplecomm
            self.main_comm = simplecomm.create_comm(serial=serial)
        else:
            self.main_comm = main_comm

        # True/False, remove average file(s) if they have already been created
        self.clobber = clobber

        # File that contains the weight/area information
        self.ice_obs_file = ice_obs_file

        # File that exists or will be created that contains a region mask for ice
        self.reg_file = reg_file

        # Location of the ncl script that will be used to create reg_file if it doesn't exist
        self.ncl_location = ncl_location

        # Used to collapse/average over one dim.
        self.collapse_dim = collapse_dim

        # Used to specify the number of ocean vertical levels
        self.vertical_levels = vertical_levels
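
        # Hedged usage sketch for this specifier (the class name and argument
        # values are illustrative assumptions, following PyAverager conventions):
        #
        #   spec = Specifier(in_directory='/data/case/hist',
        #                    out_directory='/data/case/avg',
        #                    prefix='case.pop.h', suffix='nc', hist_type='slice',
        #                    avg_list=['ya:1:5', 'mavg:1:5'], weighted=True)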
                workdir = '{0}/climo/{1}/{2}/{3}/'.format(envDict['PTMPDIR_'+t], envDict['caseid_'+t], subdir, m_dir)

                timer_tag = '{0}_{1}'.format(t, climo_file)
                timer.start(timer_tag)
                debugMsg('Before call to lnd_regrid using workdir = {0}/{1}'.format(workdir, ext_dir), header=True, verbosity=1)
                diagUtilsLib.lnd_regrid(climo_file, regrid_script, t, workdir, ext_dir, envDict)
                timer.stop(timer_tag)

                debugMsg("Total time to regrid file {0} = {1}".format(climo_file, timer.get_time(timer_tag)), header=True, verbosity=1)

#===================================


if __name__ == "__main__":
    # initialize simplecomm object
    main_comm = simplecomm.create_comm(serial=False)

    # setup an overall timer
    timer = timekeeper.TimeKeeper()

    # get commandline options
    options = commandline_options()

    # initialize global vprinter object for printing debug messages
    if options.debug:
        header = "[" + str(main_comm.get_rank()) + "/" + str(main_comm.get_size()) + "]: DEBUG... "
        debugMsg = vprinter.VPrinter(header=header, verbosity=options.debug[0])
   
    try:
        timer.start("Total Time")
        status = main(options, main_comm, debugMsg, timer)
    def setUp(self):
        self.scomm = simplecomm.create_comm(serial=True)
        self.pcomm = simplecomm.create_comm(serial=False)
        self.size = MPI_COMM_WORLD.Get_size()
        self.rank = MPI_COMM_WORLD.Get_rank()
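A hedged note on the two communicators above (based on the ASAPTools SimpleComm design; the exact behavior stated here is an assumption):

# scomm = simplecomm.create_comm(serial=True)   # serial: no MPI, get_size() is 1
# pcomm = simplecomm.create_comm(serial=False)  # parallel: wraps MPI_COMM_WORLD
# so scomm.get_size() == 1, while pcomm.get_size() == MPI_COMM_WORLD.Get_size()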
Beispiel #37
0
    def __init__(self,
                 specifier,
                 serial=False,
                 verbosity=1,
                 skip_existing=False,
                 overwrite=False,
                 once=False,
                 simplecomm=None):
        """
        Constructor

        Parameters:
            specifier (Specifier): An instance of the Specifier class, 
                defining the input specification for this reshaper operation.

        Keyword Arguments:
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel (False).
                The default is to assume parallel operation (but serial
                will be chosen if mpi4py cannot be found when trying to
                initialize the decomposition).
            verbosity(int): Level of printed output (stdout).  A value of 0 
                means no output, and a higher value means more output.  The
                default value is 1.
            skip_existing (bool): Flag specifying whether to skip the generation
                of time-series for variables with time-series files that already
                exist.  Default is False.
            overwrite (bool): Flag specifying whether to forcefully overwrite
                output files if they already exist.  Default is False.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel 
                communication, if necessary
        """

        # Type checking (or double-checking)
        if not isinstance(specifier, Specifier):
            err_msg = "Input must be given in the form of a Specifier object"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(skip_existing) is not bool:
            err_msg = "Skip_existing flag must be True or False."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, (SimpleComm, SimpleCommMPI)):
                err_msg = ("Simple communicator object is not a SimpleComm "
                           "or SimpleCommMPI")
                raise TypeError(err_msg)

        # Whether to write a once file
        self._use_once_file = once

        # Internal timer data
        self._timer = TimeKeeper()

        # Dictionary storing read/write data amounts
        self.assumed_block_size = float(4 * 1024 * 1024)
        self._byte_counts = {}

        self._timer.start('Initializing Simple Communicator')
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)
        # Reference to the simple communicator
        self._simplecomm = simplecomm
        self._timer.stop('Initializing Simple Communicator')

        # Construct the print header
        header = ''.join([
            '[',
            str(self._simplecomm.get_rank()), '/',
            str(self._simplecomm.get_size()), '] '
        ])

        # Reference to the verbose printer tool
        self._vprint = VPrinter(header=header, verbosity=verbosity)

        # Debug output starting
        if self._simplecomm.is_manager():
            self._vprint('Initializing Reshaper', verbosity=1)

        # Validate the user input data
        self._timer.start('Specifier Validation')
        specifier.validate()
        self._timer.stop('Specifier Validation')
        if self._simplecomm.is_manager():
            self._vprint('Specifier validated', verbosity=1)

        # Setup PyNIO options (including disabling the default PreFill option)
        opt = Nio.options()
        opt.PreFill = False

        # Determine the Format and CompressionLevel options
        # from the NetCDF format string in the Specifier
        if specifier.netcdf_format == 'netcdf':
            opt.Format = 'Classic'
        elif specifier.netcdf_format == 'netcdf4':
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = 0
        elif specifier.netcdf_format == 'netcdf4c':
            opt.Format = 'NetCDF4Classic'
            opt.CompressionLevel = specifier.netcdf_deflate
            if self._simplecomm.is_manager():
                self._vprint('PyNIO compression level: {0}'.format(\
                    specifier.netcdf_deflate), verbosity=2)

        self._nio_options = opt
        if self._simplecomm.is_manager():
            self._vprint('PyNIO options set', verbosity=2)

        # Open all of the input files
        self._timer.start('Open Input Files')
        self._input_files = []
        for filename in specifier.input_file_list:
            self._input_files.append(Nio.open_file(filename, "r"))
        self._timer.stop('Open Input Files')
        if self._simplecomm.is_manager():
            self._vprint('Input files opened', verbosity=2)

        # Validate the input files themselves
        self._timer.start('Input File Validation')
        self._validate_input_files(specifier)
        self._timer.stop('Input File Validation')
        if self._simplecomm.is_manager():
            self._vprint('Input files validated', verbosity=2)

        # Sort the input files by time
        self._timer.start('Sort Input Files')
        self._sort_input_files_by_time(specifier)
        self._timer.stop('Sort Input Files')
        if self._simplecomm.is_manager():
            self._vprint('Input files sorted', verbosity=2)

        # Retrieve and sort the variables in each time-slice file
        # (To determine if it is time-invariant metadata, time-variant
        # metadata, or if it is a time-series variable)
        self._timer.start('Sort Variables')
        self._sort_variables(specifier)
        self._timer.stop('Sort Variables')
        if self._simplecomm.is_manager():
            self._vprint('Variables sorted', verbosity=2)

        # Validate the output files
        self._timer.start('Output File Validation')
        self._validate_output_files(specifier, skip_existing, overwrite)
        self._timer.stop('Output File Validation')
        if self._simplecomm.is_manager():
            self._vprint('Output files validated', verbosity=2)

        # Helpful debugging message
        if self._simplecomm.is_manager():
            self._vprint('Reshaper initialized.', verbosity=1)

        # Sync before continuing...
        self._simplecomm.sync()
Beispiel #38
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)
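
    # Typical invocation (paths hypothetical, shown only for illustration):
    #   python pyEnsSum.py --esize 350 --indir /path/to/ensemble/runs \
    #       --sumfile ens.summary.nc --tslice 1 --mach cheyenne --verbose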

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_0_beta08'
    opts_dict['compset'] = 'F2000'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach']
            and opts_dict['res']):
        print 'Please specify --tag, --compset, --mach and --res options'
        sys.exit()

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []
    inc_varlist = []

    # Create an MPI simplecomm object (serial if MPI is not enabled)
    me = simplecomm.create_comm(serial=not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print 'Running pyEnsSum!'

    if me.get_rank() == 0 and (verbose == True):
        print opts_dict
        print 'Ensemble size for summary = ', esize

    exclude = False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            ex_varlist, exclude = pyEnsLib.read_jsonlist(
                opts_dict['jsonfile'], 'ES')
            if exclude == False:
                inc_varlist = ex_varlist
                ex_varlist = []
            # Read in the included var list
            #inc_varlist=pyEnsLib.read_jsonlist(opts_dict['jsonfile'],'ES')

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist,
                                      func=Duplicate(),
                                      involved=True)
        else:
            inc_varlist = me.partition(inc_varlist,
                                       func=Duplicate(),
                                       involved=True)
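
    # (partition(..., func=Duplicate(), involved=True) acts as a broadcast:
    # every rank ends up with an identical copy of the data.)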

    in_files = []
    if (os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and (verbose == True):
            print 'Number of files in input directory = ', num_files
        if (num_files < esize):
            if me.get_rank() == 0 and (verbose == True):
                print 'Number of files in input directory (',num_files,\
                 ') is less than specified ensemble size of ', esize
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank() == 0 and (verbose == True):
                print 'NOTE: Number of files in ', input_dir, \
                 'is greater than specified ensemble size of ', esize ,\
                 '\nwill just use the first ',  esize, 'files'
    else:
        if me.get_rank() == 0:
            print 'Input directory: ', input_dir, ' not found'
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'],
                                               opts_dict['regx'])
        in_files = me.partition(in_files_list,
                                func=EqualLength(),
                                involved=True)
        if me.get_rank() == 0 and (verbose == True):
            print 'in_files=', in_files

    # Open the files in the input directory
    o_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print 'Input files are: '
        print "\n".join(in_files)
        #for i in in_files:
        #    print "in_files =",i
    for onefile in in_files[0:esize]:
        if (os.path.isfile(input_dir + '/' + onefile)):
            o_files.append(Nio.open_file(input_dir + '/' + onefile, "r"))
        else:
            if me.get_rank() == 0:
                print "COULD NOT LOCATE FILE " + input_dir + onefile + "! EXITING...."
            sys.exit()

    # Store dimensions of the input fields
    if me.get_rank() == 0 and (verbose == True):
        print "Getting spatial dimensions"
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = o_files[0].dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = input_dims["lev"]
        elif key == "ilev":
            nilev = input_dims["ilev"]
        elif key == "ncol":
            ncol = input_dims["ncol"]
        elif (key == "nlon") or (key == "lon"):
            nlon = input_dims[key]
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = input_dims[key]
            latkey = key

    if (nlev == -1):
        if me.get_rank() == 0:
            print "COULD NOT LOCATE valid dimension lev => EXITING...."
        sys.exit()

    if ((ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank() == 0:
            print "Need either lat/lon or ncol  => EXITING...."
        sys.exit()

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False

    # Make sure all files have the same dimensions
    if me.get_rank() == 0 and (verbose == True):
        print "Checking dimensions across files...."
        print 'lev = ', nlev
        if (is_SE == True):
            print 'ncol = ', ncol
        else:
            print 'nlat = ', nlat
            print 'nlon = ', nlon

    for count, this_file in enumerate(o_files):
        input_dims = this_file.dimensions
        if (is_SE == True):
            if (nlev != int(input_dims["lev"])
                    or (ncol != int(input_dims["ncol"]))):
                if me.get_rank() == 0:
                    print "Dimension mismatch between ", in_files[
                        0], 'and', in_files[0], '!!!'
                sys.exit()
        else:
            if ( nlev != int(input_dims["lev"]) or ( nlat != int(input_dims[latkey]))\
                  or ( nlon != int(input_dims[lonkey]))):
                if me.get_rank() == 0:
                    print "Dimension mismatch between ", in_files[
                        0], 'and', in_files[0], '!!!'
                sys.exit()

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = o_files[0].variables
    # Remove the excluded variables (specified in json file) from variable dictionary
    #print len(vars_dict_all)
    if exclude:
        vars_dict = vars_dict_all.copy()
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    # Given an included var list, remove all float variables not on the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.iteritems():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                #print vars_dict_all[k].typecode()
                #print k
                del vars_dict[k]

    num_vars = len(vars_dict)
    #print num_vars
    #if me.get_rank() == 0:
    #   for k,v in vars_dict.iteritems():
    #       print 'vars_dict',k,vars_dict[k].typecode()

    str_size = 0

    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.iteritems():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = v.rank  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True):  # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlat, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and
                                 (vs[1] == nlev or vs[1] == nilev))):
                is_3d = True
                num_3d += 1

        if (is_3d == True):
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)
        #else:
        #    print 'var=',k

    if me.get_rank() == 0 and (verbose == True):
        print 'Number of variables found:  ', num_3d + num_2d
        print '3D variables: ' + str(num_3d) + ', 2D variables: ' + str(num_2d)

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if me.get_rank() == 0:
            print "************************************************************************************************************************************"
            print "  Error: the total number of 3D and 2D variables " + str(
                num_2d + num_3d
            ) + " is larger than the number of ensemble files " + str(esize)
            print "  Cannot generate ensemble summary file, please remove more variables from your included variable list,"
            print "  or add more varaibles in your excluded variable list!!!"
            print "************************************************************************************************************************************"
        sys.exit()
    # All vars is 3d vars first (sorted), then the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    #if me.get_rank() == 0 and (verbose == True):
    #    print 'num vars = ', n_all_var_names, '(3d = ', num_3d, ' and 2d = ', num_2d, ")"

    # Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    if me.get_rank() == 0 and (verbose == True):
        print "Creating ", this_sumfile, "  ..."
    if (me.get_rank() == 0) or opts_dict["popens"]:
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        opt = Nio.options()
        opt.PreFill = False
        opt.Format = 'NetCDF4Classic'
        nc_sumfile = Nio.open_file(this_sumfile, 'w', options=opt)

        # Set dimensions
        if me.get_rank() == 0 and (verbose == True):
            print "Setting dimensions ....."
        if (is_SE == True):
            nc_sumfile.create_dimension('ncol', ncol)
        else:
            nc_sumfile.create_dimension('nlat', nlat)
            nc_sumfile.create_dimension('nlon', nlon)
        nc_sumfile.create_dimension('nlev', nlev)
        nc_sumfile.create_dimension('ens_size', esize)
        nc_sumfile.create_dimension('nvars', num_3d + num_2d)
        nc_sumfile.create_dimension('nvars3d', num_3d)
        nc_sumfile.create_dimension('nvars2d', num_2d)
        nc_sumfile.create_dimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if me.get_rank() == 0 and (verbose == True):
            print "Setting global attributes ....."
        setattr(nc_sumfile, 'creation_date', now)
        setattr(nc_sumfile, 'title', 'CAM verification ensemble summary file')
        setattr(nc_sumfile, 'tag', opts_dict["tag"])
        setattr(nc_sumfile, 'compset', opts_dict["compset"])
        setattr(nc_sumfile, 'resolution', opts_dict["res"])
        setattr(nc_sumfile, 'machine', opts_dict["mach"])

        # Create variables
        if me.get_rank() == 0 and (verbose == True):
            print "Creating variables ....."
        v_lev = nc_sumfile.create_variable("lev", 'f', ('nlev', ))
        v_vars = nc_sumfile.create_variable("vars", 'S1',
                                            ('nvars', 'str_size'))
        v_var3d = nc_sumfile.create_variable("var3d", 'S1',
                                             ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.create_variable("var2d", 'S1',
                                             ('nvars2d', 'str_size'))
        if not opts_dict['gmonly']:
            if (is_SE == True):
                v_ens_avg3d = nc_sumfile.create_variable(
                    "ens_avg3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_stddev3d = nc_sumfile.create_variable(
                    "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'ncol'))
                v_ens_avg2d = nc_sumfile.create_variable(
                    "ens_avg2d", 'f', ('nvars2d', 'ncol'))
                v_ens_stddev2d = nc_sumfile.create_variable(
                    "ens_stddev2d", 'f', ('nvars2d', 'ncol'))
            else:
                v_ens_avg3d = nc_sumfile.create_variable(
                    "ens_avg3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_stddev3d = nc_sumfile.create_variable(
                    "ens_stddev3d", 'f', ('nvars3d', 'nlev', 'nlat', 'nlon'))
                v_ens_avg2d = nc_sumfile.create_variable(
                    "ens_avg2d", 'f', ('nvars2d', 'nlat', 'nlon'))
                v_ens_stddev2d = nc_sumfile.create_variable(
                    "ens_stddev2d", 'f', ('nvars2d', 'nlat', 'nlon'))

            v_RMSZ = nc_sumfile.create_variable("RMSZ", 'f',
                                                ('nvars', 'ens_size'))
        v_gm = nc_sumfile.create_variable("global_mean", 'f',
                                          ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.create_variable("standardized_gm", 'f',
                                                       ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.create_variable('loadings_gm', 'f',
                                                   ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.create_variable('mu_gm', 'f', ('nvars', ))
        v_sigma_gm = nc_sumfile.create_variable('sigma_gm', 'f', ('nvars', ))
        v_sigma_scores_gm = nc_sumfile.create_variable('sigma_scores_gm', 'f',
                                                       ('nvars', ))

        # Assign vars, var3d and var2d
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning vars, var3d, and var2d ....."

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if me.get_rank() == 0 and (verbose == True):
            print "Assigning time invariant metadata ....."
        lev_data = vars_dict["lev"]
        v_lev[:] = lev_data[:]

    # Form ensembles, each missing one member; compute RMSZs and global means
    # for each variable (the max norm is also computed, currently in pyStats)
    tslice = opts_dict['tslice']

    if not opts_dict['cumul']:
        # Partition the var list

        var3_list_loc = me.partition(d3_var_names,
                                     func=EqualStride(),
                                     involved=True)
        var2_list_loc = me.partition(d2_var_names,
                                     func=EqualStride(),
                                     involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names
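
    # (EqualStride() deals the sorted variable names out with a fixed
    # stride, so each rank gets every nth name and the load stays balanced.)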

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print "Calculating global means ....."
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            o_files, var3_list_loc, var2_list_loc, is_SE, False, opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print "Finish calculating global means ....."

    # Calculate RMSZ scores
    if (not opts_dict['gmonly']) or opts_dict['cumul']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating RMSZ scores ....."
        zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d, temp1, temp2 = pyEnsLib.calc_rmsz(
            o_files, var3_list_loc, var2_list_loc, is_SE, opts_dict)

    # Calculate max norm ensemble
    if opts_dict['maxnorm']:
        if me.get_rank() == 0 and (verbose == True):
            print "Calculating max norm of ensembles ....."
        pyEnsLib.calculate_maxnormens(opts_dict, var3_list_loc)
        pyEnsLib.calculate_maxnormens(opts_dict, var2_list_loc)

    if opts_dict['mpi_enable'] & (not opts_dict['popens']):

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(o_files)))
            if not opts_dict['gmonly']:
                # Gather zscore3d results
                zscore3d = gather_npArray(zscore3d, me, slice_index,
                                          (len(d3_var_names), len(o_files)))

                # Gather ens_avg3d and ens_stddev3d results
                shape_tuple3d = get_shape(ens_avg3d.shape, len(d3_var_names),
                                          me.get_rank())
                ens_avg3d = gather_npArray(ens_avg3d, me, slice_index,
                                           shape_tuple3d)
                ens_stddev3d = gather_npArray(ens_stddev3d, me, slice_index,
                                              shape_tuple3d)

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(o_files)))

            var_list = gather_list(var_list, me)

            if not opts_dict['gmonly']:
                # Gather zscore2d results
                zscore2d = gather_npArray(zscore2d, me, slice_index,
                                          (len(d2_var_names), len(o_files)))

                # Gather ens_avg2d and ens_stddev2d results
                shape_tuple2d = get_shape(ens_avg2d.shape, len(d2_var_names),
                                          me.get_rank())
                ens_avg2d = gather_npArray(ens_avg2d, me, slice_index,
                                           shape_tuple2d)
                ens_stddev2d = gather_npArray(ens_stddev2d, me, slice_index,
                                              shape_tuple2d)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(d3_var_names) + len(d2_var_names)))
    # Assign to file:
    if (me.get_rank() == 0) or opts_dict['popens']:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
            if not opts_dict['gmonly']:
                Zscoreall = np.concatenate((zscore3d, zscore2d), axis=0)
                v_RMSZ[:, :] = Zscoreall[:, :]
                if (is_SE == True):
                    v_ens_avg3d[:, :, :] = ens_avg3d[:, :, :]
                    v_ens_stddev3d[:, :, :] = ens_stddev3d[:, :, :]
                    v_ens_avg2d[:, :] = ens_avg2d[:, :]
                    v_ens_stddev2d[:, :] = ens_stddev2d[:, :]
                else:
                    v_ens_avg3d[:, :, :, :] = ens_avg3d[:, :, :, :]
                    v_ens_stddev3d[:, :, :, :] = ens_stddev3d[:, :, :, :]
                    v_ens_avg2d[:, :, :] = ens_avg2d[:, :, :]
                    v_ens_stddev2d[:, :, :] = ens_stddev2d[:, :, :]
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm = pyEnsLib.pre_PCA(
            gmall, all_var_names, var_list, me)
        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:].astype(np.float32)
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        if me.get_rank() == 0:
            print "All Done"
Beispiel #39
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = 'tag= compset= esize= tslice= res= sumfile= indir= sumfiledir= mach= verbose jsonfile= mpi_enable maxnorm gmonly popens cumul regx= startMon= endMon= fIndex= mpi_disable'
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSum_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_0'
    opts_dict['compset'] = 'F2000climo'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['esize'] = 350
    opts_dict['tslice'] = 1
    opts_dict['res'] = 'f19_f19'
    opts_dict['sumfile'] = 'ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['sumfiledir'] = './'
    opts_dict['jsonfile'] = 'exclude_empty.json'
    opts_dict['verbose'] = False
    opts_dict['mpi_enable'] = True
    opts_dict['mpi_disable'] = False
    opts_dict['maxnorm'] = False
    opts_dict['gmonly'] = True
    opts_dict['popens'] = False
    opts_dict['cumul'] = False
    opts_dict['regx'] = 'test'
    opts_dict['startMon'] = 1
    opts_dict['endMon'] = 1
    opts_dict['fIndex'] = 151

    # This creates the dictionary of input arguments
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ES', opts_dict)

    verbose = opts_dict['verbose']

    st = opts_dict['esize']
    esize = int(st)

    if opts_dict['popens'] == True:
        print(
            "ERROR: Please use pyEnsSumPop.py for a POP ensemble (not --popens)  => EXITING...."
        )
        sys.exit()

    if not (opts_dict['tag'] and opts_dict['compset'] and opts_dict['mach']
            and opts_dict['res']):
        print(
            'ERROR: Please specify --tag, --compset, --mach and --res options  => EXITING....'
        )
        sys.exit()

    if opts_dict['mpi_disable'] == True:
        opts_dict['mpi_enable'] = False

    # Now find file names in indir
    input_dir = opts_dict['indir']
    # The var list that will be excluded
    ex_varlist = []
    inc_varlist = []

    # Create an MPI simplecomm object (serial if MPI is not enabled)
    me = simplecomm.create_comm(serial=not opts_dict['mpi_enable'])

    if me.get_rank() == 0:
        print('STATUS: Running pyEnsSum.py')

    if me.get_rank() == 0 and (verbose == True):
        print(opts_dict)
        print('STATUS: Ensemble size for summary = ', esize)

    exclude = False
    if me.get_rank() == 0:
        if opts_dict['jsonfile']:
            inc_varlist = []
            # Read in the excluded or included var list
            ex_varlist, exclude = pyEnsLib.read_jsonlist(
                opts_dict['jsonfile'], 'ES')
            if exclude == False:
                inc_varlist = ex_varlist
                ex_varlist = []

    # Broadcast the excluded var list to each processor
    if opts_dict['mpi_enable']:
        exclude = me.partition(exclude, func=Duplicate(), involved=True)
        if exclude:
            ex_varlist = me.partition(ex_varlist,
                                      func=Duplicate(),
                                      involved=True)
        else:
            inc_varlist = me.partition(inc_varlist,
                                       func=Duplicate(),
                                       involved=True)

    in_files = []
    if (os.path.exists(input_dir)):
        # Get the list of files
        in_files_temp = os.listdir(input_dir)
        in_files = sorted(in_files_temp)

        # Make sure we have enough
        num_files = len(in_files)
        if me.get_rank() == 0 and (verbose == True):
            print('VERBOSE: Number of files in input directory = ', num_files)
        if (num_files < esize):
            if me.get_rank() == 0 and (verbose == True):
                print('VERBOSE: Number of files in input directory (',num_files,\
                 ') is less than specified ensemble size of ', esize)
            sys.exit(2)
        if (num_files > esize):
            if me.get_rank() == 0 and (verbose == True):
                print('VERBOSE: Note that the number of files in ', input_dir, \
                 'is greater than specified ensemble size of ', esize ,\
                 '\nwill just use the first ',  esize, 'files')
    else:
        if me.get_rank() == 0:
            print('ERROR: Input directory: ', input_dir, ' not found')
        sys.exit(2)

    if opts_dict['cumul']:
        if opts_dict['regx']:
            in_files_list = get_cumul_filelist(opts_dict, opts_dict['indir'],
                                               opts_dict['regx'])
        in_files = me.partition(in_files_list,
                                func=EqualLength(),
                                involved=True)
        if me.get_rank() == 0 and (verbose == True):
            print('VERBOSE: in_files  = ', in_files)

    # Check full file names in input directory (don't open yet)
    full_in_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('VERBOSE: Input files are: ')

    for onefile in in_files[0:esize]:
        fname = input_dir + '/' + onefile
        if me.get_rank() == 0 and opts_dict['verbose']:
            print(fname)
        if (os.path.isfile(fname)):
            full_in_files.append(fname)
        else:
            if me.get_rank() == 0:
                print("ERROR: Could not locate file ", fname,
                      " => EXITING....")
            sys.exit()

    #open just the first file
    first_file = nc.Dataset(full_in_files[0], "r")
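    # (Note: unlike the older PyNIO-based version above, this version reads
    # files with the netCDF4-python module, imported here as 'nc'.)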

    # Store dimensions of the input fields
    if me.get_rank() == 0 and (verbose == True):
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nilev = -1
    ncol = -1
    nlat = -1
    nlon = -1
    lonkey = ''
    latkey = ''
    # Look at first file and get dims
    input_dims = first_file.dimensions
    ndims = len(input_dims)

    for key in input_dims:
        if key == "lev":
            nlev = len(input_dims["lev"])
        elif key == "ilev":
            nilev = len(input_dims["ilev"])
        elif key == "ncol":
            ncol = len(input_dims["ncol"])
        elif (key == "nlon") or (key == "lon"):
            nlon = len(input_dims[key])
            lonkey = key
        elif (key == "nlat") or (key == "lat"):
            nlat = len(input_dims[key])
            latkey = key

    if (nlev == -1):
        if me.get_rank() == 0:
            print(
                "ERROR: could not locate a valid dimension (lev) => EXITING...."
            )
        sys.exit()

    if ((ncol == -1) and ((nlat == -1) or (nlon == -1))):
        if me.get_rank() == 0:
            print("ERROR: Need either lat/lon or ncol  => EXITING....")
        sys.exit()

    # Check if this is SE or FV data
    if (ncol != -1):
        is_SE = True
    else:
        is_SE = False
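
    # (Spectral-element (SE) output lies on an unstructured grid with a single
    # horizontal dimension 'ncol'; finite-volume (FV) output uses structured
    # 'nlat' x 'nlon' dimensions.)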

    # output dimensions
    if me.get_rank() == 0 and (verbose == True):
        print('lev = ', nlev)
        if (is_SE == True):
            print('ncol = ', ncol)
        else:
            print('nlat = ', nlat)
            print('nlon = ', nlon)

    # Get 2d vars, 3d vars and all vars (For now include all variables)
    vars_dict_all = first_file.variables

    # Remove the excluded variables (specified in json file) from variable dictionary
    if exclude:
        vars_dict = vars_dict_all.copy()
        for i in ex_varlist:
            if i in vars_dict:
                del vars_dict[i]
    # Given an included var list, remove all float variables not on the list
    else:
        vars_dict = vars_dict_all.copy()
        for k, v in vars_dict_all.items():
            if (k not in inc_varlist) and (vars_dict_all[k].typecode() == 'f'):
                del vars_dict[k]

    num_vars = len(vars_dict)

    str_size = 0
    d2_var_names = []
    d3_var_names = []
    num_2d = 0
    num_3d = 0

    # Which are 2d, which are 3d and max str_size
    for k, v in vars_dict.items():
        var = k
        vd = v.dimensions  # all the variable's dimensions (names)
        vr = len(v.dimensions)  # num dimension
        vs = v.shape  # dim values
        is_2d = False
        is_3d = False
        if (is_SE == True):  # (time, lev, ncol) or (time, ncol)
            if ((vr == 2) and (vs[1] == ncol)):
                is_2d = True
                num_2d += 1
            elif ((vr == 3) and (vs[2] == ncol and vs[1] == nlev)):
                is_3d = True
                num_3d += 1
        else:  # (time, lev, nlat, nlon) or (time, nlat, nlon)
            if ((vr == 3) and (vs[1] == nlat and vs[2] == nlon)):
                is_2d = True
                num_2d += 1
            elif ((vr == 4) and (vs[2] == nlat and vs[3] == nlon and
                                 (vs[1] == nlev or vs[1] == nilev))):
                is_3d = True
                num_3d += 1

        if (is_3d == True):
            str_size = max(str_size, len(k))
            d3_var_names.append(k)
        elif (is_2d == True):
            str_size = max(str_size, len(k))
            d2_var_names.append(k)

    if me.get_rank() == 0 and (verbose == True):
        print('VERBOSE: Number of variables found:  ', num_3d + num_2d)
        print('VERBOSE: 3D variables: ' + str(num_3d) + ', 2D variables: ' +
              str(num_2d))

    # Now sort these and combine (this sorts caps first, then lower case -
    # which is what we want)
    d2_var_names.sort()
    d3_var_names.sort()

    if esize < num_2d + num_3d:
        if me.get_rank() == 0:
            print(
                "************************************************************************************************************************************"
            )
            print("  ERROR: the total number of 3D and 2D variables " +
                  str(num_2d + num_3d) +
                  " is larger than the number of ensemble files " + str(esize))
            print(
                "  Cannot generate ensemble summary file, please remove more variables from your included variable list,"
            )
            print(
                "  or add more variables in your excluded variable list  => EXITING...."
            )
            print(
                "************************************************************************************************************************************"
            )
        sys.exit()
    # All vars is 3d vars first (sorted), then the 2d vars
    all_var_names = list(d3_var_names)
    all_var_names += d2_var_names
    n_all_var_names = len(all_var_names)

    # Rank 0 - Create new summary ensemble file
    this_sumfile = opts_dict["sumfile"]

    #check if directory is valid
    sum_dir = os.path.dirname(this_sumfile)
    if len(sum_dir) == 0:
        sum_dir = '.'
    if (os.path.exists(sum_dir) == False):
        if me.get_rank() == 0:
            print('ERROR: Summary file directory: ', sum_dir, ' not found')
        sys.exit(2)

    if (me.get_rank() == 0):

        if (verbose == True):
            print("VERBOSE: Creating ", this_sumfile, "  ...")

        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)
        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if (verbose == True):
            print("VERBOSE: Setting dimensions .....")
        if (is_SE == True):
            nc_sumfile.createDimension('ncol', ncol)
        else:
            nc_sumfile.createDimension('nlat', nlat)
            nc_sumfile.createDimension('nlon', nlon)

        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('ens_size', esize)
        nc_sumfile.createDimension('nvars', num_3d + num_2d)
        nc_sumfile.createDimension('nvars3d', num_3d)
        nc_sumfile.createDimension('nvars2d', num_2d)
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if (verbose == True):
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'CAM verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if (verbose == True):
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("lev", 'f8', ('nlev', ))
        v_vars = nc_sumfile.createVariable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1',
                                            ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1',
                                            ('nvars2d', 'str_size'))

        v_gm = nc_sumfile.createVariable("global_mean", 'f8',
                                         ('nvars', 'ens_size'))
        v_standardized_gm = nc_sumfile.createVariable("standardized_gm", 'f8',
                                                      ('nvars', 'ens_size'))
        v_loadings_gm = nc_sumfile.createVariable('loadings_gm', 'f8',
                                                  ('nvars', 'nvars'))
        v_mu_gm = nc_sumfile.createVariable('mu_gm', 'f8', ('nvars', ))
        v_sigma_gm = nc_sumfile.createVariable('sigma_gm', 'f8', ('nvars', ))
        v_sigma_scores_gm = nc_sumfile.createVariable('sigma_scores_gm', 'f8',
                                                      ('nvars', ))

        # Assign vars, var3d and var2d
        if (verbose == True):
            print("VERBOSE: Assigning vars, var3d, and var2d .....")

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []

        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(d3_var_names)
        for i in range(l_eq):
            tt = list(d3_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(d2_var_names)
        for i in range(l_eq):
            tt = list(d2_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if (verbose == True):
            print("VERBOSE: Assigning time invariant metadata .....")


        lev_data = first_file.variables["lev"]
        v_lev[:] = lev_data[:]
    #end of rank=0 work

    # All:
    tslice = opts_dict['tslice']
    if not opts_dict['cumul']:
        # Partition the var list
        var3_list_loc = me.partition(d3_var_names,
                                     func=EqualStride(),
                                     involved=True)
        var2_list_loc = me.partition(d2_var_names,
                                     func=EqualStride(),
                                     involved=True)
    else:
        var3_list_loc = d3_var_names
        var2_list_loc = d2_var_names

    #close first_file
    first_file.close()

    # Calculate global means #
    if me.get_rank() == 0 and (verbose == True):
        print("VERBOSE: Calculating global means .....")
    if not opts_dict['cumul']:
        gm3d, gm2d, var_list = pyEnsLib.generate_global_mean_for_summary(
            full_in_files, var3_list_loc, var2_list_loc, is_SE, False,
            opts_dict)
    if me.get_rank() == 0 and (verbose == True):
        print("VERBOSE: Finished calculating global means .....")

    #gather to rank = 0
    if opts_dict['mpi_enable']:

        if not opts_dict['cumul']:
            # Gather the 3d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d3_var_names), me)

            # Gather global means 3d results
            gm3d = gather_npArray(gm3d, me, slice_index,
                                  (len(d3_var_names), len(full_in_files)))

            # Gather 2d variable results from all processors to the master processor
            slice_index = get_stride_list(len(d2_var_names), me)

            # Gather global means 2d results
            gm2d = gather_npArray(gm2d, me, slice_index,
                                  (len(d2_var_names), len(full_in_files)))

            # gather variables to exclude (in pre_PCA)
            var_list = gather_list(var_list, me)

        else:
            gmall = np.concatenate((temp1, temp2), axis=0)
            gmall = pyEnsLib.gather_npArray_pop(
                gmall, me,
                (me.get_size(), len(d3_var_names) + len(d2_var_names)))

    # rank =0 : complete calculations for summary file
    if me.get_rank() == 0:
        if not opts_dict['cumul']:
            gmall = np.concatenate((gm3d, gm2d), axis=0)
        else:
            gmall_temp = np.transpose(gmall[:, :])
            gmall = gmall_temp

        #PCA prep and calculation
        mu_gm, sigma_gm, standardized_global_mean, loadings_gm, scores_gm, b_exit = pyEnsLib.pre_PCA(
            gmall, all_var_names, var_list, me)
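
        # (pre_PCA standardizes the gathered global means and computes the
        # principal-component loadings and scores written to the summary
        # file below.)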

        #if PCA calc encounters an error, then remove the summary file and exit
        if b_exit:
            nc_sumfile.close()
            os.unlink(this_sumfile)
            print("STATUS: Summary could not be created.")
            sys.exit(2)

        v_gm[:, :] = gmall[:, :]
        v_standardized_gm[:, :] = standardized_global_mean[:, :]
        v_mu_gm[:] = mu_gm[:]
        v_sigma_gm[:] = sigma_gm[:]
        v_loadings_gm[:, :] = loadings_gm[:, :]
        v_sigma_scores_gm[:] = scores_gm[:]

        print("STATUS: Summary file is complete.")

        nc_sumfile.close()
Beispiel #40
0
def main(argv):

    # Get command line stuff and store in a dictionary
    s = 'nyear= nmonth= npert= tag= res= mach= compset= sumfile= indir= tslice= verbose jsonfile= mpi_enable mpi_disable nrand= rand seq= jsondir= esize='
    optkeys = s.split()
    try:
        opts, args = getopt.getopt(argv, "h", optkeys)
    except getopt.GetoptError:
        pyEnsLib.EnsSumPop_usage()
        sys.exit(2)

    # Put command line options in a dictionary - also set defaults
    opts_dict = {}

    # Defaults
    opts_dict['tag'] = 'cesm2_1_0'
    opts_dict['compset'] = 'G'
    opts_dict['mach'] = 'cheyenne'
    opts_dict['tslice'] = 0
    opts_dict['nyear'] = 1
    opts_dict['nmonth'] = 12
    opts_dict['esize'] = 40
    opts_dict['npert'] = 0  # for backwards compatibility
    opts_dict['nbin'] = 40
    opts_dict['minrange'] = 0.0
    opts_dict['maxrange'] = 4.0
    opts_dict['res'] = 'T62_g17'
    opts_dict['sumfile'] = 'pop.ens.summary.nc'
    opts_dict['indir'] = './'
    opts_dict['jsonfile'] = 'pop_ensemble.json'
    opts_dict['verbose'] = True
    opts_dict['mpi_enable'] = True
    opts_dict['mpi_disable'] = False
    #opts_dict['zscoreonly'] = True
    opts_dict['popens'] = True
    opts_dict['nrand'] = 40
    opts_dict['rand'] = False
    opts_dict['seq'] = 0
    opts_dict['jsondir'] = './'

    # This creates the dictionary of input arguments
    #print "before parseconfig"
    opts_dict = pyEnsLib.getopt_parseconfig(opts, optkeys, 'ESP', opts_dict)

    verbose = opts_dict['verbose']
    nbin = opts_dict['nbin']

    if opts_dict['mpi_disable']:
        opts_dict['mpi_enable'] = False

    #still have npert for backwards compatibility - check if it was set
    #and override esize
    if opts_dict['npert'] > 0:
        user_size = opts_dict['npert']
        print(
            'WARNING: User specified value for --npert will override --esize.  Please consider using --esize instead of --npert in the future.'
        )
        opts_dict['esize'] = user_size

    # Now find file names in indir
    input_dir = opts_dict['indir']

    # Create an MPI simplecomm object (serial if MPI is not enabled)
    me = simplecomm.create_comm(serial=not opts_dict['mpi_enable'])

    if opts_dict['jsonfile']:
        # Read in the included var list
        Var2d, Var3d = pyEnsLib.read_jsonlist(opts_dict['jsonfile'], 'ESP')
        # Find the longest variable name (avoid shadowing the builtin 'str')
        str_size = 0
        for vname in Var3d:
            str_size = max(str_size, len(vname))
        for vname in Var2d:
            str_size = max(str_size, len(vname))

    if me.get_rank() == 0:
        print('STATUS: Running pyEnsSumPop!')

        if verbose:
            print("VERBOSE: opts_dict = ")
            print(opts_dict)

    in_files = []
    if (os.path.exists(input_dir)):
        # Pick up the 'nrand' random number of input files to generate summary files
        if opts_dict['rand']:
            in_files = pyEnsLib.Random_pickup_pop(input_dir, opts_dict,
                                                  opts_dict['nrand'])
        else:
            # Get the list of files
            in_files_temp = os.listdir(input_dir)
            in_files = sorted(in_files_temp)
        num_files = len(in_files)

    else:
        if me.get_rank() == 0:
            print('ERROR: Input directory: ', input_dir,
                  ' not found => EXITING....')
        sys.exit(2)

    #make sure we have enough files
    files_needed = opts_dict['nmonth'] * opts_dict['esize'] * opts_dict['nyear']
    if (num_files < files_needed):
        if me.get_rank() == 0:
            print(
                'ERROR: Input directory does not contain enough files (need esize*nyear*nmonth = ',
                files_needed, ' but found only ', num_files, ' files).')
        sys.exit(2)

    #Partition the input file list (ideally we have one processor per month)
    in_file_list = me.partition(in_files, func=EqualStride(), involved=True)
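    # (With one rank per month, EqualStride() leaves each rank holding the
    # files for a distinct month.)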

    # Check the files in the input directory
    full_in_files = []
    if me.get_rank() == 0 and opts_dict['verbose']:
        print('VERBOSE: Input files are:')

    for onefile in in_file_list:
        fname = input_dir + '/' + onefile
        if opts_dict['verbose']:
            print("my_rank = ", me.get_rank(), "  ", fname)
        if (os.path.isfile(fname)):
            full_in_files.append(fname)
        else:
            print("ERROR: Could not locate file: " + fname + " => EXITING....")
            sys.exit()

    #open just the first file (all procs)
    first_file = nc.Dataset(full_in_files[0], "r")

    # Store dimensions of the input fields
    if (verbose == True) and me.get_rank() == 0:
        print("VERBOSE: Getting spatial dimensions")
    nlev = -1
    nlat = -1
    nlon = -1

    # Look at first file and get dims
    input_dims = first_file.dimensions
    ndims = len(input_dims)

    # Make sure all files have the same dimensions
    if (verbose == True) and me.get_rank() == 0:
        print("VERBOSE: Checking dimensions ...")
    for key in input_dims:
        if key == "z_t":
            nlev = len(input_dims["z_t"])
        elif key == "nlon":
            nlon = len(input_dims["nlon"])
        elif key == "nlat":
            nlat = len(input_dims["nlat"])

    # Rank 0: prepare new summary ensemble file
    this_sumfile = opts_dict["sumfile"]
    if (me.get_rank() == 0):
        if os.path.exists(this_sumfile):
            os.unlink(this_sumfile)

        if verbose:
            print("VERBOSE: Creating ", this_sumfile, "  ...")

        nc_sumfile = nc.Dataset(this_sumfile, "w", format="NETCDF4_CLASSIC")

        # Set dimensions
        if verbose:
            print("VERBOSE: Setting dimensions .....")
        nc_sumfile.createDimension('nlat', nlat)
        nc_sumfile.createDimension('nlon', nlon)
        nc_sumfile.createDimension('nlev', nlev)
        nc_sumfile.createDimension('time', None)
        nc_sumfile.createDimension('ens_size', opts_dict['esize'])
        nc_sumfile.createDimension('nbin', opts_dict['nbin'])
        nc_sumfile.createDimension('nvars', len(Var3d) + len(Var2d))
        nc_sumfile.createDimension('nvars3d', len(Var3d))
        nc_sumfile.createDimension('nvars2d', len(Var2d))
        nc_sumfile.createDimension('str_size', str_size)

        # Set global attributes
        now = time.strftime("%c")
        if verbose:
            print("VERBOSE: Setting global attributes .....")
        nc_sumfile.creation_date = now
        nc_sumfile.title = 'POP verification ensemble summary file'
        nc_sumfile.tag = opts_dict["tag"]
        nc_sumfile.compset = opts_dict["compset"]
        nc_sumfile.resolution = opts_dict["res"]
        nc_sumfile.machine = opts_dict["mach"]

        # Create variables
        if verbose:
            print("VERBOSE: Creating variables .....")
        v_lev = nc_sumfile.createVariable("z_t", 'f', ('nlev', ))
        v_vars = nc_sumfile.createVariable("vars", 'S1', ('nvars', 'str_size'))
        v_var3d = nc_sumfile.createVariable("var3d", 'S1',
                                            ('nvars3d', 'str_size'))
        v_var2d = nc_sumfile.createVariable("var2d", 'S1',
                                            ('nvars2d', 'str_size'))
        v_time = nc_sumfile.createVariable("time", 'd', ('time', ))
        v_ens_avg3d = nc_sumfile.createVariable(
            "ens_avg3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_stddev3d = nc_sumfile.createVariable(
            "ens_stddev3d", 'f', ('time', 'nvars3d', 'nlev', 'nlat', 'nlon'))
        v_ens_avg2d = nc_sumfile.createVariable(
            "ens_avg2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_ens_stddev2d = nc_sumfile.createVariable(
            "ens_stddev2d", 'f', ('time', 'nvars2d', 'nlat', 'nlon'))
        v_RMSZ = nc_sumfile.createVariable(
            "RMSZ", 'f', ('time', 'nvars', 'ens_size', 'nbin'))

        # Assign vars, var3d and var2d
        if verbose:
            print("VERBOSE: Assigning vars, var3d, and var2d .....")

        eq_all_var_names = []
        eq_d3_var_names = []
        eq_d2_var_names = []
        all_var_names = list(Var3d)
        all_var_names += Var2d
        l_eq = len(all_var_names)
        for i in range(l_eq):
            tt = list(all_var_names[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_all_var_names.append(tt)

        l_eq = len(Var3d)
        for i in range(l_eq):
            tt = list(Var3d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d3_var_names.append(tt)

        l_eq = len(Var2d)
        for i in range(l_eq):
            tt = list(Var2d[i])
            l_tt = len(tt)
            if (l_tt < str_size):
                extra = list(' ') * (str_size - l_tt)
                tt.extend(extra)
            eq_d2_var_names.append(tt)

        v_vars[:] = eq_all_var_names[:]
        v_var3d[:] = eq_d3_var_names[:]
        v_var2d[:] = eq_d2_var_names[:]

        # Time-invariant metadata
        if verbose:
            print("VERBOSE: Assigning time invariant metadata .....")
        vars_dict = first_file.variables
        lev_data = vars_dict["z_t"]
        v_lev[:] = lev_data[:]

        #end of rank 0

    #All:
    # Time-variant metadata
    if verbose:
        if me.get_rank() == 0:
            print("VERBOSE: Assigning time variant metadata .....")
    vars_dict = first_file.variables
    time_value = vars_dict['time']
    time_array = np.array([time_value])
    time_array = pyEnsLib.gather_npArray_pop(time_array, me, (me.get_size(), ))
    if me.get_rank() == 0:
        v_time[:] = time_array[:]

    #Assign zero values to first time slice of RMSZ and avg and stddev for 2d & 3d
    #in case of a calculation problem before finishing
    e_size = opts_dict['esize']
    b_size = opts_dict['nbin']
    z_ens_avg3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32)
    z_ens_stddev3d = np.zeros((len(Var3d), nlev, nlat, nlon), dtype=np.float32)
    z_ens_avg2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32)
    z_ens_stddev2d = np.zeros((len(Var2d), nlat, nlon), dtype=np.float32)
    z_RMSZ = np.zeros(((len(Var3d) + len(Var2d)), e_size, b_size),
                      dtype=np.float32)

    #rank 0 (put zero values in summary file)
    if me.get_rank() == 0:
        v_RMSZ[0, :, :, :] = z_RMSZ[:, :, :]
        v_ens_avg3d[0, :, :, :, :] = z_ens_avg3d[:, :, :, :]
        v_ens_stddev3d[0, :, :, :, :] = z_ens_stddev3d[:, :, :, :]
        v_ens_avg2d[0, :, :, :] = z_ens_avg2d[:, :, :]
        v_ens_stddev2d[0, :, :, :] = z_ens_stddev2d[:, :, :]

    #close file[0]
    first_file.close()

    # Calculate RMSZ scores
    if (verbose == True and me.get_rank() == 0):
        print("VERBOSE: Calculating RMSZ scores .....")

    zscore3d, zscore2d, ens_avg3d, ens_stddev3d, ens_avg2d, ens_stddev2d = pyEnsLib.calc_rmsz(
        full_in_files, Var3d, Var2d, opts_dict)
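
    # (RMSZ is, roughly, the root-mean-square of a member's Z-scores relative
    # to the ensemble mean and standard deviation; see pyEnsLib.calc_rmsz for
    # the exact, binned definition used here.)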

    if (verbose == True and me.get_rank() == 0):
        print("VERBOSE: Finished with RMSZ scores .....")

    # Collect from all processors
    if opts_dict['mpi_enable']:
        # Gather the 3d variable results from all processors to the master processor

        zmall = np.concatenate((zscore3d, zscore2d), axis=0)
        zmall = pyEnsLib.gather_npArray_pop(
            zmall, me,
            (me.get_size(), len(Var3d) + len(Var2d), len(full_in_files), nbin))

        ens_avg3d = pyEnsLib.gather_npArray_pop(
            ens_avg3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_avg2d = pyEnsLib.gather_npArray_pop(ens_avg2d, me,
                                                (me.get_size(), len(Var2d),
                                                 (nlat), nlon))
        ens_stddev3d = pyEnsLib.gather_npArray_pop(
            ens_stddev3d, me, (me.get_size(), len(Var3d), nlev, (nlat), nlon))
        ens_stddev2d = pyEnsLib.gather_npArray_pop(ens_stddev2d, me,
                                                   (me.get_size(), len(Var2d),
                                                    (nlat), nlon))

    # Assign to summary file:
    if me.get_rank() == 0:

        v_RMSZ[:, :, :, :] = zmall[:, :, :, :]
        v_ens_avg3d[:, :, :, :, :] = ens_avg3d[:, :, :, :, :]
        v_ens_stddev3d[:, :, :, :, :] = ens_stddev3d[:, :, :, :, :]
        v_ens_avg2d[:, :, :, :] = ens_avg2d[:, :, :, :]
        v_ens_stddev2d[:, :, :, :] = ens_stddev2d[:, :, :, :]

        print("STATUS: PyEnsSumPop has completed.")

        nc_sumfile.close()
Beispiel #41
0
    def __init__(self, specifier, serial=False, verbosity=1, wmode='w', once=False, simplecomm=None):
        """
        Constructor

        Parameters:
            specifier (Specifier): An instance of the Specifier class,
                defining the input specification for this reshaper operation.
            serial (bool): True or False, indicating whether the operation
                should be performed in serial (True) or parallel
                (False).  The default is to assume parallel operation
                (but serial will be chosen if mpi4py cannot be
                found when trying to initialize the decomposition).
            verbosity (int): Level of printed output (stdout).  A value of 0
                means no output, and a higher value means more output.  The
                default value is 1.
            wmode (str): The mode to use for writing output.  Can be 'w' for
                normal write operation, 's' to skip the output generation for
                existing time-series files, 'o' to overwrite existing
                time-series files, or 'a' to append to existing time-series
                files.
            once (bool): True or False, indicating whether the Reshaper should
                write all metadata to a 'once' file (separately).
            simplecomm (SimpleComm): A SimpleComm object to handle the parallel
                communication, if necessary
        """

        # Type checking (or double-checking)
        if not isinstance(specifier, Specifier):
            err_msg = "Input must be given in the form of a Specifier object"
            raise TypeError(err_msg)
        if type(serial) is not bool:
            err_msg = "Serial indicator must be True or False."
            raise TypeError(err_msg)
        if type(verbosity) is not int:
            err_msg = "Verbosity level must be an integer."
            raise TypeError(err_msg)
        if type(wmode) is not str:
            err_msg = "Write mode flag must be a str."
            raise TypeError(err_msg)
        if type(once) is not bool:
            err_msg = "Once-file indicator must be True or False."
            raise TypeError(err_msg)
        if simplecomm is not None:
            if not isinstance(simplecomm, SimpleComm):
                err_msg = "Simple communicator object is not a SimpleComm"
                raise TypeError(err_msg)
        if wmode not in ['w', 's', 'o', 'a']:
            err_msg = "Write mode '{0}' not recognized".format(wmode)
            raise ValueError(err_msg)

        # Whether to write a once file
        self._use_once_file = once

        # The output write mode to use
        self._write_mode = wmode

        # Internal timer data
        self._timer = TimeKeeper()

        self._timer.start('Initializing Simple Communicator')
        if simplecomm is None:
            simplecomm = create_comm(serial=serial)

        # Reference to the simple communicator
        self._simplecomm = simplecomm
        self._timer.stop('Initializing Simple Communicator')

        # Assumed I/O block size (in bytes) and a dictionary
        # storing read/write data amounts
        self.assumed_block_size = float(4 * 1024 * 1024)
        self._byte_counts = {}

        # Construct the print header
        header = ''.join(['[', str(self._simplecomm.get_rank()),
                          '/', str(self._simplecomm.get_size()), '] '])

        # Reference to the verbose printer tool
        self._vprint = VPrinter(header=header, verbosity=verbosity)

        # Debug output starting
        if self._simplecomm.is_manager():
            self._vprint('Initializing Reshaper...', verbosity=0)
            self._vprint('  MPI Communicator Size: {}'.format(
                self._simplecomm.get_size()), verbosity=1)

        # Validate the user input data
        self._timer.start('Specifier Validation')
        specifier.validate()
        self._timer.stop('Specifier Validation')
        if self._simplecomm.is_manager():
            self._vprint('  Specifier validated', verbosity=1)

        # The I/O backend to use
        if iobackend.is_available(specifier.io_backend):
            self._backend = specifier.io_backend
        else:
            self._backend = iobackend.get_backend()
            self._vprint(('  I/O Backend {0} not available.  Using {1} '
                          'instead').format(specifier.io_backend, self._backend), verbosity=1)

        # Store the input file names
        self._input_filenames = specifier.input_file_list

        # Store the time-series variable names
        self._time_series_names = specifier.time_series
        if self._time_series_names is not None:
            vnames = ', '.join(self._time_series_names)
            if self._simplecomm.is_manager():
                self._vprint('WARNING: Extracting only variables: {0}'.format(
                    vnames), verbosity=-1)

        # Store the list of metadata names
        self._metadata_names = specifier.time_variant_metadata

        # Store whether to treat 1D time-variant variables as metadata
        self._1d_metadata = specifier.assume_1d_time_variant_metadata

        # Store the metadata filename
        self._metadata_filename = specifier.metadata_filename

        # Store the time-invariant variables that should be excluded from the time-series files
        self._exclude_list = specifier.exclude_list

        # Store the output file prefix and suffix
        self._output_prefix = specifier.output_file_prefix
        self._output_suffix = specifier.output_file_suffix

        # Set up NetCDF file options
        self._netcdf_format = specifier.netcdf_format
        self._netcdf_compression = specifier.compression_level
        self._netcdf_least_significant_digit = specifier.least_significant_digit
        if self._simplecomm.is_manager():
            self._vprint(
                '  NetCDF I/O Backend: {0}'.format(self._backend), verbosity=1)
            self._vprint('  NetCDF Output Format: {0}'.format(
                self._netcdf_format), verbosity=1)
            self._vprint('  NetCDF Compression: {0}'.format(
                self._netcdf_compression), verbosity=1)
            trunc_str = ('{} decimal places'.format(self._netcdf_least_significant_digit)
                         if self._netcdf_least_significant_digit else 'Disabled')
            self._vprint('  NetCDF Truncation: {0}'.format(
                trunc_str), verbosity=1)

        # Helpful debugging message
        if self._simplecomm.is_manager():
            self._vprint('...Reshaper initialized.', verbosity=0)

        # Sync before continuing
        self._simplecomm.sync()
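A minimal sketch of how this constructor might be driven. The Specifier attribute names match the ones the constructor reads above; the import path and the class name 'Reshaper' are assumptions inferred from the log messages and type checks, not confirmed by this example:

from pyreshaper.specification import Specifier  # assumed import path

# Describe the conversion: these attributes are the ones the
# constructor reads (input_file_list, netcdf_format, ...)
spec = Specifier()
spec.input_file_list = ['hist.0001.nc', 'hist.0002.nc']
spec.netcdf_format = 'netcdf4'
spec.compression_level = 1
spec.output_file_prefix = 'tseries.'
spec.output_file_suffix = '.nc'
spec.time_variant_metadata = ['time_bnds']

# wmode: 'w' writes normally, 's' skips existing time-series files,
# 'o' overwrites them, and 'a' appends to them (per the docstring).
# 'Reshaper' stands in for whatever class defines the __init__ above.
reshaper = Reshaper(spec, serial=True, verbosity=1, wmode='w')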
Example #42
#!/usr/bin/env python

from asaptools import simplecomm

scomm = simplecomm.create_comm()
rank = scomm.get_rank()
size = scomm.get_size()

if scomm.is_manager():

    # Hand one work item at a time to whichever worker asks next
    for i in range(10):
        scomm.ration(i)
        print('{0}/{1}: Sent {2!r}'.format(rank, size, i))

    # Send one None sentinel per worker to signal that work is done
    for _ in range(scomm.get_size() - 1):
        scomm.ration(None)
        print('{0}/{1}: Sent None'.format(rank, size))

else:

    # Keep requesting rationed items until the None sentinel arrives
    i = -1
    while i is not None:
        i = scomm.ration()
        print('{0}/{1}: Recvd {2!r}'.format(rank, size, i))

print('{0}/{1}: Out of loop'.format(rank, size))

scomm.sync()
if scomm.is_manager():