예제 #1
0
def time_concat(var,years,hist_dict,ave_info,file_dict,ave_type,simplecomm,all_files_vars,serial,timer,collapse_dim=''):

    '''
    Concats files together in the time dimension.

    For every (year, month) pair, a non-manager rank (or the only rank when
    running serially) obtains one slice of the variable, and the manager rank
    (or the same rank when running serially) writes that slice at the matching
    time index.  The function has no return value.

    @param var             The name of the variable to concat.  If the name contains
                           '__meta', everything from the first '__' onward is dropped
                           before the variable is read.

    @param years           A list of the years that are in this average

    @param hist_dict       A dictionary that holds file references for all years/months.

    @param ave_info        A dictionary of the type of average that is to be done.
                           Includes:  type, months_to_average, fn, and weights
                           (weights are not used in this function/average)

    @param file_dict       A dictionary which holds file pointers to the input files that
                           are needed by this average calculation.

    @param ave_type        The average type key that indicated which type of average will be done.
                           When it contains 'zonalavg' the slice is produced by zonal_average,
                           otherwise by rover.fetch_slice.

    @param simplecomm      The simple comm object used for mpi communication.

    @param all_files_vars  All of the file's variables with ncids attached.

    @param serial          Boolean if running in serial mode.

    @param timer           Timer object whose start(name)/stop(name) methods are called
                           around the compute, send, receive and write phases.

    @param collapse_dim    Used to collapse/average over one dim.

    @raise ValueError      If ave_type contains 'zonalavg' and collapse_dim is None; no
                           slice can be computed in that case.

    '''
    is_manager = simplecomm.is_manager()
    # In serial mode the single rank both computes and writes.
    computes = (not is_manager) or serial
    writes = is_manager or serial

    if computes:
        print('Concatenating ',ave_info['type'],' for ',var)

    # Message tags for the metadata message and for the data message.
    CONCAT_TAG = 60
    CONCAT_VAL_TAG = 67

    # The name is the same for every time step, so strip the meta label once.
    if '__meta' in var:
        var = var.split('__')[0]

    use_zonal_average = 'zonalavg' in ave_type

    # Loop over years and months to cat all the slices together into one file
    time_index = 0
    for yr in years:
        for m in ave_info['months_to_average']:
            # If slave, get slice and pass to master
            if computes:
                if use_zonal_average:
                    if collapse_dim is None:
                        # Fail with a clear message instead of an unbound
                        # var_val a few lines further down.
                        raise ValueError(
                            "time_concat: collapse_dim must not be None for "
                            "average type '%s' (variable '%s')" % (ave_type, var))
                    timer.start("Time to compute Average")
                    var_val = zonal_average(var,yr,m,hist_dict,file_dict,timer,collapse_dim)
                    timer.stop("Time to compute Average")
                else:
                    timer.start("Time to compute Average")
                    timer.start("Variable fetch time")
                    var_val = rover.fetch_slice(hist_dict,yr,m,var,file_dict)
                    timer.stop("Variable fetch time")
                    timer.stop("Time to compute Average")
                if not serial:
                    # Send a metadata message first, then the values.
                    timer.start("Send Average Time")
                    md_message = {'name':var,'shape':var_val.shape,'dtype':var_val.dtype,'index':time_index}
                    simplecomm.collect(data=md_message,tag=CONCAT_TAG)
                    simplecomm.collect(data=var_val,tag=CONCAT_VAL_TAG)
                    timer.stop("Send Average Time")
            if writes:
                # If master, recv slice and write to file
                if not serial:
                    timer.start("Recv Average Time")
                    r_rank,results = simplecomm.collect(tag=CONCAT_TAG)
                    r_rank,var_val = simplecomm.collect(tag=CONCAT_VAL_TAG)
                    slice_dtype = results['dtype']
                    # Use the index and name carried by the message, not the local ones.
                    ti = results['index']
                    var_n = results['name']
                    timer.stop("Recv Average Time")
                else:
                    slice_dtype = var_val.dtype
                    var_n = var
                    ti = time_index
                # For character data only the first element is written.
                if slice_dtype == 'S1' or slice_dtype == 'c':
                    var_val = var_val[0]
                timer.start("Write Netcdf Averages")
                climFileIO.write_averages(all_files_vars, var_val, var_n, index=ti)
                timer.stop("Write Netcdf Averages")
            time_index = time_index + 1
예제 #2
0
    def compute_averages(self, spec):
        '''
        Sets up the run information and computes the averages.

        @param spec          An instance of the Specification class which holds the user settings
                             that define which averages to compute, directories, file prefixes, etc
        '''
        import os, sys
        import rover
        import climAverager
        import climFileIO
        import average_types as ave_t
        import regionOpts
        import string
        import collections
        from asaptools import timekeeper
        from asaptools import partition
        #==============================================================================
        #
        # Initialize
        #
        #==============================================================================
        # Initialize the timekeeper class and start 'total' timer
        timer = timekeeper.TimeKeeper()
        timer.start("Total Time")
        # Initialize some timers that are not used by all tasks
        timer.reset("Send Average Time")
        timer.reset("Variable fetch time")
        timer.reset("Recv Average Time")
        timer.reset("Write Netcdf Averages")
        timer.reset("Variable fetch time")
        timer.reset("Time to compute Average")

        # Check average list to make sure it complies with the standards
        ave_t.average_compliance(spec.avg_list)

        # Check if I'm the global master
        g_master = spec.main_comm.is_manager()

        for tag in spec.m_id:

            file_pattern = list(spec.file_pattern)

            if ('-999' not in tag):
                prefix = spec.prefix + '_' + tag
                p_index = file_pattern.index('$prefix')
                t_index = file_pattern.index('$m_id')

                for i in range(p_index + 1, t_index + 1):
                    del file_pattern[p_index + 1]
            else:
                prefix = spec.prefix

# Sort through the average list and figure out dependencies and do
# averages in steps if need be.
            avg_dict = {0: spec.avg_list}
            for i in range(1, 20):
                avg_dict[i] = []
            avg_dict = ave_t.sort_depend(avg_dict, 0, spec.out_directory,
                                         prefix, spec.regions)
            print avg_dict

            # Initialize the tag for the average send/recv
            AVE_TAG = 40
            VNAME_TAG = 41

            #start_level = 0
            start_level = min(avg_dict.keys())
            found_level = False
            #for i in range(0,len(avg_dict)):
            #		if found_level == False:
            #		    if (i in avg_dict):
            #			start_level = i
            #			found_level = True

            ordered_avg_dict = collections.OrderedDict(avg_dict)
            #for i in range(start_level,len(avg_dict)):
            for i, value in ordered_avg_dict.items():

                # Initialize some containers
                var_list = []
                full_hist_dict = {}
                hist_dict = {}

                #==============================================================================
                #
                # Set the hist_dict up with file references for all years/months.
                # Create a list of all variables and meta variables within the file
                # and set the final variable list passed on user preferences.
                #
                #==============================================================================

                ## Set var_list and file info dictionary
                timer.start("Define history dictionary")
                if (spec.hist_type == 'series'):
                    full_hist_dict, full_var_list, meta_list, key = rover.set_slices_and_vars_time_series(
                        spec.in_directory, file_pattern, spec.date_pattern,
                        prefix, spec.suffix, spec.year0, spec.year1,
                        spec.split, spec.split_files)
                else:
                    full_hist_dict, full_var_list, meta_list, key = rover.set_slices_and_vars_time_slice(
                        spec.in_directory, file_pattern, prefix, spec.suffix,
                        spec.year0, spec.year1)
                timer.stop("Define history dictionary")

                # Set variable list.  If there was a variable list passed to the averager, use this list.  Otherwise,
                # use all variables within the file.
                if (len(spec.varlist) > 0):
                    var_list = spec.varlist
                    for v in full_var_list:
                        if '__meta' in v:
                            var_list.append(v)
                else:
                    var_list = full_var_list
                meta_list = list(set(meta_list))
                var_list = list(set(var_list))

                #==============================================================================
                #
                # Workload Distribution
                #
                #==============================================================================

                # Each intercommunicator receives a list of averages it's responsible for
                # Each mpi task within that intercommunicator gets a portion of the variable list
                num_of_avg = len(avg_dict[i])
                min_procs_per_ave = min(4, spec.main_comm.get_size())

                # Override user selection if they picked less than 2 or
                # the variable list is less than the min procs per sub-communicator
                if (min_procs_per_ave < 2 or len(var_list) <=
                    (min_procs_per_ave - 1)):
                    min_procs_per_ave = 2

                # If running in parallel mode, split the communicator and partition the averages
                if (spec.serial == False):
                    size = spec.main_comm.get_size()
                    rank = spec.main_comm.get_rank()

                    # split mpi comm world
                    temp_color = (rank // min_procs_per_ave) % num_of_avg
                    num_of_groups = size / min_procs_per_ave
                    if (temp_color == num_of_groups):
                        temp_color = temp_color - 1
                    groups = []
                    for g in range(0, num_of_groups):
                        groups.append(g)
            #print 'g_rank:',rank,'size:',size,'#of ave:',num_of_avg,'min_procs:',min_procs_per_ave,'temp_color:',temp_color,'#of groups',num_of_groups,'groups:',groups
                    group = groups[temp_color]
                    inter_comm, multi_comm = spec.main_comm.divide(group)
                    color = inter_comm.get_color()
                    lsize = inter_comm.get_size()
                    lrank = inter_comm.get_rank()

                    #g_master = spec.main_comm.is_manager()
                    l_master = inter_comm.is_manager()

                    #print 'global rank: ',rank,'local rank: ',lrank,'color: ',color,'tempcolor: ',temp_color,'group: ',group,'is local master: ',l_master
                    laverages = []
                    AVE_LIST_TAG = 50
                    # Partition the average task list among the inter/split communicators
                    if (l_master):
                        laverages = multi_comm.partition(
                            avg_dict[i],
                            func=partition.EqualStride(),
                            involved=True)
                        for b in range(1, lsize):
                            laverages_send = inter_comm.ration(
                                data=laverages, tag=AVE_LIST_TAG)
                    else:
                        laverages = inter_comm.ration(tag=AVE_LIST_TAG)
                else:
                    # Running in serial mode.  Just copy the average list.
                    laverages = avg_dict[i]
                    inter_comm = spec.main_comm
                    lsize = inter_comm.get_size()
                    #g_master = spec.main_comm.is_manager()
                    l_master = inter_comm.is_manager()

                # Partition the variable list between the tasks of each communicator
                if (lsize > 1 and spec.serial == False):
                    lvar_list = inter_comm.partition(
                        var_list, func=partition.EqualStride(), involved=False)
                    if (l_master):
                        lvar_list = var_list
                else:
                    lvar_list = var_list
                #print rank,lvar_list

                #print(rank,'averages :',laverages, ' vars :',lvar_list)

                #==============================================================================
                #
                # Create the output directory if it doesn't exist
                #
                #==============================================================================

                if spec.serial or g_master:
                    if not os.path.exists(spec.out_directory):
                        os.makedirs(spec.out_directory)
                spec.main_comm.sync()
                #==============================================================================
                #
                # Main Averaging Loop
                #
                #==============================================================================
                # Files are only split for the first loop.  When the depend averages start, they will operate on files
                # that are already stitched together.
                if (i != 0):
                    spec.split_name = 'null'
                    spec.split = False
                    spec.split_files = 'null'
                # Toggle to indicate that extra variables were added to the local file list (only do once per average level)
                added_extra_vars = False

                for ave in laverages:
                    for split_name in spec.split_files.split(","):
                        # Split apart the average info to get type of average and year(s)
                        ave_descr = ave.split(':')
                        if ('hor.meanyr' in ave_descr[0]
                                or 'hor.meanConcat' in ave_descr[0]):
                            ave_name_split = ave_descr[0].split('_')
                            region_num = ave_name_split[len(ave_name_split) -
                                                        1]
                            region_name = spec.regions[int(region_num)]
                            # Remove the region number as part of the average name
                            ave_descr[0] = ave_name_split[0]
                        else:
                            region_name = 'null'
                            region_num = -99

                        # If the average depends on other averages that have to be computed, create a new temporary dictionary
                        if '__d' in ave_descr:
                            yr0 = ave_descr[1]
                            if (len(ave_descr) > 2
                                    and '_d' not in ave_descr[2]):
                                yr1 = ave_descr[2]
                            else:
                                yr1 = ave_descr[1]

                            hist_dict = rover.set_slices_and_vars_depend(
                                spec.out_directory, file_pattern, prefix, yr0,
                                yr1, ave_t.average_types[ave_descr[0]],
                                ave_descr[0], region_name)
                        else:
                            hist_dict = dict(full_hist_dict)

                        # If concat' mean_diff_rms files, for each var, also add the _DIFF and _RMS variables.
                        if ('hor.meanConcat' in ave_descr
                                and added_extra_vars == False):
                            new_vars = []
                            for v in lvar_list:
                                if '__meta' not in v:
                                    new_vars.append(v + '_DIFF')
                                    new_vars.append(v + '_RMS')
                            lvar_list = lvar_list + new_vars
                            added_extra_vars = True

                        # Create and define the average file
                        timer.start("Create/Define Netcdf File")
                        if (len(ave_descr) < 3 or 'hor.meanyr' in ave_descr):
                            ave_date = string.zfill(ave_descr[1], 4)
                            ave_date2 = str(ave_descr[1])
                        else:
                            date1 = string.zfill(ave_descr[1], 4)
                            date2 = string.zfill(ave_descr[2], 4)
                            ave_date = date1 + '-' + date2
                            ave_date2 = str(ave_descr[1]) + '-' + str(
                                ave_descr[2])
                        outfile_name = climFileIO.get_out_fn(
                            ave_descr[0], prefix, ave_date,
                            ave_t.average_types[ave_descr[0]]['fn'],
                            region_name)
                        if 'zonalavg' in ave_descr:
                            l_collapse_dim = spec.collapse_dim
                        else:
                            l_collapse_dim = ''
                        all_files_vars, new_file = climFileIO.define_ave_file(
                            l_master,
                            spec.serial,
                            var_list,
                            lvar_list,
                            meta_list,
                            hist_dict,
                            spec.hist_type,
                            ave_descr,
                            prefix,
                            outfile_name,
                            spec.split,
                            split_name,
                            spec.out_directory,
                            inter_comm,
                            spec.ncformat,
                            ave_t.average_types[
                                ave_descr[0]]['months_to_average'][0],
                            key,
                            spec.clobber,
                            spec.year0,
                            spec.year1,
                            ave_date2,
                            collapse_dim=l_collapse_dim)
                        timer.stop("Create/Define Netcdf File")

                        # Start loops to compute averages
                        # create a list of years that are needed for this average
                        years = []
                        if '__d' in ave_descr:
                            if (ave_t.average_types[
                                    ave_descr[0]]['depend_type'] == 'month'
                                    or '_d' in ave_descr[2]):
                                years.append(int(ave_descr[1]))
                            else:
                                years = list(
                                    range(int(ave_descr[1]),
                                          int(ave_descr[2]) + 1))
                            depend = True
                        else:
                            if (len(ave_descr) == 2):
                                years.append(int(ave_descr[1]))
                            else:
                                years = list(
                                    range(int(ave_descr[1]),
                                          int(ave_descr[2]) + 1))
                            depend = False

            # Get the first year.  If part of a sig avg, this will be the sig first year, not year of indiv average
                        fyr = years[0]
                        if i + 1 in avg_dict.keys():
                            for a in avg_dict[i + 1]:
                                if (ave_descr[0] + '_sig') in a:
                                    spl = a.split(':')
                                    fyr = int(spl[1])

                        file_dict = []
                        open_list = []
                        # Open all of the files that this rank will need for this average (for time slice files)
                        if ((spec.hist_type == 'slice' or '__d' in ave_descr)
                                and (spec.serial or not l_master)
                                and len(lvar_list) > 0):
                            file_dict = []
                            open_list = []
                            file_dict, open_list = climFileIO.open_all_files(
                                hist_dict, ave_t.average_types[ave_descr[0]]
                                ['months_to_average'], years, lvar_list[0],
                                'null', ave_descr[0], depend, fyr)
                        # If concat of file instead of average, piece file together here.  If not, enter averaging loop
                        if (('mavg' in ave_descr or 'moc' in ave_descr
                             or 'annall' in ave_descr or 'mons' in ave_descr
                             or '_mean' in ave_descr[0])
                                and len(lvar_list) > 0):
                            file_dict = []
                            open_list = []
                            if (spec.serial or not l_master):
                                # Open files
                                file_dict, open_list = climFileIO.open_all_files(
                                    hist_dict, ave_t.average_types[
                                        ave_descr[0]]['months_to_average'],
                                    years, lvar_list[0], 'null', ave_descr[0],
                                    depend, fyr)
                        # Loop through variables and compute the averages
                        for orig_var in lvar_list:
                            # Some variable names were suffixed with a meta label indicating that the variable exists in all files,
                            # but there isn't a dedicated ts file to open.  Pick the first variable off the list and get values from there
                            if ('__meta' in orig_var):
                                var = key
                            else:
                                var = orig_var
                            # Open all of the files that this rank will need for this average (for time series files)
                            if ((spec.hist_type == 'series'
                                 and '__d' not in ave_descr)
                                    and (spec.serial or not l_master)):
                                if ('mavg' not in ave_descr
                                        or 'moc' not in ave_descr
                                        or 'annall' not in ave_descr
                                        or 'mons' not in ave_descr
                                        or '_mean' not in ave_descr[0]):
                                    file_dict = []
                                    open_list = []
                                    file_dict, open_list = climFileIO.open_all_files(
                                        hist_dict, ave_t.average_types[
                                            ave_descr[0]]['months_to_average'],
                                        years, var, split_name, ave_descr[0],
                                        depend, fyr)
                            # We now have open files to pull values from.  Now reset var name
                            if ('__meta' in orig_var):
                                parts = orig_var.split('__')
                                var = parts[0]
                            # If concat, all of the procs will participate in this call
                            if ('mavg' in ave_descr or 'moc' in ave_descr
                                    or 'mocm' in ave_descr
                                    or 'hor.meanConcat' in ave_descr
                                    or 'annall' in ave_descr
                                    or 'mons' in ave_descr
                                    or '_mean' in ave_descr[0]
                                    or 'zonalavg' in ave_descr):
                                if 'zonalavg' in ave_descr:
                                    l_collapse_dim = spec.collapse_dim
                                else:
                                    l_collapse_dim = ''
                        # Concat
                                var_avg_results = climAverager.time_concat(
                                    var,
                                    years,
                                    hist_dict,
                                    ave_t.average_types[ave_descr[0]],
                                    file_dict,
                                    ave_descr[0],
                                    inter_comm,
                                    all_files_vars,
                                    spec.serial,
                                    timer,
                                    collapse_dim=spec.collapse_dim)
                            # Else (not concat), each slave will compute averages and each master will collect and write
                            else:
                                if spec.serial or not l_master:
                                    # mean_diff_rsm file
                                    if ('hor.meanyr' in ave_descr
                                            and '__meta' not in orig_var):
                                        obs_file = spec.obs_dir + "/" + spec.obs_file
                                        reg_obs_file = spec.obs_dir + "/" + region_name + spec.reg_obs_file_suffix
                                        # The mean diff rsm function will send the variables once they are created
                                        var_avg_results, var_DIFF_results, var_RMS_results = climAverager.mean_diff_rms(
                                            var, region_name, region_num,
                                            spec.region_nc_var,
                                            spec.region_wgt_var, years,
                                            hist_dict,
                                            ave_t.average_types[ave_descr[0]],
                                            file_dict, obs_file, reg_obs_file,
                                            inter_comm, spec.serial, VNAME_TAG,
                                            AVE_TAG)
                                    else:
                                        if ('__metaChar' in orig_var):
                                            # Handle special meta
                                            var_avg_results = climAverager.get_metaCharValue(
                                                var, years, hist_dict,
                                                ave_t.average_types[
                                                    ave_descr[0]], file_dict,
                                                timer)
                                        else:
                                            # Average
                                            if (spec.weighted == True
                                                    and 'weights'
                                                    in ave_t.average_types[
                                                        ave_descr[0]]):
                                                var_avg_results = climAverager.weighted_avg_var(
                                                    var, years, hist_dict,
                                                    ave_t.average_types[
                                                        ave_descr[0]],
                                                    file_dict, ave_descr[0],
                                                    timer, depend, fyr)
                                            else:
                                                var_avg_results = climAverager.avg_var(
                                                    var, years, hist_dict,
                                                    ave_t.average_types[
                                                        ave_descr[0]],
                                                    file_dict, ave_descr[0],
                                                    timer, depend, fyr)

                                        # Close all open files (for time series files)
                                        if ((spec.hist_type == 'series'
                                             and '__d' not in ave_descr) and
                                            (spec.serial or not l_master)):
                                            climFileIO.close_all_files(
                                                open_list)

                                        # Pass the average results to master rank for writing
                                        var_shape = var_avg_results.shape
                                        var_dtype = var_avg_results.dtype
                                        var_type = type(var_avg_results)
                                        md_message = {
                                            'name': var,
                                            'shape': var_shape,
                                            'dtype': var_dtype,
                                            'average': var_avg_results,
                                            'type': var_type
                                        }
                                        if not spec.serial:
                                            timer.start("Send Average Time")
                                            #inter_comm.collect(data=md_message, tag=AVE_TAG)
                                            inter_comm.collect(data=var,
                                                               tag=VNAME_TAG)
                                            inter_comm.collect(
                                                data=var_avg_results,
                                                tag=AVE_TAG)
                                            timer.stop("Send Average Time")

                                if spec.serial or l_master:
                                    # If ave_descr is hor.meanyr, there will be three variables to write for each variable.
                                    # Other wise, there will only be 1
                                    if ('hor.meanyr' in ave_descr
                                            and '__meta' not in orig_var):
                                        var_cnt = 3
                                    else:
                                        var_cnt = 1
                                    for r in range(0, var_cnt):
                                        if not spec.serial:
                                            timer.start("Recv Average Time")
                                            #r_rank,results = inter_comm.collect(tag=AVE_TAG)
                                            #r_var_avg_results = results['average']
                                            r_rank, var_name = inter_comm.collect(
                                                tag=VNAME_TAG)
                                            r_rank, r_var_avg_results = inter_comm.collect(
                                                tag=AVE_TAG)
                                            #var_name = results['name']
                                            timer.start("Recv Average Time")
                                        else:
                                            var_name = var
                                            r_var_avg_results = var_avg_results

                                        timer.start("Write Netcdf Averages")
                                        climFileIO.write_averages(
                                            all_files_vars, r_var_avg_results,
                                            var_name)
                                        if ('hor.meanyr' in ave_descr
                                                and spec.serial
                                            ) and '__meta' not in orig_var:
                                            climFileIO.write_averages(
                                                all_files_vars,
                                                var_DIFF_results,
                                                var_name + '_DIFF')
                                            climFileIO.write_averages(
                                                all_files_vars,
                                                var_RMS_results,
                                                var_name + '_RMS')
                                        timer.stop("Write Netcdf Averages")

                        # Close all open files (for time slice files)
                        if (('mavg' in ave_descr or 'moc__d' == ave_descr[0]
                             or 'annall' in ave_descr or 'mons' in ave_descr
                             or '_mean' in ave_descr[0])
                                and len(lvar_list) > 0):
                            if (spec.serial or not l_master):
                                climFileIO.close_all_files(open_list)
                        elif ((spec.hist_type == 'slice' or '__d' in ave_descr)
                              and (spec.serial or not l_master)
                              and len(lvar_list) > 0):
                            climFileIO.close_all_files(open_list)

                        # Sync the local communicator before closing the averaged netcdf file and moving to the next average
                        inter_comm.sync()

                    # Close the newly created average file
                    if spec.serial or l_master:
                        new_file.close()

                    # If needed, stitch spatially split files together.
                    if spec.serial or l_master:
                        if (len(spec.split_files.split(",")) > 1):
                            fn1 = spec.out_directory + '/nh_' + outfile_name
                            fn2 = spec.out_directory + '/sh_' + outfile_name
                            out_fn = spec.out_directory + '/' + outfile_name
                            dim_info = spec.split_orig_size.split(",")
                            dim1 = dim_info[0].split("=")
                            dim2 = dim_info[1].split("=")
                            regionOpts.combine_regions(fn1, fn2,
                                                       out_fn, dim1[0],
                                                       int(dim1[1]), dim2[0],
                                                       int(dim2[1]), "nj",
                                                       spec.clobber)

                if not spec.serial:
                    # Free the inter-communicators
                    #intercomm.Free()
                    # Sync all mpi tasks / All averages should have been computed at this point
                    spec.main_comm.sync()

    #==============================================================================
    #
    # Collect and print timing information
    #
    #==============================================================================

        timer.stop("Total Time")
        my_times = spec.main_comm.allreduce(timer.get_all_times(), 'max')

        if g_master:
            print("==============================================")
            print "COMPLETED SUCCESSFULLY"
            print my_times
            print("==============================================")
예제 #3
0
def time_concat(var,years,hist_dict,ave_info,file_dict,ave_type,simplecomm,all_files_vars,serial,timer,collapse_dim=''):

    '''
    Concats files together in the time dimension.

    One slice is produced for every (year, month) pair, in loop order, and
    written to the output file at a running time index.  In parallel mode the
    non-manager ranks produce the slices and hand them to the manager through
    simplecomm.collect; the manager receives and writes them.  In serial mode
    the single task does both steps itself.

    @param var             The name of the variable to concat.  If the name
                           contains '__meta', everything from the first '__'
                           on is dropped before data is fetched.

    @param years           A list of the years that are in this average

    @param hist_dict       A dictionary that holds file references for all years/months.

    @param ave_info        A dictionary of the type of average that is to be done.
                           Only the 'type' and 'months_to_average' entries are
                           read by this function.

    @param file_dict       A dictionary which holds file pointers to the input files that
                           are needed by this average calculation.

    @param ave_type        The average type key that indicated which type of average will be done.
                           If it contains 'zonalavg' the slices come from
                           zonal_average, otherwise from rover.fetch_slice.

    @param simplecomm      The simple comm object used for mpi communication.

    @param all_files_vars  All of the file's variables with ncids attached.

    @param serial          Boolean if running in serial mode.

    @param timer           Timer object; only its start(name)/stop(name) methods are used.

    @param collapse_dim    Used to collapse/average over one dim (zonalavg only).

    @return                None.  The results are written with climFileIO.write_averages.

    @raise ValueError      If a zonal average is requested with collapse_dim set to None.
    '''
    # The role of this task does not change while looping, so work it out once.
    is_manager = simplecomm.is_manager()
    computes = serial or not is_manager   # fetches/computes the slices
    writes = serial or is_manager         # writes the slices to the output file

    if computes:
        print('Concatenating ',ave_info['type'],' for ',var)

    CONCAT_TAG = 60
    CONCAT_VAL_TAG = 67

    # Meta variables are tagged with a '__meta...' suffix; the data itself is
    # stored under the bare name.  This used to be redone on every iteration.
    if ('__meta' in var):
        var = var.split('__')[0]

    # Loop over years and months to cat them all together into one file
    time_index = 0
    for yr in years:
        for m in ave_info['months_to_average']:
            # Worker side: get the slice and pass it to the manager
            if computes:
                if 'zonalavg' in ave_type:
                    if collapse_dim is None:
                        # Without this check var_val would stay unbound and the
                        # failure would only surface later as an UnboundLocalError.
                        raise ValueError("collapse_dim must not be None when "
                                         "computing a zonal average for " + str(var))
                    timer.start("Time to compute Average")
                    var_val = zonal_average(var,yr,m,hist_dict,file_dict,timer,collapse_dim)
                    timer.stop("Time to compute Average")
                else:
                    timer.start("Time to compute Average")
                    timer.start("Variable fetch time")
                    var_val = rover.fetch_slice(hist_dict,yr,m,var,file_dict)
                    timer.stop("Time to compute Average")
                    timer.stop("Variable fetch time")
                if not serial:
                    # Send a small metadata message first, then the values.
                    timer.start("Send Average Time")
                    md_message = {'name':var,'shape':var_val.shape,'dtype':var_val.dtype,'index':time_index}
                    simplecomm.collect(data=md_message,tag=CONCAT_TAG)
                    simplecomm.collect(data=var_val,tag=CONCAT_VAL_TAG)
                    timer.stop("Send Average Time")
            # Manager side: receive the slice (parallel mode) and write it to file
            if writes:
                if not serial:
                    timer.start("Recv Average Time")
                    r_rank,results = simplecomm.collect(tag=CONCAT_TAG)
                    r_rank,var_val = simplecomm.collect(tag=CONCAT_VAL_TAG)
                    if results['dtype'] == 'S1':
                        var_val = var_val[0]
                    ti = results['index']
                    var_n = results['name']
                    timer.stop("Recv Average Time")
                else:
                    var_n = var
                    ti = time_index
                    if var_val.dtype == 'S1':
                        var_val = var_val[0]
                timer.start("Write Netcdf Averages")
                climFileIO.write_averages(all_files_vars, var_val, var_n, index=ti)
                timer.stop("Write Netcdf Averages")
            time_index = time_index + 1
예제 #4
0
    def create_pre_proc(self,spec):

        '''
        Creates the CICE pre_proc file.

        A variable name is built for every region/quantity pair
        ('ext_mo_<region>' or 'v<quantity>_mo_<region>') plus 'time', and the
        list is partitioned over the MPI ranks.  For every year and month the
        worker ranks fetch the slice, scale it, weight it by the observation
        area, mask it to the region and reduce it with self.get_sum; the
        manager rank (or the single task in serial mode) writes each result
        into the output file at the matching time index.

        @param spec          An instance of the Specification class which holds the user settings
                             that define which averages to compute, directories, file prefixes, etc
        '''

        # Scale factor and units attribute for each summed quantity.
        # NOTE(review): 'hi' and 'ext' pair a factor of 1e-N with units of
        # 1.E+N, but 'ai' (1e-14 / '1.E+13 m3') and 'hs' (1e-13 / '1.E+12 m2')
        # do not, and their m3/m2 strings look swapped -- confirm before
        # trusting the units written to the output file.
        variables = {'hi':{'factor':1.0e-13,'units':'1.E+13 m3'},
                     'ai':{'factor':1.0e-14,'units':'1.E+13 m3'},
                     'ext':{'factor':1.0e-12,'units':'1.E+12 m2'},
                     'hs':{'factor':1.0e-13,'units':'1.E+12 m2'},
        }

        #  All of the region names, with 0=Northern Hem and 1=Southern Hem
        regions = {'nh':0, 'sh':1, 'Lab':0,'GIN':0,'Bar':0,'ArcOc':0,'Sib':0,'Beau':0,
                   'CArc':0,'Bering':0,'Okhotsk':0,'Hudson':0,'CAArch':0,
                   'Wed':1,'Ross':1,'Ind':1,'Pac':1,'BAm':1}

        split_hem = spec.split_files.split(',')

        attributes = {'missing_value':1.e+30, 'coordinates':'time', 'cell_methods':'time:mean','_FillValue':1.e+30}

        ave_descr = ['preproc',str(spec.year0),str(spec.year1)]
        ave_type = ave_descr[0]

        # Tag used for the worker -> manager result messages
        AVE_TAG = 40

        year0 = int(spec.year0)
        year1 = int(spec.year1)
        years = list(range(year0,year1+1))
        months = ave_t.average_types[ave_type]['months_to_average']

        # Initialize simplecomm (MPI wrappers) and work out this task's role once
        main_comm = spec.main_comm
        is_manager = main_comm.is_manager()
        computes = spec.serial or not is_manager   # reads and reduces the data
        writes = spec.serial or is_manager         # owns the output file

        # If the region mask file doesn't exist, have root call ncl to create it
        if writes and not os.path.isfile(spec.reg_file):
            import subprocess
            os.environ['GRIDFILE'] = spec.ice_obs_file
            os.environ['REGIONFILE'] = spec.reg_file
            ncl_command = 'ncl < '+ spec.ncl_location +'/ice_pre_proc_mask.ncl'
            subprocess.call(ncl_command,shell=True)
        # make sure to have all ranks sync to prevent ranks other than root from continuing on without a region mask file
        main_comm.sync()

        # Get the history dictionary that lists where files are located for each time slice, a variable list,
        # meta list, and a key lookup variable
        if (spec.hist_type == 'series'):
            hist_dict,file_var_list,meta_list,key = rover.set_slices_and_vars_time_series(
                spec.in_directory, spec.file_pattern, spec.date_pattern,
                spec.prefix, spec.suffix, year0, year1, spec.split, spec.split_files)
        else:
            hist_dict,file_var_list,meta_list,key = rover.set_slices_and_vars_time_slice(
                spec.in_directory, spec.file_pattern, spec.prefix, spec.suffix, year0, year1)

        # Loop over the regions and variable names to get full list of variables
        global_var_list = []
        for reg in regions:
            for var in variables:
                if ('ext' in var):
                    global_var_list.append(var+'_mo_'+reg)
                else:
                    global_var_list.append('v'+var+'_mo_'+reg)
        global_var_list.append('time')

        # Partition the global variable list between the MPI ranks
        local_var_list = main_comm.partition(global_var_list,func=partition.EqualLength(),involved=False)
        # If master/root, give it the full variable list
        if writes:
            local_var_list = global_var_list

        # No meta variables are written to the pre_proc file
        meta_list = []

        # Define the netcdf file
        outfile = 'ice_vol_'+spec.prefix[:-7]+'_'+str(spec.year0)+'-'+str(spec.year1)+'.nc'
        ave_date = str(spec.year0)+'-'+str(spec.year1)
        all_files_vars,new_file = climFileIO.define_ave_file(
            is_manager,spec.serial,global_var_list,local_var_list,meta_list,hist_dict,spec.hist_type,
            ave_descr,spec.prefix,outfile,spec.split,split_hem[regions['GIN']],spec.out_directory,main_comm,spec.ncformat,
            months[0],key,spec.clobber,year0,year1,ave_date,attributes,variables)

        # If using time slice files, open all files now
        uses_slice_files = (len(local_var_list) > 0 and spec.hist_type == 'slice' and computes)
        if uses_slice_files:
            file_dict,open_list = climFileIO.open_all_files(hist_dict,months,years,local_var_list[0],
                                                            'null',ave_type,False,year0)

        # Get observation lat,lon,area.  The arguments never change, so this is
        # read once instead of once per variable.
        if computes and len(local_var_list) > 0:
            o_lat,o_lon,o_area = self.read_obs(spec.ice_obs_file,'TAREA','TLONG','TLAT')

        # Loop over each variable in the local list and read/operate on/write
        for nc_var in local_var_list:
            if computes: # Worker
                print('Computing ice_pre_proc for', nc_var)
                # Get variable/region names
                is_time = 'time' in nc_var
                if is_time:
                    get_var_name = 'aice'
                    var_name = 'time'
                    # 'time' has no region of its own.  Previously the hemisphere
                    # file was picked through whatever value 'reg' happened to
                    # hold from an earlier loop; use the same reference region
                    # as the define_ave_file call above instead.
                    # NOTE(review): assumes the time values are the same in
                    # both hemisphere files -- confirm.
                    reg = 'GIN'
                else:
                    var_name,reg = nc_var.split('_mo_')
                    if ('ext' not in var_name):
                        # Drop the leading 'v' that was added when the list was built
                        var_name = var_name[1:]
                    if ('ext' in var_name or 'ai' in var_name):
                        get_var_name = 'aice'
                    else:
                        get_var_name = var_name

                # If using time series files, open the variable's file now
                if (spec.hist_type == 'series'):
                    if spec.split:
                        split_name = split_hem[regions[reg]]
                    else:
                        split_name = ''
                    file_dict,open_list = climFileIO.open_all_files(hist_dict,months,years,get_var_name,
                                                                    split_name,ave_type,False,year0)

                # The region mask depends only on the region, so read it once
                # per variable rather than once per month.
                if not is_time:
                    mask_to_apply = self.read_reg_mask(spec.reg_file,reg)

            time_slice = 0
            for year in years:
                for m in months:
                    if computes: # Worker
                        if is_time:
                            var_sum = rover.fetch_slice(hist_dict, year, m, var_name, file_dict)
                        else:
                            # Get month slice
                            var_slice = rover.fetch_slice(hist_dict, year, m, get_var_name, file_dict)
                            lat,lon = var_slice.shape
                            full_lat,full_lon = o_lat.shape
                            if spec.split:
                                # Pad the hemisphere slice with zero rows up to the
                                # number of rows of the observation grid
                                fill = full_lat-lat
                                missing_vals = np.zeros((fill,lon))
                                var_slice = np.array(var_slice)
                                var_slice[var_slice >= 1e+20] = 0
                                if regions[reg] == 1:
                                    var_slice = np.concatenate((var_slice,missing_vals),axis=0)
                                else:
                                    var_slice = np.concatenate((missing_vals,var_slice),axis=0)

                            # Get ai factor: values whose maximum is below 2 are scaled by 100
                            if ('ext' in var_name or 'ai' in var_name):
                                aimax = np.amax(var_slice)
                                if (aimax < 2):
                                    aifac = 100
                                else:
                                    aifac = 1
                                var_slice = var_slice*aifac

                            # The ext variable is true/false based on the ai variable.  Set accordingly
                            if ('ext' in var_name):
                                var_slice = np.array(var_slice)
                                var_slice[var_slice >= 1e+20] = 0
                                var_slice[var_slice < 15] = 0
                                var_slice[var_slice >= 15] = 1

                            # Mult by weight
                            var_slice = var_slice * o_area

                            # Mask the variable to get just this region
                            masked_var = MA.masked_where(mask_to_apply==0,var_slice)

                            # Sum the variable
                            var_sum = self.get_sum(masked_var,variables[var_name],var_name)

                        # Pass the average results to master rank for writing
                        if not spec.serial:
                            md_message = {'name':nc_var,'shape':var_sum.shape,'dtype':var_sum.dtype,
                                          'average':var_sum,'index':time_slice}
                            main_comm.collect(data=md_message, tag=AVE_TAG)

                    if writes: # Manager
                        # Recv the variable to write
                        if not spec.serial:
                            r_rank,results = main_comm.collect(tag=AVE_TAG)
                            var_sum_results = results['average']
                            v_name = results['name']
                            index = results['index']
                        else:
                            v_name = nc_var
                            var_sum_results = var_sum
                            index = time_slice

                        # Write Var
                        climFileIO.write_averages(all_files_vars, var_sum_results, v_name, index)

                    time_slice = time_slice + 1

            # Close timeseries files that are open
            if (spec.hist_type == 'series' and computes):
                climFileIO.close_all_files(open_list)

        # Close timeslice files that are open
        if uses_slice_files:
            climFileIO.close_all_files(open_list)

        # Make sure everyone gets sync'ed up
        main_comm.sync()

        # Close the file that was just created
        if writes:
            new_file.close()
예제 #5
0
    def compute_averages(self,spec):

        '''
        Sets up the run information and computes the averages.

        @param spec          An instance of the Specification class which holds the user settings
                             that define which averages to compute, directories, file prefixes, etc
        '''
        import os,sys
        import rover
        import climAverager
        import climFileIO
        import average_types as ave_t
        import regionOpts
        import string
        import collections
        from asaptools import timekeeper
        from asaptools import partition 
#==============================================================================
#
# Initialize 
#
#==============================================================================
        # Initialize the timekeeper class and start 'total' timer
        timer = timekeeper.TimeKeeper()
        timer.start("Total Time")
        # Initialize some timers that are not used by all tasks
        timer.reset("Send Average Time")
        timer.reset("Variable fetch time")
        timer.reset("Recv Average Time")
        timer.reset("Write Netcdf Averages")
        timer.reset("Variable fetch time")
        timer.reset("Time to compute Average")

        # Check average list to make sure it complies with the standards
        ave_t.average_compliance(spec.avg_list)

        # Check if I'm the global master
        g_master = spec.main_comm.is_manager()

        for tag in spec.m_id:
    
            file_pattern = list(spec.file_pattern)

            if ('-999' not in tag):
                prefix = spec.prefix + '_' + tag
                p_index = file_pattern.index('$prefix')
                t_index = file_pattern.index('$m_id')
 
                for i in range(p_index+1,t_index+1):
                    del file_pattern[p_index+1]
            else:
                prefix = spec.prefix

	    # Sort through the average list and figure out dependencies and do
	    # averages in steps if need be.
	    avg_dict = {0:spec.avg_list}
	    for i in range(1,20):
		avg_dict[i] = []
	    avg_dict = ave_t.sort_depend(avg_dict,0,spec.out_directory,prefix,spec.regions)
            print avg_dict

	    # Initialize the tag for the average send/recv
	    AVE_TAG = 40
            VNAME_TAG = 41	   
 
	    #start_level = 0
            start_level = min(avg_dict.keys())
	    found_level = False
	    #for i in range(0,len(avg_dict)):
#		if found_level == False:    
#		    if (i in avg_dict): 
#			start_level = i
#			found_level = True

            ordered_avg_dict =  collections.OrderedDict(avg_dict)
	    #for i in range(start_level,len(avg_dict)):
            for i,value in ordered_avg_dict.items():
	     
		# Initialize some containers 
		var_list = []
		full_hist_dict = {}
		hist_dict = {}

    #==============================================================================
    #
    # Set the hist_dict up with file references for all years/months.
    # Create a list of all variables and meta variables within the file
    # and set the final variable list passed on user preferences. 
    #
    #==============================================================================

		## Set var_list and file info dictionary
		timer.start("Define history dictionary")
		if (spec.hist_type == 'series'):
		    full_hist_dict,full_var_list,meta_list,key = rover.set_slices_and_vars_time_series(spec.in_directory, file_pattern, spec.date_pattern, 
								    prefix, spec.suffix, spec.year0, spec.year1, spec.split, spec.split_files)
		else:
		    full_hist_dict,full_var_list,meta_list,key = rover.set_slices_and_vars_time_slice(spec.in_directory, file_pattern, prefix, spec.suffix, spec.year0, spec.year1)
		timer.stop("Define history dictionary")

		# Set variable list.  If there was a variable list passed to the averager, use this list.  Other wise,
		# use all variables within the file.
		if (len(spec.varlist)>0):
		    var_list = spec.varlist
                    for v in full_var_list:
                        if '__meta' in v:
                            var_list.append(v)
		else:
		    var_list = full_var_list
                meta_list = list(set(meta_list))
                var_list = list(set(var_list))

    #==============================================================================
    #
    # Workload Distribution
    #
    #==============================================================================

		# Each intercommunicator recieves a list of averages it's responsible for
		# Each mpi task within that intercommunicator gets a portion of the variable list 
     		num_of_avg = len(avg_dict[i])
		min_procs_per_ave = min(4,spec.main_comm.get_size())

		# Override user selection if they picked less than 2 or
		# the variable list is less than the min procs per sub-communicator
		if (min_procs_per_ave < 2 or len(var_list) <= (min_procs_per_ave-1)):
		    min_procs_per_ave = 2

		# If running in paralllel mode, split the communicator and partition the averages
		if (spec.serial == False):
		    size = spec.main_comm.get_size()
		    rank = spec.main_comm.get_rank()

		    # split mpi comm world
                    temp_color = (rank // min_procs_per_ave) % num_of_avg
		    num_of_groups = size/min_procs_per_ave
                    if (temp_color == num_of_groups):
                        temp_color = temp_color - 1
		    groups = []
		    for g in range(0,num_of_groups):
			groups.append(g)
                    #print 'g_rank:',rank,'size:',size,'#of ave:',num_of_avg,'min_procs:',min_procs_per_ave,'temp_color:',temp_color,'#of groups',num_of_groups,'groups:',groups 
		    group = groups[temp_color]
		    inter_comm,multi_comm = spec.main_comm.divide(group)
		    color = inter_comm.get_color()
		    lsize = inter_comm.get_size()
		    lrank = inter_comm.get_rank()

		    #g_master = spec.main_comm.is_manager()
		    l_master = inter_comm.is_manager()
	  
		    #print 'global rank: ',rank,'local rank: ',lrank,'color: ',color,'tempcolor: ',temp_color,'group: ',group,'is local master: ',l_master
                    laverages = []
                    AVE_LIST_TAG = 50
		    # Partion the average task list amoung the inter/split communicators
                    if (l_master):
		        laverages = multi_comm.partition(avg_dict[i],func=partition.EqualStride(),involved=True)
                        for b in range(1,lsize):
                            laverages_send = inter_comm.ration(data=laverages,tag=AVE_LIST_TAG) 
                    else:
                        laverages = inter_comm.ration(tag=AVE_LIST_TAG)
		else: 
		    # Running in serial mode.  Just copy the average list.
		    laverages = avg_dict[i]
		    inter_comm = spec.main_comm
		    lsize = inter_comm.get_size()
		    #g_master = spec.main_comm.is_manager()
		    l_master = inter_comm.is_manager()

		# Partition the variable list between the tasks of each communicator
		if (lsize > 1 and spec.serial == False):
		    lvar_list = inter_comm.partition(var_list,func=partition.EqualStride(),involved=False) 
		    if (l_master):
			lvar_list = var_list
		else:
		    lvar_list = var_list
		#print rank,lvar_list

		#print(rank,'averages :',laverages, ' vars :',lvar_list)
 
    #==============================================================================
    #
    # Create the output directory if it doesn't exist
    #
    #==============================================================================

		if spec.serial or g_master:
		    if not os.path.exists(spec.out_directory):
			os.makedirs(spec.out_directory)
                spec.main_comm.sync() 
    #==============================================================================
    #
    # Main Averaging Loop
    #
    #==============================================================================
		# Files are only split for the first loop.  When the depend averages start, they will operate on files
		# that are already stiched together.
		if (i != 0):
		    spec.split_name = 'null'
		    spec.split = False
		    spec.split_files = 'null'
		# Toggle to incate that extra variables were added to the local file list (only do once per average level
		added_extra_vars = False

		for ave in laverages:
		    for split_name in spec.split_files.split(","): 
			# Split apart the average info to get type of average and year(s) 
			ave_descr = ave.split(':')
			if ('hor.meanyr' in ave_descr[0] or 'hor.meanConcat' in ave_descr[0]):
			    ave_name_split = ave_descr[0].split('_')
			    region_num = ave_name_split[len(ave_name_split)-1]
			    region_name = spec.regions[int(region_num)]
			    # Remove the region number as part of the average name
			    ave_descr[0] = ave_name_split[0]
			else:
			    region_name = 'null'
			    region_num = -99

			# If the average depends on other averages that have to be computed, create a new temporary dictionary
			if '__d' in ave_descr:
			    yr0 = ave_descr[1]
			    if (len(ave_descr) > 2 and '_d' not in ave_descr[2]):
			       yr1 = ave_descr[2]
			    else:
			       yr1 = ave_descr[1]
                            
			    hist_dict = rover.set_slices_and_vars_depend(spec.out_directory, file_pattern, prefix, yr0, yr1,
										ave_t.average_types[ave_descr[0]],ave_descr[0],region_name)
			else:
			    hist_dict = dict(full_hist_dict)   

			# If concat' mean_diff_rms files, for each var, also add the _DIFF and _RMS variables.
			if ('hor.meanConcat' in ave_descr and added_extra_vars==False):
			    new_vars = []
			    for v in lvar_list:
                                if '__meta' not in v:
				    new_vars.append(v+'_DIFF')
				    new_vars.append(v+'_RMS')
			    lvar_list = lvar_list + new_vars
			    added_extra_vars = True

			# Create and define the average file 
			timer.start("Create/Define Netcdf File")
                        if (len(ave_descr)<3 or 'hor.meanyr' in ave_descr):
                            ave_date = string.zfill(ave_descr[1],4)
                            ave_date2 = str(ave_descr[1])
                        else:
                            date1 = string.zfill(ave_descr[1],4)
                            date2 = string.zfill(ave_descr[2],4)
                            ave_date = date1+'-'+date2
                            ave_date2 = str(ave_descr[1])+'-'+str(ave_descr[2])
			outfile_name = climFileIO.get_out_fn(ave_descr[0],prefix,ave_date,ave_t.average_types[ave_descr[0]]['fn'],region_name)
                        if 'zonalavg' in ave_descr:
                            l_collapse_dim = spec.collapse_dim
                        else:
                            l_collapse_dim = ''
			all_files_vars,new_file = climFileIO.define_ave_file(l_master,spec.serial,var_list,lvar_list,meta_list,hist_dict,
									     spec.hist_type,ave_descr,prefix,outfile_name,
									     spec.split,split_name,spec.out_directory,inter_comm,
									     spec.ncformat,ave_t.average_types[ave_descr[0]]['months_to_average'][0],
                                                                             key,spec.clobber,spec.year0,spec.year1,ave_date2,collapse_dim=l_collapse_dim) 
			timer.stop("Create/Define Netcdf File")
		       
			# Start loops to compute averages
			# create a list of years that are needed for this average
			years = []
			if '__d' in ave_descr:
			    if (ave_t.average_types[ave_descr[0]]['depend_type'] == 'month' or '_d' in ave_descr[2]):
				years.append(int(ave_descr[1]))
			    else:
				years = list(range(int(ave_descr[1]),int(ave_descr[2])+1))
			    depend = True
			else: 
			    if (len(ave_descr) == 2):
				years.append(int(ave_descr[1]))
			    else:
				years = list(range(int(ave_descr[1]),int(ave_descr[2])+1))
			    depend = False

                        # Get the first year.  If part of a sig avg, this will be the sig first year, not year of indiv average
                        fyr = years[0]
                        if i+1 in avg_dict.keys():
                            for a in avg_dict[i+1]:
                                if (ave_descr[0]+'_sig') in a:
                                    spl = a.split(':') 
                                    fyr = int(spl[1]) 

			file_dict = []
			open_list = []
			# Open all of the files that this rank will need for this average (for time slice files)
			if ((spec.hist_type == 'slice' or '__d' in ave_descr)  and (spec.serial or not l_master) and len(lvar_list) > 0):
			    file_dict = []
			    open_list = []
			    file_dict,open_list = climFileIO.open_all_files(hist_dict,ave_t.average_types[ave_descr[0]]['months_to_average'],
								    years,lvar_list[0],'null',ave_descr[0],depend,fyr)
			# If concat of file instead of average, piece file together here.  If not, enter averaging loop
			if (('mavg' in ave_descr or 'moc' in ave_descr or 'annall' in ave_descr or 'mons' in ave_descr or
                             '_mean' in ave_descr[0]) and len(lvar_list) > 0):
			    file_dict = []
			    open_list = []
			    if (spec.serial or not l_master):
				# Open files
				file_dict,open_list = climFileIO.open_all_files(hist_dict,ave_t.average_types[ave_descr[0]]['months_to_average'],
								    years,lvar_list[0],'null',ave_descr[0],depend,fyr)
			# Loop through variables and compute the averages
			for orig_var in lvar_list:
			    # Some variable names were suffixed with a meta label indicaticating that the variable exists in all files,
			    # but there isn't a didicated ts file to open.  Pick the first variable off the list and get values from there
			    if ('__meta' in orig_var):
				var = key 
			    else:
				var = orig_var
			    # Open all of the files that this rank will need for this average (for time series files)
			    if ((spec.hist_type == 'series' and '__d' not in ave_descr) and (spec.serial or not l_master)):
				if ('mavg' not in ave_descr or 'moc' not in ave_descr or 'annall' not in ave_descr or 
      				    'mons' not in ave_descr or '_mean' not in ave_descr[0]):
				    file_dict = []
				    open_list = []
				    file_dict,open_list = climFileIO.open_all_files(hist_dict,ave_t.average_types[ave_descr[0]]['months_to_average'],
											years,var,split_name,ave_descr[0],depend,fyr)
			    # We now have open files to pull values from.  Now reset var name
			    if ('__meta' in orig_var):
				parts = orig_var.split('__')
				var = parts[0]
			    # If concat, all of the procs will participate in this call
			    if ('mavg' in ave_descr or 'moc' in ave_descr or 'mocm' in ave_descr or 'hor.meanConcat' in ave_descr 
                                or 'annall' in ave_descr or 'mons' in ave_descr or '_mean' in ave_descr[0] or 'zonalavg' in ave_descr):
                                        if 'zonalavg' in ave_descr:
                                            l_collapse_dim = spec.collapse_dim
                                        else:
                                            l_collapse_dim = ''
					# Concat
					var_avg_results =  climAverager.time_concat(var,years,hist_dict,ave_t.average_types[ave_descr[0]],
								    file_dict,ave_descr[0],inter_comm,all_files_vars,spec.serial,timer,collapse_dim=spec.collapse_dim)
			    # Else (not concat), each slave will compute averages and each master will collect and write
			    else:
				if spec.serial or not l_master:
				    # mean_diff_rsm file
				    if ('hor.meanyr' in ave_descr and '__meta' not in orig_var):
					obs_file = spec.obs_dir+"/"+spec.obs_file
					reg_obs_file = spec.obs_dir+"/"+region_name+spec.reg_obs_file_suffix
					# The mean diff rsm function will send the variables once they are created 
					var_avg_results,var_DIFF_results,var_RMS_results = climAverager.mean_diff_rms(var,region_name,region_num,spec.region_nc_var,
					    spec.region_wgt_var,years,hist_dict,ave_t.average_types[ave_descr[0]],file_dict,obs_file,
					    reg_obs_file,inter_comm,spec.serial,VNAME_TAG,AVE_TAG,spec.vertical_levels)
				    else:
					if ('__metaChar' in orig_var):
					    # Handle special meta
					    var_avg_results =  climAverager.get_metaCharValue(var,years,hist_dict,ave_t.average_types[ave_descr[0]],
						    file_dict,timer)
					else: 
					    # Average
					    if (spec.weighted == True and 'weights' in ave_t.average_types[ave_descr[0]]):
						var_avg_results =  climAverager.weighted_avg_var(var,years,hist_dict,
						      ave_t.average_types[ave_descr[0]],file_dict,ave_descr[0],timer,depend,fyr)
					    else:
						var_avg_results =  climAverager.avg_var(var,years,hist_dict,
						    ave_t.average_types[ave_descr[0]],file_dict,ave_descr[0],timer,depend,fyr)
      
					# Close all open files (for time series files)
					if ((spec.hist_type == 'series' and '__d' not in ave_descr) and (spec.serial or not l_master)):
					    climFileIO.close_all_files(open_list)

					# Pass the average results to master rank for writing
					var_shape = var_avg_results.shape
					var_dtype = var_avg_results.dtype
                                        var_type = type(var_avg_results)
					md_message = {'name':var,'shape':var_shape,'dtype':var_dtype,'average':var_avg_results,'type':var_type}
					if not spec.serial:
					    timer.start("Send Average Time")
					    #inter_comm.collect(data=md_message, tag=AVE_TAG)
                                            inter_comm.collect(data=var, tag=VNAME_TAG)
                                            inter_comm.collect(data=var_avg_results, tag=AVE_TAG)
					    timer.stop("Send Average Time")
	
				if spec.serial or l_master:
				    # If ave_descr is hor.meanyr, there will be three variables to write for each variable.
				    # Otherwise, there will only be 1
				    if ('hor.meanyr' in ave_descr and '__meta' not in orig_var):
					var_cnt = 3
				    else:
					var_cnt = 1
				    for r in range(0,var_cnt):
					if not spec.serial:
					    timer.start("Recv Average Time")
					    #r_rank,results = inter_comm.collect(tag=AVE_TAG)
					    #r_var_avg_results = results['average']
                                            r_rank,var_name = inter_comm.collect(tag=VNAME_TAG)
                                            r_rank,r_var_avg_results = inter_comm.collect(tag=AVE_TAG)
					    #var_name = results['name']
					    timer.start("Recv Average Time") 
					else:
					    var_name = var
					    r_var_avg_results = var_avg_results 
				    
					timer.start("Write Netcdf Averages")
					climFileIO.write_averages(all_files_vars, r_var_avg_results, var_name)
					if ('hor.meanyr' in ave_descr and spec.serial) and '__meta' not in orig_var:
					    climFileIO.write_averages(all_files_vars, var_DIFF_results, var_name+'_DIFF')
					    climFileIO.write_averages(all_files_vars, var_RMS_results, var_name+'_RMS')
					timer.stop("Write Netcdf Averages")

			# Close all open files (for time slice files)
			if (('mavg' in ave_descr or 'moc__d'==ave_descr[0] or 'annall' in ave_descr or 'mons' in ave_descr or
                            '_mean' in ave_descr[0]) and len(lvar_list) > 0):
			    if (spec.serial or not l_master):
				climFileIO.close_all_files(open_list)
			elif ((spec.hist_type == 'slice' or '__d' in ave_descr)and (spec.serial or not l_master) and len(lvar_list) > 0):
			    climFileIO.close_all_files(open_list)  
       
			# Sync the local communicator before closing the averaged netcdf file and moving to the next average          
			inter_comm.sync()

		    # Close the newly created average file
		    if spec.serial or l_master:
			new_file.close()

		    # If needed, stitch spatially split files together.
		    if spec.serial or l_master:
			if (len(spec.split_files.split(",")) > 1):
			    fn1 = spec.out_directory+'/nh_'+outfile_name
			    fn2 = spec.out_directory+'/sh_'+outfile_name
			    out_fn = spec.out_directory+'/'+outfile_name
			    dim_info = spec.split_orig_size.split(",")
			    dim1 = dim_info[0].split("=")
			    dim2 = dim_info[1].split("=")
			    regionOpts.combine_regions(fn1, fn2,  out_fn, dim1[0], int(dim1[1]), dim2[0], int(dim2[1]), "nj", spec.clobber) 
		
		if not spec.serial:
		    # Free the inter-communicators
		    #intercomm.Free()
		    # Sync all mpi tasks / All averages should have been computed at this point 
		    spec.main_comm.sync()

    #==============================================================================
    #
    # Collect and print timing information
    #
    #==============================================================================

        timer.stop("Total Time")
	my_times = spec.main_comm.allreduce(timer.get_all_times(),'max')

	if g_master:
	    print("==============================================")
            print "COMPLETED SUCCESSFULLY"
	    print my_times
	    print("==============================================") 
예제 #6
0
파일: PreProc.py 프로젝트: NCAR/pyAverager
    def create_pre_proc(self,spec):

        '''
        Creates the CICE pre_proc file.

        One output variable is defined per region/variable pair (named 'v<var>_mo_<region>',
        or '<var>_mo_<region>' for 'ext'), plus 'time'.  The names are partitioned across the
        MPI ranks.  For every year/month the worker ranks fetch the slice, scale it, multiply it
        by the observation grid area, mask it to the region and sum it.  The manager rank (or the
        single rank in serial mode) writes every result into the output file at its time index.

        @param spec          An instance of the Specification class which holds the user settings
                             that define which averages to compute, directories, file prefixes, etc
        '''

        variables = {'hi':{'factor':1.0e-13,'units':'1.E+13 m3'},
                     'ai':{'factor':1.0e-14,'units':'1.E+13 m3'},
                     'ext':{'factor':1.0e-12,'units':'1.E+12 m2'},
                     'hs':{'factor':1.0e-13,'units':'1.E+12 m2'},
        }

        #  All of the region names, with 0=Northern Hem and 1=Southern Hem
        regions = {'nh':0, 'sh':1, 'Lab':0,'GIN':0,'Bar':0,'ArcOc':0,'Sib':0,'Beau':0,
                   'CArc':0,'Bering':0,'Okhotsk':0,'Hudson':0,'CAArch':0,
                   'Wed':1,'Ross':1,'Ind':1,'Pac':1,'BAm':1}

        split_hem = spec.split_files.split(',')

        attributes = {'missing_value':1.e+30, 'coordinates':'time', 'cell_methods':'time:mean','_FillValue':1.e+30}

        ave_descr = ['preproc',str(spec.year0),str(spec.year1)]

        AVE_TAG = 40

        years = list(range(int(spec.year0),int(spec.year1)+1))
        months = ave_t.average_types[ave_descr[0]]['months_to_average']

        # Initialize simplecomm (MPI wrappers)
        main_comm = spec.main_comm

        # Rank roles: workers read and reduce the data, the writer owns the output file.
        # In serial mode the single rank plays both roles.
        is_manager = main_comm.is_manager()
        is_worker = spec.serial or not is_manager
        is_writer = spec.serial or is_manager

        # If the region mask file doesn't exist, have root call ncl to create it
        if (not os.path.isfile(spec.reg_file) and is_writer):
            import subprocess
            os.environ['GRIDFILE'] = spec.ice_obs_file
            os.environ['REGIONFILE'] = spec.reg_file
            ncl_command = 'ncl < '+ spec.ncl_location +'/ice_pre_proc_mask.ncl'
            subprocess.call(ncl_command,shell=True)
        # make sure to have all ranks sync to prevent ranks other than root from continuing on without a region mask file
        main_comm.sync()

        # Get the history dictionary that lists where files are located for each time slice, a variable list, meta list, and a key lookup variable
        if (spec.hist_type == 'series'):
            hist_dict,file_var_list,meta_list,key = rover.set_slices_and_vars_time_series(spec.in_directory, spec.file_pattern, spec.date_pattern,
                                                        spec.prefix, spec.suffix, int(spec.year0), int(spec.year1), spec.split, spec.split_files)
        else:
            hist_dict,file_var_list,meta_list,key = rover.set_slices_and_vars_time_slice(spec.in_directory, spec.file_pattern, spec.prefix, spec.suffix, int(spec.year0), int(spec.year1))

        # Loop over the regions and variable names to get full list of variables
        global_var_list = []
        for region_name in regions:
            for base_var in variables:
                if ('ext' in base_var):
                    global_var_list.append(base_var+'_mo_'+region_name)
                else:
                    global_var_list.append('v'+base_var+'_mo_'+region_name)
        global_var_list.append('time')

        # Partition the global variable list between the MPI ranks
        local_var_list = main_comm.partition(global_var_list,func=partition.EqualLength(),involved=False)
        # If master/root, give it the full variable list
        if is_writer:
            local_var_list = global_var_list

        # The output file is defined without any meta variables
        meta_list = []

        # Define the netcdf file
        outfile = 'ice_vol_'+spec.prefix[:-7]+'_'+str(spec.year0)+'-'+str(spec.year1)+'.nc'
        ave_date = str(spec.year0)+'-'+str(spec.year1)
        all_files_vars,new_file = climFileIO.define_ave_file(is_manager,spec.serial,global_var_list,local_var_list,meta_list,hist_dict,spec.hist_type,
            ave_descr,spec.prefix,outfile,spec.split,split_hem[regions['GIN']],spec.out_directory,main_comm,spec.ncformat,
            months[0],key,spec.clobber,int(spec.year0),int(spec.year1),ave_date,attributes,variables)

        # If using time slice files, open all files now (they stay open for every variable)
        slice_files_open = (len(local_var_list) > 0 and spec.hist_type == 'slice' and is_worker)
        if slice_files_open:
            file_dict,open_list = climFileIO.open_all_files(hist_dict,months,
                                    years,local_var_list[0],'null',ave_descr[0],False,int(spec.year0))

        # Read the observation grid once per rank instead of once per variable, and close the
        # handle as soon as the values are in memory so it is not leaked.
        if is_worker and len(local_var_list) > 0:
            obs_file_hndl = Nio.open_file(spec.ice_obs_file,'r')
            try:
                obs_grid_shape = obs_file_hndl.variables['TLAT'].shape
                # NOTE(review): the 1.0e-4 scaling looks like a cm^2 -> m^2 conversion -- confirm
                o_area = obs_file_hndl.variables['TAREA'][:]*1.0e-4
            finally:
                obs_file_hndl.close()

        # Loop over each variable in the local list and read/operate on/write
        for nc_var in local_var_list:
            if is_worker: # Slave
                print('Computing ice_pre_proc for', nc_var)
                # Get variable/region names
                if ('time' in nc_var):
                    get_var_name = 'aice'
                    var_name = 'time'
                    reg = None
                else:
                    var_name,reg = nc_var.split('_mo_')
                    # Every name except 'ext' carries a leading 'v' that is not part of the key
                    if ('ext' not in var_name):
                        var_name = var_name[1:]
                    # Both 'ext' and 'ai' are derived from the 'aice' field
                    if ('ext' in var_name or 'ai' in var_name):
                        get_var_name = 'aice'
                    else:
                        get_var_name = var_name
                    # The mask depends only on the region, so read it once per variable
                    # rather than once per time slice
                    mask_to_apply = self.read_reg_mask(spec.reg_file,reg)

                # If using time series files, open the variable's file now
                if (spec.hist_type == 'series'):
                    if spec.split:
                        if reg is None:
                            # 'time' has no region of its own; use the same hemisphere file
                            # that was handed to define_ave_file above
                            split_name = split_hem[regions['GIN']]
                        else:
                            split_name = split_hem[regions[reg]]
                    else:
                        split_name = ''
                    file_dict,open_list = climFileIO.open_all_files(hist_dict,months,
                                            years,get_var_name,split_name,ave_descr[0],False,int(spec.year0))

            time_slice = 0
            for year in years:
                for m in months:
                    if is_worker: # Slave
                        if ('time' in nc_var):
                            var_sum = rover.fetch_slice(hist_dict, year, m, var_name, file_dict)
                        else:
                            # Get month slice
                            var_slice = rover.fetch_slice(hist_dict, year, m, get_var_name, file_dict)
                            lat,lon = var_slice.shape
                            if spec.split:
                                # A split file holds one hemisphere only: zero out the fill
                                # values and pad with zero rows up to the observation grid size
                                fill = obs_grid_shape[0]-lat
                                missing_vals = np.zeros((fill,lon))
                                var_slice = np.array(var_slice)
                                var_slice[var_slice >= 1e+20] = 0
                                if regions[reg] == 1:
                                    var_slice = np.concatenate((var_slice,missing_vals),axis=0)
                                else:
                                    var_slice = np.concatenate((missing_vals,var_slice),axis=0)

                            # Get ai factor: a field whose maximum is below 2 is scaled by 100
                            # (presumably fraction -> percent; confirm against the model output)
                            if ('ext' in var_name or 'ai' in var_name):
                                aimax = np.amax(var_slice)
                                if (aimax < 2):
                                    aifac = 100
                                else:
                                    aifac = 1
                                var_slice = var_slice*aifac

                            # The ext variable is true/false based on the ai variable.  Set accordingly
                            if ('ext' in var_name):
                                var_slice = np.array(var_slice)
                                var_slice[var_slice >= 1e+20] = 0
                                var_slice[var_slice < 15] = 0
                                var_slice[var_slice >= 15] = 1

                            # Mult by weight
                            var_slice = var_slice * o_area

                            # Mask the variable to get just this region
                            masked_var = MA.masked_where(mask_to_apply==0,var_slice)

                            # Sum the variable
                            var_sum = self.get_sum(masked_var,variables[var_name],var_name)

                        # Pass the results to master rank for writing
                        if not spec.serial:
                            md_message = {'name':nc_var,'shape':var_sum.shape,'dtype':var_sum.dtype,'average':var_sum,'index':time_slice}
                            main_comm.collect(data=md_message, tag=AVE_TAG)

                    if is_writer: # Master
                        # Recv the variable to write
                        if not spec.serial:
                            r_rank,results = main_comm.collect(tag=AVE_TAG)
                            var_sum_results = results['average']
                            v_name = results['name']
                            index = results['index']
                        else:
                            v_name = nc_var
                            var_sum_results = var_sum
                            index = time_slice

                        # Write Var
                        climFileIO.write_averages(all_files_vars, var_sum_results, v_name, index)

                    time_slice = time_slice + 1

            # Close timeseries files that are open
            if (spec.hist_type == 'series' and is_worker):
                climFileIO.close_all_files(open_list)

        # Close timeslice files that are open
        if slice_files_open:
            climFileIO.close_all_files(open_list)

        # Make sure everyone gets sync'ed up
        main_comm.sync()

        # Close the file that was just created
        if is_writer:
            new_file.close()