Example #1
import cdms2
def initialize_redfile(filen,
                       axisdict,
                       typedict,
                       attdict,
                       varnames,
                       lock=None,
                       dt=-1):
    """Initializes the file containing reduced data.  Input is the filename, axes
    variables representing bounds on axes, and names of variables to be initialized.
    Axes are represented as a dictionary, with items like varname:[timeaxis, lataxis, lonaxis].
    Time should always be present as the first axis.
    Note that cdms2 will automatically write the bounds variable if it writes an axis which has bounds.
    """
    if lock is not None: lock.acquire()
    g = cdms2.open(filen, 'w')
    if lock is not None: lock.release()
    if 'time_bnds' not in varnames:
        addVariable(g, 'time_bnds', typedict['time_bnds'],
                    axisdict['time_bnds'], attdict['time_bnds'])
    if 'climatology_bnds' not in varnames and dt == 0:
        addVariable(g, 'climatology_bnds', typedict['time_bnds'],
                    axisdict['time_bnds'], attdict['time_bnds'])
    for varn in varnames:
        addVariable(g, varn, typedict[varn], axisdict[varn], attdict[varn])
    addVariable(g, 'time_weights', typedict['time'], [axisdict['time']], [])
    g['time_weights'].initialized = 'no'
    return g
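A minimal call sketch (hedged: the file name, axes, and attribute values here are hypothetical, and addVariable is the module helper used above, defined elsewhere in the same module):

import numpy
import cdms2

taxis = cdms2.createAxis(numpy.array([15.5]))
taxis.id = 'time'
taxis.units = 'days since 2000-01-01'
taxis.designateTime()
bnds = cdms2.createAxis(numpy.array([0.0, 1.0]))  # second axis for the bounds variable
bnds.id = 'bnd'
axisdict = {'TS': [taxis], 'time_bnds': [taxis, bnds], 'time': taxis}
typedict = {'TS': 'd', 'time_bnds': 'd', 'time': 'd'}
attdict = {'TS': {'units': 'K'}, 'time_bnds': {}}
g = initialize_redfile('reduced.nc', axisdict, typedict, attdict, ['TS'])
g.close()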
Example #2
import logging
import numpy
def update_time_avg(redvars,
                    redtime_bnds,
                    redtime_wts,
                    newvars,
                    next_tbounds,
                    dt=None,
                    new_time_weights=None,
                    force_scalar_avg=False):
    """Updates the time-reduced data for a list of variables.  The reduced-time and averaged
    variables are listed in redvars.  Its weights (for time averaging) are another variable,
    redtime_wts.
    (Each member of redvars should have the same the time axis.  Each member of newvars should have
    the same time axis.  Normally it has an attribute wgts which is the same as redtime_wts.id.)
    The new data is listed in newvars, and this list should correspond to redvars, e.g.
    newvars[i].id==redvars[i].id.  The length of both should be equal and at least one.
    Each variable is an MV.  Normally redvar and redtime_wts will be a FileVariable (required if
    they might change) and newvar a TransientVariable.
    If newvar needs any spatial reductions to match redvar, they should have been performed before
    calling this function.
    next_tbounds is the next time interval, used if newvar is defined on a time beyond redvar's
    present time axis.  If next_tbounds==[], newvar will be ignored on such times.  Normally
    next_tbounds will be set to [] when updating a climatology file which has been initialized.
    The penultimate argument dt is used only in that dt=0 means that we are computing climatologies - hence
    the new data's time axis must be adjusted before averaging the data into redvars.
    The last argument is the time_weights global attribute of the data file, if any; it corresponds
    to newvars.  This is expected to occur iff the data file is a climatology file written by
    an earlier use of this module.
    The optional argument force_scalar_avg argument is for testing and is passed on to two_pt_avg.
    """

    # >>>> TO DO <<<< Ensure that each redvar, redtime_wts, newvar have consistent units
    # >>>> for the variable and for time.  Assert that they have the same shape, axes, etc.

    if redvars is None or len(redvars) == 0:  # formerly redvar was initialized here
        raise Exception("update_time_avg requires a reduced variable list")
    nvars = len(redvars)
    # The following two asserts express my assumption that the first index of the variable is time.
    # This is almost universal, but in the future I should generalize this code.  That would make
    # slicing more verbose, e.g. if time were changed from the first index to second then
    # v[j] would become v[:,j,:] (for a 2-D variable v).

    for var in redvars:
        redtime = var.getTime()  # partially-reduced time axis
        if redtime is not None:  # some variables have no time axis
            try:
                assert (var.getDomain()[0][0].isTime())
            except Exception as e:
                logging.exception("redvars=%s, var=%s, var.getDomain()=%s",
                                  redvars, var, var.getDomain())
                raise e
            break
    assert (redtime is not None)
    redtime_len = redtime.shape[0]
    for var in newvars:
        newtime = var.getTime()  # original time axis, from the new variable
        if newtime is not None:
            try:
                assert (var.getDomain()[0][0].isTime())
            except Exception as e:
                logging.exception("redvars=%s, var=%s, var.getDomain()=%s",
                                  redvars, var, var.getDomain())
                raise e
            break
    assert (newtime is not None)  # The input data should have a time axis!
    if dt == 0:
        newtime = adjust_time_for_climatology(newtime, redtime)
    newtime_bnds = getClimoBounds(newtime)
    # newtime_wts[j,i] is the weight applied to the data at time newtime[j] in computing
    # an average over the reduced time redtime[ newtime_rti[j,i] ], 0<=i<maxolaps.
    # If newtime_rti[j,i]<0, that means the weight is 0.
    maxolaps = 3  # Maximum number of data time intervals which could overlap with a single
                  # reduced-time interval.  We're unlikely to see more than 2.
    newtime_wts = numpy.zeros((newtime.shape[0], maxolaps))
    newtime_rti = numpy.zeros((newtime.shape[0], maxolaps), numpy.int32) - 1
    for j in range(newtime.shape[0]):
        # First, extend redtime and redtime_bnds if necessary:
        # This should be moved to a separate function.
        if newtime_bnds[j][1] > redtime_bnds[-1][1]:
            bndmin = max(newtime_bnds[j][0], next_tbounds[0])
            bndmax = min(newtime_bnds[j][1], next_tbounds[1])
            weight = bndmax - bndmin
            if weight > 0:
                # Extend the time axis to add a new time, time bounds, and weight.  With one more
                # silly step (see below), this will also have the effect of extending redvars along
                # the time axis.
                redtime_bnds[redtime_len] = next_tbounds
                redtime[redtime_len] = 0.5 * (redtime_bnds[redtime_len][1] +
                                              redtime_bnds[redtime_len][0])
                redtime_wts[redtime_len] = 0.0
                redtime_len += 1
        for iv in range(nvars):
            # Without this silly step, the data in redvars[iv] won't be expanded to match the
            # newly expanded time axis...
            dummy = redvars[iv].shape
            # This also will do the job, but it looks like a lot of i/o:
            #   redvars[iv].parent.write(redvars[iv])
            # It doesn't help to write redtime_wts or redtime_bnds.  You need to write a variable
            # with the same axes as redvars.
            # This doesn't do the job:  redvars[iv].parent.sync()

        # The weight of time newtime[j] is the part of its bounds which lies within some reduced-
        # time bounds.  We'll also need to remember the indices of the reduced times for
        # which this is nonzero (there will be few of those, out of many reduced times in total).
        k = -1
        for i, red_bnds in enumerate(redtime_bnds):
            bndmin = max(newtime_bnds[j][0], red_bnds[0])
            bndmax = min(newtime_bnds[j][1], red_bnds[1])
            weight = bndmax - bndmin
            if weight >= 0:
                k += 1
                newtime_wts[j, k] = weight
                newtime_rti[j, k] = i
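        # Worked illustration (made-up numbers): with newtime_bnds[j] = [31, 59]
        # (February) and redtime_bnds = [[0, 90]] (a seasonal bin), the overlap is
        # min(59, 90) - max(31, 0) = 28, so newtime_wts[j,0] = 28 and newtime_rti[j,0] = 0.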
        #  This much simpler expression works if there is no overlap:
        #newtime_wts[j] = newtime_bnds[j][1] - newtime_bnds[j][0]
        kmax = k
        assert (kmax < maxolaps)  # If k were unlimited, the coding would be more complicated.
        # This is the first point at which we decide whether the input file covers any times of
        # interest, e.g. for a DJF climatology, here is where we decide whether the file is a
        # D, J, or F file.  Here kmax<0 if the file has no interesting time.

    if new_time_weights is not None:
        # The weights from a climatology file make sense only when time is simply structured.
        # Otherwise, we don't know what to do.
        for j, nt in enumerate(newtime):
            for k in range(1, kmax + 1):
                assert (newtime_wts[j, k] == 0)
        newtime_wts = numpy.array([new_time_weights.data])
    for j, nt in enumerate(newtime):
        for k in range(kmax + 1):
            i = int(newtime_rti[j][k])
            # This is the second point at which we decide whether the input file covers any times
            # of interest, e.g. for a DJF climatology, here is where we decide whether the file
            # is a D, J, or F file.  Here i<0 if the file has no interesting time.
            if i < 0: continue
            for iv in range(nvars):
                redvar = redvars[iv]
                newvar = newvars[iv]
                if redvar.id == 'time_bnds' or redvar.id == 'time_weights':
                    continue
                if (redvar.dtype.kind == 'i' and newvar.dtype.kind == 'i') or \
                        (redvar.dtype.kind == 'S' and newvar.dtype.kind == 'S'):
                    # Integer (any length) or string: a time average makes no sense.
                    if redvar.shape == newvar.shape:
                        redvar.assignValue(newvar)
                    else:
                        redvar[i] = newvar[j]
                    continue
                if 'time' not in redvar.getAxisIds() and redvar.initialized == 'yes':
                    # ...This assumes time axis is named 'time', but a full-blown check for a time
                    # axis, e.g. via redvar.getTime(), is surprisingly expensive because of a
                    # udunits inefficiency.
                    # Although there's no time axis (hence index i is irrelevant), values may
                    # differ from one file to the next, so we still have to do a time average.
                    if newtime_wts[j, k] > 0:
                        # Maybe the assignValue call will work for other shapes.  To do: try it,
                        # this code will simplify if it works
                        if len(redvar.shape) == 0:
                            redvar.assignValue(
                                ( redvar.subSlice()*redtime_wts[i] + newvar*newtime_wts[j,k] ) /\
                                    ( redtime_wts[i] + newtime_wts[j,k] ) )
                        elif len(redvar.shape) == 1:
                            redvar[:] =\
                                ( redvar[:]*redtime_wts[i] + newvar[:]*newtime_wts[j,k] ) /\
                                ( redtime_wts[i] + newtime_wts[j,k] )
                        elif len(redvar.shape) == 2:
                            redvar[:,:] =\
                                ( redvar[:,:]*redtime_wts[i] + newvar[:,:]*newtime_wts[j,k] ) /\
                                ( redtime_wts[i] + newtime_wts[j,k] )
                        elif len(redvar.shape) == 3:
                            redvar[:,:,:] =\
                                ( redvar[:,:,:]*redtime_wts[i] + newvar[:,:,:]*newtime_wts[j,k] ) /\
                                ( redtime_wts[i] + newtime_wts[j,k] )
                        else:
                            # This rebinds only the local name and won't write back to the
                            # FileVariable redvar, hence the warning:
                            logging.warning(
                                "Probably miscomputing average of %s", redvar.id)
                            redvar =\
                                ( redvar*redtime_wts[i] + newvar*newtime_wts[j,k] ) /\
                                ( redtime_wts[i] + newtime_wts[j,k] )
                elif 'time' not in redvar.getAxisIds() and redvar.initialized == 'no':
                    redvar.assignValue(newvar)
                elif (not hasattr(redvar[i],'mask') and redvar.initialized=='yes') or\
                        (hasattr(redvar[i],'mask') and not redvar[i].mask.all()):
                    # ... i.e., redvar[i] is initialized and is not entirely missing data
                    if newtime_wts[j, k] > 0:

                        # Don't miss this line, it's where the average is computed:
                        redvar[i] = two_pt_avg(
                            redvar,
                            newvar,
                            i,
                            newvar[j],
                            redtime_wts[i],
                            newtime_wts[j, k],
                            force_scalar_avg=force_scalar_avg)

                else:
                    # For averaging, uninitialized is the same as value=0, because redtime_wts is
                    # initialized to 0.
                    redvar[i] = redvar.dtype.type(newvar[j])
                    if hasattr(newvar, 'vwgts'):
                        redvar.vwgts = newvar.vwgts
                        wid = redvar.vwgts
                        frv = redvar.parent
                        if wid not in frv.variables:
                            waxes = redvar.getAxisList()
                            addVariable(frv, wid, 'd', waxes, {})
                            frvw = frv[wid]
                            frvw[:] = newvar.from_file(wid)[:]
            if redtime_wts.initialized == 'yes':
                redtime_wts[i] += newtime_wts[j, k]
            else:  # uninitialized is same as value=0
                redtime_wts[i] = newtime_wts[j, k]
    for iv in range(nvars):
        redvars[iv].initialized = 'yes'
    redtime_wts.initialized = 'yes'

    #print "next_tbounds= ",next_tbounds
    #print "redtime_bnds=",redtime_bnds[:][:]
    #print "redtime_wts=",redtime_wts[:][:]
    #print "newtime_bnds=",newtime_bnds[:][:]
    #print "newtime_wts=",newtime_wts[:][:]
    #print "newtime_rti=",newtime_rti[:][:]
    #print
    return redvars, redtime_wts, redtime
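The no-time-axis branch above applies the standard running weighted mean, (old*W + new*w)/(W + w); two_pt_avg uses the same arithmetic in its scalar path. A tiny self-contained sketch with made-up numbers:

W, w = 31.0, 28.0          # accumulated weight and incoming weight (days)
old, new = 280.0, 282.0    # accumulated mean and incoming value (K)
avg = (old * W + new * w) / (W + w)   # 280.949...
# Afterwards the accumulated weight becomes W + w = 59.0, matching the
# redtime_wts[i] += newtime_wts[j, k] update in the function above.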
Example #3
import numpy
def two_pt_avg(mv1, mv2, i, a2, sw1, sw2, aw2=None, force_scalar_avg=False):
    """Input:
    FileVariables mv1,mv2;  i, (time) index to subset mv1 as a1=mv1[i], a
    corresponding array a2 from mv2, also formed by fixing a point on the first, time, axis;
    scalar weights sw1,sw2.
    Output:   weighted average a of a1 and a2.
    The weights will be scalar weights unless an array weight must be used because either:
    an array weight is already an attribute of mv1 or mv2; or
    v1 and v2 do not have the same mask.
    If an array weight is used, it will become an attribute of the first variable mv1,
    and take its shape.  Normally mv1 is going to be where averages get accumulated,
    so if there is more than one time, it will appear in mv1 but not the input data mv2.
    The optional argument aw2 is meant to support array weights in the case where we are
    averaging different parts of the same variable, i.e. mv1=mv2 but a1==mv1[i]!=a2.
    If aw2 be supplied, it will be used as the array weight for mv2,a2.
    The optional argument force_scalar_avg argument is for testing.  If set to True,
    all computations involving array weights will be bypassed and simple scalar
    weights used instead.
    """
    a1 = mv1[i]
    if not force_scalar_avg:
        w1 = sw1
        if aw2 is not None:
            w2 = aw2
        else:
            w2 = sw2

        # If array weights already exist for mv1 _or_ mv2, use them, and create array weights
        # for whatever doesn't have them.  If mv1,mv2 have different masks, then we need array
        # weights now and henceforth; create them.  The shape attribute is a convenient way
        # to detect whether w1 or w2 is scalar or array.
        if hasattr(mv1, 'vwgts'):
            f1 = mv1.parent  # the (open) file corresponding to the FileVariable mv1
            w1 = f1(mv1.vwgts)
        if hasattr(mv2, 'vwgts') and aw2 is None:
            f2 = mv2.parent  # the (open) file corresponding to the FileVariable mv2
            if f2 is None:
                f2 = mv2.from_file  # the (open) file corresponding to the TransientVariable mv2
            w2 = f2(mv2.vwgts)
        if (not hasattr(w1, 'shape') or len(w1.shape) == 0) and \
                hasattr(w2, 'shape') and len(w2.shape) > 0:
            w1 = numpy.full(mv1.shape, -1)
            w1[i] = sw1
            w1 = numpy.ma.masked_less(w1, 0)
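            # (The -1 fill followed by masked_less leaves sw1 only at index i and masks
            # every other time, so only the current slot carries any weight.)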
        if (not hasattr(w2, 'shape') or len(w2.shape) == 0) and \
                hasattr(w1, 'shape') and len(w1.shape) > 0:
            w2 = numpy.full(mv1.shape, -1)  # assumes mv1's time axis is >= mv2's time axis
            w2[i] = sw2
            w2 = numpy.ma.masked_less(w2, 0)
        if (not hasattr(w1,'shape') or len(w1.shape)==0) and\
                (not hasattr(w2,'shape') or len(w2.shape)==0):
            mask1 = False
            mask2 = False
            if hasattr(mv1, '_mask'):
                mask1 = mv1.mask
            elif getattr(mv1, 'missing_value', None):  # if no missing_value, then no mask
                valu = mv1.getValue()
                if hasattr(valu, '_mask'):
                    mask1 = valu._mask
            if hasattr(mv2, '_mask'):
                mask2 = mv2.mask
            elif getattr(mv2, 'missing_value', None):  # if no missing_value, then no mask
                valu = mv2.getValue()
                if hasattr(valu, '_mask'):
                    mask2 = valu._mask
            if not numpy.all(mask1 == mask2):
                # Note that this test requires reading all the data.  That has to be done anyway
                # to compute the average.  Let's hope that the system caches well enough that
                # there won't be any file access cost to this.
                w1 = numpy.full(mv1.shape, -1)
                w1[i] = sw1
                w1 = numpy.ma.masked_less(w1, 0)
                w2 = numpy.full(mv1.shape, -1)
                w2[i] = sw2
                w2 = numpy.ma.masked_less(w2, 0)

    if not force_scalar_avg and\
            hasattr(w1,'shape') and len(w1.shape)>0 and hasattr(w2,'shape') and len(w2.shape)>0:
        if w1[i].mask.all():  # if w1[i] is all missing values:
            w1[i] = sw1
        if w2[i].mask.all():  # if w2[i] is all missing values:
            w2[i] = sw2
        # Here's what I think the numpy.ma averager does about weights and missing values:
        # the output weight w(i) = sw1+sw2 if neither mv1(i) nor mv2(i) is missing (maybe
        # also if both are missing, because the weight doesn't matter then).  But if point i
        # is missing for mv1, sw1 is dropped, so w(i)=sw2; likewise sw2 is dropped for mv2.
        a, w = numpy.ma.average(numpy.ma.array((a1, a2)),
                                axis=0,
                                weights=numpy.ma.array((w1[i], w2[i])),
                                returned=True)
        # Avoid the occasional surprise about float32/float64 data types:
        a = a.astype(a1.dtype)
        w = w.astype(a.dtype)

        f1 = mv1.parent  # the (open) file corresponding to the FileVariable mv1
        w1id = mv1.id + '_vwgts'
        if not hasattr(mv1, 'vwgts'):
            w1axes = mv1.getAxisList()
            w1attributes = {}
            addVariable(f1, w1id, 'd', w1axes, w1attributes)
        f1w = f1[w1id]
        f1w[:] = w1
        f1w[i] = w
        # TypeError: 'CdmsFile' object does not support item assignment    f1[w1id] = w
        mv1.vwgts = w1id
    else:
        # This is what happens most of the time.  It's a simple average (of two compatible numpy
        # arrays), weighted by scalars.  These scalars are the lengths of time represented
        # by mv1 and mv2.
        # Note that a1,a2 are of type TransientVariable, which inherits from numpy.ma.MaskedArray.
        sw12 = sw1 + sw2
        a = a1 * (sw1 / sw12) + a2 * (sw2 / sw12)
        try:
            if a.dtype != a1.dtype:
                a = a.astype(a1.dtype)
        except Exception:
            # Happens if a1 isn't a numpy array, e.g. a float.  Then it's ok to just go on.
            pass
    return a
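The array-weight path depends on numpy.ma.average dropping the weight of masked points, as the comment in the function guesses. A minimal sketch with made-up values shows that behavior:

import numpy

a1 = numpy.ma.array([280.0, 0.0], mask=[False, True])    # second point missing
a2 = numpy.ma.array([282.0, 284.0], mask=[False, False])
a, w = numpy.ma.average(numpy.ma.array((a1, a2)), axis=0,
                        weights=numpy.ma.array((31.0, 28.0)), returned=True)
# a -> [280.949..., 284.0] and w -> [59.0, 28.0]: where a1 is masked, its
# weight (31.0) is dropped and the returned weight is just a2's 28.0.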