Ejemplo n.º 1
def main():
    def compute(param):
        template = populateStringConstructor(args.filename_template, args)
        template.variable = param.varname
        template.month = param.monthname
        fnameRoot = param.fileName
        reverted = template.reverse(os.path.basename(fnameRoot))
        model = reverted["model"]
        print('Specifying latitude / longitude domain of interest ...')
        datanameID = 'diurnalmean'  # Short ID name of output data
        latrange = (param.args.lat1, param.args.lat2)
        lonrange = (param.args.lon1, param.args.lon2)
        region = cdutil.region.domain(latitude=latrange, longitude=lonrange)
        if param.args.region_name == "":
            region_name = "{:g}_{:g}&{:g}_{:g}".format(*(latrange + lonrange))
            region_name = param.args.region_name
        print('Reading %s ...' % fnameRoot)
            f = cdms2.open(fnameRoot)
            x = f(datanameID, region)
            units = x.units
            print('  Shape =', x.shape)

                'Finding standard deviation over first dimension (time of day) ...'
            x = genutil.statistics.std(x)
            print('  Shape =', x.shape)

            print('Finding r.m.s. average over 2nd-3rd dimensions (area) ...')
            x = x * x
            x = cdutil.averager(x, axis='xy')
            x = cdms2.MV2.sqrt(x)

                'For %8s in %s, average variance of hourly values = (%5.2f %s)^2'
                % (model, monthname, x, units))
        except Exception as err:
            print("Failed model %s with error" % (err))
            x = 1.e20
        return model, region, {region_name: float(x)}

        help="Output name for jsons")

    P.add_argument("--lat1", type=float, default=-50., help="First latitude")
    P.add_argument("--lat2", type=float, default=50., help="Last latitude")
    P.add_argument("--lon1", type=float, default=0., help="First longitude")
    P.add_argument("--lon2", type=float, default=360., help="Last longitude")
                   help="name for the region of interest")

    P.add_argument("--model", default="*")

    args = P.get_parameter()
    month = args.month
    monthname = monthname_d[month]
    startyear = args.firstyear  # noqa: F841
    finalyear = args.lastyear  # noqa: F841

    template = populateStringConstructor(args.filename_template, args)
    template.month = monthname

    print("TEMPLATE NAME:", template())

    print('Specifying latitude / longitude domain of interest ...')
    # TRMM (observed) domain:
    latrange = (args.lat1, args.lat2)
    lonrange = (args.lon1, args.lon2)

    region = cdutil.region.domain(latitude=latrange, longitude=lonrange)

    # Amazon basin:
    # latrange = (-15.0,  -5.0)
    # lonrange = (285.0, 295.0)

    print('Preparing to write output to JSON file ...')
    if not os.path.exists(args.results_dir):
    jsonFile = populateStringConstructor(args.outnamejson, args)
    jsonFile.month = monthname

    jsonname = os.path.join(os.path.abspath(args.results_dir), jsonFile())

    if not os.path.exists(jsonname) or args.append is False:
        print('Initializing dictionary of statistical results ...')
        stats_dic = {}
        metrics_dictionary = collections.OrderedDict()
        with open(jsonname) as f:
            metrics_dictionary = json.load(f)
            print("LOADE WITH KEYS:", list(metrics_dictionary.keys()))
            stats_dic = metrics_dictionary["RESULTS"]

    OUT = pcmdi_metrics.io.base.Base(os.path.abspath(args.results_dir),
        egg_pth = pkg_resources.resource_filename(
            pkg_resources.Requirement.parse("pcmdi_metrics"), "share/pmp")
    except Exception:
        # python 2 seems to fail when ran in home directory of source?
        egg_pth = os.path.join(os.getcwd(), "share", "pmp")
    disclaimer = open(os.path.join(egg_pth, "disclaimer.txt")).read()
    metrics_dictionary["DISCLAIMER"] = disclaimer
    metrics_dictionary["REFERENCE"] = (
        "The statistics in this file are based on Trenberth, Zhang & Gehne, "
        "J Hydromet. 2017")

    files = glob.glob(os.path.join(args.modpath, template()))

    params = [INPUT(args, name, template) for name in files]
    print("PARAMS:", params)

    results = cdp.cdp_run.multiprocess(compute,

    for r in results:
        m, region, res = r
        if r[0] not in stats_dic:
            stats_dic[m] = res

    print('Writing output to JSON file ...')
    metrics_dictionary["RESULTS"] = stats_dic
    print("KEYS AT END:", list(metrics_dictionary.keys()))
    rgmsk = metrics_dictionary.get("RegionalMasking", {})
    print("REG MASK:", rgmsk)
    nm = list(res.keys())[0]
    region.id = nm
    rgmsk[nm] = {"id": nm, "domain": region}
    metrics_dictionary["RegionalMasking"] = rgmsk
              json_structure=["model", "domain"],
              separators=(',', ': '))
Ejemplo n.º 2
# *WARNING* some GMT timepoints are actually (0, 3, 6,..., 21) in submitted CMIP5 data, despite character strings in
#           file names (and time axis metadata) to the contrary. See CMIP5 documentation and errata! Overrides to
#           correct these problems are given below:
# Include 00Z as a possible starting time, to accommodate (0, 3, 6,...,
# 21)GMT in the input data.
# startime = -1.5     # Subtract 1.5h from (0, 3, 6,..., 21)GMT input
# data. This is needed for BNU-ESM, CCSM4 and CNRM-CM5.
# Subtract 1.5h from (0, 3, 6,..., 21)GMT input data. This is needed for

# -------------------------------------

# Driver section: resolve the month name and the number of years requested,
# build the input-file template for the chosen variable, collect the matching
# files under `directory`, and hand one INPUT parameter object per file to
# `compute` through cdp's multiprocess runner.
monthname = monthname_d[month]
nYears = finalyear - startyear + 1
# Character strings for starting and ending day/GMT (*HARD-CODES
# particular GMT timepoints*):
# *WARNING* GMT timepoints are actually (0, 3, 6,..., 21) in the original TRMM/Obs4MIPs data, despite character strings
# in file names (and time axis metadata). See CMIP5 documentation and
# errata!

template = populateStringConstructor(args.filename_template, args)
template.variable = varbname

# `template()` renders the filename pattern that is globbed inside `directory`.
fileList = glob.glob(os.path.join(directory, template()))
# print() as a function: the Python 2 print statement is a SyntaxError on
# Python 3, and the function form matches the other drivers.
print("FILES:", fileList)

params = [INPUT(args, name, template) for name in fileList]
print("PARAMS:", params)

cdp.cdp_run.multiprocess(compute, params, num_workers=args.num_workers)
Ejemplo n.º 3
def main():
    def compute(params):
        fileName = params.fileName
        month = params.args.month
        monthname = params.monthname
        varbname = params.varname
        template = populateStringConstructor(args.filename_template, args)
        template.variable = varbname
        # Units on output (*may be converted below from the units of input*)
        outunits = "mm/d"
        startime = 1.5  # GMT value for starting time-of-day

        dataname = params.args.model
        if dataname is None or dataname.find("*") != -1:
            # model not passed or passed as *
            reverted = template.reverse(os.path.basename(fileName))
            print("REVERYING", reverted, dataname)
            dataname = reverted["model"]
        if dataname not in args.skip:
                print("Data source:", dataname)
                print("Opening %s ..." % fileName)
                f = cdms2.open(fileName)

                # Composite-mean and composite-s.d diurnal cycle for month and year(s):
                iYear = 0
                for year in range(args.firstyear, args.lastyear + 1):
                    print("Year %s:" % year)
                    startTime = cdtime.comptime(year, month)
                    # Last possible second to get all tpoints
                    finishtime = startTime.add(1, cdtime.Month).add(-1, cdtime.Minute)
                        "Reading %s from %s for time interval %s to %s ..."
                        % (varbname, fileName, startTime, finishtime)
                    # Transient variable stores data for current year's month.
                    tvarb = f(varbname, time=(startTime, finishtime))
                    # *HARD-CODES conversion from kg/m2/sec to mm/day.
                    tvarb *= 86400
                    print("Shape:", tvarb.shape)
                    # The following tasks need to be done only once, extracting
                    # metadata from first-year file:
                    if year == args.firstyear:
                        tc = tvarb.getTime().asComponentTime()
                        print("DATA FROM:", tc[0], "to", tc[-1])
                        day1 = cdtime.comptime(tc[0].year, tc[0].month)
                        day1 = tc[0]
                        firstday = tvarb(time=(day1, day1.add(1.0, cdtime.Day), "con"))
                        dimensions = firstday.shape
                        print("  Shape = ", dimensions)
                        # Number of time points in the selected month for one year
                        N = dimensions[0]
                        nlats = dimensions[1]
                        nlons = dimensions[2]
                        deltaH = 24.0 / N
                        dayspermo = tvarb.shape[0] // N
                            "  %d timepoints per day, %d hr intervals between timepoints"
                            % (N, deltaH)
                        comptime = firstday.getTime()
                        modellons = tvarb.getLongitude()
                        modellats = tvarb.getLatitude()
                        # Longitude values are needed later to compute Local Solar
                        # Times.
                        lons = modellons[:]
                        print("  Creating temporary storage and output fields ...")
                        # Sorts tvarb into separate GMTs for one year
                        tvslice = MV2.zeros((N, dayspermo, nlats, nlons))
                        # Concatenates tvslice over all years
                        concatenation = MV2.zeros((N, dayspermo * nYears, nlats, nlons))
                        LSTs = MV2.zeros((N, nlats, nlons))
                        for iGMT in range(N):
                            hour = iGMT * deltaH + startime
                                "  Computing Local Standard Times for GMT %5.2f ..."
                                % hour
                            for j in range(nlats):
                                for k in range(nlons):
                                    LSTs[iGMT, j, k] = (hour + lons[k] / 15) % 24
                    for iGMT in range(N):
                        hour = iGMT * deltaH + startime
                        print("  Choosing timepoints with GMT %5.2f ..." % hour)
                        print("days per mo :", dayspermo)
                        # Transient-variable slice: every Nth tpoint gets all of
                        # the current GMT's tpoints for current year:
                        tvslice[iGMT] = tvarb[iGMT::N]
                            iGMT, iYear * dayspermo : (iYear + 1) * dayspermo
                        ] = tvslice[iGMT]
                    iYear += 1

                # For each GMT, take mean and standard deviation over all years for
                # the chosen month:
                avgvalues = MV2.zeros((N, nlats, nlons))
                stdvalues = MV2.zeros((N, nlats, nlons))
                for iGMT in range(N):
                    hour = iGMT * deltaH + startime
                        "Computing mean and standard deviation over all GMT %5.2f timepoints ..."
                        % hour
                    # Assumes first dimension of input ("axis#0") is time
                    avgvalues[iGMT] = MV2.average(concatenation[iGMT], axis=0)
                    stdvalues[iGMT] = genutil.statistics.std(concatenation[iGMT])
                avgvalues.id = "diurnalmean"
                stdvalues.id = "diurnalstd"
                LSTs.id = "LST"
                avgvalues.units = outunits
                # Standard deviation has same units as mean (not so for
                # higher-moment stats).
                stdvalues.units = outunits
                LSTs.units = "hr"
                LSTs.longname = "Local Solar Time"
                avgvalues.setAxis(0, comptime)
                avgvalues.setAxis(1, modellats)
                avgvalues.setAxis(2, modellons)
                stdvalues.setAxis(0, comptime)
                stdvalues.setAxis(1, modellats)
                stdvalues.setAxis(2, modellons)
                LSTs.setAxis(0, comptime)
                LSTs.setAxis(1, modellats)
                LSTs.setAxis(2, modellons)
                avgoutfile = ("%s_%s_%s_%s-%s_diurnal_avg.nc") % (
                stdoutfile = ("%s_%s_%s_%s-%s_diurnal_std.nc") % (
                LSToutfile = "%s_%s_LocalSolarTimes.nc" % (varbname, dataname)
                if not os.path.exists(args.results_dir):
                f = cdms2.open(os.path.join(args.results_dir, avgoutfile), "w")
                g = cdms2.open(os.path.join(args.results_dir, stdoutfile), "w")
                h = cdms2.open(os.path.join(args.results_dir, LSToutfile), "w")
            except Exception as err:
                print("Failed for model %s with erro: %s" % (dataname, err))

    args = P.get_parameter()

    month = args.month  # noqa: F841
    monthname = monthname_d[args.month]  # noqa: F841

    # -------------------------------------HARD-CODED INPUT (add to command line later?):

    # These models have been processed already (or tried and found wanting,
    # e.g. problematic time coordinates):
    skipMe = args.skip  # noqa: F841

    # Choose only one ensemble member per model, with the following ensemble-member code (for definitions, see
    # http://cmip-pcmdi.llnl.gov/cmip5/docs/cmip5_data_reference_syntax.pdf):

    # NOTE--These models do not supply 3hr data from the 'r1i1p1' ensemble member,
    #       but do supply it from other ensemble members:
    #       bcc-csm1-1 (3hr data is from r2i1p1)
    #       CCSM4      (3hr data is from r6i1p1)
    #       GFDL-CM3   (3hr data is from r2i1p1, r3i1p1, r4i1p1, r5i1p1)
    #       GISS-E2-H  (3hr data is from r6i1p1, r6i1p3)
    #       GISS-E2-R  (3hr data is from r6i1p2)

    varbname = "pr"

    #           Note that CMIP5 specifications designate (01:30, 04:30, 07:30, ..., 22:30) GMT for 3hr flux fields, but
    # *WARNING* some GMT timepoints are actually (0, 3, 6,..., 21) in submitted CMIP5 data, despite character strings in
    #           file names (and time axis metadata) to the contrary. See CMIP5 documentation and errata! Overrides to
    #           correct these problems are given below:
    # startGMT =  '0:0:0.0' # Include 00Z as a possible starting time, to accommodate (0, 3, 6,..., 21)GMT in the input
    # data.
    # startime = -1.5 # Subtract 1.5h from (0, 3, 6,..., 21)GMT input data. This is needed for BNU-ESM, CCSM4 and
    # CNRM-CM5.
    # startime = -3.0 # Subtract 3.0h from (0, 3, 6,..., 21)GMT input
    # data. This is needed for CMCC-CM.

    # -------------------------------------

    nYears = args.lastyear - args.firstyear + 1

    template = populateStringConstructor(args.filename_template, args)
    template.variable = varbname

    print("TEMPLATE:", template())
    fileList = glob.glob(os.path.join(args.modpath, template()))
    print("FILES:", fileList)
    params = [INPUT(args, name, template) for name in fileList]
    print("PARAMS:", params)
    cdp.cdp_run.multiprocess(compute, params, num_workers=args.num_workers)
Ejemplo n.º 4
def main():
    def compute(param):
        template = populateStringConstructor(args.filename_template, args)
        template.variable = param.varname
        template.month = param.monthname
        fnameRoot = param.fileName
        reverted = template.reverse(os.path.basename(fnameRoot))
        model = reverted["model"]
        print("Specifying latitude / longitude domain of interest ...")
        datanameID = "diurnalstd"  # Short ID name of output data
        latrange = (param.args.lat1, param.args.lat2)
        lonrange = (param.args.lon1, param.args.lon2)
        region = cdutil.region.domain(latitude=latrange, longitude=lonrange)
        if param.args.region_name == "":
            region_name = "{:g}_{:g}&{:g}_{:g}".format(*(latrange + lonrange))
            region_name = param.args.region_name
        print("Reading %s ..." % fnameRoot)
        reverted = template.reverse(os.path.basename(fnameRoot))
        model = reverted["model"]
            f = cdms2.open(fnameRoot)
            x = f(datanameID, region)
            units = x.units
            print("  Shape =", x.shape)
            print("Finding RMS area-average ...")
            x = x * x
            x = cdutil.averager(x, weights="unweighted")
            x = cdutil.averager(x, axis="xy")
            x = numpy.ma.sqrt(x)
                "For %8s in %s, average variance of hourly values = (%5.2f %s)^2"
                % (model, monthname, x, units))
        except Exception as err:
            print("Failed model %s with error: %s" % (model, err))
            x = 1.0e20
        return model, region, {region_name: x}

        help="Output name for jsons",

    P.add_argument("--lat1", type=float, default=-50.0, help="First latitude")
    P.add_argument("--lat2", type=float, default=50.0, help="Last latitude")
    P.add_argument("--lon1", type=float, default=0.0, help="First longitude")
    P.add_argument("--lon2", type=float, default=360.0, help="Last longitude")
        help="name for the region of interest",

    P.add_argument("--model", default="*")
        help="Use to save metrics in CMEC JSON format",
        help="Use to disable saving metrics in CMEC JSON format",

    args = P.get_parameter()
    month = args.month
    monthname = monthname_d[month]
    startyear = args.firstyear  # noqa: F841
    finalyear = args.lastyear  # noqa: F841
    cmec = args.cmec

    template = populateStringConstructor(args.filename_template, args)
    template.month = monthname

    print("TEMPLATE NAME:", template())

    print("Specifying latitude / longitude domain of interest ...")
    # TRMM (observed) domain:
    latrange = (args.lat1, args.lat2)
    lonrange = (args.lon1, args.lon2)

    region = cdutil.region.domain(latitude=latrange, longitude=lonrange)

    # Amazon basin:
    # latrange = (-15.0,  -5.0)
    # lonrange = (285.0, 295.0)

    print("Preparing to write output to JSON file ...")
    if not os.path.exists(args.results_dir):
    jsonFile = populateStringConstructor(args.outnamejson, args)
    jsonFile.month = monthname

    jsonname = os.path.join(os.path.abspath(args.results_dir), jsonFile())

    if not os.path.exists(jsonname) or args.append is False:
        print("Initializing dictionary of statistical results ...")
        stats_dic = {}
        metrics_dictionary = collections.OrderedDict()
        with open(jsonname) as f:
            metrics_dictionary = json.load(f)
            stats_dic = metrics_dictionary["RESULTS"]

    OUT = pcmdi_metrics.io.base.Base(os.path.abspath(args.results_dir),
    egg_pth = resources.resource_path()
    disclaimer = open(os.path.join(egg_pth, "disclaimer.txt")).read()
    metrics_dictionary["DISCLAIMER"] = disclaimer
    metrics_dictionary["REFERENCE"] = (
        "The statistics in this file are based on Trenberth, Zhang & Gehne, "
        "J Hydromet. 2017")

    files = glob.glob(os.path.join(args.modpath, template()))

    params = [INPUT(args, name, template) for name in files]
    print("PARAMS:", params)

    results = cdp.cdp_run.multiprocess(compute,

    for r in results:
        m, region, res = r
        if r[0] not in stats_dic:
            stats_dic[m] = res

    print("Writing output to JSON file ...")
    metrics_dictionary["RESULTS"] = stats_dic
    rgmsk = metrics_dictionary.get("RegionalMasking", {})
    nm = list(res.keys())[0]
    region.id = nm
    rgmsk[nm] = {"id": nm, "domain": region}
    metrics_dictionary["RegionalMasking"] = rgmsk
        json_structure=["model", "domain"],
        separators=(",", ": "),
    if cmec:
        print("Writing cmec file")
        OUT.write_cmec(indent=4, separators=(",", ": "))
Ejemplo n.º 5
def main():
    def compute(params):
        fileName = params.fileName
        startyear = params.args.firstyear
        finalyear = params.args.lastyear
        month = params.args.month
        monthname = params.monthname
        varbname = params.varname
        template = populateStringConstructor(args.filename_template, args)
        template.variable = varbname

        dataname = params.args.model
        if dataname is None or dataname.find("*") != -1:
            # model not passed or passed as *
            reverted = template.reverse(os.path.basename(fileName))
            dataname = reverted["model"]
        print('Data source:', dataname)
        print('Opening %s ...' % fileName)
        if dataname not in args.skip:
                print('Data source:', dataname)
                print('Opening %s ...' % fileName)
                f = cdms2.open(fileName)
                iYear = 0
                dmean = None
                for year in range(startyear, finalyear + 1):
                    print('Year %s:' % year)
                    startTime = cdtime.comptime(year, month)
                    # Last possible second to get all tpoints
                    finishtime = startTime.add(1, cdtime.Month).add(
                        -1, cdtime.Minute)
                    print('Reading %s from %s for time interval %s to %s ...' %
                          (varbname, fileName, startTime, finishtime))
                    # Transient variable stores data for current year's month.
                    tvarb = f(varbname, time=(startTime, finishtime, "ccn"))
                    # *HARD-CODES conversion from kg/m2/sec to mm/day.
                    tvarb *= 86400
                    # The following tasks need to be done only once, extracting
                    # metadata from first-year file:
                    tc = tvarb.getTime().asComponentTime()
                    current = tc[0]
                    while current.month == month:
                        end = cdtime.comptime(current.year, current.month,
                                              current.day).add(1, cdtime.Day)
                        sub = tvarb(time=(current, end, "con"))
                        # Assumes first dimension of input ("axis#0") is time
                        tmp = numpy.ma.average(sub, axis=0)
                        sh = list(tmp.shape)
                        sh.insert(0, 1)
                        if dmean is None:
                            dmean = tmp.reshape(sh)
                            dmean = numpy.ma.concatenate(
                                (dmean, tmp.reshape(sh)), axis=0)
                        current = end
                    iYear += 1
                stdvalues = cdms2.MV2.array(genutil.statistics.std(dmean))
                stdvalues.setAxis(0, tvarb.getLatitude())
                stdvalues.setAxis(1, tvarb.getLongitude())
                stdvalues.id = 'dailySD'
                # Standard deviation has same units as mean.
                stdvalues.units = "mm/d"
                stdoutfile = ('%s_%s_%s_%s-%s_std_of_dailymeans.nc') % (
                    varbname, dataname, monthname, str(startyear),
            except Exception as err:
                print("Failed for model: %s with error: %s" % (dataname, err))
        if not os.path.exists(args.results_dir):
        g = cdms2.open(os.path.join(args.results_dir, stdoutfile), 'w')

    args = P.get_parameter()
    month = args.month
    startyear = args.firstyear
    finalyear = args.lastyear
    directory = args.modpath  # Input  directory for model data
    # These models have been processed already (or tried and found wanting,
    # e.g. problematic time coordinates):
    skipMe = args.skip
    print("SKIPPING:", skipMe)

    # Choose only one ensemble member per model, with the following ensemble-member code (for definitions, see
    # http://cmip-pcmdi.llnl.gov/cmip5/docs/cmip5_data_reference_syntax.pdf):

    # NOTE--These models do not supply 3hr data from the 'r1i1p1' ensemble member,
    #       but do supply it from other ensemble members:
    #       bcc-csm1-1 (3hr data is from r2i1p1)
    #       CCSM4      (3hr data is from r6i1p1)
    #       GFDL-CM3   (3hr data is from r2i1p1, r3i1p1, r4i1p1, r5i1p1)
    #       GISS-E2-H  (3hr data is from r6i1p1, r6i1p3)
    #       GISS-E2-R  (3hr data is from r6i1p2)

    varbname = "pr"

    #           Note that CMIP5 specifications designate (01:30, 04:30, 07:30, ..., 22:30) GMT for 3hr flux fields, but
    # *WARNING* some GMT timepoints are actually (0, 3, 6,..., 21) in submitted CMIP5 data, despite character strings in
    #           file names (and time axis metadata) to the contrary. See CMIP5 documentation and errata! Overrides to
    #           correct these problems are given below:
    # Include 00Z as a possible starting time, to accommodate (0, 3, 6,...,
    # 21)GMT in the input data.
    # startime = -1.5     # Subtract 1.5h from (0, 3, 6,..., 21)GMT input
    # data. This is needed for BNU-ESM, CCSM4 and CNRM-CM5.
    # Subtract 1.5h from (0, 3, 6,..., 21)GMT input data. This is needed for
    # CMCC-CM.

    # -------------------------------------

    monthname = monthname_d[month]  # noqa: F841
    nYears = finalyear - startyear + 1  # noqa: F841
    # Character strings for starting and ending day/GMT (*HARD-CODES
    # particular GMT timepoints*):
    # *WARNING* GMT timepoints are actually (0, 3, 6,..., 21) in the original TRMM/Obs4MIPs data, despite character
    # strings in file names (and time axis metadata). See CMIP5 documentation and
    # errata!

    template = populateStringConstructor(args.filename_template, args)
    template.variable = varbname

    fileList = glob.glob(os.path.join(directory, template()))
    print("FILES:", fileList)

    params = [INPUT(args, name, template) for name in fileList]
    print("PARAMS:", params)

    cdp.cdp_run.multiprocess(compute, params, num_workers=args.num_workers)