def main():
    """Compute the area-averaged std. dev. of the mean diurnal cycle and save it as JSON.

    Registers the script-specific command-line options on ``P``, globs the
    input files matching the filename template under ``args.modpath``, runs
    ``compute`` on each of them through ``cdp.cdp_run.multiprocess`` and
    writes the collected results (plus disclaimer, reference and regional
    masking metadata) with ``pcmdi_metrics.io.base.Base``.
    """

    def compute(param):
        """Return ``(model, region, {region_name: value})`` for one input file.

        ``param`` supplies ``varname``, ``monthname``, ``fileName`` and
        ``args``. The model name is recovered by reversing the filename
        template on the file's basename. If reading or processing fails, the
        error is reported and the value is set to 1.e20.
        """
        template = populateStringConstructor(args.filename_template, args)
        template.variable = param.varname
        template.month = param.monthname
        fnameRoot = param.fileName
        reverted = template.reverse(os.path.basename(fnameRoot))
        model = reverted["model"]
        print('Specifying latitude / longitude domain of interest ...')
        datanameID = 'diurnalmean'  # Short ID name of output data
        latrange = (param.args.lat1, param.args.lat2)
        lonrange = (param.args.lon1, param.args.lon2)
        region = cdutil.region.domain(latitude=latrange, longitude=lonrange)
        if param.args.region_name == "":
            region_name = "{:g}_{:g}&{:g}_{:g}".format(*(latrange + lonrange))
        else:
            region_name = param.args.region_name
        print('Reading %s ...' % fnameRoot)
        f = None
        try:
            f = cdms2.open(fnameRoot)
            x = f(datanameID, region)
            units = x.units
            print(' Shape =', x.shape)
            print(
                'Finding standard deviation over first dimension (time of day) ...'
            )
            x = genutil.statistics.std(x)
            print(' Shape =', x.shape)
            print('Finding r.m.s. average over 2nd-3rd dimensions (area) ...')
            x = x * x
            x = cdutil.averager(x, axis='xy')
            x = cdms2.MV2.sqrt(x)
            # `monthname` comes from the enclosing main() scope.
            print(
                'For %8s in %s, average variance of hourly values = (%5.2f %s)^2'
                % (model, monthname, x, units))
        except Exception as err:
            # Report both the model and the error (the message previously
            # formatted the error into the model slot and dropped the rest).
            print("Failed model %s with error: %s" % (model, err))
            x = 1.e20
        finally:
            # Close the input file on the failure path too.
            if f is not None:
                f.close()
        return model, region, {region_name: float(x)}

    P.add_argument(
        "-j",
        "--outnamejson",
        type=str,
        dest='outnamejson',
        default='pr_%(month)_%(firstyear)-%(lastyear)_std_of_meandiurnalcyc.json',
        help="Output name for jsons")
    P.add_argument("--lat1", type=float, default=-50., help="First latitude")
    P.add_argument("--lat2", type=float, default=50., help="Last latitude")
    P.add_argument("--lon1", type=float, default=0., help="First longitude")
    P.add_argument("--lon2", type=float, default=360., help="Last longitude")
    P.add_argument("--region_name",
                   type=str,
                   default="TRMM",
                   help="name for the region of interest")
    P.add_argument(
        "-t",
        "--filename_template",
        default="pr_%(model)_%(month)_%(firstyear)-%(lastyear)_diurnal_avg.nc")
    P.add_argument("--model", default="*")

    args = P.get_parameter()
    month = args.month
    monthname = monthname_d[month]
    startyear = args.firstyear  # noqa: F841
    finalyear = args.lastyear  # noqa: F841

    template = populateStringConstructor(args.filename_template, args)
    template.month = monthname

    print("TEMPLATE NAME:", template())

    print('Specifying latitude / longitude domain of interest ...')
    # TRMM (observed) domain:
    latrange = (args.lat1, args.lat2)
    lonrange = (args.lon1, args.lon2)

    region = cdutil.region.domain(latitude=latrange, longitude=lonrange)

    # Amazon basin:
    # latrange = (-15.0, -5.0)
    # lonrange = (285.0, 295.0)

    print('Preparing to write output to JSON file ...')
    if not os.path.exists(args.results_dir):
        os.makedirs(args.results_dir)
    jsonFile = populateStringConstructor(args.outnamejson, args)
    jsonFile.month = monthname

    jsonname = os.path.join(os.path.abspath(args.results_dir), jsonFile())

    if not os.path.exists(jsonname) or args.append is False:
        print('Initializing dictionary of statistical results ...')
        stats_dic = {}
        metrics_dictionary = collections.OrderedDict()
    else:
        with open(jsonname) as f:
            metrics_dictionary = json.load(f)
            print("LOADE WITH KEYS:", list(metrics_dictionary.keys()))
            stats_dic = metrics_dictionary["RESULTS"]

    OUT = pcmdi_metrics.io.base.Base(os.path.abspath(args.results_dir),
                                     jsonFile())
    try:
        egg_pth = pkg_resources.resource_filename(
            pkg_resources.Requirement.parse("pcmdi_metrics"), "share/pmp")
    except Exception:
        # python 2 seems to fail when ran in home directory of source?
        egg_pth = os.path.join(os.getcwd(), "share", "pmp")
    # Read the disclaimer through a context manager so the handle is closed.
    with open(os.path.join(egg_pth, "disclaimer.txt")) as disclaimer_file:
        disclaimer = disclaimer_file.read()
    metrics_dictionary["DISCLAIMER"] = disclaimer
    metrics_dictionary["REFERENCE"] = (
        "The statistics in this file are based on Trenberth, Zhang & Gehne, "
        "J Hydromet. 2017")

    files = glob.glob(os.path.join(args.modpath, template()))
    print(files)

    params = [INPUT(args, name, template) for name in files]
    print("PARAMS:", params)

    results = cdp.cdp_run.multiprocess(compute,
                                       params,
                                       num_workers=args.num_workers)

    res = None
    for r in results:
        m, region, res = r
        if m not in stats_dic:
            stats_dic[m] = res
        else:
            stats_dic[m].update(res)

    print('Writing output to JSON file ...')
    metrics_dictionary["RESULTS"] = stats_dic
    print("KEYS AT END:", list(metrics_dictionary.keys()))
    rgmsk = metrics_dictionary.get("RegionalMasking", {})
    print("REG MASK:", rgmsk)
    if res is None:
        # No input file matched: derive the region name from the arguments,
        # exactly as compute() does, instead of failing on an unbound `res`.
        if args.region_name == "":
            nm = "{:g}_{:g}&{:g}_{:g}".format(*(latrange + lonrange))
        else:
            nm = args.region_name
    else:
        nm = list(res.keys())[0]
    region.id = nm
    rgmsk[nm] = {"id": nm, "domain": region}
    metrics_dictionary["RegionalMasking"] = rgmsk
    OUT.write(metrics_dictionary,
              json_structure=["model", "domain"],
              indent=4,
              separators=(',', ': '))
    print('done')
# *WARNING* some GMT timepoints are actually (0, 3, 6,..., 21) in submitted CMIP5 data, despite character strings in # file names (and time axis metadata) to the contrary. See CMIP5 documentation and errata! Overrides to # correct these problems are given below: # Include 00Z as a possible starting time, to accomodate (0, 3, 6,..., # 21)GMT in the input data. # startime = -1.5 # Subtract 1.5h from (0, 3, 6,..., 21)GMT input # data. This is needed for BNU-ESM, CCSM4 and CNRM-CM5. # Subtract 1.5h from (0, 3, 6,..., 21)GMT input data. This is needed for # CMCC-CM. # ------------------------------------- monthname = monthname_d[month] nYears = finalyear - startyear + 1 # Character strings for starting and ending day/GMT (*HARD-CODES # particular GMT timepoints*): # *WARNING* GMT timepoints are actually (0, 3, 6,..., 21) in the original TRMM/Obs4MIPs data, despite character strings # in file names (and time axis metadata). See CMIP5 documentation and # errata! template = populateStringConstructor(args.filename_template, args) template.variable = varbname fileList = glob.glob(os.path.join(directory, template())) print "FILES:", fileList params = [INPUT(args, name, template) for name in fileList] print "PARAMS:", params cdp.cdp_run.multiprocess(compute, params, num_workers=args.num_workers)
def main():
    """Build composite diurnal-cycle mean / std. dev. files for each matching input file.

    Globs the files matching the filename template under ``args.modpath`` and
    runs ``compute`` on each of them through ``cdp.cdp_run.multiprocess``.
    """

    def compute(params):
        """Write diurnal mean, diurnal std. dev. and local-solar-time files for one input file.

        ``params`` supplies ``fileName``, ``monthname``, ``varname`` and
        ``args``. Data sources listed in ``args.skip`` are ignored. Any
        exception is reported and swallowed, so one failing file does not
        raise out of this function.
        """
        fileName = params.fileName
        month = params.args.month
        monthname = params.monthname
        varbname = params.varname
        template = populateStringConstructor(args.filename_template, args)
        template.variable = varbname
        # Units on output (*may be converted below from the units of input*)
        outunits = "mm/d"
        startime = 1.5  # GMT value for starting time-of-day

        dataname = params.args.model
        if dataname is None or dataname.find("*") != -1:
            # model not passed or passed as *
            reverted = template.reverse(os.path.basename(fileName))
            print("REVERYING", reverted, dataname)
            dataname = reverted["model"]
        if dataname not in args.skip:
            try:
                print("Data source:", dataname)
                print("Opening %s ..." % fileName)
                f = cdms2.open(fileName)
                try:
                    # Composite-mean and composite-s.d diurnal cycle for month
                    # and year(s):
                    iYear = 0
                    for year in range(args.firstyear, args.lastyear + 1):
                        print("Year %s:" % year)
                        startTime = cdtime.comptime(year, month)
                        # Last possible second to get all tpoints
                        finishtime = startTime.add(1, cdtime.Month).add(
                            -1, cdtime.Minute
                        )
                        print(
                            "Reading %s from %s for time interval %s to %s ..."
                            % (varbname, fileName, startTime, finishtime)
                        )
                        # Transient variable stores data for current year's month.
                        tvarb = f(varbname, time=(startTime, finishtime))
                        # *HARD-CODES conversion from kg/m2/sec to mm/day.
                        tvarb *= 86400
                        print("Shape:", tvarb.shape)
                        # The following tasks need to be done only once,
                        # extracting metadata from first-year file:
                        if year == args.firstyear:
                            tc = tvarb.getTime().asComponentTime()
                            print("DATA FROM:", tc[0], "to", tc[-1])
                            day1 = tc[0]
                            firstday = tvarb(
                                time=(day1, day1.add(1.0, cdtime.Day), "con")
                            )
                            dimensions = firstday.shape
                            print(" Shape = ", dimensions)
                            # Number of time points in the first day
                            N = dimensions[0]
                            nlats = dimensions[1]
                            nlons = dimensions[2]
                            deltaH = 24.0 / N
                            dayspermo = tvarb.shape[0] // N
                            print(
                                " %d timepoints per day, %d hr intervals between timepoints"
                                % (N, deltaH)
                            )
                            comptime = firstday.getTime()
                            modellons = tvarb.getLongitude()
                            modellats = tvarb.getLatitude()
                            # Longitude values are needed later to compute
                            # Local Solar Times.
                            lons = modellons[:]
                            print(" Creating temporary storage and output fields ...")
                            # Sorts tvarb into separate GMTs for one year
                            tvslice = MV2.zeros((N, dayspermo, nlats, nlons))
                            # Concatenates tvslice over all years
                            concatenation = MV2.zeros(
                                (N, dayspermo * nYears, nlats, nlons)
                            )
                            LSTs = MV2.zeros((N, nlats, nlons))
                            for iGMT in range(N):
                                hour = iGMT * deltaH + startime
                                print(
                                    " Computing Local Standard Times for GMT %5.2f ..."
                                    % hour
                                )
                                # The local time depends only on longitude, so
                                # compute one row and copy it to every latitude
                                # instead of assigning element by element.
                                lst_row = (hour + lons / 15) % 24
                                for j in range(nlats):
                                    LSTs[iGMT, j] = lst_row
                        for iGMT in range(N):
                            hour = iGMT * deltaH + startime
                            print(" Choosing timepoints with GMT %5.2f ..." % hour)
                            print("days per mo :", dayspermo)
                            # Transient-variable slice: every Nth tpoint gets
                            # all of the current GMT's tpoints for current year:
                            tvslice[iGMT] = tvarb[iGMT::N]
                            concatenation[
                                iGMT, iYear * dayspermo : (iYear + 1) * dayspermo
                            ] = tvslice[iGMT]
                        iYear += 1
                finally:
                    # Close the input file even when reading fails.
                    f.close()

                # For each GMT, take mean and standard deviation over all
                # years for the chosen month:
                avgvalues = MV2.zeros((N, nlats, nlons))
                stdvalues = MV2.zeros((N, nlats, nlons))
                for iGMT in range(N):
                    hour = iGMT * deltaH + startime
                    print(
                        "Computing mean and standard deviation over all GMT %5.2f timepoints ..."
                        % hour
                    )
                    # Assumes first dimension of input ("axis#0") is time
                    avgvalues[iGMT] = MV2.average(concatenation[iGMT], axis=0)
                    stdvalues[iGMT] = genutil.statistics.std(concatenation[iGMT])
                avgvalues.id = "diurnalmean"
                stdvalues.id = "diurnalstd"
                LSTs.id = "LST"
                avgvalues.units = outunits
                # Standard deviation has same units as mean (not so for
                # higher-moment stats).
                stdvalues.units = outunits
                LSTs.units = "hr"
                LSTs.longname = "Local Solar Time"
                for field in (avgvalues, stdvalues, LSTs):
                    field.setAxis(0, comptime)
                    field.setAxis(1, modellats)
                    field.setAxis(2, modellons)
                avgoutfile = ("%s_%s_%s_%s-%s_diurnal_avg.nc") % (
                    varbname,
                    dataname,
                    monthname,
                    str(args.firstyear),
                    str(args.lastyear),
                )
                stdoutfile = ("%s_%s_%s_%s-%s_diurnal_std.nc") % (
                    varbname,
                    dataname,
                    monthname,
                    str(args.firstyear),
                    str(args.lastyear),
                )
                LSToutfile = "%s_%s_LocalSolarTimes.nc" % (varbname, dataname)
                # exist_ok avoids a race between parallel workers creating the
                # same results directory.
                os.makedirs(args.results_dir, exist_ok=True)
                outputs = (
                    (avgoutfile, avgvalues),
                    (stdoutfile, stdvalues),
                    (LSToutfile, LSTs),
                )
                for outname, field in outputs:
                    out = cdms2.open(os.path.join(args.results_dir, outname), "w")
                    try:
                        out.write(field)
                    finally:
                        out.close()
            except Exception as err:
                print("Failed for model %s with erro: %s" % (dataname, err))

    # NOTE(review): this message is emitted before any processing starts; it
    # is kept in its original position. Confirm whether it was meant to follow
    # the multiprocess call below.
    print("done")

    args = P.get_parameter()
    month = args.month  # noqa: F841
    monthname = monthname_d[args.month]  # noqa: F841

    # -------------------------------------HARD-CODED INPUT (add to command line later?):

    # These models have been processed already (or tried and found wanting,
    # e.g. problematic time coordinates):
    skipMe = args.skip  # noqa: F841

    # Choose only one ensemble member per model, with the following ensemble-member code (for definitions, see
    # http://cmip-pcmdi.llnl.gov/cmip5/docs/cmip5_data_reference_syntax.pdf):

    # NOTE--These models do not supply 3hr data from the 'r1i1p1' ensemble member,
    #       but do supply it from other ensemble members:
    #       bcc-csm1-1 (3hr data is from r2i1p1)
    #       CCSM4      (3hr data is from r6i1p1)
    #       GFDL-CM3   (3hr data is from r2i1p1, r3i1p1, r4i1p1, r5i1p1)
    #       GISS-E2-H  (3hr data is from r6i1p1, r6i1p3)
    #       GISS-E2-R  (3hr data is from r6i1p2)

    varbname = "pr"

    #           Note that CMIP5 specifications designate (01:30, 04:30, 07:30, ..., 22:30) GMT for 3hr flux fields, but
    # *WARNING* some GMT timepoints are actually (0, 3, 6,..., 21) in submitted CMIP5 data, despite character strings in
    #           file names (and time axis metadata) to the contrary. See CMIP5 documentation and errata! Overrides to
    #           correct these problems are given below:
    # startGMT = '0:0:0.0' # Include 00Z as a possible starting time, to accommodate (0, 3, 6,..., 21)GMT in the input
    # data.
    # startime = -1.5      # Subtract 1.5h from (0, 3, 6,..., 21)GMT input data. This is needed for BNU-ESM, CCSM4 and
    # CNRM-CM5.
    # startime = -3.0      # Subtract 1.5h from (0, 3, 6,..., 21)GMT input
    # data. This is needed for CMCC-CM.

    # -------------------------------------

    # Used by compute() through the enclosing scope.
    nYears = args.lastyear - args.firstyear + 1

    template = populateStringConstructor(args.filename_template, args)
    template.variable = varbname

    print("TEMPLATE:", template())
    fileList = glob.glob(os.path.join(args.modpath, template()))
    print("FILES:", fileList)
    params = [INPUT(args, name, template) for name in fileList]
    print("PARAMS:", params)
    cdp.cdp_run.multiprocess(compute, params, num_workers=args.num_workers)
def main():
    """Compute the RMS area average of the hourly std. dev. field and save it as JSON.

    Registers the script-specific command-line options on ``P``, globs the
    input files matching the filename template under ``args.modpath``, runs
    ``compute`` on each of them through ``cdp.cdp_run.multiprocess`` and
    writes the collected results with ``pcmdi_metrics.io.base.Base``
    (optionally also in CMEC format when ``--cmec`` is given).
    """

    def compute(param):
        """Return ``(model, region, {region_name: value})`` for one input file.

        ``param`` supplies ``varname``, ``monthname``, ``fileName`` and
        ``args``. The model name is recovered by reversing the filename
        template on the file's basename. If reading or processing fails, the
        error is reported and the value is set to 1.0e20.
        """
        template = populateStringConstructor(args.filename_template, args)
        template.variable = param.varname
        template.month = param.monthname
        fnameRoot = param.fileName
        # Parsed once; the original repeated this lookup before the try block.
        reverted = template.reverse(os.path.basename(fnameRoot))
        model = reverted["model"]
        print("Specifying latitude / longitude domain of interest ...")
        datanameID = "diurnalstd"  # Short ID name of output data
        latrange = (param.args.lat1, param.args.lat2)
        lonrange = (param.args.lon1, param.args.lon2)
        region = cdutil.region.domain(latitude=latrange, longitude=lonrange)
        if param.args.region_name == "":
            region_name = "{:g}_{:g}&{:g}_{:g}".format(*(latrange + lonrange))
        else:
            region_name = param.args.region_name
        print("Reading %s ..." % fnameRoot)
        f = None
        try:
            f = cdms2.open(fnameRoot)
            x = f(datanameID, region)
            units = x.units
            print(" Shape =", x.shape)
            print("Finding RMS area-average ...")
            x = x * x
            x = cdutil.averager(x, weights="unweighted")
            x = cdutil.averager(x, axis="xy")
            x = numpy.ma.sqrt(x)
            # `monthname` comes from the enclosing main() scope.
            print(
                "For %8s in %s, average variance of hourly values = (%5.2f %s)^2"
                % (model, monthname, x, units))
        except Exception as err:
            print("Failed model %s with error: %s" % (model, err))
            x = 1.0e20
        finally:
            # Close the input file on the failure path too.
            if f is not None:
                f.close()
        # Return a plain float, as the sibling mean-diurnal-cycle script does,
        # so the value is directly JSON-serializable.
        return model, region, {region_name: float(x)}

    P.add_argument(
        "-j",
        "--outnamejson",
        type=str,
        dest="outnamejson",
        default="pr_%(month)_%(firstyear)-%(lastyear)_std_of_hourlymeans.json",
        help="Output name for jsons",
    )
    P.add_argument("--lat1", type=float, default=-50.0, help="First latitude")
    P.add_argument("--lat2", type=float, default=50.0, help="Last latitude")
    P.add_argument("--lon1", type=float, default=0.0, help="First longitude")
    P.add_argument("--lon2", type=float, default=360.0, help="Last longitude")
    P.add_argument(
        "--region_name",
        type=str,
        default="TRMM",
        help="name for the region of interest",
    )
    P.add_argument(
        "-t",
        "--filename_template",
        default="pr_%(model)_%(month)_%(firstyear)-%(lastyear)_diurnal_std.nc",
    )
    P.add_argument("--model", default="*")
    P.add_argument(
        "--cmec",
        dest="cmec",
        action="store_true",
        default=False,
        help="Use to save metrics in CMEC JSON format",
    )
    P.add_argument(
        "--no_cmec",
        dest="cmec",
        action="store_false",
        default=False,
        help="Use to disable saving metrics in CMEC JSON format",
    )

    args = P.get_parameter()
    month = args.month
    monthname = monthname_d[month]
    startyear = args.firstyear  # noqa: F841
    finalyear = args.lastyear  # noqa: F841
    cmec = args.cmec

    template = populateStringConstructor(args.filename_template, args)
    template.month = monthname

    print("TEMPLATE NAME:", template())

    print("Specifying latitude / longitude domain of interest ...")
    # TRMM (observed) domain:
    latrange = (args.lat1, args.lat2)
    lonrange = (args.lon1, args.lon2)

    region = cdutil.region.domain(latitude=latrange, longitude=lonrange)

    # Amazon basin:
    # latrange = (-15.0, -5.0)
    # lonrange = (285.0, 295.0)

    print("Preparing to write output to JSON file ...")
    if not os.path.exists(args.results_dir):
        os.makedirs(args.results_dir)
    jsonFile = populateStringConstructor(args.outnamejson, args)
    jsonFile.month = monthname

    jsonname = os.path.join(os.path.abspath(args.results_dir), jsonFile())

    if not os.path.exists(jsonname) or args.append is False:
        print("Initializing dictionary of statistical results ...")
        stats_dic = {}
        metrics_dictionary = collections.OrderedDict()
    else:
        with open(jsonname) as f:
            metrics_dictionary = json.load(f)
            stats_dic = metrics_dictionary["RESULTS"]

    OUT = pcmdi_metrics.io.base.Base(os.path.abspath(args.results_dir), jsonFile())
    egg_pth = resources.resource_path()
    # Read the disclaimer through a context manager so the handle is closed.
    with open(os.path.join(egg_pth, "disclaimer.txt")) as disclaimer_file:
        disclaimer = disclaimer_file.read()
    metrics_dictionary["DISCLAIMER"] = disclaimer
    metrics_dictionary["REFERENCE"] = (
        "The statistics in this file are based on Trenberth, Zhang & Gehne, "
        "J Hydromet. 2017")

    files = glob.glob(os.path.join(args.modpath, template()))
    print(files)

    params = [INPUT(args, name, template) for name in files]
    print("PARAMS:", params)

    results = cdp.cdp_run.multiprocess(compute, params, num_workers=args.num_workers)

    res = None
    for r in results:
        m, region, res = r
        if m not in stats_dic:
            stats_dic[m] = res
        else:
            stats_dic[m].update(res)

    print("Writing output to JSON file ...")
    metrics_dictionary["RESULTS"] = stats_dic
    rgmsk = metrics_dictionary.get("RegionalMasking", {})
    if res is None:
        # No input file matched: derive the region name from the arguments,
        # exactly as compute() does, instead of failing on an unbound `res`.
        if args.region_name == "":
            nm = "{:g}_{:g}&{:g}_{:g}".format(*(latrange + lonrange))
        else:
            nm = args.region_name
    else:
        nm = list(res.keys())[0]
    region.id = nm
    rgmsk[nm] = {"id": nm, "domain": region}
    metrics_dictionary["RegionalMasking"] = rgmsk
    OUT.write(
        metrics_dictionary,
        json_structure=["model", "domain"],
        indent=4,
        separators=(",", ": "),
    )
    if cmec:
        print("Writing cmec file")
        OUT.write_cmec(indent=4, separators=(",", ": "))
    print("done")
def main():
    """Write the standard deviation of daily means for each matching input file.

    Globs the files matching the filename template under ``args.modpath`` and
    runs ``compute`` on each of them through ``cdp.cdp_run.multiprocess``.
    """

    def compute(params):
        """Compute and write the std. dev. of daily means for one input file.

        ``params`` supplies ``fileName``, ``monthname``, ``varname`` and
        ``args``. Data sources listed in ``args.skip`` are ignored. When
        processing fails, the error is reported and no output file is written.
        """
        fileName = params.fileName
        startyear = params.args.firstyear
        finalyear = params.args.lastyear
        month = params.args.month
        monthname = params.monthname
        varbname = params.varname
        template = populateStringConstructor(args.filename_template, args)
        template.variable = varbname

        dataname = params.args.model
        if dataname is None or dataname.find("*") != -1:
            # model not passed or passed as *
            reverted = template.reverse(os.path.basename(fileName))
            dataname = reverted["model"]
        print('Data source:', dataname)
        print('Opening %s ...' % fileName)
        if dataname not in args.skip:
            try:
                print('Data source:', dataname)
                print('Opening %s ...' % fileName)
                f = cdms2.open(fileName)
                try:
                    # One (1, ...) shaped daily mean per day; joined once
                    # below instead of re-concatenating on every iteration.
                    daily_means = []
                    for year in range(startyear, finalyear + 1):
                        print('Year %s:' % year)
                        startTime = cdtime.comptime(year, month)
                        # Last possible second to get all tpoints
                        finishtime = startTime.add(1, cdtime.Month).add(
                            -1, cdtime.Minute)
                        print('Reading %s from %s for time interval %s to %s ...' %
                              (varbname, fileName, startTime, finishtime))
                        # Transient variable stores data for current year's month.
                        tvarb = f(varbname, time=(startTime, finishtime, "ccn"))
                        # *HARD-CODES conversion from kg/m2/sec to mm/day.
                        tvarb *= 86400
                        tc = tvarb.getTime().asComponentTime()
                        current = tc[0]
                        # Step one day at a time until the month changes.
                        while current.month == month:
                            end = cdtime.comptime(current.year, current.month,
                                                  current.day).add(1, cdtime.Day)
                            sub = tvarb(time=(current, end, "con"))
                            # Assumes first dimension of input ("axis#0") is time
                            tmp = numpy.ma.average(sub, axis=0)
                            sh = list(tmp.shape)
                            sh.insert(0, 1)
                            daily_means.append(tmp.reshape(sh))
                            current = end
                finally:
                    # Close the input file even when reading fails.
                    f.close()
                dmean = numpy.ma.concatenate(daily_means, axis=0)
                stdvalues = cdms2.MV2.array(genutil.statistics.std(dmean))
                stdvalues.setAxis(0, tvarb.getLatitude())
                stdvalues.setAxis(1, tvarb.getLongitude())
                stdvalues.id = 'dailySD'
                # Standard deviation has same units as mean.
                stdvalues.units = "mm/d"
                stdoutfile = ('%s_%s_%s_%s-%s_std_of_dailymeans.nc') % (
                    varbname, dataname, monthname, str(startyear),
                    str(finalyear))
            except Exception as err:
                print("Failed for model: %s with error: %s" % (dataname, err))
                # Nothing to write: `stdvalues` / `stdoutfile` are undefined
                # here, so continuing would raise NameError.
                return
            # exist_ok avoids a race between parallel workers creating the
            # same results directory.
            os.makedirs(args.results_dir, exist_ok=True)
            g = cdms2.open(os.path.join(args.results_dir, stdoutfile), 'w')
            try:
                g.write(stdvalues)
            finally:
                g.close()

    args = P.get_parameter()
    month = args.month
    startyear = args.firstyear
    finalyear = args.lastyear
    directory = args.modpath  # Input directory for model data
    # These models have been processed already (or tried and found wanting,
    # e.g. problematic time coordinates):
    skipMe = args.skip
    print("SKIPPING:", skipMe)

    # Choose only one ensemble member per model, with the following ensemble-member code (for definitions, see
    # http://cmip-pcmdi.llnl.gov/cmip5/docs/cmip5_data_reference_syntax.pdf):

    # NOTE--These models do not supply 3hr data from the 'r1i1p1' ensemble member,
    #       but do supply it from other ensemble members:
    #       bcc-csm1-1 (3hr data is from r2i1p1)
    #       CCSM4      (3hr data is from r6i1p1)
    #       GFDL-CM3   (3hr data is from r2i1p1, r3i1p1, r4i1p1, r5i1p1)
    #       GISS-E2-H  (3hr data is from r6i1p1, r6i1p3)
    #       GISS-E2-R  (3hr data is from r6i1p2)

    varbname = "pr"

    #           Note that CMIP5 specifications designate (01:30, 04:30, 07:30, ..., 22:30) GMT for 3hr flux fields, but
    # *WARNING* some GMT timepoints are actually (0, 3, 6,..., 21) in submitted CMIP5 data, despite character strings in
    #           file names (and time axis metadata) to the contrary. See CMIP5 documentation and errata! Overrides to
    #           correct these problems are given below:
    # Include 00Z as a possible starting time, to accommodate (0, 3, 6,...,
    # 21)GMT in the input data.
    # startime = -1.5      # Subtract 1.5h from (0, 3, 6,..., 21)GMT input
    # data. This is needed for BNU-ESM, CCSM4 and CNRM-CM5.
    # Subtract 1.5h from (0, 3, 6,..., 21)GMT input data. This is needed for
    # CMCC-CM.

    # -------------------------------------

    monthname = monthname_d[month]  # noqa: F841
    nYears = finalyear - startyear + 1  # noqa: F841
    # Character strings for starting and ending day/GMT (*HARD-CODES
    # particular GMT timepoints*):
    # *WARNING* GMT timepoints are actually (0, 3, 6,..., 21) in the original TRMM/Obs4MIPs data, despite character
    # strings in file names (and time axis metadata). See CMIP5 documentation and
    # errata!

    template = populateStringConstructor(args.filename_template, args)
    template.variable = varbname

    fileList = glob.glob(os.path.join(directory, template()))
    print("FILES:", fileList)

    params = [INPUT(args, name, template) for name in fileList]
    print("PARAMS:", params)
    cdp.cdp_run.multiprocess(compute, params, num_workers=args.num_workers)