def setAggregator(self, name):
    """Select the function used to collapse an array of values into one number.

    name: one of "mean", "median", "min", "max", "std", "range", "count",
    "meanabs", or a number in [0, 1] which is interpreted as a quantile.
    Aborts via Util.error() for anything else.
    """
    self._aggregatorName = name
    if (name == "mean"):
        self._aggregator = np.mean
    elif (name == "median"):
        self._aggregator = np.median
    elif (name == "min"):
        self._aggregator = np.min
    elif (name == "max"):
        self._aggregator = np.max
    elif (name == "std"):
        self._aggregator = np.std
    elif (name == "range"):
        self._aggregator = Util.nprange
    elif (name == "count"):
        self._aggregator = Util.numvalid
    elif (name == "meanabs"):
        self._aggregator = Util.meanabs
    elif (Util.isnumeric(name)):
        quantile = float(name)
        if quantile < 0 or quantile > 1:
            # Fix: message previously read "must must be"
            Util.error("Number after -ct must be between 0 and 1")

        # np.percentile expects a percentage in [0, 100]
        def func(x):
            return np.percentile(x, quantile * 100)
        self._aggregator = func
    else:
        Util.error("Invalid aggregator")
def getAxisValues(self, axis=None):
    """Return the coordinate values along the requested axis.

    axis: one of "date", "month", "year", "offset", "none", or a location
    axis name (as accepted by self.isLocationAxis). Defaults to the
    currently selected axis (self._axis). Returns [0] for "none" and for
    unrecognized axes.
    """
    if (axis is None):
        axis = self._axis
    if (axis == "date"):
        return Util.convertDates(self._getScore("Date").astype(int))
    elif (axis == "month"):
        dates = self._getScore("Date").astype(int)
        # Floor each YYYYMMDD date to the first day of its month. Integer
        # (floor) division '//' preserves the Python 2 / numpy-int behavior
        # of '/' and stays correct when true division yields floats.
        months = np.unique((dates // 100) * 100 + 1)
        return Util.convertDates(months)
    elif (axis == "year"):
        dates = self._getScore("Date").astype(int)
        # Floor each YYYYMMDD date to January 1 of its year
        years = np.unique((dates // 10000) * 10000 + 101)
        return Util.convertDates(years)
    elif (axis == "offset"):
        return self._getScore("Offset").astype(int)
    elif (axis == "none"):
        return [0]
    elif (self.isLocationAxis(axis)):
        if (axis == "location"):
            # Sequential indices, not station ids
            data = range(0, len(self._getScore("Location")))
        elif (axis == "locationId"):
            data = self._getScore("Location").astype(int)
        elif (axis == "locationElev"):
            data = self._getScore("Elev")
        elif (axis == "locationLat"):
            data = self._getScore("Lat")
        elif (axis == "locationLon"):
            data = self._getScore("Lon")
        else:
            Util.error("Data.getAxisValues has a bad axis name: " + axis)
        return data
    else:
        return [0]
def computeObsFcst(self, obs, fcst, tRange):
    """Compute the contingency-table based score for obs/fcst pairs.

    tRange: the threshold interval; required (aborts if None). When
    self._usingQuantiles is set, tRange is interpreted as quantiles and
    mapped through the empirical distribution of each series first.
    Returns np.nan when fcst is empty or the score is infinite.
    """
    if (tRange is None):
        Util.error("Metric " + self.getClassName() +
                   " requires '-r <threshold>'")
    value = np.nan
    if (len(fcst) > 0):
        # Determine the interval to test each series against. Previously the
        # quantile and non-quantile branches duplicated the four cell
        # computations; here the interval is resolved once instead.
        if (self._usingQuantiles):
            fRange = self._quantileToThreshold(np.sort(fcst), tRange)
            oRange = self._quantileToThreshold(np.sort(obs), tRange)
        else:
            fRange = tRange
            oRange = tRange
        # Contingency table cell counts (assumes self.within is
        # side-effect free — it used to be re-evaluated once per cell)
        fcstIn = self.within(fcst, fRange)
        obsIn = self.within(obs, oRange)
        a = np.ma.sum(fcstIn & obsIn)                # Hit
        b = np.ma.sum(fcstIn & (obsIn == 0))         # False alarm
        c = np.ma.sum((fcstIn == 0) & obsIn)         # Miss
        d = np.ma.sum((fcstIn == 0) & (obsIn == 0))  # Correct rejection
        value = self.calc(a, b, c, d)
        if (np.isinf(value)):
            value = np.nan
    return value
def _getIndices(self, axis, findex=None): if (axis == "date"): I = self._getDateIndices(findex) elif (axis == "offset"): I = self._getOffsetIndices(findex) elif (axis == "location"): I = self._getLocationIndices(findex) else: Util.error("Could not get indices for axis: " + str(axis)) return I
def computeCore(self, data, tRange):
    """Evaluate the contingency-table score for the currently selected data.

    Aborts when no threshold interval was supplied; returns np.nan for
    empty data or an infinite score.
    """
    if tRange is None:
        Util.error("Metric " + self.getClassName() +
                   " requires '-r <threshold>'")
    [obs, fcst] = data.getScores(["obs", "fcst"])
    score = np.nan
    if len(fcst) > 0:
        # Membership of each series in the threshold interval
        fcstIn = self.within(fcst, tRange)
        obsIn = self.within(obs, tRange)
        # The four contingency-table cells
        hits = np.ma.sum(fcstIn & obsIn)
        falseAlarms = np.ma.sum(fcstIn & (obsIn == 0))
        misses = np.ma.sum((fcstIn == 0) & obsIn)
        correctRejections = np.ma.sum((fcstIn == 0) & (obsIn == 0))
        score = self.calc(hits, falseAlarms, misses, correctRejections)
        if np.isinf(score):
            score = np.nan
    return score
def setAggregator(self, name):
    """Choose the aggregation function identified by 'name'.

    Accepts "mean", "median", "min", "max", "std", "range", or a numeric
    string interpreted as a percentile (0-100, passed straight to
    np.percentile). Any other value aborts via Util.error().
    """
    self._aggregatorName = name
    if name in ("mean", "median", "min", "max", "std"):
        # These names match numpy functions directly
        self._aggregator = getattr(np, name)
    elif name == "range":
        self._aggregator = Util.nprange
    elif Util.isnumeric(name):
        percentile = float(name)
        self._aggregator = lambda values: np.percentile(values, percentile)
    else:
        Util.error("Invalid aggregator")
def _getScore(self, metric, findex=None):
    """Return the score array for 'metric' from file number 'findex'.

    Defaults to the currently selected file (self._findex). Results are
    cached per file; on a cache miss the variable is loaded and subset for
    EVERY file so that missing values can be synchronized across
    configurations afterwards.
    """
    # Default to the currently selected file index
    if (findex is None):
        findex = self._findex

    # Serve from cache when this file has already loaded the metric
    if (metric in self._cache[findex]):
        return self._cache[findex][metric]

    # Load all files (including the climatology file, if any)
    for f in range(0, self.getNumFilesWithClim()):
        if (metric not in self._cache[f]):
            file = self._files[f]
            if (metric not in file.getVariables()):
                Util.error("Variable '" + metric + "' does not exist in " +
                           self.getFilenames()[f])
            temp = file.getScores(metric)
            dims = file.getDims(metric)
            temp = Util.clean(temp)
            # Subset the first three dimensions to the common indices shared
            # by all files; any further dimensions are left untouched
            for i in range(0, len(dims)):
                I = self._getIndices(dims[i].lower(), f)
                if (i == 0):
                    temp = temp[I, Ellipsis]
                if (i == 1):
                    temp = temp[:, I, Ellipsis]
                if (i == 2):
                    temp = temp[:, :, I, Ellipsis]
            self._cache[f][metric] = temp

    # Remove missing. If one configuration has a missing value, set all
    # configurations to missing. This can happen when the dates are
    # available, but have missing values.
    if self._removeMissingAcrossAll:
        isMissing = np.isnan(self._cache[0][metric])
        for f in range(1, self.getNumFilesWithClim()):
            isMissing = isMissing | (np.isnan(self._cache[f][metric]))
        for f in range(0, self.getNumFilesWithClim()):
            self._cache[f][metric][isMissing] = np.nan

    return self._cache[findex][metric]
def run(argv): ############ # Defaults # ############ ifiles = list() ofile = None metric = None locations = None latlonRange = None training = 0 thresholds = None dates = None climFile = None climType = "subtract" leg = None ylabel = None xlabel = None title = None offsets = None xdim = None sdim = None figSize = None dpi = 100 showText = False showMap = False noMargin = False binType = None markerSize = None lineWidth = None tickFontSize = None labFontSize = None legFontSize = None type = "plot" XRotation = None MajorLength = None MinorLength = None MajorWidth = None Bottom = None Top = None Right = None Left = None Pad = None showPerfect = None cType = "mean" doHist = False doSort = False doAcc = False xlim = None ylim = None clim = None version = None listThresholds = False listQuantiles = False listLocations = False listDates = False # Read command line arguments i = 1 while (i < len(argv)): arg = argv[i] if (arg[0] == '-'): # Process option if (arg == "-nomargin"): noMargin = True elif (arg == "--version"): version = True elif (arg == "--list-thresholds"): listThresholds = True elif (arg == "--list-quantiles"): listQuantiles = True elif (arg == "--list-locations"): listLocations = True elif (arg == "--list-dates"): listDates = True elif (arg == "-sp"): showPerfect = True elif (arg == "-hist"): doHist = True elif (arg == "-acc"): doAcc = True elif (arg == "-sort"): doSort = True else: if (arg == "-f"): ofile = argv[i + 1] elif (arg == "-l"): locations = Util.parseNumbers(argv[i + 1]) elif (arg == "-llrange"): latlonRange = Util.parseNumbers(argv[i + 1]) elif (arg == "-t"): training = int(argv[i + 1]) elif (arg == "-x"): xdim = argv[i + 1] elif (arg == "-o"): offsets = Util.parseNumbers(argv[i + 1]) elif (arg == "-leg"): leg = unicode(argv[i + 1], 'utf8') elif (arg == "-ylabel"): ylabel = unicode(argv[i + 1], 'utf8') elif (arg == "-xlabel"): xlabel = unicode(argv[i + 1], 'utf8') elif (arg == "-title"): title = unicode(argv[i + 1], 'utf8') elif (arg == "-b"): 
binType = argv[i + 1] elif (arg == "-type"): type = argv[i + 1] elif (arg == "-fs"): figSize = argv[i + 1] elif (arg == "-dpi"): dpi = int(argv[i + 1]) elif (arg == "-d"): # Either format is ok: # -d 20150101 20150103 # -d 20150101:20150103 if (i + 2 < len(argv) and argv[i + 2].isdigit()): dates = Util.parseNumbers( "%s:%s" % (argv[i + 1], argv[i + 2]), True) i = i + 1 else: dates = Util.parseNumbers(argv[i + 1], True) elif (arg == "-c"): climFile = argv[i + 1] climType = "subtract" elif (arg == "-C"): climFile = argv[i + 1] climType = "divide" elif (arg == "-xlim"): xlim = Util.parseNumbers(argv[i + 1]) elif (arg == "-ylim"): ylim = Util.parseNumbers(argv[i + 1]) elif (arg == "-clim"): clim = Util.parseNumbers(argv[i + 1]) elif (arg == "-s"): sdim = argv[i + 1] elif (arg == "-ct"): cType = argv[i + 1] elif (arg == "-r"): thresholds = Util.parseNumbers(argv[i + 1]) elif (arg == "-ms"): markerSize = float(argv[i + 1]) elif (arg == "-lw"): lineWidth = float(argv[i + 1]) elif (arg == "-tickfs"): tickFontSize = float(argv[i + 1]) elif (arg == "-labfs"): labFontSize = float(argv[i + 1]) elif (arg == "-legfs"): legFontSize = float(argv[i + 1]) elif (arg == "-xrot"): XRotation = float(argv[i + 1]) elif (arg == "-majlth"): MajorLength = float(argv[i + 1]) elif (arg == "-minlth"): MinorLength = float(argv[i + 1]) elif (arg == "-majwid"): MajorWidth = float(argv[i + 1]) elif (arg == "-bot"): Bottom = float(argv[i + 1]) elif (arg == "-top"): Top = float(argv[i + 1]) elif (arg == "-right"): Right = float(argv[i + 1]) elif (arg == "-left"): Left = float(argv[i + 1]) elif (arg == "-pad"): Pad = argv[i + 1] elif (arg == "-m"): metric = argv[i + 1] else: Util.error("Flag '" + argv[i] + "' not recognized") i = i + 1 else: ifiles.append(argv[i]) i = i + 1 if (version): print "Version: " + Version.__version__ return # Deal with legend entries if (leg is not None): leg = leg.split(',') for i in range(0, len(leg)): leg[i] = leg[i].replace('_', ' ') if (latlonRange is not None and 
len(latlonRange) != 4): Util.error("-llRange <values> must have exactly 4 values") if (len(ifiles) > 0): data = Data.Data(ifiles, clim=climFile, climType=climType, dates=dates, offsets=offsets, locations=locations, latlonRange=latlonRange, training=training) else: data = None if (listThresholds or listQuantiles or listLocations or listDates): if (len(ifiles) == 0): Util.error( "Files are required in order to list thresholds or quantiles") if (listThresholds): print "Thresholds:", for threshold in data.getThresholds(): print "%g" % threshold, print "" if (listQuantiles): print "Quantiles:", for quantile in data.getQuantiles(): print "%g" % quantile, print "" if (listLocations): print " id lat lon elev" for station in data.getStations(): print "%6d %7.2f %7.2f %7.1f" % (station.id(), station.lat(), station.lon(), station.elev()) print "" if (listDates): dates = data.getAxisValues("date") dates = Util.convertToYYYYMMDD(dates) for date in dates: print "%d" % date print "" return elif (len(argv) == 1 or len(ifiles) == 0 or metric is None): showDescription(data) return if (figSize is not None): figSize = figSize.split(',') if (len(figSize) != 2): print "-fs figSize must be in the form: width,height" sys.exit(1) m = None # Handle special plots if (doHist): pl = Output.Hist(metric) elif (doSort): pl = Output.Sort(metric) elif (metric == "pithist"): m = Metric.Pit("pit") pl = Output.PitHist(m) elif (metric == "obsfcst"): pl = Output.ObsFcst() elif (metric == "timeseries"): pl = Output.TimeSeries() elif (metric == "meteo"): pl = Output.Meteo() elif (metric == "qq"): pl = Output.QQ() elif (metric == "cond"): pl = Output.Cond() elif (metric == "against"): pl = Output.Against() elif (metric == "count"): pl = Output.Count() elif (metric == "scatter"): pl = Output.Scatter() elif (metric == "change"): pl = Output.Change() elif (metric == "spreadskill"): pl = Output.SpreadSkill() elif (metric == "taylor"): pl = Output.Taylor() elif (metric == "error"): pl = Output.Error() elif 
(metric == "freq"): pl = Output.Freq() elif (metric == "roc"): pl = Output.Roc() elif (metric == "droc"): pl = Output.DRoc() elif (metric == "droc0"): pl = Output.DRoc0() elif (metric == "drocnorm"): pl = Output.DRocNorm() elif (metric == "reliability"): pl = Output.Reliability() elif (metric == "invreliability"): pl = Output.InvReliability() elif (metric == "igncontrib"): pl = Output.IgnContrib() elif (metric == "economicvalue"): pl = Output.EconomicValue() elif (metric == "marginal"): pl = Output.Marginal() else: # Standard plots # Attempt at automating metrics = Metric.getAllMetrics() m = None for mm in metrics: if (metric == mm[0].lower() and mm[1].isValid()): m = mm[1]() break if (m is None): m = Metric.Default(metric) m.setAggregator(cType) # Output type if (type == "plot" or type == "text" or type == "map" or type == "maprank"): pl = Output.Default(m) pl.setShowAcc(doAcc) else: Util.error("Type not understood") # Rest dimension of '-x' is not allowed if (xdim is not None and not pl.supportsX()): Util.warning(metric + " does not support -x. Ignoring it.") xdim = None # Reset dimension if 'threshold' is not allowed if (xdim == "threshold" and ((not pl.supportsThreshold()) or (not m.supportsThreshold()))): Util.warning(metric + " does not support '-x threshold'. Ignoring it.") thresholds = None xdim = None # Create thresholds if needed if ((thresholds is None) and (pl.requiresThresholds() or (m is not None and m.requiresThresholds()))): data.setAxis("none") obs = data.getScores("obs")[0] fcst = data.getScores("fcst")[0] smin = min(min(obs), min(fcst)) smax = max(max(obs), max(fcst)) thresholds = np.linspace(smin, smax, 10) Util.warning("Missing '-r <thresholds>'. 
Automatically setting\ thresholds.") # Set plot parameters if (markerSize is not None): pl.setMarkerSize(markerSize) if (lineWidth is not None): pl.setLineWidth(lineWidth) if (labFontSize is not None): pl.setLabFontSize(labFontSize) if (legFontSize is not None): pl.setLegFontSize(legFontSize) if (tickFontSize is not None): pl.setTickFontSize(tickFontSize) if (XRotation is not None): pl.setXRotation(XRotation) if (MajorLength is not None): pl.setMajorLength(MajorLength) if (MinorLength is not None): pl.setMinorLength(MinorLength) if (MajorWidth is not None): pl.setMajorWidth(MajorWidth) if (Bottom is not None): pl.setBottom(Bottom) if (Top is not None): pl.setTop(Top) if (Right is not None): pl.setRight(Right) if (Left is not None): pl.setLeft(Left) if (Pad is not None): pl.setPad(None) if (binType is not None): pl.setBinType(binType) if (showPerfect is not None): pl.setShowPerfect(showPerfect) if (xlim is not None): pl.setXLim(xlim) if (ylim is not None): pl.setYLim(ylim) if (clim is not None): pl.setCLim(clim) pl.setFilename(ofile) pl.setThresholds(thresholds) pl.setLegend(leg) pl.setFigsize(figSize) pl.setDpi(dpi) pl.setAxis(xdim) pl.setShowMargin(not noMargin) pl.setYlabel(ylabel) pl.setXlabel(xlabel) pl.setTitle(title) if (type == "text"): pl.text(data) elif (type == "map"): pl.map(data) elif (type == "maprank"): pl.setShowRank(True) pl.map(data) else: pl.plot(data)
def __init__(self, filenames, dates=None, offsets=None, locations=None,
        latlonRange=None, elevRange=None, clim=None, climType="subtract",
        training=None, legend=None, removeMissingAcrossAll=True):
    """Build a Data object from one or more verification files.

    filenames: a filename or list of filenames (NetcdfCf/Comps/Text inputs).
    dates/offsets/locations: optional subsets to restrict the data to.
    latlonRange: [minLon, maxLon, minLat, maxLat] station filter.
    elevRange: [minElev, maxElev] station filter.
    clim/climType: optional climatology file and how to apply it
    ("subtract" or "divide").
    training: number of leading dates to drop from each file.
    Fixes included: length comparison now uses '!=' instead of the fragile
    'is not' identity test; climType error message quote corrected.
    """
    if (not isinstance(filenames, list)):
        filenames = [filenames]
    self._axis = "date"
    self._index = 0
    self._removeMissingAcrossAll = removeMissingAcrossAll

    # FIX: 'is not' compared int identity (only reliable for small ints in
    # CPython); use a value comparison
    if (legend is not None and len(filenames) != len(legend)):
        Util.error("Need one legend entry for each filename")
    self._legend = legend

    # Organize files
    self._files = list()
    self._cache = list()
    self._clim = None
    for filename in filenames:
        if (not os.path.exists(filename)):
            Util.error("File '" + filename + "' does not exist")
        # Probe the file format in order of specificity
        if (Input.NetcdfCf.isValid(filename)):
            file = Input.NetcdfCf(filename)
        elif (Input.Comps.isValid(filename)):
            file = Input.Comps(filename)
        elif (Input.Text.isValid(filename)):
            file = Input.Text(filename)
        else:
            Util.error("File '" + filename + "' is not a valid input file")
        self._files.append(file)
        self._cache.append(dict())
    if (clim is not None):
        if (not os.path.exists(clim)):
            Util.error("File '" + clim + "' does not exist")
        if (Input.NetcdfCf.isValid(clim)):
            self._clim = Input.NetcdfCf(clim)
        elif (Input.Comps.isValid(clim)):
            self._clim = Input.Comps(clim)
        elif (Input.Text.isValid(clim)):
            self._clim = Input.Text(clim)
        else:
            Util.error("File '" + clim +
                    "' is not a valid climatology file")
        self._cache.append(dict())
        if (not (climType == "subtract" or climType == "divide")):
            # FIX: message previously read "'divide" (missing closing quote)
            Util.error("Data: climType must be 'subtract' or 'divide'")
        self._climType = climType

        # Climatology file
        self._files = self._files + [self._clim]

    # Latitude-Longitude range
    if (latlonRange is not None):
        lat = self._files[0].getLats()
        lon = self._files[0].getLons()
        locId = self._files[0].getStationIds()
        latlonLocations = list()
        minLon = latlonRange[0]
        maxLon = latlonRange[1]
        minLat = latlonRange[2]
        maxLat = latlonRange[3]
        for i in range(0, len(lat)):
            currLat = float(lat[i])
            currLon = float(lon[i])
            if (currLat >= minLat and currLat <= maxLat and
                    currLon >= minLon and currLon <= maxLon):
                latlonLocations.append(locId[i])
        useLocations = list()
        if (locations is not None):
            # Intersect the explicit location list with the lat/lon box
            for i in range(0, len(locations)):
                currLocation = locations[i]
                if (currLocation in latlonLocations):
                    useLocations.append(currLocation)
        else:
            useLocations = latlonLocations
        if (len(useLocations) == 0):
            Util.error("No available locations within lat/lon range")
    elif locations is not None:
        useLocations = locations
    else:
        useLocations = self._files[0].getStationIds()

    # Elevation range
    if (elevRange is not None):
        stations = self._files[0].getStations()
        minElev = elevRange[0]
        maxElev = elevRange[1]
        elevLocations = list()
        for i in range(0, len(stations)):
            currElev = float(stations[i].elev())
            id = stations[i].id()
            if (currElev >= minElev and currElev <= maxElev):
                elevLocations.append(id)
        useLocations = Util.intersect(useLocations, elevLocations)
        if (len(useLocations) == 0):
            Util.error("No available locations within elevation range")

    # Find common indicies shared by all files
    self._datesI = Data._getUtilIndices(self._files, "Date", dates)
    self._offsetsI = Data._getUtilIndices(self._files, "Offset", offsets)
    self._locationsI = Data._getUtilIndices(self._files, "Location",
            useLocations)
    if (len(self._datesI[0]) == 0):
        Util.error("No valid dates selected")
    if (len(self._offsetsI[0]) == 0):
        Util.error("No valid offsets selected")
    if (len(self._locationsI[0]) == 0):
        Util.error("No valid locations selected")

    # Training: drop the first 'training' dates of each file
    if (training is not None):
        for f in range(0, len(self._datesI)):
            if (len(self._datesI[f]) <= training):
                Util.error("Training period too long for " +
                        self.getFilenames()[f] +
                        ". Max training period is " +
                        str(len(self._datesI[f]) - 1) + ".")
            self._datesI[f] = self._datesI[f][training:]

    self._findex = 0
def getScores(self, metrics):
    """Return a list of flattened score arrays, one per requested metric,
    sliced along the currently selected axis/index (self._axis/self._index)
    with invalid (NaN/inf) entries removed jointly across all metrics.

    metrics: a metric name or list of names. For "obs"/"fcst", the
    climatology (if configured) is subtracted or divided out. When the axis
    is "all", arrays are returned unflattened and unfiltered. If no valid
    data remains, each entry is a one-element NaN array.
    """
    if (not isinstance(metrics, list)):
        metrics = [metrics]
    data = dict()
    valid = None
    axis = self._getAxisIndex(self._axis)

    # Compute climatology, if needed (only when obs/fcst are requested)
    obsFcstAvailable = ("obs" in metrics or "fcst" in metrics)
    doClim = self._clim is not None and obsFcstAvailable
    if (doClim):
        # The climatology file is the last entry in self._files
        temp = self._getScore("fcst", len(self._files) - 1)
        if (self._axis == "date"):
            clim = temp[self._index, :, :].flatten()
        elif (self._axis == "month"):
            # Select all dates falling inside the current month bucket
            dates = self.getAxisValues("date")
            months = self.getAxisValues("month")
            if (self._index == months.shape[0] - 1):
                # Last bucket: take everything from its start onwards
                I = np.where(dates >= months[self._index])
            else:
                I = np.where((dates >= months[self._index]) &
                             (dates < months[self._index + 1]))
            clim = temp[I, :, :].flatten()
        elif (self._axis == "year"):
            dates = self.getAxisValues("date")
            years = self.getAxisValues("year")
            if (self._index == years.shape[0] - 1):
                I = np.where(dates >= years[self._index])
            else:
                I = np.where((dates >= years[self._index]) &
                             (dates < years[self._index + 1]))
            clim = temp[I, :, :].flatten()
        elif (self._axis == "offset"):
            clim = temp[:, self._index, :].flatten()
        elif (self.isLocationAxis(self._axis)):
            clim = temp[:, :, self._index].flatten()
        elif (self._axis == "none" or self._axis == "threshold"):
            clim = temp.flatten()
        elif (self._axis == "all"):
            clim = temp
        # NOTE(review): no else-branch here — an unrecognized axis would
        # leave 'clim' unbound and raise NameError below
    else:
        clim = 0

    for i in range(0, len(metrics)):
        metric = metrics[i]
        temp = self._getScore(metric)
        # Same slicing scheme as for the climatology above
        if (self._axis == "date"):
            data[metric] = temp[self._index, :, :].flatten()
        elif (self._axis == "month"):
            dates = self.getAxisValues("date")
            months = self.getAxisValues("month")
            if (self._index == months.shape[0] - 1):
                I = np.where(dates >= months[self._index])
            else:
                I = np.where((dates >= months[self._index]) &
                             (dates < months[self._index + 1]))
            data[metric] = temp[I, :, :].flatten()
        elif (self._axis == "year"):
            dates = self.getAxisValues("date")
            years = self.getAxisValues("year")
            if (self._index == years.shape[0] - 1):
                I = np.where(dates >= years[self._index])
            else:
                I = np.where((dates >= years[self._index]) &
                             (dates < years[self._index + 1]))
            data[metric] = temp[I, :, :].flatten()
        elif (self._axis == "offset"):
            data[metric] = temp[:, self._index, :].flatten()
        elif (self.isLocationAxis(self._axis)):
            data[metric] = temp[:, :, self._index].flatten()
        elif (self._axis == "none" or self._axis == "threshold"):
            data[metric] = temp.flatten()
        elif (self._axis == "all"):
            data[metric] = temp
        else:
            Util.error("Data.py: unrecognized value of self._axis: " +
                       self._axis)

        # Subtract (or divide out) climatology for obs/fcst
        if (doClim and (metric == "fcst" or metric == "obs")):
            if (self._climType == "subtract"):
                data[metric] = data[metric] - clim
            else:
                data[metric] = data[metric] / clim

        # Remove missing values: accumulate a joint validity mask so that a
        # value invalid in ANY metric is dropped from ALL of them
        if (self._axis != "all"):
            currValid = (np.isnan(data[metric]) == 0)\
                      & (np.isinf(data[metric]) == 0)
            if (valid is None):
                valid = currValid
            else:
                valid = (valid & currValid)
    if (self._axis != "all"):
        I = np.where(valid)

    q = list()
    for i in range(0, len(metrics)):
        if (self._axis != "all"):
            q.append(data[metrics[i]][I])
        else:
            q.append(data[metrics[i]])

    # No valid data: return one-element NaN arrays instead of empty ones
    if (q[0].shape[0] == 0):
        for i in range(0, len(metrics)):
            q[i] = np.nan * np.zeros([1], 'float')

    return q
def getScores(self, metric):
    """Return the stored field identified by 'metric'.

    Recognized values: "obs", "fcst", "pit", "p<threshold>" (CDF slice),
    "q<quantile>" (quantile slice), "Offset", "Date", "Location",
    "Lat", "Lon", "Elev". Anything else aborts via Util.error().
    """
    if (metric == "obs"):
        return self._obs
    if (metric == "fcst"):
        return self._fcst
    if (metric == "pit"):
        if (self._pit is None):
            Util.error("File does not contain 'pit'")
        return self._pit
    # Note: the "pit" check above must precede this prefix test
    if (metric[0] == "p"):
        # CDF evaluated at a threshold, e.g. "p10"
        threshold = float(metric[1:])
        match = np.where(abs(self._thresholds - threshold) < 0.0001)[0]
        if (len(match) == 0):
            Util.error("Cannot find " + metric)
        elif (len(match) > 1):
            Util.error("Could not find unique threshold: " + str(threshold))
        return self._cdf[:, :, :, match[0]]
    if (metric[0] == "q"):
        # Forecast value at a quantile, e.g. "q0.9"
        quantile = float(metric[1:])
        match = np.where(abs(self._quantiles - quantile) < 0.0001)[0]
        if (len(match) == 0):
            Util.error("Cannot find " + metric)
        elif (len(match) > 1):
            Util.error("Could not find unique quantile: " + str(quantile))
        return self._x[:, :, :, match[0]]
    if (metric == "Offset"):
        return self._offsets
    if (metric == "Date"):
        return self._dates
    if (metric == "Location"):
        ids = np.zeros(len(self._stations), 'float')
        for i in range(0, len(self._stations)):
            ids[i] = self._stations[i].id()
        return ids
    if (metric in ["Lat", "Lon", "Elev"]):
        values = np.zeros(len(self._stations), 'float')
        for i in range(0, len(self._stations)):
            station = self._stations[i]
            if (metric == "Lat"):
                values[i] = station.lat()
            elif (metric == "Lon"):
                values[i] = station.lon()
            else:
                values[i] = station.elev()
        return values
    Util.error("Cannot find " + metric)
def __init__(self, filename):
    """Parse a whitespace-separated text verification file.

    The first non-comment row is a header naming the columns; "obs" and
    "fcst" columns are required. '#'-prefixed lines may carry
    "variable:"/"units:" metadata. Values are collected per
    (date, offset, lat, lon, elev) key and then packed into regular
    (date, offset, location[, threshold/quantile]) numpy arrays.
    Fixes included: row-length check now uses '!=' instead of the fragile
    'is not' identity test; removed unused 'import csv', 'fields' dict and
    dead start/end timing variables.
    """
    Input.__init__(self, filename)
    file = open(filename, 'r')
    self._units = "Unknown units"
    self._variable = "Unknown"
    self._pit = None

    self._dates = set()
    self._offsets = set()
    self._stations = set()
    self._quantiles = set()
    self._thresholds = set()
    obs = dict()
    fcst = dict()
    cdf = dict()
    pit = dict()
    x = dict()
    indices = dict()
    header = None

    # Default values if columns not available
    offset = 0
    date = 0
    lat = 0
    lon = 0
    elev = 0

    # Read the data into dictionaries keyed on (date,offset,lat,lon,elev)
    for rowstr in file:
        if (rowstr[0] == "#"):
            # Metadata comment line
            curr = rowstr[1:]
            curr = curr.split()
            if (curr[0] == "variable:"):
                self._variable = curr[1]
            elif (curr[0] == "units:"):
                self._units = curr[1]
            else:
                Util.warning("Ignoring line '" + rowstr.strip() +
                        "' in file '" + filename + "'")
        else:
            row = rowstr.split()
            if (header is None):
                # Parse the header so we know what each column represents
                header = row
                for i in range(0, len(header)):
                    att = header[i]
                    if (att == "date"):
                        indices["date"] = i
                    elif (att == "offset"):
                        indices["offset"] = i
                    elif (att == "lat"):
                        indices["lat"] = i
                    elif (att == "lon"):
                        indices["lon"] = i
                    elif (att == "elev"):
                        indices["elev"] = i
                    elif (att == "obs"):
                        indices["obs"] = i
                    elif (att == "fcst"):
                        indices["fcst"] = i
                    else:
                        indices[att] = i

                # Ensure we have required columns
                requiredColumns = ["obs", "fcst"]
                for col in requiredColumns:
                    if (col not in indices):
                        msg = "Could not parse %s: Missing column '%s'" % (
                                filename, col)
                        Util.error(msg)
            else:
                # FIX: 'is not' compared int identity; use a value comparison
                if (len(row) != len(header)):
                    Util.error(
                            "Incorrect number of columns (expecting %d) in row '%s'"
                            % (len(header), rowstr.strip()))
                if ("date" in indices):
                    date = self._clean(row[indices["date"]])
                self._dates.add(date)
                if ("offset" in indices):
                    offset = self._clean(row[indices["offset"]])
                self._offsets.add(offset)
                if ("id" in indices):
                    id = self._clean(row[indices["id"]])
                else:
                    id = np.nan
                if ("lat" in indices):
                    lat = self._clean(row[indices["lat"]])
                if ("lon" in indices):
                    lon = self._clean(row[indices["lon"]])
                if ("elev" in indices):
                    elev = self._clean(row[indices["elev"]])
                station = Station.Station(id, lat, lon, elev)
                self._stations.add(station)
                key = (date, offset, lat, lon, elev)
                obs[key] = self._clean(row[indices["obs"]])
                fcst[key] = self._clean(row[indices["fcst"]])
                quantileFields = self._getQuantileFields(header)
                thresholdFields = self._getThresholdFields(header)
                if "pit" in indices:
                    pit[key] = self._clean(row[indices["pit"]])
                for field in quantileFields:
                    quantile = float(field[1:])
                    self._quantiles.add(quantile)
                    key = (date, offset, lat, lon, elev, quantile)
                    x[key] = self._clean(row[indices[field]])
                for field in thresholdFields:
                    threshold = float(field[1:])
                    self._thresholds.add(threshold)
                    key = (date, offset, lat, lon, elev, threshold)
                    cdf[key] = self._clean(row[indices[field]])
    file.close()
    self._dates = list(self._dates)
    self._offsets = list(self._offsets)
    self._stations = list(self._stations)
    self._quantiles = list(self._quantiles)
    self._thresholds = np.array(list(self._thresholds))
    Ndates = len(self._dates)
    Noffsets = len(self._offsets)
    Nlocations = len(self._stations)
    Nquantiles = len(self._quantiles)
    Nthresholds = len(self._thresholds)

    # Put the dictionary data into regular 3D/4D arrays (NaN for gaps)
    self._obs = np.zeros([Ndates, Noffsets, Nlocations], 'float') * np.nan
    self._fcst = np.zeros([Ndates, Noffsets, Nlocations], 'float') * np.nan
    if (len(pit) != 0):
        self._pit = np.zeros([Ndates, Noffsets, Nlocations],
                'float') * np.nan
    self._cdf = np.zeros([Ndates, Noffsets, Nlocations, Nthresholds],
            'float') * np.nan
    self._x = np.zeros([Ndates, Noffsets, Nlocations, Nquantiles],
            'float') * np.nan
    for d in range(0, len(self._dates)):
        date = self._dates[d]
        for o in range(0, len(self._offsets)):
            offset = self._offsets[o]
            for s in range(0, len(self._stations)):
                station = self._stations[s]
                lat = station.lat()
                lon = station.lon()
                elev = station.elev()
                key = (date, offset, lat, lon, elev)
                if (key in obs):
                    self._obs[d][o][s] = obs[key]
                if (key in fcst):
                    self._fcst[d][o][s] = fcst[key]
                if (key in pit):
                    self._pit[d][o][s] = pit[key]
                for q in range(0, len(self._quantiles)):
                    quantile = self._quantiles[q]
                    key = (date, offset, lat, lon, elev, quantile)
                    if (key in x):
                        self._x[d, o, s, q] = x[key]
                for t in range(0, len(self._thresholds)):
                    threshold = self._thresholds[t]
                    key = (date, offset, lat, lon, elev, threshold)
                    if (key in cdf):
                        self._cdf[d, o, s, t] = cdf[key]

    # Assign ids to stations that came without one, starting above the
    # largest explicit id
    maxStationId = np.nan
    for station in self._stations:
        if (np.isnan(maxStationId)):
            maxStationId = station.id()
        elif (station.id() > maxStationId):
            maxStationId = station.id()

    counter = 0
    if (not np.isnan(maxStationId)):
        counter = maxStationId + 1

    for station in self._stations:
        if (np.isnan(station.id())):
            station.id(counter)
            counter = counter + 1
    self._dates = np.array(self._dates)
    self._offsets = np.array(self._offsets)
def computeCore(self, data, tRange):
    """Abstract placeholder: fail loudly when a subclass has not overridden
    computeCore."""
    message = ("Metric '" + self.getClassName() +
               "' has not been implemented yet")
    Util.error(message)
def _computeObsFcst(self, obs, fcst):
    """Abstract placeholder: subclasses must override to compute a score
    from obs/fcst arrays."""
    message = ("Metric " + self.name() +
               " has not implemented _computeObsFcst()")
    Util.error(message)