Example #1
0
 def getAxisValues(self, axis=None):
     """Return the coordinate values along the given axis.

     axis defaults to the currently active axis (self._axis). Dates,
     months, and years are returned as converted date values; offsets
     and locations as plain arrays; "none" and unknown axes as [0].
     """
     if (axis is None):
         axis = self._axis
     if (axis == "date"):
         return Util.convertDates(self._getScore("Date").astype(int))
     elif (axis == "month"):
         dates = self._getScore("Date").astype(int)
         # Collapse YYYYMMDD to the first day of the month (YYYYMM01).
         # Floor division is required: with '/', Python 3 produces floats
         # and corrupts the date arithmetic.
         months = np.unique((dates // 100) * 100 + 1)
         return Util.convertDates(months)
     elif (axis == "year"):
         dates = self._getScore("Date").astype(int)
         # Collapse YYYYMMDD to January 1st of the year (YYYY0101)
         years = np.unique((dates // 10000) * 10000 + 101)
         return Util.convertDates(years)
     elif (axis == "offset"):
         return self._getScore("Offset").astype(int)
     elif (axis == "none"):
         return [0]
     elif (self.isLocationAxis(axis)):
         if (axis == "location"):
             data = range(0, len(self._getScore("Location")))
         elif (axis == "locationId"):
             data = self._getScore("Location").astype(int)
         elif (axis == "locationElev"):
             data = self._getScore("Elev")
         elif (axis == "locationLat"):
             data = self._getScore("Lat")
         elif (axis == "locationLon"):
             data = self._getScore("Lon")
         else:
             Util.error("Data.getAxisValues has a bad axis name: " + axis)
         return data
     else:
         return [0]
Example #2
0
    def setAggregator(self, name):
        """Set the function used to collapse scores along an axis.

        name: one of mean, median, min, max, std, range, count, meanabs,
        or a number in [0, 1], which selects that quantile.
        """
        self._aggregatorName = name
        if (name == "mean"):
            self._aggregator = np.mean
        elif (name == "median"):
            self._aggregator = np.median
        elif (name == "min"):
            self._aggregator = np.min
        elif (name == "max"):
            self._aggregator = np.max
        elif (name == "std"):
            self._aggregator = np.std
        elif (name == "range"):
            self._aggregator = Util.nprange
        elif (name == "count"):
            self._aggregator = Util.numvalid
        elif (name == "meanabs"):
            self._aggregator = Util.meanabs
        elif (Util.isnumeric(name)):
            quantile = float(name)
            if quantile < 0 or quantile > 1:
                # Fixed duplicated word ("must must") in the error message
                Util.error("Number after -ct must be between 0 and 1")

            def func(x):
                # np.percentile expects a percentile in [0, 100]
                return np.percentile(x, quantile * 100)

            self._aggregator = func
        else:
            Util.error("Invalid aggregator")
Example #3
0
    def computeObsFcst(self, obs, fcst, tRange):
        """Compute the contingency-table score for the threshold range.

        obs/fcst: arrays of observations and forecasts
        tRange: the threshold range; required (errors out when None).
        Returns np.nan when there are no forecasts or the score is
        infinite.
        """
        if (tRange is None):
            Util.error("Metric " + self.getClassName() +
                       " requires '-r <threshold>'")
        value = np.nan
        if (len(fcst) > 0):
            # Compute frequencies
            if (self._usingQuantiles):
                # Translate the quantile range into value thresholds,
                # separately for forecasts and observations
                fRange = self._quantileToThreshold(np.sort(fcst), tRange)
                oRange = self._quantileToThreshold(np.sort(obs), tRange)
            else:
                fRange = tRange
                oRange = tRange
            # Single contingency-table computation for both cases
            # (within() is assumed side-effect free)
            fIn = self.within(fcst, fRange)
            oIn = self.within(obs, oRange)
            a = np.ma.sum(fIn & oIn)                # Hit
            b = np.ma.sum(fIn & (oIn == 0))         # False alarm
            c = np.ma.sum((fIn == 0) & oIn)         # Miss
            d = np.ma.sum((fIn == 0) & (oIn == 0))  # Correct rejection
            value = self.calc(a, b, c, d)
            if (np.isinf(value)):
                value = np.nan

        return value
Example #4
0
 def getStations(self):
     """Build Station objects from the file's coordinate variables
     (Location/Lat/Lon/Elev)."""
     lats = Util.clean(self._file.variables["Lat"])
     lons = Util.clean(self._file.variables["Lon"])
     ids = Util.clean(self._file.variables["Location"])
     elevs = Util.clean(self._file.variables["Elev"])
     return [Station.Station(ids[i], lats[i], lons[i], elevs[i])
             for i in range(0, lats.shape[0])]
Example #5
0
 def _getIndices(self, axis, findex=None):
     if (axis == "date"):
         I = self._getDateIndices(findex)
     elif (axis == "offset"):
         I = self._getOffsetIndices(findex)
     elif (axis == "location"):
         I = self._getLocationIndices(findex)
     else:
         Util.error("Could not get indices for axis: " + str(axis))
     return I
Example #6
0
    def getQvar(self, quantile):
        """Return the variable name holding the given quantile (in [0, 1]).

        E.g. 0.25 -> "q25" and 0.125 -> "q125" (the decimal point is
        stripped). Falls back to "fcst" with a warning when the q50
        variable is missing.
        """
        quantile = quantile * 100
        # Removed dead local 'minus' (it was always the empty string).
        # NOTE(review): negative quantiles lose their sign here via abs();
        # assumed quantile >= 0 -- confirm
        if (abs(quantile - int(quantile)) > 0.01):
            # Non-integer percentile: drop the decimal point (12.5 -> "125")
            var = "q" + str(abs(quantile)).replace(".", "")
        else:
            var = "q" + str(int(abs(quantile)))

        if (not self.hasMetric(var) and quantile == 50):
            Util.warning("Could not find q50, using fcst instead")
            return "fcst"
        return var
Example #7
0
 def computeCore(self, data, tRange):
     """Binned score: squared deviation of the per-bin mean observed
     frequency from the overall mean, averaged over all points."""
     [obsP, p] = Bs.getP(data, tRange)
     overallMean = np.mean(obsP)
     score = np.nan * np.zeros(len(p), 'float')
     # Assign every probability to its bin and score the bin
     for lower, upper in zip(self._edges[:-1], self._edges[1:]):
         inBin = np.where((p >= lower) & (p < upper))[0]
         if len(inBin) > 0:
             score[inBin] = (np.mean(obsP[inBin]) - overallMean)**2
     return Util.nanmean(score)
Example #8
0
    def computeCore(self, data, tRange):
        """Binned score: squared gap between the mean forecast probability
        and the mean observed frequency within each probability bin."""
        [obsP, p] = Bs.getP(data, tRange)

        score = np.nan * np.zeros(len(p), 'float')
        nBins = len(self._edges) - 1
        for i in range(nBins):
            inBin = np.where((p >= self._edges[i]) &
                             (p < self._edges[i + 1]))[0]
            if inBin.shape[0] > 0:
                gap = np.mean(p[inBin]) - np.mean(obsP[inBin])
                score[inBin] = gap**2
        return Util.nanmean(score)
Example #9
0
    def computeCore(self, data, tRange):
        """Compute the contingency-table score for the threshold range.

        Errors out when tRange is None; returns np.nan when there are no
        forecasts or the computed score is infinite.
        """
        if (tRange is None):
            Util.error("Metric " + self.getClassName() +
                       " requires '-r <threshold>'")
        [obs, fcst] = data.getScores(["obs", "fcst"])
        score = np.nan
        if (len(fcst) > 0):
            # Contingency-table frequencies
            fcstIn = self.within(fcst, tRange)
            obsIn = self.within(obs, tRange)
            hits = np.ma.sum(fcstIn & obsIn)
            falseAlarms = np.ma.sum(fcstIn & (obsIn == 0))
            misses = np.ma.sum((fcstIn == 0) & obsIn)
            correctRejections = np.ma.sum((fcstIn == 0) & (obsIn == 0))
            score = self.calc(hits, falseAlarms, misses, correctRejections)
            if (np.isinf(score)):
                score = np.nan
        return score
Example #10
0
    def setAggregator(self, name):
        """Pick the aggregation function identified by 'name'.

        Recognized names: mean, median, min, max, std, range. A numeric
        name is passed straight to np.percentile as a percentile value.
        """
        self._aggregatorName = name
        # The basic statistics map straight onto numpy functions
        numpyFuncs = {
            "mean": np.mean,
            "median": np.median,
            "min": np.min,
            "max": np.max,
            "std": np.std,
        }
        if name in numpyFuncs:
            self._aggregator = numpyFuncs[name]
        elif name == "range":
            self._aggregator = Util.nprange
        elif Util.isnumeric(name):

            def percentileFunc(x):
                return np.percentile(x, float(name))

            self._aggregator = percentileFunc
        else:
            Util.error("Invalid aggregator")
Example #11
0
    def _getScore(self, metric, findex=None):
        """Return the score array for 'metric' from file number 'findex'.

        findex defaults to the currently active file (self._findex).
        Results are cached per file; on a cache miss the metric is loaded
        from EVERY file (so missing values can be synchronized across
        them) and subset to the common date/offset/location indices.
        The arrays are assumed to be indexed (date, offset, location) --
        see the dimension handling below.
        """
        if (findex is None):
            findex = self._findex

        # Serve from the per-file cache when possible
        if (metric in self._cache[findex]):
            return self._cache[findex][metric]

        # Load all files
        for f in range(0, self.getNumFilesWithClim()):
            if (metric not in self._cache[f]):
                file = self._files[f]
                if (metric not in file.getVariables()):
                    Util.error("Variable '" + metric + "' does not exist in " +
                               self.getFilenames()[f])
                temp = file.getScores(metric)
                dims = file.getDims(metric)
                temp = Util.clean(temp)
                # Subset each dimension to the indices shared by all files;
                # dims[i] names the axis (date/offset/location) and i is
                # its position in the array
                for i in range(0, len(dims)):
                    I = self._getIndices(dims[i].lower(), f)
                    if (i == 0):
                        temp = temp[I, Ellipsis]
                    if (i == 1):
                        temp = temp[:, I, Ellipsis]
                    if (i == 2):
                        temp = temp[:, :, I, Ellipsis]
                self._cache[f][metric] = temp

        # Remove missing. If one configuration has a missing value, set all
        # configurations to missing This can happen when the dates are available,
        # but have missing values
        if self._removeMissingAcrossAll:
            isMissing = np.isnan(self._cache[0][metric])
            for f in range(1, self.getNumFilesWithClim()):
                isMissing = isMissing | (np.isnan(self._cache[f][metric]))
            for f in range(0, self.getNumFilesWithClim()):
                self._cache[f][metric][isMissing] = np.nan

        return self._cache[findex][metric]
Example #12
0
 def computeCore(self, data, tRange):
     """Brier-style skill score: 1 - BS / BS_uncertainty, computed from
     binned probabilities. Returns np.nan when the uncertainty term is
     zero (climatological frequency of 0 or 1)."""
     [obsP, p] = Bs.getP(data, tRange)
     binned = np.nan * np.zeros(len(p), 'float')
     edges = self._edges
     for i in range(0, len(edges) - 1):
         inBin = np.where((p >= edges[i]) & (p < edges[i + 1]))[0]
         if inBin.size > 0:
             binned[inBin] = (np.mean(p[inBin]) - obsP[inBin])**2
     bsMean = Util.nanmean(binned)
     climFreq = np.mean(obsP)
     bsUnc = climFreq * (1 - climFreq)
     # Guard against division by zero
     return np.nan if bsUnc == 0 else (bsUnc - bsMean) / bsUnc
Example #13
0
    def computeCore(self, data, tRange):
        """Compute a binned Brier score for the probability of the
        observation falling within tRange.

        The probability is derived from the file's cumulative probability
        variables at each finite endpoint of the range: p = p1 - p0.
        """
        # Compute probabilities based on thresholds
        # Defaults: P(x <= -inf) = 0 and P(x <= inf) = 1
        p0 = 0
        p1 = 1
        if (tRange[0] != -np.inf and tRange[1] != np.inf):
            var0 = data.getPvar(tRange[0])
            var1 = data.getPvar(tRange[1])
            [obs, p0, p1] = data.getScores(["obs", var0, var1])
        elif (tRange[0] != -np.inf):
            var0 = data.getPvar(tRange[0])
            [obs, p0] = data.getScores(["obs", var0])
        elif (tRange[1] != np.inf):
            var1 = data.getPvar(tRange[1])
            [obs, p1] = data.getScores(["obs", var1])
        # NOTE(review): if both endpoints are infinite, none of the
        # branches above run and 'obs' is unbound (NameError) -- presumably
        # callers always supply at least one finite endpoint; confirm
        obsP = self.within(obs, tRange)
        p = p1 - p0  # Prob of obs within range
        bs = np.nan * np.zeros(len(p), 'float')

        # Split into bins and compute Brier score on each bin
        for i in range(0, len(self._edges) - 1):
            I = np.where((p >= self._edges[i]) & (p < self._edges[i + 1]))[0]
            if (len(I) > 0):
                bs[I] = (np.mean(p[I]) - obsP[I])**2
        return Util.nanmean(bs)
Example #14
0
 def help(cls):
     """Build a human-readable help string for this metric class:
     description, orientation, perfect score, min/max, long description
     and reference (each section only when defined)."""
     s = cls.description()
     # != instead of 'is not': identity comparison against an int literal
     # is unreliable across interpreters (SyntaxWarning in Python 3.8+)
     if (cls.orientation != 0):
         s = s + "\n" + Util.green("Orientation: ")
         if (cls.orientation == 1):
             s = s + "Positive"
         elif (cls.orientation == -1):
             s = s + "Negative"
         else:
             s = s + "None"
     # NOTE(review): the checks below read the public attribute but print
     # the underscored one (perfectScore vs _perfectScore, min vs _min,
     # max vs _max) -- confirm both names exist on the class
     if (cls.perfectScore is not None):
         s = s + "\n" + Util.green("Perfect score: ") + str(
             cls._perfectScore)
     if (cls.min is not None):
         s = s + "\n" + Util.green("Minimum value: ") + str(cls._min)
     if (cls.max is not None):
         s = s + "\n" + Util.green("Maximum value: ") + str(cls._max)
     if (cls._long is not None):
         s = s + "\n" + Util.green("Description: ") + cls._long
     if (cls.reference() is not None):
         s = s + "\n" + Util.green("Reference: ") + cls.reference()
     return s
Example #15
0
    def __init__(self,
                 filenames,
                 dates=None,
                 offsets=None,
                 locations=None,
                 latlonRange=None,
                 elevRange=None,
                 clim=None,
                 climType="subtract",
                 training=None,
                 legend=None,
                 removeMissingAcrossAll=True):
        """Set up the data object from one or more input files.

        filenames: one filename or a list of filenames
        dates/offsets/locations: restrict to these values (None for all)
        latlonRange: [minLon, maxLon, minLat, maxLat] bounding box
        elevRange: [minElev, maxElev] station elevation bounds
        clim: optional climatology file, combined with obs/fcst using
            climType ("subtract" or "divide")
        training: number of dates to drop from the start of each file
        legend: one legend entry per filename
        removeMissingAcrossAll: if True, a value missing in one file is
            set to missing in all files
        """
        if (not isinstance(filenames, list)):
            filenames = [filenames]
        self._axis = "date"
        self._index = 0
        self._removeMissingAcrossAll = removeMissingAcrossAll

        # != instead of 'is not': identity comparison of two ints is
        # unreliable (only works for interned small ints)
        if (legend is not None and len(filenames) != len(legend)):
            Util.error("Need one legend entry for each filename")
        self._legend = legend

        # Organize files
        self._files = list()
        self._cache = list()
        self._clim = None
        for filename in filenames:
            if (not os.path.exists(filename)):
                Util.error("File '" + filename + "' does not exist")
            # Try each supported input format in turn
            if (Input.NetcdfCf.isValid(filename)):
                file = Input.NetcdfCf(filename)
            elif (Input.Comps.isValid(filename)):
                file = Input.Comps(filename)
            elif (Input.Text.isValid(filename)):
                file = Input.Text(filename)
            else:
                Util.error("File '" + filename + "' is not a valid input file")
            self._files.append(file)
            self._cache.append(dict())
        if (clim is not None):
            if (not os.path.exists(clim)):
                Util.error("File '" + clim + "' does not exist")
            if (Input.NetcdfCf.isValid(clim)):
                self._clim = Input.NetcdfCf(clim)
            elif (Input.Comps.isValid(clim)):
                self._clim = Input.Comps(clim)
            elif (Input.Text.isValid(clim)):
                self._clim = Input.Text(clim)
            else:
                Util.error("File '" + clim +
                           "' is not a valid climatology file")
            self._cache.append(dict())
            if (not (climType == "subtract" or climType == "divide")):
                # Fixed the missing closing quote around 'divide'
                Util.error("Data: climType must be 'subtract' or 'divide'")
            self._climType = climType

            # Climatology file
            self._files = self._files + [self._clim]

        # Latitude-Longitude range: keep only stations inside the box
        if (latlonRange is not None):
            lat = self._files[0].getLats()
            lon = self._files[0].getLons()
            locId = self._files[0].getStationIds()
            latlonLocations = list()
            minLon = latlonRange[0]
            maxLon = latlonRange[1]
            minLat = latlonRange[2]
            maxLat = latlonRange[3]
            for i in range(0, len(lat)):
                currLat = float(lat[i])
                currLon = float(lon[i])
                if (currLat >= minLat and currLat <= maxLat
                        and currLon >= minLon and currLon <= maxLon):
                    latlonLocations.append(locId[i])
            useLocations = list()
            if (locations is not None):
                # Intersect the user's location list with the box
                for i in range(0, len(locations)):
                    currLocation = locations[i]
                    if (currLocation in latlonLocations):
                        useLocations.append(currLocation)
            else:
                useLocations = latlonLocations
            if (len(useLocations) == 0):
                Util.error("No available locations within lat/lon range")
        elif locations is not None:
            useLocations = locations
        else:
            useLocations = self._files[0].getStationIds()

        # Elevation range: further restrict to stations inside the bounds
        if (elevRange is not None):
            stations = self._files[0].getStations()
            minElev = elevRange[0]
            maxElev = elevRange[1]
            elevLocations = list()
            for i in range(0, len(stations)):
                currElev = float(stations[i].elev())
                id = stations[i].id()
                if (currElev >= minElev and currElev <= maxElev):
                    elevLocations.append(id)
            useLocations = Util.intersect(useLocations, elevLocations)
            if (len(useLocations) == 0):
                Util.error("No available locations within elevation range")

        # Find the indices common to all files
        self._datesI = Data._getUtilIndices(self._files, "Date", dates)
        self._offsetsI = Data._getUtilIndices(self._files, "Offset", offsets)
        self._locationsI = Data._getUtilIndices(self._files, "Location",
                                                useLocations)
        if (len(self._datesI[0]) == 0):
            Util.error("No valid dates selected")
        if (len(self._offsetsI[0]) == 0):
            Util.error("No valid offsets selected")
        if (len(self._locationsI[0]) == 0):
            Util.error("No valid locations selected")

        # Training: drop the first 'training' dates from each file
        if (training is not None):
            for f in range(0, len(self._datesI)):
                if (len(self._datesI[f]) <= training):
                    Util.error("Training period too long for " +
                               self.getFilenames()[f] +
                               ". Max training period is " +
                               str(len(self._datesI[f]) - 1) + ".")
                self._datesI[f] = self._datesI[f][training:]

        self._findex = 0
Example #16
0
    def getScores(self, metrics):
        """Return a list of score arrays, one per requested metric.

        metrics: a single metric name or a list of names (e.g.
        ["obs", "fcst"]). Each score is sliced to the current index along
        the current axis, combined with the climatology (obs/fcst only,
        when a climatology file was given), and flattened. Entries that
        are NaN/inf in ANY requested metric are removed from ALL of them
        so the returned arrays stay aligned; if nothing is left, each
        array is a single NaN.
        """
        if (not isinstance(metrics, list)):
            metrics = [metrics]
        data = dict()
        valid = None
        # NOTE(review): 'axis' is never used below -- looks like a leftover
        axis = self._getAxisIndex(self._axis)

        # Compute climatology, if needed
        obsFcstAvailable = ("obs" in metrics or "fcst" in metrics)
        doClim = self._clim is not None and obsFcstAvailable
        if (doClim):
            # The climatology file is appended last in self._files
            temp = self._getScore("fcst", len(self._files) - 1)
            if (self._axis == "date"):
                clim = temp[self._index, :, :].flatten()
            elif (self._axis == "month"):
                # Select all dates falling within the current month
                dates = self.getAxisValues("date")
                months = self.getAxisValues("month")
                if (self._index == months.shape[0] - 1):
                    I = np.where(dates >= months[self._index])
                else:
                    I = np.where((dates >= months[self._index])
                                 & (dates < months[self._index + 1]))
                clim = temp[I, :, :].flatten()
            elif (self._axis == "year"):
                # Select all dates falling within the current year
                dates = self.getAxisValues("date")
                years = self.getAxisValues("year")
                if (self._index == years.shape[0] - 1):
                    I = np.where(dates >= years[self._index])
                else:
                    I = np.where((dates >= years[self._index])
                                 & (dates < years[self._index + 1]))
                clim = temp[I, :, :].flatten()
            elif (self._axis == "offset"):
                clim = temp[:, self._index, :].flatten()
            elif (self.isLocationAxis(self._axis)):
                clim = temp[:, :, self._index].flatten()
            elif (self._axis == "none" or self._axis == "threshold"):
                clim = temp.flatten()
            elif (self._axis == "all"):
                clim = temp
        else:
            # No climatology: subtracting 0 below leaves data unchanged
            clim = 0

        for i in range(0, len(metrics)):
            metric = metrics[i]
            temp = self._getScore(metric)

            # Slice along the active axis (same scheme as for clim above)
            if (self._axis == "date"):
                data[metric] = temp[self._index, :, :].flatten()
            elif (self._axis == "month"):
                dates = self.getAxisValues("date")
                months = self.getAxisValues("month")
                if (self._index == months.shape[0] - 1):
                    I = np.where(dates >= months[self._index])
                else:
                    I = np.where((dates >= months[self._index])
                                 & (dates < months[self._index + 1]))
                data[metric] = temp[I, :, :].flatten()
            elif (self._axis == "year"):
                dates = self.getAxisValues("date")
                years = self.getAxisValues("year")
                if (self._index == years.shape[0] - 1):
                    I = np.where(dates >= years[self._index])
                else:
                    I = np.where((dates >= years[self._index])
                                 & (dates < years[self._index + 1]))
                data[metric] = temp[I, :, :].flatten()
            elif (self._axis == "offset"):
                data[metric] = temp[:, self._index, :].flatten()
            elif (self.isLocationAxis(self._axis)):
                data[metric] = temp[:, :, self._index].flatten()
            elif (self._axis == "none" or self._axis == "threshold"):
                data[metric] = temp.flatten()
            elif (self._axis == "all"):
                data[metric] = temp
            else:
                Util.error("Data.py: unrecognized value of self._axis: " +
                           self._axis)

            # Subtract climatology
            if (doClim and (metric == "fcst" or metric == "obs")):
                if (self._climType == "subtract"):
                    data[metric] = data[metric] - clim
                else:
                    data[metric] = data[metric] / clim

            # Remove missing values
            if (self._axis != "all"):
                currValid = (np.isnan(data[metric]) == 0)\
                          & (np.isinf(data[metric]) == 0)
                if (valid is None):
                    valid = currValid
                else:
                    valid = (valid & currValid)
        if (self._axis != "all"):
            I = np.where(valid)

        # Keep only entries valid in every requested metric
        q = list()
        for i in range(0, len(metrics)):
            if (self._axis != "all"):
                q.append(data[metrics[i]][I])
            else:
                q.append(data[metrics[i]])

        # No valid data
        if (q[0].shape[0] == 0):
            for i in range(0, len(metrics)):
                q[i] = np.nan * np.zeros([1], 'float')

        return q
Example #17
0
class Comps(Input):
    """Read verification data from the legacy COMPS NetCDF format."""
    # Variables that describe coordinates rather than scores
    _dimensionMetrics = ["Date", "Offset", "Location", "Lat", "Lon", "Elev"]
    _description = Util.formatArgument(
        "netcdf", "Undocumented legacy " +
        "NetCDF format, to be phased out. A new NetCDF based format will " +
        "be defined.")

    def __init__(self, filename):
        Input.__init__(self, filename)
        self._file = netcdf(filename, 'r')

    def getName(self):
        # NOTE(review): this returns the file's variable dictionary, not a
        # name string -- confirm this is what callers expect
        return self._file.variables

    def getStations(self):
        """Build Station objects from the Location/Lat/Lon/Elev
        coordinate variables."""
        lat = Util.clean(self._file.variables["Lat"])
        lon = Util.clean(self._file.variables["Lon"])
        id = Util.clean(self._file.variables["Location"])
        elev = Util.clean(self._file.variables["Elev"])
        stations = list()
        for i in range(0, lat.shape[0]):
            station = Station.Station(id[i], lat[i], lon[i], elev[i])
            stations.append(station)
        return stations

    def getScores(self, metric):
        """Return the cleaned score array for 'metric'."""
        metric = self._toPvarComps(metric)
        temp = Util.clean(self._file.variables[metric])
        return temp

    def _toPvarVerif(self, metric):
        """Translate a COMPS probability-variable name to verif style
        (e.g. "pm05" -> "p-0.5")."""
        if (metric[0] == "p" and metric != "pit"):
            metric = metric.replace("m", "-")
            if (metric != "p0"):
                metric = metric.replace("p0", "p0.")
            metric = metric.replace("p-0", "p-0.")
        return metric

    def _toPvarComps(self, metric):
        """Translate a verif probability-variable name to COMPS style
        (e.g. "p-0.5" -> "pm05")."""
        if (metric[0] == "p" and metric != "pit"):
            metric = metric.replace("-", "m")
            metric = metric.replace(".", "")
        return metric

    def getDims(self, metric):
        """Return the dimension names of the variable holding 'metric'."""
        metric = self._toPvarComps(metric)
        return self._file.variables[metric].dimensions

    def getDates(self):
        return Util.clean(self._file.variables["Date"])

    def getOffsets(self):
        return Util.clean(self._file.variables["Offset"])

    def getThresholds(self):
        """Return the thresholds for which cumulative probability
        variables (p<threshold>) exist in the file."""
        thresholds = list()
        # items() instead of iteritems() so this also runs on Python 3
        # (on Python 2, items() iterates the same pairs)
        for (metric, v) in self._file.variables.items():
            if (metric not in self._dimensionMetrics):
                if (metric[0] == "p" and metric != "pit"):
                    metric = self._toPvarVerif(metric)
                    thresholds.append(float(metric[1:]))
        return thresholds

    def getQuantiles(self):
        """Return the quantiles for which forecast variables
        (q<quantile>) exist in the file."""
        quantiles = list()
        for (metric, v) in self._file.variables.items():
            if (metric not in self._dimensionMetrics):
                if (metric[0] == "q"):
                    quantiles.append(float(metric[1:]))
        return quantiles

    def getMetrics(self):
        """Return all score variables (coordinate variables excluded)."""
        metrics = list()
        for (metric, v) in self._file.variables.items():
            if (metric not in self._dimensionMetrics):
                metrics.append(metric)
        return metrics

    def getVariables(self):
        """Return all variable names, translated to verif-style names."""
        metrics = list()
        for (metric, v) in self._file.variables.items():
            metrics.append(metric)
        for i in range(0, len(metrics)):
            metrics[i] = self._toPvarVerif(metrics[i])
        return metrics

    def getUnits(self):
        """Return a display string for the file's units attribute."""
        if (hasattr(self._file, "Units")):
            if (self._file.Units == ""):
                return "No units"
            elif (self._file.Units == "%"):
                return "%"
            else:
                return "$" + self._file.Units + "$"
        else:
            return "No units"

    def getVariable(self):
        return self._file.Variable

    @staticmethod
    def isValid(filename):
        """Check whether 'filename' can be opened as a NetCDF file."""
        try:
            file = netcdf(filename, 'r')
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt and
            # SystemExit are not swallowed
            return False
        # Close the handle; the original leaked it
        file.close()
        return True
Example #18
0
 def getEns(self):
     """Return the cleaned values of the file's "ens" variable."""
     ens = self._file.variables["ens"]
     return Util.clean(ens)
Example #19
0
    def __init__(self, filename):
        """Parse a text-format verification file.

        Each non-comment row holds one forecast/obs pair; the header row
        names the columns (date, offset, id, lat, lon, elev, obs, fcst,
        plus optional pit, q<quantile>, and p<threshold> columns). The
        parsed values are gridded into arrays indexed by date, offset,
        and station (and quantile/threshold for q/p columns).
        """
        # Removed unused function-level 'import csv', the unused 'fields'
        # dict, and leftover time.time() profiling locals
        Input.__init__(self, filename)
        file = open(filename, 'r')
        self._units = "Unknown units"
        self._variable = "Unknown"
        self._pit = None

        self._dates = set()
        self._offsets = set()
        self._stations = set()
        self._quantiles = set()
        self._thresholds = set()
        obs = dict()
        fcst = dict()
        cdf = dict()
        pit = dict()
        x = dict()
        indices = dict()
        header = None
        quantileFields = []
        thresholdFields = []

        # Default values if columns not available
        offset = 0
        date = 0
        lat = 0
        lon = 0
        elev = 0

        # Read the data into dictionary with (date,offset,lat,lon,elev) as key and obs/fcst as values
        for rowstr in file:
            if (rowstr[0] == "#"):
                # Comment lines may carry metadata (variable/units)
                curr = rowstr[1:]
                curr = curr.split()
                if (curr[0] == "variable:"):
                    self._variable = curr[1]
                elif (curr[0] == "units:"):
                    self._units = curr[1]
                else:
                    Util.warning("Ignoring line '" + rowstr.strip() +
                                 "' in file '" + filename + "'")
            else:
                row = rowstr.split()
                if (header is None):
                    # Parse the header so we know what each column represents
                    header = row
                    # Each column name maps directly to its position (the
                    # original if/elif chain assigned indices[att] = i in
                    # every branch)
                    for i in range(0, len(header)):
                        indices[header[i]] = i

                    # Ensure we have required columns
                    requiredColumns = ["obs", "fcst"]
                    for col in requiredColumns:
                        if (col not in indices):
                            msg = "Could not parse %s: Missing column '%s'" % (
                                filename, col)
                            Util.error(msg)

                    # These depend only on the header, so compute them once
                    # here instead of once per data row
                    quantileFields = self._getQuantileFields(header)
                    thresholdFields = self._getThresholdFields(header)
                else:
                    # != instead of 'is not': identity comparison of two
                    # ints is unreliable
                    if (len(row) != len(header)):
                        Util.error(
                            "Incorrect number of columns (expecting %d) in row '%s'"
                            % (len(header), rowstr.strip()))
                    if ("date" in indices):
                        date = self._clean(row[indices["date"]])
                    self._dates.add(date)
                    if ("offset" in indices):
                        offset = self._clean(row[indices["offset"]])
                    self._offsets.add(offset)
                    if ("id" in indices):
                        id = self._clean(row[indices["id"]])
                    else:
                        # No id column: assign sequential ids further below
                        id = np.nan
                    if ("lat" in indices):
                        lat = self._clean(row[indices["lat"]])
                    if ("lon" in indices):
                        lon = self._clean(row[indices["lon"]])
                    if ("elev" in indices):
                        elev = self._clean(row[indices["elev"]])
                    station = Station.Station(id, lat, lon, elev)
                    self._stations.add(station)
                    key = (date, offset, lat, lon, elev)
                    obs[key] = self._clean(row[indices["obs"]])
                    fcst[key] = self._clean(row[indices["fcst"]])
                    if "pit" in indices:
                        pit[key] = self._clean(row[indices["pit"]])
                    for field in quantileFields:
                        quantile = float(field[1:])
                        self._quantiles.add(quantile)
                        key = (date, offset, lat, lon, elev, quantile)
                        x[key] = self._clean(row[indices[field]])
                    for field in thresholdFields:
                        threshold = float(field[1:])
                        self._thresholds.add(threshold)
                        key = (date, offset, lat, lon, elev, threshold)
                        cdf[key] = self._clean(row[indices[field]])
        file.close()
        self._dates = list(self._dates)
        self._offsets = list(self._offsets)
        self._stations = list(self._stations)
        self._quantiles = list(self._quantiles)
        self._thresholds = np.array(list(self._thresholds))
        Ndates = len(self._dates)
        Noffsets = len(self._offsets)
        Nlocations = len(self._stations)
        Nquantiles = len(self._quantiles)
        Nthresholds = len(self._thresholds)

        # Put the dictionary data into a regular 3D array
        self._obs = np.zeros([Ndates, Noffsets, Nlocations], 'float') * np.nan
        self._fcst = np.zeros([Ndates, Noffsets, Nlocations], 'float') * np.nan
        if (len(pit) != 0):
            self._pit = np.zeros([Ndates, Noffsets, Nlocations],
                                 'float') * np.nan
        self._cdf = np.zeros([Ndates, Noffsets, Nlocations, Nthresholds],
                             'float') * np.nan
        self._x = np.zeros([Ndates, Noffsets, Nlocations, Nquantiles],
                           'float') * np.nan
        for d in range(0, len(self._dates)):
            date = self._dates[d]
            for o in range(0, len(self._offsets)):
                offset = self._offsets[o]
                for s in range(0, len(self._stations)):
                    station = self._stations[s]
                    lat = station.lat()
                    lon = station.lon()
                    elev = station.elev()
                    key = (date, offset, lat, lon, elev)
                    if (key in obs):
                        self._obs[d][o][s] = obs[key]
                    if (key in fcst):
                        self._fcst[d][o][s] = fcst[key]
                    if (key in pit):
                        self._pit[d][o][s] = pit[key]
                    for q in range(0, len(self._quantiles)):
                        quantile = self._quantiles[q]
                        key = (date, offset, lat, lon, elev, quantile)
                        if (key in x):
                            self._x[d, o, s, q] = x[key]
                    for t in range(0, len(self._thresholds)):
                        threshold = self._thresholds[t]
                        key = (date, offset, lat, lon, elev, threshold)
                        if (key in cdf):
                            self._cdf[d, o, s, t] = cdf[key]

        # Assign sequential ids (after the largest existing id) to
        # stations that did not have an 'id' column
        maxStationId = np.nan
        for station in self._stations:
            if (np.isnan(maxStationId)):
                maxStationId = station.id()
            elif (station.id() > maxStationId):
                maxStationId = station.id()

        counter = 0
        if (not np.isnan(maxStationId)):
            counter = maxStationId + 1

        for station in self._stations:
            if (np.isnan(station.id())):
                station.id(counter)
                counter = counter + 1
        self._dates = np.array(self._dates)
        self._offsets = np.array(self._offsets)
Example #20
0
class Text(Input):
    """Verification input stored as a space-delimited text file.

    See _description below for the expected file layout.
    """
    _description = Util.formatArgument("text", "Data organized in rows and columns with space as a delimiter. Each row represents one forecast/obs pair, and each column represents one attribute of the data. Here is an example:") + "\n"\
    + Util.formatArgument("", "") + "\n"\
    + Util.formatArgument("", "# variable: Temperature") + "\n"\
    + Util.formatArgument("", "# units: $^oC$") + "\n"\
    + Util.formatArgument("", "date     offset id      lat     lon      elev obs fcst      p10") + "\n"\
    + Util.formatArgument("", "20150101 0      214     49.2    -122.1   92 3.4 2.1     0.91") + "\n"\
    + Util.formatArgument("", "20150101 1      214     49.2    -122.1   92 4.7 4.2      0.85") + "\n"\
    + Util.formatArgument("", "20150101 0      180     50.3    -120.3   150 0.2 -1.2 0.99") + "\n"\
    + Util.formatArgument("", "") + "\n"\
    + Util.formatArgument("", " Any lines starting with '#' can be metadata (currently variable: and units: are recognized). After that is a header line that must describe the data columns below. The following attributes are recognized: date (in YYYYMMDD), offset (in hours), id (station identifier), lat (in degrees), lon (in degrees), obs (observations), fcst (deterministic forecast), p<number> (cumulative probability at a threshold of 10). obs and fcst are required columns: a value of 0 is used for any missing column. The columns can be in any order. If 'id' is not provided, then they are assigned sequentially starting at 0.")

    def __init__(self, filename):
        """Read and parse 'filename', building the internal score arrays.

        Aborts via Util.error() if a required column (obs, fcst) is missing
        or a data row has the wrong number of columns.
        """
        Input.__init__(self, filename)
        self._units = "Unknown units"
        self._variable = "Unknown"
        self._pit = None

        # Unique coordinate values seen in the file
        self._dates = set()
        self._offsets = set()
        self._stations = set()
        self._quantiles = set()
        self._thresholds = set()
        # Scores keyed by (date, offset, lat, lon, elev[, level])
        obs = dict()
        fcst = dict()
        cdf = dict()
        pit = dict()
        x = dict()
        indices = dict()  # column name -> column index
        header = None
        quantileFields = list()
        thresholdFields = list()

        # Default values if columns not available
        offset = 0
        date = 0
        lat = 0
        lon = 0
        elev = 0

        # Read the data into dictionaries with (date,offset,lat,lon,elev) as
        # key and obs/fcst as values
        file = open(filename, 'r')
        for rowstr in file:
            if (rowstr[0] == "#"):
                # Metadata line, e.g. "# variable: Temperature"
                curr = rowstr[1:].split()
                if (curr[0] == "variable:"):
                    self._variable = curr[1]
                elif (curr[0] == "units:"):
                    self._units = curr[1]
                else:
                    Util.warning("Ignoring line '" + rowstr.strip() +
                                 "' in file '" + filename + "'")
            else:
                row = rowstr.split()
                if (header is None):
                    # Parse the header so we know what each column represents.
                    # Every attribute (recognized or not, including
                    # q<quantile> and p<threshold>) maps to its column index.
                    header = row
                    for i in range(0, len(header)):
                        indices[header[i]] = i

                    # Ensure we have required columns
                    for col in ["obs", "fcst"]:
                        if (col not in indices):
                            msg = "Could not parse %s: Missing column '%s'" % (
                                filename, col)
                            Util.error(msg)

                    # These depend only on the header, so compute them once
                    # instead of once per data row
                    quantileFields = self._getQuantileFields(header)
                    thresholdFields = self._getThresholdFields(header)
                else:
                    # Fixed: compare lengths with !=, not identity ('is not')
                    if (len(row) != len(header)):
                        Util.error(
                            "Incorrect number of columns (expecting %d) in row '%s'"
                            % (len(header), rowstr.strip()))
                    if ("date" in indices):
                        date = self._clean(row[indices["date"]])
                    self._dates.add(date)
                    if ("offset" in indices):
                        offset = self._clean(row[indices["offset"]])
                    self._offsets.add(offset)
                    if ("id" in indices):
                        stationId = self._clean(row[indices["id"]])
                    else:
                        # Missing ids are assigned sequentially further down
                        stationId = np.nan
                    if ("lat" in indices):
                        lat = self._clean(row[indices["lat"]])
                    if ("lon" in indices):
                        lon = self._clean(row[indices["lon"]])
                    if ("elev" in indices):
                        elev = self._clean(row[indices["elev"]])
                    self._stations.add(Station.Station(stationId, lat, lon,
                                                       elev))
                    key = (date, offset, lat, lon, elev)
                    obs[key] = self._clean(row[indices["obs"]])
                    fcst[key] = self._clean(row[indices["fcst"]])
                    if ("pit" in indices):
                        pit[key] = self._clean(row[indices["pit"]])
                    for field in quantileFields:
                        quantile = float(field[1:])
                        self._quantiles.add(quantile)
                        key = (date, offset, lat, lon, elev, quantile)
                        x[key] = self._clean(row[indices[field]])
                    for field in thresholdFields:
                        threshold = float(field[1:])
                        self._thresholds.add(threshold)
                        key = (date, offset, lat, lon, elev, threshold)
                        cdf[key] = self._clean(row[indices[field]])
        file.close()

        self._dates = list(self._dates)
        self._offsets = list(self._offsets)
        self._stations = list(self._stations)
        # Arrays (not lists) so that getScores() can vectorize the level
        # lookup; previously _quantiles stayed a list, which made
        # "abs(self._quantiles - quantile)" crash for q<level> metrics
        self._quantiles = np.array(list(self._quantiles))
        self._thresholds = np.array(list(self._thresholds))
        Ndates = len(self._dates)
        Noffsets = len(self._offsets)
        Nlocations = len(self._stations)
        Nquantiles = len(self._quantiles)
        Nthresholds = len(self._thresholds)

        # Put the dictionary data into regular 3D/4D arrays (NaN = missing)
        self._obs = np.zeros([Ndates, Noffsets, Nlocations], 'float') * np.nan
        self._fcst = np.zeros([Ndates, Noffsets, Nlocations], 'float') * np.nan
        if (len(pit) != 0):
            self._pit = np.zeros([Ndates, Noffsets, Nlocations],
                                 'float') * np.nan
        self._cdf = np.zeros([Ndates, Noffsets, Nlocations, Nthresholds],
                             'float') * np.nan
        self._x = np.zeros([Ndates, Noffsets, Nlocations, Nquantiles],
                           'float') * np.nan
        for d in range(0, len(self._dates)):
            date = self._dates[d]
            for o in range(0, len(self._offsets)):
                offset = self._offsets[o]
                for s in range(0, len(self._stations)):
                    station = self._stations[s]
                    lat = station.lat()
                    lon = station.lon()
                    elev = station.elev()
                    key = (date, offset, lat, lon, elev)
                    if (key in obs):
                        self._obs[d][o][s] = obs[key]
                    if (key in fcst):
                        self._fcst[d][o][s] = fcst[key]
                    if (key in pit):
                        self._pit[d][o][s] = pit[key]
                    for q in range(0, len(self._quantiles)):
                        quantile = self._quantiles[q]
                        key = (date, offset, lat, lon, elev, quantile)
                        if (key in x):
                            self._x[d, o, s, q] = x[key]
                    for t in range(0, len(self._thresholds)):
                        threshold = self._thresholds[t]
                        key = (date, offset, lat, lon, elev, threshold)
                        if (key in cdf):
                            self._cdf[d, o, s, t] = cdf[key]

        # Stations without an id get sequential ids starting just above the
        # largest id present in the file (or at 0 if none had one)
        knownIds = [station.id() for station in self._stations
                    if not np.isnan(station.id())]
        counter = max(knownIds) + 1 if len(knownIds) > 0 else 0
        for station in self._stations:
            if (np.isnan(station.id())):
                station.id(counter)
                counter = counter + 1
        self._dates = np.array(self._dates)
        self._offsets = np.array(self._offsets)

    def _clean(self, value):
        """Parse a string into a float, mapping the missing-value code -999
        to np.nan."""
        fvalue = float(value)
        if (fvalue == -999):
            fvalue = np.nan
        return fvalue

    def _getQuantileFields(self, fields):
        """Return the header fields that encode quantiles (q<level>)."""
        return [att for att in fields if att[0] == "q"]

    def _getThresholdFields(self, fields):
        """Return the header fields that encode thresholds (p<value>),
        excluding the special 'pit' column."""
        return [att for att in fields if att[0] == "p" and att != "pit"]

    def getThresholds(self):
        return self._thresholds

    def getQuantiles(self):
        # NOTE: this method was previously (identically) defined twice in
        # this class; the duplicate has been removed
        return self._quantiles

    def getName(self):
        # Text files do not carry a dataset name
        return "Unknown"

    def getStations(self):
        return self._stations

    def getScores(self, metric):
        """Return the score array for 'metric'.

        Recognized names: "obs", "fcst", "pit", "p<threshold>",
        "q<quantile>", "Offset", "Date", "Location", "Lat", "Lon", "Elev".
        Unknown names are reported via Util.error().
        """
        if (metric == "obs"):
            return self._obs
        elif (metric == "fcst"):
            return self._fcst
        elif (metric == "pit"):
            if (self._pit is None):
                Util.error("File does not contain 'pit'")
            return self._pit
        elif (metric[0] == "p"):
            # Cumulative probability at a threshold, e.g. "p10"
            threshold = float(metric[1:])
            I = np.where(abs(self._thresholds - threshold) < 0.0001)[0]
            if (len(I) == 0):
                Util.error("Cannot find " + metric)
            elif (len(I) > 1):
                Util.error("Could not find unique threshold: " +
                           str(threshold))
            return self._cdf[:, :, :, I[0]]
        elif (metric[0] == "q"):
            # Forecast value at a quantile level, e.g. "q0.9"
            quantile = float(metric[1:])
            I = np.where(abs(self._quantiles - quantile) < 0.0001)[0]
            if (len(I) == 0):
                Util.error("Cannot find " + metric)
            elif (len(I) > 1):
                Util.error("Could not find unique quantile: " + str(quantile))
            return self._x[:, :, :, I[0]]
        elif (metric == "Offset"):
            return self._offsets
        elif (metric == "Date"):
            return self._dates
        elif (metric == "Location"):
            stations = np.zeros(len(self._stations), 'float')
            for i in range(0, len(self._stations)):
                stations[i] = self._stations[i].id()
            return stations
        elif (metric in ["Lat", "Lon", "Elev"]):
            values = np.zeros(len(self._stations), 'float')
            for i in range(0, len(self._stations)):
                station = self._stations[i]
                if (metric == "Lat"):
                    values[i] = station.lat()
                elif (metric == "Lon"):
                    values[i] = station.lon()
                elif (metric == "Elev"):
                    values[i] = station.elev()
            return values
        else:
            Util.error("Cannot find " + metric)

    def getDims(self, metric):
        """Return the dimension names that 'metric' varies along."""
        if (metric in ["Date", "Offset", "Location"]):
            return [metric]
        elif (metric in ["Lat", "Lon", "Elev"]):
            return ["Location"]
        else:
            return ["Date", "Offset", "Location"]

    def getDates(self):
        return self._dates

    def getOffsets(self):
        return self._offsets

    def getMetrics(self):
        """Return the names of all scores present in this file."""
        metrics = ["obs", "fcst"]
        for quantile in self._quantiles:
            metrics.append("q%g" % quantile)
        for threshold in self._thresholds:
            metrics.append("p%g" % threshold)
        if (self._pit is not None):
            metrics.append("pit")
        return metrics

    def getVariables(self):
        """Return all metrics plus the coordinate variables."""
        metrics = self.getMetrics() + [
            "Date", "Offset", "Location", "Lat", "Lon", "Elev"
        ]
        return metrics

    def getUnits(self):
        return self._units

    def getVariable(self):
        return self._variable

    @staticmethod
    def isValid(filename):
        # Any file can potentially be parsed as text
        return True
Example #21
0
 def getQuantiles(self):
     """Return the quantile levels stored in the file."""
     quantiles = self._file.variables["quantiles"]
     return Util.clean(quantiles)
Example #22
0
 def getObs(self):
     """Return the observation values stored in the file."""
     observations = self._file.variables["obs"]
     return Util.clean(observations)
Example #23
0
def run(argv):
    ############
    # Defaults #
    ############
    ifiles = list()
    ofile = None
    metric = None
    locations = None
    latlonRange = None
    training = 0
    thresholds = None
    dates = None
    climFile = None
    climType = "subtract"
    leg = None
    ylabel = None
    xlabel = None
    title = None
    offsets = None
    xdim = None
    sdim = None
    figSize = None
    dpi = 100
    showText = False
    showMap = False
    noMargin = False
    binType = None
    markerSize = None
    lineWidth = None
    tickFontSize = None
    labFontSize = None
    legFontSize = None
    type = "plot"
    XRotation = None
    MajorLength = None
    MinorLength = None
    MajorWidth = None
    Bottom = None
    Top = None
    Right = None
    Left = None
    Pad = None
    showPerfect = None
    cType = "mean"
    doHist = False
    doSort = False
    doAcc = False
    xlim = None
    ylim = None
    clim = None
    version = None
    listThresholds = False
    listQuantiles = False
    listLocations = False
    listDates = False

    # Read command line arguments
    i = 1
    while (i < len(argv)):
        arg = argv[i]
        if (arg[0] == '-'):
            # Process option
            if (arg == "-nomargin"):
                noMargin = True
            elif (arg == "--version"):
                version = True
            elif (arg == "--list-thresholds"):
                listThresholds = True
            elif (arg == "--list-quantiles"):
                listQuantiles = True
            elif (arg == "--list-locations"):
                listLocations = True
            elif (arg == "--list-dates"):
                listDates = True
            elif (arg == "-sp"):
                showPerfect = True
            elif (arg == "-hist"):
                doHist = True
            elif (arg == "-acc"):
                doAcc = True
            elif (arg == "-sort"):
                doSort = True
            else:
                if (arg == "-f"):
                    ofile = argv[i + 1]
                elif (arg == "-l"):
                    locations = Util.parseNumbers(argv[i + 1])
                elif (arg == "-llrange"):
                    latlonRange = Util.parseNumbers(argv[i + 1])
                elif (arg == "-t"):
                    training = int(argv[i + 1])
                elif (arg == "-x"):
                    xdim = argv[i + 1]
                elif (arg == "-o"):
                    offsets = Util.parseNumbers(argv[i + 1])
                elif (arg == "-leg"):
                    leg = unicode(argv[i + 1], 'utf8')
                elif (arg == "-ylabel"):
                    ylabel = unicode(argv[i + 1], 'utf8')
                elif (arg == "-xlabel"):
                    xlabel = unicode(argv[i + 1], 'utf8')
                elif (arg == "-title"):
                    title = unicode(argv[i + 1], 'utf8')
                elif (arg == "-b"):
                    binType = argv[i + 1]
                elif (arg == "-type"):
                    type = argv[i + 1]
                elif (arg == "-fs"):
                    figSize = argv[i + 1]
                elif (arg == "-dpi"):
                    dpi = int(argv[i + 1])
                elif (arg == "-d"):
                    # Either format is ok:
                    # -d 20150101 20150103
                    # -d 20150101:20150103
                    if (i + 2 < len(argv) and argv[i + 2].isdigit()):
                        dates = Util.parseNumbers(
                            "%s:%s" % (argv[i + 1], argv[i + 2]), True)
                        i = i + 1
                    else:
                        dates = Util.parseNumbers(argv[i + 1], True)
                elif (arg == "-c"):
                    climFile = argv[i + 1]
                    climType = "subtract"
                elif (arg == "-C"):
                    climFile = argv[i + 1]
                    climType = "divide"
                elif (arg == "-xlim"):
                    xlim = Util.parseNumbers(argv[i + 1])
                elif (arg == "-ylim"):
                    ylim = Util.parseNumbers(argv[i + 1])
                elif (arg == "-clim"):
                    clim = Util.parseNumbers(argv[i + 1])
                elif (arg == "-s"):
                    sdim = argv[i + 1]
                elif (arg == "-ct"):
                    cType = argv[i + 1]
                elif (arg == "-r"):
                    thresholds = Util.parseNumbers(argv[i + 1])
                elif (arg == "-ms"):
                    markerSize = float(argv[i + 1])
                elif (arg == "-lw"):
                    lineWidth = float(argv[i + 1])
                elif (arg == "-tickfs"):
                    tickFontSize = float(argv[i + 1])
                elif (arg == "-labfs"):
                    labFontSize = float(argv[i + 1])
                elif (arg == "-legfs"):
                    legFontSize = float(argv[i + 1])
                elif (arg == "-xrot"):
                    XRotation = float(argv[i + 1])
                elif (arg == "-majlth"):
                    MajorLength = float(argv[i + 1])
                elif (arg == "-minlth"):
                    MinorLength = float(argv[i + 1])
                elif (arg == "-majwid"):
                    MajorWidth = float(argv[i + 1])
                elif (arg == "-bot"):
                    Bottom = float(argv[i + 1])
                elif (arg == "-top"):
                    Top = float(argv[i + 1])
                elif (arg == "-right"):
                    Right = float(argv[i + 1])
                elif (arg == "-left"):
                    Left = float(argv[i + 1])
                elif (arg == "-pad"):
                    Pad = argv[i + 1]
                elif (arg == "-m"):
                    metric = argv[i + 1]
                else:
                    Util.error("Flag '" + argv[i] + "' not recognized")
                i = i + 1
        else:
            ifiles.append(argv[i])
        i = i + 1

    if (version):
        print "Version: " + Version.__version__
        return

    # Deal with legend entries
    if (leg is not None):
        leg = leg.split(',')
        for i in range(0, len(leg)):
            leg[i] = leg[i].replace('_', ' ')

    if (latlonRange is not None and len(latlonRange) != 4):
        Util.error("-llRange <values> must have exactly 4 values")

    if (len(ifiles) > 0):
        data = Data.Data(ifiles,
                         clim=climFile,
                         climType=climType,
                         dates=dates,
                         offsets=offsets,
                         locations=locations,
                         latlonRange=latlonRange,
                         training=training)
    else:
        data = None

    if (listThresholds or listQuantiles or listLocations or listDates):
        if (len(ifiles) == 0):
            Util.error(
                "Files are required in order to list thresholds or quantiles")
        if (listThresholds):
            print "Thresholds:",
            for threshold in data.getThresholds():
                print "%g" % threshold,
            print ""
        if (listQuantiles):
            print "Quantiles:",
            for quantile in data.getQuantiles():
                print "%g" % quantile,
            print ""
        if (listLocations):
            print "    id     lat     lon    elev"
            for station in data.getStations():
                print "%6d %7.2f %7.2f %7.1f" % (station.id(), station.lat(),
                                                 station.lon(), station.elev())
            print ""
        if (listDates):
            dates = data.getAxisValues("date")
            dates = Util.convertToYYYYMMDD(dates)
            for date in dates:
                print "%d" % date
            print ""
        return
    elif (len(argv) == 1 or len(ifiles) == 0 or metric is None):
        showDescription(data)
        return

    if (figSize is not None):
        figSize = figSize.split(',')
        if (len(figSize) != 2):
            print "-fs figSize must be in the form: width,height"
            sys.exit(1)

    m = None

    # Handle special plots
    if (doHist):
        pl = Output.Hist(metric)
    elif (doSort):
        pl = Output.Sort(metric)
    elif (metric == "pithist"):
        m = Metric.Pit("pit")
        pl = Output.PitHist(m)
    elif (metric == "obsfcst"):
        pl = Output.ObsFcst()
    elif (metric == "timeseries"):
        pl = Output.TimeSeries()
    elif (metric == "meteo"):
        pl = Output.Meteo()
    elif (metric == "qq"):
        pl = Output.QQ()
    elif (metric == "cond"):
        pl = Output.Cond()
    elif (metric == "against"):
        pl = Output.Against()
    elif (metric == "count"):
        pl = Output.Count()
    elif (metric == "scatter"):
        pl = Output.Scatter()
    elif (metric == "change"):
        pl = Output.Change()
    elif (metric == "spreadskill"):
        pl = Output.SpreadSkill()
    elif (metric == "taylor"):
        pl = Output.Taylor()
    elif (metric == "error"):
        pl = Output.Error()
    elif (metric == "freq"):
        pl = Output.Freq()
    elif (metric == "roc"):
        pl = Output.Roc()
    elif (metric == "droc"):
        pl = Output.DRoc()
    elif (metric == "droc0"):
        pl = Output.DRoc0()
    elif (metric == "drocnorm"):
        pl = Output.DRocNorm()
    elif (metric == "reliability"):
        pl = Output.Reliability()
    elif (metric == "invreliability"):
        pl = Output.InvReliability()
    elif (metric == "igncontrib"):
        pl = Output.IgnContrib()
    elif (metric == "economicvalue"):
        pl = Output.EconomicValue()
    elif (metric == "marginal"):
        pl = Output.Marginal()
    else:
        # Standard plots
        # Attempt at automating
        metrics = Metric.getAllMetrics()
        m = None
        for mm in metrics:
            if (metric == mm[0].lower() and mm[1].isValid()):
                m = mm[1]()
                break
        if (m is None):
            m = Metric.Default(metric)

        m.setAggregator(cType)

        # Output type
        if (type == "plot" or type == "text" or type == "map"
                or type == "maprank"):
            pl = Output.Default(m)
            pl.setShowAcc(doAcc)
        else:
            Util.error("Type not understood")

    # Rest dimension of '-x' is not allowed
    if (xdim is not None and not pl.supportsX()):
        Util.warning(metric + " does not support -x. Ignoring it.")
        xdim = None

    # Reset dimension if 'threshold' is not allowed
    if (xdim == "threshold"
            and ((not pl.supportsThreshold()) or (not m.supportsThreshold()))):
        Util.warning(metric + " does not support '-x threshold'. Ignoring it.")
        thresholds = None
        xdim = None

    # Create thresholds if needed
    if ((thresholds is None) and (pl.requiresThresholds() or
                                  (m is not None and m.requiresThresholds()))):
        data.setAxis("none")
        obs = data.getScores("obs")[0]
        fcst = data.getScores("fcst")[0]
        smin = min(min(obs), min(fcst))
        smax = max(max(obs), max(fcst))
        thresholds = np.linspace(smin, smax, 10)
        Util.warning("Missing '-r <thresholds>'. Automatically setting\
            thresholds.")

    # Set plot parameters
    if (markerSize is not None):
        pl.setMarkerSize(markerSize)
    if (lineWidth is not None):
        pl.setLineWidth(lineWidth)
    if (labFontSize is not None):
        pl.setLabFontSize(labFontSize)
    if (legFontSize is not None):
        pl.setLegFontSize(legFontSize)
    if (tickFontSize is not None):
        pl.setTickFontSize(tickFontSize)
    if (XRotation is not None):
        pl.setXRotation(XRotation)
    if (MajorLength is not None):
        pl.setMajorLength(MajorLength)
    if (MinorLength is not None):
        pl.setMinorLength(MinorLength)
    if (MajorWidth is not None):
        pl.setMajorWidth(MajorWidth)
    if (Bottom is not None):
        pl.setBottom(Bottom)
    if (Top is not None):
        pl.setTop(Top)
    if (Right is not None):
        pl.setRight(Right)
    if (Left is not None):
        pl.setLeft(Left)
    if (Pad is not None):
        pl.setPad(None)
    if (binType is not None):
        pl.setBinType(binType)
    if (showPerfect is not None):
        pl.setShowPerfect(showPerfect)
    if (xlim is not None):
        pl.setXLim(xlim)
    if (ylim is not None):
        pl.setYLim(ylim)
    if (clim is not None):
        pl.setCLim(clim)
    pl.setFilename(ofile)
    pl.setThresholds(thresholds)
    pl.setLegend(leg)
    pl.setFigsize(figSize)
    pl.setDpi(dpi)
    pl.setAxis(xdim)
    pl.setShowMargin(not noMargin)
    pl.setYlabel(ylabel)
    pl.setXlabel(xlabel)
    pl.setTitle(title)

    if (type == "text"):
        pl.text(data)
    elif (type == "map"):
        pl.map(data)
    elif (type == "maprank"):
        pl.setShowRank(True)
        pl.map(data)
    else:
        pl.plot(data)
Example #24
0
 def getDates(self):
     """Return the dates stored in the file."""
     dates = self._file.variables["date"]
     return Util.clean(dates)
Example #25
0
 def getCdf(self, threshold):
     """Return the full cdf variable from the file.

     NOTE(review): the 'threshold' argument is currently ignored -- the
     whole cdf array is returned regardless; confirm whether callers are
     expected to slice out the threshold themselves.
     """
     return Util.clean(self._file.variables["cdf"])
Example #26
0
 def getScores(self, metric):
     """Return the score array for the given metric name.

     Recognized names: "obs", "fcst", "pit", "p<threshold>",
     "q<quantile>", "Offset", "Date", "Location", "Lat", "Lon", "Elev".
     Unknown names are reported via Util.error().
     """
     if (metric == "obs"):
         return self._obs
     elif (metric == "fcst"):
         return self._fcst
     elif (metric == "pit"):
         if (self._pit is None):
             Util.error("File does not contain 'pit'")
         return self._pit
     elif (metric[0] == "p"):
         # Cumulative probability at a threshold, e.g. "p10"
         # ("pit" was already handled above, so it cannot match here)
         threshold = float(metric[1:])
         # NOTE(review): assumes self._thresholds is a numpy array so the
         # subtraction broadcasts -- confirm how the attribute is built
         I = np.where(abs(self._thresholds - threshold) < 0.0001)[0]
         if (len(I) == 0):
             Util.error("Cannot find " + metric)
         elif (len(I) > 1):
             Util.error("Could not find unique threshold: " +
                        str(threshold))
         return self._cdf[:, :, :, I[0]]
     elif (metric[0] == "q"):
         # Forecast value at a quantile level, e.g. "q0.9"
         quantile = float(metric[1:])
         # NOTE(review): same numpy-array assumption for self._quantiles
         I = np.where(abs(self._quantiles - quantile) < 0.0001)[0]
         if (len(I) == 0):
             Util.error("Cannot find " + metric)
         elif (len(I) > 1):
             Util.error("Could not find unique quantile: " + str(quantile))
         return self._x[:, :, :, I[0]]
     elif (metric == "Offset"):
         return self._offsets
     elif (metric == "Date"):
         return self._dates
     elif (metric == "Location"):
         # One id per station
         stations = np.zeros(len(self._stations), 'float')
         for i in range(0, len(self._stations)):
             stations[i] = self._stations[i].id()
         return stations
     elif (metric in ["Lat", "Lon", "Elev"]):
         # One coordinate value per station
         values = np.zeros(len(self._stations), 'float')
         for i in range(0, len(self._stations)):
             station = self._stations[i]
             if (metric == "Lat"):
                 values[i] = station.lat()
             elif (metric == "Lon"):
                 values[i] = station.lon()
             elif (metric == "Elev"):
                 values[i] = station.elev()
         return values
     else:
         Util.error("Cannot find " + metric)
Example #27
0
 def getOffsets(self):
     """Return the offset values stored in the file."""
     offsets = self._file.variables["offset"]
     return Util.clean(offsets)
Example #28
0
 def getFcst(self):
     """Return the forecast values stored in the file."""
     forecasts = self._file.variables["fcst"]
     return Util.clean(forecasts)
Example #29
0
def showDescription(data=None):
    desc = "Program to compute verification scores for weather forecasts. Can be\
   used to compare forecasts from different files. In that case only dates,\
   offsets, and locations that are common to all forecast files are used."

    print textwrap.fill(desc, Util.getTextWidth())
    print ""
    print "usage: verif files -m metric [options]"
    print "       verif files [--list-thresholds] [--list-quantiles] [--list-locations]"
    print "       verif --version"
    print ""
    print Util.green("Arguments:")
    print Util.formatArgument(
        "files",
        "One or more verification files in NetCDF or text format (see 'File Formats' below)."
    )
    print Util.formatArgument(
        "-m metric", "Which verification metric to use? See 'Metrics' below.")
    print Util.formatArgument("--list-thresholds",
                              "What thresholds are available in the files?")
    print Util.formatArgument("--list-quantiles",
                              "What quantiles are available in the files?")
    print Util.formatArgument("--list-locations",
                              "What locations are available in the files?")
    print Util.formatArgument("--version", "What version of verif is this?")
    print ""
    print Util.green("Options:")
    print "Note: vectors can be entered using commas, or MATLAB syntax (i.e 3:5 is 3,4,5 and 3:2:7 is 3,5,7)"
    # Dimensions
    print Util.green("  Dimensions and subset:")
    print Util.formatArgument(
        "-d dates",
        "A vector of dates in YYYYMMDD format, e.g.  20130101:20130201.")
    print Util.formatArgument("-l locations",
                              "Limit the verification to these location IDs.")
    print Util.formatArgument(
        "-llrange range",
        "Limit the verification to locations within minlon,maxlon,minlat,maxlat."
    )
    print Util.formatArgument(
        "-o offsets", "Limit the verification to these offsets (in hours).")
    print Util.formatArgument(
        "-r thresholds",
        "Compute scores for these thresholds (only used by some metrics).")
    print Util.formatArgument(
        "-t period",
        "Allow this many days of training, i.e. remove this many days from the beginning of the verification."
    )
    print Util.formatArgument(
        "-x dim",
        "Plot this dimension on the x-axis: date, offset, year, month, location, locationId, locationElev, locationLat, locationLon, threshold, or none. Not supported by all metrics. If not specified, then a default is used based on the metric. 'none' collapses all dimensions and computes one value."
    )

    # Data manipulation
    print Util.green("  Data manipulation:")
    print Util.formatArgument(
        "-acc", "Plot accumulated values. Only works for non-derived metrics")
    print Util.formatArgument(
        "-b type",
        "One of 'below', 'within', or 'above'. For threshold plots (ets, hit, within, etc) 'below/above' computes frequency below/above the threshold, and 'within' computes the frequency between consecutive thresholds."
    )
    print Util.formatArgument(
        "-c file",
        "File containing climatology data. Subtract all forecasts and obs with climatology values."
    )
    print Util.formatArgument(
        "-C file",
        "File containing climatology data. Divide all forecasts and obs by climatology values."
    )
    print Util.formatArgument(
        "-ct type",
        "Collapsing type: 'min', 'mean', 'median', 'max', 'std', 'range', or a number between 0 and 1. Some metrics computes a value for each value on the x-axis. Which function should be used to do the collapsing? Default is 'mean'. Only supported by some metrics. A number between 0 and 1 returns a specific quantile (e.g.  0.5 is the median)"
    )
    print Util.formatArgument(
        "-hist",
        "Plot values as histogram. Only works for non-derived metrics")
    print Util.formatArgument(
        "-sort", "Plot values sorted. Only works for non-derived metrics")

    # Plot options
    print Util.green("  Plotting options:")
    print Util.formatArgument(
        "-bot value", "Bottom boundary location for saved figure [range 0-1]")
    print Util.formatArgument(
        "-clim limits", "Force colorbar limits to the two values lower,upper")
    print Util.formatArgument(
        "-dpi value", "Resolution of image in dots per inch (default 100)")
    print Util.formatArgument("-f file", "Save image to this filename")
    print Util.formatArgument(
        "-fs size", "Set figure size width,height (in inches). Default 8x6.")
    print Util.formatArgument(
        "-leg titles",
        "Comma-separated list of legend titles. Use '_' to represent space.")
    print Util.formatArgument("-lw width", "How wide should lines be?")
    print Util.formatArgument("-labfs size", "Font size for axis labels")
    print Util.formatArgument(
        "-left value", "Left boundary location for saved figure [range 0-1]")
    print Util.formatArgument(
        "-legfs size", "Font size for legend. Set to 0 to hide legend.")
    print Util.formatArgument("-majlth length", "Length of major tick marks")
    print Util.formatArgument("-majtwid width",
                              "Adjust the thickness of the major tick marks")
    print Util.formatArgument("-minlth length", "Length of minor tick marks")
    print Util.formatArgument("-ms size", "How big should markers be?")
    print Util.formatArgument(
        "-nomargin", "Remove margins (whitespace) in the plot not x[i] <= T.")
    print Util.formatArgument(
        "-right value", "Right boundary location for saved figure [range 0-1]")
    print Util.formatArgument("-sp",
                              "Show a line indicating the perfect score")
    print Util.formatArgument("-tickfs size", "Font size for axis ticks")
    print Util.formatArgument("-title text", "Custom title to chart top")
    print Util.formatArgument(
        "-top value", "Top boundary location for saved figure [range 0-1]")
    print Util.formatArgument(
        "-type type", "One of 'plot' (default), 'text', 'map', or 'maprank'.")
    print Util.formatArgument("-xlabel text", "Custom x-axis label")
    print Util.formatArgument(
        "-xlim limits", "Force x-axis limits to the two values lower,upper")
    print Util.formatArgument("-xrot value",
                              "Rotation angle for x-axis labels")
    print Util.formatArgument("-ylabel text", "Custom y-axis label")
    print Util.formatArgument(
        "-ylim limits", "Force y-axis limits to the two values lower,upper")
    print ""
    metrics = Metric.getAllMetrics()
    outputs = Output.getAllOutputs()
    print Util.green("Metrics (-m):")
    metricOutputs = metrics + outputs
    metricOutputs.sort(key=lambda x: x[0].lower(), reverse=False)
    for m in metricOutputs:
        name = m[0].lower()
        if (m[1].isValid()):
            desc = m[1].summary()
            print Util.formatArgument(name, desc)
            # print "   %-14s%s" % (name, textwrap.fill(desc, 80).replace('\n', '\n                 ')),
            # print ""
    if (data is not None):
        print ""
        print "  Or one of the following, which plots the raw score from the file:"
        print " ",
        metrics = data.getMetrics()
        for metric in metrics:
            print metric,
    print ""
    print ""
    print Util.green("File formats:")
    print Input.Text.description()
    print Input.Comps.description()
Example #30
0
 def getThresholds(self):
     """Return the file's 'thresholds' variable, passed through Util.clean."""
     raw = self._file.variables["thresholds"]
     return Util.clean(raw)