Example #1
0
    def plotStatistics(self, output_file):
        """
        Plot per-cell distribution and autocorrelation statistics.

        For every cell number from 0 to :attr:`maxCell` (inclusive), the
        values returned by :meth:`extractParameter` and their anomalies
        (values minus the cell mean) are summarised by a normalised
        histogram and by ``acf(..., 10)``. The across-cell mean and the
        5th/95th percentiles of each summary are added to a
        :class:`RangeCurve` figure, which is then saved.

        :param str output_file: Output path without extension; ``.png``
                                is appended.
        """
        nCells = self.maxCell + 1
        nLags = 11  # rows stored per cell from ``acf(..., 10)``

        # Bin edges come from the complete data set so that every cell is
        # histogrammed on the same axis.
        p = stats.statRemoveNum(np.array(self.param), self.missingValue)
        a = p - np.mean(p)
        bins = np.linspace(p.min(), p.max(), 50)
        abins = np.linspace(a.min(), a.max(), 50)

        # One column per cell. The loop below visits cells 0..maxCell, so
        # maxCell + 1 columns are required; with only maxCell columns and
        # an ``i - 1`` index, cell 0 landed in the last column and was
        # then overwritten by cell maxCell.
        hist = np.empty((len(bins) - 1, nCells))
        ahist = np.empty((len(abins) - 1, nCells))
        alpha = np.empty((nLags, nCells))
        aalpha = np.empty((nLags, nCells))
        x = np.arange(nLags)

        for i in range(nCells):
            p = self.extractParameter(i, 0)
            a = p - np.mean(p)
            # ``density`` replaces the removed ``normed`` keyword.
            hist[:, i] = np.histogram(p, bins, density=True)[0]
            ahist[:, i] = np.histogram(a, abins, density=True)[0]
            alpha[:, i] = acf(p, 10)
            aalpha[:, i] = acf(a, 10)

        mhist = np.mean(hist, axis=1)
        uhist = percentile(hist, per=95, axis=1)
        lhist = percentile(hist, per=5, axis=1)

        mahist = np.mean(ahist, axis=1)
        uahist = percentile(ahist, per=95, axis=1)
        lahist = percentile(ahist, per=5, axis=1)

        malpha = np.mean(alpha, axis=1)
        ualpha = percentile(alpha, per=95, axis=1)
        lalpha = percentile(alpha, per=5, axis=1)

        maalpha = np.mean(aalpha, axis=1)
        uaalpha = percentile(aalpha, per=95, axis=1)
        laalpha = percentile(aalpha, per=5, axis=1)

        # Histogram curves are drawn against the left edge of each bin.
        fig = RangeCurve()
        fig.add(bins[:-1], mhist, uhist, lhist, "Values", "Probability", "")
        fig.add(abins[:-1], mahist, uahist, lahist, "Anomalies", "Probability",
                "")
        fig.add(x, malpha, ualpha, lalpha, "Lag", "Autocorrelation",
                "ACF of values")
        fig.add(x, maalpha, uaalpha, laalpha, "Lag", "Autocorrelation",
                "ACF of anomalies")
        fig.plot()

        saveFigure(fig, output_file + '.png')
Example #2
0
    def plotStatistics(self, output_file):
        """
        Plot per-cell distribution and autocorrelation statistics.

        Each cell number from 0 to :attr:`maxCell` (inclusive) contributes
        one normalised histogram and one ``acf(..., 10)`` result for both
        its values (from :meth:`extractParameter`) and its anomalies
        (values minus the cell mean). The mean and the 5th/95th
        percentiles across cells are drawn on a :class:`RangeCurve`
        figure, which is then saved.

        :param str output_file: Output path without extension; ``.png``
                                is appended.
        """
        nCells = self.maxCell + 1
        nLags = 11  # rows stored per cell from ``acf(..., 10)``

        # Shared bin edges, derived from the whole data set, keep the
        # per-cell histograms comparable bin by bin.
        p = stats.statRemoveNum(np.array(self.param), self.missingValue)
        a = p - np.mean(p)
        bins = np.linspace(p.min(), p.max(), 50)
        abins = np.linspace(a.min(), a.max(), 50)

        # Cells 0..maxCell are processed, so each array needs
        # maxCell + 1 columns. Sizing them with maxCell and indexing with
        # ``i - 1`` made cell 0 wrap to the last column, where cell
        # maxCell later overwrote it.
        hist = np.empty((len(bins) - 1, nCells))
        ahist = np.empty((len(abins) - 1, nCells))
        alpha = np.empty((nLags, nCells))
        aalpha = np.empty((nLags, nCells))
        x = np.arange(nLags)

        for i in range(nCells):
            p = self.extractParameter(i, 0)
            a = p - np.mean(p)
            # ``density`` replaces the removed ``normed`` keyword.
            hist[:, i] = np.histogram(p, bins, density=True)[0]
            ahist[:, i] = np.histogram(a, abins, density=True)[0]
            alpha[:, i] = acf(p, 10)
            aalpha[:, i] = acf(a, 10)

        mhist = np.mean(hist, axis=1)
        uhist = percentile(hist, per=95, axis=1)
        lhist = percentile(hist, per=5, axis=1)

        mahist = np.mean(ahist, axis=1)
        uahist = percentile(ahist, per=95, axis=1)
        lahist = percentile(ahist, per=5, axis=1)

        malpha = np.mean(alpha, axis=1)
        ualpha = percentile(alpha, per=95, axis=1)
        lalpha = percentile(alpha, per=5, axis=1)

        maalpha = np.mean(aalpha, axis=1)
        uaalpha = percentile(aalpha, per=95, axis=1)
        laalpha = percentile(aalpha, per=5, axis=1)

        # Histogram curves are drawn against the left edge of each bin.
        fig = RangeCurve()
        fig.add(bins[:-1], mhist, uhist, lhist, "Values", "Probability", "")
        fig.add(abins[:-1], mahist, uahist, lahist, "Anomalies",
                "Probability", "")
        fig.add(x, malpha, ualpha, lalpha, "Lag", "Autocorrelation",
                "ACF of values")
        fig.add(x, maalpha, uaalpha, laalpha, "Lag", "Autocorrelation",
                "ACF of anomalies")
        fig.plot()

        saveFigure(fig, output_file + '.png')
Example #3
0
    def minPressureLat(self, pAllData, latData, latMin=-40., latMax=0.):
        """
        Plot the minimum central pressures as a function of latitude.

        Latitudes are rounded to whole numbers and, for each value from
        ``latMin`` to ``latMax`` in steps of 1, the lowest pressure found
        at that latitude is recorded. Latitudes without any usable
        pressure are given 1020. The curve is saved through
        :meth:`savefig` and the (latitude, pressure) pairs are written to
        ``min_pressure_lat.csv`` under :attr:`outpath`.

        :param pAllData: Central pressure values, indexed with positions
                         found in ``latData`` (so presumably an array
                         aligned with it -- confirm against callers).
                         Values are filtered with
                         ``stats.statRemoveNum(..., 0)`` before the
                         minimum is taken.
        :param latData: Latitude of each observation.
        :param float latMin: Lowest latitude to evaluate.
        :param float latMax: Highest latitude to evaluate.
        """
        rLat = numpy.round(latData, 0)
        # The extra 0.1 makes ``latMax`` itself part of the range.
        lats = numpy.arange(latMin, latMax + 0.1, 1)
        minP = numpy.zeros(len(lats))

        for n, l in enumerate(lats):
            i = numpy.where(rLat == l)[0]
            lowest = 1020.
            # The original test read ``len(i > 0)``: the length of a
            # boolean array, which only worked because that length equals
            # ``len(i)``.
            if len(i) > 0:
                pvals = stats.statRemoveNum(pAllData[i], 0)
                if len(pvals) > 0:
                    lowest = pvals.min()
            minP[n] = lowest

        pyplot.figure(self.figurenum())
        pyplot.plot(lats, minP, 'r-', linewidth=2, label=r'Min $P_{centre}$')
        pyplot.xlim(latMin, latMax)
        pyplot.ylim(800, 1020)

        pyplot.xlabel('Latitude', fontsize=10)
        pyplot.ylabel('Minimum central pressure (hPa)', fontsize=10)
        pyplot.legend(loc=3)
        pyplot.grid(True)

        self.savefig("min_pressure_lat")

        # Two-column table: latitude, minimum pressure.
        x = numpy.zeros((len(lats), 2))
        x[:, 0] = lats
        x[:, 1] = minP
        files.flSaveFile(os.path.join(self.outpath, 'min_pressure_lat.csv'), x,
                         delimiter=',', fmt='%6.2f')
Example #4
0
    def extractParameter(self, cellNum):
        """
        Extract the cyclone parameter data for the given cell.

        If the population of a cell is insufficient for generating a
        PDF, the bounds of the cell are expanded until the population is
        sufficient. The cell is expanded further while all of its values
        are identical, since a KDE bandwidth of 0 cannot be used.

        Null/missing values are removed.

        :param int cellNum: The cell number to process.
        :returns: None. The :attr:`parameter` attribute is updated.
        :raises InvalidArguments: if ``stats.validCellNum`` rejects the
                                  cell number.
        :raises StopIteration: if :meth:`_expandCell` can no longer
                               enlarge the cell before the conditions
                               above are met.
        """
        if not stats.validCellNum(cellNum, self.gridLimit, self.gridSpace):
            self.logger.critical(
                "Invalid input on cellNum: cell number %i is out of range",
                cellNum)
            raise InvalidArguments(
                'Invalid input on cellNum: cell number is out of range')

        lon = self.lonLat[:, 0]
        lat = self.lonLat[:, 1]
        cellLon, cellLat = stats.getCellLonLat(cellNum, self.gridLimit,
                                               self.gridSpace)

        def select(bounds):
            """Return the valid parameter values inside ``bounds``."""
            wLon, eLon, nLat, sLat = bounds
            indij = np.where((lat >= sLat) & (lat < nLat) &
                             (lon >= wLon) & (lon < eLon))
            return stats.statRemoveNum(np.array(self.pList[indij]),
                                       self.missingValue)

        def expand(bounds):
            """Enlarge ``bounds`` once; also report whether they changed."""
            wLon, eLon, nLat, sLat = self._expandCell(lon, lat, *bounds)
            grown = (wLon, eLon, nLat, sLat)
            changed = not all(new == old for new, old in zip(grown, bounds))
            return grown, changed

        # Bounds are kept in the order (west, east, north, south).
        bounds = (cellLon, cellLon + self.gridSpace['x'],
                  cellLat, cellLat - self.gridSpace['y'])
        self.parameter = select(bounds)

        while np.size(self.parameter) <= self.minSamplesCell:
            self.logger.debug("Insufficient samples. Increasing the size of the cell")
            bounds, changed = expand(bounds)
            if not changed:
                errMsg = ("Insufficient grid points in selected domain to "
                          "estimate storm statistics - please select a larger "
                          "domain. Samples = %i / %i"
                          % (np.size(self.parameter), self.minSamplesCell))
                self.logger.critical(errMsg)
                raise StopIteration(errMsg)
            self.parameter = select(bounds)

        # Check to see if all values in the array are the same. If the
        # values are the same, bandwidth would be 0, and therefore KDE
        # cannot proceed
        while self.parameter.max() == self.parameter.min():
            self.logger.debug("Parameter values appear to be the same. Increasing the size of the cell")
            bounds, changed = expand(bounds)
            if not changed:
                errMsg = ("Insufficient grid points in selected domain to "
                          "estimate storm statistics - please select a "
                          "larger domain.")
                self.logger.critical(errMsg)
                raise StopIteration(errMsg)
            self.parameter = select(bounds)

        self.logger.debug("Number of valid observations in cell %s : %s",
                          str(cellNum), str(np.size(self.parameter)))
Example #5
0
    def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                    cdfParameters=None, angular=False, periodic=False,
                    missingValue=getattr(sys, 'maxint', sys.maxsize)):
        """
        Generate a PDF and CDF for a given parameter set using the
        method of kernel density estimators.
        Optionally return the PDF and CDF as an array, or write both
        to separate files.

        :param parameters: Parameter values. A string is treated as the
                           path of a file to load with ``flLoadFile``;
                           anything else must be an array of values.
        :param kdeStep: Spacing of the grid on which the distributions
                        are evaluated.
        :param kdeParameters: If given, file to which the grid and PDF
                              are saved; nothing is returned.
        :param cdfParameters: File to which the grid and CDF are saved.
                              Only used (and then required) when
                              ``kdeParameters`` is given.
        :param angular: If true, the grid spans 0 to 360.
        :param periodic: ``False``, or the period of the data. Periodic
                         data are replicated one period either side
                         before the PDF is estimated.
        :param missingValue: Value marking missing data. Defaults to
                             ``sys.maxint`` where it exists and to
                             ``sys.maxsize`` otherwise.
        :returns: ``None`` when ``kdeParameters`` is given, otherwise an
                  array whose columns are the grid, the PDF and the CDF.
        :raises IndexError: if an array with at most one member is given.
        :raises ValueError: if the grid has fewer than two points.
        """

        self.logger.debug("Running generateKDE")
        if isinstance(parameters, str):
            self.parameters = stats.statRemoveNum(
                flLoadFile(parameters, '%', ','), missingValue)
        else:
            if parameters.size <= 1:
                self.logger.error("Insufficient members in parameter list")
                raise IndexError("Insufficient members in parameter list")

            self.parameters = stats.statRemoveNum(parameters, missingValue)

        if angular:
            xmin = 0.0
            xmax = 360.0
        elif periodic:
            xmin = 0.0
            xmax = periodic
        else:
            xmin = self.parameters.min()
            xmax = self.parameters.max()

        self.logger.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                          xmin, xmax, kdeStep)
        if periodic:
            # Replicate grid and data one period either side so that the
            # estimate wraps around the ends of the period.
            x = np.arange(1, periodic + 1, kdeStep)
            nPeriod = x.size
            self.grid = np.concatenate([x - periodic, x, x + periodic])
            self.parameters = np.concatenate([self.parameters - periodic,
                                              self.parameters,
                                              self.parameters + periodic])
        else:
            self.grid = np.arange(xmin, xmax, kdeStep)

        if self.grid.size < 2:
            self.logger.critical("Grid for CDF generation is a single value")
            self.logger.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                                 xmin, xmax, kdeStep)
            raise ValueError("Grid for CDF generation is a single value")

        bw = KPDF.UPDFOptimumBandwidth(self.parameters)
        self.pdf = self._generatePDF(self.grid, bw, self.parameters)

        if periodic:
            # Keep only the middle copy of the grid. Its extent is the
            # length of ``x``; ``periodic/kdeStep`` is a float (invalid as
            # an index) for a float step, and picks the wrong window when
            # the step does not divide the period. The factor of 3
            # restores the density spread over the three copies.
            self.pdf = 3.0 * self.pdf[nPeriod:2 * nPeriod]
            self.grid = self.grid[nPeriod:2 * nPeriod]

        self.cy = stats.cdf(self.grid, self.pdf)
        if kdeParameters is None:
            return np.transpose(np.array([self.grid, self.pdf, self.cy]))
        else:
            # Assume both kdeParameters and cdfParameters are defined as files:
            self.logger.debug("Saving KDE and CDF data to files")
            flSaveFile(kdeParameters,
                       np.transpose(np.array([self.grid, self.pdf])))
            flSaveFile(cdfParameters,
                       np.transpose(np.array([self.grid, self.cy])))
Example #6
0
    def generateKDE(self,
                    parameters,
                    kdeStep,
                    kdeParameters=None,
                    cdfParameters=None,
                    angular=False,
                    periodic=False,
                    missingValue=sys.maxsize):
        """
        Generate a PDF and CDF for a given parameter set using the
        method of kernel density estimators. Optionally return the PDF
        and CDF as an array, or write both to separate files.

        :param parameters: Parameter values. If a string is given,
                           then it is the path to a file containing
                           the values. If an array is passed, then it
                           should hold the parameter values.
        :param float kdeStep: Increment of the ordinate values at which
                              the distributions will be calculated
                              (required; there is no default).
        :param str kdeParameters: Optional. If given, the grid and PDF
                                  are saved to a file with this name
                                  and nothing is returned.
        :param str cdfParameters: File to which the grid and CDF are
                                  saved. Only used (and then required)
                                  when ``kdeParameters`` is given.
        :param angular: Does the data represent an angular measure
                        (e.g. bearing). If so, the grid spans 0 to 360.
        :type  angular: boolean, default=``False``
        :param periodic: Does the data represent some form of periodic
                         data (e.g. day of year). If given, it should
                         be the period of the data (e.g. for annual data,
                         ``periodic=365``).
        :type  periodic: boolean or int, default=``False``
        :param missingValue: Missing values have this value (default
                             :attr:`sys.maxsize`).

        :returns: If ``kdeParameters`` is given, returns ``None``
                  (data are saved to file), otherwise
                  :class:`numpy.ndarray` of the parameter grid, the PDF
                  and CDF.
        :raises IndexError: if an array with at most one member is given.
        :raises ValueError: if the grid has fewer than two points.

        """

        LOG.debug("Running generateKDE")
        if isinstance(parameters, str):
            self.parameters = stats.statRemoveNum(
                flLoadFile(parameters, '%', ','), missingValue)
        else:
            if parameters.size <= 1:
                LOG.error("Insufficient members in parameter list")
                raise IndexError("Insufficient members in parameter list")

            self.parameters = stats.statRemoveNum(parameters, missingValue)

        if angular:
            xmin = 0.0
            xmax = 360.0
        elif periodic:
            xmin = 0.0
            xmax = periodic
        else:
            xmin = self.parameters.min()
            xmax = self.parameters.max()

        LOG.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f", xmin, xmax,
                  kdeStep)
        if periodic:
            # Replicate grid and data one period either side so that the
            # estimate wraps around the ends of the period.
            x = np.arange(1, periodic + 1, kdeStep)
            nPeriod = x.size
            self.grid = np.concatenate([x - periodic, x, x + periodic])
            self.parameters = np.concatenate([
                self.parameters - periodic, self.parameters,
                self.parameters + periodic
            ])
        else:
            self.grid = np.arange(xmin, xmax, kdeStep)

        if self.grid.size < 2:
            LOG.critical("Grid for CDF generation is a single value")
            LOG.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f", xmin, xmax,
                         kdeStep)
            raise ValueError("Grid for CDF generation is a single value")

        bw = stats.bandwidth(self.parameters)
        self.pdf = self._generatePDF(self.grid, bw, self.parameters)

        if periodic:
            # Keep only the middle copy of the grid. Its extent is the
            # length of ``x``; ``int(periodic / kdeStep)`` selects a
            # shifted window whenever the step does not divide the period
            # exactly or the quotient rounds down. The factor of 3
            # restores the density spread over the three copies.
            self.pdf = 3.0 * self.pdf[nPeriod:2 * nPeriod]
            self.grid = self.grid[nPeriod:2 * nPeriod]

        self.cy = stats.cdf(self.grid, self.pdf)
        if kdeParameters is None:
            return np.transpose(np.array([self.grid, self.pdf, self.cy]))
        else:
            # Assume both kdeParameters and cdfParameters are defined as files:
            LOG.debug("Saving KDE and CDF data to files")
            flSaveFile(kdeParameters,
                       np.transpose(np.array([self.grid, self.pdf])))
            flSaveFile(cdfParameters,
                       np.transpose(np.array([self.grid, self.cy])))
Example #7
0
    def extractParameter(self, cellNum, onLand):
        """
        Extracts the cyclone parameter data for the given cell.
        If the population of a cell is insufficient for generating a
        PDF, the bounds of the cell are expanded until the population is
        sufficient. The cell is expanded further while all of its values
        are identical, since a KDE bandwidth of 0 cannot be used.

        Null/missing values are removed.

        :param int cellNum: The cell number to process.
        :param onLand: If true, only points whose land/sea flag is
                       positive are used; otherwise only points whose
                       flag is zero. When the land selection cannot be
                       satisfied, the method falls back to
                       ``onLand=False`` and warns once.
        :returns: Array of valid parameter values for the (possibly
                  expanded) cell.
        :raises InvalidArguments: if the cell number is not valid
                                  (i.e. if it is outside the possible
                                  range of cell numbers).
        :raises StopIteration: if the cell cannot be expanded any
                               further for the ``onLand=False`` case.

        """

        if not stats.validCellNum(cellNum, self.gridLimit, self.gridSpace):
            self.logger.critical(
                "Invalid input on cellNum: cell number %i is out of range",
                cellNum)
            raise InvalidArguments(
                'Invalid input on cellNum: cell number %i is out of range'
                % cellNum)
        cellLon, cellLat = stats.getCellLonLat(cellNum, self.gridLimit,
                                               self.gridSpace)

        lon = self.lonLat[:, 0]
        lat = self.lonLat[:, 1]
        lsflag = self.lonLat[:, 2]
        surface = (lsflag > 0) if onLand else (lsflag == 0)

        def select(bounds):
            """Return the valid values inside ``bounds`` for this surface."""
            wLon, eLon, nLat, sLat = bounds
            ij = np.where((lat >= sLat) & (lat < nLat) &
                          (lon >= wLon) & (lon < eLon) & surface)
            return stats.statRemoveNum(np.array(self.param[ij]),
                                       self.missingValue)

        def expand(bounds):
            """Enlarge ``bounds`` once; also report whether they changed."""
            wLon, eLon, nLat, sLat = self._expandCell(lon, lat, *bounds)
            grown = (wLon, eLon, nLat, sLat)
            changed = not all(new == old for new, old in zip(grown, bounds))
            return grown, changed

        def domainExhausted():
            """Handle a cell that has reached its maximum extent."""
            if onLand:
                if not self.domain_warning_raised:
                    self.domain_warning_raised = True
                    self.logger.warning(
                        "Insufficient grid points over land in selected "
                        "domain to estimate storm statistics - reverting "
                        "to statistics for open ocean.")
                return self.extractParameter(cellNum, False)
            errMsg = ("Insufficient grid points in selected "
                      "domain to estimate storm statistics - "
                      "please select a larger domain.")
            self.logger.critical(errMsg)
            raise StopIteration(errMsg)

        # Bounds are kept in the order (west, east, north, south).
        bounds = (cellLon, cellLon + self.gridSpace['x'],
                  cellLat, cellLat - self.gridSpace['y'])
        p = select(bounds)

        while np.size(p) <= self.minSample:
            bounds, changed = expand(bounds)
            if not changed:
                return domainExhausted()
            p = select(bounds)

        # Check to see if all values in the np.array are the same. If the
        # values are the same, bandwidth would be 0, and therefore KDE
        # cannot be generated
        while p.max() == p.min():
            bounds, changed = expand(bounds)
            if not changed:
                return domainExhausted()
            p = select(bounds)
        return p
Example #8
0
    def extractParameter(self, cellNum, onLand):
        """
        Extracts the cyclone parameter data for the given cell.
        If the population of a cell is insufficient for generating a
        PDF, the bounds of the cell are expanded until the population is
        sufficient. The cell is expanded further while all of its values
        are identical, since a KDE bandwidth of 0 cannot be used.

        Null/missing values are removed.

        :param int cellNum: The cell number to process.
        :param onLand: If true, only points whose land/sea flag is
                       positive are used; otherwise only points whose
                       flag is zero. When the land selection cannot be
                       satisfied, the method falls back to
                       ``onLand=False`` and warns once.
        :returns: Array of valid parameter values for the (possibly
                  expanded) cell.
        :raises IndexError: if the cell number is not valid
                            (i.e. if it is outside the possible
                            range of cell numbers).
        :raises StopIteration: if the cell cannot be expanded any
                               further for the ``onLand=False`` case.

        """

        if not stats.validCellNum(cellNum, self.gridLimit, self.gridSpace):
            self.logger.critical(
                "Invalid input on cellNum: cell number %i is out of range",
                cellNum)
            raise IndexError(
                'Invalid input on cellNum: cell number %i is out of range'
                % cellNum)
        cellLon, cellLat = stats.getCellLonLat(cellNum, self.gridLimit,
                                               self.gridSpace)

        lon = self.lonLat[:, 0]
        lat = self.lonLat[:, 1]
        lsflag = self.lonLat[:, 2]
        surface = (lsflag > 0) if onLand else (lsflag == 0)

        def select(bounds):
            """Return the valid values inside ``bounds`` for this surface."""
            wLon, eLon, nLat, sLat = bounds
            ij = np.where((lat >= sLat) & (lat < nLat) & (lon >= wLon)
                          & (lon < eLon) & surface)
            return stats.statRemoveNum(np.array(self.param[ij]),
                                       self.missingValue)

        def expand(bounds):
            """Enlarge ``bounds`` once; also report whether they changed."""
            wLon, eLon, nLat, sLat = self._expandCell(lon, lat, *bounds)
            grown = (wLon, eLon, nLat, sLat)
            changed = not all(new == old for new, old in zip(grown, bounds))
            return grown, changed

        def domainExhausted():
            """Handle a cell that has reached its maximum extent."""
            if onLand:
                if not self.domain_warning_raised:
                    self.domain_warning_raised = True
                    self.logger.warning(
                        "Insufficient grid points over land in selected "
                        "domain to estimate storm statistics - reverting "
                        "to statistics for open ocean.")
                return self.extractParameter(cellNum, False)
            errMsg = ("Insufficient grid points in selected "
                      "domain to estimate storm statistics - "
                      "please select a larger domain.")
            self.logger.critical(errMsg)
            raise StopIteration(errMsg)

        # Bounds are kept in the order (west, east, north, south).
        bounds = (cellLon, cellLon + self.gridSpace['x'],
                  cellLat, cellLat - self.gridSpace['y'])
        p = select(bounds)

        while np.size(p) <= self.minSample:
            bounds, changed = expand(bounds)
            if not changed:
                return domainExhausted()
            p = select(bounds)

        # Check to see if all values in the np.array are the same. If the
        # values are the same, bandwidth would be 0, and therefore KDE
        # cannot be generated
        while p.max() == p.min():
            bounds, changed = expand(bounds)
            if not changed:
                return domainExhausted()
            p = select(bounds)
        return p
Example #9
0
    def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                    cdfParameters=None, angular=False, periodic=False,
                    missingValue=getattr(sys, 'maxint', sys.maxsize)):
        """
        Generate a PDF and CDF for a given parameter set using the
        method of kernel density estimators. Optionally return the PDF
        and CDF as an array, or write both to separate files.

        :param parameters: Parameter values. If a string is given,
                           then it is the path to a file containing
                           the values. If an array is passed, then it
                           should hold the parameter values.
        :param float kdeStep: Increment of the ordinate values at which
                              the distributions will be calculated
                              (required; there is no default).
        :param str kdeParameters: Optional. If given, the grid and PDF
                                  are saved to a file with this name
                                  and nothing is returned.
        :param str cdfParameters: File to which the grid and CDF are
                                  saved. Only used (and then required)
                                  when ``kdeParameters`` is given.
        :param angular: Does the data represent an angular measure
                        (e.g. bearing). If so, the grid spans 0 to 360.
        :type  angular: boolean, default=``False``
        :param periodic: Does the data represent some form of periodic
                         data (e.g. day of year). If given, it should
                         be the period of the data (e.g. for annual data,
                         ``periodic=365``).
        :type  periodic: boolean or float, default=``False``
        :param missingValue: Missing values have this value. Defaults to
                             :attr:`sys.maxint` where it exists and to
                             :attr:`sys.maxsize` otherwise.

        :returns: If ``kdeParameters`` is given, returns ``None``
                  (data are saved to file), otherwise
                  :class:`numpy.ndarray` of the parameter grid, the PDF
                  and CDF.
        :raises IndexError: if an array with at most one member is given.
        :raises ValueError: if the grid has fewer than two points.

        """

        self.logger.debug("Running generateKDE")
        if isinstance(parameters, str):
            self.parameters = stats.statRemoveNum(
                flLoadFile(parameters, '%', ','), missingValue)
        else:
            if parameters.size <= 1:
                self.logger.error("Insufficient members in parameter list")
                raise IndexError("Insufficient members in parameter list")

            self.parameters = stats.statRemoveNum(parameters, missingValue)

        if angular:
            xmin = 0.0
            xmax = 360.0
        elif periodic:
            xmin = 0.0
            xmax = periodic
        else:
            xmin = self.parameters.min()
            xmax = self.parameters.max()

        self.logger.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                          xmin, xmax, kdeStep)
        if periodic:
            # Replicate grid and data one period either side so that the
            # estimate wraps around the ends of the period.
            x = np.arange(1, periodic + 1, kdeStep)
            nPeriod = x.size
            self.grid = np.concatenate([x - periodic, x, x + periodic])
            self.parameters = np.concatenate([self.parameters - periodic,
                                              self.parameters,
                                              self.parameters + periodic])
        else:
            self.grid = np.arange(xmin, xmax, kdeStep)

        if self.grid.size < 2:
            self.logger.critical("Grid for CDF generation is a single value")
            self.logger.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                                 xmin, xmax, kdeStep)
            raise ValueError("Grid for CDF generation is a single value")

        bw = KPDF.UPDFOptimumBandwidth(self.parameters)
        self.pdf = self._generatePDF(self.grid, bw, self.parameters)

        if periodic:
            # Keep only the middle copy of the grid. Its extent is the
            # length of ``x``; ``periodic/kdeStep`` is a float (invalid as
            # an index) for a float step, and picks the wrong window when
            # the step does not divide the period. The factor of 3
            # restores the density spread over the three copies.
            self.pdf = 3.0 * self.pdf[nPeriod:2 * nPeriod]
            self.grid = self.grid[nPeriod:2 * nPeriod]

        self.cy = stats.cdf(self.grid, self.pdf)
        if kdeParameters is None:
            return np.transpose(np.array([self.grid, self.pdf, self.cy]))
        else:
            # Assume both kdeParameters and cdfParameters are defined as files:
            self.logger.debug("Saving KDE and CDF data to files")
            flSaveFile(kdeParameters,
                       np.transpose(np.array([self.grid, self.pdf])))
            flSaveFile(cdfParameters,
                       np.transpose(np.array([self.grid, self.cy])))
Example #10
0
    def extractParameter(self, cellNum):
        """
        Collect the valid parameter samples that fall inside a grid cell.

        The cell is enlarged through :meth:`_expandCell` for as long as it
        holds ``minSamplesCell`` or fewer valid samples, and then for as
        long as all of its samples share one value (a KDE bandwidth of 0
        cannot be used). Missing values are stripped from every
        selection.

        :param int cellNum: The cell number to process.
        :returns: None. The :attr:`parameter` attribute is updated.
        :raises IndexError: if the cell number is not valid
                            (i.e. if it is outside the possible
                            range of cell numbers).
        :raises StopIteration: if the cell stops growing before the
                               conditions above are satisfied.
        """
        if not stats.validCellNum(cellNum, self.gridLimit, self.gridSpace):
            rangeMsg = "Invalid input on cellNum: cell number %i is out of range"
            self.logger.critical(rangeMsg % cellNum)
            raise IndexError(
                'Invalid input on cellNum: cell number is out of range')

        lon, lat = self.lonLat[:, 0], self.lonLat[:, 1]
        west, north = stats.getCellLonLat(cellNum, self.gridLimit,
                                          self.gridSpace)
        east = west + self.gridSpace['x']
        south = north - self.gridSpace['y']

        def refresh():
            """Store the valid samples inside the current bounds."""
            inLat = (lat >= south) & (lat < north)
            inLon = (lon >= west) & (lon < east)
            picked = self.pList[np.where(inLat & inLon)]
            self.parameter = stats.statRemoveNum(np.array(picked),
                                                 self.missingValue)

        refresh()

        # Stage 1: grow the cell until it holds enough samples.
        while np.size(self.parameter) <= self.minSamplesCell:
            self.logger.debug(
                "Insufficient samples. Increasing the size of the cell")
            before = (west, east, north, south)
            west, east, north, south = self._expandCell(lon, lat, *before)
            after = (west, east, north, south)
            if all(new == old for new, old in zip(after, before)):
                template = ("Insufficient grid points in selected domain to "
                            "estimate storm statistics - please select a "
                            "larger domain. Samples = %i / %i")
                errMsg = template % (np.size(self.parameter),
                                     self.minSamplesCell)
                self.logger.critical(errMsg)
                raise StopIteration(errMsg)
            refresh()

        # Stage 2: grow the cell until the samples are not all equal,
        # otherwise the bandwidth would be 0 and KDE cannot proceed.
        while self.parameter.max() == self.parameter.min():
            self.logger.debug(
                "Parameter values appear to be the same. "
                "Increasing the size of the cell")
            before = (west, east, north, south)
            west, east, north, south = self._expandCell(lon, lat, *before)
            after = (west, east, north, south)
            if all(new == old for new, old in zip(after, before)):
                errMsg = ("Insufficient grid points in selected domain "
                          "to estimate storm statistics - "
                          "please select a larger domain.")
                self.logger.critical(errMsg)
                raise StopIteration(errMsg)
            refresh()

        nValid = str(np.size(self.parameter))
        self.logger.debug("Number of valid observations in cell %s : %s",
                          str(cellNum), nValid)