예제 #1
0
    def test_cdf(self):
        """Check statutils.cdf against precomputed values for a 5-point PDF"""
        ordinates = array([0, 1, 2, 3, 4])
        density = array([0.33350065, 0.71365127, 0.42428029,
                         0.99204143, 0.01738811])
        # Expected values end at exactly 1.0.
        expected = array([0.13442936, 0.42209201, 0.59311334,
                          0.9929911, 1.0])

        computed = statutils.cdf(ordinates, density)
        self.numpyAssertAlmostEqual(computed, expected)
예제 #2
0
    def generateGenesisDateCDF(self, genDays, lonLat, bw=None, genesisKDE=None):
        """
        Estimate the PDF and CDF of genesis day-of-year with a KDE.

        The observations are treated as periodic: the data and the
        evaluation days are each tiled three times (shifted by -365, 0
        and +365), the KDE is evaluated over the tiled days, and the
        central 365 values are multiplied by three to give the PDF.
        Probably not quite exact, but sufficient for our purposes.

        :param genDays: Passed to ``flLoadFile`` to load the genesis days.
        :param lonLat: Not used by this method.
        :param bw: Optional. KDE bandwidth. When ``None`` it is obtained
                   from ``KPDF.UPDFOptimumBandwidth`` on the tiled data.
        :param genesisKDE: Optional. When given, the days and CDF are
                           written with ``flSaveFile`` and nothing is
                           returned.

        :returns: When ``genesisKDE`` is ``None``, a single array holding
                  the days, then the PDF, then the CDF, joined end to
                  end; otherwise ``None``.
        :raises AttributeError: If ``KPDF`` has no ``UPDF<kdeType>``
                                attribute.
        """

        observed = flLoadFile(genDays)
        yearDays = np.arange(1, 366)
        tiledDays = np.concatenate([yearDays - 365,
                                    yearDays,
                                    yearDays + 365])
        tiledData = np.concatenate([observed - 365,
                                    observed,
                                    observed + 365])

        if bw is None:
            bw = KPDF.UPDFOptimumBandwidth(tiledData)

        # The estimator is looked up by name so that the configured
        # kdeType selects which KPDF routine is used.
        try:
            estimator = getattr(KPDF, "UPDF%s" %self.kdeType)
        except AttributeError:
            self.logger.exception("Invalid input on option: KDE method UPDF%s does not exist"%self.kdeType)
            raise

        tiledPdf = estimator(tiledData, tiledDays, bw)
        # Keep only the middle copy of the three tiles, rescaled by three.
        centralPdf = 3.0*tiledPdf[365:730]
        cumulative = stats.cdf(yearDays, centralPdf)

        if genesisKDE is not None:
            # Only the CDF is written; the PDF is not saved here.
            self.logger.debug("Saving KDE and CDF data to files")
            flSaveFile(genesisKDE, np.array([yearDays, cumulative]).T)
            return None

        # NOTE(review): concatenating 1-D arrays gives one flat array, so the
        # transpose has no effect - confirm callers expect this layout rather
        # than one column per quantity.
        flat = np.concatenate([yearDays, centralPdf, cumulative])
        return np.array(flat).T
예제 #3
0
    def generateGenesisDateCDF(self,
                               genDays,
                               lonLat,
                               bw=None,
                               genesisKDE=None):
        """
        Calculate the PDF of genesis day using KDEs.
        Since the data is periodic, we use a simple method to include
        the periodicity in estimating the PDF. We prepend and append
        the data to itself, then use the central third of the PDF and
        multiply by three to obtain the required PDF. Probably not
        quite exact, but it should be sufficient for our purposes.

        :param str genDays: Name of file containing genesis days
                            (as day of year).
        :param lonLat: Array of genesis longitudes and latitudes.
                       Not used by this method.
        :param float bw: Optional. Bandwidth of the KDE to use. When
                         ``None`` it is computed from the tiled data with
                         ``stats.bandwidth``.
        :param str genesisKDE: Optional. File name to save resulting CDF to.
        :type  lonLat: :class:`numpy.ndarray`

        :returns: When ``genesisKDE`` is ``None``, a
                  :class:`numpy.ndarray` holding the days, the PDF and
                  the CDF of the genesis days joined end to end;
                  otherwise ``None`` (the days and CDF are saved to file).
        """

        data = flLoadFile(genDays)
        days = np.arange(1, 366)
        # Tile both the evaluation days and the observations over three
        # consecutive periods so the estimate wraps around the year end.
        ndays = np.concatenate([days - 365, days, days + 365])
        ndata = np.concatenate([data - 365, data, data + 365])

        # The estimate must be built from the tiled genesis days loaded
        # above. The previous code read ``self.parameters`` (never set in
        # this method) and an undefined name ``grid``.
        if bw is None:
            bw = stats.bandwidth(ndata)

        kde = sm.nonparametric.KDEUnivariate(ndata)
        kde.fit(kernel=self.kdeType,
                bw=bw,
                fft=False,
                gridsize=len(ndays),
                clip=(ndays.min(), ndays.max()),
                cut=0)

        veceval = np.vectorize(kde.evaluate)
        # Replace any NaN produced by the evaluation with zero.
        pdf = np.nan_to_num(veceval(ndays))

        # Actual PDF to return: the central copy, rescaled by three
        apdf = 3.0 * pdf[365:730]
        cy = stats.cdf(days, apdf)
        if genesisKDE is None:
            # NOTE(review): concatenating 1-D arrays gives one flat array,
            # so the transpose has no effect - confirm callers expect this
            # layout rather than one column per quantity.
            return np.transpose(np.array(np.concatenate([days, apdf, cy])))
        else:
            # Only the days and CDF are written; the PDF is not saved.
            LOG.debug("Saving KDE and CDF data to files")
            flSaveFile(genesisKDE, np.transpose(np.array([days, cy])))
예제 #4
0
    def test_cdf(self):
        """Compare statutils.cdf output with reference values"""
        samplePoints = array([0, 1, 2, 3, 4])
        samplePdf = array(
            [0.33350065, 0.71365127, 0.42428029, 0.99204143, 0.01738811])
        # Reference result; the final entry is exactly 1.0.
        referenceCdf = array(
            [0.13442936, 0.42209201, 0.59311334, 0.9929911, 1.0])

        outcome = statutils.cdf(samplePoints, samplePdf)
        self.numpyAssertAlmostEqual(outcome, referenceCdf)
예제 #5
0
    def _calculateCDF(self):
        """Calculate Py and CDFy beforehand to remove the need of
        repeated calculation later.

        Reads ``self.x``, ``self.y`` and ``self.z`` and stores the results
        in ``self.cdfX`` (CDF of the column sums of ``z`` over ``x``) and
        ``self.cdfY`` (one CDF over ``y`` per value of ``x``).

        :raises IndexError: If the dimensions of ``x``, ``y`` and ``z`` are
                            inconsistent; diagnostics are logged at debug
                            level before the error is re-raised.
        """
        # Sum z down each column to get one total per x value
        px = self.z.sum(axis=0)
        # calculate CDF of (x,Px)
        cdfX = stats.cdf(self.x, px)
        # define Py & CDFy as the transpose of z's shape
        py = np.zeros(self.z.shape, 'd').T
        cdfY = np.zeros(self.z.shape, 'd').T
        # Bind the indices up front so the error handler can always report
        # them, even if the failure happens before the inner loop starts.
        i = j = None
        # Py=conditional distribution,  CDFy = CDF of Y
        try:
            # range (not xrange) so this runs on Python 2 and 3
            for i in range(len(self.x)):
                for j in range(len(self.z[:, i])):
                    if px[i] == 0:
                        # Empty column: leave the conditional PDF at zero
                        py[i, j] = 0
                    else:
                        py[i, j] = self.z[j, i]/px[i]
                cdfTemp = stats.cdf(self.y, py[i, :])
                for j in range(len(cdfTemp)):
                    cdfY[i, j] = cdfTemp[j]
        except IndexError:
            # Log indices and shapes first: these can never fail, whereas
            # the element look-ups below reuse the offending indices and
            # may raise again before anything useful has been reported.
            LOG.debug("i = %s", i)
            LOG.debug("j = %s", j)
            LOG.debug("z dim = %s", self.z.shape)
            LOG.debug("cdfy dim = %s", cdfY.shape)
            LOG.debug("p_y dim = %s", py.shape)
            LOG.debug("cdfx dim = %s", cdfX.shape)
            LOG.debug("p_x dim = %s", px.shape)
            LOG.debug("p_y[%s, %s] = %s", i, j, py[i, j])
            LOG.debug("z[%s, %s] = %s", i, j, self.z[j, i])
            LOG.debug("p_x[%s] = %s", i, px[i])

            raise

        self.cdfX = cdfX
        self.cdfY = cdfY
        return
예제 #6
0
    def _calculateCDF(self):
        """Calculate Py and CDFy beforehand to remove the need of
        repeated calculation later.

        Reads ``self.x``, ``self.y`` and ``self.z`` and stores the results
        in ``self.cdfX`` (CDF of the column sums of ``z`` over ``x``) and
        ``self.cdfY`` (one CDF over ``y`` per value of ``x``).

        :raises IndexError: If the dimensions of ``x``, ``y`` and ``z`` are
                            inconsistent; diagnostics are logged at debug
                            level before the error is re-raised.
        """
        # Sum z down each column to get one total per x value
        px = self.z.sum(axis=0)
        # calculate CDF of (x,Px)
        cdfX = stats.cdf(self.x, px)
        # define Py & CDFy with nx by ny
        py = np.zeros([self.x.size, self.y.size], 'd')
        cdfY = np.zeros([self.x.size, self.y.size], 'd')
        # Bind the indices up front so the error handler can always report
        # them, even if the failure happens before the inner loop starts.
        i = j = None
        # Py=conditional distribution,  CDFy = CDF of Y
        try:
            # range (not xrange) so this runs on Python 2 and 3
            for i in range(len(self.x)):
                for j in range(len(self.z[:, i])):
                    if px[i] == 0:
                        # Empty column: leave the conditional PDF at zero
                        py[i, j] = 0
                    else:
                        py[i, j] = self.z[j, i]/px[i]
                cdfTemp = stats.cdf(self.y, py[i, :])
                for j in range(len(cdfTemp)):
                    cdfY[i, j] = cdfTemp[j]
        except IndexError:
            # Log indices and shapes first: these can never fail, whereas
            # the element look-ups below reuse the offending indices and
            # may raise again before anything useful has been reported.
            self.logger.debug("i = %s", i)
            self.logger.debug("j = %s", j)
            self.logger.debug("z dim = %s", self.z.shape)
            self.logger.debug("cdfy dim = %s", cdfY.shape)
            self.logger.debug("p_y dim = %s", py.shape)
            self.logger.debug("cdfx dim = %s", cdfX.shape)
            self.logger.debug("p_x dim = %s", px.shape)
            self.logger.debug("p_y[%s, %s] = %s", i, j, py[i, j])
            self.logger.debug("z[%s, %s] = %s", i, j, self.z[j, i])
            self.logger.debug("p_x[%s] = %s", i, px[i])

            raise

        self.cdfX = cdfX
        self.cdfY = cdfY
        return
예제 #7
0
    def generateGenesisDateCDF(self,
                               genDays,
                               lonLat,
                               bw=None,
                               genesisKDE=None):
        """
        Build the genesis day-of-year PDF and CDF from a kernel density
        estimate.

        Day of year is periodic, so the observations and the evaluation
        days are both repeated three times (offset by -365, 0 and +365).
        The KDE is evaluated on the repeated days, and the middle 365
        values, multiplied by three, are taken as the PDF. This is
        probably not quite exact, but should be sufficient here.

        :param str genDays: Name of file containing genesis days
                            (as day of year).
        :param lonLat: Array of genesis longitudes and latitudes.
                       Not used by this method.
        :param float bw: Optional. Bandwidth of the KDE to use. When
                         ``None`` it is taken from
                         ``KPDF.UPDFOptimumBandwidth`` on the repeated data.
        :param str genesisKDE: Optional. File name to save resulting CDF to.
        :type  lonLat: :class:`numpy.ndarray`

        :returns: When ``genesisKDE`` is ``None``, a
                  :class:`numpy.ndarray` holding the days, the PDF and
                  the CDF joined end to end; otherwise ``None``.
        :raises AttributeError: If ``KPDF`` has no ``UPDF<kdeType>``
                                attribute.
        """

        samples = flLoadFile(genDays)
        calendar = np.arange(1, 366)
        wrappedDays = np.concatenate(
            [calendar - 365, calendar, calendar + 365])
        wrappedSamples = np.concatenate(
            [samples - 365, samples, samples + 365])

        if bw is None:
            bw = KPDF.UPDFOptimumBandwidth(wrappedSamples)

        # Resolve the KPDF routine named by the configured kdeType.
        try:
            density = getattr(KPDF, "UPDF%s" % self.kdeType)
        except AttributeError:
            LOG.exception(("Invalid input on option: "
                           "KDE method UPDF%s does not exist"), self.kdeType)
            raise

        wrappedPdf = density(wrappedSamples, wrappedDays, bw)
        # Middle third of the wrapped estimate, rescaled by three
        yearPdf = 3.0 * wrappedPdf[365:730]
        yearCdf = stats.cdf(calendar, yearPdf)

        if genesisKDE is not None:
            # Only the days and CDF are written out; the PDF is not saved.
            LOG.debug("Saving KDE and CDF data to files")
            flSaveFile(genesisKDE, np.array([calendar, yearCdf]).T)
            return None

        # NOTE(review): concatenating 1-D arrays gives one flat array, so
        # the transpose has no effect - confirm callers expect this layout
        # rather than one column per quantity.
        joined = np.concatenate([calendar, yearPdf, yearCdf])
        return np.array(joined).T
예제 #8
0
    def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                    cdfParameters=None, angular=False, periodic=False,
                    missingValue=getattr(sys, "maxint", sys.maxsize)):
        """
        Generate a PDF and CDF for a given parameter set using the
        method of kernel density estimators.
        Optionally return the PDF and CDF as an array, or write both
        to separate files.

        :param parameters: Either a string, which is passed to
                           ``flLoadFile`` to load the values, or an array
                           of parameter values.
        :param kdeStep: Increment of the grid on which the distributions
                        are evaluated.
        :param kdeParameters: Optional. If given, the grid and PDF are
                              saved with ``flSaveFile`` under this name.
        :param cdfParameters: Optional. Name used to save the grid and
                              CDF when ``kdeParameters`` is given.
        :param angular: If true, the logged range is 0 to 360.
        :param periodic: ``False``, or the period of the data. When set,
                         the data and grid are tiled over three periods
                         and the central period is kept.
        :param missingValue: Value passed to ``stats.statRemoveNum`` to
                             strip from the data (default ``sys.maxint``
                             where it exists, otherwise ``sys.maxsize``).

        :returns: ``None`` when ``kdeParameters`` is given, otherwise an
                  array with columns grid, PDF and CDF.
        :raises IndexError: If an array of one or fewer values is passed.
        :raises ValueError: If the evaluation grid has fewer than two
                            points.
        """

        self.logger.debug("Running generateKDE")
        if isinstance(parameters, str):
            self.parameters = stats.statRemoveNum(
                flLoadFile(parameters, '%', ','), missingValue)
        else:
            if parameters.size <= 1:
                self.logger.error("Insufficient members in parameter list")
                # Call syntax is valid on both Python 2 and Python 3
                raise IndexError("Insufficient members in parameter list")

            self.parameters = stats.statRemoveNum(parameters, missingValue)

        if angular:
            xmin = 0.0
            xmax = 360.0
        elif periodic:
            xmin = 0.0
            xmax = periodic
        else:
            xmin = self.parameters.min()
            xmax = self.parameters.max()

        self.logger.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                          xmin, xmax, kdeStep)
        if periodic:
            x = np.arange(1, periodic + 1, kdeStep)
            self.grid = np.concatenate([x - periodic, x, x + periodic])
            self.parameters = np.concatenate([self.parameters - periodic,
                                              self.parameters,
                                              self.parameters + periodic])
        else:
            self.grid = np.arange(xmin, xmax, kdeStep)

        if self.grid.size < 2:
            self.logger.critical("Grid for CDF generation is a single value")
            self.logger.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                                 xmin, xmax, kdeStep)
            raise ValueError("Grid for CDF generation is a single value")

        bw = KPDF.UPDFOptimumBandwidth(self.parameters)
        self.pdf = self._generatePDF(self.grid, bw, self.parameters)

        if periodic:
            # The grid is three copies of x, so the central period starts
            # at x.size. periodic/kdeStep may be a float (invalid as an
            # index) and differs from x.size whenever the step does not
            # divide the period exactly.
            nper = x.size
            self.pdf = 3.0 * self.pdf[nper:2 * nper]
            self.grid = self.grid[nper:2 * nper]

        self.cy = stats.cdf(self.grid, self.pdf)
        if kdeParameters is None:
            return np.transpose(np.array([self.grid, self.pdf, self.cy]))
        else:
            # Assume both kdeParameters and cdfParameters are defined as files:
            self.logger.debug("Saving KDE and CDF data to files")
            flSaveFile(kdeParameters,
                       np.transpose(np.array([self.grid, self.pdf])))
            flSaveFile(cdfParameters,
                       np.transpose(np.array([self.grid, self.cy])))
예제 #9
0
    def generateKDE(self,
                    parameters,
                    kdeStep,
                    kdeParameters=None,
                    cdfParameters=None,
                    angular=False,
                    periodic=False,
                    missingValue=sys.maxsize):
        """
        Generate a PDF and CDF for a given parameter set using the
        method of kernel density estimators. Optionally return the PDF
        and CDF as an array, or write both to separate files.

        :param parameters: Parameter values. If a string is given,
                           then it is the path to a file containing
                           the values. If an array is passed, then it
                           should hold the parameter values.

        :param kdeStep: Increment of the ordinate values at which
                        the distributions will be calculated.
        :type  kdeStep: float
        :param str kdeParameters: Optional. If given, then the
                                  grid and PDF will be saved to a
                                  file with this name. If absent,
                                  the distribution values are returned.
        :param str cdfParameters: Optional. Name of the file the grid
                                  and CDF are saved to when
                                  ``kdeParameters`` is given.
        :param angular: Does the data represent an angular measure
                        (e.g. bearing).
        :type  angular: boolean, default=``False``
        :param periodic: Does the data represent some form of periodic
                         data (e.g. day of year). If given, it should
                         be the period of the data (e.g. for annual data,
                         ``periodic=365``).
        :type  periodic: boolean or int, default=``False``
        :param missingValue: Missing values have this value (default
                         :attr:`sys.maxsize`).

        :returns: If ``kdeParameters`` is given, returns ``None``
                  (data are saved to file), otherwise
                  :class:`numpy.ndarray` of the parameter grid, the PDF and CDF.
        :raises IndexError: If an array of one or fewer values is passed.
        :raises ValueError: If the evaluation grid has fewer than two
                            points.

        """

        LOG.debug("Running generateKDE")
        if isinstance(parameters, str):
            self.parameters = stats.statRemoveNum(
                flLoadFile(parameters, '%', ','), missingValue)
        else:
            if parameters.size <= 1:
                LOG.error("Insufficient members in parameter list")
                raise IndexError("Insufficient members in parameter list")

            self.parameters = stats.statRemoveNum(parameters, missingValue)

        if angular:
            xmin = 0.0
            xmax = 360.0
        elif periodic:
            xmin = 0.0
            xmax = periodic
        else:
            xmin = self.parameters.min()
            xmax = self.parameters.max()

        LOG.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f", xmin, xmax,
                  kdeStep)
        if periodic:
            x = np.arange(1, periodic + 1, kdeStep)
            self.grid = np.concatenate([x - periodic, x, x + periodic])
            self.parameters = np.concatenate([
                self.parameters - periodic, self.parameters,
                self.parameters + periodic
            ])
        else:
            self.grid = np.arange(xmin, xmax, kdeStep)

        if self.grid.size < 2:
            LOG.critical("Grid for CDF generation is a single value")
            LOG.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f", xmin, xmax,
                         kdeStep)
            raise ValueError("Grid for CDF generation is a single value")

        bw = stats.bandwidth(self.parameters)
        self.pdf = self._generatePDF(self.grid, bw, self.parameters)

        if periodic:
            # The grid is three copies of x, so the central period starts
            # at x.size. int(periodic / kdeStep) truncates, while the
            # length of x is rounded up, so the two disagree whenever the
            # step does not divide the period exactly.
            idx = x.size
            self.pdf = 3.0 * self.pdf[idx:2 * idx]
            self.grid = self.grid[idx:2 * idx]

        self.cy = stats.cdf(self.grid, self.pdf)
        if kdeParameters is None:
            return np.transpose(np.array([self.grid, self.pdf, self.cy]))
        else:
            # Assume both kdeParameters and cdfParameters are defined as files:
            LOG.debug("Saving KDE and CDF data to files")
            flSaveFile(kdeParameters,
                       np.transpose(np.array([self.grid, self.pdf])))
            flSaveFile(cdfParameters,
                       np.transpose(np.array([self.grid, self.cy])))
예제 #10
0
 def test_cdfzeros(self):
     """Check statutils.cdf gives an all-zero array when called with a
     zero array as its first argument"""
     ramp = array([0, 1, 2, 3, 4])
     blank = zeros(len(ramp))
     # NOTE(review): the zero array is passed first and the non-zero ramp
     # second - confirm against the signature of statutils.cdf that this
     # is the argument meant to be zero.
     outcome = statutils.cdf(blank, ramp)
     self.numpyAssertAlmostEqual(outcome, zeros(len(ramp)))
예제 #11
0
    def generateKDE(self, parameters, kdeStep, kdeParameters=None,
                    cdfParameters=None, angular=False, periodic=False,
                    missingValue=getattr(sys, "maxint", sys.maxsize)):
        """
        Generate a PDF and CDF for a given parameter set using the
        method of kernel density estimators. Optionally return the PDF
        and CDF as an array, or write both to separate files.

        :param parameters: Parameter values. If a string is given,
                           then it is the path to a file containing
                           the values. If an array is passed, then it
                           should hold the parameter values.

        :param kdeStep: Increment of the ordinate values at which
                        the distributions will be calculated.
        :type  kdeStep: float
        :param str kdeParameters: Optional. If given, then the
                                  grid and PDF will be saved to a
                                  file with this name. If absent,
                                  the distribution values are returned.
        :param str cdfParameters: Optional. Name of the file the grid
                                  and CDF are saved to when
                                  ``kdeParameters`` is given.
        :param angular: Does the data represent an angular measure
                        (e.g. bearing).
        :type  angular: boolean, default=``False``
        :param periodic: Does the data represent some form of periodic
                         data (e.g. day of year). If given, it should
                         be the period of the data (e.g. for annual data,
                         ``periodic=365``).
        :type  periodic: boolean or float, default=``False``
        :param missingValue: Missing values have this value (default
                         :attr:`sys.maxint` where it exists, otherwise
                         :attr:`sys.maxsize`).

        :returns: If ``kdeParameters`` is given, returns ``None``
                  (data are saved to file), otherwise
                  :class:`numpy.ndarray` of the parameter grid, the PDF and CDF.
        :raises IndexError: If an array of one or fewer values is passed.
        :raises ValueError: If the evaluation grid has fewer than two
                            points.

        """

        self.logger.debug("Running generateKDE")
        if isinstance(parameters, str):
            self.parameters = stats.statRemoveNum(
                flLoadFile(parameters, '%', ','), missingValue)
        else:
            if parameters.size <= 1:
                self.logger.error("Insufficient members in parameter list")
                # Call syntax is valid on both Python 2 and Python 3
                raise IndexError("Insufficient members in parameter list")

            self.parameters = stats.statRemoveNum(parameters, missingValue)

        if angular:
            xmin = 0.0
            xmax = 360.0
        elif periodic:
            xmin = 0.0
            xmax = periodic
        else:
            xmin = self.parameters.min()
            xmax = self.parameters.max()

        self.logger.debug("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                          xmin, xmax, kdeStep)
        if periodic:
            x = np.arange(1, periodic + 1, kdeStep)
            self.grid = np.concatenate([x - periodic, x, x + periodic])
            self.parameters = np.concatenate([self.parameters - periodic,
                                              self.parameters,
                                              self.parameters + periodic])
        else:
            self.grid = np.arange(xmin, xmax, kdeStep)

        if self.grid.size < 2:
            self.logger.critical("Grid for CDF generation is a single value")
            self.logger.critical("xmin=%7.3f, xmax=%7.3f, kdeStep=%7.3f",
                                 xmin, xmax, kdeStep)
            raise ValueError("Grid for CDF generation is a single value")

        bw = KPDF.UPDFOptimumBandwidth(self.parameters)
        self.pdf = self._generatePDF(self.grid, bw, self.parameters)

        if periodic:
            # The grid is three copies of x, so the central period starts
            # at x.size. periodic/kdeStep may be a float (invalid as an
            # index) and differs from x.size whenever the step does not
            # divide the period exactly.
            nper = x.size
            self.pdf = 3.0 * self.pdf[nper:2 * nper]
            self.grid = self.grid[nper:2 * nper]

        self.cy = stats.cdf(self.grid, self.pdf)
        if kdeParameters is None:
            return np.transpose(np.array([self.grid, self.pdf, self.cy]))
        else:
            # Assume both kdeParameters and cdfParameters are defined as files:
            self.logger.debug("Saving KDE and CDF data to files")
            flSaveFile(kdeParameters,
                       np.transpose(np.array([self.grid, self.pdf])))
            flSaveFile(cdfParameters,
                       np.transpose(np.array([self.grid, self.cy])))