Example #1
0
    def __init__(self, parameter, lonLat, gridLimit,
                 gridSpace, gridInc, minSample=100, angular=False,
                 missingValue=sys.maxsize, progressbar=None,
                 prgStartValue=0, prgEndValue=1, calculateLater=False):
        """
        Store the grid configuration and, unless deferred, load the input
        data and calculate the statistics.

        :param parameter: Parameter values. If a string is given, it is
                          treated as the path of a file that is loaded
                          with ``flLoadFile``; anything else is stored
                          unchanged as ``self.param``.
        :param lonLat: Longitude/latitude values. If a string is given,
                       it is treated as the path of a comma-delimited
                       file that is loaded with ``flLoadFile``; anything
                       else is stored unchanged as ``self.lonLat``.
        :param gridLimit: Stored as ``self.gridLimit`` and passed to
                          ``stats.maxCellNum``.
        :param gridSpace: Stored as ``self.gridSpace`` and passed to
                          ``stats.maxCellNum``.
        :param gridInc: Stored as ``self.gridInc``.
        :param minSample: Stored as ``self.minSample`` (default 100).
        :param angular: Stored as ``self.angular`` (default ``False``).
        :param missingValue: Stored as ``self.missingValue``. Defaults to
                             ``sys.maxsize`` (``sys.maxint`` no longer
                             exists in Python 3).
        :param progressbar: Stored as ``self.progressbar``.
        :param prgStartValue: Stored as ``self.prgStartValue``.
        :param prgEndValue: Stored as ``self.prgEndValue``.
        :param calculateLater: If true, the input data are not loaded and
                               ``calculateStatistics`` is not called, so
                               ``self.lonLat`` and ``self.param`` are
                               left unset by this constructor.
        """
        self.logger = logging.getLogger()
        self.logger.debug('Initialising GenerateStats')

        self.gridLimit = gridLimit
        self.gridSpace = gridSpace
        self.gridInc = gridInc
        self.maxCell = stats.maxCellNum(self.gridLimit, self.gridSpace)
        self.minSample = minSample
        # One slot per cell number in the inclusive range 0..maxCell.
        self.coeffs = parameters(self.maxCell + 1)
        self.angular = angular
        self.missingValue = missingValue

        self.domain_warning_raised = False

        self.progressbar = progressbar
        self.prgStartValue = prgStartValue
        self.prgEndValue = prgEndValue

        if not calculateLater:
            if isinstance(lonLat, str):
                self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
            else:
                self.lonLat = lonLat
            if isinstance(parameter, str):
                self.param = np.array(flLoadFile(parameter))
            else:
                self.param = parameter

            self.calculateStatistics()
Example #2
0
    def __init__(self, parameter, lonLat, gridLimit,
                 gridSpace, gridInc, minSample=100, angular=False,
                 missingValue=sys.maxsize, progressbar=None,
                 prgStartValue=0, prgEndValue=1, calculateLater=False):
        """
        Store the grid configuration and, unless deferred, load the input
        data and calculate the statistics.

        :param parameter: Parameter values. If a string is given, it is
                          treated as the path of a file that is loaded
                          with ``flLoadFile``; anything else is stored
                          unchanged as ``self.param``.
        :param lonLat: Longitude/latitude values. If a string is given,
                       it is treated as the path of a comma-delimited
                       file that is loaded with ``flLoadFile``; anything
                       else is stored unchanged as ``self.lonLat``.
        :param gridLimit: Stored as ``self.gridLimit`` and passed to
                          ``stats.maxCellNum``.
        :param gridSpace: Stored as ``self.gridSpace`` and passed to
                          ``stats.maxCellNum``.
        :param gridInc: Stored as ``self.gridInc``.
        :param minSample: Stored as ``self.minSample`` (default 100).
        :param angular: Stored as ``self.angular`` (default ``False``).
        :param missingValue: Stored as ``self.missingValue``
                             (default ``sys.maxsize``).
        :param progressbar: Stored as ``self.progressbar``.
        :param prgStartValue: Stored as ``self.prgStartValue``.
        :param prgEndValue: Stored as ``self.prgEndValue``.
        :param calculateLater: If true, the input data are not loaded and
                               ``calculateStatistics`` is not called, so
                               ``self.lonLat`` and ``self.param`` are
                               left unset by this constructor.
        """
        self.logger = logging.getLogger()
        self.logger.debug('Initialising GenerateStats')

        self.gridLimit = gridLimit
        self.gridSpace = gridSpace
        self.gridInc = gridInc
        self.maxCell = stats.maxCellNum(self.gridLimit, self.gridSpace)
        self.minSample = minSample
        # One slot per cell number in the inclusive range 0..maxCell.
        self.coeffs = parameters(self.maxCell + 1)
        self.angular = angular
        self.missingValue = missingValue

        self.domain_warning_raised = False

        self.progressbar = progressbar
        self.prgStartValue = prgStartValue
        self.prgEndValue = prgEndValue

        if not calculateLater:
            # isinstance (rather than an exact type match) also accepts
            # str subclasses as file paths.
            if isinstance(lonLat, str):
                self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
            else:
                self.lonLat = lonLat
            if isinstance(parameter, str):
                self.param = np.array(flLoadFile(parameter))
            else:
                self.param = parameter

            self.calculateStatistics()
Example #3
0
    def allDistributions(self, lonLat, parameterList, parameterName=None,
                         kdeStep=0.1, angular=False, periodic=False,
                         plotParam=False):
        """
        Calculate a distribution for each individual cell and store in a
        file or return the distribution.

        :param lonLat: Longitude/latitude data. If a string is given, it
                       is treated as the path of a comma-delimited file
                       that is loaded with ``flLoadFile``; anything else
                       is stored unchanged as ``self.lonLat``.
        :param parameterList: Parameter values. If a string is given, it
                              is treated as the path of a file that is
                              loaded with ``flLoadFile``; anything else
                              is stored unchanged as ``self.pList``.
        :param parameterName: Optional. If given, the results are written
                              to ``all_cell_cdf_<parameterName>`` (plus a
                              ``.nc`` copy) in the ``process`` directory
                              under ``self.outputPath``. If ``None``, the
                              results are returned instead.
        :param kdeStep: Passed to ``self.kdeParameter.generateKDE`` and,
                        when plotting, to ``self._plotParameter``.
        :param angular: Passed through to ``generateKDE``.
        :param periodic: Passed through to ``generateKDE``.
        :param plotParam: If true, ``self._plotParameter`` is called for
                          every cell.

        :returns: When ``parameterName`` is ``None``, an array with three
                  columns: the cell number, column 0 of the KDE output
                  and column 2 of the KDE output. Otherwise ``None``
                  (the data are saved to file).
        """
        if parameterName:
            self.logger.debug("Running allDistributions for %s",
                              parameterName)
        else:
            self.logger.debug("Running allDistributions")

        if isinstance(lonLat, str):
            self.logger.debug("Loading lat/lon data from file")
            self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
        else:
            self.lonLat = lonLat

        if isinstance(parameterList, str):
            self.logger.debug("Loading parameter data from file: %s",
                              parameterList)
            self.pList = np.array(flLoadFile(parameterList))
        else:
            self.pList = parameterList

        self.pName = parameterName

        maxCellNum = stats.maxCellNum(self.gridLimit, self.gridSpace)

        # Writing CDF dataset for all individual cell number into files
        self.logger.debug("Writing CDF dataset for all individual cells into files")

        # Collect the per-cell blocks and join them once at the end;
        # concatenating inside the loop re-copies all earlier rows on
        # every iteration.
        cellBlocks = []
        resultsSize = 0
        for cellNum in range(0, maxCellNum + 1):
            self.logger.debug("Processing cell number %i", cellNum)

            # Generate cyclone parameter data for the cell number
            self.extractParameter(cellNum)

            # Estimate cyclone parameter data using KDE
            # The returned array contains the grid, the PDF and the CDF
            cdf = self.kdeParameter.generateKDE(self.parameter, kdeStep,
                                                angular=angular,
                                                periodic=periodic)
            if plotParam:
                self._plotParameter(cellNum, kdeStep)
            self.logger.debug('size of parameter array = %d: size of cdf array = %d',
                              self.parameter.size, cdf.size)

            if cellBlocks:
                self.logger.debug('size of results array = %s', resultsSize)

            # One row per KDE grid point: (cell number, grid, CDF).
            cellNumlist = [cellNum] * len(cdf)
            block = np.transpose(np.array([cellNumlist,
                                           cdf[:, 0], cdf[:, 2]]))
            cellBlocks.append(block)
            resultsSize += block.size

        results = np.concatenate(cellBlocks)

        if parameterName is None:
            self.logger.debug("Returning CDF dataset for all individual cell numbers")
            return results

        cdfHeader = "Cell_Number, CDF_" + self.pName + "_x, CDF_" + \
                    self.pName + "_y"
        allCellCdfOutput = pjoin(self.outputPath, 'process',
                                 'all_cell_cdf_' + self.pName)

        args = {"filename": allCellCdfOutput, "data": results,
                "header": cdfHeader, "delimiter": ",", "fmt": "%f"}

        self.logger.debug("Writing CDF dataset for all individual cell numbers into files")
        flSaveFile(**args)

        # Save to netcdf too
        filename = allCellCdfOutput + '.nc'

        ncdf = Dataset(filename, 'w')
        try:
            ncdf.createDimension('cell', len(results[:, 0]))
            cell = ncdf.createVariable('cell', 'i', ('cell',))
            cell[:] = results[:, 0]

            x = ncdf.createVariable('x', 'f', ('cell',))
            x[:] = results[:, 1]

            y = ncdf.createVariable('CDF', 'f', ('cell',))
            y[:] = results[:, 2]
        finally:
            # Release the file handle even if a write above fails.
            ncdf.close()
Example #4
0
 def test_MaxCellNum(self):
     """Testing maxCellNum against the expected value for this case's grid"""
     expected = 175
     actual = statutils.maxCellNum(self.gridLimit, self.gridSpace)
     self.assertEqual(expected, actual)
    def allDistributions(self, lonLat, parameterList, parameterName=None,
                         kdeStep=0.1, angular=False, periodic=False,
                         plotParam=False):
        """
        Calculate a distribution for each individual cell and store in a
        file or return the distribution.

        :param lonLat: The longitude/latitude of all observations in
                       the model domain. If a string is given, then
                       it is the path to a file containing the
                       longitude/latitude information. If an array
                       is given, then it should be a 2-d array
                       containing the data values.
        :type  lonLat: str or :class:`numpy.ndarray`
        :param parameterList: Parameter values. If a string is given,
                              then it is the path to a file containing
                              the values. If an array is passed, then it
                              should hold the parameter values.
        :type  parameterList: str or :class:`numpy.ndarray`
        :param str parameterName: Optional. If given, then the cell
                                  distributions are saved to
                                  ``all_cell_cdf_<parameterName>`` (plus
                                  a ``.nc`` copy) in the ``process``
                                  directory under ``self.outputPath``.
                                  If absent, the distribution values
                                  are returned.
        :param kdeStep: Increment of the ordinate values at which
                        the distributions will be calculated.
        :type  kdeStep: float, default=`0.1`
        :param angular: Does the data represent an angular measure
                        (e.g. bearing).
        :type  angular: boolean, default=``False``
        :param periodic: Does the data represent some form of periodic
                         data (e.g. day of year). If given, it should
                         be the period of the data (e.g. for annual data,
                         ``periodic=365``).
        :type  periodic: boolean or float, default=``False``
        :param boolean plotParam: Plot the parameters. Default is ``False``.

        :returns: If no ``parameterName`` is given, returns a
                  :class:`numpy.ndarray` with three columns (cell
                  number, KDE grid value, CDF value). Otherwise returns
                  ``None`` (data are saved to file).
        """
        if parameterName:
            self.logger.debug("Running allDistributions for %s",
                              parameterName)
        else:
            self.logger.debug("Running allDistributions")

        if isinstance(lonLat, str):
            self.logger.debug("Loading lat/lon data from file")
            self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
        else:
            self.lonLat = lonLat

        if isinstance(parameterList, str):
            self.logger.debug("Loading parameter data from file: %s",
                              parameterList)
            self.pList = np.array(flLoadFile(parameterList))
        else:
            self.pList = parameterList

        self.pName = parameterName

        maxCellNum = stats.maxCellNum(self.gridLimit, self.gridSpace)

        # Writing CDF dataset for all individual cell number into files
        self.logger.debug("Writing CDF dataset for all individual cells into files")

        # Collect the per-cell blocks and join them once at the end;
        # concatenating inside the loop re-copies all earlier rows on
        # every iteration.
        cellBlocks = []
        resultsSize = 0
        for cellNum in range(0, maxCellNum + 1):
            self.logger.debug("Processing cell number %i", cellNum)

            # Generate cyclone parameter data for the cell number
            self.extractParameter(cellNum)

            # Estimate cyclone parameter data using KDE
            # The returned array contains the grid, the PDF and the CDF
            cdf = self.kdeParameter.generateKDE(self.parameter, kdeStep,
                                                angular=angular,
                                                periodic=periodic)
            if plotParam:
                self._plotParameter(cellNum, kdeStep)
            self.logger.debug('size of parameter array = %d: size of cdf array = %d',
                              self.parameter.size, cdf.size)

            if cellBlocks:
                self.logger.debug('size of results array = %s', resultsSize)

            # One row per KDE grid point: (cell number, grid, CDF).
            cellNumlist = [cellNum] * len(cdf)
            block = np.transpose(np.array([cellNumlist,
                                           cdf[:, 0], cdf[:, 2]]))
            cellBlocks.append(block)
            resultsSize += block.size

        results = np.concatenate(cellBlocks)

        if parameterName is None:
            self.logger.debug("Returning CDF dataset for all individual cell numbers")
            return results

        cdfHeader = "Cell_Number, CDF_" + self.pName + "_x, CDF_" + \
                    self.pName + "_y"
        allCellCdfOutput = pjoin(self.outputPath, 'process',
                                 'all_cell_cdf_' + self.pName)

        args = {"filename": allCellCdfOutput, "data": results,
                "header": cdfHeader, "delimiter": ",", "fmt": "%f"}

        self.logger.debug("Writing CDF dataset for all individual cell numbers into files")
        flSaveFile(**args)

        # Save to netcdf too
        filename = allCellCdfOutput + '.nc'

        ncdf = Dataset(filename, 'w')
        try:
            ncdf.createDimension('cell', len(results[:, 0]))
            cell = ncdf.createVariable('cell', 'i', ('cell',))
            cell[:] = results[:, 0]

            x = ncdf.createVariable('x', 'f', ('cell',))
            x[:] = results[:, 1]

            y = ncdf.createVariable('CDF', 'f', ('cell',))
            y[:] = results[:, 2]
        finally:
            # Release the file handle even if a write above fails.
            ncdf.close()
Example #6
0
 def test_MaxCellNum(self):
     """Testing maxCellNum against the expected value for this case's grid"""
     expected = 175
     actual = statutils.maxCellNum(self.gridLimit, self.gridSpace)
     self.assertEqual(expected, actual)
Example #7
0
    def allDistributions(self, lonLat, parameterList, parameterName=None,
                         kdeStep=0.1, angular=False, periodic=False,
                         plotParam=False):
        """
        Calculate a distribution for each individual cell and store in a
        file or return the distribution.

        :param lonLat: The longitude/latitude of all observations in
                       the model domain. If a string is given, then
                       it is the path to a file containing the
                       longitude/latitude information. If an array
                       is given, then it should be a 2-d array
                       containing the data values.
        :type  lonLat: str or :class:`numpy.ndarray`
        :param parameterList: Parameter values. If a string is given,
                              then it is the path to a file containing
                              the values. If an array is passed, then it
                              should hold the parameter values.
        :type  parameterList: str or :class:`numpy.ndarray`
        :param str parameterName: Optional. If given, then the cell
                                  distributions are saved to
                                  ``all_cell_cdf_<parameterName>`` (plus
                                  a ``.nc`` copy) in the ``process``
                                  directory under ``self.outputPath``.
                                  If absent, the distribution values
                                  are returned.
        :param kdeStep: Increment of the ordinate values at which
                        the distributions will be calculated.
        :type  kdeStep: float, default=`0.1`
        :param angular: Does the data represent an angular measure
                        (e.g. bearing).
        :type  angular: boolean, default=``False``
        :param periodic: Does the data represent some form of periodic
                         data (e.g. day of year). If given, it should
                         be the period of the data (e.g. for annual data,
                         ``periodic=365``).
        :type  periodic: boolean or float, default=``False``
        :param boolean plotParam: Plot the parameters. Default is ``False``.

        :returns: If no ``parameterName`` is given, returns a
                  :class:`numpy.ndarray` with three columns (cell
                  number, KDE grid value, CDF value). Otherwise returns
                  ``None`` (data are saved to file).
        :raises IndexError: If the parameter data and the lon/lat data
                            do not have the same length.
        """
        if parameterName:
            self.logger.debug("Running allDistributions for %s",
                              parameterName)
        else:
            self.logger.debug("Running allDistributions")

        if isinstance(lonLat, str):
            self.logger.debug("Loading lat/lon data from file")
            self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
        else:
            self.lonLat = lonLat

        if isinstance(parameterList, str):
            self.logger.debug("Loading parameter data from file: %s",
                              parameterList)
            self.pList = np.array(flLoadFile(parameterList))
        else:
            self.pList = parameterList

        self.pName = parameterName

        if len(self.pList) != len(self.lonLat):
            errmsg = ("Parameter data and "
                      "Lon/Lat data are not the same length "
                      "for {}.".format(parameterName))
            self.logger.critical(errmsg)
            raise IndexError(errmsg)

        maxCellNum = stats.maxCellNum(self.gridLimit, self.gridSpace)

        # Writing CDF dataset for all individual cell number into files
        self.logger.debug(("Writing CDF dataset for all individual "
                           "cells into files"))

        # Collect the per-cell blocks and join them once at the end;
        # concatenating inside the loop re-copies all earlier rows on
        # every iteration.
        cellBlocks = []
        resultsSize = 0
        for cellNum in range(0, maxCellNum + 1):
            self.logger.debug("Processing cell number %i", cellNum)

            # Generate cyclone parameter data for the cell number
            self.extractParameter(cellNum)

            # Estimate cyclone parameter data using KDE
            # The returned array contains the grid, the PDF and the CDF
            cdf = self.kdeParameter.generateKDE(self.parameter, kdeStep,
                                                angular=angular,
                                                periodic=periodic)
            if plotParam:
                self._plotParameter(cellNum, kdeStep)
            self.logger.debug(('size of parameter array = %d: '
                               'size of cdf array = %d'),
                              self.parameter.size, cdf.size)

            if cellBlocks:
                self.logger.debug('size of results = %s', str(resultsSize))

            # One row per KDE grid point: (cell number, grid, CDF).
            cellNumlist = [cellNum] * len(cdf)
            block = np.transpose(np.array([cellNumlist,
                                           cdf[:, 0], cdf[:, 2]]))
            cellBlocks.append(block)
            resultsSize += block.size

        results = np.concatenate(cellBlocks)

        if parameterName is None:
            self.logger.debug(("Returning CDF dataset for all "
                               "individual cell numbers"))
            return results

        cdfHeader = "Cell_Number, CDF_" + self.pName + "_x, CDF_" + \
                    self.pName + "_y"
        allCellCdfOutput = pjoin(self.outputPath, 'process',
                                 'all_cell_cdf_' + self.pName)

        args = {"filename": allCellCdfOutput, "data": results,
                "header": cdfHeader, "delimiter": ",", "fmt": "%f"}

        self.logger.debug(("Writing CDF dataset for all individual "
                           "cell numbers into files"))
        flSaveFile(**args)

        # Save to netcdf too
        filename = allCellCdfOutput + '.nc'

        ncdf = Dataset(filename, 'w')
        try:
            ncdf.createDimension('cell', len(results[:, 0]))
            cell = ncdf.createVariable('cell', 'i', ('cell',))
            cell[:] = results[:, 0]

            x = ncdf.createVariable('x', 'f', ('cell',))
            x[:] = results[:, 1]

            y = ncdf.createVariable('CDF', 'f', ('cell',))
            y[:] = results[:, 2]
        finally:
            # Release the file handle even if a write above fails.
            ncdf.close()