def __init__(self, parameter, lonLat, gridLimit, gridSpace, gridInc,
             minSample=100, angular=False,
             missingValue=getattr(sys, 'maxint', sys.maxsize),
             progressbar=None, prgStartValue=0, prgEndValue=1,
             calculateLater=False):
    """
    Store the grid configuration and, unless ``calculateLater`` is set,
    load the input data and calculate the statistics.

    :param parameter: Parameter values. If a string is given, it is
                      treated as the path of a file and loaded with
                      ``flLoadFile``; any other value is stored as-is.
    :type parameter: str or array-like
    :param lonLat: Longitude/latitude data. If a string is given, it is
                   treated as the path of a comma-delimited file and
                   loaded with ``flLoadFile``; any other value is
                   stored as-is.
    :type lonLat: str or array-like
    :param gridLimit: Passed, together with ``gridSpace``, to
                      ``stats.maxCellNum`` to obtain ``self.maxCell``.
    :param gridSpace: See ``gridLimit``.
    :param gridInc: Stored on the instance; not used by the constructor
                    itself.
    :param minSample: Stored on the instance (default ``100``).
    :param angular: Stored on the instance (default ``False``).
    :param missingValue: Stored on the instance. Defaults to
                         ``sys.maxint`` where that attribute exists
                         (Python 2) and to ``sys.maxsize`` otherwise.
    :param progressbar: Stored on the instance (default ``None``).
    :param prgStartValue: Stored on the instance (default ``0``).
    :param prgEndValue: Stored on the instance (default ``1``).
    :param bool calculateLater: If true, no data are loaded and
                                ``calculateStatistics`` is not called;
                                ``self.lonLat`` and ``self.param`` are
                                then left unset by the constructor.
    """
    self.logger = logging.getLogger()
    self.logger.debug('Initialising GenerateStats')

    self.gridLimit = gridLimit
    self.gridSpace = gridSpace
    self.gridInc = gridInc
    self.maxCell = stats.maxCellNum(self.gridLimit, self.gridSpace)
    self.minSample = minSample
    # Sized from maxCell + 1 -- presumably one entry per cell, numbered
    # 0..maxCell; confirm against the definition of ``parameters``.
    self.coeffs = parameters(self.maxCell + 1)
    self.angular = angular
    self.missingValue = missingValue
    self.domain_warning_raised = False

    self.progressbar = progressbar
    self.prgStartValue = prgStartValue
    self.prgEndValue = prgEndValue

    if not calculateLater:
        # isinstance (rather than an exact type comparison) so that
        # str subclasses are also recognised as file paths.
        if isinstance(lonLat, str):
            self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
        else:
            self.lonLat = lonLat

        if isinstance(parameter, str):
            self.param = np.array(flLoadFile(parameter))
        else:
            self.param = parameter

        self.calculateStatistics()
def __init__(self, parameter, lonLat, gridLimit, gridSpace, gridInc,
             minSample=100, angular=False, missingValue=sys.maxsize,
             progressbar=None, prgStartValue=0, prgEndValue=1,
             calculateLater=False):
    """
    Store the grid configuration and, unless ``calculateLater`` is set,
    load the input data and calculate the statistics.

    :param parameter: Parameter values. If a string is given, it is
                      treated as the path of a file and loaded with
                      ``flLoadFile``; any other value is stored as-is.
    :type parameter: str or array-like
    :param lonLat: Longitude/latitude data. If a string is given, it is
                   treated as the path of a comma-delimited file and
                   loaded with ``flLoadFile``; any other value is
                   stored as-is.
    :type lonLat: str or array-like
    :param gridLimit: Passed, together with ``gridSpace``, to
                      ``stats.maxCellNum`` to obtain ``self.maxCell``.
    :param gridSpace: See ``gridLimit``.
    :param gridInc: Stored on the instance; not used by the constructor
                    itself.
    :param minSample: Stored on the instance (default ``100``).
    :param angular: Stored on the instance (default ``False``).
    :param missingValue: Stored on the instance (default
                         ``sys.maxsize``).
    :param progressbar: Stored on the instance (default ``None``).
    :param prgStartValue: Stored on the instance (default ``0``).
    :param prgEndValue: Stored on the instance (default ``1``).
    :param bool calculateLater: If true, no data are loaded and
                                ``calculateStatistics`` is not called;
                                ``self.lonLat`` and ``self.param`` are
                                then left unset by the constructor.
    """
    self.logger = logging.getLogger()
    self.logger.debug('Initialising GenerateStats')

    self.gridLimit = gridLimit
    self.gridSpace = gridSpace
    self.gridInc = gridInc
    self.maxCell = stats.maxCellNum(self.gridLimit, self.gridSpace)
    self.minSample = minSample
    # Sized from maxCell + 1 -- presumably one entry per cell, numbered
    # 0..maxCell; confirm against the definition of ``parameters``.
    self.coeffs = parameters(self.maxCell + 1)
    self.angular = angular
    self.missingValue = missingValue
    self.domain_warning_raised = False

    self.progressbar = progressbar
    self.prgStartValue = prgStartValue
    self.prgEndValue = prgEndValue

    if not calculateLater:
        # isinstance (rather than an exact type comparison) so that
        # str subclasses are also recognised as file paths.
        if isinstance(lonLat, str):
            self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
        else:
            self.lonLat = lonLat

        if isinstance(parameter, str):
            self.param = np.array(flLoadFile(parameter))
        else:
            self.param = parameter

        self.calculateStatistics()
def allDistributions(self, lonLat, parameterList, parameterName=None,
                     kdeStep=0.1, angular=False, periodic=False,
                     plotParam=False):
    """
    Calculate a distribution for each individual cell and store in a
    file or return the distribution.

    The inputs are stored on the instance as ``self.lonLat``,
    ``self.pList`` and ``self.pName``.

    :param lonLat: Longitude/latitude data. If a string is given, it is
                   treated as the path of a comma-delimited file and
                   loaded with ``flLoadFile``; any other value is used
                   as-is.
    :type lonLat: str or array-like
    :param parameterList: Parameter values. If a string is given, it is
                          treated as the path of a file and loaded with
                          ``flLoadFile``; any other value is used
                          as-is.
    :type parameterList: str or array-like
    :param str parameterName: Optional. If given, the cell
                              distributions are written to
                              ``<outputPath>/process/all_cell_cdf_<name>``
                              and to a netCDF file with the same name
                              plus ``.nc``. If ``None``, the
                              distribution values are returned.
    :param kdeStep: Passed to ``generateKDE`` (and to
                    ``_plotParameter`` when plotting). Default ``0.1``.
    :param angular: Passed to ``generateKDE``. Default ``False``.
    :param periodic: Passed to ``generateKDE``. Default ``False``.
    :param bool plotParam: If true, ``_plotParameter`` is called for
                           every cell. Default ``False``.

    :returns: If ``parameterName`` is ``None``, a
              :class:`numpy.ndarray` with one row per CDF point and
              three columns (cell number, grid value, CDF value);
              otherwise ``None`` (data are saved to file).
    """
    if parameterName:
        self.logger.debug("Running allDistributions for %s",
                          parameterName)
    else:
        self.logger.debug("Running allDistributions")

    if isinstance(lonLat, str):
        self.logger.debug("Loading lat/lon data from file")
        self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
    else:
        self.lonLat = lonLat

    if isinstance(parameterList, str):
        self.logger.debug("Loading parameter data from file: %s",
                          parameterList)
        self.pList = np.array(flLoadFile(parameterList))
    else:
        self.pList = parameterList

    self.pName = parameterName

    maxCellNum = stats.maxCellNum(self.gridLimit, self.gridSpace)

    # Writing CDF dataset for all individual cell number into files
    self.logger.debug("Writing CDF dataset for all individual cells into files")

    # Collect the per-cell blocks and join them once after the loop;
    # concatenating inside the loop re-copies all earlier rows on
    # every iteration.
    cellBlocks = []
    resultSize = 0
    for cellNum in range(0, maxCellNum + 1):
        self.logger.debug("Processing cell number %i", cellNum)

        # Generate cyclone parameter data for the cell number
        self.extractParameter(cellNum)

        # Estimate cyclone parameter data using KDE
        # The returned array contains the grid, the PDF and the CDF
        cdf = self.kdeParameter.generateKDE(self.parameter, kdeStep,
                                            angular=angular,
                                            periodic=periodic)
        if plotParam:
            self._plotParameter(cellNum, kdeStep)
        self.logger.debug('size of parameter array = %d: size of cdf array = %d',
                          self.parameter.size, cdf.size)

        if cellNum != 0:
            self.logger.debug('size of results array = %s',
                              str(resultSize))

        # Columns: cell number, grid value (cdf column 0) and CDF
        # value (cdf column 2).
        block = np.transpose(np.array([[cellNum] * len(cdf),
                                       cdf[:, 0], cdf[:, 2]]))
        cellBlocks.append(block)
        resultSize += block.size

    results = np.concatenate(cellBlocks)

    if parameterName is None:
        self.logger.debug("Returning CDF dataset for all individual cell numbers")
        return results

    cdfHeader = "Cell_Number, CDF_" + self.pName + "_x, CDF_" + \
                self.pName + "_y"
    allCellCdfOutput = pjoin(self.outputPath, 'process',
                             'all_cell_cdf_' + self.pName)
    args = {"filename": allCellCdfOutput, "data": results,
            "header": cdfHeader, "delimiter": ",", "fmt": "%f"}

    self.logger.debug("Writing CDF dataset for all individual cell numbers into files")
    flSaveFile(**args)

    # Save to netcdf too
    filename = allCellCdfOutput + '.nc'
    ncdf = Dataset(filename, 'w')
    try:
        ncdf.createDimension('cell', len(results[:, 0]))
        cell = ncdf.createVariable('cell', 'i', ('cell',))
        cell[:] = results[:, 0]

        x = ncdf.createVariable('x', 'f', ('cell',))
        x[:] = results[:, 1]

        y = ncdf.createVariable('CDF', 'f', ('cell',))
        y[:] = results[:, 2]
    finally:
        # Close the dataset even if one of the writes above fails.
        ncdf.close()
def test_MaxCellNum(self):
    """Testing maxCellNum

    Checks that ``statutils.maxCellNum`` returns 175 for the fixture's
    ``gridLimit`` and ``gridSpace``.
    """
    expected = 175
    actual = statutils.maxCellNum(self.gridLimit, self.gridSpace)
    self.assertEqual(expected, actual)
def allDistributions(self, lonLat, parameterList, parameterName=None,
                     kdeStep=0.1, angular=False, periodic=False,
                     plotParam=False):
    """
    Calculate a distribution for each individual cell and store in a
    file or return the distribution.

    The inputs are stored on the instance as ``self.lonLat``,
    ``self.pList`` and ``self.pName``.

    :param lonLat: The longitude/latitude of all observations in
                   the model domain. If a string is given, then
                   it is the path to a file containing the
                   longitude/latitude information. If an array
                   is given, then it should be a 2-d array
                   containing the data values.
    :type  lonLat: str or :class:`numpy.ndarray`
    :param parameterList: Parameter values. If a string is given,
                          then it is the path to a file containing
                          the values. If an array is passed, then it
                          should hold the parameter values.
    :type  parameterList: str or :class:`numpy.ndarray`
    :param str parameterName: Optional. If given, then the
                              cell distributions will be saved to a
                              file with this name. If absent,
                              the distribution values are returned.
    :param kdeStep: Increment of the ordinate values at which
                    the distributions will be calculated.
    :type  kdeStep: float, default=`0.1`
    :param angular: Does the data represent an angular measure
                    (e.g. bearing).
    :type  angular: boolean, default=``False``
    :param periodic: Does the data represent some form of periodic
                     data (e.g. day of year). If given, it should
                     be the period of the data (e.g. for annual data,
                     ``periodic=365``).
    :type  periodic: boolean or float, default=``False``
    :param boolean plotParam: Plot the parameters. Default is
                              ``False``.

    :returns: If no ``parameterName`` is given, a
              :class:`numpy.ndarray` with one row per CDF point and
              three columns (cell number, grid value, CDF value);
              otherwise ``None`` (data are saved to file).
    """
    if parameterName:
        self.logger.debug("Running allDistributions for %s",
                          parameterName)
    else:
        self.logger.debug("Running allDistributions")

    if isinstance(lonLat, str):
        self.logger.debug("Loading lat/lon data from file")
        self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
    else:
        self.lonLat = lonLat

    if isinstance(parameterList, str):
        self.logger.debug("Loading parameter data from file: %s",
                          parameterList)
        self.pList = np.array(flLoadFile(parameterList))
    else:
        self.pList = parameterList

    self.pName = parameterName

    maxCellNum = stats.maxCellNum(self.gridLimit, self.gridSpace)

    # Writing CDF dataset for all individual cell number into files
    self.logger.debug("Writing CDF dataset for all individual cells into files")

    # Collect the per-cell blocks and join them once after the loop;
    # concatenating inside the loop re-copies all earlier rows on
    # every iteration.
    cellBlocks = []
    resultSize = 0
    for cellNum in range(0, maxCellNum + 1):
        self.logger.debug("Processing cell number %i", cellNum)

        # Generate cyclone parameter data for the cell number
        self.extractParameter(cellNum)

        # Estimate cyclone parameter data using KDE
        # The returned array contains the grid, the PDF and the CDF
        cdf = self.kdeParameter.generateKDE(self.parameter, kdeStep,
                                            angular=angular,
                                            periodic=periodic)
        if plotParam:
            self._plotParameter(cellNum, kdeStep)
        self.logger.debug('size of parameter array = %d: size of cdf array = %d',
                          self.parameter.size, cdf.size)

        if cellNum != 0:
            self.logger.debug('size of results array = %s',
                              str(resultSize))

        # Columns: cell number, grid value (cdf column 0) and CDF
        # value (cdf column 2).
        block = np.transpose(np.array([[cellNum] * len(cdf),
                                       cdf[:, 0], cdf[:, 2]]))
        cellBlocks.append(block)
        resultSize += block.size

    results = np.concatenate(cellBlocks)

    if parameterName is None:
        self.logger.debug("Returning CDF dataset for all individual cell numbers")
        return results

    cdfHeader = "Cell_Number, CDF_" + self.pName + "_x, CDF_" + \
                self.pName + "_y"
    allCellCdfOutput = pjoin(self.outputPath, 'process',
                             'all_cell_cdf_' + self.pName)
    args = {"filename": allCellCdfOutput, "data": results,
            "header": cdfHeader, "delimiter": ",", "fmt": "%f"}

    self.logger.debug("Writing CDF dataset for all individual cell numbers into files")
    flSaveFile(**args)

    # Save to netcdf too
    filename = allCellCdfOutput + '.nc'
    ncdf = Dataset(filename, 'w')
    try:
        ncdf.createDimension('cell', len(results[:, 0]))
        cell = ncdf.createVariable('cell', 'i', ('cell',))
        cell[:] = results[:, 0]

        x = ncdf.createVariable('x', 'f', ('cell',))
        x[:] = results[:, 1]

        y = ncdf.createVariable('CDF', 'f', ('cell',))
        y[:] = results[:, 2]
    finally:
        # Close the dataset even if one of the writes above fails.
        ncdf.close()
def allDistributions(self, lonLat, parameterList, parameterName=None,
                     kdeStep=0.1, angular=False, periodic=False,
                     plotParam=False):
    """
    Calculate a distribution for each individual cell and store in a
    file or return the distribution.

    The inputs are stored on the instance as ``self.lonLat``,
    ``self.pList`` and ``self.pName``.

    :param lonLat: The longitude/latitude of all observations in
                   the model domain. If a string is given, then
                   it is the path to a file containing the
                   longitude/latitude information. If an array
                   is given, then it should be a 2-d array
                   containing the data values.
    :type  lonLat: str or :class:`numpy.ndarray`
    :param parameterList: Parameter values. If a string is given,
                          then it is the path to a file containing
                          the values. If an array is passed, then it
                          should hold the parameter values.
    :type  parameterList: str or :class:`numpy.ndarray`
    :param str parameterName: Optional. If given, then the
                              cell distributions will be saved to a
                              file with this name. If absent,
                              the distribution values are returned.
    :param kdeStep: Increment of the ordinate values at which
                    the distributions will be calculated.
    :type  kdeStep: float, default=`0.1`
    :param angular: Does the data represent an angular measure
                    (e.g. bearing).
    :type  angular: boolean, default=``False``
    :param periodic: Does the data represent some form of periodic
                     data (e.g. day of year). If given, it should
                     be the period of the data (e.g. for annual data,
                     ``periodic=365``).
    :type  periodic: boolean or float, default=``False``
    :param boolean plotParam: Plot the parameters. Default is
                              ``False``.

    :returns: If no ``parameterName`` is given, a
              :class:`numpy.ndarray` with one row per CDF point and
              three columns (cell number, grid value, CDF value);
              otherwise ``None`` (data are saved to file).
    :raises IndexError: If the parameter data and the lon/lat data do
                        not have the same length.
    """
    if parameterName:
        self.logger.debug("Running allDistributions for %s",
                          parameterName)
    else:
        self.logger.debug("Running allDistributions")

    if isinstance(lonLat, str):
        self.logger.debug("Loading lat/lon data from file")
        self.lonLat = np.array(flLoadFile(lonLat, delimiter=','))
    else:
        self.lonLat = lonLat

    if isinstance(parameterList, str):
        self.logger.debug("Loading parameter data from file: %s",
                          parameterList)
        self.pList = np.array(flLoadFile(parameterList))
    else:
        self.pList = parameterList

    self.pName = parameterName

    if len(self.pList) != len(self.lonLat):
        errmsg = ("Parameter data and "
                  "Lon/Lat data are not the same length "
                  "for {}.".format(parameterName))
        self.logger.critical(errmsg)
        raise IndexError(errmsg)

    maxCellNum = stats.maxCellNum(self.gridLimit, self.gridSpace)

    # Writing CDF dataset for all individual cell number into files
    self.logger.debug(("Writing CDF dataset for all individual "
                       "cells into files"))

    # Collect the per-cell blocks and join them once after the loop;
    # concatenating inside the loop re-copies all earlier rows on
    # every iteration.
    cellBlocks = []
    resultSize = 0
    for cellNum in range(0, maxCellNum + 1):
        self.logger.debug("Processing cell number %i", cellNum)

        # Generate cyclone parameter data for the cell number
        self.extractParameter(cellNum)

        # Estimate cyclone parameter data using KDE
        # The returned array contains the grid, the PDF and the CDF
        cdf = self.kdeParameter.generateKDE(self.parameter, kdeStep,
                                            angular=angular,
                                            periodic=periodic)
        if plotParam:
            self._plotParameter(cellNum, kdeStep)
        self.logger.debug(('size of parameter array = %d: '
                           'size of cdf array = %d'),
                          self.parameter.size, cdf.size)

        if cellNum != 0:
            self.logger.debug('size of results = %s', str(resultSize))

        # Columns: cell number, grid value (cdf column 0) and CDF
        # value (cdf column 2).
        block = np.transpose(np.array([[cellNum] * len(cdf),
                                       cdf[:, 0], cdf[:, 2]]))
        cellBlocks.append(block)
        resultSize += block.size

    results = np.concatenate(cellBlocks)

    if parameterName is None:
        self.logger.debug(("Returning CDF dataset for all "
                           "individual cell numbers"))
        return results

    cdfHeader = "Cell_Number, CDF_" + self.pName + "_x, CDF_" + \
                self.pName + "_y"
    allCellCdfOutput = pjoin(self.outputPath, 'process',
                             'all_cell_cdf_' + self.pName)
    args = {"filename": allCellCdfOutput, "data": results,
            "header": cdfHeader, "delimiter": ",", "fmt": "%f"}

    self.logger.debug(("Writing CDF dataset for all individual "
                       "cell numbers into files"))
    flSaveFile(**args)

    # Save to netcdf too
    filename = allCellCdfOutput + '.nc'
    ncdf = Dataset(filename, 'w')
    try:
        ncdf.createDimension('cell', len(results[:, 0]))
        cell = ncdf.createVariable('cell', 'i', ('cell',))
        cell[:] = results[:, 0]

        x = ncdf.createVariable('x', 'f', ('cell',))
        x[:] = results[:, 1]

        y = ncdf.createVariable('CDF', 'f', ('cell',))
        y[:] = results[:, 2]
    finally:
        # Close the dataset even if one of the writes above fails.
        ncdf.close()