Exemple #1
0
    def __init__(self, configFile, tilegrid, numSim, minRecords, yrsPerSim,
                 calcCI=False):
        """
        Initialise HazardCalculator object.

        :param str configFile: path to TCRM configuration file.
        :param tilegrid: :class:`TileGrid` instance
        :param int numSim: number of simulations created.
        :param int minRecords: minimum number of valid wind speed values required
                               to do fitting.
        :param int yrsPerSim:
        """
        config = ConfigParser()
        config.read(configFile)

        self.nodata = -9999.
        self.years = np.array(config.get('Hazard',
                                         'Years').split(',')).astype('f')
        self.outputPath = pjoin(config.get('Output', 'Path'), 'hazard')
        self.inputPath = pjoin(config.get('Output', 'Path'), 'windfield')
        gridLimit = config.geteval('Region', 'gridLimit')

        self.numSim = numSim
        self.minRecords = minRecords
        self.yrsPerSim = yrsPerSim
        self.calcCI = calcCI
        if self.calcCI:
            log.debug("Bootstrap confidence intervals will be calculated")
            self.sample_size = config.getint('Hazard', 'SampleSize')
            self.prange = config.getint('Hazard', 'PercentileRange')

        self.tilegrid = tilegrid
        lon, lat = self.tilegrid.getDomainExtent()

        # Create arrays for storing output data:
        self.loc = np.zeros((len(lat), len(lon)), dtype='f')
        self.shp = np.zeros((len(lat), len(lon)), dtype='f')
        self.scale = np.zeros((len(lat), len(lon)), dtype='f')
        self.Rp = np.zeros((len(self.years), len(lat), len(lon)), dtype='f')

        self.RPupper = np.zeros((len(self.years), len(lat), len(lon)), dtype='f')
        self.RPlower = np.zeros((len(self.years), len(lat), len(lon)), dtype='f')

        self.global_atts = {'history': ('TCRM hazard simulation - '
                            'return period wind speeds'),
                            'version': flProgramVersion(),
                            'Python_ver': sys.version}


        # Add configuration settings to global attributes:
        for section in config.sections():
            for option in config.options(section):
                key = "{0}_{1}".format(section, option)
                value = config.get(section, option)
                self.global_atts[key] = value
Exemple #2
0
def run(configFile, callback=None):
    """
    Run the hazard calculations.

    This will attempt to run the calculation in parallel by tiling the
    domain, but also provides a sane fallback mechanism to execute
    in serial.

    :param configFile: str

    """

    log.info("Loading hazard calculation settings")

    config = ConfigParser()
    config.read(configFile)

    outputPath = config.get('Output', 'Path')
    inputPath = pjoin(outputPath, 'windfield')
    gridLimit = config.geteval('Region', 'gridLimit')
    numsimulations = config.getint('TrackGenerator', 'NumSimulations')
    yrsPerSim = config.getint('TrackGenerator', 'YearsPerSimulation')
    minRecords = config.getint('Hazard', 'MinimumRecords')
    calculate_confidence = config.getboolean('Hazard', 'CalculateCI')

    wf_lon, wf_lat = setDomain(inputPath)

    global pp
    pp = attemptParallel()

    log.info("Running hazard calculations")
    TG = TileGrid(gridLimit, wf_lon, wf_lat)
    tiles = getTiles(TG)

    #def progress(i):
    #    callback(i, len(tiles))

    pp.barrier()
    hc = HazardCalculator(configFile, TG,
                          numsimulations,
                          minRecords,
                          yrsPerSim,
                          calculate_confidence)




    hc.dumpHazardFromTiles(tiles)

    pp.barrier()

    hc.saveHazard()

    log.info("Completed hazard calculation")
Exemple #3
0
    def __init__(self, configFile):

        config = ConfigParser()
        config.read(configFile)
        self.configFile = configFile
        
        outputPath = config.get('Output', 'Path')
        self.trackPath = pjoin(outputPath, 'tracks')
        self.plotPath = pjoin(outputPath, 'plots', 'stats')
        self.dataPath = pjoin(outputPath, 'process')

        # Determine TCRM input directory
        tcrm_dir = pathLocator.getRootDirectory()
        self.inputPath = pjoin(tcrm_dir, 'input')

        self.synNumYears = config.getint('TrackGenerator',
                                         'yearspersimulation')

        try:
            gateFile = config.get('Input', 'CoastlineGates')
        except NoOptionError:
            log.exception(("No coastline gate file specified "
                          "in configuration file"))
            raise
        
        gateData = np.genfromtxt(gateFile, delimiter=',')
        nGates = len(gateData)
        self.gates = Int.convert2vertex(gateData[:, 1], gateData[:, 2])
        self.coast = list(self.gates)
        self.coast.append(self.gates[0])
Exemple #4
0
def colReadCSV(configFile, dataFile, source):
    """
    Loads a csv file containing 'column' data into a record (numpy)
    array with columns labelled by 'fields'. There must be a section in
    the ``configFile`` named ``source`` that sets out the format of the
    data file.

    :param str configFile: Path to a configuration file that holds details
                           of the input data.
    :param str dataFile: Path to the input file to load.
    :param str source: Name of the source format. There must be a
                       corresponding section in the ``configFile``.

    :returns: A :class:`numpy.ndarray` that contains the input data.
    
    """
    config = ConfigParser()
    config.read(configFile)
    delimiter = config.get(source, 'FieldDelimiter')
    numHeadingLines = config.getint(source, 'NumberOfHeadingLines')
    cols = config.get(source, 'Columns').split(delimiter)

    usecols = [i for i,c in enumerate(cols) if c != 'skip']

    data = np.genfromtxt(dataFile, dtype=None, delimiter=delimiter,
            usecols=usecols, comments=None, skip_header=numHeadingLines, 
            autostrip=True)

    data.dtype.names = [c for c in cols if c != 'skip']

    return data
Exemple #5
0
    def __init__(self, configFile):
        """
        Calculate density of TC positions on a grid

        :param str configFile: path to a TCRM configuration file.
        """

        config = ConfigParser()
        config.read(configFile)
        self.configFile = configFile

        # Define the grid:
        gridLimit = config.geteval('Region', 'gridLimit')
        gridSpace = config.geteval('Region', 'GridSpace')

        self.lon_range = np.arange(gridLimit['xMin'],
                                   gridLimit['xMax'] + 0.1,
                                   gridSpace['x'])
        self.lat_range = np.arange(gridLimit['yMin'],
                                   gridLimit['yMax'] + 0.1,
                                   gridSpace['y'])

        outputPath = config.get('Output', 'Path')
        self.trackPath = pjoin(outputPath, 'tracks')
        self.plotPath = pjoin(outputPath, 'plots', 'stats')
        self.dataPath = pjoin(outputPath, 'process')

        # Determine TCRM input directory
        tcrm_dir = pathLocator.getRootDirectory()
        self.inputPath = pjoin(tcrm_dir, 'input')

        self.synNumYears = config.getint('TrackGenerator',
                                         'yearspersimulation')
Exemple #6
0
    def __init__(self, configFile, autoCalc_gridLimit=None,
                 progressbar=None):
        """
        Initialize the data and variables required for the interface
        """
        self.configFile = configFile
        config = ConfigParser()
        config.read(configFile)
        self.progressbar = progressbar

        log.info("Initialising StatInterface")

        self.kdeType = config.get('StatInterface', 'kdeType')
        self.kde2DType = config.get('StatInterface','kde2DType')
        minSamplesCell = config.getint('StatInterface', 'minSamplesCell')
        self.kdeStep = config.getfloat('StatInterface', 'kdeStep')
        self.outputPath = config.get('Output', 'Path')
        self.processPath = pjoin(self.outputPath, 'process')

        missingValue = cnfGetIniValue(self.configFile, 'StatInterface',
                                      'MissingValue', sys.maxint)

        gridLimitStr = cnfGetIniValue(self.configFile, 'StatInterface',
                                      'gridLimit', '')

        if gridLimitStr is not '':
            try:
                self.gridLimit = eval(gridLimitStr)
            except SyntaxError:
                log.exception('Error! gridLimit is not a dictionary')
        else:
            self.gridLimit = autoCalc_gridLimit
            log.info('No gridLimit specified - using automatic' +
                     ' selection: ' + str(self.gridLimit))

        try:
            gridSpace = config.geteval('Region', 'gridSpace')
            gridInc = config.geteval('Region', 'gridInc')
        except SyntaxError:
            log.exception('Error! gridSpace or gridInc not dictionaries')
            raise

        self.generateDist = GenerateDistributions(self.configFile,
                                                  self.gridLimit,
                                                  gridSpace, gridInc,
                                                  self.kdeType,
                                                  minSamplesCell,
                                                  missingValue)
        self.gridSpace = gridSpace
        self.gridInc = gridInc
Exemple #7
0
    def __init__(self, configFile):
        """
        Calculate density of TC genesis positions on a grid

        :param str configFile: path to a TCRM configuration file.
        """

        config = ConfigParser()
        config.read(configFile)
        self.configFile = configFile

        # Define the grid:
        gridLimit = config.geteval('Region', 'gridLimit')
        gridSpace = config.geteval('Region', 'GridSpace')

        self.lon_range = np.arange(gridLimit['xMin'],
                                   gridLimit['xMax'] + 0.1,
                                   0.1)
        self.lat_range = np.arange(gridLimit['yMin'],
                                   gridLimit['yMax'] + 0.1,
                                   0.1)

        self.X, self.Y = np.meshgrid(self.lon_range, self.lat_range)

        outputPath = config.get('Output', 'Path')
        self.trackPath = pjoin(outputPath, 'tracks')
        self.plotPath = pjoin(outputPath, 'plots', 'stats')
        self.dataPath = pjoin(outputPath, 'process')

        # Determine TCRM input directory
        tcrm_dir = pathLocator.getRootDirectory()
        self.inputPath = pjoin(tcrm_dir, 'input')

        self.synNumYears = config.getint('TrackGenerator',
                                         'yearspersimulation')

        cellnumber = 0
        self.gridCells = []
        for k in xrange(len(self.lon_range) - 1):
            for l in xrange(len(self.lat_range) - 1):
                ymin = self.lat_range[l]
                ymax = self.lat_range[l] + gridSpace['y']
                xmin = self.lon_range[k]
                xmax = self.lon_range[k] + gridSpace['x']
                self.gridCells.append(gridCell(xmin, ymin, xmax, ymax,
                                               cellnumber, (k, l)))
                cellnumber += 1
Exemple #8
0
    def __init__(self, configFile):

        config = ConfigParser()
        config.read(configFile)
        self.configFile = configFile

        # Define the grid:
        gridLimit = config.geteval('Region', 'gridLimit')
        gridSpace = config.geteval('Region', 'GridSpace')

        self.lon_range = np.arange(gridLimit['xMin'],
                                   gridLimit['xMax'] + 0.1,
                                   gridSpace['x'])
        self.lat_range = np.arange(gridLimit['yMin'],
                                   gridLimit['yMax'] + 0.1,
                                   gridSpace['y'])

        outputPath = config.get('Output', 'Path')
        self.trackPath = pjoin(outputPath, 'tracks')
        self.plotPath = pjoin(outputPath, 'plots', 'stats')
        self.dataPath = pjoin(outputPath, 'process')

        # Determine TCRM input directory
        tcrm_dir = pathLocator.getRootDirectory()
        self.inputPath = pjoin(tcrm_dir, 'input')

        self.synNumYears = config.getint('TrackGenerator',
                                         'yearspersimulation')

        # Longitude crossing gates:
        self.gateLons = np.arange(self.lon_range.min(), 
                                  self.lon_range.max() + 0.5, 10.)

        self.gateLats = np.arange(self.lat_range.min(), 
                                  self.lat_range.max() + 0.5, 2.)

        # Add configuration settings to global attributes:
        self.gatts = {'history': "Longitude crossing rates for TCRM simulation",
                      'version': flProgramVersion() }

        for section in config.sections():
            for option in config.options(section):
                key = "{0}_{1}".format(section, option)
                value = config.get(section, option)
                self.gatts[key] = value
Exemple #9
0
def colReadCSV(configFile, dataFile, source):
    """
    Loads a csv file containing 'column' data into a record
    (numpy) array with columns labelled by 'fields'.
    """
    config = ConfigParser()
    config.read(configFile)
    delimiter = config.get(source, 'FieldDelimiter')
    numHeadingLines = config.getint(source, 'NumberOfHeadingLines')
    cols = config.get(source, 'Columns').split(delimiter)

    usecols = [i for i,c in enumerate(cols) if c != 'skip']

    data = np.genfromtxt(dataFile, dtype=None, delimiter=delimiter,
            usecols=usecols, comments=None, skip_header=numHeadingLines, 
            autostrip=True)

    data.dtype.names = [c for c in cols if c != 'skip']

    return data
Exemple #10
0
    def __init__(self, configFile):
        """
        Calculate density of TC genesis positions on a grid

        :param str configFile: path to a TCRM configuration file.
        """

        config = ConfigParser()
        config.read(configFile)
        self.configFile = configFile

        # Define the grid:
        self.gridLimit = config.geteval("Region", "gridLimit")
        self.gridSpace = config.geteval("Region", "GridSpace")

        self.lon_range = np.arange(self.gridLimit["xMin"], self.gridLimit["xMax"] + 0.1, 0.1)
        self.lat_range = np.arange(self.gridLimit["yMin"], self.gridLimit["yMax"] + 0.1, 0.1)

        self.X, self.Y = np.meshgrid(self.lon_range, self.lat_range)

        outputPath = config.get("Output", "Path")
        self.trackPath = pjoin(outputPath, "tracks")
        self.plotPath = pjoin(outputPath, "plots", "stats")
        self.dataPath = pjoin(outputPath, "process")

        # Determine TCRM input directory
        tcrm_dir = pathLocator.getRootDirectory()
        self.inputPath = pjoin(tcrm_dir, "input")

        self.synNumYears = config.getint("TrackGenerator", "yearspersimulation")

        cellnumber = 0
        self.gridCells = []
        for k in xrange(len(self.lon_range) - 1):
            for l in xrange(len(self.lat_range) - 1):
                ymin = self.lat_range[l]
                ymax = self.lat_range[l] + self.gridSpace["y"]
                xmin = self.lon_range[k]
                xmax = self.lon_range[k] + self.gridSpace["x"]
                self.gridCells.append(gridCell(xmin, ymin, xmax, ymax, cellnumber, (k, l)))
                cellnumber += 1
Exemple #11
0
    def __init__(self, configFile):

        config = ConfigParser()
        config.read(configFile)
        self.configFile = configFile

        # Define the grid:
        gridLimit = config.geteval("Region", "gridLimit")
        gridSpace = config.geteval("Region", "GridSpace")

        self.lon_range = np.arange(gridLimit["xMin"], gridLimit["xMax"] + 0.1, gridSpace["x"])
        self.lat_range = np.arange(gridLimit["yMin"], gridLimit["yMax"] + 0.1, gridSpace["y"])

        outputPath = config.get("Output", "Path")
        self.trackPath = pjoin(outputPath, "tracks")
        self.plotPath = pjoin(outputPath, "plots", "stats")
        self.dataPath = pjoin(outputPath, "process")

        # Determine TCRM input directory
        tcrm_dir = pathLocator.getRootDirectory()
        self.inputPath = pjoin(tcrm_dir, "input")

        self.synNumYears = config.getint("TrackGenerator", "yearspersimulation")

        # Longitude crossing gates:
        self.gateLons = np.arange(self.lon_range.min(), self.lon_range.max() + 0.5, 10.0)

        self.gateLats = np.arange(self.lat_range.min(), self.lat_range.max() + 0.5, 2.0)

        # Add configuration settings to global attributes:
        self.gatts = {"history": "Longitude crossing rates for TCRM simulation", "version": flProgramVersion()}

        for section in config.sections():
            for option in config.options(section):
                key = "{0}_{1}".format(section, option)
                value = config.get(section, option)
                self.gatts[key] = value
Exemple #12
0
    def processData(self, restrictToWindfieldDomain=False):
        """
        Process raw data into ASCII files that can be read by the main
        components of the system

        :param bool restrictToWindfieldDomain: if True, only process data
            within the wind field domain, otherwise, process data from
            across the track generation domain.
            
        """
        config = ConfigParser()
        config.read(self.configFile)

        self.logger.info("Running %s" % flModuleName())

        if config.has_option('DataProcess', 'InputFile'):
            inputFile = config.get('DataProcess', 'InputFile')

        if config.has_option('DataProcess', 'Source'):
            source = config.get('DataProcess', 'Source')
            self.logger.info('Loading %s dataset', source)
            fn = config.get(source, 'filename')
            path = config.get(source, 'path')
            inputFile = pjoin(path, fn)

        # If input file has no path information, default to tcrm input folder
        if len(os.path.dirname(inputFile)) == 0:
            inputFile = pjoin(self.tcrm_input_dir, inputFile)

        self.logger.info("Processing %s" % inputFile)

        self.source = config.get('DataProcess', 'Source')

        inputData = colReadCSV(self.configFile, inputFile, self.source)

        inputSpeedUnits = config.get(self.source, 'SpeedUnits')
        inputPressureUnits = config.get(self.source, 'PressureUnits')
        inputLengthUnits = config.get(self.source, 'LengthUnits')
        startSeason = config.getint('DataProcess', 'StartSeason')

        indicator = loadData.getInitialPositions(inputData)
        lat = np.array(inputData['lat'], 'd')
        lon = np.mod(np.array(inputData['lon'], 'd'), 360)

        if restrictToWindfieldDomain:
            # Filter the input arrays to only retain the tracks that
            # pass through the windfield domain.
            CD = CalcTrackDomain(self.configFile)
            self.domain = CD.calcDomainFromTracks(indicator, lon, lat)
            domainIndex = self.extractTracks(indicator, lon, lat)
            inputData = inputData[domainIndex]
            indicator = indicator[domainIndex]
            lon = lon[domainIndex]
            lat = lat[domainIndex]

        if self.progressbar is not None:
            self.progressbar.update(0.125)

        # Sort date/time information
        try:
            dt = np.empty(indicator.size, 'f')
            dt[1:] = np.diff(inputData['age'])
        except (ValueError, KeyError):

            try:
                self.logger.info("Filtering input data by season: season > %d"%startSeason)
                # Find indicies that satisfy minimum season filter
                idx = np.where(inputData['season'] >= startSeason)[0]
                # Filter records:
                inputData = inputData[idx]
                indicator = indicator[idx]
                lon = lon[idx]
                lat = lat[idx]
            except (ValueError, KeyError):
                pass

            year, month, day, hour, minute, datetimes \
                = loadData.parseDates(inputData, indicator)

            # Time between observations:
            dt = loadData.getTimeDelta(year, month, day, hour, minute)

            # Calculate julian days:
            jdays = loadData.julianDays(year, month, day, hour, minute)

        delta_lon = np.diff(lon)
        delta_lat = np.diff(lat)

        # Split into separate tracks if large jump occurs (delta_lon >
        # 15 degrees or delta_lat > 5 degrees) This avoids two tracks
        # being accidentally combined when seasons and track numbers
        # match but basins are different as occurs in the IBTrACS
        # dataset.  This problem can also be prevented if the
        # 'tcserialno' column is specified.
        indicator[np.where(delta_lon > 15)[0] + 1] = 1
        indicator[np.where(delta_lat > 5)[0] + 1] = 1

        # Save information required for frequency auto-calculation
        try:
            origin_seasonOrYear = np.array(
                inputData['season'], 'i').compress(indicator)
            header = 'Season'
        except (ValueError, KeyError):
            origin_seasonOrYear = year.compress(indicator)
            header = 'Year'

        flSaveFile(self.origin_year, np.transpose(origin_seasonOrYear),
                   header, ',', fmt='%d')

        pressure = np.array(inputData['pressure'], 'd')
        novalue_index = np.where(pressure == sys.maxint)
        pressure = metutils.convert(pressure, inputPressureUnits, "hPa")
        pressure[novalue_index] = sys.maxint

        # Convert any non-physical central pressure values to maximum integer
        # This is required because IBTrACS has a mix of missing value codes
        # (i.e. -999, 0, 9999) in the same global dataset.
        pressure = np.where((pressure < 600) | (pressure > 1100),
                            sys.maxint, pressure)

        if self.progressbar is not None:
            self.progressbar.update(0.25)

        try:
            vmax = np.array(inputData['vmax'], 'd')
        except (ValueError, KeyError):
            self.logger.warning("No max wind speed data")
            vmax = np.empty(indicator.size, 'f')
        else:
            novalue_index = np.where(vmax == sys.maxint)
            vmax = metutils.convert(vmax, inputSpeedUnits, "mps")
            vmax[novalue_index] = sys.maxint

        assert lat.size == indicator.size
        assert lon.size == indicator.size
        assert pressure.size == indicator.size
        #assert vmax.size == indicator.size

        try:
            rmax = np.array(inputData['rmax'])
            novalue_index = np.where(rmax == sys.maxint)
            rmax = metutils.convert(rmax, inputLengthUnits, "km")
            rmax[novalue_index] = sys.maxint

            self._rmax(rmax, indicator)
            self._rmaxRate(rmax, dt, indicator)
        except (ValueError, KeyError):
            self.logger.warning("No rmax data available")

        if self.ncflag:
            self.data['index'] = indicator

        # ieast : parameter used in latLon2Azi
        # FIXME: should be a config setting describing the input data.
        ieast = 1

        # Determine the index of initial cyclone observations, excluding
        # those cyclones that have only one observation. This is used
        # for calculating initial bearing and speed
        indicator2 = np.where(indicator > 0, 1, 0)
        initIndex = np.concatenate([np.where(np.diff(indicator2) ==
                                             -1, 1, 0), [0]])

        # Calculate the bearing and distance (km) of every two
        # consecutive records using ll2azi
        bear_, dist_ = maputils.latLon2Azi(lat, lon, ieast, azimuth=0)
        assert bear_.size == indicator.size - 1
        assert dist_.size == indicator.size - 1
        bear = np.empty(indicator.size, 'f')
        bear[1:] = bear_
        dist = np.empty(indicator.size, 'f')
        dist[1:] = dist_

        self._lonLat(lon, lat, indicator, initIndex)
        self._bearing(bear, indicator, initIndex)
        self._bearingRate(bear, dt, indicator)
        if self.progressbar is not None:
            self.progressbar.update(0.375)
        self._speed(dist, dt, indicator, initIndex)
        self._speedRate(dist, dt, indicator)
        self._pressure(pressure, indicator)
        self._pressureRate(pressure, dt, indicator)
        self._windSpeed(vmax)

        try:
            self._frequency(year, indicator)
            self._juliandays(jdays, indicator, year)
        except (ValueError, KeyError):
            pass

        self.logger.info("Completed %s" % flModuleName())
        if self.progressbar is not None:
            self.progressbar.update(0.5)
Exemple #13
0
    def __init__(self,
                 configFile,
                 tilegrid,
                 numSim,
                 minRecords,
                 yrsPerSim,
                 calcCI=False,
                 evd='GEV'):
        """
        Initialise HazardCalculator object.

        :param str configFile: path to TCRM configuration file.
        :param tilegrid: :class:`TileGrid` instance
        :param int numSim: number of simulations created.
        :param int minRecords: minimum number of valid wind speed values required
                               to do fitting.
        :param int yrsPerSim:
        :param boolean calcCI:
        :param str extreme_value_distribution: evd to use. Options so far are GEV
                                               and GPD.
        """
        config = ConfigParser()
        config.read(configFile)

        self.nodata = -9999.
        self.years = np.array(config.get('Hazard',
                                         'Years').split(',')).astype('f')
        self.outputPath = pjoin(config.get('Output', 'Path'), 'hazard')
        self.inputPath = pjoin(config.get('Output', 'Path'), 'windfield')
        gridLimit = config.geteval('Region', 'gridLimit')

        self.numSim = numSim
        self.minRecords = minRecords
        self.yrsPerSim = yrsPerSim
        self.calcCI = calcCI
        if self.calcCI:
            log.debug("Bootstrap confidence intervals will be calculated")
            self.sample_size = config.getint('Hazard', 'SampleSize')
            self.prange = config.getint('Hazard', 'PercentileRange')
        self.evd = evd

        self.tilegrid = tilegrid
        lon, lat = self.tilegrid.getDomainExtent()

        # Create arrays for storing output data:
        self.loc = np.zeros((len(lat), len(lon)), dtype='f')
        self.shp = np.zeros((len(lat), len(lon)), dtype='f')
        self.scale = np.zeros((len(lat), len(lon)), dtype='f')
        self.Rp = np.zeros((len(self.years), len(lat), len(lon)), dtype='f')

        self.RPupper = np.zeros((len(self.years), len(lat), len(lon)),
                                dtype='f')
        self.RPlower = np.zeros((len(self.years), len(lat), len(lon)),
                                dtype='f')

        self.global_atts = {
            'title': ('TCRM hazard simulation - '
                      'return period wind speeds'),
            'tcrm_version': flProgramVersion(),
            'python_version': sys.version
        }

        # Add configuration settings to global attributes:
        for section in config.sections():
            for option in config.options(section):
                key = "{0}_{1}".format(section, option)
                value = config.get(section, option)
                self.global_atts[key] = value
Exemple #14
0
import random

import seaborn as sns
sns.set_context("paper")
figsize = (6.5, 4.5)
sns.set_style("whitegrid")

# Load the configuration file from the TCHA18, then open the database
# and get teh list of available locations.

configFile = "/home/547/cxa547/tcrmconfig/tcrm2.1.ini"
config = ConfigParser()
config.read(configFile)
outputPath = config.get('Output', 'Path')
plotPath = os.path.join(outputPath, 'plots', 'convergence')
NumSimulations = config.getint('TrackGenerator', 'NumSimulations')

db = database.HazardDatabase(configFile)
locations = db.getLocations()
locNameList = list(locations['locName'])

# The following step performs the calculations. First a helper
# function to add nicely formatted grid lines on a logarithmic axis.
# The second function (`plotConvergenceTest`) loads the data from the
# database, then splits into two separate collections (called `d1` and
# `d2`). For each of these, we then calculate empirical ARI values and
# plot alongside each other. We also plot the annual exceedance
# probability as an alternate view on the likelihood of extreme winds.


def addARIGrid(axes):
Exemple #15
0
def loadTrackFile(configFile,
                  trackFile,
                  source,
                  missingValue=0,
                  calculateWindSpeed=True):
    """
    Load TC track data from the given input file, from a specified source.
    The configFile is a configuration file that contains a section called
    'source' that describes the data.
    This returns a collection of :class:`Track` objects that contains
    the details of the TC tracks in the input file.

    :param str configFile: Configuration file with a section ``source``.
    :param str trackFile: Path to a csv-formatted file containing TC data.
    :pararm str source: Name of the source format of the TC data. There
                        *must* be a section in ``configFile`` matching
                        this string, containing the details of the format
                        of the data.
    :param missingValue: Replace all null values in the input data with
                         this value (default=0).
    :param boolean calculateWindSpeed: Calculate maximum wind speed using
                                       a pressure-wind relation described
                                       in :func:`maxWindSpeed`

    :returns: A collection of :class:`Track` objects.
              If any of the variables are not present in the input
              dataset, they are (where possible) calculated
              (date/time/windspeed), sampled from default datasets
              (e.g. environmental pressure) or set to the missing value.

    Example::

      >>> tracks = loadTrackFile('tcrm.ini', 'IBTRaCS.csv', 'IBTrACS' )

    """

    LOG.info("Loading %s" % trackFile)
    inputData = colReadCSV(configFile, trackFile, source)  #,
    #nullValue=missingValue)

    config = ConfigParser()
    config.read(configFile)

    inputSpeedUnits = config.get(source, 'SpeedUnits')
    inputPressureUnits = config.get(source, 'PressureUnits')
    inputLengthUnits = config.get(source, 'LengthUnits')
    inputDateFormat = config.get(source, 'DateFormat')

    if config.getboolean('DataProcess', 'FilterSeasons'):
        startSeason = config.getint('DataProcess', 'StartSeason')
        idx = np.where(inputData['season'] >= startSeason)[0]
        inputData = inputData[idx]

    # Determine the initial TC positions...
    indicator = getInitialPositions(inputData)

    # Sort date/time information
    if 'age' in inputData.dtype.names:
        year, month, day, hour, minute, datetimes = parseAge(
            inputData, indicator)
        timeElapsed = inputData['age']
    else:
        year, month, day, hour, minute, datetimes = parseDates(
            inputData, indicator, inputDateFormat)
        timeElapsed = getTimeElapsed(indicator, year, month, day, hour, minute)

    # Time between observations:
    dt = getTimeDelta(year, month, day, hour, minute)

    # Calculate julian days
    jdays = julianDays(year, month, day, hour, minute)

    lat = np.array(inputData['lat'], 'd')
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)
    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon > 10 degrees
    # or delta_lat > 5 degrees)
    # This avoids two tracks being accidentally combined when seasons and track
    # numbers match but basins are different as occurs in the IBTrACS dataset.
    # This problem can also be prevented if the 'tcserialno' column is
    # specified.
    indicator[np.where(delta_lon > 10)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    pressure = filterPressure(np.array(inputData['pressure'], 'd'),
                              inputPressureUnits, missingValue)
    try:
        windspeed = np.array(inputData['vmax'], 'd')
        novalue_index = np.where(windspeed == sys.maxint)
        windspeed = metutils.convert(windspeed, inputSpeedUnits, "mps")
        windspeed[novalue_index] = missingValue
    except (ValueError, KeyError):
        LOG.debug("No max wind speed data - all values will be zero")
        windspeed = np.zeros(indicator.size, 'f')
    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == missingValue)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = missingValue

    except (ValueError, KeyError):
        LOG.debug("No radius to max wind data - all values will be zero")
        rmax = np.zeros(indicator.size, 'f')

    if 'penv' in inputData.dtype.names:
        penv = np.array(inputData['penv'], 'd')
    else:
        LOG.debug("No ambient MSLP data in this input file")
        LOG.debug("Sampling data from MSLP data defined in "
                  "configuration file")
        # Warning: using sampled data will likely lead to some odd behaviour
        # near the boundary of the MSLP grid boundaries - higher resolution
        # MSLP data will decrease this unusual behaviour.

        try:
            ncfile = cnfGetIniValue(configFile, 'Input', 'MSLPFile')
        except:
            LOG.exception("No input MSLP file specified in configuration")
            raise
        time = getTime(year, month, day, hour, minute)
        penv = ltmPressure(jdays, time, lon, lat, ncfile)

    if 'poci' in inputData.dtype.names:
        poci = np.array(inputData['poci'], 'd')
    else:
        LOG.debug("Determining poci")
        eps = np.random.normal(0, scale=2.5717)
        poci = getPoci(penv, pressure, lat, jdays, eps)

    speed, bearing = getSpeedBearing(indicator,
                                     lon,
                                     lat,
                                     dt,
                                     missingValue=missingValue)

    if calculateWindSpeed:
        windspeed = maxWindSpeed(indicator, dt, lon, lat, pressure, poci)

    TCID = np.cumsum(indicator)

    data = np.empty(len(indicator),
                    dtype={
                        'names': trackFields,
                        'formats': trackTypes
                    })
    for key, value in zip(trackFields, [
            indicator, TCID, year, month, day, hour, minute, timeElapsed,
            datetimes, lon, lat, speed, bearing, pressure, windspeed, rmax,
            poci
    ]):
        data[key] = value

    tracks = []
    n = np.max(TCID)
    for i in range(1, n + 1):
        track = Track(data[TCID == i])
        track.trackId = (i, n)
        track.trackfile = trackFile
        getMinPressure(track, missingValue)
        getMaxWind(track, missingValue)
        tracks.append(track)

    return tracks
Exemple #16
0
def loadTrackFile(configFile, trackFile, source, missingValue=0,
                  calculateWindSpeed=True):
    """
    Load TC track data from the given input file, from a specified source.
    The configFile is a configuration file that contains a section called
    'source' that describes the data.
    This returns a collection of :class:`Track` objects that contains
    the details of the TC tracks in the input file.

    :param str configFile: Configuration file with a section ``source``.
    :param str trackFile: Path to a csv-formatted file containing TC data.
    :pararm str source: Name of the source format of the TC data. There
                        *must* be a section in ``configFile`` matching
                        this string, containing the details of the format
                        of the data.
    :param missingValue: Replace all null values in the input data with
                         this value (default=0).
    :param boolean calculateWindSpeed: Calculate maximum wind speed using
                                       a pressure-wind relation described
                                       in :func:`maxWindSpeed`

    :returns: A collection of :class:`Track` objects. 
              If any of the variables are not present in the input
              dataset, they are (where possible) calculated
              (date/time/windspeed), sampled from default datasets
              (e.g. environmental pressure) or set to the missing value.

    Example::

      >>> tracks = loadTrackFile('tcrm.ini', 'IBTRaCS.csv', 'IBTrACS' )

    """
    
    logger.info("Loading %s" % trackFile)
    inputData = colReadCSV(configFile, trackFile, source) #,
                          #nullValue=missingValue)

    config = ConfigParser()
    config.read(configFile)

    inputSpeedUnits = config.get(source, 'SpeedUnits')
    inputPressureUnits = config.get(source, 'PressureUnits')
    inputLengthUnits = config.get(source, 'LengthUnits')
    inputDateFormat = config.get(source, 'DateFormat')
    
    if config.getboolean('DataProcess', 'FilterSeasons'):
        startSeason = config.getint('DataProcess', 'StartSeason')        
        idx = np.where(inputData['season'] >= startSeason)[0]
        inputData = inputData[idx]
        
    # Determine the initial TC positions...
    indicator = getInitialPositions(inputData)


    # Sort date/time information
    if 'age' in inputData.dtype.names:
        year, month, day, hour, minute, datetimes = parseAge(inputData, indicator)
        timeElapsed = inputData['age']
    else:
        year, month, day, hour, minute, datetimes = parseDates(inputData, indicator,
                                                    inputDateFormat)
        timeElapsed = getTimeElapsed(indicator, year, month, day, hour, minute)
        
    # Time between observations:
    dt = getTimeDelta(year, month, day, hour, minute)

    # Calculate julian days
    jdays = julianDays(year, month, day, hour, minute)

    lat = np.array(inputData['lat'], 'd')
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)
    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon > 10 degrees
    # or delta_lat > 5 degrees)
    # This avoids two tracks being accidentally combined when seasons and track
    # numbers match but basins are different as occurs in the IBTrACS dataset.
    # This problem can also be prevented if the 'tcserialno' column is
    # specified.
    indicator[np.where(delta_lon > 10)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    pressure = filterPressure(np.array(inputData['pressure'], 'd'),
                              inputPressureUnits, missingValue)
    try:
        windspeed = np.array(inputData['vmax'], 'd')
        novalue_index = np.where(windspeed == sys.maxint)
        windspeed = metutils.convert(windspeed, inputSpeedUnits, "mps")
        windspeed[novalue_index] = missingValue
    except (ValueError,KeyError):
        logger.debug("No max wind speed data - all values will be zero")
        windspeed = np.zeros(indicator.size, 'f')
    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == missingValue)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = missingValue

    except (ValueError, KeyError):
        logger.debug("No radius to max wind data - all values will be zero")
        rmax = np.zeros(indicator.size, 'f')

    if 'penv' in inputData.dtype.names:
        penv = np.array(inputData['penv'], 'd')
    else:
        logger.debug("No ambient MSLP data in this input file")
        logger.debug("Sampling data from MSLP data defined in "
                    "configuration file")
        # Warning: using sampled data will likely lead to some odd behaviour
        # near the boundary of the MSLP grid boundaries - higher resolution
        # MSLP data will decrease this unusual behaviour.

        try:
            ncfile = cnfGetIniValue(configFile, 'Input', 'MSLPFile')
        except:
            logger.exception("No input MSLP file specified in configuration")
            raise
        time = getTime(year, month, day, hour, minute)
        penv = ltmPressure(jdays, time, lon, lat, ncfile)

    speed, bearing = getSpeedBearing(indicator, lon, lat, dt,
                                     missingValue=missingValue)

    if calculateWindSpeed:
        windspeed = maxWindSpeed(indicator, dt, lon, lat, pressure, penv)

    TCID = np.cumsum(indicator)

    data = np.empty(len(indicator), 
                        dtype={
                               'names': trackFields,
                               'formats': trackTypes
                               } )
    for key, value in zip(trackFields, [indicator, TCID, year, month,
                                           day, hour, minute, timeElapsed, datetimes,
                                           lon, lat, speed, bearing,
                                           pressure, windspeed, rmax, penv]):
        data[key] = value
        
    tracks = []
    n = np.max(TCID)
    for i in range(1, n + 1):
        track = Track(data[TCID == i])
        track.trackId = (i, n)
        track.trackfile = trackFile
        getMinPressure(track, missingValue)
        getMaxWind(track, missingValue)
        tracks.append(track)

    return tracks
Exemple #17
0
    def processData(self, restrictToWindfieldDomain=False):
        """
        Process raw data into ASCII files that can be read by the main
        components of the system

        :param bool restrictToWindfieldDomain: if True, only process data
            within the wind field domain, otherwise, process data from
            across the track generation domain.

        """
        config = ConfigParser()
        config.read(self.configFile)

        self.logger.info("Running", flModuleName())

        if config.has_option('DataProcess', 'InputFile'):
            inputFile = config.get('DataProcess', 'InputFile')

        if config.has_option('DataProcess', 'Source'):
            source = config.get('DataProcess', 'Source')
            self.logger.info('Loading %s dataset', source)
            fn = config.get(source, 'filename')
            path = config.get(source, 'path')
            inputFile = pjoin(path, fn)

        # If input file has no path information, default to tcrm input folder
        if len(os.path.dirname(inputFile)) == 0:
            inputFile = pjoin(self.tcrm_input_dir, inputFile)

        self.logger.info("Processing", inputFile)

        self.source = config.get('DataProcess', 'Source')

        inputData = colReadCSV(self.configFile, inputFile, self.source)

        inputSpeedUnits = config.get(self.source, 'SpeedUnits')
        inputPressureUnits = config.get(self.source, 'PressureUnits')
        inputLengthUnits = config.get(self.source, 'LengthUnits')
        startSeason = config.getint('DataProcess', 'StartSeason')

        indicator = loadData.getInitialPositions(inputData)
        lat = np.array(inputData['lat'], 'd')
        lon = np.mod(np.array(inputData['lon'], 'd'), 360)

        if restrictToWindfieldDomain:
            # Filter the input arrays to only retain the tracks that
            # pass through the windfield domain.
            CD = CalcTrackDomain(self.configFile)
            self.domain = CD.calcDomainFromTracks(indicator, lon, lat)
            domainIndex = self.extractTracks(indicator, lon, lat)
            inputData = inputData[domainIndex]
            indicator = indicator[domainIndex]
            lon = lon[domainIndex]
            lat = lat[domainIndex]

        if self.progressbar is not None:
            self.progressbar.update(0.125)

        # Sort date/time information
        try:
            dt = np.empty(indicator.size, 'f')
            dt[1:] = np.diff(inputData['age'])
        except (ValueError, KeyError):

            try:
                self.logger.info("Filtering input data by season: season > ", startSeason)
                # Find indicies that satisfy minimum season filter
                idx = np.where(inputData['season'] >= startSeason)[0]
                # Filter records:
                inputData = inputData[idx]
                indicator = indicator[idx]
                lon = lon[idx]
                lat = lat[idx]
            except (ValueError, KeyError):
                pass

            year, month, day, hour, minute, datetimes \
                = loadData.parseDates(inputData, indicator)

            # Time between observations:
            dt = loadData.getTimeDelta(year, month, day, hour, minute)

            # Calculate julian days:
            jdays = loadData.julianDays(year, month, day, hour, minute)

        delta_lon = np.diff(lon)
        delta_lat = np.diff(lat)

        # Split into separate tracks if large jump occurs (delta_lon >
        # 15 degrees or delta_lat > 5 degrees) This avoids two tracks
        # being accidentally combined when seasons and track numbers
        # match but basins are different as occurs in the IBTrACS
        # dataset.  This problem can also be prevented if the
        # 'tcserialno' column is specified.
        indicator[np.where(delta_lon > 15)[0] + 1] = 1
        indicator[np.where(delta_lat > 5)[0] + 1] = 1

        # Save information required for frequency auto-calculation
        try:
            origin_seasonOrYear = np.array(
                inputData['season'], 'i').compress(indicator)
            header = 'Season'
        except (ValueError, KeyError):
            origin_seasonOrYear = year.compress(indicator)
            header = 'Year'

        flSaveFile(self.origin_year, np.transpose(origin_seasonOrYear),
                   header, ',', fmt='%d')

        pressure = np.array(inputData['pressure'], 'd')
        novalue_index = np.where(pressure == sys.maxint)
        pressure = metutils.convert(pressure, inputPressureUnits, "hPa")
        pressure[novalue_index] = sys.maxint

        # Convert any non-physical central pressure values to maximum integer
        # This is required because IBTrACS has a mix of missing value codes
        # (i.e. -999, 0, 9999) in the same global dataset.
        pressure = np.where((pressure < 600) | (pressure > 1100),
                            sys.maxint, pressure)

        if self.progressbar is not None:
            self.progressbar.update(0.25)

        try:
            vmax = np.array(inputData['vmax'], 'd')
        except (ValueError, KeyError):
            self.logger.warning("No max wind speed data")
            vmax = np.empty(indicator.size, 'f')
        else:
            novalue_index = np.where(vmax == sys.maxint)
            vmax = metutils.convert(vmax, inputSpeedUnits, "mps")
            vmax[novalue_index] = sys.maxint

        assert lat.size == indicator.size
        assert lon.size == indicator.size
        assert pressure.size == indicator.size
        #assert vmax.size == indicator.size

        try:
            rmax = np.array(inputData['rmax'])
            novalue_index = np.where(rmax == sys.maxint)
            rmax = metutils.convert(rmax, inputLengthUnits, "km")
            rmax[novalue_index] = sys.maxint

            self._rmax(rmax, indicator)
            self._rmaxRate(rmax, dt, indicator)
        except (ValueError, KeyError):
            self.logger.warning("No rmax data available")

        if self.ncflag:
            self.data['index'] = indicator

        # ieast : parameter used in latLon2Azi
        # FIXME: should be a config setting describing the input data.
        ieast = 1

        # Determine the index of initial cyclone observations, excluding
        # those cyclones that have only one observation. This is used
        # for calculating initial bearing and speed
        indicator2 = np.where(indicator > 0, 1, 0)
        initIndex = np.concatenate([np.where(np.diff(indicator2) ==
                                             -1, 1, 0), [0]])

        # Calculate the bearing and distance (km) of every two
        # consecutive records using ll2azi
        bear_, dist_ = maputils.latLon2Azi(lat, lon, ieast, azimuth=0)
        assert bear_.size == indicator.size - 1
        assert dist_.size == indicator.size - 1
        bear = np.empty(indicator.size, 'f')
        bear[1:] = bear_
        dist = np.empty(indicator.size, 'f')
        dist[1:] = dist_

        self._lonLat(lon, lat, indicator, initIndex)
        self._bearing(bear, indicator, initIndex)
        self._bearingRate(bear, dt, indicator)
        if self.progressbar is not None:
            self.progressbar.update(0.375)
        self._speed(dist, dt, indicator, initIndex)
        self._speedRate(dist, dt, indicator)
        self._pressure(pressure, indicator)
        self._pressureRate(pressure, dt, indicator)
        self._windSpeed(vmax)

        try:
            self._frequency(year, indicator)
            self._juliandays(jdays, indicator, year)
        except (ValueError, KeyError):
            pass

        self.logger.info("Completed ", flModuleName())
        if self.progressbar is not None:
            self.progressbar.update(0.5)