def __init__(self, configFile, tilegrid, numSim, minRecords, yrsPerSim,
             calcCI=False):
    """
    Set up a HazardCalculator from a TCRM configuration file.

    :param str configFile: path to TCRM configuration file.
    :param tilegrid: :class:`TileGrid` instance; its domain extent sets
                     the size of the output arrays.
    :param int numSim: number of simulations created.
    :param int minRecords: minimum number of valid wind speed values
                           required to do fitting.
    :param int yrsPerSim: stored as ``self.yrsPerSim``.
    :param bool calcCI: when True, ``SampleSize`` and ``PercentileRange``
                        are read from the ``Hazard`` section.
    """
    cfg = ConfigParser()
    cfg.read(configFile)

    self.nodata = -9999.

    # 'Years' is a comma-separated list in the configuration file.
    yearTokens = cfg.get('Hazard', 'Years').split(',')
    self.years = np.array(yearTokens).astype('f')

    self.outputPath = pjoin(cfg.get('Output', 'Path'), 'hazard')
    self.inputPath = pjoin(cfg.get('Output', 'Path'), 'windfield')

    # The value is not used below; the read is retained so that this
    # constructor behaves exactly as before.
    gridLimit = cfg.geteval('Region', 'gridLimit')

    self.numSim = numSim
    self.minRecords = minRecords
    self.yrsPerSim = yrsPerSim
    self.calcCI = calcCI
    if self.calcCI:
        log.debug("Bootstrap confidence intervals will be calculated")
        self.sample_size = cfg.getint('Hazard', 'SampleSize')
        self.prange = cfg.getint('Hazard', 'PercentileRange')

    self.tilegrid = tilegrid
    lon, lat = self.tilegrid.getDomainExtent()

    # Create arrays for storing output data:
    gridShape = (len(lat), len(lon))
    cubeShape = (len(self.years), len(lat), len(lon))
    self.loc = np.zeros(gridShape, dtype='f')
    self.shp = np.zeros(gridShape, dtype='f')
    self.scale = np.zeros(gridShape, dtype='f')
    self.Rp = np.zeros(cubeShape, dtype='f')
    self.RPupper = np.zeros(cubeShape, dtype='f')
    self.RPlower = np.zeros(cubeShape, dtype='f')

    self.global_atts = {
        'history': ('TCRM hazard simulation - '
                    'return period wind speeds'),
        'version': flProgramVersion(),
        'Python_ver': sys.version,
    }

    # Add configuration settings to global attributes, one entry per
    # option keyed as "<section>_<option>":
    for section in cfg.sections():
        for option in cfg.options(section):
            attName = "{0}_{1}".format(section, option)
            self.global_atts[attName] = cfg.get(section, option)
def run(configFile, callback=None):
    """
    Run the hazard calculations.

    This will attempt to run the calculation in parallel by tiling the
    domain, but also provides a sane fallback mechanism to execute
    in serial.

    :param str configFile: path to the configuration file.
    :param callback: accepted for interface compatibility; it is not
                     called anywhere in this function.
    """
    global pp

    log.info("Loading hazard calculation settings")

    cfg = ConfigParser()
    cfg.read(configFile)

    windfieldPath = pjoin(cfg.get('Output', 'Path'), 'windfield')
    domainLimits = cfg.geteval('Region', 'gridLimit')
    nSimulations = cfg.getint('TrackGenerator', 'NumSimulations')
    yearsPerSimulation = cfg.getint('TrackGenerator', 'YearsPerSimulation')
    minimumRecords = cfg.getint('Hazard', 'MinimumRecords')
    withCI = cfg.getboolean('Hazard', 'CalculateCI')

    wf_lon, wf_lat = setDomain(windfieldPath)

    pp = attemptParallel()

    log.info("Running hazard calculations")
    tileGrid = TileGrid(domainLimits, wf_lon, wf_lat)
    tileList = getTiles(tileGrid)

    # Barriers bracket the tile processing step; results are saved only
    # after the second barrier.
    pp.barrier()
    calculator = HazardCalculator(configFile, tileGrid, nSimulations,
                                  minimumRecords, yearsPerSimulation,
                                  withCI)
    calculator.dumpHazardFromTiles(tileList)
    pp.barrier()

    calculator.saveHazard()

    log.info("Completed hazard calculation")
def __init__(self, configFile):
    """
    Read output paths, the simulation length and the coastline gates
    from a TCRM configuration file.

    :param str configFile: path to a TCRM configuration file.

    :raises NoOptionError: if the ``Input`` section has no
                           ``CoastlineGates`` option (the error is
                           logged and re-raised).
    """
    config = ConfigParser()
    config.read(configFile)
    self.configFile = configFile

    outputPath = config.get('Output', 'Path')
    self.trackPath = pjoin(outputPath, 'tracks')
    self.plotPath = pjoin(outputPath, 'plots', 'stats')
    self.dataPath = pjoin(outputPath, 'process')

    # Determine TCRM input directory
    tcrm_dir = pathLocator.getRootDirectory()
    self.inputPath = pjoin(tcrm_dir, 'input')

    self.synNumYears = config.getint('TrackGenerator',
                                     'yearspersimulation')

    try:
        gateFile = config.get('Input', 'CoastlineGates')
    except NoOptionError:
        log.exception(("No coastline gate file specified "
                       "in configuration file"))
        raise

    # Columns 1 and 2 of the comma-delimited gate file are passed to
    # convert2vertex; column 0 is not used here.
    gateData = np.genfromtxt(gateFile, delimiter=',')
    self.gates = Int.convert2vertex(gateData[:, 1], gateData[:, 2])

    # self.coast is the gate list with the first gate repeated at the end.
    self.coast = list(self.gates)
    self.coast.append(self.gates[0])
def colReadCSV(configFile, dataFile, source):
    """
    Loads a csv file containing 'column' data into a record (numpy)
    array with columns labelled by 'fields'. There must be a section in
    the ``configFile`` named ``source`` that sets out the format of the
    data file.

    The ``source`` section must provide ``FieldDelimiter``,
    ``NumberOfHeadingLines`` and ``Columns``. ``Columns`` is split on the
    field delimiter; entries named ``skip`` are not loaded. Whitespace
    around each column name is ignored, so ``a, skip, b`` is equivalent
    to ``a,skip,b``.

    :param str configFile: Path to a configuration file that holds details
                           of the input data.
    :param str dataFile: Path to the input file to load.
    :param str source: Name of the source format. There must be a
                       corresponding section in the ``configFile``.

    :returns: A :class:`numpy.ndarray` that contains the input data.
    """
    config = ConfigParser()
    config.read(configFile)
    delimiter = config.get(source, 'FieldDelimiter')
    numHeadingLines = config.getint(source, 'NumberOfHeadingLines')

    # Strip padding so a column list written with spaces after the
    # delimiter still matches 'skip' and yields clean field names.
    cols = [c.strip()
            for c in config.get(source, 'Columns').split(delimiter)]
    usecols = [i for i, c in enumerate(cols) if c != 'skip']
    names = [cols[i] for i in usecols]

    data = np.genfromtxt(dataFile, dtype=None, delimiter=delimiter,
                         usecols=usecols, comments=None,
                         skip_header=numHeadingLines, autostrip=True)
    data.dtype.names = names
    return data
def __init__(self, configFile):
    """
    Calculate density of TC positions on a grid

    Reads the region grid, the output directories and the number of
    years per simulation from the configuration file.

    :param str configFile: path to a TCRM configuration file.
    """
    self.configFile = configFile
    cfg = ConfigParser()
    cfg.read(configFile)

    # Define the grid. The upper limits are padded by 0.1 because
    # np.arange excludes its stop value.
    limits = cfg.geteval('Region', 'gridLimit')
    spacing = cfg.geteval('Region', 'GridSpace')
    self.lon_range = np.arange(limits['xMin'],
                               limits['xMax'] + 0.1,
                               spacing['x'])
    self.lat_range = np.arange(limits['yMin'],
                               limits['yMax'] + 0.1,
                               spacing['y'])

    baseOutput = cfg.get('Output', 'Path')
    self.trackPath = pjoin(baseOutput, 'tracks')
    self.plotPath = pjoin(baseOutput, 'plots', 'stats')
    self.dataPath = pjoin(baseOutput, 'process')

    # Determine TCRM input directory
    self.inputPath = pjoin(pathLocator.getRootDirectory(), 'input')

    self.synNumYears = cfg.getint('TrackGenerator',
                                  'yearspersimulation')
def __init__(self, configFile, autoCalc_gridLimit=None,
             progressbar=None):
    """
    Initialize the data and variables required for the interface

    :param str configFile: path to the configuration file.
    :param autoCalc_gridLimit: grid limit used when the ``StatInterface``
                               section does not define ``gridLimit``.
    :param progressbar: stored as ``self.progressbar``.

    :raises SyntaxError: if the configured ``gridLimit`` cannot be
                         evaluated, or if ``gridSpace``/``gridInc``
                         cannot be evaluated. The error is logged and
                         re-raised.
    """
    self.configFile = configFile
    config = ConfigParser()
    config.read(configFile)

    self.progressbar = progressbar

    log.info("Initialising StatInterface")

    self.kdeType = config.get('StatInterface', 'kdeType')
    self.kde2DType = config.get('StatInterface', 'kde2DType')
    minSamplesCell = config.getint('StatInterface', 'minSamplesCell')
    self.kdeStep = config.getfloat('StatInterface', 'kdeStep')
    self.outputPath = config.get('Output', 'Path')
    self.processPath = pjoin(self.outputPath, 'process')

    missingValue = cnfGetIniValue(self.configFile, 'StatInterface',
                                  'MissingValue', sys.maxint)

    gridLimitStr = cnfGetIniValue(self.configFile, 'StatInterface',
                                  'gridLimit', '')

    # Compare by value: identity comparison against a string literal
    # depends on interpreter string interning.
    if gridLimitStr != '':
        try:
            # NOTE(review): eval() executes whatever text is in the
            # configuration file; only use with trusted configurations.
            self.gridLimit = eval(gridLimitStr)
        except SyntaxError:
            log.exception('Error! gridLimit is not a dictionary')
            # Without a valid gridLimit the object cannot be built, so
            # fail here rather than later with an unrelated
            # AttributeError on self.gridLimit.
            raise
    else:
        self.gridLimit = autoCalc_gridLimit
        log.info('No gridLimit specified - using automatic' +
                 ' selection: ' + str(self.gridLimit))

    try:
        gridSpace = config.geteval('Region', 'gridSpace')
        gridInc = config.geteval('Region', 'gridInc')
    except SyntaxError:
        log.exception('Error! gridSpace or gridInc not dictionaries')
        raise

    self.generateDist = GenerateDistributions(self.configFile,
                                              self.gridLimit,
                                              gridSpace, gridInc,
                                              self.kdeType,
                                              minSamplesCell,
                                              missingValue)
    self.gridSpace = gridSpace
    self.gridInc = gridInc
def __init__(self, configFile):
    """
    Calculate density of TC genesis positions on a grid

    Builds the coordinate ranges, the matching mesh grid, the output
    directories and a list of ``gridCell`` objects covering the region.

    :param str configFile: path to a TCRM configuration file.
    """
    self.configFile = configFile
    cfg = ConfigParser()
    cfg.read(configFile)

    # Define the grid. Coordinates advance in fixed steps of 0.1, while
    # the size of each cell comes from the configured GridSpace.
    limits = cfg.geteval('Region', 'gridLimit')
    spacing = cfg.geteval('Region', 'GridSpace')
    self.lon_range = np.arange(limits['xMin'], limits['xMax'] + 0.1, 0.1)
    self.lat_range = np.arange(limits['yMin'], limits['yMax'] + 0.1, 0.1)
    self.X, self.Y = np.meshgrid(self.lon_range, self.lat_range)

    baseOutput = cfg.get('Output', 'Path')
    self.trackPath = pjoin(baseOutput, 'tracks')
    self.plotPath = pjoin(baseOutput, 'plots', 'stats')
    self.dataPath = pjoin(baseOutput, 'process')

    # Determine TCRM input directory
    self.inputPath = pjoin(pathLocator.getRootDirectory(), 'input')

    self.synNumYears = cfg.getint('TrackGenerator',
                                  'yearspersimulation')

    # One cell per (lon, lat) pair, excluding the last coordinate on each
    # axis. Cells are numbered in creation order (longitude outermost),
    # so the running list length is the next cell number.
    self.gridCells = []
    for k, west in enumerate(self.lon_range[:-1]):
        for l, south in enumerate(self.lat_range[:-1]):
            north = south + spacing['y']
            east = west + spacing['x']
            cell = gridCell(west, south, east, north,
                            len(self.gridCells), (k, l))
            self.gridCells.append(cell)
def __init__(self, configFile):
    """
    Set up the grid, output paths, crossing gates and global attributes
    for the longitude crossing rate calculations.

    :param str configFile: path to a TCRM configuration file.
    """
    self.configFile = configFile
    cfg = ConfigParser()
    cfg.read(configFile)

    # Define the grid:
    limits = cfg.geteval('Region', 'gridLimit')
    spacing = cfg.geteval('Region', 'GridSpace')
    self.lon_range = np.arange(limits['xMin'],
                               limits['xMax'] + 0.1,
                               spacing['x'])
    self.lat_range = np.arange(limits['yMin'],
                               limits['yMax'] + 0.1,
                               spacing['y'])

    baseOutput = cfg.get('Output', 'Path')
    self.trackPath = pjoin(baseOutput, 'tracks')
    self.plotPath = pjoin(baseOutput, 'plots', 'stats')
    self.dataPath = pjoin(baseOutput, 'process')

    # Determine TCRM input directory
    self.inputPath = pjoin(pathLocator.getRootDirectory(), 'input')

    self.synNumYears = cfg.getint('TrackGenerator',
                                  'yearspersimulation')

    # Longitude crossing gates: spaced by 10 in longitude and by 2 in
    # latitude across the extent of the grid.
    lonLow = self.lon_range.min()
    lonHigh = self.lon_range.max() + 0.5
    self.gateLons = np.arange(lonLow, lonHigh, 10.)
    latLow = self.lat_range.min()
    latHigh = self.lat_range.max() + 0.5
    self.gateLats = np.arange(latLow, latHigh, 2.)

    # Add configuration settings to global attributes, keyed as
    # "<section>_<option>":
    self.gatts = {
        'history': "Longitude crossing rates for TCRM simulation",
        'version': flProgramVersion(),
    }
    for section in cfg.sections():
        for option in cfg.options(section):
            attName = "{0}_{1}".format(section, option)
            self.gatts[attName] = cfg.get(section, option)
def colReadCSV(configFile, dataFile, source):
    """
    Loads a csv file containing 'column' data into a record (numpy)
    array with columns labelled by 'fields'.

    The ``source`` section of ``configFile`` must provide
    ``FieldDelimiter``, ``NumberOfHeadingLines`` and ``Columns``.
    ``Columns`` is split on the field delimiter; entries named ``skip``
    are not loaded. Whitespace around each column name is ignored, so
    ``a, skip, b`` is equivalent to ``a,skip,b``.

    :param str configFile: path to the configuration file describing the
                           input format.
    :param str dataFile: path to the input file to load.
    :param str source: name of the configuration section to use.

    :returns: A :class:`numpy.ndarray` that contains the input data.
    """
    config = ConfigParser()
    config.read(configFile)
    delimiter = config.get(source, 'FieldDelimiter')
    numHeadingLines = config.getint(source, 'NumberOfHeadingLines')

    # Strip padding so a column list written with spaces after the
    # delimiter still matches 'skip' and yields clean field names.
    cols = [c.strip()
            for c in config.get(source, 'Columns').split(delimiter)]
    usecols = [i for i, c in enumerate(cols) if c != 'skip']
    names = [cols[i] for i in usecols]

    data = np.genfromtxt(dataFile, dtype=None, delimiter=delimiter,
                         usecols=usecols, comments=None,
                         skip_header=numHeadingLines, autostrip=True)
    data.dtype.names = names
    return data
def __init__(self, configFile):
    """
    Calculate density of TC genesis positions on a grid

    Stores the configured grid limit and grid spacing on the instance,
    then builds the coordinate ranges, the matching mesh grid, the
    output directories and a list of ``gridCell`` objects.

    :param str configFile: path to a TCRM configuration file.
    """
    self.configFile = configFile
    cfg = ConfigParser()
    cfg.read(configFile)

    # Define the grid. Coordinates advance in fixed steps of 0.1, while
    # the size of each cell comes from the configured GridSpace.
    self.gridLimit = cfg.geteval("Region", "gridLimit")
    self.gridSpace = cfg.geteval("Region", "GridSpace")
    self.lon_range = np.arange(self.gridLimit["xMin"],
                               self.gridLimit["xMax"] + 0.1,
                               0.1)
    self.lat_range = np.arange(self.gridLimit["yMin"],
                               self.gridLimit["yMax"] + 0.1,
                               0.1)
    self.X, self.Y = np.meshgrid(self.lon_range, self.lat_range)

    baseOutput = cfg.get("Output", "Path")
    self.trackPath = pjoin(baseOutput, "tracks")
    self.plotPath = pjoin(baseOutput, "plots", "stats")
    self.dataPath = pjoin(baseOutput, "process")

    # Determine TCRM input directory
    self.inputPath = pjoin(pathLocator.getRootDirectory(), "input")

    self.synNumYears = cfg.getint("TrackGenerator",
                                  "yearspersimulation")

    # One cell per (lon, lat) pair, excluding the last coordinate on each
    # axis. Cells are numbered in creation order (longitude outermost),
    # so the running list length is the next cell number.
    self.gridCells = []
    for k, west in enumerate(self.lon_range[:-1]):
        for l, south in enumerate(self.lat_range[:-1]):
            north = south + self.gridSpace["y"]
            east = west + self.gridSpace["x"]
            cell = gridCell(west, south, east, north,
                            len(self.gridCells), (k, l))
            self.gridCells.append(cell)
def __init__(self, configFile):
    """
    Set up the grid, output paths, crossing gates and global attributes
    for the longitude crossing rate calculations.

    :param str configFile: path to a TCRM configuration file.
    """
    self.configFile = configFile
    cfg = ConfigParser()
    cfg.read(configFile)

    # Define the grid:
    limits = cfg.geteval("Region", "gridLimit")
    spacing = cfg.geteval("Region", "GridSpace")
    self.lon_range = np.arange(limits["xMin"],
                               limits["xMax"] + 0.1,
                               spacing["x"])
    self.lat_range = np.arange(limits["yMin"],
                               limits["yMax"] + 0.1,
                               spacing["y"])

    baseOutput = cfg.get("Output", "Path")
    self.trackPath = pjoin(baseOutput, "tracks")
    self.plotPath = pjoin(baseOutput, "plots", "stats")
    self.dataPath = pjoin(baseOutput, "process")

    # Determine TCRM input directory
    self.inputPath = pjoin(pathLocator.getRootDirectory(), "input")

    self.synNumYears = cfg.getint("TrackGenerator",
                                  "yearspersimulation")

    # Longitude crossing gates: spaced by 10 in longitude and by 2 in
    # latitude across the extent of the grid.
    lonLow = self.lon_range.min()
    lonHigh = self.lon_range.max() + 0.5
    self.gateLons = np.arange(lonLow, lonHigh, 10.0)
    latLow = self.lat_range.min()
    latHigh = self.lat_range.max() + 0.5
    self.gateLats = np.arange(latLow, latHigh, 2.0)

    # Add configuration settings to global attributes, keyed as
    # "<section>_<option>":
    self.gatts = {
        "history": "Longitude crossing rates for TCRM simulation",
        "version": flProgramVersion(),
    }
    for section in cfg.sections():
        for option in cfg.options(section):
            attName = "{0}_{1}".format(section, option)
            self.gatts[attName] = cfg.get(section, option)
def processData(self, restrictToWindfieldDomain=False):
    """
    Process raw data into ASCII files that can be read by the main
    components of the system

    The input file is taken from the ``DataProcess`` section of the
    configuration (``InputFile``, or ``filename``/``path`` of the section
    named by ``Source``). The data are loaded with :func:`colReadCSV`,
    optionally restricted to the wind field domain, converted to
    hPa / m/s / km and then passed to the private ``_lonLat``,
    ``_bearing``, ``_speed``, ``_pressure``, ``_windSpeed`` (and related)
    methods of this object.

    :param bool restrictToWindfieldDomain: if True, only process data
        within the wind field domain, otherwise, process data from
        across the track generation domain.
    """
    config = ConfigParser()
    config.read(self.configFile)

    self.logger.info("Running %s" % flModuleName())

    # NOTE(review): inputFile is only bound if at least one of the two
    # options below is present - confirm that is always the case.
    if config.has_option('DataProcess', 'InputFile'):
        inputFile = config.get('DataProcess', 'InputFile')

    # A 'Source' setting takes precedence: it overwrites any InputFile
    # value read above.
    if config.has_option('DataProcess', 'Source'):
        source = config.get('DataProcess', 'Source')
        self.logger.info('Loading %s dataset', source)
        fn = config.get(source, 'filename')
        path = config.get(source, 'path')
        inputFile = pjoin(path, fn)

    # If input file has no path information, default to tcrm input folder
    if len(os.path.dirname(inputFile)) == 0:
        inputFile = pjoin(self.tcrm_input_dir, inputFile)

    self.logger.info("Processing %s" % inputFile)

    # 'Source' is read unconditionally here, so it is effectively a
    # required option.
    self.source = config.get('DataProcess', 'Source')

    inputData = colReadCSV(self.configFile, inputFile, self.source)

    inputSpeedUnits = config.get(self.source, 'SpeedUnits')
    inputPressureUnits = config.get(self.source, 'PressureUnits')
    inputLengthUnits = config.get(self.source, 'LengthUnits')
    startSeason = config.getint('DataProcess', 'StartSeason')

    indicator = loadData.getInitialPositions(inputData)
    lat = np.array(inputData['lat'], 'd')
    # Longitudes are wrapped into the range [0, 360).
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)

    if restrictToWindfieldDomain:
        # Filter the input arrays to only retain the tracks that
        # pass through the windfield domain.
        CD = CalcTrackDomain(self.configFile)
        self.domain = CD.calcDomainFromTracks(indicator, lon, lat)
        domainIndex = self.extractTracks(indicator, lon, lat)
        inputData = inputData[domainIndex]
        indicator = indicator[domainIndex]
        lon = lon[domainIndex]
        lat = lat[domainIndex]

    if self.progressbar is not None:
        self.progressbar.update(0.125)

    # Sort date/time information
    try:
        # Preferred path: derive the time step from an 'age' column.
        # dt[0] is left as allocated by np.empty (not initialised).
        dt = np.empty(indicator.size, 'f')
        dt[1:] = np.diff(inputData['age'])
    except (ValueError, KeyError):
        # Fallback path when 'age' is unavailable.
        # NOTE(review): year and jdays appear to be bound only on this
        # fallback path, yet they are used further down - confirm the
        # 'age' path is handled as intended.
        try:
            self.logger.info("Filtering input data by season: season > %d"%startSeason)
            # Find indices that satisfy minimum season filter
            idx = np.where(inputData['season'] >= startSeason)[0]
            # Filter records:
            inputData = inputData[idx]
            indicator = indicator[idx]
            lon = lon[idx]
            lat = lat[idx]
        except (ValueError, KeyError):
            # Season filtering is skipped when it cannot be applied.
            pass

        year, month, day, hour, minute, datetimes \
            = loadData.parseDates(inputData, indicator)

        # Time between observations:
        dt = loadData.getTimeDelta(year, month, day, hour, minute)

        # Calculate julian days:
        jdays = loadData.julianDays(year, month, day, hour, minute)

    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon >
    # 15 degrees or delta_lat > 5 degrees) This avoids two tracks
    # being accidentally combined when seasons and track numbers
    # match but basins are different as occurs in the IBTrACS
    # dataset. This problem can also be prevented if the
    # 'tcserialno' column is specified.
    # NOTE(review): the comparisons are signed, so only positive jumps
    # trigger a split - confirm this is intended.
    indicator[np.where(delta_lon > 15)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    # Save information required for frequency auto-calculation
    try:
        origin_seasonOrYear = np.array(
            inputData['season'], 'i').compress(indicator)
        header = 'Season'
    except (ValueError, KeyError):
        # No usable 'season' column: fall back to the parsed year.
        origin_seasonOrYear = year.compress(indicator)
        header = 'Year'

    flSaveFile(self.origin_year, np.transpose(origin_seasonOrYear),
               header, ',', fmt='%d')

    pressure = np.array(inputData['pressure'], 'd')
    # Remember where the missing-value marker (sys.maxint) sits so it can
    # be restored after unit conversion.
    novalue_index = np.where(pressure == sys.maxint)
    pressure = metutils.convert(pressure, inputPressureUnits, "hPa")
    pressure[novalue_index] = sys.maxint

    # Convert any non-physical central pressure values to maximum integer
    # This is required because IBTrACS has a mix of missing value codes
    # (i.e. -999, 0, 9999) in the same global dataset.
    pressure = np.where((pressure < 600) | (pressure > 1100),
                        sys.maxint, pressure)

    if self.progressbar is not None:
        self.progressbar.update(0.25)

    try:
        vmax = np.array(inputData['vmax'], 'd')
    except (ValueError, KeyError):
        self.logger.warning("No max wind speed data")
        # Placeholder array; its contents are not initialised.
        vmax = np.empty(indicator.size, 'f')
    else:
        novalue_index = np.where(vmax == sys.maxint)
        vmax = metutils.convert(vmax, inputSpeedUnits, "mps")
        vmax[novalue_index] = sys.maxint

    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size
    #assert vmax.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == sys.maxint)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = sys.maxint

        self._rmax(rmax, indicator)
        self._rmaxRate(rmax, dt, indicator)
    except (ValueError, KeyError):
        self.logger.warning("No rmax data available")

    if self.ncflag:
        self.data['index'] = indicator

    # ieast : parameter used in latLon2Azi
    # FIXME: should be a config setting describing the input data.
    ieast = 1

    # Determine the index of initial cyclone observations, excluding
    # those cyclones that have only one observation. This is used
    # for calculating initial bearing and speed
    indicator2 = np.where(indicator > 0, 1, 0)
    initIndex = np.concatenate([np.where(np.diff(indicator2) == -1, 1, 0),
                                [0]])

    # Calculate the bearing and distance (km) of every two
    # consecutive records using ll2azi
    bear_, dist_ = maputils.latLon2Azi(lat, lon, ieast, azimuth=0)
    assert bear_.size == indicator.size - 1
    assert dist_.size == indicator.size - 1
    # Pad to the record count; element 0 is left uninitialised.
    bear = np.empty(indicator.size, 'f')
    bear[1:] = bear_
    dist = np.empty(indicator.size, 'f')
    dist[1:] = dist_

    self._lonLat(lon, lat, indicator, initIndex)
    self._bearing(bear, indicator, initIndex)
    self._bearingRate(bear, dt, indicator)
    if self.progressbar is not None:
        self.progressbar.update(0.375)
    self._speed(dist, dt, indicator, initIndex)
    self._speedRate(dist, dt, indicator)
    self._pressure(pressure, indicator)
    self._pressureRate(pressure, dt, indicator)
    self._windSpeed(vmax)

    try:
        self._frequency(year, indicator)
        self._juliandays(jdays, indicator, year)
    except (ValueError, KeyError):
        # Frequency / julian day output is best-effort.
        pass

    self.logger.info("Completed %s" % flModuleName())
    if self.progressbar is not None:
        self.progressbar.update(0.5)
def __init__(self, configFile, tilegrid, numSim, minRecords, yrsPerSim,
             calcCI=False, evd='GEV'):
    """
    Set up a HazardCalculator from a TCRM configuration file.

    :param str configFile: path to TCRM configuration file.
    :param tilegrid: :class:`TileGrid` instance; its domain extent sets
                     the size of the output arrays.
    :param int numSim: number of simulations created.
    :param int minRecords: minimum number of valid wind speed values
                           required to do fitting.
    :param int yrsPerSim: stored as ``self.yrsPerSim``.
    :param bool calcCI: when True, ``SampleSize`` and ``PercentileRange``
                        are read from the ``Hazard`` section.
    :param str evd: extreme value distribution to use. Options so far
                    are GEV and GPD.
    """
    cfg = ConfigParser()
    cfg.read(configFile)

    self.nodata = -9999.

    # 'Years' is a comma-separated list in the configuration file.
    yearTokens = cfg.get('Hazard', 'Years').split(',')
    self.years = np.array(yearTokens).astype('f')

    self.outputPath = pjoin(cfg.get('Output', 'Path'), 'hazard')
    self.inputPath = pjoin(cfg.get('Output', 'Path'), 'windfield')

    # The value is not used below; the read is retained so that this
    # constructor behaves exactly as before.
    gridLimit = cfg.geteval('Region', 'gridLimit')

    self.numSim = numSim
    self.minRecords = minRecords
    self.yrsPerSim = yrsPerSim
    self.calcCI = calcCI
    if self.calcCI:
        log.debug("Bootstrap confidence intervals will be calculated")
        self.sample_size = cfg.getint('Hazard', 'SampleSize')
        self.prange = cfg.getint('Hazard', 'PercentileRange')

    self.evd = evd

    self.tilegrid = tilegrid
    lon, lat = self.tilegrid.getDomainExtent()

    # Create arrays for storing output data:
    gridShape = (len(lat), len(lon))
    cubeShape = (len(self.years), len(lat), len(lon))
    self.loc = np.zeros(gridShape, dtype='f')
    self.shp = np.zeros(gridShape, dtype='f')
    self.scale = np.zeros(gridShape, dtype='f')
    self.Rp = np.zeros(cubeShape, dtype='f')
    self.RPupper = np.zeros(cubeShape, dtype='f')
    self.RPlower = np.zeros(cubeShape, dtype='f')

    self.global_atts = {
        'title': ('TCRM hazard simulation - '
                  'return period wind speeds'),
        'tcrm_version': flProgramVersion(),
        'python_version': sys.version,
    }

    # Add configuration settings to global attributes, one entry per
    # option keyed as "<section>_<option>":
    for section in cfg.sections():
        for option in cfg.options(section):
            attName = "{0}_{1}".format(section, option)
            self.global_atts[attName] = cfg.get(section, option)
import random
import seaborn as sns

# Plot styling shared by the figures produced further down.
sns.set_context("paper")
figsize = (6.5, 4.5)
sns.set_style("whitegrid")

# Load the configuration file from the TCHA18, then open the database
# and get the list of available locations.
# NOTE(review): the configuration path is hard-coded to one user's home
# directory - adjust before running elsewhere.
configFile = "/home/547/cxa547/tcrmconfig/tcrm2.1.ini"
config = ConfigParser()
config.read(configFile)

outputPath = config.get('Output', 'Path')
plotPath = os.path.join(outputPath, 'plots', 'convergence')
NumSimulations = config.getint('TrackGenerator', 'NumSimulations')

db = database.HazardDatabase(configFile)
locations = db.getLocations()
locNameList = list(locations['locName'])

# The following step performs the calculations. First a helper
# function to add nicely formatted grid lines on a logarithmic axis.
# The second function (`plotConvergenceTest`) loads the data from the
# database, then splits into two separate collections (called `d1` and
# `d2`). For each of these, we then calculate empirical ARI values and
# plot alongside each other. We also plot the annual exceedance
# probability as an alternate view on the likelihood of extreme winds.
def addARIGrid(axes):
def loadTrackFile(configFile, trackFile, source, missingValue=0,
                  calculateWindSpeed=True):
    """
    Load TC track data from the given input file, from a specified source.
    The configFile is a configuration file that contains a section called
    'source' that describes the data.
    This returns a collection of :class:`Track` objects that contains
    the details of the TC tracks in the input file.

    :param str configFile: Configuration file with a section ``source``.
    :param str trackFile: Path to a csv-formatted file containing TC data.
    :param str source: Name of the source format of the TC data. There
                       *must* be a section in ``configFile`` matching
                       this string, containing the details of the format
                       of the data.
    :param missingValue: Replace all null values in the input data with
                         this value (default=0).
    :param boolean calculateWindSpeed: Calculate maximum wind speed using
                                       a pressure-wind relation described
                                       in :func:`maxWindSpeed`

    :returns: A collection of :class:`Track` objects.
              If any of the variables are not present in the input
              dataset, they are (where possible) calculated
              (date/time/windspeed), sampled from default datasets
              (e.g. environmental pressure) or set to the missing value.

    Example::

      >>> tracks = loadTrackFile('tcrm.ini', 'IBTRaCS.csv', 'IBTrACS' )

    """

    LOG.info("Loading %s" % trackFile)
    inputData = colReadCSV(configFile, trackFile, source) #,
                          #nullValue=missingValue)

    config = ConfigParser()
    config.read(configFile)

    inputSpeedUnits = config.get(source, 'SpeedUnits')
    inputPressureUnits = config.get(source, 'PressureUnits')
    inputLengthUnits = config.get(source, 'LengthUnits')
    inputDateFormat = config.get(source, 'DateFormat')

    # Optionally drop every record from before the configured start season.
    if config.getboolean('DataProcess', 'FilterSeasons'):
        startSeason = config.getint('DataProcess', 'StartSeason')
        idx = np.where(inputData['season'] >= startSeason)[0]
        inputData = inputData[idx]

    # Determine the initial TC positions...
    indicator = getInitialPositions(inputData)

    # Sort date/time information
    # An 'age' column, when present, is used directly as the elapsed time;
    # otherwise dates are parsed with the configured date format.
    if 'age' in inputData.dtype.names:
        year, month, day, hour, minute, datetimes = parseAge(
            inputData, indicator)
        timeElapsed = inputData['age']
    else:
        year, month, day, hour, minute, datetimes = parseDates(
            inputData, indicator, inputDateFormat)
        timeElapsed = getTimeElapsed(indicator, year, month, day, hour,
                                     minute)

    # Time between observations:
    dt = getTimeDelta(year, month, day, hour, minute)

    # Calculate julian days
    jdays = julianDays(year, month, day, hour, minute)

    lat = np.array(inputData['lat'], 'd')
    # Longitudes are wrapped into the range [0, 360).
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)
    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon > 10 degrees
    # or delta_lat > 5 degrees)
    # This avoids two tracks being accidentally combined when seasons and track
    # numbers match but basins are different as occurs in the IBTrACS dataset.
    # This problem can also be prevented if the 'tcserialno' column is
    # specified.
    # NOTE(review): the comparisons are signed, so only positive jumps
    # trigger a split - confirm this is intended.
    indicator[np.where(delta_lon > 10)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    pressure = filterPressure(np.array(inputData['pressure'], 'd'),
                              inputPressureUnits, missingValue)
    try:
        windspeed = np.array(inputData['vmax'], 'd')
        # Values equal to sys.maxint are treated as missing: their
        # positions are recorded before unit conversion and overwritten
        # with missingValue afterwards.
        novalue_index = np.where(windspeed == sys.maxint)
        windspeed = metutils.convert(windspeed, inputSpeedUnits, "mps")
        windspeed[novalue_index] = missingValue
    except (ValueError, KeyError):
        LOG.debug("No max wind speed data - all values will be zero")
        windspeed = np.zeros(indicator.size, 'f')

    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        # Unlike wind speed, missing rmax entries are identified by
        # comparing against missingValue itself.
        novalue_index = np.where(rmax == missingValue)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = missingValue
    except (ValueError, KeyError):
        LOG.debug("No radius to max wind data - all values will be zero")
        rmax = np.zeros(indicator.size, 'f')

    if 'penv' in inputData.dtype.names:
        penv = np.array(inputData['penv'], 'd')
    else:
        LOG.debug("No ambient MSLP data in this input file")
        LOG.debug("Sampling data from MSLP data defined in "
                  "configuration file")
        # Warning: using sampled data will likely lead to some odd behaviour
        # near the boundary of the MSLP grid boundaries - higher resolution
        # MSLP data will decrease this unusual behaviour.

        try:
            ncfile = cnfGetIniValue(configFile, 'Input', 'MSLPFile')
        except:
            # Any failure here is logged and re-raised unchanged.
            LOG.exception("No input MSLP file specified in configuration")
            raise
        time = getTime(year, month, day, hour, minute)
        penv = ltmPressure(jdays, time, lon, lat, ncfile)

    if 'poci' in inputData.dtype.names:
        poci = np.array(inputData['poci'], 'd')
    else:
        LOG.debug("Determining poci")
        # A single random draw is made and passed to getPoci, so results
        # are not reproducible unless numpy's random state is seeded.
        eps = np.random.normal(0, scale=2.5717)
        poci = getPoci(penv, pressure, lat, jdays, eps)

    speed, bearing = getSpeedBearing(indicator, lon, lat, dt,
                                     missingValue=missingValue)

    # When requested, this replaces any wind speed read from the input.
    if calculateWindSpeed:
        windspeed = maxWindSpeed(indicator, dt, lon, lat, pressure, poci)

    # indicator is 1 at the first record of each track, so the cumulative
    # sum labels every record with a 1-based track number.
    TCID = np.cumsum(indicator)

    data = np.empty(len(indicator),
                    dtype={
                        'names': trackFields,
                        'formats': trackTypes
                    })
    # The list below is paired positionally with trackFields.
    for key, value in zip(trackFields, [
            indicator, TCID, year, month, day, hour, minute, timeElapsed,
            datetimes, lon, lat, speed, bearing, pressure, windspeed, rmax,
            poci
    ]):
        data[key] = value

    tracks = []
    n = np.max(TCID)
    for i in range(1, n + 1):
        track = Track(data[TCID == i])
        # trackId is (track number, total number of tracks).
        track.trackId = (i, n)
        track.trackfile = trackFile
        getMinPressure(track, missingValue)
        getMaxWind(track, missingValue)
        tracks.append(track)

    return tracks
def loadTrackFile(configFile, trackFile, source, missingValue=0,
                  calculateWindSpeed=True):
    """
    Load TC track data from the given input file, from a specified source.
    The configFile is a configuration file that contains a section called
    'source' that describes the data.
    This returns a collection of :class:`Track` objects that contains
    the details of the TC tracks in the input file.

    :param str configFile: Configuration file with a section ``source``.
    :param str trackFile: Path to a csv-formatted file containing TC data.
    :param str source: Name of the source format of the TC data. There
                       *must* be a section in ``configFile`` matching
                       this string, containing the details of the format
                       of the data.
    :param missingValue: Replace all null values in the input data with
                         this value (default=0).
    :param boolean calculateWindSpeed: Calculate maximum wind speed using
                                       a pressure-wind relation described
                                       in :func:`maxWindSpeed`

    :returns: A collection of :class:`Track` objects.
              If any of the variables are not present in the input
              dataset, they are (where possible) calculated
              (date/time/windspeed), sampled from default datasets
              (e.g. environmental pressure) or set to the missing value.

    Example::

      >>> tracks = loadTrackFile('tcrm.ini', 'IBTRaCS.csv', 'IBTrACS' )

    """

    logger.info("Loading %s" % trackFile)
    inputData = colReadCSV(configFile, trackFile, source) #,
                          #nullValue=missingValue)

    config = ConfigParser()
    config.read(configFile)

    inputSpeedUnits = config.get(source, 'SpeedUnits')
    inputPressureUnits = config.get(source, 'PressureUnits')
    inputLengthUnits = config.get(source, 'LengthUnits')
    inputDateFormat = config.get(source, 'DateFormat')

    # Optionally drop every record from before the configured start season.
    if config.getboolean('DataProcess', 'FilterSeasons'):
        startSeason = config.getint('DataProcess', 'StartSeason')
        idx = np.where(inputData['season'] >= startSeason)[0]
        inputData = inputData[idx]

    # Determine the initial TC positions...
    indicator = getInitialPositions(inputData)

    # Sort date/time information
    # An 'age' column, when present, is used directly as the elapsed time;
    # otherwise dates are parsed with the configured date format.
    if 'age' in inputData.dtype.names:
        year, month, day, hour, minute, datetimes = parseAge(inputData,
                                                             indicator)
        timeElapsed = inputData['age']
    else:
        year, month, day, hour, minute, datetimes = parseDates(inputData,
                                                               indicator,
                                                               inputDateFormat)
        timeElapsed = getTimeElapsed(indicator, year, month, day, hour, minute)

    # Time between observations:
    dt = getTimeDelta(year, month, day, hour, minute)

    # Calculate julian days
    jdays = julianDays(year, month, day, hour, minute)

    lat = np.array(inputData['lat'], 'd')
    # Longitudes are wrapped into the range [0, 360).
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)
    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon > 10 degrees
    # or delta_lat > 5 degrees)
    # This avoids two tracks being accidentally combined when seasons and track
    # numbers match but basins are different as occurs in the IBTrACS dataset.
    # This problem can also be prevented if the 'tcserialno' column is
    # specified.
    # NOTE(review): the comparisons are signed, so only positive jumps
    # trigger a split - confirm this is intended.
    indicator[np.where(delta_lon > 10)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    pressure = filterPressure(np.array(inputData['pressure'], 'd'),
                              inputPressureUnits, missingValue)
    try:
        windspeed = np.array(inputData['vmax'], 'd')
        # Values equal to sys.maxint are treated as missing: their
        # positions are recorded before unit conversion and overwritten
        # with missingValue afterwards.
        novalue_index = np.where(windspeed == sys.maxint)
        windspeed = metutils.convert(windspeed, inputSpeedUnits, "mps")
        windspeed[novalue_index] = missingValue
    except (ValueError,KeyError):
        logger.debug("No max wind speed data - all values will be zero")
        windspeed = np.zeros(indicator.size, 'f')

    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        # Unlike wind speed, missing rmax entries are identified by
        # comparing against missingValue itself.
        novalue_index = np.where(rmax == missingValue)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = missingValue
    except (ValueError, KeyError):
        logger.debug("No radius to max wind data - all values will be zero")
        rmax = np.zeros(indicator.size, 'f')

    if 'penv' in inputData.dtype.names:
        penv = np.array(inputData['penv'], 'd')
    else:
        logger.debug("No ambient MSLP data in this input file")
        logger.debug("Sampling data from MSLP data defined in "
                     "configuration file")
        # Warning: using sampled data will likely lead to some odd behaviour
        # near the boundary of the MSLP grid boundaries - higher resolution
        # MSLP data will decrease this unusual behaviour.

        try:
            ncfile = cnfGetIniValue(configFile, 'Input', 'MSLPFile')
        except:
            # Any failure here is logged and re-raised unchanged.
            logger.exception("No input MSLP file specified in configuration")
            raise
        time = getTime(year, month, day, hour, minute)
        penv = ltmPressure(jdays, time, lon, lat, ncfile)

    speed, bearing = getSpeedBearing(indicator, lon, lat, dt,
                                     missingValue=missingValue)

    # When requested, this replaces any wind speed read from the input.
    if calculateWindSpeed:
        windspeed = maxWindSpeed(indicator, dt, lon, lat, pressure, penv)

    # indicator is 1 at the first record of each track, so the cumulative
    # sum labels every record with a 1-based track number.
    TCID = np.cumsum(indicator)

    data = np.empty(len(indicator),
                    dtype={
                        'names': trackFields,
                        'formats': trackTypes
                    } )
    # The list below is paired positionally with trackFields.
    for key, value in zip(trackFields, [indicator, TCID, year, month,
                                        day, hour, minute, timeElapsed,
                                        datetimes, lon, lat, speed, bearing,
                                        pressure, windspeed, rmax, penv]):
        data[key] = value

    tracks = []
    n = np.max(TCID)
    for i in range(1, n + 1):
        track = Track(data[TCID == i])
        # trackId is (track number, total number of tracks).
        track.trackId = (i, n)
        track.trackfile = trackFile
        getMinPressure(track, missingValue)
        getMaxWind(track, missingValue)
        tracks.append(track)

    return tracks
def processData(self, restrictToWindfieldDomain=False):
    """
    Process raw data into ASCII files that can be read by the main
    components of the system.

    The input track file named in the configuration is read, converted
    to common units (hPa, m/s, km) and split into individual tracks.
    The derived quantities are then handed to the ``_lonLat``,
    ``_bearing``, ``_speed``, ``_pressure``, ``_windSpeed``, ``_rmax``,
    ``_frequency`` and ``_juliandays`` family of methods, and the
    season (or year) of each track origin is saved to
    ``self.origin_year``.

    :param bool restrictToWindfieldDomain: if True, only process data
        within the wind field domain, otherwise, process data from
        across the track generation domain.

    :raises AssertionError: if the latitude, longitude or pressure
        arrays do not have the same size as the track indicator array.
    """
    config = ConfigParser()
    config.read(self.configFile)

    # Logging calls need a %s placeholder for each extra argument;
    # without one the record cannot be formatted and is lost.
    self.logger.info("Running %s", flModuleName())

    if config.has_option('DataProcess', 'InputFile'):
        inputFile = config.get('DataProcess', 'InputFile')

    # A 'Source' option takes precedence over 'InputFile': the file
    # name and path are then read from the section named by the source.
    if config.has_option('DataProcess', 'Source'):
        source = config.get('DataProcess', 'Source')
        self.logger.info('Loading %s dataset', source)
        fn = config.get(source, 'filename')
        path = config.get(source, 'path')
        inputFile = pjoin(path, fn)

    # If input file has no path information, default to tcrm input folder
    if not os.path.dirname(inputFile):
        inputFile = pjoin(self.tcrm_input_dir, inputFile)

    self.logger.info("Processing %s", inputFile)

    self.source = config.get('DataProcess', 'Source')

    inputData = colReadCSV(self.configFile, inputFile, self.source)

    inputSpeedUnits = config.get(self.source, 'SpeedUnits')
    inputPressureUnits = config.get(self.source, 'PressureUnits')
    inputLengthUnits = config.get(self.source, 'LengthUnits')
    startSeason = config.getint('DataProcess', 'StartSeason')

    indicator = loadData.getInitialPositions(inputData)
    lat = np.array(inputData['lat'], 'd')
    # Wrap longitudes into the range [0, 360)
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)

    if restrictToWindfieldDomain:
        # Filter the input arrays to only retain the tracks that
        # pass through the windfield domain.
        CD = CalcTrackDomain(self.configFile)
        self.domain = CD.calcDomainFromTracks(indicator, lon, lat)
        domainIndex = self.extractTracks(indicator, lon, lat)
        inputData = inputData[domainIndex]
        indicator = indicator[domainIndex]
        lon = lon[domainIndex]
        lat = lat[domainIndex]

    if self.progressbar is not None:
        self.progressbar.update(0.125)

    # Sort date/time information
    try:
        # Prefer an 'age' column when present: the time step is the
        # difference between consecutive ages. dt[0] is left unset.
        # NOTE(review): on this path year and jdays are never assigned,
        # although later statements read them - confirm that inputs with
        # an 'age' column are handled as intended.
        dt = np.empty(indicator.size, 'f')
        dt[1:] = np.diff(inputData['age'])
    except (ValueError, KeyError):
        try:
            self.logger.info("Filtering input data by season: "
                             "season >= %s", startSeason)
            # Find indices that satisfy minimum season filter
            idx = np.where(inputData['season'] >= startSeason)[0]
            # Filter records:
            inputData = inputData[idx]
            indicator = indicator[idx]
            lon = lon[idx]
            lat = lat[idx]
        except (ValueError, KeyError):
            # Best effort: the season filter is skipped when it cannot
            # be applied (presumably no 'season' column in the input).
            pass

        year, month, day, hour, minute, datetimes \
            = loadData.parseDates(inputData, indicator)

        # Time between observations:
        dt = loadData.getTimeDelta(year, month, day, hour, minute)

        # Calculate julian days:
        jdays = loadData.julianDays(year, month, day, hour, minute)

    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon >
    # 15 degrees or delta_lat > 5 degrees) This avoids two tracks
    # being accidentally combined when seasons and track numbers
    # match but basins are different as occurs in the IBTrACS
    # dataset. This problem can also be prevented if the
    # 'tcserialno' column is specified.
    indicator[np.where(delta_lon > 15)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    # Save information required for frequency auto-calculation
    try:
        origin_seasonOrYear = np.array(
            inputData['season'], 'i').compress(indicator)
        header = 'Season'
    except (ValueError, KeyError):
        origin_seasonOrYear = year.compress(indicator)
        header = 'Year'

    flSaveFile(self.origin_year, np.transpose(origin_seasonOrYear),
               header, ',', fmt='%d')

    # Entries equal to sys.maxint are remembered before the unit
    # conversion and restored afterwards so they are not rescaled.
    pressure = np.array(inputData['pressure'], 'd')
    novalue_index = np.where(pressure == sys.maxint)
    pressure = metutils.convert(pressure, inputPressureUnits, "hPa")
    pressure[novalue_index] = sys.maxint

    # Convert any non-physical central pressure values to maximum integer
    # This is required because IBTrACS has a mix of missing value codes
    # (i.e. -999, 0, 9999) in the same global dataset.
    pressure = np.where((pressure < 600) | (pressure > 1100),
                        sys.maxint, pressure)

    if self.progressbar is not None:
        self.progressbar.update(0.25)

    try:
        vmax = np.array(inputData['vmax'], 'd')
    except (ValueError, KeyError):
        self.logger.warning("No max wind speed data")
        # NOTE(review): np.empty leaves the contents uninitialised -
        # confirm that downstream code tolerates arbitrary values here.
        vmax = np.empty(indicator.size, 'f')
    else:
        novalue_index = np.where(vmax == sys.maxint)
        vmax = metutils.convert(vmax, inputSpeedUnits, "mps")
        vmax[novalue_index] = sys.maxint

    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == sys.maxint)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = sys.maxint

        self._rmax(rmax, indicator)
        self._rmaxRate(rmax, dt, indicator)
    except (ValueError, KeyError):
        self.logger.warning("No rmax data available")

    if self.ncflag:
        self.data['index'] = indicator

    # ieast : parameter used in latLon2Azi
    # FIXME: should be a config setting describing the input data.
    ieast = 1

    # Determine the index of initial cyclone observations, excluding
    # those cyclones that have only one observation. This is used
    # for calculating initial bearing and speed
    indicator2 = np.where(indicator > 0, 1, 0)
    initIndex = np.concatenate([np.where(np.diff(indicator2) == -1, 1, 0),
                                [0]])

    # Calculate the bearing and distance (km) of every two
    # consecutive records using ll2azi
    bear_, dist_ = maputils.latLon2Azi(lat, lon, ieast, azimuth=0)
    assert bear_.size == indicator.size - 1
    assert dist_.size == indicator.size - 1

    # Pad to the same length as indicator; element 0 has no preceding
    # record and is left unset by np.empty.
    bear = np.empty(indicator.size, 'f')
    bear[1:] = bear_
    dist = np.empty(indicator.size, 'f')
    dist[1:] = dist_

    self._lonLat(lon, lat, indicator, initIndex)
    self._bearing(bear, indicator, initIndex)
    self._bearingRate(bear, dt, indicator)

    if self.progressbar is not None:
        self.progressbar.update(0.375)

    self._speed(dist, dt, indicator, initIndex)
    self._speedRate(dist, dt, indicator)
    self._pressure(pressure, indicator)
    self._pressureRate(pressure, dt, indicator)
    self._windSpeed(vmax)

    try:
        self._frequency(year, indicator)
        self._juliandays(jdays, indicator, year)
    except (ValueError, KeyError):
        # Best effort: these statistics are optional, but record why
        # they were skipped rather than discarding the error silently.
        self.logger.debug("Frequency and julian day processing skipped",
                          exc_info=True)

    self.logger.info("Completed %s", flModuleName())

    if self.progressbar is not None:
        self.progressbar.update(0.5)