def loadTracks(trackfile):
    """
    Load every track stored in a track .csv file.

    The file is parsed by `readMultipleTrackData`; each parsed record set is
    wrapped in a :class:`Track`, which is then tagged with the name of the
    file it came from and an ``(index, total)`` identifier.

    :type  trackfile: str
    :param trackfile: the track data filename.

    :returns: list of :class:`Track` objects, in the order they were parsed.
    """
    parsed = readMultipleTrackData(trackfile)
    total = len(parsed)

    def _wrap(position, record):
        # Build a single Track and attach its provenance information.
        item = Track(record)
        item.trackfile = trackfile
        item.trackId = (position, total)
        return item

    return [_wrap(position, record) for position, record in enumerate(parsed)]
Example #2
0
def loadTrackFile(configFile, trackFile, source, missingValue=0,
                  calculateWindSpeed=True):
    """
    Load TC track data from the given input file, from a specified source.
    The configFile is a configuration file that contains a section called
    'source' that describes the data.
    This returns a collection of :class:`Track` objects that contains
    the details of the TC tracks in the input file.

    :param str configFile: Configuration file with a section ``source``.
    :param str trackFile: Path to a csv-formatted file containing TC data.
    :param str source: Name of the source format of the TC data. There
                       *must* be a section in ``configFile`` matching
                       this string, containing the details of the format
                       of the data.
    :param missingValue: Replace all null values in the input data with
                         this value (default=0).
    :param boolean calculateWindSpeed: Calculate maximum wind speed using
                                       a pressure-wind relation described
                                       in :func:`maxWindSpeed`. When true,
                                       any wind speed read from the file is
                                       overwritten by the calculated value.

    :returns: A list of :class:`Track` objects.
              If any of the variables are not present in the input
              dataset, they are (where possible) calculated
              (date/time/windspeed), sampled from default datasets
              (e.g. environmental pressure) or set to the missing value.

    :raises AssertionError: if the latitude, longitude or pressure arrays
                            do not have one entry per input record.

    Example::

      >>> tracks = loadTrackFile('tcrm.ini', 'IBTRaCS.csv', 'IBTrACS' )

    """

    logger.info("Loading %s", trackFile)
    # NOTE: ``nullValue=missingValue`` is not passed to colReadCSV; that
    # argument was commented out in the original call.
    inputData = colReadCSV(configFile, trackFile, source)

    config = ConfigParser()
    config.read(configFile)

    inputSpeedUnits = config.get(source, 'SpeedUnits')
    inputPressureUnits = config.get(source, 'PressureUnits')
    inputLengthUnits = config.get(source, 'LengthUnits')
    inputDateFormat = config.get(source, 'DateFormat')

    # Optionally drop every record from before the configured start season.
    if config.getboolean('DataProcess', 'FilterSeasons'):
        startSeason = config.getint('DataProcess', 'StartSeason')
        idx = np.where(inputData['season'] >= startSeason)[0]
        inputData = inputData[idx]

    # Determine the initial TC positions...
    indicator = getInitialPositions(inputData)

    # Sort date/time information
    if 'age' in inputData.dtype.names:
        year, month, day, hour, minute, datetimes = parseAge(inputData,
                                                             indicator)
        timeElapsed = inputData['age']
    else:
        year, month, day, hour, minute, datetimes = parseDates(
            inputData, indicator, inputDateFormat)
        timeElapsed = getTimeElapsed(indicator, year, month, day, hour,
                                     minute)

    # Time between observations:
    dt = getTimeDelta(year, month, day, hour, minute)

    # Calculate julian days
    jdays = julianDays(year, month, day, hour, minute)

    lat = np.array(inputData['lat'], 'd')
    # Longitudes are wrapped into the range [0, 360).
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)
    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon > 10 degrees
    # or delta_lat > 5 degrees)
    # This avoids two tracks being accidentally combined when seasons and track
    # numbers match but basins are different as occurs in the IBTrACS dataset.
    # This problem can also be prevented if the 'tcserialno' column is
    # specified.
    # The "+ 1" marks the record *after* each jump as the start of a track.
    indicator[np.where(delta_lon > 10)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    pressure = filterPressure(np.array(inputData['pressure'], 'd'),
                              inputPressureUnits, missingValue)
    try:
        windspeed = np.array(inputData['vmax'], 'd')
        # ``sys.maxint`` only exists on Python 2; on Python 3 the lookup
        # raised an AttributeError that the handler below does not catch.
        # Fall back to ``sys.maxsize`` when ``maxint`` is unavailable.
        noDataSentinel = getattr(sys, 'maxint', sys.maxsize)
        novalue_index = np.where(windspeed == noDataSentinel)
        windspeed = metutils.convert(windspeed, inputSpeedUnits, "mps")
        # Restore the missing-value marker that unit conversion rescaled.
        windspeed[novalue_index] = missingValue
    except (ValueError, KeyError):
        logger.debug("No max wind speed data - all values will be zero")
        windspeed = np.zeros(indicator.size, 'f')
    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == missingValue)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        # Restore the missing-value marker that unit conversion rescaled.
        rmax[novalue_index] = missingValue

    except (ValueError, KeyError):
        logger.debug("No radius to max wind data - all values will be zero")
        rmax = np.zeros(indicator.size, 'f')

    if 'penv' in inputData.dtype.names:
        penv = np.array(inputData['penv'], 'd')
    else:
        logger.debug("No ambient MSLP data in this input file")
        logger.debug("Sampling data from MSLP data defined in "
                    "configuration file")
        # Warning: using sampled data will likely lead to some odd behaviour
        # near the boundary of the MSLP grid boundaries - higher resolution
        # MSLP data will decrease this unusual behaviour.

        try:
            ncfile = cnfGetIniValue(configFile, 'Input', 'MSLPFile')
        except Exception:
            # Log with traceback, then let the caller see the failure.
            logger.exception("No input MSLP file specified in configuration")
            raise
        time = getTime(year, month, day, hour, minute)
        penv = ltmPressure(jdays, time, lon, lat, ncfile)

    speed, bearing = getSpeedBearing(indicator, lon, lat, dt,
                                     missingValue=missingValue)

    if calculateWindSpeed:
        windspeed = maxWindSpeed(indicator, dt, lon, lat, pressure, penv)

    # Running sum of the start-of-track flags gives a 1-based track number
    # for every record.
    TCID = np.cumsum(indicator)

    data = np.empty(len(indicator),
                    dtype={
                        'names': trackFields,
                        'formats': trackTypes
                    })
    # NOTE(review): the list below is paired positionally with trackFields,
    # so it presumably must follow the same column order - confirm against
    # the definition of trackFields.
    columns = [indicator, TCID, year, month, day, hour, minute, timeElapsed,
               datetimes, lon, lat, speed, bearing, pressure, windspeed,
               rmax, penv]
    for key, value in zip(trackFields, columns):
        data[key] = value

    tracks = []
    n = np.max(TCID)
    for i in range(1, n + 1):
        track = Track(data[TCID == i])
        track.trackId = (i, n)
        track.trackfile = trackFile
        getMinPressure(track, missingValue)
        getMaxWind(track, missingValue)
        tracks.append(track)

    return tracks
Example #3
0
def loadTrackFile(configFile,
                  trackFile,
                  source,
                  missingValue=0,
                  calculateWindSpeed=True):
    """
    Load TC track data from the given input file, from a specified source.
    The configFile is a configuration file that contains a section called
    'source' that describes the data.
    This returns a collection of :class:`Track` objects that contains
    the details of the TC tracks in the input file.

    :param str configFile: Configuration file with a section ``source``.
    :param str trackFile: Path to a csv-formatted file containing TC data.
    :param str source: Name of the source format of the TC data. There
                       *must* be a section in ``configFile`` matching
                       this string, containing the details of the format
                       of the data.
    :param missingValue: Replace all null values in the input data with
                         this value (default=0).
    :param boolean calculateWindSpeed: Calculate maximum wind speed using
                                       a pressure-wind relation described
                                       in :func:`maxWindSpeed`. When true,
                                       any wind speed read from the file is
                                       overwritten by the calculated value.

    :returns: A list of :class:`Track` objects.
              If any of the variables are not present in the input
              dataset, they are (where possible) calculated
              (date/time/windspeed), sampled from default datasets
              (e.g. environmental pressure) or set to the missing value.

    :raises AssertionError: if the latitude, longitude or pressure arrays
                            do not have one entry per input record.

    Example::

      >>> tracks = loadTrackFile('tcrm.ini', 'IBTRaCS.csv', 'IBTrACS' )

    """

    LOG.info("Loading %s", trackFile)
    # NOTE: ``nullValue=missingValue`` is not passed to colReadCSV; that
    # argument was commented out in the original call.
    inputData = colReadCSV(configFile, trackFile, source)

    config = ConfigParser()
    config.read(configFile)

    inputSpeedUnits = config.get(source, 'SpeedUnits')
    inputPressureUnits = config.get(source, 'PressureUnits')
    inputLengthUnits = config.get(source, 'LengthUnits')
    inputDateFormat = config.get(source, 'DateFormat')

    # Optionally drop every record from before the configured start season.
    if config.getboolean('DataProcess', 'FilterSeasons'):
        startSeason = config.getint('DataProcess', 'StartSeason')
        idx = np.where(inputData['season'] >= startSeason)[0]
        inputData = inputData[idx]

    # Determine the initial TC positions...
    indicator = getInitialPositions(inputData)

    # Sort date/time information
    if 'age' in inputData.dtype.names:
        year, month, day, hour, minute, datetimes = parseAge(
            inputData, indicator)
        timeElapsed = inputData['age']
    else:
        year, month, day, hour, minute, datetimes = parseDates(
            inputData, indicator, inputDateFormat)
        timeElapsed = getTimeElapsed(indicator, year, month, day, hour, minute)

    # Time between observations:
    dt = getTimeDelta(year, month, day, hour, minute)

    # Calculate julian days
    jdays = julianDays(year, month, day, hour, minute)

    lat = np.array(inputData['lat'], 'd')
    # Longitudes are wrapped into the range [0, 360).
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)
    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon > 10 degrees
    # or delta_lat > 5 degrees)
    # This avoids two tracks being accidentally combined when seasons and track
    # numbers match but basins are different as occurs in the IBTrACS dataset.
    # This problem can also be prevented if the 'tcserialno' column is
    # specified.
    # The "+ 1" marks the record *after* each jump as the start of a track.
    indicator[np.where(delta_lon > 10)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    pressure = filterPressure(np.array(inputData['pressure'], 'd'),
                              inputPressureUnits, missingValue)
    try:
        windspeed = np.array(inputData['vmax'], 'd')
        # ``sys.maxint`` only exists on Python 2; on Python 3 the lookup
        # raised an AttributeError that the handler below does not catch.
        # Fall back to ``sys.maxsize`` when ``maxint`` is unavailable.
        noDataSentinel = getattr(sys, 'maxint', sys.maxsize)
        novalue_index = np.where(windspeed == noDataSentinel)
        windspeed = metutils.convert(windspeed, inputSpeedUnits, "mps")
        # Restore the missing-value marker that unit conversion rescaled.
        windspeed[novalue_index] = missingValue
    except (ValueError, KeyError):
        LOG.debug("No max wind speed data - all values will be zero")
        windspeed = np.zeros(indicator.size, 'f')
    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == missingValue)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        # Restore the missing-value marker that unit conversion rescaled.
        rmax[novalue_index] = missingValue

    except (ValueError, KeyError):
        LOG.debug("No radius to max wind data - all values will be zero")
        rmax = np.zeros(indicator.size, 'f')

    if 'penv' in inputData.dtype.names:
        penv = np.array(inputData['penv'], 'd')
    else:
        LOG.debug("No ambient MSLP data in this input file")
        LOG.debug("Sampling data from MSLP data defined in "
                  "configuration file")
        # Warning: using sampled data will likely lead to some odd behaviour
        # near the boundary of the MSLP grid boundaries - higher resolution
        # MSLP data will decrease this unusual behaviour.

        try:
            ncfile = cnfGetIniValue(configFile, 'Input', 'MSLPFile')
        except Exception:
            # Log with traceback, then let the caller see the failure.
            LOG.exception("No input MSLP file specified in configuration")
            raise
        time = getTime(year, month, day, hour, minute)
        penv = ltmPressure(jdays, time, lon, lat, ncfile)

    if 'poci' in inputData.dtype.names:
        poci = np.array(inputData['poci'], 'd')
    else:
        LOG.debug("Determining poci")
        # A single random draw is made here and passed to getPoci for the
        # whole dataset, so repeated calls give different poci values
        # unless the numpy random state is seeded by the caller.
        eps = np.random.normal(0, scale=2.5717)
        poci = getPoci(penv, pressure, lat, jdays, eps)

    speed, bearing = getSpeedBearing(indicator,
                                     lon,
                                     lat,
                                     dt,
                                     missingValue=missingValue)

    if calculateWindSpeed:
        windspeed = maxWindSpeed(indicator, dt, lon, lat, pressure, poci)

    # Running sum of the start-of-track flags gives a 1-based track number
    # for every record.
    TCID = np.cumsum(indicator)

    data = np.empty(len(indicator),
                    dtype={
                        'names': trackFields,
                        'formats': trackTypes
                    })
    # NOTE(review): the list below is paired positionally with trackFields,
    # so it presumably must follow the same column order - confirm against
    # the definition of trackFields. ``poci`` (not ``penv``) fills the final
    # column.
    columns = [
        indicator, TCID, year, month, day, hour, minute, timeElapsed,
        datetimes, lon, lat, speed, bearing, pressure, windspeed, rmax,
        poci
    ]
    for key, value in zip(trackFields, columns):
        data[key] = value

    tracks = []
    n = np.max(TCID)
    for i in range(1, n + 1):
        track = Track(data[TCID == i])
        track.trackId = (i, n)
        track.trackfile = trackFile
        getMinPressure(track, missingValue)
        getMaxWind(track, missingValue)
        tracks.append(track)

    return tracks
Example #4
0
def interpolate(track, delta, interpolation_type=None):
    """
    Interpolate the records in time to have a uniform time difference between
    records. Each of the input arrays represent the values for a single TC
    event.

    :param track: :class:`Track` object containing all data for the track.
    :param delta: `float` time difference to interpolate the dataset to. Must be
                  positive.
    :param interpolation_type: Optional ['linear', 'akima'], specify the type
                               of interpolation used for the locations (i.e.
                               longitude and latitude) of the records. Any
                               other value (including the default ``None``)
                               uses a spline fit (``splrep``/``splev``).
                               Tracks with three or fewer records are always
                               interpolated linearly.

    :returns: A new :class:`Track` holding the interpolated records, with the
              same ``trackId`` and ``trackfile`` as the input track.

    # FIXME: Need to address masking values - scipy.interpolate.interp1d
    handles numpy.ma masked arrays.
    """
    LOG.debug("Performing interpolation of TC track")
    if not hasattr(track, 'Datetime'):
        day_ = [
            datetime(*x) for x in zip(track.Year, track.Month, track.Day,
                                      track.Hour, track.Minute)
        ]
    else:
        day_ = track.Datetime

    # Express each observation time as a fractional day number (proleptic
    # Gregorian ordinal plus the time of day; seconds are ignored).
    try:
        time_ = np.array(
            [d.toordinal() + (d.hour + d.minute / 60.) / 24.0 for d in day_],
            dtype=float)
    except AttributeError:
        # cftime Julian-calendar dates are converted to real datetimes and
        # the conversion is retried; anything else is re-raised.
        import cftime
        if isinstance(day_[0], cftime.DatetimeJulian):
            day__ = [d._to_real_datetime() for d in day_]
            time_ = np.array([
                d.toordinal() + (d.hour + d.minute / 60.) / 24. for d in day__
            ],
                             dtype=float)
        else:
            raise
    # Hours between consecutive observations; the first entry stays zero.
    dt_ = 24.0 * np.diff(time_)
    dt = np.zeros(len(track.data), dtype=float)
    dt[1:] = dt_

    # Convert all times to a time after initial observation:
    timestep = 24.0 * (time_ - time_[0])

    # Regular grid of hours since the first observation. The small offset
    # keeps the end point when it falls exactly on the grid, and the last
    # grid point is then pinned to the final observation time.
    newtime = np.arange(timestep[0], timestep[-1] + .01, delta)
    newtime[-1] = timestep[-1]
    _newtime = (newtime / 24.) + time_[0]
    # NOTE(review): assumes num2date uses the same day-number epoch as
    # datetime.toordinal() - confirm against the imported implementation.
    newdates = num2date(_newtime)
    newdates = np.array([n.replace(tzinfo=None) for n in newdates])

    # NOTE(review): this guard tests for 'Speed' but populates 'WindSpeed';
    # it looks like it may have been meant to test 'WindSpeed' - confirm
    # against how Track resolves attributes before changing it.
    if not hasattr(track, 'Speed'):
        idx = np.zeros(len(track.data))
        idx[0] = 1
        # TODO: Possibly could change `np.mean(dt)` to `dt`?
        track.WindSpeed = maxWindSpeed(idx, np.mean(dt), track.Longitude,
                                       track.Latitude, track.CentralPressure,
                                       track.EnvPressure)
    # Find the indices of valid pressure observations:
    validIdx = np.where(track.CentralPressure < sys.maxsize)[0]

    # FIXME: Need to address the issue when the time between obs is less
    # than delta (e.g. only two obs 5 hrs apart, but delta = 6 hrs).

    if len(track.data) <= 3:
        # Use linear interpolation only (only a start and end point given):
        nLon = interp1d(timestep, track.Longitude, kind='linear')(newtime)
        nLat = interp1d(timestep, track.Latitude, kind='linear')(newtime)

        if len(validIdx) >= 2:
            npCentre = interp1d(timestep, track.CentralPressure,
                                kind='linear')(newtime)
            nwSpd = interp1d(timestep, track.WindSpeed, kind='linear')(newtime)

        elif len(validIdx) == 1:
            # If one valid observation, assume no change and
            # apply value to all times
            npCentre = np.ones(len(newtime)) * track.CentralPressure[validIdx]
            nwSpd = np.ones(len(newtime)) * track.WindSpeed[validIdx]

        else:
            npCentre = np.zeros(len(newtime))
            nwSpd = np.zeros(len(newtime))

        npEnv = interp1d(timestep, track.EnvPressure, kind='linear')(newtime)
        nrMax = interp1d(timestep, track.rMax, kind='linear')(newtime)

    else:
        if interpolation_type == 'akima':
            # Use the Akima interpolation method:
            try:
                import akima
            except ImportError:
                LOG.exception(("Akima interpolation module unavailable "
                               " - default to scipy.interpolate"))
                nLon = splev(newtime,
                             splrep(timestep, track.Longitude, s=0),
                             der=0)
                nLat = splev(newtime,
                             splrep(timestep, track.Latitude, s=0),
                             der=0)

            else:
                nLon = akima.interpolate(timestep, track.Longitude, newtime)
                nLat = akima.interpolate(timestep, track.Latitude, newtime)

        elif interpolation_type == 'linear':
            nLon = interp1d(timestep, track.Longitude, kind='linear')(newtime)
            nLat = interp1d(timestep, track.Latitude, kind='linear')(newtime)

        else:
            nLon = splev(newtime,
                         splrep(timestep, track.Longitude, s=0),
                         der=0)
            nLat = splev(newtime, splrep(timestep, track.Latitude, s=0), der=0)

        if len(validIdx) >= 2:
            # No valid data at the final new time,
            # would require extrapolation:
            firsttime = np.where(newtime >= timestep[validIdx[0]])[0][0]
            lasttime = np.where(newtime <= timestep[validIdx[-1]])[0][-1]

            if firsttime == lasttime:
                # only one valid observation:
                npCentre = np.zeros(len(newtime))
                nwSpd = np.zeros(len(newtime))
                npCentre[firsttime] = track.CentralPressure[validIdx[0]]
                nwSpd[firsttime] = track.WindSpeed[validIdx[0]]

            else:
                npCentre = np.zeros(len(newtime))
                nwSpd = np.zeros(len(newtime))
                _npCentre = interp1d(timestep[validIdx],
                                     track.CentralPressure[validIdx],
                                     kind='linear')(
                                         newtime[firsttime:lasttime])

                # Interpolate the maximum wind speed. The original read
                # ``track.Speed`` (forward speed) here, unlike every other
                # branch that fills ``nwSpd`` from ``track.WindSpeed``.
                _nwSpd = interp1d(timestep[validIdx],
                                  track.WindSpeed[validIdx],
                                  kind='linear')(newtime[firsttime:lasttime])

                npCentre[firsttime:lasttime] = _npCentre
                nwSpd[firsttime:lasttime] = _nwSpd
                # The slice above excludes ``lasttime``; that slot repeats
                # the last interpolated value.
                npCentre[lasttime] = _npCentre[-1]
                nwSpd[lasttime] = _nwSpd[-1]

        elif len(validIdx) == 1:
            npCentre = np.ones(len(newtime)) * track.CentralPressure[validIdx]
            nwSpd = np.ones(len(newtime)) * track.WindSpeed[validIdx]
        else:
            npCentre = np.zeros(len(newtime))
            nwSpd = np.zeros(len(newtime))

        npEnv = interp1d(timestep, track.EnvPressure, kind='linear')(newtime)
        nrMax = interp1d(timestep, track.rMax, kind='linear')(newtime)

    if len(nLat) >= 2:
        # Bearing and distance between successive interpolated positions;
        # the final record repeats the value of the last segment.
        bear_, dist_ = latLon2Azi(nLat, nLon, 1, azimuth=0)
        nthetaFm = np.zeros(newtime.size, dtype=float)
        nthetaFm[:-1] = bear_
        nthetaFm[-1] = bear_[-1]
        dist = np.zeros(newtime.size, dtype=float)
        dist[:-1] = dist_
        dist[-1] = dist_[-1]
        nvFm = dist / delta

    else:
        # A single interpolated position: reuse the last observed values.
        nvFm = track.Speed[-1]
        nthetaFm = track.Bearing[-1]

    nYear = [date.year for date in newdates]
    nMonth = [date.month for date in newdates]
    nDay = [date.day for date in newdates]
    nHour = [date.hour for date in newdates]
    nMin = [date.minute for date in newdates]
    # Central pressures outside (700, 10e+6) are flagged as invalid with the
    # same sentinel used to select valid observations above.
    np.putmask(npCentre, npCentre > 10e+6, sys.maxsize)
    np.putmask(npCentre, npCentre < 700, sys.maxsize)

    # Only the first interpolated record is flagged as the start of a track.
    newindex = np.zeros(len(newtime))
    newindex[0] = 1
    newTCID = np.ones(len(newtime)) * track.trackId[0]

    newdata = np.empty(len(newtime),
                       dtype={
                           'names': TRACKFILE_COLS,
                           'formats': TRACKFILE_FMTS
                       })

    # NOTE(review): values are paired positionally with TRACKFILE_COLS, so
    # this list presumably must follow the same column order - confirm
    # against the definition of TRACKFILE_COLS.
    for key, val in zip(TRACKFILE_COLS, [
            newindex, newTCID, nYear, nMonth, nDay, nHour, nMin, newtime,
            newdates, nLon, nLat, nvFm, nthetaFm, npCentre, nwSpd, nrMax, npEnv
    ]):
        newdata[key] = val
    newtrack = Track(newdata)
    newtrack.trackId = track.trackId
    newtrack.trackfile = track.trackfile

    return newtrack