def test_dateInput(self):
    """Check parseDates against the reference date fields of the fixture."""
    parsed = loadData.parseDates(self.input_dates, self.indicator)
    # parseDates yields six values; the last one is not checked here.
    year, month, day, hour, minute, _ = parsed
    expected = (self.year, self.month, self.day, self.hour, self.minute)
    for got, want in zip((year, month, day, hour, minute), expected):
        assert_almost_equal(got, want)
def test_dateInput(self):
    """Check parseDates against the reference date fields of the fixture."""
    # Six values are returned; the trailing one is not part of this check.
    year, month, day, hour, minute, _ = loadData.parseDates(
        self.input_dates, self.indicator)
    comparisons = (
        (year, self.year),
        (month, self.month),
        (day, self.day),
        (hour, self.hour),
        (minute, self.minute),
    )
    for actual, reference in comparisons:
        assert_almost_equal(actual, reference)
def test_ParseDatesNoMinsInput(self):
    """Check parseDates when the input holds year, month, day and hour only.

    With no minute information supplied, the returned minutes are expected
    to be an all-zero array with one entry per hour value.
    """
    inputdata = {
        'year': self.year,
        'month': self.month,
        'day': self.day,
        'hour': self.hour,
    }
    parsed = loadData.parseDates(inputdata, self.indicator)
    # The sixth returned value is not examined by this test.
    year, month, day, hour, minute, _ = parsed
    zero_minutes = np.zeros(self.hour.size, 'i')
    expected = (self.year, self.month, self.day, self.hour, zero_minutes)
    for got, want in zip((year, month, day, hour, minute), expected):
        assert_almost_equal(got, want)
def test_parseDatesYMDHInput(self):
    """Check parseDates when the 'hour' field carries combined HHMM values.

    The fixture's ``hourmin`` array is passed under the 'hour' key and the
    parsed hour and minute are compared with the separate reference arrays.
    """
    inputdata = {
        'year': self.year,
        'month': self.month,
        'day': self.day,
        'hour': self.hourmin,
    }
    parsed = loadData.parseDates(inputdata, self.indicator)
    # The sixth returned value is not examined by this test.
    year, month, day, hour, minute, _ = parsed
    expected = (self.year, self.month, self.day, self.hour, self.minute)
    for got, want in zip((year, month, day, hour, minute), expected):
        assert_almost_equal(got, want)
def test_ParseDatesNoMinsInput(self):
    """Check parseDates when the input holds year, month, day and hour only.

    With no minute information supplied, the returned minutes are expected
    to be an all-zero array with one entry per hour value.
    """
    inputdata = {
        'year': self.year,
        'month': self.month,
        'day': self.day,
        'hour': self.hour,
    }
    # Six values are returned; the trailing one is not part of this check.
    year, month, day, hour, minute, _ = loadData.parseDates(
        inputdata, self.indicator)
    comparisons = (
        (year, self.year),
        (month, self.month),
        (day, self.day),
        (hour, self.hour),
        (minute, np.zeros(self.hour.size, 'i')),
    )
    for actual, reference in comparisons:
        assert_almost_equal(actual, reference)
def test_parseDatesYMDHInput(self):
    """Check parseDates when the 'hour' field carries combined HHMM values.

    The fixture's ``hourmin`` array is passed under the 'hour' key and the
    parsed hour and minute are compared with the separate reference arrays.
    """
    inputdata = {
        'year': self.year,
        'month': self.month,
        'day': self.day,
        'hour': self.hourmin,
    }
    # Six values are returned; the trailing one is not part of this check.
    year, month, day, hour, minute, _ = loadData.parseDates(
        inputdata, self.indicator)
    comparisons = (
        (year, self.year),
        (month, self.month),
        (day, self.day),
        (hour, self.hour),
        (minute, self.minute),
    )
    for actual, reference in comparisons:
        assert_almost_equal(actual, reference)
def processData(self, restrictToWindfieldDomain=False):
    """
    Process raw data into ASCII files that can be read by the main
    components of the system.

    The configuration file named by ``self.configFile`` is read to find
    the input track file, the units of the speed, pressure and length
    columns, and the first season to retain. Records are optionally
    filtered, pressure / wind speed / radius values are converted with
    ``metutils.convert`` (to "hPa", "mps" and "km" respectively) and the
    resulting arrays are handed to the per-parameter ``self._*`` methods.

    :param bool restrictToWindfieldDomain: if True, only process data
        within the wind field domain, otherwise, process data from
        across the track generation domain.

    :returns: None. Results are passed on through ``flSaveFile``, the
        ``self._*`` helper methods and, when ``self.ncflag`` is set,
        ``self.data['index']``.
    """
    config = ConfigParser()
    config.read(self.configFile)

    self.logger.info("Running {0}".format(flModuleName()))

    if config.has_option('DataProcess', 'InputFile'):
        inputFile = config.get('DataProcess', 'InputFile')

    # A configured 'Source' takes precedence: its path/filename pair
    # overrides any 'InputFile' value read above.
    if config.has_option('DataProcess', 'Source'):
        source = config.get('DataProcess', 'Source')
        self.logger.info('Loading %s dataset', source)
        fn = config.get(source, 'filename')
        path = config.get(source, 'path')
        inputFile = pjoin(path, fn)

    # If input file has no path information, default to tcrm input folder
    if len(os.path.dirname(inputFile)) == 0:
        inputFile = pjoin(self.tcrm_input_dir, inputFile)

    self.logger.info("Processing {0}".format(inputFile))

    # NOTE(review): 'Source' is read unconditionally here, so the option
    # is effectively mandatory even though it is guarded above.
    self.source = config.get('DataProcess', 'Source')

    inputData = colReadCSV(self.configFile, inputFile, self.source)

    inputSpeedUnits = config.get(self.source, 'SpeedUnits')
    inputPressureUnits = config.get(self.source, 'PressureUnits')
    inputLengthUnits = config.get(self.source, 'LengthUnits')
    startSeason = config.getint('DataProcess', 'StartSeason')

    indicator = loadData.getInitialPositions(inputData)
    lat = np.array(inputData['lat'], 'd')
    # Wrap longitudes into the range [0, 360)
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)

    if restrictToWindfieldDomain:
        # Filter the input arrays to only retain the tracks that
        # pass through the windfield domain.
        CD = CalcTrackDomain(self.configFile)
        self.domain = CD.calcDomainFromTracks(indicator, lon, lat)
        domainIndex = self.extractTracks(indicator, lon, lat)
        inputData = inputData[domainIndex]
        indicator = indicator[domainIndex]
        lon = lon[domainIndex]
        lat = lat[domainIndex]

    if self.progressbar is not None:
        self.progressbar.update(0.125)

    # Sort date/time information
    try:
        # Preferred path: derive the time step from an 'age' column.
        # Only dt[1:] is assigned; dt[0] keeps whatever np.empty left.
        dt = np.empty(indicator.size, 'f')
        dt[1:] = np.diff(inputData['age'])
    except (ValueError, KeyError):
        # No usable 'age' column: fall back to explicit date fields.
        # NOTE(review): year and jdays are only assigned in this
        # fallback branch - confirm the later uses of those names are
        # safe when the 'age' path above succeeds.
        try:
            # The message previously joined two literals without a
            # separator ("season:season") and reported '>' although
            # the filter below keeps seasons >= startSeason.
            self.logger.info("Filtering input data by season: "
                             "season >= {0}".format(startSeason))
            # Find indices that satisfy minimum season filter
            idx = np.where(inputData['season'] >= startSeason)[0]
            # Filter records:
            inputData = inputData[idx]
            indicator = indicator[idx]
            lon = lon[idx]
            lat = lat[idx]
        except (ValueError, KeyError):
            # No usable 'season' column: keep all records.
            pass

        year, month, day, hour, minute, datetimes \
            = loadData.parseDates(inputData, indicator)

        # Time between observations:
        dt = loadData.getTimeDelta(year, month, day, hour, minute)

        # Calculate julian days:
        jdays = loadData.julianDays(year, month, day, hour, minute)

    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon >
    # 15 degrees or delta_lat > 5 degrees) This avoids two tracks
    # being accidentally combined when seasons and track numbers
    # match but basins are different as occurs in the IBTrACS
    # dataset. This problem can also be prevented if the
    # 'tcserialno' column is specified.
    # NOTE(review): the comparisons are signed, so only positive
    # jumps are flagged - confirm that is intended.
    indicator[np.where(delta_lon > 15)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    # Save information required for frequency auto-calculation
    try:
        origin_seasonOrYear = np.array(inputData['season'],
                                       'i').compress(indicator)
        header = 'Season'
    except (ValueError, KeyError):
        origin_seasonOrYear = year.compress(indicator)
        header = 'Year'

    flSaveFile(self.origin_year, np.transpose(origin_seasonOrYear),
               header, ',', fmt='%d')

    pressure = np.array(inputData['pressure'], 'd')
    # Remember where the missing-value sentinel sits so that the unit
    # conversion does not alter it.
    novalue_index = np.where(pressure == sys.maxint)
    pressure = metutils.convert(pressure, inputPressureUnits, "hPa")
    pressure[novalue_index] = sys.maxint

    # Convert any non-physical central pressure values to maximum integer
    # This is required because IBTrACS has a mix of missing value codes
    # (i.e. -999, 0, 9999) in the same global dataset.
    pressure = np.where((pressure < 600) | (pressure > 1100),
                        sys.maxint, pressure)

    if self.progressbar is not None:
        self.progressbar.update(0.25)

    try:
        vmax = np.array(inputData['vmax'], 'd')
    except (ValueError, KeyError):
        self.logger.warning("No max wind speed data")
        # Placeholder array; np.empty leaves its contents uninitialised.
        vmax = np.empty(indicator.size, 'f')
    else:
        # Same sentinel-preserving conversion as used for pressure.
        novalue_index = np.where(vmax == sys.maxint)
        vmax = metutils.convert(vmax, inputSpeedUnits, "mps")
        vmax[novalue_index] = sys.maxint

    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size
    #assert vmax.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == sys.maxint)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = sys.maxint

        self._rmax(rmax, indicator)
        self._rmaxRate(rmax, dt, indicator)
    except (ValueError, KeyError):
        self.logger.warning("No rmax data available")

    if self.ncflag:
        self.data['index'] = indicator

    # ieast : parameter used in latLon2Azi
    # FIXME: should be a config setting describing the input data.
    ieast = 1

    # Determine the index of initial cyclone observations, excluding
    # those cyclones that have only one observation. This is used
    # for calculating initial bearing and speed
    indicator2 = np.where(indicator > 0, 1, 0)
    # A diff of -1 marks a flagged record that is followed by an
    # unflagged one; the trailing [0] restores the original length.
    initIndex = np.concatenate(
        [np.where(np.diff(indicator2) == -1, 1, 0), [0]])

    # Calculate the bearing and distance (km) of every two
    # consecutive records using ll2azi
    bear_, dist_ = maputils.latLon2Azi(lat, lon, ieast, azimuth=0)
    assert bear_.size == indicator.size - 1
    assert dist_.size == indicator.size - 1
    # Pad to the record count; element 0 is left as np.empty made it.
    bear = np.empty(indicator.size, 'f')
    bear[1:] = bear_
    dist = np.empty(indicator.size, 'f')
    dist[1:] = dist_

    self._lonLat(lon, lat, indicator, initIndex)
    self._bearing(bear, indicator, initIndex)
    self._bearingRate(bear, dt, indicator)
    if self.progressbar is not None:
        self.progressbar.update(0.375)
    self._speed(dist, dt, indicator, initIndex)
    self._speedRate(dist, dt, indicator)
    self._pressure(pressure, indicator)
    self._pressureRate(pressure, dt, indicator)
    self._windSpeed(vmax)

    try:
        self._frequency(year, indicator)
        self._juliandays(jdays, indicator, year)
    except (ValueError, KeyError):
        pass

    self.logger.info("Completed {0}".format(flModuleName()))
    if self.progressbar is not None:
        self.progressbar.update(0.5)
def processData(self, restrictToWindfieldDomain=False):
    """
    Process raw data into ASCII files that can be read by the main
    components of the system.

    The configuration file named by ``self.configFile`` is read to find
    the input track file, the units of the speed, pressure and length
    columns, and the first season to retain. Records are optionally
    filtered, pressure / wind speed / radius values are converted with
    ``metutils.convert`` (to "hPa", "mps" and "km" respectively) and the
    resulting arrays are handed to the per-parameter ``self._*`` methods.

    :param bool restrictToWindfieldDomain: if True, only process data
        within the wind field domain, otherwise, process data from
        across the track generation domain.

    :returns: None. Results are passed on through ``flSaveFile``, the
        ``self._*`` helper methods and, when ``self.ncflag`` is set,
        ``self.data['index']``.
    """
    config = ConfigParser()
    config.read(self.configFile)

    self.logger.info("Running %s" % flModuleName())

    if config.has_option('DataProcess', 'InputFile'):
        inputFile = config.get('DataProcess', 'InputFile')

    # A configured 'Source' takes precedence: its path/filename pair
    # overrides any 'InputFile' value read above.
    if config.has_option('DataProcess', 'Source'):
        source = config.get('DataProcess', 'Source')
        self.logger.info('Loading %s dataset', source)
        fn = config.get(source, 'filename')
        path = config.get(source, 'path')
        inputFile = pjoin(path, fn)

    # If input file has no path information, default to tcrm input folder
    if len(os.path.dirname(inputFile)) == 0:
        inputFile = pjoin(self.tcrm_input_dir, inputFile)

    self.logger.info("Processing %s" % inputFile)

    # NOTE(review): 'Source' is read unconditionally here, so the option
    # is effectively mandatory even though it is guarded above.
    self.source = config.get('DataProcess', 'Source')

    inputData = colReadCSV(self.configFile, inputFile, self.source)

    inputSpeedUnits = config.get(self.source, 'SpeedUnits')
    inputPressureUnits = config.get(self.source, 'PressureUnits')
    inputLengthUnits = config.get(self.source, 'LengthUnits')
    startSeason = config.getint('DataProcess', 'StartSeason')

    indicator = loadData.getInitialPositions(inputData)
    lat = np.array(inputData['lat'], 'd')
    # Wrap longitudes into the range [0, 360)
    lon = np.mod(np.array(inputData['lon'], 'd'), 360)

    if restrictToWindfieldDomain:
        # Filter the input arrays to only retain the tracks that
        # pass through the windfield domain.
        CD = CalcTrackDomain(self.configFile)
        self.domain = CD.calcDomainFromTracks(indicator, lon, lat)
        domainIndex = self.extractTracks(indicator, lon, lat)
        inputData = inputData[domainIndex]
        indicator = indicator[domainIndex]
        lon = lon[domainIndex]
        lat = lat[domainIndex]

    if self.progressbar is not None:
        self.progressbar.update(0.125)

    # Sort date/time information
    try:
        # Preferred path: derive the time step from an 'age' column.
        # Only dt[1:] is assigned; dt[0] keeps whatever np.empty left.
        dt = np.empty(indicator.size, 'f')
        dt[1:] = np.diff(inputData['age'])
    except (ValueError, KeyError):
        # No usable 'age' column: fall back to explicit date fields.
        # NOTE(review): year and jdays are only assigned in this
        # fallback branch - confirm the later uses of those names are
        # safe when the 'age' path above succeeds.
        try:
            # NOTE(review): the message says '>' but the filter below
            # keeps seasons >= startSeason.
            self.logger.info("Filtering input data by season: season > %d"%startSeason)
            # Find indices that satisfy minimum season filter
            idx = np.where(inputData['season'] >= startSeason)[0]
            # Filter records:
            inputData = inputData[idx]
            indicator = indicator[idx]
            lon = lon[idx]
            lat = lat[idx]
        except (ValueError, KeyError):
            # No usable 'season' column: keep all records.
            pass

        year, month, day, hour, minute, datetimes \
            = loadData.parseDates(inputData, indicator)

        # Time between observations:
        dt = loadData.getTimeDelta(year, month, day, hour, minute)

        # Calculate julian days:
        jdays = loadData.julianDays(year, month, day, hour, minute)

    delta_lon = np.diff(lon)
    delta_lat = np.diff(lat)

    # Split into separate tracks if large jump occurs (delta_lon >
    # 15 degrees or delta_lat > 5 degrees) This avoids two tracks
    # being accidentally combined when seasons and track numbers
    # match but basins are different as occurs in the IBTrACS
    # dataset. This problem can also be prevented if the
    # 'tcserialno' column is specified.
    # NOTE(review): the comparisons are signed, so only positive
    # jumps are flagged - confirm that is intended.
    indicator[np.where(delta_lon > 15)[0] + 1] = 1
    indicator[np.where(delta_lat > 5)[0] + 1] = 1

    # Save information required for frequency auto-calculation
    try:
        origin_seasonOrYear = np.array(
            inputData['season'], 'i').compress(indicator)
        header = 'Season'
    except (ValueError, KeyError):
        origin_seasonOrYear = year.compress(indicator)
        header = 'Year'

    flSaveFile(self.origin_year, np.transpose(origin_seasonOrYear),
               header, ',', fmt='%d')

    pressure = np.array(inputData['pressure'], 'd')
    # Remember where the missing-value sentinel sits so that the unit
    # conversion does not alter it.
    novalue_index = np.where(pressure == sys.maxint)
    pressure = metutils.convert(pressure, inputPressureUnits, "hPa")
    pressure[novalue_index] = sys.maxint

    # Convert any non-physical central pressure values to maximum integer
    # This is required because IBTrACS has a mix of missing value codes
    # (i.e. -999, 0, 9999) in the same global dataset.
    pressure = np.where((pressure < 600) | (pressure > 1100),
                        sys.maxint, pressure)

    if self.progressbar is not None:
        self.progressbar.update(0.25)

    try:
        vmax = np.array(inputData['vmax'], 'd')
    except (ValueError, KeyError):
        self.logger.warning("No max wind speed data")
        # Placeholder array; np.empty leaves its contents uninitialised.
        vmax = np.empty(indicator.size, 'f')
    else:
        # Same sentinel-preserving conversion as used for pressure.
        novalue_index = np.where(vmax == sys.maxint)
        vmax = metutils.convert(vmax, inputSpeedUnits, "mps")
        vmax[novalue_index] = sys.maxint

    assert lat.size == indicator.size
    assert lon.size == indicator.size
    assert pressure.size == indicator.size
    #assert vmax.size == indicator.size

    try:
        rmax = np.array(inputData['rmax'])
        novalue_index = np.where(rmax == sys.maxint)
        rmax = metutils.convert(rmax, inputLengthUnits, "km")
        rmax[novalue_index] = sys.maxint

        self._rmax(rmax, indicator)
        self._rmaxRate(rmax, dt, indicator)
    except (ValueError, KeyError):
        self.logger.warning("No rmax data available")

    if self.ncflag:
        self.data['index'] = indicator

    # ieast : parameter used in latLon2Azi
    # FIXME: should be a config setting describing the input data.
    ieast = 1

    # Determine the index of initial cyclone observations, excluding
    # those cyclones that have only one observation. This is used
    # for calculating initial bearing and speed
    indicator2 = np.where(indicator > 0, 1, 0)
    # A diff of -1 marks a flagged record that is followed by an
    # unflagged one; the trailing [0] restores the original length.
    initIndex = np.concatenate([np.where(np.diff(indicator2) == -1, 1, 0), [0]])

    # Calculate the bearing and distance (km) of every two
    # consecutive records using ll2azi
    bear_, dist_ = maputils.latLon2Azi(lat, lon, ieast, azimuth=0)
    assert bear_.size == indicator.size - 1
    assert dist_.size == indicator.size - 1
    # Pad to the record count; element 0 is left as np.empty made it.
    bear = np.empty(indicator.size, 'f')
    bear[1:] = bear_
    dist = np.empty(indicator.size, 'f')
    dist[1:] = dist_

    self._lonLat(lon, lat, indicator, initIndex)
    self._bearing(bear, indicator, initIndex)
    self._bearingRate(bear, dt, indicator)
    if self.progressbar is not None:
        self.progressbar.update(0.375)
    self._speed(dist, dt, indicator, initIndex)
    self._speedRate(dist, dt, indicator)
    self._pressure(pressure, indicator)
    self._pressureRate(pressure, dt, indicator)
    self._windSpeed(vmax)

    try:
        self._frequency(year, indicator)
        self._juliandays(jdays, indicator, year)
    except (ValueError, KeyError):
        pass

    self.logger.info("Completed %s" % flModuleName())
    if self.progressbar is not None:
        self.progressbar.update(0.5)