def setup_and_check(self):  # {{{
    """
    Perform steps to set up the analysis and check for errors in the setup.

    Raises
    ------
    ValueError
        If the time series spans fewer months than
        ``self.movingAveragePoints``, so a rolling mean cannot be
        computed.
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    # first, call setup_and_check from the base class (AnalysisTask),
    # which will perform some common setup, including storing:
    # self.runDirectory , self.historyDirectory, self.plotsDirectory,
    # self.namelist, self.runStreams, self.historyStreams,
    # self.calendar
    super(ComputeAnomalySubtask, self).setup_and_check()

    startDate = self.config.get('timeSeries', 'startDate')
    endDate = self.config.get('timeSeries', 'endDate')

    delta = MpasRelativeDelta(string_to_datetime(endDate),
                              string_to_datetime(startDate),
                              calendar=self.calendar)

    # total length of the time series in months
    months = delta.months + 12 * delta.years

    if months <= self.movingAveragePoints:
        # fixed typo in the error message ("meaninfully")
        raise ValueError('Cannot meaningfully perform a rolling mean '
                         'because the time series is too short.')

    self.mpasTimeSeriesTask.add_variables(variableList=self.variableList)

    # the time-series task's output is this subtask's input
    self.inputFile = self.mpasTimeSeriesTask.outputFile
def setup_and_check(self):  # {{{
    '''
    Perform steps to set up the analysis and check for errors in the setup.

    Raises
    ------
    ValueError
        If the time series is a year or shorter, or if the configured
        El Nino region is not one of 'nino3.4', 'nino3' or 'nino4'.
    '''
    # Authors
    # -------
    # Xylar Asay-Davis

    # first, call setup_and_check from the base class (AnalysisTask),
    # which will perform some common setup, including storing:
    # self.runDirectory , self.historyDirectory, self.plotsDirectory,
    # self.namelist, self.runStreams, self.historyStreams,
    # self.calendar
    super(IndexNino34, self).setup_and_check()

    startDate = self.config.get('index', 'startDate')
    endDate = self.config.get('index', 'endDate')

    delta = MpasRelativeDelta(string_to_datetime(endDate),
                              string_to_datetime(startDate),
                              calendar=self.calendar)

    # total length of the time series in months
    months = delta.months + 12 * delta.years

    if months <= 12:
        # fixed typo in the error message ("meaninfully")
        raise ValueError('Cannot meaningfully analyze El Nino climate '
                         'index because the time series is too short.')

    self.variableList = \
        ['timeMonthly_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']
    self.mpasTimeSeriesTask.add_variables(variableList=self.variableList)

    self.inputFile = self.mpasTimeSeriesTask.outputFile

    mainRunName = self.config.get('runs', 'mainRunName')

    config = self.config
    regionToPlot = config.get('indexNino34', 'region')

    if regionToPlot not in ['nino3.4', 'nino3', 'nino4']:
        # fixed typo in the error message ("Unexpectes")
        raise ValueError(
            'Unexpected El Nino Index region {}'.format(regionToPlot))
    # e.g. '3.4' from 'nino3.4', used to build plot file names
    ninoIndexNumber = regionToPlot[4:]

    self.xmlFileNames = []
    for filePrefix in ['nino{}_{}'.format(ninoIndexNumber,
                                          mainRunName),
                       'nino{}_spectra_{}'.format(ninoIndexNumber,
                                                  mainRunName)]:
        self.xmlFileNames.append('{}/{}.xml'.format(
            self.plotsDirectory, filePrefix))
def test_iselvals(self):
    """Open a single file with iselValues and verify shapes and times."""
    path = str(self.datadir.join('example_jan.nc'))
    calendar = 'gregorian_noleap'
    simulationStartTime = '0001-01-01'
    timeVariable = 'time_avg_daysSinceStartOfSim'
    varNames = \
        ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature',
         'refBottomDepth']
    selection = {'nVertLevels': slice(0, 3)}

    ds = mpas_xarray.open_multifile_dataset(
        fileNames=path,
        calendar=calendar,
        simulationStartTime=simulationStartTime,
        timeVariableName=timeVariable,
        variableList=varNames,
        iselValues=selection)

    # every requested variable should appear as a data variable or coord
    presentVars = list(ds.data_vars.keys()) + list(ds.coords.keys())
    assert numpy.all([name in presentVars for name in varNames])

    # only the first 3 vertical levels should have been kept
    self.assertEqual(ds[varNames[0]].shape, (1, 7, 3))
    self.assertEqual(ds['refBottomDepth'].shape, (3, ))
    self.assertApproxEqual(ds['refBottomDepth'][-1],
                           4.882000207901)

    date = days_to_datetime(days=ds.Time.values[0],
                            referenceDate='0001-01-01',
                            calendar=calendar)
    self.assertEqual(date, string_to_datetime('0005-01-14 12:24:14'))
def test_subset_variables(self):
    """Check subsetting variables after and during dataset opening."""
    path = str(self.datadir.join('example_jan.nc'))
    calendar = 'gregorian_noleap'
    timeVariables = ['xtime_start', 'xtime_end']
    varNames = \
        ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']

    # first, test loading the whole data set and then calling
    # subset_variables explicitly
    ds = mpas_xarray.open_multifile_dataset(
        fileNames=path,
        calendar=calendar,
        timeVariableName=timeVariables)
    ds = mpas_xarray.subset_variables(ds, varNames)
    presentVars = list(ds.data_vars.keys()) + list(ds.coords.keys())
    assert numpy.all([name in presentVars for name in varNames])
    self.assertEqual(days_to_datetime(days=ds.Time.values,
                                      referenceDate='0001-01-01',
                                      calendar=calendar),
                     string_to_datetime('0005-01-16 12:22:30'))

    # next, test the same subsetting via the variableList argument
    ds = mpas_xarray.open_multifile_dataset(
        fileNames=path,
        calendar=calendar,
        timeVariableName=timeVariables,
        variableList=varNames)
    self.assertEqual(list(ds.data_vars.keys()), varNames)

    # asking only for variables that don't exist should raise
    with six.assertRaisesRegex(self, ValueError,
                               'Empty dataset is returned.'):
        missingVars = ['foo', 'bar']
        ds = mpas_xarray.open_multifile_dataset(
            fileNames=path,
            calendar=calendar,
            timeVariableName=timeVariables,
            variableList=missingVars)
def test_days_to_datetime(self):
    """Check conversion from days since a reference date to datetimes."""
    cases = [('0001-01-01', 0.),
             ('0001-01-02', 1.),
             ('0001-02-01', 31.),
             ('0002-01-01', 365.)]
    # near year 1, both calendars agree on all of these dates
    for calendar in ['gregorian', 'gregorian_noleap']:
        for expectedDate, dayCount in cases:
            result = days_to_datetime(days=dayCount,
                                      calendar=calendar,
                                      referenceDate='0001-01-01')
            self.assertEqual(result, string_to_datetime(expectedDate))

    # 2016 is a leap year, so one year after 2016-01-01 is a different
    # number of days in each calendar
    for calendar, dayCount in [('gregorian', 366.),
                               ('gregorian_noleap', 365.)]:
        result = days_to_datetime(days=dayCount,
                                  calendar=calendar,
                                  referenceDate='2016-01-01')
        self.assertEqual(result, string_to_datetime('2017-01-01'))
def test_datetime_to_days(self):
    """Check conversion from datetimes to days since a reference date."""
    cases = [('0001-01-01', 0.),
             ('0001-01-02', 1.),
             ('0001-02-01', 31.),
             ('0002-01-01', 365.)]
    # near year 1, both calendars agree on all of these dates
    for calendar in ['gregorian', 'gregorian_noleap']:
        for dateString, expectedDays in cases:
            result = datetime_to_days(
                dates=string_to_datetime(dateString),
                calendar=calendar,
                referenceDate='0001-01-01')
            self.assertApproxEqual(result, expectedDays)

    # 2016 is a leap year, so 2017-01-01 is a different number of days
    # after 2016-01-01 in each calendar
    for calendar, expectedDays in [('gregorian', 366.),
                                   ('gregorian_noleap', 365.)]:
        result = datetime_to_days(
            dates=string_to_datetime('2017-01-01'),
            calendar=calendar,
            referenceDate='2016-01-01')
        self.assertApproxEqual(result, expectedDays)
def test_no_units(self):
    """Open a file whose time variable has no units attribute."""
    path = str(self.datadir.join('example_no_units_jan.nc'))
    calendar = 'gregorian_noleap'
    simulationStartTime = '0001-01-01'
    timeVariable = 'time_avg_daysSinceStartOfSim'
    varNames = \
        ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature',
         'refBottomDepth']

    ds = mpas_xarray.open_multifile_dataset(
        fileNames=path,
        calendar=calendar,
        simulationStartTime=simulationStartTime,
        timeVariableName=timeVariable,
        variableList=varNames)

    # exactly the requested variables should be present
    self.assertEqual(sorted(ds.data_vars.keys()), sorted(varNames))

    date = days_to_datetime(days=ds.Time.values[0],
                            referenceDate='0001-01-01',
                            calendar=calendar)
    self.assertEqual(date, string_to_datetime('0005-01-14 12:24:14'))
def test_timekeeping(self):
    """
    Exercise every date/delta string format supported by the
    timekeeping module:

        YYYY-MM-DD_hh:mm:ss, YYYY-MM-DD_hh.mm.ss, YYYY-MM-DD_SSSSS,
        DDD_hh:mm:ss, DDD_hh.mm.ss, DDD_SSSSS, hh.mm.ss, hh:mm:ss,
        YYYY-MM-DD, SSSSS

    and subtraction of year/month/day offsets from a fixed date.
    """
    for calendar in ['gregorian', 'gregorian_noleap']:

        # YYYY-MM-DD_hh:mm:ss as a datetime.datetime
        parsed = string_to_datetime('0001-01-01_00:00:00')
        expected = datetime.datetime(year=1, month=1, day=1, hour=0,
                                     minute=0, second=0)
        self.assertEqual(parsed, expected)

        # YYYY-MM-DD_hh:mm:ss as a relative delta
        parsedDelta = string_to_relative_delta('0001-00-00_00:00:00',
                                               calendar=calendar)
        expectedDelta = MpasRelativeDelta(years=1, months=0, days=0,
                                          hours=0, minutes=0, seconds=0,
                                          calendar=calendar)
        self.assertEqual(parsedDelta, expectedDelta)

        # YYYY-MM-DD_hh.mm.ss
        parsed = string_to_datetime('0001-01-01_00.00.00')
        expected = datetime.datetime(year=1, month=1, day=1, hour=0,
                                     minute=0, second=0)
        self.assertEqual(parsed, expected)

        # YYYY-MM-DD_SSSSS
        parsed = string_to_datetime('0001-01-01_00002')
        expected = datetime.datetime(year=1, month=1, day=1, hour=0,
                                     minute=0, second=2)
        self.assertEqual(parsed, expected)

        # DDD_hh:mm:ss
        parsedDelta = string_to_relative_delta('0001_00:00:01',
                                               calendar=calendar)
        expectedDelta = MpasRelativeDelta(years=0, months=0, days=1,
                                          hours=0, minutes=0, seconds=1,
                                          calendar=calendar)
        self.assertEqual(parsedDelta, expectedDelta)

        # DDD_hh.mm.ss
        parsedDelta = string_to_relative_delta('0002_01.00.01',
                                               calendar=calendar)
        expectedDelta = MpasRelativeDelta(years=0, months=0, days=2,
                                          hours=1, minutes=0, seconds=1,
                                          calendar=calendar)
        self.assertEqual(parsedDelta, expectedDelta)

        # DDD_SSSSS
        parsedDelta = string_to_relative_delta('0002_00003',
                                               calendar=calendar)
        expectedDelta = MpasRelativeDelta(years=0, months=0, days=2,
                                          hours=0, minutes=0, seconds=3,
                                          calendar=calendar)
        self.assertEqual(parsedDelta, expectedDelta)

        # hh:mm:ss
        parsed = string_to_datetime('00:00:01')
        expected = datetime.datetime(year=1, month=1, day=1, hour=0,
                                     minute=0, second=1)
        self.assertEqual(parsed, expected)

        # hh.mm.ss
        parsedDelta = string_to_relative_delta('00.00.01',
                                               calendar=calendar)
        expectedDelta = MpasRelativeDelta(years=0, months=0, days=0,
                                          hours=0, minutes=0, seconds=1,
                                          calendar=calendar)
        self.assertEqual(parsedDelta, expectedDelta)

        # YYYY-MM-DD
        parsed = string_to_datetime('0001-01-01')
        expected = datetime.datetime(year=1, month=1, day=1, hour=0,
                                     minute=0, second=0)
        self.assertEqual(parsed, expected)

        # SSSSS
        parsedDelta = string_to_relative_delta('00005',
                                               calendar=calendar)
        expectedDelta = MpasRelativeDelta(years=0, months=0, days=0,
                                          hours=0, minutes=0, seconds=5,
                                          calendar=calendar)
        self.assertEqual(parsedDelta, expectedDelta)

        # subtracting a years-only, months-only and days-only offset
        # from a fixed date
        for deltaString, expectedString in [
                ('0005-00-00', '1991-01-15'),
                ('0000-02-00', '1995-11-15'),
                ('0000-00-20', '1995-12-26')]:
            start = string_to_datetime('1996-01-15')
            offset = string_to_relative_delta(deltaString,
                                              calendar=calendar)
            self.assertEqual(start - offset,
                             string_to_datetime(expectedString))
def test_MpasRelativeDeltaOps(self):
    """
    Test arithmetic with MpasRelativeDelta: datetime +/- delta near
    leap day in both calendars, delta +/- delta, construction from a
    pair of dates, multiplication/division by scalars, and the error
    raised when deltas with different calendars are combined.
    """
    # test if the calendars behave as they should close to leap day
    # also, test addition and subtraction of the form
    # datetime.datetime +/- MpasRelativeDelta above

    # both calendars with adding one day
    for calendar, expected in zip(['gregorian', 'gregorian_noleap'],
                                  ['2016-02-29', '2016-03-01']):
        self.assertEqual(string_to_datetime('2016-02-28') +
                         string_to_relative_delta('0000-00-01',
                                                  calendar=calendar),
                         string_to_datetime(expected))

    # both calendars with subtracting one day
    for calendar, expected in zip(['gregorian', 'gregorian_noleap'],
                                  ['2016-02-29', '2016-02-28']):
        self.assertEqual(string_to_datetime('2016-03-01') -
                         string_to_relative_delta('0000-00-01',
                                                  calendar=calendar),
                         string_to_datetime(expected))

    # both calendars with adding one month
    for calendar, expected in zip(['gregorian', 'gregorian_noleap'],
                                  ['2016-02-29', '2016-02-28']):
        self.assertEqual(string_to_datetime('2016-01-31') +
                         string_to_relative_delta('0000-01-00',
                                                  calendar=calendar),
                         string_to_datetime(expected))

    # both calendars with subtracting one month
    for calendar, expected in zip(['gregorian', 'gregorian_noleap'],
                                  ['2016-02-29', '2016-02-28']):
        self.assertEqual(string_to_datetime('2016-03-31') -
                         string_to_relative_delta('0000-01-00',
                                                  calendar=calendar),
                         string_to_datetime(expected))

    for calendar in ['gregorian', 'gregorian_noleap']:

        delta1 = string_to_relative_delta('0000-01-00', calendar=calendar)
        delta2 = string_to_relative_delta('0000-00-01', calendar=calendar)
        deltaSum = string_to_relative_delta('0000-01-01',
                                            calendar=calendar)
        # test MpasRelativeDelta + MpasRelativeDelta
        self.assertEqual(delta1 + delta2, deltaSum)
        # test MpasRelativeDelta - MpasRelativeDelta
        self.assertEqual(deltaSum - delta2, delta1)

        # test MpasRelativeDelta(date1, date2)
        date1 = string_to_datetime('0002-02-02')
        date2 = string_to_datetime('0001-01-01')
        delta = string_to_relative_delta('0001-01-01', calendar=calendar)
        self.assertEqual(MpasRelativeDelta(dt1=date1, dt2=date2,
                                           calendar=calendar),
                         delta)

        # test MpasRelativeDelta + datetime.datetime (an odd order but
        # it's allowed...)
        date1 = string_to_datetime('0001-01-01')
        delta = string_to_relative_delta('0001-01-01', calendar=calendar)
        date2 = string_to_datetime('0002-02-02')
        self.assertEqual(delta + date1, date2)

        # test multiplication/division by scalars
        delta1 = string_to_relative_delta('0001-01-01', calendar=calendar)
        delta2 = string_to_relative_delta('0002-02-02', calendar=calendar)
        self.assertEqual(2*delta1, delta2)
        self.assertEqual(delta2/2, delta1)

    # make sure there's an error when we try to add MpasRelativeDeltas
    # with different calendars.  Use six.assertRaisesRegex (as the rest
    # of this test suite does) rather than self.assertRaisesRegexp,
    # which was deprecated in Python 3.2 and removed in Python 3.12.
    with six.assertRaisesRegex(self, ValueError,
                               'MpasRelativeDelta objects can only be '
                               'added if their calendars match.'):
        delta1 = string_to_relative_delta('0000-01-00',
                                          calendar='gregorian')
        delta2 = string_to_relative_delta('0000-00-01',
                                          calendar='gregorian_noleap')
        deltaSum = delta1 + delta2
def _parse_dataset_time(ds, inTimeVariableName, calendar,
                        simulationStartTime, outTimeVariableName,
                        referenceDate):  # {{{
    """
    A helper function for computing a time coordinate from an MPAS time
    variable.  Given a data set and a time variable name (or tuple of 2
    time names), returns a new data set with time coordinate
    `outTimeVariableName` filled with days since `referenceDate`

    Parameters
    ----------
    ds : xarray.DataSet object
        The data set containing an MPAS time variable to be used to
        build an xarray time coordinate.

    inTimeVariableName : string or tuple or list of strings
        The name of the time variable in the MPAS data set that will be
        used to build the 'Time' coordinate.  The array(s) named by
        inTimeVariableName should contain date strings or the number of
        days since the start of the simulation. Typically,
        inTimeVariableName is one of {'daysSinceStartOfSim','xtime'}.
        If a list of two variable names is provided, times from the two
        are averaged together to determine the value of the time
        coordinate.  In such cases, inTimeVariableName is typically
        {['xtime_start', 'xtime_end']}.

    calendar : {'gregorian', 'gregorian_noleap'}
        The name of one of the calendars supported by MPAS cores

    simulationStartTime : string
        The start date of the simulation, used to convert from time
        variables expressed as days since the start of the simulation
        to days since the reference date. `simulationStartTime` takes
        one of the following forms::

            0001-01-01

            0001-01-01 00:00:00

        simulationStartTime is only required if the MPAS time variable
        (identified by timeVariableName) is a number of days since the
        start of the simulation.

    outTimeVariableName : string
        The name of the coordinate to assign times to, typically 'Time'.

    referenceDate : string
        The reference date for the time variable, typically '0001-01-01',
        taking one of the following forms::

            0001-01-01

            0001-01-01 00:00:00

    Returns
    -------
    dataset : xarray.dataset object
        A copy of the input data set with the `outTimeVariableName`
        coordinate containing the time coordinate parsed from
        `inTimeVariableName`.

    Raises
    ------
    TypeError
        If the time variable has an unsupported type (not a date string
        or a floating-point number of days since the start of the
        simulation).
    ValueError
        If the time variable is a number of days since the start of the
        simulation but simulationStartTime is None.
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    if isinstance(inTimeVariableName, (tuple, list)):
        # we want to average the two
        assert (len(inTimeVariableName) == 2)

        # recursively parse each of the two time variables separately,
        # then average the resulting day counts below
        dsStart = _parse_dataset_time(
            ds=ds,
            inTimeVariableName=inTimeVariableName[0],
            calendar=calendar,
            simulationStartTime=simulationStartTime,
            outTimeVariableName=outTimeVariableName,
            referenceDate=referenceDate)
        dsEnd = _parse_dataset_time(
            ds=ds,
            inTimeVariableName=inTimeVariableName[1],
            calendar=calendar,
            simulationStartTime=simulationStartTime,
            outTimeVariableName=outTimeVariableName,
            referenceDate=referenceDate)
        starts = dsStart[outTimeVariableName].values
        ends = dsEnd[outTimeVariableName].values

        # replace the time in starts with the mean of starts and ends
        dsOut = dsStart.copy()

        # keep the interval bounds available as extra coordinates
        dsOut.coords['startTime'] = (outTimeVariableName, starts)
        dsOut.coords['endTime'] = (outTimeVariableName, ends)

        # midpoint of each [start, end] interval becomes the time value
        dsOut.coords[outTimeVariableName] = (
            outTimeVariableName,
            [starts[i] + (ends[i] - starts[i]) / 2
             for i in range(len(starts))])

    else:

        # there is just one time variable (either because we're
        # recursively calling the function or because we're not
        # averaging).

        # The contents of the time variable is expected to be either a
        # string (|S64) or a float (meaning days since start of the
        # simulation).

        timeVar = ds[inTimeVariableName]

        if timeVar.dtype == '|S64':
            # this is an array of date strings like 'xtime'
            # convert to string
            timeStrings = [''.join(str(xtime.astype('U'))).strip()
                           for xtime in timeVar.values]
            days = string_to_days_since_date(dateString=timeStrings,
                                             referenceDate=referenceDate,
                                             calendar=calendar)

        elif timeVar.dtype == 'float64':
            # this array contains floating-point days like
            # 'daysSinceStartOfSim'
            if simulationStartTime is None:
                raise ValueError('MPAS time variable {} appears to be a '
                                 'number of days since start \n'
                                 'of sim but simulationStartTime was not'
                                 ' supplied.'.format(inTimeVariableName))

            if (string_to_datetime(referenceDate) ==
                    string_to_datetime(simulationStartTime)):
                # reference date and simulation start coincide, so the
                # day counts can be used unchanged
                days = timeVar.values
            else:
                # a conversion may be required
                dates = days_to_datetime(days=timeVar.values,
                                         referenceDate=simulationStartTime,
                                         calendar=calendar)
                days = datetime_to_days(dates=dates,
                                        referenceDate=referenceDate,
                                        calendar=calendar)

        elif timeVar.dtype == 'timedelta64[ns]':
            # timedelta64 indicates xarray already decoded the times,
            # which loses the MPAS calendar information
            raise TypeError('timeVar of unsupported type {}.  This is '
                            'likely because xarray.open_dataset \n'
                            'was called with decode_times=True, which '
                            'can mangle MPAS times.'.format(
                                timeVar.dtype))
        else:
            raise TypeError("timeVar of unsupported type {}".format(
                timeVar.dtype))

        dsOut = ds.copy()
        dsOut.coords[outTimeVariableName] = (outTimeVariableName, days)

    return dsOut  # }}}
def readpath(self, streamName, startDate=None, endDate=None, calendar=None): """ Given the name of a stream and optionally start and end dates and a calendar type, returns a list of files that match the file template in the stream. Parameters ---------- streamName : string The name of a stream that produced the files startDate, endDate : string or datetime.datetime, optional String or datetime.datetime objects identifying the beginning and end dates to be found. Note: a buffer of one output interval is subtracted from startDate and added to endDate because the file date might be the first or last date contained in the file (or anything in between). calendar : {'gregorian', 'gregorian_noleap'}, optional The name of one of the calendars supported by MPAS cores, and is required if startDate and/or endDate are supplied Returns ------- fileList : list A list of file names produced by the stream that fall between the startDate and endDate (if supplied) Raises ------ ValueError If no files from the stream are found. 
""" # Authors # ------- # Xylar Asay-Davis template = self.read(streamName, 'filename_template') if template is None: raise ValueError('Stream {} not found in streams file {}.'.format( streamName, self.fname)) replacements = { '$Y': '[0-9][0-9][0-9][0-9]', '$M': '[0-9][0-9]', '$D': '[0-9][0-9]', '$S': '[0-9][0-9][0-9][0-9][0-9]', '$h': '[0-9][0-9]', '$m': '[0-9][0-9]', '$s': '[0-9][0-9]' } path = template for old in replacements: path = path.replace(old, replacements[old]) if not os.path.isabs(path): # this is not an absolute path, so make it an absolute path path = '{}/{}'.format(self.streamsdir, path) fileList = paths(path) if len(fileList) == 0: raise ValueError( "Path {} in streams file {} for '{}' not found.".format( path, self.fname, streamName)) if (startDate is None) and (endDate is None): return fileList if startDate is not None: # read one extra file before the start date to be on the safe side if isinstance(startDate, six.string_types): startDate = string_to_datetime(startDate) if endDate is not None: # read one extra file after the end date to be on the safe side if isinstance(endDate, six.string_types): endDate = string_to_datetime(endDate) # remove any path that's part of the template template = os.path.basename(template) dateStartIndex = template.find('$') if dateStartIndex == -1: # there is no date in the template, so we can't exclude any files # based on date return fileList dateEndOffset = len(template) - (template.rfind('$') + 2) outFileList = [] for fileName in fileList: # get just the baseName = os.path.basename(fileName) dateEndIndex = len(baseName) - dateEndOffset fileDateString = baseName[dateStartIndex:dateEndIndex] fileDate = string_to_datetime(fileDateString) add = True if startDate is not None and startDate > fileDate: add = False if endDate is not None and endDate < fileDate: add = False if add: outFileList.append(fileName) return outFileList