def plot_xtick_format(calendar, minDays, maxDays, maxXTicks, yearStride=None):
    '''
    Set the positions and labels of the major x-axis ticks of the current
    axes for time series / index plots

    Ticks are placed at the start of each year (labelled ``YYYY``) when
    ``yearStride`` is given or when the number of years spanned exceeds
    ``maxXTicks/2``; otherwise they are placed at the start of each month
    (labelled ``YYYY-MM``).

    Parameters
    ----------
    calendar : str
        the calendar to use for formatting the time axis

    minDays : float
        start time for labels

    maxDays : float
        end time for labels

    maxXTicks : int
        the maximum number of tick marks to display, used to sub-sample ticks
        if there are too many (ignored when ``yearStride`` is given)

    yearStride : int, optional
        the number of years to skip over between ticks
    '''
    # Authors
    # -------
    # Xylar Asay-Davis

    ax = plt.gca()

    firstDate = days_to_datetime(np.amin(minDays), calendar=calendar)
    lastDate = days_to_datetime(np.amax(maxDays), calendar=calendar)

    # the short-circuit matters: maxXTicks is only used in the comparison
    # when no explicit stride was requested
    useYearTicks = (yearStride is not None or
                    lastDate.year - firstDate.year > maxXTicks/2)

    if not useYearTicks:
        # one tick at the start of every month of every year in range
        tickDays = [date_to_days(year=year, month=month, calendar=calendar)
                    for year in range(firstDate.year, lastDate.year+1)
                    for month in range(1, 13)]
        showMonth = True
    else:
        if yearStride is not None:
            # an explicit stride disables sub-sampling of the ticks
            maxXTicks = None
        else:
            yearStride = 1
        tickYears = np.arange(firstDate.year, lastDate.year+1, yearStride)
        tickDays = [date_to_days(year=year, calendar=calendar)
                    for year in tickYears]
        showMonth = False

    labelTick = partial(_date_tick, calendar=calendar,
                        includeMonth=showMonth)

    xaxis = ax.xaxis
    xaxis.set_major_locator(FixedLocator(tickDays, maxXTicks))
    xaxis.set_major_formatter(FuncFormatter(labelTick))

    plt.setp(ax.get_xticklabels(), rotation=30)

    plt.autoscale(enable=True, axis='x', tight=True)
def test_iselvals(self):
    """
    Open the example data set with ``iselValues`` selecting the first three
    ``nVertLevels`` and check variable presence, shapes, a reference depth
    value and the first time stamp.
    """
    calendar = 'gregorian_noleap'
    variableList = \
        ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature',
         'refBottomDepth']

    ds = mpas_xarray.open_multifile_dataset(
        fileNames=str(self.datadir.join('example_jan.nc')),
        calendar=calendar,
        simulationStartTime='0001-01-01',
        timeVariableName='time_avg_daysSinceStartOfSim',
        variableList=variableList,
        iselValues={'nVertLevels': slice(0, 3)})

    # requested variables may show up as data variables or as coordinates
    availableNames = list(ds.data_vars.keys()) + list(ds.coords.keys())
    assert (numpy.all([name in availableNames for name in variableList]))

    temperature = ds[variableList[0]]
    refBottomDepth = ds['refBottomDepth']
    self.assertEqual(temperature.shape, (1, 7, 3))
    self.assertEqual(refBottomDepth.shape, (3, ))
    self.assertApproxEqual(refBottomDepth[-1], 4.882000207901)

    firstDate = days_to_datetime(days=ds.Time.values[0],
                                 referenceDate='0001-01-01',
                                 calendar=calendar)
    self.assertEqual(firstDate,
                     string_to_datetime('0005-01-14 12:24:14'))
def test_subset_variables(self):
    """
    Check variable subsetting both through an explicit call to
    ``subset_variables`` and through the ``variableList`` argument, and that
    asking only for missing variables raises ``ValueError``.
    """
    calendar = 'gregorian_noleap'
    variableList = \
        ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']

    # keyword arguments shared by every open call below
    openArgs = dict(fileNames=str(self.datadir.join('example_jan.nc')),
                    calendar=calendar,
                    timeVariableName=['xtime_start', 'xtime_end'])

    # first, test loading the whole data set and then calling
    # subset_variables explicitly
    dsFull = mpas_xarray.open_multifile_dataset(**openArgs)
    ds = mpas_xarray.subset_variables(dsFull, variableList)

    availableNames = list(ds.data_vars.keys()) + list(ds.coords.keys())
    assert (numpy.all([name in availableNames for name in variableList]))

    dates = days_to_datetime(days=ds.Time.values,
                             referenceDate='0001-01-01',
                             calendar=calendar)
    self.assertEqual(dates, string_to_datetime('0005-01-16 12:22:30'))

    # next, test the same with the onlyvars argument
    ds = mpas_xarray.open_multifile_dataset(variableList=variableList,
                                            **openArgs)
    self.assertEqual(list(ds.data_vars.keys()), variableList)

    # finally, a list with no matching variables must be rejected
    with six.assertRaisesRegex(self, ValueError,
                               'Empty dataset is returned.'):
        missingvars = ['foo', 'bar']
        ds = mpas_xarray.open_multifile_dataset(variableList=missingvars,
                                                **openArgs)
def _date_tick(days, pos, calendar='gregorian', includeMonth=True):
    '''
    Tick-label formatter: convert a tick location in days to a ``YYYY-MM``
    (or ``YYYY`` if ``includeMonth`` is false) string.  ``pos`` is part of
    the formatter call signature and is not used.
    '''
    # negative tick locations are clamped to day zero before conversion
    date = days_to_datetime(np.maximum(days, 0.), calendar)
    if not includeMonth:
        return '{:04d}'.format(date.year)
    return '{:04d}-{:02d}'.format(date.year, date.month)
def test_days_to_datetime(self):
    """
    Check ``days_to_datetime`` against known dates for both supported
    calendars, including a span that crosses the 2016 leap year.
    """
    # offsets within the first year and a bit: same answer in both calendars
    earlyCases = [('0001-01-01', 0.),
                  ('0001-01-02', 1.),
                  ('0001-02-01', 31.),
                  ('0002-01-01', 365.)]
    for calendar in ['gregorian', 'gregorian_noleap']:
        for dateString, days in earlyCases:
            result = days_to_datetime(days=days, calendar=calendar,
                                      referenceDate='0001-01-01')
            self.assertEqual(result, string_to_datetime(dateString))

    # 2016 has 366 days in 'gregorian' but 365 in 'gregorian_noleap'
    leapCases = [('gregorian', 366.),
                 ('gregorian_noleap', 365.)]
    expected = '2017-01-01'
    for calendar, days in leapCases:
        result = days_to_datetime(days=days, calendar=calendar,
                                  referenceDate='2016-01-01')
        self.assertEqual(result, string_to_datetime(expected))
def test_no_units(self):
    """
    Open ``example_no_units_jan.nc`` and check that the requested variables
    and the first time stamp come out as expected.
    """
    calendar = 'gregorian_noleap'
    variableList = \
        ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature',
         'refBottomDepth']

    ds = mpas_xarray.open_multifile_dataset(
        fileNames=str(self.datadir.join('example_no_units_jan.nc')),
        calendar=calendar,
        simulationStartTime='0001-01-01',
        timeVariableName='time_avg_daysSinceStartOfSim',
        variableList=variableList)

    # exactly the requested variables, irrespective of order
    self.assertEqual(sorted(ds.data_vars.keys()), sorted(variableList))

    firstDate = days_to_datetime(days=ds.Time.values[0],
                                 referenceDate='0001-01-01',
                                 calendar=calendar)
    self.assertEqual(firstDate,
                     string_to_datetime('0005-01-14 12:24:14'))
def add_years_months_days_in_month(ds, calendar=None):  # {{{
    '''
    Return ``ds`` with ``year``, ``month`` and ``daysInMonth`` coordinates,
    adding whichever of them is missing.

    ``year`` and ``month`` are derived from the ``Time`` coordinate using
    ``calendar``.  ``daysInMonth`` is ``endTime - startTime`` when both of
    those coordinates are present; otherwise it is looked up by month in
    ``constants.daysInMonth``, which ignores leap years.

    Parameters
    ----------
    ds : ``xarray.Dataset`` or ``xarray.DataArray`` object
        A data set with a ``Time`` coordinate expressed as days since
        0001-01-01

    calendar : {'gregorian', 'gregorian_noleap'}, optional
        The name of one of the calendars supported by MPAS cores, used to
        determine ``year`` and ``month`` from ``Time`` coordinate

    Returns
    -------
    ds : object of same type as ``ds``
        The input itself if all three coordinates were already present,
        otherwise a copy with the missing ones added

    Raises
    ------
    ValueError
        If ``year`` or ``month`` must be computed but ``calendar`` is
        ``None``
    '''
    # Authors
    # -------
    # Xylar Asay-Davis

    hasYear = 'year' in ds.coords
    hasMonth = 'month' in ds.coords
    hasDaysInMonth = 'daysInMonth' in ds.coords

    if hasYear and hasMonth and hasDaysInMonth:
        # nothing to add
        return ds

    ds = ds.copy()

    if not (hasYear and hasMonth):
        if calendar is None:
            raise ValueError('calendar must be provided if month and year '
                             'coordinate is not in ds.')
        datetimes = days_to_datetime(ds.Time, calendar=calendar)

        if not hasYear:
            ds.coords['year'] = ('Time', [date.year for date in datetimes])

        if not hasMonth:
            ds.coords['month'] = ('Time',
                                  [date.month for date in datetimes])

    if hasDaysInMonth:
        return ds

    if 'startTime' in ds.coords and 'endTime' in ds.coords:
        ds.coords['daysInMonth'] = ds.endTime - ds.startTime
        return ds

    if calendar == 'gregorian':
        print('Warning: The MPAS run used the Gregorian calendar '
              'but does not appear to have\n'
              'supplied start and end times.  Climatologies '
              'will be computed with\n'
              'month durations ignoring leap years.')

    # fall back on the fixed table of month lengths
    monthLengths = [constants.daysInMonth[int(month) - 1]
                    for month in ds.month.values]
    ds.coords['daysInMonth'] = ('Time', numpy.array(monthLengths, float))

    return ds  # }}}
def run_task(self):  # {{{
    """
    Compute time series of regional profiles

    For every monthly input file, the area-weighted mean and mean of squares
    of each field in ``self.fields`` is computed over each selected region
    and vertical level, and the results are concatenated along ``Time`` and
    written to a single NetCDF file.  If that file already exists and every
    (year, month) it contains matches one of the input files, nothing is
    recomputed.

    Raises
    ------
    IOError
        If no restart file is found when reading ``refBottomDepth``
    """
    # Authors
    # -------
    # Milena Veneziani, Mark Petersen, Phillip J. Wolfram, Xylar Asay-Davis

    self.logger.info("\nCompute time series of regional profiles...")

    startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
    endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

    timeSeriesName = self.masksSubtask.regionGroup.replace(' ', '')

    outputDirectory = '{}/{}/'.format(
        build_config_full_path(self.config, 'output',
                               'timeseriesSubdirectory'),
        timeSeriesName)
    try:
        os.makedirs(outputDirectory)
    except OSError:
        # best effort: typically the directory already exists
        pass

    outputFileName = '{}/regionalProfiles_{}_{:04d}-{:04d}.nc'.format(
        outputDirectory, timeSeriesName, self.startYear, self.endYear)

    inputFiles = sorted(self.historyStreams.readpath(
        'timeSeriesStatsMonthlyOutput', startDate=startDate,
        endDate=endDate, calendar=self.calendar))

    years, months = get_files_year_month(inputFiles,
                                         self.historyStreams,
                                         'timeSeriesStatsMonthlyOutput')

    variableList = [field['mpas'] for field in self.fields]

    outputExists = os.path.exists(outputFileName)
    outputValid = outputExists
    if outputExists:
        with open_mpas_dataset(fileName=outputFileName,
                               calendar=self.calendar,
                               timeVariableNames=None,
                               variableList=None,
                               startDate=startDate,
                               endDate=endDate) as dsIn:

            # ``sizes`` is the dimension-name -> length mapping (and is
            # what the rest of this method uses); the mapping behavior of
            # ``Dataset.dims`` is deprecated in xarray
            for inIndex in range(dsIn.sizes['Time']):

                # does this cached (year, month) match any input file?
                mask = np.logical_and(
                    dsIn.year[inIndex].values == years,
                    dsIn.month[inIndex].values == months)
                if np.count_nonzero(mask) == 0:
                    outputValid = False
                    break

    if outputValid:
        self.logger.info('  Time series exists -- Done.')
        return

    # get areaCell
    restartFileName = \
        self.runStreams.readpath('restart')[0]

    dsRestart = xr.open_dataset(restartFileName)
    dsRestart = dsRestart.isel(Time=0)
    areaCell = dsRestart.areaCell

    nVertLevels = dsRestart.sizes['nVertLevels']

    vertIndex = \
        xr.DataArray.from_dict({'dims': ('nVertLevels',),
                                'data': np.arange(nVertLevels)})

    # True for vertical indices below maxLevelCell in each column
    vertMask = vertIndex < dsRestart.maxLevelCell

    # get region masks
    regionMaskFileName = self.masksSubtask.maskFileName
    dsRegionMask = xr.open_dataset(regionMaskFileName)

    # figure out the indices of the regions to plot
    regionNames = decode_strings(dsRegionMask.regionNames)

    regionIndices = []
    for regionToPlot in self.regionNames:
        # regions not present in the mask file are silently skipped
        for index, regionName in enumerate(regionNames):
            if regionToPlot == regionName:
                regionIndices.append(index)
                break

    # select only those regions we want to plot
    dsRegionMask = dsRegionMask.isel(nRegions=regionIndices)
    cellMasks = dsRegionMask.regionCellMasks
    regionNamesVar = dsRegionMask.regionNames

    totalArea = (cellMasks * areaCell * vertMask).sum('nCells')

    datasets = []
    for fileName in inputFiles:

        dsLocal = open_mpas_dataset(
            fileName=fileName,
            calendar=self.calendar,
            variableList=variableList,
            startDate=startDate,
            endDate=endDate)
        dsLocal = dsLocal.isel(Time=0)
        time = dsLocal.Time.values
        date = days_to_datetime(time, calendar=self.calendar)

        self.logger.info('    date: {:04d}-{:02d}'.format(date.year,
                                                          date.month))

        # for each region and variable, compute area-weighted sum and
        # squared sum
        for field in self.fields:
            variableName = field['mpas']
            prefix = field['prefix']
            self.logger.info('      {}'.format(field['titleName']))

            var = dsLocal[variableName].where(vertMask)

            meanName = '{}_mean'.format(prefix)
            dsLocal[meanName] = \
                (cellMasks * areaCell * var).sum('nCells') / totalArea

            meanSquaredName = '{}_meanSquared'.format(prefix)
            dsLocal[meanSquaredName] = \
                (cellMasks * areaCell * var**2).sum('nCells') / totalArea

        # drop the original variables
        dsLocal = dsLocal.drop_vars(variableList)

        datasets.append(dsLocal)

    # combine data sets into a single data set
    dsOut = xr.concat(datasets, 'Time')

    dsOut.coords['regionNames'] = regionNamesVar
    dsOut['totalArea'] = totalArea
    dsOut.coords['year'] = (('Time',), years)
    dsOut['year'].attrs['units'] = 'years'
    dsOut.coords['month'] = (('Time',), months)
    dsOut['month'].attrs['units'] = 'months'

    # Note: restart file, not a mesh file because we need refBottomDepth,
    # not in a mesh file
    # NOTE(review): the same lookup already succeeded above without this
    # guard, so this IOError is unlikely to ever be raised -- confirm
    # whether the guard belongs on the first lookup instead
    try:
        restartFile = self.runStreams.readpath('restart')[0]
    except ValueError:
        raise IOError('No MPAS-O restart file found: need at least one '
                      'restart file for plotting time series vs. depth')

    with xr.open_dataset(restartFile) as dsRestart:
        depths = dsRestart.refBottomDepth.values
        # z is the negated mid-point depth of each layer
        z = np.zeros(depths.shape)
        z[0] = -0.5 * depths[0]
        z[1:] = -0.5 * (depths[0:-1] + depths[1:])

    dsOut.coords['z'] = (('nVertLevels',), z)
    dsOut['z'].attrs['units'] = 'meters'

    write_netcdf(dsOut, outputFileName)
def run_task(self):  # {{{
    """
    Compute vertical aggregates of the data and plot the time series

    The field ``self.mpasFieldName`` is summed over ``nVertLevels`` within
    each configured depth range for the main run (black) and, when
    available, a preprocessed reference run (purple) and a control run
    (red).  All series are drawn on one figure, which is saved together
    with its gallery XML entry.
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Milena Veneziani, Greg Streletz

    self.logger.info("\nPlotting depth-integrated time series of "
                     "{}...".format(self.fieldNameInTitle))

    config = self.config
    calendar = self.calendar

    mainRunName = config.get('runs', 'mainRunName')

    plotTitles = config.getExpression('regions', 'plotTitles')
    allRegionNames = config.getExpression('regions', 'regions')
    regionIndex = allRegionNames.index(self.regionName)
    regionNameInTitle = plotTitles[regionIndex]

    startDate = config.get('timeSeries', 'startDate')
    endDate = config.get('timeSeries', 'endDate')

    # Load data
    self.logger.info('  Load ocean data...')
    ds = open_mpas_dataset(fileName=self.inFileName,
                           calendar=calendar,
                           variableList=[self.mpasFieldName, 'depth'],
                           timeVariableNames=None,
                           startDate=startDate,
                           endDate=endDate)
    ds = ds.isel(nOceanRegionsTmp=regionIndex)

    depths = ds.depth.values

    divisionDepths = config.getExpression(self.sectionName, 'depths')

    # for each depth interval to plot, determine the top and bottom depth
    topDepths = [0, 0] + divisionDepths
    bottomDepths = [depths[-1]] + divisionDepths + [depths[-1]]

    legends = []
    for top, bottom in zip(topDepths, bottomDepths):
        if bottom == depths[-1]:
            legends.append('{}m-bottom'.format(top))
        else:
            legends.append('{}m-{}m'.format(top, bottom))

    # more possible symbols than we typically use
    lines = ['-', '-', '--', None, None, None, None]
    markers = [None, None, None, '+', 'o', '^', 'v']
    widths = [5, 3, 3, 3, 3, 3, 3]
    points = [None, None, None, 300, 300, 300, 300]

    xLabel = 'Time [years]'
    yLabel = self.yAxisLabel

    title = '{}, {} \n {} (black)'.format(self.fieldNameInTitle,
                                          regionNameInTitle,
                                          mainRunName)

    outFileName = '{}/{}.png'.format(self.plotsDirectory, self.filePrefix)

    # parallel lists: entry i of each describes the i-th plotted series
    timeSeries = []
    lineColors = []
    lineStyles = []
    lineMarkers = []
    lineWidths = []
    maxPoints = []
    legendText = []

    def add_series(series, rangeIndex, lineColor, legend):
        # append one series and its style, keeping all lists aligned
        timeSeries.append(series)
        lineColors.append(lineColor)
        lineStyles.append(lines[rangeIndex])
        lineMarkers.append(markers[rangeIndex])
        lineWidths.append(widths[rangeIndex])
        maxPoints.append(points[rangeIndex])
        legendText.append(legend)

    def depth_range_sum(dataset, top, bottom):
        # sum the field over levels with top < depth <= bottom
        field = dataset[self.mpasFieldName].where(dataset.depth > top)
        field = field.where(dataset.depth <= bottom)
        return field.sum('nVertLevels')

    # main run: only these series get legend entries
    for rangeIndex, (top, bottom) in enumerate(zip(topDepths,
                                                   bottomDepths)):
        add_series(depth_range_sum(ds, top, bottom), rangeIndex, 'k',
                   legends[rangeIndex])

    preprocessedReferenceRunName = config.get(
        'runs', 'preprocessedReferenceRunName')
    if preprocessedReferenceRunName != 'None':
        preprocessedInputDirectory = config.get(
            'oceanPreprocessedReference', 'baseDirectory')

        self.logger.info('  Load in preprocessed reference data...')
        preprocessedFilePrefix = config.get(self.sectionName,
                                            'preprocessedFilePrefix')
        inFilesPreprocessed = '{}/{}.{}.year*.nc'.format(
            preprocessedInputDirectory, preprocessedFilePrefix,
            preprocessedReferenceRunName)

        combine_time_series_with_ncrcat(
            inFilesPreprocessed, self.preprocessedIntermediateFileName,
            logger=self.logger)
        dsPreprocessed = open_mpas_dataset(
            fileName=self.preprocessedIntermediateFileName,
            calendar=calendar,
            timeVariableNames='xtime')

        yearStart = days_to_datetime(ds.Time.min(), calendar=calendar).year
        yearEnd = days_to_datetime(ds.Time.max(), calendar=calendar).year
        timeStart = date_to_days(year=yearStart, month=1, day=1,
                                 calendar=calendar)
        timeEnd = date_to_days(year=yearEnd, month=12, day=31,
                               calendar=calendar)

        yearEndPreprocessed = days_to_datetime(dsPreprocessed.Time.max(),
                                               calendar=calendar).year
        if yearStart <= yearEndPreprocessed:
            dsPreprocessed = dsPreprocessed.sel(
                Time=slice(timeStart, timeEnd))
        else:
            self.logger.warning('Warning: Preprocessed time series ends '
                                'before the timeSeries startYear and will '
                                'not be plotted.')
            preprocessedReferenceRunName = 'None'

        # rolling mean seems to have trouble with dask data sets so we
        # write out the data set and read it back as a single-file data set
        # (without dask)
        dsPreprocessed = dsPreprocessed.drop('xtime')
        write_netcdf(dsPreprocessed, self.preprocessedFileName)
        dsPreprocessed = xarray.open_dataset(self.preprocessedFileName)

    if preprocessedReferenceRunName != 'None':
        title = '{} \n {} (purple)'.format(title,
                                           preprocessedReferenceRunName)

        preprocessedFieldPrefix = config.get(self.sectionName,
                                             'preprocessedFieldPrefix')

        movingAveragePoints = config.getint(self.sectionName,
                                            'movingAveragePoints')

        suffixes = ['tot'] + ['{}m'.format(depth) for depth in
                              divisionDepths] + ['btm']

        # these preprocessed data are already anomalies
        dsPreprocessed = compute_moving_avg(dsPreprocessed,
                                            movingAveragePoints)
        for rangeIndex, suffix in enumerate(suffixes):
            variableName = '{}_{}'.format(preprocessedFieldPrefix, suffix)
            if variableName in dsPreprocessed.data_vars:
                series = dsPreprocessed[variableName]
            else:
                self.logger.warning(
                    'Warning: Preprocessed variable {} '
                    'not found. Skipping.'.format(variableName))
                # a None placeholder keeps timeSeries aligned with the
                # style lists (list.extend(None) would raise TypeError)
                series = None

            add_series(series, rangeIndex, 'purple', None)

    if self.controlConfig is not None:

        controlRunName = self.controlConfig.get('runs', 'mainRunName')

        title = '{} \n {} (red)'.format(title, controlRunName)

        self.logger.info('  Load ocean data from control run...')
        controlStartYear = self.controlConfig.getint(
            'timeSeries', 'startYear')
        controlEndYear = self.controlConfig.getint('timeSeries', 'endYear')
        controlStartDate = '{:04d}-01-01_00:00:00'.format(controlStartYear)
        controlEndDate = '{:04d}-12-31_23:59:59'.format(controlEndYear)
        dsRef = open_mpas_dataset(
            fileName=self.refFileName,
            calendar=calendar,
            variableList=[self.mpasFieldName, 'depth'],
            timeVariableNames=None,
            startDate=controlStartDate,
            endDate=controlEndDate)
        dsRef = dsRef.isel(nOceanRegionsTmp=regionIndex)

        for rangeIndex, (top, bottom) in enumerate(zip(topDepths,
                                                       bottomDepths)):
            add_series(depth_range_sum(dsRef, top, bottom), rangeIndex,
                       'r', None)

    if config.has_option(self.taskName, 'firstYearXTicks'):
        firstYearXTicks = config.getint(self.taskName,
                                        'firstYearXTicks')
    else:
        firstYearXTicks = None

    if config.has_option(self.taskName, 'yearStrideXTicks'):
        yearStrideXTicks = config.getint(self.taskName,
                                         'yearStrideXTicks')
    else:
        yearStrideXTicks = None

    timeseries_analysis_plot(config=config, dsvalues=timeSeries,
                             calendar=calendar, title=title, xlabel=xLabel,
                             ylabel=yLabel, movingAveragePoints=None,
                             lineColors=lineColors, lineStyles=lineStyles,
                             markers=lineMarkers, lineWidths=lineWidths,
                             legendText=legendText, maxPoints=maxPoints,
                             firstYearXTicks=firstYearXTicks,
                             yearStrideXTicks=yearStrideXTicks)

    savefig(outFileName)

    write_image_xml(config=config,
                    filePrefix=self.filePrefix,
                    componentName='Ocean',
                    componentSubdirectory='ocean',
                    galleryGroup=self.galleryGroup,
                    groupLink=self.groupLink,
                    gallery=self.galleryName,
                    thumbnailDescription='{} {}'.format(
                        self.regionName, self.thumbnailSuffix),
                    imageDescription=self.imageCaption,
                    imageCaption=self.imageCaption)
def run_task(self):  # {{{
    """
    Performs analysis of the time-series output of sea-surface temperature
    (SST).

    For each region listed in the ``timeSeriesSST`` config section, the SST
    time series of the main run is plotted, optionally together with a
    reference run (red) and a preprocessed reference run (purple), and a
    gallery XML entry is written for the figure.
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Milena Veneziani

    self.logger.info("\nPlotting SST time series...")

    self.logger.info('  Load SST data...')

    config = self.config
    calendar = self.calendar

    mainRunName = config.get('runs', 'mainRunName')
    preprocessedReferenceRunName = \
        config.get('runs', 'preprocessedReferenceRunName')
    preprocessedInputDirectory = config.get('oceanPreprocessedReference',
                                            'baseDirectory')

    movingAveragePoints = config.getint('timeSeriesSST',
                                        'movingAveragePoints')

    regions = config.getExpression('regions', 'regions')
    plotTitles = config.getExpression('regions', 'plotTitles')
    regionsToPlot = config.getExpression('timeSeriesSST', 'regions')

    regionIndicesToPlot = [regions.index(region) for region in
                           regionsToPlot]

    outputDirectory = build_config_full_path(config, 'output',
                                             'timeseriesSubdirectory')

    make_directories(outputDirectory)

    dsSST = open_mpas_dataset(fileName=self.inputFile,
                              calendar=calendar,
                              variableList=self.variableList,
                              startDate=self.startDate,
                              endDate=self.endDate)

    # whole-year time bounds used to slice the preprocessed reference data
    yearStart = days_to_datetime(dsSST.Time.min(), calendar=calendar).year
    yearEnd = days_to_datetime(dsSST.Time.max(), calendar=calendar).year
    timeStart = date_to_days(year=yearStart, month=1, day=1,
                             calendar=calendar)
    timeEnd = date_to_days(year=yearEnd, month=12, day=31,
                           calendar=calendar)

    if self.refConfig is not None:
        baseDirectory = build_config_full_path(self.refConfig, 'output',
                                               'timeSeriesSubdirectory')

        refFileName = '{}/{}.nc'.format(
            baseDirectory, self.mpasTimeSeriesTask.fullTaskName)

        refStartYear = self.refConfig.getint('timeSeries', 'startYear')
        refEndYear = self.refConfig.getint('timeSeries', 'endYear')
        refStartDate = '{:04d}-01-01_00:00:00'.format(refStartYear)
        refEndDate = '{:04d}-12-31_23:59:59'.format(refEndYear)

        dsRefSST = open_mpas_dataset(fileName=refFileName,
                                     calendar=calendar,
                                     variableList=self.variableList,
                                     startDate=refStartDate,
                                     endDate=refEndDate)
    else:
        dsRefSST = None

    if preprocessedReferenceRunName != 'None':
        self.logger.info('  Load in SST for a preprocesses reference '
                         'run...')
        inFilesPreprocessed = '{}/SST.{}.year*.nc'.format(
            preprocessedInputDirectory, preprocessedReferenceRunName)

        outFolder = '{}/preprocessed'.format(outputDirectory)
        make_directories(outFolder)
        outFileName = '{}/sst.nc'.format(outFolder)

        combine_time_series_with_ncrcat(inFilesPreprocessed, outFileName,
                                        logger=self.logger)
        dsPreprocessed = open_mpas_dataset(fileName=outFileName,
                                           calendar=calendar,
                                           timeVariableNames='xtime')
        yearEndPreprocessed = days_to_datetime(dsPreprocessed.Time.max(),
                                               calendar=calendar).year
        if yearStart <= yearEndPreprocessed:
            dsPreprocessedTimeSlice = \
                dsPreprocessed.sel(Time=slice(timeStart, timeEnd))
        else:
            self.logger.warning('Preprocessed time series ends before the '
                                'timeSeries startYear and will not be '
                                'plotted.')
            # from here on, behave as if no preprocessed run was requested
            preprocessedReferenceRunName = 'None'

    self.logger.info('  Make plots...')
    for regionIndex in regionIndicesToPlot:
        region = regions[regionIndex]

        title = '{} SST'.format(plotTitles[regionIndex])
        xLabel = 'Time [years]'
        # raw string: '\d' is not a valid escape sequence, so the non-raw
        # literal triggers a Deprecation/SyntaxWarning; the runtime value
        # is unchanged
        yLabel = r'[$\degree$C]'

        varName = self.variableList[0]
        SST = dsSST[varName].isel(nOceanRegions=regionIndex)

        filePrefix = self.filePrefixes[region]

        figureName = '{}/{}.png'.format(self.plotsDirectory, filePrefix)

        lineColors = ['k']
        lineWidths = [3]

        fields = [SST]
        legendText = [mainRunName]

        if dsRefSST is not None:
            refSST = dsRefSST[varName].isel(nOceanRegions=regionIndex)
            fields.append(refSST)
            lineColors.append('r')
            lineWidths.append(1.5)
            refRunName = self.refConfig.get('runs', 'mainRunName')
            legendText.append(refRunName)

        if preprocessedReferenceRunName != 'None':
            SST_v0 = dsPreprocessedTimeSlice.SST
            fields.append(SST_v0)
            lineColors.append('purple')
            lineWidths.append(1.5)
            legendText.append(preprocessedReferenceRunName)

        if config.has_option(self.taskName, 'firstYearXTicks'):
            firstYearXTicks = config.getint(self.taskName,
                                            'firstYearXTicks')
        else:
            firstYearXTicks = None

        if config.has_option(self.taskName, 'yearStrideXTicks'):
            yearStrideXTicks = config.getint(self.taskName,
                                             'yearStrideXTicks')
        else:
            yearStrideXTicks = None

        timeseries_analysis_plot(config, fields, movingAveragePoints,
                                 title, xLabel, yLabel, figureName,
                                 calendar=calendar,
                                 lineColors=lineColors,
                                 lineWidths=lineWidths,
                                 legendText=legendText,
                                 firstYearXTicks=firstYearXTicks,
                                 yearStrideXTicks=yearStrideXTicks)

        caption = 'Running Mean of {} Sea Surface Temperature'.format(
            region)
        write_image_xml(config=config,
                        filePrefix=filePrefix,
                        componentName='Ocean',
                        componentSubdirectory='ocean',
                        galleryGroup='Time Series',
                        groupLink='timeseries',
                        thumbnailDescription='{} SST'.format(region),
                        imageDescription=caption,
                        imageCaption=caption)
def _compute_moc_time_series_analysismember(self):  # {{{
    '''
    Compute the MOC time series from the analysis member

    For each monthly file, the maximum over depth of the Atlantic MOC
    streamfunction at the latitude bin nearest 26.5N is stored.  Months
    already present in a previously written ``mocTimeSeries.nc`` are
    reused rather than recomputed.

    Returns
    -------
    dsMOCTimeSeries : ``xarray.Dataset``
        A data set with ``mocAtlantic26`` along ``Time`` (and ``year`` and
        ``month`` coordinates); if every month was already cached, the
        loaded cached data set is returned instead

    Raises
    ------
    IOError
        If a stream searched for the bin boundaries has no files

    ValueError
        If ``binBoundaryMocStreamfunction`` is found in neither stream
    '''

    # Compute and plot time series of Atlantic MOC at 26.5N (RAPID array)
    self.logger.info(
        '\n  Compute Atlantic MOC time series from analysis member...')
    self.logger.info('   Load data...')

    outputDirectory = build_config_full_path(self.config, 'output',
                                             'timeseriesSubdirectory')
    try:
        os.makedirs(outputDirectory)
    except OSError:
        # best effort: typically the directory already exists
        pass

    outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory)

    # the stream that supplies the monthly MOC files below
    streamName = 'timeSeriesStatsMonthlyOutput'

    # Get bin latitudes and index of 26.5N
    binBoundaryMocStreamfunction = None
    # first try timeSeriesStatsMonthly for bin boundaries, then try
    # mocStreamfunctionOutput stream as a backup option.  A separate loop
    # variable is used so that the search does not change which stream the
    # time-series files are read from afterwards.
    for binStreamName in ['timeSeriesStatsMonthlyOutput',
                          'mocStreamfunctionOutput']:
        # NOTE(review): a stream with no files aborts the search here
        # rather than falling through to the backup stream -- confirm
        # that this is intended
        try:
            inputFile = self.historyStreams.readpath(binStreamName)[0]
        except ValueError:
            raise IOError('At least one file from stream {} is needed '
                          'to compute MOC'.format(binStreamName))

        with xr.open_dataset(inputFile) as ds:
            if 'binBoundaryMocStreamfunction' in ds.data_vars:
                binBoundaryMocStreamfunction = \
                    ds.binBoundaryMocStreamfunction.values
                break

    if binBoundaryMocStreamfunction is None:
        raise ValueError('Could not find binBoundaryMocStreamfunction in '
                         'either timeSeriesStatsMonthlyOutput or '
                         'mocStreamfunctionOutput streams')

    binBoundaryMocStreamfunction = np.rad2deg(binBoundaryMocStreamfunction)

    # index (or indices) of the bin boundary closest to 26.5 degrees
    dLat = binBoundaryMocStreamfunction - 26.5
    indlat26 = np.where(np.abs(dLat) == np.amin(np.abs(dLat)))

    inputFilesTseries = sorted(
        self.historyStreams.readpath(streamName,
                                     startDate=self.startDateTseries,
                                     endDate=self.endDateTseries,
                                     calendar=self.calendar))

    years, months = get_files_year_month(inputFilesTseries,
                                         self.historyStreams,
                                         'timeSeriesStatsMonthlyOutput')

    mocRegion = np.zeros(len(inputFilesTseries))
    times = np.zeros(len(inputFilesTseries))
    computed = np.zeros(len(inputFilesTseries), bool)

    continueOutput = os.path.exists(outputFileTseries)
    if continueOutput:
        self.logger.info('   Read in previously computed MOC time series')
        with open_mpas_dataset(fileName=outputFileTseries,
                               calendar=self.calendar,
                               timeVariableNames=None,
                               variableList=['mocAtlantic26'],
                               startDate=self.startDateTseries,
                               endDate=self.endDateTseries) as dsMOCIn:

            # load into memory so the data remain usable after closing
            dsMOCIn.load()

            # first, copy all computed data
            for inIndex in range(dsMOCIn.sizes['Time']):

                mask = np.logical_and(
                    dsMOCIn.year[inIndex].values == years,
                    dsMOCIn.month[inIndex].values == months)

                outIndex = np.where(mask)[0][0]

                mocRegion[outIndex] = dsMOCIn.mocAtlantic26[inIndex]
                times[outIndex] = dsMOCIn.Time[inIndex]
                computed[outIndex] = True

            if np.all(computed):
                # no need to waste time writing out the data set again
                return dsMOCIn

    for timeIndex, fileName in enumerate(inputFilesTseries):
        if computed[timeIndex]:
            continue

        dsLocal = open_mpas_dataset(fileName=fileName,
                                    calendar=self.calendar,
                                    variableList=self.variableList,
                                    startDate=self.startDateTseries,
                                    endDate=self.endDateTseries)
        dsLocal = dsLocal.isel(Time=0)
        time = dsLocal.Time.values
        times[timeIndex] = time
        date = days_to_datetime(time, calendar=self.calendar)

        self.logger.info('     date: {:04d}-{:02d}'.format(
            date.year, date.month))

        # hard-wire region=0 (Atlantic) for now
        indRegion = 0
        mocTop = dsLocal.timeMonthly_avg_mocStreamvalLatAndDepthRegion[
            indRegion, :, :].values
        mocRegion[timeIndex] = np.amax(mocTop[:, indlat26])

    description = 'Max MOC Atlantic streamfunction nearest to RAPID ' \
        'Array latitude (26.5N)'

    dictionary = {
        'dims': ['Time'],
        'coords': {
            'Time': {
                'dims': ('Time'),
                'data': times,
                'attrs': {
                    'units': 'days since 0001-01-01'
                }
            },
            'year': {
                'dims': ('Time'),
                'data': years,
                'attrs': {
                    'units': 'year'
                }
            },
            'month': {
                'dims': ('Time'),
                'data': months,
                'attrs': {
                    'units': 'month'
                }
            }
        },
        'data_vars': {
            'mocAtlantic26': {
                'dims': ('Time'),
                'data': mocRegion,
                'attrs': {
                    'units': 'Sv (10^6 m^3/s)',
                    'description': description
                }
            }
        }
    }
    dsMOCTimeSeries = xr.Dataset.from_dict(dictionary)
    write_netcdf(dsMOCTimeSeries, outputFileTseries)

    return dsMOCTimeSeries  # }}}
def _replicate_cycle(self, ds, dsToReplicate, calendar):  # {{{
    """
    Repeat the periodic time series ``dsToReplicate`` until it spans the
    time range of ``ds``, then clip it to that range.

    Parameters
    ----------
    ds : dataset used to find the start and end time of the replicated
        cycle

    dsToReplicate : dataset to replicate.  The period of the cycle is the
        length of dsToReplicate plus the time between the first two time
        values (typically one year total).

    calendar : {'gregorian', 'gregorian_noleap'}
        The name of one of the calendars supported by MPAS cores

    Returns
    -------
    dsShift : a cyclically repeated version of ``dsToReplicate`` covering
        the range of time of ``ds``.
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Milena Veneziani

    targetStart = days_to_datetime(ds.Time.min(), calendar=calendar)
    targetEnd = days_to_datetime(ds.Time.max(), calendar=calendar)
    cycleStart = days_to_datetime(dsToReplicate.Time.min(),
                                  calendar=calendar)
    cycleEnd = days_to_datetime(dsToReplicate.Time.max(),
                                calendar=calendar)
    cycleSecond = days_to_datetime(dsToReplicate.Time.isel(Time=1),
                                   calendar=calendar)

    # span of the cycle plus the spacing between its first two samples
    span = MpasRelativeDelta(cycleEnd, cycleStart)
    spacing = MpasRelativeDelta(cycleSecond, cycleStart)
    period = (span + spacing)

    # number of whole periods to shift so the first copy reaches ds's start
    startIndex = 0
    while targetStart > cycleStart + (startIndex + 1) * period:
        startIndex += 1

    # number of whole periods to shift so the last copy reaches ds's end
    endIndex = 0
    while targetEnd > cycleEnd + endIndex * period:
        endIndex += 1

    dsShift = dsToReplicate.copy()

    times = days_to_datetime(dsShift.Time, calendar=calendar)

    def shifted_time(numPeriods):
        # Time coordinate of the cycle moved forward by numPeriods periods
        return ('Time',
                datetime_to_days(times + numPeriods * period,
                                 calendar=calendar))

    dsShift.coords['Time'] = shifted_time(startIndex)

    # replicate cycle:
    for numPeriods in range(startIndex + 1, endIndex + 1):
        dsNew = dsToReplicate.copy()
        dsNew.coords['Time'] = shifted_time(numPeriods)
        dsShift = xr.concat([dsShift, dsNew], dim='Time')

    # clip dsShift to the range of ds
    clipStart = dsShift.Time.sel(Time=ds.Time.min(),
                                 method=str('nearest')).values
    clipEnd = dsShift.Time.sel(Time=ds.Time.max(),
                               method=str('nearest')).values
    dsShift = dsShift.sel(Time=slice(clipStart, clipEnd))

    return dsShift  # }}}
def run_task(self):  # {{{
    """
    Performs analysis of time series of sea-ice properties.

    For each hemisphere (``NH``, ``SH``) and each of ``iceArea`` and
    ``iceVolume``, the model time series is plotted, optionally together
    with observations, a preprocessed reference run and a control run.
    A standard time-series figure is always written; a polar figure is
    written as well if the ``polarPlot`` option is set.  An image XML
    entry is written for each figure.
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Milena Veneziani

    self.logger.info("\nPlotting sea-ice area and volume time series...")

    config = self.config
    calendar = self.calendar

    sectionName = self.taskName

    plotTitles = {'iceArea': 'Sea-ice area',
                  'iceVolume': 'Sea-ice volume',
                  'iceThickness': 'Sea-ice mean thickness'}

    units = {'iceArea': '[km$^2$]',
             'iceVolume': '[10$^3$ km$^3$]',
             'iceThickness': '[m]'}

    obsFileNames = {
        'iceArea': {'NH': build_obs_path(
            config, 'seaIce',
            relativePathOption='areaNH',
            relativePathSection=sectionName),
                    'SH': build_obs_path(
            config, 'seaIce',
            relativePathOption='areaSH',
            relativePathSection=sectionName)},
        'iceVolume': {'NH': build_obs_path(
            config, 'seaIce',
            relativePathOption='volNH',
            relativePathSection=sectionName),
                      'SH': build_obs_path(
            config, 'seaIce',
            relativePathOption='volSH',
            relativePathSection=sectionName)}}

    # Some plotting rules
    titleFontSize = config.get('timeSeriesSeaIceAreaVol', 'titleFontSize')

    mainRunName = config.get('runs', 'mainRunName')
    preprocessedReferenceRunName = \
        config.get('runs', 'preprocessedReferenceRunName')
    preprocessedReferenceDirectory = \
        config.get('seaIcePreprocessedReference', 'baseDirectory')

    compareWithObservations = config.getboolean('timeSeriesSeaIceAreaVol',
                                                'compareWithObservations')

    movingAveragePoints = config.getint('timeSeriesSeaIceAreaVol',
                                        'movingAveragePoints')

    polarPlot = config.getboolean('timeSeriesSeaIceAreaVol', 'polarPlot')

    # The x-tick options do not depend on the hemisphere or variable, so
    # they are read once here rather than inside the plotting loops.
    if config.has_option(sectionName, 'firstYearXTicks'):
        firstYearXTicks = config.getint(sectionName, 'firstYearXTicks')
    else:
        firstYearXTicks = None

    if config.has_option(sectionName, 'yearStrideXTicks'):
        yearStrideXTicks = config.getint(sectionName, 'yearStrideXTicks')
    else:
        yearStrideXTicks = None

    outputDirectory = build_config_full_path(config, 'output',
                                             'timeseriesSubdirectory')

    make_directories(outputDirectory)

    self.logger.info(' Load sea-ice data...')
    # Load mesh

    dsTimeSeries = self._compute_area_vol()

    # the time range of the plots is the full span of years covered by
    # the northern-hemisphere time series
    yearStart = days_to_datetime(dsTimeSeries['NH'].Time.min(),
                                 calendar=calendar).year
    yearEnd = days_to_datetime(dsTimeSeries['NH'].Time.max(),
                               calendar=calendar).year
    timeStart = date_to_days(year=yearStart, month=1, day=1,
                             calendar=calendar)
    timeEnd = date_to_days(year=yearEnd, month=12, day=31,
                           calendar=calendar)

    if preprocessedReferenceRunName != 'None':
        # determine if we're beyond the end of the preprocessed data
        # (and go ahead and cache the data set while we're checking)
        outFolder = '{}/preprocessed'.format(outputDirectory)
        make_directories(outFolder)
        inFilesPreprocessed = '{}/icevol.{}.year*.nc'.format(
            preprocessedReferenceDirectory, preprocessedReferenceRunName)
        outFileName = '{}/iceVolume.nc'.format(outFolder)

        combine_time_series_with_ncrcat(inFilesPreprocessed,
                                        outFileName,
                                        logger=self.logger)
        dsPreprocessed = open_mpas_dataset(fileName=outFileName,
                                           calendar=calendar,
                                           timeVariableNames='xtime')
        preprocessedYearEnd = days_to_datetime(dsPreprocessed.Time.max(),
                                               calendar=calendar).year
        if yearStart <= preprocessedYearEnd:
            dsPreprocessedTimeSlice = \
                dsPreprocessed.sel(Time=slice(timeStart, timeEnd))
        else:
            self.logger.warning('Preprocessed time series ends before the '
                                'timeSeries startYear and will not be '
                                'plotted.')
            # from here on, behave as if no preprocessed run was requested
            preprocessedReferenceRunName = 'None'

    if self.controlConfig is not None:

        dsTimeSeriesRef = {}
        baseDirectory = build_config_full_path(
            self.controlConfig, 'output', 'timeSeriesSubdirectory')

        controlRunName = self.controlConfig.get('runs', 'mainRunName')

        for hemisphere in ['NH', 'SH']:
            inFileName = '{}/seaIceAreaVol{}.nc'.format(baseDirectory,
                                                        hemisphere)

            dsTimeSeriesRef[hemisphere] = xr.open_dataset(inFileName)

    norm = {'iceArea': 1e-6,  # m^2 to km^2
            'iceVolume': 1e-12,  # m^3 to 10^3 km^3
            'iceThickness': 1.}

    xLabel = 'Time [years]'

    galleryGroup = 'Time Series'
    groupLink = 'timeseries'

    # all of the following are keyed by (hemisphere, variableName)
    obs = {}
    preprocessed = {}
    figureNameStd = {}
    figureNamePolar = {}
    title = {}
    plotVars = {}
    obsLegend = {}
    plotVarsRef = {}

    for hemisphere in ['NH', 'SH']:

        self.logger.info(' Make {} plots...'.format(hemisphere))

        for variableName in ['iceArea', 'iceVolume']:
            key = (hemisphere, variableName)

            # apply the norm to each variable
            plotVars[key] = (norm[variableName] *
                             dsTimeSeries[hemisphere][variableName])

            if self.controlConfig is not None:
                plotVarsRef[key] = norm[variableName] * \
                    dsTimeSeriesRef[hemisphere][variableName]

            prefix = '{}/{}{}_{}'.format(self.plotsDirectory,
                                         variableName,
                                         hemisphere,
                                         mainRunName)

            figureNameStd[key] = '{}.png'.format(prefix)
            figureNamePolar[key] = '{}_polar.png'.format(prefix)

            title[key] = '{} ({})'.format(plotTitles[variableName],
                                          hemisphere)

        if compareWithObservations:
            key = (hemisphere, 'iceArea')
            obsLegend[key] = 'SSM/I observations, annual cycle '
            # a volume legend entry is only defined for the northern
            # hemisphere
            if hemisphere == 'NH':
                key = (hemisphere, 'iceVolume')
                obsLegend[key] = 'PIOMAS, annual cycle (blue)'

        if compareWithObservations:

            outFolder = '{}/obs'.format(outputDirectory)
            make_directories(outFolder)
            outFileName = '{}/iceArea{}.nc'.format(outFolder, hemisphere)

            combine_time_series_with_ncrcat(
                obsFileNames['iceArea'][hemisphere],
                outFileName, logger=self.logger)
            dsObs = open_mpas_dataset(fileName=outFileName,
                                      calendar=calendar,
                                      timeVariableNames='xtime')
            key = (hemisphere, 'iceArea')
            # repeat the observed cycle over the time range of the model
            # time series
            obs[key] = self._replicate_cycle(plotVars[key], dsObs.IceArea,
                                             calendar)

            key = (hemisphere, 'iceVolume')
            if hemisphere == 'NH':
                outFileName = '{}/iceVolume{}.nc'.format(outFolder,
                                                         hemisphere)
                combine_time_series_with_ncrcat(
                    obsFileNames['iceVolume'][hemisphere],
                    outFileName, logger=self.logger)
                dsObs = open_mpas_dataset(fileName=outFileName,
                                          calendar=calendar,
                                          timeVariableNames='xtime')
                obs[key] = self._replicate_cycle(plotVars[key],
                                                 dsObs.IceVol,
                                                 calendar)
            else:
                obs[key] = None

        if preprocessedReferenceRunName != 'None':
            outFolder = '{}/preprocessed'.format(outputDirectory)
            inFilesPreprocessed = '{}/icearea.{}.year*.nc'.format(
                preprocessedReferenceDirectory,
                preprocessedReferenceRunName)

            outFileName = '{}/iceArea.nc'.format(outFolder)

            combine_time_series_with_ncrcat(inFilesPreprocessed,
                                            outFileName,
                                            logger=self.logger)
            dsPreprocessed = open_mpas_dataset(fileName=outFileName,
                                               calendar=calendar,
                                               timeVariableNames='xtime')
            dsPreprocessedTimeSlice = dsPreprocessed.sel(
                Time=slice(timeStart, timeEnd))
            key = (hemisphere, 'iceArea')
            preprocessed[key] = dsPreprocessedTimeSlice[
                'icearea_{}'.format(hemisphere.lower())]

            inFilesPreprocessed = '{}/icevol.{}.year*.nc'.format(
                preprocessedReferenceDirectory,
                preprocessedReferenceRunName)
            outFileName = '{}/iceVolume.nc'.format(outFolder)

            combine_time_series_with_ncrcat(inFilesPreprocessed,
                                            outFileName,
                                            logger=self.logger)
            dsPreprocessed = open_mpas_dataset(fileName=outFileName,
                                               calendar=calendar,
                                               timeVariableNames='xtime')
            dsPreprocessedTimeSlice = dsPreprocessed.sel(
                Time=slice(timeStart, timeEnd))
            key = (hemisphere, 'iceVolume')
            preprocessed[key] = dsPreprocessedTimeSlice[
                'icevolume_{}'.format(hemisphere.lower())]

        for variableName in ['iceArea', 'iceVolume']:
            key = (hemisphere, variableName)

            # the main run always comes first; each optional data set
            # appends to all four parallel lists
            dsvalues = [plotVars[key]]
            legendText = [mainRunName]
            lineColors = ['k']
            lineWidths = [3]
            if compareWithObservations and key in obsLegend:
                dsvalues.append(obs[key])
                legendText.append(obsLegend[key])
                lineColors.append('b')
                lineWidths.append(1.2)
            if preprocessedReferenceRunName != 'None':
                dsvalues.append(preprocessed[key])
                legendText.append(preprocessedReferenceRunName)
                lineColors.append('purple')
                lineWidths.append(1.2)

            if self.controlConfig is not None:
                dsvalues.append(plotVarsRef[key])
                legendText.append(controlRunName)
                lineColors.append('r')
                lineWidths.append(1.2)

            # separate plots for northern and southern hemispheres
            timeseries_analysis_plot(config, dsvalues,
                                     movingAveragePoints,
                                     title[key], xLabel,
                                     units[variableName],
                                     calendar=calendar,
                                     lineColors=lineColors,
                                     lineWidths=lineWidths,
                                     legendText=legendText,
                                     titleFontSize=titleFontSize,
                                     firstYearXTicks=firstYearXTicks,
                                     yearStrideXTicks=yearStrideXTicks)

            savefig(figureNameStd[key])

            filePrefix = '{}{}_{}'.format(variableName,
                                          hemisphere,
                                          mainRunName)
            thumbnailDescription = '{} {}'.format(
                hemisphere, plotTitles[variableName])
            caption = 'Running mean of {}'.format(
                thumbnailDescription)
            write_image_xml(
                config,
                filePrefix,
                componentName='Sea Ice',
                componentSubdirectory='sea_ice',
                galleryGroup=galleryGroup,
                groupLink=groupLink,
                thumbnailDescription=thumbnailDescription,
                imageDescription=caption,
                imageCaption=caption)

            if polarPlot:
                timeseries_analysis_plot_polar(
                    config,
                    dsvalues,
                    movingAveragePoints,
                    title[key],
                    lineColors=lineColors,
                    lineWidths=lineWidths,
                    legendText=legendText,
                    titleFontSize=titleFontSize)

                savefig(figureNamePolar[key])

                filePrefix = '{}{}_{}_polar'.format(variableName,
                                                    hemisphere,
                                                    mainRunName)
                write_image_xml(
                    config,
                    filePrefix,
                    componentName='Sea Ice',
                    componentSubdirectory='sea_ice',
                    galleryGroup=galleryGroup,
                    groupLink=groupLink,
                    thumbnailDescription=thumbnailDescription,
                    imageDescription=caption,
                    imageCaption=caption)
def open_mpas_dataset(
        fileName, calendar,
        timeVariableNames=['xtime_startMonthly', 'xtime_endMonthly'],
        variableList=None, startDate=None, endDate=None):  # {{{
    """
    Opens and returns an xarray data set given file name(s) and the MPAS
    calendar name.

    Parameters
    ----------
    fileName : str
        File path to read

    calendar : {``'gregorian'``, ``'gregorian_noleap'``}
        The name of one of the calendars supported by MPAS cores

    timeVariableNames : str or list of 2 str, optional
        The name of the time variable (typically ``'xtime'``
        or ``['xtime_startMonthly', 'xtime_endMonthly']``), or ``None`` if
        time does not need to be parsed (and is already in the ``Time``
        variable)

    variableList : list of strings, optional
        If present, a list of variables to be included in the data set

    startDate, endDate : string or datetime.datetime, optional
        If present, the first and last dates to be used in the data set.
        The time variable is sliced to only include dates within this
        range.  The range is only applied if both are given.

    Returns
    -------
    ds : ``xarray.Dataset``

    Raises
    ------
    TypeError
        If the time variable has an unsupported type (not a date string).

    ValueError
        If the time variable is not found in the data set, or if the data
        set contains no ``Time`` entries (within the requested range of
        dates, if one was given)
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    # NOTE: the default for timeVariableNames is a list, but it is only
    # read (never modified) here.  None cannot be used as a stand-in for
    # the default because None already means "do not parse time".

    ds = xarray.open_dataset(fileName, decode_cf=True,
                             decode_times=False, lock=False)

    if timeVariableNames is not None:
        ds = _parse_dataset_time(ds, timeVariableNames, calendar)

    dateRangeGiven = startDate is not None and endDate is not None

    if dateRangeGiven:
        if isinstance(startDate, six.string_types):
            startDate = string_to_days_since_date(dateString=startDate,
                                                  calendar=calendar)
        if isinstance(endDate, six.string_types):
            endDate = string_to_days_since_date(dateString=endDate,
                                                calendar=calendar)

        # select only the data in the specified range of dates
        ds = ds.sel(Time=slice(startDate, endDate))

    if ds.dims['Time'] == 0:
        if dateRangeGiven:
            raise ValueError('The data set contains no Time entries between '
                             'dates {} and {}.'.format(
                                 days_to_datetime(startDate,
                                                  calendar=calendar),
                                 days_to_datetime(endDate,
                                                  calendar=calendar)))
        # without a date range there are no dates to report; formatting
        # them would fail on None and hide the real problem
        raise ValueError('The data set contains no Time entries.')

    if variableList is not None:
        ds = subset_variables(ds, variableList)

    return ds  # }}}
def cache_time_series(timesInDataSet, timeSeriesCalcFunction, cacheFileName,
                      calendar, yearsPerCacheUpdate=1,
                      logger=None):  # {{{
    '''
    Create or update a NetCDF file ``cacheFileName`` containing the given time
    series, calculated with ``timeSeriesCalcFunction`` over the given times,
    start and end year, and time frequency with which results are cached.

    Note: only works with climatologies where the mask (locations of ``NaN``
    values) doesn't vary with time.

    Parameters
    ----------
    timesInDataSet : array-like
        Times at which the time series is to be calculated, typically taken
        from ``ds.Times.values`` for a data set from which the time series
        will be extracted or computed.

    timeSeriesCalcFunction : function
        A function with arguments ``timeIndices``, indicating the entries in
        ``timesInDataSet`` to be computed, and ``firstCall``, indicating
        whether this is the first call to the function (useful for printing
        progress information).

    cacheFileName : str
        The absolute path to the cache file where the times series will be
        stored

    calendar : {'gregorian', 'gregorian_noleap'}
        The name of one of the calendars supported by MPAS cores, used to
        determine ``year`` and ``month`` from ``Time`` coordinate

    yearsPerCacheUpdate : int, optional
        The frequency with which the cache file is updated as the computation
        progresses.  If the computation is expensive, it may be useful to
        output the file frequently.  If not, there will be needless overhead
        in caching the file too frequently.

    logger : ``logging.Logger``, optional
        A logger to which to write output as the time series is computed

    Returns
    -------
    dsCache : ``xarray.Dataset``
        The cached time series (previously cached entries combined with
        those computed in this call), restricted to the range of ``Time``
        between the first and last entries of ``timesInDataSet``.
    '''
    # Authors
    # -------
    # Xylar Asay-Davis

    # the elementwise comparisons below require an array; a plain list
    # would compare as a whole and never match a cached time
    timesInDataSet = numpy.asarray(timesInDataSet)

    timesProcessed = numpy.zeros(len(timesInDataSet), bool)
    # figure out which files to load and which years go in each file
    continueOutput = os.path.exists(cacheFileName)
    cacheDataSetExists = False
    if continueOutput:
        if logger is not None:
            logger.info(' Read in previously computed time series')
        # read in what we have so far

        try:
            dsCache = xr.open_dataset(cacheFileName, decode_times=False)
            cacheDataSetExists = True
        except IOError:
            # assuming the cache file is corrupt, so deleting it.
            message = 'Deleting cache file {}, which appears to have ' \
                      'been corrupted.'.format(cacheFileName)
            if logger is None:
                print('Warning: {}'.format(message))
            else:
                logger.warning(message)
            os.remove(cacheFileName)

    if cacheDataSetExists:
        # force loading and then close so we can overwrite the file later
        dsCache.load()
        dsCache.close()
        for time in dsCache.Time.values:
            timesProcessed[timesInDataSet == time] = True

    datetimes = days_to_datetime(timesInDataSet, calendar=calendar)
    yearsInDataSet = numpy.array([date.year for date in datetimes])

    startYear = int(yearsInDataSet[0])
    endYear = int(yearsInDataSet[-1])

    firstProcessed = True
    for firstYear in range(startYear, endYear + 1, yearsPerCacheUpdate):
        # last year (inclusive) handled in this cache update
        lastYear = min(endYear, firstYear + yearsPerCacheUpdate - 1)

        # entries within this range of years that are not yet cached
        mask = numpy.logical_and(yearsInDataSet >= firstYear,
                                 yearsInDataSet <= lastYear)
        mask = numpy.logical_and(mask, numpy.logical_not(timesProcessed))

        timeIndices = numpy.nonzero(mask)[0]

        if len(timeIndices) == 0:
            # no unprocessed time entries in this data range
            continue

        if logger is not None:
            if firstProcessed:
                logger.info(' Process and save time series')
            if yearsPerCacheUpdate == 1:
                logger.info(' {:04d}'.format(firstYear))
            else:
                logger.info(' {:04d}-{:04d}'.format(firstYear, lastYear))

        ds = timeSeriesCalcFunction(timeIndices, firstProcessed)
        firstProcessed = False

        if cacheDataSetExists:
            dsCache = xr.concat([dsCache, ds], dim='Time')
            # now sort the Time dimension:
            dsCache = dsCache.loc[{'Time': sorted(dsCache.Time.values)}]
        else:
            dsCache = ds
            cacheDataSetExists = True

        # write after every update so an interrupted run can resume
        dsCache.to_netcdf(cacheFileName)

    return dsCache.sel(Time=slice(timesInDataSet[0], timesInDataSet[-1]))
    # }}}
def open_multifile_dataset(fileNames, calendar, config,
                           simulationStartTime=None,
                           timeVariableName='Time',
                           variableList=None, selValues=None,
                           iselValues=None, variableMap=None,
                           startDate=None, endDate=None,
                           chunking=None):  # {{{
    """
    Opens and returns an xarray data set given file name(s) and the MPAS
    calendar name.

    Parameters
    ----------
    fileNames : list of strings
        A list of file paths to read

    calendar : {``'gregorian'``, ``'gregorian_noleap'``}
        The name of one of the calendars supported by MPAS cores

    config : instance of ``MpasAnalysisConfigParser``
        Contains configuration options

    simulationStartTime : string, optional
        The start date of the simulation, used to convert from time
        variables expressed as days since the start of the simulation to
        days since the reference date. ``simulationStartTime`` takes one
        of the following forms::

            0001-01-01
            0001-01-01 00:00:00

        ``simulationStartTime`` is only required if the MPAS time variable
        (identified by ``timeVariableName``) is a number of days since the
        start of the simulation.

    timeVariableName : string, optional
        The name of the time variable (typically ``'Time'`` if using a
        ``variableMap`` or ``'xtime'`` if not using a ``variableMap``)

    variableList : list of strings, optional
        If present, a list of variables to be included in the data set

    selValues : dict, optional
        A dictionary of coordinate names (keys) and values or arrays of
        values used to slice the variables in the data set.  See
        ``xarray.DataSet.sel()`` for details on how this dictionary is
        used.  An example::

            selectCorrdValues = {'cellLon': 180.0}

    iselValues : dict, optional
        A dictionary of coordinate names (keys) and indices, slices or
        arrays of indices used to slice the variables in the data set.
        See ``xarray.DataSet.isel()`` for details on how this dictionary
        is used.  An example::

            iselValues = {'nVertLevels': slice(0, 3),
                          'nCells': cellIDs}

    variableMap : dict, optional
        A dictionary with keys that are variable names used by
        MPAS-Analysis and values that are lists of possible names for the
        same variable in the MPAS dycore that produced the data set (which
        may differ between versions).

    startDate, endDate : string or datetime.datetime, optional
        If present, the first and last dates to be used in the data set.
        The time variable is sliced to only include dates within this
        range.  The range is only applied if both are given.

    chunking : None, int, True, dict, optional
        If ``True``, the config option ``maxChunkSize`` from the ``input``
        section is used in its place.  The resulting value is passed on to
        ``mpas_xarray.process_chunking``.

    Returns
    -------
    ds : ``xarray.Dataset``

    Raises
    ------
    TypeError
        If the time variable has an unsupported type (not a date string,
        a floating-point number of days since the start of the simulation
        or a ``numpy.datatime64`` object).

    ValueError
        If the time variable is not found in the data set, if the time
        variable is a number of days since the start of the simulation but
        simulationStartTime is None, or if the data set contains no
        ``Time`` entries (within the requested range of dates, if one was
        given).
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Phillip J. Wolfram

    preprocess_partial = partial(_preprocess,
                                 calendar=calendar,
                                 simulationStartTime=simulationStartTime,
                                 timeVariableName=timeVariableName,
                                 variableList=variableList,
                                 selValues=selValues,
                                 iselValues=iselValues,
                                 variableMap=variableMap,
                                 startDate=startDate,
                                 endDate=endDate)

    kwargs = {'decode_times': False,
              'concat_dim': 'Time'}

    autocloseFileLimitFraction = config.getfloat('input',
                                                 'autocloseFileLimitFraction')

    # get the number of files that can be open at the same time.  We want the
    # "soft" limit because we'll get a crash if we exceed it.
    softLimit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]

    # use autoclose if we will use more than autocloseFileLimitFraction (50%
    # by default) of the soft limit of open files
    autoclose = len(fileNames) > softLimit*autocloseFileLimitFraction

    try:
        ds = xarray.open_mfdataset(fileNames,
                                   preprocess=preprocess_partial,
                                   autoclose=autoclose, **kwargs)
    except TypeError as e:
        if 'autoclose' not in str(e):
            # an unrelated TypeError: re-raise with its original traceback
            raise
        if autoclose:
            # This indicates that xarray version doesn't support autoclose
            print('Warning: open_multifile_dataset is trying to use '
                  'autoclose=True but\n'
                  'it appears your xarray version doesn\'t support this '
                  'argument. Will\n'
                  'try again without autoclose argument.')

        ds = xarray.open_mfdataset(fileNames,
                                   preprocess=preprocess_partial,
                                   **kwargs)

    ds = mpas_xarray.remove_repeated_time_index(ds)

    dateRangeGiven = startDate is not None and endDate is not None

    if dateRangeGiven:
        if isinstance(startDate, six.string_types):
            startDate = string_to_days_since_date(dateString=startDate,
                                                  calendar=calendar)
        if isinstance(endDate, six.string_types):
            endDate = string_to_days_since_date(dateString=endDate,
                                                calendar=calendar)

        # select only the data in the specified range of dates
        ds = ds.sel(Time=slice(startDate, endDate))

    if ds.dims['Time'] == 0:
        if dateRangeGiven:
            raise ValueError('The data set contains no Time entries between '
                             'dates {} and {}.'.format(
                                 days_to_datetime(startDate,
                                                  calendar=calendar),
                                 days_to_datetime(endDate,
                                                  calendar=calendar)))
        # without a date range there are no dates to report; formatting
        # them would fail on None and hide the real problem
        raise ValueError('The data set contains no Time entries.')

    # process chunking
    if chunking is True:
        # limit chunk size to prevent memory error
        chunking = config.getint('input', 'maxChunkSize')

    ds = mpas_xarray.process_chunking(ds, chunking)

    # private record of autoclose use
    ds.attrs['_autoclose'] = int(autoclose)

    return ds  # }}}
def _parse_dataset_time(ds, inTimeVariableName, calendar,
                        simulationStartTime, outTimeVariableName,
                        referenceDate):  # {{{
    """
    A helper function for computing a time coordinate from an MPAS time
    variable.  Given a data set and a time variable name (or tuple of 2
    time names), returns a new data set with time coordinate
    `outTimeVariableName` filled with days since `referenceDate`

    Parameters
    ----------
    ds : xarray.DataSet object
        The data set containing an MPAS time variable to be used to build
        an xarray time coordinate.

    inTimeVariableName : string or tuple or list of strings
        The name of the time variable in the MPAS data set that will be
        used to build the 'Time' coordinate.  The array(s) named by
        inTimeVariableName should contain date strings or the number of
        days since the start of the simulation. Typically,
        inTimeVariableName is one of {'daysSinceStartOfSim','xtime'}.
        If a list of two variable names is provided, times from the two
        are averaged together to determine the value of the time
        coordinate.  In such cases, inTimeVariableName is typically
        {['xtime_start', 'xtime_end']}.

    calendar : {'gregorian', 'gregorian_noleap'}
        The name of one of the calendars supported by MPAS cores

    simulationStartTime : string
        The start date of the simulation, used to convert from time
        variables expressed as days since the start of the simulation to
        days since the reference date. `simulationStartTime` takes one
        of the following forms::

            0001-01-01
            0001-01-01 00:00:00

        simulationStartTime is only required if the MPAS time variable
        (identified by inTimeVariableName) is a number of days since the
        start of the simulation.

    outTimeVariableName : string
        The name of the coordinate to assign times to, typically 'Time'.

    referenceDate : string
        The reference date for the time variable, typically '0001-01-01',
        taking one of the following forms::

            0001-01-01
            0001-01-01 00:00:00

    Returns
    -------
    dataset : xarray.dataset object
        A copy of the input data set with the `outTimeVariableName`
        coordinate containing the time coordinate parsed from
        `inTimeVariableName`.  If two time variables were given, the copy
        also has `startTime` and `endTime` coordinates.

    Raises
    ------
    TypeError
        If the time variable has an unsupported type (not a date string
        or a floating-point number of days since the start of the
        simulation).
    ValueError
        If the time variable is a number of days since the start of the
        simulation but simulationStartTime is None, or if a tuple or list
        of time variable names does not contain exactly 2 names.
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    if isinstance(inTimeVariableName, (tuple, list)):
        # we want to average the two
        if len(inTimeVariableName) != 2:
            # an explicit error rather than an assert, which would be
            # stripped when running with -O
            raise ValueError('inTimeVariableName must contain exactly 2 '
                             'variable names when given as a tuple or '
                             'list, but {} were given.'.format(
                                 len(inTimeVariableName)))

        # parse the start and end times separately, one variable each
        dsStart = _parse_dataset_time(
            ds=ds,
            inTimeVariableName=inTimeVariableName[0],
            calendar=calendar,
            simulationStartTime=simulationStartTime,
            outTimeVariableName=outTimeVariableName,
            referenceDate=referenceDate)
        dsEnd = _parse_dataset_time(
            ds=ds,
            inTimeVariableName=inTimeVariableName[1],
            calendar=calendar,
            simulationStartTime=simulationStartTime,
            outTimeVariableName=outTimeVariableName,
            referenceDate=referenceDate)
        starts = dsStart[outTimeVariableName].values
        ends = dsEnd[outTimeVariableName].values

        # replace the time in starts with the mean of starts and ends
        dsOut = dsStart.copy()

        dsOut.coords['startTime'] = (outTimeVariableName, starts)
        dsOut.coords['endTime'] = (outTimeVariableName, ends)

        dsOut.coords[outTimeVariableName] = (outTimeVariableName,
                                             starts + (ends - starts) / 2)

    else:
        # there is just one time variable (either because we're recursively
        # calling the function or because we're not averaging).

        # The contents of the time variable is expected to be either a string
        # (|S64) or a float (meaning days since start of the simulation).

        timeVar = ds[inTimeVariableName]

        if timeVar.dtype == '|S64':
            # this is an array of date strings like 'xtime'
            # convert to string
            timeStrings = [str(xtime.astype('U')).strip()
                           for xtime in timeVar.values]
            days = string_to_days_since_date(dateString=timeStrings,
                                             referenceDate=referenceDate,
                                             calendar=calendar)

        elif timeVar.dtype == 'float64':
            # this array contains floating-point days like
            # 'daysSinceStartOfSim'

            if simulationStartTime is None:
                raise ValueError('MPAS time variable {} appears to be a '
                                 'number of days since start \n'
                                 'of sim but simulationStartTime was not'
                                 ' supplied.'.format(inTimeVariableName))

            if (string_to_datetime(referenceDate) ==
                    string_to_datetime(simulationStartTime)):
                # the two origins coincide, so the values are already days
                # since the reference date
                days = timeVar.values
            else:
                # a conversion may be required
                dates = days_to_datetime(days=timeVar.values,
                                         referenceDate=simulationStartTime,
                                         calendar=calendar)
                days = datetime_to_days(dates=dates,
                                        referenceDate=referenceDate,
                                        calendar=calendar)

        elif timeVar.dtype == 'timedelta64[ns]':
            raise TypeError('timeVar of unsupported type {}.  This is likely '
                            'because xarray.open_dataset \n'
                            'was called with decode_times=True, which can '
                            'mangle MPAS times.'.format(timeVar.dtype))
        else:
            raise TypeError("timeVar of unsupported type {}".format(
                timeVar.dtype))

        dsOut = ds.copy()
        dsOut.coords[outTimeVariableName] = (outTimeVariableName, days)

    return dsOut  # }}}
def _compute_moc_time_series_postprocess(self):  # {{{
    '''
    Compute MOC time series as a post-process.

    The time series is the maximum of the Atlantic MOC streamfunction at
    the latitude nearest to 26.5N (RAPID array), with one entry per input
    file.  Entries already present in ``mocTimeSeries.nc`` in the
    time-series output directory are reused and only the missing ones are
    computed.

    Returns
    -------
    dsMOCTimeSeries : ``xarray.Dataset``
        A data set with the variable ``mocAtlantic26`` and coordinates
        ``Time``, ``year`` and ``month``.  If every entry was already
        cached, the (loaded) cached data set is returned and nothing is
        written.
    '''

    # Compute and plot time series of Atlantic MOC at 26.5N (RAPID array)
    self.logger.info('\n Compute and/or plot post-processed Atlantic MOC '
                     'time series...')
    self.logger.info(' Load data...')

    outputDirectory = build_config_full_path(self.config, 'output',
                                             'timeseriesSubdirectory')
    try:
        os.makedirs(outputDirectory)
    except OSError:
        # an existing directory is fine; any other failure (e.g.
        # permissions) should not be hidden
        if not os.path.isdir(outputDirectory):
            raise

    outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory)

    dvEdge, areaCell, refBottomDepth, latCell, nVertLevels, \
        refTopDepth, refLayerThickness = self._load_mesh()

    # index (or indices) of the latitude bin closest to 26.5N
    latAtlantic = self.lat['Atlantic']
    dLat = latAtlantic - 26.5
    indlat26 = np.where(np.abs(dLat) == np.amin(np.abs(dLat)))

    dictRegion = self.dictRegion['Atlantic']
    maxEdgesInTransect = dictRegion['maxEdgesInTransect']
    transectEdgeGlobalIDs = dictRegion['transectEdgeGlobalIDs']
    transectEdgeMaskSigns = dictRegion['transectEdgeMaskSigns']
    regionCellMask = dictRegion['cellMask']

    streamName = 'timeSeriesStatsMonthlyOutput'
    inputFilesTseries = sorted(
        self.historyStreams.readpath(streamName,
                                     startDate=self.startDateTseries,
                                     endDate=self.endDateTseries,
                                     calendar=self.calendar))

    years, months = get_files_year_month(inputFilesTseries,
                                         self.historyStreams,
                                         streamName)

    # one entry per input file; computed marks entries already filled
    mocRegion = np.zeros(len(inputFilesTseries))
    times = np.zeros(len(inputFilesTseries))
    computed = np.zeros(len(inputFilesTseries), bool)

    continueOutput = os.path.exists(outputFileTseries)
    if continueOutput:
        self.logger.info(' Read in previously computed MOC time series')
        with open_mpas_dataset(fileName=outputFileTseries,
                               calendar=self.calendar,
                               timeVariableNames=None,
                               variableList=['mocAtlantic26'],
                               startDate=self.startDateTseries,
                               endDate=self.endDateTseries) as dsMOCIn:

            dsMOCIn.load()

            # first, copy all computed data
            for inIndex in range(dsMOCIn.dims['Time']):

                # find the input file with the same year and month
                mask = np.logical_and(
                    dsMOCIn.year[inIndex].values == years,
                    dsMOCIn.month[inIndex].values == months)

                outIndex = np.where(mask)[0][0]

                mocRegion[outIndex] = dsMOCIn.mocAtlantic26[inIndex]
                times[outIndex] = dsMOCIn.Time[inIndex]
                computed[outIndex] = True

            if np.all(computed):
                # no need to waste time writing out the data set again
                return dsMOCIn

    for timeIndex, fileName in enumerate(inputFilesTseries):
        if computed[timeIndex]:
            continue

        # everything needed from the file is read into numpy arrays
        # inside the with block, so the file can be closed right away
        with open_mpas_dataset(fileName=fileName,
                               calendar=self.calendar,
                               variableList=self.variableList,
                               startDate=self.startDateTseries,
                               endDate=self.endDateTseries) as dsFile:
            dsLocal = dsFile.isel(Time=0)
            time = dsLocal.Time.values
            times[timeIndex] = time
            date = days_to_datetime(time, calendar=self.calendar)

            self.logger.info(' date: {:04d}-{:02d}'.format(
                date.year, date.month))

            if self.includeBolus:
                # add the GM bolus velocities to the resolved velocities
                dsLocal['avgNormalVelocity'] = \
                    dsLocal['timeMonthly_avg_normalVelocity'] + \
                    dsLocal['timeMonthly_avg_normalGMBolusVelocity']

                dsLocal['avgVertVelocityTop'] = \
                    dsLocal['timeMonthly_avg_vertVelocityTop'] + \
                    dsLocal['timeMonthly_avg_vertGMBolusVelocityTop']
            else:
                # rename some variables for convenience
                dsLocal = dsLocal.rename({
                    'timeMonthly_avg_normalVelocity':
                        'avgNormalVelocity',
                    'timeMonthly_avg_vertVelocityTop':
                        'avgVertVelocityTop'
                })

            horizontalVel = dsLocal.avgNormalVelocity.values
            verticalVel = dsLocal.avgVertVelocityTop.values

        velArea = verticalVel * areaCell[:, np.newaxis]
        transportZ = self._compute_transport(maxEdgesInTransect,
                                             transectEdgeGlobalIDs,
                                             transectEdgeMaskSigns,
                                             nVertLevels, dvEdge,
                                             refLayerThickness,
                                             horizontalVel)
        mocTop = self._compute_moc(latAtlantic, nVertLevels, latCell,
                                   regionCellMask, transportZ, velArea)
        mocRegion[timeIndex] = np.amax(mocTop[:, indlat26])

    description = 'Max MOC Atlantic streamfunction nearest to RAPID ' \
        'Array latitude (26.5N)'

    # NOTE: the per-variable 'dims' values are plain strings, as in the
    # original ('Time') without a trailing comma
    datasetDict = {
        'dims': ['Time'],
        'coords': {
            'Time': {
                'dims': 'Time',
                'data': times,
                'attrs': {
                    'units': 'days since 0001-01-01'
                }
            },
            'year': {
                'dims': 'Time',
                'data': years,
                'attrs': {
                    'units': 'year'
                }
            },
            'month': {
                'dims': 'Time',
                'data': months,
                'attrs': {
                    'units': 'month'
                }
            }
        },
        'data_vars': {
            'mocAtlantic26': {
                'dims': 'Time',
                'data': mocRegion,
                'attrs': {
                    'units': 'Sv (10^6 m^3/s)',
                    'description': description
                }
            }
        }
    }
    dsMOCTimeSeries = xr.Dataset.from_dict(datasetDict)
    write_netcdf(dsMOCTimeSeries, outputFileTseries)

    return dsMOCTimeSeries  # }}}