Example #1
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FixedLocator, FuncFormatter

# project-internal helpers (module path assumed from MPAS-Analysis)
from mpas_analysis.shared.timekeeping.utility import (date_to_days,
                                                      days_to_datetime)


def plot_xtick_format(calendar, minDays, maxDays, maxXTicks, yearStride=None):
    '''
    Formats tick labels and positions along the x-axis for time series
    / index plots

    Parameters
    ----------
    calendar : str
        the calendar to use for formatting the time axis

    minDays : float
        start time for labels

    maxDays : float
        end time for labels

    maxXTicks : int
        the maximum number of tick marks to display, used to sub-sample ticks
        if there are too many

    yearStride : int, optional
        the number of years to skip over between ticks
    '''
    # Authors
    # -------
    # Xylar Asay-Davis

    ax = plt.gca()

    start = days_to_datetime(np.amin(minDays), calendar=calendar)
    end = days_to_datetime(np.amax(maxDays), calendar=calendar)

    if yearStride is not None or end.year - start.year > maxXTicks/2:
        if yearStride is None:
            yearStride = 1
        else:
            maxXTicks = None
        major = [date_to_days(year=year, calendar=calendar)
                 for year in np.arange(start.year, end.year+1, yearStride)]
        formatterFun = partial(_date_tick, calendar=calendar,
                               includeMonth=False)
    else:
        # add ticks for months
        major = []
        for year in range(start.year, end.year+1):
            for month in range(1, 13):
                major.append(date_to_days(year=year, month=month,
                                          calendar=calendar))
        formatterFun = partial(_date_tick, calendar=calendar,
                               includeMonth=True)

    ax.xaxis.set_major_locator(FixedLocator(major, maxXTicks))
    ax.xaxis.set_major_formatter(FuncFormatter(formatterFun))

    plt.setp(ax.get_xticklabels(), rotation=30)

    plt.autoscale(enable=True, axis='x', tight=True)
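A minimal usage sketch (not part of the original source): plot a synthetic
five-year daily series, then let plot_xtick_format put one tick per year on
the time axis. It assumes the imports added above are in scope.

days = np.arange(0., 5 * 365.)  # five noleap years of daily samples
plt.plot(days, np.sin(2. * np.pi * days / 365.))

# one tick per year across the whole range
plot_xtick_format(calendar='gregorian_noleap', minDays=days[0],
                  maxDays=days[-1], maxXTicks=10, yearStride=1)
plt.savefig('example_xticks.png')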
Example #2
    def test_iselvals(self):
        fileName = str(self.datadir.join('example_jan.nc'))
        calendar = 'gregorian_noleap'
        simulationStartTime = '0001-01-01'
        timestr = 'time_avg_daysSinceStartOfSim'
        variableList = \
            ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature',
             'refBottomDepth']

        iselvals = {'nVertLevels': slice(0, 3)}
        ds = mpas_xarray.open_multifile_dataset(
            fileNames=fileName,
            calendar=calendar,
            simulationStartTime=simulationStartTime,
            timeVariableName=timestr,
            variableList=variableList,
            iselValues=iselvals)

        dsVarList = list(ds.data_vars.keys()) + list(ds.coords.keys())
        assert (numpy.all([var in dsVarList for var in variableList]))
        self.assertEqual(ds[variableList[0]].shape, (1, 7, 3))
        self.assertEqual(ds['refBottomDepth'].shape, (3, ))
        self.assertApproxEqual(ds['refBottomDepth'][-1], 4.882000207901)

        self.assertEqual(
            days_to_datetime(days=ds.Time.values[0],
                             referenceDate='0001-01-01',
                             calendar=calendar),
            string_to_datetime('0005-01-14 12:24:14'))
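For reference, the iselValues argument above is index-based subsetting applied
at load time; on an already-open xarray dataset ds, the equivalent operation
would be:

ds = ds.isel(nVertLevels=slice(0, 3))  # keep only the top three vertical levels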
Example #3
    def test_subset_variables(self):
        fileName = str(self.datadir.join('example_jan.nc'))
        calendar = 'gregorian_noleap'
        timestr = ['xtime_start', 'xtime_end']
        variableList = \
            ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']

        # first, test loading the whole data set and then calling
        # subset_variables explicitly
        ds = mpas_xarray.open_multifile_dataset(fileNames=fileName,
                                                calendar=calendar,
                                                timeVariableName=timestr)
        ds = mpas_xarray.subset_variables(ds, variableList)
        dsVarList = list(ds.data_vars.keys()) + list(ds.coords.keys())
        assert (numpy.all([var in dsVarList for var in variableList]))
        self.assertEqual(
            days_to_datetime(days=ds.Time.values,
                             referenceDate='0001-01-01',
                             calendar=calendar),
            string_to_datetime('0005-01-16 12:22:30'))
        # next, test the same with the onlyvars argument
        ds = mpas_xarray.open_multifile_dataset(fileNames=fileName,
                                                calendar=calendar,
                                                timeVariableName=timestr,
                                                variableList=variableList)
        self.assertEqual(list(ds.data_vars.keys()), variableList)

        with six.assertRaisesRegex(self, ValueError,
                                   'Empty dataset is returned.'):
            missingvars = ['foo', 'bar']
            ds = mpas_xarray.open_multifile_dataset(fileNames=fileName,
                                                    calendar=calendar,
                                                    timeVariableName=timestr,
                                                    variableList=missingvars)
Example #4
def _date_tick(days, pos, calendar='gregorian', includeMonth=True):
    days = np.maximum(days, 0.)
    date = days_to_datetime(days, calendar)
    if includeMonth:
        return '{:04d}-{:02d}'.format(date.year, date.month)
    else:
        return '{:04d}'.format(date.year)
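Hypothetical spot checks of the formatter above (assuming days_to_datetime
defaults to a 0001-01-01 reference date; the pos argument is required by
matplotlib's FuncFormatter but unused here):

print(_date_tick(0., 0))                        # '0001-01'
print(_date_tick(365., 0, includeMonth=False))  # '0002' (year 0001 has 365 days)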
Example #5
    def test_days_to_datetime(self):
        referenceDate = '0001-01-01'
        for calendar in ['gregorian', 'gregorian_noleap']:
            for dateString, days in [('0001-01-01', 0.), ('0001-01-02', 1.),
                                     ('0001-02-01', 31.),
                                     ('0002-01-01', 365.)]:
                datetime = days_to_datetime(days=days,
                                            calendar=calendar,
                                            referenceDate=referenceDate)
                self.assertEqual(datetime, string_to_datetime(dateString))

        referenceDate = '2016-01-01'
        for calendar, days in [('gregorian', 366.),
                               ('gregorian_noleap', 365.)]:
            datetime = days_to_datetime(days=days,
                                        calendar=calendar,
                                        referenceDate=referenceDate)
            self.assertEqual(datetime, string_to_datetime('2017-01-01'))
Example #6
    def test_no_units(self):
        fileName = str(self.datadir.join('example_no_units_jan.nc'))
        calendar = 'gregorian_noleap'
        simulationStartTime = '0001-01-01'
        timestr = 'time_avg_daysSinceStartOfSim'
        variableList = \
            ['time_avg_avgValueWithinOceanLayerRegion_avgLayerTemperature',
             'refBottomDepth']

        ds = mpas_xarray.open_multifile_dataset(
            fileNames=fileName,
            calendar=calendar,
            simulationStartTime=simulationStartTime,
            timeVariableName=timestr,
            variableList=variableList)
        self.assertEqual(sorted(ds.data_vars.keys()), sorted(variableList))

        self.assertEqual(
            days_to_datetime(days=ds.Time.values[0],
                             referenceDate='0001-01-01',
                             calendar=calendar),
            string_to_datetime('0005-01-14 12:24:14'))
Example #7
def add_years_months_days_in_month(ds, calendar=None):  # {{{
    '''
    Add ``year``, ``month`` and ``daysInMonth`` as data arrays in ``ds``.
    The number of days in each month of ``ds`` is computed either using the
    ``startTime`` and ``endTime`` if available or assuming ``gregorian_noleap``
    calendar and ignoring leap years.  ``year`` and ``month`` are computed
    accounting correctly for the calendar.

    Parameters
    ----------
    ds : ``xarray.Dataset`` or ``xarray.DataArray`` object
        A data set with a ``Time`` coordinate expressed as days since
        0001-01-01

    calendar : {'gregorian', 'gregorian_noleap'}, optional
        The name of one of the calendars supported by MPAS cores, used to
        determine ``year`` and ``month`` from ``Time`` coordinate

    Returns
    -------
    ds : object of same type as ``ds``
        The data set with ``year``, ``month`` and ``daysInMonth`` data arrays
        added (if not already present)
    '''
    # Authors
    # -------
    # Xylar Asay-Davis

    if ('year' in ds.coords and 'month' in ds.coords and
            'daysInMonth' in ds.coords):
        return ds

    ds = ds.copy()

    if 'year' not in ds.coords or 'month' not in ds.coords:
        if calendar is None:
            raise ValueError('calendar must be provided if month and year '
                             'coordinate is not in ds.')
        datetimes = days_to_datetime(ds.Time, calendar=calendar)

    if 'year' not in ds.coords:
        ds.coords['year'] = ('Time', [date.year for date in datetimes])

    if 'month' not in ds.coords:
        ds.coords['month'] = ('Time', [date.month for date in datetimes])

    if 'daysInMonth' not in ds.coords:
        if 'startTime' in ds.coords and 'endTime' in ds.coords:
            ds.coords['daysInMonth'] = ds.endTime - ds.startTime
        else:
            if calendar == 'gregorian':
                print('Warning: The MPAS run used the Gregorian calendar '
                      'but does not appear to have\n'
                      'supplied start and end times.  Climatologies '
                      'will be computed with\n'
                      'month durations ignoring leap years.')

            daysInMonth = numpy.array(
                [constants.daysInMonth[int(month) - 1] for
                 month in ds.month.values], float)
            ds.coords['daysInMonth'] = ('Time', daysInMonth)

    return ds  # }}}
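A hedged usage sketch with a two-sample dataset (assumes the MPAS-Analysis
helpers used inside the function, such as days_to_datetime and constants, are
importable):

import numpy
import xarray

ds = xarray.Dataset(coords={'Time': ('Time', numpy.array([15.5, 45.0]))})
ds = add_years_months_days_in_month(ds, calendar='gregorian_noleap')
print(ds.year.values, ds.month.values, ds.daysInMonth.values)
# expected for the noleap calendar: [1 1] [1 2] [31. 28.]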
Example #8
    def run_task(self):  # {{{
        """
        Compute time series of regional profiles
        """
        # Authors
        # -------
        # Milena Veneziani, Mark Petersen, Phillip J. Wolfram, Xylar Asay-Davis

        self.logger.info("\nCompute time series of regional profiles...")

        startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
        endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

        timeSeriesName = self.masksSubtask.regionGroup.replace(' ', '')

        outputDirectory = '{}/{}/'.format(
            build_config_full_path(self.config, 'output',
                                   'timeseriesSubdirectory'),
            timeSeriesName)
        try:
            os.makedirs(outputDirectory)
        except OSError:
            pass

        outputFileName = '{}/regionalProfiles_{}_{:04d}-{:04d}.nc'.format(
            outputDirectory, timeSeriesName, self.startYear, self.endYear)

        inputFiles = sorted(self.historyStreams.readpath(
            'timeSeriesStatsMonthlyOutput', startDate=startDate,
            endDate=endDate, calendar=self.calendar))

        years, months = get_files_year_month(inputFiles,
                                             self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        variableList = [field['mpas'] for field in self.fields]

        outputExists = os.path.exists(outputFileName)
        outputValid = outputExists
        if outputExists:
            with open_mpas_dataset(fileName=outputFileName,
                                   calendar=self.calendar,
                                   timeVariableNames=None,
                                   variableList=None,
                                   startDate=startDate,
                                   endDate=endDate) as dsIn:

                for inIndex in range(dsIn.sizes['Time']):

                    mask = np.logical_and(
                        dsIn.year[inIndex].values == years,
                        dsIn.month[inIndex].values == months)
                    if np.count_nonzero(mask) == 0:
                        outputValid = False
                        break

        if outputValid:
            self.logger.info('  Time series exists -- Done.')
            return

        # get areaCell
        restartFileName = \
            self.runStreams.readpath('restart')[0]

        dsRestart = xr.open_dataset(restartFileName)
        dsRestart = dsRestart.isel(Time=0)
        areaCell = dsRestart.areaCell

        nVertLevels = dsRestart.sizes['nVertLevels']

        vertIndex = \
            xr.DataArray.from_dict({'dims': ('nVertLevels',),
                                    'data': np.arange(nVertLevels)})

        vertMask = vertIndex < dsRestart.maxLevelCell

        # get region masks
        regionMaskFileName = self.masksSubtask.maskFileName
        dsRegionMask = xr.open_dataset(regionMaskFileName)

        # figure out the indices of the regions to plot
        regionNames = decode_strings(dsRegionMask.regionNames)

        regionIndices = []
        for regionToPlot in self.regionNames:
            for index, regionName in enumerate(regionNames):
                if regionToPlot == regionName:
                    regionIndices.append(index)
                    break

        # select only those regions we want to plot
        dsRegionMask = dsRegionMask.isel(nRegions=regionIndices)
        cellMasks = dsRegionMask.regionCellMasks
        regionNamesVar = dsRegionMask.regionNames

        totalArea = (cellMasks * areaCell * vertMask).sum('nCells')

        datasets = []
        for timeIndex, fileName in enumerate(inputFiles):

            dsLocal = open_mpas_dataset(
                fileName=fileName,
                calendar=self.calendar,
                variableList=variableList,
                startDate=startDate,
                endDate=endDate)
            dsLocal = dsLocal.isel(Time=0)
            time = dsLocal.Time.values
            date = days_to_datetime(time, calendar=self.calendar)

            self.logger.info('    date: {:04d}-{:02d}'.format(date.year,
                                                              date.month))

            # for each region and variable, compute area-weighted sum and
            # squared sum
            for field in self.fields:
                variableName = field['mpas']
                prefix = field['prefix']
                self.logger.info('      {}'.format(field['titleName']))

                var = dsLocal[variableName].where(vertMask)

                meanName = '{}_mean'.format(prefix)
                dsLocal[meanName] = \
                    (cellMasks * areaCell * var).sum('nCells') / totalArea

                meanSquaredName = '{}_meanSquared'.format(prefix)
                dsLocal[meanSquaredName] = \
                    (cellMasks * areaCell * var**2).sum('nCells') / totalArea

            # drop the original variables
            dsLocal = dsLocal.drop_vars(variableList)

            datasets.append(dsLocal)

        # combine data sets into a single data set
        dsOut = xr.concat(datasets, 'Time')

        dsOut.coords['regionNames'] = regionNamesVar
        dsOut['totalArea'] = totalArea
        dsOut.coords['year'] = (('Time',), years)
        dsOut['year'].attrs['units'] = 'years'
        dsOut.coords['month'] = (('Time',), months)
        dsOut['month'].attrs['units'] = 'months'

        # Note: restart file, not a mesh file because we need refBottomDepth,
        # not in a mesh file
        try:
            restartFile = self.runStreams.readpath('restart')[0]
        except ValueError:
            raise IOError('No MPAS-O restart file found: need at least one '
                          'restart file for plotting time series vs. depth')

        with xr.open_dataset(restartFile) as dsRestart:
            depths = dsRestart.refBottomDepth.values
            z = np.zeros(depths.shape)
            z[0] = -0.5 * depths[0]
            z[1:] = -0.5 * (depths[0:-1] + depths[1:])

        dsOut.coords['z'] = (('nVertLevels',), z)
        dsOut['z'].attrs['units'] = 'meters'

        write_netcdf(dsOut, outputFileName)
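The area-weighted regional mean above boils down to a masked weighted average
over cells; a self-contained sketch with synthetic data (the vertical mask is
omitted here for brevity, and all names are illustrative):

import numpy as np
import xarray as xr

areaCell = xr.DataArray(np.ones(5), dims='nCells')
cellMasks = xr.DataArray(np.array([[1, 0], [1, 0], [0, 1], [0, 1], [0, 1]]),
                         dims=('nCells', 'nRegions'))
var = xr.DataArray(np.array([1., 2., 3., 4., 5.]), dims='nCells')

totalArea = (cellMasks * areaCell).sum('nCells')
mean = (cellMasks * areaCell * var).sum('nCells') / totalArea
print(mean.values)  # [1.5 4. ] -- one area-weighted mean per region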
Example #9
    def run_task(self):  # {{{
        """
        Compute vertical aggregates of the data and plot the time series
        """
        # Authors
        # -------
        # Xylar Asay-Davis, Milena Veneziani, Greg Streletz

        self.logger.info("\nPlotting depth-integrated time series of "
                         "{}...".format(self.fieldNameInTitle))

        config = self.config
        calendar = self.calendar

        mainRunName = config.get('runs', 'mainRunName')

        plotTitles = config.getExpression('regions', 'plotTitles')
        allRegionNames = config.getExpression('regions', 'regions')
        regionIndex = allRegionNames.index(self.regionName)
        regionNameInTitle = plotTitles[regionIndex]

        startDate = config.get('timeSeries', 'startDate')
        endDate = config.get('timeSeries', 'endDate')

        # Load data
        self.logger.info('  Load ocean data...')
        ds = open_mpas_dataset(fileName=self.inFileName,
                               calendar=calendar,
                               variableList=[self.mpasFieldName, 'depth'],
                               timeVariableNames=None,
                               startDate=startDate,
                               endDate=endDate)
        ds = ds.isel(nOceanRegionsTmp=regionIndex)

        depths = ds.depth.values

        divisionDepths = config.getExpression(self.sectionName, 'depths')

        # for each depth interval to plot, determine the top and bottom depth
        topDepths = [0, 0] + divisionDepths
        bottomDepths = [depths[-1]] + divisionDepths + [depths[-1]]

        legends = []
        for top, bottom in zip(topDepths, bottomDepths):
            if bottom == depths[-1]:
                legends.append('{}m-bottom'.format(top))
            else:
                legends.append('{}m-{}m'.format(top, bottom))

        # more possible symbols than we typically use
        lines = ['-', '-', '--', None, None, None, None]
        markers = [None, None, None, '+', 'o', '^', 'v']
        widths = [5, 3, 3, 3, 3, 3, 3]
        points = [None, None, None, 300, 300, 300, 300]

        color = 'k'

        xLabel = 'Time [years]'
        yLabel = self.yAxisLabel

        title = '{}, {} \n {} (black)'.format(self.fieldNameInTitle,
                                              regionNameInTitle, mainRunName)

        outFileName = '{}/{}.png'.format(self.plotsDirectory, self.filePrefix)

        timeSeries = []
        lineColors = []
        lineStyles = []
        lineMarkers = []
        lineWidths = []
        maxPoints = []
        legendText = []

        for rangeIndex in range(len(topDepths)):
            top = topDepths[rangeIndex]
            bottom = bottomDepths[rangeIndex]
            field = ds[self.mpasFieldName].where(ds.depth > top)
            field = field.where(ds.depth <= bottom)
            timeSeries.append(field.sum('nVertLevels'))

            lineColors.append(color)
            lineStyles.append(lines[rangeIndex])
            lineMarkers.append(markers[rangeIndex])
            lineWidths.append(widths[rangeIndex])
            maxPoints.append(points[rangeIndex])
            legendText.append(legends[rangeIndex])

        preprocessedReferenceRunName = config.get(
            'runs', 'preprocessedReferenceRunName')
        if preprocessedReferenceRunName != 'None':
            preprocessedInputDirectory = config.get(
                'oceanPreprocessedReference', 'baseDirectory')

            self.logger.info('  Load in preprocessed reference data...')
            preprocessedFilePrefix = config.get(self.sectionName,
                                                'preprocessedFilePrefix')
            inFilesPreprocessed = '{}/{}.{}.year*.nc'.format(
                preprocessedInputDirectory, preprocessedFilePrefix,
                preprocessedReferenceRunName)

            combine_time_series_with_ncrcat(
                inFilesPreprocessed,
                self.preprocessedIntermediateFileName,
                logger=self.logger)
            dsPreprocessed = open_mpas_dataset(
                fileName=self.preprocessedIntermediateFileName,
                calendar=calendar,
                timeVariableNames='xtime')

            yearStart = days_to_datetime(ds.Time.min(), calendar=calendar).year
            yearEnd = days_to_datetime(ds.Time.max(), calendar=calendar).year
            timeStart = date_to_days(year=yearStart,
                                     month=1,
                                     day=1,
                                     calendar=calendar)
            timeEnd = date_to_days(year=yearEnd,
                                   month=12,
                                   day=31,
                                   calendar=calendar)

            yearEndPreprocessed = days_to_datetime(dsPreprocessed.Time.max(),
                                                   calendar=calendar).year
            if yearStart <= yearEndPreprocessed:
                dsPreprocessed = dsPreprocessed.sel(
                    Time=slice(timeStart, timeEnd))
            else:
                self.logger.warning('Warning: Preprocessed time series ends '
                                    'before the timeSeries startYear and will '
                                    'not be plotted.')
                preprocessedReferenceRunName = 'None'

            # rolling mean seems to have trouble with dask data sets so we
            # write out the data set and read it back as a single-file data set
            # (without dask)
            dsPreprocessed = dsPreprocessed.drop_vars('xtime')
            write_netcdf(dsPreprocessed, self.preprocessedFileName)
            dsPreprocessed = xarray.open_dataset(self.preprocessedFileName)

        if preprocessedReferenceRunName != 'None':
            color = 'purple'
            title = '{} \n {} (purple)'.format(title,
                                               preprocessedReferenceRunName)

            preprocessedFieldPrefix = config.get(self.sectionName,
                                                 'preprocessedFieldPrefix')

            movingAveragePoints = config.getint(self.sectionName,
                                                'movingAveragePoints')

            suffixes = (['tot'] +
                        ['{}m'.format(depth) for depth in divisionDepths] +
                        ['btm'])

            # these preprocessed data are already anomalies
            dsPreprocessed = compute_moving_avg(dsPreprocessed,
                                                movingAveragePoints)
            for rangeIndex in range(len(suffixes)):
                variableName = '{}_{}'.format(preprocessedFieldPrefix,
                                              suffixes[rangeIndex])
                if variableName in list(dsPreprocessed.data_vars.keys()):
                    timeSeries.append(dsPreprocessed[variableName])
                else:
                    self.logger.warning(
                        'Warning: Preprocessed variable {} '
                        'not found. Skipping.'.format(variableName))
                    timeSeries.append(None)

                lineColors.append(color)
                lineStyles.append(lines[rangeIndex])
                lineMarkers.append(markers[rangeIndex])
                lineWidths.append(widths[rangeIndex])
                maxPoints.append(points[rangeIndex])
                legendText.append(None)

        if self.controlConfig is not None:

            controlRunName = self.controlConfig.get('runs', 'mainRunName')

            title = '{} \n {} (red)'.format(title, controlRunName)

            self.logger.info('  Load ocean data from control run...')
            controlStartYear = self.controlConfig.getint(
                'timeSeries', 'startYear')
            controlEndYear = self.controlConfig.getint('timeSeries', 'endYear')
            controlStartDate = '{:04d}-01-01_00:00:00'.format(controlStartYear)
            controlEndDate = '{:04d}-12-31_23:59:59'.format(controlEndYear)
            dsRef = open_mpas_dataset(
                fileName=self.refFileName,
                calendar=calendar,
                variableList=[self.mpasFieldName, 'depth'],
                timeVariableNames=None,
                startDate=controlStartDate,
                endDate=controlEndDate)
            dsRef = dsRef.isel(nOceanRegionsTmp=regionIndex)

            color = 'r'

            for rangeIndex in range(len(topDepths)):
                top = topDepths[rangeIndex]
                bottom = bottomDepths[rangeIndex]
                field = dsRef[self.mpasFieldName].where(dsRef.depth > top)
                field = field.where(dsRef.depth <= bottom)
                timeSeries.append(field.sum('nVertLevels'))

                lineColors.append(color)
                lineStyles.append(lines[rangeIndex])
                lineMarkers.append(markers[rangeIndex])
                lineWidths.append(widths[rangeIndex])
                maxPoints.append(points[rangeIndex])
                legendText.append(None)

        if config.has_option(self.taskName, 'firstYearXTicks'):
            firstYearXTicks = config.getint(self.taskName, 'firstYearXTicks')
        else:
            firstYearXTicks = None

        if config.has_option(self.taskName, 'yearStrideXTicks'):
            yearStrideXTicks = config.getint(self.taskName, 'yearStrideXTicks')
        else:
            yearStrideXTicks = None

        timeseries_analysis_plot(config=config,
                                 dsvalues=timeSeries,
                                 calendar=calendar,
                                 title=title,
                                 xlabel=xLabel,
                                 ylabel=yLabel,
                                 movingAveragePoints=None,
                                 lineColors=lineColors,
                                 lineStyles=lineStyles,
                                 markers=lineMarkers,
                                 lineWidths=lineWidths,
                                 legendText=legendText,
                                 maxPoints=maxPoints,
                                 firstYearXTicks=firstYearXTicks,
                                 yearStrideXTicks=yearStrideXTicks)

        savefig(outFileName)

        write_image_xml(config=config,
                        filePrefix=self.filePrefix,
                        componentName='Ocean',
                        componentSubdirectory='ocean',
                        galleryGroup=self.galleryGroup,
                        groupLink=self.groupLink,
                        gallery=self.galleryName,
                        thumbnailDescription='{} {}'.format(
                            self.regionName, self.thumbnailSuffix),
                        imageDescription=self.imageCaption,
                        imageCaption=self.imageCaption)
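A quick sketch of the depth-range masking used in the loop above (synthetic
values; the dimension name follows the MPAS convention in this file):

import numpy as np
import xarray as xr

depth = xr.DataArray(np.array([5., 50., 150., 400.]), dims='nVertLevels')
field = xr.DataArray(np.ones(4), dims='nVertLevels')

top, bottom = 0, 100
masked = field.where(depth > top).where(depth <= bottom)
print(float(masked.sum('nVertLevels')))  # 2.0: two levels lie within 0-100 m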
Example #10
    def run_task(self):  # {{{
        """
        Performs analysis of the time-series output of sea-surface temperature
        (SST).
        """
        # Authors
        # -------
        # Xylar Asay-Davis, Milena Veneziani

        self.logger.info("\nPlotting SST time series...")

        self.logger.info('  Load SST data...')

        config = self.config
        calendar = self.calendar

        mainRunName = config.get('runs', 'mainRunName')
        preprocessedReferenceRunName = \
            config.get('runs', 'preprocessedReferenceRunName')
        preprocessedInputDirectory = config.get('oceanPreprocessedReference',
                                                'baseDirectory')

        movingAveragePoints = config.getint('timeSeriesSST',
                                            'movingAveragePoints')

        regions = config.getExpression('regions', 'regions')
        plotTitles = config.getExpression('regions', 'plotTitles')
        regionsToPlot = config.getExpression('timeSeriesSST', 'regions')

        regionIndicesToPlot = [
            regions.index(region) for region in regionsToPlot
        ]

        outputDirectory = build_config_full_path(config, 'output',
                                                 'timeseriesSubdirectory')

        make_directories(outputDirectory)

        dsSST = open_mpas_dataset(fileName=self.inputFile,
                                  calendar=calendar,
                                  variableList=self.variableList,
                                  startDate=self.startDate,
                                  endDate=self.endDate)

        yearStart = days_to_datetime(dsSST.Time.min(), calendar=calendar).year
        yearEnd = days_to_datetime(dsSST.Time.max(), calendar=calendar).year
        timeStart = date_to_days(year=yearStart,
                                 month=1,
                                 day=1,
                                 calendar=calendar)
        timeEnd = date_to_days(year=yearEnd,
                               month=12,
                               day=31,
                               calendar=calendar)

        if self.refConfig is not None:
            baseDirectory = build_config_full_path(self.refConfig, 'output',
                                                   'timeSeriesSubdirectory')

            refFileName = '{}/{}.nc'.format(
                baseDirectory, self.mpasTimeSeriesTask.fullTaskName)

            refStartYear = self.refConfig.getint('timeSeries', 'startYear')
            refEndYear = self.refConfig.getint('timeSeries', 'endYear')
            refStartDate = '{:04d}-01-01_00:00:00'.format(refStartYear)
            refEndDate = '{:04d}-12-31_23:59:59'.format(refEndYear)

            dsRefSST = open_mpas_dataset(fileName=refFileName,
                                         calendar=calendar,
                                         variableList=self.variableList,
                                         startDate=refStartDate,
                                         endDate=refEndDate)
        else:
            dsRefSST = None

        if preprocessedReferenceRunName != 'None':
            self.logger.info('  Load in SST for a preprocessed reference '
                             'run...')
            inFilesPreprocessed = '{}/SST.{}.year*.nc'.format(
                preprocessedInputDirectory, preprocessedReferenceRunName)

            outFolder = '{}/preprocessed'.format(outputDirectory)
            make_directories(outFolder)
            outFileName = '{}/sst.nc'.format(outFolder)

            combine_time_series_with_ncrcat(inFilesPreprocessed,
                                            outFileName,
                                            logger=self.logger)
            dsPreprocessed = open_mpas_dataset(fileName=outFileName,
                                               calendar=calendar,
                                               timeVariableNames='xtime')
            yearEndPreprocessed = days_to_datetime(dsPreprocessed.Time.max(),
                                                   calendar=calendar).year
            if yearStart <= yearEndPreprocessed:
                dsPreprocessedTimeSlice = \
                    dsPreprocessed.sel(Time=slice(timeStart, timeEnd))
            else:
                self.logger.warning('Preprocessed time series ends before the '
                                    'timeSeries startYear and will not be '
                                    'plotted.')
                preprocessedReferenceRunName = 'None'

        self.logger.info('  Make plots...')
        for regionIndex in regionIndicesToPlot:
            region = regions[regionIndex]

            title = '{} SST'.format(plotTitles[regionIndex])
            xLabel = 'Time [years]'
            yLabel = r'[$\degree$C]'

            varName = self.variableList[0]
            SST = dsSST[varName].isel(nOceanRegions=regionIndex)

            filePrefix = self.filePrefixes[region]

            figureName = '{}/{}.png'.format(self.plotsDirectory, filePrefix)

            lineColors = ['k']
            lineWidths = [3]

            fields = [SST]
            legendText = [mainRunName]

            if dsRefSST is not None:
                refSST = dsRefSST[varName].isel(nOceanRegions=regionIndex)
                fields.append(refSST)
                lineColors.append('r')
                lineWidths.append(1.5)
                refRunName = self.refConfig.get('runs', 'mainRunName')
                legendText.append(refRunName)

            if preprocessedReferenceRunName != 'None':
                SST_v0 = dsPreprocessedTimeSlice.SST
                fields.append(SST_v0)
                lineColors.append('purple')
                lineWidths.append(1.5)
                legendText.append(preprocessedReferenceRunName)

            if config.has_option(self.taskName, 'firstYearXTicks'):
                firstYearXTicks = config.getint(self.taskName,
                                                'firstYearXTicks')
            else:
                firstYearXTicks = None

            if config.has_option(self.taskName, 'yearStrideXTicks'):
                yearStrideXTicks = config.getint(self.taskName,
                                                 'yearStrideXTicks')
            else:
                yearStrideXTicks = None

            timeseries_analysis_plot(config,
                                     fields,
                                     movingAveragePoints,
                                     title,
                                     xLabel,
                                     yLabel,
                                     figureName,
                                     calendar=calendar,
                                     lineColors=lineColors,
                                     lineWidths=lineWidths,
                                     legendText=legendText,
                                     firstYearXTicks=firstYearXTicks,
                                     yearStrideXTicks=yearStrideXTicks)

            caption = 'Running Mean of {} Sea Surface Temperature'.format(
                region)
            write_image_xml(config=config,
                            filePrefix=filePrefix,
                            componentName='Ocean',
                            componentSubdirectory='ocean',
                            galleryGroup='Time Series',
                            groupLink='timeseries',
                            thumbnailDescription='{} SST'.format(region),
                            imageDescription=caption,
                            imageCaption=caption)
Example #11
    def _compute_moc_time_series_analysismember(self):  # {{{
        '''compute MOC time series from analysis member'''

        # Compute and plot time series of Atlantic MOC at 26.5N (RAPID array)
        self.logger.info(
            '\n  Compute Atlantic MOC time series from analysis member...')
        self.logger.info('   Load data...')

        outputDirectory = build_config_full_path(self.config, 'output',
                                                 'timeseriesSubdirectory')
        try:
            os.makedirs(outputDirectory)
        except OSError:
            pass

        outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory)

        streamName = 'timeSeriesStatsMonthlyOutput'

        # Get bin latitudes and index of 26.5N
        binBoundaryMocStreamfunction = None
        # first try timeSeriesStatsMonthly for bin boundaries, then try
        # mocStreamfunctionOutput stream as a backup option
        for streamName in [
                'timeSeriesStatsMonthlyOutput', 'mocStreamfunctionOutput'
        ]:
            try:
                inputFile = self.historyStreams.readpath(streamName)[0]
            except ValueError:
                raise IOError('At least one file from stream {} is needed '
                              'to compute MOC'.format(streamName))

            with xr.open_dataset(inputFile) as ds:
                if 'binBoundaryMocStreamfunction' in ds.data_vars:
                    binBoundaryMocStreamfunction = \
                        ds.binBoundaryMocStreamfunction.values
                    break

        if binBoundaryMocStreamfunction is None:
            raise ValueError('Could not find binBoundaryMocStreamfunction in '
                             'either timeSeriesStatsMonthlyOutput or '
                             'mocStreamfunctionOutput streams')

        binBoundaryMocStreamfunction = np.rad2deg(binBoundaryMocStreamfunction)
        dLat = binBoundaryMocStreamfunction - 26.5
        indlat26 = np.where(np.abs(dLat) == np.amin(np.abs(dLat)))

        inputFilesTseries = sorted(
            self.historyStreams.readpath(streamName,
                                         startDate=self.startDateTseries,
                                         endDate=self.endDateTseries,
                                         calendar=self.calendar))

        years, months = get_files_year_month(inputFilesTseries,
                                             self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        mocRegion = np.zeros(len(inputFilesTseries))
        times = np.zeros(len(inputFilesTseries))
        computed = np.zeros(len(inputFilesTseries), bool)

        continueOutput = os.path.exists(outputFileTseries)
        if continueOutput:
            self.logger.info('   Read in previously computed MOC time series')
            with open_mpas_dataset(fileName=outputFileTseries,
                                   calendar=self.calendar,
                                   timeVariableNames=None,
                                   variableList=['mocAtlantic26'],
                                   startDate=self.startDateTseries,
                                   endDate=self.endDateTseries) as dsMOCIn:

                dsMOCIn.load()

                # first, copy all computed data
                for inIndex in range(dsMOCIn.sizes['Time']):

                    mask = np.logical_and(
                        dsMOCIn.year[inIndex].values == years,
                        dsMOCIn.month[inIndex].values == months)

                    outIndex = np.where(mask)[0][0]

                    mocRegion[outIndex] = dsMOCIn.mocAtlantic26[inIndex]
                    times[outIndex] = dsMOCIn.Time[inIndex]
                    computed[outIndex] = True

                if np.all(computed):
                    # no need to waste time writing out the data set again
                    return dsMOCIn

        for timeIndex, fileName in enumerate(inputFilesTseries):
            if computed[timeIndex]:
                continue

            dsLocal = open_mpas_dataset(fileName=fileName,
                                        calendar=self.calendar,
                                        variableList=self.variableList,
                                        startDate=self.startDateTseries,
                                        endDate=self.endDateTseries)
            dsLocal = dsLocal.isel(Time=0)
            time = dsLocal.Time.values
            times[timeIndex] = time
            date = days_to_datetime(time, calendar=self.calendar)

            self.logger.info('     date: {:04d}-{:02d}'.format(
                date.year, date.month))

            # hard-wire region=0 (Atlantic) for now
            indRegion = 0
            mocTop = dsLocal.timeMonthly_avg_mocStreamvalLatAndDepthRegion[
                indRegion, :, :].values
            mocRegion[timeIndex] = np.amax(mocTop[:, indlat26])

        description = 'Max MOC Atlantic streamfunction nearest to RAPID ' \
            'Array latitude (26.5N)'

        dictionary = {
            'dims': ['Time'],
            'coords': {
                'Time': {
                    'dims': ('Time'),
                    'data': times,
                    'attrs': {
                        'units': 'days since 0001-01-01'
                    }
                },
                'year': {
                    'dims': ('Time'),
                    'data': years,
                    'attrs': {
                        'units': 'year'
                    }
                },
                'month': {
                    'dims': ('Time'),
                    'data': months,
                    'attrs': {
                        'units': 'month'
                    }
                }
            },
            'data_vars': {
                'mocAtlantic26': {
                    'dims': ('Time'),
                    'data': mocRegion,
                    'attrs': {
                        'units': 'Sv (10^6 m^3/s)',
                        'description': description
                    }
                }
            }
        }
        dsMOCTimeSeries = xr.Dataset.from_dict(dictionary)
        write_netcdf(dsMOCTimeSeries, outputFileTseries)

        return dsMOCTimeSeries  # }}}
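For reference, xarray's Dataset.from_dict accepts exactly this nested layout;
a tiny standalone sketch mirroring the structure above:

import xarray as xr

ds = xr.Dataset.from_dict({
    'dims': ['Time'],
    'coords': {'Time': {'dims': ('Time',), 'data': [0., 31.],
                        'attrs': {'units': 'days since 0001-01-01'}}},
    'data_vars': {'mocAtlantic26': {'dims': ('Time',), 'data': [17.5, 18.1]}}})
print(ds.mocAtlantic26.values)  # [17.5 18.1]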
Example #12
    def _replicate_cycle(self, ds, dsToReplicate, calendar):  # {{{
        """
        Replicates a periodic time series `dsToReplicate` to cover the
        timeframe of the dataset `ds`.

        Parameters
        ----------
        ds : dataset used to find the start and end time of the replicated
            cycle

        dsToReplicate : dataset to replicate.  The period of the cycle is the
            length of dsToReplicate plus the time between the first two time
            values (typically one year total).

        calendar : {'gregorian', 'gregorian_noleap'}
            The name of one of the calendars supported by MPAS cores

        Returns
        -------
        dsShift : a cyclically repeated version of `dsToReplicate` covering
            the range of time of `ds`.
        """
        # Authors
        # -------
        # Xylar Asay-Davis, Milena Veneziani

        dsStartTime = days_to_datetime(ds.Time.min(), calendar=calendar)
        dsEndTime = days_to_datetime(ds.Time.max(), calendar=calendar)
        repStartTime = days_to_datetime(dsToReplicate.Time.min(),
                                        calendar=calendar)
        repEndTime = days_to_datetime(dsToReplicate.Time.max(),
                                      calendar=calendar)

        repSecondTime = days_to_datetime(dsToReplicate.Time.isel(Time=1),
                                         calendar=calendar)

        period = (MpasRelativeDelta(repEndTime, repStartTime) +
                  MpasRelativeDelta(repSecondTime, repStartTime))

        startIndex = 0
        while dsStartTime > repStartTime + (startIndex + 1) * period:
            startIndex += 1

        endIndex = 0
        while dsEndTime > repEndTime + endIndex * period:
            endIndex += 1

        dsShift = dsToReplicate.copy()

        times = days_to_datetime(dsShift.Time, calendar=calendar)
        dsShift.coords['Time'] = ('Time',
                                  datetime_to_days(times + startIndex * period,
                                                   calendar=calendar))
        # replicate cycle:
        for cycleIndex in range(startIndex, endIndex):
            dsNew = dsToReplicate.copy()
            dsNew.coords['Time'] = \
                ('Time', datetime_to_days(times + (cycleIndex + 1) * period,
                                          calendar=calendar))
            dsShift = xr.concat([dsShift, dsNew], dim='Time')

        # clip dsShift to the range of ds
        dsStartTime = dsShift.Time.sel(Time=ds.Time.min(),
                                       method=str('nearest')).values
        dsEndTime = dsShift.Time.sel(Time=ds.Time.max(),
                                     method=str('nearest')).values
        dsShift = dsShift.sel(Time=slice(dsStartTime, dsEndTime))

        return dsShift  # }}}
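A toy illustration of the start/end index arithmetic above, using plain floats
instead of datetimes (no MPAS helpers needed): a 12-entry monthly climatology
with a 360-day period is shifted and tiled until it covers the target range.

period = 360.                                  # toy period in days
repTimes = [15. + 30. * i for i in range(12)]  # mid-month sample times
targetStart, targetEnd = 700., 1500.

startIndex = 0
while targetStart > repTimes[0] + (startIndex + 1) * period:
    startIndex += 1
endIndex = 0
while targetEnd > repTimes[-1] + endIndex * period:
    endIndex += 1
print(startIndex, endIndex)  # 1 4: shift one period, then append more copies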
Example #13
    def run_task(self):  # {{{
        """
        Performs analysis of time series of sea-ice properties.
        """
        # Authors
        # -------
        # Xylar Asay-Davis, Milena Veneziani

        self.logger.info("\nPlotting sea-ice area and volume time series...")

        config = self.config
        calendar = self.calendar

        sectionName = self.taskName

        plotTitles = {'iceArea': 'Sea-ice area',
                      'iceVolume': 'Sea-ice volume',
                      'iceThickness': 'Sea-ice mean thickness'}

        units = {'iceArea': '[km$^2$]',
                 'iceVolume': '[10$^3$ km$^3$]',
                 'iceThickness': '[m]'}

        obsFileNames = {
            'iceArea': {
                'NH': build_obs_path(config, 'seaIce',
                                     relativePathOption='areaNH',
                                     relativePathSection=sectionName),
                'SH': build_obs_path(config, 'seaIce',
                                     relativePathOption='areaSH',
                                     relativePathSection=sectionName)},
            'iceVolume': {
                'NH': build_obs_path(config, 'seaIce',
                                     relativePathOption='volNH',
                                     relativePathSection=sectionName),
                'SH': build_obs_path(config, 'seaIce',
                                     relativePathOption='volSH',
                                     relativePathSection=sectionName)}}

        # Some plotting rules
        titleFontSize = config.get('timeSeriesSeaIceAreaVol', 'titleFontSize')

        mainRunName = config.get('runs', 'mainRunName')
        preprocessedReferenceRunName = \
            config.get('runs', 'preprocessedReferenceRunName')
        preprocessedReferenceDirectory = \
            config.get('seaIcePreprocessedReference', 'baseDirectory')

        compareWithObservations = config.getboolean('timeSeriesSeaIceAreaVol',
                                                    'compareWithObservations')

        movingAveragePoints = config.getint('timeSeriesSeaIceAreaVol',
                                            'movingAveragePoints')

        polarPlot = config.getboolean('timeSeriesSeaIceAreaVol', 'polarPlot')

        outputDirectory = build_config_full_path(config, 'output',
                                                 'timeseriesSubdirectory')

        make_directories(outputDirectory)

        self.logger.info('  Load sea-ice data...')
        # Load mesh

        dsTimeSeries = self._compute_area_vol()

        yearStart = days_to_datetime(dsTimeSeries['NH'].Time.min(),
                                     calendar=calendar).year
        yearEnd = days_to_datetime(dsTimeSeries['NH'].Time.max(),
                                   calendar=calendar).year
        timeStart = date_to_days(year=yearStart, month=1, day=1,
                                 calendar=calendar)
        timeEnd = date_to_days(year=yearEnd, month=12, day=31,
                               calendar=calendar)

        if preprocessedReferenceRunName != 'None':
            # determine if we're beyond the end of the preprocessed data
            # (and go ahead and cache the data set while we're checking)
            outFolder = '{}/preprocessed'.format(outputDirectory)
            make_directories(outFolder)
            inFilesPreprocessed = '{}/icevol.{}.year*.nc'.format(
                preprocessedReferenceDirectory, preprocessedReferenceRunName)
            outFileName = '{}/iceVolume.nc'.format(outFolder)

            combine_time_series_with_ncrcat(inFilesPreprocessed,
                                            outFileName,
                                            logger=self.logger)
            dsPreprocessed = open_mpas_dataset(fileName=outFileName,
                                               calendar=calendar,
                                               timeVariableNames='xtime')
            preprocessedYearEnd = days_to_datetime(dsPreprocessed.Time.max(),
                                                   calendar=calendar).year
            if yearStart <= preprocessedYearEnd:
                dsPreprocessedTimeSlice = \
                    dsPreprocessed.sel(Time=slice(timeStart, timeEnd))
            else:
                self.logger.warning('Preprocessed time series ends before the '
                                    'timeSeries startYear and will not be '
                                    'plotted.')
                preprocessedReferenceRunName = 'None'

        if self.controlConfig is not None:

            dsTimeSeriesRef = {}
            baseDirectory = build_config_full_path(
                self.controlConfig, 'output', 'timeSeriesSubdirectory')

            controlRunName = self.controlConfig.get('runs', 'mainRunName')

            for hemisphere in ['NH', 'SH']:
                inFileName = '{}/seaIceAreaVol{}.nc'.format(baseDirectory,
                                                            hemisphere)

                dsTimeSeriesRef[hemisphere] = xr.open_dataset(inFileName)

        norm = {'iceArea': 1e-6,  # m^2 to km^2
                'iceVolume': 1e-12,  # m^3 to 10^3 km^3
                'iceThickness': 1.}

        xLabel = 'Time [years]'

        galleryGroup = 'Time Series'
        groupLink = 'timeseries'

        obs = {}
        preprocessed = {}
        figureNameStd = {}
        figureNamePolar = {}
        title = {}
        plotVars = {}
        obsLegend = {}
        plotVarsRef = {}

        for hemisphere in ['NH', 'SH']:

            self.logger.info('  Make {} plots...'.format(hemisphere))

            for variableName in ['iceArea', 'iceVolume']:
                key = (hemisphere, variableName)

                # apply the norm to each variable
                plotVars[key] = (norm[variableName] *
                                 dsTimeSeries[hemisphere][variableName])

                if self.controlConfig is not None:
                    plotVarsRef[key] = norm[variableName] * \
                        dsTimeSeriesRef[hemisphere][variableName]

                prefix = '{}/{}{}_{}'.format(self.plotsDirectory,
                                             variableName,
                                             hemisphere,
                                             mainRunName)

                figureNameStd[key] = '{}.png'.format(prefix)
                figureNamePolar[key] = '{}_polar.png'.format(prefix)

                title[key] = '{} ({})'.format(plotTitles[variableName],
                                              hemisphere)

            if compareWithObservations:
                key = (hemisphere, 'iceArea')
                obsLegend[key] = 'SSM/I observations, annual cycle '
                if hemisphere == 'NH':
                    key = (hemisphere, 'iceVolume')
                    obsLegend[key] = 'PIOMAS, annual cycle (blue)'

            if preprocessedReferenceRunName != 'None':
                for variableName in ['iceArea', 'iceVolume']:
                    key = (hemisphere, variableName)

            if compareWithObservations:

                outFolder = '{}/obs'.format(outputDirectory)
                make_directories(outFolder)
                outFileName = '{}/iceArea{}.nc'.format(outFolder, hemisphere)

                combine_time_series_with_ncrcat(
                    obsFileNames['iceArea'][hemisphere],
                    outFileName, logger=self.logger)
                dsObs = open_mpas_dataset(fileName=outFileName,
                                          calendar=calendar,
                                          timeVariableNames='xtime')
                key = (hemisphere, 'iceArea')
                obs[key] = self._replicate_cycle(plotVars[key], dsObs.IceArea,
                                                 calendar)

                key = (hemisphere, 'iceVolume')
                if hemisphere == 'NH':
                    outFileName = '{}/iceVolume{}.nc'.format(outFolder,
                                                             hemisphere)
                    combine_time_series_with_ncrcat(
                        obsFileNames['iceVolume'][hemisphere],
                        outFileName, logger=self.logger)
                    dsObs = open_mpas_dataset(fileName=outFileName,
                                              calendar=calendar,
                                              timeVariableNames='xtime')
                    obs[key] = self._replicate_cycle(plotVars[key],
                                                     dsObs.IceVol,
                                                     calendar)
                else:
                    obs[key] = None

            if preprocessedReferenceRunName != 'None':
                outFolder = '{}/preprocessed'.format(outputDirectory)
                inFilesPreprocessed = '{}/icearea.{}.year*.nc'.format(
                    preprocessedReferenceDirectory,
                    preprocessedReferenceRunName)

                outFileName = '{}/iceArea.nc'.format(outFolder)

                combine_time_series_with_ncrcat(inFilesPreprocessed,
                                                outFileName,
                                                logger=self.logger)
                dsPreprocessed = open_mpas_dataset(fileName=outFileName,
                                                   calendar=calendar,
                                                   timeVariableNames='xtime')
                dsPreprocessedTimeSlice = dsPreprocessed.sel(
                    Time=slice(timeStart, timeEnd))
                key = (hemisphere, 'iceArea')
                preprocessed[key] = dsPreprocessedTimeSlice[
                    'icearea_{}'.format(hemisphere.lower())]

                inFilesPreprocessed = '{}/icevol.{}.year*.nc'.format(
                    preprocessedReferenceDirectory,
                    preprocessedReferenceRunName)
                outFileName = '{}/iceVolume.nc'.format(outFolder)

                combine_time_series_with_ncrcat(inFilesPreprocessed,
                                                outFileName,
                                                logger=self.logger)
                dsPreprocessed = open_mpas_dataset(fileName=outFileName,
                                                   calendar=calendar,
                                                   timeVariableNames='xtime')
                dsPreprocessedTimeSlice = dsPreprocessed.sel(
                    Time=slice(timeStart, timeEnd))
                key = (hemisphere, 'iceVolume')
                preprocessed[key] = dsPreprocessedTimeSlice[
                    'icevolume_{}'.format(hemisphere.lower())]

            for variableName in ['iceArea', 'iceVolume']:
                key = (hemisphere, variableName)
                dsvalues = [plotVars[key]]
                legendText = [mainRunName]
                lineColors = ['k']
                lineWidths = [3]
                if compareWithObservations and key in obsLegend.keys():
                    dsvalues.append(obs[key])
                    legendText.append(obsLegend[key])
                    lineColors.append('b')
                    lineWidths.append(1.2)
                if preprocessedReferenceRunName != 'None':
                    dsvalues.append(preprocessed[key])
                    legendText.append(preprocessedReferenceRunName)
                    lineColors.append('purple')
                    lineWidths.append(1.2)

                if self.controlConfig is not None:
                    dsvalues.append(plotVarsRef[key])
                    legendText.append(controlRunName)
                    lineColors.append('r')
                    lineWidths.append(1.2)

                if config.has_option(sectionName, 'firstYearXTicks'):
                    firstYearXTicks = config.getint(sectionName,
                                                    'firstYearXTicks')
                else:
                    firstYearXTicks = None

                if config.has_option(sectionName, 'yearStrideXTicks'):
                    yearStrideXTicks = config.getint(sectionName,
                                                     'yearStrideXTicks')
                else:
                    yearStrideXTicks = None

                # separate plots for northern and southern hemispheres
                timeseries_analysis_plot(config, dsvalues,
                                         movingAveragePoints,
                                         title[key], xLabel,
                                         units[variableName],
                                         calendar=calendar,
                                         lineColors=lineColors,
                                         lineWidths=lineWidths,
                                         legendText=legendText,
                                         titleFontSize=titleFontSize,
                                         firstYearXTicks=firstYearXTicks,
                                         yearStrideXTicks=yearStrideXTicks)

                savefig(figureNameStd[key])

                filePrefix = '{}{}_{}'.format(variableName,
                                              hemisphere,
                                              mainRunName)
                thumbnailDescription = '{} {}'.format(
                    hemisphere, plotTitles[variableName])
                caption = 'Running mean of {}'.format(
                    thumbnailDescription)
                write_image_xml(
                    config,
                    filePrefix,
                    componentName='Sea Ice',
                    componentSubdirectory='sea_ice',
                    galleryGroup=galleryGroup,
                    groupLink=groupLink,
                    thumbnailDescription=thumbnailDescription,
                    imageDescription=caption,
                    imageCaption=caption)

                if polarPlot:
                    timeseries_analysis_plot_polar(
                        config,
                        dsvalues,
                        movingAveragePoints,
                        title[key],
                        lineColors=lineColors,
                        lineWidths=lineWidths,
                        legendText=legendText,
                        titleFontSize=titleFontSize)

                    savefig(figureNamePolar[key])

                    filePrefix = '{}{}_{}_polar'.format(variableName,
                                                        hemisphere,
                                                        mainRunName)
                    write_image_xml(
                        config,
                        filePrefix,
                        componentName='Sea Ice',
                        componentSubdirectory='sea_ice',
                        galleryGroup=galleryGroup,
                        groupLink=groupLink,
                        thumbnailDescription=thumbnailDescription,
                        imageDescription=caption,
                        imageCaption=caption)
Beispiel #14
0
def open_mpas_dataset(
        fileName,
        calendar,
        timeVariableNames=['xtime_startMonthly', 'xtime_endMonthly'],
        variableList=None,
        startDate=None,
        endDate=None):  # {{{
    """
    Opens and returns an xarray data set given file name(s) and the MPAS
    calendar name.

    Parameters
    ----------
    fileName : str
        File path to read

    calendar : {``'gregorian'``, ``'gregorian_noleap'``}
        The name of one of the calendars supported by MPAS cores

    timeVariableNames : str or list of 2 str, optional
        The name of the time variable (typically ``'xtime'``
        or ``['xtime_startMonthly', 'xtime_endMonthly']``), or ``None`` if
        time does not need to be parsed (and is already in the ``Time``
        variable)

    variableList : list of strings, optional
        If present, a list of variables to be included in the data set

    startDate, endDate : string or datetime.datetime, optional
        If present, the first and last dates to be used in the data set.  The
        time variable is sliced to only include dates within this range.

    Returns
    -------
    ds : ``xarray.Dataset``

    Raises
    ------
    TypeError
        If the time variable has an unsupported type (not a date string).

    ValueError
        If the time variable is not found in the data set
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    ds = xarray.open_dataset(fileName,
                             decode_cf=True,
                             decode_times=False,
                             lock=False)

    if timeVariableNames is not None:
        ds = _parse_dataset_time(ds, timeVariableNames, calendar)

    if startDate is not None and endDate is not None:
        if isinstance(startDate, six.string_types):
            startDate = string_to_days_since_date(dateString=startDate,
                                                  calendar=calendar)
        if isinstance(endDate, six.string_types):
            endDate = string_to_days_since_date(dateString=endDate,
                                                calendar=calendar)

        # select only the data in the specified range of dates
        ds = ds.sel(Time=slice(startDate, endDate))

    if ds.dims['Time'] == 0:
        raise ValueError('The data set contains no Time entries between '
                         'dates {} and {}.'.format(
                             days_to_datetime(startDate, calendar=calendar),
                             days_to_datetime(endDate, calendar=calendar)))
    if variableList is not None:
        ds = subset_variables(ds, variableList)

    return ds  # }}}
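
A minimal usage sketch (untested): the file name, variable name and date
range below are assumptions for illustration, not values taken from the
examples above.

ds = open_mpas_dataset(
    fileName='timeSeriesStatsMonthly.0001-01-01.nc',  # assumed file name
    calendar='gregorian_noleap',
    timeVariableNames=['xtime_startMonthly', 'xtime_endMonthly'],
    variableList=['timeMonthly_avg_activeTracers_temperature'],
    startDate='0001-01-01',
    endDate='0002-01-01')
# the 'Time' coordinate now holds days since the reference date, so
# ds.sel(Time=...) works with values from string_to_days_since_date()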
Beispiel #15
0
def cache_time_series(timesInDataSet,
                      timeSeriesCalcFunction,
                      cacheFileName,
                      calendar,
                      yearsPerCacheUpdate=1,
                      logger=None):  # {{{
    '''
    Create or update a NetCDF file ``cacheFileName`` containing the given time
    series, calculated with ``timeSeriesCalcFunction`` at the given times and
    written to the cache file every ``yearsPerCacheUpdate`` years of data.

    Note: only works with climatologies where the mask (locations of ``NaN``
    values) doesn't vary with time.

    Parameters
    ----------
    timesInDataSet : array-like
        Times at which the time series is to be calculated, typically taken
        from ``ds.Times.values`` for a data set from which the time series
        will be extracted or computed.

    timeSeriesCalcFunction : function
        A function with arguments ``timeIndices``, indicating the entries in
        ``timesInDataSet`` to be computed, and ``firstCall``, indicating
        whether this is the first call to the function (useful for printing
        progress information).

    cacheFileName : str
        The absolute path to the cache file where the time series will be
        stored

    calendar : {'gregorian', 'gregorian_noleap'}
        The name of one of the calendars supported by MPAS cores, used to
        determine ``year`` and ``month`` from ``Time`` coordinate

    yearsPerCacheUpdate : int, optional
        The frequency with which the cache file is updated as the computation
        progresses.  If the computation is expensive, it may be useful to
        output the file frequently.  If not, there will be needless overhead
        in caching the file too frequently.

    logger : ``logging.Logger``, optional
        A logger to which to write output as the time series is computed

    Returns
    -------
    dsCache : ``xarray.Dataset``
        The cached time series, restricted to the range of times in
        ``timesInDataSet``.
    '''
    # Authors
    # -------
    # Xylar Asay-Davis

    timesProcessed = numpy.zeros(len(timesInDataSet), bool)
    # figure out which files to load and which years go in each file
    continueOutput = os.path.exists(cacheFileName)
    cacheDataSetExists = False
    if continueOutput:
        if logger is not None:
            logger.info('   Read in previously computed time series')
        # read in what we have so far

        try:
            dsCache = xr.open_dataset(cacheFileName, decode_times=False)
            cacheDataSetExists = True
        except IOError:
            # assume the cache file is corrupt and delete it
            message = 'Deleting cache file {}, which appears to have ' \
                      'been corrupted.'.format(cacheFileName)
            if logger is None:
                print('Warning: {}'.format(message))
            else:
                logger.warning(message)
            os.remove(cacheFileName)

        if cacheDataSetExists:
            # force loading and then close so we can overwrite the file later
            dsCache.load()
            dsCache.close()
            for time in dsCache.Time.values:
                timesProcessed[timesInDataSet == time] = True

    datetimes = days_to_datetime(timesInDataSet, calendar=calendar)
    yearsInDataSet = numpy.array([date.year for date in datetimes])

    startYear = yearsInDataSet[0]
    endYear = yearsInDataSet[-1]

    firstProcessed = True
    for firstYear in range(startYear, endYear + 1, yearsPerCacheUpdate):
        years = range(
            firstYear,
            numpy.minimum(endYear + 1, firstYear + yearsPerCacheUpdate))

        mask = numpy.zeros(len(yearsInDataSet), bool)
        for year in years:
            mask = numpy.logical_or(mask, yearsInDataSet == year)
        mask = numpy.logical_and(mask, numpy.logical_not(timesProcessed))

        timeIndices = numpy.nonzero(mask)[0]

        if len(timeIndices) == 0:
            # no unprocessed time entries in this data range
            continue

        if logger is not None:
            if firstProcessed:
                logger.info('   Process and save time series')
            if yearsPerCacheUpdate == 1:
                logger.info('     {:04d}'.format(years[0]))
            else:
                logger.info('     {:04d}-{:04d}'.format(years[0], years[-1]))

        ds = timeSeriesCalcFunction(timeIndices, firstProcessed)
        firstProcessed = False

        if cacheDataSetExists:
            dsCache = xr.concat([dsCache, ds], dim='Time')
            # now sort the Time dimension:
            dsCache = dsCache.loc[{'Time': sorted(dsCache.Time.values)}]
        else:
            dsCache = ds
            cacheDataSetExists = True

        dsCache.to_netcdf(cacheFileName)

    return dsCache.sel(Time=slice(timesInDataSet[0], timesInDataSet[-1]))
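
A minimal sketch (untested) of a calculation function compatible with
cache_time_series; dsIn is an assumed, already-open data set with 'Time'
and 'nCells' dimensions, and the file name is made up.

def _compute_regional_means(timeIndices, firstCall):
    # compute only the requested (not-yet-cached) time entries; the result
    # must have a 'Time' dimension matching timesInDataSet[timeIndices]
    if firstCall:
        print('computing regional means...')
    return dsIn.isel(Time=timeIndices).mean(dim='nCells')

dsSeries = cache_time_series(dsIn.Time.values, _compute_regional_means,
                             cacheFileName='regionalMeans.nc',
                             calendar='gregorian_noleap',
                             yearsPerCacheUpdate=5)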
Beispiel #16
0
def open_multifile_dataset(fileNames, calendar, config,
                           simulationStartTime=None,
                           timeVariableName='Time',
                           variableList=None, selValues=None,
                           iselValues=None, variableMap=None,
                           startDate=None, endDate=None,
                           chunking=None):  # {{{
    """
    Opens and returns an xarray data set given file name(s) and the MPAS
    calendar name.

    Parameters
    ----------
    fileNames : list of strings
        A list of file paths to read

    calendar : {``'gregorian'``, ``'gregorian_noleap'``}
        The name of one of the calendars supported by MPAS cores

    config : instance of ``MpasAnalysisConfigParser``
        Contains configuration options

    simulationStartTime : string, optional
        The start date of the simulation, used to convert from time variables
        expressed as days since the start of the simulation to days since the
        reference date. ``simulationStartTime`` takes one of the following
        forms::

            0001-01-01
            0001-01-01 00:00:00

        ``simulationStartTime`` is only required if the MPAS time variable
        (identified by ``timeVariableName``) is a number of days since the
        start of the simulation.

    timeVariableName : string, optional
        The name of the time variable (typically ``'Time'`` if using a
        ``variableMap`` or ``'xtime'`` if not using a ``variableMap``)

    variableList : list of strings, optional
        If present, a list of variables to be included in the data set

    selValues : dict, optional
        A dictionary of coordinate names (keys) and values or arrays of
        values used to slice the variables in the data set.  See
        ``xarray.Dataset.sel()`` for details on how this dictionary is used.
        An example::

            selValues = {'cellLon': 180.0}

    iselValues : dict, optional
        A dictionary of coordinate names (keys) and indices, slices or
        arrays of indices used to slice the variables in the data set.  See
        ``xarray.Dataset.isel()`` for details on how this dictionary is used.
        An example::

            iselValues = {'nVertLevels': slice(0, 3),
                          'nCells': cellIDs}

    variableMap : dict, optional
        A dictionary with keys that are variable names used by
        MPAS-Analysis and values that are lists of possible names for the same
        variable in the MPAS dycore that produced the data set (which may
        differ between versions).

    startDate, endDate : string or datetime.datetime, optional
        If present, the first and last dates to be used in the data set.  The
        time variable is sliced to only include dates within this range.

    chunking : None, int, True, dict, optional
        If an integer, it is used as the maximum chunk size along any
        dimension.  If ``None``, no chunking is performed.  If ``True``, the
        maximum chunk size is taken from the config option ``maxChunkSize``.
        If a dict, the entries give the chunk size to use for each named
        dimension.

    Returns
    -------
    ds : ``xarray.Dataset``

    Raises
    ------
    TypeError
        If the time variable has an unsupported type (not a date string,
        a floating-point number of days since the start of the simulation
        or a ``numpy.datetime64`` object).

    ValueError
        If the time variable is not found in the data set or if the time
        variable is a number of days since the start of the simulation but
        simulationStartTime is None.
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Phillip J. Wolfram

    preprocess_partial = partial(_preprocess,
                                 calendar=calendar,
                                 simulationStartTime=simulationStartTime,
                                 timeVariableName=timeVariableName,
                                 variableList=variableList,
                                 selValues=selValues,
                                 iselValues=iselValues,
                                 variableMap=variableMap,
                                 startDate=startDate,
                                 endDate=endDate)

    kwargs = {'decode_times': False,
              'concat_dim': 'Time'}

    autocloseFileLimitFraction = config.getfloat('input',
                                                 'autocloseFileLimitFraction')

    # get the number of files that can be open at the same time.  We want the
    # "soft" limit because we'll get a crash if we exceed it.
    softLimit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]

    # use autoclose if we will use more than autocloseFileLimitFraction (50%
    # by default) of the soft limit of open files
    autoclose = len(fileNames) > softLimit*autocloseFileLimitFraction

    try:
        ds = xarray.open_mfdataset(fileNames,
                                   preprocess=preprocess_partial,
                                   autoclose=autoclose, **kwargs)
    except TypeError as e:
        if 'autoclose' in str(e):
            if autoclose:
                # This indicates that xarray version doesn't support autoclose
                print('Warning: open_multifile_dataset is trying to use '
                      'autoclose=True but\n'
                      'it appears your xarray version doesn\'t support this '
                      'argument. Will\n'
                      'try again without autoclose argument.')

            ds = xarray.open_mfdataset(fileNames,
                                       preprocess=preprocess_partial,
                                       **kwargs)
        else:
            raise e

    ds = mpas_xarray.remove_repeated_time_index(ds)

    if startDate is not None and endDate is not None:
        if isinstance(startDate, six.string_types):
            startDate = string_to_days_since_date(dateString=startDate,
                                                  calendar=calendar)
        if isinstance(endDate, six.string_types):
            endDate = string_to_days_since_date(dateString=endDate,
                                                calendar=calendar)

    # select only the data in the specified range of dates
    ds = ds.sel(Time=slice(startDate, endDate))

    if ds.dims['Time'] == 0:
        raise ValueError('The data set contains no Time entries between '
                         'dates {} and {}.'.format(
                             days_to_datetime(startDate, calendar=calendar),
                             days_to_datetime(endDate, calendar=calendar)))
    # process chunking
    if chunking is True:
        # limit chunk size to prevent memory error
        chunking = config.getint('input', 'maxChunkSize')

    ds = mpas_xarray.process_chunking(ds, chunking)

    # private record of autoclose use
    ds.attrs['_autoclose'] = int(autoclose)

    return ds  # }}}
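
A minimal usage sketch (untested): the glob pattern and variable name are
assumptions, and config is assumed to be an MpasAnalysisConfigParser
providing the 'input' options read above.

import glob

fileNames = sorted(glob.glob('output/timeSeriesStatsMonthly.*.nc'))
ds = open_multifile_dataset(fileNames,
                            calendar='gregorian_noleap',
                            config=config,  # assumed config object
                            timeVariableName='xtime',
                            variableList=['timeMonthly_avg_ssh'],
                            startDate='0001-01-01',
                            endDate='0011-01-01',
                            chunking=True)  # capped by 'maxChunkSize'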
Beispiel #17
0
def _parse_dataset_time(ds, inTimeVariableName, calendar, simulationStartTime,
                        outTimeVariableName, referenceDate):  # {{{
    """
    A helper function for computing a time coordinate from an MPAS time
    variable.  Given a data set and a time variable name (or tuple of 2
    time names), returns a new data set with time coordinate
    `outTimeVariableName` filled with days since `referenceDate`

    Parameters
    ----------
    ds : xarray.Dataset object
        The data set containing an MPAS time variable to be used to build
        an xarray time coordinate.

    inTimeVariableName : string or tuple or list of strings
        The name of the time variable in the MPAS data set that will be
        used to build the 'Time' coordinate.  The array(s) named by
        inTimeVariableName should contain date strings or the number of
        days since the start of the simulation.  Typically,
        inTimeVariableName is one of {'daysSinceStartOfSim', 'xtime'}.
        If a list of two variable names is provided, times from the two are
        averaged together to determine the value of the time coordinate.
        In such cases, inTimeVariableName is typically
        ['xtime_start', 'xtime_end'].

    calendar : {'gregorian', 'gregorian_noleap'}
        The name of one of the calendars supported by MPAS cores

    simulationStartTime : string
        The start date of the simulation, used to convert from time variables
        expressed as days since the start of the simulation to days since the
        reference date. `simulationStartTime` takes one of the following
        forms::

            0001-01-01
            0001-01-01 00:00:00

        simulationStartTime is only required if the MPAS time variable
        (identified by timeVariableName) is a number of days since the
        start of the simulation.

    outTimeVariableName : string
        The name of the coordinate to assign times to, typically 'Time'.

    referenceDate : string
        The reference date for the time variable, typically '0001-01-01',
        taking one of the following forms::

            0001-01-01
            0001-01-01 00:00:00

    Returns
    -------
    dataset : xarray.Dataset object
        A copy of the input data set with the `outTimeVariableName`
        coordinate containing the time coordinate parsed from
        `inTimeVariableName`.

    Raises
    ------
    TypeError
        If the time variable has an unsupported type (not a date string
        or a floating-point number of days since the start of the
        simulation).
    ValueError
        If the time variable is a number of days since the start of the
        simulation but simulationStartTime is None.
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    if isinstance(inTimeVariableName, (tuple, list)):
        # we want to average the two
        assert (len(inTimeVariableName) == 2)

        dsStart = _parse_dataset_time(ds=ds,
                                      inTimeVariableName=inTimeVariableName[0],
                                      calendar=calendar,
                                      simulationStartTime=simulationStartTime,
                                      outTimeVariableName=outTimeVariableName,
                                      referenceDate=referenceDate)
        dsEnd = _parse_dataset_time(ds=ds,
                                    inTimeVariableName=inTimeVariableName[1],
                                    calendar=calendar,
                                    simulationStartTime=simulationStartTime,
                                    outTimeVariableName=outTimeVariableName,
                                    referenceDate=referenceDate)
        starts = dsStart[outTimeVariableName].values
        ends = dsEnd[outTimeVariableName].values

        # replace the time in starts with the mean of starts and ends
        dsOut = dsStart.copy()

        dsOut.coords['startTime'] = (outTimeVariableName, starts)
        dsOut.coords['endTime'] = (outTimeVariableName, ends)

        dsOut.coords[outTimeVariableName] = (outTimeVariableName, [
            starts[i] + (ends[i] - starts[i]) / 2 for i in range(len(starts))
        ])

    else:

        # there is just one time variable (either because we're recursively
        # calling the function or because we're not averaging).

        # The contents of the time variable is expected to be either a string
        # (|S64) or a float (meaning days since start of the simulation).

        timeVar = ds[inTimeVariableName]

        if timeVar.dtype == '|S64':
            # this is an array of date strings like 'xtime'
            # convert to string
            timeStrings = [
                ''.join(str(xtime.astype('U'))).strip()
                for xtime in timeVar.values
            ]
            days = string_to_days_since_date(dateString=timeStrings,
                                             referenceDate=referenceDate,
                                             calendar=calendar)

        elif timeVar.dtype == 'float64':
            # this array contains floating-point days like
            # 'daysSinceStartOfSim'

            if simulationStartTime is None:
                raise ValueError('MPAS time variable {} appears to be a '
                                 'number of days since the start of the '
                                 'simulation, but simulationStartTime was '
                                 'not supplied.'.format(inTimeVariableName))

            if (string_to_datetime(referenceDate) == string_to_datetime(
                    simulationStartTime)):
                days = timeVar.values
            else:
                # a conversion may be required
                dates = days_to_datetime(days=timeVar.values,
                                         referenceDate=simulationStartTime,
                                         calendar=calendar)
                days = datetime_to_days(dates=dates,
                                        referenceDate=referenceDate,
                                        calendar=calendar)

        elif timeVar.dtype == 'timedelta64[ns]':
            raise TypeError('timeVar of unsupported type {}.  This is likely '
                            'because xarray.open_dataset \n'
                            'was called with decode_times=True, which can '
                            'mangle MPAS times.'.format(timeVar.dtype))
        else:
            raise TypeError("timeVar of unsupported type {}".format(
                timeVar.dtype))

        dsOut = ds.copy()
        dsOut.coords[outTimeVariableName] = (outTimeVariableName, days)

    return dsOut  # }}}
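
A minimal sketch (untested) of driving _parse_dataset_time directly with a
pair of date-string time variables, as open_mpas_dataset does for monthly
averages; the dates are arbitrary.

import numpy
import xarray

ds = xarray.Dataset({
    'xtime_start': ('Time',
                    numpy.array([b'0001-01-01_00:00:00'], dtype='S64')),
    'xtime_end': ('Time',
                  numpy.array([b'0001-02-01_00:00:00'], dtype='S64'))})

ds = _parse_dataset_time(ds, ['xtime_start', 'xtime_end'],
                         calendar='gregorian_noleap',
                         simulationStartTime=None,
                         outTimeVariableName='Time',
                         referenceDate='0001-01-01')
# ds.Time now holds the midpoint of each start/end pair in days since the
# reference date; the originals are kept as startTime and endTime coords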
Beispiel #18
0
    def _compute_moc_time_series_postprocess(self):  # {{{
        '''compute MOC time series as a post-process'''

        # Compute and plot time series of Atlantic MOC at 26.5N (RAPID array)
        self.logger.info('\n  Compute and/or plot post-processed Atlantic MOC '
                         'time series...')
        self.logger.info('   Load data...')

        outputDirectory = build_config_full_path(self.config, 'output',
                                                 'timeseriesSubdirectory')
        try:
            os.makedirs(outputDirectory)
        except OSError:
            pass

        outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory)

        dvEdge, areaCell, refBottomDepth, latCell, nVertLevels, \
            refTopDepth, refLayerThickness = self._load_mesh()

        latAtlantic = self.lat['Atlantic']
        dLat = latAtlantic - 26.5
        indlat26 = np.where(np.abs(dLat) == np.amin(np.abs(dLat)))

        dictRegion = self.dictRegion['Atlantic']
        maxEdgesInTransect = dictRegion['maxEdgesInTransect']
        transectEdgeGlobalIDs = dictRegion['transectEdgeGlobalIDs']
        transectEdgeMaskSigns = dictRegion['transectEdgeMaskSigns']
        regionCellMask = dictRegion['cellMask']

        streamName = 'timeSeriesStatsMonthlyOutput'
        inputFilesTseries = sorted(
            self.historyStreams.readpath(streamName,
                                         startDate=self.startDateTseries,
                                         endDate=self.endDateTseries,
                                         calendar=self.calendar))

        years, months = get_files_year_month(inputFilesTseries,
                                             self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        mocRegion = np.zeros(len(inputFilesTseries))
        times = np.zeros(len(inputFilesTseries))
        computed = np.zeros(len(inputFilesTseries), bool)

        continueOutput = os.path.exists(outputFileTseries)
        if continueOutput:
            self.logger.info('   Read in previously computed MOC time series')
            with open_mpas_dataset(fileName=outputFileTseries,
                                   calendar=self.calendar,
                                   timeVariableNames=None,
                                   variableList=['mocAtlantic26'],
                                   startDate=self.startDateTseries,
                                   endDate=self.endDateTseries) as dsMOCIn:

                dsMOCIn.load()

                # first, copy all computed data
                for inIndex in range(dsMOCIn.dims['Time']):

                    mask = np.logical_and(
                        dsMOCIn.year[inIndex].values == years,
                        dsMOCIn.month[inIndex].values == months)

                    outIndex = np.where(mask)[0][0]

                    mocRegion[outIndex] = dsMOCIn.mocAtlantic26[inIndex]
                    times[outIndex] = dsMOCIn.Time[inIndex]
                    computed[outIndex] = True

                if np.all(computed):
                    # no need to waste time writing out the data set again
                    return dsMOCIn

        for timeIndex, fileName in enumerate(inputFilesTseries):
            if computed[timeIndex]:
                continue

            dsLocal = open_mpas_dataset(fileName=fileName,
                                        calendar=self.calendar,
                                        variableList=self.variableList,
                                        startDate=self.startDateTseries,
                                        endDate=self.endDateTseries)
            dsLocal = dsLocal.isel(Time=0)
            time = dsLocal.Time.values
            times[timeIndex] = time
            date = days_to_datetime(time, calendar=self.calendar)

            self.logger.info('     date: {:04d}-{:02d}'.format(
                date.year, date.month))

            if self.includeBolus:
                dsLocal['avgNormalVelocity'] = \
                    dsLocal['timeMonthly_avg_normalVelocity'] + \
                    dsLocal['timeMonthly_avg_normalGMBolusVelocity']

                dsLocal['avgVertVelocityTop'] = \
                    dsLocal['timeMonthly_avg_vertVelocityTop'] + \
                    dsLocal['timeMonthly_avg_vertGMBolusVelocityTop']
            else:
                # rename some variables for convenience
                dsLocal = dsLocal.rename({
                    'timeMonthly_avg_normalVelocity':
                    'avgNormalVelocity',
                    'timeMonthly_avg_vertVelocityTop':
                    'avgVertVelocityTop'
                })

            horizontalVel = dsLocal.avgNormalVelocity.values
            verticalVel = dsLocal.avgVertVelocityTop.values
            velArea = verticalVel * areaCell[:, np.newaxis]
            transportZ = self._compute_transport(maxEdgesInTransect,
                                                 transectEdgeGlobalIDs,
                                                 transectEdgeMaskSigns,
                                                 nVertLevels, dvEdge,
                                                 refLayerThickness,
                                                 horizontalVel)
            mocTop = self._compute_moc(latAtlantic, nVertLevels, latCell,
                                       regionCellMask, transportZ, velArea)
            mocRegion[timeIndex] = np.amax(mocTop[:, indlat26])

        description = 'Max MOC Atlantic streamfunction nearest to RAPID ' \
            'Array latitude (26.5N)'

        dictionary = {
            'dims': ['Time'],
            'coords': {
                'Time': {
                    'dims': ('Time'),
                    'data': times,
                    'attrs': {
                        'units': 'days since 0001-01-01'
                    }
                },
                'year': {
                    'dims': ('Time'),
                    'data': years,
                    'attrs': {
                        'units': 'year'
                    }
                },
                'month': {
                    'dims': ('Time'),
                    'data': months,
                    'attrs': {
                        'units': 'month'
                    }
                }
            },
            'data_vars': {
                'mocAtlantic26': {
                    'dims': ('Time'),
                    'data': mocRegion,
                    'attrs': {
                        'units': 'Sv (10^6 m^3/s)',
                        'description': description
                    }
                }
            }
        }
        dsMOCTimeSeries = xr.Dataset.from_dict(dictionary)
        write_netcdf(dsMOCTimeSeries, outputFileTseries)

        return dsMOCTimeSeries  # }}}
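
The dictionary-based construction above can be useful on its own; here is a
toy sketch of the same xarray.Dataset.from_dict pattern with made-up values.

import numpy as np
import xarray as xr

toyDict = {
    'dims': ['Time'],
    'coords': {'Time': {'dims': ('Time',),
                        'data': np.array([15.5, 45.0]),
                        'attrs': {'units': 'days since 0001-01-01'}}},
    'data_vars': {'mocAtlantic26': {'dims': ('Time',),
                                    'data': np.array([17.2, 17.5]),
                                    'attrs': {'units': 'Sv (10^6 m^3/s)'}}}}
dsToy = xr.Dataset.from_dict(toyDict)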