Example #1
    def test_time_coordinates_24_hour(self):
        """Time coordinate should be a series of mid points calculated from the
        start hour + half the period. Each should have an associated pair of
        bounds that show the range over which the extrema values have been
        calculated.

        The first day is the first day for which any site has valid data. The
        UTC time coord starts at 00 UTC on 27th April 2017, so the first day
        in which any data falls is 26th April 2017 (for any UTC-N sites).
        The first 24 hours starting at 00 therefore runs 00 26th to 00 27th.
        Subsequent 24 hour periods are then expected up to the latest day
        for which any site has data; that of a UTC+14 site.

        The input data spans 48 hours; with timezone adjustments this
        spreads across three days."""

        n_periods = 72 // 24
        mid_start = mktime(dt(2017, 4, 26, 12).utctimetuple()) / 3600.
        lower_bound = mktime(dt(2017, 4, 26, 0).utctimetuple()) / 3600.
        upper_bound = mktime(dt(2017, 4, 27, 0).utctimetuple()) / 3600.

        result = Plugin(24, start_hour=0).process(self.cube)
        result = result.extract(Constraint(name='air_temperature_max'))

        # Expected time coordinate values.
        for i in range(n_periods):
            mid_time = mid_start + (i * 24.)
            low_bound = lower_bound + (i * 24.)
            up_bound = upper_bound + (i * 24.)

            self.assertEqual(result[i].coord('time').points, [mid_time])
            self.assertEqual(result[i].coord('time').bounds[0, 0], [low_bound])
            self.assertEqual(result[i].coord('time').bounds[0, 1], [up_bound])
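
The mid-point and bounds arithmetic this test exercises can be sketched in
isolation. Below is a minimal sketch, assuming times are expressed as hours
since the epoch; the helper name period_time_coords is hypothetical rather
than part of the plugin, and calendar.timegm is used for a
timezone-independent conversion where the test above relies on time.mktime.

from calendar import timegm
from datetime import datetime as dt

def period_time_coords(start, period_hours, n_periods):
    """Yield (mid_point, lower_bound, upper_bound) in hours since epoch."""
    start_hours = timegm(start.utctimetuple()) / 3600.
    for i in range(n_periods):
        lower = start_hours + i * period_hours
        upper = lower + period_hours
        yield (lower + upper) / 2., lower, upper

# Three 24 hour periods starting 00Z on 26th April 2017.
for mid, low, up in period_time_coords(dt(2017, 4, 26), 24, 3):
    print(mid, low, up)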
Example #2
    def test_extrema_values_day1(self):
        """Test the actual values returned by the collapse method to ensure it
        is successfully extracting the maximum/minimum temperatures in the
        defined period.

        UTC times : 00  01  02  ... 10 11 12 13 14
        UTC offset:-12 -11 -10  ... -2 -1  0  1  2
        site index:  0   1   2  ... 10 11 12 13 14
        Local time: 12  13  14  ... 22 23 00 01 02

        site_index 12 at time_index 0 is adjusted to fall at 00 27th
        April 2017 in local time, so it is expected to fall outside day 1.
        Setting a high value for this site should therefore leave the day 1
        maximum for site 12 masked.

        site_index 2 at time_index 9 (09Z 27 April 2017) will fall at 23 26th
        April 2017 local time, so setting this to 40 should modify the maximum
        for site 2 on day 1."""

        self.cube.data[9, 2] = 40
        self.cube.data[0, 12] = 40

        # Expected data array.
        expected = np.arange(0, 27).astype(float)
        expected[2] = 40.

        result = Plugin(24, start_hour=0).process(self.cube)
        result = result.extract(Constraint(name='air_temperature_max'))
        self.assertTrue(result[0].data[12].mask)
        self.assertArrayEqual(result[0].data, expected)
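
The local-time adjustment described in the docstring can be illustrated with
a toy helper. This is a sketch, not plugin code; falls_in_day1 is a
hypothetical name, and the offsets follow the docstring table above.

from datetime import datetime as dt, timedelta

day1_start = dt(2017, 4, 26)               # 00 local on 26th April
day1_end = day1_start + timedelta(hours=24)

def falls_in_day1(utc_time, utc_offset_hours):
    # Shift the UTC validity time into the site's local time.
    local_time = utc_time + timedelta(hours=utc_offset_hours)
    return day1_start <= local_time < day1_end

# site_index 12 (UTC+0) at 00Z 27th lands at 00 local on the 27th: outside
# day 1. site_index 2 (UTC-10) at 09Z 27th lands at 23 local on the 26th:
# inside day 1.
print(falls_in_day1(dt(2017, 4, 27, 0), 0))    # False
print(falls_in_day1(dt(2017, 4, 27, 9), -10))  # True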
Example #3
    def test_data_arrays_day1(self):
        """Test extraction of maxima and minima values from the time localised
        cube in the first 24 hours. The first day is the first day for which
        any site has valid data. The UTC time coord starts at 00 UTC on 27th
        April 2017, so the first day in which any data falls is 26th April
        2017 (for any UTC-N sites). The first 24 hours starting at 00
        therefore runs 00 26th to 00 27th. Any UTC+N sites will have no
        valid data for this first day. This test checks that the correct
        sites return valid data."""

        # Expected time coordinate values.
        mid_time = mktime(dt(2017, 4, 26, 12).utctimetuple()) / 3600.
        lower_bound = mktime(dt(2017, 4, 26, 0).utctimetuple()) / 3600.
        upper_bound = mktime(dt(2017, 4, 27, 0).utctimetuple()) / 3600.

        # Expected data array.
        expected = np.full(self.n_data, np.nan)
        expected[0:12] = range(12)
        expected = np.ma.masked_invalid(expected)

        result = Plugin(24, start_hour=0).process(self.cube)
        result = result.extract(Constraint(name='air_temperature_max'))
        self.assertArrayEqual(expected, result[0].data)
        self.assertEqual(result[0].coord('time').points, [mid_time])
        self.assertEqual(result[0].coord('time').bounds[0, 0], [lower_bound])
        self.assertEqual(result[0].coord('time').bounds[0, 1], [upper_bound])
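
The masking behaviour assumed by the expected array can be demonstrated on
its own. A small self-contained sketch follows; the five-site size is
illustrative, standing in for the test's full set of sites.

import numpy as np

# Five sites, of which only the first three have valid data.
expected = np.full(5, np.nan)
expected[0:3] = range(3)
expected = np.ma.masked_invalid(expected)
print(expected)        # [0.0 1.0 2.0 -- --]
print(expected.mask)   # [False False False  True  True]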
Example #4
    def test_data_arrays_day2(self):
        """Test extraction of maxima and minima values from the time localised
        cube in the second 24 hours. All sites should return valid data during
        day 2 which runs 00 27th to 00 28th."""

        # Expected time coordinate values.
        mid_time = mktime(dt(2017, 4, 27, 12).utctimetuple()) / 3600.
        lower_bound = mktime(dt(2017, 4, 27, 0).utctimetuple()) / 3600.
        upper_bound = mktime(dt(2017, 4, 28, 0).utctimetuple()) / 3600.

        # Expected data array.
        expected = np.arange(0, 27)

        result = Plugin(24, start_hour=0).process(self.cube)
        result = result.extract(Constraint(name='air_temperature_max'))
        self.assertArrayEqual(expected, result[1].data)
        self.assertEqual(result[1].coord('time').points, [mid_time])
        self.assertEqual(result[1].coord('time').bounds[0, 0], [lower_bound])
        self.assertEqual(result[1].coord('time').bounds[0, 1], [upper_bound])
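
The day-to-index mapping these assertions rely on can be made explicit. A
short sketch under the same assumptions (three 24 hour periods starting 00
on 26th April):

from datetime import datetime as dt, timedelta

span_start = dt(2017, 4, 26)   # 00 on 26th April, start of day 1
for index in range(3):
    day_start = span_start + timedelta(hours=24 * index)
    day_end = day_start + timedelta(hours=24)
    print('result[{}] covers day {}: {} to {}'.format(
        index, index + 1, day_start, day_end))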
Example #5
def process_diagnostic(diagnostic,
                       neighbours,
                       sites,
                       forecast_times,
                       data_path,
                       ancillary_data,
                       output_path=None):
    """
    Extract data and write output for a given diagnostic.

    Args:
    -----
    diagnostic : string
        String naming the diagnostic to be processed.

    neighbours : numpy.array
        Array of neighbouring grid points that are associated with sites
        in the SortedDictionary of sites.

    sites : dict
        A dictionary containing the properties of spotdata sites.

    forecast_times : list of datetime.datetime objects
        A list of datetimes representing forecast times for which data is
        required.

    data_path : string
        Path to diagnostic data files.

    ancillary_data : dict
        A dictionary containing additional model data that is needed.
        e.g. {'orography': <cube of orography>}

    output_path : string
        Path to which output file containing processed diagnostic should be
        written.

    Returns:
    --------
    None

    Raises:
    -------
    IOError : If no relevant data files are found at the given path.
    Exception : If no data is available at the given forecast times.

    """
    # Search directory structure for all files relevant to current diagnostic.
    files_to_read = [
        os.path.join(dirpath, filename)
        for dirpath, _, files in os.walk(data_path) for filename in files
        if diagnostic['filepath'] in filename
    ]
    if not files_to_read:
        raise IOError('No relevant data files found in {}.'.format(data_path))

    # Load cubes into an iris.cube.CubeList.
    cubes = Load('multi_file').process(files_to_read,
                                       diagnostic['diagnostic_name'])

    # Grab the relevant set of grid point neighbours for the neighbour finding
    # method being used by this diagnostic.
    neighbour_hash = construct_neighbour_hash(diagnostic['neighbour_finding'])
    neighbour_list = neighbours[neighbour_hash]

    # Check if additional diagnostics are needed (e.g. multi-level data).
    # If required, load into the additional_diagnostics dictionary.
    additional_diagnostics = get_method_prerequisites(
        diagnostic['interpolation_method'], data_path)

    # Create empty iris.cube.CubeList to hold extracted data cubes.
    resulting_cubes = CubeList()

    # Get optional kwargs that may be set to override defaults.
    optionals = [
        'upper_level', 'lower_level', 'no_neighbours', 'dz_tolerance',
        'dthetadz_threshold', 'dz_max_adjustment'
    ]
    kwargs = {}
    config_constants = ancillary_data.get('config_constants')
    if config_constants is not None:
        for optional in optionals:
            constant = config_constants.get(optional)
            if constant is not None:
                kwargs[optional] = constant

    # Loop over forecast times.
    for a_time in forecast_times:
        # Extract Cube from CubeList at current time.
        time_extract = datetime_constraint(a_time)
        cube = extract_cube_at_time(cubes, a_time, time_extract)
        if cube is None:
            # If no cube is available at given time, try the next time.
            continue

        ad = {}
        if additional_diagnostics is not None:
            # Extract additional diagnostics at current time.
            ad = extract_ad_at_time(additional_diagnostics, a_time,
                                    time_extract)

        args = (cube, sites, neighbour_list, ancillary_data, ad)

        # Extract diagnostic data using defined method.
        resulting_cubes.append(
            ExtractData(diagnostic['interpolation_method']).process(
                *args, **kwargs))

    # Concatenate CubeList into Cube, creating a time DimCoord, and write out.
    if resulting_cubes:
        cube_out, = resulting_cubes.concatenate()
        WriteOutput('as_netcdf', dir_path=output_path).process(cube_out)
    else:
        raise Exception('No data available at given forecast times.')

    # If set in the configuration, extract the diagnostic maxima and minima
    # values.
    if diagnostic['extrema']:
        extrema_cubes = ExtractExtrema(24, start_hour=9).process(cube_out)
        extrema_cubes = extrema_cubes.merge()
        for extrema_cube in extrema_cubes:
            WriteOutput('as_netcdf',
                        dir_path=output_path).process(extrema_cube)
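
The directory search at the top of this function can be tried in isolation.
The sketch below is self-contained; the temporary tree and file names are
illustrative placeholders standing in for a real data_path.

import os
import tempfile

# Build a throwaway directory tree so the example runs anywhere.
data_path = tempfile.mkdtemp()
os.makedirs(os.path.join(data_path, 'sub'))
for name in ('temperature_at_screen_level_000.nc', 'unrelated.nc'):
    open(os.path.join(data_path, 'sub', name), 'w').close()

fragment = 'temperature_at_screen_level'   # stands in for diagnostic['filepath']
files_to_read = [
    os.path.join(dirpath, filename)
    for dirpath, _, files in os.walk(data_path) for filename in files
    if fragment in filename
]
print(files_to_read)   # ['.../sub/temperature_at_screen_level_000.nc']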
Example #6
def process_diagnostic(diagnostics, neighbours, sites, ancillary_data,
                       diagnostic_name):
    """
    Extract data and write output for a given diagnostic.

    Args:
        diagnostics (dict):
            Dictionary containing information regarding how the diagnostics
            are to be processed.

            For example::

              {
                  "temperature": {
                      "diagnostic_name": "air_temperature",
                      "extrema": true,
                      "filepath": "temperature_at_screen_level",
                      "interpolation_method":
                          "model_level_temperature_lapse_rate",
                      "neighbour_finding": {
                          "land_constraint": false,
                          "method": "fast_nearest_neighbour",
                          "vertical_bias": null
                      }
                  }
              }

        neighbours (numpy.array):
            Array of neighbouring grid points that are associated with sites
            in the SortedDictionary of sites.

        sites (dict):
            A dictionary containing the properties of spotdata sites.

        ancillary_data (dict):
            A dictionary containing additional model data that is needed.
            e.g. {'orography': <cube of orography>}

        diagnostic_name (string):
            A string matching the keys in the diagnostics dictionary that
            will be used to access information regarding how the diagnostic
            is to be processed.

    Returns:
        (tuple): tuple containing:
            **resulting_cube** (iris.cube.Cube or None):
                Cube after extracting the diagnostic requested using the
                desired extraction method.
                None is returned if the "resulting_cubes" is an empty CubeList
                after processing.
            **extrema_cubes** (iris.cube.CubeList or None):
                CubeList containing extrema values, if the 'extrema' diagnostic
                is requested.
                None is returned if diagnostics[diagnostic_name]["extrema"]
                is False (no extrema calculation is required), or if no
                data could be extracted.

    """
    diagnostic_dict = diagnostics[diagnostic_name]

    # Grab the relevant set of grid point neighbours for the neighbour finding
    # method being used by this diagnostic.
    neighbour_hash = construct_neighbour_hash(
        diagnostic_dict['neighbour_finding'])
    neighbour_list = neighbours[neighbour_hash]

    # Get optional kwargs that may be set to override defaults.
    optionals = [
        'upper_level', 'lower_level', 'no_neighbours', 'dz_tolerance',
        'dthetadz_threshold', 'dz_max_adjustment'
    ]
    kwargs = {}
    config_constants = ancillary_data.get('config_constants')
    if config_constants is not None:
        for optional in optionals:
            constant = config_constants.get(optional)
            if constant is not None:
                kwargs[optional] = constant

    # Create a list of datetimes to loop through.
    forecast_times = []
    for cube in diagnostic_dict["data"]:
        time = cube.coord("time")
        forecast_times.extend(time.units.num2date(time.points))

    # Create empty iris.cube.CubeList to hold extracted data cubes.
    resulting_cubes = CubeList()

    # Loop over forecast times.
    for a_time in forecast_times:
        # Extract Cube from CubeList at current time.
        time_extract = datetime_constraint(a_time)
        cube = extract_cube_at_time(diagnostic_dict["data"], a_time,
                                    time_extract)
        if cube is None:
            # If no cube is available at given time, try the next time.
            continue

        ad = {}
        if diagnostic_dict["additional_data"] is not None:
            # Extract additional diagnostics at current time.
            ad = extract_ad_at_time(diagnostic_dict["additional_data"], a_time,
                                    time_extract)

        args = (cube, sites, neighbour_list, ancillary_data, ad)

        # Extract diagnostic data using defined method.
        resulting_cubes.append(
            ExtractData(diagnostic_dict['interpolation_method']).process(
                *args, **kwargs))

    if resulting_cubes:
        # Concatenate CubeList into Cube for cubes with different
        # forecast times.
        resulting_cube = resulting_cubes.concatenate_cube()
    else:
        resulting_cube = None

    if diagnostic_dict['extrema'] and resulting_cube is not None:
        extrema_cubes = ExtractExtrema(24, start_hour=9).process(
            resulting_cube.copy())
        extrema_cubes = extrema_cubes.merge()
    else:
        extrema_cubes = None

    return resulting_cube, extrema_cubes
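
The forecast_times construction above goes through iris time units. A
dependency-free toy stand-in shows the accumulation pattern; num2date here is
a hypothetical substitute for time.units.num2date, assuming units of hours
since the epoch, and the point values are illustrative.

from datetime import datetime as dt, timedelta

def num2date(points, origin=dt(1970, 1, 1)):
    # Hypothetical stand-in for time.units.num2date (hours since epoch).
    return [origin + timedelta(hours=float(p)) for p in points]

# Time points from two cubes, in hours since the epoch.
cube_time_points = [[414792.0, 414795.0], [414798.0]]

forecast_times = []
for points in cube_time_points:
    forecast_times.extend(num2date(points))
print(forecast_times)   # 00Z, 03Z and 06Z on 27th April 2017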