    def test_invalid_method(self):
        """
        Test that the plugin can handle an invalid method being passed in.

        """
        plugin = Plugin('smallest distance')
        msg = 'Unknown method'
        with self.assertRaisesRegex(AttributeError, msg):
            plugin.process(self.cube, self.sites, self.ancillary_data)
    def without_ancillary_data(self,
                               method,
                               vertical_bias=None,
                               land_constraint=False):
        """Test the plugin's behaviour with no ancillary data provided."""
        plugin = Plugin(method, vertical_bias, land_constraint)
        if method == 'fast_nearest_neighbour':
            result = plugin.process(self.cube, self.sites, {})
            self.assertIsInstance(result, np.ndarray)
        else:
            with self.assertRaises(KeyError):
                plugin.process(self.cube, self.sites, {})
    def correct_neighbour(self,
                          method,
                          i_expected,
                          j_expected,
                          dz_expected,
                          vertical_bias=None,
                          land_constraint=False):
        """Test that the plugin returns the expected neighbour."""
        plugin = Plugin(method, vertical_bias, land_constraint)
        result = plugin.process(self.cube, self.sites, self.ancillary_data)
        self.assertEqual(result['i'], i_expected)
        self.assertEqual(result['j'], j_expected)
        self.assertEqual(result['dz'], dz_expected)
    def test_invalid_no_neighbours(self):
        """
        Test use of a larger but invalid no of neighbours over which to find
        the minimum vertical displacement.

        """
        plugin = Plugin(method='minimum_height_error_neighbour',
                        vertical_bias=None,
                        land_constraint=False)
        msg = 'Invalid nearest no'
        with self.assertRaisesRegex(ValueError, msg):
            plugin.process(self.cube,
                           self.sites,
                           self.ancillary_data,
                           no_neighbours=20)
    def test_variable_no_neighbours(self):
        """
        Test that the plugin can handle a variable number of neigbours to use
        when relaxing the 'nearest' condition. Make the smallest displacement
        point 2-grid cells away, so it should be captured with no_neighbours
        set to 25.

        """
        self.ancillary_data['orography'].data[13, 10] = 10.
        plugin = Plugin(method='minimum_height_error_neighbour',
                        vertical_bias=None,
                        land_constraint=False)
        result = plugin.process(self.cube,
                                self.sites,
                                self.ancillary_data,
                                no_neighbours=25)
        self.assertEqual(result['i'], 13)
        self.assertEqual(result['j'], 10)
        self.assertEqual(result['dz'], 0.)
    def return_types(self, method, vertical_bias=None, land_constraint=False):
        """Test that the plugin returns a numpy array."""
        plugin = Plugin(method, vertical_bias, land_constraint)
        result = plugin.process(self.cube, self.sites, self.ancillary_data)
        self.assertIsInstance(result, np.ndarray)
        self.assertEqual(result.dtype, self.neighbour_list.dtype)
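The three helpers above (without_ancillary_data, correct_neighbour and return_types) are parameterised checks meant to be driven by concrete test methods. A minimal sketch of such a driver, assuming the same unittest.TestCase fixtures (self.cube, self.sites, self.ancillary_data) used throughout; the method name and the expected i/j/dz values are illustrative, not taken from the real suite:

    def test_fast_nearest_neighbour(self):
        """Hypothetical test driving the shared helpers above."""
        # The default method should return a numpy array of neighbours.
        self.return_types('fast_nearest_neighbour')
        # The method should also cope with an empty ancillary dictionary.
        self.without_ancillary_data('fast_nearest_neighbour')
        # The expected i/j indices and dz displacement are placeholders,
        # not values from the real test suite.
        self.correct_neighbour('fast_nearest_neighbour', 15, 10, 10.)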
Example #7
def run_spotdata(config_file_path,
                 data_path,
                 ancillary_path,
                 diagnostic_list=None,
                 site_path=None,
                 constants_file_path=None,
                 site_properties=None,
                 forecast_date=None,
                 forecast_time=None,
                 forecast_length=168,
                 output_path=None,
                 use_multiprocessing=False):
    """
    A routine that calls the components of the spotdata code. This includes
    building site data into a suitable format, finding grid neighbours to
    those sites with the chosen method, and then extracting data with the
    chosen method. The final results are written out to new irregularly
    gridded iris.cube.Cubes.

    Args:
    -----
    config_file_path : string
        Path to a json file defining the recipes for extracting diagnostics at
        SpotData sites from gridded data.

    data_path : string
        Path to diagnostic data files.

    ancillary_path : string
        Path to ancillary data files.

    diagnostic_list : list of strings
        List of diagnostic names that match those defined in the config_file
        to select which diagnostics to process.

    site_path : string
        Path to the site data file, if in use. This file path is required if
        no latitudes/longitudes are specified at the command line.

    constants_file_path : string
        Path to a json file defining constants to be used in methods that
        have tolerances that may be set, e.g. the maximum vertical
        extrapolation/interpolation of temperatures using a temperature
        lapse rate method.

    site_properties : dict
        Contains:

        latitudes : list of ints/floats or None
            A list of latitudes for running on the fly for a custom set of
            sites. The order should correspond to the subsequent longitudes
            and altitudes variables to construct each site.

        longitudes : list of ints/floats or None
            A list of longitudes for running on the fly for a custom set of
            sites.

        altitudes : list of ints/floats or None
            A list of altitudes for running on the fly for a custom set of
            sites.

        site_ids : list of ints or None
            A list of site_ids to associate with the above on the fly
            constructed sites. This must be ordered the same as the latitudes/
            longitudes/altitudes lists.

    forecast_date : string (YYYYMMDD)
        A string of format YYYYMMDD defining the start date for which forecasts
        are required.

    forecast_time : integer
        An integer giving the hour on the forecast_date at which to start the
        forecast output; 24hr clock such that 17 = 17Z for example.

    forecast_length : integer
        An integer giving the desired length of the forecast output in hours
        (e.g. 48 for a two day forecast period). Defaults to 168 (7 days).

    output_path : string
        Path to which output file containing processed diagnostic should be
        written.

    use_multiprocessing : boolean
        A switch determining whether to use multiprocessing in the data
        extraction step.

    Returns:
    --------
    Writes out cubes of the requested diagnostics, with data extracted to the
    sites read from a file or defined at run time.

    0 upon successful completion.

    Raises:
    -------
    ValueError : raised if no site specifications are provided.
    IOError : raised if required data files are not found at the given
        data_path.

    """
    # Establish forecast time list based upon input specifications, or if not
    # provided, use defaults.
    forecast_times = get_forecast_times(forecast_length,
                                        forecast_date=forecast_date,
                                        forecast_time=forecast_time)

    # Check site data has been provided.
    if site_path is None and not site_properties:
        raise ValueError("No SpotData site information has been provided "
                         "from a file or defined at runtime.")

    # If using locations set at command line, set optional information such
    # as site altitude and site_id. If a site definition file is provided it
    # will take precedence.
    if site_path is None:
        sites = ImportSiteData('runtime_list').process(site_properties)
    else:
        sites = ImportSiteData('from_file').process(site_path)

    # Read in extraction recipes for all diagnostics.
    with open(config_file_path, 'r') as input_file:
        all_diagnostics = json.load(input_file)

    # Read in constants to use; if not available, defaults will be used.
    config_constants = None
    neighbour_kwargs = {}
    if constants_file_path is not None:
        with open(constants_file_path, 'r') as input_file:
            config_constants = json.load(input_file)
        no_neighbours = config_constants.get('no_neighbours')
        if no_neighbours is not None:
            neighbour_kwargs['no_neighbours'] = no_neighbours

    # Use the diagnostic_list to establish which diagnostics are to be
    # processed; if unset, use all.
    diagnostics = all_diagnostics
    if diagnostic_list is not None:
        diagnostics = dict((diagnostic, all_diagnostics[diagnostic])
                           for diagnostic in diagnostic_list)

    # Load ancillary data files; fields that don't vary in time.
    ancillary_data = get_ancillary_data(diagnostics, ancillary_path)

    # Add configuration constants to ancillaries (may be None if unset).
    ancillary_data['config_constants'] = config_constants

    # Set up site-grid point neighbour list using default method. Other IGPS
    # methods will use this as a starting point so it must always be done.
    # Assumes orography file is on the same grid as the diagnostic data.
    neighbours = {}
    default_neighbours = {
        'method': 'fast_nearest_neighbour',
        'vertical_bias': None,
        'land_constraint': False
    }
    default_hash = construct_neighbour_hash(default_neighbours)
    neighbours[default_hash] = PointSelection(**default_neighbours).process(
        ancillary_data['orography'],
        sites,
        ancillary_data=ancillary_data,
        **neighbour_kwargs)

    # Set up site-grid point neighbour lists for all IGPS methods being used.
    for key in diagnostics.keys():
        neighbour_finding = diagnostics[key]['neighbour_finding']
        neighbour_hash = construct_neighbour_hash(neighbour_finding)
        # Check if defined neighbour method results already exist.
        if neighbour_hash not in neighbours.keys():
            # If not, find neighbours with new method.
            neighbours[neighbour_hash] = (PointSelection(
                **neighbour_finding).process(
                    ancillary_data['orography'],
                    sites,
                    ancillary_data=ancillary_data,
                    default_neighbours=neighbours[default_hash],
                    **neighbour_kwargs))

    if use_multiprocessing:
        # Process diagnostics in separate processes if multiprocessing is
        # selected. Determine the number of diagnostics to establish the
        # multiprocessing pool size.
        n_diagnostic_threads = min(len(diagnostics.keys()), mp.cpu_count())

        # Establish a multiprocessing pool - each diagnostic is processed
        # in its own process.
        diagnostic_pool = mp.Pool(processes=n_diagnostic_threads)

        for key in diagnostics.keys():
            diagnostic = diagnostics[key]
            diagnostic_pool.apply_async(process_diagnostic,
                                        args=(diagnostic, neighbours, sites,
                                              forecast_times, data_path,
                                              ancillary_data, output_path))

        diagnostic_pool.close()
        diagnostic_pool.join()

    else:
        # Process diagnostics serially in the main process.
        for key in diagnostics.keys():
            diagnostic = diagnostics[key]
            process_diagnostic(diagnostic,
                               neighbours,
                               sites,
                               forecast_times,
                               data_path,
                               ancillary_data,
                               output_path=output_path)

    return 0
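A hedged invocation sketch for this routine; every path, date and site value below is a placeholder rather than a value from the source, and the json recipe file is assumed to follow the structure read in via config_file_path above:

if __name__ == '__main__':
    # Hypothetical driver: paths, dates and site details are
    # illustrative only.
    status = run_spotdata(
        'diagnostic_recipes.json',
        '/path/to/diagnostic/data',
        '/path/to/ancillary/data',
        diagnostic_list=['temperature'],
        site_properties={'latitudes': [51.5], 'longitudes': [-0.1],
                         'altitudes': [25.0], 'site_ids': [1001]},
        forecast_date='20180101',
        forecast_time=0,
        forecast_length=48,
        output_path='/path/to/output')
    assert status == 0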
Example #8
def run_spotdata(diagnostics,
                 ancillary_data,
                 sites,
                 config_constants,
                 use_multiprocessing=False):
    """
    A routine that calls the components of the spotdata code. This includes
    building site data into a suitable format, finding grid neighbours to
    those sites with the chosen method, and then extracting data with the
    chosen method. The final results are written out to new irregularly
    gridded iris.cube.Cubes.

    Args:
        diagnostics (dict):
            Dictionary containing the information regarding the methods that
            will be applied for a specific diagnostic, the data itself
            (loaded into a cube), and any additional data required to
            compute the requested methods.

            For example::

              {
                  "temperature": {
                      "diagnostic_name": "air_temperature",
                      "extrema": True,
                      "filepath": "temperature_at_screen_level",
                      "interpolation_method": "use_nearest",
                      "neighbour_finding": {
                          "land_constraint": False,
                          "method": "fast_nearest_neighbour",
                          "vertical_bias": None
                      },
                      "data": iris.cube.CubeList,
                      "additional_data": iris.cube.CubeList
                  }
              }

        ancillary_data (dict):
            Dictionary containing named ancillary data; the key gives the name
            and the item is the iris.cube.Cube of data.

        sites (dict):
            Contains:

            latitudes (list of ints/floats or None):
                A list of latitudes for running for a custom set of
                sites. The order should correspond to the subsequent
                longitudes and altitudes variables to construct each site.

            longitudes (list of ints/floats or None):
                A list of longitudes for running for a custom set of
                sites.

            altitudes (list of ints/floats or None):
                A list of altitudes for running for a custom set of
                sites.

            site_ids (list of ints or None):
                A list of site_ids to associate with the above
                constructed sites. This must be ordered the same as the
                latitudes/longitudes/altitudes lists.

        config_constants (dict):
            Dictionary defining constants to be used in methods that have
            tolerances that may be set. e.g. maximum vertical extrapolation/
            interpolation of temperatures using a temperature lapse rate
            method.

        use_multiprocessing (boolean):
            A switch determining whether to use multiprocessing in the data
            extraction step.

    Returns:
        (tuple): tuple containing:
            **resulting_cubes** (iris.cube.CubeList):
                CubeList of cubes, one per diagnostic, after extracting the
                requested diagnostic using the desired extraction method.
                An entry is None if the "resulting_cubes" for that
                diagnostic is an empty CubeList after processing.
            **extrema_cubes** (iris.cube.CubeList):
                CubeList of CubeLists containing extrema values, where the
                'extrema' diagnostic is requested. An entry is None where
                diagnostic_dict["extrema"] is False, so that the extrema
                calculation is not required.
    """
    # Read in constants to use; if not available, defaults will be used.
    neighbour_kwargs = {}
    if config_constants is not None:
        no_neighbours = config_constants.get('no_neighbours')
        if no_neighbours is not None:
            neighbour_kwargs['no_neighbours'] = no_neighbours

    # Add configuration constants to ancillaries (may be None if unset).
    ancillary_data['config_constants'] = config_constants

    # Set up site-grid point neighbour list using default method. Other IGPS
    # methods will use this as a starting point so it must always be done.
    # Assumes orography file is on the same grid as the diagnostic data.
    neighbours = {}
    default_neighbours = {
        'method': 'fast_nearest_neighbour',
        'vertical_bias': None,
        'land_constraint': False
    }
    default_hash = construct_neighbour_hash(default_neighbours)
    neighbours[default_hash] = PointSelection(**default_neighbours).process(
        ancillary_data['orography'],
        sites,
        ancillary_data=ancillary_data,
        **neighbour_kwargs)

    # Set up site-grid point neighbour lists for all IGPS methods being used.
    for key in diagnostics.keys():
        neighbour_finding = diagnostics[key]['neighbour_finding']
        neighbour_hash = construct_neighbour_hash(neighbour_finding)
        # Check if defined neighbour method results already exist.
        if neighbour_hash not in neighbours.keys():
            # If not, find neighbours with new method.
            neighbours[neighbour_hash] = (PointSelection(
                **neighbour_finding).process(
                    ancillary_data['orography'],
                    sites,
                    ancillary_data=ancillary_data,
                    default_neighbours=neighbours[default_hash],
                    **neighbour_kwargs))

    if use_multiprocessing:
        # Process diagnostics in separate processes if multiprocessing is
        # selected. Determine the number of diagnostics to establish the
        # multiprocessing pool size.
        n_diagnostic_threads = min(len(diagnostics.keys()), mp.cpu_count())

        # Establish a multiprocessing pool - each diagnostic is processed
        # in its own process.
        diagnostic_pool = mp.Pool(processes=n_diagnostic_threads)

        diagnostic_keys = list(diagnostics.keys())

        result = (diagnostic_pool.map_async(
            partial(process_diagnostic, diagnostics, neighbours, sites,
                    ancillary_data), diagnostic_keys))
        diagnostic_pool.close()
        diagnostic_pool.join()
        resulting_cubes = CubeList()
        extrema_cubes = CubeList()
        for diagnostic_result in result.get():
            resulting_cubes.append(diagnostic_result[0])
            extrema_cubes.append(diagnostic_result[1:])
    else:
        # Process diagnostics serially in the main process.
        resulting_cubes = CubeList()
        extrema_cubes = CubeList()
        for key in diagnostics.keys():
            resulting_cube, extrema_cubelist = (process_diagnostic(
                diagnostics, neighbours, sites, ancillary_data, key))
            resulting_cubes.append(resulting_cube)
            extrema_cubes.append(extrema_cubelist)
    return resulting_cubes, extrema_cubes
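Unlike Example #7, this refactored interface takes pre-loaded data rather than file paths. A minimal usage sketch, assuming diagnostics, ancillary_data, sites and config_constants have already been assembled upstream (e.g. by loaders like those in Example #7):

# Hypothetical driver for the refactored routine: 'diagnostics',
# 'ancillary_data', 'sites' and 'config_constants' are assumed to have
# been built upstream.
resulting_cubes, extrema_cubes = run_spotdata(
    diagnostics, ancillary_data, sites, config_constants,
    use_multiprocessing=False)
for cube in resulting_cubes:
    # Each entry may be None if nothing was extracted for a diagnostic.
    if cube is not None:
        print(cube.name())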